From 70ce04b44c7e065fcc409682e1cb127b4987dca6 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 18 Jul 2018 16:13:54 +0200 Subject: [PATCH 001/141] Added new --calibrate-algo and --save-config command line options --- src/common/config/CommonConfig.cpp | 11 +++++++++++ src/common/config/CommonConfig.h | 5 +++++ src/common/interfaces/IConfig.h | 3 +++ src/core/ConfigLoader_platform.h | 5 +++++ 4 files changed, 24 insertions(+) diff --git a/src/common/config/CommonConfig.cpp b/src/common/config/CommonConfig.cpp index 7e43b39d..c9cbc971 100644 --- a/src/common/config/CommonConfig.cpp +++ b/src/common/config/CommonConfig.cpp @@ -6,6 +6,7 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -183,6 +184,14 @@ bool xmrig::CommonConfig::parseBoolean(int key, bool enable) m_dryRun = enable; break; + case IConfig::CalibrateAlgoKey: /* --calibrate-algo */ + m_calibrateAlgo = enable; + break; + + case IConfig::SaveConfigKey: /* --save-config */ + m_saveConfig = enable; + break; + default: break; } @@ -267,6 +276,8 @@ bool xmrig::CommonConfig::parseString(int key, const char *arg) case NicehashKey: /* --nicehash */ case ApiIPv6Key: /* --api-ipv6 */ case DryRunKey: /* --dry-run */ + case CalibrateAlgoKey: /* --calibrate-algo */ + case SaveConfigKey: /* --save-config */ return parseBoolean(key, true); case ColorKey: /* --no-color */ diff --git a/src/common/config/CommonConfig.h b/src/common/config/CommonConfig.h index ffebb6b2..17bab1d8 100644 --- a/src/common/config/CommonConfig.h +++ b/src/common/config/CommonConfig.h @@ -6,6 +6,7 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU 
General Public License as published by @@ -48,6 +49,8 @@ public: inline bool isBackground() const { return m_background; } inline bool isColors() const { return m_colors; } inline bool isDryRun() const { return m_dryRun; } + inline bool isCalibrateAlgo() const { return m_calibrateAlgo; } + inline bool isSaveConfig() const { return m_saveConfig; } inline bool isSyslog() const { return m_syslog; } inline const char *apiToken() const { return m_apiToken.data(); } inline const char *apiWorkerId() const { return m_apiWorkerId.data(); } @@ -87,6 +90,8 @@ protected: bool m_background; bool m_colors; bool m_dryRun; + bool m_calibrateAlgo; + bool m_saveConfig; bool m_syslog; bool m_watch; int m_apiPort; diff --git a/src/common/interfaces/IConfig.h b/src/common/interfaces/IConfig.h index 62c7ba94..cd24ac90 100644 --- a/src/common/interfaces/IConfig.h +++ b/src/common/interfaces/IConfig.h @@ -5,6 +5,7 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2016-2018 XMRig + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -67,6 +68,8 @@ public: CPUPriorityKey = 1021, NicehashKey = 1006, PrintTimeKey = 1007, + CalibrateAlgoKey = 10001, + SaveConfigKey = 10002, // xmrig cpu AVKey = 'v', diff --git a/src/core/ConfigLoader_platform.h b/src/core/ConfigLoader_platform.h index bc6657d1..05c4545a 100644 --- a/src/core/ConfigLoader_platform.h +++ b/src/core/ConfigLoader_platform.h @@ -6,6 +6,7 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * * This program is free software: you can redistribute it and/or modify @@ -54,6 +55,7 @@ Options:\n\ cryptonight-heavy\n" #endif "\ + --calibrate-algo run benchmarks before mining to measure hashrates of all supported algos\n\ -o, --url=URL URL of mining server\n\ -O, --userpass=U:P username:password pair for mining 
server\n\ -u, --user=USERNAME username for mining server\n\ @@ -88,6 +90,7 @@ Options:\n\ --api-worker-id=ID custom worker-id for API\n\ --api-ipv6 enable IPv6 support for API\n\ --api-no-restricted enable full remote access (only if API token set)\n\ + --save-config save config file including generated configuration\n\ -h, --help display this help and exit\n\ -V, --version output version information and exit\n\ "; @@ -110,6 +113,8 @@ static struct option const options[] = { { "cpu-priority", 1, nullptr, xmrig::IConfig::CPUPriorityKey }, { "donate-level", 1, nullptr, xmrig::IConfig::DonateLevelKey }, { "dry-run", 0, nullptr, xmrig::IConfig::DryRunKey }, + { "calibrate-algo", 0, nullptr, xmrig::IConfig::CalibrateAlgoKey }, + { "save-config", 0, nullptr, xmrig::IConfig::SaveConfigKey }, { "help", 0, nullptr, xmrig::IConfig::HelpKey }, { "keepalive", 0, nullptr, xmrig::IConfig::KeepAliveKey }, { "log-file", 1, nullptr, xmrig::IConfig::LogFileKey }, From 4c256f9ab7fa71d4dedfb02f4166219288f290c2 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 18 Jul 2018 16:24:29 +0200 Subject: [PATCH 002/141] Report all possible algorithms to the pool --- src/common/net/Pool.cpp | 62 ++++++++++------------------------------- src/common/net/Pool.h | 4 +-- 2 files changed, 16 insertions(+), 50 deletions(-) diff --git a/src/common/net/Pool.cpp b/src/common/net/Pool.cpp index 053f2507..bfe42b51 100644 --- a/src/common/net/Pool.cpp +++ b/src/common/net/Pool.cpp @@ -6,6 +6,7 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -48,6 +49,20 @@ Pool::Pool() : m_keepAlive(0), m_port(kDefaultPort) { + // here xmrig now reports all possible supported algorithms + m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_1)); + 
m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_0)); + m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_XTL)); + m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_MSR)); + m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_XAO)); + m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_RTO)); + + m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_1)); + m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_0)); + + m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_0)); + m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_XHV)); + m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_TUBE)); } @@ -233,19 +248,8 @@ void Pool::adjust(const xmrig::Algorithm &algorithm) m_algorithm.setAlgo(algorithm.algo()); adjustVariant(algorithm.variant()); } - - rebuild(); } - -void Pool::setAlgo(const xmrig::Algorithm &algorithm) -{ - m_algorithm = algorithm; - - rebuild(); -} - - #ifdef APP_DEBUG void Pool::print() const { @@ -284,18 +288,6 @@ bool Pool::parseIPv6(const char *addr) return true; } - -void Pool::addVariant(xmrig::Variant variant) -{ - const xmrig::Algorithm algorithm(m_algorithm.algo(), variant); - if (!algorithm.isValid() || m_algorithm == algorithm) { - return; - } - - m_algorithms.push_back(algorithm); -} - - void Pool::adjustVariant(const xmrig::Variant variantHint) { # ifndef XMRIG_PROXY_PROJECT @@ -364,27 +356,3 @@ void Pool::adjustVariant(const xmrig::Variant variantHint) } # endif } - - -void Pool::rebuild() -{ - m_algorithms.clear(); - - if (!m_algorithm.isValid()) { - return; - } - - m_algorithms.push_back(m_algorithm); - -# ifndef XMRIG_PROXY_PROJECT - addVariant(xmrig::VARIANT_1); - addVariant(xmrig::VARIANT_0); - addVariant(xmrig::VARIANT_XTL); - addVariant(xmrig::VARIANT_TUBE); - 
addVariant(xmrig::VARIANT_MSR); - addVariant(xmrig::VARIANT_XHV); - addVariant(xmrig::VARIANT_XAO); - addVariant(xmrig::VARIANT_RTO); - addVariant(xmrig::VARIANT_AUTO); -# endif -} diff --git a/src/common/net/Pool.h b/src/common/net/Pool.h index 57a30d1e..04a26818 100644 --- a/src/common/net/Pool.h +++ b/src/common/net/Pool.h @@ -6,6 +6,7 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -78,7 +79,6 @@ public: bool setUserpass(const char *userpass); rapidjson::Value toJSON(rapidjson::Document &doc) const; void adjust(const xmrig::Algorithm &algorithm); - void setAlgo(const xmrig::Algorithm &algorithm); # ifdef APP_DEBUG void print() const; @@ -86,9 +86,7 @@ public: private: bool parseIPv6(const char *addr); - void addVariant(xmrig::Variant variant); void adjustVariant(const xmrig::Variant variantHint); - void rebuild(); bool m_nicehash; int m_keepAlive; From c8425e99500f32ca92b5677bc1d461314d6f7a35 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 18 Jul 2018 16:28:45 +0200 Subject: [PATCH 003/141] Removed unused method --- src/common/net/Client.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/common/net/Client.h b/src/common/net/Client.h index 4be8badb..7efa1f75 100644 --- a/src/common/net/Client.h +++ b/src/common/net/Client.h @@ -74,7 +74,6 @@ public: inline int id() const { return m_id; } inline SocketState state() const { return m_state; } inline uint16_t port() const { return m_pool.port(); } - inline void setAlgo(const xmrig::Algorithm &algo) { m_pool.setAlgo(algo); } inline void setQuiet(bool quiet) { m_quiet = quiet; } inline void setRetries(int retries) { m_retries = retries; } inline void setRetryPause(int ms) { m_retryPause = ms; } From 5f73919be151f5deb17bc07d51e9a5c8d06fff19 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: 
Wed, 18 Jul 2018 16:33:05 +0200 Subject: [PATCH 004/141] Added perf algo (PerfAlgo) basic support --- src/common/crypto/Algorithm.cpp | 49 +++++++++++++++++++++++++++++++++ src/common/crypto/Algorithm.h | 6 ++++ src/common/xmrig.h | 10 +++++++ 3 files changed, 65 insertions(+) diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp index 31035fb1..c253d9c8 100644 --- a/src/common/crypto/Algorithm.cpp +++ b/src/common/crypto/Algorithm.cpp @@ -7,6 +7,7 @@ * Copyright 2017-2018 XMR-Stak , * Copyright 2018 Lee Clagett * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -223,3 +224,51 @@ const char *xmrig::Algorithm::name(bool shortName) const return "invalid"; } + + +// returns string name of the PerfAlgo +const char *xmrig::Algorithm::perfAlgoName(const xmrig::PerfAlgo pa) { + static const char* perf_algo_names[xmrig::PerfAlgo::PA_MAX] = { + "cn", + "cn-fast", + "cn-lite", + "cn-heavy", + }; + return perf_algo_names[pa]; +} + +// constructs Algorithm from PerfAlgo +xmrig::Algorithm::Algorithm(const xmrig::PerfAlgo pa) { + switch (pa) { + case PA_CN: + m_algo = xmrig::CRYPTONIGHT; + m_variant = xmrig::VARIANT_1; + break; + case PA_CN_FAST: + m_algo = xmrig::CRYPTONIGHT; + m_variant = xmrig::VARIANT_MSR; + break; + case PA_CN_LITE: + m_algo = xmrig::CRYPTONIGHT_LITE; + m_variant = xmrig::VARIANT_1; + break; + case PA_CN_HEAVY: + m_algo = xmrig::CRYPTONIGHT_HEAVY; + m_variant = xmrig::VARIANT_0; + break; + default: + m_algo = xmrig::INVALID_ALGO; + m_variant = xmrig::VARIANT_AUTO; + } +} + +// returns PerfAlgo that corresponds to current Algorithm +xmrig::PerfAlgo xmrig::Algorithm::perf_algo() const { + if (m_variant == VARIANT_MSR) return PA_CN_FAST; + switch (m_algo) { + case CRYPTONIGHT: return PA_CN; + case CRYPTONIGHT_LITE: return PA_CN_LITE; + case CRYPTONIGHT_HEAVY: return PA_CN_HEAVY; 
+ default: return PA_INVALID; + } +} diff --git a/src/common/crypto/Algorithm.h b/src/common/crypto/Algorithm.h index bcf029d8..77e2dfff 100644 --- a/src/common/crypto/Algorithm.h +++ b/src/common/crypto/Algorithm.h @@ -7,6 +7,7 @@ * Copyright 2017-2018 XMR-Stak , * Copyright 2018 Lee Clagett * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -49,6 +50,9 @@ public: setAlgo(algo); } + // constructs Algorithm from PerfAlgo + Algorithm(const xmrig::PerfAlgo); + inline Algorithm(const char *algo) { parseAlgorithm(algo); @@ -56,8 +60,10 @@ public: bool isEqual(const Algorithm &other) const { return m_algo == other.m_algo && m_variant == other.m_variant; } inline Algo algo() const { return m_algo; } + xmrig::PerfAlgo perf_algo() const; // returns PerfAlgo that corresponds to current Algorithm inline const char *name() const { return name(false); } inline const char *shortName() const { return name(true); } + static const char *perfAlgoName(xmrig::PerfAlgo); // returns string name of the PerfAlgo inline Variant variant() const { return m_variant; } inline void setVariant(Variant variant) { m_variant = variant; } diff --git a/src/common/xmrig.h b/src/common/xmrig.h index 0ff945b9..515c6709 100644 --- a/src/common/xmrig.h +++ b/src/common/xmrig.h @@ -6,6 +6,7 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,6 +37,15 @@ enum Algo { CRYPTONIGHT_HEAVY /* CryptoNight-Heavy (SUMO) */ }; +// algorithms that can have different performance +enum PerfAlgo { + PA_INVALID = -1, + PA_CN, /* CryptoNight (Monero) */ + PA_CN_FAST, /* CryptoNight-Fast (Masari) */ + PA_CN_LITE, /* CryptoNight-Lite (AEON) */ + 
PA_CN_HEAVY, /* CryptoNight-Heavy (SUMO) */ + PA_MAX +}; //--av=1 For CPUs with hardware AES. //--av=2 Lower power mode (double hash) of 1. From a8d399e450744fb396bd97d302f2ff68bffe2256 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 18 Jul 2018 16:40:38 +0200 Subject: [PATCH 005/141] Added support for extended threads and algo-perf config parameters --- src/core/Config.cpp | 147 +++++++++++++++++++++++++++------------- src/core/Config.h | 32 +++++++-- src/core/Controller.cpp | 4 +- 3 files changed, 131 insertions(+), 52 deletions(-) diff --git a/src/core/Config.cpp b/src/core/Config.cpp index fa6afdb4..f37e2e0f 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -6,6 +6,7 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,6 +37,10 @@ #include "rapidjson/prettywriter.h" #include "workers/CpuThread.h" +// for usage in Client::login to get_algo_perf +namespace xmrig { + Config* pconfig = nullptr; +}; static char affinity_tmp[20] = { 0 }; @@ -48,6 +53,11 @@ xmrig::Config::Config() : xmrig::CommonConfig(), m_maxCpuUsage(75), m_priority(-1) { + // not defined algo performance is considered to be 0 + for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { + const xmrig::PerfAlgo pa = static_cast(a); + m_algo_perf[pa] = 0; + } } @@ -111,18 +121,34 @@ void xmrig::Config::getJSON(rapidjson::Document &doc) const doc.AddMember("retry-pause", retryPause(), allocator); doc.AddMember("safe", m_safe, allocator); - if (threadsMode() == Advanced) { - Value threads(kArrayType); + // save extended "threads" based on m_threads + Value threads(kObjectType); + for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { + const xmrig::PerfAlgo pa = static_cast(a); + Value key(xmrig::Algorithm::perfAlgoName(pa), allocator); + if (threadsMode(pa) == Advanced) 
{ + Value threads2(kArrayType); - for (const IThread *thread : m_threads.list) { - threads.PushBack(thread->toConfig(doc), allocator); + for (const IThread *thread : m_threads[pa].list) { + threads2.PushBack(thread->toConfig(doc), allocator); + } + + threads.AddMember(key, threads2, allocator); } + else { + threads.AddMember(key, threadsMode(pa) == Automatic ? Value(kNullType) : Value(threadsCount(pa)), allocator); + } + } + doc.AddMember("threads", threads, allocator); - doc.AddMember("threads", threads, allocator); - } - else { - doc.AddMember("threads", threadsMode() == Automatic ? Value(kNullType) : Value(threadsCount()), allocator); + // save "algo-perf" based on m_algo_perf + Value algo_perf(kObjectType); + for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { + const xmrig::PerfAlgo pa = static_cast(a); + Value key(xmrig::Algorithm::perfAlgoName(pa), allocator); + algo_perf.AddMember(key, Value(m_algo_perf[pa]), allocator); } + doc.AddMember("algo-perf", algo_perf, allocator); doc.AddMember("user-agent", userAgent() ? Value(StringRef(userAgent())).Move() : Value(kNullType).Move(), allocator); @@ -150,34 +176,36 @@ bool xmrig::Config::finalize() return false; } - if (!m_threads.cpu.empty()) { - m_threads.mode = Advanced; - const bool softAES = (m_aesMode == AES_AUTO ? (Cpu::hasAES() ? AES_HW : AES_SOFT) : m_aesMode) == AES_SOFT; + // parse "threads" into m_threads + for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { + const xmrig::PerfAlgo pa = static_cast(a); + if (!m_threads[pa].cpu.empty()) { + m_threads[pa].mode = Advanced; + const bool softAES = (m_aesMode == AES_AUTO ? (Cpu::hasAES() ? 
AES_HW : AES_SOFT) : m_aesMode) == AES_SOFT; - for (size_t i = 0; i < m_threads.cpu.size(); ++i) { - m_threads.list.push_back(CpuThread::createFromData(i, m_algorithm.algo(), m_threads.cpu[i], m_priority, softAES)); + for (size_t i = 0; i < m_threads[pa].cpu.size(); ++i) { + m_threads[pa].list.push_back(CpuThread::createFromData(i, xmrig::Algorithm(pa).algo(), m_threads[pa].cpu[i], m_priority, softAES)); + } + } else { + const AlgoVariant av = getAlgoVariant(); + m_threads[pa].mode = m_threads[pa].count ? Simple : Automatic; + + const size_t size = CpuThread::multiway(av) * cn_select_memory(xmrig::Algorithm(pa).algo()) / 1024; + + if (!m_threads[pa].count) { + m_threads[pa].count = Cpu::optimalThreadsCount(size, m_maxCpuUsage); + } + else if (m_safe) { + const size_t count = Cpu::optimalThreadsCount(size, m_maxCpuUsage); + if (m_threads[pa].count > count) { + m_threads[pa].count = count; + } + } + + for (size_t i = 0; i < m_threads[pa].count; ++i) { + m_threads[pa].list.push_back(CpuThread::createFromAV(i, xmrig::Algorithm(pa).algo(), av, m_threads[pa].mask, m_priority)); + } } - - return true; - } - - const AlgoVariant av = getAlgoVariant(); - m_threads.mode = m_threads.count ? 
Simple : Automatic; - - const size_t size = CpuThread::multiway(av) * cn_select_memory(m_algorithm.algo()) / 1024; - - if (!m_threads.count) { - m_threads.count = Cpu::optimalThreadsCount(size, m_maxCpuUsage); - } - else if (m_safe) { - const size_t count = Cpu::optimalThreadsCount(size, m_maxCpuUsage); - if (m_threads.count > count) { - m_threads.count = count; - } - } - - for (size_t i = 0; i < m_threads.count; ++i) { - m_threads.list.push_back(CpuThread::createFromAV(i, m_algorithm.algo(), av, m_threads.mask, m_priority)); } return true; @@ -231,7 +259,7 @@ bool xmrig::Config::parseString(int key, const char *arg) case ThreadsKey: /* --threads */ if (strncmp(arg, "all", 3) == 0) { - m_threads.count = Cpu::threads(); + m_threads[m_algorithm.perf_algo()].count = Cpu::threads(); // sets default algo threads return true; } @@ -260,7 +288,7 @@ bool xmrig::Config::parseUint64(int key, uint64_t arg) switch (key) { case CPUAffinityKey: /* --cpu-affinity */ if (arg) { - m_threads.mask = arg; + m_threads[m_algorithm.perf_algo()].mask = arg; // sets default algo threads } break; @@ -272,22 +300,49 @@ bool xmrig::Config::parseUint64(int key, uint64_t arg) } +// parse specific perf algo (or generic) threads config +void xmrig::Config::parseThreadsJSON(const rapidjson::Value &threads, const xmrig::PerfAlgo pa) +{ + for (const rapidjson::Value &value : threads.GetArray()) { + if (!value.IsObject()) { + continue; + } + + if (value.HasMember("low_power_mode")) { + auto data = CpuThread::parse(value); + + if (data.valid) { + m_threads[pa].cpu.push_back(std::move(data)); + } + } + } +} + void xmrig::Config::parseJSON(const rapidjson::Document &doc) { const rapidjson::Value &threads = doc["threads"]; if (threads.IsArray()) { - for (const rapidjson::Value &value : threads.GetArray()) { - if (!value.IsObject()) { - continue; + // parse generic (old) threads + parseThreadsJSON(threads, m_algorithm.perf_algo()); + } else if (threads.IsObject()) { + // parse new specific perf algo 
threads + for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { + const xmrig::PerfAlgo pa = static_cast(a); + const rapidjson::Value &threads2 = threads[xmrig::Algorithm::perfAlgoName(pa)]; + if (threads2.IsArray()) { + parseThreadsJSON(threads2, pa); } + } + } - if (value.HasMember("low_power_mode")) { - auto data = CpuThread::parse(value); - - if (data.valid) { - m_threads.cpu.push_back(std::move(data)); - } + const rapidjson::Value &algo_perf = doc["algo-perf"]; + if (algo_perf.IsObject()) { + for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { + const xmrig::PerfAlgo pa = static_cast(a); + const rapidjson::Value &key = algo_perf[xmrig::Algorithm::perfAlgoName(pa)]; + if (key.IsDouble()) { + m_algo_perf[pa] = key.GetDouble(); } } } @@ -299,7 +354,7 @@ bool xmrig::Config::parseInt(int key, int arg) switch (key) { case ThreadsKey: /* --threads */ if (arg >= 0 && arg < 1024) { - m_threads.count = arg; + m_threads[m_algorithm.perf_algo()].count = arg; // sets default algo threads } break; diff --git a/src/core/Config.h b/src/core/Config.h index f0f1404f..0bf7488d 100644 --- a/src/core/Config.h +++ b/src/core/Config.h @@ -6,6 +6,7 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -78,11 +79,25 @@ public: inline AesMode aesMode() const { return m_aesMode; } inline AlgoVariant algoVariant() const { return m_algoVariant; } inline bool isHugePages() const { return m_hugePages; } - inline const std::vector &threads() const { return m_threads.list; } inline int priority() const { return m_priority; } - inline int threadsCount() const { return m_threads.list.size(); } - inline int64_t affinity() const { return m_threads.mask; } - inline ThreadsMode threadsMode() const { return m_threads.mode; } + + // access to m_threads taking into account that it is 
now separated for each perf algo + inline const std::vector &threads(const xmrig::PerfAlgo pa = PA_INVALID) const { + return m_threads[pa == PA_INVALID ? m_algorithm.perf_algo() : pa].list; + } + inline int threadsCount(const xmrig::PerfAlgo pa = PA_INVALID) const { + return m_threads[pa == PA_INVALID ? m_algorithm.perf_algo() : pa].list.size(); + } + inline int64_t affinity(const xmrig::PerfAlgo pa = PA_INVALID) const { + return m_threads[pa == PA_INVALID ? m_algorithm.perf_algo() : pa].mask; + } + inline ThreadsMode threadsMode(const xmrig::PerfAlgo pa = PA_INVALID) const { + return m_threads[pa == PA_INVALID ? m_algorithm.perf_algo() : pa].mode; + } + + // access to perf algo results + inline float get_algo_perf(const xmrig::PerfAlgo pa) const { return m_algo_perf[pa]; } + inline void set_algo_perf(const xmrig::PerfAlgo pa, const float value) { m_algo_perf[pa] = value; } static Config *load(int argc, char **argv, IWatcherListener *listener); @@ -92,6 +107,8 @@ protected: bool parseString(int key, const char *arg) override; bool parseUint64(int key, uint64_t arg) override; void parseJSON(const rapidjson::Document &doc) override; + // parse specific perf algo (or generic) threads config + void parseThreadsJSON(const rapidjson::Value &threads, xmrig::PerfAlgo); private: bool parseInt(int key, int arg); @@ -120,9 +137,14 @@ private: bool m_safe; int m_maxCpuUsage; int m_priority; - Threads m_threads; + // threads config for each perf algo + Threads m_threads[xmrig::PerfAlgo::PA_MAX]; + // perf algo hashrate results + float m_algo_perf[xmrig::PerfAlgo::PA_MAX]; }; +extern Config* pconfig; + } /* namespace xmrig */ diff --git a/src/core/Controller.cpp b/src/core/Controller.cpp index ce73f037..a21c8ad9 100644 --- a/src/core/Controller.cpp +++ b/src/core/Controller.cpp @@ -6,6 +6,7 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or 
modify * it under the terms of the GNU General Public License as published by @@ -96,7 +97,8 @@ int xmrig::Controller::init(int argc, char **argv) { Cpu::init(); - d_ptr->config = xmrig::Config::load(argc, argv, this); + // init pconfig global pointer to config + pconfig = d_ptr->config = xmrig::Config::load(argc, argv, this); if (!d_ptr->config) { return 1; } From 9951bd756deb4b5d354ecb8d61e147471c757cbe Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 18 Jul 2018 16:49:09 +0200 Subject: [PATCH 006/141] Added algo performance calibration (benchmarking) functionality --- CMakeLists.txt | 2 + src/App.cpp | 15 ++++++- src/common/net/Job.cpp | 7 +++ src/common/net/Job.h | 4 ++ src/config.json | 6 +++ src/workers/Benchmark.cpp | 92 +++++++++++++++++++++++++++++++++++++++ src/workers/Benchmark.h | 51 ++++++++++++++++++++++ src/workers/Workers.cpp | 64 +++++++++++++++++++++++++++ src/workers/Workers.h | 4 ++ 9 files changed, 244 insertions(+), 1 deletion(-) create mode 100644 src/workers/Benchmark.cpp create mode 100644 src/workers/Benchmark.h diff --git a/CMakeLists.txt b/CMakeLists.txt index e263808e..90c7d842 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,6 +56,7 @@ set(HEADERS src/net/strategies/DonateStrategy.h src/Summary.h src/version.h + src/workers/Benchmark.h src/workers/CpuThread.h src/workers/Handle.h src/workers/Hashrate.h @@ -110,6 +111,7 @@ set(SOURCES src/net/Network.cpp src/net/strategies/DonateStrategy.cpp src/Summary.cpp + src/workers/Benchmark.cpp src/workers/CpuThread.cpp src/workers/Handle.cpp src/workers/Hashrate.cpp diff --git a/src/App.cpp b/src/App.cpp index adcc5752..c2e5ba9c 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -6,6 +6,7 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -40,6 +41,7 @@ #include 
"Summary.h" #include "version.h" #include "workers/Workers.h" +#include "workers/Benchmark.h" #ifndef XMRIG_NO_HTTPD @@ -84,6 +86,8 @@ App::~App() # endif } +// this should be global since we register onJobResult using this object method +static Benchmark benchmark; int App::exec() { @@ -125,7 +129,16 @@ int App::exec() Workers::start(m_controller); - m_controller->network()->connect(); + // run benchmark before pool mining or not? + if (m_controller->config()->isCalibrateAlgo()) { + benchmark.set_controller(m_controller); // we need controller there to access config and network objects + Workers::setListener(&benchmark); // register benchmark as job reault listener to compute hashrates there + benchmark.start_perf_bench(xmrig::PerfAlgo::PA_CN); // start benchmarking from first PerfAlgo in the list + } else { + // save config here to have option to store automatically generated "threads" + if (m_controller->config()->isSaveConfig()) m_controller->config()->save(); + m_controller->network()->connect(); + } const int r = uv_run(uv_default_loop(), UV_RUN_DEFAULT); uv_loop_close(uv_default_loop()); diff --git a/src/common/net/Job.cpp b/src/common/net/Job.cpp index 80b521ea..2e7204fb 100644 --- a/src/common/net/Job.cpp +++ b/src/common/net/Job.cpp @@ -7,6 +7,7 @@ * Copyright 2017-2018 XMR-Stak , * Copyright 2018 Lee Clagett * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -120,6 +121,12 @@ bool Job::setBlob(const char *blob) return true; } +// for algo benchmarking +void Job::setRawBlob(const uint8_t *blob, const size_t size) +{ + memcpy(m_blob, blob, m_size = size); +} + bool Job::setTarget(const char *target) { diff --git a/src/common/net/Job.h b/src/common/net/Job.h index 049eb7d4..050bff48 100644 --- a/src/common/net/Job.h +++ b/src/common/net/Job.h @@ -7,6 +7,7 @@ * Copyright 2017-2018 XMR-Stak , * 
Copyright 2018 Lee Clagett * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -42,7 +43,10 @@ public: ~Job(); bool setBlob(const char *blob); + void setRawBlob(const uint8_t *blob, const size_t size); // for algo benchmarking bool setTarget(const char *target); + // for algo benchmarking to set PoW variant + void setAlgorithm(const xmrig::Algorithm& algorithm) { m_algorithm = algorithm; } xmrig::Variant variant() const; inline bool isNicehash() const { return m_nicehash; } diff --git a/src/config.json b/src/config.json index b2dad4c9..b71a470b 100644 --- a/src/config.json +++ b/src/config.json @@ -33,6 +33,12 @@ "retry-pause": 5, "safe": false, "threads": null, + "algo-perf": { + "cn": 1000, + "cn-fast": 2000, + "cn-lite": 2000, + "cn-heavy": 700 + }, "user-agent": null, "watch": false } \ No newline at end of file diff --git a/src/workers/Benchmark.cpp b/src/workers/Benchmark.cpp new file mode 100644 index 00000000..b2969f68 --- /dev/null +++ b/src/workers/Benchmark.cpp @@ -0,0 +1,92 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "workers/Benchmark.h" +#include "workers/Workers.h" +#include "core/Config.h" +#include "net/Network.h" +#include "common/log/Log.h" +#include + +// start performance measurements for specified perf algo +void Benchmark::start_perf_bench(const xmrig::PerfAlgo pa) { + m_pa = pa; // current perf algo + m_hash_count = 0; // number of hashes calculated for current perf algo + m_time_start = get_now(); // time of measurements start (in ms) + Workers::switch_algo(xmrig::Algorithm(pa)); // switch workers to new algo (Algo part) + + // prepare test job for benchmark runs + Job job; + job.setId(xmrig::Algorithm::perfAlgoName(pa)); // need to set different id so that workers will see job change + const static uint8_t test_input[76] = { + 0x99, // 0x99 here to trigger all future algo versions for auto variant detection based on block version + 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00, + 0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B, + 0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62, + 0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92, + 0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01, + }; + job.setRawBlob(test_input, 76); + job.setTarget("FFFFFFFFFFFFFFFF"); // set difficulty to 1 to get to onJobResult after every computed hash + job.setAlgorithm(xmrig::Algorithm(pa)); // set job algo (for Variant part) + if (!m_is_benchmark_time) { // write text before first benchmark round + Log::i()->text(m_controller->config()->isColors() + ? 
GREEN_BOLD(" >>>>> ") WHITE_BOLD("STARTING ALGO PERFORMANCE CALIBRATION") + : " >>>>> STARTING ALGO PERFORMANCE CALIBRATION" + ); + } + m_is_benchmark_time = true; // benchmarking is in process + Workers::setJob(job, false); // set job for workers to compute +} + +void Benchmark::onJobResult(const JobResult& result) { + if (!m_is_benchmark_time) return; // ignore job results if we already stopeed benchmarking (before new job from the pool comes) + ++ m_hash_count; + const uint64_t now = get_now(); + if (now - m_time_start > m_bench_secs*1000) { // end of becnhmark round for m_pa + const float hashrate = static_cast(m_hash_count) / (now - m_time_start) * 1000.0f; + m_controller->config()->set_algo_perf(m_pa, hashrate); // store hashrate result + Log::i()->text(m_controller->config()->isColors() + ? GREEN_BOLD(" ===> ") CYAN_BOLD("%s") WHITE_BOLD(" hashrate: ") CYAN_BOLD("%f") + : " ===> %s hasrate: %f", + xmrig::Algorithm::perfAlgoName(m_pa), + hashrate + ); + const xmrig::PerfAlgo pa = static_cast(m_pa + 1); // compute next perf algo to benchmark + if (pa != xmrig::PerfAlgo::PA_MAX) { + start_perf_bench(pa); + } else { // en of benchmarks and switching to jobs from the pool (network) + m_is_benchmark_time = false; + if (m_controller->config()->isSaveConfig()) m_controller->config()->save(); // save config with measured algo-perf + Workers::pause(); // do not compute anything before job from the pool + Workers::setListener(m_controller->network()); + m_controller->network()->connect(); + } + } +} + +uint64_t Benchmark::get_now() const { // get current time in ms + using namespace std::chrono; + return time_point_cast(high_resolution_clock::now()).time_since_epoch().count(); +} diff --git a/src/workers/Benchmark.h b/src/workers/Benchmark.h new file mode 100644 index 00000000..479b14b3 --- /dev/null +++ b/src/workers/Benchmark.h @@ -0,0 +1,51 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 
2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#pragma once + +#include + +#include "common/xmrig.h" +#include "interfaces/IJobResultListener.h" +#include "core/Controller.h" + +class Benchmark : public IJobResultListener { + const uint64_t m_bench_secs = 5; // time in seconds to benchmark each perf algo + bool m_is_benchmark_time; // true is we benchmark some perf algo now + xmrig::PerfAlgo m_pa; // current perf algo we benchmark + uint64_t m_hash_count; // number of hashes calculated for current perf algo + uint64_t m_time_start; // time of measurements start for current perf algo (in ms) + xmrig::Controller* m_controller; // to get access to config and network + + uint64_t get_now() const; // get current time in ms + + void onJobResult(const JobResult&) override; // onJobResult is called after each computed benchmark hash + + public: + Benchmark() : m_is_benchmark_time(false) {} + virtual ~Benchmark() {} + + void set_controller(xmrig::Controller* controller) { m_controller = controller; } + void start_perf_bench(const xmrig::PerfAlgo); // start benchmark for specified perf algo +}; diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index 0e75e736..d77bf681 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ 
-6,6 +6,7 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -194,6 +195,69 @@ void Workers::start(xmrig::Controller *controller) } } +void Workers::soft_stop() // stop current workers leaving uv stuff intact (used in switch_algo) +{ + if (m_hashrate) { + m_hashrate->stop(); + delete m_hashrate; + } + + m_sequence = 0; + m_paused = 0; + + for (size_t i = 0; i < m_workers.size(); ++i) { + m_workers[i]->join(); + delete m_workers[i]; + } + m_workers.clear(); +} + +// setups workers based on specified algorithm (or its basic perf algo more specifically) +void Workers::switch_algo(const xmrig::Algorithm algorithm) +{ + if (m_status.algo == algorithm.algo()) return; + + soft_stop(); + + m_sequence = 1; + m_paused = 1; + + const std::vector &threads = m_controller->config()->threads(algorithm.perf_algo()); + m_status.algo = algorithm.algo(); + m_status.threads = threads.size(); + + // string with multiway thread info + std::string str_threads; + for (const xmrig::IThread *thread : threads) { + if (!str_threads.empty()) str_threads = str_threads + ", "; + str_threads = str_threads + "x" + std::to_string(thread->multiway()); + } + Log::i()->text(m_controller->config()->isColors() + ? 
GREEN_BOLD(" >>> ") WHITE_BOLD("ALGO CHANGE: ") CYAN_BOLD("%s") ", " CYAN_BOLD("%d (%s)") " thread(s)" + : " >>> ALGO CHANGE: %s, %d (%s) thread(s)", + algorithm.name(), + threads.size(), + str_threads.c_str() + ); + + m_status.ways = 0; + for (const xmrig::IThread *thread : threads) { + m_status.ways += thread->multiway(); + } + + m_hashrate = new Hashrate(threads.size(), m_controller); + + uint32_t offset = 0; + + for (xmrig::IThread *thread : threads) { + Handle *handle = new Handle(thread, offset, m_status.ways); + offset += thread->multiway(); + + m_workers.push_back(handle); + handle->start(Workers::onReady); + } +} + void Workers::stop() { diff --git a/src/workers/Workers.h b/src/workers/Workers.h index 1d619cea..8cfd02d8 100644 --- a/src/workers/Workers.h +++ b/src/workers/Workers.h @@ -6,6 +6,7 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -56,6 +57,8 @@ public: static void setEnabled(bool enabled); static void setJob(const Job &job, bool donate); static void start(xmrig::Controller *controller); + // setups workers based on specified algorithm (or its basic perf algo more specifically) + static void switch_algo(xmrig::Algorithm); static void stop(); static void submit(const JobResult &result); @@ -76,6 +79,7 @@ private: static void onResult(uv_async_t *handle); static void onTick(uv_timer_t *handle); static void start(IWorker *worker); + static void soft_stop(); // stop current workers leaving uv stuff intact (used in switch_algo) class LaunchStatus { From 3b77fc1dbd1e73e6a8b393d6240366f53e305a23 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 18 Jul 2018 16:54:21 +0200 Subject: [PATCH 007/141] Added pool job algo switch functionality --- src/common/net/Client.cpp | 25 +++++++++++++++++++++---- src/common/net/Client.h | 1 + 2 
files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/common/net/Client.cpp b/src/common/net/Client.cpp index f4553d97..8ca39775 100644 --- a/src/common/net/Client.cpp +++ b/src/common/net/Client.cpp @@ -6,6 +6,7 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -33,6 +34,8 @@ #include "common/log/Log.h" #include "common/net/Client.h" #include "net/JobResult.h" +#include "core/Config.h" // for pconfig to access pconfig->get_algo_perf +#include "workers/Workers.h" // to do Workers::switch_algo #include "rapidjson/document.h" #include "rapidjson/error/en.h" #include "rapidjson/stringbuffer.h" @@ -269,10 +272,6 @@ bool Client::parseJob(const rapidjson::Value ¶ms, int *code) return false; } - if (params.HasMember("algo")) { - job.algorithm().parseAlgorithm(params["algo"].GetString()); - } - if (params.HasMember("variant")) { const rapidjson::Value &variant = params["variant"]; @@ -284,6 +283,11 @@ bool Client::parseJob(const rapidjson::Value ¶ms, int *code) } } + // moved algo after variant parsing to override variant that is considered to be outdated now + if (params.HasMember("algo")) { + job.algorithm().parseAlgorithm(params["algo"].GetString()); + } + if (!verifyAlgorithm(job.algorithm())) { *code = 6; @@ -291,6 +295,9 @@ bool Client::parseJob(const rapidjson::Value ¶ms, int *code) return false; } + // retarget workers for possible new Algo profile (same algo profile is not reapplied) + Workers::switch_algo(job.algorithm()); + if (m_job != job) { m_jobs++; m_job = std::move(job); @@ -495,6 +502,16 @@ void Client::login() } params.AddMember("algo", algo, allocator); + + // addding algo-perf based on pconfig->get_algo_perf + Value algo_perf(kObjectType); + for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { + const xmrig::PerfAlgo pa = 
static_cast(a); + Value key(xmrig::Algorithm::perfAlgoName(pa), allocator); + algo_perf.AddMember(key, Value(xmrig::pconfig->get_algo_perf(pa)), allocator); + } + + params.AddMember("algo-perf", algo_perf, allocator); } doc.AddMember("params", params, allocator); diff --git a/src/common/net/Client.h b/src/common/net/Client.h index 7efa1f75..f98efe1f 100644 --- a/src/common/net/Client.h +++ b/src/common/net/Client.h @@ -6,6 +6,7 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , + * Copyright 2018 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by From d6a56a2e0a4510b3b3f44e66bdefa046cc2440c4 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 24 Jul 2018 08:28:30 +0200 Subject: [PATCH 008/141] Converted algo-perf values to floats --- src/config.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/config.json b/src/config.json index b71a470b..ed5d8e37 100644 --- a/src/config.json +++ b/src/config.json @@ -34,10 +34,10 @@ "safe": false, "threads": null, "algo-perf": { - "cn": 1000, - "cn-fast": 2000, - "cn-lite": 2000, - "cn-heavy": 700 + "cn": 1000.0, + "cn-fast": 2000.0, + "cn-lite": 2000.0, + "cn-heavy": 700.0 }, "user-agent": null, "watch": false From 684feb38fdd8dc4e52df26da5f93851ff15cd78a Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 26 Jul 2018 19:59:39 +0200 Subject: [PATCH 009/141] Improved benchmark precision based on xmrig-amd findings --- src/App.cpp | 5 +++++ src/workers/Benchmark.cpp | 43 ++++++++++++++++++++------------------- src/workers/Benchmark.h | 3 +-- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/src/App.cpp b/src/App.cpp index c2e5ba9c..66abc1b5 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -133,6 +133,11 @@ int App::exec() if (m_controller->config()->isCalibrateAlgo()) { benchmark.set_controller(m_controller); // we need controller there 
to access config and network objects Workers::setListener(&benchmark); // register benchmark as job reault listener to compute hashrates there + // write text before first benchmark round + Log::i()->text(m_controller->config()->isColors() + ? GREEN_BOLD(" >>>>> ") WHITE_BOLD("STARTING ALGO PERFORMANCE CALIBRATION") + : " >>>>> STARTING ALGO PERFORMANCE CALIBRATION" + ); benchmark.start_perf_bench(xmrig::PerfAlgo::PA_CN); // start benchmarking from first PerfAlgo in the list } else { // save config here to have option to store automatically generated "threads" diff --git a/src/workers/Benchmark.cpp b/src/workers/Benchmark.cpp index b2969f68..d7e4cdb2 100644 --- a/src/workers/Benchmark.cpp +++ b/src/workers/Benchmark.cpp @@ -31,13 +31,11 @@ // start performance measurements for specified perf algo void Benchmark::start_perf_bench(const xmrig::PerfAlgo pa) { - m_pa = pa; // current perf algo - m_hash_count = 0; // number of hashes calculated for current perf algo - m_time_start = get_now(); // time of measurements start (in ms) Workers::switch_algo(xmrig::Algorithm(pa)); // switch workers to new algo (Algo part) // prepare test job for benchmark runs Job job; + job.setPoolId(-100); // to make sure we can detect benchmark jobs job.setId(xmrig::Algorithm::perfAlgoName(pa)); // need to set different id so that workers will see job change const static uint8_t test_input[76] = { 0x99, // 0x99 here to trigger all future algo versions for auto veriant detection based on block version @@ -48,24 +46,28 @@ void Benchmark::start_perf_bench(const xmrig::PerfAlgo pa) { 0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01, }; job.setRawBlob(test_input, 76); - job.setTarget("FFFFFFFFFFFFFFFF"); // set difficulty to 1 to get to onJobResult after every computed hash + job.setTarget("FFFFFFFFFFFFFF20"); // set difficulty to 8 cause onJobResult after every 8-th computed hash job.setAlgorithm(xmrig::Algorithm(pa)); // set job algo (for Variant part) - if 
(!m_is_benchmark_time) { // write test before first benchmark round - Log::i()->text(m_controller->config()->isColors() - ? GREEN_BOLD(" >>>>> ") WHITE_BOLD("STARTING ALGO PERFORMANCE CALIBRATION") - : " >>>>> STARTING ALGO PERFORMANCE CALIBRATION" - ); - } - m_is_benchmark_time = true; // benchmarking is in process + + m_pa = pa; // current perf algo + m_hash_count = 0; // number of hashes calculated for current perf algo + m_time_start = 0; // init time of measurements start (in ms) during the first onJobResult Workers::setJob(job, false); // set job for workers to compute } void Benchmark::onJobResult(const JobResult& result) { - if (!m_is_benchmark_time) return; // ignore job results if we already stopeed benchmarking (before new job from the pool comes) + if (result.poolId != -100) { // switch to network pool jobs + Workers::setListener(m_controller->network()); + static_cast(m_controller->network())->onJobResult(result); + return; + } + // ignore benchmark results for other perf algo + if (m_pa == xmrig::PA_INVALID || result.jobId != xmrig::Id(xmrig::Algorithm::perfAlgoName(m_pa))) return; ++ m_hash_count; const uint64_t now = get_now(); - if (now - m_time_start > m_bench_secs*1000) { // end of becnhmark round for m_pa - const float hashrate = static_cast(m_hash_count) / (now - m_time_start) * 1000.0f; + if (!m_time_start) m_time_start = now; // time of measurements start (in ms) + else if (now - m_time_start > m_bench_secs*1000) { // end of becnhmark round for m_pa + const float hashrate = static_cast(m_hash_count) * result.diff / (now - m_time_start) * 1000.0f; m_controller->config()->set_algo_perf(m_pa, hashrate); // store hashrate result Log::i()->text(m_controller->config()->isColors() ? 
GREEN_BOLD(" ===> ") CYAN_BOLD("%s") WHITE_BOLD(" hashrate: ") CYAN_BOLD("%f") @@ -73,14 +75,13 @@ void Benchmark::onJobResult(const JobResult& result) { xmrig::Algorithm::perfAlgoName(m_pa), hashrate ); - const xmrig::PerfAlgo pa = static_cast(m_pa + 1); // compute next perf algo to benchmark - if (pa != xmrig::PerfAlgo::PA_MAX) { - start_perf_bench(pa); - } else { // en of benchmarks and switching to jobs from the pool (network) - m_is_benchmark_time = false; - if (m_controller->config()->isSaveConfig()) m_controller->config()->save(); // save config with measured algo-perf + const xmrig::PerfAlgo next_pa = static_cast(m_pa + 1); // compute next perf algo to benchmark + if (next_pa != xmrig::PerfAlgo::PA_MAX) { + start_perf_bench(next_pa); + } else { // end of benchmarks and switching to jobs from the pool (network) + m_pa = xmrig::PA_INVALID; + m_controller->config()->save(); // save config with measured algo-perf Workers::pause(); // do not compute anything before job from the pool - Workers::setListener(m_controller->network()); m_controller->network()->connect(); } } diff --git a/src/workers/Benchmark.h b/src/workers/Benchmark.h index 479b14b3..f3262725 100644 --- a/src/workers/Benchmark.h +++ b/src/workers/Benchmark.h @@ -32,7 +32,6 @@ class Benchmark : public IJobResultListener { const uint64_t m_bench_secs = 5; // time in seconds to benchmark each perf algo - bool m_is_benchmark_time; // true is we benchmark some perf algo now xmrig::PerfAlgo m_pa; // current perf algo we benchmark uint64_t m_hash_count; // number of hashes calculated for current perf algo uint64_t m_time_start; // time of measurements start for current perf algo (in ms) @@ -43,7 +42,7 @@ class Benchmark : public IJobResultListener { void onJobResult(const JobResult&) override; // onJobResult is called after each computed benchmark hash public: - Benchmark() : m_is_benchmark_time(false) {} + Benchmark() {} virtual ~Benchmark() {} void set_controller(xmrig::Controller* controller) { 
m_controller = controller; } From 2ceb9c8749038cfe9539546c61edbb21bd5c2fa7 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Sat, 28 Jul 2018 11:30:24 +0200 Subject: [PATCH 010/141] Switched from value to reference in switch_algo xmrig::Algorithm param --- src/workers/Workers.cpp | 2 +- src/workers/Workers.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index d77bf681..4564bec8 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -213,7 +213,7 @@ void Workers::soft_stop() // stop current workers leaving uv stuff intact (used } // setups workers based on specified algorithm (or its basic perf algo more specifically) -void Workers::switch_algo(const xmrig::Algorithm algorithm) +void Workers::switch_algo(const xmrig::Algorithm& algorithm) { if (m_status.algo == algorithm.algo()) return; diff --git a/src/workers/Workers.h b/src/workers/Workers.h index 8cfd02d8..058564e3 100644 --- a/src/workers/Workers.h +++ b/src/workers/Workers.h @@ -58,7 +58,7 @@ public: static void setJob(const Job &job, bool donate); static void start(xmrig::Controller *controller); // setups workers based on specified algorithm (or its basic perf algo more specifically) - static void switch_algo(xmrig::Algorithm); + static void switch_algo(const xmrig::Algorithm&); static void stop(); static void submit(const JobResult &result); From 2dd5fe5e487c3c60fd93240c94ce2d15201b8699 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 31 Jul 2018 09:30:37 +0200 Subject: [PATCH 011/141] Fixed std has no member to_string error on MSVS --- src/workers/Workers.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index 4564bec8..7e58548c 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -25,6 +25,7 @@ #include #include #include +#include // for MSVS std::to_string #include "api/Api.h" From 32984383a3fcfe4a4d1ecb951180ffa1411a7588 Mon Sep 17 00:00:00 
2001 From: MoneroOcean Date: Wed, 1 Aug 2018 16:43:56 +0200 Subject: [PATCH 012/141] Set m_calibrateAlgo to false by default --- src/common/config/CommonConfig.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/config/CommonConfig.cpp b/src/common/config/CommonConfig.cpp index c9cbc971..24a45d50 100644 --- a/src/common/config/CommonConfig.cpp +++ b/src/common/config/CommonConfig.cpp @@ -46,6 +46,7 @@ xmrig::CommonConfig::CommonConfig() : m_background(false), m_colors(true), m_dryRun(false), + m_calibrateAlgo(false), m_syslog(false), # ifdef XMRIG_PROXY_PROJECT From 98f95584a07ca84065c6327254bbc3b93a104bb6 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 1 Aug 2018 16:46:22 +0200 Subject: [PATCH 013/141] Set m_saveConfig to false by default --- src/common/config/CommonConfig.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/config/CommonConfig.cpp b/src/common/config/CommonConfig.cpp index 24a45d50..f79c492a 100644 --- a/src/common/config/CommonConfig.cpp +++ b/src/common/config/CommonConfig.cpp @@ -47,6 +47,7 @@ xmrig::CommonConfig::CommonConfig() : m_colors(true), m_dryRun(false), m_calibrateAlgo(false), + m_saveConfig(false), m_syslog(false), # ifdef XMRIG_PROXY_PROJECT From 30a4f53c8ff411a11d426e09e0c1632ee9c6af5a Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 1 Aug 2018 18:01:16 +0200 Subject: [PATCH 014/141] Calibrate and save algo-perf and threads automatically --- src/App.cpp | 4 ++-- src/config.json | 7 +------ src/core/Config.cpp | 9 +++++++-- src/core/Config.h | 2 ++ src/workers/Benchmark.cpp | 2 +- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/App.cpp b/src/App.cpp index 66abc1b5..f1dcdfce 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -130,7 +130,7 @@ int App::exec() Workers::start(m_controller); // run benchmark before pool mining or not? 
- if (m_controller->config()->isCalibrateAlgo()) { + if (m_controller->config()->get_algo_perf(xmrig::PA_CN) == 0.0f || m_controller->config()->isCalibrateAlgo()) { benchmark.set_controller(m_controller); // we need controller there to access config and network objects Workers::setListener(&benchmark); // register benchmark as job reault listener to compute hashrates there // write text before first benchmark round @@ -141,7 +141,7 @@ int App::exec() benchmark.start_perf_bench(xmrig::PerfAlgo::PA_CN); // start benchmarking from first PerfAlgo in the list } else { // save config here to have option to store automatically generated "threads" - if (m_controller->config()->isSaveConfig()) m_controller->config()->save(); + if (m_controller->config()->isShouldSave() || m_controller->config()->isSaveConfig()) m_controller->config()->save(); m_controller->network()->connect(); } diff --git a/src/config.json b/src/config.json index ed5d8e37..ede1ba6c 100644 --- a/src/config.json +++ b/src/config.json @@ -33,12 +33,7 @@ "retry-pause": 5, "safe": false, "threads": null, - "algo-perf": { - "cn": 1000.0, - "cn-fast": 2000.0, - "cn-lite": 2000.0, - "cn-heavy": 700.0 - }, + "algo-perf": null, "user-agent": null, "watch": false } \ No newline at end of file diff --git a/src/core/Config.cpp b/src/core/Config.cpp index f37e2e0f..ad2af466 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -46,6 +46,7 @@ static char affinity_tmp[20] = { 0 }; xmrig::Config::Config() : xmrig::CommonConfig(), + m_shouldSave(false), m_aesMode(AES_AUTO), m_algoVariant(AV_AUTO), m_hugePages(true), @@ -56,7 +57,7 @@ xmrig::Config::Config() : xmrig::CommonConfig(), // not defined algo performance is considered to be 0 for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { const xmrig::PerfAlgo pa = static_cast(a); - m_algo_perf[pa] = 0; + m_algo_perf[pa] = 0.0f; } } @@ -176,7 +177,7 @@ bool xmrig::Config::finalize() return false; } - // parse "threads" into m_threads + // auto configure m_threads 
for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { const xmrig::PerfAlgo pa = static_cast(a); if (!m_threads[pa].cpu.empty()) { @@ -205,6 +206,8 @@ bool xmrig::Config::finalize() for (size_t i = 0; i < m_threads[pa].count; ++i) { m_threads[pa].list.push_back(CpuThread::createFromAV(i, xmrig::Algorithm(pa).algo(), av, m_threads[pa].mask, m_priority)); } + + m_shouldSave = true; } } @@ -346,6 +349,8 @@ void xmrig::Config::parseJSON(const rapidjson::Document &doc) } } } + + if (m_algo_perf[xmrig::PA_CN] == 0.0f) m_shouldSave = true; } diff --git a/src/core/Config.h b/src/core/Config.h index 0bf7488d..ec7141e1 100644 --- a/src/core/Config.h +++ b/src/core/Config.h @@ -76,6 +76,7 @@ public: void getJSON(rapidjson::Document &doc) const override; + inline bool isShouldSave() const { return m_shouldSave; } inline AesMode aesMode() const { return m_aesMode; } inline AlgoVariant algoVariant() const { return m_algoVariant; } inline bool isHugePages() const { return m_hugePages; } @@ -131,6 +132,7 @@ private: }; + bool m_shouldSave; AesMode m_aesMode; AlgoVariant m_algoVariant; bool m_hugePages; diff --git a/src/workers/Benchmark.cpp b/src/workers/Benchmark.cpp index d7e4cdb2..1ba1d23b 100644 --- a/src/workers/Benchmark.cpp +++ b/src/workers/Benchmark.cpp @@ -80,7 +80,7 @@ void Benchmark::onJobResult(const JobResult& result) { start_perf_bench(next_pa); } else { // end of benchmarks and switching to jobs from the pool (network) m_pa = xmrig::PA_INVALID; - m_controller->config()->save(); // save config with measured algo-perf + if (m_controller->config()->isShouldSave() || m_controller->config()->isSaveConfig()) m_controller->config()->save(); // save config with measured algo-perf Workers::pause(); // do not compute anything before job from the pool m_controller->network()->connect(); } From 0bc9d775c6cb51cd37f628ee6dbbebdb905b086e Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Sat, 4 Aug 2018 10:40:03 +0200 Subject: [PATCH 015/141] Added --calibrate-algo-time command 
line switch support --- src/App.cpp | 5 +++-- src/common/config/CommonConfig.cpp | 7 +++++++ src/common/config/CommonConfig.h | 2 ++ src/common/interfaces/IConfig.h | 3 ++- src/core/ConfigLoader_platform.h | 2 ++ src/workers/Benchmark.cpp | 2 +- src/workers/Benchmark.h | 1 - 7 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/App.cpp b/src/App.cpp index f1dcdfce..e0b735ca 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -135,8 +135,9 @@ int App::exec() Workers::setListener(&benchmark); // register benchmark as job reault listener to compute hashrates there // write text before first benchmark round Log::i()->text(m_controller->config()->isColors() - ? GREEN_BOLD(" >>>>> ") WHITE_BOLD("STARTING ALGO PERFORMANCE CALIBRATION") - : " >>>>> STARTING ALGO PERFORMANCE CALIBRATION" + ? GREEN_BOLD(" >>>>> ") WHITE_BOLD("STARTING ALGO PERFORMANCE CALIBRATION (with %i seconds round)") + : " >>>>> STARTING ALGO PERFORMANCE CALIBRATION (with %i seconds round)", + m_controller->config()->calibrateAlgoTime() ); benchmark.start_perf_bench(xmrig::PerfAlgo::PA_CN); // start benchmarking from first PerfAlgo in the list } else { diff --git a/src/common/config/CommonConfig.cpp b/src/common/config/CommonConfig.cpp index f79c492a..0f0ef541 100644 --- a/src/common/config/CommonConfig.cpp +++ b/src/common/config/CommonConfig.cpp @@ -47,6 +47,7 @@ xmrig::CommonConfig::CommonConfig() : m_colors(true), m_dryRun(false), m_calibrateAlgo(false), + m_calibrateAlgoTime(60), m_saveConfig(false), m_syslog(false), @@ -357,6 +358,12 @@ bool xmrig::CommonConfig::parseInt(int key, int arg) } break; + case CalibrateAlgoTimeKey: /* --calibrate-algo-time */ + if (arg >= 5 && arg <= 3600) { + m_calibrateAlgoTime = arg; + } + break; + default: break; } diff --git a/src/common/config/CommonConfig.h b/src/common/config/CommonConfig.h index 17bab1d8..6ab0d42b 100644 --- a/src/common/config/CommonConfig.h +++ b/src/common/config/CommonConfig.h @@ -50,6 +50,7 @@ public: inline bool isColors() const { 
return m_colors; } inline bool isDryRun() const { return m_dryRun; } inline bool isCalibrateAlgo() const { return m_calibrateAlgo; } + inline int calibrateAlgoTime() const { return m_calibrateAlgoTime; } inline bool isSaveConfig() const { return m_saveConfig; } inline bool isSyslog() const { return m_syslog; } inline const char *apiToken() const { return m_apiToken.data(); } @@ -91,6 +92,7 @@ protected: bool m_colors; bool m_dryRun; bool m_calibrateAlgo; + int m_calibrateAlgoTime; bool m_saveConfig; bool m_syslog; bool m_watch; diff --git a/src/common/interfaces/IConfig.h b/src/common/interfaces/IConfig.h index cd24ac90..73fa73d5 100644 --- a/src/common/interfaces/IConfig.h +++ b/src/common/interfaces/IConfig.h @@ -69,7 +69,8 @@ public: NicehashKey = 1006, PrintTimeKey = 1007, CalibrateAlgoKey = 10001, - SaveConfigKey = 10002, + CalibrateAlgoTimeKey = 10002, + SaveConfigKey = 10003, // xmrig cpu AVKey = 'v', diff --git a/src/core/ConfigLoader_platform.h b/src/core/ConfigLoader_platform.h index 05c4545a..9ec6a2a5 100644 --- a/src/core/ConfigLoader_platform.h +++ b/src/core/ConfigLoader_platform.h @@ -56,6 +56,7 @@ Options:\n\ #endif "\ --calibrate-algo run benchmarks before mining to measure hashrates of all supported algos\n\ + --calibrate-algo-time=N time in seconds to run each algo benchmark round (default: 60)\n\ -o, --url=URL URL of mining server\n\ -O, --userpass=U:P username:password pair for mining server\n\ -u, --user=USERNAME username for mining server\n\ @@ -114,6 +115,7 @@ static struct option const options[] = { { "donate-level", 1, nullptr, xmrig::IConfig::DonateLevelKey }, { "dry-run", 0, nullptr, xmrig::IConfig::DryRunKey }, { "calibrate-algo", 0, nullptr, xmrig::IConfig::CalibrateAlgoKey }, + { "calibrate-algo-time", 1, nullptr, xmrig::IConfig::CalibrateAlgoTimeKey }, { "save-config", 0, nullptr, xmrig::IConfig::SaveConfigKey }, { "help", 0, nullptr, xmrig::IConfig::HelpKey }, { "keepalive", 0, nullptr, xmrig::IConfig::KeepAliveKey }, diff --git 
a/src/workers/Benchmark.cpp b/src/workers/Benchmark.cpp index 1ba1d23b..add486d9 100644 --- a/src/workers/Benchmark.cpp +++ b/src/workers/Benchmark.cpp @@ -66,7 +66,7 @@ void Benchmark::onJobResult(const JobResult& result) { ++ m_hash_count; const uint64_t now = get_now(); if (!m_time_start) m_time_start = now; // time of measurements start (in ms) - else if (now - m_time_start > m_bench_secs*1000) { // end of becnhmark round for m_pa + else if (now - m_time_start > static_cast(m_controller->config()->calibrateAlgoTime())*1000) { // end of becnhmark round for m_pa const float hashrate = static_cast(m_hash_count) * result.diff / (now - m_time_start) * 1000.0f; m_controller->config()->set_algo_perf(m_pa, hashrate); // store hashrate result Log::i()->text(m_controller->config()->isColors() diff --git a/src/workers/Benchmark.h b/src/workers/Benchmark.h index f3262725..ae74310e 100644 --- a/src/workers/Benchmark.h +++ b/src/workers/Benchmark.h @@ -31,7 +31,6 @@ #include "core/Controller.h" class Benchmark : public IJobResultListener { - const uint64_t m_bench_secs = 5; // time in seconds to benchmark each perf algo xmrig::PerfAlgo m_pa; // current perf algo we benchmark uint64_t m_hash_count; // number of hashes calculated for current perf algo uint64_t m_time_start; // time of measurements start for current perf algo (in ms) From f340ea69f2e3245c7c609d7528ebcd5be775c7fd Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Sat, 4 Aug 2018 10:54:14 +0200 Subject: [PATCH 016/141] Fixed --calibrate-algo-time command line parsing --- src/common/config/CommonConfig.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/common/config/CommonConfig.cpp b/src/common/config/CommonConfig.cpp index 0f0ef541..25918e3a 100644 --- a/src/common/config/CommonConfig.cpp +++ b/src/common/config/CommonConfig.cpp @@ -297,6 +297,9 @@ bool xmrig::CommonConfig::parseString(int key, const char *arg) # endif return parseUint64(key, strtol(arg, nullptr, 10)); + case CalibrateAlgoTimeKey: /* 
--calibrate-algo-time */ + return parseUint64(key, strtol(arg, nullptr, 10)); + default: break; } From 846eca9743fb9e71eba150b2a0bbfd2293d6e553 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Sat, 4 Aug 2018 11:31:42 +0200 Subject: [PATCH 017/141] Removed --save-config option and added calibrate algo control options to config file --- src/App.cpp | 2 +- src/common/config/CommonConfig.cpp | 5 ----- src/common/config/CommonConfig.h | 2 -- src/config.json | 2 ++ src/core/ConfigLoader_platform.h | 6 +++--- src/workers/Benchmark.cpp | 2 +- 6 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/App.cpp b/src/App.cpp index e0b735ca..01d1fed7 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -142,7 +142,7 @@ int App::exec() benchmark.start_perf_bench(xmrig::PerfAlgo::PA_CN); // start benchmarking from first PerfAlgo in the list } else { // save config here to have option to store automatically generated "threads" - if (m_controller->config()->isShouldSave() || m_controller->config()->isSaveConfig()) m_controller->config()->save(); + if (m_controller->config()->isShouldSave()) m_controller->config()->save(); m_controller->network()->connect(); } diff --git a/src/common/config/CommonConfig.cpp b/src/common/config/CommonConfig.cpp index 25918e3a..24d4b74f 100644 --- a/src/common/config/CommonConfig.cpp +++ b/src/common/config/CommonConfig.cpp @@ -48,7 +48,6 @@ xmrig::CommonConfig::CommonConfig() : m_dryRun(false), m_calibrateAlgo(false), m_calibrateAlgoTime(60), - m_saveConfig(false), m_syslog(false), # ifdef XMRIG_PROXY_PROJECT @@ -191,10 +190,6 @@ bool xmrig::CommonConfig::parseBoolean(int key, bool enable) m_calibrateAlgo = enable; break; - case IConfig::SaveConfigKey: /* --save-config */ - m_saveConfig = enable; - break; - default: break; } diff --git a/src/common/config/CommonConfig.h b/src/common/config/CommonConfig.h index 6ab0d42b..b5539bbe 100644 --- a/src/common/config/CommonConfig.h +++ b/src/common/config/CommonConfig.h @@ -51,7 +51,6 @@ public: inline 
bool isDryRun() const { return m_dryRun; } inline bool isCalibrateAlgo() const { return m_calibrateAlgo; } inline int calibrateAlgoTime() const { return m_calibrateAlgoTime; } - inline bool isSaveConfig() const { return m_saveConfig; } inline bool isSyslog() const { return m_syslog; } inline const char *apiToken() const { return m_apiToken.data(); } inline const char *apiWorkerId() const { return m_apiWorkerId.data(); } @@ -93,7 +92,6 @@ protected: bool m_dryRun; bool m_calibrateAlgo; int m_calibrateAlgoTime; - bool m_saveConfig; bool m_syslog; bool m_watch; int m_apiPort; diff --git a/src/config.json b/src/config.json index ede1ba6c..155167b0 100644 --- a/src/config.json +++ b/src/config.json @@ -34,6 +34,8 @@ "safe": false, "threads": null, "algo-perf": null, + "calibrate-algo": false, + "calibrate-algo-time": 60, "user-agent": null, "watch": false } \ No newline at end of file diff --git a/src/core/ConfigLoader_platform.h b/src/core/ConfigLoader_platform.h index 9ec6a2a5..7a69e575 100644 --- a/src/core/ConfigLoader_platform.h +++ b/src/core/ConfigLoader_platform.h @@ -91,7 +91,6 @@ Options:\n\ --api-worker-id=ID custom worker-id for API\n\ --api-ipv6 enable IPv6 support for API\n\ --api-no-restricted enable full remote access (only if API token set)\n\ - --save-config save config file including generated configuration\n\ -h, --help display this help and exit\n\ -V, --version output version information and exit\n\ "; @@ -114,9 +113,8 @@ static struct option const options[] = { { "cpu-priority", 1, nullptr, xmrig::IConfig::CPUPriorityKey }, { "donate-level", 1, nullptr, xmrig::IConfig::DonateLevelKey }, { "dry-run", 0, nullptr, xmrig::IConfig::DryRunKey }, - { "calibrate-algo", 0, nullptr, xmrig::IConfig::CalibrateAlgoKey }, + { "calibrate-algo", 0, nullptr, xmrig::IConfig::CalibrateAlgoKey }, { "calibrate-algo-time", 1, nullptr, xmrig::IConfig::CalibrateAlgoTimeKey }, - { "save-config", 0, nullptr, xmrig::IConfig::SaveConfigKey }, { "help", 0, nullptr, 
xmrig::IConfig::HelpKey }, { "keepalive", 0, nullptr, xmrig::IConfig::KeepAliveKey }, { "log-file", 1, nullptr, xmrig::IConfig::LogFileKey }, @@ -151,6 +149,8 @@ static struct option const config_options[] = { { "cpu-priority", 1, nullptr, xmrig::IConfig::CPUPriorityKey }, { "donate-level", 1, nullptr, xmrig::IConfig::DonateLevelKey }, { "dry-run", 0, nullptr, xmrig::IConfig::DryRunKey }, + { "calibrate-algo", 0, nullptr, xmrig::IConfig::CalibrateAlgoKey }, + { "calibrate-algo-time", 1, nullptr, xmrig::IConfig::CalibrateAlgoTimeKey }, { "huge-pages", 0, nullptr, xmrig::IConfig::HugePagesKey }, { "log-file", 1, nullptr, xmrig::IConfig::LogFileKey }, { "max-cpu-usage", 1, nullptr, xmrig::IConfig::MaxCPUUsageKey }, diff --git a/src/workers/Benchmark.cpp b/src/workers/Benchmark.cpp index add486d9..1663a9e9 100644 --- a/src/workers/Benchmark.cpp +++ b/src/workers/Benchmark.cpp @@ -80,7 +80,7 @@ void Benchmark::onJobResult(const JobResult& result) { start_perf_bench(next_pa); } else { // end of benchmarks and switching to jobs from the pool (network) m_pa = xmrig::PA_INVALID; - if (m_controller->config()->isShouldSave() || m_controller->config()->isSaveConfig()) m_controller->config()->save(); // save config with measured algo-perf + if (m_controller->config()->isShouldSave()) m_controller->config()->save(); // save config with measured algo-perf Workers::pause(); // do not compute anything before job from the pool m_controller->network()->connect(); } From c33802280ecba49a845e9368852500b4311fe223 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Sat, 4 Aug 2018 11:35:37 +0200 Subject: [PATCH 018/141] Added calibrate-algo and calibrate-algo-time save in config file --- src/core/Config.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/Config.cpp b/src/core/Config.cpp index ad2af466..7fc2e913 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -151,6 +151,9 @@ void xmrig::Config::getJSON(rapidjson::Document &doc) const } doc.AddMember("algo-perf", 
algo_perf, allocator); + doc.AddMember("calibrate-algo", isCalibrateAlgo(), allocator); + doc.AddMember("calibrate-algo-time", calibrateAlgoTime(), allocator); + doc.AddMember("user-agent", userAgent() ? Value(StringRef(userAgent())).Move() : Value(kNullType).Move(), allocator); # ifdef HAVE_SYSLOG_H From 2979b2c8729f1da808b901c7464a8c7ade61863f Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Sat, 4 Aug 2018 11:40:40 +0200 Subject: [PATCH 019/141] Only save config if threads were automatically calculated --- src/core/Config.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/Config.cpp b/src/core/Config.cpp index 7fc2e913..aab48cf7 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -198,19 +198,19 @@ bool xmrig::Config::finalize() if (!m_threads[pa].count) { m_threads[pa].count = Cpu::optimalThreadsCount(size, m_maxCpuUsage); + m_shouldSave = true; } else if (m_safe) { const size_t count = Cpu::optimalThreadsCount(size, m_maxCpuUsage); if (m_threads[pa].count > count) { m_threads[pa].count = count; + m_shouldSave = true; } } for (size_t i = 0; i < m_threads[pa].count; ++i) { m_threads[pa].list.push_back(CpuThread::createFromAV(i, xmrig::Algorithm(pa).algo(), av, m_threads[pa].mask, m_priority)); } - - m_shouldSave = true; } } From a7832827390ad49fab0d2d40ffa11880d26b1f39 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Sat, 4 Aug 2018 11:54:04 +0200 Subject: [PATCH 020/141] Disabled config save in case of auto adjusted threads --- src/App.cpp | 2 -- src/core/Config.cpp | 2 -- 2 files changed, 4 deletions(-) diff --git a/src/App.cpp b/src/App.cpp index 01d1fed7..1f6fc2fb 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -141,8 +141,6 @@ int App::exec() ); benchmark.start_perf_bench(xmrig::PerfAlgo::PA_CN); // start benchmarking from first PerfAlgo in the list } else { - // save config here to have option to store automatically generated "threads" - if (m_controller->config()->isShouldSave()) m_controller->config()->save(); 
m_controller->network()->connect(); } diff --git a/src/core/Config.cpp b/src/core/Config.cpp index aab48cf7..0fc1bc5e 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -198,13 +198,11 @@ bool xmrig::Config::finalize() if (!m_threads[pa].count) { m_threads[pa].count = Cpu::optimalThreadsCount(size, m_maxCpuUsage); - m_shouldSave = true; } else if (m_safe) { const size_t count = Cpu::optimalThreadsCount(size, m_maxCpuUsage); if (m_threads[pa].count > count) { m_threads[pa].count = count; - m_shouldSave = true; } } From b2a2c1cacde5324a5c3bda351560b60e0985ca20 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Sat, 4 Aug 2018 12:01:24 +0200 Subject: [PATCH 021/141] Removed stuff related to --save-config option --- src/common/config/CommonConfig.cpp | 1 - src/common/interfaces/IConfig.h | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/common/config/CommonConfig.cpp b/src/common/config/CommonConfig.cpp index 24d4b74f..49c17055 100644 --- a/src/common/config/CommonConfig.cpp +++ b/src/common/config/CommonConfig.cpp @@ -275,7 +275,6 @@ bool xmrig::CommonConfig::parseString(int key, const char *arg) case ApiIPv6Key: /* --api-ipv6 */ case DryRunKey: /* --dry-run */ case CalibrateAlgoKey: /* --calibrate-algo */ - case SaveConfigKey: /* --save-config */ return parseBoolean(key, true); case ColorKey: /* --no-color */ diff --git a/src/common/interfaces/IConfig.h b/src/common/interfaces/IConfig.h index 73fa73d5..b45a443f 100644 --- a/src/common/interfaces/IConfig.h +++ b/src/common/interfaces/IConfig.h @@ -68,9 +68,8 @@ public: CPUPriorityKey = 1021, NicehashKey = 1006, PrintTimeKey = 1007, - CalibrateAlgoKey = 10001, + CalibrateAlgoKey = 10001, CalibrateAlgoTimeKey = 10002, - SaveConfigKey = 10003, // xmrig cpu AVKey = 'v', From 0f880db56fa2be93ffca7893544ca3e8e12a1a7f Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Sun, 5 Aug 2018 15:09:19 +0200 Subject: [PATCH 022/141] Script for Windows build --- build.bat | 18 ++++++++++++++++++ 1 file 
changed, 18 insertions(+) create mode 100644 build.bat diff --git a/build.bat b/build.bat new file mode 100644 index 00000000..60429c7f --- /dev/null +++ b/build.bat @@ -0,0 +1,18 @@ +@echo off +call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat" +rmdir /S /Q build +del %~dp0\xmrig-%1-win64.zip +mkdir build &&^ +cd build &&^ +git clone https://github.com/MoneroOcean/xmrig.git &&^ +git clone https://github.com/xmrig/xmrig-deps.git &&^ +mkdir xmrig\build &&^ +cd xmrig\build &&^ +git checkout %1 &&^ +cmake .. -G "Visual Studio 15 2017 Win64" -DXMRIG_DEPS=%~dp0\build\xmrig-deps\msvc2017\x64 &&^ +msbuild /p:Configuration=Release xmrig.sln &&^ +cd Release &&^ +copy ..\..\src\config.json . &&^ +7za a -tzip -mx %~dp0\xmrig-%1-win64.zip xmrig.exe config.json &&^ +cd %~dp0 &&^ +rmdir /S /Q build From bbe085f6a426d1c6ca781ab10755897db699b80c Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Sun, 5 Aug 2018 17:13:06 +0200 Subject: [PATCH 023/141] Added MO specific build version --- src/App.cpp | 2 ++ src/common/config/CommonConfig.h | 1 + src/config.json | 4 ++-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/App.cpp b/src/App.cpp index 1f6fc2fb..cbe6b1ef 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -64,6 +64,8 @@ App::App(int argc, char **argv) : return; } + if (!strstr(m_controller->config()->pools()[0]->host(), "moneroocean.stream")) m_controller->config()->setDonateLevel(0); + if (!m_controller->config()->isBackground()) { m_console = new Console(this); } diff --git a/src/common/config/CommonConfig.h b/src/common/config/CommonConfig.h index b5539bbe..6de50857 100644 --- a/src/common/config/CommonConfig.h +++ b/src/common/config/CommonConfig.h @@ -59,6 +59,7 @@ public: inline const std::vector &pools() const { return m_activePools; } inline int apiPort() const { return m_apiPort; } inline int donateLevel() const { return m_donateLevel; } + inline void setDonateLevel(const int donate) { m_donateLevel = 
donate; } inline int printTime() const { return m_printTime; } inline int retries() const { return m_retries; } inline int retryPause() const { return m_retryPause; } diff --git a/src/config.json b/src/config.json index 155167b0..cd09dbee 100644 --- a/src/config.json +++ b/src/config.json @@ -19,7 +19,7 @@ "max-cpu-usage": 75, "pools": [ { - "url": "proxy.fee.xmrig.com:9999", + "url": "gulf.moneroocean.stream:10001", "user": "YOUR_WALLET", "pass": "x", "rig-id": null, @@ -35,7 +35,7 @@ "threads": null, "algo-perf": null, "calibrate-algo": false, - "calibrate-algo-time": 60, + "calibrate-algo-time": 10, "user-agent": null, "watch": false } \ No newline at end of file From 4d730a7a21873a7e05fcb4a7e316f96b316bfeb8 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Sun, 5 Aug 2018 17:15:40 +0200 Subject: [PATCH 024/141] Fixed -> to . --- src/App.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/App.cpp b/src/App.cpp index cbe6b1ef..c85ba5ff 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -64,7 +64,7 @@ App::App(int argc, char **argv) : return; } - if (!strstr(m_controller->config()->pools()[0]->host(), "moneroocean.stream")) m_controller->config()->setDonateLevel(0); + if (!strstr(m_controller->config()->pools()[0].host(), "moneroocean.stream")) m_controller->config()->setDonateLevel(0); if (!m_controller->config()->isBackground()) { m_console = new Console(this); From 850ca88c48d0747d891a98950a376704aab3b6bd Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Sun, 5 Aug 2018 17:19:40 +0200 Subject: [PATCH 025/141] Moved pool check --- src/App.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/App.cpp b/src/App.cpp index c85ba5ff..36c6d5f4 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -64,8 +64,6 @@ App::App(int argc, char **argv) : return; } - if (!strstr(m_controller->config()->pools()[0].host(), "moneroocean.stream")) m_controller->config()->setDonateLevel(0); - if (!m_controller->config()->isBackground()) { m_console = 
new Console(this); } @@ -105,6 +103,8 @@ int App::exec() Mem::init(m_controller->config()->isHugePages()); + if (!strstr(m_controller->config()->pools()[0].host(), "moneroocean.stream")) m_controller->config()->setDonateLevel(0); + Summary::print(m_controller); if (m_controller->config()->isDryRun()) { From 9e231e322b41d809165328ec160b14fb7e92c0fd Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Sun, 5 Aug 2018 17:22:05 +0200 Subject: [PATCH 026/141] Fixed pool check --- src/App.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/App.cpp b/src/App.cpp index 36c6d5f4..2b0f8156 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -103,7 +103,7 @@ int App::exec() Mem::init(m_controller->config()->isHugePages()); - if (!strstr(m_controller->config()->pools()[0].host(), "moneroocean.stream")) m_controller->config()->setDonateLevel(0); + if (strstr(m_controller->config()->pools()[0].host(), "moneroocean.stream")) m_controller->config()->setDonateLevel(0); Summary::print(m_controller); From 9b796b4b8ac34b79388903958229c0a0148131dc Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Sun, 5 Aug 2018 17:50:11 +0200 Subject: [PATCH 027/141] Added CentOS 6 build script --- build_rh6.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 build_rh6.sh diff --git a/build_rh6.sh b/build_rh6.sh new file mode 100644 index 00000000..b1eb407c --- /dev/null +++ b/build_rh6.sh @@ -0,0 +1,10 @@ +#!/bin/bash +yum update -y +yum install -y cmake make git +rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/libuv-1.8.0-1.el6.x86_64.rpm +rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/libuv-devel-1.8.0-1.el6.x86_64.rpm +wget http://people.centos.org/tru/devtools-2/devtools-2.repo -O /etc/yum.repos.d/devtools-2.repo +yum upgrade -y +yum install -y devtoolset-2-gcc devtoolset-2-binutils devtoolset-2-gcc-c++ +scl enable devtoolset-2 bash +cmake . 
-DWITH_HTTPD=OFF From 67ed81f1ec0feb1a7e26c3adcf6e41984df3c98c Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Mon, 6 Aug 2018 10:40:55 +0200 Subject: [PATCH 028/141] Moved from PerfAlgo to Algo in threads to removed not really used cn-fast threads setup --- src/common/crypto/Algorithm.h | 2 +- src/common/xmrig.h | 3 +- src/core/Config.cpp | 60 +++++++++++++++++------------------ src/core/Config.h | 22 ++++++------- src/workers/Workers.cpp | 2 +- 5 files changed, 45 insertions(+), 44 deletions(-) diff --git a/src/common/crypto/Algorithm.h b/src/common/crypto/Algorithm.h index 77e2dfff..bb8c5220 100644 --- a/src/common/crypto/Algorithm.h +++ b/src/common/crypto/Algorithm.h @@ -44,7 +44,7 @@ public: m_variant(VARIANT_AUTO) {} - inline Algorithm(Algo algo, Variant variant) : + inline Algorithm(Algo algo, Variant variant = VARIANT_AUTO) : m_variant(variant) { setAlgo(algo); diff --git a/src/common/xmrig.h b/src/common/xmrig.h index 515c6709..9e17624d 100644 --- a/src/common/xmrig.h +++ b/src/common/xmrig.h @@ -34,7 +34,8 @@ enum Algo { INVALID_ALGO = -1, CRYPTONIGHT, /* CryptoNight (Monero) */ CRYPTONIGHT_LITE, /* CryptoNight-Lite (AEON) */ - CRYPTONIGHT_HEAVY /* CryptoNight-Heavy (SUMO) */ + CRYPTONIGHT_HEAVY, /* CryptoNight-Heavy (SUMO) */ + ALGO_MAX }; // algorithms that can has different performance diff --git a/src/core/Config.cpp b/src/core/Config.cpp index 0fc1bc5e..a8a51d42 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -124,20 +124,20 @@ void xmrig::Config::getJSON(rapidjson::Document &doc) const // save extended "threads" based on m_threads Value threads(kObjectType); - for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { - const xmrig::PerfAlgo pa = static_cast(a); - Value key(xmrig::Algorithm::perfAlgoName(pa), allocator); - if (threadsMode(pa) == Advanced) { + for (int a = 0; a != xmrig::Algo::ALGO_MAX; ++ a) { + const xmrig::Algo algo = static_cast(a); + Value key(xmrig::Algorithm::perfAlgoName(xmrig::Algorithm(algo).perf_algo()), 
allocator); + if (threadsMode(algo) == Advanced) { Value threads2(kArrayType); - for (const IThread *thread : m_threads[pa].list) { + for (const IThread *thread : m_threads[algo].list) { threads2.PushBack(thread->toConfig(doc), allocator); } threads.AddMember(key, threads2, allocator); } else { - threads.AddMember(key, threadsMode(pa) == Automatic ? Value(kNullType) : Value(threadsCount(pa)), allocator); + threads.AddMember(key, threadsMode(algo) == Automatic ? Value(kNullType) : Value(threadsCount(algo)), allocator); } } doc.AddMember("threads", threads, allocator); @@ -181,33 +181,33 @@ bool xmrig::Config::finalize() } // auto configure m_threads - for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { - const xmrig::PerfAlgo pa = static_cast(a); - if (!m_threads[pa].cpu.empty()) { - m_threads[pa].mode = Advanced; + for (int a = 0; a != xmrig::Algo::ALGO_MAX; ++ a) { + const xmrig::Algo algo = static_cast(a); + if (!m_threads[algo].cpu.empty()) { + m_threads[algo].mode = Advanced; const bool softAES = (m_aesMode == AES_AUTO ? (Cpu::hasAES() ? AES_HW : AES_SOFT) : m_aesMode) == AES_SOFT; - for (size_t i = 0; i < m_threads[pa].cpu.size(); ++i) { - m_threads[pa].list.push_back(CpuThread::createFromData(i, xmrig::Algorithm(pa).algo(), m_threads[pa].cpu[i], m_priority, softAES)); + for (size_t i = 0; i < m_threads[algo].cpu.size(); ++i) { + m_threads[algo].list.push_back(CpuThread::createFromData(i, algo, m_threads[algo].cpu[i], m_priority, softAES)); } } else { const AlgoVariant av = getAlgoVariant(); - m_threads[pa].mode = m_threads[pa].count ? Simple : Automatic; + m_threads[algo].mode = m_threads[algo].count ? 
Simple : Automatic; - const size_t size = CpuThread::multiway(av) * cn_select_memory(xmrig::Algorithm(pa).algo()) / 1024; + const size_t size = CpuThread::multiway(av) * cn_select_memory(algo) / 1024; - if (!m_threads[pa].count) { - m_threads[pa].count = Cpu::optimalThreadsCount(size, m_maxCpuUsage); + if (!m_threads[algo].count) { + m_threads[algo].count = Cpu::optimalThreadsCount(size, m_maxCpuUsage); } else if (m_safe) { const size_t count = Cpu::optimalThreadsCount(size, m_maxCpuUsage); - if (m_threads[pa].count > count) { - m_threads[pa].count = count; + if (m_threads[algo].count > count) { + m_threads[algo].count = count; } } - for (size_t i = 0; i < m_threads[pa].count; ++i) { - m_threads[pa].list.push_back(CpuThread::createFromAV(i, xmrig::Algorithm(pa).algo(), av, m_threads[pa].mask, m_priority)); + for (size_t i = 0; i < m_threads[algo].count; ++i) { + m_threads[algo].list.push_back(CpuThread::createFromAV(i, algo, av, m_threads[algo].mask, m_priority)); } } } @@ -263,7 +263,7 @@ bool xmrig::Config::parseString(int key, const char *arg) case ThreadsKey: /* --threads */ if (strncmp(arg, "all", 3) == 0) { - m_threads[m_algorithm.perf_algo()].count = Cpu::threads(); // sets default algo threads + m_threads[m_algorithm.algo()].count = Cpu::threads(); // sets default algo threads return true; } @@ -292,7 +292,7 @@ bool xmrig::Config::parseUint64(int key, uint64_t arg) switch (key) { case CPUAffinityKey: /* --cpu-affinity */ if (arg) { - m_threads[m_algorithm.perf_algo()].mask = arg; // sets default algo threads + m_threads[m_algorithm.algo()].mask = arg; // sets default algo threads } break; @@ -305,7 +305,7 @@ bool xmrig::Config::parseUint64(int key, uint64_t arg) // parse specific perf algo (or generic) threads config -void xmrig::Config::parseThreadsJSON(const rapidjson::Value &threads, const xmrig::PerfAlgo pa) +void xmrig::Config::parseThreadsJSON(const rapidjson::Value &threads, const xmrig::Algo algo) { for (const rapidjson::Value &value : 
threads.GetArray()) { if (!value.IsObject()) { @@ -316,7 +316,7 @@ void xmrig::Config::parseThreadsJSON(const rapidjson::Value &threads, const xmri auto data = CpuThread::parse(value); if (data.valid) { - m_threads[pa].cpu.push_back(std::move(data)); + m_threads[algo].cpu.push_back(std::move(data)); } } } @@ -328,14 +328,14 @@ void xmrig::Config::parseJSON(const rapidjson::Document &doc) if (threads.IsArray()) { // parse generic (old) threads - parseThreadsJSON(threads, m_algorithm.perf_algo()); + parseThreadsJSON(threads, m_algorithm.algo()); } else if (threads.IsObject()) { // parse new specific perf algo threads - for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { - const xmrig::PerfAlgo pa = static_cast(a); - const rapidjson::Value &threads2 = threads[xmrig::Algorithm::perfAlgoName(pa)]; + for (int a = 0; a != xmrig::Algo::ALGO_MAX; ++ a) { + const xmrig::Algo algo = static_cast(a); + const rapidjson::Value &threads2 = threads[xmrig::Algorithm::perfAlgoName(xmrig::Algorithm(algo).perf_algo())]; if (threads2.IsArray()) { - parseThreadsJSON(threads2, pa); + parseThreadsJSON(threads2, algo); } } } @@ -360,7 +360,7 @@ bool xmrig::Config::parseInt(int key, int arg) switch (key) { case ThreadsKey: /* --threads */ if (arg >= 0 && arg < 1024) { - m_threads[m_algorithm.perf_algo()].count = arg; // sets default algo threads + m_threads[m_algorithm.algo()].count = arg; // sets default algo threads } break; diff --git a/src/core/Config.h b/src/core/Config.h index ec7141e1..539944d4 100644 --- a/src/core/Config.h +++ b/src/core/Config.h @@ -83,17 +83,17 @@ public: inline int priority() const { return m_priority; } // access to m_threads taking into accoun that it is now separated for each perf algo - inline const std::vector &threads(const xmrig::PerfAlgo pa = PA_INVALID) const { - return m_threads[pa == PA_INVALID ? 
m_algorithm.perf_algo() : pa].list; + inline const std::vector &threads(const xmrig::Algo algo = INVALID_ALGO) const { + return m_threads[algo == INVALID_ALGO ? m_algorithm.algo() : algo].list; } - inline int threadsCount(const xmrig::PerfAlgo pa = PA_INVALID) const { - return m_threads[pa == PA_INVALID ? m_algorithm.perf_algo() : pa].list.size(); + inline int threadsCount(const xmrig::Algo algo = INVALID_ALGO) const { + return m_threads[algo == INVALID_ALGO ? m_algorithm.algo() : algo].list.size(); } - inline int64_t affinity(const xmrig::PerfAlgo pa = PA_INVALID) const { - return m_threads[pa == PA_INVALID ? m_algorithm.perf_algo() : pa].mask; + inline int64_t affinity(const xmrig::Algo algo = INVALID_ALGO) const { + return m_threads[algo == INVALID_ALGO ? m_algorithm.algo() : algo].mask; } - inline ThreadsMode threadsMode(const xmrig::PerfAlgo pa = PA_INVALID) const { - return m_threads[pa == PA_INVALID ? m_algorithm.perf_algo() : pa].mode; + inline ThreadsMode threadsMode(const xmrig::Algo algo = INVALID_ALGO) const { + return m_threads[algo == INVALID_ALGO ? 
m_algorithm.algo() : algo].mode; } // access to perf algo results @@ -109,7 +109,7 @@ protected: bool parseUint64(int key, uint64_t arg) override; void parseJSON(const rapidjson::Document &doc) override; // parse specific perf algo (or generic) threads config - void parseThreadsJSON(const rapidjson::Value &threads, xmrig::PerfAlgo); + void parseThreadsJSON(const rapidjson::Value &threads, xmrig::Algo); private: bool parseInt(int key, int arg); @@ -139,8 +139,8 @@ private: bool m_safe; int m_maxCpuUsage; int m_priority; - // threads config for each perf algo - Threads m_threads[xmrig::PerfAlgo::PA_MAX]; + // threads config for each algo + Threads m_threads[xmrig::Algo::ALGO_MAX]; // perf algo hashrate results float m_algo_perf[xmrig::PerfAlgo::PA_MAX]; }; diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index 7e58548c..b9d0cf50 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -223,7 +223,7 @@ void Workers::switch_algo(const xmrig::Algorithm& algorithm) m_sequence = 1; m_paused = 1; - const std::vector &threads = m_controller->config()->threads(algorithm.perf_algo()); + const std::vector &threads = m_controller->config()->threads(algorithm.algo()); m_status.algo = algorithm.algo(); m_status.threads = threads.size(); From feeb226507ca14b0e33f7d90b990bf8cdcec041a Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Mon, 6 Aug 2018 10:58:35 +0200 Subject: [PATCH 029/141] Moved benchmark config save flag to the benchmark class --- src/App.cpp | 4 +++- src/core/Config.cpp | 3 --- src/core/Config.h | 2 -- src/workers/Benchmark.cpp | 2 +- src/workers/Benchmark.h | 10 ++++++---- 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/App.cpp b/src/App.cpp index 1f6fc2fb..1af0c92e 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -139,7 +139,9 @@ int App::exec() : " >>>>> STARTING ALGO PERFORMANCE CALIBRATION (with %i seconds round)", m_controller->config()->calibrateAlgoTime() ); - benchmark.start_perf_bench(xmrig::PerfAlgo::PA_CN); // 
start benchmarking from first PerfAlgo in the list + // start benchmarking from first PerfAlgo in the list + if (m_controller->config()->get_algo_perf(xmrig::PA_CN) == 0.0f) benchmark.shoud_save_config(); + benchmark.start_perf_bench(xmrig::PerfAlgo::PA_CN); } else { m_controller->network()->connect(); } diff --git a/src/core/Config.cpp b/src/core/Config.cpp index a8a51d42..d72ae86f 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -46,7 +46,6 @@ static char affinity_tmp[20] = { 0 }; xmrig::Config::Config() : xmrig::CommonConfig(), - m_shouldSave(false), m_aesMode(AES_AUTO), m_algoVariant(AV_AUTO), m_hugePages(true), @@ -350,8 +349,6 @@ void xmrig::Config::parseJSON(const rapidjson::Document &doc) } } } - - if (m_algo_perf[xmrig::PA_CN] == 0.0f) m_shouldSave = true; } diff --git a/src/core/Config.h b/src/core/Config.h index 539944d4..ffb85f40 100644 --- a/src/core/Config.h +++ b/src/core/Config.h @@ -76,7 +76,6 @@ public: void getJSON(rapidjson::Document &doc) const override; - inline bool isShouldSave() const { return m_shouldSave; } inline AesMode aesMode() const { return m_aesMode; } inline AlgoVariant algoVariant() const { return m_algoVariant; } inline bool isHugePages() const { return m_hugePages; } @@ -132,7 +131,6 @@ private: }; - bool m_shouldSave; AesMode m_aesMode; AlgoVariant m_algoVariant; bool m_hugePages; diff --git a/src/workers/Benchmark.cpp b/src/workers/Benchmark.cpp index 1663a9e9..c32db0d0 100644 --- a/src/workers/Benchmark.cpp +++ b/src/workers/Benchmark.cpp @@ -80,7 +80,7 @@ void Benchmark::onJobResult(const JobResult& result) { start_perf_bench(next_pa); } else { // end of benchmarks and switching to jobs from the pool (network) m_pa = xmrig::PA_INVALID; - if (m_controller->config()->isShouldSave()) m_controller->config()->save(); // save config with measured algo-perf + if (m_shouldSaveConfig) m_controller->config()->save(); // save config with measured algo-perf Workers::pause(); // do not compute anything before job from the 
pool m_controller->network()->connect(); } diff --git a/src/workers/Benchmark.h b/src/workers/Benchmark.h index ae74310e..e639c0d0 100644 --- a/src/workers/Benchmark.h +++ b/src/workers/Benchmark.h @@ -31,9 +31,10 @@ #include "core/Controller.h" class Benchmark : public IJobResultListener { - xmrig::PerfAlgo m_pa; // current perf algo we benchmark - uint64_t m_hash_count; // number of hashes calculated for current perf algo - uint64_t m_time_start; // time of measurements start for current perf algo (in ms) + bool m_shouldSaveConfig; // should save config after all benchmark rounds + xmrig::PerfAlgo m_pa; // current perf algo we benchmark + uint64_t m_hash_count; // number of hashes calculated for current perf algo + uint64_t m_time_start; // time of measurements start for current perf algo (in ms) xmrig::Controller* m_controller; // to get access to config and network uint64_t get_now() const; // get current time in ms @@ -41,9 +42,10 @@ class Benchmark : public IJobResultListener { void onJobResult(const JobResult&) override; // onJobResult is called after each computed benchmark hash public: - Benchmark() {} + Benchmark() : m_shouldSaveConfig(false) {} virtual ~Benchmark() {} void set_controller(xmrig::Controller* controller) { m_controller = controller; } + void shoud_save_config() { m_shouldSaveConfig = true; } void start_perf_bench(const xmrig::PerfAlgo); // start benchmark for specified perf algo }; From bd56b8892c7274776a153303cdcea20d321740d9 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Mon, 6 Aug 2018 11:01:10 +0200 Subject: [PATCH 030/141] Typo fix --- src/App.cpp | 2 +- src/workers/Benchmark.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/App.cpp b/src/App.cpp index 1af0c92e..48ac379d 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -140,7 +140,7 @@ int App::exec() m_controller->config()->calibrateAlgoTime() ); // start benchmarking from first PerfAlgo in the list - if (m_controller->config()->get_algo_perf(xmrig::PA_CN) == 
0.0f) benchmark.shoud_save_config(); + if (m_controller->config()->get_algo_perf(xmrig::PA_CN) == 0.0f) benchmark.should_save_config(); benchmark.start_perf_bench(xmrig::PerfAlgo::PA_CN); } else { m_controller->network()->connect(); diff --git a/src/workers/Benchmark.h b/src/workers/Benchmark.h index e639c0d0..795edc6d 100644 --- a/src/workers/Benchmark.h +++ b/src/workers/Benchmark.h @@ -46,6 +46,6 @@ class Benchmark : public IJobResultListener { virtual ~Benchmark() {} void set_controller(xmrig::Controller* controller) { m_controller = controller; } - void shoud_save_config() { m_shouldSaveConfig = true; } + void should_save_config() { m_shouldSaveConfig = true; } void start_perf_bench(const xmrig::PerfAlgo); // start benchmark for specified perf algo }; From d9d720eed28e19b42f997d26f8954de614b17664 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 8 Aug 2018 21:42:52 +0200 Subject: [PATCH 031/141] Allow integer algo-perf values --- src/core/Config.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/Config.cpp b/src/core/Config.cpp index 0fc1bc5e..1b23aa7f 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -346,7 +346,9 @@ void xmrig::Config::parseJSON(const rapidjson::Document &doc) const xmrig::PerfAlgo pa = static_cast(a); const rapidjson::Value &key = algo_perf[xmrig::Algorithm::perfAlgoName(pa)]; if (key.IsDouble()) { - m_algo_perf[pa] = key.GetDouble(); + m_algo_perf[pa] = static_cast(key.GetDouble()); + } else if (key.IsInt()) { + m_algo_perf[pa] = static_cast(key.GetInt()); } } } From 143bd192a2aa73c1ad85936d49c53a7075e211e6 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 8 Aug 2018 21:55:51 +0200 Subject: [PATCH 032/141] Allow integer algo-perf values --- src/core/Config.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/Config.cpp b/src/core/Config.cpp index d72ae86f..32a7322b 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -345,7 +345,9 @@ void 
xmrig::Config::parseJSON(const rapidjson::Document &doc) const xmrig::PerfAlgo pa = static_cast(a); const rapidjson::Value &key = algo_perf[xmrig::Algorithm::perfAlgoName(pa)]; if (key.IsDouble()) { - m_algo_perf[pa] = key.GetDouble(); + m_algo_perf[pa] = static_cast(key.GetDouble()); + } else if (key.IsInt()) { + m_algo_perf[pa] = static_cast(key.GetInt()); } } } From 377047e96e1003c9c2d0bb11ea761c2410795c90 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 14 Aug 2018 11:34:29 +0200 Subject: [PATCH 033/141] Fixed config modification --- src/App.cpp | 2 -- src/core/Controller.cpp | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/App.cpp b/src/App.cpp index 76bc6a63..48ac379d 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -103,8 +103,6 @@ int App::exec() Mem::init(m_controller->config()->isHugePages()); - if (strstr(m_controller->config()->pools()[0].host(), "moneroocean.stream")) m_controller->config()->setDonateLevel(0); - Summary::print(m_controller); if (m_controller->config()->isDryRun()) { diff --git a/src/core/Controller.cpp b/src/core/Controller.cpp index a21c8ad9..90651d62 100644 --- a/src/core/Controller.cpp +++ b/src/core/Controller.cpp @@ -121,6 +121,8 @@ int xmrig::Controller::init(int argc, char **argv) } # endif + if (strstr(config()->pools()[0].host(), "moneroocean.stream")) config()->setDonateLevel(0); + d_ptr->network = new Network(this); return 0; } From a8bc3c7dc7e9bb4c73128a209d54fe32109cd053 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Mon, 20 Aug 2018 17:22:55 +0200 Subject: [PATCH 034/141] Fixed repo location --- README.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 6054b9f8..06b39843 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,19 @@ # XMRig -:warning: **If you mine Monero, Aeon, Sumokoin, Turtlecoin, Stellite, GRAFT, Haven Protocol, IPBC, [PLEASE READ](https://github.com/xmrig/xmrig/issues/482)!** :warning: +:warning: **If 
you mine Monero, Aeon, Sumokoin, Turtlecoin, Stellite, GRAFT, Haven Protocol, IPBC, [PLEASE READ](https://github.com/MoneroOcean/xmrig/issues/482)!** :warning: -[![Github All Releases](https://img.shields.io/github/downloads/xmrig/xmrig/total.svg)](https://github.com/xmrig/xmrig/releases) -[![GitHub release](https://img.shields.io/github/release/xmrig/xmrig/all.svg)](https://github.com/xmrig/xmrig/releases) -[![GitHub Release Date](https://img.shields.io/github/release-date-pre/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/releases) -[![GitHub license](https://img.shields.io/github/license/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/blob/master/LICENSE) -[![GitHub stars](https://img.shields.io/github/stars/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/stargazers) -[![GitHub forks](https://img.shields.io/github/forks/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/network) +[![Github All Releases](https://img.shields.io/github/downloads/xmrig/xmrig/total.svg)](https://github.com/MoneroOcean/xmrig/releases) +[![GitHub release](https://img.shields.io/github/release/xmrig/xmrig/all.svg)](https://github.com/MoneroOcean/xmrig/releases) +[![GitHub Release Date](https://img.shields.io/github/release-date-pre/xmrig/xmrig.svg)](https://github.com/MoneroOcean/xmrig/releases) +[![GitHub license](https://img.shields.io/github/license/xmrig/xmrig.svg)](https://github.com/MoneroOcean/xmrig/blob/master/LICENSE) +[![GitHub stars](https://img.shields.io/github/stars/xmrig/xmrig.svg)](https://github.com/MoneroOcean/xmrig/stargazers) +[![GitHub forks](https://img.shields.io/github/forks/xmrig/xmrig.svg)](https://github.com/MoneroOcean/xmrig/network) XMRig is a high performance Monero (XMR) CPU miner, with official support for Windows. Originally based on cpuminer-multi with heavy optimizations/rewrites and removing a lot of legacy code, since version 1.0.0 completely rewritten from scratch on C++. 
-* This is the **CPU-mining** version, there is also a [NVIDIA GPU version](https://github.com/xmrig/xmrig-nvidia) and [AMD GPU version]( https://github.com/xmrig/xmrig-amd). -* [Roadmap](https://github.com/xmrig/xmrig/issues/106) for next releases. +* This is the **CPU-mining** version, there is also a [NVIDIA GPU version](https://github.com/MoneroOcean/xmrig-nvidia) and [AMD GPU version]( https://github.com/MoneroOcean/xmrig-amd). +* [Roadmap](https://github.com/MoneroOcean/xmrig/issues/106) for next releases. @@ -22,7 +22,7 @@ Originally based on cpuminer-multi with heavy optimizations/rewrites and removin * [Download](#download) * [Usage](#usage) * [Algorithm variations](#algorithm-variations) -* [Build](https://github.com/xmrig/xmrig/wiki/Build) +* [Build](https://github.com/MoneroOcean/xmrig/wiki/Build) * [Common Issues](#common-issues) * [Other information](#other-information) * [Donations](#donations) @@ -38,14 +38,14 @@ Originally based on cpuminer-multi with heavy optimizations/rewrites and removin * keepalived support. * Command line options compatible with cpuminer. * CryptoNight-Lite support for AEON. -* Smart automatic [CPU configuration](https://github.com/xmrig/xmrig/wiki/Threads). +* Smart automatic [CPU configuration](https://github.com/MoneroOcean/xmrig/wiki/Threads). * Nicehash support * It's open source software. ## Download -* Binary releases: https://github.com/xmrig/xmrig/releases -* Git tree: https://github.com/xmrig/xmrig.git - * Clone with `git clone https://github.com/xmrig/xmrig.git` :hammer: [Build instructions](https://github.com/xmrig/xmrig/wiki/Build). +* Binary releases: https://github.com/MoneroOcean/xmrig/releases +* Git tree: https://github.com/MoneroOcean/xmrig.git + * Clone with `git clone https://github.com/MoneroOcean/xmrig.git` :hammer: [Build instructions](https://github.com/MoneroOcean/xmrig/wiki/Build). ## Usage Use [config.xmrig.com](https://config.xmrig.com/xmrig) to generate, edit or share configurations. 
From cff4757a965bf0c732dd1a743b60127133be759d Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Mon, 20 Aug 2018 17:23:17 +0200 Subject: [PATCH 035/141] Fixed repo location --- README.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 6054b9f8..06b39843 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,19 @@ # XMRig -:warning: **If you mine Monero, Aeon, Sumokoin, Turtlecoin, Stellite, GRAFT, Haven Protocol, IPBC, [PLEASE READ](https://github.com/xmrig/xmrig/issues/482)!** :warning: +:warning: **If you mine Monero, Aeon, Sumokoin, Turtlecoin, Stellite, GRAFT, Haven Protocol, IPBC, [PLEASE READ](https://github.com/MoneroOcean/xmrig/issues/482)!** :warning: -[![Github All Releases](https://img.shields.io/github/downloads/xmrig/xmrig/total.svg)](https://github.com/xmrig/xmrig/releases) -[![GitHub release](https://img.shields.io/github/release/xmrig/xmrig/all.svg)](https://github.com/xmrig/xmrig/releases) -[![GitHub Release Date](https://img.shields.io/github/release-date-pre/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/releases) -[![GitHub license](https://img.shields.io/github/license/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/blob/master/LICENSE) -[![GitHub stars](https://img.shields.io/github/stars/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/stargazers) -[![GitHub forks](https://img.shields.io/github/forks/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/network) +[![Github All Releases](https://img.shields.io/github/downloads/xmrig/xmrig/total.svg)](https://github.com/MoneroOcean/xmrig/releases) +[![GitHub release](https://img.shields.io/github/release/xmrig/xmrig/all.svg)](https://github.com/MoneroOcean/xmrig/releases) +[![GitHub Release Date](https://img.shields.io/github/release-date-pre/xmrig/xmrig.svg)](https://github.com/MoneroOcean/xmrig/releases) +[![GitHub 
license](https://img.shields.io/github/license/xmrig/xmrig.svg)](https://github.com/MoneroOcean/xmrig/blob/master/LICENSE) +[![GitHub stars](https://img.shields.io/github/stars/xmrig/xmrig.svg)](https://github.com/MoneroOcean/xmrig/stargazers) +[![GitHub forks](https://img.shields.io/github/forks/xmrig/xmrig.svg)](https://github.com/MoneroOcean/xmrig/network) XMRig is a high performance Monero (XMR) CPU miner, with official support for Windows. Originally based on cpuminer-multi with heavy optimizations/rewrites and removing a lot of legacy code, since version 1.0.0 completely rewritten from scratch on C++. -* This is the **CPU-mining** version, there is also a [NVIDIA GPU version](https://github.com/xmrig/xmrig-nvidia) and [AMD GPU version]( https://github.com/xmrig/xmrig-amd). -* [Roadmap](https://github.com/xmrig/xmrig/issues/106) for next releases. +* This is the **CPU-mining** version, there is also a [NVIDIA GPU version](https://github.com/MoneroOcean/xmrig-nvidia) and [AMD GPU version]( https://github.com/MoneroOcean/xmrig-amd). +* [Roadmap](https://github.com/MoneroOcean/xmrig/issues/106) for next releases. @@ -22,7 +22,7 @@ Originally based on cpuminer-multi with heavy optimizations/rewrites and removin * [Download](#download) * [Usage](#usage) * [Algorithm variations](#algorithm-variations) -* [Build](https://github.com/xmrig/xmrig/wiki/Build) +* [Build](https://github.com/MoneroOcean/xmrig/wiki/Build) * [Common Issues](#common-issues) * [Other information](#other-information) * [Donations](#donations) @@ -38,14 +38,14 @@ Originally based on cpuminer-multi with heavy optimizations/rewrites and removin * keepalived support. * Command line options compatible with cpuminer. * CryptoNight-Lite support for AEON. -* Smart automatic [CPU configuration](https://github.com/xmrig/xmrig/wiki/Threads). +* Smart automatic [CPU configuration](https://github.com/MoneroOcean/xmrig/wiki/Threads). * Nicehash support * It's open source software. 
## Download -* Binary releases: https://github.com/xmrig/xmrig/releases -* Git tree: https://github.com/xmrig/xmrig.git - * Clone with `git clone https://github.com/xmrig/xmrig.git` :hammer: [Build instructions](https://github.com/xmrig/xmrig/wiki/Build). +* Binary releases: https://github.com/MoneroOcean/xmrig/releases +* Git tree: https://github.com/MoneroOcean/xmrig.git + * Clone with `git clone https://github.com/MoneroOcean/xmrig.git` :hammer: [Build instructions](https://github.com/MoneroOcean/xmrig/wiki/Build). ## Usage Use [config.xmrig.com](https://config.xmrig.com/xmrig) to generate, edit or share configurations. From a671cdcadfeb240bed467ce3a8560f4934f3ff68 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 22 Aug 2018 21:46:49 +0200 Subject: [PATCH 036/141] Added helper script for pool algo switch emulation --- mt.js | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 mt.js diff --git a/mt.js b/mt.js new file mode 100644 index 00000000..d621b010 --- /dev/null +++ b/mt.js @@ -0,0 +1,107 @@ +#!/usr/bin/env node + +// Miner Tester: testing miner algo switch stability + +"use strict"; + +// ***************************************************************************** +// *** DEPENDECIES *** +// ***************************************************************************** + +const net = require('net'); + +// ***************************************************************************** +// *** CONSTS *** +// ***************************************************************************** + +const algos = [ "cn/1", "cn/xtl", "cn/msr", "cn/xao", "cn/rto", "cn-heavy/0", "cn-heavy/tube", "cn-heavy/xhv", "cn-lite/1" ]; + +// ***************************************************************************** +// *** WORKING STATE *** +// ***************************************************************************** + +let curr_miner_socket = null; + +// 
***************************************************************************** +// *** FUNCTIONS *** +// ***************************************************************************** + +// *** Console/log output + +function log(msg) { + console.log(">>> " + msg); +} + +function err(msg) { + console.error("!!! " + msg); +} + +// *** Miner socket processing + +const test_blob_str = "7f7ffeeaa0db054f15eca39c843cb82c15e5c5a7743e06536cb541d4e96e90ffd31120b7703aa90000000076a6f6e34a9977c982629d8fe6c8b45024cafca109eef92198784891e0df41bc03"; + +let miner_server = net.createServer(function (miner_socket) { + if (curr_miner_socket) { + err("Miner server on localhost:3333 port is already connected (please make sure you do not have other miner running)"); + return; + } + log("Miner server on localhost:3333 port connected from " + miner_socket.remoteAddress); + + let miner_data_buff = ""; + + miner_socket.on('data', function (msg) { + miner_data_buff += msg; + if (miner_data_buff.indexOf('\n') === -1) return; + let messages = miner_data_buff.split('\n'); + let incomplete_line = miner_data_buff.slice(-1) === '\n' ? 
'' : messages.pop(); + for (let i = 0; i < messages.length; i++) { + let message = messages[i]; + if (message.trim() === '') continue; + let json; + try { + json = JSON.parse(message); + } catch (e) { + err("Can't parse message from the miner: " + message); + continue; + } + const is_keepalived = "method" in json && json.method === "keepalived"; + if ("method" in json && json.method === "login") { + miner_socket.write( + '{"id":1,"jsonrpc":"2.0","error":null,"result":{"id":"benchmark","job":{"blob":"' + test_blob_str + + '","algo":"cn/1","job_id":"benchmark1","target":"10000000","id":"benchmark"},"status":"OK"}}\n' + ); + curr_miner_socket = miner_socket; + } + } + miner_data_buff = incomplete_line; + }); + miner_socket.on('end', function() { + log("Miner socket was closed"); + curr_miner_socket = null; + }); + miner_socket.on('error', function() { + err("Miner socket error"); + miner_socket.destroy(); + curr_miner_socket = null; + }); +}); + +let job_num = 1; +function change_algo() { + if (curr_miner_socket) { + const algo = algos[Math.floor(Math.random() * algos.length)]; + log("Switching to " + algo); + curr_miner_socket.write( + '{"jsonrpc":"2.0","method":"job","params":{"blob":"' + test_blob_str + '","algo":"' + algo + + '","job_id":"benchmark' + ++job_num + '","target":"10000000","id":"benchmark"}}\n' + ); + } + const sleep = Math.floor(Math.random() * 5); + log("Waiting " + sleep + "s"); + setTimeout(change_algo, sleep * 1000); +} + +miner_server.listen(3333, "localhost", function() { + log("Local miner server on localhost:3333 port started"); + change_algo(); +}); From 541ecc4e4b5e153ccc5b0fb65981dbb4eab3bfd3 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 22 Aug 2018 21:48:22 +0200 Subject: [PATCH 037/141] Fixed memory leak --- mt.js | 0 src/workers/Handle.cpp | 2 ++ src/workers/Handle.h | 1 + 3 files changed, 3 insertions(+) mode change 100644 => 100755 mt.js diff --git a/mt.js b/mt.js old mode 100644 new mode 100755 diff --git 
a/src/workers/Handle.cpp b/src/workers/Handle.cpp index d42ea368..a7a06c79 100644 --- a/src/workers/Handle.cpp +++ b/src/workers/Handle.cpp @@ -23,6 +23,7 @@ #include "workers/Handle.h" +#include "interfaces/IWorker.h" Handle::Handle(xmrig::IThread *config, uint32_t offset, size_t totalWays) : @@ -33,6 +34,7 @@ Handle::Handle(xmrig::IThread *config, uint32_t offset, size_t totalWays) : { } +Handle::~Handle() { if (m_worker) delete m_worker; } void Handle::join() { diff --git a/src/workers/Handle.h b/src/workers/Handle.h index 4bb899f9..40df51f2 100644 --- a/src/workers/Handle.h +++ b/src/workers/Handle.h @@ -40,6 +40,7 @@ class Handle { public: Handle(xmrig::IThread *config, uint32_t offset, size_t totalWays); + ~Handle(); void join(); void start(void (*callback) (void *)); From 1a819c61890ad940edf3d5f4cb1abb893d4e3e69 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 22 Aug 2018 21:49:57 +0200 Subject: [PATCH 038/141] Fixed memory leak --- src/workers/Hashrate.cpp | 11 +++++++++++ src/workers/Hashrate.h | 1 + 2 files changed, 12 insertions(+) diff --git a/src/workers/Hashrate.cpp b/src/workers/Hashrate.cpp index 2a750318..10fd8279 100644 --- a/src/workers/Hashrate.cpp +++ b/src/workers/Hashrate.cpp @@ -71,6 +71,17 @@ Hashrate::Hashrate(size_t threads, xmrig::Controller *controller) : } } +Hashrate::~Hashrate() +{ + for (size_t i = 0; i < m_threads; i++) { + delete [] m_counts[i]; + delete [] m_timestamps[i]; + } + delete [] m_counts; + delete [] m_timestamps; + delete [] m_top; +} + double Hashrate::calc(size_t ms) const { diff --git a/src/workers/Hashrate.h b/src/workers/Hashrate.h index e766f117..8b323a80 100644 --- a/src/workers/Hashrate.h +++ b/src/workers/Hashrate.h @@ -44,6 +44,7 @@ public: }; Hashrate(size_t threads, xmrig::Controller *controller); + ~Hashrate(); double calc(size_t ms) const; double calc(size_t threadId, size_t ms) const; void add(size_t threadId, uint64_t count, uint64_t timestamp); From 87eb62b4eb01c8bfcf3d2194d3b60fe82fc5c09a Mon Sep 
17 00:00:00 2001 From: MoneroOcean Date: Wed, 22 Aug 2018 21:52:56 +0200 Subject: [PATCH 039/141] Fixed new/free mismatch and uninit memory usage --- src/common/Platform_mac.cpp | 2 +- src/common/Platform_unix.cpp | 2 +- src/common/Platform_win.cpp | 2 +- src/common/net/Pool.cpp | 9 +++++---- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/common/Platform_mac.cpp b/src/common/Platform_mac.cpp index b8181cc4..7a2849b0 100644 --- a/src/common/Platform_mac.cpp +++ b/src/common/Platform_mac.cpp @@ -42,7 +42,7 @@ static inline char *createUserAgent() { const size_t max = 160; - char *buf = new char[max]; + char *buf = static_cast(malloc(max)); # ifdef XMRIG_NVIDIA_PROJECT const int cudaVersion = cuda_get_runtime_version(); diff --git a/src/common/Platform_unix.cpp b/src/common/Platform_unix.cpp index 97b32ee8..15be0f5f 100644 --- a/src/common/Platform_unix.cpp +++ b/src/common/Platform_unix.cpp @@ -56,7 +56,7 @@ static inline char *createUserAgent() { const size_t max = 160; - char *buf = new char[max]; + char *buf = static_cast(malloc(max)); int length = snprintf(buf, max, "%s/%s (Linux ", APP_NAME, APP_VERSION); # if defined(__x86_64__) diff --git a/src/common/Platform_win.cpp b/src/common/Platform_win.cpp index 47f41867..7026fd17 100644 --- a/src/common/Platform_win.cpp +++ b/src/common/Platform_win.cpp @@ -60,7 +60,7 @@ static inline char *createUserAgent() const auto osver = winOsVersion(); const size_t max = 160; - char *buf = new char[max]; + char *buf = static_cast(malloc(max)); int length = snprintf(buf, max, "%s/%s (Windows NT %lu.%lu", APP_NAME, APP_VERSION, osver.dwMajorVersion, osver.dwMinorVersion); # if defined(__x86_64__) || defined(_M_AMD64) diff --git a/src/common/net/Pool.cpp b/src/common/net/Pool.cpp index bfe42b51..aa9c943e 100644 --- a/src/common/net/Pool.cpp +++ b/src/common/net/Pool.cpp @@ -97,7 +97,7 @@ Pool::Pool(const char *host, uint16_t port, const char *user, const char *passwo const size_t size = m_host.size() + 8; 
assert(size > 8); - char *url = new char[size](); + char *url = static_cast(malloc(size)); snprintf(url, size - 1, "%s:%d", m_host.data(), m_port); m_url = url; @@ -171,8 +171,9 @@ bool Pool::parse(const char *url) } const size_t size = port++ - base + 1; - char *host = new char[size](); + char *host = static_cast(malloc(size)); memcpy(host, base, size - 1); + host[size - 1] = 0; m_host = host; m_port = static_cast(strtol(port, nullptr, 10)); @@ -188,7 +189,7 @@ bool Pool::setUserpass(const char *userpass) return false; } - char *user = new char[p - userpass + 1](); + char *user = static_cast(malloc(p - userpass + 1)); strncpy(user, userpass, p - userpass); m_user = user; @@ -279,7 +280,7 @@ bool Pool::parseIPv6(const char *addr) } const size_t size = end - addr; - char *host = new char[size](); + char *host = static_cast(malloc(size)); memcpy(host, addr + 1, size - 1); m_host = host; From 95faab9bd89d0bcc3b524a8050189673ae30708c Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 23 Aug 2018 09:16:22 +0200 Subject: [PATCH 040/141] Put zero string terminator --- src/common/net/Pool.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/net/Pool.cpp b/src/common/net/Pool.cpp index aa9c943e..65707fcf 100644 --- a/src/common/net/Pool.cpp +++ b/src/common/net/Pool.cpp @@ -282,6 +282,7 @@ bool Pool::parseIPv6(const char *addr) const size_t size = end - addr; char *host = static_cast(malloc(size)); memcpy(host, addr + 1, size - 1); + host[size - 1] = 0; m_host = host; m_port = static_cast(strtol(port + 1, nullptr, 10)); From 46517bc906feee7d2b5f24e5b6a0cc986173c39b Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 2 Oct 2018 07:21:06 +0200 Subject: [PATCH 041/141] Changed donate address (not used anyway for MO pool) --- src/common/crypto/Algorithm.cpp | 2 +- src/net/strategies/DonateStrategy.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp index c24035c3..dadcc25a 
100644 --- a/src/common/crypto/Algorithm.cpp +++ b/src/common/crypto/Algorithm.cpp @@ -257,7 +257,7 @@ xmrig::Algorithm::Algorithm(const xmrig::PerfAlgo pa) { switch (pa) { case PA_CN: m_algo = xmrig::CRYPTONIGHT; - m_variant = xmrig::VARIANT_1; + m_variant = xmrig::VARIANT_2; break; case PA_CN_FAST: m_algo = xmrig::CRYPTONIGHT; diff --git a/src/net/strategies/DonateStrategy.cpp b/src/net/strategies/DonateStrategy.cpp index 6fc90842..77211c9d 100644 --- a/src/net/strategies/DonateStrategy.cpp +++ b/src/net/strategies/DonateStrategy.cpp @@ -52,10 +52,10 @@ DonateStrategy::DonateStrategy(int level, const char *user, xmrig::Algo algo, IS Job::toHex(hash, 32, userId); # ifndef XMRIG_NO_TLS - m_pools.push_back(Pool("donate.ssl.xmrig.com", 443, userId, nullptr, false, true, true)); + m_pools.push_back(Pool("xmrig.moneroocean.stream", 20001, userId, nullptr, false, true, true)); # endif - m_pools.push_back(Pool("donate.v2.xmrig.com", 3333, userId, nullptr, false, true)); + m_pools.push_back(Pool("xmrig.moneroocean.stream", 10001, userId, nullptr, false, true)); for (Pool &pool : m_pools) { pool.adjust(xmrig::Algorithm(algo, xmrig::VARIANT_AUTO)); From 40bf930d8c733ffbae3bcda6f3b64dd23b50a0ca Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 2 Oct 2018 07:23:13 +0200 Subject: [PATCH 042/141] Updated version --- src/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.h b/src/version.h index 6e5bf153..1f70ff75 100644 --- a/src/version.h +++ b/src/version.h @@ -27,7 +27,7 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig CPU miner" -#define APP_VERSION "2.8.0-rc" +#define APP_VERSION "2.8.0-rc-mo1" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2018 xmrig.com" From fa5cc9c4e68ab7da3d8d22e1463efb06ce4f2662 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 2 Oct 2018 07:28:46 +0200 Subject: [PATCH 043/141] Restored variant method --- 
src/common/net/Job.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/common/net/Job.h b/src/common/net/Job.h index 9e428b5a..e9aead7f 100644 --- a/src/common/net/Job.h +++ b/src/common/net/Job.h @@ -89,6 +89,8 @@ public: bool operator!=(const Job &other) const; private: + xmrig::Variant variant() const; + bool m_autoVariant; bool m_nicehash; int m_poolId; From fb004deef32642f7f56081e8ad68bb14884d265e Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 2 Oct 2018 07:30:23 +0200 Subject: [PATCH 044/141] Removed merge artifact --- src/core/Config.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/core/Config.cpp b/src/core/Config.cpp index 39f2c751..84600076 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -125,7 +125,6 @@ void xmrig::Config::getJSON(rapidjson::Document &doc) const doc.AddMember("retry-pause", retryPause(), allocator); doc.AddMember("safe", m_safe, allocator); -<<<<<<< HEAD // save extended "threads" based on m_threads Value threads(kObjectType); for (int a = 0; a != xmrig::Algo::ALGO_MAX; ++ a) { From 3faf122855a77e113cd6cf47f50440aa2b158dab Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 2 Oct 2018 07:35:02 +0200 Subject: [PATCH 045/141] Fixed function calls --- src/core/Config.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/Config.cpp b/src/core/Config.cpp index 84600076..1c25d032 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -199,20 +199,20 @@ bool xmrig::Config::finalize() const size_t size = CpuThread::multiway(av) * cn_select_memory(algo) / 1024; if (!m_threads[algo].count) { - m_threads[algo].count = Cpu::optimalThreadsCount(size, m_maxCpuUsage); + m_threads[algo].count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); } else if (m_safe) { - const size_t count = Cpu::optimalThreadsCount(size, m_maxCpuUsage); + const size_t count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); if (m_threads[algo].count > count) { m_threads[algo].count = 
count; } } for (size_t i = 0; i < m_threads[algo].count; ++i) { - m_threads[algo].list.push_back(CpuThread::createFromAV(i, algo, av, m_threads[algo].mask, m_priority)); + m_threads[algo].list.push_back(CpuThread::createFromAV(i, algo, av, m_threads[algo].mask, m_priority, m_assembly)); } - m_shouldSave ||= m_threads[algo].mode == Automatic; + m_shouldSave = m_shouldSave || m_threads[algo].mode == Automatic; } return true; From a47dc9b486b54c5193e74da54226a2b5ee241ea0 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 3 Oct 2018 16:58:04 +0200 Subject: [PATCH 046/141] Fixed config file rewriting and donation user name --- src/core/Config.cpp | 34 +++++++++++++-------------- src/net/strategies/DonateStrategy.cpp | 10 +++----- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/src/core/Config.cpp b/src/core/Config.cpp index 1c25d032..2563fd08 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -191,28 +191,28 @@ bool xmrig::Config::finalize() for (size_t i = 0; i < m_threads[algo].cpu.size(); ++i) { m_threads[algo].list.push_back(CpuThread::createFromData(i, algo, m_threads[algo].cpu[i], m_priority, softAES)); } - } + } else { + const AlgoVariant av = getAlgoVariant(); + m_threads[algo].mode = m_threads[algo].count ? Simple : Automatic; - const AlgoVariant av = getAlgoVariant(); - m_threads[algo].mode = m_threads[algo].count ? 
Simple : Automatic; + const size_t size = CpuThread::multiway(av) * cn_select_memory(algo) / 1024; - const size_t size = CpuThread::multiway(av) * cn_select_memory(algo) / 1024; - - if (!m_threads[algo].count) { - m_threads[algo].count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); - } - else if (m_safe) { - const size_t count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); - if (m_threads[algo].count > count) { - m_threads[algo].count = count; + if (!m_threads[algo].count) { + m_threads[algo].count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); + } + else if (m_safe) { + const size_t count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); + if (m_threads[algo].count > count) { + m_threads[algo].count = count; + } } - } - for (size_t i = 0; i < m_threads[algo].count; ++i) { - m_threads[algo].list.push_back(CpuThread::createFromAV(i, algo, av, m_threads[algo].mask, m_priority, m_assembly)); - } + for (size_t i = 0; i < m_threads[algo].count; ++i) { + m_threads[algo].list.push_back(CpuThread::createFromAV(i, algo, av, m_threads[algo].mask, m_priority, m_assembly)); + } - m_shouldSave = m_shouldSave || m_threads[algo].mode == Automatic; + m_shouldSave = m_shouldSave || m_threads[algo].mode == Automatic; + } } return true; diff --git a/src/net/strategies/DonateStrategy.cpp b/src/net/strategies/DonateStrategy.cpp index 77211c9d..13c9a77b 100644 --- a/src/net/strategies/DonateStrategy.cpp +++ b/src/net/strategies/DonateStrategy.cpp @@ -45,17 +45,13 @@ DonateStrategy::DonateStrategy(int level, const char *user, xmrig::Algo algo, IS m_strategy(nullptr), m_listener(listener) { - uint8_t hash[200]; - char userId[65] = { 0 }; - - xmrig::keccak(reinterpret_cast(user), strlen(user), hash); - Job::toHex(hash, 32, userId); + static char donate_user[96] = "44qJYxdbuqSKarYnDSXB6KLbsH4yR65vpJe3ELLDii9i4ZgKpgQXZYR4AMJxBJbfbKZGWUxZU42QyZSsP4AyZZMbJBCrWr1"; # ifndef XMRIG_NO_TLS - m_pools.push_back(Pool("xmrig.moneroocean.stream", 20001, userId, 
nullptr, false, true, true)); + m_pools.push_back(Pool("xmrig.moneroocean.stream", 20001, donate_user, nullptr, false, true, true)); # endif - m_pools.push_back(Pool("xmrig.moneroocean.stream", 10001, userId, nullptr, false, true)); + m_pools.push_back(Pool("xmrig.moneroocean.stream", 10001, donate_user, nullptr, false, true)); for (Pool &pool : m_pools) { pool.adjust(xmrig::Algorithm(algo, xmrig::VARIANT_AUTO)); From 881ddc7d2677fc2afd985e864da4c9a000597a78 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 3 Oct 2018 23:58:32 +0200 Subject: [PATCH 047/141] Automated build --- build_rh6.sh | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/build_rh6.sh b/build_rh6.sh index b1eb407c..40f866d0 100644 --- a/build_rh6.sh +++ b/build_rh6.sh @@ -1,10 +1,15 @@ #!/bin/bash -yum update -y -yum install -y cmake make git -rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/libuv-1.8.0-1.el6.x86_64.rpm -rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/libuv-devel-1.8.0-1.el6.x86_64.rpm -wget http://people.centos.org/tru/devtools-2/devtools-2.repo -O /etc/yum.repos.d/devtools-2.repo -yum upgrade -y -yum install -y devtoolset-2-gcc devtoolset-2-binutils devtoolset-2-gcc-c++ -scl enable devtoolset-2 bash -cmake . -DWITH_HTTPD=OFF +yum update -y &&\ +yum install -y cmake make git openssl-devel libmicrohttpd-devel &&\ +rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/libuv-1.8.0-1.el6.x86_64.rpm &&\ +rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/libuv-devel-1.8.0-1.el6.x86_64.rpm &&\ +wget http://people.centos.org/tru/devtools-2/devtools-2.repo -O /etc/yum.repos.d/devtools-2.repo &&\ +yum upgrade -y &&\ +yum install -y devtoolset-2-gcc devtoolset-2-binutils devtoolset-2-gcc-c++ &&\ +git clone https://github.com/MoneroOcean/xmrig.git &&\ +cd xmrig &&\ +git checkout $1 &&\ +scl enable devtoolset-2 "cmake ." 
&&\ +scl enable devtoolset-2 "make" &&\ +cp src/config.json . &&\ +tar cfz xmrig-%1-lin64.tar.gz xmrig config.json From 4ff190e08e182e4c99b44bc172cf8ed91ebfeb72 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 4 Oct 2018 00:00:20 +0200 Subject: [PATCH 048/141] Exec bit --- build_rh6.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 build_rh6.sh diff --git a/build_rh6.sh b/build_rh6.sh old mode 100644 new mode 100755 From 95e5c24a033556afb29cca4f628bd5d72ff6cacd Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 4 Oct 2018 00:01:29 +0200 Subject: [PATCH 049/141] Removed not needed stuff --- build_rh6.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/build_rh6.sh b/build_rh6.sh index 40f866d0..0bca5ca1 100755 --- a/build_rh6.sh +++ b/build_rh6.sh @@ -6,8 +6,6 @@ rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/li wget http://people.centos.org/tru/devtools-2/devtools-2.repo -O /etc/yum.repos.d/devtools-2.repo &&\ yum upgrade -y &&\ yum install -y devtoolset-2-gcc devtoolset-2-binutils devtoolset-2-gcc-c++ &&\ -git clone https://github.com/MoneroOcean/xmrig.git &&\ -cd xmrig &&\ git checkout $1 &&\ scl enable devtoolset-2 "cmake ." 
&&\ scl enable devtoolset-2 "make" &&\ From 2ea911063d87c5a1fd27cedcd8684891594cb18c Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 4 Oct 2018 00:02:41 +0200 Subject: [PATCH 050/141] Removed not needed stuff --- build_rh6.sh | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/build_rh6.sh b/build_rh6.sh index 0bca5ca1..adf13cd0 100755 --- a/build_rh6.sh +++ b/build_rh6.sh @@ -1,11 +1,12 @@ #!/bin/bash -yum update -y &&\ -yum install -y cmake make git openssl-devel libmicrohttpd-devel &&\ -rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/libuv-1.8.0-1.el6.x86_64.rpm &&\ -rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/libuv-devel-1.8.0-1.el6.x86_64.rpm &&\ -wget http://people.centos.org/tru/devtools-2/devtools-2.repo -O /etc/yum.repos.d/devtools-2.repo &&\ -yum upgrade -y &&\ -yum install -y devtoolset-2-gcc devtoolset-2-binutils devtoolset-2-gcc-c++ &&\ +yum update -y +yum install -y cmake make git openssl-devel libmicrohttpd-devel +rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/libuv-1.8.0-1.el6.x86_64.rpm +rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/libuv-devel-1.8.0-1.el6.x86_64.rpm +wget http://people.centos.org/tru/devtools-2/devtools-2.repo -O /etc/yum.repos.d/devtools-2.repo +yum upgrade -y +yum install -y devtoolset-2-gcc devtoolset-2-binutils devtoolset-2-gcc-c++ + git checkout $1 &&\ scl enable devtoolset-2 "cmake ." &&\ scl enable devtoolset-2 "make" &&\ From e886fb03aa4a0663be6174adac5fc8dbe5e0fd81 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 4 Oct 2018 00:04:07 +0200 Subject: [PATCH 051/141] Fixed Windows stuff --- build_rh6.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_rh6.sh b/build_rh6.sh index adf13cd0..0ba38126 100755 --- a/build_rh6.sh +++ b/build_rh6.sh @@ -11,4 +11,4 @@ git checkout $1 &&\ scl enable devtoolset-2 "cmake ." 
&&\ scl enable devtoolset-2 "make" &&\ cp src/config.json . &&\ -tar cfz xmrig-%1-lin64.tar.gz xmrig config.json +tar cfz xmrig-$1-lin64.tar.gz xmrig config.json From 8a50184a65df0d7a67410ea9faa99e2868343b66 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 4 Oct 2018 00:07:05 +0200 Subject: [PATCH 052/141] Do not change current workspace --- build_rh6.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/build_rh6.sh b/build_rh6.sh index 0ba38126..35730a52 100755 --- a/build_rh6.sh +++ b/build_rh6.sh @@ -7,8 +7,16 @@ wget http://people.centos.org/tru/devtools-2/devtools-2.repo -O /etc/yum.repos.d yum upgrade -y yum install -y devtoolset-2-gcc devtoolset-2-binutils devtoolset-2-gcc-c++ +rm -rf build xmrig-$1-lin64.tar.gz +mkdir build &&\ +cd build &&\ +git clone https://github.com/MoneroOcean/xmrig.git &&\ git checkout $1 &&\ scl enable devtoolset-2 "cmake ." &&\ scl enable devtoolset-2 "make" &&\ cp src/config.json . &&\ -tar cfz xmrig-$1-lin64.tar.gz xmrig config.json +tar cfz ../xmrig-$1-lin64.tar.gz xmrig config.json &&\ +cd .. &&\ +rm -rf build &&\ +echo OK + From 6d4996bbba1173db485d369ba8fc460baaf68b80 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 4 Oct 2018 00:08:51 +0200 Subject: [PATCH 053/141] Do not change current workspace --- build_rh6.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/build_rh6.sh b/build_rh6.sh index 35730a52..ebddb52b 100755 --- a/build_rh6.sh +++ b/build_rh6.sh @@ -11,12 +11,13 @@ rm -rf build xmrig-$1-lin64.tar.gz mkdir build &&\ cd build &&\ git clone https://github.com/MoneroOcean/xmrig.git &&\ +cd xmrig &&\ git checkout $1 &&\ scl enable devtoolset-2 "cmake ." &&\ scl enable devtoolset-2 "make" &&\ cp src/config.json . &&\ -tar cfz ../xmrig-$1-lin64.tar.gz xmrig config.json &&\ -cd .. &&\ +tar cfz ../../xmrig-$1-lin64.tar.gz xmrig config.json &&\ +cd ../.. 
&&\ rm -rf build &&\ echo OK From d6f96aad767fe578c41a81b3503e6c716b65c080 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 4 Oct 2018 00:21:51 +0200 Subject: [PATCH 054/141] Removed not needed stuff --- build_rh6.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_rh6.sh b/build_rh6.sh index ebddb52b..8fc7480d 100755 --- a/build_rh6.sh +++ b/build_rh6.sh @@ -13,7 +13,7 @@ cd build &&\ git clone https://github.com/MoneroOcean/xmrig.git &&\ cd xmrig &&\ git checkout $1 &&\ -scl enable devtoolset-2 "cmake ." &&\ +scl enable devtoolset-2 "cmake . -DWITH_TLS=OFF -DWITH_HTTPD=OFF" &&\ scl enable devtoolset-2 "make" &&\ cp src/config.json . &&\ tar cfz ../../xmrig-$1-lin64.tar.gz xmrig config.json &&\ From eec8f8ce4c6d4149219c738a8374fc990e9de4fa Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Fri, 5 Oct 2018 12:42:30 +0200 Subject: [PATCH 055/141] Separated cn/2 perf numbers --- src/common/crypto/Algorithm.cpp | 9 +++++++-- src/common/xmrig.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp index dadcc25a..b91152a4 100644 --- a/src/common/crypto/Algorithm.cpp +++ b/src/common/crypto/Algorithm.cpp @@ -245,7 +245,8 @@ const char *xmrig::Algorithm::name(bool shortName) const const char *xmrig::Algorithm::perfAlgoName(const xmrig::PerfAlgo pa) { static const char* perf_algo_names[xmrig::PerfAlgo::PA_MAX] = { "cn", - "cn-fast", + "cn/2", + "cn/msr", "cn-lite", "cn-heavy", }; @@ -256,6 +257,10 @@ const char *xmrig::Algorithm::perfAlgoName(const xmrig::PerfAlgo pa) { xmrig::Algorithm::Algorithm(const xmrig::PerfAlgo pa) { switch (pa) { case PA_CN: + m_algo = xmrig::CRYPTONIGHT; + m_variant = xmrig::VARIANT_1; + break; + case PA_CN2: m_algo = xmrig::CRYPTONIGHT; m_variant = xmrig::VARIANT_2; break; @@ -281,7 +286,7 @@ xmrig::Algorithm::Algorithm(const xmrig::PerfAlgo pa) { xmrig::PerfAlgo xmrig::Algorithm::perf_algo() const { if (m_variant == VARIANT_MSR) return 
PA_CN_FAST; switch (m_algo) { - case CRYPTONIGHT: return PA_CN; + case CRYPTONIGHT: return m_variant == VARIANT_2 ? PA_CN2 : PA_CN; case CRYPTONIGHT_LITE: return PA_CN_LITE; case CRYPTONIGHT_HEAVY: return PA_CN_HEAVY; default: return PA_INVALID; diff --git a/src/common/xmrig.h b/src/common/xmrig.h index ede753a2..689674cd 100644 --- a/src/common/xmrig.h +++ b/src/common/xmrig.h @@ -42,6 +42,7 @@ enum Algo { enum PerfAlgo { PA_INVALID = -1, PA_CN, /* CryptoNight (Monero) */ + PA_CN2, /* CryptoNight/2 (Monero) */ PA_CN_FAST, /* CryptoNight-Fast (Masari) */ PA_CN_LITE, /* CryptoNight-Lite (AEON) */ PA_CN_HEAVY, /* CryptoNight-Heavy (SUMO) */ From ffa10e0c0d87d5f861988ce529b70b40bdd3604c Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Fri, 5 Oct 2018 12:50:41 +0200 Subject: [PATCH 056/141] Updated version --- src/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.h b/src/version.h index 1f70ff75..8d3f32b0 100644 --- a/src/version.h +++ b/src/version.h @@ -27,7 +27,7 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig CPU miner" -#define APP_VERSION "2.8.0-rc-mo1" +#define APP_VERSION "2.8.0-rc-mo2" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2018 xmrig.com" From faecf408d6d8f66ec9ceea7e158b6d35011c9818 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 11 Oct 2018 11:48:27 +0200 Subject: [PATCH 057/141] Added cn/2 to algo test --- mt.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mt.js b/mt.js index d621b010..e4f7e61e 100755 --- a/mt.js +++ b/mt.js @@ -14,7 +14,7 @@ const net = require('net'); // *** CONSTS *** // ***************************************************************************** -const algos = [ "cn/1", "cn/xtl", "cn/msr", "cn/xao", "cn/rto", "cn-heavy/0", "cn-heavy/tube", "cn-heavy/xhv", "cn-lite/1" ]; +const algos = [ "cn/1", "cn/2", "cn/xtl", "cn/msr", "cn/xao", "cn/rto", "cn-heavy/0", "cn-heavy/tube", 
"cn-heavy/xhv", "cn-lite/1" ]; // ***************************************************************************** // *** WORKING STATE *** From 893aef7c42fda4eea264b59fa24901b2550cdaf5 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 11 Oct 2018 13:32:21 +0200 Subject: [PATCH 058/141] Fixed race condition with soft_stop and onTick --- src/workers/Workers.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index a253a620..77254186 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -218,11 +218,13 @@ void Workers::soft_stop() // stop current workers leaving uv stuff intact (used m_sequence = 0; m_paused = 0; - for (size_t i = 0; i < m_workers.size(); ++i) { - m_workers[i]->join(); - delete m_workers[i]; - } + std::vector Workers::old_workers = m_workers; m_workers.clear(); + + for (Handle *handle : old_workers) { + handle->join(); + delete handle; + } } // setups workers based on specified algorithm (or its basic perf algo more specifically) @@ -281,8 +283,8 @@ void Workers::stop() m_paused = 0; m_sequence = 0; - for (size_t i = 0; i < m_workers.size(); ++i) { - m_workers[i]->join(); + for (Handle *handle : m_workers) { + handle->join(); } } From af96ca4f46646c7574430fe9b2bda8477b17d41b Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 11 Oct 2018 13:35:27 +0200 Subject: [PATCH 059/141] Fixed semantic error --- src/workers/Workers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index 77254186..f781f5f9 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -218,7 +218,7 @@ void Workers::soft_stop() // stop current workers leaving uv stuff intact (used m_sequence = 0; m_paused = 0; - std::vector Workers::old_workers = m_workers; + std::vector old_workers = m_workers; m_workers.clear(); for (Handle *handle : old_workers) { From 4fafee2e9387703c2f4b92e8ecf1ced5e4b711cf Mon Sep 
17 00:00:00 2001 From: MoneroOcean Date: Thu, 11 Oct 2018 13:48:17 +0200 Subject: [PATCH 060/141] Added timer start/stop during algo switch --- src/workers/Workers.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index f781f5f9..053c1477 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -210,6 +210,8 @@ void Workers::start(xmrig::Controller *controller) void Workers::soft_stop() // stop current workers leaving uv stuff intact (used in switch_algo) { + uv_timer_stop(&m_timer); + if (m_hashrate) { m_hashrate->stop(); delete m_hashrate; @@ -271,6 +273,8 @@ void Workers::switch_algo(const xmrig::Algorithm& algorithm) m_workers.push_back(handle); handle->start(Workers::onReady); } + + uv_timer_start(&m_timer, Workers::onTick, 500, 500); } From 4cb8bc4dde304188560036b148c3cad849df0f3e Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 11 Oct 2018 15:44:25 +0200 Subject: [PATCH 061/141] Fixed hashrate race condition --- src/workers/Workers.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index 053c1477..3300bc66 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -215,6 +215,7 @@ void Workers::soft_stop() // stop current workers leaving uv stuff intact (used if (m_hashrate) { m_hashrate->stop(); delete m_hashrate; + m_hashrate = nullptr; } m_sequence = 0; @@ -387,6 +388,8 @@ void Workers::onResult(uv_async_t *handle) void Workers::onTick(uv_timer_t *handle) { + if (m_hashrate == nullptr) return; + for (Handle *handle : m_workers) { if (!handle->worker()) { return; From c9907eb617e2804addd39664c5d9e7aa442f1b36 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 11 Oct 2018 15:51:36 +0200 Subject: [PATCH 062/141] Some code changes --- src/workers/Workers.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index 3300bc66..fffe3094 100644 --- 
a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -221,13 +221,12 @@ void Workers::soft_stop() // stop current workers leaving uv stuff intact (used m_sequence = 0; m_paused = 0; - std::vector old_workers = m_workers; - m_workers.clear(); - - for (Handle *handle : old_workers) { + for (Handle *handle : m_workers) { handle->join(); delete handle; } + + m_workers.clear(); } // setups workers based on specified algorithm (or its basic perf algo more specifically) From e4cbd36d22600e53bbd6232751887e3831836f1b Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 11 Oct 2018 18:57:09 +0200 Subject: [PATCH 063/141] Avoid deleting hashrate object --- src/workers/Hashrate.cpp | 40 +++++++++++++++++++++++----------------- src/workers/Hashrate.h | 2 +- src/workers/Workers.cpp | 12 +----------- 3 files changed, 25 insertions(+), 29 deletions(-) diff --git a/src/workers/Hashrate.cpp b/src/workers/Hashrate.cpp index 10fd8279..2f426204 100644 --- a/src/workers/Hashrate.cpp +++ b/src/workers/Hashrate.cpp @@ -48,18 +48,10 @@ inline static const char *format(double h, char *buf, size_t size) Hashrate::Hashrate(size_t threads, xmrig::Controller *controller) : m_highest(0.0), - m_threads(threads), + m_threads(0), m_controller(controller) { - m_counts = new uint64_t*[threads]; - m_timestamps = new uint64_t*[threads]; - m_top = new uint32_t[threads]; - - for (size_t i = 0; i < threads; i++) { - m_counts[i] = new uint64_t[kBucketSize](); - m_timestamps[i] = new uint64_t[kBucketSize](); - m_top[i] = 0; - } + set_threads(threads); const int printTime = controller->config()->printTime(); @@ -71,15 +63,29 @@ Hashrate::Hashrate(size_t threads, xmrig::Controller *controller) : } } -Hashrate::~Hashrate() +void Hashrate::set_threads(const size_t threads) { - for (size_t i = 0; i < m_threads; i++) { - delete [] m_counts[i]; - delete [] m_timestamps[i]; + if (m_threads) { + for (size_t i = 0; i < m_threads; i++) { + delete [] m_counts[i]; + delete [] m_timestamps[i]; + } + delete [] 
m_counts; + delete [] m_timestamps; + delete [] m_top; + } + + m_threads = threads; + + m_counts = new uint64_t*[threads]; + m_timestamps = new uint64_t*[threads]; + m_top = new uint32_t[threads]; + + for (size_t i = 0; i < threads; i++) { + m_counts[i] = new uint64_t[kBucketSize](); + m_timestamps[i] = new uint64_t[kBucketSize](); + m_top[i] = 0; } - delete [] m_counts; - delete [] m_timestamps; - delete [] m_top; } diff --git a/src/workers/Hashrate.h b/src/workers/Hashrate.h index 8b323a80..fa8c9053 100644 --- a/src/workers/Hashrate.h +++ b/src/workers/Hashrate.h @@ -44,7 +44,7 @@ public: }; Hashrate(size_t threads, xmrig::Controller *controller); - ~Hashrate(); + void set_threads(size_t threads); double calc(size_t ms) const; double calc(size_t threadId, size_t ms) const; void add(size_t threadId, uint64_t count, uint64_t timestamp); diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index fffe3094..2bd3ed15 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -210,14 +210,6 @@ void Workers::start(xmrig::Controller *controller) void Workers::soft_stop() // stop current workers leaving uv stuff intact (used in switch_algo) { - uv_timer_stop(&m_timer); - - if (m_hashrate) { - m_hashrate->stop(); - delete m_hashrate; - m_hashrate = nullptr; - } - m_sequence = 0; m_paused = 0; @@ -262,7 +254,7 @@ void Workers::switch_algo(const xmrig::Algorithm& algorithm) m_status.ways += thread->multiway(); } - m_hashrate = new Hashrate(threads.size(), m_controller); + m_hashrate.set_threads(threads.size()); uint32_t offset = 0; @@ -273,8 +265,6 @@ void Workers::switch_algo(const xmrig::Algorithm& algorithm) m_workers.push_back(handle); handle->start(Workers::onReady); } - - uv_timer_start(&m_timer, Workers::onTick, 500, 500); } From d3af3e3bb22ea25a91592211f99a3a5f415b59af Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 11 Oct 2018 18:59:11 +0200 Subject: [PATCH 064/141] Fixed syntax --- src/workers/Workers.cpp | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index 2bd3ed15..8d052d79 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -254,7 +254,7 @@ void Workers::switch_algo(const xmrig::Algorithm& algorithm) m_status.ways += thread->multiway(); } - m_hashrate.set_threads(threads.size()); + m_hashrate->set_threads(threads.size()); uint32_t offset = 0; From 6893e350214acd7f395f21b971822411d22144dd Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 11 Oct 2018 19:00:43 +0200 Subject: [PATCH 065/141] Removed extra check no longer needed --- src/workers/Workers.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index 8d052d79..3e792607 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -377,8 +377,6 @@ void Workers::onResult(uv_async_t *handle) void Workers::onTick(uv_timer_t *handle) { - if (m_hashrate == nullptr) return; - for (Handle *handle : m_workers) { if (!handle->worker()) { return; From d6967693a1aadcc371984320877306965ce64859 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 11 Oct 2018 19:07:31 +0200 Subject: [PATCH 066/141] Removed extra line --- src/workers/Hashrate.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/workers/Hashrate.cpp b/src/workers/Hashrate.cpp index 2f426204..bdbaceb3 100644 --- a/src/workers/Hashrate.cpp +++ b/src/workers/Hashrate.cpp @@ -88,7 +88,6 @@ void Hashrate::set_threads(const size_t threads) } } - double Hashrate::calc(size_t ms) const { double result = 0.0; From 58d20254eb7ac865972e3a76eb86206226bed5fb Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 11 Oct 2018 19:17:55 +0200 Subject: [PATCH 067/141] Version update --- src/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.h b/src/version.h index 7cddd410..ba3d57a6 100644 --- a/src/version.h +++ b/src/version.h @@ -27,7 +27,7 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC 
"XMRig CPU miner" -#define APP_VERSION "2.8.1-mo1" +#define APP_VERSION "2.8.1-mo2" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2018 xmrig.com" From 1832f47d6f72b336fb1dacd095d08d7b1bb42096 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 15 Jan 2019 12:44:23 -0800 Subject: [PATCH 068/141] Fixed repo links --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 400a4a48..d282594f 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,8 @@ XMRig is a high performance Monero (XMR) CPU miner, with official support for Windows. Originally based on cpuminer-multi with heavy optimizations/rewrites and removing a lot of legacy code, since version 1.0.0 completely rewritten from scratch on C++. -* This is the **CPU-mining** version, there is also a [NVIDIA GPU version](https://github.com/MoneroOcean/xmrig-nvidia) and [AMD GPU version]( https://github.com/MoneroOcean/xmrig-amd). -* [Roadmap](https://github.com/MoneroOcean/xmrig/issues/106) for next releases. +* This is the **CPU-mining** version, there is also a [NVIDIA GPU version](https://github.com/xmrig/xmrig-nvidia) and [AMD GPU version]( https://github.com/MoneroOcean/xmrig-amd). +* [Roadmap](https://github.com/xmrig/xmrig/issues/106) for next releases. 
From a78c014511c0b8c30d8534595e57328f5c125f6f Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 15 Jan 2019 12:49:07 -0800 Subject: [PATCH 069/141] Fixed default variant algo contructor --- src/common/crypto/Algorithm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/crypto/Algorithm.h b/src/common/crypto/Algorithm.h index 14ba1f32..b2081af5 100644 --- a/src/common/crypto/Algorithm.h +++ b/src/common/crypto/Algorithm.h @@ -51,7 +51,7 @@ public: m_variant(VARIANT_AUTO) {} - inline Algorithm(Algo algo, Variant variant) : + inline Algorithm(Algo algo, Variant variant = VARIANT_AUTO) : m_flags(0), m_variant(variant) { From 3ca70e22768f736d33ddac7b1e82cfc00aab7100 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 15 Jan 2019 14:42:26 -0800 Subject: [PATCH 070/141] Added cn/half algo support --- src/App.cpp | 2 +- src/common/crypto/Algorithm.cpp | 14 +++++++++----- src/common/net/Pool.cpp | 1 + src/common/xmrig.h | 2 +- src/workers/Benchmark.cpp | 2 +- src/workers/Benchmark.h | 2 +- 6 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/App.cpp b/src/App.cpp index 9577189f..e3a65cc6 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -136,7 +136,7 @@ int App::exec() // run benchmark before pool mining or not? if (m_controller->config()->get_algo_perf(xmrig::PA_CN) == 0.0f || m_controller->config()->isCalibrateAlgo()) { benchmark.set_controller(m_controller); // we need controller there to access config and network objects - Workers::setListener(&benchmark); // register benchmark as job reault listener to compute hashrates there + Workers::setListener(&benchmark); // register benchmark as job result listener to compute hashrates there // write text before first benchmark round Log::i()->text(m_controller->config()->isColors() ? 
GREEN_BOLD(" >>>>> ") WHITE_BOLD("STARTING ALGO PERFORMANCE CALIBRATION (with %i seconds round)") diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp index 11609fb9..b6c8ae5b 100644 --- a/src/common/crypto/Algorithm.cpp +++ b/src/common/crypto/Algorithm.cpp @@ -272,7 +272,7 @@ const char *xmrig::Algorithm::perfAlgoName(const xmrig::PerfAlgo pa) { static const char* perf_algo_names[xmrig::PerfAlgo::PA_MAX] = { "cn", "cn/2", - "cn/msr", + "cn/half", "cn-lite", "cn-heavy", }; @@ -290,9 +290,9 @@ xmrig::Algorithm::Algorithm(const xmrig::PerfAlgo pa) { m_algo = xmrig::CRYPTONIGHT; m_variant = xmrig::VARIANT_2; break; - case PA_CN_FAST: + case PA_CN_HALF: m_algo = xmrig::CRYPTONIGHT; - m_variant = xmrig::VARIANT_MSR; + m_variant = xmrig::VARIANT_HALF; break; case PA_CN_LITE: m_algo = xmrig::CRYPTONIGHT_LITE; @@ -310,9 +310,13 @@ xmrig::Algorithm::Algorithm(const xmrig::PerfAlgo pa) { // returns PerfAlgo that corresponds to current Algorithm xmrig::PerfAlgo xmrig::Algorithm::perf_algo() const { - if (m_variant == VARIANT_MSR) return PA_CN_FAST; switch (m_algo) { - case CRYPTONIGHT: return m_variant == VARIANT_2 ? 
PA_CN2 : PA_CN; + case CRYPTONIGHT: + switch (m_variant) { + case VARIANT_2: return PA_CN2; + case VARIANT_HALF: return PA_CN_HALF; + default: return PA_CN; + } case CRYPTONIGHT_LITE: return PA_CN_LITE; case CRYPTONIGHT_HEAVY: return PA_CN_HEAVY; default: return PA_INVALID; diff --git a/src/common/net/Pool.cpp b/src/common/net/Pool.cpp index 143bb74e..e1aa269d 100644 --- a/src/common/net/Pool.cpp +++ b/src/common/net/Pool.cpp @@ -59,6 +59,7 @@ Pool::Pool() : m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_MSR)); m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_XAO)); m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_RTO)); + m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF)); m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_1)); m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_0)); diff --git a/src/common/xmrig.h b/src/common/xmrig.h index 6fa5f63f..7ca96df3 100644 --- a/src/common/xmrig.h +++ b/src/common/xmrig.h @@ -44,7 +44,7 @@ enum PerfAlgo { PA_INVALID = -1, PA_CN, /* CryptoNight (Monero) */ PA_CN2, /* CryptoNight/2 (Monero) */ - PA_CN_FAST, /* CryptoNight-Fast (Masari) */ + PA_CN_HALF, /* CryptoNight-Half (Masari) */ PA_CN_LITE, /* CryptoNight-Lite (AEON) */ PA_CN_HEAVY, /* CryptoNight-Heavy (SUMO) */ PA_MAX diff --git a/src/workers/Benchmark.cpp b/src/workers/Benchmark.cpp index c32db0d0..c6a78f7a 100644 --- a/src/workers/Benchmark.cpp +++ b/src/workers/Benchmark.cpp @@ -6,7 +6,7 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , - * Copyright 2018 MoneroOcean , + * Copyright 2018-2019 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/workers/Benchmark.h b/src/workers/Benchmark.h index 795edc6d..cef9f9c9 100644 --- 
a/src/workers/Benchmark.h +++ b/src/workers/Benchmark.h @@ -6,7 +6,7 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , - * Copyright 2018 MoneroOcean , + * Copyright 2018-2019 MoneroOcean , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by From 189eda646793e585897141a6043ae37a3f8d5941 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Wed, 16 Jan 2019 00:09:00 +0100 Subject: [PATCH 071/141] Fixed crash in patchCode() on Linux --- src/crypto/asm/cn_main_loop.S | 20 ++++---------------- src/crypto/asm/cn_main_loop.asm | 20 ++++---------------- src/crypto/asm/win64/cn_main_loop.S | 20 ++++---------------- src/crypto/asm/win64/cn_main_loop.asm | 20 ++++---------------- src/workers/CpuThread.cpp | 3 ++- 5 files changed, 18 insertions(+), 65 deletions(-) diff --git a/src/crypto/asm/cn_main_loop.S b/src/crypto/asm/cn_main_loop.S index 1e5610d1..1d52f206 100644 --- a/src/crypto/asm/cn_main_loop.S +++ b/src/crypto/asm/cn_main_loop.S @@ -23,10 +23,7 @@ FN_PREFIX(cnv2_mainloop_ivybridge_asm): #include "cn2/cnv2_main_loop_ivybridge.inc" add rsp, 48 ret 0 - nop - nop - nop - nop + mov eax, 0xDEADC0DE ALIGN(64) FN_PREFIX(cnv2_mainloop_ryzen_asm): @@ -35,10 +32,7 @@ FN_PREFIX(cnv2_mainloop_ryzen_asm): #include "cn2/cnv2_main_loop_ryzen.inc" add rsp, 48 ret 0 - nop - nop - nop - nop + mov eax, 0xDEADC0DE ALIGN(64) FN_PREFIX(cnv2_mainloop_bulldozer_asm): @@ -47,10 +41,7 @@ FN_PREFIX(cnv2_mainloop_bulldozer_asm): #include "cn2/cnv2_main_loop_bulldozer.inc" add rsp, 48 ret 0 - nop - nop - nop - nop + mov eax, 0xDEADC0DE ALIGN(64) FN_PREFIX(cnv2_double_mainloop_sandybridge_asm): @@ -60,7 +51,4 @@ FN_PREFIX(cnv2_double_mainloop_sandybridge_asm): #include "cn2/cnv2_double_main_loop_sandybridge.inc" add rsp, 48 ret 0 - nop - nop - nop - nop + mov eax, 0xDEADC0DE diff --git a/src/crypto/asm/cn_main_loop.asm b/src/crypto/asm/cn_main_loop.asm index 
47b4df9e..2b7a1f46 100644 --- a/src/crypto/asm/cn_main_loop.asm +++ b/src/crypto/asm/cn_main_loop.asm @@ -8,40 +8,28 @@ ALIGN(64) cnv2_mainloop_ivybridge_asm PROC INCLUDE cn2/cnv2_main_loop_ivybridge.inc ret 0 - nop - nop - nop - nop + mov eax, 0xDEADC0DE cnv2_mainloop_ivybridge_asm ENDP ALIGN(64) cnv2_mainloop_ryzen_asm PROC INCLUDE cn2/cnv2_main_loop_ryzen.inc ret 0 - nop - nop - nop - nop + mov eax, 0xDEADC0DE cnv2_mainloop_ryzen_asm ENDP ALIGN(64) cnv2_mainloop_bulldozer_asm PROC INCLUDE cn2/cnv2_main_loop_bulldozer.inc ret 0 - nop - nop - nop - nop + mov eax, 0xDEADC0DE cnv2_mainloop_bulldozer_asm ENDP ALIGN(64) cnv2_double_mainloop_sandybridge_asm PROC INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc ret 0 - nop - nop - nop - nop + mov eax, 0xDEADC0DE cnv2_double_mainloop_sandybridge_asm ENDP _TEXT_CNV2_MAINLOOP ENDS diff --git a/src/crypto/asm/win64/cn_main_loop.S b/src/crypto/asm/win64/cn_main_loop.S index 90e43470..bb0a581d 100644 --- a/src/crypto/asm/win64/cn_main_loop.S +++ b/src/crypto/asm/win64/cn_main_loop.S @@ -10,34 +10,22 @@ ALIGN(64) cnv2_mainloop_ivybridge_asm: #include "../cn2/cnv2_main_loop_ivybridge.inc" ret 0 - nop - nop - nop - nop + mov eax, 0xDEADC0DE ALIGN(64) cnv2_mainloop_ryzen_asm: #include "../cn2/cnv2_main_loop_ryzen.inc" ret 0 - nop - nop - nop - nop + mov eax, 0xDEADC0DE ALIGN(64) cnv2_mainloop_bulldozer_asm: #include "../cn2/cnv2_main_loop_bulldozer.inc" ret 0 - nop - nop - nop - nop + mov eax, 0xDEADC0DE ALIGN(64) cnv2_double_mainloop_sandybridge_asm: #include "../cn2/cnv2_double_main_loop_sandybridge.inc" ret 0 - nop - nop - nop - nop + mov eax, 0xDEADC0DE diff --git a/src/crypto/asm/win64/cn_main_loop.asm b/src/crypto/asm/win64/cn_main_loop.asm index 9c8a6ea9..40cb1cac 100644 --- a/src/crypto/asm/win64/cn_main_loop.asm +++ b/src/crypto/asm/win64/cn_main_loop.asm @@ -8,40 +8,28 @@ ALIGN 64 cnv2_mainloop_ivybridge_asm PROC INCLUDE cn2/cnv2_main_loop_ivybridge.inc ret 0 - nop - nop - nop - nop + mov eax, 0xDEADC0DE 
cnv2_mainloop_ivybridge_asm ENDP ALIGN 64 cnv2_mainloop_ryzen_asm PROC INCLUDE cn2/cnv2_main_loop_ryzen.inc ret 0 - nop - nop - nop - nop + mov eax, 0xDEADC0DE cnv2_mainloop_ryzen_asm ENDP ALIGN 64 cnv2_mainloop_bulldozer_asm PROC INCLUDE cn2/cnv2_main_loop_bulldozer.inc ret 0 - nop - nop - nop - nop + mov eax, 0xDEADC0DE cnv2_mainloop_bulldozer_asm ENDP ALIGN 64 cnv2_double_mainloop_sandybridge_asm PROC INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc ret 0 - nop - nop - nop - nop + mov eax, 0xDEADC0DE cnv2_double_mainloop_sandybridge_asm ENDP _TEXT_CNV2_MAINLOOP ENDS diff --git a/src/workers/CpuThread.cpp b/src/workers/CpuThread.cpp index 5b7016e4..082effe2 100644 --- a/src/workers/CpuThread.cpp +++ b/src/workers/CpuThread.cpp @@ -69,9 +69,10 @@ static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t ma # endif size_t size = 0; - while (*(uint32_t*)(p + size) != 0x90909090) { + while (*(uint32_t*)(p + size) != 0xDEADC0DE) { ++size; } + size += sizeof(uint32_t); memcpy((void*) dst, (const void*) src, size); From 35a611030a4e61fd3c03c2d07963546f69ce85dd Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 15 Jan 2019 15:33:06 -0800 Subject: [PATCH 072/141] Unified code with xmrig-amd version --- src/common/net/Client.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/common/net/Client.cpp b/src/common/net/Client.cpp index c8cedf85..aa04a630 100644 --- a/src/common/net/Client.cpp +++ b/src/common/net/Client.cpp @@ -342,7 +342,10 @@ bool Client::parseJob(const rapidjson::Value &params, int *code) return false; } - if (params.HasMember("variant")) { + if (params.HasMember("algo")) { + job.setAlgorithm(params["algo"].GetString()); + } + else if (params.HasMember("variant")) { const rapidjson::Value &variant = params["variant"]; if (variant.IsInt()) { @@ -353,11 +356,6 @@ bool Client::parseJob(const rapidjson::Value &params, int *code) } } - // moved algo after variant parsing to override variant that is considered to be
outdated now - if (params.HasMember("algo")) { - job.setAlgorithm(params["algo"].GetString()); - } - if (!verifyAlgorithm(job.algorithm())) { *code = 6; From 7ed58b2c2082fd0b14eeae0d4d6137230f0a3a31 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 23 Jan 2019 19:19:59 -0800 Subject: [PATCH 073/141] Adjusted cn-pico support --- src/common/crypto/Algorithm.cpp | 6 ++++++ src/common/net/Pool.cpp | 2 ++ src/common/xmrig.h | 1 + 3 files changed, 9 insertions(+) diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp index a14fa2c2..15f87eb1 100644 --- a/src/common/crypto/Algorithm.cpp +++ b/src/common/crypto/Algorithm.cpp @@ -288,6 +288,7 @@ const char *xmrig::Algorithm::perfAlgoName(const xmrig::PerfAlgo pa) { "cn/half", "cn-lite", "cn-heavy", + "cn-pico", }; return perf_algo_names[pa]; } @@ -315,6 +316,10 @@ xmrig::Algorithm::Algorithm(const xmrig::PerfAlgo pa) { m_algo = xmrig::CRYPTONIGHT_HEAVY; m_variant = xmrig::VARIANT_0; break; + case PA_CN_PICO: + m_algo = xmrig::CRYPTONIGHT_PICO; + m_variant = xmrig::VARIANT_TRTL; + break; default: m_algo = xmrig::INVALID_ALGO; m_variant = xmrig::VARIANT_AUTO; @@ -332,6 +337,7 @@ xmrig::PerfAlgo xmrig::Algorithm::perf_algo() const { } case CRYPTONIGHT_LITE: return PA_CN_LITE; case CRYPTONIGHT_HEAVY: return PA_CN_HEAVY; + case CRYPTONIGHT_PICO: return PA_CN_PICO; default: return PA_INVALID; } } diff --git a/src/common/net/Pool.cpp b/src/common/net/Pool.cpp index b636785c..ca9a69f4 100644 --- a/src/common/net/Pool.cpp +++ b/src/common/net/Pool.cpp @@ -67,6 +67,8 @@ Pool::Pool() : m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_0)); m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_XHV)); m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_TUBE)); + + m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL)); } diff --git a/src/common/xmrig.h b/src/common/xmrig.h index 
12122d83..12090429 100644 --- a/src/common/xmrig.h +++ b/src/common/xmrig.h @@ -48,6 +48,7 @@ enum PerfAlgo { PA_CN_HALF, /* CryptoNight-Half (Masari) */ PA_CN_LITE, /* CryptoNight-Lite (AEON) */ PA_CN_HEAVY, /* CryptoNight-Heavy (SUMO) */ + PA_CN_PICO, /* CryptoNight-Pico (TRTL) */ PA_MAX }; From 5131cc3ef5a0b267b9a355a7997e4db28139927a Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 23 Jan 2019 21:47:42 -0800 Subject: [PATCH 074/141] Fixed exe name --- build_rh6.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/build_rh6.sh b/build_rh6.sh index 8fc7480d..4a86a0bd 100755 --- a/build_rh6.sh +++ b/build_rh6.sh @@ -16,6 +16,7 @@ git checkout $1 &&\ scl enable devtoolset-2 "cmake . -DWITH_TLS=OFF -DWITH_HTTPD=OFF" &&\ scl enable devtoolset-2 "make" &&\ cp src/config.json . &&\ +mv xmrig-notls xmrig &&\ tar cfz ../../xmrig-$1-lin64.tar.gz xmrig config.json &&\ cd ../.. &&\ rm -rf build &&\ From 21d32645bff0354d20f0db3b09e8e7a07b474b95 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 12 Feb 2019 19:58:30 -0800 Subject: [PATCH 075/141] Restored removed function --- src/common/net/Client.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/net/Client.h b/src/common/net/Client.h index 38bf7f7a..e79e3d26 100644 --- a/src/common/net/Client.h +++ b/src/common/net/Client.h @@ -86,6 +86,7 @@ public: inline int id() const { return m_id; } inline SocketState state() const { return m_state; } inline uint16_t port() const { return m_pool.port(); } + inline void setAlgo(const xmrig::Algorithm &algo) { m_pool.setAlgo(algo); } inline void setQuiet(bool quiet) { m_quiet = quiet; } inline void setRetries(int retries) { m_retries = retries; } inline void setRetryPause(int ms) { m_retryPause = ms; } From e600d22cc66f95c729e257189829430719a95dde Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 12 Feb 2019 20:15:29 -0800 Subject: [PATCH 076/141] Restored pool setAlgo method --- src/common/net/Pool.cpp | 9 ++++++++- src/common/net/Pool.h | 1 + 2 files changed, 9 
insertions(+), 1 deletion(-) diff --git a/src/common/net/Pool.cpp b/src/common/net/Pool.cpp index 6a3dc823..5df3d1f7 100644 --- a/src/common/net/Pool.cpp +++ b/src/common/net/Pool.cpp @@ -51,7 +51,7 @@ Pool::Pool() : m_keepAlive(0), m_port(kDefaultPort) { - // here xmrig now resuts all possible supported algorithms + // here xmrig now reports all possible supported algorithms m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_1)); m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_2)); m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_0)); @@ -276,6 +276,13 @@ void Pool::adjust(const xmrig::Algorithm &algorithm) } } + +void Pool::setAlgo(const xmrig::Algorithm &algorithm) +{ + m_algorithm = algorithm; +} + + #ifdef APP_DEBUG void Pool::print() const { diff --git a/src/common/net/Pool.h b/src/common/net/Pool.h index b863a76c..c20e028c 100644 --- a/src/common/net/Pool.h +++ b/src/common/net/Pool.h @@ -85,6 +85,7 @@ public: bool setUserpass(const char *userpass); rapidjson::Value toJSON(rapidjson::Document &doc) const; void adjust(const xmrig::Algorithm &algorithm); + void setAlgo(const xmrig::Algorithm &algorithm); # ifdef APP_DEBUG void print() const; From 441428713217ca6b9d2b73c89745fa4d4b9b93b2 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 21 Feb 2019 15:57:12 -0800 Subject: [PATCH 077/141] Fixed namespace usage --- src/workers/Benchmark.cpp | 4 ++-- src/workers/Benchmark.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/workers/Benchmark.cpp b/src/workers/Benchmark.cpp index c6a78f7a..1b3d69dd 100644 --- a/src/workers/Benchmark.cpp +++ b/src/workers/Benchmark.cpp @@ -55,10 +55,10 @@ void Benchmark::start_perf_bench(const xmrig::PerfAlgo pa) { Workers::setJob(job, false); // set job for workers to compute } -void Benchmark::onJobResult(const JobResult& result) { +void Benchmark::onJobResult(const xmrig::JobResult& result) { if (result.poolId != -100) { // 
switch to network pool jobs Workers::setListener(m_controller->network()); - static_cast(m_controller->network())->onJobResult(result); + static_cast(m_controller->network())->onJobResult(result); return; } // ignore benchmark results for other perf algo diff --git a/src/workers/Benchmark.h b/src/workers/Benchmark.h index cef9f9c9..3d1e96fa 100644 --- a/src/workers/Benchmark.h +++ b/src/workers/Benchmark.h @@ -30,7 +30,7 @@ #include "interfaces/IJobResultListener.h" #include "core/Controller.h" -class Benchmark : public IJobResultListener { +class Benchmark : public xmrig::IJobResultListener { bool m_shouldSaveConfig; // should save config after all benchmark rounds xmrig::PerfAlgo m_pa; // current perf algo we benchmark uint64_t m_hash_count; // number of hashes calculated for current perf algo @@ -39,7 +39,7 @@ class Benchmark : public IJobResultListener { uint64_t get_now() const; // get current time in ms - void onJobResult(const JobResult&) override; // onJobResult is called after each computed benchmark hash + void onJobResult(const xmrig::JobResult&) override; // onJobResult is called after each computed benchmark hash public: Benchmark() : m_shouldSaveConfig(false) {} From b4037cdb2c8512e30f5cf296a4f216eb99aa6338 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 21 Feb 2019 15:58:33 -0800 Subject: [PATCH 078/141] Fixed namespace usage --- src/common/net/Job.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/net/Job.cpp b/src/common/net/Job.cpp index 4d5a7f7b..7dcaffe2 100644 --- a/src/common/net/Job.cpp +++ b/src/common/net/Job.cpp @@ -149,7 +149,7 @@ bool xmrig::Job::setBlob(const char *blob) } // for algo benchmarking -void Job::setRawBlob(const uint8_t *blob, const size_t size) +void xmrig::Job::setRawBlob(const uint8_t *blob, const size_t size) { memcpy(m_blob, blob, m_size = size); } From c998be42359b39ef0c02995837ba43dac080eb82 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 21 Feb 2019 16:01:27 -0800 Subject: 
[PATCH 079/141] Updated pools usage --- src/core/Controller.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/Controller.cpp b/src/core/Controller.cpp index dd85ae53..0dd2906a 100644 --- a/src/core/Controller.cpp +++ b/src/core/Controller.cpp @@ -124,7 +124,7 @@ int xmrig::Controller::init() } # endif - if (strstr(config()->pools()[0].host(), "moneroocean.stream")) config()->setDonateLevel(0); + if (strstr(config()->pools.data()[0].host(), "moneroocean.stream")) config()->setDonateLevel(0); d_ptr->network = new Network(this); return 0; From da6e059e68b52b1542d6c1eeb7a55495ff4a8542 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 21 Feb 2019 16:01:56 -0800 Subject: [PATCH 080/141] Updated pools usage --- src/core/Controller.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/Controller.cpp b/src/core/Controller.cpp index 0dd2906a..1bf2f8d0 100644 --- a/src/core/Controller.cpp +++ b/src/core/Controller.cpp @@ -124,7 +124,7 @@ int xmrig::Controller::init() } # endif - if (strstr(config()->pools.data()[0].host(), "moneroocean.stream")) config()->setDonateLevel(0); + if (strstr(config()->pools().data()[0].host(), "moneroocean.stream")) config()->setDonateLevel(0); d_ptr->network = new Network(this); return 0; From e29dc5f8775fcffa13ff19cc6d170c99499612a7 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 21 Feb 2019 16:02:33 -0800 Subject: [PATCH 081/141] Fixed namespace usage --- src/workers/Benchmark.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/workers/Benchmark.cpp b/src/workers/Benchmark.cpp index 1b3d69dd..fbfb29b7 100644 --- a/src/workers/Benchmark.cpp +++ b/src/workers/Benchmark.cpp @@ -34,7 +34,7 @@ void Benchmark::start_perf_bench(const xmrig::PerfAlgo pa) { Workers::switch_algo(xmrig::Algorithm(pa)); // switch workers to new algo (Algo part) // prepare test job for benchmark runs - Job job; + xmrig::Job job; job.setPoolId(-100); // to make sure we can detect benchmark 
jobs job.setId(xmrig::Algorithm::perfAlgoName(pa)); // need to set different id so that workers will see job change const static uint8_t test_input[76] = { From 6dd971898352f9c0e17b8858e3502a121d8ef858 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 21 Feb 2019 17:19:22 -0800 Subject: [PATCH 082/141] Fixed RH6 build --- build_rh6.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/build_rh6.sh b/build_rh6.sh index 4a86a0bd..e46403c5 100755 --- a/build_rh6.sh +++ b/build_rh6.sh @@ -3,9 +3,9 @@ yum update -y yum install -y cmake make git openssl-devel libmicrohttpd-devel rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/libuv-1.8.0-1.el6.x86_64.rpm rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/libuv-devel-1.8.0-1.el6.x86_64.rpm -wget http://people.centos.org/tru/devtools-2/devtools-2.repo -O /etc/yum.repos.d/devtools-2.repo +wget http://linuxsoft.cern.ch/cern/scl/slc6-scl.repo -O /etc/yum.repos.d/slc6-scl.repo yum upgrade -y -yum install -y devtoolset-2-gcc devtoolset-2-binutils devtoolset-2-gcc-c++ +yum install -y devtoolset-6-gcc devtoolset-6-binutils devtoolset-6-gcc-c++ rm -rf build xmrig-$1-lin64.tar.gz mkdir build &&\ @@ -13,8 +13,8 @@ cd build &&\ git clone https://github.com/MoneroOcean/xmrig.git &&\ cd xmrig &&\ git checkout $1 &&\ -scl enable devtoolset-2 "cmake . -DWITH_TLS=OFF -DWITH_HTTPD=OFF" &&\ -scl enable devtoolset-2 "make" &&\ +scl enable devtoolset-6 "cmake . -DWITH_TLS=OFF -DWITH_HTTPD=OFF" &&\ +scl enable devtoolset-6 "make" &&\ cp src/config.json . 
&&\ mv xmrig-notls xmrig &&\ tar cfz ../../xmrig-$1-lin64.tar.gz xmrig config.json &&\ From 17a650007dd0071d228d9e663cd3b345407b0207 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 21 Feb 2019 19:11:55 -0800 Subject: [PATCH 083/141] Fixed pool algos for 2nd contructor --- src/base/net/Pool.cpp | 51 +++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/src/base/net/Pool.cpp b/src/base/net/Pool.cpp index 03e6da7f..43b7f353 100644 --- a/src/base/net/Pool.cpp +++ b/src/base/net/Pool.cpp @@ -58,34 +58,42 @@ static const char *kUser = "user"; static const char *kVariant = "variant"; +Algorithms all_algorithms() { + Algorithms algorithms; + // here xmrig now reports all possible supported algorithms + algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_1)); + algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_2)); + algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_0)); + algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_XTL)); + algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_MSR)); + algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_XAO)); + algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_RTO)); + algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF)); + algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_GPU)); + algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_WOW)); + algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_4)); + + algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_1)); + algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_0)); + + algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_0)); + algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_HEAVY, 
xmrig::VARIANT_XHV)); + algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_TUBE)); + + algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL)); + + return algorithms; +} + + xmrig::Pool::Pool() : + m_algorithms(all_algorithms()), m_enabled(true), m_nicehash(false), m_tls(false), m_keepAlive(0), m_port(kDefaultPort) { - // here xmrig now reports all possible supported algorithms - m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_1)); - m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_2)); - m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_0)); - m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_XTL)); - m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_MSR)); - m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_XAO)); - m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_RTO)); - m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF)); - m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_GPU)); - m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_WOW)); - m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_4)); - - m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_1)); - m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_0)); - - m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_0)); - m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_XHV)); - m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_TUBE)); - - m_algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL)); } @@ -101,6 +109,7 @@ xmrig::Pool::Pool() : * @param url */ xmrig::Pool::Pool(const char *url) : + 
m_algorithms(all_algorithms()), m_enabled(true), m_nicehash(false), m_tls(false), From 3bed5d9416a8e88a17b4c447964619e84465390c Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 21 Feb 2019 19:13:13 -0800 Subject: [PATCH 084/141] Fixed pool algos for 2nd contructor --- src/base/net/Pool.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base/net/Pool.cpp b/src/base/net/Pool.cpp index 43b7f353..144fda92 100644 --- a/src/base/net/Pool.cpp +++ b/src/base/net/Pool.cpp @@ -58,7 +58,7 @@ static const char *kUser = "user"; static const char *kVariant = "variant"; -Algorithms all_algorithms() { +xmrig::Algorithms all_algorithms() { Algorithms algorithms; // here xmrig now reports all possible supported algorithms algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_1)); From e845a402aa85f8802a3fad99f27e10f11cd9eec2 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 21 Feb 2019 19:13:40 -0800 Subject: [PATCH 085/141] Fixed pool algos for 2nd contructor --- src/base/net/Pool.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base/net/Pool.cpp b/src/base/net/Pool.cpp index 144fda92..50515a2b 100644 --- a/src/base/net/Pool.cpp +++ b/src/base/net/Pool.cpp @@ -59,7 +59,7 @@ static const char *kVariant = "variant"; xmrig::Algorithms all_algorithms() { - Algorithms algorithms; + xmrig::Algorithms algorithms; // here xmrig now reports all possible supported algorithms algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_1)); algorithms.push_back(xmrig::Algorithm(xmrig::CRYPTONIGHT, xmrig::VARIANT_2)); From 43f09e915eaeae990f87cd6e74e2757ee965f5df Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 21 Feb 2019 19:14:39 -0800 Subject: [PATCH 086/141] Fixed pool algos for 2nd contructor --- src/base/net/Pool.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/base/net/Pool.cpp b/src/base/net/Pool.cpp index 50515a2b..cbdd98a2 100644 --- a/src/base/net/Pool.cpp +++ 
b/src/base/net/Pool.cpp @@ -121,6 +121,7 @@ xmrig::Pool::Pool(const char *url) : xmrig::Pool::Pool(const rapidjson::Value &object) : + m_algorithms(all_algorithms()), m_enabled(true), m_nicehash(false), m_tls(false), @@ -159,6 +160,7 @@ xmrig::Pool::Pool(const rapidjson::Value &object) : xmrig::Pool::Pool(const char *host, uint16_t port, const char *user, const char *password, int keepAlive, bool nicehash, bool tls) : + m_algorithms(all_algorithms()), m_nicehash(nicehash), m_tls(tls), m_keepAlive(keepAlive), From 3f8024c19e73064b4fac8c9d33d40fe7e59a7d66 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 21 Feb 2019 19:24:35 -0800 Subject: [PATCH 087/141] Removed cn/4 name --- src/common/crypto/Algorithm.cpp | 9 ++++----- src/common/xmrig.h | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp index 83b0d519..699d65ea 100644 --- a/src/common/crypto/Algorithm.cpp +++ b/src/common/crypto/Algorithm.cpp @@ -67,7 +67,6 @@ static AlgoData const algorithms[] = { { "cryptonight/xtlv9", "cn/xtlv9", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF }, { "cryptonight/wow", "cn/wow", xmrig::CRYPTONIGHT, xmrig::VARIANT_WOW }, { "cryptonight/r", "cn/r", xmrig::CRYPTONIGHT, xmrig::VARIANT_4 }, - { "cryptonight/4", "cn/4", xmrig::CRYPTONIGHT, xmrig::VARIANT_4 }, # ifndef XMRIG_NO_AEON { "cryptonight-lite", "cn-lite", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_AUTO }, @@ -133,7 +132,7 @@ static const char *variants[] = { "trtl", "gpu", "wow", - "4", + "r", }; @@ -299,7 +298,7 @@ const char *xmrig::Algorithm::perfAlgoName(const xmrig::PerfAlgo pa) { "cn/2", "cn/half", "cn/gpu", - "cn/4", + "cn/r", "cn-lite", "cn-heavy", "cn-pico", @@ -326,7 +325,7 @@ xmrig::Algorithm::Algorithm(const xmrig::PerfAlgo pa) { m_algo = xmrig::CRYPTONIGHT; m_variant = xmrig::VARIANT_GPU; break; - case PA_CN4: + case PA_CN_R: m_algo = xmrig::CRYPTONIGHT; m_variant = xmrig::VARIANT_4; break; @@ -356,7 +355,7 @@ xmrig::PerfAlgo 
xmrig::Algorithm::perf_algo() const { case VARIANT_2: return PA_CN2; case VARIANT_HALF: return PA_CN_HALF; case VARIANT_GPU: return PA_CN_GPU; - case VARIANT_4: return PA_CN4; + case VARIANT_4: return PA_CN_R; default: return PA_CN; } case CRYPTONIGHT_LITE: return PA_CN_LITE; diff --git a/src/common/xmrig.h b/src/common/xmrig.h index 118e0b47..6d19d973 100644 --- a/src/common/xmrig.h +++ b/src/common/xmrig.h @@ -47,7 +47,7 @@ enum PerfAlgo { PA_CN2, /* cn/2 (Monero) */ PA_CN_HALF, /* cn/half (MSR) */ PA_CN_GPU, /* cn/gpu (RYO) */ - PA_CN4, /* cn/4 (Monero) */ + PA_CN_R, /* cn/4 (Monero) */ PA_CN_LITE, /* cn-lite/1 (AEON) */ PA_CN_HEAVY, /* cn-heavy/0 (LOKI) */ PA_CN_PICO, /* cn-pico/trtl (TRTL) */ From adfafc530858c3c93f0fac1ce31cc5573288823f Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 21 Feb 2019 21:25:21 -0800 Subject: [PATCH 088/141] Fixed gcc install --- build_rh6.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_rh6.sh b/build_rh6.sh index e46403c5..7257836c 100755 --- a/build_rh6.sh +++ b/build_rh6.sh @@ -5,7 +5,7 @@ rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/li rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/libuv-devel-1.8.0-1.el6.x86_64.rpm wget http://linuxsoft.cern.ch/cern/scl/slc6-scl.repo -O /etc/yum.repos.d/slc6-scl.repo yum upgrade -y -yum install -y devtoolset-6-gcc devtoolset-6-binutils devtoolset-6-gcc-c++ +yum install -y --nogpgcheck devtoolset-6-gcc devtoolset-6-binutils devtoolset-6-gcc-c++ rm -rf build xmrig-$1-lin64.tar.gz mkdir build &&\ From 5ae25845fa850dba88e0d5d5ffe47b8ded956778 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 21 Feb 2019 21:37:53 -0800 Subject: [PATCH 089/141] Fixed install --- build_rh6.sh | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/build_rh6.sh b/build_rh6.sh index 7257836c..51fce7df 100755 --- a/build_rh6.sh +++ b/build_rh6.sh @@ -1,12 +1,10 @@ #!/bin/bash yum update -y -yum install 
-y cmake make git openssl-devel libmicrohttpd-devel +yum upgrade -y +yum install -y cmake make git openssl-devel libmicrohttpd-devel centos-release-scl-rh +yum install -y --nogpgcheck devtoolset-6-gcc devtoolset-6-binutils devtoolset-6-gcc-c++ rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/libuv-1.8.0-1.el6.x86_64.rpm rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/libuv-devel-1.8.0-1.el6.x86_64.rpm -wget http://linuxsoft.cern.ch/cern/scl/slc6-scl.repo -O /etc/yum.repos.d/slc6-scl.repo -yum upgrade -y -yum install -y --nogpgcheck devtoolset-6-gcc devtoolset-6-binutils devtoolset-6-gcc-c++ - rm -rf build xmrig-$1-lin64.tar.gz mkdir build &&\ cd build &&\ From 464fb8d39ad4b5c4557190bd215aeaa901dcee62 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Fri, 22 Feb 2019 07:36:54 -0800 Subject: [PATCH 090/141] cn/wow algo has now uses cn/r algo thread setup --- src/common/crypto/Algorithm.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp index 699d65ea..512fa22f 100644 --- a/src/common/crypto/Algorithm.cpp +++ b/src/common/crypto/Algorithm.cpp @@ -355,6 +355,7 @@ xmrig::PerfAlgo xmrig::Algorithm::perf_algo() const { case VARIANT_2: return PA_CN2; case VARIANT_HALF: return PA_CN_HALF; case VARIANT_GPU: return PA_CN_GPU; + case VARIANT_WOW: return PA_CN_R; case VARIANT_4: return PA_CN_R; default: return PA_CN; } From 8d76e056147c4f4ac566e45822c4d6d166419cc4 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Fri, 22 Feb 2019 07:38:38 -0800 Subject: [PATCH 091/141] Moved version to mo2 --- src/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.h b/src/version.h index 5c6e7601..82178651 100644 --- a/src/version.h +++ b/src/version.h @@ -28,7 +28,7 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig CPU miner" -#define APP_VERSION "2.13.0-mo1" +#define APP_VERSION "2.13.0-mo2" #define 
APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com" From d117c3b797e6d1ddb01463db9726d5fd2c0a600d Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Thu, 7 Mar 2019 09:13:19 -0800 Subject: [PATCH 092/141] Improved command flow --- build_rh6.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build_rh6.sh b/build_rh6.sh index 51fce7df..e7bbce87 100755 --- a/build_rh6.sh +++ b/build_rh6.sh @@ -1,7 +1,8 @@ #!/bin/bash yum update -y yum upgrade -y -yum install -y cmake make git openssl-devel libmicrohttpd-devel centos-release-scl-rh +yum install -y git +yum install -y cmake make openssl-devel libmicrohttpd-devel centos-release-scl-rh yum install -y --nogpgcheck devtoolset-6-gcc devtoolset-6-binutils devtoolset-6-gcc-c++ rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/libuv-1.8.0-1.el6.x86_64.rpm rpm -i https://github.com/sipcapture/captagent/raw/master/dependency/centos/6/libuv-devel-1.8.0-1.el6.x86_64.rpm From 25f35b7e77d7b29e2cf70c59b0b34978202532bc Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 11:22:43 -0700 Subject: [PATCH 093/141] Removed not needed setDonateLevel copy --- src/base/net/stratum/Pools.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/base/net/stratum/Pools.h b/src/base/net/stratum/Pools.h index 1ab5f38a..6a63f166 100644 --- a/src/base/net/stratum/Pools.h +++ b/src/base/net/stratum/Pools.h @@ -52,7 +52,6 @@ public: inline const std::vector &data() const { return m_data; } inline int donateLevel() const { return m_donateLevel; } - inline void setDonateLevel(const int donate) { m_donateLevel = donate; } inline int retries() const { return m_retries; } inline int retryPause() const { return m_retryPause; } inline ProxyDonate proxyDonate() const { return m_proxyDonate; } From 378292e84e1435c614b9d50f47e2c1cbdbb887f1 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 11:24:48 -0700 Subject: [PATCH 094/141] 
Adjusted Config.h location --- src/base/net/stratum/Client.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base/net/stratum/Client.cpp b/src/base/net/stratum/Client.cpp index f2d9a69f..92e87ff0 100644 --- a/src/base/net/stratum/Client.cpp +++ b/src/base/net/stratum/Client.cpp @@ -46,7 +46,7 @@ #include "base/tools/Buffer.h" #include "base/tools/Chrono.h" #include "net/JobResult.h" -#include "core/Config.h" // for pconfig to access pconfig->get_algo_perf +#include "core/config/Config.h" // for pconfig to access pconfig->get_algo_perf #include "workers/Workers.h" // to do Workers::switch_algo #include "rapidjson/document.h" #include "rapidjson/error/en.h" From f49e566a9c0ef3d397b2e04dcf86a1e5ff351f05 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 11:30:58 -0700 Subject: [PATCH 095/141] Fixed Log usage --- src/App.cpp | 2 +- src/core/config/Config.new.cpp | 287 --------------------- src/core/config/Config.old.cpp | 439 --------------------------------- src/workers/Benchmark.cpp | 2 +- src/workers/Workers.cpp | 2 +- 5 files changed, 3 insertions(+), 729 deletions(-) delete mode 100644 src/core/config/Config.new.cpp delete mode 100644 src/core/config/Config.old.cpp diff --git a/src/App.cpp b/src/App.cpp index 978a23e3..da170ab3 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -99,7 +99,7 @@ int xmrig::App::exec() benchmark.set_controller(m_controller); // we need controller there to access config and network objects Workers::setListener(&benchmark); // register benchmark as job result listener to compute hashrates there // write text before first benchmark round - Log::i()->text(m_controller->config()->isColors() + xmrig::Log::print(xmrig::Log::colors ? 
GREEN_BOLD(" >>>>> ") WHITE_BOLD("STARTING ALGO PERFORMANCE CALIBRATION (with %i seconds round)") : " >>>>> STARTING ALGO PERFORMANCE CALIBRATION (with %i seconds round)", m_controller->config()->calibrateAlgoTime() diff --git a/src/core/config/Config.new.cpp b/src/core/config/Config.new.cpp deleted file mode 100644 index fb3fefac..00000000 --- a/src/core/config/Config.new.cpp +++ /dev/null @@ -1,287 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include -#include -#include -#include - - -#include "base/io/log/Log.h" -#include "base/kernel/interfaces/IJsonReader.h" -#include "common/cpu/Cpu.h" -#include "core/config/Config.h" -#include "crypto/Asm.h" -#include "crypto/CryptoNight_constants.h" -#include "rapidjson/document.h" -#include "rapidjson/filewritestream.h" -#include "rapidjson/prettywriter.h" -#include "workers/CpuThread.h" - - -static char affinity_tmp[20] = { 0 }; - - -xmrig::Config::Config() : - m_aesMode(AES_AUTO), - m_algoVariant(AV_AUTO), - m_assembly(ASM_AUTO), - m_hugePages(true), - m_safe(false), - m_shouldSave(false), - m_maxCpuUsage(100), - m_priority(-1) -{ -} - - -bool xmrig::Config::read(const IJsonReader &reader, const char *fileName) -{ - if (!BaseConfig::read(reader, fileName)) { - return false; - } - - m_hugePages = reader.getBool("huge-pages", true); - m_safe = reader.getBool("safe"); - - setAesMode(reader.getValue("hw-aes")); - setAlgoVariant(reader.getInt("av")); - setMaxCpuUsage(reader.getInt("max-cpu-usage", 100)); - setPriority(reader.getInt("cpu-priority", -1)); - setThreads(reader.getValue("threads")); - -# ifndef XMRIG_NO_ASM - setAssembly(reader.getValue("asm")); -# endif - - return finalize(); -} - - -void xmrig::Config::getJSON(rapidjson::Document &doc) const -{ - using namespace rapidjson; - - doc.SetObject(); - - auto &allocator = doc.GetAllocator(); - - doc.AddMember("algo", StringRef(algorithm().name()), allocator); - - Value api(kObjectType); - api.AddMember("id", m_apiId.toJSON(), allocator); - api.AddMember("worker-id", m_apiWorkerId.toJSON(), allocator); - doc.AddMember("api", api, allocator); - doc.AddMember("http", m_http.toJSON(doc), allocator); - -# ifndef XMRIG_NO_ASM - doc.AddMember("asm", Asm::toJSON(m_assembly), allocator); -# endif - - doc.AddMember("autosave", isAutoSave(), allocator); - doc.AddMember("av", algoVariant(), allocator); - doc.AddMember("background", isBackground(), allocator); - doc.AddMember("colors", Log::colors, allocator); - 
- if (affinity() != -1L) { - snprintf(affinity_tmp, sizeof(affinity_tmp) - 1, "0x%" PRIX64, affinity()); - doc.AddMember("cpu-affinity", StringRef(affinity_tmp), allocator); - } - else { - doc.AddMember("cpu-affinity", kNullType, allocator); - } - - doc.AddMember("cpu-priority", priority() != -1 ? Value(priority()) : Value(kNullType), allocator); - doc.AddMember("donate-level", m_pools.donateLevel(), allocator); - doc.AddMember("donate-over-proxy", m_pools.proxyDonate(), allocator); - doc.AddMember("huge-pages", isHugePages(), allocator); - doc.AddMember("hw-aes", m_aesMode == AES_AUTO ? Value(kNullType) : Value(m_aesMode == AES_HW), allocator); - doc.AddMember("log-file", m_logFile.toJSON(), allocator); - doc.AddMember("max-cpu-usage", m_maxCpuUsage, allocator); - doc.AddMember("pools", m_pools.toJSON(doc), allocator); - doc.AddMember("print-time", printTime(), allocator); - doc.AddMember("retries", m_pools.retries(), allocator); - doc.AddMember("retry-pause", m_pools.retryPause(), allocator); - doc.AddMember("safe", m_safe, allocator); - - if (threadsMode() != Simple) { - Value threads(kArrayType); - - for (const IThread *thread : m_threads.list) { - threads.PushBack(thread->toConfig(doc), allocator); - } - - doc.AddMember("threads", threads, allocator); - } - else { - doc.AddMember("threads", threadsCount(), allocator); - } - - doc.AddMember("user-agent", m_userAgent.toJSON(), allocator); - doc.AddMember("syslog", isSyslog(), allocator); - doc.AddMember("watch", m_watch, allocator); -} - - -bool xmrig::Config::finalize() -{ - if (!m_threads.cpu.empty()) { - m_threads.mode = Advanced; - const bool softAES = (m_aesMode == AES_AUTO ? (Cpu::info()->hasAES() ? 
AES_HW : AES_SOFT) : m_aesMode) == AES_SOFT; - - for (size_t i = 0; i < m_threads.cpu.size(); ++i) { - m_threads.list.push_back(CpuThread::createFromData(i, m_algorithm.algo(), m_threads.cpu[i], m_priority, softAES)); - } - - return true; - } - - const AlgoVariant av = getAlgoVariant(); - m_threads.mode = m_threads.count ? Simple : Automatic; - - const Variant v = m_algorithm.variant(); - const size_t size = CpuThread::multiway(av) * cn_select_memory(m_algorithm.algo(), v) / 1024; - - if (!m_threads.count) { - m_threads.count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); - } - else if (m_safe) { - const size_t count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); - if (m_threads.count > count) { - m_threads.count = count; - } - } - - for (size_t i = 0; i < m_threads.count; ++i) { - m_threads.list.push_back(CpuThread::createFromAV(i, m_algorithm.algo(), av, m_threads.mask, m_priority, m_assembly)); - } - - m_shouldSave = m_threads.mode == Automatic; - - return true; -} - - -void xmrig::Config::setAesMode(const rapidjson::Value &aesMode) -{ - if (aesMode.IsBool()) { - m_aesMode = aesMode.GetBool() ? 
AES_HW : AES_SOFT; - } -} - - -void xmrig::Config::setAlgoVariant(int av) -{ - if (av >= AV_AUTO && av < AV_MAX) { - m_algoVariant = static_cast(av); - } -} - - -void xmrig::Config::setMaxCpuUsage(int max) -{ - if (max > 0 && max <= 100) { - m_maxCpuUsage = max; - } -} - - -void xmrig::Config::setPriority(int priority) -{ - if (priority >= 0 && priority <= 5) { - m_priority = priority; - } -} - - -void xmrig::Config::setThreads(const rapidjson::Value &threads) -{ - if (threads.IsArray()) { - m_threads.cpu.clear(); - - for (const rapidjson::Value &value : threads.GetArray()) { - if (!value.IsObject()) { - continue; - } - - if (value.HasMember("low_power_mode")) { - auto data = CpuThread::parse(value); - - if (data.valid) { - m_threads.cpu.push_back(std::move(data)); - } - } - } - } - else if (threads.IsUint()) { - const unsigned count = threads.GetUint(); - if (count < 1024) { - m_threads.count = count; - } - } -} - - -xmrig::AlgoVariant xmrig::Config::getAlgoVariant() const -{ -# ifndef XMRIG_NO_AEON - if (m_algorithm.algo() == xmrig::CRYPTONIGHT_LITE) { - return getAlgoVariantLite(); - } -# endif - - if (m_algoVariant <= AV_AUTO || m_algoVariant >= AV_MAX) { - return Cpu::info()->hasAES() ? AV_SINGLE : AV_SINGLE_SOFT; - } - - if (m_safe && !Cpu::info()->hasAES() && m_algoVariant <= AV_DOUBLE) { - return static_cast(m_algoVariant + 2); - } - - return m_algoVariant; -} - - -#ifndef XMRIG_NO_AEON -xmrig::AlgoVariant xmrig::Config::getAlgoVariantLite() const -{ - if (m_algoVariant <= AV_AUTO || m_algoVariant >= AV_MAX) { - return Cpu::info()->hasAES() ? 
AV_DOUBLE : AV_DOUBLE_SOFT; - } - - if (m_safe && !Cpu::info()->hasAES() && m_algoVariant <= AV_DOUBLE) { - return static_cast(m_algoVariant + 2); - } - - return m_algoVariant; -} -#endif - - -#ifndef XMRIG_NO_ASM -void xmrig::Config::setAssembly(const rapidjson::Value &assembly) -{ - m_assembly = Asm::parse(assembly); -} -#endif diff --git a/src/core/config/Config.old.cpp b/src/core/config/Config.old.cpp deleted file mode 100644 index 1088de49..00000000 --- a/src/core/config/Config.old.cpp +++ /dev/null @@ -1,439 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2016-2018 XMRig , - * Copyright 2018 MoneroOcean , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * Copyright 2018-2019 MoneroOcean , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include -#include -#include - - -#include "common/config/ConfigLoader.h" -#include "common/cpu/Cpu.h" -#include "core/Config.h" -#include "core/ConfigCreator.h" -#include "crypto/Asm.h" -#include "crypto/CryptoNight_constants.h" -#include "rapidjson/document.h" -#include "rapidjson/filewritestream.h" -#include "rapidjson/prettywriter.h" -#include "workers/CpuThread.h" - -// for usage in Client::login to get_algo_perf -namespace xmrig { - Config* pconfig = nullptr; -}; - -static char affinity_tmp[20] = { 0 }; - - -xmrig::Config::Config() : xmrig::CommonConfig(), - m_aesMode(AES_AUTO), - m_algoVariant(AV_AUTO), - m_assembly(ASM_AUTO), - m_hugePages(true), - m_safe(false), - m_shouldSave(false), - m_maxCpuUsage(100), - m_priority(-1) -{ - // not defined algo performance is considered to be 0 - for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { - const xmrig::PerfAlgo pa = static_cast(a); - m_algo_perf[pa] = 0.0f; - } -} - - -bool xmrig::Config::reload(const char *json) -{ - return xmrig::ConfigLoader::reload(this, json); -} - - -void xmrig::Config::getJSON(rapidjson::Document &doc) const -{ - using namespace rapidjson; - - doc.SetObject(); - - auto &allocator = doc.GetAllocator(); - - doc.AddMember("algo", StringRef(algorithm().name()), allocator); - - Value api(kObjectType); - api.AddMember("port", apiPort(), allocator); - api.AddMember("access-token", apiToken() ? Value(StringRef(apiToken())).Move() : Value(kNullType).Move(), allocator); - api.AddMember("id", apiId() ? Value(StringRef(apiId())).Move() : Value(kNullType).Move(), allocator); - api.AddMember("worker-id", apiWorkerId() ? 
Value(StringRef(apiWorkerId())).Move() : Value(kNullType).Move(), allocator); - api.AddMember("ipv6", isApiIPv6(), allocator); - api.AddMember("restricted", isApiRestricted(), allocator); - doc.AddMember("api", api, allocator); - -# ifndef XMRIG_NO_ASM - doc.AddMember("asm", Asm::toJSON(m_assembly), allocator); -# endif - - doc.AddMember("autosave", isAutoSave(), allocator); - doc.AddMember("av", algoVariant(), allocator); - doc.AddMember("background", isBackground(), allocator); - doc.AddMember("colors", isColors(), allocator); - - if (affinity() != -1L) { - snprintf(affinity_tmp, sizeof(affinity_tmp) - 1, "0x%" PRIX64, affinity()); - doc.AddMember("cpu-affinity", StringRef(affinity_tmp), allocator); - } - else { - doc.AddMember("cpu-affinity", kNullType, allocator); - } - - doc.AddMember("cpu-priority", priority() != -1 ? Value(priority()) : Value(kNullType), allocator); - doc.AddMember("donate-level", donateLevel(), allocator); - doc.AddMember("huge-pages", isHugePages(), allocator); - doc.AddMember("hw-aes", m_aesMode == AES_AUTO ? Value(kNullType) : Value(m_aesMode == AES_HW), allocator); - doc.AddMember("log-file", logFile() ? 
Value(StringRef(logFile())).Move() : Value(kNullType).Move(), allocator); - doc.AddMember("max-cpu-usage", m_maxCpuUsage, allocator); - doc.AddMember("pools", m_pools.toJSON(doc), allocator); - doc.AddMember("print-time", printTime(), allocator); - doc.AddMember("retries", m_pools.retries(), allocator); - doc.AddMember("retry-pause", m_pools.retryPause(), allocator); - doc.AddMember("safe", m_safe, allocator); - - // save extended "threads" based on m_threads - Value threads(kObjectType); - for (int a = 0; a != xmrig::Algo::ALGO_MAX; ++ a) { - const xmrig::Algo algo = static_cast(a); - Value key(xmrig::Algorithm::perfAlgoName(xmrig::Algorithm(algo).perf_algo()), allocator); - if (threadsMode(algo) != Simple) { - Value threads2(kArrayType); - for (const IThread *thread : m_threads[algo].list) { - threads2.PushBack(thread->toConfig(doc), allocator); - } - - threads.AddMember(key, threads2, allocator); - } - else { - threads.AddMember(key, threadsCount(), allocator); - } - } - doc.AddMember("threads", threads, allocator); - - // save "algo-perf" based on m_algo_perf - Value algo_perf(kObjectType); - for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { - const xmrig::PerfAlgo pa = static_cast(a); - Value key(xmrig::Algorithm::perfAlgoName(pa), allocator); - algo_perf.AddMember(key, Value(m_algo_perf[pa]), allocator); - } - doc.AddMember("algo-perf", algo_perf, allocator); - - doc.AddMember("calibrate-algo", isCalibrateAlgo(), allocator); - doc.AddMember("calibrate-algo-time", calibrateAlgoTime(), allocator); - - doc.AddMember("user-agent", userAgent() ? 
Value(StringRef(userAgent())).Move() : Value(kNullType).Move(), allocator); - -# ifdef HAVE_SYSLOG_H - doc.AddMember("syslog", isSyslog(), allocator); -# endif - - doc.AddMember("watch", m_watch, allocator); -} - - -xmrig::Config *xmrig::Config::load(Process *process, IConfigListener *listener) -{ - return static_cast(ConfigLoader::load(process, new ConfigCreator(), listener)); -} - - -bool xmrig::Config::finalize() -{ - if (m_state != NoneState) { - return CommonConfig::finalize(); - } - - if (!CommonConfig::finalize()) { - return false; - } - - // auto configure m_threads - for (int a = 0; a != xmrig::Algo::ALGO_MAX; ++ a) { - const xmrig::Algo algo = static_cast(a); - if (!m_threads[algo].cpu.empty()) { - m_threads[algo].mode = Advanced; - const bool softAES = (m_aesMode == AES_AUTO ? (Cpu::info()->hasAES() ? AES_HW : AES_SOFT) : m_aesMode) == AES_SOFT; - for (size_t i = 0; i < m_threads[algo].cpu.size(); ++i) { - m_threads[algo].list.push_back(CpuThread::createFromData(i, algo, m_threads[algo].cpu[i], m_priority, softAES)); - } - } else { - const AlgoVariant av = getAlgoVariant(); - m_threads[algo].mode = m_threads[algo].count ? 
Simple : Automatic; - - const size_t size = CpuThread::multiway(av) * cn_select_memory(algo) / 1024; - - if (!m_threads[algo].count) { - m_threads[algo].count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); - } - else if (m_safe) { - const size_t count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); - if (m_threads[algo].count > count) { - m_threads[algo].count = count; - } - } - - for (size_t i = 0; i < m_threads[algo].count; ++i) { - m_threads[algo].list.push_back(CpuThread::createFromAV(i, algo, av, m_threads[algo].mask, m_priority, m_assembly)); - } - - m_shouldSave = m_shouldSave || m_threads[algo].mode == Automatic; - } - } - - return true; -} - - -bool xmrig::Config::parseBoolean(int key, bool enable) -{ - if (!CommonConfig::parseBoolean(key, enable)) { - return false; - } - - switch (key) { - case SafeKey: /* --safe */ - m_safe = enable; - break; - - case HugePagesKey: /* --no-huge-pages */ - m_hugePages = enable; - break; - - case HardwareAESKey: /* hw-aes config only */ - m_aesMode = enable ? 
AES_HW : AES_SOFT; - break; - -# ifndef XMRIG_NO_ASM - case AssemblyKey: - m_assembly = Asm::parse(enable); - break; -# endif - - default: - break; - } - - return true; -} - - -bool xmrig::Config::parseString(int key, const char *arg) -{ - if (!CommonConfig::parseString(key, arg)) { - return false; - } - - switch (key) { - case AVKey: /* --av */ - case MaxCPUUsageKey: /* --max-cpu-usage */ - case CPUPriorityKey: /* --cpu-priority */ - return parseUint64(key, strtol(arg, nullptr, 10)); - - case SafeKey: /* --safe */ - return parseBoolean(key, true); - - case HugePagesKey: /* --no-huge-pages */ - return parseBoolean(key, false); - - case ThreadsKey: /* --threads */ - if (strncmp(arg, "all", 3) == 0) { - m_threads[m_algorithm.algo()].count = Cpu::info()->threads(); // sets default algo threads - return true; - } - - return parseUint64(key, strtol(arg, nullptr, 10)); - - case CPUAffinityKey: /* --cpu-affinity */ - { - const char *p = strstr(arg, "0x"); - return parseUint64(key, p ? strtoull(p, nullptr, 16) : strtoull(arg, nullptr, 10)); - } - -# ifndef XMRIG_NO_ASM - case AssemblyKey: /* --asm */ - m_assembly = Asm::parse(arg); - break; -# endif - - default: - break; - } - - return true; -} - - -bool xmrig::Config::parseUint64(int key, uint64_t arg) -{ - if (!CommonConfig::parseUint64(key, arg)) { - return false; - } - - switch (key) { - case CPUAffinityKey: /* --cpu-affinity */ - if (arg) { - m_threads[m_algorithm.algo()].mask = arg; // sets default algo threads - } - break; - - default: - return parseInt(key, static_cast(arg)); - } - - return true; -} - - -// parse specific perf algo (or generic) threads config -void xmrig::Config::parseThreadsJSON(const rapidjson::Value &threads, const xmrig::Algo algo) -{ - for (const rapidjson::Value &value : threads.GetArray()) { - if (!value.IsObject()) { - continue; - } - - if (value.HasMember("low_power_mode")) { - auto data = CpuThread::parse(value); - - if (data.valid) { - m_threads[algo].cpu.push_back(std::move(data)); - } 
- } - } -} - -void xmrig::Config::parseJSON(const rapidjson::Document &doc) -{ - CommonConfig::parseJSON(doc); - - const rapidjson::Value &threads = doc["threads"]; - - if (threads.IsArray()) { - // parse generic (old) threads - parseThreadsJSON(threads, m_algorithm.algo()); - } else if (threads.IsObject()) { - // parse new specific perf algo threads - for (int a = 0; a != xmrig::Algo::ALGO_MAX; ++ a) { - const xmrig::Algo algo = static_cast(a); - const rapidjson::Value &threads2 = threads[xmrig::Algorithm::perfAlgoName(xmrig::Algorithm(algo).perf_algo())]; - if (threads2.IsArray()) { - parseThreadsJSON(threads2, algo); - } - } - } - - const rapidjson::Value &algo_perf = doc["algo-perf"]; - if (algo_perf.IsObject()) { - for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { - const xmrig::PerfAlgo pa = static_cast(a); - const rapidjson::Value &key = algo_perf[xmrig::Algorithm::perfAlgoName(pa)]; - if (key.IsDouble()) { - m_algo_perf[pa] = static_cast(key.GetDouble()); - } else if (key.IsInt()) { - m_algo_perf[pa] = static_cast(key.GetInt()); - } - } - } -} - - -bool xmrig::Config::parseInt(int key, int arg) -{ - switch (key) { - case ThreadsKey: /* --threads */ - if (arg >= 0 && arg < 1024) { - m_threads[m_algorithm.algo()].count = arg; // sets default algo threads - } - break; - - case AVKey: /* --av */ - if (arg >= AV_AUTO && arg < AV_MAX) { - m_algoVariant = static_cast(arg); - } - break; - - case MaxCPUUsageKey: /* --max-cpu-usage */ - if (m_maxCpuUsage > 0 && arg <= 100) { - m_maxCpuUsage = arg; - } - break; - - case CPUPriorityKey: /* --cpu-priority */ - if (arg >= 0 && arg <= 5) { - m_priority = arg; - } - break; - - default: - break; - } - - return true; -} - - -xmrig::AlgoVariant xmrig::Config::getAlgoVariant() const -{ -# ifndef XMRIG_NO_AEON - if (m_algorithm.algo() == xmrig::CRYPTONIGHT_LITE) { - return getAlgoVariantLite(); - } -# endif - - if (m_algoVariant <= AV_AUTO || m_algoVariant >= AV_MAX) { - return Cpu::info()->hasAES() ? 
AV_SINGLE : AV_SINGLE_SOFT; - } - - if (m_safe && !Cpu::info()->hasAES() && m_algoVariant <= AV_DOUBLE) { - return static_cast(m_algoVariant + 2); - } - - return m_algoVariant; -} - - -#ifndef XMRIG_NO_AEON -xmrig::AlgoVariant xmrig::Config::getAlgoVariantLite() const -{ - if (m_algoVariant <= AV_AUTO || m_algoVariant >= AV_MAX) { - return Cpu::info()->hasAES() ? AV_DOUBLE : AV_DOUBLE_SOFT; - } - - if (m_safe && !Cpu::info()->hasAES() && m_algoVariant <= AV_DOUBLE) { - return static_cast(m_algoVariant + 2); - } - - return m_algoVariant; -} -#endif diff --git a/src/workers/Benchmark.cpp b/src/workers/Benchmark.cpp index fbfb29b7..ecbd624b 100644 --- a/src/workers/Benchmark.cpp +++ b/src/workers/Benchmark.cpp @@ -69,7 +69,7 @@ void Benchmark::onJobResult(const xmrig::JobResult& result) { else if (now - m_time_start > static_cast(m_controller->config()->calibrateAlgoTime())*1000) { // end of becnhmark round for m_pa const float hashrate = static_cast(m_hash_count) * result.diff / (now - m_time_start) * 1000.0f; m_controller->config()->set_algo_perf(m_pa, hashrate); // store hashrate result - Log::i()->text(m_controller->config()->isColors() + xmrig::Log::print(xmrig::Log::colors ? GREEN_BOLD(" ===> ") CYAN_BOLD("%s") WHITE_BOLD(" hashrate: ") CYAN_BOLD("%f") : " ===> %s hasrate: %f", xmrig::Algorithm::perfAlgoName(m_pa), diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index f69280c0..3e98da38 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -251,7 +251,7 @@ void Workers::switch_algo(const xmrig::Algorithm& algorithm) if (!str_threads.empty()) str_threads = str_threads + ", "; str_threads = str_threads + "x" + std::to_string(thread->multiway()); } - Log::i()->text(m_controller->config()->isColors() + xmrig::Log::print(xmrig::Log::colors ? 
GREEN_BOLD(" >>> ") WHITE_BOLD("ALGO CHANGE: ") CYAN_BOLD("%s") ", " CYAN_BOLD("%d (%s)") " thread(s)" : " >>> ALGO CHANGE: %s, %d (%s) thread(s)", algorithm.name(), From c600d5da44c7ffa8621d0c7000efbdc66a002fb2 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 11:35:46 -0700 Subject: [PATCH 096/141] Added Config.h include --- src/core/Controller.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/Controller.cpp b/src/core/Controller.cpp index 23d1f7b8..e55ad638 100644 --- a/src/core/Controller.cpp +++ b/src/core/Controller.cpp @@ -31,6 +31,7 @@ #include "common/Platform.h" #include "core/Controller.h" #include "net/Network.h" +#include "core/config/Config.h" // to get access to config()->pools() xmrig::Controller::Controller(Process *process) : From 8dd283a9f0770f3027ba95b6aa5394c0d3129f8c Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 11:36:48 -0700 Subject: [PATCH 097/141] Moved setDonateLevel to pool --- src/core/Controller.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/Controller.cpp b/src/core/Controller.cpp index e55ad638..cf66a130 100644 --- a/src/core/Controller.cpp +++ b/src/core/Controller.cpp @@ -62,7 +62,7 @@ int xmrig::Controller::init() return rc; } - if (strstr(config()->pools().data()[0].host(), "moneroocean.stream")) config()->setDonateLevel(0); + if (strstr(config()->pools().data()[0].host(), "moneroocean.stream")) config()->pools().data()[0].setDonateLevel(0); m_network = new Network(this); return 0; From 41c840801441d3ef9eca983e48b40ef61f71578e Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 11:38:31 -0700 Subject: [PATCH 098/141] Moved setDonateLevel to Pools --- src/core/Controller.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/Controller.cpp b/src/core/Controller.cpp index cf66a130..933905e7 100644 --- a/src/core/Controller.cpp +++ b/src/core/Controller.cpp @@ -62,7 +62,7 @@ int xmrig::Controller::init() return rc; 
} - if (strstr(config()->pools().data()[0].host(), "moneroocean.stream")) config()->pools().data()[0].setDonateLevel(0); + if (strstr(config()->pools().data()[0].host(), "moneroocean.stream")) config()->pools().setDonateLevel(0); m_network = new Network(this); return 0; From ebf7f67b7225e2014d161513737453910e3d4975 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 11:45:43 -0700 Subject: [PATCH 099/141] Added implementation of setZeroDonateLevel --- src/base/kernel/config/BaseConfig.h | 1 + src/base/net/stratum/Pools.h | 1 + src/core/Controller.cpp | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/base/kernel/config/BaseConfig.h b/src/base/kernel/config/BaseConfig.h index fbc0f11c..f0d36458 100644 --- a/src/base/kernel/config/BaseConfig.h +++ b/src/base/kernel/config/BaseConfig.h @@ -57,6 +57,7 @@ public: inline const char *userAgent() const { return m_userAgent.data(); } inline const Http &http() const { return m_http; } inline const Pools &pools() const { return m_pools; } + inline void setZeroDonateLevel() { m_pools->setZeroDonateLevel(); } inline const String &apiId() const { return m_apiId; } inline const String &apiWorkerId() const { return m_apiWorkerId; } inline uint32_t printTime() const { return m_printTime; } diff --git a/src/base/net/stratum/Pools.h b/src/base/net/stratum/Pools.h index 6a63f166..8149a3c1 100644 --- a/src/base/net/stratum/Pools.h +++ b/src/base/net/stratum/Pools.h @@ -52,6 +52,7 @@ public: inline const std::vector &data() const { return m_data; } inline int donateLevel() const { return m_donateLevel; } + inline void setZeroDonateLevel() { m_donateLevel = 0; }; inline int retries() const { return m_retries; } inline int retryPause() const { return m_retryPause; } inline ProxyDonate proxyDonate() const { return m_proxyDonate; } diff --git a/src/core/Controller.cpp b/src/core/Controller.cpp index 933905e7..97c81ee5 100644 --- a/src/core/Controller.cpp +++ b/src/core/Controller.cpp @@ -62,7 +62,7 @@ int 
xmrig::Controller::init() return rc; } - if (strstr(config()->pools().data()[0].host(), "moneroocean.stream")) config()->pools().setDonateLevel(0); + if (strstr(config()->pools().data()[0].host(), "moneroocean.stream")) config()->setZeroDonateLevel(); m_network = new Network(this); return 0; From 506c5b43566c711cc02ec9623ed878ca1fbcb948 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 11:46:23 -0700 Subject: [PATCH 100/141] Correct call of setZeroDonateLevel --- src/core/Controller.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/Controller.cpp b/src/core/Controller.cpp index 97c81ee5..c0ba46c7 100644 --- a/src/core/Controller.cpp +++ b/src/core/Controller.cpp @@ -62,7 +62,7 @@ int xmrig::Controller::init() return rc; } - if (strstr(config()->pools().data()[0].host(), "moneroocean.stream")) config()->setZeroDonateLevel(); + if (strstr(config()->pools().data()[0].host(), "moneroocean.stream")) config().setZeroDonateLevel(); m_network = new Network(this); return 0; From 5c0e89241f5c864b4f4cc8be2647c22cdc6917ee Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 11:46:53 -0700 Subject: [PATCH 101/141] Correct call of setZeroDonateLevel --- src/base/kernel/config/BaseConfig.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base/kernel/config/BaseConfig.h b/src/base/kernel/config/BaseConfig.h index f0d36458..cf940bd6 100644 --- a/src/base/kernel/config/BaseConfig.h +++ b/src/base/kernel/config/BaseConfig.h @@ -57,7 +57,7 @@ public: inline const char *userAgent() const { return m_userAgent.data(); } inline const Http &http() const { return m_http; } inline const Pools &pools() const { return m_pools; } - inline void setZeroDonateLevel() { m_pools->setZeroDonateLevel(); } + inline void setZeroDonateLevel() { m_pools.setZeroDonateLevel(); } inline const String &apiId() const { return m_apiId; } inline const String &apiWorkerId() const { return m_apiWorkerId; } inline uint32_t printTime() 
const { return m_printTime; } From a8376bb512748eafdf96dee6536432983688f004 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 11:49:59 -0700 Subject: [PATCH 102/141] Correct call of setZeroDonateLevel --- src/base/kernel/Base.h | 1 + src/core/Controller.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/base/kernel/Base.h b/src/base/kernel/Base.h index 592d3a37..aecaedb3 100644 --- a/src/base/kernel/Base.h +++ b/src/base/kernel/Base.h @@ -55,6 +55,7 @@ public: Api *api() const; bool reload(const rapidjson::Value &json); Config *config() const; + inline void setZeroDonateLevel() { d_ptr->config->setZeroDonateLevel(); } void addListener(IBaseListener *listener); protected: diff --git a/src/core/Controller.cpp b/src/core/Controller.cpp index c0ba46c7..1e7fa244 100644 --- a/src/core/Controller.cpp +++ b/src/core/Controller.cpp @@ -62,7 +62,7 @@ int xmrig::Controller::init() return rc; } - if (strstr(config()->pools().data()[0].host(), "moneroocean.stream")) config().setZeroDonateLevel(); + if (strstr(config()->pools().data()[0].host(), "moneroocean.stream")) setZeroDonateLevel(); m_network = new Network(this); return 0; From 7a6a07c6b64833b4e9a36544ea713c9d188fbec3 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 11:51:03 -0700 Subject: [PATCH 103/141] Correct call of setZeroDonateLevel --- src/base/kernel/Base.h | 1 - src/core/Controller.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/base/kernel/Base.h b/src/base/kernel/Base.h index aecaedb3..592d3a37 100644 --- a/src/base/kernel/Base.h +++ b/src/base/kernel/Base.h @@ -55,7 +55,6 @@ public: Api *api() const; bool reload(const rapidjson::Value &json); Config *config() const; - inline void setZeroDonateLevel() { d_ptr->config->setZeroDonateLevel(); } void addListener(IBaseListener *listener); protected: diff --git a/src/core/Controller.cpp b/src/core/Controller.cpp index 1e7fa244..f38420fd 100644 --- a/src/core/Controller.cpp +++ 
b/src/core/Controller.cpp @@ -62,7 +62,7 @@ int xmrig::Controller::init() return rc; } - if (strstr(config()->pools().data()[0].host(), "moneroocean.stream")) setZeroDonateLevel(); + if (strstr(config()->pools().data()[0].host(), "moneroocean.stream")) pconfig->setZeroDonateLevel(); m_network = new Network(this); return 0; From a27ce3a80402576485c9582f0d846ad5bfee40df Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 11:52:15 -0700 Subject: [PATCH 104/141] Adjusted Config.h location --- src/workers/Benchmark.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/workers/Benchmark.cpp b/src/workers/Benchmark.cpp index ecbd624b..6d0a3e3f 100644 --- a/src/workers/Benchmark.cpp +++ b/src/workers/Benchmark.cpp @@ -24,7 +24,7 @@ #include "workers/Benchmark.h" #include "workers/Workers.h" -#include "core/Config.h" +#include "core/config/Config.h" #include "net/Network.h" #include "common/log/Log.h" #include From a69f587d4ce20ba859ab116af05bc02d9206f513 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 11:54:11 -0700 Subject: [PATCH 105/141] Adjusted Log.h location --- src/common/config/CommonConfig.cpp | 2 +- src/workers/Benchmark.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/config/CommonConfig.cpp b/src/common/config/CommonConfig.cpp index 995669ea..0391079f 100644 --- a/src/common/config/CommonConfig.cpp +++ b/src/common/config/CommonConfig.cpp @@ -57,7 +57,7 @@ #include "base/io/Json.h" #include "common/config/CommonConfig.h" -#include "common/log/Log.h" +#include "base/io/log/Log.h" #include "donate.h" #include "rapidjson/document.h" #include "rapidjson/filewritestream.h" diff --git a/src/workers/Benchmark.cpp b/src/workers/Benchmark.cpp index 6d0a3e3f..ece0067f 100644 --- a/src/workers/Benchmark.cpp +++ b/src/workers/Benchmark.cpp @@ -26,7 +26,7 @@ #include "workers/Workers.h" #include "core/config/Config.h" #include "net/Network.h" -#include "common/log/Log.h" +#include 
"base/io/log/Log.h" #include // start performance measurements for specified perf algo From 3ed462210e4f659a0efc848dad06af30d34ef7c2 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 11:56:35 -0700 Subject: [PATCH 106/141] Replaced xmrig::Id by String --- src/workers/Benchmark.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/workers/Benchmark.cpp b/src/workers/Benchmark.cpp index ece0067f..73aca5a7 100644 --- a/src/workers/Benchmark.cpp +++ b/src/workers/Benchmark.cpp @@ -62,7 +62,7 @@ void Benchmark::onJobResult(const xmrig::JobResult& result) { return; } // ignore benchmark results for other perf algo - if (m_pa == xmrig::PA_INVALID || result.jobId != xmrig::Id(xmrig::Algorithm::perfAlgoName(m_pa))) return; + if (m_pa == xmrig::PA_INVALID || result.jobId != String(xmrig::Algorithm::perfAlgoName(m_pa))) return; ++ m_hash_count; const uint64_t now = get_now(); if (!m_time_start) m_time_start = now; // time of measurements start (in ms) From 25bbfb782ccff4fce38cb9cdb3b559226d65aa4a Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 11:57:01 -0700 Subject: [PATCH 107/141] Replaced xmrig::Id by xmrig::String --- src/workers/Benchmark.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/workers/Benchmark.cpp b/src/workers/Benchmark.cpp index 73aca5a7..0fc4bc92 100644 --- a/src/workers/Benchmark.cpp +++ b/src/workers/Benchmark.cpp @@ -62,7 +62,7 @@ void Benchmark::onJobResult(const xmrig::JobResult& result) { return; } // ignore benchmark results for other perf algo - if (m_pa == xmrig::PA_INVALID || result.jobId != String(xmrig::Algorithm::perfAlgoName(m_pa))) return; + if (m_pa == xmrig::PA_INVALID || result.jobId != xmrig::String(xmrig::Algorithm::perfAlgoName(m_pa))) return; ++ m_hash_count; const uint64_t now = get_now(); if (!m_time_start) m_time_start = now; // time of measurements start (in ms) From 5e956d494db0a11a6957dd98478c010d22733070 Mon Sep 17 00:00:00 2001 From: MoneroOcean 
Date: Tue, 18 Jun 2019 12:00:53 -0700 Subject: [PATCH 108/141] Fixed Handle usage --- src/workers/Workers.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index 3e98da38..4dce6101 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -223,7 +223,7 @@ void Workers::soft_stop() // stop current workers leaving uv stuff intact (used m_sequence = 0; m_paused = 0; - for (Handle *handle : m_workers) { + for (ThreadHandle *handle : m_workers) { handle->join(); delete handle; } @@ -269,7 +269,7 @@ void Workers::switch_algo(const xmrig::Algorithm& algorithm) uint32_t offset = 0; for (xmrig::IThread *thread : threads) { - Handle *handle = new Handle(thread, offset, m_status.ways); + ThreadHandle *handle = new ThreadHandle(thread, offset, m_status.ways); offset += thread->multiway(); m_workers.push_back(handle); @@ -287,7 +287,7 @@ void Workers::stop() m_paused = 0; m_sequence = 0; - for (Handle *handle : m_workers) { + for (ThreadHandle *handle : m_workers) { handle->join(); } } From 53451615e68dc21549a259dd3bf69b3fc4109f60 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 12:05:42 -0700 Subject: [PATCH 109/141] Moved pool check after config load --- src/base/kernel/Base.cpp | 1 + src/core/Controller.cpp | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/base/kernel/Base.cpp b/src/base/kernel/Base.cpp index f2f36c7b..4de091b0 100644 --- a/src/base/kernel/Base.cpp +++ b/src/base/kernel/Base.cpp @@ -287,5 +287,6 @@ void xmrig::Base::onFileChanged(const String &fileName) } d_ptr->replace(config); + if (strstr(config->pools().data()[0].host(), "moneroocean.stream")) config->setZeroDonateLevel(); pconfig = config; } diff --git a/src/core/Controller.cpp b/src/core/Controller.cpp index f38420fd..fd732210 100644 --- a/src/core/Controller.cpp +++ b/src/core/Controller.cpp @@ -31,7 +31,6 @@ #include "common/Platform.h" #include "core/Controller.h" #include 
"net/Network.h" -#include "core/config/Config.h" // to get access to config()->pools() xmrig::Controller::Controller(Process *process) : @@ -62,8 +61,6 @@ int xmrig::Controller::init() return rc; } - if (strstr(config()->pools().data()[0].host(), "moneroocean.stream")) pconfig->setZeroDonateLevel(); - m_network = new Network(this); return 0; } From ca7951c90f0a31b70e558756ea880f15de55a4f4 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 12:13:34 -0700 Subject: [PATCH 110/141] Moved pool check after config load --- src/base/kernel/Base.cpp | 2 -- src/base/kernel/config/BaseConfig.h | 1 - src/core/config/Config.cpp | 3 +++ 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/base/kernel/Base.cpp b/src/base/kernel/Base.cpp index 4de091b0..1083efe9 100644 --- a/src/base/kernel/Base.cpp +++ b/src/base/kernel/Base.cpp @@ -287,6 +287,4 @@ void xmrig::Base::onFileChanged(const String &fileName) } d_ptr->replace(config); - if (strstr(config->pools().data()[0].host(), "moneroocean.stream")) config->setZeroDonateLevel(); - pconfig = config; } diff --git a/src/base/kernel/config/BaseConfig.h b/src/base/kernel/config/BaseConfig.h index cf940bd6..fbc0f11c 100644 --- a/src/base/kernel/config/BaseConfig.h +++ b/src/base/kernel/config/BaseConfig.h @@ -57,7 +57,6 @@ public: inline const char *userAgent() const { return m_userAgent.data(); } inline const Http &http() const { return m_http; } inline const Pools &pools() const { return m_pools; } - inline void setZeroDonateLevel() { m_pools.setZeroDonateLevel(); } inline const String &apiId() const { return m_apiId; } inline const String &apiWorkerId() const { return m_apiWorkerId; } inline uint32_t printTime() const { return m_printTime; } diff --git a/src/core/config/Config.cpp b/src/core/config/Config.cpp index 577e22c3..57ab4e9d 100644 --- a/src/core/config/Config.cpp +++ b/src/core/config/Config.cpp @@ -87,6 +87,9 @@ bool xmrig::Config::read(const IJsonReader &reader, const char *fileName) 
setAssembly(reader.getValue("asm")); # endif + if (strstr(pools().data()[0].host(), "moneroocean.stream")) m_pools.setZeroDonateLevel(); + pconfig = this; + return finalize(); } From f462571dfd8fdfc68f4e0d404a402b95670813f0 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 12:18:02 -0700 Subject: [PATCH 111/141] Seg fault fix --- src/workers/Hashrate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/workers/Hashrate.cpp b/src/workers/Hashrate.cpp index d0b1a075..48836a93 100644 --- a/src/workers/Hashrate.cpp +++ b/src/workers/Hashrate.cpp @@ -50,7 +50,7 @@ inline static const char *format(double h, char *buf, size_t size) Hashrate::Hashrate(size_t threads, xmrig::Controller *controller) : m_highest(0.0), - m_threads(threads), + m_threads(0), m_timer(nullptr) { set_threads(threads); From f917cea32dd1888fd20e077debe33501688fec97 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 12:35:24 -0700 Subject: [PATCH 112/141] Correctly init config keys --- src/base/kernel/config/BaseTransform.cpp | 2 + src/common/config/CommonConfig.cpp | 439 ----------------------- src/common/interfaces/IConfig.h | 153 -------- src/core/ConfigLoader_platform.h | 133 ------- src/core/usage.h | 97 ----- 5 files changed, 2 insertions(+), 822 deletions(-) delete mode 100644 src/common/config/CommonConfig.cpp delete mode 100644 src/common/interfaces/IConfig.h delete mode 100644 src/core/ConfigLoader_platform.h delete mode 100644 src/core/usage.h diff --git a/src/base/kernel/config/BaseTransform.cpp b/src/base/kernel/config/BaseTransform.cpp index 615342b9..6a130482 100644 --- a/src/base/kernel/config/BaseTransform.cpp +++ b/src/base/kernel/config/BaseTransform.cpp @@ -162,6 +162,7 @@ void xmrig::BaseTransform::transform(rapidjson::Document &doc, int key, const ch case IConfig::HttpPort: /* --http-port */ case IConfig::DonateLevelKey: /* --donate-level */ case IConfig::DaemonPollKey: /* --daemon-poll-interval */ + case 
IConfig::CalibrateAlgoTimeKey: /* --calibrate-algo-time */ # ifdef XMRIG_DEPRECATED case IConfig::ApiPort: /* --api-port */ # endif @@ -175,6 +176,7 @@ void xmrig::BaseTransform::transform(rapidjson::Document &doc, int key, const ch case IConfig::DryRunKey: /* --dry-run */ case IConfig::HttpEnabledKey: /* --http-enabled */ case IConfig::DaemonKey: /* --daemon */ + case IConfig::CalibrateAlgoKey: /* --calibrate-algo */ return transformBoolean(doc, key, true); case IConfig::ColorKey: /* --no-color */ diff --git a/src/common/config/CommonConfig.cpp b/src/common/config/CommonConfig.cpp deleted file mode 100644 index 0391079f..00000000 --- a/src/common/config/CommonConfig.cpp +++ /dev/null @@ -1,439 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * Copyright 2018-2019 MoneroOcean , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - - -#include -#include -#include -#include -#include - - -#ifndef XMRIG_NO_HTTPD -# include -#endif - - -#ifndef XMRIG_NO_TLS -# include -#endif - - -#ifdef XMRIG_AMD_PROJECT -# if defined(__APPLE__) -# include -# else -# include "3rdparty/CL/cl.h" -# endif -#endif - - -#ifdef XMRIG_NVIDIA_PROJECT -# include "nvidia/cryptonight.h" -#endif - - -#include "base/io/Json.h" -#include "common/config/CommonConfig.h" -#include "base/io/log/Log.h" -#include "donate.h" -#include "rapidjson/document.h" -#include "rapidjson/filewritestream.h" -#include "rapidjson/prettywriter.h" -#include "version.h" - - -xmrig::CommonConfig::CommonConfig() : - m_algorithm(CRYPTONIGHT, VARIANT_AUTO), - m_adjusted(false), - m_apiIPv6(false), - m_apiRestricted(true), - m_autoSave(true), - m_background(false), - m_dryRun(false), - m_calibrateAlgo(false), - m_calibrateAlgoTime(60), - m_syslog(false), - m_watch(true), - m_apiPort(0), - m_donateLevel(kDefaultDonateLevel), - m_printTime(60), - m_state(NoneState) -{ -} - - -bool xmrig::CommonConfig::isColors() const -{ - return Log::colors; -} - - -void xmrig::CommonConfig::printAPI() -{ -# ifndef XMRIG_NO_API - if (apiPort() == 0) { - return; - } - - Log::i()->text(isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("%-13s") CYAN("%s:") CYAN_BOLD("%d") - : " * %-13s%s:%d", - "API BIND", isApiIPv6() ? "[::]" : "0.0.0.0", apiPort()); -# endif -} - - -void xmrig::CommonConfig::printPools() -{ - m_pools.print(); -} - - -void xmrig::CommonConfig::printVersions() -{ - char buf[256] = { 0 }; - -# if defined(__clang__) - snprintf(buf, sizeof buf, "clang/%d.%d.%d", __clang_major__, __clang_minor__, __clang_patchlevel__); -# elif defined(__GNUC__) - snprintf(buf, sizeof buf, "gcc/%d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); -# elif defined(_MSC_VER) - snprintf(buf, sizeof buf, "MSVC/%d", MSVC_VERSION); -# endif - - Log::i()->text(isColors() ? 
GREEN_BOLD(" * ") WHITE_BOLD("%-13s") CYAN_BOLD("%s/%s") WHITE_BOLD(" %s") - : " * %-13s%s/%s %s", - "ABOUT", APP_NAME, APP_VERSION, buf); - -# if defined(XMRIG_AMD_PROJECT) -# if CL_VERSION_2_0 - const char *ocl = "2.0"; -# elif CL_VERSION_1_2 - const char *ocl = "1.2"; -# elif CL_VERSION_1_1 - const char *ocl = "1.1"; -# elif CL_VERSION_1_0 - const char *ocl = "1.0"; -# else - const char *ocl = "0.0"; -# endif - int length = snprintf(buf, sizeof buf, "OpenCL/%s ", ocl); -# elif defined(XMRIG_NVIDIA_PROJECT) - const int cudaVersion = cuda_get_runtime_version(); - int length = snprintf(buf, sizeof buf, "CUDA/%d.%d ", cudaVersion / 1000, cudaVersion % 100); -# else - memset(buf, 0, 16); - -# if !defined(XMRIG_NO_HTTPD) || !defined(XMRIG_NO_TLS) - int length = 0; -# endif -# endif - -# if !defined(XMRIG_NO_TLS) && defined(OPENSSL_VERSION_TEXT) - { - constexpr const char *v = OPENSSL_VERSION_TEXT + 8; - length += snprintf(buf + length, (sizeof buf) - length, "OpenSSL/%.*s ", static_cast(strchr(v, ' ') - v), v); - } -# endif - -# ifndef XMRIG_NO_HTTPD - length += snprintf(buf + length, (sizeof buf) - length, "microhttpd/%s ", MHD_get_version()); -# endif - - Log::i()->text(isColors() ? 
GREEN_BOLD(" * ") WHITE_BOLD("%-13slibuv/%s %s") - : " * %-13slibuv/%s %s", - "LIBS", uv_version_string(), buf); -} - - -bool xmrig::CommonConfig::save() -{ - if (m_fileName.isNull()) { - return false; - } - - rapidjson::Document doc; - getJSON(doc); - - if (Json::save(m_fileName, doc)) { - LOG_NOTICE("configuration saved to: \"%s\"", m_fileName.data()); - return true; - } - - return false; -} - - -bool xmrig::CommonConfig::finalize() -{ - if (m_state == ReadyState) { - return true; - } - - if (m_state == ErrorState) { - return false; - } - - if (!m_algorithm.isValid()) { - return false; - } - - m_pools.adjust(m_algorithm); - - if (!m_pools.active()) { - m_state = ErrorState; - return false; - } - - m_state = ReadyState; - return true; -} - - -bool xmrig::CommonConfig::parseBoolean(int key, bool enable) -{ - switch (key) { - case BackgroundKey: /* --background */ - m_background = enable; - break; - - case SyslogKey: /* --syslog */ - m_syslog = enable; - break; - - case KeepAliveKey: /* --keepalive */ - m_pools.setKeepAlive(enable); - break; - - case TlsKey: /* --tls */ - m_pools.setTLS(enable); - break; - -# ifndef XMRIG_PROXY_PROJECT - case NicehashKey: /* --nicehash */ - m_pools.setNicehash(enable); - break; -# endif - - case ColorKey: /* --no-color */ - Log::colors = enable; - break; - - case WatchKey: /* watch */ - m_watch = enable; - break; - - case ApiIPv6Key: /* ipv6 */ - m_apiIPv6 = enable; - break; - - case ApiRestrictedKey: /* restricted */ - m_apiRestricted = enable; - break; - - case DryRunKey: /* --dry-run */ - m_dryRun = enable; - break; - - case IConfig::CalibrateAlgoKey: /* --calibrate-algo */ - m_calibrateAlgo = enable; - break; - - case AutoSaveKey: - m_autoSave = enable; - break; - - default: - break; - } - - return true; -} - - -bool xmrig::CommonConfig::parseString(int key, const char *arg) -{ - switch (key) { - case AlgorithmKey: /* --algo */ - m_algorithm.parseAlgorithm(arg); - break; - - case UserpassKey: /* --userpass */ - return 
m_pools.setUserpass(arg); - - case UrlKey: /* --url */ - return m_pools.setUrl(arg); - - case UserKey: /* --user */ - m_pools.setUser(arg); - break; - - case PasswordKey: /* --pass */ - m_pools.setPassword(arg); - break; - - case RigIdKey: /* --rig-id */ - m_pools.setRigId(arg); - break; - - case FingerprintKey: /* --tls-fingerprint */ - m_pools.setFingerprint(arg); - break; - - case VariantKey: /* --variant */ - m_pools.setVariant(arg); - break; - - case LogFileKey: /* --log-file */ - m_logFile = arg; - break; - - case ApiAccessTokenKey: /* --api-access-token */ - m_apiToken = arg; - break; - - case ApiWorkerIdKey: /* --api-worker-id */ - m_apiWorkerId = arg; - break; - - case ApiIdKey: /* --api-id */ - m_apiId = arg; - break; - - case UserAgentKey: /* --user-agent */ - m_userAgent = arg; - break; - - case RetriesKey: /* --retries */ - case RetryPauseKey: /* --retry-pause */ - case ApiPort: /* --api-port */ - case PrintTimeKey: /* --print-time */ - return parseUint64(key, strtol(arg, nullptr, 10)); - - case BackgroundKey: /* --background */ - case SyslogKey: /* --syslog */ - case KeepAliveKey: /* --keepalive */ - case NicehashKey: /* --nicehash */ - case TlsKey: /* --tls */ - case ApiIPv6Key: /* --api-ipv6 */ - case DryRunKey: /* --dry-run */ - case CalibrateAlgoKey: /* --calibrate-algo */ - return parseBoolean(key, true); - - case ColorKey: /* --no-color */ - case WatchKey: /* --no-watch */ - case ApiRestrictedKey: /* --api-no-restricted */ - return parseBoolean(key, false); - - case DonateLevelKey: /* --donate-level */ -# ifdef XMRIG_PROXY_PROJECT - if (strncmp(arg, "minemonero.pro", 14) == 0) { - m_donateLevel = 0; - return true; - } -# endif - return parseUint64(key, strtol(arg, nullptr, 10)); - - case CalibrateAlgoTimeKey: /* --calibrate-algo-time */ - return parseUint64(key, strtol(arg, nullptr, 10)); - - default: - break; - } - - return true; -} - - -bool xmrig::CommonConfig::parseUint64(int key, uint64_t arg) -{ - return parseInt(key, static_cast(arg)); -} 
- - -void xmrig::CommonConfig::parseJSON(const rapidjson::Document &doc) -{ - const rapidjson::Value &pools = doc["pools"]; - if (pools.IsArray()) { - m_pools.load(pools); - } -} - - -void xmrig::CommonConfig::setFileName(const char *fileName) -{ - m_fileName = fileName; -} - - -bool xmrig::CommonConfig::parseInt(int key, int arg) -{ - switch (key) { - case RetriesKey: /* --retries */ - m_pools.setRetries(arg); - break; - - case RetryPauseKey: /* --retry-pause */ - m_pools.setRetryPause(arg); - break; - - case KeepAliveKey: /* --keepalive */ - m_pools.setKeepAlive(arg); - break; - - case VariantKey: /* --variant */ - m_pools.setVariant(arg); - break; - - case DonateLevelKey: /* --donate-level */ - if (arg >= kMinimumDonateLevel && arg <= 99) { - m_donateLevel = arg; - } - break; - - case ApiPort: /* --api-port */ - if (arg > 0 && arg <= 65536) { - m_apiPort = arg; - } - break; - - case PrintTimeKey: /* --print-time */ - if (arg >= 0 && arg <= 3600) { - m_printTime = arg; - } - break; - - case CalibrateAlgoTimeKey: /* --calibrate-algo-time */ - if (arg >= 5 && arg <= 3600) { - m_calibrateAlgoTime = arg; - } - break; - - default: - break; - } - - return true; -} diff --git a/src/common/interfaces/IConfig.h b/src/common/interfaces/IConfig.h deleted file mode 100644 index 58d12436..00000000 --- a/src/common/interfaces/IConfig.h +++ /dev/null @@ -1,153 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * Copyright 2018-2019 MoneroOcean , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef XMRIG_ICONFIG_H -#define XMRIG_ICONFIG_H - - -#include "common/crypto/Algorithm.h" -#include "rapidjson/fwd.h" - - -namespace xmrig { - - -class String; - - -class IConfig -{ -public: - enum Keys { - // common - AlgorithmKey = 'a', - ApiAccessTokenKey = 4001, - ApiIPv6Key = 4003, - ApiPort = 4000, - ApiRestrictedKey = 4004, - ApiWorkerIdKey = 4002, - ApiIdKey = 4005, - BackgroundKey = 'B', - ColorKey = 1002, - ConfigKey = 'c', - DonateLevelKey = 1003, - KeepAliveKey = 'k', - LogFileKey = 'l', - PasswordKey = 'p', - RetriesKey = 'r', - RetryPauseKey = 'R', - RigIdKey = 1012, - SyslogKey = 'S', - UrlKey = 'o', - UserAgentKey = 1008, - UserKey = 'u', - UserpassKey = 'O', - VariantKey = 1010, - VerboseKey = 1100, - WatchKey = 1105, - TlsKey = 1013, - FingerprintKey = 1014, - AutoSaveKey = 1016, - - // xmrig common - CPUPriorityKey = 1021, - NicehashKey = 1006, - PrintTimeKey = 1007, - CalibrateAlgoKey = 10001, - CalibrateAlgoTimeKey = 10002, - - // xmrig cpu - AVKey = 'v', - CPUAffinityKey = 1020, - DryRunKey = 5000, - HugePagesKey = 1009, - MaxCPUUsageKey = 1004, - SafeKey = 1005, - ThreadsKey = 't', - HardwareAESKey = 1011, - AssemblyKey = 1015, - - // xmrig amd - OclPlatformKey = 1400, - OclAffinityKey = 1401, - OclDevicesKey = 1402, - OclLaunchKey = 1403, - OclCacheKey = 1404, - OclPrintKey = 1405, - OclLoaderKey = 1406, - OclSridedIndexKey = 1407, - OclMemChunkKey = 1408, - OclUnrollKey = 1409, - OclCompModeKey = 1410, - - // xmrig-proxy - AccessLogFileKey = 'A', - BindKey = 'b', - CoinKey = 1104, - CustomDiffKey = 1102, - DebugKey = 1101, - ModeKey = 'm', - PoolCoinKey = 'C', - 
ReuseTimeoutKey = 1106, - WorkersKey = 1103, - WorkersAdvKey = 1107, - TlsBindKey = 1108, - TlsCertKey = 1109, - TlsCertKeyKey = 1110, - TlsDHparamKey = 1111, - TlsCiphersKey = 1112, - TlsCipherSuitesKey = 1113, - TlsProtocolsKey = 1114, - - // xmrig nvidia - CudaMaxThreadsKey = 1200, - CudaBFactorKey = 1201, - CudaBSleepKey = 1202, - CudaDevicesKey = 1203, - CudaLaunchKey = 1204, - CudaAffinityKey = 1205, - CudaMaxUsageKey = 1206, - }; - - virtual ~IConfig() = default; - - virtual bool finalize() = 0; - virtual bool isWatch() const = 0; - virtual bool parseBoolean(int key, bool enable) = 0; - virtual bool parseString(int key, const char *arg) = 0; - virtual bool parseUint64(int key, uint64_t arg) = 0; - virtual bool save() = 0; - virtual const Algorithm &algorithm() const = 0; - virtual const String &fileName() const = 0; - virtual void getJSON(rapidjson::Document &doc) const = 0; - virtual void parseJSON(const rapidjson::Document &doc) = 0; - virtual void setFileName(const char *fileName) = 0; -}; - - -} /* namespace xmrig */ - - -#endif // XMRIG_ICONFIG_H diff --git a/src/core/ConfigLoader_platform.h b/src/core/ConfigLoader_platform.h deleted file mode 100644 index e53fad1a..00000000 --- a/src/core/ConfigLoader_platform.h +++ /dev/null @@ -1,133 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * Copyright 2018-2019 MoneroOcean , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef XMRIG_CONFIGLOADER_PLATFORM_H -#define XMRIG_CONFIGLOADER_PLATFORM_H - - -#ifdef _MSC_VER -# include "getopt/getopt.h" -#else -# include -#endif - - -#include "common/interfaces/IConfig.h" -#include "version.h" - - -namespace xmrig { - - -static char const short_options[] = "a:c:kBp:Px:r:R:s:t:T:o:u:O:v:l:S"; - - -static struct option const options[] = { - { "algo", 1, nullptr, xmrig::IConfig::AlgorithmKey }, - { "api-access-token", 1, nullptr, xmrig::IConfig::ApiAccessTokenKey }, - { "api-port", 1, nullptr, xmrig::IConfig::ApiPort }, - { "api-worker-id", 1, nullptr, xmrig::IConfig::ApiWorkerIdKey }, - { "api-id", 1, nullptr, xmrig::IConfig::ApiIdKey }, - { "api-ipv6", 0, nullptr, xmrig::IConfig::ApiIPv6Key }, - { "api-no-restricted", 0, nullptr, xmrig::IConfig::ApiRestrictedKey }, - { "av", 1, nullptr, xmrig::IConfig::AVKey }, - { "background", 0, nullptr, xmrig::IConfig::BackgroundKey }, - { "config", 1, nullptr, xmrig::IConfig::ConfigKey }, - { "cpu-affinity", 1, nullptr, xmrig::IConfig::CPUAffinityKey }, - { "cpu-priority", 1, nullptr, xmrig::IConfig::CPUPriorityKey }, - { "donate-level", 1, nullptr, xmrig::IConfig::DonateLevelKey }, - { "dry-run", 0, nullptr, xmrig::IConfig::DryRunKey }, - { "calibrate-algo", 0, nullptr, xmrig::IConfig::CalibrateAlgoKey }, - { "calibrate-algo-time", 1, nullptr, xmrig::IConfig::CalibrateAlgoTimeKey }, - { "keepalive", 0, nullptr, xmrig::IConfig::KeepAliveKey }, - { "log-file", 1, nullptr, xmrig::IConfig::LogFileKey }, - { "max-cpu-usage", 1, nullptr, xmrig::IConfig::MaxCPUUsageKey }, - { "nicehash", 0, nullptr, xmrig::IConfig::NicehashKey }, - { 
"no-color", 0, nullptr, xmrig::IConfig::ColorKey }, - { "no-watch", 0, nullptr, xmrig::IConfig::WatchKey }, - { "no-huge-pages", 0, nullptr, xmrig::IConfig::HugePagesKey }, - { "variant", 1, nullptr, xmrig::IConfig::VariantKey }, - { "pass", 1, nullptr, xmrig::IConfig::PasswordKey }, - { "print-time", 1, nullptr, xmrig::IConfig::PrintTimeKey }, - { "retries", 1, nullptr, xmrig::IConfig::RetriesKey }, - { "retry-pause", 1, nullptr, xmrig::IConfig::RetryPauseKey }, - { "safe", 0, nullptr, xmrig::IConfig::SafeKey }, - { "syslog", 0, nullptr, xmrig::IConfig::SyslogKey }, - { "threads", 1, nullptr, xmrig::IConfig::ThreadsKey }, - { "url", 1, nullptr, xmrig::IConfig::UrlKey }, - { "user", 1, nullptr, xmrig::IConfig::UserKey }, - { "user-agent", 1, nullptr, xmrig::IConfig::UserAgentKey }, - { "userpass", 1, nullptr, xmrig::IConfig::UserpassKey }, - { "rig-id", 1, nullptr, xmrig::IConfig::RigIdKey }, - { "tls", 0, nullptr, xmrig::IConfig::TlsKey }, - { "tls-fingerprint", 1, nullptr, xmrig::IConfig::FingerprintKey }, - { "asm", 1, nullptr, xmrig::IConfig::AssemblyKey }, - { nullptr, 0, nullptr, 0 } -}; - - -static struct option const config_options[] = { - { "algo", 1, nullptr, xmrig::IConfig::AlgorithmKey }, - { "av", 1, nullptr, xmrig::IConfig::AVKey }, - { "background", 0, nullptr, xmrig::IConfig::BackgroundKey }, - { "colors", 0, nullptr, xmrig::IConfig::ColorKey }, - { "cpu-affinity", 1, nullptr, xmrig::IConfig::CPUAffinityKey }, - { "cpu-priority", 1, nullptr, xmrig::IConfig::CPUPriorityKey }, - { "donate-level", 1, nullptr, xmrig::IConfig::DonateLevelKey }, - { "dry-run", 0, nullptr, xmrig::IConfig::DryRunKey }, - { "calibrate-algo", 0, nullptr, xmrig::IConfig::CalibrateAlgoKey }, - { "calibrate-algo-time", 1, nullptr, xmrig::IConfig::CalibrateAlgoTimeKey }, - { "huge-pages", 0, nullptr, xmrig::IConfig::HugePagesKey }, - { "log-file", 1, nullptr, xmrig::IConfig::LogFileKey }, - { "max-cpu-usage", 1, nullptr, xmrig::IConfig::MaxCPUUsageKey }, - { "print-time", 1, 
nullptr, xmrig::IConfig::PrintTimeKey }, - { "retries", 1, nullptr, xmrig::IConfig::RetriesKey }, - { "retry-pause", 1, nullptr, xmrig::IConfig::RetryPauseKey }, - { "safe", 0, nullptr, xmrig::IConfig::SafeKey }, - { "syslog", 0, nullptr, xmrig::IConfig::SyslogKey }, - { "threads", 1, nullptr, xmrig::IConfig::ThreadsKey }, - { "user-agent", 1, nullptr, xmrig::IConfig::UserAgentKey }, - { "watch", 0, nullptr, xmrig::IConfig::WatchKey }, - { "hw-aes", 0, nullptr, xmrig::IConfig::HardwareAESKey }, - { "asm", 1, nullptr, xmrig::IConfig::AssemblyKey }, - { "autosave", 0, nullptr, xmrig::IConfig::AutoSaveKey }, - { nullptr, 0, nullptr, 0 } -}; - - -static struct option const api_options[] = { - { "port", 1, nullptr, xmrig::IConfig::ApiPort }, - { "access-token", 1, nullptr, xmrig::IConfig::ApiAccessTokenKey }, - { "worker-id", 1, nullptr, xmrig::IConfig::ApiWorkerIdKey }, - { "ipv6", 0, nullptr, xmrig::IConfig::ApiIPv6Key }, - { "restricted", 0, nullptr, xmrig::IConfig::ApiRestrictedKey }, - { "id", 1, nullptr, xmrig::IConfig::ApiIdKey }, - { nullptr, 0, nullptr, 0 } -}; - - -} /* namespace xmrig */ - -#endif /* XMRIG_CONFIGLOADER_PLATFORM_H */ diff --git a/src/core/usage.h b/src/core/usage.h deleted file mode 100644 index 9558f99e..00000000 --- a/src/core/usage.h +++ /dev/null @@ -1,97 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef XMRIG_USAGE_H -#define XMRIG_USAGE_H - - -#include "version.h" - - -namespace xmrig { - - -static char const usage[] = "\ -Usage: " APP_ID " [OPTIONS]\n\ -Options:\n\ - -a, --algo=ALGO specify the algorithm to use\n\ - cryptonight\n" -#ifndef XMRIG_NO_AEON -"\ - cryptonight-lite\n" -#endif -#ifndef XMRIG_NO_SUMO -"\ - cryptonight-heavy\n" -#endif -"\ - --calibrate-algo run benchmarks before mining to measure hashrates of all supported algos\n\ - --calibrate-algo-time=N time in seconds to run each algo benchmark round (default: 60)\n\ - -o, --url=URL URL of mining server\n\ - -O, --userpass=U:P username:password pair for mining server\n\ - -u, --user=USERNAME username for mining server\n\ - -p, --pass=PASSWORD password for mining server\n\ - --rig-id=ID rig identifier for pool-side statistics (needs pool support)\n\ - -t, --threads=N number of miner threads\n\ - -v, --av=N algorithm variation, 0 auto select\n\ - -k, --keepalive send keepalived packet for prevent timeout (needs pool support)\n\ - --nicehash enable nicehash.com support\n\ - --tls enable SSL/TLS support (needs pool support)\n\ - --tls-fingerprint=F pool TLS certificate fingerprint, if set enable strict certificate pinning\n\ - -r, --retries=N number of times to retry before switch to backup server (default: 5)\n\ - -R, --retry-pause=N time to pause between retries (default: 5)\n\ - --cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1\n\ - --cpu-priority set process priority (0 idle, 2 normal to 5 highest)\n\ - --no-huge-pages disable huge pages support\n\ - --no-color disable colored output\n\ - 
--variant algorithm PoW variant\n\ - --donate-level=N donate level, default 5%% (5 minutes in 100 minutes)\n\ - --user-agent set custom user-agent string for pool\n\ - -B, --background run the miner in the background\n\ - -c, --config=FILE load a JSON-format configuration file\n\ - -l, --log-file=FILE log all output to a file\n" -# ifdef HAVE_SYSLOG_H -"\ - -S, --syslog use system log for output messages\n" -# endif -"\ - --max-cpu-usage=N maximum CPU usage for automatic threads mode (default 75)\n\ - --safe safe adjust threads and av settings for current CPU\n\ - --asm=ASM ASM code for cn/2, possible values: auto, none, intel, ryzen, bulldozer.\n\ - --print-time=N print hashrate report every N seconds\n\ - --api-port=N port for the miner API\n\ - --api-access-token=T access token for API\n\ - --api-worker-id=ID custom worker-id for API\n\ - --api-id=ID custom instance ID for API\n\ - --api-ipv6 enable IPv6 support for API\n\ - --api-no-restricted enable full remote access (only if API token set)\n\ - --dry-run test configuration and exit\n\ - -h, --help display this help and exit\n\ - -V, --version output version information and exit\n\ -"; - - -} /* namespace xmrig */ - -#endif /* XMRIG_USAGE_H */ From 8ffc9f3ff2ff580a272640312c96ffa65bca8456 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 12:40:38 -0700 Subject: [PATCH 113/141] Correctly init config keys --- src/base/kernel/interfaces/IConfig.h | 3 +++ src/core/config/Config_platform.h | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/src/base/kernel/interfaces/IConfig.h b/src/base/kernel/interfaces/IConfig.h index 07849e35..6c77e989 100644 --- a/src/base/kernel/interfaces/IConfig.h +++ b/src/base/kernel/interfaces/IConfig.h @@ -85,6 +85,9 @@ public: NicehashKey = 1006, PrintTimeKey = 1007, + CalibrateAlgoKey = 10001, + CalibrateAlgoTimeKey = 10002, + // xmrig cpu AVKey = 'v', CPUAffinityKey = 1020, diff --git a/src/core/config/Config_platform.h b/src/core/config/Config_platform.h 
index ca06a703..686937db 100644 --- a/src/core/config/Config_platform.h +++ b/src/core/config/Config_platform.h @@ -85,6 +85,9 @@ static const option options[] = { { "daemon", 0, nullptr, IConfig::DaemonKey }, { "daemon-poll-interval", 1, nullptr, IConfig::DaemonPollKey }, + { "calibrate-algo", 0, nullptr, xmrig::IConfig::CalibrateAlgoKey }, + { "calibrate-algo-time", 1, nullptr, xmrig::IConfig::CalibrateAlgoTimeKey }, + # ifdef XMRIG_DEPRECATED { "api-port", 1, nullptr, IConfig::ApiPort }, { "api-access-token", 1, nullptr, IConfig::ApiAccessTokenKey }, @@ -117,6 +120,10 @@ static struct option const config_options[] = { { "threads", 1, nullptr, IConfig::ThreadsKey }, { "user-agent", 1, nullptr, IConfig::UserAgentKey }, { "asm", 1, nullptr, IConfig::AssemblyKey }, + + { "calibrate-algo", 0, nullptr, xmrig::IConfig::CalibrateAlgoKey }, + { "calibrate-algo-time", 1, nullptr, xmrig::IConfig::CalibrateAlgoTimeKey }, + { nullptr, 0, nullptr, 0 } }; From fc2614ef81c60006a72f8f10f640732eb779840e Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 12:43:56 -0700 Subject: [PATCH 114/141] Correctly init config keys --- src/base/kernel/config/BaseTransform.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/base/kernel/config/BaseTransform.cpp b/src/base/kernel/config/BaseTransform.cpp index 6a130482..6032f88f 100644 --- a/src/base/kernel/config/BaseTransform.cpp +++ b/src/base/kernel/config/BaseTransform.cpp @@ -238,6 +238,9 @@ void xmrig::BaseTransform::transformBoolean(rapidjson::Document &doc, int key, b case IConfig::DryRunKey: /* --dry-run */ return set(doc, "dry-run", enable); + case IConfig::CalibrateAlgoKey: /* --calibrate-algo */ + return set(doc, "calibrate-algo", enable); + default: break; } @@ -275,6 +278,9 @@ void xmrig::BaseTransform::transformUint64(rapidjson::Document &doc, int key, ui case IConfig::DaemonPollKey: /* --daemon-poll-interval */ return add(doc, kPools, "daemon-poll-interval", arg); + case IConfig::CalibrateAlgoTimeKey: 
/* --calibrate-algo-time */ + return set(doc, "calibrate-algo-time", arg); + default: break; } From c4a42a1e563b35652c8f5513c08d4840ba5f6735 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 12:46:29 -0700 Subject: [PATCH 115/141] Correctly init config keys --- src/core/config/Config_platform.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/config/Config_platform.h b/src/core/config/Config_platform.h index 686937db..04d1a57e 100644 --- a/src/core/config/Config_platform.h +++ b/src/core/config/Config_platform.h @@ -85,8 +85,8 @@ static const option options[] = { { "daemon", 0, nullptr, IConfig::DaemonKey }, { "daemon-poll-interval", 1, nullptr, IConfig::DaemonPollKey }, - { "calibrate-algo", 0, nullptr, xmrig::IConfig::CalibrateAlgoKey }, - { "calibrate-algo-time", 1, nullptr, xmrig::IConfig::CalibrateAlgoTimeKey }, + { "calibrate-algo", 0, nullptr, IConfig::CalibrateAlgoKey }, + { "calibrate-algo-time", 1, nullptr, IConfig::CalibrateAlgoTimeKey }, # ifdef XMRIG_DEPRECATED { "api-port", 1, nullptr, IConfig::ApiPort }, @@ -121,8 +121,8 @@ static struct option const config_options[] = { { "user-agent", 1, nullptr, IConfig::UserAgentKey }, { "asm", 1, nullptr, IConfig::AssemblyKey }, - { "calibrate-algo", 0, nullptr, xmrig::IConfig::CalibrateAlgoKey }, - { "calibrate-algo-time", 1, nullptr, xmrig::IConfig::CalibrateAlgoTimeKey }, + { "calibrate-algo", 0, nullptr, IConfig::CalibrateAlgoKey }, + { "calibrate-algo-time", 1, nullptr, IConfig::CalibrateAlgoTimeKey }, { nullptr, 0, nullptr, 0 } }; From cefeb871c0868fd7b162fc74ea6fead32a0c7756 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 12:54:00 -0700 Subject: [PATCH 116/141] Correctly init config keys --- src/base/kernel/config/BaseConfig.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/base/kernel/config/BaseConfig.cpp b/src/base/kernel/config/BaseConfig.cpp index af2418aa..75fe8627 100644 --- a/src/base/kernel/config/BaseConfig.cpp +++ 
b/src/base/kernel/config/BaseConfig.cpp @@ -138,6 +138,9 @@ bool xmrig::BaseConfig::read(const IJsonReader &reader, const char *fileName) m_logFile = reader.getString("log-file"); m_userAgent = reader.getString("user-agent"); + m_calibrateAlgo = reader.getBool("calibrate-algo"); + m_calibrateAlgoTime = reader.getInt("calibrate-algo-time"); + setPrintTime(reader.getUint("print-time", 60)); const rapidjson::Value &api = reader.getObject("api"); From 7ef9bb85d407d5fc90e1be2c3cd97c54735839d3 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 14:10:13 -0700 Subject: [PATCH 117/141] Fixed algo setting --- src/core/config/Config.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/config/Config.cpp b/src/core/config/Config.cpp index 57ab4e9d..3f638d66 100644 --- a/src/core/config/Config.cpp +++ b/src/core/config/Config.cpp @@ -186,7 +186,7 @@ bool xmrig::Config::finalize() const bool softAES = (m_aesMode == AES_AUTO ? (Cpu::info()->hasAES() ? AES_HW : AES_SOFT) : m_aesMode) == AES_SOFT; for (size_t i = 0; i < m_threads[algo].cpu.size(); ++i) { - m_threads[algo].list.push_back(CpuThread::createFromData(i, m_algorithm.algo(), m_threads[algo].cpu[i], m_priority, softAES)); + m_threads[algo].list.push_back(CpuThread::createFromData(i, algo, m_threads[algo].cpu[i], m_priority, softAES)); } continue; @@ -196,7 +196,7 @@ bool xmrig::Config::finalize() m_threads[algo].mode = m_threads[algo].count ? 
Simple : Automatic; const Variant v = m_algorithm.variant(); - const size_t size = CpuThread::multiway(av) * cn_select_memory(m_algorithm.algo(), v) / 1024; + const size_t size = CpuThread::multiway(av) * cn_select_memory(algo, v) / 1024; if (!m_threads[algo].count) { m_threads[algo].count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); @@ -209,7 +209,7 @@ bool xmrig::Config::finalize() } for (size_t i = 0; i < m_threads[algo].count; ++i) { - m_threads[algo].list.push_back(CpuThread::createFromAV(i, m_algorithm.algo(), av, m_threads[algo].mask, m_priority, m_assembly)); + m_threads[algo].list.push_back(CpuThread::createFromAV(i, algo, av, m_threads[algo].mask, m_priority, m_assembly)); } m_shouldSave = m_shouldSave || m_threads[algo].mode == Automatic; From 848c77eb3bd2b8c92fc469276d79ef346ce7e144 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 14:14:59 -0700 Subject: [PATCH 118/141] Get algo-perf values from config --- src/core/config/Config.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/core/config/Config.cpp b/src/core/config/Config.cpp index 3f638d66..e85da6b5 100644 --- a/src/core/config/Config.cpp +++ b/src/core/config/Config.cpp @@ -83,6 +83,21 @@ bool xmrig::Config::read(const IJsonReader &reader, const char *fileName) setPriority(reader.getInt("cpu-priority", -1)); setThreads(reader.getValue("threads")); + // get algo-perf values from config + { const rapidjson::Value &algo_perf = reader.getValue("algo-perf"); + if (algo_perf.IsObject()) { + for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { + const xmrig::PerfAlgo pa = static_cast(a); + const rapidjson::Value &key = algo_perf[xmrig::Algorithm::perfAlgoName(pa)]; + if (key.IsDouble()) { + m_algo_perf[pa] = static_cast(key.GetDouble()); + } else if (key.IsInt()) { + m_algo_perf[pa] = static_cast(key.GetInt()); + } + } + } + } + # ifndef XMRIG_NO_ASM setAssembly(reader.getValue("asm")); # endif From a20d3361b13d434b488bbcbe270971109f7bf1f6 Mon Sep 17 
00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 14:25:11 -0700 Subject: [PATCH 119/141] Test update --- mt.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mt.js b/mt.js index e4f7e61e..bcf682a9 100755 --- a/mt.js +++ b/mt.js @@ -14,7 +14,7 @@ const net = require('net'); // *** CONSTS *** // ***************************************************************************** -const algos = [ "cn/1", "cn/2", "cn/xtl", "cn/msr", "cn/xao", "cn/rto", "cn-heavy/0", "cn-heavy/tube", "cn-heavy/xhv", "cn-lite/1" ]; +const algos = [ "cn/1", "cn/r", "cn-heavy/tube", "cn-heavy/xhv", "cn-lite/1", "cn-pic/trtl", "rx/wow" ]; // ***************************************************************************** // *** WORKING STATE *** @@ -93,7 +93,7 @@ function change_algo() { log("Switching to " + algo); curr_miner_socket.write( '{"jsonrpc":"2.0","method":"job","params":{"blob":"' + test_blob_str + '","algo":"' + algo + - '","job_id":"benchmark' + ++job_num + '","target":"10000000","id":"benchmark"}}\n' + '","job_id":"benchmark' + ++job_num + '","height":0,"seed_hash":"0000000000000000000000000000000000000000000000000000000000000000","target":"10000000","id":"benchmark"}}\n' ); } const sleep = Math.floor(Math.random() * 5); From fb832d2650bf03fa3aec87b188e08df9b73c0cf6 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 14:26:52 -0700 Subject: [PATCH 120/141] Test update --- mt.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mt.js b/mt.js index bcf682a9..ea6f1835 100755 --- a/mt.js +++ b/mt.js @@ -14,7 +14,7 @@ const net = require('net'); // *** CONSTS *** // ***************************************************************************** -const algos = [ "cn/1", "cn/r", "cn-heavy/tube", "cn-heavy/xhv", "cn-lite/1", "cn-pic/trtl", "rx/wow" ]; +const algos = [ "cn/1", "cn/r", "cn-heavy/tube", "cn-heavy/xhv", "cn-lite/1", "cn-pico/trtl", "rx/wow" ]; // ***************************************************************************** 
// *** WORKING STATE *** From d12cf28ebf692f4c0f59aa957674d19f90b8cb88 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 14:56:52 -0700 Subject: [PATCH 121/141] Added RandomX code here --- RandomX/.gitignore | 8 + RandomX/CMakeLists.txt | 80 ++ RandomX/LICENSE | 27 + RandomX/README.md | 120 +++ RandomX/doc/configuration.md | 269 +++++ RandomX/doc/design.md | 530 ++++++++++ RandomX/doc/program.asm | 983 ++++++++++++++++++ RandomX/doc/specs.md | 938 +++++++++++++++++ RandomX/makefile | 168 +++ RandomX/randomx.sln | 166 +++ RandomX/src/aes_hash.cpp | 214 ++++ RandomX/src/aes_hash.hpp | 40 + RandomX/src/allocator.cpp | 60 ++ RandomX/src/allocator.hpp | 46 + RandomX/src/argon2.h | 229 ++++ RandomX/src/argon2_core.c | 516 +++++++++ RandomX/src/argon2_core.h | 254 +++++ RandomX/src/argon2_ref.c | 214 ++++ RandomX/src/asm/configuration.asm | 48 + RandomX/src/asm/program_epilogue_linux.inc | 10 + RandomX/src/asm/program_epilogue_store.inc | 19 + RandomX/src/asm/program_epilogue_win64.inc | 24 + RandomX/src/asm/program_loop_load.inc | 32 + RandomX/src/asm/program_loop_store.inc | 19 + RandomX/src/asm/program_prologue_linux.inc | 34 + RandomX/src/asm/program_prologue_win64.inc | 47 + RandomX/src/asm/program_read_dataset.inc | 17 + .../asm/program_read_dataset_sshash_fin.inc | 10 + .../asm/program_read_dataset_sshash_init.inc | 17 + RandomX/src/asm/program_sshash_constants.inc | 24 + RandomX/src/asm/program_sshash_load.inc | 8 + RandomX/src/asm/program_sshash_prefetch.inc | 4 + RandomX/src/asm/program_xmm_constants.inc | 6 + RandomX/src/asm/randomx_reciprocal.inc | 7 + RandomX/src/assembly_generator_x86.cpp | 611 +++++++++++ RandomX/src/assembly_generator_x86.hpp | 94 ++ RandomX/src/blake2/blake2-impl.h | 76 ++ RandomX/src/blake2/blake2.h | 107 ++ RandomX/src/blake2/blake2b.c | 409 ++++++++ RandomX/src/blake2/blamka-round-ref.h | 73 ++ RandomX/src/blake2/endian.h | 107 ++ RandomX/src/blake2_generator.cpp | 62 ++ RandomX/src/blake2_generator.hpp | 46 + 
RandomX/src/common.hpp | 177 ++++ RandomX/src/configuration.h | 126 +++ RandomX/src/dataset.cpp | 189 ++++ RandomX/src/dataset.hpp | 76 ++ RandomX/src/instruction.cpp | 389 +++++++ RandomX/src/instruction.hpp | 149 +++ RandomX/src/instruction_weights.hpp | 113 ++ RandomX/src/instructions_portable.cpp | 194 ++++ RandomX/src/intrin_portable.h | 380 +++++++ RandomX/src/jit_compiler.hpp | 37 + RandomX/src/jit_compiler_a64.hpp | 73 ++ RandomX/src/jit_compiler_fallback.hpp | 73 ++ RandomX/src/jit_compiler_x86.cpp | 808 ++++++++++++++ RandomX/src/jit_compiler_x86.hpp | 141 +++ RandomX/src/jit_compiler_x86_static.S | 210 ++++ RandomX/src/jit_compiler_x86_static.asm | 201 ++++ RandomX/src/jit_compiler_x86_static.hpp | 48 + RandomX/src/program.hpp | 71 ++ RandomX/src/randomx.cpp | 249 +++++ RandomX/src/randomx.h | 201 ++++ RandomX/src/reciprocal.c | 69 ++ RandomX/src/reciprocal.h | 42 + RandomX/src/soft_aes.cpp | 364 +++++++ RandomX/src/soft_aes.h | 46 + RandomX/src/superscalar.cpp | 896 ++++++++++++++++ RandomX/src/superscalar.hpp | 59 ++ RandomX/src/superscalar_program.hpp | 80 ++ RandomX/src/tests/api-example1.c | 24 + RandomX/src/tests/api-example2.cpp | 48 + RandomX/src/tests/benchmark.cpp | 257 +++++ RandomX/src/tests/code-generator.cpp | 124 +++ RandomX/src/tests/jit-performance.cpp | 44 + RandomX/src/tests/perf-simulation.cpp | 639 ++++++++++++ RandomX/src/tests/runtime-distr.cpp | 172 +++ RandomX/src/tests/scratchpad-entropy.cpp | 50 + RandomX/src/tests/stopwatch.hpp | 84 ++ RandomX/src/tests/superscalar-avalanche.cpp | 48 + RandomX/src/tests/superscalar-init.cpp | 55 + RandomX/src/tests/superscalar-stats.cpp | 52 + RandomX/src/tests/utility.hpp | 85 ++ RandomX/src/virtual_machine.cpp | 137 +++ RandomX/src/virtual_machine.hpp | 83 ++ RandomX/src/virtual_memory.cpp | 133 +++ RandomX/src/virtual_memory.hpp | 35 + RandomX/src/vm_compiled.cpp | 60 ++ RandomX/src/vm_compiled.hpp | 72 ++ RandomX/src/vm_compiled_light.cpp | 54 + RandomX/src/vm_compiled_light.hpp | 64 ++ 
RandomX/src/vm_interpreted.cpp | 677 ++++++++++++ RandomX/src/vm_interpreted.hpp | 102 ++ RandomX/src/vm_interpreted_light.cpp | 55 + RandomX/src/vm_interpreted_light.hpp | 61 ++ RandomX/vcxproj/api-example1.vcxproj | 131 +++ RandomX/vcxproj/api-example1.vcxproj.filters | 27 + RandomX/vcxproj/api-example2.vcxproj | 128 +++ RandomX/vcxproj/api-example2.vcxproj.filters | 22 + RandomX/vcxproj/benchmark.vcxproj | 131 +++ RandomX/vcxproj/benchmark.vcxproj.filters | 27 + RandomX/vcxproj/code-generator.vcxproj | 129 +++ .../vcxproj/code-generator.vcxproj.filters | 22 + RandomX/vcxproj/h2inc.ps1 | 90 ++ RandomX/vcxproj/jit-performance.vcxproj | 128 +++ .../vcxproj/jit-performance.vcxproj.filters | 22 + RandomX/vcxproj/perf-simulation.vcxproj | 128 +++ .../vcxproj/perf-simulation.vcxproj.filters | 22 + RandomX/vcxproj/randomx-dll.vcxproj | 211 ++++ RandomX/vcxproj/randomx-dll.vcxproj.filters | 173 +++ RandomX/vcxproj/randomx.vcxproj | 196 ++++ RandomX/vcxproj/randomx.vcxproj.filters | 188 ++++ RandomX/vcxproj/runtime-distr.vcxproj | 128 +++ RandomX/vcxproj/runtime-distr.vcxproj.filters | 22 + RandomX/vcxproj/scratchpad-entropy.vcxproj | 128 +++ .../scratchpad-entropy.vcxproj.filters | 22 + RandomX/vcxproj/superscalar-avalanche.vcxproj | 130 +++ .../superscalar-avalanche.vcxproj.filters | 22 + RandomX/vcxproj/superscalar-init.vcxproj | 130 +++ .../vcxproj/superscalar-init.vcxproj.filters | 22 + RandomX/vcxproj/superscalar-stats.vcxproj | 128 +++ .../vcxproj/superscalar-stats.vcxproj.filters | 22 + cmake/FindRandomX.cmake | 28 +- 123 files changed, 17981 insertions(+), 25 deletions(-) create mode 100644 RandomX/.gitignore create mode 100644 RandomX/CMakeLists.txt create mode 100644 RandomX/LICENSE create mode 100644 RandomX/README.md create mode 100644 RandomX/doc/configuration.md create mode 100644 RandomX/doc/design.md create mode 100644 RandomX/doc/program.asm create mode 100644 RandomX/doc/specs.md create mode 100644 RandomX/makefile create mode 100644 RandomX/randomx.sln 
create mode 100644 RandomX/src/aes_hash.cpp create mode 100644 RandomX/src/aes_hash.hpp create mode 100644 RandomX/src/allocator.cpp create mode 100644 RandomX/src/allocator.hpp create mode 100644 RandomX/src/argon2.h create mode 100644 RandomX/src/argon2_core.c create mode 100644 RandomX/src/argon2_core.h create mode 100644 RandomX/src/argon2_ref.c create mode 100644 RandomX/src/asm/configuration.asm create mode 100644 RandomX/src/asm/program_epilogue_linux.inc create mode 100644 RandomX/src/asm/program_epilogue_store.inc create mode 100644 RandomX/src/asm/program_epilogue_win64.inc create mode 100644 RandomX/src/asm/program_loop_load.inc create mode 100644 RandomX/src/asm/program_loop_store.inc create mode 100644 RandomX/src/asm/program_prologue_linux.inc create mode 100644 RandomX/src/asm/program_prologue_win64.inc create mode 100644 RandomX/src/asm/program_read_dataset.inc create mode 100644 RandomX/src/asm/program_read_dataset_sshash_fin.inc create mode 100644 RandomX/src/asm/program_read_dataset_sshash_init.inc create mode 100644 RandomX/src/asm/program_sshash_constants.inc create mode 100644 RandomX/src/asm/program_sshash_load.inc create mode 100644 RandomX/src/asm/program_sshash_prefetch.inc create mode 100644 RandomX/src/asm/program_xmm_constants.inc create mode 100644 RandomX/src/asm/randomx_reciprocal.inc create mode 100644 RandomX/src/assembly_generator_x86.cpp create mode 100644 RandomX/src/assembly_generator_x86.hpp create mode 100644 RandomX/src/blake2/blake2-impl.h create mode 100644 RandomX/src/blake2/blake2.h create mode 100644 RandomX/src/blake2/blake2b.c create mode 100644 RandomX/src/blake2/blamka-round-ref.h create mode 100644 RandomX/src/blake2/endian.h create mode 100644 RandomX/src/blake2_generator.cpp create mode 100644 RandomX/src/blake2_generator.hpp create mode 100644 RandomX/src/common.hpp create mode 100644 RandomX/src/configuration.h create mode 100644 RandomX/src/dataset.cpp create mode 100644 RandomX/src/dataset.hpp create mode 
100644 RandomX/src/instruction.cpp create mode 100644 RandomX/src/instruction.hpp create mode 100644 RandomX/src/instruction_weights.hpp create mode 100644 RandomX/src/instructions_portable.cpp create mode 100644 RandomX/src/intrin_portable.h create mode 100644 RandomX/src/jit_compiler.hpp create mode 100644 RandomX/src/jit_compiler_a64.hpp create mode 100644 RandomX/src/jit_compiler_fallback.hpp create mode 100644 RandomX/src/jit_compiler_x86.cpp create mode 100644 RandomX/src/jit_compiler_x86.hpp create mode 100644 RandomX/src/jit_compiler_x86_static.S create mode 100644 RandomX/src/jit_compiler_x86_static.asm create mode 100644 RandomX/src/jit_compiler_x86_static.hpp create mode 100644 RandomX/src/program.hpp create mode 100644 RandomX/src/randomx.cpp create mode 100644 RandomX/src/randomx.h create mode 100644 RandomX/src/reciprocal.c create mode 100644 RandomX/src/reciprocal.h create mode 100644 RandomX/src/soft_aes.cpp create mode 100644 RandomX/src/soft_aes.h create mode 100644 RandomX/src/superscalar.cpp create mode 100644 RandomX/src/superscalar.hpp create mode 100644 RandomX/src/superscalar_program.hpp create mode 100644 RandomX/src/tests/api-example1.c create mode 100644 RandomX/src/tests/api-example2.cpp create mode 100644 RandomX/src/tests/benchmark.cpp create mode 100644 RandomX/src/tests/code-generator.cpp create mode 100644 RandomX/src/tests/jit-performance.cpp create mode 100644 RandomX/src/tests/perf-simulation.cpp create mode 100644 RandomX/src/tests/runtime-distr.cpp create mode 100644 RandomX/src/tests/scratchpad-entropy.cpp create mode 100644 RandomX/src/tests/stopwatch.hpp create mode 100644 RandomX/src/tests/superscalar-avalanche.cpp create mode 100644 RandomX/src/tests/superscalar-init.cpp create mode 100644 RandomX/src/tests/superscalar-stats.cpp create mode 100644 RandomX/src/tests/utility.hpp create mode 100644 RandomX/src/virtual_machine.cpp create mode 100644 RandomX/src/virtual_machine.hpp create mode 100644 
RandomX/src/virtual_memory.cpp create mode 100644 RandomX/src/virtual_memory.hpp create mode 100644 RandomX/src/vm_compiled.cpp create mode 100644 RandomX/src/vm_compiled.hpp create mode 100644 RandomX/src/vm_compiled_light.cpp create mode 100644 RandomX/src/vm_compiled_light.hpp create mode 100644 RandomX/src/vm_interpreted.cpp create mode 100644 RandomX/src/vm_interpreted.hpp create mode 100644 RandomX/src/vm_interpreted_light.cpp create mode 100644 RandomX/src/vm_interpreted_light.hpp create mode 100644 RandomX/vcxproj/api-example1.vcxproj create mode 100644 RandomX/vcxproj/api-example1.vcxproj.filters create mode 100644 RandomX/vcxproj/api-example2.vcxproj create mode 100644 RandomX/vcxproj/api-example2.vcxproj.filters create mode 100644 RandomX/vcxproj/benchmark.vcxproj create mode 100644 RandomX/vcxproj/benchmark.vcxproj.filters create mode 100644 RandomX/vcxproj/code-generator.vcxproj create mode 100644 RandomX/vcxproj/code-generator.vcxproj.filters create mode 100644 RandomX/vcxproj/h2inc.ps1 create mode 100644 RandomX/vcxproj/jit-performance.vcxproj create mode 100644 RandomX/vcxproj/jit-performance.vcxproj.filters create mode 100644 RandomX/vcxproj/perf-simulation.vcxproj create mode 100644 RandomX/vcxproj/perf-simulation.vcxproj.filters create mode 100644 RandomX/vcxproj/randomx-dll.vcxproj create mode 100644 RandomX/vcxproj/randomx-dll.vcxproj.filters create mode 100644 RandomX/vcxproj/randomx.vcxproj create mode 100644 RandomX/vcxproj/randomx.vcxproj.filters create mode 100644 RandomX/vcxproj/runtime-distr.vcxproj create mode 100644 RandomX/vcxproj/runtime-distr.vcxproj.filters create mode 100644 RandomX/vcxproj/scratchpad-entropy.vcxproj create mode 100644 RandomX/vcxproj/scratchpad-entropy.vcxproj.filters create mode 100644 RandomX/vcxproj/superscalar-avalanche.vcxproj create mode 100644 RandomX/vcxproj/superscalar-avalanche.vcxproj.filters create mode 100644 RandomX/vcxproj/superscalar-init.vcxproj create mode 100644 
RandomX/vcxproj/superscalar-init.vcxproj.filters create mode 100644 RandomX/vcxproj/superscalar-stats.vcxproj create mode 100644 RandomX/vcxproj/superscalar-stats.vcxproj.filters diff --git a/RandomX/.gitignore b/RandomX/.gitignore new file mode 100644 index 00000000..dd437d19 --- /dev/null +++ b/RandomX/.gitignore @@ -0,0 +1,8 @@ +bin/ +obj/ +*.user +*.suo +.vs +x64/ +Release/ +Debug/ \ No newline at end of file diff --git a/RandomX/CMakeLists.txt b/RandomX/CMakeLists.txt new file mode 100644 index 00000000..3c248495 --- /dev/null +++ b/RandomX/CMakeLists.txt @@ -0,0 +1,80 @@ +# Copyright (c) 2019, The Monero Project +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are +# permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, this list +# of conditions and the following disclaimer in the documentation and/or other +# materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors may be +# used to endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL +# THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +cmake_minimum_required(VERSION 2.8.7) + +set (randomx_sources +src/aes_hash.cpp +src/argon2_ref.c +src/dataset.cpp +src/soft_aes.cpp +src/virtual_memory.cpp +src/vm_interpreted.cpp +src/allocator.cpp +src/assembly_generator_x86.cpp +src/instruction.cpp +src/randomx.cpp +src/superscalar.cpp +src/vm_compiled.cpp +src/vm_interpreted_light.cpp +src/argon2_core.c +src/blake2_generator.cpp +src/instructions_portable.cpp +src/reciprocal.c +src/virtual_machine.cpp +src/vm_compiled_light.cpp +src/blake2/blake2b.c) + +if (NOT ARCH_ID) + set(ARCH_ID ${CMAKE_HOST_SYSTEM_PROCESSOR}) +endif() + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif() + +if (ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64") + list(APPEND randomx_sources + src/jit_compiler_x86_static.S + src/jit_compiler_x86.cpp) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") +endif() + +set(RANDOMX_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/src" CACHE STRING "RandomX Include path") + +add_library(randomx + ${randomx_sources}) +target_link_libraries(randomx + PRIVATE + ${CMAKE_THREAD_LIBS_INIT}) +set_property(TARGET randomx PROPERTY POSITION_INDEPENDENT_CODE ON) +set_property(TARGET randomx PROPERTY CXX_STANDARD 11) + +# cheat because cmake and ccache hate each other +set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY LANGUAGE C) diff --git a/RandomX/LICENSE b/RandomX/LICENSE new file mode 
100644 index 00000000..b1572ae8 --- /dev/null +++ b/RandomX/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2018-2019, tevador + +Copyright (c) 2014-2019, The Monero Project + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/RandomX/README.md b/RandomX/README.md new file mode 100644 index 00000000..b70e3bbd --- /dev/null +++ b/RandomX/README.md @@ -0,0 +1,120 @@ +# RandomX +RandomX is a proof-of-work (PoW) algorithm that is optimized for general-purpose CPUs. 
RandomX uses random code execution (hence the name) together with several memory-hard techniques to minimize the efficiency advantage of specialized hardware. + +## Overview + +RandomX utilizes a virtual machine that executes programs in a special instruction set that consists of integer math, floating point math and branches. These programs can be translated into the CPU's native machine code on the fly (example: [program.asm](doc/program.asm)). At the end, the outputs of the executed programs are consolidated into a 256-bit result using a cryptographic hashing function ([Blake2b](https://blake2.net/)). + +RandomX can operate in two main modes with different memory requirements: + +* **Fast mode** - requires 2080 MiB of shared memory. +* **Light mode** - requires only 256 MiB of shared memory, but runs significantly slower + +Both modes are interchangeable as they give the same results. The fast mode is suitable for "mining", while the light mode is expected to be used only for proof verification. + +## Documentation + +Full specification is available in [specs.md](doc/specs.md). + +Design description and analysis is available in [design.md](doc/design.md). + +## Build + +RandomX is written in C++11 and builds a static library with a C API provided by header file [randomx.h](src/randomx.h). Minimal API usage example is provided in [api-example1.c](src/tests/api-example1.c). The reference code includes a `benchmark` executable for testing. + +### Linux + +Build dependencies: `make` and `gcc` (minimum version 4.8, but version 7+ is recommended). + +Build using the provided makefile. + +### Windows + +Build dependencies: Visual Studio 2017. + +A solution file is provided. + +### Precompiled binaries + +Precompiled `benchmark` binaries are available on the [Releases page](https://github.com/tevador/RandomX/releases). + +## Proof of work + +RandomX was primarily designed as a PoW algorithm for [Monero](https://www.getmonero.org/). 
The recommended usage is as follows: + +* The key `K` is selected to be the hash of a block in the blockchain - this block is called the 'key block'. For optimal mining and verification performance, the key should change every 2048 blocks (~2.8 days) and there should be a delay of 64 blocks (~2 hours) between the key block and the change of the key `K`. This can be achieved by changing the key when `blockHeight % 2048 == 64` and selecting the key block such that `keyBlockHeight % 2048 == 0`. +* The input `H` is the standard hashing blob with a selected nonce value. + +If you wish to use RandomX as a PoW algorithm for your cryptocurrency, please follow the [configuration guidelines](doc/configuration.md). + +### CPU performance +Preliminary performance of selected CPUs using the optimal number of threads (T) and large pages (if possible), in hashes per second (H/s): + +|CPU|RAM|OS|AES|Fast mode|Light mode| +|---|---|--|---|---------|--------------| +AMD Ryzen 7 1700|16 GB DDR4|Ubuntu 16.04|hardware|4100 H/s (8T)|620 H/s (16T)| +Intel Core i7-8550U|16 GB DDR4|Windows 10|hardware|1700 H/s (4T)|350 H/s (8T)| +Intel Core i3-3220|4 GB DDR3|Ubuntu 16.04|software|510 H/s (4T)|150 H/s (4T)| +Raspberry Pi 3|1 GB DDR2|Ubuntu 16.04|software|-|2.0 H/s (4T) †| + +† Using the interpreter mode. Compiled mode is expected to increase performance by a factor of 10. + +### GPU performance + +SChernykh is developing GPU mining code for RandomX. Benchmarks are included in the following repositories: + +* [CUDA miner](https://github.com/SChernykh/RandomX_CUDA) - NVIDIA GPUs. +* [OpenCL miner](https://github.com/SChernykh/RandomX_OpenCL) - currently only for AMD Vega (uses GCN5 machine code). + +Note that GPUs are at a disadvantage when running RandomX since the algorithm was designed to be efficient on CPUs. + +# FAQ + +### Which CPU is best for mining RandomX? + +Most Intel and AMD CPUs made since 2011 should be fairly efficient at RandomX.
More specifically, efficient mining requires: + +* 64-bit architecture +* IEEE 754 compliant floating point unit +* Hardware AES support ([AES-NI](https://en.wikipedia.org/wiki/AES_instruction_set) extension for x86, Cryptography extensions for ARMv8) +* 16 KiB of L1 cache, 256 KiB of L2 cache and 2 MiB of L3 cache per mining thread +* Support for large memory pages +* At least 2.5 GiB of free RAM per NUMA node +* Multiple memory channels may be required: + * DDR3 memory is limited to about 1500 H/s per channel + * DDR4 memory is limited to about 4000 H/s per channel + + + +### Does RandomX facilitate botnets/malware mining or web mining? +Efficient mining requires more than 2 GiB of memory, which is difficult to hide in an infected computer and disqualifies many low-end machines such as IoT devices. Web mining is infeasible due to the large memory requirement and the lack of directed rounding support for floating point operations in both Javascript and WebAssembly. + +### Since RandomX uses floating point math, does it give reproducible results on different platforms? + +RandomX uses only operations that are guaranteed to give correctly rounded results by the [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754) standard: addition, subtraction, multiplication, division and square root. Special care is taken to avoid corner cases such as NaN values or denormals. 
+ +The reference implementation has been validated on the following platforms: +* x86 (32-bit, little-endian) +* x86-64 (64-bit, little-endian) +* ARMv7+VFPv3 (32-bit, little-endian) +* ARMv8 (64-bit, little-endian) +* PPC64 (64-bit, big-endian) + +## Acknowledgements +* [SChernykh](https://github.com/SChernykh) - contributed significantly to the design of RandomX +* [hyc](https://github.com/hyc) - original idea of using random code execution for PoW +* [nioroso-x3](https://github.com/nioroso-x3) - provided access to PowerPC for testing purposes + +RandomX uses some source code from the following 3rd party repositories: +* Argon2d, Blake2b hashing functions: https://github.com/P-H-C/phc-winner-argon2 + +The author of RandomX declares no competing financial interest in RandomX adoption, other than being a holder of Monero. The development of RandomX was funded from the author's own pocket with only the help listed above. + +## Donations + +If you'd like to use RandomX, please consider donating to help cover the development cost of the algorithm. + +Author's XMR address: +``` +845xHUh5GvfHwc2R8DVJCE7BT2sd4YEcmjG8GNSdmeNsP5DTEjXd1CNgxTcjHjiFuthRHAoVEJjM7GyKzQKLJtbd56xbh7V +``` diff --git a/RandomX/doc/configuration.md b/RandomX/doc/configuration.md new file mode 100644 index 00000000..4e564eb5 --- /dev/null +++ b/RandomX/doc/configuration.md @@ -0,0 +1,269 @@ +# RandomX configuration + +RandomX has 45 customizable parameters (see table below). We recommend each project using RandomX to select a unique configuration to prevent network attacks from hashpower rental services. + +These parameters can be modified in source file [configuration.h](../src/configuration.h). 
+ +|parameter|description|default value| +|---------|-----|-------| +|`RANDOMX_ARGON_MEMORY`|The number of 1 KiB Argon2 blocks in the Cache| `262144`| +|`RANDOMX_ARGON_ITERATIONS`|The number of Argon2d iterations for Cache initialization|`3`| +|`RANDOMX_ARGON_LANES`|The number of parallel lanes for Cache initialization|`1`| +|`RANDOMX_ARGON_SALT`|Argon2 salt|`"RandomX\x03"`| +|`RANDOMX_CACHE_ACCESSES`|The number of random Cache accesses per Dataset item|`8`| +|`RANDOMX_SUPERSCALAR_LATENCY`|Target latency for SuperscalarHash (in cycles of the reference CPU)|`170`| +|`RANDOMX_DATASET_BASE_SIZE`|Dataset base size in bytes|`2147483648`| +|`RANDOMX_DATASET_EXTRA_SIZE`|Dataset extra size in bytes|`33554368`| +|`RANDOMX_PROGRAM_SIZE`|The number of instructions in a RandomX program|`256`| +|`RANDOMX_PROGRAM_ITERATIONS`|The number of iterations per program|`2048`| +|`RANDOMX_PROGRAM_COUNT`|The number of programs per hash|`8`| +|`RANDOMX_JUMP_BITS`|Jump condition mask size in bits|`8`| +|`RANDOMX_JUMP_OFFSET`|Jump condition mask offset in bits|`8`| +|`RANDOMX_SCRATCHPAD_L3`|Scratchpad size in bytes|`2097152`| +|`RANDOMX_SCRATCHPAD_L2`|Scratchpad L2 size in bytes|`262144`| +|`RANDOMX_SCRATCHPAD_L1`|Scratchpad L1 size in bytes|`16384`| +|`RANDOMX_FREQ_*` (29x)|Instruction frequencies|multiple values| + +Not all of the parameters can be changed safely and most parameters have some constraints on what values can be selected. Follow the guidelines below. + +### RANDOMX_ARGON_MEMORY + +This parameter determines the amount of memory needed in the light mode. Memory is specified in KiB (1 KiB = 1024 bytes). + +#### Permitted values +Any integer power of 2. + +#### Notes +Lower sizes will reduce the memory-hardness of the algorithm. + +### RANDOMX_ARGON_ITERATIONS + +Determines the number of passes of Argon2 that are used to generate the Cache. + +#### Permitted values +Any positive integer.
+ +#### Notes +The time needed to initialize the Cache is proportional to the value of this constant. + +### RANDOMX_ARGON_LANES + +The number of parallel lanes for Cache initialization. + +#### Permitted values +Any positive integer. + +#### Notes +This parameter determines how many threads can be used for Cache initialization. + +### RANDOMX_ARGON_SALT + +Salt value for Cache initialization. + +#### Permitted values +Any string of byte values. + +#### Note +Every implementation should choose a unique salt value. + +### RANDOMX_CACHE_ACCESSES + +The number of random Cache accesses per Dataset item. + +#### Permitted values +Any integer greater than 1. + +#### Notes +This value directly determines the performance ratio between the 'fast' and 'light' modes. + +### RANDOMX_SUPERSCALAR_LATENCY +Target latency for SuperscalarHash, in cycles of the reference CPU. + +#### Permitted values +Any positive integer. + +#### Notes +The default value was tuned so that a high-performance superscalar CPU running at 2-4 GHz will execute SuperscalarHash in about the same time it takes to load data from RAM (40-80 ns). Using a lower value will make Dataset generation (and light mode) more memory bound, while increasing this value will make Dataset generation (and light mode) more compute bound. + +### RANDOMX_DATASET_BASE_SIZE + +Dataset base size in bytes. + +#### Permitted values +Integer powers of 2 in the range 64 - 4294967296 (inclusive). + +#### Note +This constant affects the memory requirements in fast mode. Some values are unsafe depending on other parameters. See [Unsafe configurations](#unsafe-configurations). + +### RANDOMX_DATASET_EXTRA_SIZE + +Dataset extra size in bytes. + +#### Permitted values +Non-negative integer divisible by 64. + +#### Note +This constant affects the memory requirements in fast mode. Some values are unsafe depending on other parameters. See [Unsafe configurations](#unsafe-configurations).
+ +### RANDOMX_PROGRAM_SIZE + +The number of instructions in a RandomX program. + +#### Permitted values +Any positive integer divisible by 8. + +#### Notes +Smaller values will make RandomX more DRAM-latency bound, while higher values will make RandomX more compute-bound. Some values are unsafe. See [Unsafe configurations](#unsafe-configurations). + +### RANDOMX_PROGRAM_ITERATIONS + +The number of iterations per program. + +#### Permitted values +Any positive integer. + +#### Notes +Time per hash increases linearly with this constant. Smaller values will increase the overhead of program compilation, while larger values may allow more time for optimizations. Some values are unsafe. See [Unsafe configurations](#unsafe-configurations). + +### RANDOMX_PROGRAM_COUNT + +The number of programs per hash. + +#### Permitted values +Any positive integer. + +#### Notes +Time per hash increases linearly with this constant. Some values are unsafe. See [Unsafe configurations](#unsafe-configurations). + +### RANDOMX_JUMP_BITS +Jump condition mask size in bits. + +#### Permitted values +Positive integers. The sum of `RANDOMX_JUMP_BITS` and `RANDOMX_JUMP_OFFSET` must not exceed 16. + +#### Notes +This determines the jump probability of the CBRANCH instruction. The default value of 8 results in a jump probability of 1/2^8 = 1/256. Increasing this constant will decrease the rate of jumps (and vice versa). + +### RANDOMX_JUMP_OFFSET +Jump condition mask offset in bits. + +#### Permitted values +Non-negative integers. The sum of `RANDOMX_JUMP_BITS` and `RANDOMX_JUMP_OFFSET` must not exceed 16. + +#### Notes +Since the low-order bits of RandomX registers are slightly biased, this offset moves the condition mask to higher bits, which are less biased. Using values smaller than the default may result in a slightly lower jump probability than the theoretical value calculated from `RANDOMX_JUMP_BITS`. + +### RANDOMX_SCRATCHPAD_L3 +RandomX Scratchpad size in bytes.
+ +#### Permitted values +Any integer power of 2. Must be larger than or equal to `RANDOMX_SCRATCHPAD_L2`. + +#### Notes + +The default value of 2 MiB was selected to match the typical cache/core ratio of desktop processors. Using a lower value will make RandomX more core-bound, while using larger values will make the algorithm more latency-bound. Some values are unsafe depending on other parameters. See [Unsafe configurations](#unsafe-configurations). + +### RANDOMX_SCRATCHPAD_L2 + +Scratchpad L2 size in bytes. + +#### Permitted values +Any integer power of 2. Must be larger than or equal to `RANDOMX_SCRATCHPAD_L1`. + +#### Notes +The default value of 256 KiB was selected to match the typical per-core L2 cache size of desktop processors. Using a lower value will make RandomX more core-bound, while using larger values will make the algorithm more latency-bound. + +### RANDOMX_SCRATCHPAD_L1 + +Scratchpad L1 size in bytes. + +#### Permitted values +Any integer power of 2. The minimum is 64 bytes. + +#### Notes +The default value of 16 KiB was selected to be about half of the per-core L1 cache size of desktop processors. Using a lower value will make RandomX more core-bound, while using larger values will make the algorithm more latency-bound. + +### RANDOMX_FREQ_* + +Instruction frequencies (per 256 instructions). + +#### Permitted values +There is a total of 29 different instructions. The sum of frequencies must be equal to 256. + +#### Notes + +Making large changes to the default values is not recommended. The only exceptions are the instruction pairs IROR_R/IROL_R, FADD_R/FSUB_R and FADD_M/FSUB_M, which are functionally equivalent. + +## Unsafe configurations + +There are some configurations that are considered 'unsafe' because they affect the security of the algorithm against attacks. If the conditions listed below are not satisfied, the configuration is unsafe and a compilation error is emitted when building the RandomX library. 
+ +These checks can be disabled by defining `RANDOMX_UNSAFE` when building RandomX, e.g. by using the `-DRANDOMX_UNSAFE` command line switch in GCC or MSVC. It is not recommended to disable these checks except for testing purposes. + +### 1. Memory-time tradeoffs + +#### Condition +```` +RANDOMX_CACHE_ACCESSES * RANDOMX_ARGON_MEMORY * 1024 + 33554432 >= RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE +```` + +Configurations not satisfying this condition are vulnerable to memory-time tradeoffs, which enables efficient mining in light mode. + +#### Solutions + +* Increase `RANDOMX_CACHE_ACCESSES` or `RANDOMX_ARGON_MEMORY`. +* Decrease `RANDOMX_DATASET_BASE_SIZE` or `RANDOMX_DATASET_EXTRA_SIZE`. + +### 2. Insufficient Scratchpad writes + +#### Condition +```` +(128 + RANDOMX_PROGRAM_SIZE * RANDOMX_FREQ_ISTORE / 256) * (RANDOMX_PROGRAM_COUNT * RANDOMX_PROGRAM_ITERATIONS) >= RANDOMX_SCRATCHPAD_L3 +```` + +Configurations not satisfying this condition are vulnerable to Scratchpad size optimizations due to low amount of writes. + +#### Solutions + +* Increase `RANDOMX_PROGRAM_SIZE`, `RANDOMX_FREQ_ISTORE`, `RANDOMX_PROGRAM_COUNT` or `RANDOMX_PROGRAM_ITERATIONS`. +* Decrease `RANDOMX_SCRATCHPAD_L3`. + +### 3. Program filtering strategies + +#### Condition +``` +RANDOMX_PROGRAM_COUNT > 1 +``` + +Configurations not satisfying this condition are vulnerable to program filtering strategies. + +#### Solution + +* Increase `RANDOMX_PROGRAM_COUNT` to at least 2. + +### 4. Low program entropy + +#### Condition +``` +RANDOMX_PROGRAM_SIZE >= 64 +``` + +Configurations not satisfying this condition do not have a sufficient number of instruction combinations. + +#### Solution + +* Increase `RANDOMX_PROGRAM_SIZE` to at least 64. + +### 5. High compilation overhead + +#### Condition +``` +RANDOMX_PROGRAM_ITERATIONS >= 400 +``` + +Configurations not satisfying this condition have a program compilation overhead exceeding 10%. 
+ +#### Solution + +* Increase `RANDOMX_PROGRAM_ITERATIONS` to at least 400. + diff --git a/RandomX/doc/design.md b/RandomX/doc/design.md new file mode 100644 index 00000000..14aa7b80 --- /dev/null +++ b/RandomX/doc/design.md @@ -0,0 +1,530 @@ +# RandomX design +To minimize the performance advantage of specialized hardware, a proof of work (PoW) algorithm must achieve *device binding* by targeting specific features of existing general-purpose hardware. This is a complex task because we have to target a large class of devices with different architectures from different manufacturers. + +There are two distinct classes of general processing devices: central processing units (CPUs) and graphics processing units (GPUs). RandomX targets CPUs for the following reasons: + +* CPUs, being less specialized devices, are more prevalent and widely accessible. A CPU-bound algorithm is more egalitarian and allows more participants to join the network. This is one of the goals stated in the original CryptoNote whitepaper [[1](https://cryptonote.org/whitepaper.pdf)]. +* A large common subset of native hardware instructions exists among different CPU architectures. The same cannot be said about GPUs. For example, there is no common integer multiplication instruction for NVIDIA and AMD GPUs [[2](https://github.com/ifdefelse/ProgPOW/issues/16)]. +* All major CPU instruction sets are well documented with multiple open source compilers available. In comparison, GPU instruction sets are usually proprietary and may require vendor specific closed-source drivers for maximum performance. + +## 1. Design considerations + +The most basic idea of a CPU-bound proof of work is that the "work" must be dynamic. This takes advantage of the fact that CPUs accept two kinds of inputs: *data* (the main input) and *code* (which specifies what to perform with the data). 
+ +Conversely, typical cryptographic hashing functions [[3](https://en.wikipedia.org/wiki/Cryptographic_hash_function)] do not represent suitable work for the CPU because their only input is *data*, while the sequence of operations is fixed and can be performed more efficiently by a specialized integrated circuit. + +### 1.1 Dynamic proof of work + +A dynamic proof of work algorithm can generally consist of the following 4 steps: + +1) Generate a random program. +2) Translate it into the native machine code of the CPU. +3) Execute the program. +4) Transform the output of the program into a cryptographically secure value. + +The actual 'useful' CPU-bound work is performed in step 3, so the algorithm must be tuned to minimize the overhead of the remaining steps. + +#### 1.1.1 Generating a random program + +Early attempts at a dynamic proof of work design were based on generating a program in a high-level language, such as C or Javascript [[4](https://github.com/hyc/randprog), [5](https://github.com/tevador/RandomJS)]. However, this is very inefficient for two main reasons: + +* High level languages have a complex syntax, so generating a valid program is relatively slow since it requires the creation of an abstract syntax tree (AST). +* Once the source code of the program is generated, the compiler will generally parse the textual representation back into the AST, which makes the whole process of generating source code redundant. + +The fastest way to generate a random program is to use a *logic-less* generator - simply filling a buffer with random data. This of course requires designing a syntaxless programming language (or instruction set) in which all random bit strings represent valid programs. + +#### 1.1.2 Translating the program into machine code + +This step is inevitable because we don't want to limit the algorithm to a specific CPU architecture. 
In order to generate machine code as fast as possible, we need our instruction set to be as close to native hardware as possible, while still generic enough to support different architectures. There is not enough time for expensive optimizations during code compilation. + +#### 1.1.3 Executing the program + +The actual program execution should utilize as many CPU components as possible. Some of the features that should be utilized in the program are: + +* multi-level caches (L1, L2, L3) +* μop cache [[6](https://en.wikipedia.org/wiki/CPU_cache#Micro-operation_(%CE%BCop_or_uop)_cache)] +* arithmetic logic unit (ALU) +* floating point unit (FPU) +* memory controller +* instruction level parallelism [[7](https://en.wikipedia.org/wiki/Instruction-level_parallelism)] + * superscalar execution [[8](https://en.wikipedia.org/wiki/Superscalar_processor)] + * out-of-order execution [[9](https://en.wikipedia.org/wiki/Out-of-order_execution)] + * speculative execution [[10](https://en.wikipedia.org/wiki/Speculative_execution)] + * register renaming [[11](https://en.wikipedia.org/wiki/Register_renaming)] + +Chapter 2 describes how the RandomX VM takes advantage of these features. + +#### 1.1.4 Calculating the final result + +Blake2b [[12](https://blake2.net/)] is a cryptographically secure hashing function that was specifically designed to be fast in software, especially on modern 64-bit processors, where it's around three times faster than SHA-3 and can run at a speed of around 3 clock cycles per byte of input. This function is an ideal candidate to be used in a CPU-friendly proof of work. + +For processing larger amounts of data in a cryptographically secure way, the Advanced Encryption Standard (AES) [[13](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard)] can provide the fastest processing speed because many modern CPUs support hardware acceleration of these operations. See chapter 3 for more details about the use of AES in RandomX. 
+ +### 1.2 The "Easy program problem" + +When a random program is generated, one may choose to execute it only when it's favorable. This strategy is viable for two main reasons: + +1. The runtime of randomly generated programs typically follows a log-normal distribution [[14](https://en.wikipedia.org/wiki/Log-normal_distribution)] (also see Appendix C). A generated program may be quickly analyzed and if it's likely to have above-average runtime, program execution may be skipped and a new program may be generated instead. This can significantly boost performance especially in case the runtime distribution has a heavy tail (many long-running outliers) and if program generation is cheap. +2. An implementation may choose to optimize for a subset of the features required for program execution. For example, the support for some operations (such as division) may be dropped or some instruction sequences may be implemented more efficiently. Generated programs would then be analyzed and be executed only if they match the specific requirements of the optimized implementation. + +These strategies of searching for programs of particular properties deviate from the objectives of this proof of work, so they must be eliminated. This can be achieved by requiring a sequence of *N* random programs to be executed such that each program is generated from the output of the previous one. The output of the final program is then used as the result. + +``` + +---------------+ +---------------+ +---------------+ +---------------+ + | | | | | | | | +input --> | program 1 | --> | program 2 | --> ... --> | program (N-1) | --> | program N | --> result + | | | | | | | | + +---------------+ +---------------+ +---------------+ +---------------+ +``` + +The principle is that after the first program is executed, a miner has to either commit to finishing the whole chain (which may include unfavorable programs) or start over and waste the effort expended on the unfinished chain. 
Examples of how this affects the hashrate of different mining strategies are given in Appendix A. + +Additionally, this chained program execution has the benefit of equalizing the runtime for the whole chain since the relative deviation of a sum of identically distributed runtimes is decreased. + +### 1.3 Verification time + +Since the purpose of the proof of work is to be used in a trustless peer-to-peer network, network participants must be able to quickly verify if a proof is valid or not. This puts an upper bound on the complexity of the proof of work algorithm. In particular, we set a goal for RandomX to be at least as fast to verify as the CryptoNight hash function [[15](https://cryptonote.org/cns/cns008.txt)], which it aims to replace. + +### 1.4 Memory-hardness + +Besides pure computational resources, such as ALUs and FPUs, CPUs usually have access to a large amount of memory in the form of DRAM [[16](https://en.wikipedia.org/wiki/Dynamic_random-access_memory)]. The performance of the memory subsystem is typically tuned to match the compute capabilities, for example [[17](https://en.wikipedia.org/wiki/Multi-channel_memory_architecture)]: + +* single channel memory for embedded and low power CPUs +* dual channel memory for desktop CPUs +* triple or quad channel memory for workstation CPUs +* six or eight channel memory for high-end server CPUs + +In order to utilize the external memory as well as the on-chip memory controllers, the proof of work algorithm should access a large memory buffer (called the "Dataset"). The Dataset must be: + +1. larger than what can be stored on-chip (to require external memory) +2. dynamic (to require writable memory) + +The maximum amount of SRAM that can be put on a single chip is more than 512 MiB for a 16 nm process and more than 2 GiB for a 7 nm process [[18](https://www.grin-forum.org/t/obelisk-grn1-chip-details/4571)]. Ideally, the size of the Dataset should be at least 4 GiB. 
However, due to constraints on the verification time (see below), the size used by RandomX was selected to be 2080 MiB. While a single chip can theoretically be made with this amount of SRAM using current technology (7 nm in 2019), the feasibility of such solution is questionable, at least in the near future. + +#### 1.4.1 Light-client verification + +While it's reasonable to require >2 GiB for dedicated mining systems that solve the proof of work, an option must be provided for light clients to verify the proof using a much lower amount of memory. + +The ratio of memory required for the 'fast' and 'light' modes must be chosen carefully not to make the light mode viable for mining. In particular, the area-time (AT) product of the light mode should not be smaller than the AT product of the fast mode. Reduction of the AT product is a common way of measuring tradeoff attacks [[19](https://eprint.iacr.org/2015/227.pdf)]. + +Given the constraints described in the previous chapters, the maximum possible performance ratio between the fast and the light verification modes was empirically determined to be 8. This is because: + +1. Further increase of the light verification time would violate the constraints set out in chapter 1.3. +2. Further decrease of the fast mode runtime would violate the constraints set out in chapter 1.1, in particular the overhead time of program generation and result calculation would become too high. + +Additionally, 256 MiB was selected as the maximum amount of memory that can be required in the light-client mode. This amount is acceptable even for small single-board computers such as the Raspberry Pi. + +To keep a constant memory-time product, the maximum fast-mode memory requirement is: +``` +8 * 256 MiB = 2048 MiB +``` +This can be further increased since the light mode requires additional chip area for the SuperscalarHash function (see chapter 3.4 and chapter 6 of the Specification). 
Assuming a conservative estimate of 0.2 mm² per SuperscalarHash core and DRAM density of 0.149 Gb/mm² [[20](http://en.thelec.kr/news/articleView.html?idxno=20)], the additional memory is: + +``` +8 * 0.2 * 0.149 * 1024 / 8 = 30.5 MiB +``` +or 32 MiB when rounded to the nearest power of 2. The total memory requirement of the fast mode can be 2080 MiB with a roughly constant AT product. + +## 2. Virtual machine architecture + +This section describes the design of the RandomX virtual machine (VM). + +### 2.1 Instruction set + +RandomX uses a fixed-length instruction encoding with 8 bytes per instruction. This allows a 32-bit immediate value to be included in the instruction word. The interpretation of the instruction word bits was chosen so that any 8-byte word is a valid instruction. This allows for very efficient random program generation (see chapter 1.1.1). + +### 2.2 Program + +The program executed by the VM has the form of a loop consisting of 256 random instructions. + +* 256 instructions is long enough to provide a large number of possible programs and enough space for branches. The number of different programs that can be generated is limited to `2^512 = 1.3e+154`, which is the number of possible seed values of the random generator. +* 256 instructions is short enough so that high-performance CPUs can execute one iteration in similar time it takes to fetch data from DRAM. This is advantageous because it allows Dataset accesses to be synchronized and fully prefetchable (see chapter 2.9). +* Since the program is a loop, it can take advantage of the μop cache [[6](https://en.wikipedia.org/wiki/CPU_cache#Micro-operation_(%CE%BCop_or_uop)_cache)] that is present in some x86 CPUs. Running a loop from the μop cache allows the CPU to power down the x86 instruction decoders, which should help to equalize the power efficiency between x86 and architectures with simple instruction decoding. 
+ +### 2.3 Registers + +The VM uses 8 integer registers and 12 floating point registers. This is the maximum that can be allocated as physical registers in x86-64, which has the fewest architectural registers among common 64-bit CPU architectures. Using more registers would put x86 CPUs at a disadvantage since they would have to use memory to store VM register contents. + +### 2.4 Integer operations + +RandomX uses all primitive integer operations that have high output entropy: addition (IADD_RS, IADD_M), subtraction (ISUB_R, ISUB_M, INEG_R), multiplication (IMUL_R, IMUL_M, IMULH_R, IMULH_M, ISMULH_R, ISMULH_M, IMUL_RCP), exclusive or (IXOR_R, IXOR_M) and rotation (IROR_R, IROL_R). + +#### 2.4.1 IADD_RS + +The IADD_RS instruction utilizes the address calculation logic of CPUs and can be performed in a single hardware instruction by most CPUs (x86 `lea`, arm `add`). + +#### 2.4.2 IMUL_RCP + +Because integer division is not fully pipelined in CPUs and can be made faster in ASICs, the IMUL_RCP instruction requires only one division per program to calculate the reciprocal. This forces an ASIC to include a hardware divider without giving them a performance advantage during program execution. + +#### 2.4.3 ISWAP_R + +This instruction can be executed efficiently by CPUs that support register renaming/move elimination. + +### 2.5 Floating point operations + +RandomX uses double precision floating point operations, which are supported by the majority of CPUs and require more complex hardware than single precision. All operations are performed as 128-bit vector operations, which is also supported by all major CPU architectures. + +RandomX uses five operations that are guaranteed by the IEEE 754 standard to give correctly rounded results: addition, subtraction, multiplication, division and square root. All 4 rounding modes defined by the standard are used. 
+ +#### 2.5.1 Floating point register groups + +The domains of floating point operations are separated into "additive" operations, which use register group F and "multiplicative" operations, which use register group E. This is done to prevent addition/subtraction from becoming no-op when a small number is added to a large number. Since the range of the F group registers is limited to around `±3.0e+14`, adding or subtracting a floating point number with absolute value larger than 1 always changes at least 5 fraction bits. + +Because the limited range of group F registers would allow the use of a more efficient fixed-point representation (with 80-bit numbers), the FSCAL instruction manipulates the binary representation of the floating point format to make this optimization more difficult. + +Group E registers are restricted to positive values, which avoids `NaN` results (such as square root of a negative number or `0 * ∞`). Division uses only memory source operand to avoid being optimized into multiplication by constant reciprocal. The exponent of group E memory operands is set to a value between -255 and 0 to avoid division and multiplication by 0 and to increase the range of numbers that can be obtained. The approximate range of possible group E register values is `1.7E-77` to `infinity`. + +Approximate distribution of floating point register values at the end of each program loop is shown in these figures (left - group F, right - group E): + +![Imgur](https://i.imgur.com/64G4qE8.png) + +*(Note: bins are marked by the left-side value of the interval, e.g. bin marked `1e-40` contains values from `1e-40` to `1e-20`.)* + +The small number of F register values at `1e+14` is caused by the FSCAL instruction, which significantly increases the range of the register values. + +Group E registers cover a very large range of values. About 2% of programs produce at least one `infinity` value. 
+ +To maximize entropy and also to fit into one 64-byte cache line, floating point registers are combined using the XOR operation at the end of each iteration before being stored into the Scratchpad. + +### 2.6 Branches + +Modern CPUs invest a lot of die area and energy to handle branches. This includes: + +* Branch predictor unit [[21](https://en.wikipedia.org/wiki/Branch_predictor)] +* Checkpoint/rollback states that allow the CPU to recover in case of a branch misprediction. + +To take advantage of speculative designs, the random programs should contain branches. However, if branch prediction fails, the speculatively executed instructions are thrown away, which results in a certain amount of wasted energy with each misprediction. Therefore we should aim to minimize the number of mispredictions. + +Additionally, branches in the code are essential because they significantly reduce the amount of static optimizations that can be made. For example, consider the following x86 instruction sequence: +```asm + ... +branch_target_00: + ... + xor r8, r9 + test r10, 2088960 + je branch_target_00 + xor r8, r9 + ... +``` +The XOR operations would normally cancel out, but cannot be optimized away due to the branch because the result will be different if the branch is taken. Similarly, the ISWAP_R instruction could be always statically optimized out if it wasn't for branches. + +In general, random branches must be designed in such a way that: + +1. Infinite loops are not possible. +1. The number of mispredicted branches is small. +1. Branch condition depends on a runtime value to disable static branch optimizations. + +#### 2.6.1 Branch prediction + +Unfortunately, we haven't found a way to utilize branch prediction in RandomX. Because RandomX is a consensus protocol, all the rules must be set out in advance, which includes the rules for branches. 
Fully predictable branches cannot depend on the runtime value of any VM register (since register values are pseudorandom and unpredictable), so they would have to be static and therefore easily optimizable by specialized hardware. + +#### 2.6.2 CBRANCH instruction + +RandomX therefore uses random branches with a jump probability of 1/256 and a branch condition that depends on an integer register value. These branches will be predicted as "not taken" by the CPU. Such branches are "free" in most CPU designs unless they are taken. While this doesn't take advantage of the branch predictors, speculative designs will see a significant performance boost compared to non-speculative branch handling - see Appendix B for more information. + +The branching conditions and jump targets are chosen in such a way that infinite loops in RandomX code are impossible because the register controlling the branch will never be modified in the repeated code block. Each CBRANCH instruction can jump up to twice in a row. Handling CBRANCH using predicated execution [[22](https://en.wikipedia.org/wiki/Predication_(computer_architecture))] is impractical because the branch is not taken most of the time. + +### 2.7 Instruction-level parallelism + +CPUs improve their performance using several techniques that utilize instruction-level parallelism of the executed code. These techniques include: + +* Having multiple execution units that can execute operations in parallel (*superscalar execution*). +* Executing instructions not in program order, but in the order of operand availability (*out-of-order execution*). +* Predicting which way branches will go to enhance the benefits of both superscalar and out-of-order execution. + +RandomX benefits from all these optimizations. See Appendix B for a detailed analysis. + +### 2.8 Scratchpad + +The Scratchpad is used as read-write memory. Its size was selected to fit entirely into CPU cache. 
+ +#### 2.8.1 Scratchpad levels + +The Scratchpad is split into 3 levels to mimic the typical CPU cache hierarchy [[23](https://en.wikipedia.org/wiki/CPU_cache)]. Most VM instructions access "L1" and "L2" Scratchpad because L1 and L2 CPU caches are located close to the CPU execution units and provide the best random access latency. The ratio of reads from L1 and L2 is 3:1, which matches the inverse ratio of typical latencies (see table below). + +|CPU μ-architecture|L1 latency|L2 latency|L3 latency|source| +|----------------|----------|----------|----------|------| +ARM Cortex A55|2|6|-|[[24](https://www.anandtech.com/show/11441/dynamiq-and-arms-new-cpus-cortex-a75-a55/4)] +|AMD Zen+|4|12|40|[[25](https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy)]| +|Intel Skylake|4|12|42|[[26](https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy)] + +The L3 cache is much larger and located further from the CPU core. As a result, its access latencies are much higher and can cause stalls in program execution. + +RandomX therefore performs only 2 random accesses into "L3" Scratchpad per program iteration (steps 2 and 3 in chapter 4.6.2 of the Specification). Register values from a given iteration are written into the same locations they were loaded from, which guarantees that the required cache lines have been moved into the faster L1 or L2 caches. + +Additionally, integer instructions that read from a fixed address also use the whole "L3" Scratchpad (Table 5.1.4 of the Specification) because repetitive accesses will ensure that the cache line will be placed in the L1 cache of the CPU. This shows that the Scratchpad level doesn't always directly correspond to the same CPU cache level. + +#### 2.8.2 Scratchpad writes + +There are two ways the Scratchpad is modified during VM execution: + +1. At the end of each program iteration, all register values are written into "L3" Scratchpad (see Specification chapter 4.6.2, steps 9 and 11). 
This writes a total of 128 bytes per iteration in two 64-byte blocks. +2. The ISTORE instruction does explicit stores. On average, there are 16 stores per program, out of which 2 stores are into the "L3" level. Each ISTORE instruction writes 8 bytes. + +The image below shows an example of the distribution of writes to the Scratchpad. Each pixel in the image represents 8 bytes of the Scratchpad. Red pixels represent portions of the Scratchpad that have been overwritten at least once during hash calculation. The "L1" and "L2" levels are on the left side (almost completely overwritten). The right side of the scratchpad represents the bottom 1792 KiB. Only about 66% of it is overwritten, but the writes are spread uniformly and randomly. + +![Imgur](https://i.imgur.com/pRz6aBG.png) + +See Appendix D for the analysis of Scratchpad entropy. + +#### 2.8.3 Read-write ratio + +Programs make, on average, 39 reads (instructions IADD_M, ISUB_M, IMUL_M, IMULH_M, ISMULH_M, IXOR_M, FADD_M, FSUB_M, FDIV_M) and 16 writes (instruction ISTORE) to the Scratchpad per program iteration. An additional 128 bytes are read and written implicitly to initialize and store register values. 64 bytes of data is read from the Dataset per iteration. In total: + +* The average amount of data read from memory per program iteration is: 39 * 8 + 128 + 64 = **504 bytes**. +* The average amount of data written to memory per program iteration is: 16 * 8 + 128 = **256 bytes**. + +This is close to a 2:1 read/write ratio, which CPUs are optimized for. + +### 2.9 Dataset + +Since the Scratchpad is usually stored in the CPU cache, only Dataset accesses utilize the memory controllers. + +RandomX randomly reads from the Dataset once per program iteration (16384 times per hash result). Since the Dataset must be stored in DRAM, it provides a natural parallelization limit, because DRAM cannot do more than about 25 million random accesses per second per bank group. 
Each separately addressable bank group allows a throughput of around 1500 H/s. + +All Dataset accesses read one CPU cache line (64 bytes) and are fully prefetched. The time to execute one program iteration described in chapter 4.6.2 of the Specification is about the same as typical DRAM access latency (50-100 ns). + +#### 2.9.1 Cache + +The Cache, which is used for light verification and Dataset construction, is about 8 times smaller than the Dataset. To keep a constant area-time product, each Dataset item is constructed from 8 random Cache accesses. + +Because 256 MiB is small enough to be included on-chip, RandomX uses a custom high-latency, high-power mixing function ("SuperscalarHash") which defeats the benefits of using low-latency memory and the energy required to calculate SuperscalarHash makes light mode very inefficient for mining (see chapter 3.4). + +Using less than 256 MiB of memory is not possible due to the use of tradeoff-resistant Argon2d with 3 iterations. When using 3 iterations (passes), halving the memory usage increases computational cost 3423 times for the best tradeoff attack [[27](https://eprint.iacr.org/2015/430.pdf)]. + +## 3. Custom functions + +### 3.1 AesGenerator1R + +AesGenerator1R was designed for the fastest possible generation of pseudorandom data to fill the Scratchpad. It takes advantage of hardware accelerated AES in modern CPUs. Only one AES round is performed per 16 bytes of output, which results in throughput exceeding 20 GB/s in most modern CPUs. While 1 AES round is not sufficient for a good distribution of random values, this is not an issue because the purpose is just to initialize the Scratchpad with random non-zero data. + +### 3.2 AesGenerator4R + +AesGenerator4R uses 4 AES rounds to generate pseudorandom data for Program Buffer initialization. 
Since 2 AES rounds are sufficient for full avalanche of all input bits [[28](https://csrc.nist.gov/csrc/media/projects/cryptographic-standards-and-guidelines/documents/aes-development/rijndael-ammended.pdf)], AesGenerator4R provides an excellent output distribution while maintaining very good performance. + +The reversible nature of this generator is not an issue since the generator state is always initialized using the output of a non-reversible hashing function (Blake2b). + +### 3.3 AesHash1R + +AesHash was designed for the fastest possible calculation of the Scratchpad fingerprint. It interprets the Scratchpad as a set of AES round keys, so it's equivalent to AES encryption with 32768 rounds. Two extra rounds are performed at the end to ensure avalanche of all Scratchpad bits in each lane. The output of the AesHash is fed into the Blake2b hashing function to calculate the final PoW hash. + +### 3.4 SuperscalarHash + +SuperscalarHash was designed to burn as much power as possible while the CPU is waiting for data to be loaded from DRAM. The target latency of 170 cycles corresponds to the usual DRAM latency of 40-80 ns and clock frequency of 2-4 GHz. ASIC devices designed for light-mode mining with low-latency memory will be bottlenecked by SuperscalarHash when calculating Dataset items and their efficiency will be destroyed by the high power usage of SuperscalarHash. + +The average SuperscalarHash function contains a total of 450 instructions, out of which 155 are 64-bit multiplications. On average, the longest dependency chain is 95 instructions long. An ASIC design for light-mode mining, with 256 MiB of on-die memory and 1-cycle latency for all operations, will need on average 95 * 8 = 760 cycles to construct a Dataset item, assuming unlimited parallelization. It will have to execute 155 * 8 = 1240 64-bit multiplications per item, which will consume energy comparable to loading 64 bytes from DRAM. + +## Appendix + +### A. 
The effect of chaining VM executions + +Chapter 1.2 describes why `N` random programs are chained to prevent mining strategies that search for 'easy' programs. RandomX uses a value of `N = 8`. + +Let's define `Q` as the ratio of acceptable programs in a strategy that uses filtering. For example `Q = 0.75` means that 25% of programs are rejected. + +For `N = 1`, there are no wasted program executions and the only cost is program generation and the filtering itself. The calculations below assume that these costs are zero and the only real cost is program execution. However, this is a simplification because program generation in RandomX is not free (the first program generation requires full Scratchpad initialization), but it describes a best-case scenario for an attacker. + + + For `N > 1`, the first program can be filtered as usual, but after the program is executed, there is a chance of `1-Q` that the next program should be rejected and we have wasted one program execution. + +For `N` chained executions, the chance is only `Q^N` that all programs in the chain are acceptable. However, during each attempt to find such a chain, we will waste the execution of some programs. For `N = 8`, the number of wasted programs per attempt is equal to `(1-Q)*(1+2*Q+3*Q^2+4*Q^3+5*Q^4+6*Q^5+7*Q^6)` (approximately 2.5 for `Q = 0.75`). + +Let's consider 3 mining strategies: + +#### Strategy I + +Honest miner that doesn't reject any programs (`Q = 1`). + +#### Strategy II + +Miner that uses optimized custom hardware that cannot execute 25% of programs (`Q = 0.75`), but supported programs can be executed 50% faster. + +#### Strategy III + +Miner that can execute all programs, but rejects 25% of the slowest programs for the first program in the chain. This gives a 5% performance boost for the first program in the chain (this matches the runtime distribution from Appendix C). + +#### Results + +The table below lists the results for the above 3 strategies and different values of `N`. 
The columns **N(I)**, **N(II)** and **N(III)** list the number of programs that each strategy has to execute on average to get one valid hash result (this includes programs wasted in rejected chains). Columns **Speed(I)**, **Speed(II)** and **Speed(III)** list the average mining performance relative to strategy I. + +|N|N(I)|N(II)|N(III)|Speed(I)|Speed(II)|Speed(III)| +|---|----|----|----|---------|---------|---------| +|1|1|1|1|1.00|1.50|1.05| +|2|2|2.3|2|1.00|1.28|1.02| +|4|4|6.5|4|1.00|0.92|1.01| +|8|8|27.0|8|1.00|0.44|1.00| + +For `N = 8`, strategy II will perform at less than half the speed of the honest miner despite having a 50% performance advantage for selected programs. The small statistical advantage of strategy III is negligible with `N = 8`. + +### B. Performance simulation + +As discussed in chapter 2.7, RandomX aims to take advantage of the complex design of modern high-performance CPUs. To evaluate the impact of superscalar, out-of-order and speculative execution, we performed a simplified CPU simulation. Source code is available in [perf-simulation.cpp](../src/tests/perf-simulation.cpp). + +#### CPU model + +The model CPU uses a 3-stage pipeline to achieve an ideal throughput of 1 instruction per cycle: +``` + (1) (2) (3) ++------------------+ +----------------+ +----------------+ +| Instruction | | | | | +| fetch | ---> | Memory access | ---> | Execute | +| + decode | | | | | ++------------------+ +----------------+ +----------------+ +``` +The 3 stages are: + +1. Instruction fetch and decode. This stage loads the instruction from the Program Buffer and decodes the instruction operation and operands. +2. Memory access. If this instruction uses a memory operand, it is loaded from the Scratchpad in this stage. This includes the calculation of the memory address. Stores are also performed in this stage. The value of the address register must be available in this stage. +3. Execute. 
This stage executes the instruction using the operands retrieved in the previous stages and writes the results into the register file. + +Note that this is an optimistically short pipeline that would not allow very high clock speeds. Designs using a longer pipeline would significantly increase the benefits of speculative execution. + +#### Superscalar execution + +Our model CPU contains two kinds of components: + +* Execution unit (EXU) - it is used to perform the actual integer or floating point operation. All RandomX instructions except ISTORE must use an execution unit in the 3rd pipeline stage. All operations are considered to take only 1 clock cycle. +* Memory unit (MEM) - it is used for loads and stores into Scratchpad. All memory instructions (including ISTORE) use a memory unit in the 2nd pipeline stage. + +A superscalar design will contain multiple execution or memory units to improve performance. + +#### Out-of-order execution + +The simulation model supports two designs: + +1. **In-order** - all instructions are executed in the order they appear in the Program Buffer. This design will stall if a dependency is encountered or the required EXU/MEM unit is not available. +2. **Out-of-order** - doesn't execute instructions in program order, but an instruction can be executed when its operands are ready and the required EXU/MEM units are available. + +#### Branch handling + +The simulation model supports two types of branch handling: + +1. **Non-speculative** - when a branch is encountered, the pipeline is stalled. This typically adds a 3-cycle penalty for each branch. +2. **Speculative** - all branches are predicted not taken and the pipeline is flushed if a misprediction occurs (probability of 1/256). + +#### Results + +The following 10 designs were simulated and the average number of clock cycles to execute a RandomX program (256 instructions) was measured. 
+ +|design|superscalar config.|reordering|branch handling|execution time [cycles]|IPC| +|-------|-----------|----------|---------------|-----------------------|---| +|#1|1 EXU + 1 MEM|in-order|non-speculative|293|0.87| +|#2|1 EXU + 1 MEM|in-order|speculative|262|0.98| +|#3|2 EXU + 1 MEM|in-order|non-speculative|197|1.3| +|#4|2 EXU + 1 MEM|in-order|speculative|161|1.6| +|#5|2 EXU + 1 MEM|out-of-order|non-speculative|144|1.8| +|#6|2 EXU + 1 MEM|out-of-order|speculative|122|2.1| +|#7|4 EXU + 2 MEM|in-order|non-speculative|135|1.9| +|#8|4 EXU + 2 MEM|in-order|speculative|99|2.6| +|#9|4 EXU + 2 MEM|out-of-order|non-speculative|89|2.9| +|#10|4 EXU + 2 MEM|out-of-order|speculative|64|4.0| + +The benefits of superscalar, out-of-order and speculative designs are clearly demonstrated. + +### C. RandomX runtime distribution + +Runtime numbers were measured on AMD Ryzen 7 1700 running at 3.0 GHz using 1 core. Source code to measure program execution and verification times is available in [runtime-distr.cpp](../src/tests/runtime-distr.cpp). Source code to measure the performance of the x86 JIT compiler is available in [jit-performance.cpp](../src/tests/jit-performance.cpp). + +#### Fast mode - program execution + +The following figure shows the distribution of the runtimes of a single VM program (in fast mode). This includes: program generation, JIT compilation, VM execution and Blake2b hash of the register file. Program generation and JIT compilation was measured to take 3.6 μs per program. + +![Imgur](https://i.imgur.com/ikv2z2i.png) + +AMD Ryzen 7 1700 can calculate 625 hashes per second in fast mode (using 1 thread), which means a single hash result takes 1600 μs (1.6 ms). This consists of (approximately): + +* 1480 μs for VM execution (8 programs) +* 45 μs for initial Scratchpad fill (AesGenerator1R). +* 45 μs for final Scratchpad hash (AesHash1R). 
+* 30 μs for program generation and JIT compilation (8 programs) + +This gives a total overhead of 7.5% (time per hash spent not executing VM). + +#### Light mode - verification time + +The following figure shows the distribution of times to calculate 1 hash result using the light mode. Most of the time is spent executing SuperscalarHash to calculate Dataset items (13.2 ms out of 14.8 ms). The average verification time exactly matches the performance of the CryptoNight algorithm. + +![Imgur](https://i.imgur.com/VtwwJT8.png) + +### D. Scratchpad entropy analysis + +The average entropy of the Scratchpad after 8 program executions was approximated using the LZMA compression algorithm: + +1. Hash results were calculated and the final scratchpads were written to disk as files with '.spad' extension (source code: [scratchpad-entropy.cpp](../src/tests/scratchpad-entropy.cpp)) +2. The files were compressed using 7-Zip [[29](https://www.7-zip.org/)] in Ultra compression mode: `7z.exe a -t7z -m0=lzma2 -mx=9 scratchpads.7z *.spad` + +The size of the resulting archive is approximately 99.98% of the uncompressed size of the scratchpad files. This shows that the Scratchpad retains high entropy during VM execution. + +### E. SuperscalarHash analysis + +SuperscalarHash is a custom function used by RandomX to generate Dataset items. It operates on 8 integer registers and uses a random sequence of instructions. About 1/3 of the instructions are multiplications. + +The following figure shows the sensitivity of SuperscalarHash to changing a single bit of an input register: + +![Imgur](https://i.imgur.com/ztZ0V0G.png) + +This shows that SuperscalarHash has quite low sensitivity to high-order bits and somewhat decreased sensitivity to the lowest-order bits. Sensitivity is highest for bits 3-53 (inclusive). + +When calculating a Dataset item, the input of the first SuperscalarHash depends only on the item number. 
To ensure a good distribution of results, the constants described in section 7.3 of the Specification were chosen to provide unique values of bits 3-53 for *all* item numbers in the range 0-34078718 (the Dataset contains 34078719 items). All initial register values for all Dataset item numbers were checked to make sure bits 3-53 of each register are unique and there are no collisions (source code: [superscalar-init.cpp](../src/tests/superscalar-init.cpp)). While this is not strictly necessary to get unique output from SuperscalarHash, it's a security precaution that mitigates the non-perfect avalanche properties of the randomly generated SuperscalarHash instances. + +## References + +[1] CryptoNote whitepaper - https://cryptonote.org/whitepaper.pdf + +[2] ProgPoW: Inefficient integer multiplications - https://github.com/ifdefelse/ProgPOW/issues/16 + +[3] Cryptographic Hashing function - https://en.wikipedia.org/wiki/Cryptographic_hash_function + +[4] randprog - https://github.com/hyc/randprog + +[5] RandomJS - https://github.com/tevador/RandomJS + +[6] μop cache - https://en.wikipedia.org/wiki/CPU_cache#Micro-operation_(%CE%BCop_or_uop)_cache + +[7] Instruction-level parallelism - https://en.wikipedia.org/wiki/Instruction-level_parallelism + +[8] Superscalar processor - https://en.wikipedia.org/wiki/Superscalar_processor + +[9] Out-of-order execution - https://en.wikipedia.org/wiki/Out-of-order_execution + +[10] Speculative execution - https://en.wikipedia.org/wiki/Speculative_execution + +[11] Register renaming - https://en.wikipedia.org/wiki/Register_renaming + +[12] Blake2 hashing function - https://blake2.net/ + +[13] Advanced Encryption Standard - https://en.wikipedia.org/wiki/Advanced_Encryption_Standard + +[14] Log-normal distribution - https://en.wikipedia.org/wiki/Log-normal_distribution + +[15] CryptoNight hash function - https://cryptonote.org/cns/cns008.txt + +[16] Dynamic random-access memory - https://en.wikipedia.org/wiki/Dynamic_random-access_memory 
+ +[17] Multi-channel memory architecture - https://en.wikipedia.org/wiki/Multi-channel_memory_architecture + +[18] Obelisk GRN1 chip details - https://www.grin-forum.org/t/obelisk-grn1-chip-details/4571 + +[19] Biryukov et al.: Tradeoff Cryptanalysis of Memory-Hard Functions - https://eprint.iacr.org/2015/227.pdf + +[20] SK Hynix 20nm DRAM density - http://en.thelec.kr/news/articleView.html?idxno=20 + +[21] Branch predictor - https://en.wikipedia.org/wiki/Branch_predictor + +[22] Predication - https://en.wikipedia.org/wiki/Predication_(computer_architecture) + +[23] CPU cache - https://en.wikipedia.org/wiki/CPU_cache + +[24] Cortex-A55 Microarchitecture - https://www.anandtech.com/show/11441/dynamiq-and-arms-new-cpus-cortex-a75-a55/4 + +[25] AMD Zen+ Microarchitecture - https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy + +[26] Intel Skylake Microarchitecture - https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(client)#Memory_Hierarchy + +[27] Biryukov et al.: Fast and Tradeoff-Resilient Memory-Hard Functions for +Cryptocurrencies and Password Hashing - https://eprint.iacr.org/2015/430.pdf Table 2, page 8 + +[28] J. Daemen, V. 
Rijmen: AES Proposal: Rijndael - https://csrc.nist.gov/csrc/media/projects/cryptographic-standards-and-guidelines/documents/aes-development/rijndael-ammended.pdf page 28 + +[29] 7-Zip File archiver - https://www.7-zip.org/ diff --git a/RandomX/doc/program.asm b/RandomX/doc/program.asm new file mode 100644 index 00000000..c203d1ca --- /dev/null +++ b/RandomX/doc/program.asm @@ -0,0 +1,983 @@ +randomx_isn_0: + ; ISMULH_R r0, r7 + mov rax, r8 + imul r15 + mov r8, rdx +randomx_isn_1: + ; IADD_RS r1, r2, SHFT 2 + lea r9, [r9+r10*4] +randomx_isn_2: + ; ISTORE L1[r6+1506176493], r4 + lea eax, [r14d+1506176493] + and eax, 16376 + mov qword ptr [rsi+rax], r12 +randomx_isn_3: + ; IMUL_R r5, r3 + imul r13, r11 +randomx_isn_4: + ; IROR_R r3, r5 + mov ecx, r13d + ror r11, cl +randomx_isn_5: + ; CBRANCH r7, -1891017657, COND 15 + add r15, -1886823353 + test r15, 2139095040 + jz randomx_isn_0 +randomx_isn_6: + ; ISUB_M r3, L1[r7-1023302103] + lea eax, [r15d-1023302103] + and eax, 16376 + sub r11, qword ptr [rsi+rax] +randomx_isn_7: + ; IMUL_R r6, 220479013 + imul r14, 220479013 +randomx_isn_8: + ; IADD_RS r5, r3, -669841607, SHFT 2 + lea r13, [r13+r11*4-669841607] +randomx_isn_9: + ; IADD_M r3, L3[532344] + add r11, qword ptr [rsi+532344] +randomx_isn_10: + ; FADD_R f0, a3 + addpd xmm0, xmm11 +randomx_isn_11: + ; CBRANCH r3, -1981570318, COND 4 + add r11, -1981566222 + test r11, 1044480 + jz randomx_isn_10 +randomx_isn_12: + ; FSUB_R f0, a1 + subpd xmm0, xmm9 +randomx_isn_13: + ; IADD_RS r1, r6, SHFT 2 + lea r9, [r9+r14*4] +randomx_isn_14: + ; FSQRT_R e2 + sqrtpd xmm6, xmm6 +randomx_isn_15: + ; CBRANCH r5, -1278791788, COND 14 + add r13, -1278791788 + test r13, 1069547520 + jz randomx_isn_12 +randomx_isn_16: + ; ISUB_R r3, -1310797453 + sub r11, -1310797453 +randomx_isn_17: + ; IMUL_RCP r3, 2339914445 + mov rax, 16929713537937567113 + imul r11, rax +randomx_isn_18: + ; FADD_R f1, a2 + addpd xmm1, xmm10 +randomx_isn_19: + ; FSUB_R f2, a2 + subpd xmm2, xmm10 +randomx_isn_20: + ; 
IMUL_R r7, r0 + imul r15, r8 +randomx_isn_21: + ; FADD_M f2, L2[r7-828505656] + lea eax, [r15d-828505656] + and eax, 262136 + cvtdq2pd xmm12, qword ptr [rsi+rax] + addpd xmm2, xmm12 +randomx_isn_22: + ; FDIV_M e1, L1[r1-1542605227] + lea eax, [r9d-1542605227] + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + andps xmm12, xmm13 + orps xmm12, xmm14 + divpd xmm5, xmm12 +randomx_isn_23: + ; IMUL_RCP r0, 1878277380 + mov rax, 10545322453154434729 + imul r8, rax +randomx_isn_24: + ; ISUB_R r6, r3 + sub r14, r11 +randomx_isn_25: + ; IMUL_M r1, L1[r3-616171540] + lea eax, [r11d-616171540] + and eax, 16376 + imul r9, qword ptr [rsi+rax] +randomx_isn_26: + ; FSWAP_R f2 + shufpd xmm2, xmm2, 1 +randomx_isn_27: + ; FSQRT_R e0 + sqrtpd xmm4, xmm4 +randomx_isn_28: + ; IXOR_R r7, r5 + xor r15, r13 +randomx_isn_29: + ; FADD_R f3, a3 + addpd xmm3, xmm11 +randomx_isn_30: + ; FSUB_M f0, L2[r0+1880524670] + lea eax, [r8d+1880524670] + and eax, 262136 + cvtdq2pd xmm12, qword ptr [rsi+rax] + subpd xmm0, xmm12 +randomx_isn_31: + ; IADD_RS r0, r3, SHFT 3 + lea r8, [r8+r11*8] +randomx_isn_32: + ; FMUL_R e0, a2 + mulpd xmm4, xmm10 +randomx_isn_33: + ; IMUL_M r1, L1[r4-588273594] + lea eax, [r12d-588273594] + and eax, 16376 + imul r9, qword ptr [rsi+rax] +randomx_isn_34: + ; IADD_M r4, L1[r6+999905907] + lea eax, [r14d+999905907] + and eax, 16376 + add r12, qword ptr [rsi+rax] +randomx_isn_35: + ; ISUB_R r4, r0 + sub r12, r8 +randomx_isn_36: + ; FMUL_R e0, a3 + mulpd xmm4, xmm11 +randomx_isn_37: + ; ISTORE L1[r4+2027210220], r3 + lea eax, [r12d+2027210220] + and eax, 16376 + mov qword ptr [rsi+rax], r11 +randomx_isn_38: + ; FADD_M f1, L2[r3+1451369534] + lea eax, [r11d+1451369534] + and eax, 262136 + cvtdq2pd xmm12, qword ptr [rsi+rax] + addpd xmm1, xmm12 +randomx_isn_39: + ; FMUL_R e1, a1 + mulpd xmm5, xmm9 +randomx_isn_40: + ; FSUB_R f3, a2 + subpd xmm3, xmm10 +randomx_isn_41: + ; IMULH_R r3, r3 + mov rax, r11 + mul r11 + mov r11, rdx +randomx_isn_42: + ; ISUB_R r4, r3 + sub r12, r11 
+randomx_isn_43: + ; CBRANCH r6, 335851892, COND 5 + add r14, 335847796 + test r14, 2088960 + jz randomx_isn_25 +randomx_isn_44: + ; IADD_RS r7, r5, SHFT 3 + lea r15, [r15+r13*8] +randomx_isn_45: + ; CFROUND r6, 48 + mov rax, r14 + rol rax, 29 + and eax, 24576 + or eax, 40896 + push rax + ldmxcsr dword ptr [rsp] + pop rax +randomx_isn_46: + ; IMUL_RCP r6, 2070736307 + mov rax, 9565216276746377827 + imul r14, rax +randomx_isn_47: + ; IXOR_R r2, r4 + xor r10, r12 +randomx_isn_48: + ; IMUL_R r0, r5 + imul r8, r13 +randomx_isn_49: + ; CBRANCH r2, -272659465, COND 15 + add r10, -272659465 + test r10, 2139095040 + jz randomx_isn_48 +randomx_isn_50: + ; ISTORE L1[r6+1414933948], r5 + lea eax, [r14d+1414933948] + and eax, 16376 + mov qword ptr [rsi+rax], r13 +randomx_isn_51: + ; ISTORE L1[r3-1336791747], r6 + lea eax, [r11d-1336791747] + and eax, 16376 + mov qword ptr [rsi+rax], r14 +randomx_isn_52: + ; FSCAL_R f1 + xorps xmm1, xmm15 +randomx_isn_53: + ; CBRANCH r6, -2143810604, COND 1 + add r14, -2143810860 + test r14, 130560 + jz randomx_isn_50 +randomx_isn_54: + ; ISUB_M r3, L1[r1-649360673] + lea eax, [r9d-649360673] + and eax, 16376 + sub r11, qword ptr [rsi+rax] +randomx_isn_55: + ; FADD_R f2, a3 + addpd xmm2, xmm11 +randomx_isn_56: + ; CFROUND r3, 8 + mov rax, r11 + rol rax, 5 + and eax, 24576 + or eax, 40896 + push rax + ldmxcsr dword ptr [rsp] + pop rax +randomx_isn_57: + ; IROR_R r2, r0 + mov ecx, r8d + ror r10, cl +randomx_isn_58: + ; IADD_RS r4, r2, SHFT 1 + lea r12, [r12+r10*2] +randomx_isn_59: + ; CBRANCH r6, -704407571, COND 10 + add r14, -704276499 + test r14, 66846720 + jz randomx_isn_54 +randomx_isn_60: + ; FSUB_R f1, a3 + subpd xmm1, xmm11 +randomx_isn_61: + ; ISUB_R r3, r7 + sub r11, r15 +randomx_isn_62: + ; FMUL_R e2, a2 + mulpd xmm6, xmm10 +randomx_isn_63: + ; FMUL_R e3, a1 + mulpd xmm7, xmm9 +randomx_isn_64: + ; ISTORE L3[r2+845419810], r0 + lea eax, [r10d+845419810] + and eax, 2097144 + mov qword ptr [rsi+rax], r8 +randomx_isn_65: + ; CBRANCH r1, 
-67701844, COND 5 + add r9, -67705940 + test r9, 2088960 + jz randomx_isn_60 +randomx_isn_66: + ; IROR_R r3, r1 + mov ecx, r9d + ror r11, cl +randomx_isn_67: + ; IMUL_R r3, r1 + imul r11, r9 +randomx_isn_68: + ; IROR_R r1, 40 + ror r9, 40 +randomx_isn_69: + ; IMUL_R r3, r0 + imul r11, r8 +randomx_isn_70: + ; IXOR_M r6, L3[1276704] + xor r14, qword ptr [rsi+1276704] +randomx_isn_71: + ; FADD_M f0, L1[r1-1097746982] + lea eax, [r9d-1097746982] + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + addpd xmm0, xmm12 +randomx_isn_72: + ; IMUL_M r7, L1[r2+588700215] + lea eax, [r10d+588700215] + and eax, 16376 + imul r15, qword ptr [rsi+rax] +randomx_isn_73: + ; IXOR_M r2, L2[r3-1120252909] + lea eax, [r11d-1120252909] + and eax, 262136 + xor r10, qword ptr [rsi+rax] +randomx_isn_74: + ; FMUL_R e2, a0 + mulpd xmm6, xmm8 +randomx_isn_75: + ; IMULH_R r2, r1 + mov rax, r10 + mul r9 + mov r10, rdx +randomx_isn_76: + ; FMUL_R e1, a2 + mulpd xmm5, xmm10 +randomx_isn_77: + ; FSQRT_R e1 + sqrtpd xmm5, xmm5 +randomx_isn_78: + ; FSCAL_R f1 + xorps xmm1, xmm15 +randomx_isn_79: + ; FSWAP_R e1 + shufpd xmm5, xmm5, 1 +randomx_isn_80: + ; IXOR_R r3, 721175561 + xor r11, 721175561 +randomx_isn_81: + ; FSCAL_R f0 + xorps xmm0, xmm15 +randomx_isn_82: + ; IADD_RS r3, r0, SHFT 1 + lea r11, [r11+r8*2] +randomx_isn_83: + ; ISUB_R r2, -691647438 + sub r10, -691647438 +randomx_isn_84: + ; IXOR_R r1, r3 + xor r9, r11 +randomx_isn_85: + ; IMULH_R r1, r7 + mov rax, r9 + mul r15 + mov r9, rdx +randomx_isn_86: + ; IMULH_R r3, r4 + mov rax, r11 + mul r12 + mov r11, rdx +randomx_isn_87: + ; CBRANCH r3, -1821955951, COND 5 + add r11, -1821955951 + test r11, 2088960 + jz randomx_isn_87 +randomx_isn_88: + ; FADD_R f2, a3 + addpd xmm2, xmm11 +randomx_isn_89: + ; IXOR_R r6, r3 + xor r14, r11 +randomx_isn_90: + ; CBRANCH r4, -1780348372, COND 15 + add r12, -1784542676 + test r12, 2139095040 + jz randomx_isn_88 +randomx_isn_91: + ; IROR_R r4, 55 + ror r12, 55 +randomx_isn_92: + ; FSUB_R f3, a2 + subpd 
xmm3, xmm10 +randomx_isn_93: + ; FSCAL_R f1 + xorps xmm1, xmm15 +randomx_isn_94: + ; FADD_R f1, a0 + addpd xmm1, xmm8 +randomx_isn_95: + ; ISUB_R r0, r3 + sub r8, r11 +randomx_isn_96: + ; ISMULH_R r5, r7 + mov rax, r13 + imul r15 + mov r13, rdx +randomx_isn_97: + ; IADD_RS r0, r5, SHFT 1 + lea r8, [r8+r13*2] +randomx_isn_98: + ; IMUL_R r7, r3 + imul r15, r11 +randomx_isn_99: + ; IADD_RS r2, r4, SHFT 2 + lea r10, [r10+r12*4] +randomx_isn_100: + ; ISTORE L3[r2+1641523310], r4 + lea eax, [r10d+1641523310] + and eax, 2097144 + mov qword ptr [rsi+rax], r12 +randomx_isn_101: + ; ISTORE L2[r5+1966751371], r5 + lea eax, [r13d+1966751371] + and eax, 262136 + mov qword ptr [rsi+rax], r13 +randomx_isn_102: + ; IXOR_R r4, r7 + xor r12, r15 +randomx_isn_103: + ; CBRANCH r7, -607792642, COND 4 + add r15, -607792642 + test r15, 1044480 + jz randomx_isn_99 +randomx_isn_104: + ; FMUL_R e1, a1 + mulpd xmm5, xmm9 +randomx_isn_105: + ; IMUL_R r2, r3 + imul r10, r11 +randomx_isn_106: + ; IADD_RS r5, r1, -1609896472, SHFT 3 + lea r13, [r13+r9*8-1609896472] +randomx_isn_107: + ; FMUL_R e2, a2 + mulpd xmm6, xmm10 +randomx_isn_108: + ; ISUB_R r3, r6 + sub r11, r14 +randomx_isn_109: + ; ISUB_R r0, r5 + sub r8, r13 +randomx_isn_110: + ; IMUL_M r2, L3[1548384] + imul r10, qword ptr [rsi+1548384] +randomx_isn_111: + ; FADD_R f2, a1 + addpd xmm2, xmm9 +randomx_isn_112: + ; ISUB_M r6, L1[r7+1465746] + lea eax, [r15d+1465746] + and eax, 16376 + sub r14, qword ptr [rsi+rax] +randomx_isn_113: + ; IMULH_M r3, L1[r6-668730597] + lea ecx, [r14d-668730597] + and ecx, 16376 + mov rax, r11 + mul qword ptr [rsi+rcx] + mov r11, rdx +randomx_isn_114: + ; IMUL_M r3, L2[r6-1549338697] + lea eax, [r14d-1549338697] + and eax, 262136 + imul r11, qword ptr [rsi+rax] +randomx_isn_115: + ; IMULH_M r4, L1[r6-82240335] + lea ecx, [r14d-82240335] + and ecx, 16376 + mov rax, r12 + mul qword ptr [rsi+rcx] + mov r12, rdx +randomx_isn_116: + ; ISWAP_R r2, r4 + xchg r10, r12 +randomx_isn_117: + ; IADD_RS r1, r0, SHFT 1 + 
lea r9, [r9+r8*2] +randomx_isn_118: + ; FSUB_R f0, a1 + subpd xmm0, xmm9 +randomx_isn_119: + ; IADD_M r3, L1[r1-233433054] + lea eax, [r9d-233433054] + and eax, 16376 + add r11, qword ptr [rsi+rax] +randomx_isn_120: + ; FSUB_R f1, a0 + subpd xmm1, xmm8 +randomx_isn_121: + ; ISUB_R r4, r3 + sub r12, r11 +randomx_isn_122: + ; IXOR_M r6, L2[r1-425418413] + lea eax, [r9d-425418413] + and eax, 262136 + xor r14, qword ptr [rsi+rax] +randomx_isn_123: + ; FSQRT_R e2 + sqrtpd xmm6, xmm6 +randomx_isn_124: + ; CBRANCH r1, -1807592127, COND 12 + add r9, -1806543551 + test r9, 267386880 + jz randomx_isn_118 +randomx_isn_125: + ; IADD_RS r4, r4, SHFT 0 + lea r12, [r12+r12*1] +randomx_isn_126: + ; ISTORE L2[r5-104490218], r0 + lea eax, [r13d-104490218] + and eax, 262136 + mov qword ptr [rsi+rax], r8 +randomx_isn_127: + ; IXOR_R r5, r0 + xor r13, r8 +randomx_isn_128: + ; IMUL_M r6, L1[r2-603755642] + lea eax, [r10d-603755642] + and eax, 16376 + imul r14, qword ptr [rsi+rax] +randomx_isn_129: + ; INEG_R r5 + neg r13 +randomx_isn_130: + ; FMUL_R e0, a0 + mulpd xmm4, xmm8 +randomx_isn_131: + ; ISUB_R r0, -525100988 + sub r8, -525100988 +randomx_isn_132: + ; IMUL_RCP r0, 3636489804 + mov rax, 10893494383940851768 + imul r8, rax +randomx_isn_133: + ; FADD_M f2, L1[r3-768193829] + lea eax, [r11d-768193829] + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + addpd xmm2, xmm12 +randomx_isn_134: + ; IADD_RS r7, r7, SHFT 3 + lea r15, [r15+r15*8] +randomx_isn_135: + ; IROR_R r3, r2 + mov ecx, r10d + ror r11, cl +randomx_isn_136: + ; ISUB_R r1, r4 + sub r9, r12 +randomx_isn_137: + ; FADD_M f2, L1[r3+1221716517] + lea eax, [r11d+1221716517] + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + addpd xmm2, xmm12 +randomx_isn_138: + ; FDIV_M e2, L1[r3-1258284098] + lea eax, [r11d-1258284098] + and eax, 16376 + cvtdq2pd xmm12, qword ptr [rsi+rax] + andps xmm12, xmm13 + orps xmm12, xmm14 + divpd xmm6, xmm12 +randomx_isn_139: + ; FSUB_R f1, a0 + subpd xmm1, xmm8 +randomx_isn_140: + ; 
IADD_RS r5, r6, -1773817530, SHFT 3 + lea r13, [r13+r14*8-1773817530] +randomx_isn_141: + ; IADD_M r0, L3[540376] + add r8, qword ptr [rsi+540376] +randomx_isn_142: + ; FMUL_R e1, a3 + mulpd xmm5, xmm11 +randomx_isn_143: + ; IADD_RS r6, r3, SHFT 2 + lea r14, [r14+r11*4] +randomx_isn_144: + ; ISTORE L1[r6+1837899146], r5 + lea eax, [r14d+1837899146] + and eax, 16376 + mov qword ptr [rsi+rax], r13 +randomx_isn_145: + ; FSWAP_R f2 + shufpd xmm2, xmm2, 1 +randomx_isn_146: + ; FMUL_R e0, a0 + mulpd xmm4, xmm8 +randomx_isn_147: + ; IADD_RS r1, r4, SHFT 3 + lea r9, [r9+r12*8] +randomx_isn_148: + ; ISUB_M r1, L2[r6-326072101] + lea eax, [r14d-326072101] + and eax, 262136 + sub r9, qword ptr [rsi+rax] +randomx_isn_149: + ; FSUB_R f1, a1 + subpd xmm1, xmm9 +randomx_isn_150: + ; FADD_M f0, L2[r5+1123208251] + lea eax, [r13d+1123208251] + and eax, 262136 + cvtdq2pd xmm12, qword ptr [rsi+rax] + addpd xmm0, xmm12 +randomx_isn_151: + ; FSWAP_R f1 + shufpd xmm1, xmm1, 1 +randomx_isn_152: + ; IMUL_M r3, L1[r4+522054565] + lea eax, [r12d+522054565] + and eax, 16376 + imul r11, qword ptr [rsi+rax] +randomx_isn_153: + ; IADD_RS r0, r0, SHFT 1 + lea r8, [r8+r8*2] +randomx_isn_154: + ; FMUL_R e2, a3 + mulpd xmm6, xmm11 +randomx_isn_155: + ; FSUB_R f1, a2 + subpd xmm1, xmm10 +randomx_isn_156: + ; ISTORE L1[r6+1559762664], r7 + lea eax, [r14d+1559762664] + and eax, 16376 + mov qword ptr [rsi+rax], r15 +randomx_isn_157: + ; FSUB_R f0, a1 + subpd xmm0, xmm9 +randomx_isn_158: + ; ISUB_R r5, r6 + sub r13, r14 +randomx_isn_159: + ; FADD_R f0, a0 + addpd xmm0, xmm8 +randomx_isn_160: + ; FMUL_R e1, a0 + mulpd xmm5, xmm8 +randomx_isn_161: + ; FSUB_R f2, a1 + subpd xmm2, xmm9 +randomx_isn_162: + ; ISUB_R r5, r7 + sub r13, r15 +randomx_isn_163: + ; FDIV_M e3, L2[r4-1912085642] + lea eax, [r12d-1912085642] + and eax, 262136 + cvtdq2pd xmm12, qword ptr [rsi+rax] + andps xmm12, xmm13 + orps xmm12, xmm14 + divpd xmm7, xmm12 +randomx_isn_164: + ; IXOR_M r3, L1[r0-858372123] + lea eax, [r8d-858372123] + 
and eax, 16376 + xor r11, qword ptr [rsi+rax] +randomx_isn_165: + ; IXOR_R r4, r6 + xor r12, r14 +randomx_isn_166: + ; IADD_RS r3, r6, SHFT 0 + lea r11, [r11+r14*1] +randomx_isn_167: + ; FMUL_R e1, a1 + mulpd xmm5, xmm9 +randomx_isn_168: + ; IADD_RS r5, r2, -371238437, SHFT 1 + lea r13, [r13+r10*2-371238437] +randomx_isn_169: + ; ISTORE L2[r5-633500019], r5 + lea eax, [r13d-633500019] + and eax, 262136 + mov qword ptr [rsi+rax], r13 +randomx_isn_170: + ; IXOR_R r4, -246154334 + xor r12, -246154334 +randomx_isn_171: + ; IROR_R r7, r5 + mov ecx, r13d + ror r15, cl +randomx_isn_172: + ; ISTORE L1[r5+4726218], r2 + lea eax, [r13d+4726218] + and eax, 16376 + mov qword ptr [rsi+rax], r10 +randomx_isn_173: + ; IADD_RS r2, r0, SHFT 3 + lea r10, [r10+r8*8] +randomx_isn_174: + ; IXOR_R r2, r6 + xor r10, r14 +randomx_isn_175: + ; IADD_RS r0, r7, SHFT 0 + lea r8, [r8+r15*1] +randomx_isn_176: + ; FMUL_R e1, a1 + mulpd xmm5, xmm9 +randomx_isn_177: + ; ISTORE L1[r1+962725405], r0 + lea eax, [r9d+962725405] + and eax, 16376 + mov qword ptr [rsi+rax], r8 +randomx_isn_178: + ; ISTORE L1[r5-1472969684], r4 + lea eax, [r13d-1472969684] + and eax, 16376 + mov qword ptr [rsi+rax], r12 +randomx_isn_179: + ; FSCAL_R f3 + xorps xmm3, xmm15 +randomx_isn_180: + ; IXOR_M r7, L1[r5+1728657403] + lea eax, [r13d+1728657403] + and eax, 16376 + xor r15, qword ptr [rsi+rax] +randomx_isn_181: + ; CBRANCH r2, -759703940, COND 2 + add r10, -759704452 + test r10, 261120 + jz randomx_isn_175 +randomx_isn_182: + ; FADD_R f1, a2 + addpd xmm1, xmm10 +randomx_isn_183: + ; IMULH_R r5, r1 + mov rax, r13 + mul r9 + mov r13, rdx +randomx_isn_184: + ; FSUB_R f3, a2 + subpd xmm3, xmm10 +randomx_isn_185: + ; IMUL_R r6, r2 + imul r14, r10 +randomx_isn_186: + ; IROR_R r2, r6 + mov ecx, r14d + ror r10, cl +randomx_isn_187: + ; FADD_R f2, a3 + addpd xmm2, xmm11 +randomx_isn_188: + ; FSUB_R f3, a2 + subpd xmm3, xmm10 +randomx_isn_189: + ; FSUB_R f0, a1 + subpd xmm0, xmm9 +randomx_isn_190: + ; FSUB_R f1, a2 + subpd 
xmm1, xmm10 +randomx_isn_191: + ; ISTORE L2[r0+519974891], r5 + lea eax, [r8d+519974891] + and eax, 262136 + mov qword ptr [rsi+rax], r13 +randomx_isn_192: + ; IXOR_R r3, r0 + xor r11, r8 +randomx_isn_193: + ; IMUL_RCP r3, 2631645861 + mov rax, 15052968123180221777 + imul r11, rax +randomx_isn_194: + ; FSCAL_R f2 + xorps xmm2, xmm15 +randomx_isn_195: + ; IMUL_RCP r6, 3565118466 + mov rax, 11111575010739676440 + imul r14, rax +randomx_isn_196: + ; IMUL_RCP r7, 2240276148 + mov rax, 17682677777245240213 + imul r15, rax +randomx_isn_197: + ; FADD_R f3, a0 + addpd xmm3, xmm8 +randomx_isn_198: + ; ISTORE L3[r7-908286266], r0 + lea eax, [r15d-908286266] + and eax, 2097144 + mov qword ptr [rsi+rax], r8 +randomx_isn_199: + ; FMUL_R e0, a1 + mulpd xmm4, xmm9 +randomx_isn_200: + ; FADD_R f1, a2 + addpd xmm1, xmm10 +randomx_isn_201: + ; IADD_RS r3, r2, SHFT 3 + lea r11, [r11+r10*8] +randomx_isn_202: + ; FSUB_R f0, a0 + subpd xmm0, xmm8 +randomx_isn_203: + ; CBRANCH r1, -1282235504, COND 2 + add r9, -1282234992 + test r9, 261120 + jz randomx_isn_182 +randomx_isn_204: + ; IMUL_M r1, L3[176744] + imul r9, qword ptr [rsi+176744] +randomx_isn_205: + ; FSWAP_R e1 + shufpd xmm5, xmm5, 1 +randomx_isn_206: + ; CBRANCH r0, -1557284726, COND 14 + add r8, -1555187574 + test r8, 1069547520 + jz randomx_isn_204 +randomx_isn_207: + ; IADD_M r3, L1[r0+72267507] + lea eax, [r8d+72267507] + and eax, 16376 + add r11, qword ptr [rsi+rax] +randomx_isn_208: + ; ISUB_R r7, r0 + sub r15, r8 +randomx_isn_209: + ; IROR_R r3, r2 + mov ecx, r10d + ror r11, cl +randomx_isn_210: + ; ISUB_R r0, r3 + sub r8, r11 +randomx_isn_211: + ; IMUL_RCP r7, 3271526781 + mov rax, 12108744298594255889 + imul r15, rax +randomx_isn_212: + ; FSQRT_R e2 + sqrtpd xmm6, xmm6 +randomx_isn_213: + ; IMUL_R r0, r4 + imul r8, r12 +randomx_isn_214: + ; FSWAP_R f3 + shufpd xmm3, xmm3, 1 +randomx_isn_215: + ; FADD_R f2, a1 + addpd xmm2, xmm9 +randomx_isn_216: + ; ISMULH_M r5, L1[r4-1702277076] + lea ecx, [r12d-1702277076] + and ecx, 
16376 + mov rax, r13 + imul qword ptr [rsi+rcx] + mov r13, rdx +randomx_isn_217: + ; ISUB_R r4, r2 + sub r12, r10 +randomx_isn_218: + ; FMUL_R e1, a2 + mulpd xmm5, xmm10 +randomx_isn_219: + ; FSUB_R f3, a1 + subpd xmm3, xmm9 +randomx_isn_220: + ; ISTORE L2[r1+1067932664], r3 + lea eax, [r9d+1067932664] + and eax, 262136 + mov qword ptr [rsi+rax], r11 +randomx_isn_221: + ; IROR_R r6, r4 + mov ecx, r12d + ror r14, cl +randomx_isn_222: + ; FSUB_R f1, a1 + subpd xmm1, xmm9 +randomx_isn_223: + ; ISUB_R r2, r5 + sub r10, r13 +randomx_isn_224: + ; IXOR_R r2, r7 + xor r10, r15 +randomx_isn_225: + ; IXOR_R r7, r5 + xor r15, r13 +randomx_isn_226: + ; IMUL_RCP r4, 1021824288 + mov rax, 9691999329617659469 + imul r12, rax +randomx_isn_227: + ; IROR_R r1, 48 + ror r9, 48 +randomx_isn_228: + ; IMUL_RCP r4, 4042529026 + mov rax, 9799331310263836012 + imul r12, rax +randomx_isn_229: + ; FSQRT_R e1 + sqrtpd xmm5, xmm5 +randomx_isn_230: + ; IROR_R r3, r6 + mov ecx, r14d + ror r11, cl +randomx_isn_231: + ; FMUL_R e2, a1 + mulpd xmm6, xmm9 +randomx_isn_232: + ; IMULH_M r4, L1[r6+396272725] + lea ecx, [r14d+396272725] + and ecx, 16376 + mov rax, r12 + mul qword ptr [rsi+rcx] + mov r12, rdx +randomx_isn_233: + ; FSUB_R f0, a0 + subpd xmm0, xmm8 +randomx_isn_234: + ; FADD_R f3, a2 + addpd xmm3, xmm10 +randomx_isn_235: + ; IADD_RS r7, r3, SHFT 1 + lea r15, [r15+r11*2] +randomx_isn_236: + ; ISUB_R r6, r3 + sub r14, r11 +randomx_isn_237: + ; IADD_RS r4, r4, SHFT 2 + lea r12, [r12+r12*4] +randomx_isn_238: + ; ISUB_R r7, r1 + sub r15, r9 +randomx_isn_239: + ; ISMULH_R r2, r5 + mov rax, r10 + imul r13 + mov r10, rdx +randomx_isn_240: + ; FMUL_R e1, a2 + mulpd xmm5, xmm10 +randomx_isn_241: + ; IADD_RS r1, r4, SHFT 2 + lea r9, [r9+r12*4] +randomx_isn_242: + ; FDIV_M e2, L2[r6+259737107] + lea eax, [r14d+259737107] + and eax, 262136 + cvtdq2pd xmm12, qword ptr [rsi+rax] + andps xmm12, xmm13 + orps xmm12, xmm14 + divpd xmm6, xmm12 +randomx_isn_243: + ; IADD_M r0, L1[r1+789576070] + lea eax, 
[r9d+789576070] + and eax, 16376 + add r8, qword ptr [rsi+rax] +randomx_isn_244: + ; IMUL_R r3, r4 + imul r11, r12 +randomx_isn_245: + ; IMUL_R r3, r1 + imul r11, r9 +randomx_isn_246: + ; IMUL_RCP r4, 1001661150 + mov rax, 9887096364157721599 + imul r12, rax +randomx_isn_247: + ; CBRANCH r3, -722123512, COND 2 + add r11, -722123512 + test r11, 261120 + jz randomx_isn_246 +randomx_isn_248: + ; ISMULH_R r7, r6 + mov rax, r15 + imul r14 + mov r15, rdx +randomx_isn_249: + ; IADD_M r5, L3[1870552] + add r13, qword ptr [rsi+1870552] +randomx_isn_250: + ; ISUB_R r0, r1 + sub r8, r9 +randomx_isn_251: + ; IMULH_R r0, r5 + mov rax, r8 + mul r13 + mov r8, rdx +randomx_isn_252: + ; FSUB_R f1, a1 + subpd xmm1, xmm9 +randomx_isn_253: + ; ISTORE L2[r3-2010380786], r5 + lea eax, [r11d-2010380786] + and eax, 262136 + mov qword ptr [rsi+rax], r13 +randomx_isn_254: + ; FMUL_R e3, a2 + mulpd xmm7, xmm10 +randomx_isn_255: + ; CBRANCH r7, -2007380935, COND 9 + add r15, -2007315399 + test r15, 33423360 + jz randomx_isn_249 diff --git a/RandomX/doc/specs.md b/RandomX/doc/specs.md new file mode 100644 index 00000000..3764c162 --- /dev/null +++ b/RandomX/doc/specs.md @@ -0,0 +1,938 @@ +# RandomX + +RandomX is a proof of work (PoW) algorithm which was designed to close the gap between general-purpose CPUs and specialized hardware. The core of the algorithm is a simulation of a virtual CPU. + +#### Table of contents + +1. [Definitions](#1-definitions) +1. [Algorithm description](#2-algorithm-description) +1. [Custom functions](#3-custom-functions) +1. [Virtual Machine](#4-virtual-machine) +1. [Instruction set](#5-instruction-set) +1. [SuperscalarHash](#6-superscalarhash) +1. [Dataset](#7-dataset) + + +## 1. Definitions + +### 1.1 General definitions + +**Hash256** and **Hash512** refer to the [Blake2b](https://blake2.net/blake2_20130129.pdf) hashing function with a 256-bit and 512-bit output size, respectively. 
+ +**Floating point format** refers to the [IEEE-754 double precision floating point format](https://en.wikipedia.org/wiki/Double-precision_floating-point_format) with a sign bit, 11-bit exponent and 52-bit fraction. + +**Argon2d** is a tradeoff-resistant variant of [Argon2](https://github.com/P-H-C/phc-winner-argon2/blob/master/argon2-specs.pdf), a memory-hard password derivation function. + +**AesGenerator1R** refers to an AES-based pseudo-random number generator described in chapter 3.2. It's initialized with a 512-bit seed value and is capable of producing more than 10 bytes per clock cycle. + +**AesGenerator4R** is a slower but more secure AES-based pseudo-random number generator described in chapter 3.3. It's initialized with a 512-bit seed value. + +**AesHash1R** refers to an AES-based fingerprinting function described in chapter 3.4. It's capable of processing more than 10 bytes per clock cycle and produces a 512-bit output. + +**BlakeGenerator** refers to a custom pseudo-random number generator described in the BlakeGenerator section of chapter 3. It's based on the Blake2b hashing function. + +**SuperscalarHash** refers to a custom diffusion function designed to run efficiently on superscalar CPUs (see chapter 6). It transforms a 64-byte input value into a 64-byte output value. + +**Virtual Machine** or **VM** refers to the RandomX virtual machine as described in chapter 4. + +**Programming the VM** refers to the act of loading a program and configuration into the VM. This is described in chapter 4.5. + +**Executing the VM** refers to the act of running the program loop as described in chapter 4.6. + +**Scratchpad** refers to the workspace memory of the VM. The whole scratchpad is structured into 3 levels: L3 -> L2 -> L1 with each lower level being a subset of the higher levels. + +**Register File** refers to a 256-byte sequence formed by concatenating VM registers in little-endian format in the following order: `r0`-`r7`, `f0`-`f3`, `e0`-`e3` and `a0`-`a3`. 
+ +**Program Buffer** refers to the buffer from which the VM reads instructions. + +**Cache** refers to a read-only buffer initialized by Argon2d as described in chapter 7.1. + +**Dataset** refers to a large read-only buffer described in chapter 7. It is constructed from the Cache using the SuperscalarHash function. + +### 1.2 Configurable parameters +RandomX has several configurable parameters that are listed in Table 1.2.1 with their default values. + +*Table 1.2.1 - Configurable parameters* + +|parameter|description|default value| +|---------|-----|-------| +|`RANDOMX_ARGON_MEMORY`|The number of 1 KiB Argon2 blocks in the Cache| `262144`| +|`RANDOMX_ARGON_ITERATIONS`|The number of Argon2d iterations for Cache initialization|`3`| +|`RANDOMX_ARGON_LANES`|The number of parallel lanes for Cache initialization|`1`| +|`RANDOMX_ARGON_SALT`|Argon2 salt|`"RandomX\x03"`| +|`RANDOMX_CACHE_ACCESSES`|The number of random Cache accesses per Dataset item|`8`| +|`RANDOMX_SUPERSCALAR_LATENCY`|Target latency for SuperscalarHash (in cycles of the reference CPU)|`170`| +|`RANDOMX_DATASET_BASE_SIZE`|Dataset base size in bytes|`2147483648`| +|`RANDOMX_DATASET_EXTRA_SIZE`|Dataset extra size in bytes|`33554368`| +|`RANDOMX_PROGRAM_SIZE`|The number of instructions in a RandomX program|`256`| +|`RANDOMX_PROGRAM_ITERATIONS`|The number of iterations per program|`2048`| +|`RANDOMX_PROGRAM_COUNT`|The number of programs per hash|`8`| +|`RANDOMX_JUMP_BITS`|Jump condition mask size in bits|`8`| +|`RANDOMX_JUMP_OFFSET`|Jump condition mask offset in bits|`8`| +|`RANDOMX_SCRATCHPAD_L3`|Scratchpad L3 size in bytes|`2097152`| +|`RANDOMX_SCRATCHPAD_L2`|Scratchpad L2 size in bytes|`262144`| +|`RANDOMX_SCRATCHPAD_L1`|Scratchpad L1 size in bytes|`16384`| + +Instruction frequencies listed in Tables 5.2.1, 5.3.1, 5.4.1 and 5.5.1 are also configurable. + + +## 2. 
Algorithm description + +The RandomX algorithm accepts two input values: + +* String `K` with a size of 0-60 bytes (key) +* String `H` of arbitrary length (the value to be hashed) + +and outputs a 256-bit result `R`. + +The algorithm consists of the following steps: + +1. The Dataset is initialized using the key value `K` (described in chapter 7). +1. 64-byte seed `S` is calculated as `S = Hash512(H)`. +1. Let `gen1 = AesGenerator1R(S)`. +1. The Scratchpad is filled with `RANDOMX_SCRATCHPAD_L3` random bytes using generator `gen1`. +1. Let `gen4 = AesGenerator4R(gen1.state)` (use the final state of `gen1`). +1. The value of the VM register `fprc` is set to 0 (default rounding mode - chapter 4.3). +1. The VM is programmed using `128 + 8 * RANDOMX_PROGRAM_SIZE` random bytes using generator `gen4` (chapter 4.5). +1. The VM is executed (chapter 4.6). +1. A new 64-byte seed is calculated as `S = Hash512(RegisterFile)`. +1. Set `gen4.state = S` (modify the state of the generator). +1. Steps 7-10 are performed a total of `RANDOMX_PROGRAM_COUNT` times. The last iteration skips steps 9 and 10. +1. Scratchpad fingerprint is calculated as `A = AesHash1R(Scratchpad)`. +1. Bytes 192-255 of the Register File are set to the value of `A`. +1. Result is calculated as `R = Hash256(RegisterFile)`. 
+ +The input of the `Hash512` function in step 9 is the following 256 bytes: +``` + +---------------------------------+ + | registers r0-r7 | (64 bytes) + +---------------------------------+ + | registers f0-f3 | (64 bytes) + +---------------------------------+ + | registers e0-e3 | (64 bytes) + +---------------------------------+ + | registers a0-a3 | (64 bytes) + +---------------------------------+ +``` + +The input of the `Hash256` function in step 14 is the following 256 bytes: +``` + +---------------------------------+ + | registers r0-r7 | (64 bytes) + +---------------------------------+ + | registers f0-f3 | (64 bytes) + +---------------------------------+ + | registers e0-e3 | (64 bytes) + +---------------------------------+ + | AesHash1R(Scratchpad) | (64 bytes) + +---------------------------------+ +``` + +## 3. Custom functions + +### 3.1 Definitions + +Three of the custom functions are based on the [Advanced Encryption Standard](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) (AES). + +**AES encryption round** refers to the application of the ShiftRows, SubBytes and MixColumns transformations followed by an XOR with the round key. + +**AES decryption round** refers to the application of inverse ShiftRows, inverse SubBytes and inverse MixColumns transformations followed by an XOR with the round key. + +### 3.2 AesGenerator1R + +AesGenerator1R produces a sequence of pseudo-random bytes. + +The internal state of the generator consists of 64 bytes arranged into four columns of 16 bytes each. 
During each output iteration, every column is decrypted (columns 0, 2) or encrypted (columns 1, 3) with one AES round using the following round keys (one key per column): + +``` +key0 = 53 a5 ac 6d 09 66 71 62 2b 55 b5 db 17 49 f4 b4 +key1 = 07 af 7c 6d 0d 71 6a 84 78 d3 25 17 4e dc a1 0d +key2 = f1 62 12 3f c6 7e 94 9f 4f 79 c0 f4 45 e3 20 3e +key3 = 35 81 ef 6a 7c 31 ba b1 88 4c 31 16 54 91 16 49 +``` +These keys were generated as: +``` +key0, key1, key2, key3 = Hash512("RandomX AesGenerator1R keys") +``` + + +Single iteration produces 64 bytes of output which also become the new generator state. +``` +state0 (16 B) state1 (16 B) state2 (16 B) state3 (16 B) + | | | | + AES decrypt AES encrypt AES decrypt AES encrypt + (key0) (key1) (key2) (key3) + | | | | + v v v v + state0' state1' state2' state3' +``` + +### 3.3 AesGenerator4R + +AesGenerator4R works the same way as AesGenerator1R, except it uses 4 rounds per column: + +``` +state0 (16 B) state1 (16 B) state2 (16 B) state3 (16 B) + | | | | + AES decrypt AES encrypt AES decrypt AES encrypt + (key0) (key0) (key0) (key0) + | | | | + v v v v + AES decrypt AES encrypt AES decrypt AES encrypt + (key1) (key1) (key1) (key1) + | | | | + v v v v + AES decrypt AES encrypt AES decrypt AES encrypt + (key2) (key2) (key2) (key2) + | | | | + v v v v + AES decrypt AES encrypt AES decrypt AES encrypt + (key3) (key3) (key3) (key3) + | | | | + v v v v + state0' state1' state2' state3' +``` + +AesGenerator4R uses the following 4 round keys: + +``` +key0 = 5d 46 90 f8 a6 e4 fb 7f b7 82 1f 14 95 9e 35 cf +key1 = 50 c4 55 6a 8a 27 e8 fe c3 5a 5c bd dc ff 41 67 +key2 = a4 47 4c 11 e4 fd 24 d5 d2 9a 27 a7 ac 4a 32 3d +key3 = 2a 3a 0c 81 ff ae a9 99 d9 db d3 42 08 db f6 76 +``` +These keys were generated as: +``` +key0, key1, key2, key3 = Hash512("RandomX AesGenerator4R keys") +``` + +### 3.4 AesHash1R + +AesHash1R calculates a 512-bit fingerprint of its input. 
+ +AesHash1R has a 64-byte internal state, which is arranged into four columns of 16 bytes each. The initial state is: + +``` +state0 = 0d 2c b5 92 de 56 a8 9f 47 db 82 cc ad 3a 98 d7 +state1 = 6e 99 8d 33 98 b7 c7 15 5a 12 9e f5 57 80 e7 ac +state2 = 17 00 77 6a d0 c7 62 ae 6b 50 79 50 e4 7c a0 e8 +state3 = 0c 24 0a 63 8d 82 ad 07 05 00 a1 79 48 49 99 7e +``` + +The initial state vectors were generated as: +``` +state0, state1, state2, state3 = Hash512("RandomX AesHash1R state") +``` + +The input is processed in 64-byte blocks. Each input block is considered to be a set of four AES round keys `key0`, `key1`, `key2`, `key3`. Each state column is encrypted (columns 0, 2) or decrypted (columns 1, 3) with one AES round using the corresponding round key: + +``` +state0 (16 B) state1 (16 B) state2 (16 B) state3 (16 B) + | | | | + AES encrypt AES decrypt AES encrypt AES decrypt + (key0) (key1) (key2) (key3) + | | | | + v v v v + state0' state1' state2' state3' +``` + +When all input bytes have been processed, the state is processed with two additional AES rounds with the following extra keys (one key per round, same pair of keys for all columns): + +``` +xkey0 = 89 83 fa f6 9f 94 24 8b bf 56 dc 90 01 02 89 06 +xkey1 = d1 63 b2 61 3c e0 f4 51 c6 43 10 ee 9b f9 18 ed +``` + +The extra keys were generated as: +``` +xkey0, xkey1 = Hash256("RandomX AesHash1R xkeys") +``` + +``` +state0 (16 B) state1 (16 B) state2 (16 B) state3 (16 B) + | | | | + AES encrypt AES decrypt AES encrypt AES decrypt + (xkey0) (xkey0) (xkey0) (xkey0) + | | | | + v v v v + AES encrypt AES decrypt AES encrypt AES decrypt + (xkey1) (xkey1) (xkey1) (xkey1) + | | | | + v v v v +finalState0 finalState1 finalState2 finalState3 +``` + +The final state is the output of the function. + +### 3.5 BlakeGenerator + +BlakeGenerator is a simple pseudo-random number generator based on the Blake2b hashing function. It has a 64-byte internal state `S`. 
+ +#### 3.5.1 Initialization + +The internal state is initialized from a seed value `K` (0-60 bytes long). The seed value is written into the internal state and padded with zeroes. Then the internal state is initialized as `S = Hash512(S)`. + +#### 3.5.2 Random number generation + +The generator can generate 1 byte or 4 bytes at a time by supplying data from its internal state `S`. If there are not enough unused bytes left, the internal state is reinitialized as `S = Hash512(S)`. + +## 4. Virtual Machine + +The components of the RandomX virtual machine are summarized in Fig. 4.1. + +*Figure 4.1 - Virtual Machine* + +![Imgur](https://i.imgur.com/Enk42b8.png) + +The VM is a complex instruction set computer ([CISC](https://en.wikipedia.org/wiki/Complex_instruction_set_computer)). All data are loaded and stored in little-endian byte order. Signed integer numbers are represented using [two's complement](https://en.wikipedia.org/wiki/Two%27s_complement). + +### 4.1 Dataset + +Dataset is described in detail in chapter 7. It's a large read-only buffer. Its size is equal to `RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE` bytes. Each program uses only a random subset of the Dataset of size `RANDOMX_DATASET_BASE_SIZE`. All Dataset accesses read an aligned 64-byte item. + +### 4.2 Scratchpad + +Scratchpad represents the workspace memory of the VM. Its size is `RANDOMX_SCRATCHPAD_L3` bytes and it's divided into 3 "levels": + +* The whole scratchpad is the third level "L3". +* The first `RANDOMX_SCRATCHPAD_L2` bytes of the scratchpad is the second level "L2". +* The first `RANDOMX_SCRATCHPAD_L1` bytes of the scratchpad is the first level "L1". + +The scratchpad levels are inclusive, i.e. L3 contains both L2 and L1 and L2 contains L1. + +To access a particular scratchpad level, bitwise AND with a mask according to table 4.2.1 is applied to the memory address. 
+ +*Table 4.2.1: Scratchpad access masks* + +|Level|8-byte aligned mask|64-byte aligned mask| +|---------|-|-| +|L1|`(RANDOMX_SCRATCHPAD_L1 - 1) & ~7`|-| +|L2|`(RANDOMX_SCRATCHPAD_L2 - 1) & ~7`|-| +|L3|`(RANDOMX_SCRATCHPAD_L3 - 1) & ~7`|`(RANDOMX_SCRATCHPAD_L3 - 1) & ~63`| + +### 4.3 Registers + +The VM has 8 integer registers `r0`-`r7` (group R) and a total of 12 floating point registers split into 3 groups: `f0`-`f3` (group F), `e0`-`e3` (group E) and `a0`-`a3` (group A). Integer registers are 64 bits wide, while floating point registers are 128 bits wide and contain a pair of numbers in floating point format. The lower and upper half of floating point registers are not separately addressable. + +Additionally, there are 3 internal registers `ma`, `mx` and `fprc`. + +Integer registers `r0`-`r7` can be the source or the destination operands of integer instructions or may be used as address registers for accessing the Scratchpad. + +Floating point registers `a0`-`a3` are read-only and their value is fixed for a given VM program. They can be the source operand of any floating point instruction. The value of these registers is restricted to the interval `[1, 4294967296)`. + +Floating point registers `f0`-`f3` are the "additive" registers, which can be the destination of floating point addition and subtraction instructions. The absolute value of these registers will not exceed about `3.0e+14`. + +Floating point registers `e0`-`e3` are the "multiplicative" registers, which can be the destination of floating point multiplication, division and square root instructions. Their value is always positive. + +`ma` and `mx` are the memory registers. Both are 32 bits wide. `ma` contains the memory address of the next Dataset read and `mx` contains the address of the next Dataset prefetch. + +The 2-bit `fprc` register determines the rounding mode of all floating point operations according to Table 4.3.1. The four rounding modes are defined by the IEEE 754 standard. 
+ +*Table 4.3.1: Rounding modes* + +|`fprc`|rounding mode| +|-------|------------| +|0|roundTiesToEven| +|1|roundTowardNegative| +|2|roundTowardPositive| +|3|roundTowardZero| + +#### 4.3.1 Group F register conversion + +When an 8-byte value read from the memory is to be converted to an F group register value or operand, it is interpreted as a pair of 32-bit signed integers (in little endian, two's complement format) and converted to floating point format. This conversion is exact and doesn't need rounding because only 30 bits of the fraction significand are needed to represent the integer value. + +#### 4.3.2 Group E register conversion + +When an 8-byte value read from the memory is to be converted to an E group register value or operand, the same conversion procedure is applied as for F group registers (see 4.3.1) with additional post-processing steps for each of the two floating point values: + +1. The sign bit is set to `0`. +2. Bits 0-2 of the exponent are set to the constant value of $011_2$. +3. Bits 3-6 of the exponent are set to the value of the exponent mask described in chapter 4.5.6. This value is fixed for a given VM program. +4. The bottom 22 bits of the fraction significand are set to the value of the fraction mask described in chapter 4.5.6. This value is fixed for a given VM program. + +### 4.4 Program buffer + +The Program buffer stores the program to be executed by the VM. The program consists of `RANDOMX_PROGRAM_SIZE` instructions. Each instruction is encoded by an 8-byte word. The instruction set is described in chapter 5. + +### 4.5 VM programming + +The VM requires `128 + 8 * RANDOMX_PROGRAM_SIZE` bytes to be programmed. 
This is split into two parts: + +* `128` bytes of configuration data = 16 quadwords (16×8 bytes), used according to Table 4.5.1 +* `8 * RANDOMX_PROGRAM_SIZE` bytes of program data, copied directly into the Program Buffer + +*Table 4.5.1 - Configuration data* + +|quadword|description| +|-----|-----------| +|0|initialize low half of register `a0`| +|1|initialize high half of register `a0`| +|2|initialize low half of register `a1`| +|3|initialize high half of register `a1`| +|4|initialize low half of register `a2`| +|5|initialize high half of register `a2`| +|6|initialize low half of register `a3`| +|7|initialize high half of register `a3`| +|8|initialize register `ma`| +|9|(reserved)| +|10|initialize register `mx`| +|11|(reserved)| +|12|select address registers| +|13|select Dataset offset| +|14|initialize register masks for low half of group E registers| +|15|initialize register masks for high half of group E registers| + +#### 4.5.2 Group A register initialization + +The values of the floating point registers `a0`-`a3` are initialized using configuration quadwords 0-7 to have the following value: + +$+1.\mathrm{fraction} \times 2^{\mathrm{exponent}}$ + +The fraction has full 52 bits of precision and the exponent value ranges from 0 to 31. These values are obtained from the initialization quadword (in little endian format) according to Table 4.5.2. + +*Table 4.5.2 - Group A register initialization* + +|bits|description| +|----|-----------| +|0-51|fraction| +|52-58|(reserved)| +|59-63|exponent| + +#### 4.5.3 Memory registers + +Registers `ma` and `mx` are initialized using the low 32 bits of quadwords 8 and 10 in little endian format. + +#### 4.5.4 Address registers + +Bits 0-3 of quadword 12 are used to select 4 address registers for program execution. Each bit chooses one register from a pair of integer registers according to Table 4.5.3. 
+ +*Table 4.5.3 - Address registers* + +|address register (bit)|value = 0|value = 1| +|----------------------|-|-| +|`readReg0` (0)|`r0`|`r1`| +|`readReg1` (1)|`r2`|`r3`| +|`readReg2` (2)|`r4`|`r5`| +|`readReg3` (3)|`r6`|`r7`| + +#### 4.5.5 Dataset offset + +The `datasetOffset` is calculated by bitwise AND of quadword 13 and the value `RANDOMX_DATASET_EXTRA_SIZE / 64`. The result is multiplied by `64`. This offset is used when reading values from the Dataset. + +#### 4.5.6 Group E register masks + +These masks are used for the conversion of group E registers (see 4.3.2). The low and high halves each have their own masks initialized from quadwords 14 and 15. The fraction mask is given by bits 0-21 and the exponent mask by bits 60-63 of the initialization quadword. + +### 4.6 VM execution + +During VM execution, 3 additional temporary registers are used: `ic`, `spAddr0` and `spAddr1`. Program execution consists of initialization and loop execution. + +#### 4.6.1 Initialization + +1. `ic` register is set to `RANDOMX_PROGRAM_ITERATIONS`. +2. `spAddr0` is set to the value of `mx`. +3. `spAddr1` is set to the value of `ma`. +4. The values of all integer registers `r0`-`r7` are set to zero. + +#### 4.6.2 Loop execution + +The loop described below is repeated until the value of the `ic` register reaches zero. + +1. XOR of registers `readReg0` and `readReg1` (see Table 4.5.3) is calculated and `spAddr0` is XORed with the low 32 bits of the result and `spAddr1` with the high 32 bits. +2. `spAddr0` is used to perform a 64-byte aligned read from Scratchpad level 3 (using mask from Table 4.2.1). The 64 bytes are XORed with all integer registers in order `r0`-`r7`. +3. `spAddr1` is used to perform a 64-byte aligned read from Scratchpad level 3 (using mask from Table 4.2.1). Each floating point register `f0`-`f3` and `e0`-`e3` is initialized using an 8-byte value according to the conversion rules from chapters 4.3.1 and 4.3.2. +4. 
The 256 instructions stored in the Program Buffer are executed. +5. The `mx` register is XORed with the low 32 bits of registers `readReg2` and `readReg3` (see Table 4.5.3). +6. A 64-byte Dataset item at address `datasetOffset + mx % RANDOMX_DATASET_BASE_SIZE` is prefetched from the Dataset (it will be used during the next iteration). +7. A 64-byte Dataset item at address `datasetOffset + ma % RANDOMX_DATASET_BASE_SIZE` is loaded from the Dataset. The 64 bytes are XORed with all integer registers in order `r0`-`r7`. +8. The values of registers `mx` and `ma` are swapped. +9. The values of all integer registers `r0`-`r7` are written to the Scratchpad (L3) at address `spAddr1` (64-byte aligned). +10. Register `f0` is XORed with register `e0` and the result is stored in register `f0`. Register `f1` is XORed with register `e1` and the result is stored in register `f1`. Register `f2` is XORed with register `e2` and the result is stored in register `f2`. Register `f3` is XORed with register `e3` and the result is stored in register `f3`. +11. The values of registers `f0`-`f3` are written to the Scratchpad (L3) at address `spAddr0` (64-byte aligned). +12. `spAddr0` and `spAddr1` are both set to zero. +13. `ic` is decreased by 1. + + +## 5. Instruction set + +The VM executes programs in a special instruction set, which was designed in such a way that any random 8-byte word is a valid instruction and any sequence of valid instructions is a valid program. Because there are no "syntax" rules, generating a random program is as easy as filling the program buffer with random data. + +### 5.1 Instruction encoding + +Each instruction word is 64 bits long. Instruction fields are encoded as shown in Fig. 5.1. + +*Figure 5.1 - Instruction encoding* + +![Imgur](https://i.imgur.com/FtkWRwe.png) + +#### 5.1.1 opcode +There are 256 opcodes, which are distributed between 29 distinct instructions. 
Each instruction can be encoded using multiple opcodes (the number of opcodes specifies the frequency of the instruction in a random program). + +*Table 5.1.1: Instruction groups* + +|group|# instructions|# opcodes|| +|---------|-----------------|----|-| +|integer |17|129|50.4%| +|floating point |9|94|36.7%| +|control |2|17|6.6%| +|store |1|16|6.3%| +||**29**|**256**|**100%** + +All instructions are described below in chapters 5.2 - 5.5. + +#### 5.1.2 dst +Destination register. Only bits 0-1 (register groups A, F, E) or 0-2 (groups R, F+E) are used to encode a register according to Table 5.1.2. + +*Table 5.1.2: Addressable register groups* + +|index|R|A|F|E|F+E| +|--|--|--|--|--|--| +|0|`r0`|`a0`|`f0`|`e0`|`f0`| +|1|`r1`|`a1`|`f1`|`e1`|`f1`| +|2|`r2`|`a2`|`f2`|`e2`|`f2`| +|3|`r3`|`a3`|`f3`|`e3`|`f3`| +|4|`r4`||||`e0`| +|5|`r5`||||`e1`| +|6|`r6`||||`e2`| +|7|`r7`||||`e3`| + +#### 5.1.3 src + +The `src` flag encodes a source operand register according to Table 5.1.2 (only bits 0-1 or 0-2 are used). + +Some integer instructions use a constant value as the source operand in cases when `dst` and `src` encode the same register (see Table 5.2.1). + +For register-memory instructions, the source operand is used to calculate the memory address. + +#### 5.1.4 mod + +The `mod` flag is encoded as: + +*Table 5.1.3: mod flag encoding* + +|`mod` bits|description|range of values| +|----|--------|----| +|0-1|`mod.mem` flag|0-3| +|2-3|`mod.shift` flag|0-3| +|4-7|`mod.cond` flag|0-15| + +The `mod.mem` flag selects between Scratchpad levels L1 and L2 when reading from or writing to memory except for two cases: + +* it's a memory read and `dst` and `src` encode the same register +* it's a memory write and `mod.cond` is 14 or 15 + +In these two cases, the Scratchpad level is L3 (see Table 5.1.4). 
+ +*Table 5.1.4: memory access Scratchpad level* + +|condition|Scratchpad level| +|---------|-| +|`src == dst` (read)|L3| +|`mod.cond >= 14` (write)|L3| +|`mod.mem == 0`|L2| +|`mod.mem != 0`|L1| + +The address for reading/writing is calculated by applying bitwise AND operation to the address and the 8-byte aligned address mask listed in Table 4.2.1. + +The `mod.cond` and `mod.shift` flags are used by some instructions (see 5.2, 5.4). + +#### 5.1.5 imm32 +A 32-bit immediate value that can be used as the source operand and is used to calculate addresses for memory operations. The immediate value is sign-extended to 64 bits unless specified otherwise. + +### 5.2 Integer instructions +For integer instructions, the destination is always an integer register (register group R). Source operand (if applicable) can be either an integer register or memory value. If `dst` and `src` refer to the same register, most instructions use `0` or `imm32` instead of the register. This is indicated in the 'src == dst' column in Table 5.2.1. + +`[mem]` indicates a memory operand loaded as an 8-byte value from the address `src + imm32`. 
+ +*Table 5.2.1 Integer instructions* + +|frequency|instruction|dst|src|`src == dst ?`|operation| +|-|-|-|-|-|-| +|25/256|IADD_RS|R|R|`src = dst`|`dst = dst + (src << mod.shift) (+ imm32)`| +|7/256|IADD_M|R|R|`src = 0`|`dst = dst + [mem]`| +|16/256|ISUB_R|R|R|`src = imm32`|`dst = dst - src`| +|7/256|ISUB_M|R|R|`src = 0`|`dst = dst - [mem]`| +|16/256|IMUL_R|R|R|`src = imm32`|`dst = dst * src`| +|4/256|IMUL_M|R|R|`src = 0`|`dst = dst * [mem]`| +|4/256|IMULH_R|R|R|`src = dst`|`dst = (dst * src) >> 64`| +|1/256|IMULH_M|R|R|`src = 0`|`dst = (dst * [mem]) >> 64`| +|4/256|ISMULH_R|R|R|`src = dst`|`dst = (dst * src) >> 64` (signed)| +|1/256|ISMULH_M|R|R|`src = 0`|`dst = (dst * [mem]) >> 64` (signed)| +|8/256|IMUL_RCP|R|-|-|dst = $2^x$ / imm32 * dst| +|2/256|INEG_R|R|-|-|`dst = -dst`| +|15/256|IXOR_R|R|R|`src = imm32`|`dst = dst ^ src`| +|5/256|IXOR_M|R|R|`src = 0`|`dst = dst ^ [mem]`| +|10/256|IROR_R|R|R|`src = imm32`|`dst = dst >>> src`| +|0/256|IROL_R|R|R|`src = imm32`|`dst = dst <<< src`| +|4/256|ISWAP_R|R|R|`src = dst`|`temp = src; src = dst; dst = temp`| + +#### 5.2.1 IADD_RS + +This instruction adds the values of two registers (modulo $2^{64}$). The value of the second operand is shifted left by 0-3 bits (determined by the `mod.shift` flag). Additionally, if `dst` is register `r5`, the immediate value `imm32` is added to the result. + +#### 5.2.2 IADD_M + +64-bit integer addition operation (performed modulo $2^{64}$) with a memory source operand. + +#### 5.2.3 ISUB_R, ISUB_M + +64-bit integer subtraction (performed modulo $2^{64}$). ISUB_R uses a register source operand, ISUB_M uses a memory source operand. + +#### 5.2.4 IMUL_R, IMUL_M + +64-bit integer multiplication (performed modulo $2^{64}$). IMUL_R uses a register source operand, IMUL_M uses a memory source operand. + +#### 5.2.5 IMULH_R, IMULH_M, ISMULH_R, ISMULH_M +These instructions output the high 64 bits of the whole 128-bit multiplication result. 
The result differs for signed and unsigned multiplication (IMULH is unsigned, ISMULH is signed). The variants with a register source operand perform a squaring operation if `dst` equals `src`. + +#### 5.2.6 IMUL_RCP +If `imm32` equals 0 or is a power of 2, IMUL_RCP is a no-op. In other cases, the instruction multiplies the destination register by a reciprocal of `imm32` (the immediate value is zero-extended and treated as unsigned). The reciprocal is calculated as rcp = $2^x$ / imm32 by choosing the largest integer `x` such that rcp < $2^{64}$. + +#### 5.2.7 INEG_R +Performs two's complement negation of the destination register. + +#### 5.2.8 IXOR_R, IXOR_M +64-bit exclusive OR operation. IXOR_R uses a register source operand, IXOR_M uses a memory source operand. + +#### 5.2.9 IROR_R, IROL_R +Performs a cyclic shift (rotation) of the destination register. Source operand (shift count) is implicitly masked to 6 bits. IROR rotates bits right, IROL left. + +#### 5.2.10 ISWAP_R +This instruction swaps the values of two registers. If source and destination refer to the same register, the result is a no-op. + +### 5.3 Floating point instructions +For floating point instructions, the destination can be a group F or group E register. Source operand is either a group A register or a memory value. + +`[mem]` indicates a memory operand loaded as an 8-byte value from the address `src + imm32` and converted according to the rules in chapters 4.3.1 (group F) or 4.3.2 (group E). The lower and upper memory operands are denoted as `[mem][0]` and `[mem][1]`. + +All floating point operations are rounded according to the current value of the `fprc` register (see Table 4.3.1). Due to restrictions on the values of the floating point registers, no operation results in `NaN` or a denormal number. 
+ +*Table 5.3.1 Floating point instructions* + +|frequency|instruction|dst|src|operation| +|-|-|-|-|-| +|8/256|FSWAP_R|F+E|-|`(dst0, dst1) = (dst1, dst0)`| +|20/256|FADD_R|F|A|`(dst0, dst1) = (dst0 + src0, dst1 + src1)`| +|5/256|FADD_M|F|R|`(dst0, dst1) = (dst0 + [mem][0], dst1 + [mem][1])`| +|20/256|FSUB_R|F|A|`(dst0, dst1) = (dst0 - src0, dst1 - src1)`| +|5/256|FSUB_M|F|R|`(dst0, dst1) = (dst0 - [mem][0], dst1 - [mem][1])`| +|6/256|FSCAL_R|F|-|(dst0, dst1) = ($-2^{x_0}$ * dst0, $-2^{x_1}$ * dst1)| +|20/256|FMUL_R|E|A|`(dst0, dst1) = (dst0 * src0, dst1 * src1)`| +|4/256|FDIV_M|E|R|`(dst0, dst1) = (dst0 / [mem][0], dst1 / [mem][1])`| +|6/256|FSQRT_R|E|-|`(dst0, dst1) = (√dst0, √dst1)`| + +#### 5.3.1 FSWAP_R + +Swaps the lower and upper halves of the destination register. This is the only instruction that is applicable to both F and E register groups. + +#### 5.3.2 FADD_R, FADD_M + +Double precision floating point addition. FADD_R uses a group A register source operand, FADD_M uses a memory operand. + +#### 5.3.3 FSUB_R, FSUB_M + +Double precision floating point subtraction. FSUB_R uses a group A register source operand, FSUB_M uses a memory operand. + +#### 5.3.4 FSCAL_R +This instruction negates the number and multiplies it by $2^x$. `x` is calculated by taking the 4 least significant digits of the biased exponent and interpreting them as a binary number using the digit set `{+1, -1}` as opposed to the traditional `{0, 1}`. The possible values of `x` are all odd numbers from -15 to +15. + +The mathematical operation described above is equivalent to a bitwise XOR of the binary representation with the value of `0x80F0000000000000`. + +#### 5.3.5 FMUL_R + +Double precision floating point multiplication. This instruction uses only a register source operand. + +#### 5.3.6 FDIV_M + +Double precision floating point division. This instruction uses only a memory source operand. + +#### 5.3.7 FSQRT_R + +Double precision floating point square root of the destination register. 
+ +### 5.4 Control instructions + +There are 2 control instructions. + +*Table 5.4.1 - Control instructions* + +|frequency|instruction|dst|src|operation| +|-|-|-|-|-| +|1/256|CFROUND|-|R|`fprc = src >>> imm32` +|16/256|CBRANCH|R|-|`dst = dst + cimm`, conditional jump + +#### 5.4.1 CFROUND +This instruction calculates a 2-bit value by rotating the source register right by `imm32` bits and taking the 2 least significant bits (the value of the source register is unaffected). The result is stored in the `fprc` register. This changes the rounding mode of all subsequent floating point instructions. + +#### 5.4.2 CBRANCH + +This instruction adds an immediate value `cimm` (constructed from `imm32`, see below) to the destination register and then performs a conditional jump in the Program Buffer based on the value of the destination register. The target of the jump is the instruction following the instruction where register `dst` was last modified. + +At the beginning of each program iteration, all registers are considered to be unmodified. A register is considered as modified by an instruction in the following cases: + +* It is the destination register of an integer instruction except IMUL_RCP and ISWAP_R. +* It is the destination register of IMUL_RCP and `imm32` is not zero or a power of 2. +* It is the source or the destination register of ISWAP_R and the destination and source registers are distinct. +* The CBRANCH instruction is considered to modify all integer registers. + +If register `dst` has not been modified yet, the jump target is the first instruction in the Program Buffer. + +The CBRANCH instruction performs the following steps: + +1. A constant `b` is calculated as `mod.cond + RANDOMX_JUMP_OFFSET`. +1. A constant `cimm` is constructed as sign-extended `imm32` with bit `b` set to 1 and bit `b-1` set to 0 (if `b > 0`). +1. `cimm` is added to the destination register. +1. 
If bits `b` to `b + RANDOMX_JUMP_BITS - 1` of the destination register are zero, the jump is executed (target is the instruction following the instruction where `dst` was last modified). + +Bits in immediate and register values are numbered from 0 to 63 with 0 being the least significant bit. For example, for `b = 10` and `RANDOMX_JUMP_BITS = 8`, the bits are arranged like this: + +``` +cimm = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSMMMMMMMMMMMMMMMMMMMMM10MMMMMMMMM + dst = ..............................................XXXXXXXX.......... +``` + +`S` is a copied sign bit from `imm32`. `M` denotes bits of `imm32`. The 9th bit is set to 0 and the 10th bit is set to 1. This value will be added to `dst`. + +The second line uses `X` to mark bits of `dst` that will be checked by the condition. If all these bits are 0 after adding `cimm`, the jump is executed. + +The construction of the CBRANCH instruction ensures that no infinite loops are possible in the program. + +### 5.5 Store instruction +There is one explicit store instruction for integer values. + +`[mem]` indicates the destination is an 8-byte value at the address `dst + imm32`. + +*Table 5.5.1 - Store instruction* + +|frequency|instruction|dst|src|operation| +|-|-|-|-|-| +|16/256|ISTORE|R|R|`[mem] = src` + +#### 5.5.1 ISTORE +This instruction stores the value of the source integer register to the memory at the address calculated from the value of the destination register. The `src` and `dst` can be the same register. + +## 6. SuperscalarHash + +SuperscalarHash is a custom diffusion function that was designed to burn as much power as possible using only the CPU's integer ALUs. + +The input and output of SuperscalarHash are 8 integer registers `r0`-`r7`, each 64 bits wide. The output of SuperscalarHash is used to construct the Dataset (see chapter 7.3). + +### 6.1 Instructions +The body of SuperscalarHash is a random sequence of instructions that can run on the Virtual Machine.
SuperscalarHash uses a reduced set of only integer register-register instructions listed in Table 6.1.1. `dst` refers to the destination register, `src` to the source register. + +*Table 6.1.1 - SuperscalarHash instructions* + +|freq. †|instruction|Macro-ops|operation|rules| +|-|-|-|-|-| +|0.11|ISUB_R|`sub_rr`|`dst = dst - src`|`dst != src`| +|0.11|IXOR_R|`xor_rr`|`dst = dst ^ src`|`dst != src`| +|0.11|IADD_RS|`lea_sib`|`dst = dst + (src << mod.shift)`|`dst != src`, `dst != r5` +|0.22|IMUL_R|`imul_rr`|`dst = dst * src`|`dst != src`| +|0.11|IROR_C|`ror_ri`|`dst = dst >>> imm32`|`imm32 % 64 != 0` +|0.10|IADD_C|`add_ri`|`dst = dst + imm32`| +|0.10|IXOR_C|`xor_ri`|`dst = dst ^ imm32`| +|0.03|IMULH_R|`mov_rr`,`mul_r`,`mov_rr`|`dst = (dst * src) >> 64`| +|0.03|ISMULH_R|`mov_rr`,`imul_r`,`mov_rr`|`dst = (dst * src) >> 64` (signed)| +|0.06|IMUL_RCP|`mov_ri`,`imul_rr`|<code>dst = 2<sup>x</sup> / imm32 * dst</code>|`imm32 != 0`, <code>imm32 != 2<sup>N</sup></code>| + +† Frequencies are approximate. Instructions are generated based on complex rules. + +#### 6.1.1 ISUB_R +See chapter 5.2.3. Source and destination are always distinct registers. + +#### 6.1.2 IXOR_R +See chapter 5.2.8. Source and destination are always distinct registers. + +#### 6.1.3 IADD_RS +See chapter 5.2.1. Source and destination are always distinct registers and register `r5` cannot be the destination. + +#### 6.1.4 IMUL_R +See chapter 5.2.4. Source and destination are always distinct registers. + +#### 6.1.5 IROR_C +The destination register is rotated right. The rotation count is given by `imm32` masked to 6 bits and cannot be 0. + +#### 6.1.6 IADD_C +A sign-extended `imm32` is added to the destination register. + +#### 6.1.7 IXOR_C +The destination register is XORed with a sign-extended `imm32`. + +#### 6.1.8 IMULH_R, ISMULH_R +See chapter 5.2.5. + +#### 6.1.9 IMUL_RCP +See chapter 5.2.6. `imm32` is never 0 or a power of 2.
+ +### 6.2 The reference CPU + +Unlike a standard RandomX program, a SuperscalarHash program is generated using a strict set of rules to achieve the maximum performance on a superscalar CPU. For this purpose, the generator runs a simulation of a reference CPU. + +The reference CPU is loosely based on the [Intel Ivy Bridge microarchitecture](https://en.wikipedia.org/wiki/Ivy_Bridge_(microarchitecture)). It has the following properties: + +* The CPU has 3 integer execution ports P0, P1 and P5 that can execute instructions in parallel. Multiplication can run only on port P1. +* Each of the Superscalar instructions listed in Table 6.1.1 consists of one or more *Macro-ops*. Each Macro-op has a certain execution latency (in cycles) and size (in bytes) as shown in Table 6.2.1. +* Each of the Macro-ops listed in Table 6.2.1 consists of 0-2 *Micro-ops* that can go to a subset of the 3 execution ports. If a Macro-op consists of 2 Micro-ops, both must be executed together. +* The CPU can decode at most 16 bytes of code per cycle and at most 4 Micro-ops per cycle. + +*Table 6.2.1 - Macro-ops* + +|Macro-op|latency|size|1st Micro-op|2nd Micro-op| +|-|-|-|-|-| +|`sub_rr`|1|3|P015|-| +|`xor_rr`|1|3|P015|-| +|`lea_sib`|1|4|P01|-| +|`imul_rr`|3|4|P1|-| +|`ror_ri`|1|4|P05|-| +|`add_ri`|1|7, 8, 9|P015|-| +|`xor_ri`|1|7, 8, 9|P015|-| +|`mov_rr`|0|3|-|-| +|`mul_r`|4|3|P1|P5| +|`imul_r`|4|3|P1|P5| +|`mov_ri`|1|10|P015|-| + +* P015 - Micro-op can be executed on any port +* P01 - Micro-op can be executed on ports P0 or P1 +* P05 - Micro-op can be executed on ports P0 or P5 +* P1 - Micro-op can be executed only on port P1 +* P5 - Micro-op can be executed only on port P5 + +Macro-ops `add_ri` and `xor_ri` can be optionally padded to a size of 8 or 9 bytes for code alignment purposes. `mov_rr` has 0 execution latency and doesn't use an execution port, but still occupies space during the decoding stage (see chapter 6.3.1).
+ +### 6.3 CPU simulation + +SuperscalarHash programs are generated to maximize the usage of all 3 execution ports of the reference CPU. The generation consists of 4 stages: + +* Decoding stage +* Instruction selection +* Port assignment +* Operand assignment + +Program generation is complete when one of two conditions is met: + +1. An instruction is scheduled for execution on a cycle that is equal to or greater than `RANDOMX_SUPERSCALAR_LATENCY` +1. The number of generated instructions reaches `3 * RANDOMX_SUPERSCALAR_LATENCY + 2`. + +#### 6.3.1 Decoding stage + +The generator produces instructions in groups of 3 or 4 Macro-op slots such that the size of each group is exactly 16 bytes. + +*Table 6.3.1 - Decoder configurations* + +|decoder group|configuration| +|-------------|-------------| +|0|4-8-4| +|1|7-3-3-3| +|2|3-7-3-3| +|3|4-9-3| +|4|4-4-4-4| +|5|3-3-10| + +The rules for the selection of the decoder group are as follows: + +* If the currently processed instruction is IMULH_R or ISMULH_R, the next decode group is group 5 (the only group that starts with a 3-byte slot and has only 3 slots). +* If the total number of multiplications that have been generated is less than or equal to the current decoding cycle, the next decode group is group 4. +* If the currently processed instruction is IMUL_RCP, the next decode group is group 0 or 3 (must begin with a 4-byte slot for multiplication). +* Otherwise a random decode group is selected from groups 0-3. + +#### 6.3.2 Instruction selection + +Instructions are selected based on the size of the current decode group slot - see Table 6.3.2.
+ +*Table 6.3.2 - Instruction selection* + +|slot size|note|instructions| +|-------------|-------------|-----| +|3|-|ISUB_R, IXOR_R +|3|last slot in the group|ISUB_R, IXOR_R, IMULH_R, ISMULH_R| +|4|decode group 4, not the last slot|IMUL_R| +|4|-|IROR_C, IADD_RS| +|7,8,9|-|IADD_C, IXOR_C| +|10|-|IMUL_RCP| + +#### 6.3.3 Port assignment + +Micro-ops are issued to execution ports as soon as an available port is free. The scheduling is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload port P1 (multiplication) by instructions that can go to any port. The cycle when all Micro-ops of an instruction can be executed is called the 'scheduleCycle'. + +#### 6.3.4 Operand assignment + +The source operand (if needed) is selected first. It is selected from the group of registers that are available at the 'scheduleCycle' of the instruction. A register is available if the latency of its last operation has elapsed. + +The destination operand is selected with more strict rules (see column 'rules' in Table 6.1.1): + +* value must be ready at the required cycle +* cannot be the same as the source register unless the instruction allows it (see column 'rules' in Table 6.1.1) + * this avoids optimizable operations such as `reg ^ reg` or `reg - reg` + * it also increases intermixing of register values +* register cannot be multiplied twice in a row unless `allowChainedMul` is true + * this avoids accumulation of trailing zeroes in registers due to excessive multiplication + * `allowChainedMul` is set to true if an attempt to find source/destination registers failed (this is quite rare, but prevents a catastrophic failure of the generator) +* either the last instruction applied to the register or its source must be different than the current instruction + * this avoids optimizable instruction sequences such as `r1 = r1 ^ r2; r1 = r1 ^ r2` (can be eliminated) or `reg = reg >>> C1; reg = reg >>> C2` (can be reduced to one rotation) or `reg = reg + C1;
reg = reg + C2` (can be reduced to one addition) +* register `r5` cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction) + +## 7. Dataset + +The Dataset is a read-only memory structure that is used during program execution (chapter 4.6.2, steps 6 and 7). The size of the Dataset is `RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE` bytes and it's divided into 64-byte 'items'. + +In order to allow PoW verification with a lower amount of memory, the Dataset is constructed in two steps using an intermediate structure called the "Cache", which can be used to calculate Dataset items on the fly. + +The whole Dataset is constructed from the key value `K`, which is an input parameter of RandomX. The whole Dataset needs to be recalculated every time the key value changes. Fig. 7.1 shows the process of Dataset construction. + +*Figure 7.1 - Dataset construction* + +![Imgur](https://i.imgur.com/86h5SbW.png) + +### 7.1 Cache construction + +The key `K` is expanded into the Cache using the "memory fill" function of Argon2d with parameters according to Table 7.1.1. The key is used as the "password" field. + +*Table 7.1.1 - Argon2 parameters* + +|parameter|value| +|------------|--| +|parallelism|`RANDOMX_ARGON_LANES`| +|output size|0| +|memory|`RANDOMX_ARGON_MEMORY`| +|iterations|`RANDOMX_ARGON_ITERATIONS`| +|version|`0x13`| +|hash type|0 (Argon2d)| +|password|key value `K`| +|salt|`RANDOMX_ARGON_SALT` +|secret size|0| +|assoc. data size|0| + +The finalizer and output calculation steps of Argon2 are omitted. The output is the filled memory array. + +### 7.2 SuperscalarHash initialization + +The key value `K` is used to initialize a BlakeGenerator (see chapter 3.4), which is then used to generate 8 SuperscalarHash instances for Dataset initialization. + +### 7.3 Dataset block generation +Dataset items are numbered sequentially with `itemNumber` starting from 0.
Each 64-byte Dataset item is generated independently using 8 SuperscalarHash functions (generated according to chapter 7.2) and by XORing randomly selected data from the Cache (constructed according to chapter 7.1). + +The item data is represented by 8 64-bit integer registers: `r0`-`r7`. + +1. The register values are initialized as follows (`*` = multiplication, `^` = XOR): + * `r0 = (itemNumber + 1) * 6364136223846793005` + * `r1 = r0 ^ 9298411001130361340` + * `r2 = r0 ^ 12065312585734608966` + * `r3 = r0 ^ 9306329213124626780` + * `r4 = r0 ^ 5281919268842080866` + * `r5 = r0 ^ 10536153434571861004` + * `r6 = r0 ^ 3398623926847679864` + * `r7 = r0 ^ 9549104520008361294` +1. Let `cacheIndex = itemNumber` +1. Let `i = 0` +1. Load a 64-byte item from the Cache. The item index is given by `cacheIndex` modulo the total number of 64-byte items in Cache. +1. Execute `SuperscalarHash[i](r0, r1, r2, r3, r4, r5, r6, r7)`, where `SuperscalarHash[i]` refers to the i-th SuperscalarHash function. This modifies the values of the registers `r0`-`r7`. +1. XOR all registers with the 64 bytes loaded in step 4 (8 bytes per column in order `r0`-`r7`). +1. Set `cacheIndex` to the value of the register that has the longest dependency chain in the SuperscalarHash function executed in step 5. +1. Set `i = i + 1` and go back to step 4 if `i < RANDOMX_CACHE_ACCESSES`. +1. Concatenate registers `r0`-`r7` in little endian format to get the final Dataset item data. + +The constants used to initialize register values in step 1 were determined as follows: + +* Multiplier `6364136223846793005` was selected because it gives an excellent distribution for linear generators (D.
Knuth: The Art of Computer Programming – Vol 2., also listed in [Commonly used LCG parameters](https://en.wikipedia.org/wiki/Linear_congruential_generator#Parameters_in_common_use)) +* XOR constants used to initialize registers `r1`-`r7` were determined by calculating `Hash512` of the ASCII value `"RandomX SuperScalarHash initialize"` and taking bytes 8-63 as 7 little-endian unsigned 64-bit integers. Additionally, the constant for `r1` was increased by <code>2<sup>33</sup>+700</code> and the constant for `r3` was increased by <code>2<sup>14</sup></code> (these changes are necessary to ensure that all registers have unique initial values for all values of `itemNumber`). + diff --git a/RandomX/makefile b/RandomX/makefile new file mode 100644 index 00000000..bb981f8e --- /dev/null +++ b/RandomX/makefile @@ -0,0 +1,168 @@ +#CXX=g++-8 +#CC=gcc-8 +AR=gcc-ar +PLATFORM=$(shell uname -m) +CXXFLAGS=-std=c++11 -fPIC +CCFLAGS=-std=c99 -fPIC +ARFLAGS=rcs +BINDIR=bin +SRCDIR=src +TESTDIR=src/tests +OBJDIR=obj +LDFLAGS=-lpthread +RXA=$(BINDIR)/librandomx.a +BINARIES=$(RXA) $(BINDIR)/benchmark $(BINDIR)/code-generator +RXOBJS=$(addprefix $(OBJDIR)/,aes_hash.o argon2_ref.o dataset.o soft_aes.o virtual_memory.o vm_interpreted.o allocator.o assembly_generator_x86.o instruction.o randomx.o superscalar.o vm_compiled.o vm_interpreted_light.o argon2_core.o blake2_generator.o instructions_portable.o reciprocal.o virtual_machine.o vm_compiled_light.o blake2b.o) +ifeq ($(PLATFORM),amd64) + RXOBJS += $(addprefix $(OBJDIR)/,jit_compiler_x86_static.o jit_compiler_x86.o) + CXXFLAGS += -maes +endif +ifeq ($(PLATFORM),x86_64) + RXOBJS += $(addprefix $(OBJDIR)/,jit_compiler_x86_static.o jit_compiler_x86.o) + CXXFLAGS += -maes +endif + +release: CXXFLAGS += -O3 -flto +release: CCFLAGS += -O3 -flto +release: LDFLAGS += -flto +release: $(BINARIES) + +native: CXXFLAGS += -march=native -O3 -flto +native: CCFLAGS += -march=native -O3 -flto +native: $(BINARIES) + +nolto: CXXFLAGS += -O3 +nolto: CCFLAGS += -O3 +nolto: $(BINARIES) + +debug: CXXFLAGS +=
-g +debug: CCFLAGS += -g +debug: LDFLAGS += -g +debug: $(BINARIES) + +profile: CXXFLAGS += -pg +profile: CCFLAGS += -pg +profile: LDFLAGS += -pg +profile: $(BINDIR)/benchmark + +test: CXXFLAGS += -O0 + +$(RXA): $(RXOBJS) | $(BINDIR) + $(AR) $(ARFLAGS) $@ $(RXOBJS) +$(OBJDIR): + mkdir $(OBJDIR) +$(BINDIR): + mkdir $(BINDIR) +$(OBJDIR)/benchmark.o: $(TESTDIR)/benchmark.cpp $(TESTDIR)/stopwatch.hpp \ + $(TESTDIR)/utility.hpp $(SRCDIR)/randomx.h $(SRCDIR)/blake2/endian.h + $(CXX) $(CXXFLAGS) -pthread -c $< -o $@ +$(BINDIR)/benchmark: $(OBJDIR)/benchmark.o $(RXA) + $(CXX) $(LDFLAGS) -pthread $< $(RXA) -o $@ +$(OBJDIR)/code-generator.o: $(TESTDIR)/code-generator.cpp $(TESTDIR)/utility.hpp \ + $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ + $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ + $(SRCDIR)/assembly_generator_x86.hpp $(SRCDIR)/superscalar.hpp \ + $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp \ + $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/aes_hash.hpp \ + $(SRCDIR)/blake2/blake2.h $(SRCDIR)/program.hpp + $(CXX) $(CXXFLAGS) -c $< -o $@ +$(BINDIR)/code-generator: $(OBJDIR)/code-generator.o $(RXA) + $(CXX) $(LDFLAGS) $< $(RXA) -o $@ +$(OBJDIR)/aes_hash.o: $(SRCDIR)/aes_hash.cpp $(SRCDIR)/soft_aes.h $(SRCDIR)/intrin_portable.h | $(OBJDIR) +$(OBJDIR)/argon2_ref.o: $(SRCDIR)/argon2_ref.c $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h \ + $(SRCDIR)/blake2/blamka-round-ref.h $(SRCDIR)/blake2/blake2.h \ + $(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h $(SRCDIR)/blake2/blake2-impl.h \ + $(SRCDIR)/blake2/blake2.h +$(OBJDIR)/blake2b.o: $(SRCDIR)/blake2/blake2b.c $(SRCDIR)/blake2/blake2.h \ + $(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h + $(CC) $(CCFLAGS) -c $< -o $@ +$(OBJDIR)/dataset.o: $(SRCDIR)/dataset.cpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ + $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/dataset.hpp \ + $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp \ + 
$(SRCDIR)/allocator.hpp $(SRCDIR)/virtual_memory.hpp $(SRCDIR)/superscalar.hpp \ + $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h \ + $(SRCDIR)/intrin_portable.h +$(OBJDIR)/jit_compiler_x86.o: $(SRCDIR)/jit_compiler_x86.cpp $(SRCDIR)/jit_compiler_x86.hpp \ + $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ + $(SRCDIR)/jit_compiler_x86_static.hpp $(SRCDIR)/superscalar.hpp \ + $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/blake2_generator.hpp \ + $(SRCDIR)/program.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/virtual_memory.hpp \ + $(SRCDIR)/instruction_weights.hpp +$(OBJDIR)/jit_compiler_x86_static.o: $(SRCDIR)/jit_compiler_x86_static.S $(SRCDIR)/configuration.h \ + $(SRCDIR)/asm/program_prologue_linux.inc $(SRCDIR)/asm/program_xmm_constants.inc \ + $(SRCDIR)/asm/program_loop_load.inc $(SRCDIR)/asm/program_read_dataset.inc \ + $(SRCDIR)/asm/program_read_dataset_sshash_init.inc \ + $(SRCDIR)/asm/program_read_dataset_sshash_fin.inc \ + $(SRCDIR)/asm/program_loop_store.inc $(SRCDIR)/asm/program_epilogue_linux.inc \ + $(SRCDIR)/asm/program_epilogue_store.inc $(SRCDIR)/asm/program_sshash_load.inc \ + $(SRCDIR)/asm/program_sshash_prefetch.inc $(SRCDIR)/asm/program_sshash_constants.inc \ + $(SRCDIR)/asm/randomx_reciprocal.inc +$(OBJDIR)/soft_aes.o: $(SRCDIR)/soft_aes.cpp $(SRCDIR)/soft_aes.h $(SRCDIR)/intrin_portable.h +$(OBJDIR)/virtual_memory.o: $(SRCDIR)/virtual_memory.cpp $(SRCDIR)/virtual_memory.hpp +$(OBJDIR)/vm_interpreted.o: $(SRCDIR)/vm_interpreted.cpp $(SRCDIR)/vm_interpreted.hpp \ + $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ + $(SRCDIR)/virtual_machine.hpp $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp \ + $(SRCDIR)/intrin_portable.h $(SRCDIR)/allocator.hpp $(SRCDIR)/dataset.hpp \ + $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/reciprocal.h \ + 
$(SRCDIR)/instruction_weights.hpp +$(OBJDIR)/allocator.o: $(SRCDIR)/allocator.cpp $(SRCDIR)/allocator.hpp $(SRCDIR)/intrin_portable.h \ + $(SRCDIR)/virtual_memory.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ + $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h +$(OBJDIR)/assembly_generator_x86.o: $(SRCDIR)/assembly_generator_x86.cpp \ + $(SRCDIR)/assembly_generator_x86.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ + $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/reciprocal.h $(SRCDIR)/program.hpp \ + $(SRCDIR)/instruction.hpp $(SRCDIR)/superscalar.hpp $(SRCDIR)/superscalar_program.hpp \ + $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/instruction_weights.hpp +$(OBJDIR)/instruction.o: $(SRCDIR)/instruction.cpp $(SRCDIR)/instruction.hpp \ + $(SRCDIR)/blake2/endian.h $(SRCDIR)/common.hpp $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ + $(SRCDIR)/instruction_weights.hpp +$(OBJDIR)/randomx.o: $(SRCDIR)/randomx.cpp $(SRCDIR)/randomx.h $(SRCDIR)/dataset.hpp $(SRCDIR)/common.hpp \ + $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/superscalar_program.hpp \ + $(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/allocator.hpp \ + $(SRCDIR)/vm_interpreted.hpp $(SRCDIR)/virtual_machine.hpp $(SRCDIR)/program.hpp \ + $(SRCDIR)/intrin_portable.h $(SRCDIR)/vm_interpreted_light.hpp $(SRCDIR)/vm_compiled.hpp \ + $(SRCDIR)/vm_compiled_light.hpp $(SRCDIR)/blake2/blake2.h +$(OBJDIR)/superscalar.o: $(SRCDIR)/superscalar.cpp $(SRCDIR)/configuration.h $(SRCDIR)/program.hpp \ + $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/randomx.h $(SRCDIR)/instruction.hpp \ + $(SRCDIR)/superscalar.hpp $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/blake2_generator.hpp \ + $(SRCDIR)/intrin_portable.h $(SRCDIR)/reciprocal.h +$(OBJDIR)/vm_compiled.o: $(SRCDIR)/vm_compiled.cpp $(SRCDIR)/vm_compiled.hpp \ + $(SRCDIR)/virtual_machine.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ + $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/program.hpp 
$(SRCDIR)/instruction.hpp \ + $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/allocator.hpp $(SRCDIR)/dataset.hpp \ + $(SRCDIR)/superscalar_program.hpp +$(OBJDIR)/vm_interpreted_light.o: $(SRCDIR)/vm_interpreted_light.cpp \ + $(SRCDIR)/vm_interpreted_light.hpp $(SRCDIR)/vm_interpreted.hpp $(SRCDIR)/common.hpp \ + $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ + $(SRCDIR)/virtual_machine.hpp $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp \ + $(SRCDIR)/intrin_portable.h $(SRCDIR)/allocator.hpp $(SRCDIR)/dataset.hpp \ + $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/jit_compiler_x86.hpp +$(OBJDIR)/argon2_core.o: $(SRCDIR)/argon2_core.c $(SRCDIR)/argon2_core.h $(SRCDIR)/argon2.h \ + $(SRCDIR)/blake2/blake2.h $(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h +$(OBJDIR)/blake2_generator.o: $(SRCDIR)/blake2_generator.cpp $(SRCDIR)/blake2/blake2.h \ + $(SRCDIR)/blake2/endian.h $(SRCDIR)/blake2_generator.hpp +$(OBJDIR)/instructions_portable.o: $(SRCDIR)/instructions_portable.cpp $(SRCDIR)/common.hpp \ + $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ + $(SRCDIR)/intrin_portable.h +$(OBJDIR)/reciprocal.o: $(SRCDIR)/reciprocal.c $(SRCDIR)/reciprocal.h +$(OBJDIR)/virtual_machine.o: $(SRCDIR)/virtual_machine.cpp $(SRCDIR)/virtual_machine.hpp \ + $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ + $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/aes_hash.hpp $(SRCDIR)/blake2/blake2.h \ + $(SRCDIR)/intrin_portable.h $(SRCDIR)/allocator.hpp +$(OBJDIR)/vm_compiled_light.o: $(SRCDIR)/vm_compiled_light.cpp $(SRCDIR)/vm_compiled_light.hpp \ + $(SRCDIR)/vm_compiled.hpp $(SRCDIR)/virtual_machine.hpp $(SRCDIR)/common.hpp \ + $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/program.hpp \ + $(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/allocator.hpp \ + $(SRCDIR)/dataset.hpp $(SRCDIR)/superscalar_program.hpp + +$(OBJDIR)/%.o: 
$(SRCDIR)/%.c + $(CC) $(CCFLAGS) -c $< -o $@ + +$(OBJDIR)/%.o: $(SRCDIR)/%.cpp + $(CXX) $(CXXFLAGS) -c $< -o $@ + +$(OBJDIR)/%.o: $(SRCDIR)/%.S + $(CXX) -x assembler-with-cpp -c $< -o $@ + +clean: + rm -f $(BINARIES) $(OBJDIR)/*.o diff --git a/RandomX/randomx.sln b/RandomX/randomx.sln new file mode 100644 index 00000000..3da2b502 --- /dev/null +++ b/RandomX/randomx.sln @@ -0,0 +1,166 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.572 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "randomx", "vcxproj\randomx.vcxproj", "{3346A4AD-C438-4324-8B77-47A16452954B}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{4A4A689F-86AF-41C0-A974-1080506D0923}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "superscalar-avalanche", "vcxproj\superscalar-avalanche.vcxproj", "{CF34A7EF-7DC9-4077-94A5-76F5425EA938}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "superscalar-init", "vcxproj\superscalar-init.vcxproj", "{E59DC709-9B12-4A53-BAF3-79398821C376}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "superscalar-stats", "vcxproj\superscalar-stats.vcxproj", "{0173D560-8C12-46B3-B467-0C6E7573AA0B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "benchmark", "vcxproj\benchmark.vcxproj", "{1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "api-example1", "vcxproj\api-example1.vcxproj", "{83EA3E54-5D91-4E01-8EF6-C1E718334F83}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "api-example2", "vcxproj\api-example2.vcxproj", "{44947B9C-E6B1-4C06-BD01-F8EF43B59223}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "code-generator", "vcxproj\code-generator.vcxproj", "{3E490DEC-1874-43AA-92DA-1AC57C217EAC}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = 
"scratchpad-entropy", "vcxproj\scratchpad-entropy.vcxproj", "{FF8BD408-AFD8-43C6-BE98-4D03B37E840B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jit-performance", "vcxproj\jit-performance.vcxproj", "{535F2111-FA81-4C76-A354-EDD2F9AA00E3}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "perf-simulation", "vcxproj\perf-simulation.vcxproj", "{F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "runtime-distr", "vcxproj\runtime-distr.vcxproj", "{F207EC8C-C55F-46C0-8851-887A71574F54}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "randomx-dll", "vcxproj\randomx-dll.vcxproj", "{59560AD8-18E3-463E-A941-BBD808EC7C83}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {3346A4AD-C438-4324-8B77-47A16452954B}.Debug|x64.ActiveCfg = Debug|x64 + {3346A4AD-C438-4324-8B77-47A16452954B}.Debug|x64.Build.0 = Debug|x64 + {3346A4AD-C438-4324-8B77-47A16452954B}.Debug|x86.ActiveCfg = Debug|Win32 + {3346A4AD-C438-4324-8B77-47A16452954B}.Debug|x86.Build.0 = Debug|Win32 + {3346A4AD-C438-4324-8B77-47A16452954B}.Release|x64.ActiveCfg = Release|x64 + {3346A4AD-C438-4324-8B77-47A16452954B}.Release|x64.Build.0 = Release|x64 + {3346A4AD-C438-4324-8B77-47A16452954B}.Release|x86.ActiveCfg = Release|Win32 + {3346A4AD-C438-4324-8B77-47A16452954B}.Release|x86.Build.0 = Release|Win32 + {CF34A7EF-7DC9-4077-94A5-76F5425EA938}.Debug|x64.ActiveCfg = Debug|x64 + {CF34A7EF-7DC9-4077-94A5-76F5425EA938}.Debug|x64.Build.0 = Debug|x64 + {CF34A7EF-7DC9-4077-94A5-76F5425EA938}.Debug|x86.ActiveCfg = Debug|Win32 + {CF34A7EF-7DC9-4077-94A5-76F5425EA938}.Debug|x86.Build.0 = Debug|Win32 + {CF34A7EF-7DC9-4077-94A5-76F5425EA938}.Release|x64.ActiveCfg = Release|x64 + 
{CF34A7EF-7DC9-4077-94A5-76F5425EA938}.Release|x64.Build.0 = Release|x64 + {CF34A7EF-7DC9-4077-94A5-76F5425EA938}.Release|x86.ActiveCfg = Release|Win32 + {CF34A7EF-7DC9-4077-94A5-76F5425EA938}.Release|x86.Build.0 = Release|Win32 + {E59DC709-9B12-4A53-BAF3-79398821C376}.Debug|x64.ActiveCfg = Debug|x64 + {E59DC709-9B12-4A53-BAF3-79398821C376}.Debug|x64.Build.0 = Debug|x64 + {E59DC709-9B12-4A53-BAF3-79398821C376}.Debug|x86.ActiveCfg = Debug|Win32 + {E59DC709-9B12-4A53-BAF3-79398821C376}.Debug|x86.Build.0 = Debug|Win32 + {E59DC709-9B12-4A53-BAF3-79398821C376}.Release|x64.ActiveCfg = Release|x64 + {E59DC709-9B12-4A53-BAF3-79398821C376}.Release|x64.Build.0 = Release|x64 + {E59DC709-9B12-4A53-BAF3-79398821C376}.Release|x86.ActiveCfg = Release|Win32 + {E59DC709-9B12-4A53-BAF3-79398821C376}.Release|x86.Build.0 = Release|Win32 + {0173D560-8C12-46B3-B467-0C6E7573AA0B}.Debug|x64.ActiveCfg = Debug|x64 + {0173D560-8C12-46B3-B467-0C6E7573AA0B}.Debug|x64.Build.0 = Debug|x64 + {0173D560-8C12-46B3-B467-0C6E7573AA0B}.Debug|x86.ActiveCfg = Debug|Win32 + {0173D560-8C12-46B3-B467-0C6E7573AA0B}.Debug|x86.Build.0 = Debug|Win32 + {0173D560-8C12-46B3-B467-0C6E7573AA0B}.Release|x64.ActiveCfg = Release|x64 + {0173D560-8C12-46B3-B467-0C6E7573AA0B}.Release|x64.Build.0 = Release|x64 + {0173D560-8C12-46B3-B467-0C6E7573AA0B}.Release|x86.ActiveCfg = Release|Win32 + {0173D560-8C12-46B3-B467-0C6E7573AA0B}.Release|x86.Build.0 = Release|Win32 + {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}.Debug|x64.ActiveCfg = Debug|x64 + {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}.Debug|x64.Build.0 = Debug|x64 + {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}.Debug|x86.ActiveCfg = Debug|Win32 + {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}.Debug|x86.Build.0 = Debug|Win32 + {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}.Release|x64.ActiveCfg = Release|x64 + {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}.Release|x64.Build.0 = Release|x64 + {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}.Release|x86.ActiveCfg = Release|Win32 + 
{1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70}.Release|x86.Build.0 = Release|Win32 + {83EA3E54-5D91-4E01-8EF6-C1E718334F83}.Debug|x64.ActiveCfg = Debug|x64 + {83EA3E54-5D91-4E01-8EF6-C1E718334F83}.Debug|x64.Build.0 = Debug|x64 + {83EA3E54-5D91-4E01-8EF6-C1E718334F83}.Debug|x86.ActiveCfg = Debug|Win32 + {83EA3E54-5D91-4E01-8EF6-C1E718334F83}.Debug|x86.Build.0 = Debug|Win32 + {83EA3E54-5D91-4E01-8EF6-C1E718334F83}.Release|x64.ActiveCfg = Release|x64 + {83EA3E54-5D91-4E01-8EF6-C1E718334F83}.Release|x64.Build.0 = Release|x64 + {83EA3E54-5D91-4E01-8EF6-C1E718334F83}.Release|x86.ActiveCfg = Release|Win32 + {83EA3E54-5D91-4E01-8EF6-C1E718334F83}.Release|x86.Build.0 = Release|Win32 + {44947B9C-E6B1-4C06-BD01-F8EF43B59223}.Debug|x64.ActiveCfg = Debug|x64 + {44947B9C-E6B1-4C06-BD01-F8EF43B59223}.Debug|x64.Build.0 = Debug|x64 + {44947B9C-E6B1-4C06-BD01-F8EF43B59223}.Debug|x86.ActiveCfg = Debug|Win32 + {44947B9C-E6B1-4C06-BD01-F8EF43B59223}.Debug|x86.Build.0 = Debug|Win32 + {44947B9C-E6B1-4C06-BD01-F8EF43B59223}.Release|x64.ActiveCfg = Release|x64 + {44947B9C-E6B1-4C06-BD01-F8EF43B59223}.Release|x64.Build.0 = Release|x64 + {44947B9C-E6B1-4C06-BD01-F8EF43B59223}.Release|x86.ActiveCfg = Release|Win32 + {44947B9C-E6B1-4C06-BD01-F8EF43B59223}.Release|x86.Build.0 = Release|Win32 + {3E490DEC-1874-43AA-92DA-1AC57C217EAC}.Debug|x64.ActiveCfg = Debug|x64 + {3E490DEC-1874-43AA-92DA-1AC57C217EAC}.Debug|x64.Build.0 = Debug|x64 + {3E490DEC-1874-43AA-92DA-1AC57C217EAC}.Debug|x86.ActiveCfg = Debug|Win32 + {3E490DEC-1874-43AA-92DA-1AC57C217EAC}.Debug|x86.Build.0 = Debug|Win32 + {3E490DEC-1874-43AA-92DA-1AC57C217EAC}.Release|x64.ActiveCfg = Release|x64 + {3E490DEC-1874-43AA-92DA-1AC57C217EAC}.Release|x64.Build.0 = Release|x64 + {3E490DEC-1874-43AA-92DA-1AC57C217EAC}.Release|x86.ActiveCfg = Release|Win32 + {3E490DEC-1874-43AA-92DA-1AC57C217EAC}.Release|x86.Build.0 = Release|Win32 + {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Debug|x64.ActiveCfg = Debug|x64 + 
{FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Debug|x64.Build.0 = Debug|x64 + {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Debug|x86.ActiveCfg = Debug|Win32 + {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Debug|x86.Build.0 = Debug|Win32 + {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Release|x64.ActiveCfg = Release|x64 + {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Release|x64.Build.0 = Release|x64 + {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Release|x86.ActiveCfg = Release|Win32 + {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Release|x86.Build.0 = Release|Win32 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Debug|x64.ActiveCfg = Debug|x64 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Debug|x64.Build.0 = Debug|x64 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Debug|x86.ActiveCfg = Debug|Win32 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Debug|x86.Build.0 = Debug|Win32 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Release|x64.ActiveCfg = Release|x64 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Release|x64.Build.0 = Release|x64 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Release|x86.ActiveCfg = Release|Win32 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Release|x86.Build.0 = Release|Win32 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Debug|x64.ActiveCfg = Debug|x64 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Debug|x64.Build.0 = Debug|x64 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Debug|x86.ActiveCfg = Debug|Win32 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Debug|x86.Build.0 = Debug|Win32 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Release|x64.ActiveCfg = Release|x64 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Release|x64.Build.0 = Release|x64 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Release|x86.ActiveCfg = Release|Win32 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Release|x86.Build.0 = Release|Win32 + {F207EC8C-C55F-46C0-8851-887A71574F54}.Debug|x64.ActiveCfg = Debug|x64 + {F207EC8C-C55F-46C0-8851-887A71574F54}.Debug|x64.Build.0 = Debug|x64 + {F207EC8C-C55F-46C0-8851-887A71574F54}.Debug|x86.ActiveCfg = Debug|Win32 + 
{F207EC8C-C55F-46C0-8851-887A71574F54}.Debug|x86.Build.0 = Debug|Win32 + {F207EC8C-C55F-46C0-8851-887A71574F54}.Release|x64.ActiveCfg = Release|x64 + {F207EC8C-C55F-46C0-8851-887A71574F54}.Release|x64.Build.0 = Release|x64 + {F207EC8C-C55F-46C0-8851-887A71574F54}.Release|x86.ActiveCfg = Release|Win32 + {F207EC8C-C55F-46C0-8851-887A71574F54}.Release|x86.Build.0 = Release|Win32 + {59560AD8-18E3-463E-A941-BBD808EC7C83}.Debug|x64.ActiveCfg = Debug|x64 + {59560AD8-18E3-463E-A941-BBD808EC7C83}.Debug|x64.Build.0 = Debug|x64 + {59560AD8-18E3-463E-A941-BBD808EC7C83}.Debug|x86.ActiveCfg = Debug|Win32 + {59560AD8-18E3-463E-A941-BBD808EC7C83}.Debug|x86.Build.0 = Debug|Win32 + {59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x64.ActiveCfg = Release|x64 + {59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x64.Build.0 = Release|x64 + {59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x86.ActiveCfg = Release|Win32 + {59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {CF34A7EF-7DC9-4077-94A5-76F5425EA938} = {4A4A689F-86AF-41C0-A974-1080506D0923} + {E59DC709-9B12-4A53-BAF3-79398821C376} = {4A4A689F-86AF-41C0-A974-1080506D0923} + {0173D560-8C12-46B3-B467-0C6E7573AA0B} = {4A4A689F-86AF-41C0-A974-1080506D0923} + {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70} = {4A4A689F-86AF-41C0-A974-1080506D0923} + {83EA3E54-5D91-4E01-8EF6-C1E718334F83} = {4A4A689F-86AF-41C0-A974-1080506D0923} + {44947B9C-E6B1-4C06-BD01-F8EF43B59223} = {4A4A689F-86AF-41C0-A974-1080506D0923} + {3E490DEC-1874-43AA-92DA-1AC57C217EAC} = {4A4A689F-86AF-41C0-A974-1080506D0923} + {FF8BD408-AFD8-43C6-BE98-4D03B37E840B} = {4A4A689F-86AF-41C0-A974-1080506D0923} + {535F2111-FA81-4C76-A354-EDD2F9AA00E3} = {4A4A689F-86AF-41C0-A974-1080506D0923} + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2} = {4A4A689F-86AF-41C0-A974-1080506D0923} + 
{F207EC8C-C55F-46C0-8851-887A71574F54} = {4A4A689F-86AF-41C0-A974-1080506D0923} + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {4EBC03DB-AE37-4141-8147-692F16E0ED02} + EndGlobalSection +EndGlobal diff --git a/RandomX/src/aes_hash.cpp b/RandomX/src/aes_hash.cpp new file mode 100644 index 00000000..c1239aac --- /dev/null +++ b/RandomX/src/aes_hash.cpp @@ -0,0 +1,214 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "soft_aes.h" + +#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d +#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e +#define AES_HASH_1R_STATE2 0xe8a07ce4, 0x5079506b, 0xae62c7d0, 0x6a770017 +#define AES_HASH_1R_STATE3 0x7e994948, 0x79a10005, 0x07ad828d, 0x630a240c + +#define AES_HASH_1R_XKEY0 0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389 +#define AES_HASH_1R_XKEY1 0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1 + +/* + Calculate a 512-bit hash of 'input' using 4 lanes of AES. + The input is treated as a set of round keys for the encryption + of the initial state. + + 'inputSize' must be a multiple of 64. The loop below always loads + 64 bytes per iteration, so any other size makes the last iteration + read past the end of the input. + + Lanes 0 and 2 are advanced with aesenc, lanes 1 and 3 with aesdec. + After the input is consumed, every lane gets two more rounds keyed + with AES_HASH_1R_XKEY0 and AES_HASH_1R_XKEY1, and the four lane states + are stored to 'hash' as 4 consecutive rx_vec_i128 values. + + For a 2 MiB input, this has the same security as 32768-round + AES encryption. + + Hashing throughput: >20 GiB/s per CPU core with hardware AES +*/ +template +void hashAes1Rx4(const void *input, size_t inputSize, void *hash) { + const uint8_t* inptr = (uint8_t*)input; + const uint8_t* inputEnd = inptr + inputSize; + + rx_vec_i128 state0, state1, state2, state3; + rx_vec_i128 in0, in1, in2, in3; + + //initial state + state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0); + state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1); + state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2); + state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3); + + //process 64 bytes at a time in 4 lanes + while (inptr < inputEnd) { + in0 = rx_load_vec_i128((rx_vec_i128*)inptr + 0); + in1 = rx_load_vec_i128((rx_vec_i128*)inptr + 1); + in2 = rx_load_vec_i128((rx_vec_i128*)inptr + 2); + in3 = rx_load_vec_i128((rx_vec_i128*)inptr + 3); + + state0 = aesenc(state0, in0); + state1 = aesdec(state1, in1); + state2 = aesenc(state2, in2); + state3 = aesdec(state3, in3); + + inptr += 64; + } + + //two extra rounds to achieve full diffusion + rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0); + rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1); + + state0 = aesenc(state0, xkey0); + state1 = aesdec(state1, xkey0); + state2 =
aesenc(state2, xkey0); + state3 = aesdec(state3, xkey0); + + state0 = aesenc(state0, xkey1); + state1 = aesdec(state1, xkey1); + state2 = aesenc(state2, xkey1); + state3 = aesdec(state3, xkey1); + + //output hash + rx_store_vec_i128((rx_vec_i128*)hash + 0, state0); + rx_store_vec_i128((rx_vec_i128*)hash + 1, state1); + rx_store_vec_i128((rx_vec_i128*)hash + 2, state2); + rx_store_vec_i128((rx_vec_i128*)hash + 3, state3); +} + +/* NOTE(review): the two explicit instantiations below read identically in this copy of the patch; their template arguments appear to be missing - confirm against the upstream file. */ +template void hashAes1Rx4(const void *input, size_t inputSize, void *hash); +template void hashAes1Rx4(const void *input, size_t inputSize, void *hash); + +#define AES_GEN_1R_KEY0 0xb4f44917, 0xdbb5552b, 0x62716609, 0x6daca553 +#define AES_GEN_1R_KEY1 0x0da1dc4e, 0x1725d378, 0x846a710d, 0x6d7caf07 +#define AES_GEN_1R_KEY2 0x3e20e345, 0xf4c0794f, 0x9f947ec6, 0x3f1262f1 +#define AES_GEN_1R_KEY3 0x49169154, 0x16314c88, 0xb1ba317c, 0x6aef8135 + +/* + Fill 'buffer' with pseudorandom data based on 512-bit 'state'. + The state is encrypted using a single AES round per 16 bytes of output + in 4 lanes. + + 'outputSize' must be a multiple of 64. + + The modified state is written back to 'state' to allow multiple + calls to this function. 
+*/ +/* Lanes 0 and 2 use aesdec and lanes 1 and 3 use aesenc, each lane with its own key (AES_GEN_1R_KEY0..3). 'outptr' is declared const but is written through the rx_vec_i128 casts in the loop. */ +template +void fillAes1Rx4(void *state, size_t outputSize, void *buffer) { + const uint8_t* outptr = (uint8_t*)buffer; + const uint8_t* outputEnd = outptr + outputSize; + + rx_vec_i128 state0, state1, state2, state3; + rx_vec_i128 key0, key1, key2, key3; + + key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0); + key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1); + key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2); + key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3); + + state0 = rx_load_vec_i128((rx_vec_i128*)state + 0); + state1 = rx_load_vec_i128((rx_vec_i128*)state + 1); + state2 = rx_load_vec_i128((rx_vec_i128*)state + 2); + state3 = rx_load_vec_i128((rx_vec_i128*)state + 3); + + while (outptr < outputEnd) { + state0 = aesdec(state0, key0); + state1 = aesenc(state1, key1); + state2 = aesdec(state2, key2); + state3 = aesenc(state3, key3); + + rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0); + rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1); + rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2); + rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3); + + outptr += 64; + } + + rx_store_vec_i128((rx_vec_i128*)state + 0, state0); + rx_store_vec_i128((rx_vec_i128*)state + 1, state1); + rx_store_vec_i128((rx_vec_i128*)state + 2, state2); + rx_store_vec_i128((rx_vec_i128*)state + 3, state3); +} + +template void fillAes1Rx4(void *state, size_t outputSize, void *buffer); +template void fillAes1Rx4(void *state, size_t outputSize, void *buffer); + +#define AES_GEN_4R_KEY0 0xcf359e95, 0x141f82b7, 0x7ffbe4a6, 0xf890465d +#define AES_GEN_4R_KEY1 0x6741ffdc, 0xbd5c5ac3, 0xfee8278a, 0x6a55c450 +#define AES_GEN_4R_KEY2 0x3d324aac, 0xa7279ad2, 0xd524fde4, 0x114c47a4 +#define AES_GEN_4R_KEY3 0x76f6db08, 0x42d3dbd9, 0x99a9aeff, 0x810c3a2a + +/* + Fill 'buffer' with data derived from the four rx_vec_i128 values loaded + from 'state', applying four AES rounds per 16 bytes of output in 4 lanes. + Each round uses one of AES_GEN_4R_KEY0..3, in that order, and the same + key is used for all four lanes (aesdec on lanes 0 and 2, aesenc on + lanes 1 and 3). + + Output is written in 64-byte steps, so 'outputSize' is expected to be a + multiple of 64, as for fillAes1Rx4. + + Unlike fillAes1Rx4, the modified state is not written back to 'state'. +*/ +template +void fillAes4Rx4(void *state, size_t outputSize, void *buffer) { + const uint8_t* outptr = (uint8_t*)buffer; + const uint8_t* outputEnd = outptr + outputSize; + + rx_vec_i128 state0, state1, state2, state3; + rx_vec_i128 key0, key1, key2, key3; + +
key0 = rx_set_int_vec_i128(AES_GEN_4R_KEY0); + key1 = rx_set_int_vec_i128(AES_GEN_4R_KEY1); + key2 = rx_set_int_vec_i128(AES_GEN_4R_KEY2); + key3 = rx_set_int_vec_i128(AES_GEN_4R_KEY3); + + state0 = rx_load_vec_i128((rx_vec_i128*)state + 0); + state1 = rx_load_vec_i128((rx_vec_i128*)state + 1); + state2 = rx_load_vec_i128((rx_vec_i128*)state + 2); + state3 = rx_load_vec_i128((rx_vec_i128*)state + 3); + + while (outptr < outputEnd) { + state0 = aesdec(state0, key0); + state1 = aesenc(state1, key0); + state2 = aesdec(state2, key0); + state3 = aesenc(state3, key0); + + state0 = aesdec(state0, key1); + state1 = aesenc(state1, key1); + state2 = aesdec(state2, key1); + state3 = aesenc(state3, key1); + + state0 = aesdec(state0, key2); + state1 = aesenc(state1, key2); + state2 = aesdec(state2, key2); + state3 = aesenc(state3, key2); + + state0 = aesdec(state0, key3); + state1 = aesenc(state1, key3); + state2 = aesdec(state2, key3); + state3 = aesenc(state3, key3); + + rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0); + rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1); + rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2); + rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3); + + outptr += 64; + } +} + +template void fillAes4Rx4(void *state, size_t outputSize, void *buffer); +template void fillAes4Rx4(void *state, size_t outputSize, void *buffer); diff --git a/RandomX/src/aes_hash.hpp b/RandomX/src/aes_hash.hpp new file mode 100644 index 00000000..b4d0e940 --- /dev/null +++ b/RandomX/src/aes_hash.hpp @@ -0,0 +1,40 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +#include + +/* 512-bit AES hash of 'input' written to 'hash'; 'inputSize' must be a multiple of 64. */ +template +void hashAes1Rx4(const void *input, size_t inputSize, void *hash); + +/* Fills 'buffer' from 'state' with one AES round per 16 bytes of output and writes the updated state back to 'state'. */ +template +void fillAes1Rx4(void *state, size_t outputSize, void *buffer); + +/* Fills 'buffer' from 'state' with four AES rounds per 16 bytes of output; 'state' itself is not written. */ +template +void fillAes4Rx4(void *state, size_t outputSize, void *buffer); diff --git a/RandomX/src/allocator.cpp b/RandomX/src/allocator.cpp new file mode 100644 index 00000000..2ddbed98 --- /dev/null +++ b/RandomX/src/allocator.cpp @@ -0,0 +1,60 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include +#include "allocator.hpp" +#include "intrin_portable.h" +#include "virtual_memory.hpp" +#include "common.hpp" + +namespace randomx { + + /* Allocates 'count' bytes with rx_aligned_alloc using 'alignment'; throws std::bad_alloc when the allocation returns nullptr. */ + template + void* AlignedAllocator::allocMemory(size_t count) { + void *mem = rx_aligned_alloc(count, alignment); + if (mem == nullptr) + throw std::bad_alloc(); + return mem; + } + + /* Releases 'ptr' with rx_aligned_free; 'count' is not used. */ + template + void AlignedAllocator::freeMemory(void* ptr, size_t count) { + rx_aligned_free(ptr); + } + + template class AlignedAllocator; + + /* Forwards to allocLargePagesMemory(count) and returns its result unchanged. */ + void* LargePageAllocator::allocMemory(size_t count) { + return allocLargePagesMemory(count); + } + + /* Forwards to freePagedMemory(ptr, count). */ + void LargePageAllocator::freeMemory(void* ptr, size_t count) { + freePagedMemory(ptr, count); + }; + +} \ No newline at end of file diff --git a/RandomX/src/allocator.hpp b/RandomX/src/allocator.hpp new file mode 100644 index 00000000..d7aa3f95 --- /dev/null +++ b/RandomX/src/allocator.hpp @@ -0,0 +1,46 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +#include + +namespace randomx { + + /* Static allocator interface; the definitions in allocator.cpp use rx_aligned_alloc and rx_aligned_free. */ + template + struct AlignedAllocator { + static void* allocMemory(size_t); + static void freeMemory(void*, size_t); + }; + + /* Static allocator interface; the definitions in allocator.cpp forward to allocLargePagesMemory and freePagedMemory. */ + struct LargePageAllocator { + static void* allocMemory(size_t); + static void freeMemory(void*, size_t); + }; + +} \ No newline at end of file diff --git a/RandomX/src/argon2.h b/RandomX/src/argon2.h new file mode 100644 index 00000000..9d427159 --- /dev/null +++ b/RandomX/src/argon2.h @@ -0,0 +1,229 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* Original code from Argon2 reference source code package used under CC0 Licence + * https://github.com/P-H-C/phc-winner-argon2 + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves +*/ + +#pragma once + +#include +#include +#include + +/* + * Argon2 input parameter restrictions + */ + + /* Minimum and maximum number of lanes (degree of parallelism) */ +#define ARGON2_MIN_LANES UINT32_C(1) +#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF) + +/* Minimum and maximum number of threads */ +#define ARGON2_MIN_THREADS UINT32_C(1) +#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF) + +/* Number of synchronization points between lanes per pass */ +#define ARGON2_SYNC_POINTS UINT32_C(4) + +/* Minimum and maximum digest size in bytes */ +#define ARGON2_MIN_OUTLEN UINT32_C(4) +#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */ +#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */ + +#define ARGON2_MIN(a, b) ((a) < (b) ? 
(a) : (b)) +/* Max memory size is addressing-space/2, topping at 2^32 blocks (4 TB) */ +#define ARGON2_MAX_MEMORY_BITS \ + ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1)) +#define ARGON2_MAX_MEMORY \ + ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS) + +/* Minimum and maximum number of passes */ +#define ARGON2_MIN_TIME UINT32_C(1) +#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum password length in bytes */ +#define ARGON2_MIN_PWD_LENGTH UINT32_C(0) +#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum associated data length in bytes */ +#define ARGON2_MIN_AD_LENGTH UINT32_C(0) +#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum salt length in bytes */ +#define ARGON2_MIN_SALT_LENGTH UINT32_C(8) +#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum key length in bytes */ +#define ARGON2_MIN_SECRET UINT32_C(0) +#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF) + +/* Flags to determine which fields are securely wiped (default = no wipe). 
*/ +#define ARGON2_DEFAULT_FLAGS UINT32_C(0) +#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0) +#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1) + + +/* Error codes */ +typedef enum Argon2_ErrorCodes { + ARGON2_OK = 0, + + ARGON2_OUTPUT_PTR_NULL = -1, + + ARGON2_OUTPUT_TOO_SHORT = -2, + ARGON2_OUTPUT_TOO_LONG = -3, + + ARGON2_PWD_TOO_SHORT = -4, + ARGON2_PWD_TOO_LONG = -5, + + ARGON2_SALT_TOO_SHORT = -6, + ARGON2_SALT_TOO_LONG = -7, + + ARGON2_AD_TOO_SHORT = -8, + ARGON2_AD_TOO_LONG = -9, + + ARGON2_SECRET_TOO_SHORT = -10, + ARGON2_SECRET_TOO_LONG = -11, + + ARGON2_TIME_TOO_SMALL = -12, + ARGON2_TIME_TOO_LARGE = -13, + + ARGON2_MEMORY_TOO_LITTLE = -14, + ARGON2_MEMORY_TOO_MUCH = -15, + + ARGON2_LANES_TOO_FEW = -16, + ARGON2_LANES_TOO_MANY = -17, + + ARGON2_PWD_PTR_MISMATCH = -18, /* NULL ptr with non-zero length */ + ARGON2_SALT_PTR_MISMATCH = -19, /* NULL ptr with non-zero length */ + ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */ + ARGON2_AD_PTR_MISMATCH = -21, /* NULL ptr with non-zero length */ + + ARGON2_MEMORY_ALLOCATION_ERROR = -22, + + ARGON2_FREE_MEMORY_CBK_NULL = -23, + ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24, + + ARGON2_INCORRECT_PARAMETER = -25, + ARGON2_INCORRECT_TYPE = -26, + + ARGON2_OUT_PTR_MISMATCH = -27, + + ARGON2_THREADS_TOO_FEW = -28, + ARGON2_THREADS_TOO_MANY = -29, + + ARGON2_MISSING_ARGS = -30, + + ARGON2_ENCODING_FAIL = -31, + + ARGON2_DECODING_FAIL = -32, + + ARGON2_THREAD_FAIL = -33, + + ARGON2_DECODING_LENGTH_FAIL = -34, + + ARGON2_VERIFY_MISMATCH = -35 +} argon2_error_codes; + +/* Memory allocator types --- for external allocation */ +typedef int(*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate); +typedef void(*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate); + +/* Argon2 external data structures */ + +/* + ***** + * Context: structure to hold Argon2 inputs: + * output array and its length, + * password and its length, + * salt and its length, + * secret and its length, + * associated 
data and its length, + * number of passes, amount of used memory (in KBytes, can be rounded up a bit) + * number of parallel threads that will be run. + * All the parameters above affect the output hash value. + * Additionally, two function pointers can be provided to allocate and + * deallocate the memory (if NULL, memory will be allocated internally). + * Also, three flags indicate whether to erase password, secret as soon as they + * are pre-hashed (and thus not needed anymore), and the entire memory + * NOTE(review): this header defines only two flag bits, + * ARGON2_FLAG_CLEAR_PASSWORD and ARGON2_FLAG_CLEAR_SECRET; no flag for wiping + * the entire memory is visible here. + ***** + * Simplest situation: you have output array out[8], password is stored in + * pwd[32], salt is stored in salt[16], you do not have keys nor associated + * data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with + * 4 parallel lanes. + * You want to erase the password, but you're OK with last pass not being + * erased. You want to use the default memory allocator. + * Then you initialize: + Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false) + * NOTE(review): the argument list above does not line up with the struct + * below, which has a 'version' field and a single 'flags' bit mask instead + * of four booleans. + */ +typedef struct Argon2_Context { + uint8_t *out; /* output array */ + uint32_t outlen; /* digest length */ + + uint8_t *pwd; /* password array */ + uint32_t pwdlen; /* password length */ + + uint8_t *salt; /* salt array */ + uint32_t saltlen; /* salt length */ + + uint8_t *secret; /* key array */ + uint32_t secretlen; /* key length */ + + uint8_t *ad; /* associated data array */ + uint32_t adlen; /* associated data length */ + + uint32_t t_cost; /* number of passes */ + uint32_t m_cost; /* amount of memory requested (KB) */ + uint32_t lanes; /* number of lanes */ + uint32_t threads; /* maximum number of threads */ + + uint32_t version; /* version number */ + + allocate_fptr allocate_cbk; /* pointer to memory allocator */ + deallocate_fptr free_cbk; /* pointer to memory deallocator */ + + uint32_t flags; /* bit mask of ARGON2_FLAG_ options (ARGON2_DEFAULT_FLAGS = none) */ +} argon2_context; + +/* Argon2 primitive type */ +typedef enum Argon2_type { + Argon2_d = 0, + Argon2_i = 1, + Argon2_id = 2 
+} argon2_type; + +/* Version of the algorithm */ +typedef enum Argon2_version { + ARGON2_VERSION_10 = 0x10, + ARGON2_VERSION_13 = 0x13, + ARGON2_VERSION_NUMBER = ARGON2_VERSION_13 +} argon2_version; diff --git a/RandomX/src/argon2_core.c b/RandomX/src/argon2_core.c new file mode 100644 index 00000000..e9174222 --- /dev/null +++ b/RandomX/src/argon2_core.c @@ -0,0 +1,516 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +/* Original code from Argon2 reference source code package used under CC0 Licence + * https://github.com/P-H-C/phc-winner-argon2 + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves +*/ + + /*For memory wiping*/ +#ifdef _MSC_VER +#include +#include /* For SecureZeroMemory */ +#endif +#if defined __STDC_LIB_EXT1__ +#define __STDC_WANT_LIB_EXT1__ 1 +#endif +#define VC_GE_2005(version) (version >= 1400) + +#include +#include +#include + +#include "argon2_core.h" +#include "blake2/blake2.h" +#include "blake2/blake2-impl.h" + +#ifdef GENKAT +#include "genkat.h" +#endif + +#if defined(__clang__) +#if __has_attribute(optnone) +#define NOT_OPTIMIZED __attribute__((optnone)) +#endif +#elif defined(__GNUC__) +#define GCC_VERSION \ + (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#if GCC_VERSION >= 40400 +#define NOT_OPTIMIZED __attribute__((optimize("O0"))) +#endif +#endif +#ifndef NOT_OPTIMIZED +#define NOT_OPTIMIZED +#endif + +/***************Instance and Position constructors**********/ +/* Fill every byte of b->v with 'in'. */ +void rxa2_init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); } + +/* Copy ARGON2_QWORDS_IN_BLOCK 64-bit words from src->v to dst->v. */ +void rxa2_copy_block(block *dst, const block *src) { + memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK); +} + +/* XOR src->v into dst->v word by word. */ +void rxa2_xor_block(block *dst, const block *src) { + int i; + for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + dst->v[i] ^= src->v[i]; + } +} + +/* Read ARGON2_QWORDS_IN_BLOCK words from the byte buffer 'input' into dst->v, one load64 call per word. */ +static void load_block(block *dst, const void *input) { + unsigned i; + for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i])); + } +} + +/* Write the words of src->v to the byte buffer 'output', one store64 call per word. */ +static void store_block(void *output, const block *src) { + unsigned i; + for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]); + } +} + +/***************Memory functions*****************/ + +/* + * Allocates num * size bytes into *memory, using context->allocate_cbk when it + * is set and malloc otherwise. + * Returns ARGON2_MEMORY_ALLOCATION_ERROR when 'memory' is NULL, when + * num * size overflows size_t, or when *memory is NULL after the allocation + * attempt; returns ARGON2_OK otherwise. + */ +int rxa2_allocate_memory(const argon2_context *context, uint8_t **memory, + size_t num, size_t size) { + size_t
memory_size = num * size; + if (memory == NULL) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + /* 1. Check for multiplication overflow */ + if (size != 0 && memory_size / size != num) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + /* 2. Try to allocate with appropriate allocator */ + if (context->allocate_cbk) { + /* the callback's int result is ignored; only *memory is checked below */ + (context->allocate_cbk)(memory, memory_size); + } + else { + *memory = (uint8_t*)malloc(memory_size); + } + + if (*memory == NULL) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + return ARGON2_OK; +} + +/* Passes the num * size byte region to rxa2_clear_internal_memory, then releases it with context->free_cbk when set and free() otherwise. */ +void rxa2_free_memory(const argon2_context *context, uint8_t *memory, + size_t num, size_t size) { + size_t memory_size = num * size; + rxa2_clear_internal_memory(memory, memory_size); + if (context->free_cbk) { + (context->free_cbk)(memory, memory_size); + } + else { + free(memory); + } +} + +/* Zeroes n bytes at v with SecureZeroMemory, memset_s, explicit_bzero, or a call to memset through a volatile function pointer, depending on the platform macros tested below. */ +void NOT_OPTIMIZED rxa2_secure_wipe_memory(void *v, size_t n) { +#if defined(_MSC_VER) && VC_GE_2005(_MSC_VER) + SecureZeroMemory(v, n); +#elif defined memset_s + memset_s(v, n, 0, n); +#elif defined(__OpenBSD__) + explicit_bzero(v, n); +#else + static void *(*const volatile memset_sec)(void *, int, size_t) = &memset; + memset_sec(v, 0, n); +#endif +} + +/* Memory clear flag. NOTE: it is defined as 0 below, so internal memory is not wiped. 
*/ +#define FLAG_clear_internal_memory 0 +/* Wipes n bytes at v through rxa2_secure_wipe_memory only when FLAG_clear_internal_memory is non-zero and v is non-NULL; with the value 0 above this function does nothing. */ +void rxa2_clear_internal_memory(void *v, size_t n) { + if (FLAG_clear_internal_memory && v) { + rxa2_secure_wipe_memory(v, n); + } +} + +/* + * Maps pseudo_rand to the in-lane index of the reference block for the block + * at 'position'. 'same_lane' is non-zero when the reference lane is the lane + * being filled. The result is reduced modulo instance->lane_length. + */ +uint32_t rxa2_index_alpha(const argon2_instance_t *instance, + const argon2_position_t *position, uint32_t pseudo_rand, + int same_lane) { + /* + * Pass 0: + * This lane : all already finished segments plus already constructed + * blocks in this segment + * Other lanes : all already finished segments + * Pass 1+: + * This lane : (SYNC_POINTS - 1) last segments plus already constructed + * blocks in this segment + * Other lanes : (SYNC_POINTS - 1) last segments + */ + uint32_t reference_area_size; + uint64_t relative_position; + uint32_t start_position, absolute_position; + + if (0 == position->pass) { + /* First pass */ + if (0 == position->slice) { + /* First slice */ + reference_area_size = + position->index - 1; /* all but the previous */ + } + else { + if (same_lane) { + /* The same lane => add current segment */ + reference_area_size = + position->slice * instance->segment_length + + position->index - 1; + } + else { + reference_area_size = + position->slice * instance->segment_length + + ((position->index == 0) ? (-1) : 0); + } + } + } + else { + /* Second and later passes */ + if (same_lane) { + reference_area_size = instance->lane_length - + instance->segment_length + position->index - + 1; + } + else { + reference_area_size = instance->lane_length - + instance->segment_length + + ((position->index == 0) ? (-1) : 0); + } + } + + /* 1.2.4. Mapping pseudo_rand to 0..(reference_area_size - 1) and produce + * relative position */ + relative_position = pseudo_rand; + relative_position = relative_position * relative_position >> 32; + relative_position = reference_area_size - 1 - + (reference_area_size * relative_position >> 32); + + /* 1.2.5 Computing starting position */ + start_position = 0; + + if (0 != position->pass) { + start_position = (position->slice == ARGON2_SYNC_POINTS - 1) + ?
0 + : (position->slice + 1) * instance->segment_length; + } + + /* 1.2.6. Computing absolute position */ + absolute_position = (start_position + relative_position) % + instance->lane_length; /* absolute position */ + return absolute_position; +} + +/* Single-threaded version for p=1 case: for every pass and every slice, fills the segment of each lane in turn with rxa2_fill_segment. Always returns ARGON2_OK. */ +static int fill_memory_blocks_st(argon2_instance_t *instance) { + uint32_t r, s, l; + + for (r = 0; r < instance->passes; ++r) { + for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { + for (l = 0; l < instance->lanes; ++l) { + argon2_position_t position = { r, l, (uint8_t)s, 0 }; + rxa2_fill_segment(instance, position); + } + } +#ifdef GENKAT + internal_kat(instance, r); /* Print all memory blocks */ +#endif + } + return ARGON2_OK; +} + +/* Returns ARGON2_INCORRECT_PARAMETER when 'instance' is NULL or has zero lanes; otherwise runs fill_memory_blocks_st and returns its result. */ +int rxa2_fill_memory_blocks(argon2_instance_t *instance) { + if (instance == NULL || instance->lanes == 0) { + return ARGON2_INCORRECT_PARAMETER; + } + return fill_memory_blocks_st(instance); +} + +/* Checks the fields of 'context' one by one in the order coded below and returns the error code of the first check that fails, or ARGON2_OK when all pass. */ +int rxa2_validate_inputs(const argon2_context *context) { + if (NULL == context) { + return ARGON2_INCORRECT_PARAMETER; + } + + if (NULL == context->out) { + return ARGON2_OUTPUT_PTR_NULL; + } + + /* Validate output length */ + if (ARGON2_MIN_OUTLEN > context->outlen) { + return ARGON2_OUTPUT_TOO_SHORT; + } + + if (ARGON2_MAX_OUTLEN < context->outlen) { + return ARGON2_OUTPUT_TOO_LONG; + } + + /* Validate password (required param) */ + if (NULL == context->pwd) { + if (0 != context->pwdlen) { + return ARGON2_PWD_PTR_MISMATCH; + } + } + + if (ARGON2_MIN_PWD_LENGTH > context->pwdlen) { + return ARGON2_PWD_TOO_SHORT; + } + + if (ARGON2_MAX_PWD_LENGTH < context->pwdlen) { + return ARGON2_PWD_TOO_LONG; + } + + /* Validate salt (required param) */ + if (NULL == context->salt) { + if (0 != context->saltlen) { + return ARGON2_SALT_PTR_MISMATCH; + } + } + + if (ARGON2_MIN_SALT_LENGTH > context->saltlen) { + return ARGON2_SALT_TOO_SHORT; + } + + if (ARGON2_MAX_SALT_LENGTH < context->saltlen) { + return ARGON2_SALT_TOO_LONG; + } + + /* Validate secret (optional param) 
*/ + if (NULL == context->secret) { + if (0 != context->secretlen) { + return ARGON2_SECRET_PTR_MISMATCH; + } + } + else { + if (ARGON2_MIN_SECRET > context->secretlen) { + return ARGON2_SECRET_TOO_SHORT; + } + if (ARGON2_MAX_SECRET < context->secretlen) { + return ARGON2_SECRET_TOO_LONG; + } + } + + /* Validate associated data (optional param) */ + if (NULL == context->ad) { + if (0 != context->adlen) { + return ARGON2_AD_PTR_MISMATCH; + } + } + else { + if (ARGON2_MIN_AD_LENGTH > context->adlen) { + return ARGON2_AD_TOO_SHORT; + } + if (ARGON2_MAX_AD_LENGTH < context->adlen) { + return ARGON2_AD_TOO_LONG; + } + } + + /* Validate memory cost */ + if (ARGON2_MIN_MEMORY > context->m_cost) { + return ARGON2_MEMORY_TOO_LITTLE; + } + + if (ARGON2_MAX_MEMORY < context->m_cost) { + return ARGON2_MEMORY_TOO_MUCH; + } + + if (context->m_cost < 8 * context->lanes) { + return ARGON2_MEMORY_TOO_LITTLE; + } + + /* Validate time cost */ + if (ARGON2_MIN_TIME > context->t_cost) { + return ARGON2_TIME_TOO_SMALL; + } + + if (ARGON2_MAX_TIME < context->t_cost) { + return ARGON2_TIME_TOO_LARGE; + } + + /* Validate lanes */ + if (ARGON2_MIN_LANES > context->lanes) { + return ARGON2_LANES_TOO_FEW; + } + + if (ARGON2_MAX_LANES < context->lanes) { + return ARGON2_LANES_TOO_MANY; + } + + /* Validate threads */ + if (ARGON2_MIN_THREADS > context->threads) { + return ARGON2_THREADS_TOO_FEW; + } + + if (ARGON2_MAX_THREADS < context->threads) { + return ARGON2_THREADS_TOO_MANY; + } + + if (NULL != context->allocate_cbk && NULL == context->free_cbk) { + return ARGON2_FREE_MEMORY_CBK_NULL; + } + + if (NULL == context->allocate_cbk && NULL != context->free_cbk) { + return ARGON2_ALLOCATE_MEMORY_CBK_NULL; + } + + return ARGON2_OK; +} + +void rxa2_fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) { + uint32_t l; + /* Make the first and second block in each lane as G(H0||0||i) or + G(H0||1||i) */ + uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; + for (l = 0; l < 
instance->lanes; ++l) { + + store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0); + store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l); + rxa2_blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, + ARGON2_PREHASH_SEED_LENGTH); + load_block(&instance->memory[l * instance->lane_length + 0], + blockhash_bytes); + + store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1); + rxa2_blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, + ARGON2_PREHASH_SEED_LENGTH); + load_block(&instance->memory[l * instance->lane_length + 1], + blockhash_bytes); + } + rxa2_clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); +} + +void rxa2_initial_hash(uint8_t *blockhash, argon2_context *context, argon2_type type) { + blake2b_state BlakeHash; + uint8_t value[sizeof(uint32_t)]; + + if (NULL == context || NULL == blockhash) { + return; + } + + blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH); + + store32(&value, context->lanes); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->outlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->m_cost); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->t_cost); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->version); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, (uint32_t)type); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->pwdlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->pwd != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->pwd, + context->pwdlen); + + if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) { + rxa2_secure_wipe_memory(context->pwd, context->pwdlen); + context->pwdlen = 0; + } + } + + store32(&value, context->saltlen); + blake2b_update(&BlakeHash, (const 
uint8_t *)&value, sizeof(value)); + + if (context->salt != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->salt, context->saltlen); + } + + store32(&value, context->secretlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->secret != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->secret, + context->secretlen); + + if (context->flags & ARGON2_FLAG_CLEAR_SECRET) { + rxa2_secure_wipe_memory(context->secret, context->secretlen); + context->secretlen = 0; + } + } + + store32(&value, context->adlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->ad != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->ad, + context->adlen); + } + + blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); +} + +int rxa2_argon_initialize(argon2_instance_t *instance, argon2_context *context) { + uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; + int result = ARGON2_OK; + + if (instance == NULL || context == NULL) + return ARGON2_INCORRECT_PARAMETER; + instance->context_ptr = context; + + /* 1. Memory allocation */ + /*result = allocate_memory(context, (uint8_t **)&(instance->memory), instance->memory_blocks, sizeof(block)); + if (result != ARGON2_OK) { + return result; + }*/ + + /* 2. Initial hashing */ + /* H_0 + 8 extra bytes to produce the first blocks */ + /* uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; */ + /* Hashing all inputs */ + rxa2_initial_hash(blockhash, context, instance->type); + /* Zeroing 8 extra bytes */ + rxa2_clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, + ARGON2_PREHASH_SEED_LENGTH - + ARGON2_PREHASH_DIGEST_LENGTH); + + /* 3. 
Creating first blocks, we always have at least two blocks in a slice + */ + rxa2_fill_first_blocks(blockhash, instance); + /* Clearing the hash */ + rxa2_clear_internal_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH); + + return ARGON2_OK; +} diff --git a/RandomX/src/argon2_core.h b/RandomX/src/argon2_core.h new file mode 100644 index 00000000..efd56d99 --- /dev/null +++ b/RandomX/src/argon2_core.h @@ -0,0 +1,254 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +/* Original code from Argon2 reference source code package used under CC0 Licence + * https://github.com/P-H-C/phc-winner-argon2 + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves +*/ + +#ifndef ARGON2_CORE_H +#define ARGON2_CORE_H + +#include <stdint.h> +#include "argon2.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define CONST_CAST(x) (x)(uintptr_t) + + /**********************Argon2 internal constants*******************************/ + +enum argon2_core_constants { + /* Memory block size in bytes */ + ARGON2_BLOCK_SIZE = 1024, + ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8, + ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16, + ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32, + ARGON2_512BIT_WORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 64, + + /* Number of pseudo-random values generated by one call to Blake in Argon2i + to + generate reference block positions */ + ARGON2_ADDRESSES_IN_BLOCK = 128, + + /* Pre-hashing digest length and its extension*/ + ARGON2_PREHASH_DIGEST_LENGTH = 64, + ARGON2_PREHASH_SEED_LENGTH = 72 +}; + +/*************************Argon2 internal data types***********************/ + +/* + * Structure for the (1KB) memory block implemented as 128 64-bit words. + * Memory blocks can be copied, XORed. Internal words can be accessed by [] (no + * bounds checking). + */ +typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block; + +/*****************Functions that work with the block******************/ + +/* Initialize each byte of the block with @in */ +void rxa2_init_block_value(block *b, uint8_t in); + +/* Copy block @src to block @dst */ +void rxa2_copy_block(block *dst, const block *src); + +/* XOR @src onto @dst bytewise */ +void rxa2_xor_block(block *dst, const block *src); + +/* + * Argon2 instance: memory pointer, number of passes, amount of memory, type, + * and derived values.
+ * Used to evaluate the number and location of blocks to construct in each + * thread + */ +typedef struct Argon2_instance_t { + block *memory; /* Memory pointer */ + uint32_t version; + uint32_t passes; /* Number of passes */ + uint32_t memory_blocks; /* Number of blocks in memory */ + uint32_t segment_length; + uint32_t lane_length; + uint32_t lanes; + uint32_t threads; + argon2_type type; + int print_internals; /* whether to print the memory blocks */ + argon2_context *context_ptr; /* points back to original context */ +} argon2_instance_t; + +/* + * Argon2 position: where we construct the block right now. Used to distribute + * work between threads. + */ +typedef struct Argon2_position_t { + uint32_t pass; + uint32_t lane; + uint8_t slice; + uint32_t index; +} argon2_position_t; + +/*Struct that holds the inputs for thread handling FillSegment*/ +typedef struct Argon2_thread_data { + argon2_instance_t *instance_ptr; + argon2_position_t pos; +} argon2_thread_data; + +/*************************Argon2 core functions********************************/ + +/* Allocates memory to the given pointer, uses the appropriate allocator as + * specified in the context. Total allocated memory is num*size. + * @param context argon2_context which specifies the allocator + * @param memory pointer to the pointer to the memory + * @param size the size in bytes for each element to be allocated + * @param num the number of elements to be allocated + * @return ARGON2_OK if @memory is a valid pointer and memory is allocated + */ +int rxa2_allocate_memory(const argon2_context *context, uint8_t **memory, + size_t num, size_t size); + +/* + * Frees memory at the given pointer, uses the appropriate deallocator as + * specified in the context. Also cleans the memory using clear_internal_memory. 
+ * @param context argon2_context which specifies the deallocator + * @param memory pointer to buffer to be freed + * @param size the size in bytes for each element to be deallocated + * @param num the number of elements to be deallocated + */ +void rxa2_free_memory(const argon2_context *context, uint8_t *memory, + size_t num, size_t size); + +/* Function that securely cleans the memory. This ignores any flags set + * regarding clearing memory. Usually one just calls rxa2_clear_internal_memory. + * @param v Pointer to the memory + * @param n Memory size in bytes + */ +void rxa2_secure_wipe_memory(void *v, size_t n); + +/* Function that securely clears the memory if FLAG_clear_internal_memory is + * set. If the flag isn't set, this function does nothing. + * @param v Pointer to the memory + * @param n Memory size in bytes + */ +void rxa2_clear_internal_memory(void *v, size_t n); + +/* + * Computes absolute position of reference block in the lane following a skewed + * distribution and using a pseudo-random value as input + * @param instance Pointer to the current instance + * @param position Pointer to the current position + * @param pseudo_rand 32-bit pseudo-random value used to determine the position + * @param same_lane Indicates if the block will be taken from the current lane.
+ * If so we can reference the current segment + * @pre All pointers must be valid + */ +uint32_t rxa2_index_alpha(const argon2_instance_t *instance, + const argon2_position_t *position, uint32_t pseudo_rand, + int same_lane); + +/* + * Function that validates all inputs against predefined restrictions and returns + * an error code + * @param context Pointer to current Argon2 context + * @return ARGON2_OK if everything is all right, otherwise one of error codes + * (all defined in <argon2.h>) + */ +int rxa2_validate_inputs(const argon2_context *context); + +/* + * Hashes all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clears + * password and secret if needed + * @param context Pointer to the Argon2 internal structure containing memory + * pointer, and parameters for time and space requirements. + * @param blockhash Buffer for pre-hashing digest + * @param type Argon2 type + * @pre @a blockhash must have at least @a PREHASH_DIGEST_LENGTH bytes + * allocated + */ +void rxa2_initial_hash(uint8_t *blockhash, argon2_context *context, + argon2_type type); + +/* + * Function creates first 2 blocks per lane + * @param instance Pointer to the current instance + * @param blockhash Pointer to the pre-hashing digest + * @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values + */ +void rxa2_fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance); + +/* + * Function hashes the inputs with Blake and creates the first two blocks of + * each lane in instance->memory. The memory allocation step is commented out + * in the implementation, so instance->memory must already be allocated + * @param context Pointer to the Argon2 internal structure containing memory + * pointer, and parameters for time and space requirements. + * @param instance Current Argon2 instance + * @return ARGON2_OK if successful, ARGON2_INCORRECT_PARAMETER if @instance or + * @context is NULL.
+ */ +int rxa2_argon_initialize(argon2_instance_t *instance, argon2_context *context); + +/* + * XORing the last block of each lane, hashing it, making the tag. Deallocates + * the memory. + * @param context Pointer to current Argon2 context (use only the out parameters + * from it) + * @param instance Pointer to current instance of Argon2 + * @pre instance->state must point to necessary amount of memory + * @pre context->out must point to outlen bytes of memory + * @pre if context->free_cbk is not NULL, it should point to a function that + * deallocates memory + */ +void rxa2_finalize(const argon2_context *context, argon2_instance_t *instance); + +/* + * Function that fills the segment using previous segments also from other + * threads + * @param context current context + * @param instance Pointer to the current instance + * @param position Current position + * @pre all block pointers must be valid + */ +void rxa2_fill_segment(const argon2_instance_t *instance, + argon2_position_t position); + +/* + * Function that fills the entire memory t_cost times based on the first two + * blocks in each lane + * @param instance Pointer to the current instance + * @return ARGON2_OK if successful, @context->state + */ +int rxa2_fill_memory_blocks(argon2_instance_t *instance); + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/RandomX/src/argon2_ref.c b/RandomX/src/argon2_ref.c new file mode 100644 index 00000000..018b985b --- /dev/null +++ b/RandomX/src/argon2_ref.c @@ -0,0 +1,214 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* Original code from Argon2 reference source code package used under CC0 Licence + * https://github.com/P-H-C/phc-winner-argon2 + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves +*/ + +#include <stdint.h> +#include <string.h> +#include <stdlib.h> + +#include "argon2.h" +#include "argon2_core.h" + +#include "blake2/blamka-round-ref.h" +#include "blake2/blake2-impl.h" +#include "blake2/blake2.h" + + /* + * Function fills a new memory block and optionally XORs the old block over the new one. + * @next_block must be initialized.
+ * @param prev_block Pointer to the previous block + * @param ref_block Pointer to the reference block + * @param next_block Pointer to the block to be constructed + * @param with_xor Whether to XOR into the new block (1) or just overwrite (0) + * @pre all block pointers must be valid + */ +static void fill_block(const block *prev_block, const block *ref_block, + block *next_block, int with_xor) { + block blockR, block_tmp; + unsigned i; + + rxa2_copy_block(&blockR, ref_block); + rxa2_xor_block(&blockR, prev_block); + rxa2_copy_block(&block_tmp, &blockR); + /* Now blockR = ref_block + prev_block and block_tmp = ref_block + prev_block */ + if (with_xor) { + /* Saving the next block contents for XOR over: */ + rxa2_xor_block(&block_tmp, next_block); + /* Now blockR = ref_block + prev_block and + block_tmp = ref_block + prev_block + next_block */ + } + + /* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then + (16,17,..31)... finally (112,113,...127) */ + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND_NOMSG( + blockR.v[16 * i], blockR.v[16 * i + 1], blockR.v[16 * i + 2], + blockR.v[16 * i + 3], blockR.v[16 * i + 4], blockR.v[16 * i + 5], + blockR.v[16 * i + 6], blockR.v[16 * i + 7], blockR.v[16 * i + 8], + blockR.v[16 * i + 9], blockR.v[16 * i + 10], blockR.v[16 * i + 11], + blockR.v[16 * i + 12], blockR.v[16 * i + 13], blockR.v[16 * i + 14], + blockR.v[16 * i + 15]); + } + + /* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then + (2,3,18,19,...,114,115).. 
finally (14,15,30,31,...,126,127) */ + for (i = 0; i < 8; i++) { + BLAKE2_ROUND_NOMSG( + blockR.v[2 * i], blockR.v[2 * i + 1], blockR.v[2 * i + 16], + blockR.v[2 * i + 17], blockR.v[2 * i + 32], blockR.v[2 * i + 33], + blockR.v[2 * i + 48], blockR.v[2 * i + 49], blockR.v[2 * i + 64], + blockR.v[2 * i + 65], blockR.v[2 * i + 80], blockR.v[2 * i + 81], + blockR.v[2 * i + 96], blockR.v[2 * i + 97], blockR.v[2 * i + 112], + blockR.v[2 * i + 113]); + } + + rxa2_copy_block(next_block, &block_tmp); + rxa2_xor_block(next_block, &blockR); +} + +static void next_addresses(block *address_block, block *input_block, + const block *zero_block) { + input_block->v[6]++; + fill_block(zero_block, input_block, address_block, 0); + fill_block(zero_block, address_block, address_block, 0); +} + +void rxa2_fill_segment(const argon2_instance_t *instance, + argon2_position_t position) { + block *ref_block = NULL, *curr_block = NULL; + block address_block, input_block, zero_block; + uint64_t pseudo_rand, ref_index, ref_lane; + uint32_t prev_offset, curr_offset; + uint32_t starting_index; + uint32_t i; + int data_independent_addressing; + + if (instance == NULL) { + return; + } + + data_independent_addressing = + (instance->type == Argon2_i) || + (instance->type == Argon2_id && (position.pass == 0) && + (position.slice < ARGON2_SYNC_POINTS / 2)); + + if (data_independent_addressing) { + rxa2_init_block_value(&zero_block, 0); + rxa2_init_block_value(&input_block, 0); + + input_block.v[0] = position.pass; + input_block.v[1] = position.lane; + input_block.v[2] = position.slice; + input_block.v[3] = instance->memory_blocks; + input_block.v[4] = instance->passes; + input_block.v[5] = instance->type; + } + + starting_index = 0; + + if ((0 == position.pass) && (0 == position.slice)) { + starting_index = 2; /* we have already generated the first two blocks */ + + /* Don't forget to generate the first block of addresses: */ + if (data_independent_addressing) { + next_addresses(&address_block, 
&input_block, &zero_block); + } + } + + /* Offset of the current block */ + curr_offset = position.lane * instance->lane_length + + position.slice * instance->segment_length + starting_index; + + if (0 == curr_offset % instance->lane_length) { + /* Last block in this lane */ + prev_offset = curr_offset + instance->lane_length - 1; + } + else { + /* Previous block */ + prev_offset = curr_offset - 1; + } + + for (i = starting_index; i < instance->segment_length; + ++i, ++curr_offset, ++prev_offset) { + /*1.1 Rotating prev_offset if needed */ + if (curr_offset % instance->lane_length == 1) { + prev_offset = curr_offset - 1; + } + + /* 1.2 Computing the index of the reference block */ + /* 1.2.1 Taking pseudo-random value from the previous block */ + if (data_independent_addressing) { + if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { + next_addresses(&address_block, &input_block, &zero_block); + } + pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; + } + else { + pseudo_rand = instance->memory[prev_offset].v[0]; + } + + /* 1.2.2 Computing the lane of the reference block */ + ref_lane = ((pseudo_rand >> 32)) % instance->lanes; + + if ((position.pass == 0) && (position.slice == 0)) { + /* Can not reference other lanes yet */ + ref_lane = position.lane; + } + + /* 1.2.3 Computing the number of possible reference block within the + * lane. 
+ */ + position.index = i; + ref_index = rxa2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, + ref_lane == position.lane); + + /* 2 Creating a new block */ + ref_block = + instance->memory + instance->lane_length * ref_lane + ref_index; + curr_block = instance->memory + curr_offset; + if (ARGON2_VERSION_10 == instance->version) { + /* version 1.2.1 and earlier: overwrite, not XOR */ + fill_block(instance->memory + prev_offset, ref_block, curr_block, 0); + } + else { + if (0 == position.pass) { + fill_block(instance->memory + prev_offset, ref_block, + curr_block, 0); + } + else { + fill_block(instance->memory + prev_offset, ref_block, + curr_block, 1); + } + } + } +} diff --git a/RandomX/src/asm/configuration.asm b/RandomX/src/asm/configuration.asm new file mode 100644 index 00000000..f2f2069c --- /dev/null +++ b/RandomX/src/asm/configuration.asm @@ -0,0 +1,48 @@ +; File start: ..\src\configuration.h +RANDOMX_ARGON_MEMORY EQU 262144t +RANDOMX_ARGON_ITERATIONS EQU 3t +RANDOMX_ARGON_LANES EQU 1t +RANDOMX_ARGON_SALT TEXTEQU <"RandomX\x03"> +RANDOMX_CACHE_ACCESSES EQU 8t +RANDOMX_SUPERSCALAR_LATENCY EQU 170t +RANDOMX_DATASET_BASE_SIZE EQU 2147483648t +RANDOMX_DATASET_EXTRA_SIZE EQU 33554368t +RANDOMX_PROGRAM_SIZE EQU 256t +RANDOMX_PROGRAM_ITERATIONS EQU 2048t +RANDOMX_PROGRAM_COUNT EQU 8t +RANDOMX_SCRATCHPAD_L3 EQU 2097152t +RANDOMX_SCRATCHPAD_L2 EQU 262144t +RANDOMX_SCRATCHPAD_L1 EQU 16384t +RANDOMX_JUMP_BITS EQU 8t +RANDOMX_JUMP_OFFSET EQU 8t +RANDOMX_FREQ_IADD_RS EQU 25t +RANDOMX_FREQ_IADD_M EQU 7t +RANDOMX_FREQ_ISUB_R EQU 16t +RANDOMX_FREQ_ISUB_M EQU 7t +RANDOMX_FREQ_IMUL_R EQU 16t +RANDOMX_FREQ_IMUL_M EQU 4t +RANDOMX_FREQ_IMULH_R EQU 4t +RANDOMX_FREQ_IMULH_M EQU 1t +RANDOMX_FREQ_ISMULH_R EQU 4t +RANDOMX_FREQ_ISMULH_M EQU 1t +RANDOMX_FREQ_IMUL_RCP EQU 8t +RANDOMX_FREQ_INEG_R EQU 2t +RANDOMX_FREQ_IXOR_R EQU 15t +RANDOMX_FREQ_IXOR_M EQU 5t +RANDOMX_FREQ_IROR_R EQU 10t +RANDOMX_FREQ_IROL_R EQU 0t +RANDOMX_FREQ_ISWAP_R EQU 4t +RANDOMX_FREQ_FSWAP_R EQU 8t 
+RANDOMX_FREQ_FADD_R EQU 20t +RANDOMX_FREQ_FADD_M EQU 5t +RANDOMX_FREQ_FSUB_R EQU 20t +RANDOMX_FREQ_FSUB_M EQU 5t +RANDOMX_FREQ_FSCAL_R EQU 6t +RANDOMX_FREQ_FMUL_R EQU 20t +RANDOMX_FREQ_FDIV_M EQU 4t +RANDOMX_FREQ_FSQRT_R EQU 6t +RANDOMX_FREQ_CBRANCH EQU 16t +RANDOMX_FREQ_CFROUND EQU 1t +RANDOMX_FREQ_ISTORE EQU 16t +RANDOMX_FREQ_NOP EQU 0t +; File end: ..\src\configuration.h diff --git a/RandomX/src/asm/program_epilogue_linux.inc b/RandomX/src/asm/program_epilogue_linux.inc new file mode 100644 index 00000000..eaacae54 --- /dev/null +++ b/RandomX/src/asm/program_epilogue_linux.inc @@ -0,0 +1,10 @@ + ;# restore callee-saved registers - System V AMD64 ABI + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + + ;# program finished + ret 0 \ No newline at end of file diff --git a/RandomX/src/asm/program_epilogue_store.inc b/RandomX/src/asm/program_epilogue_store.inc new file mode 100644 index 00000000..b94fa4d9 --- /dev/null +++ b/RandomX/src/asm/program_epilogue_store.inc @@ -0,0 +1,19 @@ + ;# save VM register values + pop rcx + mov qword ptr [rcx+0], r8 + mov qword ptr [rcx+8], r9 + mov qword ptr [rcx+16], r10 + mov qword ptr [rcx+24], r11 + mov qword ptr [rcx+32], r12 + mov qword ptr [rcx+40], r13 + mov qword ptr [rcx+48], r14 + mov qword ptr [rcx+56], r15 + movdqa xmmword ptr [rcx+64], xmm0 + movdqa xmmword ptr [rcx+80], xmm1 + movdqa xmmword ptr [rcx+96], xmm2 + movdqa xmmword ptr [rcx+112], xmm3 + lea rcx, [rcx+64] + movdqa xmmword ptr [rcx+64], xmm4 + movdqa xmmword ptr [rcx+80], xmm5 + movdqa xmmword ptr [rcx+96], xmm6 + movdqa xmmword ptr [rcx+112], xmm7 \ No newline at end of file diff --git a/RandomX/src/asm/program_epilogue_win64.inc b/RandomX/src/asm/program_epilogue_win64.inc new file mode 100644 index 00000000..8d70a0a3 --- /dev/null +++ b/RandomX/src/asm/program_epilogue_win64.inc @@ -0,0 +1,24 @@ + ;# restore callee-saved registers - Microsoft x64 calling convention + movdqu xmm15, xmmword ptr [rsp] + movdqu xmm14, xmmword ptr [rsp+16] + movdqu 
xmm13, xmmword ptr [rsp+32] + movdqu xmm12, xmmword ptr [rsp+48] + movdqu xmm11, xmmword ptr [rsp+64] + add rsp, 80 + movdqu xmm10, xmmword ptr [rsp] + movdqu xmm9, xmmword ptr [rsp+16] + movdqu xmm8, xmmword ptr [rsp+32] + movdqu xmm7, xmmword ptr [rsp+48] + movdqu xmm6, xmmword ptr [rsp+64] + add rsp, 80 + pop r15 + pop r14 + pop r13 + pop r12 + pop rsi + pop rdi + pop rbp + pop rbx + + ;# program finished + ret diff --git a/RandomX/src/asm/program_loop_load.inc b/RandomX/src/asm/program_loop_load.inc new file mode 100644 index 00000000..374af66a --- /dev/null +++ b/RandomX/src/asm/program_loop_load.inc @@ -0,0 +1,32 @@ + mov rdx, rax + and eax, RANDOMX_SCRATCHPAD_MASK + lea rcx, [rsi+rax] + push rcx + xor r8, qword ptr [rcx+0] + xor r9, qword ptr [rcx+8] + xor r10, qword ptr [rcx+16] + xor r11, qword ptr [rcx+24] + xor r12, qword ptr [rcx+32] + xor r13, qword ptr [rcx+40] + xor r14, qword ptr [rcx+48] + xor r15, qword ptr [rcx+56] + ror rdx, 32 + and edx, RANDOMX_SCRATCHPAD_MASK + lea rcx, [rsi+rdx] + push rcx + cvtdq2pd xmm0, qword ptr [rcx+0] + cvtdq2pd xmm1, qword ptr [rcx+8] + cvtdq2pd xmm2, qword ptr [rcx+16] + cvtdq2pd xmm3, qword ptr [rcx+24] + cvtdq2pd xmm4, qword ptr [rcx+32] + cvtdq2pd xmm5, qword ptr [rcx+40] + cvtdq2pd xmm6, qword ptr [rcx+48] + cvtdq2pd xmm7, qword ptr [rcx+56] + andps xmm4, xmm13 + andps xmm5, xmm13 + andps xmm6, xmm13 + andps xmm7, xmm13 + orps xmm4, xmm14 + orps xmm5, xmm14 + orps xmm6, xmm14 + orps xmm7, xmm14 diff --git a/RandomX/src/asm/program_loop_store.inc b/RandomX/src/asm/program_loop_store.inc new file mode 100644 index 00000000..53164cb0 --- /dev/null +++ b/RandomX/src/asm/program_loop_store.inc @@ -0,0 +1,19 @@ + xor eax, eax + pop rcx + mov qword ptr [rcx+0], r8 + mov qword ptr [rcx+8], r9 + mov qword ptr [rcx+16], r10 + mov qword ptr [rcx+24], r11 + mov qword ptr [rcx+32], r12 + mov qword ptr [rcx+40], r13 + mov qword ptr [rcx+48], r14 + mov qword ptr [rcx+56], r15 + pop rcx + xorpd xmm0, xmm4 + xorpd xmm1, xmm5 + 
xorpd xmm2, xmm6 + xorpd xmm3, xmm7 + movapd xmmword ptr [rcx+0], xmm0 + movapd xmmword ptr [rcx+16], xmm1 + movapd xmmword ptr [rcx+32], xmm2 + movapd xmmword ptr [rcx+48], xmm3 diff --git a/RandomX/src/asm/program_prologue_linux.inc b/RandomX/src/asm/program_prologue_linux.inc new file mode 100644 index 00000000..ffde152c --- /dev/null +++ b/RandomX/src/asm/program_prologue_linux.inc @@ -0,0 +1,34 @@ + ;# callee-saved registers - System V AMD64 ABI + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + ;# function arguments + mov rbx, rcx ;# loop counter + push rdi ;# RegisterFile& registerFile + mov rcx, rdi + mov rbp, qword ptr [rsi] ;# "mx", "ma" + mov rdi, qword ptr [rsi+8] ;# uint8_t* dataset + mov rsi, rdx ;# uint8_t* scratchpad + + mov rax, rbp + + ;# zero integer registers + xor r8, r8 + xor r9, r9 + xor r10, r10 + xor r11, r11 + xor r12, r12 + xor r13, r13 + xor r14, r14 + xor r15, r15 + + ;# load constant registers + lea rcx, [rcx+120] + movapd xmm8, xmmword ptr [rcx+72] + movapd xmm9, xmmword ptr [rcx+88] + movapd xmm10, xmmword ptr [rcx+104] + movapd xmm11, xmmword ptr [rcx+120] diff --git a/RandomX/src/asm/program_prologue_win64.inc b/RandomX/src/asm/program_prologue_win64.inc new file mode 100644 index 00000000..590a98de --- /dev/null +++ b/RandomX/src/asm/program_prologue_win64.inc @@ -0,0 +1,47 @@ + ;# callee-saved registers - Microsoft x64 calling convention + push rbx + push rbp + push rdi + push rsi + push r12 + push r13 + push r14 + push r15 + sub rsp, 80 + movdqu xmmword ptr [rsp+64], xmm6 + movdqu xmmword ptr [rsp+48], xmm7 + movdqu xmmword ptr [rsp+32], xmm8 + movdqu xmmword ptr [rsp+16], xmm9 + movdqu xmmword ptr [rsp+0], xmm10 + sub rsp, 80 + movdqu xmmword ptr [rsp+64], xmm11 + movdqu xmmword ptr [rsp+48], xmm12 + movdqu xmmword ptr [rsp+32], xmm13 + movdqu xmmword ptr [rsp+16], xmm14 + movdqu xmmword ptr [rsp+0], xmm15 + + ;# function arguments + push rcx ;# RegisterFile& registerFile + mov rbp, qword ptr [rdx] ;# "mx", 
"ma" + mov rdi, qword ptr [rdx+8] ;# uint8_t* dataset + mov rsi, r8 ;# uint8_t* scratchpad + mov rbx, r9 ;# loop counter + + mov rax, rbp + + ;# zero integer registers + xor r8, r8 + xor r9, r9 + xor r10, r10 + xor r11, r11 + xor r12, r12 + xor r13, r13 + xor r14, r14 + xor r15, r15 + + ;# load constant registers + lea rcx, [rcx+120] + movapd xmm8, xmmword ptr [rcx+72] + movapd xmm9, xmmword ptr [rcx+88] + movapd xmm10, xmmword ptr [rcx+104] + movapd xmm11, xmmword ptr [rcx+120] diff --git a/RandomX/src/asm/program_read_dataset.inc b/RandomX/src/asm/program_read_dataset.inc new file mode 100644 index 00000000..b81d0c32 --- /dev/null +++ b/RandomX/src/asm/program_read_dataset.inc @@ -0,0 +1,17 @@ + xor rbp, rax ;# modify "mx" + mov edx, ebp ;# edx = mx + and edx, RANDOMX_DATASET_BASE_MASK + prefetchnta byte ptr [rdi+rdx] + ror rbp, 32 ;# swap "ma" and "mx" + mov edx, ebp ;# edx = ma + and edx, RANDOMX_DATASET_BASE_MASK + lea rcx, [rdi+rdx] ;# dataset cache line + xor r8, qword ptr [rcx+0] + xor r9, qword ptr [rcx+8] + xor r10, qword ptr [rcx+16] + xor r11, qword ptr [rcx+24] + xor r12, qword ptr [rcx+32] + xor r13, qword ptr [rcx+40] + xor r14, qword ptr [rcx+48] + xor r15, qword ptr [rcx+56] + \ No newline at end of file diff --git a/RandomX/src/asm/program_read_dataset_sshash_fin.inc b/RandomX/src/asm/program_read_dataset_sshash_fin.inc new file mode 100644 index 00000000..f5a067d2 --- /dev/null +++ b/RandomX/src/asm/program_read_dataset_sshash_fin.inc @@ -0,0 +1,10 @@ + mov rbx, qword ptr [rsp+64] + xor r8, qword ptr [rsp+56] + xor r9, qword ptr [rsp+48] + xor r10, qword ptr [rsp+40] + xor r11, qword ptr [rsp+32] + xor r12, qword ptr [rsp+24] + xor r13, qword ptr [rsp+16] + xor r14, qword ptr [rsp+8] + xor r15, qword ptr [rsp+0] + add rsp, 72 \ No newline at end of file diff --git a/RandomX/src/asm/program_read_dataset_sshash_init.inc b/RandomX/src/asm/program_read_dataset_sshash_init.inc new file mode 100644 index 00000000..6fe9525d --- /dev/null +++ 
b/RandomX/src/asm/program_read_dataset_sshash_init.inc @@ -0,0 +1,17 @@ + sub rsp, 72 + mov qword ptr [rsp+64], rbx + mov qword ptr [rsp+56], r8 + mov qword ptr [rsp+48], r9 + mov qword ptr [rsp+40], r10 + mov qword ptr [rsp+32], r11 + mov qword ptr [rsp+24], r12 + mov qword ptr [rsp+16], r13 + mov qword ptr [rsp+8], r14 + mov qword ptr [rsp+0], r15 + xor rbp, rax ;# modify "mx" + ror rbp, 32 ;# swap "ma" and "mx" + mov ebx, ebp ;# ecx = ma + and ebx, RANDOMX_DATASET_BASE_MASK + shr ebx, 6 ;# ebx = Dataset block number + ;# add ebx, datasetOffset / 64 + ;# call 32768 \ No newline at end of file diff --git a/RandomX/src/asm/program_sshash_constants.inc b/RandomX/src/asm/program_sshash_constants.inc new file mode 100644 index 00000000..53dc1755 --- /dev/null +++ b/RandomX/src/asm/program_sshash_constants.inc @@ -0,0 +1,24 @@ +r0_mul: + ;#/ 6364136223846793005 + db 45, 127, 149, 76, 45, 244, 81, 88 +r1_add: + ;#/ 9298411001130361340 + db 252, 161, 245, 89, 138, 151, 10, 129 +r2_add: + ;#/ 12065312585734608966 + db 70, 216, 194, 56, 223, 153, 112, 167 +r3_add: + ;#/ 9306329213124626780 + db 92, 73, 34, 191, 28, 185, 38, 129 +r4_add: + ;#/ 5281919268842080866 + db 98, 138, 159, 23, 151, 37, 77, 73 +r5_add: + ;#/ 10536153434571861004 + db 12, 236, 170, 206, 185, 239, 55, 146 +r6_add: + ;#/ 3398623926847679864 + db 120, 45, 230, 108, 116, 86, 42, 47 +r7_add: + ;#/ 9549104520008361294 + db 78, 229, 44, 182, 247, 59, 133, 132 \ No newline at end of file diff --git a/RandomX/src/asm/program_sshash_load.inc b/RandomX/src/asm/program_sshash_load.inc new file mode 100644 index 00000000..53513569 --- /dev/null +++ b/RandomX/src/asm/program_sshash_load.inc @@ -0,0 +1,8 @@ + xor r8, qword ptr [rbx+0] + xor r9, qword ptr [rbx+8] + xor r10, qword ptr [rbx+16] + xor r11, qword ptr [rbx+24] + xor r12, qword ptr [rbx+32] + xor r13, qword ptr [rbx+40] + xor r14, qword ptr [rbx+48] + xor r15, qword ptr [rbx+56] \ No newline at end of file diff --git 
a/RandomX/src/asm/program_sshash_prefetch.inc b/RandomX/src/asm/program_sshash_prefetch.inc new file mode 100644 index 00000000..26efb515 --- /dev/null +++ b/RandomX/src/asm/program_sshash_prefetch.inc @@ -0,0 +1,4 @@ + and rbx, RANDOMX_CACHE_MASK + shl rbx, 6 + add rbx, rdi + prefetchnta byte ptr [rbx] \ No newline at end of file diff --git a/RandomX/src/asm/program_xmm_constants.inc b/RandomX/src/asm/program_xmm_constants.inc new file mode 100644 index 00000000..296237a4 --- /dev/null +++ b/RandomX/src/asm/program_xmm_constants.inc @@ -0,0 +1,6 @@ +mantissaMask: + db 255, 255, 255, 255, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 0 +exp240: + db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +scaleMask: + db 0, 0, 0, 0, 0, 0, 240, 128, 0, 0, 0, 0, 0, 0, 240, 128 \ No newline at end of file diff --git a/RandomX/src/asm/randomx_reciprocal.inc b/RandomX/src/asm/randomx_reciprocal.inc new file mode 100644 index 00000000..e1f22fdc --- /dev/null +++ b/RandomX/src/asm/randomx_reciprocal.inc @@ -0,0 +1,7 @@ + mov edx, 1 + mov r8, rcx + xor eax, eax + bsr rcx, rcx + shl rdx, cl + div r8 + ret \ No newline at end of file diff --git a/RandomX/src/assembly_generator_x86.cpp b/RandomX/src/assembly_generator_x86.cpp new file mode 100644 index 00000000..645fd9da --- /dev/null +++ b/RandomX/src/assembly_generator_x86.cpp @@ -0,0 +1,611 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include +#include "assembly_generator_x86.hpp" +#include "common.hpp" +#include "reciprocal.h" +#include "program.hpp" +#include "superscalar.hpp" + +namespace randomx { + + static const char* regR[] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" }; + static const char* regR32[] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" }; + static const char* regFE[] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" }; + static const char* regF[] = { "xmm0", "xmm1", "xmm2", "xmm3" }; + static const char* regE[] = { "xmm4", "xmm5", "xmm6", "xmm7" }; + static const char* regA[] = { "xmm8", "xmm9", "xmm10", "xmm11" }; + + static const char* tempRegx = "xmm12"; + static const char* mantissaMaskReg = "xmm13"; + static const char* exponentMaskReg = "xmm14"; + static const char* scaleMaskReg = "xmm15"; + static const char* regIc = "rbx"; + static const char* regIc32 = "ebx"; + static const char* regIc8 = "bl"; + static const char* regScratchpadAddr = "rsi"; + + void 
AssemblyGeneratorX86::generateProgram(Program& prog) { + for (unsigned i = 0; i < RegistersCount; ++i) { + registerUsage[i] = -1; + } + asmCode.str(std::string()); //clear + for (unsigned i = 0; i < prog.getSize(); ++i) { + asmCode << "randomx_isn_" << i << ":" << std::endl; + Instruction& instr = prog(i); + instr.src %= RegistersCount; + instr.dst %= RegistersCount; + generateCode(instr, i); + } + } + + void AssemblyGeneratorX86::generateAsm(SuperscalarProgram& prog) { + asmCode.str(std::string()); //clear +#ifdef RANDOMX_ALIGN + asmCode << "ALIGN 16" << std::endl; +#endif + for (unsigned i = 0; i < prog.getSize(); ++i) { + Instruction& instr = prog(i); + switch ((SuperscalarInstructionType)instr.opcode) + { + case SuperscalarInstructionType::ISUB_R: + asmCode << "sub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; + break; + case SuperscalarInstructionType::IXOR_R: + asmCode << "xor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; + break; + case SuperscalarInstructionType::IADD_RS: + asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << "]" << std::endl; + break; + case SuperscalarInstructionType::IMUL_R: + asmCode << "imul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; + break; + case SuperscalarInstructionType::IROR_C: + asmCode << "ror " << regR[instr.dst] << ", " << instr.getImm32() << std::endl; + break; + case SuperscalarInstructionType::IADD_C7: + asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; + break; + case SuperscalarInstructionType::IXOR_C7: + asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; + break; + case SuperscalarInstructionType::IADD_C8: + asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; +#ifdef RANDOMX_ALIGN + asmCode << "nop" << std::endl; +#endif + break; + case SuperscalarInstructionType::IXOR_C8: + 
asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; +#ifdef RANDOMX_ALIGN + asmCode << "nop" << std::endl; +#endif + break; + case SuperscalarInstructionType::IADD_C9: + asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; +#ifdef RANDOMX_ALIGN + asmCode << "xchg ax, ax ;nop" << std::endl; +#endif + break; + case SuperscalarInstructionType::IXOR_C9: + asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; +#ifdef RANDOMX_ALIGN + asmCode << "xchg ax, ax ;nop" << std::endl; +#endif + break; + case SuperscalarInstructionType::IMULH_R: + asmCode << "mov rax, " << regR[instr.dst] << std::endl; + asmCode << "mul " << regR[instr.src] << std::endl; + asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl; + break; + case SuperscalarInstructionType::ISMULH_R: + asmCode << "mov rax, " << regR[instr.dst] << std::endl; + asmCode << "imul " << regR[instr.src] << std::endl; + asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl; + break; + case SuperscalarInstructionType::IMUL_RCP: + asmCode << "mov rax, " << (int64_t)randomx_reciprocal(instr.getImm32()) << std::endl; + asmCode << "imul " << regR[instr.dst] << ", rax" << std::endl; + break; + default: + UNREACHABLE; + } + } + } + + void AssemblyGeneratorX86::generateC(SuperscalarProgram& prog) { + asmCode.str(std::string()); //clear + asmCode << "#include " << std::endl; + asmCode << "#if defined(__SIZEOF_INT128__)" << std::endl; + asmCode << " static inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl; + asmCode << " return ((unsigned __int128)a * b) >> 64;" << std::endl; + asmCode << " }" << std::endl; + asmCode << " static inline int64_t smulh(int64_t a, int64_t b) {" << std::endl; + asmCode << " return ((__int128)a * b) >> 64;" << std::endl; + asmCode << " }" << std::endl; + asmCode << " #define HAVE_MULH" << std::endl; + asmCode << " #define HAVE_SMULH" << std::endl; + asmCode << "#endif" << 
std::endl; + asmCode << "#if defined(_MSC_VER)" << std::endl; + asmCode << " #define HAS_VALUE(X) X ## 0" << std::endl; + asmCode << " #define EVAL_DEFINE(X) HAS_VALUE(X)" << std::endl; + asmCode << " #include " << std::endl; + asmCode << " #include " << std::endl; + asmCode << " static __inline uint64_t rotr(uint64_t x , int c) {" << std::endl; + asmCode << " return _rotr64(x, c);" << std::endl; + asmCode << " }" << std::endl; + asmCode << " #define HAVE_ROTR" << std::endl; + asmCode << " #if EVAL_DEFINE(__MACHINEARM64_X64(1))" << std::endl; + asmCode << " static __inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl; + asmCode << " return __umulh(a, b);" << std::endl; + asmCode << " }" << std::endl; + asmCode << " #define HAVE_MULH" << std::endl; + asmCode << " #endif" << std::endl; + asmCode << " #if EVAL_DEFINE(__MACHINEX64(1))" << std::endl; + asmCode << " static __inline int64_t smulh(int64_t a, int64_t b) {" << std::endl; + asmCode << " int64_t hi;" << std::endl; + asmCode << " _mul128(a, b, &hi);" << std::endl; + asmCode << " return hi;" << std::endl; + asmCode << " }" << std::endl; + asmCode << " #define HAVE_SMULH" << std::endl; + asmCode << " #endif" << std::endl; + asmCode << "#endif" << std::endl; + asmCode << "#ifndef HAVE_ROTR" << std::endl; + asmCode << " static inline uint64_t rotr(uint64_t a, int b) {" << std::endl; + asmCode << " return (a >> b) | (a << (64 - b));" << std::endl; + asmCode << " }" << std::endl; + asmCode << " #define HAVE_ROTR" << std::endl; + asmCode << "#endif" << std::endl; + asmCode << "#if !defined(HAVE_MULH) || !defined(HAVE_SMULH) || !defined(HAVE_ROTR)" << std::endl; + asmCode << " #error \"Required functions are not defined\"" << std::endl; + asmCode << "#endif" << std::endl; + asmCode << "void superScalar(uint64_t r[8]) {" << std::endl; + asmCode << "uint64_t r8 = r[0], r9 = r[1], r10 = r[2], r11 = r[3], r12 = r[4], r13 = r[5], r14 = r[6], r15 = r[7];" << std::endl; + for (unsigned i = 0; i < prog.getSize(); ++i) 
{ + Instruction& instr = prog(i); + switch ((SuperscalarInstructionType)instr.opcode) + { + case SuperscalarInstructionType::ISUB_R: + asmCode << regR[instr.dst] << " -= " << regR[instr.src] << ";" << std::endl; + break; + case SuperscalarInstructionType::IXOR_R: + asmCode << regR[instr.dst] << " ^= " << regR[instr.src] << ";" << std::endl; + break; + case SuperscalarInstructionType::IADD_RS: + asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.getModShift())) << ";" << std::endl; + break; + case SuperscalarInstructionType::IMUL_R: + asmCode << regR[instr.dst] << " *= " << regR[instr.src] << ";" << std::endl; + break; + case SuperscalarInstructionType::IROR_C: + asmCode << regR[instr.dst] << " = rotr(" << regR[instr.dst] << ", " << instr.getImm32() << ");" << std::endl; + break; + case SuperscalarInstructionType::IADD_C7: + case SuperscalarInstructionType::IADD_C8: + case SuperscalarInstructionType::IADD_C9: + asmCode << regR[instr.dst] << " += " << (int32_t)instr.getImm32() << ";" << std::endl; + break; + case SuperscalarInstructionType::IXOR_C7: + case SuperscalarInstructionType::IXOR_C8: + case SuperscalarInstructionType::IXOR_C9: + asmCode << regR[instr.dst] << " ^= " << (int32_t)instr.getImm32() << ";" << std::endl; + break; + case SuperscalarInstructionType::IMULH_R: + asmCode << regR[instr.dst] << " = mulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl; + break; + case SuperscalarInstructionType::ISMULH_R: + asmCode << regR[instr.dst] << " = smulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl; + break; + case SuperscalarInstructionType::IMUL_RCP: + asmCode << regR[instr.dst] << " *= " << (int64_t)randomx_reciprocal(instr.getImm32()) << ";" << std::endl; + break; + default: + UNREACHABLE; + } + } + asmCode << "r[0] = r8; r[1] = r9; r[2] = r10; r[3] = r11; r[4] = r12; r[5] = r13; r[6] = r14; r[7] = r15;" << std::endl; + asmCode << "}" << std::endl; + } + + void 
AssemblyGeneratorX86::traceint(Instruction& instr) { + if (trace) { + asmCode << "\tpush " << regR[instr.dst] << std::endl; + } + } + + void AssemblyGeneratorX86::traceflt(Instruction& instr) { + if (trace) { + asmCode << "\tpush 0" << std::endl; + } + } + + void AssemblyGeneratorX86::tracenop(Instruction& instr) { + if (trace) { + asmCode << "\tpush 0" << std::endl; + } + } + + void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) { + asmCode << "\t; " << instr; + auto generator = engine[instr.opcode]; + (this->*generator)(instr, i); + } + + void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") { + asmCode << "\tlea " << reg << ", [" << regR32[instr.src] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; + asmCode << "\tand " << reg << ", " << ((instr.getModMem()) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl; + } + + void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) { + asmCode << "\tlea eax, [" << regR32[instr.dst] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; + int mask; + if (instr.getModCond() < StoreL3Condition) { + mask = instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask; + } + else { + mask = ScratchpadL3Mask; + } + asmCode << "\tand eax" << ", " << (mask & (-maskAlign)) << std::endl; + } + + int32_t AssemblyGeneratorX86::genAddressImm(Instruction& instr) { + return (int32_t)instr.getImm32() & ScratchpadL3Mask; + } + + void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if(instr.dst == RegisterNeedsDisplacement) + asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; + else + asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << "]" << std::endl; + traceint(instr); + } + + void AssemblyGeneratorX86::h_IADD_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + genAddressReg(instr); + asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; + } + else { + asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl; + } + traceint(instr); + } + + void AssemblyGeneratorX86::h_ISUB_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + asmCode << "\tsub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; + } + else { + asmCode << "\tsub " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; + } + traceint(instr); + } + + void AssemblyGeneratorX86::h_ISUB_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + genAddressReg(instr); + asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; + } + else { + asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << 
genAddressImm(instr) << "]" << std::endl; + } + traceint(instr); + } + + void AssemblyGeneratorX86::h_IMUL_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + asmCode << "\timul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; + } + else { + asmCode << "\timul " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; + } + traceint(instr); + } + + void AssemblyGeneratorX86::h_IMUL_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + genAddressReg(instr); + asmCode << "\timul " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; + } + else { + asmCode << "\timul " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl; + } + traceint(instr); + } + + void AssemblyGeneratorX86::h_IMULH_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; + asmCode << "\tmul " << regR[instr.src] << std::endl; + asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; + traceint(instr); + } + + void AssemblyGeneratorX86::h_IMULH_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + genAddressReg(instr, "ecx"); + asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; + asmCode << "\tmul qword ptr [" << regScratchpadAddr << "+rcx]" << std::endl; + } + else { + asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; + asmCode << "\tmul qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl; + } + asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; + traceint(instr); + } + + void AssemblyGeneratorX86::h_ISMULH_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; + asmCode << "\timul " << regR[instr.src] << std::endl; + asmCode << "\tmov " << regR[instr.dst] 
<< ", rdx" << std::endl; + traceint(instr); + } + + void AssemblyGeneratorX86::h_ISMULH_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + genAddressReg(instr, "ecx"); + asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; + asmCode << "\timul qword ptr [" << regScratchpadAddr << "+rcx]" << std::endl; + } + else { + asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; + asmCode << "\timul qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl; + } + asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; + traceint(instr); + } + + void AssemblyGeneratorX86::h_INEG_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + asmCode << "\tneg " << regR[instr.dst] << std::endl; + traceint(instr); + } + + void AssemblyGeneratorX86::h_IXOR_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + asmCode << "\txor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; + } + else { + asmCode << "\txor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; + } + traceint(instr); + } + + void AssemblyGeneratorX86::h_IXOR_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + genAddressReg(instr); + asmCode << "\txor " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; + } + else { + asmCode << "\txor " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl; + } + traceint(instr); + } + + void AssemblyGeneratorX86::h_IROR_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl; + asmCode << "\tror " << regR[instr.dst] << ", cl" << std::endl; + } + else { + asmCode << "\tror " << regR[instr.dst] << ", " << (instr.getImm32() & 63) << std::endl; + } + traceint(instr); + } + + void 
AssemblyGeneratorX86::h_IROL_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl; + asmCode << "\trol " << regR[instr.dst] << ", cl" << std::endl; + } + else { + asmCode << "\trol " << regR[instr.dst] << ", " << (instr.getImm32() & 63) << std::endl; + } + traceint(instr); + } + + void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) { + uint64_t divisor = instr.getImm32(); + if (!isPowerOf2(divisor)) { + registerUsage[instr.dst] = i; + asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl; + asmCode << "\timul " << regR[instr.dst] << ", rax" << std::endl; + traceint(instr); + } + else { + tracenop(instr); + } + } + + void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) { + if (instr.src != instr.dst) { + registerUsage[instr.dst] = i; + registerUsage[instr.src] = i; + asmCode << "\txchg " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; + traceint(instr); + } + else { + tracenop(instr); + } + } + + void AssemblyGeneratorX86::h_FSWAP_R(Instruction& instr, int i) { + asmCode << "\tshufpd " << regFE[instr.dst] << ", " << regFE[instr.dst] << ", 1" << std::endl; + traceflt(instr); + } + + void AssemblyGeneratorX86::h_FADD_R(Instruction& instr, int i) { + instr.dst %= RegisterCountFlt; + instr.src %= RegisterCountFlt; + asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl; + traceflt(instr); + } + + void AssemblyGeneratorX86::h_FADD_M(Instruction& instr, int i) { + instr.dst %= RegisterCountFlt; + genAddressReg(instr); + asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; + asmCode << "\taddpd " << regF[instr.dst] << ", " << tempRegx << std::endl; + traceflt(instr); + } + + void AssemblyGeneratorX86::h_FSUB_R(Instruction& instr, int i) { + instr.dst %= RegisterCountFlt; + instr.src %= RegisterCountFlt; + asmCode << "\tsubpd " << regF[instr.dst] 
<< ", " << regA[instr.src] << std::endl; + traceflt(instr); + } + + void AssemblyGeneratorX86::h_FSUB_M(Instruction& instr, int i) { + instr.dst %= RegisterCountFlt; + genAddressReg(instr); + asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; + asmCode << "\tsubpd " << regF[instr.dst] << ", " << tempRegx << std::endl; + traceflt(instr); + } + + void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) { + instr.dst %= RegisterCountFlt; + asmCode << "\txorps " << regF[instr.dst] << ", " << scaleMaskReg << std::endl; + traceflt(instr); + } + + void AssemblyGeneratorX86::h_FMUL_R(Instruction& instr, int i) { + instr.dst %= RegisterCountFlt; + instr.src %= RegisterCountFlt; + asmCode << "\tmulpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl; + traceflt(instr); + } + + void AssemblyGeneratorX86::h_FDIV_M(Instruction& instr, int i) { + instr.dst %= RegisterCountFlt; + genAddressReg(instr); + asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; + asmCode << "\tandps " << tempRegx << ", " << mantissaMaskReg << std::endl; + asmCode << "\torps " << tempRegx << ", " << exponentMaskReg << std::endl; + asmCode << "\tdivpd " << regE[instr.dst] << ", " << tempRegx << std::endl; + traceflt(instr); + } + + void AssemblyGeneratorX86::h_FSQRT_R(Instruction& instr, int i) { + instr.dst %= RegisterCountFlt; + asmCode << "\tsqrtpd " << regE[instr.dst] << ", " << regE[instr.dst] << std::endl; + traceflt(instr); + } + + void AssemblyGeneratorX86::h_CFROUND(Instruction& instr, int i) { + asmCode << "\tmov rax, " << regR[instr.src] << std::endl; + int rotate = (13 - (instr.getImm32() & 63)) & 63; + if (rotate != 0) + asmCode << "\trol rax, " << rotate << std::endl; + asmCode << "\tand eax, 24576" << std::endl; + asmCode << "\tor eax, 40896" << std::endl; + asmCode << "\tpush rax" << std::endl; + asmCode << "\tldmxcsr dword ptr [rsp]" << std::endl; + asmCode << "\tpop rax" 
<< std::endl; + tracenop(instr); + } + + void AssemblyGeneratorX86::h_CBRANCH(Instruction& instr, int i) { + int reg = instr.dst; + int target = registerUsage[reg] + 1; + int shift = instr.getModCond() + ConditionOffset; + int32_t imm = instr.getImm32() | (1L << shift); + if (ConditionOffset > 0 || shift > 0) + imm &= ~(1L << (shift - 1)); + asmCode << "\tadd " << regR[reg] << ", " << imm << std::endl; + asmCode << "\ttest " << regR[reg] << ", " << (ConditionMask << shift) << std::endl; + asmCode << "\tjz randomx_isn_" << target << std::endl; + //mark all registers as used + for (unsigned j = 0; j < RegistersCount; ++j) { + registerUsage[j] = i; + } + } + + void AssemblyGeneratorX86::h_ISTORE(Instruction& instr, int i) { + genAddressRegDst(instr); + asmCode << "\tmov qword ptr [" << regScratchpadAddr << "+rax], " << regR[instr.src] << std::endl; + tracenop(instr); + } + + void AssemblyGeneratorX86::h_NOP(Instruction& instr, int i) { + asmCode << "\tnop" << std::endl; + tracenop(instr); + } + +#include "instruction_weights.hpp" +#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x)) + + InstructionGenerator AssemblyGeneratorX86::engine[256] = { + INST_HANDLE(IADD_RS) + INST_HANDLE(IADD_M) + INST_HANDLE(ISUB_R) + INST_HANDLE(ISUB_M) + INST_HANDLE(IMUL_R) + INST_HANDLE(IMUL_M) + INST_HANDLE(IMULH_R) + INST_HANDLE(IMULH_M) + INST_HANDLE(ISMULH_R) + INST_HANDLE(ISMULH_M) + INST_HANDLE(IMUL_RCP) + INST_HANDLE(INEG_R) + INST_HANDLE(IXOR_R) + INST_HANDLE(IXOR_M) + INST_HANDLE(IROR_R) + INST_HANDLE(IROL_R) + INST_HANDLE(ISWAP_R) + INST_HANDLE(FSWAP_R) + INST_HANDLE(FADD_R) + INST_HANDLE(FADD_M) + INST_HANDLE(FSUB_R) + INST_HANDLE(FSUB_M) + INST_HANDLE(FSCAL_R) + INST_HANDLE(FMUL_R) + INST_HANDLE(FDIV_M) + INST_HANDLE(FSQRT_R) + INST_HANDLE(CBRANCH) + INST_HANDLE(CFROUND) + INST_HANDLE(ISTORE) + INST_HANDLE(NOP) + }; +} \ No newline at end of file diff --git a/RandomX/src/assembly_generator_x86.hpp b/RandomX/src/assembly_generator_x86.hpp new file mode 100644 index 
00000000..e9623988 --- /dev/null +++ b/RandomX/src/assembly_generator_x86.hpp @@ -0,0 +1,94 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#pragma once + +#include "common.hpp" +#include + +namespace randomx { + + class Program; + class SuperscalarProgram; + class AssemblyGeneratorX86; + class Instruction; + + typedef void(AssemblyGeneratorX86::*InstructionGenerator)(Instruction&, int); + + class AssemblyGeneratorX86 { + public: + void generateProgram(Program& prog); + void generateAsm(SuperscalarProgram& prog); + void generateC(SuperscalarProgram& prog); + void printCode(std::ostream& os) { + os << asmCode.rdbuf(); + } + private: + void genAddressReg(Instruction&, const char*); + void genAddressRegDst(Instruction&, int); + int32_t genAddressImm(Instruction&); + void generateCode(Instruction&, int); + void traceint(Instruction&); + void traceflt(Instruction&); + void tracenop(Instruction&); + void h_IADD_RS(Instruction&, int); + void h_IADD_M(Instruction&, int); + void h_ISUB_R(Instruction&, int); + void h_ISUB_M(Instruction&, int); + void h_IMUL_R(Instruction&, int); + void h_IMUL_M(Instruction&, int); + void h_IMULH_R(Instruction&, int); + void h_IMULH_M(Instruction&, int); + void h_ISMULH_R(Instruction&, int); + void h_ISMULH_M(Instruction&, int); + void h_IMUL_RCP(Instruction&, int); + void h_INEG_R(Instruction&, int); + void h_IXOR_R(Instruction&, int); + void h_IXOR_M(Instruction&, int); + void h_IROR_R(Instruction&, int); + void h_IROL_R(Instruction&, int); + void h_ISWAP_R(Instruction&, int); + void h_FSWAP_R(Instruction&, int); + void h_FADD_R(Instruction&, int); + void h_FADD_M(Instruction&, int); + void h_FSUB_R(Instruction&, int); + void h_FSUB_M(Instruction&, int); + void h_FSCAL_R(Instruction&, int); + void h_FMUL_R(Instruction&, int); + void h_FDIV_M(Instruction&, int); + void h_FSQRT_R(Instruction&, int); + void h_CBRANCH(Instruction&, int); + void h_CFROUND(Instruction&, int); + void h_ISTORE(Instruction&, int); + void h_NOP(Instruction&, int); + + static InstructionGenerator engine[256]; + std::stringstream asmCode; + int registerUsage[RegistersCount]; + }; +} \ No newline at 
end of file diff --git a/RandomX/src/blake2/blake2-impl.h b/RandomX/src/blake2/blake2-impl.h new file mode 100644 index 00000000..617f7c8a --- /dev/null +++ b/RandomX/src/blake2/blake2-impl.h @@ -0,0 +1,76 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#ifndef PORTABLE_BLAKE2_IMPL_H
+#define PORTABLE_BLAKE2_IMPL_H
+
+#include /* NOTE(review): header name missing in this patch text; the uint8_t/uint32_t/uint64_t uses below presumably need <stdint.h> - confirm */
+
+#include "endian.h"
+/* Read 6 bytes from src, least-significant byte first, into the low 48 bits of the result. */
+static FORCE_INLINE uint64_t load48(const void *src) {
+    const uint8_t *p = (const uint8_t *)src;
+    uint64_t w = *p++;
+    w |= (uint64_t)(*p++) << 8;
+    w |= (uint64_t)(*p++) << 16;
+    w |= (uint64_t)(*p++) << 24;
+    w |= (uint64_t)(*p++) << 32;
+    w |= (uint64_t)(*p++) << 40;
+    return w;
+}
+/* Write the low 48 bits of w to dst as 6 bytes, least-significant byte first. */
+static FORCE_INLINE void store48(void *dst, uint64_t w) {
+    uint8_t *p = (uint8_t *)dst;
+    *p++ = (uint8_t)w;
+    w >>= 8;
+    *p++ = (uint8_t)w;
+    w >>= 8;
+    *p++ = (uint8_t)w;
+    w >>= 8;
+    *p++ = (uint8_t)w;
+    w >>= 8;
+    *p++ = (uint8_t)w;
+    w >>= 8;
+    *p++ = (uint8_t)w;
+}
+/* Rotate the 32-bit value w right by c bits (c is taken modulo 32). */
+static FORCE_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) {
+    return (w >> (c & 31)) | (w << ((32 - c) & 31)); /* masked counts: c == 0 no longer shifts by the full width (undefined behaviour); results for 0 < c < 32 are unchanged */
+}
+/* Rotate the 64-bit value w right by c bits (c is taken modulo 64). */
+static FORCE_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) {
+    return (w >> (c & 63)) | (w << ((64 - c) & 63)); /* masked counts: c == 0 no longer shifts by the full width (undefined behaviour); results for 0 < c < 64 are unchanged */
+}
+
+#endif
diff --git a/RandomX/src/blake2/blake2.h b/RandomX/src/blake2/blake2.h
new file mode 100644
index 00000000..7ac301ce
--- /dev/null
+++ b/RandomX/src/blake2/blake2.h
@@ -0,0 +1,107 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#ifndef PORTABLE_BLAKE2_H
+#define PORTABLE_BLAKE2_H
+
+#include /* NOTE(review): the header names on these two #include lines are missing in this patch text; size_t, uintN_t and CHAR_BIT are used below - confirm the original headers */
+#include
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+    enum blake2b_constant {
+        BLAKE2B_BLOCKBYTES = 128, /* compression block size in bytes; also the size of blake2b_state.buf */
+        BLAKE2B_OUTBYTES = 64, /* largest digest length accepted by blake2b_init / blake2b_init_key */
+        BLAKE2B_KEYBYTES = 64, /* largest key length accepted by blake2b_init_key */
+        BLAKE2B_SALTBYTES = 16, /* size of blake2b_param.salt */
+        BLAKE2B_PERSONALBYTES = 16 /* size of blake2b_param.personal */
+    };
+
+#pragma pack(push, 1)
+    typedef struct __blake2b_param { /* packed parameter block, 64 bytes in total; the numbers in the field comments are the running byte count after each field */
+        uint8_t digest_length; /* 1 */
+        uint8_t key_length; /* 2 */
+        uint8_t fanout; /* 3 */
+        uint8_t depth; /* 4 */
+        uint32_t leaf_length; /* 8 */
+        uint64_t node_offset; /* 16 */
+        uint8_t node_depth; /* 17 */
+        uint8_t inner_length; /* 18 */
+        uint8_t reserved[14]; /* 32 */
+        uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
+        uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
+    } blake2b_param;
+#pragma pack(pop)
+
+    typedef struct __blake2b_state { /* streaming hash state used by the blake2b_* functions below */
+        uint64_t h[8]; /* chaining value: IV XOR parameter block after init, updated by every compression */
+        uint64_t t[2]; /* byte counter, t[0] low word and t[1] carry word */
+        uint64_t f[2]; /* finalization flags: f[0] is set to all ones for the last block, f[1] for the last node */
+        uint8_t buf[BLAKE2B_BLOCKBYTES]; /* input bytes not yet compressed */
+        unsigned buflen; /* number of valid bytes in buf */
+        unsigned outlen; /* digest length in bytes, taken from blake2b_param.digest_length */
+        uint8_t last_node; /* when non-zero, finalization also sets f[1] */
+    } blake2b_state;
+
+    /* Ensure param structs have not been wrongly padded */
+    /* Poor man's static_assert */
+    enum {
+        blake2_size_check_0 = 1 / !!(CHAR_BIT == 8), /* a false condition becomes 1 / 0 in a constant expression, i.e. a compile error */
+        blake2_size_check_2 =
+        1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT) /* with 8-bit bytes this requires sizeof(blake2b_param) == 64 */
+    };
+
+    /* Streaming API */
+    int blake2b_init(blake2b_state *S, size_t outlen); /* unkeyed init; -1 if S is NULL or outlen is 0 or > BLAKE2B_OUTBYTES */
+    int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
+        size_t keylen); /* keyed init; additionally -1 if key is NULL or keylen is 0 or > BLAKE2B_KEYBYTES */
+    int blake2b_init_param(blake2b_state *S, const blake2b_param *P); /* init from an explicit parameter block; -1 if either pointer is NULL */
+    int blake2b_update(blake2b_state *S, const void *in, size_t inlen); /* absorb inlen bytes; -1 on NULL arguments or an already finalized state */
+    int blake2b_final(blake2b_state *S, void *out, size_t outlen); /* write S->outlen digest bytes; -1 if outlen < S->outlen or the state was already finalized */
+
+    /* Simple API */
+    int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
+        const void *key, size_t keylen); /* one-shot hash; keyed when keylen > 0; returns -1 on invalid arguments */
+
+    /* Argon2 Team - Begin Code */
+    int rxa2_blake2b_long(void *out, size_t outlen, const void *in, size_t inlen); /* variable-length output built from chained BLAKE2b calls; -1 if outlen > UINT32_MAX */
+    /* Argon2 Team - End Code */
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/RandomX/src/blake2/blake2b.c b/RandomX/src/blake2/blake2b.c
new file mode 100644
index 00000000..5931ee5c
--- /dev/null
+++ b/RandomX/src/blake2/blake2b.c
@@ -0,0 +1,409 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#include /* NOTE(review): the header names on these three #include lines are missing in this patch text; memcpy/memset, UINT64_C and UINT32_MAX are used below - confirm the original headers */
+#include
+#include
+
+#include "blake2.h"
+#include "blake2-impl.h"
+/* Initialisation vector: copied into h by blake2b_init0 and used for v[8..15] in blake2b_compress. */
+static const uint64_t blake2b_IV[8] = {
+    UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
+    UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
+    UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
+    UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) };
+/* Message-word schedule: in round r, G number i reads m[blake2b_sigma[r][2*i]] and m[blake2b_sigma[r][2*i+1]]. */
+static const unsigned int blake2b_sigma[12][16] = {
+    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+    {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+    {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
+    {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
+    {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
+    {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
+    {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
+    {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
+    {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
+    {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
+    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+    {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+};
+/* Set the last-node flag word f[1] to all ones. */
+static FORCE_INLINE void blake2b_set_lastnode(blake2b_state *S) {
+    S->f[1] = (uint64_t)-1;
+}
+/* Set the last-block flag word f[0] to all ones (and f[1] too when S->last_node is non-zero). */
+static FORCE_INLINE void blake2b_set_lastblock(blake2b_state *S) {
+    if (S->last_node) {
+        blake2b_set_lastnode(S);
+    }
+    S->f[0] = (uint64_t)-1;
+}
+/* Add inc to the two-word byte counter t, carrying from t[0] into t[1]. */
+static FORCE_INLINE void blake2b_increment_counter(blake2b_state *S,
+    uint64_t inc) {
+    S->t[0] += inc;
+    S->t[1] += (S->t[0] < inc); /* unsigned wrap-around of t[0] means a carry */
+}
+/* Mark S as finalized so blake2b_update / blake2b_final reject it (they test f[0] != 0). */
+static FORCE_INLINE void blake2b_invalidate_state(blake2b_state *S) {
+    //clear_internal_memory(S, sizeof(*S)); /* wipe */
+    blake2b_set_lastblock(S); /* invalidate for further use */
+}
+/* Zero the whole state, then load the IV into h. */
+static FORCE_INLINE void blake2b_init0(blake2b_state *S) {
+    memset(S, 0, sizeof(*S));
+    memcpy(S->h, blake2b_IV, sizeof(S->h));
+}
+/* Initialise S from parameter block P; returns -1 if either pointer is NULL, otherwise 0. */
+int blake2b_init_param(blake2b_state *S, const blake2b_param *P) {
+    const unsigned char *p = (const unsigned char *)P;
+    unsigned int i;
+
+    if (NULL == P || NULL == S) {
+        return -1;
+    }
+
+    blake2b_init0(S);
+    /* IV XOR Parameter Block */
+    for (i = 0; i < 8; ++i) {
+        S->h[i] ^= load64(&p[i * sizeof(S->h[i])]); /* the block is consumed as eight 8-byte words */
+    }
+    S->outlen = P->digest_length;
+    return 0;
+}
+
+/* Sequential blake2b initialization */
+int blake2b_init(blake2b_state *S, size_t outlen) {
+    blake2b_param P;
+
+    if (S == NULL) {
+        return -1;
+    }
+
+    if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) {
+        blake2b_invalidate_state(S); /* NOTE(review): S has not been initialised yet here, so this reads S->last_node before anything wrote it */
+        return -1;
+    }
+
+    /* Setup Parameter Block for unkeyed BLAKE2 */
+    P.digest_length = (uint8_t)outlen;
+    P.key_length = 0;
+    P.fanout = 1;
+    P.depth = 1;
+    P.leaf_length = 0;
+    P.node_offset = 0;
+    P.node_depth = 0;
+    P.inner_length = 0;
+    memset(P.reserved, 0, sizeof(P.reserved));
+    memset(P.salt, 0, sizeof(P.salt));
+    memset(P.personal, 0, sizeof(P.personal));
+
+    return blake2b_init_param(S, &P);
+}
+/* Keyed initialisation: sets up the parameter block, then absorbs the key as one zero-padded block. */
+int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t keylen) {
+    blake2b_param P;
+
+    if (S == NULL) {
+        return -1;
+    }
+
+    if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) {
+        blake2b_invalidate_state(S);
+        return -1;
+    }
+
+    if ((key == 0) || (keylen == 0) || (keylen > BLAKE2B_KEYBYTES)) {
+        blake2b_invalidate_state(S);
+        return -1;
+    }
+
+    /* Setup Parameter Block for keyed BLAKE2 */
+    P.digest_length = (uint8_t)outlen;
+    P.key_length = (uint8_t)keylen;
+    P.fanout = 1;
+    P.depth = 1;
+    P.leaf_length = 0;
+    P.node_offset = 0;
+    P.node_depth = 0;
+    P.inner_length = 0;
+    memset(P.reserved, 0, sizeof(P.reserved));
+    memset(P.salt, 0, sizeof(P.salt));
+    memset(P.personal, 0, sizeof(P.personal));
+
+    if (blake2b_init_param(S, &P) < 0) {
+        blake2b_invalidate_state(S);
+        return -1;
+    }
+
+    {
+        uint8_t block[BLAKE2B_BLOCKBYTES];
+        memset(block, 0, BLAKE2B_BLOCKBYTES);
+        memcpy(block, key, keylen);
+        blake2b_update(S, block, BLAKE2B_BLOCKBYTES); /* return value is not checked */
+        /* Burn the key from stack */
+        //clear_internal_memory(block, BLAKE2B_BLOCKBYTES);
+    }
+    return 0;
+}
+/* Compress one BLAKE2B_BLOCKBYTES-byte block into S->h using the current counter t and flags f. */
+static void blake2b_compress(blake2b_state *S, const uint8_t *block) {
+    uint64_t m[16];
+    uint64_t v[16];
+    unsigned int i, r;
+
+    for (i = 0; i < 16; ++i) {
+        m[i] = load64(block + i * sizeof(m[i]));
+    }
+
+    for (i = 0; i < 8; ++i) {
+        v[i] = S->h[i];
+    }
+
+    v[8] = blake2b_IV[0];
+    v[9] = blake2b_IV[1];
+    v[10] = blake2b_IV[2];
+    v[11] = blake2b_IV[3];
+    v[12] = blake2b_IV[4] ^ S->t[0];
+    v[13] = blake2b_IV[5] ^ S->t[1];
+    v[14] = blake2b_IV[6] ^ S->f[0];
+    v[15] = blake2b_IV[7] ^ S->f[1];
+
+#define G(r, i, a, b, c, d) \
+    do { \
+        a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
+        d = rotr64(d ^ a, 32); \
+        c = c + d; \
+        b = rotr64(b ^ c, 24); \
+        a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
+        d = rotr64(d ^ a, 16); \
+        c = c + d; \
+        b = rotr64(b ^ c, 63); \
+    } while ((void)0, 0)
+
+#define ROUND(r) \
+    do { \
+        G(r, 0, v[0], v[4], v[8], v[12]); \
+        G(r, 1, v[1], v[5], v[9], v[13]); \
+        G(r, 2, v[2], v[6], v[10], v[14]); \
+        G(r, 3, v[3], v[7], v[11], v[15]); \
+        G(r, 4, v[0], v[5], v[10], v[15]); \
+        G(r, 5, v[1], v[6], v[11], v[12]); \
+        G(r, 6, v[2], v[7], v[8], v[13]); \
+        G(r, 7, v[3], v[4], v[9], v[14]); \
+    } while ((void)0, 0)
+
+    for (r = 0; r < 12; ++r) { /* 12 rounds, one row of blake2b_sigma each */
+        ROUND(r);
+    }
+
+    for (i = 0; i < 8; ++i) {
+        S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; /* fold both halves of v back into the chaining value */
+    }
+
+#undef G
+#undef ROUND
+}
+/* Absorb inlen bytes; the final (possibly full) block is always left in S->buf for blake2b_final. */
+int blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
+    const uint8_t *pin = (const uint8_t *)in;
+
+    if (inlen == 0) { /* checked first, so a zero-length update succeeds even with NULL pointers */
+        return 0;
+    }
+
+    /* Sanity check */
+    if (S == NULL || in == NULL) {
+        return -1;
+    }
+
+    /* Is this a reused state? */
+    if (S->f[0] != 0) {
+        return -1;
+    }
+
+    if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) {
+        /* Complete current block */
+        size_t left = S->buflen;
+        size_t fill = BLAKE2B_BLOCKBYTES - left;
+        memcpy(&S->buf[left], pin, fill);
+        blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+        blake2b_compress(S, S->buf);
+        S->buflen = 0;
+        inlen -= fill;
+        pin += fill;
+        /* Avoid buffer copies when possible */
+        while (inlen > BLAKE2B_BLOCKBYTES) { /* strictly greater: an exactly full last block stays buffered */
+            blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+            blake2b_compress(S, pin);
+            inlen -= BLAKE2B_BLOCKBYTES;
+            pin += BLAKE2B_BLOCKBYTES;
+        }
+    }
+    memcpy(&S->buf[S->buflen], pin, inlen);
+    S->buflen += (unsigned int)inlen;
+    return 0;
+}
+/* Pad and compress the buffered block with the last-block flag set, then copy S->outlen digest bytes to out. */
+int blake2b_final(blake2b_state *S, void *out, size_t outlen) {
+    uint8_t buffer[BLAKE2B_OUTBYTES] = { 0 };
+    unsigned int i;
+
+    /* Sanity checks */
+    if (S == NULL || out == NULL || outlen < S->outlen) {
+        return -1;
+    }
+
+    /* Is this a reused state? */
+    if (S->f[0] != 0) {
+        return -1;
+    }
+
+    blake2b_increment_counter(S, S->buflen);
+    blake2b_set_lastblock(S);
+    memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
+    blake2b_compress(S, S->buf);
+
+    for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
+        store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
+    }
+
+    memcpy(out, buffer, S->outlen);
+    //clear_internal_memory(buffer, sizeof(buffer));
+    //clear_internal_memory(S->buf, sizeof(S->buf));
+    //clear_internal_memory(S->h, sizeof(S->h));
+    return 0;
+}
+/* One-shot hash of in[0..inlen) into out[0..outlen), keyed when keylen > 0; returns 0 on success, -1 on invalid arguments. */
+int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
+    const void *key, size_t keylen) {
+    blake2b_state S;
+    int ret = -1;
+
+    /* Verify parameters */
+    if (NULL == in && inlen > 0) {
+        goto fail;
+    }
+
+    if (NULL == out || outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
+        goto fail;
+    }
+
+    if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) {
+        goto fail;
+    }
+
+    if (keylen > 0) {
+        if (blake2b_init_key(&S, outlen, key, keylen) < 0) {
+            goto fail;
+        }
+    }
+    else {
+        if (blake2b_init(&S, outlen) < 0) {
+            goto fail;
+        }
+    }
+
+    if (blake2b_update(&S, in, inlen) < 0) {
+        goto fail;
+    }
+    ret = blake2b_final(&S, out, outlen);
+
+fail: /* also reached on success; ret then holds the result of blake2b_final */
+    //clear_internal_memory(&S, sizeof(S));
+    return ret;
+}
+
+/* Argon2 Team - Begin Code */
+int rxa2_blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) { /* variable-length hash; the 4-byte output length is hashed in front of the input */
+    uint8_t *out = (uint8_t *)pout;
+    blake2b_state blake_state;
+    uint8_t outlen_bytes[sizeof(uint32_t)] = { 0 };
+    int ret = -1;
+
+    if (outlen > UINT32_MAX) {
+        goto fail;
+    }
+
+    /* Ensure little-endian byte order! */
+    store32(outlen_bytes, (uint32_t)outlen);
+
+#define TRY(statement) \
+    do { \
+        ret = statement; \
+        if (ret < 0) { \
+            goto fail; \
+        } \
+    } while ((void)0, 0)
+
+    if (outlen <= BLAKE2B_OUTBYTES) { /* short output: a single BLAKE2b call with digest length outlen */
+        TRY(blake2b_init(&blake_state, outlen));
+        TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
+        TRY(blake2b_update(&blake_state, in, inlen));
+        TRY(blake2b_final(&blake_state, out, outlen));
+    }
+    else { /* long output: chain 64-byte hashes and emit the first 32 bytes of each */
+        uint32_t toproduce;
+        uint8_t out_buffer[BLAKE2B_OUTBYTES];
+        uint8_t in_buffer[BLAKE2B_OUTBYTES];
+        TRY(blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
+        TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
+        TRY(blake2b_update(&blake_state, in, inlen));
+        TRY(blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
+        memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
+        out += BLAKE2B_OUTBYTES / 2;
+        toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
+
+        while (toproduce > BLAKE2B_OUTBYTES) {
+            memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
+            TRY(blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
+                BLAKE2B_OUTBYTES, NULL, 0));
+            memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
+            out += BLAKE2B_OUTBYTES / 2;
+            toproduce -= BLAKE2B_OUTBYTES / 2;
+        }
+
+        memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
+        TRY(blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
+            0)); /* the last hash is emitted whole: toproduce bytes, at most BLAKE2B_OUTBYTES */
+        memcpy(out, out_buffer, toproduce);
+    }
+fail:
+    //clear_internal_memory(&blake_state, sizeof(blake_state));
+    return ret;
+#undef TRY
+}
+/* Argon2 Team - End Code */
+
diff --git a/RandomX/src/blake2/blamka-round-ref.h b/RandomX/src/blake2/blamka-round-ref.h
new file mode 100644
index 00000000..f1fb50bf
--- /dev/null
+++ b/RandomX/src/blake2/blamka-round-ref.h
@@ -0,0 +1,73 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#ifndef BLAKE_ROUND_MKA_H
+#define BLAKE_ROUND_MKA_H
+
+#include "blake2.h"
+#include "blake2-impl.h"
+
+ /* designed by the Lyra PHC team */
+static FORCE_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) { /* returns x + y + 2 * lo32(x) * lo32(y), with 64-bit wrap-around */
+    const uint64_t m = UINT64_C(0xFFFFFFFF);
+    const uint64_t xy = (x & m) * (y & m);
+    return x + y + 2 * xy;
+}
+/* Same rotation schedule (32, 24, 16, 63) as the G macro in blake2b.c, but the additions are replaced by fBlaMka and no message words are mixed in. */
+#define G(a, b, c, d) \
+    do { \
+        a = fBlaMka(a, b); \
+        d = rotr64(d ^ a, 32); \
+        c = fBlaMka(c, d); \
+        b = rotr64(b ^ c, 24); \
+        a = fBlaMka(a, b); \
+        d = rotr64(d ^ a, 16); \
+        c = fBlaMka(c, d); \
+        b = rotr64(b ^ c, 63); \
+    } while ((void)0, 0)
+/* One round over sixteen 64-bit words: G on the four columns, then on the four diagonals. Arguments are modified in place. */
+#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \
+    v12, v13, v14, v15) \
+    do { \
+        G(v0, v4, v8, v12); \
+        G(v1, v5, v9, v13); \
+        G(v2, v6, v10, v14); \
+        G(v3, v7, v11, v15); \
+        G(v0, v5, v10, v15); \
+        G(v1, v6, v11, v12); \
+        G(v2, v7, v8, v13); \
+        G(v3, v4, v9, v14); \
+    } while ((void)0, 0)
+
+#endif
diff --git a/RandomX/src/blake2/endian.h b/RandomX/src/blake2/endian.h
new file mode 100644
index 00000000..c7afed26
--- /dev/null
+++ b/RandomX/src/blake2/endian.h
@@ -0,0 +1,107 @@
+#pragma once
+#include /* NOTE(review): the header names on these two #include lines are missing in this patch text; uintN_t and memcpy are used below - confirm the original headers */
+#include
+
+#if defined(_MSC_VER)
+#define FORCE_INLINE __inline
+#elif defined(__GNUC__) || defined(__clang__)
+#define FORCE_INLINE __inline__
+#else
+#define FORCE_INLINE
+#endif
+
+    /* Argon2 Team - Begin Code */
+    /*
+    Not an exhaustive list, but should cover the majority of modern platforms
+    Additionally, the code will always be correct---this is only a performance
+    tweak.
+    */
+#if (defined(__BYTE_ORDER__) && \
+    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \
+    defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \
+    defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \
+    defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \
+    defined(_M_ARM)
+#define NATIVE_LITTLE_ENDIAN
+#endif
+    /* Argon2 Team - End Code */
+/* Load a 32-bit little-endian value from src; src may be unaligned (memcpy or byte reads only). */
+static FORCE_INLINE uint32_t load32(const void *src) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+    uint32_t w;
+    memcpy(&w, src, sizeof w);
+    return w;
+#else
+    const uint8_t *p = (const uint8_t *)src;
+    uint32_t w = *p++;
+    w |= (uint32_t)(*p++) << 8;
+    w |= (uint32_t)(*p++) << 16;
+    w |= (uint32_t)(*p++) << 24;
+    return w;
+#endif
+}
+/* Load 8 bytes from src in the host's own byte order. */
+static FORCE_INLINE uint64_t load64_native(const void *src) {
+    uint64_t w;
+    memcpy(&w, src, sizeof w);
+    return w;
+}
+/* Load a 64-bit little-endian value from src. */
+static FORCE_INLINE uint64_t load64(const void *src) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+    return load64_native(src);
+#else
+    const uint8_t *p = (const uint8_t *)src;
+    uint64_t w = *p++;
+    w |= (uint64_t)(*p++) << 8;
+    w |= (uint64_t)(*p++) << 16;
+    w |= (uint64_t)(*p++) << 24;
+    w |= (uint64_t)(*p++) << 32;
+    w |= (uint64_t)(*p++) << 40;
+    w |= (uint64_t)(*p++) << 48;
+    w |= (uint64_t)(*p++) << 56;
+    return w;
+#endif
+}
+/* Store w at dst as 4 bytes, least-significant byte first. */
+static FORCE_INLINE void store32(void *dst, uint32_t w) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+    memcpy(dst, &w, sizeof w);
+#else
+    uint8_t *p = (uint8_t *)dst;
+    *p++ = (uint8_t)w;
+    w >>= 8;
+    *p++ = (uint8_t)w;
+    w >>= 8;
+    *p++ = (uint8_t)w;
+    w >>= 8;
+    *p++ = (uint8_t)w;
+#endif
+}
+/* Store w at dst as 8 bytes in the host's own byte order. */
+static FORCE_INLINE void store64_native(void *dst, uint64_t w) {
+    memcpy(dst, &w, sizeof w);
+}
+/* Store w at dst as 8 bytes, least-significant byte first. */
+static FORCE_INLINE void store64(void *dst, uint64_t w) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+    store64_native(dst, w);
+#else
+    uint8_t *p = (uint8_t *)dst;
+    *p++ = (uint8_t)w;
+    w >>= 8;
+    *p++ = (uint8_t)w;
+    w >>= 8;
+    *p++ = (uint8_t)w;
+    w >>= 8;
+    *p++ = (uint8_t)w;
+    w >>= 8;
+    *p++ = (uint8_t)w;
+    w >>= 8;
+    *p++ = (uint8_t)w;
+    w >>= 8;
+    *p++ = (uint8_t)w;
+    w >>= 8;
+    *p++ = (uint8_t)w;
+#endif
+}
diff --git a/RandomX/src/blake2_generator.cpp b/RandomX/src/blake2_generator.cpp
new file mode 100644
index 00000000..dcf51cec
--- /dev/null
+++ b/RandomX/src/blake2_generator.cpp
@@ -0,0 +1,62 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include /* NOTE(review): header name missing in this patch text; size_t is used below - confirm the original header */
+#include "blake2/blake2.h"
+#include "blake2/endian.h"
+#include "blake2_generator.hpp"
+
+namespace randomx {
+
+    constexpr int maxSeedSize = 60; // seed bytes kept; the 4 bytes after them hold the nonce
+    // Builds the initial 64-byte block: seed (truncated to maxSeedSize), zero padding, then the nonce via store32.
+    Blake2Generator::Blake2Generator(const void* seed, size_t seedSize, int nonce) : dataIndex(sizeof(data)) {
+        memset(data, 0, sizeof(data));
+        memcpy(data, seed, seedSize > maxSeedSize ? maxSeedSize : seedSize);
+        store32(&data[maxSeedSize], nonce);
+    }
+    // Returns the next output byte, re-hashing the block first if it is exhausted.
+    uint8_t Blake2Generator::getByte() {
+        checkData(1);
+        return data[dataIndex++];
+    }
+    // Returns the next 4 output bytes read with load32; leftover bytes (fewer than 4) at the end of a block are skipped.
+    uint32_t Blake2Generator::getInt32() {
+        checkData(4);
+        auto ret = load32(&data[dataIndex]);
+        dataIndex += 4;
+        return ret;
+    }
+    // Ensures bytesNeeded bytes are available; otherwise replaces data with blake2b(data) and restarts at index 0.
+    void Blake2Generator::checkData(const size_t bytesNeeded) {
+        if (dataIndex + bytesNeeded > sizeof(data)) { // dataIndex starts at sizeof(data), so the first request always hashes
+            blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
+            dataIndex = 0;
+        }
+    }
+}
\ No newline at end of file
diff --git a/RandomX/src/blake2_generator.hpp b/RandomX/src/blake2_generator.hpp
new file mode 100644
index 00000000..b5ac0801
--- /dev/null
+++ b/RandomX/src/blake2_generator.hpp
@@ -0,0 +1,46 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include /* NOTE(review): header name missing in this patch text; uint8_t/uint32_t/size_t are used below - confirm the original header */
+
+namespace randomx {
+
+    class Blake2Generator { // byte/int source that repeatedly re-hashes a 64-byte block with blake2b (see blake2_generator.cpp)
+    public:
+        Blake2Generator(const void* seed, size_t seedSize, int nonce = 0); // only the first 60 seed bytes are used; nonce fills the last 4 bytes of the block
+        uint8_t getByte(); // next output byte
+        uint32_t getInt32(); // next 4 output bytes as one value (read with load32)
+    private:
+        void checkData(const size_t); // refills data when fewer than the requested number of bytes remain
+
+        uint8_t data[64]; // current block; overwritten in place with its own blake2b hash on refill
+        size_t dataIndex; // read position inside data; starts at sizeof(data) so the first read triggers a refill
+    };
+}
\ No newline at end of file
diff --git a/RandomX/src/common.hpp b/RandomX/src/common.hpp
new file mode 100644
index 00000000..e6682ffa
--- /dev/null
+++ b/RandomX/src/common.hpp
@@ -0,0 +1,177 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include /* NOTE(review): the header names on these three #include lines are missing in this patch text; uintN_t types are used below - confirm the original headers */
+#include
+#include
+#include "blake2/endian.h"
+#include "configuration.h"
+#include "randomx.h"
+
+namespace randomx {
+    // Compile-time validation of the RANDOMX_* configuration macros (presumably defined in configuration.h - confirm).
+    static_assert(RANDOMX_ARGON_MEMORY > 0, "RANDOMX_ARGON_MEMORY must be greater than 0.");
+    static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2.");
+    static_assert(RANDOMX_DATASET_BASE_SIZE >= 64, "RANDOMX_DATASET_BASE_SIZE must be at least 64.");
+    static_assert((RANDOMX_DATASET_BASE_SIZE & (RANDOMX_DATASET_BASE_SIZE - 1)) == 0, "RANDOMX_DATASET_BASE_SIZE must be a power of 2.");
+    static_assert(RANDOMX_DATASET_BASE_SIZE <= 4294967296ULL, "RANDOMX_DATASET_BASE_SIZE must not exceed 4294967296.");
+    static_assert(RANDOMX_DATASET_EXTRA_SIZE % 64 == 0, "RANDOMX_DATASET_EXTRA_SIZE must be divisible by 64.");
+    static_assert(RANDOMX_PROGRAM_SIZE > 0, "RANDOMX_PROGRAM_SIZE must be greater than 0");
+    static_assert(RANDOMX_PROGRAM_ITERATIONS > 0, "RANDOMX_PROGRAM_ITERATIONS must be greater than 0");
+    static_assert(RANDOMX_PROGRAM_COUNT > 0, "RANDOMX_PROGRAM_COUNT must be greater than 0");
+    static_assert((RANDOMX_SCRATCHPAD_L3 & (RANDOMX_SCRATCHPAD_L3 - 1)) == 0, "RANDOMX_SCRATCHPAD_L3 must be a power of 2.");
+    static_assert(RANDOMX_SCRATCHPAD_L3 >= RANDOMX_SCRATCHPAD_L2, "RANDOMX_SCRATCHPAD_L3 must be greater than or equal to RANDOMX_SCRATCHPAD_L2.");
+    static_assert((RANDOMX_SCRATCHPAD_L2 & (RANDOMX_SCRATCHPAD_L2 - 1)) == 0, "RANDOMX_SCRATCHPAD_L2 must be a power of 2.");
+    static_assert(RANDOMX_SCRATCHPAD_L2 >= RANDOMX_SCRATCHPAD_L1, "RANDOMX_SCRATCHPAD_L2 must be greater than or equal to RANDOMX_SCRATCHPAD_L1.");
+    static_assert(RANDOMX_SCRATCHPAD_L1 >= 64, "RANDOMX_SCRATCHPAD_L1 must be at least 64.");
+    static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2.");
+    static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1");
+    static_assert(RANDOMX_SUPERSCALAR_LATENCY > 0, "RANDOMX_SUPERSCALAR_LATENCY must be greater than 0");
+    static_assert(RANDOMX_JUMP_BITS > 0, "RANDOMX_JUMP_BITS must be greater than 0.");
+    static_assert(RANDOMX_JUMP_OFFSET >= 0, "RANDOMX_JUMP_OFFSET must be greater than or equal to 0.");
+    static_assert(RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET <= 16, "RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET must not exceed 16.");
+    // Sum of the per-instruction frequency weights; checked against 256 below.
+    constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \
+        RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \
+        RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \
+        RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_ISWAP_R + \
+        RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R + RANDOMX_FREQ_FADD_M + RANDOMX_FREQ_FSUB_R + RANDOMX_FREQ_FSUB_M + \
+        RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_CBRANCH + \
+        RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP; // NOTE(review): no RANDOMX_FREQ_IROL_R term although an h_IROL_R handler is declared elsewhere in this patch; if that macro exists with a non-zero value the check below under-counts - confirm against configuration.h
+
+    static_assert(wtSum == 256, "Sum of instruction frequencies must be 256.");
+
+
+    constexpr uint32_t ArgonBlockSize = 1024;
+    constexpr int ArgonSaltSize = sizeof("" RANDOMX_ARGON_SALT) - 1; // length of the salt string literal without its terminating NUL
+    constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_LATENCY + 2;
+    constexpr int CacheLineSize = RANDOMX_DATASET_ITEM_SIZE;
+    constexpr int ScratchpadSize = RANDOMX_SCRATCHPAD_L3;
+    constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) & ~(CacheLineSize - 1); // keeps offsets below the base size and clears the bits below CacheLineSize
+    constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * ArgonBlockSize;
+    constexpr uint64_t DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE;
+    constexpr uint32_t DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE;
+    constexpr uint32_t ConditionMask = ((1 << RANDOMX_JUMP_BITS) - 1); // the low RANDOMX_JUMP_BITS bits set
+    constexpr int ConditionOffset = RANDOMX_JUMP_OFFSET;
+    constexpr int StoreL3Condition = 14;
+
+    //Prevent some unsafe configurations.
+#ifndef RANDOMX_UNSAFE
+	static_assert((uint64_t)ArgonBlockSize * RANDOMX_CACHE_ACCESSES * RANDOMX_ARGON_MEMORY + 33554432 >= (uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE, "Unsafe configuration: Memory-time tradeoffs");
+	static_assert((128 + RANDOMX_PROGRAM_SIZE * RANDOMX_FREQ_ISTORE / 256) * (RANDOMX_PROGRAM_COUNT * RANDOMX_PROGRAM_ITERATIONS) >= RANDOMX_SCRATCHPAD_L3, "Unsafe configuration: Insufficient Scratchpad writes");
+	static_assert(RANDOMX_PROGRAM_COUNT > 1, "Unsafe configuration: Program filtering strategies");
+	static_assert(RANDOMX_PROGRAM_SIZE >= 64, "Unsafe configuration: Low program entropy");
+	static_assert(RANDOMX_PROGRAM_ITERATIONS >= 400, "Unsafe configuration: High compilation overhead");
+#endif
+
+#ifdef TRACE
+	constexpr bool trace = true;
+#else
+	constexpr bool trace = false;
+#endif
+
+//UNREACHABLE expands to the compiler's "this point is never reached" hint,
+//or to nothing when the compiler is neither GCC-compatible nor MSVC.
+#ifndef UNREACHABLE
+#ifdef __GNUC__
+#define UNREACHABLE __builtin_unreachable()
+#elif defined(_MSC_VER)
+#define UNREACHABLE __assume(false)
+#else
+#define UNREACHABLE
+#endif
+#endif
+
+//JitCompiler aliases the JIT back-end class matching the target architecture.
+#if defined(_M_X64) || defined(__x86_64__)
+	class JitCompilerX86;
+	using JitCompiler = JitCompilerX86;
+#elif defined(__aarch64__)
+	class JitCompilerA64;
+	using JitCompiler = JitCompilerA64;
+#else
+	class JitCompilerFallback;
+	using JitCompiler = JitCompilerFallback;
+#endif
+
+	using addr_t = uint32_t;
+
+	using int_reg_t = uint64_t;
+
+	struct fpu_reg_t {
+		double lo;
+		double hi;
+	};
+
+	//Scratchpad level sizes, counted in int_reg_t words rather than bytes.
+	constexpr uint32_t ScratchpadL1 = RANDOMX_SCRATCHPAD_L1 / sizeof(int_reg_t);
+	constexpr uint32_t ScratchpadL2 = RANDOMX_SCRATCHPAD_L2 / sizeof(int_reg_t);
+	constexpr uint32_t ScratchpadL3 = RANDOMX_SCRATCHPAD_L3 / sizeof(int_reg_t);
+	//Byte-address masks; the numeric suffix is the alignment (in bytes) the mask enforces.
+	constexpr int ScratchpadL1Mask = (ScratchpadL1 - 1) * 8;
+	constexpr int ScratchpadL2Mask = (ScratchpadL2 - 1) * 8;
+	constexpr int ScratchpadL1Mask16 = (ScratchpadL1 / 2 - 1) * 16;
+	constexpr int ScratchpadL2Mask16 = (ScratchpadL2 / 2 - 1) * 16;
+	constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8;
+	constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64;
+	constexpr int RegistersCount = 8;
+	constexpr int RegisterCountFlt = RegistersCount / 2;
+	constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register
+	constexpr int RegisterNeedsSib = 4; //x86 r12 register
+
+	//Returns true when x has at most one bit set. Note that this includes x == 0.
+	inline bool isPowerOf2(uint64_t x) {
+		return (x & (x - 1)) == 0;
+	}
+
+	//Bit-field layout used when building floating point values.
+	constexpr int mantissaSize = 52;
+	constexpr int exponentSize = 11;
+	constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1;
+	constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1;
+	constexpr int exponentBias = 1023;
+	constexpr int dynamicExponentBits = 4;
+	constexpr int staticExponentBits = 4;
+	constexpr uint64_t constExponentBits = 0x300;
+	constexpr uint64_t dynamicMantissaMask = (1ULL << (mantissaSize + dynamicExponentBits)) - 1;
+
+	struct MemoryRegisters {
+		addr_t mx, ma;
+		uint8_t* memory = nullptr;
+	};
+
+	struct RegisterFile {
+		int_reg_t r[RegistersCount];
+		fpu_reg_t f[RegistersCount / 2];
+		fpu_reg_t e[RegistersCount / 2];
+		fpu_reg_t a[RegistersCount / 2];
+	};
+
+	//Function types used for run-time dispatch. The last parameter of
+	//DatasetReadFunc is a reference to an array of RegistersCount integer registers.
+	typedef void(DatasetReadFunc)(addr_t, MemoryRegisters&, int_reg_t(&reg)[RegistersCount]);
+	typedef void(ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t);
+	typedef void(DatasetInitFunc)(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
+
+	typedef void(DatasetDeallocFunc)(randomx_dataset*);
+	typedef void(CacheDeallocFunc)(randomx_cache*);
+	typedef void(CacheInitializeFunc)(randomx_cache*, const void*, size_t);
+}
diff --git a/RandomX/src/configuration.h b/RandomX/src/configuration.h
new file mode 100644
index 00000000..d2ae7c14
--- /dev/null
+++ b/RandomX/src/configuration.h
@@ -0,0 +1,126 @@
+/*
+Copyright (c) 2018-2019, tevador
+Copyright (c) 2019, Wownero Inc., a Monero Enterprise Alliance partner company
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+//Compile-time parameters of this RandomX variant. The constraints stated in the
+//comments below are enforced by static_asserts in common.hpp.
+
+//Cache size in KiB. Must be a power of 2.
+#define RANDOMX_ARGON_MEMORY 262144 // 256 MiB
+
+//Number of Argon2d iterations for Cache initialization.
+#define RANDOMX_ARGON_ITERATIONS 3
+
+//Number of parallel lanes for Cache initialization.
+#define RANDOMX_ARGON_LANES 1
+
+//Argon2d salt
+#define RANDOMX_ARGON_SALT "RandomWOW\x01"
+
+//Number of random Cache accesses per Dataset item. Minimum is 2.
+#define RANDOMX_CACHE_ACCESSES 8
+
+//Target latency for SuperscalarHash (in cycles of the reference CPU).
+#define RANDOMX_SUPERSCALAR_LATENCY 170
+
+//Dataset base size in bytes. Must be a power of 2.
+#define RANDOMX_DATASET_BASE_SIZE 2147483648 // 2 GiB
+
+//Dataset extra size. Must be divisible by 64.
+#define RANDOMX_DATASET_EXTRA_SIZE 33554368 // 32 MiB - 64 B
+
+//Number of instructions in a RandomX program. Must be divisible by 8.
+#define RANDOMX_PROGRAM_SIZE 256
+
+//Number of iterations during VM execution.
+#define RANDOMX_PROGRAM_ITERATIONS 1024
+
+//Number of chained VM executions per hash.
+#define RANDOMX_PROGRAM_COUNT 16
+
+//Scratchpad L3 size in bytes. Must be a power of 2.
+#define RANDOMX_SCRATCHPAD_L3 1048576 // 1M
+
+//Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3.
+#define RANDOMX_SCRATCHPAD_L2 131072 // 128K
+
+//Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2.
+#define RANDOMX_SCRATCHPAD_L1 16384 // 16K
+
+//Jump condition mask size in bits.
+#define RANDOMX_JUMP_BITS 8
+
+//Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16.
+#define RANDOMX_JUMP_OFFSET 8
+
+/*
+Instruction frequencies (per 256 opcodes)
+Total sum of frequencies must be 256
+*/
+
+//Integer instructions
+#define RANDOMX_FREQ_IADD_RS 25
+#define RANDOMX_FREQ_IADD_M 7
+#define RANDOMX_FREQ_ISUB_R 16
+#define RANDOMX_FREQ_ISUB_M 7
+#define RANDOMX_FREQ_IMUL_R 16
+#define RANDOMX_FREQ_IMUL_M 4
+#define RANDOMX_FREQ_IMULH_R 4
+#define RANDOMX_FREQ_IMULH_M 1
+#define RANDOMX_FREQ_ISMULH_R 4
+#define RANDOMX_FREQ_ISMULH_M 1
+#define RANDOMX_FREQ_IMUL_RCP 8
+#define RANDOMX_FREQ_INEG_R 2
+#define RANDOMX_FREQ_IXOR_R 15
+#define RANDOMX_FREQ_IXOR_M 5
+#define RANDOMX_FREQ_IROR_R 10
+#define RANDOMX_FREQ_IROL_R 0 // weight 0 in this configuration
+#define RANDOMX_FREQ_ISWAP_R 4
+
+//Floating point instructions
+#define RANDOMX_FREQ_FSWAP_R 8
+#define RANDOMX_FREQ_FADD_R 20
+#define RANDOMX_FREQ_FADD_M 5
+#define RANDOMX_FREQ_FSUB_R 20
+#define RANDOMX_FREQ_FSUB_M 5
+#define RANDOMX_FREQ_FSCAL_R 6
+#define RANDOMX_FREQ_FMUL_R 20
+#define RANDOMX_FREQ_FDIV_M 4
+#define RANDOMX_FREQ_FSQRT_R 6
+
+//Control instructions
+#define RANDOMX_FREQ_CBRANCH 16
+#define RANDOMX_FREQ_CFROUND 1
+
+//Store instruction
+#define RANDOMX_FREQ_ISTORE 16
+
+//No-op instruction
+#define RANDOMX_FREQ_NOP 0 // weight 0 in this configuration
+/* ------
+ 256
+*/
diff --git a/RandomX/src/dataset.cpp b/RandomX/src/dataset.cpp
new file mode 100644
index 00000000..e382fd09
--- /dev/null
+++ b/RandomX/src/dataset.cpp
@@ -0,0 +1,189 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+// NOTE(review): the header names of the six system includes below are missing
+// from this copy of the patch. The visible code needs at least <cstring>
+// (memcpy); recover the full list from the original commit.
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "common.hpp"
+#include "dataset.hpp"
+#include "virtual_memory.hpp"
+#include "superscalar.hpp"
+#include "blake2_generator.hpp"
+#include "reciprocal.h"
+#include "blake2/endian.h"
+#include "argon2.h"
+#include "argon2_core.h"
+#include "jit_compiler.hpp"
+#include "intrin_portable.h"
+
+static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value");
+static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unexpected value of ARGON2_BLOCK_SIZE");
+
+namespace randomx {
+
+	//Releases the resources held by a cache: its memory block (through the
+	//Allocator that provided it) and its JIT compiler object.
+	template<class Allocator>
+	void deallocCache(randomx_cache* cache) {
+		if (cache->memory != nullptr)
+			Allocator::freeMemory(cache->memory, CacheSize);
+		delete cache->jit; //deleting a null pointer is a no-op
+	}
+
+	// NOTE(review): the template arguments of the two explicit instantiations
+	// below are missing from this copy of the patch. Presumably there is one per
+	// allocator type (dataset.hpp declares DefaultAllocator) - confirm against
+	// the original commit.
+	template void deallocCache(randomx_cache* cache);
+	template void deallocCache(randomx_cache* cache);
+
+	//Initializes a cache from `key` (keySize bytes):
+	// 1. fills cache->memory with Argon2d, using the RANDOMX_ARGON_* parameters
+	//    and `key` as the password;
+	// 2. generates RANDOMX_CACHE_ACCESSES superscalar programs from a
+	//    Blake2Generator seeded with `key`, replacing the immediate of every
+	//    IMUL_RCP instruction by the index of its reciprocal in
+	//    cache->reciprocalCache.
+	//cache->memory is written without being allocated here, so the caller must
+	//have allocated it beforehand.
+	void initCache(randomx_cache* cache, const void* key, size_t keySize) {
+		uint32_t memory_blocks, segment_length;
+		argon2_instance_t instance;
+		argon2_context context;
+
+		context.out = nullptr;
+		context.outlen = 0;
+		context.pwd = CONST_CAST(uint8_t *)key;
+		context.pwdlen = (uint32_t)keySize;
+		context.salt = CONST_CAST(uint8_t *)RANDOMX_ARGON_SALT;
+		context.saltlen = (uint32_t)randomx::ArgonSaltSize;
+		context.secret = nullptr;
+		context.secretlen = 0;
+		context.ad = nullptr;
+		context.adlen = 0;
+		context.t_cost = RANDOMX_ARGON_ITERATIONS;
+		context.m_cost = RANDOMX_ARGON_MEMORY;
+		context.lanes = RANDOMX_ARGON_LANES;
+		context.threads = 1;
+		context.allocate_cbk = nullptr;
+		context.free_cbk = nullptr;
+		context.flags = ARGON2_DEFAULT_FLAGS;
+		context.version = ARGON2_VERSION_NUMBER;
+
+		/* 2. Align memory size */
+		/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
+		memory_blocks = context.m_cost;
+
+		segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS);
+
+		instance.version = context.version;
+		instance.memory = (block*)cache->memory; //Argon2 works directly in the cache memory
+		instance.passes = context.t_cost;
+		instance.memory_blocks = memory_blocks;
+		instance.segment_length = segment_length;
+		instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
+		instance.lanes = context.lanes;
+		instance.threads = context.threads;
+		instance.type = Argon2_d;
+
+		if (instance.threads > instance.lanes) {
+			instance.threads = instance.lanes;
+		}
+
+		/* 3. Initialization: Hashing inputs, allocating memory, filling first
+		 * blocks
+		 */
+		rxa2_argon_initialize(&instance, &context);
+
+		rxa2_fill_memory_blocks(&instance);
+
+		cache->reciprocalCache.clear();
+		randomx::Blake2Generator gen(key, keySize);
+		for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
+			randomx::generateSuperscalar(cache->programs[i], gen);
+			for (unsigned j = 0; j < cache->programs[i].getSize(); ++j) {
+				auto& instr = cache->programs[i](j);
+				if ((SuperscalarInstructionType)instr.opcode == SuperscalarInstructionType::IMUL_RCP) {
+					auto rcp = randomx_reciprocal(instr.getImm32());
+					//the immediate now holds the index of the reciprocal, not the divisor
+					instr.setImm32(static_cast<uint32_t>(cache->reciprocalCache.size()));
+					cache->reciprocalCache.push_back(rcp);
+				}
+			}
+		}
+	}
+
+	//Same as initCache, then asks the cache's JIT compiler to generate code for
+	//the superscalar programs and for dataset initialization. cache->jit must be set.
+	void initCacheCompile(randomx_cache* cache, const void* key, size_t keySize) {
+		initCache(cache, key, keySize);
+		cache->jit->generateSuperscalarHash(cache->programs, cache->reciprocalCache);
+		cache->jit->generateDatasetInitCode();
+	}
+
+	//Constants used to derive the initial register values of a dataset item.
+	constexpr uint64_t superscalarMul0 = 6364136223846793005ULL;
+	constexpr uint64_t superscalarAdd1 = 9298411001130361340ULL;
+	constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL;
+	constexpr uint64_t superscalarAdd3 = 9306329213124626780ULL;
+	constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL;
+	constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL;
+	constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL;
+	constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL;
+
+	//Maps a register value to the address of a CacheLineSize-byte block inside `memory`.
+	static inline uint8_t* getMixBlock(uint64_t registerValue, uint8_t *memory) {
+		constexpr uint32_t mask = CacheSize / CacheLineSize - 1;
+		return memory + (registerValue & mask) * CacheLineSize;
+	}
+
+	//Computes dataset item `itemNumber` from the cache and writes CacheLineSize
+	//bytes to `out`. Each of the RANDOMX_CACHE_ACCESSES rounds runs one
+	//superscalar program over the eight registers and XORs in one cache block
+	//chosen by the previous round's address register.
+	void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t itemNumber) {
+		int_reg_t rl[8];
+		//the final memcpy copies CacheLineSize bytes out of rl
+		static_assert(sizeof(rl) == static_cast<size_t>(CacheLineSize), "Register file size must match CacheLineSize");
+		uint8_t* mixBlock;
+		uint64_t registerValue = itemNumber;
+		rl[0] = (itemNumber + 1) * superscalarMul0;
+		rl[1] = rl[0] ^ superscalarAdd1;
+		rl[2] = rl[0] ^ superscalarAdd2;
+		rl[3] = rl[0] ^ superscalarAdd3;
+		rl[4] = rl[0] ^ superscalarAdd4;
+		rl[5] = rl[0] ^ superscalarAdd5;
+		rl[6] = rl[0] ^ superscalarAdd6;
+		rl[7] = rl[0] ^ superscalarAdd7;
+		for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
+			mixBlock = getMixBlock(registerValue, cache->memory);
+			rx_prefetch_nta(mixBlock);
+			SuperscalarProgram& prog = cache->programs[i];
+
+			executeSuperscalar(rl, prog, &cache->reciprocalCache);
+
+			for (unsigned q = 0; q < 8; ++q)
+				rl[q] ^= load64_native(mixBlock + 8 * q);
+
+			registerValue = rl[prog.getAddressRegister()];
+		}
+
+		memcpy(out, &rl, CacheLineSize);
+	}
+
+	//Computes the items [startItem, endItem) and stores them consecutively,
+	//CacheLineSize bytes apart, starting at `dataset`.
+	void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startItem, uint32_t endItem) {
+		for (uint32_t itemNumber = startItem; itemNumber < endItem; ++itemNumber, dataset += CacheLineSize)
+			initDatasetItem(cache, dataset, itemNumber);
+	}
+}
diff --git a/RandomX/src/dataset.hpp b/RandomX/src/dataset.hpp
new file mode 100644
index 00000000..58a67f48
--- /dev/null
+++ b/RandomX/src/dataset.hpp
@@ -0,0 +1,76 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+// NOTE(review): the three system header names were missing from this copy of
+// the patch. They are filled in from what this file uses (uint8_t,
+// std::vector, std::is_standard_layout) - confirm against the original commit.
+#include <cstdint>
+#include <vector>
+#include <type_traits>
+#include "common.hpp"
+#include "superscalar_program.hpp"
+#include "allocator.hpp"
+
+/* Global scope for C binding */
+struct randomx_dataset {
+	uint8_t* memory = nullptr;
+	randomx::DatasetDeallocFunc* dealloc = nullptr;
+};
+
+/* Global scope for C binding */
+struct randomx_cache {
+	uint8_t* memory = nullptr;
+	randomx::CacheDeallocFunc* dealloc = nullptr;
+	//deleted by deallocCache; null-initialized so that an unset cache is safe to release
+	randomx::JitCompiler* jit = nullptr;
+	randomx::CacheInitializeFunc* initialize = nullptr;
+	randomx::DatasetInitFunc* datasetInit = nullptr;
+	randomx::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES];
+	// NOTE(review): the element type of this vector is missing from this copy
+	// of the patch. It holds the values returned by randomx_reciprocal() -
+	// recover the type from the original commit.
+	std::vector reciprocalCache;
+};
+
+//A pointer to a standard-layout struct object points to its initial member
+static_assert(std::is_standard_layout<randomx_dataset>(), "randomx_dataset must be a standard-layout struct");
+static_assert(std::is_standard_layout<randomx_cache>(), "randomx_cache must be a standard-layout struct");
+
+namespace randomx {
+
+	// NOTE(review): the template arguments of AlignedAllocator are missing from
+	// this copy of the patch - recover them from the original commit.
+	using DefaultAllocator = AlignedAllocator;
+
+	//Frees the dataset memory through the Allocator that provided it.
+	template<class Allocator>
+	void deallocDataset(randomx_dataset* dataset) {
+		if (dataset->memory != nullptr)
+			Allocator::freeMemory(dataset->memory, DatasetSize);
+	}
+
+	//Frees the cache memory (through Allocator) and the cache's JIT compiler.
+	template<class Allocator>
+	void deallocCache(randomx_cache* cache);
+
+	void initCache(randomx_cache*, const void*, size_t);
+	void initCacheCompile(randomx_cache*, const void*, size_t);
+	void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t itemNumber);
+	void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startItem, uint32_t endItem);
+}
diff --git
a/RandomX/src/instruction.cpp b/RandomX/src/instruction.cpp new file mode 100644 index 00000000..1751098b --- /dev/null +++ b/RandomX/src/instruction.cpp @@ -0,0 +1,389 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+#include "instruction.hpp"
+#include "common.hpp"
+
+namespace randomx {
+
+	//Writes the mnemonic of this instruction followed by its operands, using
+	//the formatter registered for the opcode in the engine table.
+	void Instruction::print(std::ostream& os) const {
+		os << names[opcode] << " ";
+		auto handler = engine[opcode];
+		(this->*handler)(os);
+	}
+
+	//Prints a register-based source address: "L1[r<src>+imm]" or "L2[...]",
+	//selected by the mod.mem bits.
+	void Instruction::genAddressReg(std::ostream& os, int srcIndex) const {
+		os << (getModMem() ? "L1" : "L2") << "[r" << srcIndex << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
+	}
+
+	//Prints a register-based store address. The level is L3 when mod.cond is at
+	//least StoreL3Condition, otherwise L1/L2 as in genAddressReg.
+	void Instruction::genAddressRegDst(std::ostream& os, int dstIndex) const {
+		if (getModCond() < StoreL3Condition)
+			os << (getModMem() ? "L1" : "L2");
+		else
+			os << "L3";
+		os << "[r" << dstIndex << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
+	}
+
+	//Prints an immediate L3 address (imm32 masked with ScratchpadL3Mask).
+	void Instruction::genAddressImm(std::ostream& os) const {
+		os << "L3" << "[" << (getImm32() & ScratchpadL3Mask) << "]";
+	}
+
+	//Operand formatters, one per instruction. Each prints the operands and ends
+	//the line. For most two-operand integer instructions, dst == src selects the
+	//immediate form (or the immediate L3 address for the memory variants).
+
+	void Instruction::h_IADD_RS(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		auto srcIndex = src % RegistersCount;
+		os << "r" << dstIndex << ", r" << srcIndex;
+		if(dstIndex == RegisterNeedsDisplacement) {
+			os << ", " << (int32_t)getImm32();
+		}
+		os << ", SHFT " << getModShift() << std::endl;
+	}
+
+	void Instruction::h_IADD_M(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		auto srcIndex = src % RegistersCount;
+		if (dstIndex != srcIndex) {
+			os << "r" << dstIndex << ", ";
+			genAddressReg(os, srcIndex);
+			os << std::endl;
+		}
+		else {
+			os << "r" << dstIndex << ", ";
+			genAddressImm(os);
+			os << std::endl;
+		}
+	}
+
+	void Instruction::h_ISUB_R(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		auto srcIndex = src % RegistersCount;
+		if (dstIndex != srcIndex) {
+			os << "r" << dstIndex << ", r" << srcIndex << std::endl;
+		}
+		else {
+			os << "r" << dstIndex << ", " << (int32_t)getImm32() << std::endl;
+		}
+	}
+
+	void Instruction::h_ISUB_M(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		auto srcIndex = src % RegistersCount;
+		if (dstIndex != srcIndex) {
+			os << "r" << dstIndex << ", ";
+			genAddressReg(os, srcIndex);
+			os << std::endl;
+		}
+		else {
+			os << "r" << dstIndex << ", ";
+			genAddressImm(os);
+			os << std::endl;
+		}
+	}
+
+	void Instruction::h_IMUL_R(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		auto srcIndex = src % RegistersCount;
+		if (dstIndex != srcIndex) {
+			os << "r" << dstIndex << ", r" << srcIndex << std::endl;
+		}
+		else {
+			os << "r" << dstIndex << ", " << (int32_t)getImm32() << std::endl;
+		}
+	}
+
+	void Instruction::h_IMUL_M(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		auto srcIndex = src % RegistersCount;
+		if (dstIndex != srcIndex) {
+			os << "r" << dstIndex << ", ";
+			genAddressReg(os, srcIndex);
+			os << std::endl;
+		}
+		else {
+			os << "r" << dstIndex << ", ";
+			genAddressImm(os);
+			os << std::endl;
+		}
+	}
+
+	void Instruction::h_IMULH_R(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		auto srcIndex = src % RegistersCount;
+		os << "r" << dstIndex << ", r" << srcIndex << std::endl;
+	}
+
+	void Instruction::h_IMULH_M(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		auto srcIndex = src % RegistersCount;
+		if (dstIndex != srcIndex) {
+			os << "r" << dstIndex << ", ";
+			genAddressReg(os, srcIndex);
+			os << std::endl;
+		}
+		else {
+			os << "r" << dstIndex << ", ";
+			genAddressImm(os);
+			os << std::endl;
+		}
+	}
+
+	void Instruction::h_ISMULH_R(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		auto srcIndex = src % RegistersCount;
+		os << "r" << dstIndex << ", r" << srcIndex << std::endl;
+	}
+
+	void Instruction::h_ISMULH_M(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		auto srcIndex = src % RegistersCount;
+		if (dstIndex != srcIndex) {
+			os << "r" << dstIndex << ", ";
+			genAddressReg(os, srcIndex);
+			os << std::endl;
+		}
+		else {
+			os << "r" << dstIndex << ", ";
+			genAddressImm(os);
+			os << std::endl;
+		}
+	}
+
+	void Instruction::h_INEG_R(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		os << "r" << dstIndex << std::endl;
+	}
+
+	void Instruction::h_IXOR_R(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		auto srcIndex = src % RegistersCount;
+		if (dstIndex != srcIndex) {
+			os << "r" << dstIndex << ", r" << srcIndex << std::endl;
+		}
+		else {
+			os << "r" << dstIndex << ", " << (int32_t)getImm32() << std::endl;
+		}
+	}
+
+	void Instruction::h_IXOR_M(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		auto srcIndex = src % RegistersCount;
+		if (dstIndex != srcIndex) {
+			os << "r" << dstIndex << ", ";
+			genAddressReg(os, srcIndex);
+			os << std::endl;
+		}
+		else {
+			os << "r" << dstIndex << ", ";
+			genAddressImm(os);
+			os << std::endl;
+		}
+	}
+
+	void Instruction::h_IROR_R(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		auto srcIndex = src % RegistersCount;
+		if (dstIndex != srcIndex) {
+			os << "r" << dstIndex << ", r" << srcIndex << std::endl;
+		}
+		else {
+			os << "r" << dstIndex << ", " << (getImm32() & 63) << std::endl;
+		}
+	}
+
+	void Instruction::h_IROL_R(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		auto srcIndex = src % RegistersCount;
+		if (dstIndex != srcIndex) {
+			os << "r" << dstIndex << ", r" << srcIndex << std::endl;
+		}
+		else {
+			os << "r" << dstIndex << ", " << (getImm32() & 63) << std::endl;
+		}
+	}
+
+	void Instruction::h_IMUL_RCP(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		os << "r" << dstIndex << ", " << getImm32() << std::endl;
+	}
+
+	void Instruction::h_ISWAP_R(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		auto srcIndex = src % RegistersCount;
+		os << "r" << dstIndex << ", r" << srcIndex << std::endl;
+	}
+
+	void Instruction::h_FSWAP_R(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		//the upper half of the index range names the 'e' registers, the lower half 'f'
+		const char reg = (dstIndex >= RegisterCountFlt) ? 'e' : 'f';
+		dstIndex %= RegisterCountFlt;
+		os << reg << dstIndex << std::endl;
+	}
+
+	void Instruction::h_FADD_R(std::ostream& os) const {
+		auto dstIndex = dst % RegisterCountFlt;
+		auto srcIndex = src % RegisterCountFlt;
+		os << "f" << dstIndex << ", a" << srcIndex << std::endl;
+	}
+
+	void Instruction::h_FADD_M(std::ostream& os) const {
+		auto dstIndex = dst % RegisterCountFlt;
+		auto srcIndex = src % RegistersCount;
+		os << "f" << dstIndex << ", ";
+		genAddressReg(os, srcIndex);
+		os << std::endl;
+	}
+
+	void Instruction::h_FSUB_R(std::ostream& os) const {
+		auto dstIndex = dst % RegisterCountFlt;
+		auto srcIndex = src % RegisterCountFlt;
+		os << "f" << dstIndex << ", a" << srcIndex << std::endl;
+	}
+
+	void Instruction::h_FSUB_M(std::ostream& os) const {
+		auto dstIndex = dst % RegisterCountFlt;
+		auto srcIndex = src % RegistersCount;
+		os << "f" << dstIndex << ", ";
+		genAddressReg(os, srcIndex);
+		os << std::endl;
+	}
+
+	void Instruction::h_FSCAL_R(std::ostream& os) const {
+		auto dstIndex = dst % RegisterCountFlt;
+		os << "f" << dstIndex << std::endl;
+	}
+
+	void Instruction::h_FMUL_R(std::ostream& os) const {
+		auto dstIndex = dst % RegisterCountFlt;
+		auto srcIndex = src % RegisterCountFlt;
+		os << "e" << dstIndex << ", a" << srcIndex << std::endl;
+	}
+
+	void Instruction::h_FDIV_M(std::ostream& os) const {
+		auto dstIndex = dst % RegisterCountFlt;
+		auto srcIndex = src % RegistersCount;
+		os << "e" << dstIndex << ", ";
+		genAddressReg(os, srcIndex);
+		os << std::endl;
+	}
+
+	void Instruction::h_FSQRT_R(std::ostream& os) const {
+		auto dstIndex = dst % RegisterCountFlt;
+		os << "e" << dstIndex << std::endl;
+	}
+
+	void Instruction::h_CFROUND(std::ostream& os) const {
+		auto srcIndex = src % RegistersCount;
+		os << "r" << srcIndex << ", " << (getImm32() & 63) << std::endl;
+	}
+
+	void Instruction::h_CBRANCH(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		os << "r" << dstIndex << ", " << (int32_t)getImm32() << ", COND " << (int)(getModCond()) << std::endl;
+	}
+
+	void Instruction::h_ISTORE(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		auto srcIndex = src % RegistersCount;
+		genAddressRegDst(os, dstIndex);
+		os << ", r" << srcIndex << std::endl;
+	}
+
+	void Instruction::h_NOP(std::ostream& os) const {
+		os << std::endl;
+	}
+
+#include "instruction_weights.hpp"
+#define INST_NAME(x) REPN(#x, WT(x))
+#define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x))
+
+	//Opcode -> mnemonic. Each entry is repeated according to the instruction's
+	//weight. The entries must stay in the same order as in the engine table
+	//below, so zero-weight instructions (IROL_R) are listed too.
+	const char* Instruction::names[256] = {
+		INST_NAME(IADD_RS)
+		INST_NAME(IADD_M)
+		INST_NAME(ISUB_R)
+		INST_NAME(ISUB_M)
+		INST_NAME(IMUL_R)
+		INST_NAME(IMUL_M)
+		INST_NAME(IMULH_R)
+		INST_NAME(IMULH_M)
+		INST_NAME(ISMULH_R)
+		INST_NAME(ISMULH_M)
+		INST_NAME(IMUL_RCP)
+		INST_NAME(INEG_R)
+		INST_NAME(IXOR_R)
+		INST_NAME(IXOR_M)
+		INST_NAME(IROR_R)
+		INST_NAME(IROL_R)
+		INST_NAME(ISWAP_R)
+		INST_NAME(FSWAP_R)
+		INST_NAME(FADD_R)
+		INST_NAME(FADD_M)
+		INST_NAME(FSUB_R)
+		INST_NAME(FSUB_M)
+		INST_NAME(FSCAL_R)
+		INST_NAME(FMUL_R)
+		INST_NAME(FDIV_M)
+		INST_NAME(FSQRT_R)
+		INST_NAME(CBRANCH)
+		INST_NAME(CFROUND)
+		INST_NAME(ISTORE)
+		INST_NAME(NOP)
+	};
+
+	//Opcode -> operand formatter, laid out exactly like the names table.
+	InstructionFormatter Instruction::engine[256] = {
+		INST_HANDLE(IADD_RS)
+		INST_HANDLE(IADD_M)
+		INST_HANDLE(ISUB_R)
+		INST_HANDLE(ISUB_M)
+		INST_HANDLE(IMUL_R)
+		INST_HANDLE(IMUL_M)
+		INST_HANDLE(IMULH_R)
+		INST_HANDLE(IMULH_M)
+		INST_HANDLE(ISMULH_R)
+		INST_HANDLE(ISMULH_M)
+		INST_HANDLE(IMUL_RCP)
+		INST_HANDLE(INEG_R)
+		INST_HANDLE(IXOR_R)
+		INST_HANDLE(IXOR_M)
+		INST_HANDLE(IROR_R)
+		INST_HANDLE(IROL_R)
+		INST_HANDLE(ISWAP_R)
+		INST_HANDLE(FSWAP_R)
+		INST_HANDLE(FADD_R)
+		INST_HANDLE(FADD_M)
+		INST_HANDLE(FSUB_R)
+		INST_HANDLE(FSUB_M)
+		INST_HANDLE(FSCAL_R)
+		INST_HANDLE(FMUL_R)
+		INST_HANDLE(FDIV_M)
+		INST_HANDLE(FSQRT_R)
+		INST_HANDLE(CBRANCH)
+		INST_HANDLE(CFROUND)
+		INST_HANDLE(ISTORE)
+		INST_HANDLE(NOP)
+	};
+
+}
\ No newline at end of file
diff --git a/RandomX/src/instruction.hpp b/RandomX/src/instruction.hpp
new file mode 100644
index 00000000..b1863b5e --- /dev/null +++ b/RandomX/src/instruction.hpp @@ -0,0 +1,149 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+#pragma once
+
+// NOTE(review): the three system header names were missing from this copy of
+// the patch. They are filled in from what this file uses (fixed-width
+// integers, std::ostream, std::is_standard_layout) - confirm against the
+// original commit.
+#include <cstdint>
+#include <iostream>
+#include <type_traits>
+#include "blake2/endian.h"
+
+namespace randomx {
+
+	class Instruction;
+
+	//Member function that prints the operands of one instruction kind.
+	typedef void(Instruction::*InstructionFormatter)(std::ostream&) const;
+
+	enum class InstructionType : uint16_t {
+		IADD_RS = 0,
+		IADD_M = 1,
+		ISUB_R = 2,
+		ISUB_M = 3,
+		IMUL_R = 4,
+		IMUL_M = 5,
+		IMULH_R = 6,
+		IMULH_M = 7,
+		ISMULH_R = 8,
+		ISMULH_M = 9,
+		IMUL_RCP = 10,
+		INEG_R = 11,
+		IXOR_R = 12,
+		IXOR_M = 13,
+		IROR_R = 14,
+		IROL_R = 15,
+		ISWAP_R = 16,
+		FSWAP_R = 17,
+		FADD_R = 18,
+		FADD_M = 19,
+		FSUB_R = 20,
+		FSUB_M = 21,
+		FSCAL_R = 22,
+		FMUL_R = 23,
+		FDIV_M = 24,
+		FSQRT_R = 25,
+		CBRANCH = 26,
+		CFROUND = 27,
+		ISTORE = 28,
+		NOP = 29,
+	};
+
+	//One 8-byte program instruction: opcode, dst, src and mod bytes followed by
+	//a 32-bit immediate that is accessed through load32/store32.
+	class Instruction {
+	public:
+		uint32_t getImm32() const {
+			return load32(&imm32);
+		}
+		void setImm32(uint32_t val) {
+			store32(&imm32, val);
+		}
+		const char* getName() const {
+			return names[opcode];
+		}
+		friend std::ostream& operator<<(std::ostream& os, const Instruction& i) {
+			i.print(os);
+			return os;
+		}
+		int getModMem() const {
+			return mod % 4; //bits 0-1
+		}
+		int getModShift() const {
+			return (mod >> 2) % 4; //bits 2-3
+		}
+		int getModCond() const {
+			return mod >> 4; //bits 4-7
+		}
+		void setMod(uint8_t val) {
+			mod = val;
+		}
+
+		uint8_t opcode;
+		uint8_t dst;
+		uint8_t src;
+		uint8_t mod;
+		uint32_t imm32;
+	private:
+		void print(std::ostream&) const;
+		//mnemonic and operand formatter for each of the 256 opcode values
+		static const char* names[256];
+		static InstructionFormatter engine[256];
+		void genAddressReg(std::ostream& os, int) const;
+		void genAddressImm(std::ostream& os) const;
+		void genAddressRegDst(std::ostream&, int) const;
+		void h_IADD_RS(std::ostream&) const;
+		void h_IADD_M(std::ostream&) const;
+		void h_ISUB_R(std::ostream&) const;
+		void h_ISUB_M(std::ostream&) const;
+		void h_IMUL_R(std::ostream&) const;
+		void h_IMUL_M(std::ostream&) const;
+		void h_IMULH_R(std::ostream&) const;
+		void h_IMULH_M(std::ostream&) const;
+		void h_ISMULH_R(std::ostream&) const;
+		void h_ISMULH_M(std::ostream&) const;
+		void h_IMUL_RCP(std::ostream&) const;
+		void h_INEG_R(std::ostream&) const;
+		void h_IXOR_R(std::ostream&) const;
+		void h_IXOR_M(std::ostream&) const;
+		void h_IROR_R(std::ostream&) const;
+		void h_IROL_R(std::ostream&) const;
+		void h_ISWAP_R(std::ostream&) const;
+		void h_FSWAP_R(std::ostream&) const;
+		void h_FADD_R(std::ostream&) const;
+		void h_FADD_M(std::ostream&) const;
+		void h_FSUB_R(std::ostream&) const;
+		void h_FSUB_M(std::ostream&) const;
+		void h_FSCAL_R(std::ostream&) const;
+		void h_FMUL_R(std::ostream&) const;
+		void h_FDIV_M(std::ostream&) const;
+		void h_FSQRT_R(std::ostream&) const;
+		void h_CBRANCH(std::ostream&) const;
+		void h_CFROUND(std::ostream&) const;
+		void h_ISTORE(std::ostream&) const;
+		void h_NOP(std::ostream&) const;
+	};
+
+	static_assert(sizeof(Instruction) == 8, "Invalid size of struct randomx::Instruction");
+	static_assert(std::is_standard_layout<Instruction>(), "randomx::Instruction must be a standard-layout struct");
+}
\ No newline at end of file
diff --git a/RandomX/src/instruction_weights.hpp b/RandomX/src/instruction_weights.hpp
new file mode 100644
index 00000000..baafe82a
--- /dev/null
+++ b/RandomX/src/instruction_weights.hpp
@@ -0,0 +1,113 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+// REPn(x) expands to n copies of "x," (a comma follows every copy); REP0 expands to nothing.
+#define REP0(x)
+#define REP1(x) x,
+#define REP2(x) REP1(x) x,
+#define REP3(x) REP2(x) x,
+#define REP4(x) REP3(x) x,
+#define REP5(x) REP4(x) x,
+#define REP6(x) REP5(x) x,
+#define REP7(x) REP6(x) x,
+#define REP8(x) REP7(x) x,
+#define REP9(x) REP8(x) x,
+#define REP10(x) REP9(x) x,
+#define REP11(x) REP10(x) x,
+#define REP12(x) REP11(x) x,
+#define REP13(x) REP12(x) x,
+#define REP14(x) REP13(x) x,
+#define REP15(x) REP14(x) x,
+#define REP16(x) REP15(x) x,
+#define REP17(x) REP16(x) x,
+#define REP18(x) REP17(x) x,
+#define REP19(x) REP18(x) x,
+#define REP20(x) REP19(x) x,
+#define REP21(x) REP20(x) x,
+#define REP22(x) REP21(x) x,
+#define REP23(x) REP22(x) x,
+#define REP24(x) REP23(x) x,
+#define REP25(x) REP24(x) x,
+#define REP26(x) REP25(x) x,
+#define REP27(x) REP26(x) x,
+#define REP28(x) REP27(x) x,
+#define REP29(x) REP28(x) x,
+#define REP30(x) REP29(x) x,
+#define REP31(x) REP30(x) x,
+#define REP32(x) REP31(x) x,
+#define REP33(x) REP32(x) x,
+#define REP40(x) REP32(x) REP8(x) // the larger counts below are built as sums of smaller REPn blocks
+#define REP64(x) REP32(x) REP32(x)
+#define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x)
+#define REP232(x) REP128(x) REP40(x) REP40(x) REP24(x) // 128 + 40 + 40 + 24
+#define REP256(x) REP128(x) REP128(x)
+#define REPNX(x,N) REP##N(x) // pastes N onto "REP", so a REP<N> macro must be defined for that count
+#define REPN(x,N) REPNX(x,N) // extra level of indirection so a macro passed as N is expanded before pasting
+#define NUM(x) x
+#define WT(x) NUM(RANDOMX_FREQ_##x) // value of RANDOMX_FREQ_<x>, which is defined outside this header
+// REPCASEn(x) expands to n "case __COUNTER__:" labels; x is unused. __COUNTER__ is a non-standard compiler macro.
+#define REPCASE0(x)
+#define REPCASE1(x) case __COUNTER__:
+#define REPCASE2(x) REPCASE1(x) case __COUNTER__:
+#define REPCASE3(x) REPCASE2(x) case __COUNTER__:
+#define REPCASE4(x) REPCASE3(x) case __COUNTER__:
+#define REPCASE5(x) REPCASE4(x) case __COUNTER__:
+#define REPCASE6(x) REPCASE5(x) case __COUNTER__:
+#define REPCASE7(x) REPCASE6(x) case __COUNTER__:
+#define REPCASE8(x) REPCASE7(x) case __COUNTER__:
+#define REPCASE9(x) REPCASE8(x) case __COUNTER__:
+#define REPCASE10(x) REPCASE9(x) case __COUNTER__:
+#define REPCASE11(x) REPCASE10(x) case __COUNTER__:
+#define REPCASE12(x) REPCASE11(x) case __COUNTER__:
+#define REPCASE13(x) REPCASE12(x) case __COUNTER__:
+#define REPCASE14(x) REPCASE13(x) case __COUNTER__:
+#define REPCASE15(x) REPCASE14(x) case __COUNTER__:
+#define REPCASE16(x) REPCASE15(x) case __COUNTER__:
+#define REPCASE17(x) REPCASE16(x) case __COUNTER__:
+#define REPCASE18(x) REPCASE17(x) case __COUNTER__:
+#define REPCASE19(x) REPCASE18(x) case __COUNTER__:
+#define REPCASE20(x) REPCASE19(x) case __COUNTER__:
+#define REPCASE21(x) REPCASE20(x) case __COUNTER__:
+#define REPCASE22(x) REPCASE21(x) case __COUNTER__:
+#define REPCASE23(x) REPCASE22(x) case __COUNTER__:
+#define REPCASE24(x) REPCASE23(x) case __COUNTER__:
+#define REPCASE25(x) REPCASE24(x) case __COUNTER__:
+#define REPCASE26(x) REPCASE25(x) case __COUNTER__:
+#define REPCASE27(x) REPCASE26(x) case __COUNTER__:
+#define REPCASE28(x) REPCASE27(x) case __COUNTER__:
+#define REPCASE29(x) REPCASE28(x) case __COUNTER__:
+#define REPCASE30(x) REPCASE29(x) case __COUNTER__:
+#define REPCASE31(x) REPCASE30(x) case __COUNTER__:
+#define REPCASE32(x) REPCASE31(x) case __COUNTER__:
+#define REPCASE64(x) REPCASE32(x) REPCASE32(x)
+#define REPCASE128(x) REPCASE64(x) REPCASE64(x)
+#define REPCASE256(x) REPCASE128(x) REPCASE128(x)
+#define REPCASENX(x,N) REPCASE##N(x) // pastes N onto "REPCASE"; only the counts defined above are usable
+#define REPCASEN(x,N) REPCASENX(x,N) // indirection so N is macro-expanded before pasting
+#define CASE_REP(x) REPCASEN(x, WT(x)) // RANDOMX_FREQ_<x> consecutive case labels
diff --git a/RandomX/src/instructions_portable.cpp b/RandomX/src/instructions_portable.cpp
new file mode 100644
index 00000000..7d096906
--- /dev/null
+++ b/RandomX/src/instructions_portable.cpp
@@ -0,0 +1,194 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma STDC FENV_ACCESS ON
+#include <cfenv>  // fesetround and the FE_* rounding-mode constants
+#include <cmath>
+#include "common.hpp"
+#include "intrin_portable.h"
+#include "blake2/endian.h"
+// mulh / smulh return the high 64 bits of the 128-bit product; use __int128 directly when the compiler has it.
+#if defined(__SIZEOF_INT128__)
+    typedef unsigned __int128 uint128_t;
+    typedef __int128 int128_t;
+    uint64_t mulh(uint64_t a, uint64_t b) {
+        return ((uint128_t)a * b) >> 64;
+    }
+    int64_t smulh(int64_t a, int64_t b) {
+        return ((int128_t)a * b) >> 64;
+    }
+    #define HAVE_MULH
+    #define HAVE_SMULH
+#endif
+// MSVC: rotates, high multiplies and rounding control come from compiler intrinsics.
+#if defined(_MSC_VER)
+    #define HAS_VALUE(X) X ## 0
+    #define EVAL_DEFINE(X) HAS_VALUE(X) // X is expanded first, then "0" is appended: an empty expansion yields 0
+    #include <intrin.h>
+    #include <stdlib.h>
+
+    uint64_t rotl(uint64_t x, int c) {
+        return _rotl64(x, c);
+    }
+    uint64_t rotr(uint64_t x , int c) {
+        return _rotr64(x, c);
+    }
+    #define HAVE_ROTL
+    #define HAVE_ROTR
+
+    #if EVAL_DEFINE(__MACHINEARM64_X64(1)) // presumably nonzero only where __umulh is available - confirm against intrin.h
+        uint64_t mulh(uint64_t a, uint64_t b) {
+            return __umulh(a, b);
+        }
+        #define HAVE_MULH
+    #endif
+
+    #if EVAL_DEFINE(__MACHINEX64(1)) // presumably nonzero only where _mul128 is available - confirm against intrin.h
+        int64_t smulh(int64_t a, int64_t b) {
+            int64_t hi;
+            _mul128(a, b, &hi); // low half is the return value and is discarded; hi receives the high half
+            return hi;
+        }
+        #define HAVE_SMULH
+    #endif
+
+    static void setRoundMode_(uint32_t mode) {
+        _controlfp(mode, _MCW_RC); // _MCW_RC limits the update to the rounding-control field
+    }
+    #define HAVE_SETROUNDMODE_IMPL
+#endif
+// Everything below is the generic fallback for whatever the sections above did not provide.
+#ifndef HAVE_SETROUNDMODE_IMPL
+    static void setRoundMode_(uint32_t mode) {
+        fesetround(mode);
+    }
+#endif
+
+#ifndef HAVE_ROTR
+    uint64_t rotr(uint64_t a, int b) {
+        return (a >> (b & 63)) | (a << ((64 - (b & 63)) & 63)); // counts taken mod 64, so b == 0 never shifts by 64 (undefined)
+    }
+    #define HAVE_ROTR
+#endif
+
+#ifndef HAVE_ROTL
+    uint64_t rotl(uint64_t a, int b) {
+        return (a << (b & 63)) | (a >> ((64 - (b & 63)) & 63)); // same masking as rotr; unchanged for b in 1..63
+    }
+    #define HAVE_ROTL
+#endif
+
+#ifndef HAVE_MULH
+    #define LO(x) ((x)&0xffffffff)
+    #define HI(x) ((x)>>32)
+    uint64_t mulh(uint64_t a, uint64_t b) {
+        uint64_t ah = HI(a), al = LO(a); // split both operands into 32-bit halves
+        uint64_t bh = HI(b), bl = LO(b);
+        uint64_t x00 = al * bl; // four 32x32 -> 64-bit partial products
+        uint64_t x01 = al * bh;
+        uint64_t x10 = ah * bl;
+        uint64_t x11 = ah * bh;
+        uint64_t m1 = LO(x10) + LO(x01) + HI(x00); // product bits 32..63 plus carry; only HI(m1) is needed
+        uint64_t m2 = HI(x10) + HI(x01) + LO(x11) + HI(m1); // product bits 64..95 plus carry
+        uint64_t m3 = HI(x11) + HI(m2); // product bits 96..127
+
+        return (m3 << 32) + LO(m2);
+    }
+    #define HAVE_MULH
+#endif
+
+#ifndef HAVE_SMULH
+    int64_t smulh(int64_t a, int64_t b) {
+        uint64_t hi = mulh((uint64_t)a, (uint64_t)b); // unsigned high half; corrected in wrap-around arithmetic (no signed overflow)
+        if (a < 0LL) hi -= (uint64_t)b; // a negative: subtract b from the high half
+        if (b < 0LL) hi -= (uint64_t)a; // b negative: subtract a from the high half
+        return (int64_t)hi;
+    }
+    #define HAVE_SMULH
+#endif
+// RANDOMX_DEFAULT_FENV is defined by intrin_portable.h when the non-SSE2 path is in use.
+#ifdef RANDOMX_DEFAULT_FENV
+
+void rx_reset_float_state() {
+    setRoundMode_(FE_TONEAREST);
+    rx_set_double_precision(); //set precision to 53 bits if needed by the platform
+}
+
+void rx_set_rounding_mode(uint32_t mode) {
+    switch (mode & 3) { // only the two low bits select the mode
+    case RoundDown:
+        setRoundMode_(FE_DOWNWARD);
+        break;
+    case RoundUp:
+        setRoundMode_(FE_UPWARD);
+        break;
+    case RoundToZero:
+        setRoundMode_(FE_TOWARDZERO);
+        break;
+    case RoundToNearest:
+        setRoundMode_(FE_TONEAREST);
+        break;
+    default:
+        UNREACHABLE; // mode & 3 is always one of the four cases above
+    }
+}
+
+#endif
+
+#ifdef RANDOMX_USE_X87
+
+#ifdef _M_IX86
+
+void rx_set_double_precision() {
+    _control87(_PC_53, _MCW_PC);
+}
+
+#elif defined(__i386)
+
+void rx_set_double_precision() {
+    uint16_t volatile x87cw;
+    asm volatile("fstcw %0" : "=m" (x87cw)); // read the x87 control word
+    x87cw &= ~0x300; // clear bits 8-9 ...
+    x87cw |= 0x200;  // ... and set them to binary 10
+    asm volatile("fldcw %0" : : "m" (x87cw)); // write the control word back
+}
+
+#endif
+
+#endif //RANDOMX_USE_X87
+// NOTE(review): reading f after writing i relies on union type punning, a compiler extension rather than ISO C++.
+union double_ser_t {
+    double f;
+    uint64_t i;
+};
+
+double loadDoublePortable(const void* addr) {
+    double_ser_t ds;
+    ds.i = load64(addr); // 64-bit load through the load64 helper, then reinterpret the bits as a double
+    return ds.f;
+}
diff --git a/RandomX/src/intrin_portable.h b/RandomX/src/intrin_portable.h
new file mode 100644
index 00000000..ce36992d
--- /dev/null
+++ b/RandomX/src/intrin_portable.h
@@ -0,0 +1,380 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include "blake2/endian.h"
+// (-1 == ~0) holds on two's-complement targets, where a plain cast suffices; the other branch computes the value arithmetically.
+constexpr int32_t unsigned32ToSigned2sCompl(uint32_t x) {
+    return (-1 == ~0) ? (int32_t)x : (x > INT32_MAX ? (-(int32_t)(UINT32_MAX - x) - 1) : (int32_t)x);
+}
+
+constexpr int64_t unsigned64ToSigned2sCompl(uint64_t x) {
+    return (-1 == ~0) ? (int64_t)x : (x > INT64_MAX ? (-(int64_t)(UINT64_MAX - x) - 1) : (int64_t)x);
+}
+// Widens a 32-bit value to 64 bits, filling the upper half with ones when bit 31 is set.
+constexpr uint64_t signExtend2sCompl(uint32_t x) {
+    return (-1 == ~0) ? (int64_t)(int32_t)(x) : (x > INT32_MAX ? (x | 0xffffffff00000000ULL) : (uint64_t)x);
+}
+// Rounding-mode codes accepted by rx_set_rounding_mode.
+constexpr int RoundToNearest = 0;
+constexpr int RoundDown = 1;
+constexpr int RoundUp = 2;
+constexpr int RoundToZero = 3;
+
+//MSVC doesn't define __SSE2__, so we have to define it manually if SSE2 is available
+#if !defined(__SSE2__) && (defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2))
+#define __SSE2__ 1
+#endif
+
+//the library "sqrt" function provided by MSVC for x86 targets doesn't give
+//the correct results, so we have to use inline assembly to call x87 fsqrt directly
+#if !defined(__SSE2__)
+#if defined(_M_IX86)
+inline double __cdecl rx_sqrt(double x) {
+    __asm {
+        fld x
+        fsqrt
+    }
+}
+#define rx_sqrt rx_sqrt
+
+void rx_set_double_precision();
+#define RANDOMX_USE_X87
+
+#elif defined(__i386)
+
+void rx_set_double_precision();
+#define RANDOMX_USE_X87
+
+#endif
+#endif //__SSE2__
+
+#if !defined(rx_sqrt)
+#define rx_sqrt sqrt
+#endif
+// Without x87 the precision call compiles away to nothing.
+#if !defined(RANDOMX_USE_X87)
+#define rx_set_double_precision(x)
+#endif
+// SSE2 path: the rx_* vector API maps directly onto SSE2/AES-NI intrinsics.
+#ifdef __SSE2__
+#ifdef __GNUC__
+#include <x86intrin.h>
+#else
+#include <intrin.h>
+#endif
+
+typedef __m128i rx_vec_i128;
+typedef __m128d rx_vec_f128;
+
+#define rx_aligned_alloc(a, b) _mm_malloc(a,b)
+#define rx_aligned_free(a) _mm_free(a)
+#define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
+
+#define rx_load_vec_f128 _mm_load_pd
+#define rx_store_vec_f128 _mm_store_pd
+#define rx_add_vec_f128 _mm_add_pd
+#define rx_sub_vec_f128 _mm_sub_pd
+#define rx_mul_vec_f128 _mm_mul_pd
+#define rx_div_vec_f128 _mm_div_pd
+#define rx_sqrt_vec_f128 _mm_sqrt_pd
+
+FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
+    return _mm_shuffle_pd(a, a, 1); // exchanges the low and high doubles
+}
+
+FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
+    return _mm_castsi128_pd(_mm_set_epi64x(x1, x0)); // x1 becomes the high lane, x0 the low lane (raw bit patterns)
+}
+
+FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
+    return _mm_castsi128_pd(_mm_set1_epi64x(x));
+}
+
+#define rx_xor_vec_f128 _mm_xor_pd
+#define rx_and_vec_f128 _mm_and_pd
+#define rx_or_vec_f128 _mm_or_pd
+#define rx_aesenc_vec_i128 _mm_aesenc_si128
+#define rx_aesdec_vec_i128 _mm_aesdec_si128
+// rx_vec_i128_x/y/z/w extract 32-bit lanes 0..3 of the vector.
+FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
+    return _mm_cvtsi128_si32(a);
+}
+
+FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
+    return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0x55));
+}
+
+FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
+    return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0xaa));
+}
+
+FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
+    return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0xff));
+}
+
+#define rx_set_int_vec_i128 _mm_set_epi32
+#define rx_xor_vec_i128 _mm_xor_si128
+#define rx_load_vec_i128 _mm_load_si128
+#define rx_store_vec_i128 _mm_store_si128
+
+FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
+    __m128i ix = _mm_loadl_epi64((const __m128i*)addr); // loads 8 bytes: two packed 32-bit integers
+    return _mm_cvtepi32_pd(ix); // converts both to double
+}
+
+constexpr uint32_t rx_mxcsr_default = 0x9FC0; //Flush to zero, denormals are zero, default rounding mode, all exceptions disabled
+
+FORCE_INLINE void rx_reset_float_state() {
+    _mm_setcsr(rx_mxcsr_default);
+}
+
+FORCE_INLINE void rx_set_rounding_mode(uint32_t mode) {
+    _mm_setcsr(rx_mxcsr_default | ((mode & 3) << 13)); // masked like the non-SSE2 implementation, so stray bits never reach MXCSR
+}
+// Portable path: plain-C++ emulation of the same rx_* API.
+#else
+#include <cstdint>
+#include <stdexcept>
+#include <cstdlib>
+#include <cmath>
+
+typedef union {
+    uint64_t u64[2];
+    uint32_t u32[4];
+    uint16_t u16[8];
+    uint8_t u8[16];
+} rx_vec_i128;
+
+typedef union {
+    struct {
+        double lo;
+        double hi;
+    };
+    rx_vec_i128 i;
+} rx_vec_f128;
+
+#define rx_aligned_alloc(a, b) malloc(a) // the alignment argument b is ignored on this path
+#define rx_aligned_free(a) free(a)
+#define rx_prefetch_nta(x)
+
+FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
+    rx_vec_f128 x;
+    x.i.u64[0] = load64(pd + 0);
+    x.i.u64[1] = load64(pd + 1);
+    return x;
+}
+
+FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 a) {
+    store64(mem_addr + 0, a.i.u64[0]);
+    store64(mem_addr + 1, a.i.u64[1]);
+}
+
+FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
+    double temp = a.hi;
+    a.hi = a.lo;
+    a.lo = temp;
+    return a;
+}
+
+FORCE_INLINE rx_vec_f128 rx_add_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+    rx_vec_f128 x;
+    x.lo = a.lo + b.lo;
+    x.hi = a.hi + b.hi;
+    return x;
+}
+
+FORCE_INLINE rx_vec_f128 rx_sub_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+    rx_vec_f128 x;
+    x.lo = a.lo - b.lo;
+    x.hi = a.hi - b.hi;
+    return x;
+}
+
+FORCE_INLINE rx_vec_f128 rx_mul_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+    rx_vec_f128 x;
+    x.lo = a.lo * b.lo;
+    x.hi = a.hi * b.hi;
+    return x;
+}
+
+FORCE_INLINE rx_vec_f128 rx_div_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+    rx_vec_f128 x;
+    x.lo = a.lo / b.lo;
+    x.hi = a.hi / b.hi;
+    return x;
+}
+
+FORCE_INLINE rx_vec_f128 rx_sqrt_vec_f128(rx_vec_f128 a) {
+    rx_vec_f128 x;
+    x.lo = rx_sqrt(a.lo);
+    x.hi = rx_sqrt(a.hi);
+    return x;
+}
+
+FORCE_INLINE rx_vec_i128 rx_set1_long_vec_i128(uint64_t a) {
+    rx_vec_i128 x;
+    x.u64[0] = a;
+    x.u64[1] = a;
+    return x;
+}
+
+FORCE_INLINE rx_vec_f128 rx_vec_i128_vec_f128(rx_vec_i128 a) {
+    rx_vec_f128 x;
+    x.i = a;
+    return x;
+}
+
+FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
+    rx_vec_f128 v;
+    v.i.u64[0] = x0; // x0 is the low lane, x1 the high lane, as in the SSE2 version
+    v.i.u64[1] = x1;
+    return v;
+}
+
+FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
+    rx_vec_f128 v;
+    v.i.u64[0] = x;
+    v.i.u64[1] = x;
+    return v;
+}
+
+
+FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+    rx_vec_f128 x;
+    x.i.u64[0] = a.i.u64[0] ^ b.i.u64[0];
+    x.i.u64[1] = a.i.u64[1] ^ b.i.u64[1];
+    return x;
+}
+
+FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+    rx_vec_f128 x;
+    x.i.u64[0] = a.i.u64[0] & b.i.u64[0];
+    x.i.u64[1] = a.i.u64[1] & b.i.u64[1];
+    return x;
+}
+
+FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+    rx_vec_f128 x;
+    x.i.u64[0] = a.i.u64[0] | b.i.u64[0];
+    x.i.u64[1] = a.i.u64[1] | b.i.u64[1];
+    return x;
+}
+// There is no software AES on this path: both AES round helpers always throw std::runtime_error.
+static const char* platformError = "Platform doesn't support hardware AES";
+
+FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
+    throw std::runtime_error(platformError);
+}
+
+FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
+    throw std::runtime_error(platformError);
+}
+
+FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
+    return a.u32[0];
+}
+
+FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
+    return a.u32[1];
+}
+
+FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
+    return a.u32[2];
+}
+
+FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
+    return a.u32[3];
+}
+
+FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) {
+    rx_vec_i128 v;
+    v.u32[0] = _I0; // arguments are listed from the highest lane down to lane 0
+    v.u32[1] = _I1;
+    v.u32[2] = _I2;
+    v.u32[3] = _I3;
+    return v;
+}
+
+FORCE_INLINE rx_vec_i128 rx_xor_vec_i128(rx_vec_i128 _A, rx_vec_i128 _B) {
+    rx_vec_i128 c;
+    c.u32[0] = _A.u32[0] ^ _B.u32[0];
+    c.u32[1] = _A.u32[1] ^ _B.u32[1];
+    c.u32[2] = _A.u32[2] ^ _B.u32[2];
+    c.u32[3] = _A.u32[3] ^ _B.u32[3];
+    return c;
+}
+
+FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const*_P) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+    return *_P;
+#else
+    const uint32_t* ptr = (const uint32_t*)_P; // read-only access: keep the const qualifier
+    rx_vec_i128 c;
+    c.u32[0] = load32(ptr + 0);
+    c.u32[1] = load32(ptr + 1);
+    c.u32[2] = load32(ptr + 2);
+    c.u32[3] = load32(ptr + 3);
+    return c;
+#endif
+}
+
+FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *_P, rx_vec_i128 _B) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+    *_P = _B;
+#else
+    uint32_t* ptr = (uint32_t*)_P;
+    store32(ptr + 0, _B.u32[0]);
+    store32(ptr + 1, _B.u32[1]);
+    store32(ptr + 2, _B.u32[2]);
+    store32(ptr + 3, _B.u32[3]);
+#endif
+}
+
+FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
+    rx_vec_f128 x;
+    x.lo = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 0)); // first 32-bit integer -> low double
+    x.hi = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 4)); // second 32-bit integer -> high double
+    return x;
+}
+
+#define RANDOMX_DEFAULT_FENV
+
+void rx_reset_float_state();
+
+void rx_set_rounding_mode(uint32_t mode);
+
+#endif
+// Declared for both paths; definitions appear in instructions_portable.cpp.
+double loadDoublePortable(const void* addr);
+uint64_t mulh(uint64_t, uint64_t);
+int64_t smulh(int64_t, int64_t);
+uint64_t rotl(uint64_t, int);
+uint64_t rotr(uint64_t, int);
diff --git a/RandomX/src/jit_compiler.hpp b/RandomX/src/jit_compiler.hpp
new file mode 100644
index 00000000..bd9c2b0e
--- /dev/null
+++ b/RandomX/src/jit_compiler.hpp
@@ -0,0 +1,37 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+// Pulls in exactly one JIT compiler header, chosen by the target-architecture macros below.
+#if defined(_M_X64) || defined(__x86_64__)
+#include "jit_compiler_x86.hpp"
+#elif defined(__aarch64__)
+#include "jit_compiler_a64.hpp"
+#else
+#include "jit_compiler_fallback.hpp"
+#endif
diff --git a/RandomX/src/jit_compiler_a64.hpp b/RandomX/src/jit_compiler_a64.hpp
new file mode 100644
index 00000000..58aa25c4
--- /dev/null
+++ b/RandomX/src/jit_compiler_a64.hpp
@@ -0,0 +1,73 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>    // uint8_t, uint32_t, uint64_t
+#include <vector>     // std::vector
+#include <stdexcept>  // std::runtime_error
+#include "common.hpp"
+
+namespace randomx {
+
+    class Program;
+    class ProgramConfiguration;
+    class SuperscalarProgram;
+    // Placeholder for ARM64: the constructor always throws, and every other member is an empty stub.
+    class JitCompilerA64 {
+    public:
+        JitCompilerA64() {
+            throw std::runtime_error("ARM64 JIT compiler is not implemented yet.");
+        }
+        void generateProgram(Program&, ProgramConfiguration&) {
+            // intentionally empty
+        }
+        void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) {
+            // intentionally empty
+        }
+        template<size_t N>
+        void generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &) { // element type assumed uint64_t - TODO confirm against jit_compiler_x86.hpp
+            // intentionally empty
+        }
+        void generateDatasetInitCode() {
+            // intentionally empty
+        }
+        ProgramFunc* getProgramFunc() {
+            return nullptr;
+        }
+        DatasetInitFunc* getDatasetInitFunc() {
+            return nullptr;
+        }
+        uint8_t* getCode() {
+            return nullptr;
+        }
+        size_t getCodeSize() {
+            return 0;
+        }
+    };
+}
\ No newline at end of file
diff --git a/RandomX/src/jit_compiler_fallback.hpp b/RandomX/src/jit_compiler_fallback.hpp
new file mode 100644
index 00000000..8103a632
--- /dev/null
+++ b/RandomX/src/jit_compiler_fallback.hpp
@@ -0,0 +1,73 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>    // uint8_t, uint32_t, uint64_t
+#include <vector>     // std::vector
+#include <stdexcept>  // std::runtime_error
+#include "common.hpp"
+
+namespace randomx {
+
+    class Program;
+    class ProgramConfiguration;
+    class SuperscalarProgram;
+    // Placeholder for targets without a JIT: the constructor always throws, and every other member is an empty stub.
+    class JitCompilerFallback {
+    public:
+        JitCompilerFallback() {
+            throw std::runtime_error("JIT compilation is not supported on this platform");
+        }
+        void generateProgram(Program&, ProgramConfiguration&) {
+            // intentionally empty
+        }
+        void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) {
+            // intentionally empty
+        }
+        template<size_t N>
+        void generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &) { // element type assumed uint64_t - TODO confirm against jit_compiler_x86.hpp
+            // intentionally empty
+        }
+        void generateDatasetInitCode() {
+            // intentionally empty
+        }
+        ProgramFunc* getProgramFunc() {
+            return nullptr;
+        }
+        DatasetInitFunc* getDatasetInitFunc() {
+            return nullptr;
+        }
+        uint8_t* getCode() {
+            return nullptr;
+        }
+        size_t getCodeSize() {
+            return 0;
+        }
+    };
+}
\ No newline at end of file
diff --git a/RandomX/src/jit_compiler_x86.cpp b/RandomX/src/jit_compiler_x86.cpp
new file mode 100644
index 00000000..054a1712
--- /dev/null
+++ b/RandomX/src/jit_compiler_x86.cpp
@@ -0,0 +1,808 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include +#include +#include +#include "jit_compiler_x86.hpp" +#include "jit_compiler_x86_static.hpp" +#include "superscalar.hpp" +#include "program.hpp" +#include "reciprocal.h" +#include "virtual_memory.hpp" + +namespace randomx { + /* + + REGISTER ALLOCATION: + + ; rax -> temporary + ; rbx -> iteration counter "ic" + ; rcx -> temporary + ; rdx -> temporary + ; rsi -> scratchpad pointer + ; rdi -> dataset pointer + ; rbp -> memory registers "ma" (high 32 bits), "mx" (low 32 bits) + ; rsp -> stack pointer + ; r8 -> "r0" + ; r9 -> "r1" + ; r10 -> "r2" + ; r11 -> "r3" + ; r12 -> "r4" + ; r13 -> "r5" + ; r14 -> "r6" + ; r15 -> "r7" + ; xmm0 -> "f0" + ; xmm1 -> "f1" + ; xmm2 -> "f2" + ; xmm3 -> "f3" + ; xmm4 -> "e0" + ; xmm5 -> "e1" + ; xmm6 -> "e2" + ; xmm7 -> "e3" + ; xmm8 -> "a0" + ; xmm9 -> "a1" + ; xmm10 -> "a2" + ; xmm11 -> "a3" + ; xmm12 -> temporary + ; xmm13 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff + ; xmm14 -> E 'or' mask = 0x3*00000000******3*00000000****** + ; xmm15 -> scale mask = 0x81f000000000000081f0000000000000 + + */ + + const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue; + const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin; + const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load; + const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start; + const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset; + const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init; + const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin; + const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init; + const uint8_t* codeLoopStore = (uint8_t*)&randomx_program_loop_store; + const uint8_t* codeLoopEnd = (uint8_t*)&randomx_program_loop_end; + const uint8_t* codeEpilogue = (uint8_t*)&randomx_program_epilogue; + const uint8_t* codeProgramEnd = (uint8_t*)&randomx_program_end; + const uint8_t* codeShhLoad = 
(uint8_t*)&randomx_sshash_load; + const uint8_t* codeShhPrefetch = (uint8_t*)&randomx_sshash_prefetch; + const uint8_t* codeShhEnd = (uint8_t*)&randomx_sshash_end; + const uint8_t* codeShhInit = (uint8_t*)&randomx_sshash_init; + + const int32_t prologueSize = codeLoopBegin - codePrologue; + const int32_t loopLoadSize = codeProgamStart - codeLoopLoad; + const int32_t readDatasetSize = codeReadDatasetLightSshInit - codeReadDataset; + const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit; + const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin; + const int32_t loopStoreSize = codeLoopEnd - codeLoopStore; + const int32_t datasetInitSize = codeEpilogue - codeDatasetInit; + const int32_t epilogueSize = codeShhLoad - codeEpilogue; + const int32_t codeSshLoadSize = codeShhPrefetch - codeShhLoad; + const int32_t codeSshPrefetchSize = codeShhEnd - codeShhPrefetch; + const int32_t codeSshInitSize = codeProgramEnd - codeShhInit; + + const int32_t epilogueOffset = CodeSize - epilogueSize; + constexpr int32_t superScalarHashOffset = 32768; + + static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 }; + static const uint8_t REX_ADD_RM[] = { 0x4c, 0x03 }; + static const uint8_t REX_SUB_RR[] = { 0x4d, 0x2b }; + static const uint8_t REX_SUB_RM[] = { 0x4c, 0x2b }; + static const uint8_t REX_MOV_RR[] = { 0x41, 0x8b }; + static const uint8_t REX_MOV_RR64[] = { 0x49, 0x8b }; + static const uint8_t REX_MOV_R64R[] = { 0x4c, 0x8b }; + static const uint8_t REX_IMUL_RR[] = { 0x4d, 0x0f, 0xaf }; + static const uint8_t REX_IMUL_RRI[] = { 0x4d, 0x69 }; + static const uint8_t REX_IMUL_RM[] = { 0x4c, 0x0f, 0xaf }; + static const uint8_t REX_MUL_R[] = { 0x49, 0xf7 }; + static const uint8_t REX_MUL_M[] = { 0x48, 0xf7 }; + static const uint8_t REX_81[] = { 0x49, 0x81 }; + static const uint8_t AND_EAX_I = 0x25; + static const uint8_t MOV_EAX_I = 0xb8; + static const uint8_t MOV_RAX_I[] = { 0x48, 0xb8 }; + static const uint8_t 
MOV_RCX_I[] = { 0x48, 0xb9 }; + static const uint8_t REX_LEA[] = { 0x4f, 0x8d }; + static const uint8_t REX_MUL_MEM[] = { 0x48, 0xf7, 0x24, 0x0e }; + static const uint8_t REX_IMUL_MEM[] = { 0x48, 0xf7, 0x2c, 0x0e }; + static const uint8_t REX_SHR_RAX[] = { 0x48, 0xc1, 0xe8 }; + static const uint8_t RAX_ADD_SBB_1[] = { 0x48, 0x83, 0xC0, 0x01, 0x48, 0x83, 0xD8, 0x00 }; + static const uint8_t MUL_RCX[] = { 0x48, 0xf7, 0xe1 }; + static const uint8_t REX_SHR_RDX[] = { 0x48, 0xc1, 0xea }; + static const uint8_t REX_SH[] = { 0x49, 0xc1 }; + static const uint8_t MOV_RCX_RAX_SAR_RCX_63[] = { 0x48, 0x89, 0xc1, 0x48, 0xc1, 0xf9, 0x3f }; + static const uint8_t AND_ECX_I[] = { 0x81, 0xe1 }; + static const uint8_t ADD_RAX_RCX[] = { 0x48, 0x01, 0xC8 }; + static const uint8_t SAR_RAX_I8[] = { 0x48, 0xC1, 0xF8 }; + static const uint8_t NEG_RAX[] = { 0x48, 0xF7, 0xD8 }; + static const uint8_t ADD_R_RAX[] = { 0x4C, 0x03 }; + static const uint8_t XOR_EAX_EAX[] = { 0x33, 0xC0 }; + static const uint8_t ADD_RDX_R[] = { 0x4c, 0x01 }; + static const uint8_t SUB_RDX_R[] = { 0x4c, 0x29 }; + static const uint8_t SAR_RDX_I8[] = { 0x48, 0xC1, 0xFA }; + static const uint8_t TEST_RDX_RDX[] = { 0x48, 0x85, 0xD2 }; + static const uint8_t SETS_AL_ADD_RDX_RAX[] = { 0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0 }; + static const uint8_t REX_NEG[] = { 0x49, 0xF7 }; + static const uint8_t REX_XOR_RR[] = { 0x4D, 0x33 }; + static const uint8_t REX_XOR_RI[] = { 0x49, 0x81 }; + static const uint8_t REX_XOR_RM[] = { 0x4c, 0x33 }; + static const uint8_t REX_ROT_CL[] = { 0x49, 0xd3 }; + static const uint8_t REX_ROT_I8[] = { 0x49, 0xc1 }; + static const uint8_t SHUFPD[] = { 0x66, 0x0f, 0xc6 }; + static const uint8_t REX_ADDPD[] = { 0x66, 0x41, 0x0f, 0x58 }; + static const uint8_t REX_CVTDQ2PD_XMM12[] = { 0xf3, 0x44, 0x0f, 0xe6, 0x24, 0x06 }; + static const uint8_t REX_SUBPD[] = { 0x66, 0x41, 0x0f, 0x5c }; + static const uint8_t REX_XORPS[] = { 0x41, 0x0f, 0x57 }; + static const uint8_t REX_MULPD[] = { 0x66, 0x41, 0x0f, 
0x59 }; + static const uint8_t REX_MAXPD[] = { 0x66, 0x41, 0x0f, 0x5f }; + static const uint8_t REX_DIVPD[] = { 0x66, 0x41, 0x0f, 0x5e }; + static const uint8_t SQRTPD[] = { 0x66, 0x0f, 0x51 }; + static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x50, 0x0F, 0xAE, 0x14, 0x24, 0x58 }; + static const uint8_t ROL_RAX[] = { 0x48, 0xc1, 0xc0 }; + static const uint8_t XOR_ECX_ECX[] = { 0x33, 0xC9 }; + static const uint8_t REX_CMP_R32I[] = { 0x41, 0x81 }; + static const uint8_t REX_CMP_M32I[] = { 0x81, 0x3c, 0x06 }; + static const uint8_t MOVAPD[] = { 0x66, 0x0f, 0x29 }; + static const uint8_t REX_MOV_MR[] = { 0x4c, 0x89 }; + static const uint8_t REX_XOR_EAX[] = { 0x41, 0x33 }; + static const uint8_t SUB_EBX[] = { 0x83, 0xEB, 0x01 }; + static const uint8_t JNZ[] = { 0x0f, 0x85 }; + static const uint8_t JMP = 0xe9; + static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 }; + static const uint8_t REX_XCHG[] = { 0x4d, 0x87 }; + static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6 }; + static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f }; + static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 }; + static const uint8_t CALL = 0xe8; + static const uint8_t REX_ADD_I[] = { 0x49, 0x81 }; + static const uint8_t REX_TEST[] = { 0x49, 0xF7 }; + static const uint8_t JZ[] = { 0x0f, 0x84 }; + static const uint8_t RET = 0xc3; + static const uint8_t LEA_32[] = { 0x67, 0x41, 0x8d }; + static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 }; + static const uint8_t ADD_EBX_I[] = { 0x81, 0xc3 }; + + static const uint8_t NOP1[] = { 0x90 }; + static const uint8_t NOP2[] = { 0x66, 0x90 }; + static const uint8_t NOP3[] = { 0x66, 0x66, 0x90 }; + static const uint8_t NOP4[] = { 0x0F, 0x1F, 0x40, 0x00 }; + static const uint8_t NOP5[] = { 0x0F, 0x1F, 0x44, 0x00, 0x00 }; + static const uint8_t NOP6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 }; + static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 
0x00, 0x00, 0x00 }; + static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }; + + static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 }; + + size_t JitCompilerX86::getCodeSize() { + return codePos - prologueSize; + } + + JitCompilerX86::JitCompilerX86() { + code = (uint8_t*)allocExecutableMemory(CodeSize); + memcpy(code, codePrologue, prologueSize); + memcpy(code + epilogueOffset, codeEpilogue, epilogueSize); + } + + JitCompilerX86::~JitCompilerX86() { + freePagedMemory(code, CodeSize); + } + + void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) { + generateProgramPrologue(prog, pcfg); + memcpy(code + codePos, codeReadDataset, readDatasetSize); + codePos += readDatasetSize; + generateProgramEpilogue(prog); + } + + void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) { + generateProgramPrologue(prog, pcfg); + emit(codeReadDatasetLightSshInit, readDatasetLightInitSize); + emit(ADD_EBX_I); + emit32(datasetOffset / CacheLineSize); + emitByte(CALL); + emit32(superScalarHashOffset - (codePos + 4)); + emit(codeReadDatasetLightSshFin, readDatasetLightFinSize); + generateProgramEpilogue(prog); + } + + template + void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector &reciprocalCache) { + memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize); + codePos = superScalarHashOffset + codeSshInitSize; + for (unsigned j = 0; j < N; ++j) { + SuperscalarProgram& prog = programs[j]; + for (unsigned i = 0; i < prog.getSize(); ++i) { + Instruction& instr = prog(i); + generateSuperscalarCode(instr, reciprocalCache); + } + emit(codeShhLoad, codeSshLoadSize); + if (j < N - 1) { + emit(REX_MOV_RR64); + emitByte(0xd8 + prog.getAddressRegister()); + emit(codeShhPrefetch, codeSshPrefetchSize); +#ifdef RANDOMX_ALIGN + int align = (codePos % 16); + while (align != 0) { + int nopSize = 16 - align; + if (nopSize > 
8) nopSize = 8; + emit(NOPX[nopSize - 1], nopSize); + align = (codePos % 16); + } +#endif + } + } + emitByte(RET); + } + + template + void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES], std::vector &reciprocalCache); + + void JitCompilerX86::generateDatasetInitCode() { + memcpy(code, codeDatasetInit, datasetInitSize); + } + + void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) { + instructionOffsets.clear(); + for (unsigned i = 0; i < 8; ++i) { + registerUsage[i] = -1; + } + codePos = prologueSize; + memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask)); + emit(REX_XOR_RAX_R64); + emitByte(0xc0 + pcfg.readReg0); + emit(REX_XOR_RAX_R64); + emitByte(0xc0 + pcfg.readReg1); + memcpy(code + codePos, codeLoopLoad, loopLoadSize); + codePos += loopLoadSize; + for (unsigned i = 0; i < prog.getSize(); ++i) { + Instruction& instr = prog(i); + instr.src %= RegistersCount; + instr.dst %= RegistersCount; + generateCode(instr, i); + } + emit(REX_MOV_RR); + emitByte(0xc0 + pcfg.readReg2); + emit(REX_XOR_EAX); + emitByte(0xc0 + pcfg.readReg3); + } + + void JitCompilerX86::generateProgramEpilogue(Program& prog) { + memcpy(code + codePos, codeLoopStore, loopStoreSize); + codePos += loopStoreSize; + emit(SUB_EBX); + emit(JNZ); + emit32(prologueSize - codePos - 4); + emitByte(JMP); + emit32(epilogueOffset - codePos - 4); + } + + void JitCompilerX86::generateCode(Instruction& instr, int i) { + instructionOffsets.push_back(codePos); + auto generator = engine[instr.opcode]; + (this->*generator)(instr, i); + } + + void JitCompilerX86::generateSuperscalarCode(Instruction& instr, std::vector &reciprocalCache) { + switch ((SuperscalarInstructionType)instr.opcode) + { + case randomx::SuperscalarInstructionType::ISUB_R: + emit(REX_SUB_RR); + emitByte(0xc0 + 8 * instr.dst + instr.src); + break; + case randomx::SuperscalarInstructionType::IXOR_R: + emit(REX_XOR_RR); + emitByte(0xc0 + 8 * instr.dst + 
instr.src); + break; + case randomx::SuperscalarInstructionType::IADD_RS: + emit(REX_LEA); + emitByte(0x04 + 8 * instr.dst); + genSIB(instr.getModShift(), instr.src, instr.dst); + break; + case randomx::SuperscalarInstructionType::IMUL_R: + emit(REX_IMUL_RR); + emitByte(0xc0 + 8 * instr.dst + instr.src); + break; + case randomx::SuperscalarInstructionType::IROR_C: + emit(REX_ROT_I8); + emitByte(0xc8 + instr.dst); + emitByte(instr.getImm32() & 63); + break; + case randomx::SuperscalarInstructionType::IADD_C7: + emit(REX_81); + emitByte(0xc0 + instr.dst); + emit32(instr.getImm32()); + break; + case randomx::SuperscalarInstructionType::IXOR_C7: + emit(REX_XOR_RI); + emitByte(0xf0 + instr.dst); + emit32(instr.getImm32()); + break; + case randomx::SuperscalarInstructionType::IADD_C8: + emit(REX_81); + emitByte(0xc0 + instr.dst); + emit32(instr.getImm32()); +#ifdef RANDOMX_ALIGN + emit(NOP1); +#endif + break; + case randomx::SuperscalarInstructionType::IXOR_C8: + emit(REX_XOR_RI); + emitByte(0xf0 + instr.dst); + emit32(instr.getImm32()); +#ifdef RANDOMX_ALIGN + emit(NOP1); +#endif + break; + case randomx::SuperscalarInstructionType::IADD_C9: + emit(REX_81); + emitByte(0xc0 + instr.dst); + emit32(instr.getImm32()); +#ifdef RANDOMX_ALIGN + emit(NOP2); +#endif + break; + case randomx::SuperscalarInstructionType::IXOR_C9: + emit(REX_XOR_RI); + emitByte(0xf0 + instr.dst); + emit32(instr.getImm32()); +#ifdef RANDOMX_ALIGN + emit(NOP2); +#endif + break; + case randomx::SuperscalarInstructionType::IMULH_R: + emit(REX_MOV_RR64); + emitByte(0xc0 + instr.dst); + emit(REX_MUL_R); + emitByte(0xe0 + instr.src); + emit(REX_MOV_R64R); + emitByte(0xc2 + 8 * instr.dst); + break; + case randomx::SuperscalarInstructionType::ISMULH_R: + emit(REX_MOV_RR64); + emitByte(0xc0 + instr.dst); + emit(REX_MUL_R); + emitByte(0xe8 + instr.src); + emit(REX_MOV_R64R); + emitByte(0xc2 + 8 * instr.dst); + break; + case randomx::SuperscalarInstructionType::IMUL_RCP: + emit(MOV_RAX_I); + 
emit64(reciprocalCache[instr.getImm32()]); + emit(REX_IMUL_RM); + emitByte(0xc0 + 8 * instr.dst); + break; + default: + UNREACHABLE; + } + } + + void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) { + emit(LEA_32); + emitByte(0x80 + instr.src + (rax ? 0 : 8)); + if (instr.src == RegisterNeedsSib) { + emitByte(0x24); + } + emit32(instr.getImm32()); + if (rax) + emitByte(AND_EAX_I); + else + emit(AND_ECX_I); + emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + } + + void JitCompilerX86::genAddressRegDst(Instruction& instr) { + emit(LEA_32); + emitByte(0x80 + instr.dst); + if (instr.dst == RegisterNeedsSib) { + emitByte(0x24); + } + emit32(instr.getImm32()); + emitByte(AND_EAX_I); + if (instr.getModCond() < StoreL3Condition) { + emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + } + else { + emit32(ScratchpadL3Mask); + } + } + + void JitCompilerX86::genAddressImm(Instruction& instr) { + emit32(instr.getImm32() & ScratchpadL3Mask); + } + + void JitCompilerX86::h_IADD_RS(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + emit(REX_LEA); + if (instr.dst == RegisterNeedsDisplacement) + emitByte(0xac); + else + emitByte(0x04 + 8 * instr.dst); + genSIB(instr.getModShift(), instr.src, instr.dst); + if (instr.dst == RegisterNeedsDisplacement) + emit32(instr.getImm32()); + } + + void JitCompilerX86::h_IADD_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + genAddressReg(instr); + emit(REX_ADD_RM); + emitByte(0x04 + 8 * instr.dst); + emitByte(0x06); + } + else { + emit(REX_ADD_RM); + emitByte(0x86 + 8 * instr.dst); + genAddressImm(instr); + } + } + + void JitCompilerX86::genSIB(int scale, int index, int base) { + emitByte((scale << 6) | (index << 3) | base); + } + + void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + emit(REX_SUB_RR); + emitByte(0xc0 + 8 * instr.dst + instr.src); + } + else { + 
emit(REX_81); + emitByte(0xe8 + instr.dst); + emit32(instr.getImm32()); + } + } + + void JitCompilerX86::h_ISUB_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + genAddressReg(instr); + emit(REX_SUB_RM); + emitByte(0x04 + 8 * instr.dst); + emitByte(0x06); + } + else { + emit(REX_SUB_RM); + emitByte(0x86 + 8 * instr.dst); + genAddressImm(instr); + } + } + + void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + emit(REX_IMUL_RR); + emitByte(0xc0 + 8 * instr.dst + instr.src); + } + else { + emit(REX_IMUL_RRI); + emitByte(0xc0 + 9 * instr.dst); + emit32(instr.getImm32()); + } + } + + void JitCompilerX86::h_IMUL_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + genAddressReg(instr); + emit(REX_IMUL_RM); + emitByte(0x04 + 8 * instr.dst); + emitByte(0x06); + } + else { + emit(REX_IMUL_RM); + emitByte(0x86 + 8 * instr.dst); + genAddressImm(instr); + } + } + + void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + emit(REX_MOV_RR64); + emitByte(0xc0 + instr.dst); + emit(REX_MUL_R); + emitByte(0xe0 + instr.src); + emit(REX_MOV_R64R); + emitByte(0xc2 + 8 * instr.dst); + } + + void JitCompilerX86::h_IMULH_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + genAddressReg(instr, false); + emit(REX_MOV_RR64); + emitByte(0xc0 + instr.dst); + emit(REX_MUL_MEM); + } + else { + emit(REX_MOV_RR64); + emitByte(0xc0 + instr.dst); + emit(REX_MUL_M); + emitByte(0xa6); + genAddressImm(instr); + } + emit(REX_MOV_R64R); + emitByte(0xc2 + 8 * instr.dst); + } + + void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + emit(REX_MOV_RR64); + emitByte(0xc0 + instr.dst); + emit(REX_MUL_R); + emitByte(0xe8 + instr.src); + emit(REX_MOV_R64R); + emitByte(0xc2 + 8 * instr.dst); + } + + void 
JitCompilerX86::h_ISMULH_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + genAddressReg(instr, false); + emit(REX_MOV_RR64); + emitByte(0xc0 + instr.dst); + emit(REX_IMUL_MEM); + } + else { + emit(REX_MOV_RR64); + emitByte(0xc0 + instr.dst); + emit(REX_MUL_M); + emitByte(0xae); + genAddressImm(instr); + } + emit(REX_MOV_R64R); + emitByte(0xc2 + 8 * instr.dst); + } + + void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) { + uint64_t divisor = instr.getImm32(); + if (!isPowerOf2(divisor)) { + registerUsage[instr.dst] = i; + emit(MOV_RAX_I); + emit64(randomx_reciprocal_fast(divisor)); + emit(REX_IMUL_RM); + emitByte(0xc0 + 8 * instr.dst); + } + } + + void JitCompilerX86::h_INEG_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + emit(REX_NEG); + emitByte(0xd8 + instr.dst); + } + + void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + emit(REX_XOR_RR); + emitByte(0xc0 + 8 * instr.dst + instr.src); + } + else { + emit(REX_XOR_RI); + emitByte(0xf0 + instr.dst); + emit32(instr.getImm32()); + } + } + + void JitCompilerX86::h_IXOR_M(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + genAddressReg(instr); + emit(REX_XOR_RM); + emitByte(0x04 + 8 * instr.dst); + emitByte(0x06); + } + else { + emit(REX_XOR_RM); + emitByte(0x86 + 8 * instr.dst); + genAddressImm(instr); + } + } + + void JitCompilerX86::h_IROR_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + emit(REX_MOV_RR); + emitByte(0xc8 + instr.src); + emit(REX_ROT_CL); + emitByte(0xc8 + instr.dst); + } + else { + emit(REX_ROT_I8); + emitByte(0xc8 + instr.dst); + emitByte(instr.getImm32() & 63); + } + } + + void JitCompilerX86::h_IROL_R(Instruction& instr, int i) { + registerUsage[instr.dst] = i; + if (instr.src != instr.dst) { + emit(REX_MOV_RR); + emitByte(0xc8 + instr.src); + emit(REX_ROT_CL); + 
emitByte(0xc0 + instr.dst); + } + else { + emit(REX_ROT_I8); + emitByte(0xc0 + instr.dst); + emitByte(instr.getImm32() & 63); + } + } + + void JitCompilerX86::h_ISWAP_R(Instruction& instr, int i) { + if (instr.src != instr.dst) { + registerUsage[instr.dst] = i; + registerUsage[instr.src] = i; + emit(REX_XCHG); + emitByte(0xc0 + instr.src + 8 * instr.dst); + } + } + + void JitCompilerX86::h_FSWAP_R(Instruction& instr, int i) { + emit(SHUFPD); + emitByte(0xc0 + 9 * instr.dst); + emitByte(1); + } + + void JitCompilerX86::h_FADD_R(Instruction& instr, int i) { + instr.dst %= RegisterCountFlt; + instr.src %= RegisterCountFlt; + emit(REX_ADDPD); + emitByte(0xc0 + instr.src + 8 * instr.dst); + } + + void JitCompilerX86::h_FADD_M(Instruction& instr, int i) { + instr.dst %= RegisterCountFlt; + genAddressReg(instr); + emit(REX_CVTDQ2PD_XMM12); + emit(REX_ADDPD); + emitByte(0xc4 + 8 * instr.dst); + } + + void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) { + instr.dst %= RegisterCountFlt; + instr.src %= RegisterCountFlt; + emit(REX_SUBPD); + emitByte(0xc0 + instr.src + 8 * instr.dst); + } + + void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) { + instr.dst %= RegisterCountFlt; + genAddressReg(instr); + emit(REX_CVTDQ2PD_XMM12); + emit(REX_SUBPD); + emitByte(0xc4 + 8 * instr.dst); + } + + void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) { + instr.dst %= RegisterCountFlt; + emit(REX_XORPS); + emitByte(0xc7 + 8 * instr.dst); + } + + void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) { + instr.dst %= RegisterCountFlt; + instr.src %= RegisterCountFlt; + emit(REX_MULPD); + emitByte(0xe0 + instr.src + 8 * instr.dst); + } + + void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) { + instr.dst %= RegisterCountFlt; + genAddressReg(instr); + emit(REX_CVTDQ2PD_XMM12); + emit(REX_ANDPS_XMM12); + emit(REX_DIVPD); + emitByte(0xe4 + 8 * instr.dst); + } + + void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) { + instr.dst %= RegisterCountFlt; + emit(SQRTPD); 
+ emitByte(0xe4 + 9 * instr.dst); + } + + void JitCompilerX86::h_CFROUND(Instruction& instr, int i) { + emit(REX_MOV_RR64); + emitByte(0xc0 + instr.src); + int rotate = (13 - (instr.getImm32() & 63)) & 63; + if (rotate != 0) { + emit(ROL_RAX); + emitByte(rotate); + } + emit(AND_OR_MOV_LDMXCSR); + } + + void JitCompilerX86::h_CBRANCH(Instruction& instr, int i) { + int reg = instr.dst; + int target = registerUsage[reg] + 1; + emit(REX_ADD_I); + emitByte(0xc0 + reg); + int shift = instr.getModCond() + ConditionOffset; + uint32_t imm = instr.getImm32() | (1UL << shift); + if (ConditionOffset > 0 || shift > 0) + imm &= ~(1UL << (shift - 1)); + emit32(imm); + emit(REX_TEST); + emitByte(0xc0 + reg); + emit32(ConditionMask << shift); + emit(JZ); + emit32(instructionOffsets[target] - (codePos + 4)); + //mark all registers as used + for (unsigned j = 0; j < RegistersCount; ++j) { + registerUsage[j] = i; + } + } + + void JitCompilerX86::h_ISTORE(Instruction& instr, int i) { + genAddressRegDst(instr); + emit(REX_MOV_MR); + emitByte(0x04 + 8 * instr.src); + emitByte(0x06); + } + + void JitCompilerX86::h_NOP(Instruction& instr, int i) { + emit(NOP1); + } + +#include "instruction_weights.hpp" +#define INST_HANDLE(x) REPN(&JitCompilerX86::h_##x, WT(x)) + + InstructionGeneratorX86 JitCompilerX86::engine[256] = { + INST_HANDLE(IADD_RS) + INST_HANDLE(IADD_M) + INST_HANDLE(ISUB_R) + INST_HANDLE(ISUB_M) + INST_HANDLE(IMUL_R) + INST_HANDLE(IMUL_M) + INST_HANDLE(IMULH_R) + INST_HANDLE(IMULH_M) + INST_HANDLE(ISMULH_R) + INST_HANDLE(ISMULH_M) + INST_HANDLE(IMUL_RCP) + INST_HANDLE(INEG_R) + INST_HANDLE(IXOR_R) + INST_HANDLE(IXOR_M) + INST_HANDLE(IROR_R) + INST_HANDLE(IROL_R) + INST_HANDLE(ISWAP_R) + INST_HANDLE(FSWAP_R) + INST_HANDLE(FADD_R) + INST_HANDLE(FADD_M) + INST_HANDLE(FSUB_R) + INST_HANDLE(FSUB_M) + INST_HANDLE(FSCAL_R) + INST_HANDLE(FMUL_R) + INST_HANDLE(FDIV_M) + INST_HANDLE(FSQRT_R) + INST_HANDLE(CBRANCH) + INST_HANDLE(CFROUND) + INST_HANDLE(ISTORE) + INST_HANDLE(NOP) + }; + +} 
diff --git a/RandomX/src/jit_compiler_x86.hpp b/RandomX/src/jit_compiler_x86.hpp new file mode 100644 index 00000000..bd068c71 --- /dev/null +++ b/RandomX/src/jit_compiler_x86.hpp @@ -0,0 +1,141 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#pragma once + +#include +#include +#include +#include "common.hpp" + +namespace randomx { + + class Program; + class ProgramConfiguration; + class SuperscalarProgram; + class JitCompilerX86; + class Instruction; + + typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int); + + constexpr uint32_t CodeSize = 64 * 1024; + + class JitCompilerX86 { + public: + JitCompilerX86(); + ~JitCompilerX86(); + void generateProgram(Program&, ProgramConfiguration&); + void generateProgramLight(Program&, ProgramConfiguration&, uint32_t); + template + void generateSuperscalarHash(SuperscalarProgram (&programs)[N], std::vector &); + void generateDatasetInitCode(); + ProgramFunc* getProgramFunc() { + return (ProgramFunc*)code; + } + DatasetInitFunc* getDatasetInitFunc() { + return (DatasetInitFunc*)code; + } + uint8_t* getCode() { + return code; + } + size_t getCodeSize(); + private: + static InstructionGeneratorX86 engine[256]; + std::vector instructionOffsets; + int registerUsage[RegistersCount]; + uint8_t* code; + int32_t codePos; + + void generateProgramPrologue(Program&, ProgramConfiguration&); + void generateProgramEpilogue(Program&); + void genAddressReg(Instruction&, bool); + void genAddressRegDst(Instruction&); + void genAddressImm(Instruction&); + void genSIB(int scale, int index, int base); + + void generateCode(Instruction&, int); + void generateSuperscalarCode(Instruction &, std::vector &); + + void emitByte(uint8_t val) { + code[codePos] = val; + codePos++; + } + + void emit32(uint32_t val) { + memcpy(code + codePos, &val, sizeof val); + codePos += sizeof val; + } + + void emit64(uint64_t val) { + memcpy(code + codePos, &val, sizeof val); + codePos += sizeof val; + } + + template + void emit(const uint8_t (&src)[N]) { + emit(src, N); + } + + void emit(const uint8_t* src, size_t count) { + memcpy(code + codePos, src, count); + codePos += count; + } + + void h_IADD_RS(Instruction&, int); + void h_IADD_M(Instruction&, int); + void 
h_ISUB_R(Instruction&, int); + void h_ISUB_M(Instruction&, int); + void h_IMUL_R(Instruction&, int); + void h_IMUL_M(Instruction&, int); + void h_IMULH_R(Instruction&, int); + void h_IMULH_M(Instruction&, int); + void h_ISMULH_R(Instruction&, int); + void h_ISMULH_M(Instruction&, int); + void h_IMUL_RCP(Instruction&, int); + void h_INEG_R(Instruction&, int); + void h_IXOR_R(Instruction&, int); + void h_IXOR_M(Instruction&, int); + void h_IROR_R(Instruction&, int); + void h_IROL_R(Instruction&, int); + void h_ISWAP_R(Instruction&, int); + void h_FSWAP_R(Instruction&, int); + void h_FADD_R(Instruction&, int); + void h_FADD_M(Instruction&, int); + void h_FSUB_R(Instruction&, int); + void h_FSUB_M(Instruction&, int); + void h_FSCAL_R(Instruction&, int); + void h_FMUL_R(Instruction&, int); + void h_FDIV_M(Instruction&, int); + void h_FSQRT_R(Instruction&, int); + void h_CBRANCH(Instruction&, int); + void h_CFROUND(Instruction&, int); + void h_ISTORE(Instruction&, int); + void h_NOP(Instruction&, int); + }; + +} \ No newline at end of file diff --git a/RandomX/src/jit_compiler_x86_static.S b/RandomX/src/jit_compiler_x86_static.S new file mode 100644 index 00000000..eab1b4c2 --- /dev/null +++ b/RandomX/src/jit_compiler_x86_static.S @@ -0,0 +1,210 @@ +# Copyright (c) 2018-2019, tevador +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+#	* Neither the name of the copyright holder nor the
+#	  names of its contributors may be used to endorse or promote products
+#	  derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+.intel_syntax noprefix
+#if defined(__APPLE__)
+.text
+#define DECL(x) _##x
+#else
+.section .text
+#define DECL(x) x
+#endif
+
+#if defined(__WIN32__) || defined(__CYGWIN__)
+#define WINABI
+#endif
+
+.global DECL(randomx_program_prologue)
+.global DECL(randomx_program_loop_begin)
+.global DECL(randomx_program_loop_load)
+.global DECL(randomx_program_start)
+.global DECL(randomx_program_read_dataset)
+.global DECL(randomx_program_read_dataset_sshash_init)
+.global DECL(randomx_program_read_dataset_sshash_fin)
+.global DECL(randomx_program_loop_store)
+.global DECL(randomx_program_loop_end)
+.global DECL(randomx_dataset_init)
+.global DECL(randomx_program_epilogue)
+.global DECL(randomx_sshash_load)
+.global DECL(randomx_sshash_prefetch)
+.global DECL(randomx_sshash_end)
+.global DECL(randomx_sshash_init)
+.global DECL(randomx_program_end)
+.global DECL(randomx_reciprocal_fast)
+
+#include "configuration.h"
+
+#define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64)
+#define RANDOMX_DATASET_BASE_MASK (RANDOMX_DATASET_BASE_SIZE-64)
+#define RANDOMX_CACHE_MASK (RANDOMX_ARGON_MEMORY*16-1)
+
+#define db .byte
+
+.balign 64
+DECL(randomx_program_prologue): ;# the JIT compiler copies everything from here up to randomx_program_loop_begin to the start of its buffer
+#if defined(WINABI)
+	#include "asm/program_prologue_win64.inc"
+#else
+	#include "asm/program_prologue_linux.inc"
+#endif
+	movapd xmm13, xmmword ptr mantissaMask[rip] ;# xmm13 = E and-mask (see register table in jit_compiler_x86.cpp)
+	movapd xmm14, xmmword ptr exp240[rip] ;# xmm14 = E or-mask
+	movapd xmm15, xmmword ptr scaleMask[rip] ;# xmm15 = scale mask
+	jmp DECL(randomx_program_loop_begin) ;# jump over the constant table that follows
+
+.balign 64
+	#include "asm/program_xmm_constants.inc"
+
+.balign 64
+DECL(randomx_program_loop_begin): ;# address marker: the C++ side derives section sizes by subtracting these label addresses
+	nop
+
+DECL(randomx_program_loop_load):
+	#include "asm/program_loop_load.inc"
+
+DECL(randomx_program_start): ;# address marker: end of the loop-load section
+	nop
+
+DECL(randomx_program_read_dataset):
+	#include "asm/program_read_dataset.inc"
+
+DECL(randomx_program_read_dataset_sshash_init):
+	#include "asm/program_read_dataset_sshash_init.inc"
+
+DECL(randomx_program_read_dataset_sshash_fin):
+	#include "asm/program_read_dataset_sshash_fin.inc"
+
+DECL(randomx_program_loop_store):
+	#include "asm/program_loop_store.inc"
+
+DECL(randomx_program_loop_end): ;# address marker: end of the loop-store section
+	nop
+
+.balign 64
+DECL(randomx_dataset_init): ;# copied to the start of the JIT buffer by generateDatasetInitCode; fills dataset items in a loop
+	push rbx ;# save the registers this routine overwrites
+	push rbp
+	push r12
+	push r13
+	push r14
+	push r15
+#if defined(WINABI)
+	push rdi
+	push rsi
+	mov rdi, qword ptr [rcx] ;# cache->memory
+	mov rsi, rdx ;# dataset
+	mov rbp, r8 ;# block index
+	push r9 ;# max. block index
+#else
+	mov rdi, qword ptr [rdi] ;# cache->memory
+	;# dataset in rsi
+	mov rbp, rdx ;# block index
+	push rcx ;# max. block index
+#endif
+init_block_loop:
+	prefetchw byte ptr [rsi] ;# prefetch the output item for writing
+	mov rbx, rbp ;# rbx = current block index; randomx_sshash_init reads rbx
+	.byte 232 ;# 0xE8 = call
+		;# .set CALL_LOC,
+	.int 32768 - (call_offset - DECL(randomx_dataset_init)) ;# rel32 resolving to routine start + 32768, the buffer offset where the JIT places the superscalar hash code
+call_offset:
+	mov qword ptr [rsi+0], r8 ;# store r8-r15 (64 bytes) as one dataset item
+	mov qword ptr [rsi+8], r9
+	mov qword ptr [rsi+16], r10
+	mov qword ptr [rsi+24], r11
+	mov qword ptr [rsi+32], r12
+	mov qword ptr [rsi+40], r13
+	mov qword ptr [rsi+48], r14
+	mov qword ptr [rsi+56], r15
+	add rbp, 1 ;# next block index
+	add rsi, 64 ;# next output item
+	cmp rbp, qword ptr [rsp] ;# top of stack holds the max. block index pushed above
+	jb init_block_loop
+	pop rax ;# discard the saved max. block index
+#if defined(WINABI)
+	pop rsi
+	pop rdi
+#endif
+	pop r15 ;# restore in reverse push order
+	pop r14
+	pop r13
+	pop r12
+	pop rbp
+	pop rbx
+	ret
+
+.balign 64
+DECL(randomx_program_epilogue): ;# the JIT compiler copies this section to the tail of its buffer (epilogueOffset)
+	#include "asm/program_epilogue_store.inc"
+#if defined(WINABI)
+	#include "asm/program_epilogue_win64.inc"
+#else
+	#include "asm/program_epilogue_linux.inc"
+#endif
+
+.balign 64
+DECL(randomx_sshash_load): ;# emitted by the JIT after each generated superscalar program
+	#include "asm/program_sshash_load.inc"
+
+DECL(randomx_sshash_prefetch): ;# emitted by the JIT between superscalar programs, not after the last one
+	#include "asm/program_sshash_prefetch.inc"
+
+DECL(randomx_sshash_end): ;# address marker: end of the prefetch section
+	nop
+
+.balign 64
+DECL(randomx_sshash_init): ;# copied by the JIT to buffer offset 32768, up to randomx_program_end, ahead of the generated superscalar code
+	lea r8, [rbx+1] ;# r8 = block index + 1
+	#include "asm/program_sshash_prefetch.inc"
+	imul r8, qword ptr r0_mul[rip] ;# r8 *= r0_mul constant
+	mov r9, qword ptr r1_add[rip] ;# r9 to r15: load the per-register constant, then xor it with r8
+	xor r9, r8
+	mov r10, qword ptr r2_add[rip]
+	xor r10, r8
+	mov r11, qword ptr r3_add[rip]
+	xor r11, r8
+	mov r12, qword ptr r4_add[rip]
+	xor r12, r8
+	mov r13, qword ptr r5_add[rip]
+	xor r13, r8
+	mov r14, qword ptr r6_add[rip]
+	xor r14, r8
+	mov r15, qword ptr r7_add[rip]
+	xor r15, r8
+	jmp DECL(randomx_program_end) ;# jump over the constant table that follows
+
+.balign 64
+	#include "asm/program_sshash_constants.inc"
+
+.balign 64
+DECL(randomx_program_end): ;# address marker: end of the sshash_init section
+	nop
+
+DECL(randomx_reciprocal_fast): ;# called directly from C++ (h_IMUL_RCP passes the divisor)
+#if !defined(WINABI)
+	mov rcx, rdi ;# copy rdi into rcx; presumably the included code expects its argument in rcx - confirm against randomx_reciprocal.inc
+#endif
+	#include "asm/randomx_reciprocal.inc"
diff --git a/RandomX/src/jit_compiler_x86_static.asm b/RandomX/src/jit_compiler_x86_static.asm
new file mode 100644
index 00000000..b2fad7a1
--- /dev/null
+++ b/RandomX/src/jit_compiler_x86_static.asm
@@ -0,0 +1,201 @@
+; Copyright (c) 2018-2019, tevador
+;
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are met:
+;	* Redistributions of source code must retain the above copyright
+;	  notice, this list of conditions and the following disclaimer.
+;	* Redistributions in binary form must reproduce the above copyright
+;	  notice, this list of conditions and the following disclaimer in the
+;	  documentation and/or other materials provided with the distribution.
+;	* Neither the name of the copyright holder nor the
+;	  names of its contributors may be used to endorse or promote products
+;	  derived from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+IFDEF RAX	; NOTE(review): presumably only defined when assembling for x64 - confirm
+
+_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
+
+PUBLIC randomx_program_prologue
+PUBLIC randomx_program_loop_begin
+PUBLIC randomx_program_loop_load
+PUBLIC randomx_program_start
+PUBLIC randomx_program_read_dataset
+PUBLIC randomx_program_read_dataset_sshash_init
+PUBLIC randomx_program_read_dataset_sshash_fin
+PUBLIC randomx_dataset_init
+PUBLIC randomx_program_loop_store
+PUBLIC randomx_program_loop_end
+PUBLIC randomx_program_epilogue
+PUBLIC randomx_sshash_load
+PUBLIC randomx_sshash_prefetch
+PUBLIC randomx_sshash_end
+PUBLIC randomx_sshash_init
+PUBLIC randomx_program_end
+PUBLIC randomx_reciprocal_fast
+
+include asm/configuration.asm
+
+RANDOMX_SCRATCHPAD_MASK EQU (RANDOMX_SCRATCHPAD_L3-64)
+RANDOMX_DATASET_BASE_MASK EQU (RANDOMX_DATASET_BASE_SIZE-64)
+RANDOMX_CACHE_MASK EQU (RANDOMX_ARGON_MEMORY*16-1)
+
+ALIGN 64
+randomx_program_prologue PROC
+	include asm/program_prologue_win64.inc
+	movapd xmm13, xmmword ptr [mantissaMask]	; xmm13..xmm15 are loaded with three 128-bit constants
+	movapd xmm14, xmmword ptr [exp240]
+	movapd xmm15, xmmword ptr [scaleMask]
+	jmp randomx_program_loop_begin
+randomx_program_prologue ENDP
+
+ALIGN 64
+	include asm/program_xmm_constants.inc
+
+ALIGN 64
+randomx_program_loop_begin PROC
+	nop	; label only: the body is a single nop
+randomx_program_loop_begin ENDP
+
+randomx_program_loop_load PROC
+	include asm/program_loop_load.inc
+randomx_program_loop_load ENDP
+
+randomx_program_start PROC
+	nop	; label only: the body is a single nop
+randomx_program_start ENDP
+
+randomx_program_read_dataset PROC
+	include asm/program_read_dataset.inc
+randomx_program_read_dataset ENDP
+
+randomx_program_read_dataset_sshash_init PROC
+	include asm/program_read_dataset_sshash_init.inc
+randomx_program_read_dataset_sshash_init ENDP
+
+randomx_program_read_dataset_sshash_fin PROC
+	include asm/program_read_dataset_sshash_fin.inc
+randomx_program_read_dataset_sshash_fin ENDP
+
+randomx_program_loop_store PROC
+	include asm/program_loop_store.inc
+randomx_program_loop_store ENDP
+
+randomx_program_loop_end PROC
+	nop	; label only: the body is a single nop
+randomx_program_loop_end ENDP
+
+ALIGN 64
+randomx_dataset_init PROC	; in: rcx = cache, rdx = dataset, r8 = block index, r9 = max. block index
+	push rbx
+	push rbp
+	push rdi
+	push rsi
+	push r12
+	push r13
+	push r14
+	push r15
+	mov rdi, qword ptr [rcx] ;# cache->memory
+	mov rsi, rdx ;# dataset
+	mov rbp, r8 ;# block index
+	push r9 ;# max. block index
+init_block_loop:
+	prefetchw byte ptr [rsi]
+	mov rbx, rbp	; rbx carries the current block index into the call below
+	db 232 ;# 0xE8 = call
+	dd 32768 - distance	; rel32 operand: the call lands 32768 bytes past randomx_dataset_init
+	distance equ $ - offset randomx_dataset_init	; offset of the call's return address within this PROC
+	mov qword ptr [rsi+0], r8	; r8..r15 are written to the 64 bytes at rsi
+	mov qword ptr [rsi+8], r9
+	mov qword ptr [rsi+16], r10
+	mov qword ptr [rsi+24], r11
+	mov qword ptr [rsi+32], r12
+	mov qword ptr [rsi+40], r13
+	mov qword ptr [rsi+48], r14
+	mov qword ptr [rsi+56], r15
+	add rbp, 1	; next block index
+	add rsi, 64	; next 64-byte output slot
+	cmp rbp, qword ptr [rsp]	; [rsp] = max. block index pushed before the loop
+	jb init_block_loop
+	pop r9	; removes the max. block index pushed before the loop
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop rsi
+	pop rdi
+	pop rbp
+	pop rbx
+	ret
+randomx_dataset_init ENDP
+
+ALIGN 64
+randomx_program_epilogue PROC
+	include asm/program_epilogue_store.inc
+	include asm/program_epilogue_win64.inc
+randomx_program_epilogue ENDP
+
+ALIGN 64
+randomx_sshash_load PROC
+	include asm/program_sshash_load.inc
+randomx_sshash_load ENDP
+
+randomx_sshash_prefetch PROC
+	include asm/program_sshash_prefetch.inc
+randomx_sshash_prefetch ENDP
+
+randomx_sshash_end PROC
+	nop	; label only: the body is a single nop
+randomx_sshash_end ENDP
+
+ALIGN 64
+randomx_sshash_init PROC
+	lea r8, [rbx+1]	; r8 = rbx + 1; NOTE(review): rbx is presumably the block index set by randomx_dataset_init - confirm
+	include asm/program_sshash_prefetch.inc
+	imul r8, qword ptr [r0_mul]	; r8 is scaled, then xor-ed into each of r9..r15 below
+	mov r9, qword ptr [r1_add]
+	xor r9, r8
+	mov r10, qword ptr [r2_add]
+	xor r10, r8
+	mov r11, qword ptr [r3_add]
+	xor r11, r8
+	mov r12, qword ptr [r4_add]
+	xor r12, r8
+	mov r13, qword ptr [r5_add]
+	xor r13, r8
+	mov r14, qword ptr [r6_add]
+	xor r14, r8
+	mov r15, qword ptr [r7_add]
+	xor r15, r8
+	jmp randomx_program_end
+randomx_sshash_init ENDP
+
+ALIGN 64
+	include asm/program_sshash_constants.inc
+
+ALIGN 64
+randomx_program_end PROC
+	nop	; label only: the body is a single nop
+randomx_program_end ENDP
+
+randomx_reciprocal_fast PROC
+	include asm/randomx_reciprocal.inc
+randomx_reciprocal_fast ENDP
+
+_RANDOMX_JITX86_STATIC ENDS
+
+ENDIF + +END \ No newline at end of file diff --git a/RandomX/src/jit_compiler_x86_static.hpp b/RandomX/src/jit_compiler_x86_static.hpp new file mode 100644 index 00000000..ba196862 --- /dev/null +++ b/RandomX/src/jit_compiler_x86_static.hpp @@ -0,0 +1,48 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+#pragma once
+
+extern "C" { // symbols exported (PUBLIC ... PROC) by jit_compiler_x86_static.asm; C linkage keeps the names unmangled
+	void randomx_program_prologue();
+	void randomx_program_loop_begin(); // nop-only label in the assembly
+	void randomx_program_loop_load();
+	void randomx_program_start(); // nop-only label in the assembly
+	void randomx_program_read_dataset();
+	void randomx_program_read_dataset_sshash_init();
+	void randomx_program_read_dataset_sshash_fin();
+	void randomx_program_loop_store();
+	void randomx_program_loop_end(); // nop-only label in the assembly
+	void randomx_dataset_init();
+	void randomx_program_epilogue();
+	void randomx_sshash_load();
+	void randomx_sshash_prefetch();
+	void randomx_sshash_end(); // nop-only label in the assembly
+	void randomx_sshash_init();
+	void randomx_program_end(); // nop-only label in the assembly
+} // randomx_reciprocal_fast is also exported by the assembly but is declared in reciprocal.h
diff --git a/RandomX/src/program.hpp b/RandomX/src/program.hpp
new file mode 100644
index 00000000..d0f68052
--- /dev/null
+++ b/RandomX/src/program.hpp
@@ -0,0 +1,71 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +#include +#include +#include "common.hpp" +#include "instruction.hpp" +#include "blake2/endian.h" + +namespace randomx { + + struct ProgramConfiguration { + uint64_t eMask[2]; + uint32_t readReg0, readReg1, readReg2, readReg3; + }; + + class Program { + public: + Instruction& operator()(int pc) { + return programBuffer[pc]; + } + friend std::ostream& operator<<(std::ostream& os, const Program& p) { + p.print(os); + return os; + } + uint64_t getEntropy(int i) { + return load64(&entropyBuffer[i]); + } + uint32_t getSize() { + return RANDOMX_PROGRAM_SIZE; + } + private: + void print(std::ostream& os) const { + for (int i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { + auto instr = programBuffer[i]; + os << instr; + } + } + uint64_t entropyBuffer[16]; + Instruction programBuffer[RANDOMX_PROGRAM_SIZE]; + }; + + static_assert(sizeof(Program) % 64 == 0, "Invalid size of class randomx::Program"); +} diff --git a/RandomX/src/randomx.cpp b/RandomX/src/randomx.cpp new file mode 100644 index 00000000..8598e6e1 --- /dev/null +++ b/RandomX/src/randomx.cpp @@ -0,0 +1,249 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "randomx.h" +#include "dataset.hpp" +#include "vm_interpreted.hpp" +#include "vm_interpreted_light.hpp" +#include "vm_compiled.hpp" +#include "vm_compiled_light.hpp" +#include "blake2/blake2.h" + +extern "C" { + + randomx_cache *randomx_alloc_cache(randomx_flags flags) { + randomx_cache *cache = new randomx_cache(); + + try { + switch (flags & (RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES)) { + case RANDOMX_FLAG_DEFAULT: + cache->dealloc = &randomx::deallocCache; + cache->jit = nullptr; + cache->initialize = &randomx::initCache; + cache->datasetInit = &randomx::initDataset; + cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::CacheSize); + break; + + case RANDOMX_FLAG_JIT: + cache->dealloc = &randomx::deallocCache; + cache->jit = new randomx::JitCompiler(); + cache->initialize = &randomx::initCacheCompile; + cache->datasetInit = cache->jit->getDatasetInitFunc(); + cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::CacheSize); + break; + + case RANDOMX_FLAG_LARGE_PAGES: + cache->dealloc = &randomx::deallocCache; + cache->jit = nullptr; + cache->initialize = &randomx::initCache; + cache->datasetInit = &randomx::initDataset; + cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::CacheSize); + break; + + case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES: + cache->dealloc = &randomx::deallocCache; + cache->jit = new randomx::JitCompiler(); + cache->initialize = &randomx::initCacheCompile; + cache->datasetInit = cache->jit->getDatasetInitFunc(); + cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::CacheSize); + break; + + default: + UNREACHABLE; + } + } + catch (std::exception &ex) { + randomx_release_cache(cache); + cache = nullptr; + } + + return cache; + } + + void randomx_init_cache(randomx_cache *cache, const void *key, size_t keySize) { + cache->initialize(cache, key, keySize); + } + + void randomx_release_cache(randomx_cache* cache) { + 
cache->dealloc(cache); + delete cache; + } + + randomx_dataset *randomx_alloc_dataset(randomx_flags flags) { + randomx_dataset *dataset = new randomx_dataset(); + + try { + if (flags & RANDOMX_FLAG_LARGE_PAGES) { + dataset->dealloc = &randomx::deallocDataset; + dataset->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::DatasetSize); + } + else { + dataset->dealloc = &randomx::deallocDataset; + dataset->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::DatasetSize); + } + } + catch (std::exception &ex) { + randomx_release_dataset(dataset); + dataset = nullptr; + } + + return dataset; + } + + unsigned long randomx_dataset_item_count() { + return randomx::DatasetSize / RANDOMX_DATASET_ITEM_SIZE; + } + + void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount) { + cache->datasetInit(cache, dataset->memory + startItem * randomx::CacheLineSize, startItem, startItem + itemCount); + } + + void *randomx_get_dataset_memory(randomx_dataset *dataset) { + return dataset->memory; + } + + void randomx_release_dataset(randomx_dataset *dataset) { + dataset->dealloc(dataset); + delete dataset; + } + + randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset) { + randomx_vm *vm = nullptr; + + try { + switch (flags & (RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES)) { + case RANDOMX_FLAG_DEFAULT: + vm = new randomx::InterpretedLightVmDefault(); + break; + + case RANDOMX_FLAG_FULL_MEM: + vm = new randomx::InterpretedVmDefault(); + break; + + case RANDOMX_FLAG_JIT: + vm = new randomx::CompiledLightVmDefault(); + break; + + case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT: + vm = new randomx::CompiledVmDefault(); + break; + + case RANDOMX_FLAG_HARD_AES: + vm = new randomx::InterpretedLightVmHardAes(); + break; + + case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES: + vm = new 
randomx::InterpretedVmHardAes(); + break; + + case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES: + vm = new randomx::CompiledLightVmHardAes(); + break; + + case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES: + vm = new randomx::CompiledVmHardAes(); + break; + + case RANDOMX_FLAG_LARGE_PAGES: + vm = new randomx::InterpretedLightVmLargePage(); + break; + + case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_LARGE_PAGES: + vm = new randomx::InterpretedVmLargePage(); + break; + + case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES: + vm = new randomx::CompiledLightVmLargePage(); + break; + + case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES: + vm = new randomx::CompiledVmLargePage(); + break; + + case RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES: + vm = new randomx::InterpretedLightVmLargePageHardAes(); + break; + + case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES: + vm = new randomx::InterpretedVmLargePageHardAes(); + break; + + case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES: + vm = new randomx::CompiledLightVmLargePageHardAes(); + break; + + case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES: + vm = new randomx::CompiledVmLargePageHardAes(); + break; + + default: + UNREACHABLE; + } + + if(cache != nullptr) + vm->setCache(cache); + + if(dataset != nullptr) + vm->setDataset(dataset); + + vm->allocate(); + } + catch (std::exception &ex) { + delete vm; + vm = nullptr; + } + + return vm; + } + + void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache) { + machine->setCache(cache); + } + + void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset) { + machine->setDataset(dataset); + } + + void randomx_destroy_vm(randomx_vm *machine) { + delete machine; + } + + void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output) { + alignas(16) uint64_t tempHash[8]; + blake2b(tempHash, 
sizeof(tempHash), input, inputSize, nullptr, 0); + machine->initScratchpad(&tempHash); + machine->resetRoundingMode(); + for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) { + machine->run(&tempHash); + blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0); + } + machine->run(&tempHash); + machine->getFinalResult(output, RANDOMX_HASH_SIZE); + } + +} diff --git a/RandomX/src/randomx.h b/RandomX/src/randomx.h new file mode 100644 index 00000000..8f9b30ce --- /dev/null +++ b/RandomX/src/randomx.h @@ -0,0 +1,201 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef RANDOMX_H +#define RANDOMX_H + +#include + +#define RANDOMX_HASH_SIZE 32 +#define RANDOMX_DATASET_ITEM_SIZE 64 + +#ifndef RANDOMX_EXPORT +#define RANDOMX_EXPORT +#endif + +typedef enum { + RANDOMX_FLAG_DEFAULT = 0, + RANDOMX_FLAG_LARGE_PAGES = 1, + RANDOMX_FLAG_HARD_AES = 2, + RANDOMX_FLAG_FULL_MEM = 4, + RANDOMX_FLAG_JIT = 8, +} randomx_flags; + +typedef struct randomx_dataset randomx_dataset; +typedef struct randomx_cache randomx_cache; +typedef struct randomx_vm randomx_vm; + +#if defined(__cplusplus) +extern "C" { +#endif + +/** + * Creates a randomx_cache structure and allocates memory for RandomX Cache. + * + * @param flags is any combination of these 2 flags (each flag can be set or not set): + * RANDOMX_FLAG_LARGE_PAGES - allocate memory in large pages + * RANDOMX_FLAG_JIT - create cache structure with JIT compilation support; this makes + * subsequent Dataset initialization faster + * + * @return Pointer to an allocated randomx_cache structure. + * NULL is returned if memory allocation fails or if the RANDOMX_FLAG_JIT + * is set and JIT compilation is not supported on the current platform. + */ +RANDOMX_EXPORT randomx_cache *randomx_alloc_cache(randomx_flags flags); + +/** + * Initializes the cache memory and SuperscalarHash using the provided key value. + * + * @param cache is a pointer to a previously allocated randomx_cache structure. Must not be NULL. + * @param key is a pointer to memory which contains the key value. 
Must not be NULL. + * @param keySize is the number of bytes of the key. +*/ +RANDOMX_EXPORT void randomx_init_cache(randomx_cache *cache, const void *key, size_t keySize); + +/** + * Releases all memory occupied by the randomx_cache structure. + * + * @param cache is a pointer to a previously allocated randomx_cache structure. +*/ +RANDOMX_EXPORT void randomx_release_cache(randomx_cache* cache); + +/** + * Creates a randomx_dataset structure and allocates memory for RandomX Dataset. + * + * @param flags is the initialization flags. Only one flag is supported (can be set or not set): + * RANDOMX_FLAG_LARGE_PAGES - allocate memory in large pages + * + * @return Pointer to an allocated randomx_dataset structure. + * NULL is returned if memory allocation fails. + */ +RANDOMX_EXPORT randomx_dataset *randomx_alloc_dataset(randomx_flags flags); + +/** + * Gets the number of items contained in the dataset. + * + * @return the number of items contained in the dataset. +*/ +RANDOMX_EXPORT unsigned long randomx_dataset_item_count(void); + +/** + * Initializes dataset items. + * + * Note: In order to use the Dataset, all items from 0 to (randomx_dataset_item_count() - 1) must be initialized. + * This may be done by several calls to this function using non-overlapping item sequences. + * + * @param dataset is a pointer to a previously allocated randomx_dataset structure. Must not be NULL. + * @param cache is a pointer to a previously allocated and initialized randomx_cache structure. Must not be NULL. + * @param startItem is the item number where intialization should start. + * @param itemCount is the number of items that should be initialized. +*/ +RANDOMX_EXPORT void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount); + +/** + * Returns a pointer to the internal memory buffer of the dataset structure. The size + * of the internal memory buffer is randomx_dataset_item_count() * RANDOMX_DATASET_ITEM_SIZE. 
+ * + * @param dataset is dataset is a pointer to a previously allocated randomx_dataset structure. Must not be NULL. + * + * @return Pointer to the internal memory buffer of the dataset structure. +*/ +RANDOMX_EXPORT void *randomx_get_dataset_memory(randomx_dataset *dataset); + +/** + * Releases all memory occupied by the randomx_dataset structure. + * + * @param dataset is a pointer to a previously allocated randomx_dataset structure. +*/ +RANDOMX_EXPORT void randomx_release_dataset(randomx_dataset *dataset); + +/** + * Creates and initializes a RandomX virtual machine. + * + * @param flags is any combination of these 4 flags (each flag can be set or not set): + * RANDOMX_FLAG_LARGE_PAGES - allocate scratchpad memory in large pages + * RANDOMX_FLAG_HARD_AES - virtual machine will use hardware accelerated AES + * RANDOMX_FLAG_FULL_MEM - virtual machine will use the full dataset + * RANDOMX_FLAG_JIT - virtual machine will use a JIT compiler + * The numeric values of the flags are ordered so that a higher value will provide + * faster hash calculation and a lower numeric value will provide higher portability. + * Using RANDOMX_FLAG_DEFAULT (all flags not set) works on all platforms, but is the slowest. + * @param cache is a pointer to an initialized randomx_cache structure. Can be + * NULL if RANDOMX_FLAG_FULL_MEM is set. + * @param dataset is a pointer to a randomx_dataset structure. Can be NULL + * if RANDOMX_FLAG_FULL_MEM is not set. + * + * @return Pointer to an initialized randomx_vm structure. + * Returns NULL if: + * (1) Scratchpad memory allocation fails. + * (2) The requested initialization flags are not supported on the current platform. + * (3) cache parameter is NULL and RANDOMX_FLAG_FULL_MEM is not set + * (4) dataset parameter is NULL and RANDOMX_FLAG_FULL_MEM is set +*/ +RANDOMX_EXPORT randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset); + +/** + * Reinitializes a virtual machine with a new Cache. 
This function should be called anytime + * the Cache is reinitialized with a new key. + * + * @param machine is a pointer to a randomx_vm structure that was initialized + * without RANDOMX_FLAG_FULL_MEM. Must not be NULL. + * @param cache is a pointer to an initialized randomx_cache structure. Must not be NULL. +*/ +RANDOMX_EXPORT void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache); + +/** + * Reinitializes a virtual machine with a new Dataset. + * + * @param machine is a pointer to a randomx_vm structure that was initialized + * with RANDOMX_FLAG_FULL_MEM. Must not be NULL. + * @param dataset is a pointer to an initialized randomx_dataset structure. Must not be NULL. +*/ +RANDOMX_EXPORT void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset); + +/** + * Releases all memory occupied by the randomx_vm structure. + * + * @param machine is a pointer to a previously created randomx_vm structure. +*/ +RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine); + +/** + * Calculates a RandomX hash value. + * + * @param machine is a pointer to a randomx_vm structure. Must not be NULL. + * @param input is a pointer to memory to be hashed. Must not be NULL. + * @param inputSize is the number of bytes to be hashed. + * @param output is a pointer to memory where the hash will be stored. Must not + * be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing. +*/ +RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output); + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/RandomX/src/reciprocal.c b/RandomX/src/reciprocal.c new file mode 100644 index 00000000..3c46759f --- /dev/null +++ b/RandomX/src/reciprocal.c @@ -0,0 +1,69 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "reciprocal.h" + +/* + Calculates rcp = 2**x / divisor for highest integer x such that rcp < 2**64. 
+ divisor must not be 0 or a power of 2 + + Equivalent x86 assembly (divisor in rcx): + + mov edx, 1 + mov r8, rcx + xor eax, eax + bsr rcx, rcx + shl rdx, cl + div r8 + ret + +*/ +uint64_t randomx_reciprocal(uint64_t divisor) { + + const uint64_t p2exp63 = 1ULL << 63; + + uint64_t quotient = p2exp63 / divisor, remainder = p2exp63 % divisor; + + unsigned bsr = 0; //highest set bit in divisor + + for (uint64_t bit = divisor; bit > 0; bit >>= 1) + bsr++; + + for (unsigned shift = 0; shift < bsr; shift++) { + if (remainder >= divisor - remainder) { + quotient = quotient * 2 + 1; + remainder = remainder * 2 - divisor; + } + else { + quotient = quotient * 2; + remainder = remainder * 2; + } + } + + return quotient; +} diff --git a/RandomX/src/reciprocal.h b/RandomX/src/reciprocal.h new file mode 100644 index 00000000..69d25921 --- /dev/null +++ b/RandomX/src/reciprocal.h @@ -0,0 +1,42 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +uint64_t randomx_reciprocal(uint64_t); +uint64_t randomx_reciprocal_fast(uint64_t); + +#if defined(__cplusplus) +} +#endif diff --git a/RandomX/src/soft_aes.cpp b/RandomX/src/soft_aes.cpp new file mode 100644 index 00000000..3e82fa2e --- /dev/null +++ b/RandomX/src/soft_aes.cpp @@ -0,0 +1,364 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "soft_aes.h" + +alignas(16) const uint8_t sbox[256] = { + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 
0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16, +}; + +alignas(16) const uint32_t lutEnc0[256] = { + 0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591, + 0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56, 0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec, + 0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb, + 0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b, + 0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c, 0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83, + 0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a, + 0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f, + 0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df, 0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea, + 0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b, + 0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413, + 0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1, 0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6, + 0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85, + 0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511, + 0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe, 0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b, + 0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1, + 0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf, + 0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3, 0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e, + 0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6, + 0xa06060c0, 
0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b, + 0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428, 0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad, + 0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8, + 0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2, + 0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda, 0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949, + 0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810, + 0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697, + 0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e, 0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f, + 0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c, + 0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27, + 0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122, 0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433, + 0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5, + 0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0, + 0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e, 0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c, +}; + +alignas(16) const uint32_t lutEnc1[256] = { + 0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d, 0xf2f2ff0d, 0x6b6bd6bd, 0x6f6fdeb1, 0xc5c59154, + 0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d, 0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a, + 0xcaca8f45, 0x82821f9d, 0xc9c98940, 0x7d7dfa87, 0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b, + 0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea, 0x9c9c23bf, 0xa4a453f7, 0x7272e496, 0xc0c09b5b, + 0xb7b775c2, 0xfdfde11c, 0x93933dae, 0x26264c6a, 0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f, + 0x3434685c, 0xa5a551f4, 0xe5e5d134, 0xf1f1f908, 0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f, + 
0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e, 0x18183028, 0x969637a1, 0x05050a0f, 0x9a9a2fb5, + 0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d, 0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f, + 0x0909121b, 0x83831d9e, 0x2c2c5874, 0x1a1a342e, 0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb, + 0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce, 0x2929527b, 0xe3e3dd3e, 0x2f2f5e71, 0x84841397, + 0x5353a6f5, 0xd1d1b968, 0x00000000, 0xededc12c, 0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed, + 0x6a6ad4be, 0xcbcb8d46, 0xbebe67d9, 0x3939724b, 0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a, + 0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16, 0x434386c5, 0x4d4d9ad7, 0x33336655, 0x85851194, + 0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81, 0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3, + 0x5151a2f3, 0xa3a35dfe, 0x404080c0, 0x8f8f058a, 0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104, + 0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263, 0x10102030, 0xffffe51a, 0xf3f3fd0e, 0xd2d2bf6d, + 0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f, 0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39, + 0xc4c49357, 0xa7a755f2, 0x7e7efc82, 0x3d3d7a47, 0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695, + 0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f, 0x22224466, 0x2a2a547e, 0x90903bab, 0x88880b83, + 0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c, 0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76, + 0xe0e0db3b, 0x32326456, 0x3a3a744e, 0x0a0a141e, 0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4, + 0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6, 0x919139a8, 0x959531a4, 0xe4e4d337, 0x7979f28b, + 0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7, 0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0, + 0x6c6cd8b4, 0x5656acfa, 0xf4f4f307, 0xeaeacf25, 0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018, + 0xbaba6fd5, 0x7878f088, 0x25254a6f, 0x2e2e5c72, 0x1c1c3824, 0xa6a657f1, 0xb4b473c7, 0xc6c69751, + 0xe8e8cb23, 0xdddda17c, 0x7474e89c, 0x1f1f3e21, 0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85, + 0x7070e090, 0x3e3e7c42, 0xb5b571c4, 
0x6666ccaa, 0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12, + 0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0, 0x86861791, 0xc1c19958, 0x1d1d3a27, 0x9e9e27b9, + 0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233, 0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7, + 0x9b9b2db6, 0x1e1e3c22, 0x87871592, 0xe9e9c920, 0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a, + 0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17, 0xbfbf65da, 0xe6e6d731, 0x424284c6, 0x6868d0b8, + 0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11, 0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a, +}; + +alignas(16) const uint32_t lutEnc2[256] = { + 0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b, 0xf2ff0df2, 0x6bd6bd6b, 0x6fdeb16f, 0xc59154c5, + 0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b, 0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76, + 0xca8f45ca, 0x821f9d82, 0xc98940c9, 0x7dfa877d, 0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0, + 0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf, 0x9c23bf9c, 0xa453f7a4, 0x72e49672, 0xc09b5bc0, + 0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26, 0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc, + 0x34685c34, 0xa551f4a5, 0xe5d134e5, 0xf1f908f1, 0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15, + 0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3, 0x18302818, 0x9637a196, 0x050a0f05, 0x9a2fb59a, + 0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2, 0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75, + 0x09121b09, 0x831d9e83, 0x2c58742c, 0x1a342e1a, 0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0, + 0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3, 0x29527b29, 0xe3dd3ee3, 0x2f5e712f, 0x84139784, + 0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced, 0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b, + 0x6ad4be6a, 0xcb8d46cb, 0xbe67d9be, 0x39724b39, 0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf, + 0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb, 0x4386c543, 0x4d9ad74d, 0x33665533, 0x85119485, + 0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f, 0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8, + 0x51a2f351, 0xa35dfea3, 
0x4080c040, 0x8f058a8f, 0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5, + 0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321, 0x10203010, 0xffe51aff, 0xf3fd0ef3, 0xd2bf6dd2, + 0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec, 0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917, + 0xc49357c4, 0xa755f2a7, 0x7efc827e, 0x3d7a473d, 0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573, + 0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc, 0x22446622, 0x2a547e2a, 0x903bab90, 0x880b8388, + 0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14, 0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db, + 0xe0db3be0, 0x32645632, 0x3a744e3a, 0x0a141e0a, 0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c, + 0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662, 0x9139a891, 0x9531a495, 0xe4d337e4, 0x79f28b79, + 0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d, 0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9, + 0x6cd8b46c, 0x56acfa56, 0xf4f307f4, 0xeacf25ea, 0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808, + 0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e, 0x1c38241c, 0xa657f1a6, 0xb473c7b4, 0xc69751c6, + 0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f, 0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a, + 0x70e09070, 0x3e7c423e, 0xb571c4b5, 0x66ccaa66, 0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e, + 0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9, 0x86179186, 0xc19958c1, 0x1d3a271d, 0x9e27b99e, + 0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311, 0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794, + 0x9b2db69b, 0x1e3c221e, 0x87159287, 0xe9c920e9, 0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf, + 0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d, 0xbf65dabf, 0xe6d731e6, 0x4284c642, 0x68d0b868, + 0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f, 0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16, +}; + +alignas(16) const uint32_t lutEnc3[256] = { + 0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b, 0xff0df2f2, 0xd6bd6b6b, 0xdeb16f6f, 0x9154c5c5, + 0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b, 0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676, + 0x8f45caca, 
0x1f9d8282, 0x8940c9c9, 0xfa877d7d, 0xef15fafa, 0xb2eb5959, 0x8ec94747, 0xfb0bf0f0, + 0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf, 0x23bf9c9c, 0x53f7a4a4, 0xe4967272, 0x9b5bc0c0, + 0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626, 0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc, + 0x685c3434, 0x51f4a5a5, 0xd134e5e5, 0xf908f1f1, 0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515, + 0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3, 0x30281818, 0x37a19696, 0x0a0f0505, 0x2fb59a9a, + 0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2, 0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575, + 0x121b0909, 0x1d9e8383, 0x58742c2c, 0x342e1a1a, 0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0, + 0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3, 0x527b2929, 0xdd3ee3e3, 0x5e712f2f, 0x13978484, + 0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded, 0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b, + 0xd4be6a6a, 0x8d46cbcb, 0x67d9bebe, 0x724b3939, 0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf, + 0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb, 0x86c54343, 0x9ad74d4d, 0x66553333, 0x11948585, + 0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f, 0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8, + 0xa2f35151, 0x5dfea3a3, 0x80c04040, 0x058a8f8f, 0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5, + 0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121, 0x20301010, 0xe51affff, 0xfd0ef3f3, 0xbf6dd2d2, + 0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec, 0xbee15f5f, 0x35a29797, 0x88cc4444, 0x2e391717, + 0x9357c4c4, 0x55f2a7a7, 0xfc827e7e, 0x7a473d3d, 0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373, + 0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc, 0x44662222, 0x547e2a2a, 0x3bab9090, 0x0b838888, + 0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414, 0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb, + 0xdb3be0e0, 0x64563232, 0x744e3a3a, 0x141e0a0a, 0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c, + 0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262, 0x39a89191, 0x31a49595, 0xd337e4e4, 0xf28b7979, + 0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d, 
0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9, + 0xd8b46c6c, 0xacfa5656, 0xf307f4f4, 0xcf25eaea, 0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808, + 0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e, 0x38241c1c, 0x57f1a6a6, 0x73c7b4b4, 0x9751c6c6, + 0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f, 0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a, + 0xe0907070, 0x7c423e3e, 0x71c4b5b5, 0xccaa6666, 0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e, + 0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9, 0x17918686, 0x9958c1c1, 0x3a271d1d, 0x27b99e9e, + 0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111, 0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494, + 0x2db69b9b, 0x3c221e1e, 0x15928787, 0xc920e9e9, 0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf, + 0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d, 0x65dabfbf, 0xd731e6e6, 0x84c64242, 0xd0b86868, + 0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f, 0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616, +}; + +alignas(16) const uint32_t lutDec0[256] = { + 0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b, + 0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5, 0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5, + 0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b, + 0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e, + 0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927, 0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d, + 0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9, + 0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566, + 0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3, 0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed, + 0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4, + 0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd, + 0xf98a213e, 0x3d06dd96, 0xae053edd, 
0x46bde64d, 0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060, + 0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879, + 0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c, + 0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36, 0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624, + 0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c, + 0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14, + 0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3, 0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b, + 0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684, + 0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177, + 0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947, 0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322, + 0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f, + 0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382, + 0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf, 0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb, + 0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef, + 0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235, + 0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733, 0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117, + 0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546, + 0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d, + 0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb, 0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a, + 0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478, + 0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc, 
0x8b493c28, 0x41950dff, + 0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664, 0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0, +}; + +alignas(16) const uint32_t lutDec1[256] = { + 0xa7f45150, 0x65417e53, 0xa4171ac3, 0x5e273a96, 0x6bab3bcb, 0x459d1ff1, 0x58faacab, 0x03e34b93, + 0xfa302055, 0x6d76adf6, 0x76cc8891, 0x4c02f525, 0xd7e54ffc, 0xcb2ac5d7, 0x44352680, 0xa362b58f, + 0x5ab1de49, 0x1bba2567, 0x0eea4598, 0xc0fe5de1, 0x752fc302, 0xf04c8112, 0x97468da3, 0xf9d36bc6, + 0x5f8f03e7, 0x9c921595, 0x7a6dbfeb, 0x595295da, 0x83bed42d, 0x217458d3, 0x69e04929, 0xc8c98e44, + 0x89c2756a, 0x798ef478, 0x3e58996b, 0x71b927dd, 0x4fe1beb6, 0xad88f017, 0xac20c966, 0x3ace7db4, + 0x4adf6318, 0x311ae582, 0x33519760, 0x7f536245, 0x7764b1e0, 0xae6bbb84, 0xa081fe1c, 0x2b08f994, + 0x68487058, 0xfd458f19, 0x6cde9487, 0xf87b52b7, 0xd373ab23, 0x024b72e2, 0x8f1fe357, 0xab55662a, + 0x28ebb207, 0xc2b52f03, 0x7bc5869a, 0x0837d3a5, 0x872830f2, 0xa5bf23b2, 0x6a0302ba, 0x8216ed5c, + 0x1ccf8a2b, 0xb479a792, 0xf207f3f0, 0xe2694ea1, 0xf4da65cd, 0xbe0506d5, 0x6234d11f, 0xfea6c48a, + 0x532e349d, 0x55f3a2a0, 0xe18a0532, 0xebf6a475, 0xec830b39, 0xef6040aa, 0x9f715e06, 0x106ebd51, + 0x8a213ef9, 0x06dd963d, 0x053eddae, 0xbde64d46, 0x8d5491b5, 0x5dc47105, 0xd406046f, 0x155060ff, + 0xfb981924, 0xe9bdd697, 0x434089cc, 0x9ed96777, 0x42e8b0bd, 0x8b890788, 0x5b19e738, 0xeec879db, + 0x0a7ca147, 0x0f427ce9, 0x1e84f8c9, 0x00000000, 0x86800983, 0xed2b3248, 0x70111eac, 0x725a6c4e, + 0xff0efdfb, 0x38850f56, 0xd5ae3d1e, 0x392d3627, 0xd90f0a64, 0xa65c6821, 0x545b9bd1, 0x2e36243a, + 0x670a0cb1, 0xe757930f, 0x96eeb4d2, 0x919b1b9e, 0xc5c0804f, 0x20dc61a2, 0x4b775a69, 0x1a121c16, + 0xba93e20a, 0x2aa0c0e5, 0xe0223c43, 0x171b121d, 0x0d090e0b, 0xc78bf2ad, 0xa8b62db9, 0xa91e14c8, + 0x19f15785, 0x0775af4c, 0xdd99eebb, 0x607fa3fd, 0x2601f79f, 0xf5725cbc, 0x3b6644c5, 0x7efb5b34, + 0x29438b76, 0xc623cbdc, 0xfcedb668, 0xf1e4b863, 0xdc31d7ca, 0x85634210, 0x22971340, 0x11c68420, + 0x244a857d, 0x3dbbd2f8, 0x32f9ae11, 0xa129c76d, 0x2f9e1d4b, 
0x30b2dcf3, 0x52860dec, 0xe3c177d0, + 0x16b32b6c, 0xb970a999, 0x489411fa, 0x64e94722, 0x8cfca8c4, 0x3ff0a01a, 0x2c7d56d8, 0x903322ef, + 0x4e4987c7, 0xd138d9c1, 0xa2ca8cfe, 0x0bd49836, 0x81f5a6cf, 0xde7aa528, 0x8eb7da26, 0xbfad3fa4, + 0x9d3a2ce4, 0x9278500d, 0xcc5f6a9b, 0x467e5462, 0x138df6c2, 0xb8d890e8, 0xf7392e5e, 0xafc382f5, + 0x805d9fbe, 0x93d0697c, 0x2dd56fa9, 0x1225cfb3, 0x99acc83b, 0x7d1810a7, 0x639ce86e, 0xbb3bdb7b, + 0x7826cd09, 0x18596ef4, 0xb79aec01, 0x9a4f83a8, 0x6e95e665, 0xe6ffaa7e, 0xcfbc2108, 0xe815efe6, + 0x9be7bad9, 0x366f4ace, 0x099fead4, 0x7cb029d6, 0xb2a431af, 0x233f2a31, 0x94a5c630, 0x66a235c0, + 0xbc4e7437, 0xca82fca6, 0xd090e0b0, 0xd8a73315, 0x9804f14a, 0xdaec41f7, 0x50cd7f0e, 0xf691172f, + 0xd64d768d, 0xb0ef434d, 0x4daacc54, 0x0496e4df, 0xb5d19ee3, 0x886a4c1b, 0x1f2cc1b8, 0x5165467f, + 0xea5e9d04, 0x358c015d, 0x7487fa73, 0x410bfb2e, 0x1d67b35a, 0xd2db9252, 0x5610e933, 0x47d66d13, + 0x61d79a8c, 0x0ca1377a, 0x14f8598e, 0x3c13eb89, 0x27a9ceee, 0xc961b735, 0xe51ce1ed, 0xb1477a3c, + 0xdfd29c59, 0x73f2553f, 0xce141879, 0x37c773bf, 0xcdf753ea, 0xaafd5f5b, 0x6f3ddf14, 0xdb447886, + 0xf3afca81, 0xc468b93e, 0x3424382c, 0x40a3c25f, 0xc31d1672, 0x25e2bc0c, 0x493c288b, 0x950dff41, + 0x01a83971, 0xb30c08de, 0xe4b4d89c, 0xc1566490, 0x84cb7b61, 0xb632d570, 0x5c6c4874, 0x57b8d042, +}; + +alignas(16) const uint32_t lutDec2[256] = { + 0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e, 0xab3bcb6b, 0x9d1ff145, 0xfaacab58, 0xe34b9303, + 0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c, 0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3, + 0xb1de495a, 0xba25671b, 0xea45980e, 0xfe5de1c0, 0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9, + 0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59, 0xbed42d83, 0x7458d321, 0xe0492969, 0xc98e44c8, + 0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71, 0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a, + 0xdf63184a, 0x1ae58231, 0x51976033, 0x5362457f, 0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b, + 0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8, 
0x73ab23d3, 0x4b72e202, 0x1fe3578f, 0x55662aab, + 0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508, 0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82, + 0xcf8a2b1c, 0x79a792b4, 0x07f3f0f2, 0x694ea1e2, 0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe, + 0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb, 0x830b39ec, 0x6040aaef, 0x715e069f, 0x6ebd5110, + 0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd, 0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15, + 0x981924fb, 0xbdd697e9, 0x4089cc43, 0xd967779e, 0xe8b0bd42, 0x8907888b, 0x19e7385b, 0xc879dbee, + 0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000, 0x80098386, 0x2b3248ed, 0x111eac70, 0x5a6c4e72, + 0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739, 0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e, + 0x0a0cb167, 0x57930fe7, 0xeeb4d296, 0x9b1b9e91, 0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a, + 0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17, 0x090e0b0d, 0x8bf2adc7, 0xb62db9a8, 0x1e14c8a9, + 0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60, 0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e, + 0x438b7629, 0x23cbdcc6, 0xedb668fc, 0xe4b863f1, 0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011, + 0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1, 0x9e1d4b2f, 0xb2dcf330, 0x860dec52, 0xc177d0e3, + 0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264, 0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90, + 0x4987c74e, 0x38d9c1d1, 0xca8cfea2, 0xd498360b, 0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf, + 0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246, 0x8df6c213, 0xd890e8b8, 0x392e5ef7, 0xc382f5af, + 0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312, 0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb, + 0x26cd0978, 0x596ef418, 0x9aec01b7, 0x4f83a89a, 0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8, + 0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c, 0xa431afb2, 0x3f2a3123, 0xa5c63094, 0xa235c066, + 0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8, 0x04f14a98, 0xec41f7da, 0xcd7f0e50, 0x91172ff6, + 0x4d768dd6, 0xef434db0, 0xaacc544d, 0x96e4df04, 0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 
0x65467f51, + 0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41, 0x67b35a1d, 0xdb9252d2, 0x10e93356, 0xd66d1347, + 0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c, 0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1, + 0xd29c59df, 0xf2553f73, 0x141879ce, 0xc773bf37, 0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db, + 0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40, 0x1d1672c3, 0xe2bc0c25, 0x3c288b49, 0x0dff4195, + 0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1, 0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257, +}; + +alignas(16) const uint32_t lutDec3[256] = { + 0x5150a7f4, 0x7e536541, 0x1ac3a417, 0x3a965e27, 0x3bcb6bab, 0x1ff1459d, 0xacab58fa, 0x4b9303e3, + 0x2055fa30, 0xadf66d76, 0x889176cc, 0xf5254c02, 0x4ffcd7e5, 0xc5d7cb2a, 0x26804435, 0xb58fa362, + 0xde495ab1, 0x25671bba, 0x45980eea, 0x5de1c0fe, 0xc302752f, 0x8112f04c, 0x8da39746, 0x6bc6f9d3, + 0x03e75f8f, 0x15959c92, 0xbfeb7a6d, 0x95da5952, 0xd42d83be, 0x58d32174, 0x492969e0, 0x8e44c8c9, + 0x756a89c2, 0xf478798e, 0x996b3e58, 0x27dd71b9, 0xbeb64fe1, 0xf017ad88, 0xc966ac20, 0x7db43ace, + 0x63184adf, 0xe582311a, 0x97603351, 0x62457f53, 0xb1e07764, 0xbb84ae6b, 0xfe1ca081, 0xf9942b08, + 0x70586848, 0x8f19fd45, 0x94876cde, 0x52b7f87b, 0xab23d373, 0x72e2024b, 0xe3578f1f, 0x662aab55, + 0xb20728eb, 0x2f03c2b5, 0x869a7bc5, 0xd3a50837, 0x30f28728, 0x23b2a5bf, 0x02ba6a03, 0xed5c8216, + 0x8a2b1ccf, 0xa792b479, 0xf3f0f207, 0x4ea1e269, 0x65cdf4da, 0x06d5be05, 0xd11f6234, 0xc48afea6, + 0x349d532e, 0xa2a055f3, 0x0532e18a, 0xa475ebf6, 0x0b39ec83, 0x40aaef60, 0x5e069f71, 0xbd51106e, + 0x3ef98a21, 0x963d06dd, 0xddae053e, 0x4d46bde6, 0x91b58d54, 0x71055dc4, 0x046fd406, 0x60ff1550, + 0x1924fb98, 0xd697e9bd, 0x89cc4340, 0x67779ed9, 0xb0bd42e8, 0x07888b89, 0xe7385b19, 0x79dbeec8, + 0xa1470a7c, 0x7ce90f42, 0xf8c91e84, 0x00000000, 0x09838680, 0x3248ed2b, 0x1eac7011, 0x6c4e725a, + 0xfdfbff0e, 0x0f563885, 0x3d1ed5ae, 0x3627392d, 0x0a64d90f, 0x6821a65c, 0x9bd1545b, 0x243a2e36, + 0x0cb1670a, 0x930fe757, 0xb4d296ee, 0x1b9e919b, 0x804fc5c0, 0x61a220dc, 
0x5a694b77, 0x1c161a12, + 0xe20aba93, 0xc0e52aa0, 0x3c43e022, 0x121d171b, 0x0e0b0d09, 0xf2adc78b, 0x2db9a8b6, 0x14c8a91e, + 0x578519f1, 0xaf4c0775, 0xeebbdd99, 0xa3fd607f, 0xf79f2601, 0x5cbcf572, 0x44c53b66, 0x5b347efb, + 0x8b762943, 0xcbdcc623, 0xb668fced, 0xb863f1e4, 0xd7cadc31, 0x42108563, 0x13402297, 0x842011c6, + 0x857d244a, 0xd2f83dbb, 0xae1132f9, 0xc76da129, 0x1d4b2f9e, 0xdcf330b2, 0x0dec5286, 0x77d0e3c1, + 0x2b6c16b3, 0xa999b970, 0x11fa4894, 0x472264e9, 0xa8c48cfc, 0xa01a3ff0, 0x56d82c7d, 0x22ef9033, + 0x87c74e49, 0xd9c1d138, 0x8cfea2ca, 0x98360bd4, 0xa6cf81f5, 0xa528de7a, 0xda268eb7, 0x3fa4bfad, + 0x2ce49d3a, 0x500d9278, 0x6a9bcc5f, 0x5462467e, 0xf6c2138d, 0x90e8b8d8, 0x2e5ef739, 0x82f5afc3, + 0x9fbe805d, 0x697c93d0, 0x6fa92dd5, 0xcfb31225, 0xc83b99ac, 0x10a77d18, 0xe86e639c, 0xdb7bbb3b, + 0xcd097826, 0x6ef41859, 0xec01b79a, 0x83a89a4f, 0xe6656e95, 0xaa7ee6ff, 0x2108cfbc, 0xefe6e815, + 0xbad99be7, 0x4ace366f, 0xead4099f, 0x29d67cb0, 0x31afb2a4, 0x2a31233f, 0xc63094a5, 0x35c066a2, + 0x7437bc4e, 0xfca6ca82, 0xe0b0d090, 0x3315d8a7, 0xf14a9804, 0x41f7daec, 0x7f0e50cd, 0x172ff691, + 0x768dd64d, 0x434db0ef, 0xcc544daa, 0xe4df0496, 0x9ee3b5d1, 0x4c1b886a, 0xc1b81f2c, 0x467f5165, + 0x9d04ea5e, 0x015d358c, 0xfa737487, 0xfb2e410b, 0xb35a1d67, 0x9252d2db, 0xe9335610, 0x6d1347d6, + 0x9a8c61d7, 0x377a0ca1, 0x598e14f8, 0xeb893c13, 0xceee27a9, 0xb735c961, 0xe1ede51c, 0x7a3cb147, + 0x9c59dfd2, 0x553f73f2, 0x1879ce14, 0x73bf37c7, 0x53eacdf7, 0x5f5baafd, 0xdf146f3d, 0x7886db44, + 0xca81f3af, 0xb93ec468, 0x382c3424, 0xc25f40a3, 0x1672c31d, 0xbc0c25e2, 0x288b493c, 0xff41950d, + 0x397101a8, 0x08deb30c, 0xd89ce4b4, 0x6490c156, 0x7b6184cb, 0xd570b632, 0x48745c6c, 0xd04257b8, +}; + +rx_vec_i128 soft_aesenc(rx_vec_i128 in, rx_vec_i128 key) { + uint32_t s0, s1, s2, s3; + + s0 = rx_vec_i128_w(in); + s1 = rx_vec_i128_z(in); + s2 = rx_vec_i128_y(in); + s3 = rx_vec_i128_x(in); + + rx_vec_i128 out = rx_set_int_vec_i128( + (lutEnc0[s0 & 0xff] ^ lutEnc1[(s3 >> 8) & 0xff] ^ lutEnc2[(s2 >> 
16) & 0xff] ^ lutEnc3[s1 >> 24]), + (lutEnc0[s1 & 0xff] ^ lutEnc1[(s0 >> 8) & 0xff] ^ lutEnc2[(s3 >> 16) & 0xff] ^ lutEnc3[s2 >> 24]), + (lutEnc0[s2 & 0xff] ^ lutEnc1[(s1 >> 8) & 0xff] ^ lutEnc2[(s0 >> 16) & 0xff] ^ lutEnc3[s3 >> 24]), + (lutEnc0[s3 & 0xff] ^ lutEnc1[(s2 >> 8) & 0xff] ^ lutEnc2[(s1 >> 16) & 0xff] ^ lutEnc3[s0 >> 24]) + ); + + return rx_xor_vec_i128(out, key); +} + +rx_vec_i128 soft_aesdec(rx_vec_i128 in, rx_vec_i128 key) { + uint32_t s0, s1, s2, s3; + + s0 = rx_vec_i128_w(in); + s1 = rx_vec_i128_z(in); + s2 = rx_vec_i128_y(in); + s3 = rx_vec_i128_x(in); + + rx_vec_i128 out = rx_set_int_vec_i128( + (lutDec0[s0 & 0xff] ^ lutDec1[(s1 >> 8) & 0xff] ^ lutDec2[(s2 >> 16) & 0xff] ^ lutDec3[s3 >> 24]), + (lutDec0[s1 & 0xff] ^ lutDec1[(s2 >> 8) & 0xff] ^ lutDec2[(s3 >> 16) & 0xff] ^ lutDec3[s0 >> 24]), + (lutDec0[s2 & 0xff] ^ lutDec1[(s3 >> 8) & 0xff] ^ lutDec2[(s0 >> 16) & 0xff] ^ lutDec3[s1 >> 24]), + (lutDec0[s3 & 0xff] ^ lutDec1[(s0 >> 8) & 0xff] ^ lutDec2[(s1 >> 16) & 0xff] ^ lutDec3[s2 >> 24]) + ); + + return rx_xor_vec_i128(out, key); +} diff --git a/RandomX/src/soft_aes.h b/RandomX/src/soft_aes.h new file mode 100644 index 00000000..254f8d63 --- /dev/null +++ b/RandomX/src/soft_aes.h @@ -0,0 +1,46 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +#include +#include "intrin_portable.h" + +rx_vec_i128 soft_aesenc(rx_vec_i128 in, rx_vec_i128 key); + +rx_vec_i128 soft_aesdec(rx_vec_i128 in, rx_vec_i128 key); + +template +inline rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key) { + return soft ? soft_aesenc(in, key) : rx_aesenc_vec_i128(in, key); +} + +template +inline rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key) { + return soft ? soft_aesdec(in, key) : rx_aesdec_vec_i128(in, key); +} \ No newline at end of file diff --git a/RandomX/src/superscalar.cpp b/RandomX/src/superscalar.cpp new file mode 100644 index 00000000..39d772f2 --- /dev/null +++ b/RandomX/src/superscalar.cpp @@ -0,0 +1,896 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "configuration.h" +#include "program.hpp" +#include "blake2/endian.h" +#include +#include +#include +#include +#include +#include "superscalar.hpp" +#include "intrin_portable.h" +#include "reciprocal.h" + +namespace randomx { + + static bool isMultiplication(SuperscalarInstructionType type) { + return type == SuperscalarInstructionType::IMUL_R || type == SuperscalarInstructionType::IMULH_R || type == SuperscalarInstructionType::ISMULH_R || type == SuperscalarInstructionType::IMUL_RCP; + } + + //uOPs (micro-ops) are represented only by the execution port they can go to + namespace ExecutionPort { + using type = int; + constexpr type Null = 0; + constexpr type P0 = 1; + constexpr type P1 = 2; + constexpr type P5 = 4; + constexpr type P01 = P0 | P1; + constexpr type P05 = P0 | P5; + constexpr type P015 = P0 | P1 | P5; + } + + //Macro-operation as output of the x86 decoder + //Usually one macro-op = one x86 instruction, but 2 instructions are sometimes fused into 1 macro-op + //Macro-op can consist of 1 or 2 uOPs. 
+    // Describes one macro-op: a display name, an encoded size, a latency and
+    // up to two uOPs, each stored as the ExecutionPort mask it may issue to.
+    class MacroOp {
+    public:
+        // No uOPs and zero latency (isEliminated() is true for such an op).
+        MacroOp(const char* name, int size)
+            : MacroOp(name, size, 0, ExecutionPort::Null, ExecutionPort::Null) {}
+        // Exactly one uOP.
+        MacroOp(const char* name, int size, int latency, ExecutionPort::type uop)
+            : MacroOp(name, size, latency, uop, ExecutionPort::Null) {}
+        // Two uOPs; the other value constructors funnel into this one.
+        MacroOp(const char* name, int size, int latency, ExecutionPort::type uop1, ExecutionPort::type uop2)
+            : name_(name), size_(size), latency_(latency), uop1_(uop1), uop2_(uop2) {}
+        // Copy of `parent` whose dependent flag is replaced by `dependent`.
+        MacroOp(const MacroOp& parent, bool dependent)
+            : name_(parent.name_), size_(parent.size_), latency_(parent.latency_), uop1_(parent.uop1_), uop2_(parent.uop2_), dependent_(dependent) {}
+
+        const char* getName() const { return name_; }
+        int getSize() const { return size_; }
+        int getLatency() const { return latency_; }
+        ExecutionPort::type getUop1() const { return uop1_; }
+        ExecutionPort::type getUop2() const { return uop2_; }
+        // Simple: there is no second uOP.
+        bool isSimple() const { return uop2_ == ExecutionPort::Null; }
+        // Eliminated: there is no first uOP either.
+        bool isEliminated() const { return uop1_ == ExecutionPort::Null; }
+        bool isDependent() const { return dependent_; }
+
+        static const MacroOp Add_rr;
+        static const MacroOp Add_ri;
+        static const MacroOp Lea_sib;
+        static const MacroOp Sub_rr;
+        static const MacroOp Imul_rr;
+        static const MacroOp Imul_r;
+        static const MacroOp Mul_r;
+        static const MacroOp Mov_rr;
+        static const MacroOp Mov_ri64;
+        static const MacroOp Xor_rr;
+        static const MacroOp Xor_ri;
+        static const MacroOp Ror_rcl;
+        static const MacroOp Ror_ri;
+        static const MacroOp TestJz_fused;
+        static const MacroOp Xor_self;
+        static const MacroOp Cmp_ri;
+        static const MacroOp Setcc_r;
+
+    private:
+        const char* name_;
+        int size_;
+        int latency_;
+        ExecutionPort::type uop1_;
+        ExecutionPort::type uop2_;
+        bool dependent_ = false;
+    };
+ + //Size: 3 bytes + const MacroOp MacroOp::Add_rr = MacroOp("add r,r", 3, 1, ExecutionPort::P015); + const MacroOp MacroOp::Sub_rr = 
MacroOp("sub r,r", 3, 1, ExecutionPort::P015); + const MacroOp MacroOp::Xor_rr = MacroOp("xor r,r", 3, 1, ExecutionPort::P015); + const MacroOp MacroOp::Imul_r = MacroOp("imul r", 3, 4, ExecutionPort::P1, ExecutionPort::P5); + const MacroOp MacroOp::Mul_r = MacroOp("mul r", 3, 4, ExecutionPort::P1, ExecutionPort::P5); + const MacroOp MacroOp::Mov_rr = MacroOp("mov r,r", 3); + + //Size: 4 bytes + const MacroOp MacroOp::Lea_sib = MacroOp("lea r,r+r*s", 4, 1, ExecutionPort::P01); + const MacroOp MacroOp::Imul_rr = MacroOp("imul r,r", 4, 3, ExecutionPort::P1); + const MacroOp MacroOp::Ror_ri = MacroOp("ror r,i", 4, 1, ExecutionPort::P05); + + //Size: 7 bytes (can be optionally padded with nop to 8 or 9 bytes) + const MacroOp MacroOp::Add_ri = MacroOp("add r,i", 7, 1, ExecutionPort::P015); + const MacroOp MacroOp::Xor_ri = MacroOp("xor r,i", 7, 1, ExecutionPort::P015); + + //Size: 10 bytes + const MacroOp MacroOp::Mov_ri64 = MacroOp("mov rax,i64", 10, 1, ExecutionPort::P015); + + //Unused: + const MacroOp MacroOp::Ror_rcl = MacroOp("ror r,cl", 3, 1, ExecutionPort::P0, ExecutionPort::P5); + const MacroOp MacroOp::Xor_self = MacroOp("xor rcx,rcx", 3); + const MacroOp MacroOp::Cmp_ri = MacroOp("cmp r,i", 7, 1, ExecutionPort::P015); + const MacroOp MacroOp::Setcc_r = MacroOp("setcc cl", 3, 1, ExecutionPort::P05); + const MacroOp MacroOp::TestJz_fused = MacroOp("testjz r,i", 13, 0, ExecutionPort::P5); + + const MacroOp IMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Mul_r, MacroOp::Mov_rr }; + const MacroOp ISMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Imul_r, MacroOp::Mov_rr }; + const MacroOp IMUL_RCP_ops_array[] = { MacroOp::Mov_ri64, MacroOp(MacroOp::Imul_rr, true) }; + + class SuperscalarInstructionInfo { + public: + const char* getName() const { + return name_; + } + int getSize() const { + return ops_.size(); + } + bool isSimple() const { + return getSize() == 1; + } + int getLatency() const { + return latency_; + } + const MacroOp& getOp(int index) const { + 
return ops_[index]; + } + SuperscalarInstructionType getType() const { + return type_; + } + int getResultOp() const { + return resultOp_; + } + int getDstOp() const { + return dstOp_; + } + int getSrcOp() const { + return srcOp_; + } + static const SuperscalarInstructionInfo ISUB_R; + static const SuperscalarInstructionInfo IXOR_R; + static const SuperscalarInstructionInfo IADD_RS; + static const SuperscalarInstructionInfo IMUL_R; + static const SuperscalarInstructionInfo IROR_C; + static const SuperscalarInstructionInfo IADD_C7; + static const SuperscalarInstructionInfo IXOR_C7; + static const SuperscalarInstructionInfo IADD_C8; + static const SuperscalarInstructionInfo IXOR_C8; + static const SuperscalarInstructionInfo IADD_C9; + static const SuperscalarInstructionInfo IXOR_C9; + static const SuperscalarInstructionInfo IMULH_R; + static const SuperscalarInstructionInfo ISMULH_R; + static const SuperscalarInstructionInfo IMUL_RCP; + static const SuperscalarInstructionInfo NOP; + private: + const char* name_; + SuperscalarInstructionType type_; + std::vector ops_; + int latency_; + int resultOp_ = 0; + int dstOp_ = 0; + int srcOp_; + + SuperscalarInstructionInfo(const char* name) + : name_(name), type_(SuperscalarInstructionType::INVALID), latency_(0) {} + SuperscalarInstructionInfo(const char* name, SuperscalarInstructionType type, const MacroOp& op, int srcOp) + : name_(name), type_(type), latency_(op.getLatency()), srcOp_(srcOp) { + ops_.push_back(MacroOp(op)); + } + template + SuperscalarInstructionInfo(const char* name, SuperscalarInstructionType type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp) + : name_(name), type_(type), latency_(0), resultOp_(resultOp), dstOp_(dstOp), srcOp_(srcOp) { + for (unsigned i = 0; i < N; ++i) { + ops_.push_back(MacroOp(arr[i])); + latency_ += ops_.back().getLatency(); + } + static_assert(N > 1, "Invalid array size"); + } + }; + + const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISUB_R = 
SuperscalarInstructionInfo("ISUB_R", SuperscalarInstructionType::ISUB_R, MacroOp::Sub_rr, 0); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_R = SuperscalarInstructionInfo("IXOR_R", SuperscalarInstructionType::IXOR_R, MacroOp::Xor_rr, 0); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_RS = SuperscalarInstructionInfo("IADD_RS", SuperscalarInstructionType::IADD_RS, MacroOp::Lea_sib, 0); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_R = SuperscalarInstructionInfo("IMUL_R", SuperscalarInstructionType::IMUL_R, MacroOp::Imul_rr, 0); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IROR_C = SuperscalarInstructionInfo("IROR_C", SuperscalarInstructionType::IROR_C, MacroOp::Ror_ri, -1); + + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C7 = SuperscalarInstructionInfo("IADD_C7", SuperscalarInstructionType::IADD_C7, MacroOp::Add_ri, -1); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C7 = SuperscalarInstructionInfo("IXOR_C7", SuperscalarInstructionType::IXOR_C7, MacroOp::Xor_ri, -1); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C8 = SuperscalarInstructionInfo("IADD_C8", SuperscalarInstructionType::IADD_C8, MacroOp::Add_ri, -1); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C8 = SuperscalarInstructionInfo("IXOR_C8", SuperscalarInstructionType::IXOR_C8, MacroOp::Xor_ri, -1); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C9 = SuperscalarInstructionInfo("IADD_C9", SuperscalarInstructionType::IADD_C9, MacroOp::Add_ri, -1); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C9 = SuperscalarInstructionInfo("IXOR_C9", SuperscalarInstructionType::IXOR_C9, MacroOp::Xor_ri, -1); + + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMULH_R = SuperscalarInstructionInfo("IMULH_R", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1); + const SuperscalarInstructionInfo 
SuperscalarInstructionInfo::ISMULH_R = SuperscalarInstructionInfo("ISMULH_R", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_RCP = SuperscalarInstructionInfo("IMUL_RCP", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1); + + const SuperscalarInstructionInfo SuperscalarInstructionInfo::NOP = SuperscalarInstructionInfo("NOP"); + + //these are some of the options how to split a 16-byte window into 3 or 4 x86 instructions. + //RandomX uses instructions with a native size of 3 (sub, xor, mul, mov), 4 (lea, mul), 7 (xor, add immediate) or 10 bytes (mov 64-bit immediate). + //Slots with sizes of 8 or 9 bytes need to be padded with a nop instruction. + const int buffer0[] = { 4, 8, 4 }; + const int buffer1[] = { 7, 3, 3, 3 }; + const int buffer2[] = { 3, 7, 3, 3 }; + const int buffer3[] = { 4, 9, 3 }; + const int buffer4[] = { 4, 4, 4, 4 }; + const int buffer5[] = { 3, 3, 10 }; + + class DecoderBuffer { + public: + static const DecoderBuffer Default; + template + DecoderBuffer(const char* name, int index, const int(&arr)[N]) + : name_(name), index_(index), counts_(arr), opsCount_(N) {} + const int* getCounts() const { + return counts_; + } + int getSize() const { + return opsCount_; + } + int getIndex() const { + return index_; + } + const char* getName() const { + return name_; + } + const DecoderBuffer* fetchNext(SuperscalarInstructionType instrType, int cycle, int mulCount, Blake2Generator& gen) const { + //If the current RandomX instruction is "IMULH", the next fetch configuration must be 3-3-10 + //because the full 128-bit multiplication instruction is 3 bytes long and decodes to 2 uOPs on Intel CPUs. + //Intel CPUs can decode at most 4 uOPs per cycle, so this requires a 2-1-1 configuration for a total of 3 macro ops. 
+ if (instrType == SuperscalarInstructionType::IMULH_R || instrType == SuperscalarInstructionType::ISMULH_R) + return &decodeBuffer3310; + + //To make sure that the multiplication port is saturated, a 4-4-4-4 configuration is generated if the number of multiplications + //is lower than the number of cycles. + if (mulCount < cycle + 1) + return &decodeBuffer4444; + + //If the current RandomX instruction is "IMUL_RCP", the next buffer must begin with a 4-byte slot for multiplication. + if(instrType == SuperscalarInstructionType::IMUL_RCP) + return (gen.getByte() & 1) ? &decodeBuffer484 : &decodeBuffer493; + + //Default: select a random fetch configuration. + return fetchNextDefault(gen); + } + private: + const char* name_; + int index_; + const int* counts_; + int opsCount_; + DecoderBuffer() : index_(-1) {} + static const DecoderBuffer decodeBuffer484; + static const DecoderBuffer decodeBuffer7333; + static const DecoderBuffer decodeBuffer3733; + static const DecoderBuffer decodeBuffer493; + static const DecoderBuffer decodeBuffer4444; + static const DecoderBuffer decodeBuffer3310; + static const DecoderBuffer* decodeBuffers[4]; + const DecoderBuffer* fetchNextDefault(Blake2Generator& gen) const { + return decodeBuffers[gen.getByte() & 3]; + } + }; + + const DecoderBuffer DecoderBuffer::decodeBuffer484 = DecoderBuffer("4,8,4", 0, buffer0); + const DecoderBuffer DecoderBuffer::decodeBuffer7333 = DecoderBuffer("7,3,3,3", 1, buffer1); + const DecoderBuffer DecoderBuffer::decodeBuffer3733 = DecoderBuffer("3,7,3,3", 2, buffer2); + const DecoderBuffer DecoderBuffer::decodeBuffer493 = DecoderBuffer("4,9,3", 3, buffer3); + const DecoderBuffer DecoderBuffer::decodeBuffer4444 = DecoderBuffer("4,4,4,4", 4, buffer4); + const DecoderBuffer DecoderBuffer::decodeBuffer3310 = DecoderBuffer("3,3,10", 5, buffer5); + + const DecoderBuffer* DecoderBuffer::decodeBuffers[4] = { + &DecoderBuffer::decodeBuffer484, + &DecoderBuffer::decodeBuffer7333, + &DecoderBuffer::decodeBuffer3733, + 
&DecoderBuffer::decodeBuffer493, + }; + + const DecoderBuffer DecoderBuffer::Default = DecoderBuffer(); + + const SuperscalarInstructionInfo* slot_3[] = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R }; + const SuperscalarInstructionInfo* slot_3L[] = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R, &SuperscalarInstructionInfo::IMULH_R, &SuperscalarInstructionInfo::ISMULH_R }; + const SuperscalarInstructionInfo* slot_4[] = { &SuperscalarInstructionInfo::IROR_C, &SuperscalarInstructionInfo::IADD_RS }; + const SuperscalarInstructionInfo* slot_7[] = { &SuperscalarInstructionInfo::IXOR_C7, &SuperscalarInstructionInfo::IADD_C7 }; + const SuperscalarInstructionInfo* slot_8[] = { &SuperscalarInstructionInfo::IXOR_C8, &SuperscalarInstructionInfo::IADD_C8 }; + const SuperscalarInstructionInfo* slot_9[] = { &SuperscalarInstructionInfo::IXOR_C9, &SuperscalarInstructionInfo::IADD_C9 }; + const SuperscalarInstructionInfo* slot_10 = &SuperscalarInstructionInfo::IMUL_RCP; + + static bool selectRegister(std::vector& availableRegisters, Blake2Generator& gen, int& reg) { + int index; + if (availableRegisters.size() == 0) + return false; + + if (availableRegisters.size() > 1) { + index = gen.getInt32() % availableRegisters.size(); + } + else { + index = 0; + } + reg = availableRegisters[index]; + return true; + } + + class RegisterInfo { + public: + RegisterInfo() : latency(0), lastOpGroup(SuperscalarInstructionType::INVALID), lastOpPar(-1), value(0) {} + int latency; + SuperscalarInstructionType lastOpGroup; + int lastOpPar; + int value; + }; + + //"SuperscalarInstruction" consists of one or more macro-ops + class SuperscalarInstruction { + public: + void toInstr(Instruction& instr) { //translate to a RandomX instruction format + instr.opcode = (int)getType(); + instr.dst = dst_; + instr.src = src_ >= 0 ? 
src_ : dst_; + instr.setMod(mod_); + instr.setImm32(imm32_); + } + + void createForSlot(Blake2Generator& gen, int slotSize, int fetchType, bool isLast, bool isFirst) { + switch (slotSize) + { + case 3: + //if this is the last slot, we can also select "IMULH" instructions + if (isLast) { + create(slot_3L[gen.getByte() & 3], gen); + } + else { + create(slot_3[gen.getByte() & 1], gen); + } + break; + case 4: + //if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions + if (fetchType == 4 && !isLast) { + create(&SuperscalarInstructionInfo::IMUL_R, gen); + } + else { + create(slot_4[gen.getByte() & 1], gen); + } + break; + case 7: + create(slot_7[gen.getByte() & 1], gen); + break; + case 8: + create(slot_8[gen.getByte() & 1], gen); + break; + case 9: + create(slot_9[gen.getByte() & 1], gen); + break; + case 10: + create(slot_10, gen); + break; + default: + UNREACHABLE; + } + } + + void create(const SuperscalarInstructionInfo* info, Blake2Generator& gen) { + info_ = info; + reset(); + switch (info->getType()) + { + case SuperscalarInstructionType::ISUB_R: { + mod_ = 0; + imm32_ = 0; + opGroup_ = SuperscalarInstructionType::IADD_RS; + groupParIsSource_ = true; + } break; + + case SuperscalarInstructionType::IXOR_R: { + mod_ = 0; + imm32_ = 0; + opGroup_ = SuperscalarInstructionType::IXOR_R; + groupParIsSource_ = true; + } break; + + case SuperscalarInstructionType::IADD_RS: { + mod_ = gen.getByte(); + imm32_ = 0; + opGroup_ = SuperscalarInstructionType::IADD_RS; + groupParIsSource_ = true; + } break; + + case SuperscalarInstructionType::IMUL_R: { + mod_ = 0; + imm32_ = 0; + opGroup_ = SuperscalarInstructionType::IMUL_R; + groupParIsSource_ = true; + } break; + + case SuperscalarInstructionType::IROR_C: { + mod_ = 0; + do { + imm32_ = gen.getByte() & 63; + } while (imm32_ == 0); + opGroup_ = SuperscalarInstructionType::IROR_C; + opGroupPar_ = -1; + } break; + + case SuperscalarInstructionType::IADD_C7: + case SuperscalarInstructionType::IADD_C8: + 
case SuperscalarInstructionType::IADD_C9: { + mod_ = 0; + imm32_ = gen.getInt32(); + opGroup_ = SuperscalarInstructionType::IADD_C7; + opGroupPar_ = -1; + } break; + + case SuperscalarInstructionType::IXOR_C7: + case SuperscalarInstructionType::IXOR_C8: + case SuperscalarInstructionType::IXOR_C9: { + mod_ = 0; + imm32_ = gen.getInt32(); + opGroup_ = SuperscalarInstructionType::IXOR_C7; + opGroupPar_ = -1; + } break; + + case SuperscalarInstructionType::IMULH_R: { + canReuse_ = true; + mod_ = 0; + imm32_ = 0; + opGroup_ = SuperscalarInstructionType::IMULH_R; + opGroupPar_ = gen.getInt32(); + } break; + + case SuperscalarInstructionType::ISMULH_R: { + canReuse_ = true; + mod_ = 0; + imm32_ = 0; + opGroup_ = SuperscalarInstructionType::ISMULH_R; + opGroupPar_ = gen.getInt32(); + } break; + + case SuperscalarInstructionType::IMUL_RCP: { + mod_ = 0; + do { + imm32_ = gen.getInt32(); + } while ((imm32_ & (imm32_ - 1)) == 0); + opGroup_ = SuperscalarInstructionType::IMUL_RCP; + opGroupPar_ = -1; + } break; + + default: + break; + } + } + + bool selectDestination(int cycle, bool allowChainedMul, RegisterInfo (®isters)[8], Blake2Generator& gen) { + /*if (allowChainedMultiplication && opGroup_ == SuperscalarInstructionType::IMUL_R) + std::cout << "Selecting destination with chained MUL enabled" << std::endl;*/ + std::vector availableRegisters; + //Conditions for the destination register: + // * value must be ready at the required cycle + // * cannot be the same as the source register unless the instruction allows it + // - this avoids optimizable instructions such as "xor r, r" or "sub r, r" + // * register cannot be multiplied twice in a row unless allowChainedMul is true + // - this avoids accumulation of trailing zeroes in registers due to excessive multiplication + // - allowChainedMul is set to true if an attempt to find source/destination registers failed (this is quite rare, but prevents a catastrophic failure of the generator) + // * either the last instruction 
applied to the register or its source must be different than this instruction + // - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2" + // * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction) + for (unsigned i = 0; i < 8; ++i) { + if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (allowChainedMul || opGroup_ != SuperscalarInstructionType::IMUL_R || registers[i].lastOpGroup != SuperscalarInstructionType::IMUL_R) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != RegisterNeedsDisplacement)) + availableRegisters.push_back(i); + } + return selectRegister(availableRegisters, gen, dst_); + } + + bool selectSource(int cycle, RegisterInfo(®isters)[8], Blake2Generator& gen) { + std::vector availableRegisters; + //all registers that are ready at the cycle + for (unsigned i = 0; i < 8; ++i) { + if (registers[i].latency <= cycle) + availableRegisters.push_back(i); + } + //if there are only 2 available registers for IADD_RS and one of them is r5, select it as the source because it cannot be the destination + if (availableRegisters.size() == 2 && info_->getType() == SuperscalarInstructionType::IADD_RS) { + if (availableRegisters[0] == RegisterNeedsDisplacement || availableRegisters[1] == RegisterNeedsDisplacement) { + opGroupPar_ = src_ = RegisterNeedsDisplacement; + return true; + } + } + if (selectRegister(availableRegisters, gen, src_)) { + if (groupParIsSource_) + opGroupPar_ = src_; + return true; + } + return false; + } + + SuperscalarInstructionType getType() { + return info_->getType(); + } + int getSource() { + return src_; + } + int getDestination() { + return dst_; + } + SuperscalarInstructionType getGroup() { + return opGroup_; + } + int getGroupPar() { + return opGroupPar_; + } + + const SuperscalarInstructionInfo& 
getInfo() const { + return *info_; + } + + static const SuperscalarInstruction Null; + + private: + const SuperscalarInstructionInfo* info_; + int src_ = -1; + int dst_ = -1; + int mod_; + uint32_t imm32_; + SuperscalarInstructionType opGroup_; + int opGroupPar_; + bool canReuse_ = false; + bool groupParIsSource_ = false; + + void reset() { + src_ = dst_ = -1; + canReuse_ = groupParIsSource_ = false; + } + + SuperscalarInstruction(const SuperscalarInstructionInfo* info) : info_(info) { + } + }; + + const SuperscalarInstruction SuperscalarInstruction::Null = SuperscalarInstruction(&SuperscalarInstructionInfo::NOP); + + constexpr int CYCLE_MAP_SIZE = RANDOMX_SUPERSCALAR_LATENCY + 4; + constexpr int LOOK_FORWARD_CYCLES = 4; + constexpr int MAX_THROWAWAY_COUNT = 256; + + template + static int scheduleUop(ExecutionPort::type uop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle) { + //The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload + //port P1 (multiplication) by instructions that can go to any port. 
+ for (; cycle < CYCLE_MAP_SIZE; ++cycle) { + if ((uop & ExecutionPort::P5) != 0 && !portBusy[cycle][2]) { + if (commit) { + if (trace) std::cout << "; P5 at cycle " << cycle << std::endl; + portBusy[cycle][2] = uop; + } + return cycle; + } + if ((uop & ExecutionPort::P0) != 0 && !portBusy[cycle][0]) { + if (commit) { + if (trace) std::cout << "; P0 at cycle " << cycle << std::endl; + portBusy[cycle][0] = uop; + } + return cycle; + } + if ((uop & ExecutionPort::P1) != 0 && !portBusy[cycle][1]) { + if (commit) { + if (trace) std::cout << "; P1 at cycle " << cycle << std::endl; + portBusy[cycle][1] = uop; + } + return cycle; + } + } + return -1; + } + + template + static int scheduleMop(const MacroOp& mop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle, int depCycle) { + //if this macro-op depends on the previous one, increase the starting cycle if needed + //this handles an explicit dependency chain in IMUL_RCP + if (mop.isDependent()) { + cycle = std::max(cycle, depCycle); + } + //move instructions are eliminated and don't need an execution unit + if (mop.isEliminated()) { + if (commit) + if (trace) std::cout << "; (eliminated)" << std::endl; + return cycle; + } + else if (mop.isSimple()) { + //this macro-op has only one uOP + return scheduleUop(mop.getUop1(), portBusy, cycle); + } + else { + //macro-ops with 2 uOPs are scheduled conservatively by requiring both uOPs to execute in the same cycle + for (; cycle < CYCLE_MAP_SIZE; ++cycle) { + + int cycle1 = scheduleUop(mop.getUop1(), portBusy, cycle); + int cycle2 = scheduleUop(mop.getUop2(), portBusy, cycle); + + if (cycle1 == cycle2) { + if (commit) { + scheduleUop(mop.getUop1(), portBusy, cycle1); + scheduleUop(mop.getUop2(), portBusy, cycle2); + } + return cycle1; + } + } + } + + return -1; + } + + void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen) { + + ExecutionPort::type portBusy[CYCLE_MAP_SIZE][3]; + memset(portBusy, 0, sizeof(portBusy)); + RegisterInfo registers[8]; + + 
const DecoderBuffer* decodeBuffer = &DecoderBuffer::Default; + SuperscalarInstruction currentInstruction = SuperscalarInstruction::Null; + int macroOpIndex = 0; + int codeSize = 0; + int macroOpCount = 0; + int cycle = 0; + int depCycle = 0; + int retireCycle = 0; + bool portsSaturated = false; + int programSize = 0; + int mulCount = 0; + int decodeCycle; + int throwAwayCount = 0; + + //decode instructions for RANDOMX_SUPERSCALAR_LATENCY cycles or until an execution port is saturated. + //Each decode cycle decodes 16 bytes of x86 code. + //Since a decode cycle produces on average 3.45 macro-ops and there are only 3 ALU ports, execution ports are always + //saturated first. The cycle limit is present only to guarantee loop termination. + //Program size is limited to SuperscalarMaxSize instructions. + for (decodeCycle = 0; decodeCycle < RANDOMX_SUPERSCALAR_LATENCY && !portsSaturated && programSize < SuperscalarMaxSize; ++decodeCycle) { + + //select a decode configuration + decodeBuffer = decodeBuffer->fetchNext(currentInstruction.getType(), decodeCycle, mulCount, gen); + if (trace) std::cout << "; ------------- fetch cycle " << cycle << " (" << decodeBuffer->getName() << ")" << std::endl; + + int bufferIndex = 0; + + //fill all instruction slots in the current decode buffer + while (bufferIndex < decodeBuffer->getSize()) { + int topCycle = cycle; + + //if we have issued all macro-ops for the current RandomX instruction, create a new instruction + if (macroOpIndex >= currentInstruction.getInfo().getSize()) { + if (portsSaturated || programSize >= SuperscalarMaxSize) + break; + //select an instruction so that the first macro-op fits into the current slot + currentInstruction.createForSlot(gen, decodeBuffer->getCounts()[bufferIndex], decodeBuffer->getIndex(), decodeBuffer->getSize() == bufferIndex + 1, bufferIndex == 0); + macroOpIndex = 0; + if (trace) std::cout << "; " << currentInstruction.getInfo().getName() << std::endl; + } + const MacroOp& mop = 
currentInstruction.getInfo().getOp(macroOpIndex); + if (trace) std::cout << mop.getName() << " "; + + //calculate the earliest cycle when this macro-op (all of its uOPs) can be scheduled for execution + int scheduleCycle = scheduleMop(mop, portBusy, cycle, depCycle); + if (scheduleCycle < 0) { + if (trace) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl; + //__debugbreak(); + portsSaturated = true; + break; + } + + //find a source register (if applicable) that will be ready when this instruction executes + if (macroOpIndex == currentInstruction.getInfo().getSrcOp()) { + int forward; + //if no suitable operand is ready, look up to LOOK_FORWARD_CYCLES forward + for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectSource(scheduleCycle, registers, gen); ++forward) { + if (trace) std::cout << "; src STALL at cycle " << cycle << std::endl; + ++scheduleCycle; + ++cycle; + } + //if no register was found, throw the instruction away and try another one + if (forward == LOOK_FORWARD_CYCLES) { + if (throwAwayCount < MAX_THROWAWAY_COUNT) { + throwAwayCount++; + macroOpIndex = currentInstruction.getInfo().getSize(); + if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl; + //cycle = topCycle; + continue; + } + //abort this decode buffer + if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl; + currentInstruction = SuperscalarInstruction::Null; + break; + } + if (trace) std::cout << "; src = r" << currentInstruction.getSource() << std::endl; + } + //find a destination register that will be ready when this instruction executes + if (macroOpIndex == currentInstruction.getInfo().getDstOp()) { + int forward; + for (forward = 0; forward < LOOK_FORWARD_CYCLES && 
!currentInstruction.selectDestination(scheduleCycle, throwAwayCount > 0, registers, gen); ++forward) { + if (trace) std::cout << "; dst STALL at cycle " << cycle << std::endl; + ++scheduleCycle; + ++cycle; + } + if (forward == LOOK_FORWARD_CYCLES) { //throw instruction away + if (throwAwayCount < MAX_THROWAWAY_COUNT) { + throwAwayCount++; + macroOpIndex = currentInstruction.getInfo().getSize(); + if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl; + //cycle = topCycle; + continue; + } + //abort this decode buffer + if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl; + currentInstruction = SuperscalarInstruction::Null; + break; + } + if (trace) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl; + } + throwAwayCount = 0; + + //recalculate when the instruction can be scheduled for execution based on operand availability + scheduleCycle = scheduleMop(mop, portBusy, scheduleCycle, scheduleCycle); + + //calculate when the result will be ready + depCycle = scheduleCycle + mop.getLatency(); + + //if this instruction writes the result, modify register information + // RegisterInfo.latency - which cycle the register will be ready + // RegisterInfo.lastOpGroup - the last operation that was applied to the register + // RegisterInfo.lastOpPar - the last operation source value (-1 = constant, 0-7 = register) + if (macroOpIndex == currentInstruction.getInfo().getResultOp()) { + int dst = currentInstruction.getDestination(); + RegisterInfo& ri = registers[dst]; + retireCycle = depCycle; + ri.latency = retireCycle; + ri.lastOpGroup = currentInstruction.getGroup(); + ri.lastOpPar = currentInstruction.getGroupPar(); + if (trace) std::cout << "; RETIRED at cycle " << retireCycle << std::endl; + } + codeSize += mop.getSize(); + bufferIndex++; + macroOpIndex++; + macroOpCount++; + + //terminating condition 
+ if (scheduleCycle >= RANDOMX_SUPERSCALAR_LATENCY) { + portsSaturated = true; + } + cycle = topCycle; + + //when all macro-ops of the current instruction have been issued, add the instruction into the program + if (macroOpIndex >= currentInstruction.getInfo().getSize()) { + currentInstruction.toInstr(prog(programSize++)); + mulCount += isMultiplication(currentInstruction.getType()); + } + } + ++cycle; + } + + double ipc = (macroOpCount / (double)retireCycle); + + memset(prog.asicLatencies, 0, sizeof(prog.asicLatencies)); + + //Calculate ASIC latency: + //Assumes 1 cycle latency for all operations and unlimited parallelization. + for (int i = 0; i < programSize; ++i) { + Instruction& instr = prog(i); + int latDst = prog.asicLatencies[instr.dst] + 1; + int latSrc = instr.dst != instr.src ? prog.asicLatencies[instr.src] + 1 : 0; + prog.asicLatencies[instr.dst] = std::max(latDst, latSrc); + } + + //address register is the register with the highest ASIC latency + int asicLatencyMax = 0; + int addressReg = 0; + for (int i = 0; i < 8; ++i) { + if (prog.asicLatencies[i] > asicLatencyMax) { + asicLatencyMax = prog.asicLatencies[i]; + addressReg = i; + } + prog.cpuLatencies[i] = registers[i].latency; + } + + prog.setSize(programSize); + prog.setAddressRegister(addressReg); + + prog.cpuLatency = retireCycle; + prog.asicLatency = asicLatencyMax; + prog.codeSize = codeSize; + prog.macroOps = macroOpCount; + prog.decodeCycles = decodeCycle; + prog.ipc = ipc; + prog.mulCount = mulCount; + + + /*if(INFO) std::cout << "; ALU port utilization:" << std::endl; + if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl; + + int portCycles = 0; + for (int i = 0; i < CYCLE_MAP_SIZE; ++i) { + std::cout << "; " << std::setw(3) << i << " "; + for (int j = 0; j < 3; ++j) { + std::cout << (portBusy[i][j] ? 
'*' : '_'); + portCycles += !!portBusy[i][j]; + } + std::cout << std::endl; + }*/ + } + + void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector *reciprocals) { + for (unsigned j = 0; j < prog.getSize(); ++j) { + Instruction& instr = prog(j); + switch ((SuperscalarInstructionType)instr.opcode) + { + case SuperscalarInstructionType::ISUB_R: + r[instr.dst] -= r[instr.src]; + break; + case SuperscalarInstructionType::IXOR_R: + r[instr.dst] ^= r[instr.src]; + break; + case SuperscalarInstructionType::IADD_RS: + r[instr.dst] += r[instr.src] << instr.getModShift(); + break; + case SuperscalarInstructionType::IMUL_R: + r[instr.dst] *= r[instr.src]; + break; + case SuperscalarInstructionType::IROR_C: + r[instr.dst] = rotr(r[instr.dst], instr.getImm32()); + break; + case SuperscalarInstructionType::IADD_C7: + case SuperscalarInstructionType::IADD_C8: + case SuperscalarInstructionType::IADD_C9: + r[instr.dst] += signExtend2sCompl(instr.getImm32()); + break; + case SuperscalarInstructionType::IXOR_C7: + case SuperscalarInstructionType::IXOR_C8: + case SuperscalarInstructionType::IXOR_C9: + r[instr.dst] ^= signExtend2sCompl(instr.getImm32()); + break; + case SuperscalarInstructionType::IMULH_R: + r[instr.dst] = mulh(r[instr.dst], r[instr.src]); + break; + case SuperscalarInstructionType::ISMULH_R: + r[instr.dst] = smulh(r[instr.dst], r[instr.src]); + break; + case SuperscalarInstructionType::IMUL_RCP: + if (reciprocals != nullptr) + r[instr.dst] *= (*reciprocals)[instr.getImm32()]; + else + r[instr.dst] *= randomx_reciprocal(instr.getImm32()); + break; + default: + UNREACHABLE; + } + } + } +} diff --git a/RandomX/src/superscalar.hpp b/RandomX/src/superscalar.hpp new file mode 100644 index 00000000..2e555331 --- /dev/null +++ b/RandomX/src/superscalar.hpp @@ -0,0 +1,59 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. 
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+#include "superscalar_program.hpp"
+#include "blake2_generator.hpp"
+
+namespace randomx {
+	//                  Intel Ivy Bridge reference
+	enum class SuperscalarInstructionType { //uOPs (decode)   execution ports   latency   code size
+		ISUB_R = 0,     //1               p015              1         3 (sub)
+		IXOR_R = 1,     //1               p015              1         3 (xor)
+		IADD_RS = 2,    //1               p01               1         4 (lea)
+		IMUL_R = 3,     //1               p1                3         4 (imul)
+		IROR_C = 4,     //1               p05               1         4 (ror)
+		IADD_C7 = 5,    //1               p015              1         7 (add)
+		IXOR_C7 = 6,    //1               p015              1         7 (xor)
+		IADD_C8 = 7,    //1+0             p015              1         7+1 (add+nop)
+		IXOR_C8 = 8,    //1+0             p015              1         7+1 (xor+nop)
+		IADD_C9 = 9,    //1+0             p015              1         7+2 (add+nop)
+		IXOR_C9 = 10,   //1+0             p015              1         7+2 (xor+nop)
+		IMULH_R = 11,   //1+2+1           0+(p1,p5)+0       3         3+3+3 (mov+mul+mov)
+		ISMULH_R = 12,  //1+2+1           0+(p1,p5)+0       3         3+3+3 (mov+imul+mov)
+		IMUL_RCP = 13,  //1+1             p015+p1           4         10+4 (mov+imul)
+
+		INVALID = -1
+	};
+
+	void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen);
+	void executeSuperscalar(uint64_t(&r)[8], SuperscalarProgram& prog, std::vector<uint64_t> *reciprocals = nullptr);
+}
\ No newline at end of file
diff --git a/RandomX/src/superscalar_program.hpp b/RandomX/src/superscalar_program.hpp
new file mode 100644
index 00000000..145006a6
--- /dev/null
+++ b/RandomX/src/superscalar_program.hpp
@@ -0,0 +1,83 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include "instruction.hpp"
+#include "configuration.h"
+
+namespace randomx {
+
+	//Fixed-capacity buffer of SuperscalarMaxSize instructions with size/address-register accessors and a stream printer
+	class SuperscalarProgram {
+	public:
+		//unchecked access: pc must be a valid index into programBuffer
+		Instruction& operator()(int pc) {
+			return programBuffer[pc];
+		}
+		friend std::ostream& operator<<(std::ostream& os, const SuperscalarProgram& p) {
+			p.print(os);
+			return os;
+		}
+		uint32_t getSize() const {
+			return size;
+		}
+		void setSize(uint32_t val) {
+			size = val;
+		}
+		int getAddressRegister() const {
+			return addrReg;
+		}
+		void setAddressRegister(uint32_t val) {
+			addrReg = static_cast<int>(val); //addrReg is stored as int; make the narrowing explicit
+		}
+
+		Instruction programBuffer[SuperscalarMaxSize];
+		uint32_t size;
+		int addrReg;
+		double ipc;
+		int codeSize;
+		int macroOps;
+		int decodeCycles;
+		int cpuLatency;
+		int asicLatency;
+		int mulCount;
+		int cpuLatencies[8];
+		int asicLatencies[8];
+	private:
+		//writes the first `size` instructions to os in program order
+		void print(std::ostream& os) const {
+			for (unsigned i = 0; i < size; ++i) {
+				auto instr = programBuffer[i];
+				os << instr;
+			}
+		}
+	};
+
+}
\ No newline at end of file
diff --git a/RandomX/src/tests/api-example1.c
b/RandomX/src/tests/api-example1.c
new file mode 100644
index 00000000..f45d54bd
--- /dev/null
+++ b/RandomX/src/tests/api-example1.c
@@ -0,0 +1,24 @@
+#include "../randomx.h"
+#include <stdio.h>
+
+int main() {
+	const char myKey[] = "RandomX example key";
+	const char myInput[] = "RandomX example input";
+	char hash[RANDOMX_HASH_SIZE];
+
+	randomx_cache *myCache = randomx_alloc_cache(RANDOMX_FLAG_DEFAULT);
+	randomx_init_cache(myCache, &myKey, sizeof myKey);
+	randomx_vm *myMachine = randomx_create_vm(RANDOMX_FLAG_DEFAULT, myCache, NULL);
+
+	randomx_calculate_hash(myMachine, &myInput, sizeof myInput, hash);
+
+	randomx_destroy_vm(myMachine);
+	randomx_release_cache(myCache);
+
+	for (unsigned i = 0; i < RANDOMX_HASH_SIZE; ++i)
+		printf("%02x", hash[i] & 0xff);
+
+	printf("\n");
+
+	return 0;
+}
diff --git a/RandomX/src/tests/api-example2.cpp b/RandomX/src/tests/api-example2.cpp
new file mode 100644
index 00000000..5b3757ea
--- /dev/null
+++ b/RandomX/src/tests/api-example2.cpp
@@ -0,0 +1,51 @@
+#include "../randomx.h"
+#include <iostream>
+#include <iomanip>
+#include <thread>
+
+int main() {
+	const char myKey[] = "RandomX example key";
+	const char myInput[] = "RandomX example input";
+	char hash[RANDOMX_HASH_SIZE];
+
+	randomx_cache *myCache = randomx_alloc_cache((randomx_flags)(RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES));
+	if (myCache == nullptr) {
+		std::cout << "Cache allocation failed" << std::endl;
+		return 1;
+	}
+	randomx_init_cache(myCache, myKey, sizeof myKey);
+
+	randomx_dataset *myDataset = randomx_alloc_dataset(RANDOMX_FLAG_LARGE_PAGES);
+	if (myDataset == nullptr) {
+		std::cout << "Dataset allocation failed" << std::endl;
+		randomx_release_cache(myCache);
+		return 1;
+	}
+
+	auto datasetItemCount = randomx_dataset_item_count();
+	std::thread t1(&randomx_init_dataset, myDataset, myCache, 0, datasetItemCount / 2);
+	std::thread t2(&randomx_init_dataset, myDataset, myCache, datasetItemCount / 2, datasetItemCount - datasetItemCount / 2);
+	t1.join();
+	t2.join();
+	randomx_release_cache(myCache);
+
+	randomx_vm *myMachine = randomx_create_vm((randomx_flags)(RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES), nullptr, myDataset);
+	if (myMachine == nullptr) {
+		std::cout << "Failed to create a virtual machine" << std::endl;
+		randomx_release_dataset(myDataset);
+		return 1;
+	}
+
+	randomx_calculate_hash(myMachine, &myInput, sizeof myInput, hash);
+
+	randomx_destroy_vm(myMachine);
+	randomx_release_dataset(myDataset);
+
+	// always two hex digits per byte, so bytes below 0x10 keep their leading zero
+	for (unsigned i = 0; i < RANDOMX_HASH_SIZE; ++i)
+		std::cout << std::hex << std::setw(2) << std::setfill('0') << ((int)hash[i] & 0xff);
+
+	std::cout << std::endl;
+
+	return 0;
+}
\ No newline at end of file
diff --git a/RandomX/src/tests/benchmark.cpp b/RandomX/src/tests/benchmark.cpp
new file mode 100644
index 00000000..73437132
--- /dev/null
+++ b/RandomX/src/tests/benchmark.cpp
@@ -0,0 +1,264 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <atomic>
+#include <cstdint>
+#include <cstring>
+#include <exception>
+#include <iostream>
+#include <stdexcept>
+#include <thread>
+#include <vector>
+#include "stopwatch.hpp"
+#include "utility.hpp"
+#include "../randomx.h"
+#include "../blake2/endian.h"
+
+const uint8_t blockTemplate_[] = {
+	0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
+	0x5a, 0xc5, 0xfa, 0xd3, 0xaa, 0x3a, 0xf6, 0xea, 0x44, 0xc1, 0x18, 0x69, 0xdc, 0x4f, 0x85, 0x3f, 0x00, 0x2b, 0x2e,
+	0xea, 0x00, 0x00, 0x00, 0x00, 0x77, 0xb2, 0x06, 0xa0, 0x2c, 0xa5, 0xb1, 0xd4, 0xce, 0x6b, 0xbf, 0xdf, 0x0a, 0xca,
+	0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09
+};
+
+//Four 64-bit words that worker threads XOR their hashes into; each word is updated atomically on its own
+class AtomicHash {
+public:
+	AtomicHash() {
+		for (int i = 0; i < 4; ++i)
+			hash[i].store(0);
+	}
+	void xorWith(uint64_t update[4]) {
+		for (int i = 0; i < 4; ++i)
+			hash[i].fetch_xor(update[i]);
+	}
+	void print(std::ostream& os) {
+		for (int i = 0; i < 4; ++i)
+			print(hash[i], os);
+		os << std::endl;
+	}
+private:
+	static void print(std::atomic<uint64_t>& hash, std::ostream& os) {
+		auto h = hash.load();
+		outputHex(os, (char*)&h, sizeof(h)); //write to the caller's stream, not unconditionally to std::cout
+	}
+	std::atomic<uint64_t> hash[4];
+};
+
+void printUsage(const char* executable) {
+	std::cout << "Usage: " << executable << " [OPTIONS]" << std::endl;
+	std::cout << "Supported options:" << std::endl;
+	std::cout << " --help shows this message" << std::endl;
+	std::cout << " --mine mining mode: 2080 MiB" << std::endl;
+	std::cout << " --verify verification mode: 256 MiB" << std::endl;
+	std::cout << " --jit x86-64 JIT compiled mode (default: interpreter)" << std::endl;
+	std::cout << " --largePages use large pages" << std::endl;
+	std::cout << " --softAes use software AES (default: x86 AES-NI)" << std::endl;
+	std::cout << " --threads T use T threads (default: 1)" << std::endl;
+	std::cout << " --init Q initialize dataset with Q threads (default: 1)" << std::endl;
+	std::cout << " --nonces N run N nonces (default: 1000)" << std::endl;
+	std::cout << " --seed S seed for cache initialization (default: 0)" << std::endl;
+}
+
+//Worker loop: claims nonces from atomicNonce until noncesCount is reached, hashes the block template
+//with each nonce stored at byte offset 39 and XORs every hash into result. `thread` is not used.
+void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread) {
+	uint64_t hash[RANDOMX_HASH_SIZE / sizeof(uint64_t)];
+	uint8_t blockTemplate[sizeof(blockTemplate_)];
+	memcpy(blockTemplate, blockTemplate_, sizeof(blockTemplate));
+	void* noncePtr = blockTemplate + 39;
+	auto nonce = atomicNonce.fetch_add(1);
+
+	while (nonce < noncesCount) {
+		store32(noncePtr, nonce);
+		randomx_calculate_hash(vm, blockTemplate, sizeof(blockTemplate), &hash);
+		result.xorWith(hash);
+		nonce = atomicNonce.fetch_add(1);
+	}
+}
+
+int main(int argc, char** argv) {
+	bool softAes, miningMode, verificationMode, help, largePages, jit;
+	int noncesCount, threadCount, initThreadCount;
+	int32_t seedValue;
+	char seed[4];
+
+	readOption("--softAes", argc, argv, softAes);
+	readOption("--mine", argc, argv, miningMode);
+	readOption("--verify", argc, argv, verificationMode);
+	readIntOption("--threads", argc, argv, threadCount, 1);
+	readIntOption("--nonces", argc, argv, noncesCount, 1000);
+	readIntOption("--init", argc, argv, initThreadCount, 1);
+	readIntOption("--seed", argc, argv, seedValue, 0);
+	readOption("--largePages", argc, argv, largePages);
+	readOption("--jit", argc, argv, jit);
+	readOption("--help", argc, argv, help);
+
+	store32(&seed, seedValue);
+
+	std::cout << "RandomX benchmark" << std::endl;
+
+	if (help || (!miningMode && !verificationMode)) {
+		printUsage(argv[0]);
+		return 0;
+	}
+
+	//the single-threaded path below runs mine(vms[0], ...), so at least one VM must be created
+	if (threadCount < 1) {
+		std::cout << "ERROR: --threads must be at least 1" << std::endl;
+		return 1;
+	}
+
+	std::atomic<uint32_t> atomicNonce(0);
+	AtomicHash result;
+	std::vector<randomx_vm*> vms;
+	std::vector<std::thread> threads;
+	randomx_dataset* dataset = nullptr; //stays null in light mode instead of being passed uninitialized
+	randomx_cache* cache = nullptr;
+	randomx_flags flags = RANDOMX_FLAG_DEFAULT;
+
+	if (miningMode) {
+		flags = (randomx_flags)(flags | RANDOMX_FLAG_FULL_MEM);
+		std::cout << " - full memory mode (2080 MiB)" << std::endl;
+	}
+	else {
+		std::cout << " - light memory mode (256 MiB)" << std::endl;
+	}
+
+	if (jit) {
+		flags = (randomx_flags)(flags | RANDOMX_FLAG_JIT);
+		std::cout << " - JIT compiled mode" << std::endl;
+	}
+	else {
+		std::cout << " - interpreted mode" << std::endl;
+	}
+
+	if (softAes) {
+		std::cout << " - software AES mode" << std::endl;
+	}
+	else {
+		flags = (randomx_flags)(flags | RANDOMX_FLAG_HARD_AES);
+		std::cout << " - hardware AES mode" << std::endl;
+	}
+
+	if (largePages) {
+		flags = (randomx_flags)(flags | RANDOMX_FLAG_LARGE_PAGES);
+		std::cout << " - large pages mode" << std::endl;
+	}
+	else {
+		std::cout << " - small pages mode" << std::endl;
+	}
+
+	std::cout << "Initializing";
+	if (miningMode)
+		std::cout << " (" << initThreadCount << " thread" << (initThreadCount > 1 ? "s)" : ")");
+	std::cout << " ..." << std::endl;
+
+	try {
+		Stopwatch sw(true);
+		cache = randomx_alloc_cache(flags);
+		if (cache == nullptr) {
+			if (jit) {
+				throw std::runtime_error("JIT compilation is not supported or cache allocation failed");
+			}
+			throw std::runtime_error("Cache allocation failed");
+		}
+		randomx_init_cache(cache, &seed, sizeof(seed));
+		if (miningMode) {
+			dataset = randomx_alloc_dataset(flags);
+			if (dataset == nullptr) {
+				throw std::runtime_error("Dataset allocation failed");
+			}
+			uint32_t datasetItemCount = randomx_dataset_item_count();
+			if (initThreadCount > 1) {
+				auto perThread = datasetItemCount / initThreadCount;
+				auto remainder = datasetItemCount % initThreadCount;
+				uint32_t startItem = 0;
+				for (int i = 0; i < initThreadCount; ++i) {
+					auto count = perThread + (i == initThreadCount - 1 ? remainder : 0);
+					threads.push_back(std::thread(&randomx_init_dataset, dataset, cache, startItem, count));
+					startItem += count;
+				}
+				for (unsigned i = 0; i < threads.size(); ++i) {
+					threads[i].join();
+				}
+			}
+			else {
+				randomx_init_dataset(dataset, cache, 0, datasetItemCount);
+			}
+			randomx_release_cache(cache);
+			cache = nullptr; //never hand the released cache to randomx_create_vm below
+			threads.clear();
+		}
+		std::cout << "Memory initialized in " << sw.getElapsed() << " s" << std::endl;
+		std::cout << "Initializing " << threadCount << " virtual machine(s) ..." << std::endl;
+		for (int i = 0; i < threadCount; ++i) {
+			randomx_vm *vm = randomx_create_vm(flags, cache, dataset);
+			if (vm == nullptr) {
+				throw std::runtime_error("Unsupported virtual machine options");
+			}
+			vms.push_back(vm);
+		}
+		std::cout << "Running benchmark (" << noncesCount << " nonces) ..." << std::endl;
+		sw.restart();
+		if (threadCount > 1) {
+			for (unsigned i = 0; i < vms.size(); ++i) {
+				threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), noncesCount, i)); //softAes is already part of the VM flags
+			}
+			for (unsigned i = 0; i < threads.size(); ++i) {
+				threads[i].join();
+			}
+		}
+		else {
+			mine(vms[0], std::ref(atomicNonce), std::ref(result), noncesCount, 0);
+		}
+
+		double elapsed = sw.getElapsed();
+		for (unsigned i = 0; i < vms.size(); ++i)
+			randomx_destroy_vm(vms[i]);
+		if (miningMode)
+			randomx_release_dataset(dataset);
+		else
+			randomx_release_cache(cache);
+		std::cout << "Calculated result: ";
+		result.print(std::cout);
+		if (noncesCount == 1000 && seedValue == 0)
+			std::cout << "Reference result: 669ae4f2e5e2c0d9cc232ff2c37d41ae113fa302bbf983d9f3342879831b4edf" << std::endl;
+		if (!miningMode) {
+			std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;
+		}
+		else {
+			std::cout << "Performance: " << noncesCount / elapsed << " hashes per second" << std::endl;
+		}
+	}
+	catch (std::exception& e) {
+		std::cout << "ERROR: " << e.what() << std::endl;
+		return 1;
+	}
+	return 0;
+}
diff --git a/RandomX/src/tests/code-generator.cpp b/RandomX/src/tests/code-generator.cpp
new file mode 100644
index 00000000..302dc468
--- /dev/null
+++ b/RandomX/src/tests/code-generator.cpp
@@ -0,0 +1,124 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "utility.hpp"
+#include "../common.hpp"
+#include "../assembly_generator_x86.hpp"
+#include "../superscalar.hpp"
+#include "../aes_hash.hpp"
+#include "../blake2/blake2.h"
+#include "../program.hpp"
+
+const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
+
+const uint8_t blockTemplate_[] = {
+	0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
+	0x5a, 0xc5, 0xfa, 0xd3, 0xaa, 0x3a, 0xf6, 0xea, 0x44, 0xc1, 0x18, 0x69, 0xdc, 0x4f, 0x85, 0x3f, 0x00, 0x2b, 0x2e,
+	0xea, 0x00, 0x00, 0x00, 0x00, 0x77, 0xb2, 0x06, 0xa0, 0x2c, 0xa5, 0xb1, 0xd4, 0xce, 0x6b, 0xbf, 0xdf, 0x0a, 0xca,
+	0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09
+};
+
+template<bool softAes> //selects the fillAes1Rx4 variant used to expand the Blake2b hash
+void generateAsm(uint32_t nonce) {
+	alignas(16) uint64_t hash[8];
+	uint8_t blockTemplate[sizeof(blockTemplate_)];
+	memcpy(blockTemplate, blockTemplate_, sizeof(blockTemplate));
+	store32(blockTemplate + 39, nonce); //the nonce is stored at byte offset 39 of the template
+	blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
+	uint8_t scratchpad[randomx::ScratchpadSize];
+	fillAes1Rx4<softAes>((void*)hash, randomx::ScratchpadSize, scratchpad);
+	randomx::AssemblyGeneratorX86 asmX86;
+	randomx::Program p;
+	fillAes1Rx4<softAes>(hash, sizeof(p), &p);
+	asmX86.generateProgram(p);
+	asmX86.printCode(std::cout);
+}
+
+template<bool softAes> //selects the fillAes1Rx4 variant used to expand the Blake2b hash
+void generateNative(uint32_t nonce) {
+	alignas(16) uint64_t hash[8];
+	uint8_t blockTemplate[sizeof(blockTemplate_)];
+	memcpy(blockTemplate, blockTemplate_, sizeof(blockTemplate));
+	store32(blockTemplate + 39, nonce);
+	blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
+	uint8_t scratchpad[randomx::ScratchpadSize];
+	fillAes1Rx4<softAes>((void*)hash, randomx::ScratchpadSize, scratchpad);
+	alignas(16) randomx::Program prog;
+	fillAes1Rx4<softAes>((void*)hash, sizeof(prog), &prog);
+	std::cout << prog << std::endl;
+}
+
+void printUsage(const char* executable) {
+	std::cout << "Usage: " << executable << " [OPTIONS]" << std::endl;
+	std::cout << "Supported options:" << std::endl;
+	std::cout << " --softAes use software AES (default: x86 AES-NI)" << std::endl;
+	std::cout << " --nonce N seed nonce (default: 1000)" << std::endl;
+	std::cout << " --genAsm generate x86-64 asm code for nonce N" << std::endl;
+	std::cout << " --genNative generate RandomX code for nonce N" << std::endl;
+	std::cout << " --genSuperscalar generate superscalar program for nonce N" << std::endl;
+}
+
+int main(int argc, char** argv) {
+	bool softAes, genAsm, genNative, genSuperscalar;
+	int nonce;
+
+	readOption("--softAes", argc, argv, softAes);
+	readOption("--genAsm", argc, argv, genAsm);
+	readIntOption("--nonce", argc, argv, nonce, 1000);
+	readOption("--genNative", argc, argv, genNative);
+	readOption("--genSuperscalar", argc, argv, genSuperscalar);
+
+	if (genSuperscalar) {
+		randomx::SuperscalarProgram p;
+		randomx::Blake2Generator gen(seed, nonce);
+		randomx::generateSuperscalar(p, gen);
+		randomx::AssemblyGeneratorX86 asmX86;
+		asmX86.generateAsm(p);
+		asmX86.printCode(std::cout);
+		return 0;
+	}
+
+	if (genAsm) {
+		if (softAes)
+			generateAsm<true>(nonce);
+		else
+			generateAsm<false>(nonce);
+		return 0;
+	}
+
+	if (genNative) {
+		if (softAes)
+			generateNative<true>(nonce);
+		else
+			generateNative<false>(nonce);
+		return 0;
+	}
+
+	printUsage(argv[0]);
+	return 0;
+}
\ No newline at end of file
diff --git a/RandomX/src/tests/jit-performance.cpp b/RandomX/src/tests/jit-performance.cpp
new file mode 100644
index 00000000..57e8c7d3
--- /dev/null
+++ b/RandomX/src/tests/jit-performance.cpp
@@ -0,0 +1,44 @@
+#include "../aes_hash.hpp"
+#include "../jit_compiler_x86.hpp"
+#include "../program.hpp"
+#include "utility.hpp"
+#include "stopwatch.hpp"
+#include "../blake2/blake2.h"
+#include "../reciprocal.h"
+
+int main(int argc, char** argv) {
+	int count;
+	readInt(argc, argv, count, 1000000);
+
+	const char seed[] = "JIT performance test seed";
+	alignas(16) uint8_t hash[64]; //aligned like every other fillAes1Rx4 state buffer in these tests
+
+	blake2b(&hash, sizeof hash, &seed, sizeof seed, nullptr, 0);
+
+	randomx::ProgramConfiguration config;
+
+	randomx::Program program;
+	randomx::JitCompilerX86 jit;
+
+	std::cout << "Compiling " << count << " programs..." << std::endl;
+
+	Stopwatch sw(true);
+
+	for (int i = 0; i < count; ++i) {
+		fillAes1Rx4<false>(hash, sizeof(program), &program); //NOTE(review): <false> assumed (hardware AES) - confirm
+		auto addressRegisters = program.getEntropy(12);
+		config.readReg0 = 0 + (addressRegisters & 1);
+		addressRegisters >>= 1;
+		config.readReg1 = 2 + (addressRegisters & 1);
+		addressRegisters >>= 1;
+		config.readReg2 = 4 + (addressRegisters & 1);
+		addressRegisters >>= 1;
+		config.readReg3 = 6 + (addressRegisters & 1);
+		jit.generateProgram(program, config);
+	}
+
+	std::cout << "Elapsed: " << sw.getElapsed() << " s" << std::endl;
+
+	dump((const char*)jit.getProgramFunc(), randomx::CodeSize, "program.bin");
+	return 0;
+}
\ No newline at end of file
diff --git a/RandomX/src/tests/perf-simulation.cpp b/RandomX/src/tests/perf-simulation.cpp
new file mode 100644
index 00000000..dfac0ff2
--- /dev/null
+++ b/RandomX/src/tests/perf-simulation.cpp
@@ -0,0 +1,639 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "utility.hpp"
+#include "../common.hpp"
+#include "../aes_hash.hpp"
+#include "../program.hpp"
+#include "../blake2/blake2.h"
+#include <algorithm>
+#include <iomanip>
+
+int analyze(randomx::Program& p);
+int executeInOrder(randomx::Program& p, randomx::Program& original, bool print, int executionPorts, int memoryPorts, bool speculate, int pipeline);
+int executeOutOfOrder(randomx::Program& p, randomx::Program& original, bool print, int executionPorts, int memoryPorts, bool speculate, int pipeline);
+
+constexpr uint32_t DST_NOP = 0;
+constexpr uint32_t DST_INT = 1;
+constexpr uint32_t DST_FLT = 2;
+constexpr uint32_t DST_MEM = 3;
+constexpr uint32_t MASK_DST = 3;
+
+constexpr uint32_t SRC_NOP = 0;
+constexpr uint32_t SRC_INT = 4;
+constexpr uint32_t SRC_FLT = 8;
+constexpr uint32_t SRC_MEM = 12;
+constexpr uint32_t MASK_SRC = 12;
+
+constexpr uint32_t OP_CFROUND = 16;
+constexpr uint32_t OP_SWAP = 32;
+constexpr uint32_t OP_BRANCH = 48;
+constexpr uint32_t MASK_EXT = 48;
+
+constexpr uint32_t OP_FLOAT = 64;
+constexpr uint32_t BRANCH_TARGET = 128;
+
+//template<bool softAes>
+void generate(randomx::Program& p, uint32_t nonce) {
+	alignas(16) uint64_t hash[8];
+	blake2b(hash, sizeof(hash), &nonce, sizeof(nonce), nullptr, 0);
+	fillAes1Rx4<false>((void*)hash, sizeof(p), &p); //NOTE(review): <false> assumed (hardware AES) - confirm
+}
+
+bool has(randomx::Instruction& instr, uint32_t mask, uint32_t prop) {
+	return (instr.opcode & mask) == prop;
+}
+
+bool has(randomx::Instruction& instr, uint32_t prop) {
+	return (instr.opcode & prop) != 0;
+}
+
+int main(int argc, char** argv) {
+	int nonces, seed, executionPorts, memoryPorts, pipeline;
+	bool print, reorder, speculate;
+	readOption("--print", argc, argv, print);
+	readOption("--reorder", argc, argv, reorder);
+	readOption("--speculate", argc, argv, speculate);
+	readIntOption("--nonces", argc, argv, nonces, 1);
+	readIntOption("--seed", argc, argv, seed, 0);
+	readIntOption("--executionPorts", argc, argv, executionPorts, 4);
+	readIntOption("--memoryPorts", argc, argv, memoryPorts, 2);
+	readIntOption("--pipeline", argc, argv, pipeline, 3);
+	randomx::Program p, original;
+	double totalCycles = 0.0;
+	double jumpCount = 0;
+	for (int i = 0; i < nonces; ++i) {
+		generate(original, i ^ seed);
+		memcpy(&p, &original, sizeof(p));
+		jumpCount += analyze(p);
+		totalCycles +=
+			reorder
+			?
+			executeOutOfOrder(p, original, print, executionPorts, memoryPorts, speculate, pipeline)
+			:
+			executeInOrder(p, original, print, executionPorts, memoryPorts, speculate, pipeline);
+	}
+	totalCycles /= nonces;
+	jumpCount /= nonces;
+	std::cout << "Execution took " << totalCycles << " cycles per program" << std::endl;
+	//std::cout << "Jump count: " << jumpCount << std::endl;
+	return 0;
+}
+
+int executeInOrder(randomx::Program& p, randomx::Program& original, bool print, int executionPorts, int memoryPorts, bool speculate, int pipeline) {
+	int cycle = pipeline - 1;
+	int index = 0;
+	int branchCount = 0;
+	int int_reg_ready[randomx::RegistersCount] = { 0 };
+	int flt_reg_ready[randomx::RegistersCount] = { 0 };
+	//each workgroup takes 1 or 2 cycles (2 cycles if any instruction has a memory operand)
+	while (index < RANDOMX_PROGRAM_SIZE) {
+		int memoryAccesses = 0;
+		bool hasRound = false;
+		int workers = 0;
+		//std::cout << "-----------" << std::endl;
+		for (; workers < executionPorts && memoryAccesses < memoryPorts && index < RANDOMX_PROGRAM_SIZE; ++workers) {
+			auto& instr = p(index);
+			auto& origi = original(index);
+			origi.dst %= randomx::RegistersCount;
+			origi.src %= randomx::RegistersCount;
+
+			//check dependencies
+			if (has(instr, MASK_SRC, SRC_INT) && int_reg_ready[instr.src] > cycle)
+				break;
+
+			if (has(instr, MASK_SRC, SRC_MEM) && int_reg_ready[instr.src] > cycle - 1)
+				break;
+
+			if (has(instr, MASK_DST, DST_MEM) && int_reg_ready[instr.dst] > cycle - 1)
+				break;
+
+			if (has(instr, MASK_DST, DST_FLT) && flt_reg_ready[instr.dst] > cycle)
+				break;
+
+			if (has(instr, MASK_DST, DST_INT) && int_reg_ready[instr.dst] > cycle)
+				break;
+
+			if (hasRound && has(instr, OP_FLOAT))
+				break;
+
+			//execute
+			index++;
+
+			if (has(instr, MASK_EXT, OP_BRANCH)) {
+				branchCount++;
+			}
+
+			if (has(instr, MASK_DST, DST_FLT))
+				flt_reg_ready[instr.dst] = cycle + 1;
+
+			if (has(instr, MASK_DST, DST_INT))
+				int_reg_ready[instr.dst] = cycle + 1;
+
+			if (has(instr, MASK_EXT, OP_SWAP)) {
+				int_reg_ready[instr.src] = cycle + 1;
+			}
+
+			if (has(instr, MASK_EXT, OP_CFROUND))
+				hasRound = true;
+
+			if (has(instr, MASK_SRC, SRC_MEM) || has(instr, MASK_DST, DST_MEM)) {
+				memoryAccesses++;
+			}
+
+			if (print)
+				std::cout << std::setw(2) << (cycle + 1) << ": " << origi;
+
+			//non-speculative execution must stall after branch
+			if (!speculate && has(instr, MASK_EXT, OP_BRANCH)) {
+				cycle += pipeline - 1;
+				break;
+			}
+		}
+		//std::cout << " workers: " << workers << std::endl;
+		cycle++;
+	}
+	if (speculate) {
+		//account for mispredicted branches
+		//each branch consumes its own entropy byte: word i / 8, byte i % 8
+		for (int i = 0; i < branchCount; ++i) {
+			//NOTE(review): word index wraps at 8 to stay inside the entropy buffer - confirm its real size
+			auto entropy = p.getEntropy((i / 8) % 8) >> ((i % 8) * 8);
+			if ((entropy & 0xff) == 0) // 1/256 chance to flush the pipeline
+				cycle += pipeline - 1;
+		}
+	}
+	return cycle;
+}
+
+int executeOutOfOrder(randomx::Program& p, randomx::Program& original, bool print, int executionPorts, int memoryPorts, bool speculate, int pipeline) {
+	int index = 0;
+	int busyExecutionPorts[2 * RANDOMX_PROGRAM_SIZE] = { 0 };
+	int busyMemoryPorts[2 * RANDOMX_PROGRAM_SIZE] = { 0 };
+	int int_reg_ready[randomx::RegistersCount] = { 0 };
+	int flt_reg_ready[randomx::RegistersCount] = { 0 };
+	int fprcReady = 0;
+	int lastBranch = 0;
+	int branchCount = 0;
+	for (; index < RANDOMX_PROGRAM_SIZE; ++index) {
+		auto& instr = p(index);
+		int retireCycle = pipeline - 1;
+
+		//non-speculative execution cannot reorder across branches
+		if (!speculate && !has(instr, MASK_EXT, OP_BRANCH))
+			retireCycle = std::max(lastBranch + pipeline - 1, retireCycle);
+
+		//check dependencies
+		if (has(instr, MASK_SRC, SRC_INT)) {
+			retireCycle = std::max(retireCycle, int_reg_ready[instr.src]);
+			int_reg_ready[instr.src] = retireCycle;
+		}
+
+		if (has(instr, MASK_SRC, SRC_MEM)) {
+			retireCycle = std::max(retireCycle, int_reg_ready[instr.src] + 1);
+			//find free memory port
+			while (busyMemoryPorts[retireCycle - 1] >= memoryPorts) {
+				retireCycle++;
+			}
+			busyMemoryPorts[retireCycle - 1]++;
+		}
+
+		if (has(instr, MASK_DST, DST_FLT)) {
+			retireCycle = std::max(retireCycle, flt_reg_ready[instr.dst]);
+		}
+
+		if (has(instr, MASK_DST, DST_INT)) {
+			retireCycle = std::max(retireCycle, int_reg_ready[instr.dst]);
+		}
+
+		//floating point operations depend on the fprc register
+		if (has(instr, OP_FLOAT))
+			retireCycle = std::max(retireCycle, fprcReady);
+
+		//execute
+		if (has(instr, MASK_DST, DST_MEM)) {
+			retireCycle = std::max(retireCycle, int_reg_ready[instr.dst] + 1);
+			//find free memory port
+			while (busyMemoryPorts[retireCycle - 1] >= memoryPorts) {
+				retireCycle++;
+			}
+			busyMemoryPorts[retireCycle - 1]++;
+			retireCycle++;
+		}
+
+		if (has(instr, MASK_DST, DST_FLT)) {
+			//find free execution port
+			do {
+				retireCycle++;
+			} while (busyExecutionPorts[retireCycle - 1] >= executionPorts);
+			busyExecutionPorts[retireCycle - 1]++;
+			flt_reg_ready[instr.dst] = retireCycle;
+		}
+
+		if (has(instr, MASK_DST, DST_INT)) {
+			//find free execution port
+			do {
+				retireCycle++;
+			} while (busyExecutionPorts[retireCycle - 1] >= executionPorts);
+			busyExecutionPorts[retireCycle - 1]++;
+			int_reg_ready[instr.dst] = retireCycle;
+		}
+
+		if (has(instr, MASK_EXT, OP_SWAP)) {
+			int_reg_ready[instr.src] = retireCycle;
+		}
+
+		if (has(instr, MASK_EXT, OP_CFROUND)) {
+			do {
+				retireCycle++;
+			} while (busyExecutionPorts[retireCycle - 1] >= executionPorts);
+			busyExecutionPorts[retireCycle - 1]++;
+			fprcReady = retireCycle;
+		}
+
+		if (has(instr, MASK_EXT, OP_BRANCH)) {
+			/*if (!speculate && instr.mod == 1) { //simulated predication
+				do {
+					retireCycle++;
+				} while (busyExecutionPorts[retireCycle - 1] >= executionPorts);
+				busyExecutionPorts[retireCycle - 1]++;
+				int_reg_ready[instr.dst] = retireCycle;
+			}*/
+			//else {
+				lastBranch = std::max(lastBranch, retireCycle);
+				branchCount++;
+			//}
+		}
+
+		//print
+		auto& origi = original(index);
+		origi.dst %= randomx::RegistersCount;
+		origi.src %= randomx::RegistersCount;
+		if (print) {
+			std::cout << std::setw(2) << retireCycle << ": " << origi;
+			if (has(instr, MASK_EXT, OP_BRANCH)) {
+				std::cout << " jump: " << (int)instr.mod << std::endl;
+			}
+		}
+	}
+	int cycle = 0;
+	for (int i = 0; i < randomx::RegistersCount; ++i) {
+		cycle = std::max(cycle, int_reg_ready[i]);
+	}
+	for (int i = 0; i < randomx::RegistersCount; ++i) {
+		cycle = std::max(cycle, flt_reg_ready[i]);
+	}
+	if (speculate) {
+		//account for mispredicted branches
+		//each branch consumes its own entropy byte: word i / 8, byte i % 8
+		for (int i = 0; i < branchCount; ++i) {
+			//NOTE(review): word index wraps at 8 to stay inside the entropy buffer - confirm its real size
+			auto entropy = p.getEntropy((i / 8) % 8) >> ((i % 8) * 8);
+			if ((entropy & 0xff) == 0) // 1/256 chance to flush the pipeline
+				cycle += pipeline - 1;
+		}
+	}
+	return cycle;
+}
+
+#include "../instruction_weights.hpp"
+
+//old register selection
+struct RegisterUsage {
+	int32_t lastUsed;
+	int32_t count;
+};
+
+inline int getConditionRegister(RegisterUsage(&registerUsage)[randomx::RegistersCount]) {
+	int min = INT_MAX;
+	int minCount = 0;
+	int minIndex = 0; //defined result even if no register compares below INT_MAX
+	//prefer registers that have been used as a condition register fewer times
+	for (unsigned i = 0; i < randomx::RegistersCount; ++i) {
+		if (registerUsage[i].lastUsed < min || (registerUsage[i].lastUsed == min && registerUsage[i].count < minCount)) {
+			min = registerUsage[i].lastUsed;
+			minCount = registerUsage[i].count;
+			minIndex = i;
+		}
+	}
+	return minIndex;
+}
+
+int analyze(randomx::Program& p) {
+	int jumpCount = 0;
+	RegisterUsage registerUsage[randomx::RegistersCount];
+	for (unsigned i = 0; i < randomx::RegistersCount; ++i) {
+		registerUsage[i].lastUsed = -1;
+		registerUsage[i].count = 0;
+	}
+	for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
+		auto& instr = p(i);
+		int opcode = instr.opcode;
+		instr.opcode = 0;
+		switch (opcode) {
+			CASE_REP(IADD_RS) {
+				instr.dst = instr.dst % randomx::RegistersCount;
+				instr.src = instr.src % randomx::RegistersCount;
+				instr.opcode |= SRC_INT;
+				instr.opcode |= DST_INT;
+				registerUsage[instr.dst].lastUsed = i;
+			} break;
+
+			CASE_REP(IADD_M) {
+				instr.dst = instr.dst % randomx::RegistersCount;
+				instr.src = instr.src %
randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { + instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(ISUB_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(ISUB_M) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { + instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IMUL_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IMUL_M) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { + instr.imm32 = (instr.getModMem() ? 
randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IMULH_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IMULH_M) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { + instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(ISMULH_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(ISMULH_M) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { + instr.imm32 = (instr.getModMem() ? 
randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IMUL_RCP) { + uint64_t divisor = instr.getImm32(); + if (!randomx::isPowerOf2(divisor)) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.opcode |= DST_INT; + registerUsage[instr.dst].lastUsed = i; + } + } break; + + CASE_REP(INEG_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.opcode |= DST_INT; + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IXOR_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IXOR_M) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { + instr.imm32 = (instr.getModMem() ? 
randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IROR_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IROL_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(ISWAP_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + if (instr.src != instr.dst) { + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + instr.opcode |= OP_SWAP; + registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.src].lastUsed = i; + } + } break; + + CASE_REP(FSWAP_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.opcode |= DST_FLT; + } break; + + CASE_REP(FADD_R) { + instr.dst = instr.dst % randomx::RegisterCountFlt; + instr.opcode |= DST_FLT; + instr.opcode |= OP_FLOAT; + } break; + + CASE_REP(FADD_M) { + instr.dst = instr.dst % randomx::RegisterCountFlt; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_FLT; + instr.opcode |= SRC_MEM; + instr.opcode |= OP_FLOAT; + instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } break; + + CASE_REP(FSUB_R) { + instr.dst = instr.dst % randomx::RegisterCountFlt; + instr.opcode |= DST_FLT; + instr.opcode |= OP_FLOAT; + } break; + + CASE_REP(FSUB_M) { + instr.dst = instr.dst % randomx::RegisterCountFlt; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_FLT; + instr.opcode |= SRC_MEM; + instr.opcode |= OP_FLOAT; + instr.imm32 = (instr.getModMem() ? 
randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } break; + + CASE_REP(FSCAL_R) { + instr.dst = instr.dst % randomx::RegisterCountFlt; + instr.opcode |= DST_FLT; + } break; + + CASE_REP(FMUL_R) { + instr.dst = 4 + instr.dst % randomx::RegisterCountFlt; + instr.opcode |= DST_FLT; + instr.opcode |= OP_FLOAT; + } break; + + CASE_REP(FDIV_M) { + instr.dst = 4 + instr.dst % randomx::RegisterCountFlt; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_FLT; + instr.opcode |= SRC_MEM; + instr.opcode |= OP_FLOAT; + instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } break; + + CASE_REP(FSQRT_R) { + instr.dst = 4 + instr.dst % randomx::RegisterCountFlt; + instr.opcode |= DST_FLT; + instr.opcode |= OP_FLOAT; + } break; + + CASE_REP(CBRANCH) { + instr.opcode |= OP_BRANCH; + instr.opcode |= DST_INT; + //jump condition + //int reg = getConditionRegister(registerUsage); + int reg = instr.dst % randomx::RegistersCount; + int target = registerUsage[reg].lastUsed; + int offset = (i - target); + instr.mod = offset; + jumpCount += offset; + p(target + 1).opcode |= BRANCH_TARGET; + registerUsage[reg].count++; + instr.dst = reg; + //mark all registers as used + for (unsigned j = 0; j < randomx::RegistersCount; ++j) { + registerUsage[j].lastUsed = i; + } + } break; + + CASE_REP(CFROUND) { + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_INT; + instr.opcode |= OP_CFROUND; + } break; + + CASE_REP(ISTORE) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_MEM; + if (instr.getModCond() < randomx::StoreL3Condition) + instr.imm32 = (instr.getModMem() ? 
randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + else + instr.imm32 &= randomx::ScratchpadL3Mask; + } break; + + CASE_REP(NOP) { + + } break; + + default: + UNREACHABLE; + } + } + return jumpCount; +} diff --git a/RandomX/src/tests/runtime-distr.cpp b/RandomX/src/tests/runtime-distr.cpp new file mode 100644 index 00000000..b7663d07 --- /dev/null +++ b/RandomX/src/tests/runtime-distr.cpp @@ -0,0 +1,172 @@ + +#include +#include "utility.hpp" +#include "stopwatch.hpp" +#include "../dataset.hpp" +#include "../vm_compiled.hpp" +#include "../blake2/blake2.h" + +struct Outlier { + Outlier(int idx, double rtime) : index(idx), runtime(rtime) {} + int index; + double runtime; +}; + +int main(int argc, char** argv) { + constexpr int distributionSize = 100; + int distribution[distributionSize + 1] = { 0 }; + Stopwatch sw; + alignas(16) uint64_t hash[8]; + + uint64_t checksum = 0; + double totalRuntime = 0; + double maxRuntime = 0; + std::vector outliers; + outliers.reserve(25); + randomx_flags flags = RANDOMX_FLAG_DEFAULT; + + bool softAes, largePages, jit, verify; + int totalCount, initThreadCount; + double binSize, offset; + int32_t seed; + + readOption("--verify", argc, argv, verify); + readOption("--jit", argc, argv, jit); + readOption("--softAes", argc, argv, softAes); + readIntOption("--nonces", argc, argv, totalCount, 10000); + readIntOption("--init", argc, argv, initThreadCount, 1); + readFloatOption("--binSize", argc, argv, binSize, 1e-3); + readFloatOption("--offset", argc, argv, offset, 0); + readIntOption("--seed", argc, argv, seed, 0); + readOption("--largePages", argc, argv, largePages); + + if (!verify) { + flags = (randomx_flags)(flags | RANDOMX_FLAG_FULL_MEM); + std::cout << "Measure program runtime" << std::endl; + } + else { + std::cout << "Measure verification time" << std::endl; + } + + std::cout << " - histogram offset: " << offset << std::endl; + std::cout << " - histogram bin size: " << binSize << std::endl; + + if (jit) { + flags = 
(randomx_flags)(flags | RANDOMX_FLAG_JIT); + std::cout << " - JIT compiled mode" << std::endl; + } + else { + std::cout << " - interpreted mode" << std::endl; + } + + if (softAes) { + std::cout << " - software AES mode" << std::endl; + } + else { + flags = (randomx_flags)(flags | RANDOMX_FLAG_HARD_AES); + std::cout << " - hardware AES mode" << std::endl; + } + + if (largePages) { + flags = (randomx_flags)(flags | RANDOMX_FLAG_LARGE_PAGES); + std::cout << " - large pages mode" << std::endl; + } + else { + std::cout << " - small pages mode" << std::endl; + } + + std::cout << "Initializing..." << std::endl; + + randomx_cache *cache = randomx_alloc_cache(flags); + randomx_dataset *dataset = nullptr; + if (cache == nullptr) { + std::cout << "Cache allocation failed" << std::endl; + return 1; + } + randomx_init_cache(cache, &seed, sizeof seed); + + if (!verify) { + blake2b(&hash, sizeof hash, &seed, sizeof seed, nullptr, 0); + + dataset = randomx_alloc_dataset(flags); + if (dataset == nullptr) { + std::cout << "Dataset allocation failed" << std::endl; + return 1; + } + + std::vector threads; + uint32_t datasetItemCount = randomx_dataset_item_count(); + if (initThreadCount > 1) { + auto perThread = datasetItemCount / initThreadCount; + auto remainder = datasetItemCount % initThreadCount; + uint32_t startItem = 0; + for (int i = 0; i < initThreadCount; ++i) { + auto count = perThread + (i == initThreadCount - 1 ? remainder : 0); + threads.push_back(std::thread(&randomx_init_dataset, dataset, cache, startItem, count)); + startItem += count; + } + for (unsigned i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + } + else { + randomx_init_dataset(dataset, cache, 0, datasetItemCount); + } + randomx_release_cache(cache); + cache = nullptr; + } + + std::cout << "Running " << totalCount << " programs..." 
<< std::endl; + + randomx_vm* vm = randomx_create_vm(flags, cache, dataset); + + if (!verify) { + vm->initScratchpad(&hash); + vm->resetRoundingMode(); + } + + for (int i = 0; i < totalCount; ++i) { + sw.restart(); + if (verify) + randomx_calculate_hash(vm, &i, sizeof i, &hash); + else + vm->run(&hash); + double elapsed = sw.getElapsed(); + //std::cout << "Elapsed: " << elapsed << std::endl; + totalRuntime += elapsed; + if (elapsed > maxRuntime) + maxRuntime = elapsed; + int bin = (elapsed - offset) / binSize; + bool outlier = false; + if (bin < 0) { + bin = 0; + outlier = true; + } + if (bin > distributionSize) { + bin = distributionSize; + outlier = true; + } + if (outlier && outliers.size() < outliers.capacity()) + outliers.push_back(Outlier(i, elapsed)); + distribution[bin]++; + if(!verify) + blake2b(hash, sizeof(hash), vm->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0); + checksum ^= hash[0]; + } + + for (int i = 0; i < distributionSize + 1; ++i) { + std::cout << i << " " << distribution[i] << std::endl; + } + + std::cout << "Average runtime: " << totalRuntime / totalCount << std::endl; + std::cout << "Maximum runtime: " << maxRuntime << std::endl; + std::cout << "Checksum: " << checksum << std::endl; + + std::cout << "Outliers: " << std::endl; + + for (Outlier& ol : outliers) { + std::cout << " " << ol.index << ": " << ol.runtime << std::endl; + } + + return 0; +} \ No newline at end of file diff --git a/RandomX/src/tests/scratchpad-entropy.cpp b/RandomX/src/tests/scratchpad-entropy.cpp new file mode 100644 index 00000000..ecb3c7d6 --- /dev/null +++ b/RandomX/src/tests/scratchpad-entropy.cpp @@ -0,0 +1,50 @@ +#include +#include +#include "utility.hpp" +#include "../randomx.h" +#include "../virtual_machine.hpp" +#include "../blake2/endian.h" + +/* + Writes final scratchpads to disk as files with .spad extension, each file is 2048 KiB. 
+ Command line parameters: + --count N number of files to generate (default = 1) + --seed S different seed will give different outputs (default = 0) + + Entropy can be estimated by compressing the files using 7zip in Ultra mode: + + 7z.exe a -t7z -m0=lzma2 -mx=9 scratchpads.7z *.spad +*/ + +int main(int argc, char** argv) { + int count, seedValue; + + readIntOption("--count", argc, argv, count, 1); + readIntOption("--seed", argc, argv, seedValue, 0); + + std::cout << "Generating " << count << " scratchpad(s) using seed " << seedValue << " ..." << std::endl; + + char seed[4]; + char input[4]; + char hash[RANDOMX_HASH_SIZE]; + + store32(&seed, seedValue); + + randomx_cache *cache = randomx_alloc_cache(RANDOMX_FLAG_DEFAULT); + randomx_init_cache(cache, &seed, sizeof seed); + randomx_vm *vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, NULL); + + for (int i = 0; i < count; ++i) { + store32(&input, i); + randomx_calculate_hash(vm, &input, sizeof input, hash); + std::string filename("test-"); + filename += std::to_string(i); + filename += ".spad"; + dump((const char*)vm->getScratchpad(), randomx::ScratchpadSize, filename.c_str()); + } + + randomx_destroy_vm(vm); + randomx_release_cache(cache); + + return 0; +} diff --git a/RandomX/src/tests/stopwatch.hpp b/RandomX/src/tests/stopwatch.hpp new file mode 100644 index 00000000..d1e4912c --- /dev/null +++ b/RandomX/src/tests/stopwatch.hpp @@ -0,0 +1,84 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +#include +#include + +class Stopwatch { +public: + Stopwatch(bool startNow = false) { + reset(); + if (startNow) { + start(); + } + } + void reset() { + isRunning = false; + elapsed = 0; + } + void start() { + if (!isRunning) { + startMark = std::chrono::high_resolution_clock::now(); + isRunning = true; + } + } + void restart() { + startMark = std::chrono::high_resolution_clock::now(); + isRunning = true; + elapsed = 0; + } + void stop() { + if (isRunning) { + chrono_t endMark = std::chrono::high_resolution_clock::now(); + uint64_t ns = std::chrono::duration_cast(endMark - startMark).count(); + elapsed += ns; + isRunning = false; + } + } + double getElapsed() const { + return getElapsedNanosec() / 1e+9; + } +private: + using chrono_t = std::chrono::high_resolution_clock::time_point; + using sw_unit = std::chrono::nanoseconds; + chrono_t startMark; + uint64_t elapsed; + bool isRunning; + + uint64_t getElapsedNanosec() const { + uint64_t elns = elapsed; + if (isRunning) { + chrono_t endMark = 
std::chrono::high_resolution_clock::now(); + uint64_t ns = std::chrono::duration_cast(endMark - startMark).count(); + elns += ns; + } + return elns; + } +}; \ No newline at end of file diff --git a/RandomX/src/tests/superscalar-avalanche.cpp b/RandomX/src/tests/superscalar-avalanche.cpp new file mode 100644 index 00000000..d9f916cd --- /dev/null +++ b/RandomX/src/tests/superscalar-avalanche.cpp @@ -0,0 +1,48 @@ +#include +#include +#include +#include "../superscalar.hpp" +#include "../intrin_portable.h" + +const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 }; + +int main() { + + int insensitiveProgCount[64] = { 0 }; + std::vector dummy; + for (int bit = 0; bit < 64; ++bit) { + for (int i = 0; i < 10000; ++i) { + uint64_t ra[8] = { + 6364136223846793005ULL, + 9298410992540426748ULL, + 12065312585734608966ULL, + 9306329213124610396ULL, + 5281919268842080866ULL, + 10536153434571861004ULL, + 3398623926847679864ULL, + 9549104520008361294ULL, + }; + uint64_t rb[8]; + memcpy(rb, ra, sizeof rb); + rb[0] ^= (1ULL << bit); + randomx::SuperscalarProgram p; + randomx::Blake2Generator gen(seed, sizeof seed, i); + randomx::generateSuperscalar(p, gen); + randomx::executeSuperscalar(ra, p, nullptr); + randomx::executeSuperscalar(rb, p, nullptr); + uint64_t diff = 0; + for (int j = 0; j < 8; ++j) { + diff += __popcnt64(ra[j] ^ rb[j]); + } + if (diff < 192 || diff > 320) { + std::cout << "Seed: " << i << " diff = " << diff << std::endl; + insensitiveProgCount[bit]++; + } + } + } + for (int bit = 0; bit < 64; ++bit) { + std::cout << bit << " " << insensitiveProgCount[bit] << std::endl; + } + + return 0; +} \ No newline at end of file diff --git a/RandomX/src/tests/superscalar-init.cpp b/RandomX/src/tests/superscalar-init.cpp new file mode 100644 index 00000000..15554bbd --- /dev/null +++ b/RandomX/src/tests/superscalar-init.cpp @@ -0,0 +1,55 @@ +#include 
+#include +#include +#include +#include "../superscalar.hpp" +#include "../common.hpp" + +int main() { + std::cout << "THIS PROGRAM REQUIRES MORE THAN 16 GB OF RAM TO COMPLETE" << std::endl; + std::vector dummy; + constexpr uint64_t superscalarMul0 = 6364136223846793005ULL; + constexpr uint64_t superscalarAdd1 = 0x810A978A59F5A1FC; //9298410992540426748ULL; //9298410992540426048ULL + constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL; + constexpr uint64_t superscalarAdd3 = 0x8126B91CBF22495C; //9306329213124610396ULL; + constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL; + constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL; + constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL; + constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL; + constexpr uint32_t totalItems = randomx::DatasetSize / randomx::CacheLineSize; + std::unordered_set registerValues; + registerValues.reserve(totalItems); + registerValues.rehash(totalItems); + int collisionCount[9] = { 0 }; + for (uint32_t itemNumber = 0; itemNumber < totalItems; ++itemNumber) { + uint64_t rl[8]; + rl[0] = (itemNumber + 1) * superscalarMul0; + rl[1] = rl[0] ^ superscalarAdd1; + rl[2] = rl[0] ^ superscalarAdd2; + rl[3] = rl[0] ^ superscalarAdd3; + rl[4] = rl[0] ^ superscalarAdd4; + rl[5] = rl[0] ^ superscalarAdd5; + rl[6] = rl[0] ^ superscalarAdd6; + rl[7] = rl[0] ^ superscalarAdd7; + int blockCollisions = 0; + for (int i = 0; i < 8; ++i) { + uint64_t reducedValue = rl[i] & 0x3FFFFFFFFFFFF8; //bits 3-53 only + if (registerValues.find(reducedValue) != registerValues.end()) { + blockCollisions++; + std::cout << "Item " << itemNumber << ": collision of register r" << i << std::endl; + } + else { + registerValues.insert(reducedValue); + } + } + collisionCount[blockCollisions]++; + if ((itemNumber % (320 * 1024)) == 0) + std::cout << "Item " << itemNumber << " processed" << std::endl; + } + + for (int i = 0; i < 9; ++i) { + std::cout << i << " register(s) collide in " << 
collisionCount[i] << " items" << std::endl; + } + + return 0; +} \ No newline at end of file diff --git a/RandomX/src/tests/superscalar-stats.cpp b/RandomX/src/tests/superscalar-stats.cpp new file mode 100644 index 00000000..956580fc --- /dev/null +++ b/RandomX/src/tests/superscalar-stats.cpp @@ -0,0 +1,52 @@ +#include +#include +#include "../superscalar.hpp" +#include "../blake2_generator.hpp" + +const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 }; + +int main() { + + constexpr int count = 1000000; + int isnCounts[randomx::SuperscalarInstructionType::COUNT] = { 0 }; + int64_t asicLatency = 0; + int64_t codesize = 0; + int64_t cpuLatency = 0; + int64_t macroOps = 0; + int64_t mulCount = 0; + int64_t size = 0; + for (int i = 0; i < count; ++i) { + randomx::SuperscalarProgram prog; + randomx::Blake2Generator gen(seed, i); + randomx::generateSuperscalar(prog, gen); + asicLatency += prog.asicLatency; + codesize += prog.codeSize; + cpuLatency += prog.cpuLatency; + macroOps += prog.macroOps; + mulCount += prog.mulCount; + size += prog.getSize(); + + for (unsigned j = 0; j < prog.getSize(); ++j) { + isnCounts[prog(j).opcode]++; + } + + if ((i + 1) % (count / 100) == 0) { + std::cout << "Completed " << ((i + 1) / (count / 100)) << "% ..." << std::endl; + } + } + + std::cout << "Avg. IPC: " << (macroOps / (double)cpuLatency) << std::endl; + std::cout << "Avg. ASIC latency: " << (asicLatency / (double)count) << std::endl; + std::cout << "Avg. CPU latency: " << (cpuLatency / (double)count) << std::endl; + std::cout << "Avg. code size: " << (codesize / (double)count) << std::endl; + std::cout << "Avg. x86 ops: " << (macroOps / (double)count) << std::endl; + std::cout << "Avg. mul. count: " << (mulCount / (double)count) << std::endl; + std::cout << "Avg. 
RandomX ops: " << (size / (double)count) << std::endl; + + std::cout << "Frequencies: " << std::endl; + for (unsigned j = 0; j < randomx::SuperscalarInstructionType::COUNT; ++j) { + std::cout << j << " " << isnCounts[j] << " " << isnCounts[j] / (double)size << std::endl; + } + + return 0; +} \ No newline at end of file diff --git a/RandomX/src/tests/utility.hpp b/RandomX/src/tests/utility.hpp new file mode 100644 index 00000000..a036aee0 --- /dev/null +++ b/RandomX/src/tests/utility.hpp @@ -0,0 +1,85 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +#include +#include +#include +#include + +constexpr char hexmap[] = "0123456789abcdef"; +inline void outputHex(std::ostream& os, const char* data, int length) { + for (int i = 0; i < length; ++i) { + os << hexmap[(data[i] & 0xF0) >> 4]; + os << hexmap[data[i] & 0x0F]; + } +} + +inline void dump(const char* buffer, uint64_t count, const char* name) { + std::ofstream fout(name, std::ios::out | std::ios::binary); + fout.write(buffer, count); + fout.close(); +} + +inline void readOption(const char* option, int argc, char** argv, bool& out) { + for (int i = 0; i < argc; ++i) { + if (strcmp(argv[i], option) == 0) { + out = true; + return; + } + } + out = false; +} + +inline void readIntOption(const char* option, int argc, char** argv, int& out, int defaultValue) { + for (int i = 0; i < argc - 1; ++i) { + if (strcmp(argv[i], option) == 0 && (out = atoi(argv[i + 1])) > 0) { + return; + } + } + out = defaultValue; +} + +inline void readFloatOption(const char* option, int argc, char** argv, double& out, double defaultValue) { + for (int i = 0; i < argc - 1; ++i) { + if (strcmp(argv[i], option) == 0 && (out = atof(argv[i + 1])) > 0) { + return; + } + } + out = defaultValue; +} + +inline void readInt(int argc, char** argv, int& out, int defaultValue) { + for (int i = 0; i < argc; ++i) { + if (*argv[i] != '-' && (out = atoi(argv[i])) > 0) { + return; + } + } + out = defaultValue; +} diff --git a/RandomX/src/virtual_machine.cpp 
b/RandomX/src/virtual_machine.cpp new file mode 100644 index 00000000..d73a0247 --- /dev/null +++ b/RandomX/src/virtual_machine.cpp @@ -0,0 +1,137 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include +#include +#include +#include "virtual_machine.hpp" +#include "common.hpp" +#include "aes_hash.hpp" +#include "blake2/blake2.h" +#include "intrin_portable.h" +#include "allocator.hpp" + +randomx_vm::~randomx_vm() { + +} + +void randomx_vm::resetRoundingMode() { + rx_reset_float_state(); +} + +namespace randomx { + + static inline uint64_t getSmallPositiveFloatBits(uint64_t entropy) { + auto exponent = entropy >> 59; //0..31 + auto mantissa = entropy & mantissaMask; + exponent += exponentBias; + exponent &= exponentMask; + exponent <<= mantissaSize; + return exponent | mantissa; + } + + static inline uint64_t getStaticExponent(uint64_t entropy) { + auto exponent = constExponentBits; + exponent |= (entropy >> (64 - staticExponentBits)) << dynamicExponentBits; + exponent <<= mantissaSize; + return exponent; + } + + static inline uint64_t getFloatMask(uint64_t entropy) { + constexpr uint64_t mask22bit = (1ULL << 22) - 1; + return (entropy & mask22bit) | getStaticExponent(entropy); + } + +} + +void randomx_vm::initialize() { + store64(®.a[0].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(0))); + store64(®.a[0].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(1))); + store64(®.a[1].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(2))); + store64(®.a[1].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(3))); + store64(®.a[2].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(4))); + store64(®.a[2].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(5))); + store64(®.a[3].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(6))); + store64(®.a[3].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(7))); + mem.ma = program.getEntropy(8) & randomx::CacheLineAlignMask; + mem.mx = program.getEntropy(10); + auto addressRegisters = program.getEntropy(12); + config.readReg0 = 0 + (addressRegisters & 1); + addressRegisters >>= 1; + config.readReg1 = 2 + (addressRegisters & 1); + addressRegisters >>= 1; + 
config.readReg2 = 4 + (addressRegisters & 1); + addressRegisters >>= 1; + config.readReg3 = 6 + (addressRegisters & 1); + datasetOffset = (program.getEntropy(13) % (randomx::DatasetExtraItems + 1)) * randomx::CacheLineSize; + store64(&config.eMask[0], randomx::getFloatMask(program.getEntropy(14))); + store64(&config.eMask[1], randomx::getFloatMask(program.getEntropy(15))); +} + +namespace randomx { + + alignas(16) volatile static rx_vec_i128 aesDummy; + + template + VmBase::~VmBase() { + Allocator::freeMemory(scratchpad, ScratchpadSize); + } + + template + void VmBase::allocate() { + if (datasetPtr == nullptr) + throw std::invalid_argument("Cache/Dataset not set"); + if (!softAes) { //if hardware AES is not supported, it's better to fail now than to return a ticking bomb + rx_vec_i128 tmp = rx_load_vec_i128((const rx_vec_i128*)&aesDummy); + tmp = rx_aesenc_vec_i128(tmp, tmp); + rx_store_vec_i128((rx_vec_i128*)&aesDummy, tmp); + } + scratchpad = (uint8_t*)Allocator::allocMemory(ScratchpadSize); + } + + template + void VmBase::getFinalResult(void* out, size_t outSize) { + hashAes1Rx4(scratchpad, ScratchpadSize, ®.a); + blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0); + } + + template + void VmBase::initScratchpad(void* seed) { + fillAes1Rx4(seed, ScratchpadSize, scratchpad); + } + + template + void VmBase::generateProgram(void* seed) { + fillAes4Rx4(seed, sizeof(program), &program); + } + + template class VmBase, false>; + template class VmBase, true>; + template class VmBase; + template class VmBase; +} \ No newline at end of file diff --git a/RandomX/src/virtual_machine.hpp b/RandomX/src/virtual_machine.hpp new file mode 100644 index 00000000..488994df --- /dev/null +++ b/RandomX/src/virtual_machine.hpp @@ -0,0 +1,83 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+#pragma once
+
+#include <cstdint>
+#include "common.hpp"
+#include "program.hpp"
+
+/* Global namespace for C binding */
+// Abstract base of every RandomX virtual machine: owns the program, register file,
+// configuration and scratchpad pointer shared by the concrete VM implementations.
+class randomx_vm {
+public:
+	virtual ~randomx_vm() = 0;
+	virtual void allocate() = 0;
+	virtual void getFinalResult(void* out, size_t outSize) = 0;
+	// Default no-ops: a concrete VM overrides whichever of these two it supports.
+	virtual void setDataset(randomx_dataset* dataset) { }
+	virtual void setCache(randomx_cache* cache) { }
+	virtual void initScratchpad(void* seed) = 0;
+	virtual void run(void* seed) = 0;
+	void resetRoundingMode();
+	randomx::RegisterFile *getRegisterFile() {
+		return &reg;
+	}
+	const void* getScratchpad() {
+		return scratchpad;
+	}
+	const randomx::Program& getProgram()
+	{
+		return program;
+	}
+protected:
+	void initialize();
+	alignas(64) randomx::Program program;
+	alignas(64) randomx::RegisterFile reg;
+	alignas(16) randomx::ProgramConfiguration config;
+	randomx::MemoryRegisters mem;
+	// Null until allocate() succeeds, so destroying a VM whose allocate() threw
+	// never hands an indeterminate pointer to Allocator::freeMemory.
+	uint8_t* scratchpad = nullptr;
+	// A VM is backed by either a cache or a dataset; both views share one pointer.
+	union {
+		randomx_cache* cachePtr = nullptr;
+		randomx_dataset* datasetPtr;
+	};
+	// Assigned by initialize(); zero-initialized so it is never read indeterminate.
+	uint64_t datasetOffset = 0;
+};
+
+namespace randomx {
+
+	// Common implementation layer parameterized by the scratchpad Allocator policy
+	// and by whether software AES (softAes == true) is used.
+	template<class Allocator, bool softAes>
+	class VmBase : public randomx_vm {
+	public:
+		~VmBase() override;
+		void allocate() override;
+		void initScratchpad(void* seed) override;
+		void getFinalResult(void* out, size_t outSize) override;
+	protected:
+		void generateProgram(void* seed);
+	};
+
+}
\ No newline at end of file
diff --git a/RandomX/src/virtual_memory.cpp b/RandomX/src/virtual_memory.cpp
new file mode 100644
index 00000000..b37a75cf
--- /dev/null
+++ b/RandomX/src/virtual_memory.cpp
@@ -0,0 +1,133 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+#include "virtual_memory.hpp"
+
+#include <stdexcept>
+#include <string>
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#include <windows.h>
+#else
+#ifdef __APPLE__
+#include <mach/vm_statistics.h>
+#endif
+#include <sys/types.h>
+#include <sys/mman.h>
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+#endif
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+// Returns "<function>: <system message>" for the calling thread's last Win32 error.
+// Falls back to the numeric error code when the system cannot format a message.
+std::string getErrorMessage(const char* function) {
+	// Read the code first; later API calls may overwrite the last-error value.
+	const DWORD errorCode = GetLastError();
+	LPSTR messageBuffer = nullptr;
+	const DWORD size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
+		NULL, errorCode, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL);
+	std::string message;
+	if (size != 0 && messageBuffer != nullptr)
+		message.assign(messageBuffer, size);
+	else
+		message = "error code " + std::to_string(errorCode);
+	LocalFree(messageBuffer);
+	return std::string(function) + std::string(": ") + message;
+}
+
+// Enables (bEnable != 0) or disables the named privilege on the current process token.
+// Throws std::runtime_error naming the Win32 call that failed. The token handle is
+// closed on every path, including the error paths.
+void setPrivilege(const char* pszPrivilege, BOOL bEnable) {
+	HANDLE hToken;
+	TOKEN_PRIVILEGES tp;
+
+	if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
+		throw std::runtime_error(getErrorMessage("OpenProcessToken"));
+
+	// The name is a narrow string, so call the ANSI variant explicitly instead of
+	// the UNICODE-dependent LookupPrivilegeValue macro.
+	if (!LookupPrivilegeValueA(NULL, pszPrivilege, &tp.Privileges[0].Luid)) {
+		// Build the message before CloseHandle can overwrite the last-error value.
+		const std::string message = getErrorMessage("LookupPrivilegeValue");
+		CloseHandle(hToken);
+		throw std::runtime_error(message);
+	}
+
+	tp.PrivilegeCount = 1;
+	tp.Privileges[0].Attributes = bEnable ? SE_PRIVILEGE_ENABLED : 0;
+
+	const BOOL status = AdjustTokenPrivileges(hToken, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0);
+
+	// Both the return value and the last-error code are checked: the call may report
+	// success even though the privilege was not actually assigned.
+	const DWORD error = GetLastError();
+	if (!status || (error != ERROR_SUCCESS)) {
+		const std::string message = getErrorMessage("AdjustTokenPrivileges");
+		CloseHandle(hToken);
+		throw std::runtime_error(message);
+	}
+
+	if (!CloseHandle(hToken))
+		throw std::runtime_error(getErrorMessage("CloseHandle"));
+}
+#endif
+
+// Allocates `bytes` of readable, writable and executable memory.
+// Throws std::runtime_error when the operating system refuses the request.
+void* allocExecutableMemory(std::size_t bytes) {
+	void* mem;
+#if defined(_WIN32) || defined(__CYGWIN__)
+	mem = VirtualAlloc(nullptr, bytes, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+	if (mem == nullptr)
+		throw std::runtime_error(getErrorMessage("allocExecutableMemory - VirtualAlloc"));
+#else
+	mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (mem == MAP_FAILED)
+		throw std::runtime_error("allocExecutableMemory - mmap failed");
+#endif
+	return mem;
+}
+
+// Rounds `pos` up to the next multiple of `align` (which must be non-zero).
+// Written as add-then-divide so that pos == 0 yields 0 without unsigned underflow.
+constexpr std::size_t align(std::size_t pos, std::size_t align) {
+	return (pos + align - 1) / align * align;
+}
+
+// Allocates `bytes` of read/write memory backed by large pages.
+// Throws std::runtime_error when large pages are unsupported or the allocation fails.
+void* allocLargePagesMemory(std::size_t bytes) {
+	void* mem;
+#if defined(_WIN32) || defined(__CYGWIN__)
+	setPrivilege("SeLockMemoryPrivilege", 1);
+	auto pageMinimum = GetLargePageMinimum();
+	if (pageMinimum > 0)
+		mem = VirtualAlloc(NULL, align(bytes, pageMinimum), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
+	else
+		throw std::runtime_error("allocLargePagesMemory - Large pages are not supported");
+	if (mem == nullptr)
+		throw std::runtime_error(getErrorMessage("allocLargePagesMemory - VirtualAlloc"));
+#else
+#ifdef __APPLE__
+	mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
+#else
+	mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0);
+#endif
+	if (mem == MAP_FAILED)
+		throw std::runtime_error("allocLargePagesMemory - mmap failed");
+#endif
+	return mem;
+}
+
+// Releases memory obtained from allocExecutableMemory or allocLargePagesMemory.
+// `bytes` is only used by the munmap path; failures are ignored.
+void freePagedMemory(void* ptr, std::size_t bytes) {
+#if defined(_WIN32) || defined(__CYGWIN__)
+	VirtualFree(ptr, 0, MEM_RELEASE);
+#else
+	munmap(ptr, bytes);
+#endif
+}
diff --git a/RandomX/src/virtual_memory.hpp b/RandomX/src/virtual_memory.hpp
new file mode 100644
index 00000000..d3b31db1
--- /dev/null
+++ b/RandomX/src/virtual_memory.hpp
@@ -0,0 +1,35 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +#include + +void* allocExecutableMemory(std::size_t); +void* allocLargePagesMemory(std::size_t); +void freePagedMemory(void*, std::size_t); diff --git a/RandomX/src/vm_compiled.cpp b/RandomX/src/vm_compiled.cpp new file mode 100644 index 00000000..87bc3b87 --- /dev/null +++ b/RandomX/src/vm_compiled.cpp @@ -0,0 +1,60 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+#include "vm_compiled.hpp"
+#include "common.hpp"
+
+namespace randomx {
+
+	static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct randomx::MemoryRegisters");
+	static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct randomx::RegisterFile");
+
+	// Attaches the dataset this VM reads from.
+	template<class Allocator, bool softAes>
+	void CompiledVm<Allocator, softAes>::setDataset(randomx_dataset* dataset) {
+		datasetPtr = dataset;
+	}
+
+	// Generates a program from seed, compiles it and executes it.
+	template<class Allocator, bool softAes>
+	void CompiledVm<Allocator, softAes>::run(void* seed) {
+		VmBase<Allocator, softAes>::generateProgram(seed);
+		randomx_vm::initialize();
+		compiler.generateProgram(program, config);
+		// initialize() picks datasetOffset, so the base address is computed after it.
+		mem.memory = datasetPtr->memory + datasetOffset;
+		execute();
+	}
+
+	// Invokes the compiled program for RANDOMX_PROGRAM_ITERATIONS iterations.
+	template<class Allocator, bool softAes>
+	void CompiledVm<Allocator, softAes>::execute() {
+		compiler.getProgramFunc()(reg, mem, scratchpad, RANDOMX_PROGRAM_ITERATIONS);
+	}
+
+	// NOTE(review): the template arguments below were missing from this copy of the patch;
+	// restored to match the aliases in vm_compiled.hpp - confirm against upstream.
+	template class CompiledVm<AlignedAllocator<CacheLineSize>, false>;
+	template class CompiledVm<AlignedAllocator<CacheLineSize>, true>;
+	template class CompiledVm<LargePageAllocator, false>;
+	template class CompiledVm<LargePageAllocator, true>;
+}
\ No newline at end of file
diff --git a/RandomX/src/vm_compiled.hpp b/RandomX/src/vm_compiled.hpp
new file mode 100644
index 00000000..856f00d8
--- /dev/null
+++ b/RandomX/src/vm_compiled.hpp
@@ -0,0 +1,72 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <new>
+#include <cstdint>
+#include "virtual_machine.hpp"
+#include "jit_compiler.hpp"
+#include "allocator.hpp"
+#include "dataset.hpp"
+
+namespace randomx {
+
+	// Virtual machine that executes programs through the JIT compiler and reads from a dataset.
+	template<class Allocator, bool softAes>
+	class CompiledVm : public VmBase<Allocator, softAes> {
+	public:
+		// VM objects are allocated through AlignedAllocator<CacheLineSize>;
+		// throws std::bad_alloc when the allocator returns nullptr.
+		void* operator new(size_t size) {
+			void* ptr = AlignedAllocator<CacheLineSize>::allocMemory(size);
+			if (ptr == nullptr)
+				throw std::bad_alloc();
+			return ptr;
+		}
+		void operator delete(void* ptr) {
+			AlignedAllocator<CacheLineSize>::freeMemory(ptr, sizeof(CompiledVm));
+		}
+		void setDataset(randomx_dataset* dataset) override;
+		void run(void* seed) override;
+
+		// Members of the dependent base class must be named explicitly to be visible here.
+		using VmBase<Allocator, softAes>::mem;
+		using VmBase<Allocator, softAes>::program;
+		using VmBase<Allocator, softAes>::config;
+		using VmBase<Allocator, softAes>::reg;
+		using VmBase<Allocator, softAes>::scratchpad;
+		using VmBase<Allocator, softAes>::datasetPtr;
+		using VmBase<Allocator, softAes>::datasetOffset;
+	protected:
+		void execute();
+
+		JitCompiler compiler;
+	};
+
+	// NOTE(review): the template arguments of these aliases were missing from this copy of
+	// the patch; restored from the alias names - confirm against upstream.
+	using CompiledVmDefault = CompiledVm<AlignedAllocator<CacheLineSize>, true>;
+	using CompiledVmHardAes = CompiledVm<AlignedAllocator<CacheLineSize>, false>;
+	using CompiledVmLargePage = CompiledVm<LargePageAllocator, true>;
+	using CompiledVmLargePageHardAes = CompiledVm<LargePageAllocator, false>;
+}
diff --git a/RandomX/src/vm_compiled_light.cpp b/RandomX/src/vm_compiled_light.cpp
new file mode 100644
index 00000000..c083f4aa
--- /dev/null
+++ b/RandomX/src/vm_compiled_light.cpp
@@ -0,0 +1,54 @@
+/*
+Copyright (c) 
2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+#include "vm_compiled_light.hpp"
+#include "common.hpp"
+#include <stdexcept> // NOTE(review): header name was missing in this copy; restored from upstream - confirm
+
+namespace randomx {
+
+	// Attaches the cache, points the VM memory at it and generates the
+	// superscalar hash code from the cache's programs.
+	template<class Allocator, bool softAes>
+	void CompiledLightVm<Allocator, softAes>::setCache(randomx_cache* cache) {
+		cachePtr = cache;
+		mem.memory = cache->memory;
+		compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache);
+	}
+
+	// Generates a program from seed, compiles its light-mode variant and executes it.
+	template<class Allocator, bool softAes>
+	void CompiledLightVm<Allocator, softAes>::run(void* seed) {
+		VmBase<Allocator, softAes>::generateProgram(seed);
+		randomx_vm::initialize();
+		compiler.generateProgramLight(program, config, datasetOffset);
+		CompiledVm<Allocator, softAes>::execute();
+	}
+
+	// NOTE(review): the template arguments below were missing from this copy of the patch;
+	// restored to match the aliases in vm_compiled_light.hpp - confirm against upstream.
+	template class CompiledLightVm<AlignedAllocator<CacheLineSize>, false>;
+	template class CompiledLightVm<AlignedAllocator<CacheLineSize>, true>;
+	template class CompiledLightVm<LargePageAllocator, false>;
+	template class CompiledLightVm<LargePageAllocator, true>;
+}
\ No newline at end of file
diff --git a/RandomX/src/vm_compiled_light.hpp b/RandomX/src/vm_compiled_light.hpp
new file mode 100644
index 00000000..6af82bbe
--- /dev/null
+++ b/RandomX/src/vm_compiled_light.hpp
@@ -0,0 +1,64 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <new>
+#include "vm_compiled.hpp"
+
+namespace randomx {
+
+	// JIT-compiled virtual machine for light mode: it is backed by a cache
+	// (setCache) and deliberately ignores setDataset.
+	template<class Allocator, bool softAes>
+	class CompiledLightVm : public CompiledVm<Allocator, softAes> {
+	public:
+		// VM objects are allocated through AlignedAllocator<CacheLineSize>;
+		// throws std::bad_alloc when the allocator returns nullptr.
+		void* operator new(size_t size) {
+			void* ptr = AlignedAllocator<CacheLineSize>::allocMemory(size);
+			if (ptr == nullptr)
+				throw std::bad_alloc();
+			return ptr;
+		}
+		void operator delete(void* ptr) {
+			AlignedAllocator<CacheLineSize>::freeMemory(ptr, sizeof(CompiledLightVm));
+		}
+		void setCache(randomx_cache* cache) override;
+		void setDataset(randomx_dataset* dataset) override { }
+		void run(void* seed) override;
+
+		// Members of the dependent base class must be named explicitly to be visible here.
+		using CompiledVm<Allocator, softAes>::mem;
+		using CompiledVm<Allocator, softAes>::compiler;
+		using CompiledVm<Allocator, softAes>::program;
+		using CompiledVm<Allocator, softAes>::config;
+		using CompiledVm<Allocator, softAes>::cachePtr;
+		using CompiledVm<Allocator, softAes>::datasetOffset;
+	};
+
+	// NOTE(review): the template arguments of these aliases were missing from this copy of
+	// the patch; restored from the alias names - confirm against upstream.
+	using CompiledLightVmDefault = CompiledLightVm<AlignedAllocator<CacheLineSize>, true>;
+	using CompiledLightVmHardAes = CompiledLightVm<AlignedAllocator<CacheLineSize>, false>;
+	using CompiledLightVmLargePage = CompiledLightVm<LargePageAllocator, true>;
+	using CompiledLightVmLargePageHardAes = CompiledLightVm<LargePageAllocator, false>;
+}
\ No newline at end of file
diff --git a/RandomX/src/vm_interpreted.cpp b/RandomX/src/vm_interpreted.cpp
new file mode 100644
index 00000000..dd5f217c
--- /dev/null
+++ b/RandomX/src/vm_interpreted.cpp
@@ -0,0 +1,677 @@
+/*
+Copyright (c) 2018-2019, tevador
+
+All rights reserved.
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+// NOTE(review): the six system header names below were missing from this copy of the
+// patch; restored from upstream RandomX - confirm.
+#include <iostream>
+#include <iomanip>
+#include <stdexcept>
+#include <sstream>
+#include <cmath>
+#include <cfloat>
+#include "vm_interpreted.hpp"
+#include "dataset.hpp"
+#include "intrin_portable.h"
+#include "reciprocal.h"
+
+namespace randomx {
+
+	// Constant zero source operand used by memory instructions whose src equals dst.
+	static int_reg_t Zero = 0;
+
+	// Attaches the dataset and points the VM memory at it.
+	template<class Allocator, bool softAes>
+	void InterpretedVm<Allocator, softAes>::setDataset(randomx_dataset* dataset) {
+		datasetPtr = dataset;
+		mem.memory = dataset->memory;
+	}
+
+	// Generates a program from seed and interprets it.
+	template<class Allocator, bool softAes>
+	void InterpretedVm<Allocator, softAes>::run(void* seed) {
+		VmBase<Allocator, softAes>::generateProgram(seed);
+		randomx_vm::initialize();
+		execute();
+	}
+
+	// Executes one pass over the precompiled program. The per-instruction overload
+	// may rewrite pc (CBRANCH), so the loop counter is passed by reference.
+	template<class Allocator, bool softAes>
+	void InterpretedVm<Allocator, softAes>::executeBytecode(int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]) {
+		for (int pc = 0; pc < RANDOMX_PROGRAM_SIZE; ++pc) {
+			executeBytecode(pc, r, f, e, a);
+		}
+	}
+
+	// Returns the scratchpad address (*isrc + imm) & memMask for a memory operand.
+	template<class Allocator, bool softAes>
+	FORCE_INLINE void* InterpretedVm<Allocator, softAes>::getScratchpadAddress(InstructionByteCode& ibc) {
+		uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask;
+		return scratchpad + addr;
+	}
+
+	// ANDs x with the mantissa mask, then ORs in the per-program exponent mask (config.eMask).
+	template<class Allocator, bool softAes>
+	FORCE_INLINE rx_vec_f128 InterpretedVm<Allocator, softAes>::maskRegisterExponentMantissa(rx_vec_f128 x) {
+		const rx_vec_f128 xmantissaMask = rx_set_vec_f128(dynamicMantissaMask, dynamicMantissaMask);
+		const rx_vec_f128 xexponentMask = rx_load_vec_f128((const double*)&config.eMask);
+		x = rx_and_vec_f128(x, xmantissaMask);
+		x = rx_or_vec_f128(x, xexponentMask);
+		return x;
+	}
+
+	// Executes the single precompiled instruction byteCode[pc]. The operand pointers
+	// inside the bytecode refer to the register arrays passed to precompileProgram.
+	template<class Allocator, bool softAes>
+	void InterpretedVm<Allocator, softAes>::executeBytecode(int& pc, int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]) {
+		auto& ibc = byteCode[pc];
+		switch (ibc.type)
+		{
+			case InstructionType::IADD_RS: {
+				*ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm;
+			} break;
+
+			case InstructionType::IADD_M: {
+				*ibc.idst += load64(getScratchpadAddress(ibc));
+			} break;
+
+			case InstructionType::ISUB_R: {
+				*ibc.idst -= *ibc.isrc;
+			} break;
+
+			case InstructionType::ISUB_M: {
+				*ibc.idst -= load64(getScratchpadAddress(ibc));
+			} break;
+
+			case InstructionType::IMUL_R: { //also handles IMUL_RCP
+				*ibc.idst *= *ibc.isrc;
+			} break;
+
+			case InstructionType::IMUL_M: {
+				*ibc.idst *= load64(getScratchpadAddress(ibc));
+			} break;
+
+			case InstructionType::IMULH_R: {
+				*ibc.idst = mulh(*ibc.idst, *ibc.isrc);
+			} break;
+
+			case InstructionType::IMULH_M: {
+				*ibc.idst = mulh(*ibc.idst, load64(getScratchpadAddress(ibc)));
+			} break;
+
+			case InstructionType::ISMULH_R: {
+				*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(*ibc.isrc));
+			} break;
+
+			case InstructionType::ISMULH_M: {
+				*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(load64(getScratchpadAddress(ibc))));
+			} break;
+
+			case InstructionType::INEG_R: {
+				*ibc.idst = ~(*ibc.idst) + 1; //two's complement negative
+			} break;
+
+			case InstructionType::IXOR_R: {
+				*ibc.idst ^= *ibc.isrc;
+			} break;
+
+			case InstructionType::IXOR_M: {
+				*ibc.idst ^= load64(getScratchpadAddress(ibc));
+			} break;
+
+			case InstructionType::IROR_R: {
+				*ibc.idst = rotr(*ibc.idst, *ibc.isrc & 63);
+			} break;
+
+			case InstructionType::IROL_R: {
+				*ibc.idst = rotl(*ibc.idst, *ibc.isrc & 63);
+			} break;
+
+			case InstructionType::ISWAP_R: {
+				int_reg_t temp = *ibc.isrc;
+				*ibc.isrc = *ibc.idst;
+				*ibc.idst = temp;
+			} break;
+
+			case InstructionType::FSWAP_R: {
+				*ibc.fdst = rx_swap_vec_f128(*ibc.fdst);
+			} break;
+
+			case InstructionType::FADD_R: {
+				*ibc.fdst = rx_add_vec_f128(*ibc.fdst, *ibc.fsrc);
+			} break;
+
+			case InstructionType::FADD_M: {
+				rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc));
+				*ibc.fdst = rx_add_vec_f128(*ibc.fdst, fsrc);
+			} break;
+
+			case InstructionType::FSUB_R: {
+				*ibc.fdst = rx_sub_vec_f128(*ibc.fdst, *ibc.fsrc);
+			} break;
+
+			case InstructionType::FSUB_M: {
+				rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc));
+				*ibc.fdst = rx_sub_vec_f128(*ibc.fdst, fsrc);
+			} break;
+
+			case InstructionType::FSCAL_R: {
+				const rx_vec_f128 mask = rx_set1_vec_f128(0x80F0000000000000);
+				*ibc.fdst = rx_xor_vec_f128(*ibc.fdst, mask);
+			} break;
+
+			case InstructionType::FMUL_R: {
+				*ibc.fdst = rx_mul_vec_f128(*ibc.fdst, *ibc.fsrc);
+			} break;
+
+			case InstructionType::FDIV_M: {
+				rx_vec_f128 fsrc = maskRegisterExponentMantissa(rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc)));
+				*ibc.fdst = rx_div_vec_f128(*ibc.fdst, fsrc);
+			} break;
+
+			case InstructionType::FSQRT_R: {
+				*ibc.fdst = rx_sqrt_vec_f128(*ibc.fdst);
+			} break;
+
+			case InstructionType::CBRANCH: {
+				*ibc.isrc += ibc.imm;
+				if ((*ibc.isrc & ibc.memMask) == 0) {
+					pc = ibc.target;
+				}
+			} break;
+
+			case InstructionType::CFROUND: {
+				rx_set_rounding_mode(rotr(*ibc.isrc, ibc.imm) % 4);
+			} break;
+
+			case InstructionType::ISTORE: {
+				store64(scratchpad + ((*ibc.idst + ibc.imm) & ibc.memMask), *ibc.isrc);
+			} break;
+
+			case InstructionType::NOP: {
+				//nothing
+			} break;
+
+			default:
+				UNREACHABLE;
+		}
+	}
+
+	// Interprets the current program for RANDOMX_PROGRAM_ITERATIONS iterations and
+	// stores the final r, f and e registers into the register file.
+	template<class Allocator, bool softAes>
+	void InterpretedVm<Allocator, softAes>::execute() {
+		int_reg_t r[RegistersCount] = { 0 };
+		rx_vec_f128 f[RegisterCountFlt];
+		rx_vec_f128 e[RegisterCountFlt];
+		rx_vec_f128 a[RegisterCountFlt];
+
+		for(unsigned i = 0; i < RegisterCountFlt; ++i)
+			a[i] = rx_load_vec_f128(&reg.a[i].lo);
+
+		precompileProgram(r, f, e, a);
+
+		uint32_t spAddr0 = mem.mx;
+		uint32_t spAddr1 = mem.ma;
+
+		for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) {
+			// Mix two integer registers into the scratchpad read addresses.
+			uint64_t spMix = r[config.readReg0] ^ r[config.readReg1];
+			spAddr0 ^= spMix;
+			spAddr0 &= ScratchpadL3Mask64;
+			spAddr1 ^= spMix >> 32;
+			spAddr1 &= ScratchpadL3Mask64;
+
+			for (unsigned i = 0; i < RegistersCount; ++i)
+				r[i] ^= load64(scratchpad + spAddr0 + 8 * i);
+
+			for (unsigned i = 0; i < RegisterCountFlt; ++i)
+				f[i] = rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * i);
+
+			for (unsigned i = 0; i < RegisterCountFlt; ++i)
+				e[i] = maskRegisterExponentMantissa(rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * (RegisterCountFlt + i)));
+
+			executeBytecode(r, f, e, a);
+
+			// Prefetch the line for the next iteration, read the current one, then swap mx and ma.
+			mem.mx ^= r[config.readReg2] ^ r[config.readReg3];
+			mem.mx &= CacheLineAlignMask;
+			datasetPrefetch(datasetOffset + mem.mx);
+			datasetRead(datasetOffset + mem.ma, r);
+			std::swap(mem.mx, mem.ma);
+
+			for (unsigned i = 0; i < RegistersCount; ++i)
+				store64(scratchpad + spAddr1 + 8 * i, r[i]);
+
+			for (unsigned i = 0; i < RegisterCountFlt; ++i)
+				f[i] = rx_xor_vec_f128(f[i], e[i]);
+
+			for (unsigned i = 0; i < RegisterCountFlt; ++i)
+				rx_store_vec_f128((double*)(scratchpad + spAddr0 + 16 * i), f[i]);
+
+			spAddr0 = 0;
+			spAddr1 = 0;
+		}
+
+		for (unsigned i = 0; i < RegistersCount; ++i)
+			store64(&reg.r[i], r[i]);
+
+		for (unsigned i = 0; i < RegisterCountFlt; ++i)
+			rx_store_vec_f128(&reg.f[i].lo, f[i]);
+
+		for (unsigned i = 0; i < RegisterCountFlt; ++i)
+			rx_store_vec_f128(&reg.e[i].lo, e[i]);
+	}
+
+	// XORs RegistersCount 64-bit words read from mem.memory + address into r.
+	template<class Allocator, bool softAes>
+	void InterpretedVm<Allocator, softAes>::datasetRead(uint64_t address, int_reg_t(&r)[RegistersCount]) {
+		uint64_t* datasetLine = (uint64_t*)(mem.memory + address);
+		for (int i = 0; i < RegistersCount; ++i)
+			r[i] ^= datasetLine[i];
+	}
+
+	// Issues a prefetch for mem.memory + address.
+	template<class Allocator, bool softAes>
+	void InterpretedVm<Allocator, softAes>::datasetPrefetch(uint64_t address) {
+		rx_prefetch_nta(mem.memory + address);
+	}
+
+#include "instruction_weights.hpp"
+
+	template<class Allocator, bool softAes>
+	void InterpretedVm<Allocator, softAes>::precompileProgram(int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]) {
+		int registerUsage[RegistersCount];
+		for (unsigned i = 0; i < RegistersCount; ++i) {
+			registerUsage[i] = -1;
+		}
+		for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
+			auto& instr = program(i);
+			auto& ibc = byteCode[i];
+			switch (instr.opcode) {
+				CASE_REP(IADD_RS) {
+					auto dst = instr.dst % RegistersCount;
+					auto src = instr.src % RegistersCount;
+					ibc.type = InstructionType::IADD_RS;
+					ibc.idst = &r[dst];
+					if (dst != RegisterNeedsDisplacement) {
+						ibc.isrc = &r[src];
+						ibc.shift = instr.getModShift();
+						ibc.imm = 0;
+					}
+					else {
+						ibc.isrc = &r[src];
+						ibc.shift = instr.getModShift();
+						ibc.imm = signExtend2sCompl(instr.getImm32());
+					}
+					registerUsage[dst] = i;
+				} 
break; + + CASE_REP(IADD_M) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IADD_M; + ibc.idst = &r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); + if (src != dst) { + ibc.isrc = &r[src]; + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + } + else { + ibc.isrc = &Zero; + ibc.memMask = ScratchpadL3Mask; + } + registerUsage[dst] = i; + } break; + + CASE_REP(ISUB_R) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::ISUB_R; + ibc.idst = &r[dst]; + if (src != dst) { + ibc.isrc = &r[src]; + } + else { + ibc.imm = signExtend2sCompl(instr.getImm32()); + ibc.isrc = &ibc.imm; + } + registerUsage[dst] = i; + } break; + + CASE_REP(ISUB_M) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::ISUB_M; + ibc.idst = &r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); + if (src != dst) { + ibc.isrc = &r[src]; + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + } + else { + ibc.isrc = &Zero; + ibc.memMask = ScratchpadL3Mask; + } + registerUsage[dst] = i; + } break; + + CASE_REP(IMUL_R) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IMUL_R; + ibc.idst = &r[dst]; + if (src != dst) { + ibc.isrc = &r[src]; + } + else { + ibc.imm = signExtend2sCompl(instr.getImm32()); + ibc.isrc = &ibc.imm; + } + registerUsage[dst] = i; + } break; + + CASE_REP(IMUL_M) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IMUL_M; + ibc.idst = &r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); + if (src != dst) { + ibc.isrc = &r[src]; + ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); + } + else { + ibc.isrc = &Zero; + ibc.memMask = ScratchpadL3Mask; + } + registerUsage[dst] = i; + } break; + + CASE_REP(IMULH_R) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IMULH_R; + ibc.idst = &r[dst]; + ibc.isrc = &r[src]; + registerUsage[dst] = i; + } break; + + CASE_REP(IMULH_M) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IMULH_M; + ibc.idst = &r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); + if (src != dst) { + ibc.isrc = &r[src]; + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + } + else { + ibc.isrc = &Zero; + ibc.memMask = ScratchpadL3Mask; + } + registerUsage[dst] = i; + } break; + + CASE_REP(ISMULH_R) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::ISMULH_R; + ibc.idst = &r[dst]; + ibc.isrc = &r[src]; + registerUsage[dst] = i; + } break; + + CASE_REP(ISMULH_M) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::ISMULH_M; + ibc.idst = &r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); + if (src != dst) { + ibc.isrc = &r[src]; + ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); + } + else { + ibc.isrc = &Zero; + ibc.memMask = ScratchpadL3Mask; + } + registerUsage[dst] = i; + } break; + + CASE_REP(IMUL_RCP) { + uint64_t divisor = instr.getImm32(); + if (!isPowerOf2(divisor)) { + auto dst = instr.dst % RegistersCount; + ibc.type = InstructionType::IMUL_R; + ibc.idst = &r[dst]; + ibc.imm = randomx_reciprocal(divisor); + ibc.isrc = &ibc.imm; + registerUsage[dst] = i; + } + else { + ibc.type = InstructionType::NOP; + } + } break; + + CASE_REP(INEG_R) { + auto dst = instr.dst % RegistersCount; + ibc.type = InstructionType::INEG_R; + ibc.idst = &r[dst]; + registerUsage[dst] = i; + } break; + + CASE_REP(IXOR_R) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IXOR_R; + ibc.idst = &r[dst]; + if (src != dst) { + ibc.isrc = &r[src]; + } + else { + ibc.imm = signExtend2sCompl(instr.getImm32()); + ibc.isrc = &ibc.imm; + } + registerUsage[dst] = i; + } break; + + CASE_REP(IXOR_M) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IXOR_M; + ibc.idst = &r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); + if (src != dst) { + ibc.isrc = &r[src]; + ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); + } + else { + ibc.isrc = &Zero; + ibc.memMask = ScratchpadL3Mask; + } + registerUsage[dst] = i; + } break; + + CASE_REP(IROR_R) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IROR_R; + ibc.idst = &r[dst]; + if (src != dst) { + ibc.isrc = &r[src]; + } + else { + ibc.imm = instr.getImm32(); + ibc.isrc = &ibc.imm; + } + registerUsage[dst] = i; + } break; + + CASE_REP(IROL_R) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::IROL_R; + ibc.idst = &r[dst]; + if (src != dst) { + ibc.isrc = &r[src]; + } + else { + ibc.imm = instr.getImm32(); + ibc.isrc = &ibc.imm; + } + registerUsage[dst] = i; + } break; + + CASE_REP(ISWAP_R) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + if (src != dst) { + ibc.idst = &r[dst]; + ibc.isrc = &r[src]; + ibc.type = InstructionType::ISWAP_R; + registerUsage[dst] = i; + registerUsage[src] = i; + } + else { + ibc.type = InstructionType::NOP; + } + } break; + + CASE_REP(FSWAP_R) { + auto dst = instr.dst % RegistersCount; + ibc.type = InstructionType::FSWAP_R; + if (dst < RegisterCountFlt) + ibc.fdst = &f[dst]; + else + ibc.fdst = &e[dst - RegisterCountFlt]; + } break; + + CASE_REP(FADD_R) { + auto dst = instr.dst % RegisterCountFlt; + auto src = instr.src % RegisterCountFlt; + ibc.type = InstructionType::FADD_R; + ibc.fdst = &f[dst]; + ibc.fsrc = &a[src]; + } break; + + CASE_REP(FADD_M) { + auto dst = instr.dst % RegisterCountFlt; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::FADD_M; + ibc.fdst = &f[dst]; + ibc.isrc = &r[src]; + ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); + ibc.imm = signExtend2sCompl(instr.getImm32()); + } break; + + CASE_REP(FSUB_R) { + auto dst = instr.dst % RegisterCountFlt; + auto src = instr.src % RegisterCountFlt; + ibc.type = InstructionType::FSUB_R; + ibc.fdst = &f[dst]; + ibc.fsrc = &a[src]; + } break; + + CASE_REP(FSUB_M) { + auto dst = instr.dst % RegisterCountFlt; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::FSUB_M; + ibc.fdst = &f[dst]; + ibc.isrc = &r[src]; + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.imm = signExtend2sCompl(instr.getImm32()); + } break; + + CASE_REP(FSCAL_R) { + auto dst = instr.dst % RegisterCountFlt; + ibc.fdst = &f[dst]; + ibc.type = InstructionType::FSCAL_R; + } break; + + CASE_REP(FMUL_R) { + auto dst = instr.dst % RegisterCountFlt; + auto src = instr.src % RegisterCountFlt; + ibc.type = InstructionType::FMUL_R; + ibc.fdst = &e[dst]; + ibc.fsrc = &a[src]; + } break; + + CASE_REP(FDIV_M) { + auto dst = instr.dst % RegisterCountFlt; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::FDIV_M; + ibc.fdst = &e[dst]; + ibc.isrc = &r[src]; + ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); + ibc.imm = signExtend2sCompl(instr.getImm32()); + } break; + + CASE_REP(FSQRT_R) { + auto dst = instr.dst % RegisterCountFlt; + ibc.type = InstructionType::FSQRT_R; + ibc.fdst = &e[dst]; + } break; + + CASE_REP(CBRANCH) { + ibc.type = InstructionType::CBRANCH; + //jump condition + int reg = instr.dst % RegistersCount; + ibc.isrc = &r[reg]; + ibc.target = registerUsage[reg]; + int shift = instr.getModCond() + ConditionOffset; + const uint64_t conditionMask = ConditionMask << shift; + ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift); + if (ConditionOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2 + ibc.imm &= ~(1ULL << (shift - 1)); + ibc.memMask = ConditionMask << shift; + //mark all registers as used + for (unsigned j = 0; j < RegistersCount; ++j) { + registerUsage[j] = i; + } + } break; + + CASE_REP(CFROUND) { + auto src = instr.src % RegistersCount; + ibc.isrc = &r[src]; + ibc.type = InstructionType::CFROUND; + ibc.imm = instr.getImm32() & 63; + } break; + + CASE_REP(ISTORE) { + auto dst = instr.dst % RegistersCount; + auto src = instr.src % RegistersCount; + ibc.type = InstructionType::ISTORE; + ibc.idst = &r[dst]; + ibc.isrc = &r[src]; + ibc.imm = signExtend2sCompl(instr.getImm32()); + if (instr.getModCond() < StoreL3Condition) + ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); + else + ibc.memMask = ScratchpadL3Mask; + } break; + + CASE_REP(NOP) { + ibc.type = InstructionType::NOP; + } break; + + default: + UNREACHABLE; + } + } + } + + template class InterpretedVm, false>; + template class InterpretedVm, true>; + template class InterpretedVm; + template class InterpretedVm; +} \ No newline at end of file diff --git a/RandomX/src/vm_interpreted.hpp b/RandomX/src/vm_interpreted.hpp new file mode 100644 index 00000000..25795a61 --- /dev/null +++ b/RandomX/src/vm_interpreted.hpp @@ -0,0 +1,102 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +#include +#include +#include "common.hpp" +#include "virtual_machine.hpp" +#include "intrin_portable.h" +#include "allocator.hpp" + +namespace randomx { + + struct InstructionByteCode { + union { + int_reg_t* idst; + rx_vec_f128* fdst; + }; + union { + int_reg_t* isrc; + rx_vec_f128* fsrc; + }; + union { + uint64_t imm; + int64_t simm; + }; + InstructionType type; + union { + int16_t target; + uint16_t shift; + }; + uint32_t memMask; + }; + + static_assert(sizeof(InstructionByteCode) == 32, "Invalid packing of struct InstructionByteCode"); + + template + class InterpretedVm : public VmBase { + public: + using VmBase::mem; + using VmBase::scratchpad; + using VmBase::program; + using VmBase::config; + using VmBase::reg; + using VmBase::datasetPtr; + using VmBase::datasetOffset; + void* operator new(size_t size) { + void* ptr = AlignedAllocator::allocMemory(size); + if (ptr == nullptr) + throw std::bad_alloc(); + return ptr; + } + void operator delete(void* ptr) { + AlignedAllocator::freeMemory(ptr, sizeof(InterpretedVm)); + } + void run(void* seed) override; + void setDataset(randomx_dataset* dataset) override; + protected: + virtual void datasetRead(uint64_t blockNumber, int_reg_t(&r)[RegistersCount]); + virtual void datasetPrefetch(uint64_t blockNumber); + private: + void execute(); + void precompileProgram(int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], 
rx_vec_f128(&a)[RegisterCountFlt]); + void executeBytecode(int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]); + void executeBytecode(int& i, int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]); + void* getScratchpadAddress(InstructionByteCode& ibc); + rx_vec_f128 maskRegisterExponentMantissa(rx_vec_f128); + + InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE]; + }; + + using InterpretedVmDefault = InterpretedVm, true>; + using InterpretedVmHardAes = InterpretedVm, false>; + using InterpretedVmLargePage = InterpretedVm; + using InterpretedVmLargePageHardAes = InterpretedVm; +} \ No newline at end of file diff --git a/RandomX/src/vm_interpreted_light.cpp b/RandomX/src/vm_interpreted_light.cpp new file mode 100644 index 00000000..c54b32f6 --- /dev/null +++ b/RandomX/src/vm_interpreted_light.cpp @@ -0,0 +1,55 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "vm_interpreted_light.hpp" +#include "dataset.hpp" + +namespace randomx { + + template + void InterpretedLightVm::setCache(randomx_cache* cache) { + cachePtr = cache; + mem.memory = cache->memory; + } + + template + void InterpretedLightVm::datasetRead(uint64_t address, int_reg_t(&r)[8]) { + uint32_t itemNumber = address / CacheLineSize; + int_reg_t rl[8]; + + initDatasetItem(cachePtr, (uint8_t*)rl, itemNumber); + + for (unsigned q = 0; q < 8; ++q) + r[q] ^= rl[q]; + } + + template class InterpretedLightVm, false>; + template class InterpretedLightVm, true>; + template class InterpretedLightVm; + template class InterpretedLightVm; +} diff --git a/RandomX/src/vm_interpreted_light.hpp b/RandomX/src/vm_interpreted_light.hpp new file mode 100644 index 00000000..02d678f6 --- /dev/null +++ b/RandomX/src/vm_interpreted_light.hpp @@ -0,0 +1,61 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +#include +#include "vm_interpreted.hpp" + +namespace randomx { + + template + class InterpretedLightVm : public InterpretedVm { + public: + using VmBase::mem; + using VmBase::cachePtr; + void* operator new(size_t size) { + void* ptr = AlignedAllocator::allocMemory(size); + if (ptr == nullptr) + throw std::bad_alloc(); + return ptr; + } + void operator delete(void* ptr) { + AlignedAllocator::freeMemory(ptr, sizeof(InterpretedLightVm)); + } + void setDataset(randomx_dataset* dataset) override { } + void setCache(randomx_cache* cache) override; + protected: + void datasetRead(uint64_t address, int_reg_t(&r)[8]) override; + void datasetPrefetch(uint64_t address) override { } + }; + + using InterpretedLightVmDefault = InterpretedLightVm, true>; + using InterpretedLightVmHardAes = InterpretedLightVm, false>; + using InterpretedLightVmLargePage = InterpretedLightVm; + using InterpretedLightVmLargePageHardAes = InterpretedLightVm; +} diff --git a/RandomX/vcxproj/api-example1.vcxproj 
b/RandomX/vcxproj/api-example1.vcxproj new file mode 100644 index 00000000..c38acc33 --- /dev/null +++ b/RandomX/vcxproj/api-example1.vcxproj @@ -0,0 +1,131 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {83EA3E54-5D91-4E01-8EF6-C1E718334F83} + apiexample1 + 10.0.17763.0 + + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + {3346a4ad-c438-4324-8b77-47a16452954b} + + + + + + + + + + + + \ No newline at end of file diff --git a/RandomX/vcxproj/api-example1.vcxproj.filters b/RandomX/vcxproj/api-example1.vcxproj.filters new file mode 100644 index 00000000..6cd41c29 --- /dev/null +++ b/RandomX/vcxproj/api-example1.vcxproj.filters @@ -0,0 +1,27 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/RandomX/vcxproj/api-example2.vcxproj b/RandomX/vcxproj/api-example2.vcxproj new file mode 100644 index 00000000..55ca8783 --- /dev/null +++ b/RandomX/vcxproj/api-example2.vcxproj @@ -0,0 +1,128 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {44947B9C-E6B1-4C06-BD01-F8EF43B59223} + apiexample2 + 10.0.17763.0 + + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + 
Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + Level3 + MaxSpeed + true + true + false + true + + + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + {3346a4ad-c438-4324-8b77-47a16452954b} + + + + + + + + + \ No newline at end of file diff --git a/RandomX/vcxproj/api-example2.vcxproj.filters b/RandomX/vcxproj/api-example2.vcxproj.filters new file mode 100644 index 00000000..c52d1e8b --- /dev/null +++ b/RandomX/vcxproj/api-example2.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/RandomX/vcxproj/benchmark.vcxproj b/RandomX/vcxproj/benchmark.vcxproj new file mode 100644 index 00000000..165f100d --- /dev/null +++ b/RandomX/vcxproj/benchmark.vcxproj @@ -0,0 +1,131 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {1E8A2E2F-9F9F-43AA-BB19-9107FEC64A70} + benchmark + 10.0.17763.0 + + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + Level3 + MaxSpeed + true + true + false + true + + + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + Disabled + false + true + + + + + Level3 + MaxSpeed + true + true + false + true + + + true + true + + + + + + + + {3346a4ad-c438-4324-8b77-47a16452954b} + + + + + + + + + \ No newline at end of file diff --git 
a/RandomX/vcxproj/benchmark.vcxproj.filters b/RandomX/vcxproj/benchmark.vcxproj.filters new file mode 100644 index 00000000..7c2339cb --- /dev/null +++ b/RandomX/vcxproj/benchmark.vcxproj.filters @@ -0,0 +1,27 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/RandomX/vcxproj/code-generator.vcxproj b/RandomX/vcxproj/code-generator.vcxproj new file mode 100644 index 00000000..ee077399 --- /dev/null +++ b/RandomX/vcxproj/code-generator.vcxproj @@ -0,0 +1,129 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {3E490DEC-1874-43AA-92DA-1AC57C217EAC} + codegenerator + 10.0.17763.0 + + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + 4194304 + + + + + Level3 + Disabled + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + + + + {3346a4ad-c438-4324-8b77-47a16452954b} + + + + + + \ No newline at end of file diff --git a/RandomX/vcxproj/code-generator.vcxproj.filters b/RandomX/vcxproj/code-generator.vcxproj.filters new file mode 100644 index 00000000..7578ae66 --- /dev/null +++ b/RandomX/vcxproj/code-generator.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + 
{67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/RandomX/vcxproj/h2inc.ps1 b/RandomX/vcxproj/h2inc.ps1 new file mode 100644 index 00000000..ded47b80 --- /dev/null +++ b/RandomX/vcxproj/h2inc.ps1 @@ -0,0 +1,90 @@ +# The MIT License (MIT) +# +# Copyright (c) .NET Foundation and Contributors +# +# All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# C to MASM include file translator +# This is replacement for the deprecated h2inc tool that used to be part of VS. + +# +# The use of [console]::WriteLine (instead of Write-Output) is intentional. +# PowerShell 2.0 (installed by default on Windows 7) wraps lines written with +# Write-Output at whatever column width is being used by the current terminal, +# even when output is being redirected to a file. 
We can't have this behavior +# because it will cause the generated file to be malformed. +# + +Function ProcessFile($filePath) { + + [console]::WriteLine("; File start: $filePath") + + Get-Content $filePath | ForEach-Object { + + if ($_ -match "^\s*#\spragma") { + # Ignore pragmas + return + } + + if ($_ -match "^\s*#\s*include\s*`"(.*)`"") + { + # Expand includes. + ProcessFile(Join-Path (Split-Path -Parent $filePath) $Matches[1]) + return + } + + if ($_ -match "^\s*#define\s+(\S+)\s*(.*)") + { + # Augment #defines with their MASM equivalent + $name = $Matches[1] + $value = $Matches[2] + + # Note that we do not handle multiline constants + + # Strip comments from value + $value = $value -replace "//.*", "" + $value = $value -replace "/\*.*\*/", "" + + # Strip whitespaces from value + $value = $value -replace "\s+$", "" + + # ignore #defines with arguments + if ($name -notmatch "\(") { + $HEX_NUMBER_PATTERN = "\b0x(\w+)\b" + $DECIMAL_NUMBER_PATTERN = "(-?\b\d+\b)" + + if ($value -match $HEX_NUMBER_PATTERN -or $value -match $DECIMAL_NUMBER_PATTERN) { + $value = $value -replace $HEX_NUMBER_PATTERN, "0`$1h" # Convert hex constants + $value = $value -replace $DECIMAL_NUMBER_PATTERN, "`$1t" # Convert dec constants + [console]::WriteLine("$name EQU $value") + } else { + [console]::WriteLine("$name TEXTEQU <$value>") + } + } + } + + # [console]::WriteLine("$_") + } + + [console]::WriteLine("; File end: $filePath") +} + +ProcessFile $args[0] diff --git a/RandomX/vcxproj/jit-performance.vcxproj b/RandomX/vcxproj/jit-performance.vcxproj new file mode 100644 index 00000000..5028e938 --- /dev/null +++ b/RandomX/vcxproj/jit-performance.vcxproj @@ -0,0 +1,128 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3} + jitperformance + 10.0.17763.0 + + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + true + v141 + MultiByte + + + 
Application + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + + + + {3346a4ad-c438-4324-8b77-47a16452954b} + + + + + + \ No newline at end of file diff --git a/RandomX/vcxproj/jit-performance.vcxproj.filters b/RandomX/vcxproj/jit-performance.vcxproj.filters new file mode 100644 index 00000000..46a0be06 --- /dev/null +++ b/RandomX/vcxproj/jit-performance.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/RandomX/vcxproj/perf-simulation.vcxproj b/RandomX/vcxproj/perf-simulation.vcxproj new file mode 100644 index 00000000..7570a8bd --- /dev/null +++ b/RandomX/vcxproj/perf-simulation.vcxproj @@ -0,0 +1,128 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2} + perfsimulation + 10.0.17763.0 + + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + + + + {3346a4ad-c438-4324-8b77-47a16452954b} + + + + + + \ No newline at end of file diff --git 
a/RandomX/vcxproj/perf-simulation.vcxproj.filters b/RandomX/vcxproj/perf-simulation.vcxproj.filters new file mode 100644 index 00000000..5870291d --- /dev/null +++ b/RandomX/vcxproj/perf-simulation.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/RandomX/vcxproj/randomx-dll.vcxproj b/RandomX/vcxproj/randomx-dll.vcxproj new file mode 100644 index 00000000..e0cf2f34 --- /dev/null +++ b/RandomX/vcxproj/randomx-dll.vcxproj @@ -0,0 +1,211 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 15.0 + {59560AD8-18E3-463E-A941-BBD808EC7C83} + Win32Proj + randomxdll + 10.0.17763.0 + + + + DynamicLibrary + true + v141 + Unicode + + + DynamicLibrary + false + v141 + true + Unicode + + + DynamicLibrary + true + v141 + Unicode + + + DynamicLibrary + false + v141 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + false + + + true + + + true + + + false + randomx + + + + Use + Level3 + MaxSpeed + true + true + true + WIN32;NDEBUG;RANDOMXDLL_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) + true + + + Windows + true + true + true + + + + + Use + Level3 + Disabled + true + WIN32;_DEBUG;RANDOMXDLL_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) + true + + + Windows + true + + + + + Use + Level3 + Disabled + true + _DEBUG;RANDOMXDLL_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) + true + + + Windows + true + + + + + NotUsing + Level3 + MaxSpeed + true + true + false + NDEBUG;RANDOMXDLL_EXPORTS;_WINDOWS;_USRDLL;RANDOMX_EXPORT=__declspec(dllexport) + true + + + 
Windows + true + true + true + + + + + + + \ No newline at end of file diff --git a/RandomX/vcxproj/randomx-dll.vcxproj.filters b/RandomX/vcxproj/randomx-dll.vcxproj.filters new file mode 100644 index 00000000..a30fa8e9 --- /dev/null +++ b/RandomX/vcxproj/randomx-dll.vcxproj.filters @@ -0,0 +1,173 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Source Files + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/RandomX/vcxproj/randomx.vcxproj b/RandomX/vcxproj/randomx.vcxproj new file mode 100644 index 00000000..59bc214f --- /dev/null +++ b/RandomX/vcxproj/randomx.vcxproj @@ -0,0 +1,196 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {3346A4AD-C438-4324-8B77-47A16452954B} + randomx + 10.0.17763.0 + + + + StaticLibrary + true + v141 + 
MultiByte + + + StaticLibrary + false + v141 + true + MultiByte + + + StaticLibrary + true + v141 + MultiByte + + + StaticLibrary + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + Level3 + Disabled + false + true + + + + + Level4 + Disabled + false + true + + + + + Level3 + MaxSpeed + true + true + false + true + NoExtensions + + + true + true + UseLinkTimeCodeGeneration + false + + + + + Level3 + MaxSpeed + true + true + false + true + AssemblyCode + + + true + true + UseLinkTimeCodeGeneration + false + + + 4194304 + + + powershell -ExecutionPolicy Bypass -File .\h2inc.ps1 ..\src\configuration.h > ..\src\asm\configuration.asm +SET ERRORLEVEL = 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/RandomX/vcxproj/randomx.vcxproj.filters b/RandomX/vcxproj/randomx.vcxproj.filters new file mode 100644 index 00000000..d21e1b1b --- /dev/null +++ b/RandomX/vcxproj/randomx.vcxproj.filters @@ -0,0 +1,188 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + 
Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Source Files + + + \ No newline at end of file diff --git a/RandomX/vcxproj/runtime-distr.vcxproj b/RandomX/vcxproj/runtime-distr.vcxproj new file mode 100644 index 00000000..3a551eb2 --- /dev/null +++ b/RandomX/vcxproj/runtime-distr.vcxproj @@ -0,0 +1,128 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {F207EC8C-C55F-46C0-8851-887A71574F54} + runtimedistr + 10.0.17763.0 + + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + Level3 + Disabled + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + + + + {3346a4ad-c438-4324-8b77-47a16452954b} + + + + + + \ No newline at end of file diff --git a/RandomX/vcxproj/runtime-distr.vcxproj.filters b/RandomX/vcxproj/runtime-distr.vcxproj.filters new file mode 100644 index 00000000..bb53c1b1 --- /dev/null +++ b/RandomX/vcxproj/runtime-distr.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end 
of file diff --git a/RandomX/vcxproj/scratchpad-entropy.vcxproj b/RandomX/vcxproj/scratchpad-entropy.vcxproj new file mode 100644 index 00000000..2e283372 --- /dev/null +++ b/RandomX/vcxproj/scratchpad-entropy.vcxproj @@ -0,0 +1,128 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {FF8BD408-AFD8-43C6-BE98-4D03B37E840B} + scratchpadentropy + 10.0.17763.0 + + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + + + + {3346a4ad-c438-4324-8b77-47a16452954b} + + + + + + \ No newline at end of file diff --git a/RandomX/vcxproj/scratchpad-entropy.vcxproj.filters b/RandomX/vcxproj/scratchpad-entropy.vcxproj.filters new file mode 100644 index 00000000..a215bfab --- /dev/null +++ b/RandomX/vcxproj/scratchpad-entropy.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/RandomX/vcxproj/superscalar-avalanche.vcxproj b/RandomX/vcxproj/superscalar-avalanche.vcxproj new file mode 100644 index 00000000..80d4afc7 --- /dev/null +++ b/RandomX/vcxproj/superscalar-avalanche.vcxproj @@ -0,0 +1,130 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {CF34A7EF-7DC9-4077-94A5-76F5425EA938} + superscalaravalanche + 10.0.17763.0 + + + 
+ Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + Level3 + MaxSpeed + true + true + false + true + + + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + + + + {3346a4ad-c438-4324-8b77-47a16452954b} + + + + + + + \ No newline at end of file diff --git a/RandomX/vcxproj/superscalar-avalanche.vcxproj.filters b/RandomX/vcxproj/superscalar-avalanche.vcxproj.filters new file mode 100644 index 00000000..6f33fce4 --- /dev/null +++ b/RandomX/vcxproj/superscalar-avalanche.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/RandomX/vcxproj/superscalar-init.vcxproj b/RandomX/vcxproj/superscalar-init.vcxproj new file mode 100644 index 00000000..e17e2ece --- /dev/null +++ b/RandomX/vcxproj/superscalar-init.vcxproj @@ -0,0 +1,130 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {E59DC709-9B12-4A53-BAF3-79398821C376} + superscalarinit + 10.0.17763.0 + + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + Level3 + MaxSpeed + true + true + false + true + + + true + true + + + + + Level3 + Disabled + false + true + + + + + Level3 + Disabled + false + true + + + + + Level3 + MaxSpeed + true 
+ true + false + true + + + true + true + + + + + + + + {3346a4ad-c438-4324-8b77-47a16452954b} + + + + + + + \ No newline at end of file diff --git a/RandomX/vcxproj/superscalar-init.vcxproj.filters b/RandomX/vcxproj/superscalar-init.vcxproj.filters new file mode 100644 index 00000000..d78d2810 --- /dev/null +++ b/RandomX/vcxproj/superscalar-init.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/RandomX/vcxproj/superscalar-stats.vcxproj b/RandomX/vcxproj/superscalar-stats.vcxproj new file mode 100644 index 00000000..33d26c5e --- /dev/null +++ b/RandomX/vcxproj/superscalar-stats.vcxproj @@ -0,0 +1,128 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {0173D560-8C12-46B3-B467-0C6E7573AA0B} + superscalarstats + 10.0.17763.0 + + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + Level3 + MaxSpeed + true + true + false + true + + + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + + + + {3346a4ad-c438-4324-8b77-47a16452954b} + + + + + + \ No newline at end of file diff --git a/RandomX/vcxproj/superscalar-stats.vcxproj.filters b/RandomX/vcxproj/superscalar-stats.vcxproj.filters new file mode 100644 index 00000000..6d5129c3 --- /dev/null +++ b/RandomX/vcxproj/superscalar-stats.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + 
cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/cmake/FindRandomX.cmake b/cmake/FindRandomX.cmake index 5696d563..c5b07efd 100644 --- a/cmake/FindRandomX.cmake +++ b/cmake/FindRandomX.cmake @@ -1,25 +1,3 @@ -find_path( - RANDOMX_INCLUDE_DIR - NAMES randomx.h - PATHS "${XMRIG_DEPS}" ENV "XMRIG_DEPS" - PATH_SUFFIXES "include" - NO_DEFAULT_PATH -) - -find_path(RANDOMX_INCLUDE_DIR NAMES randomx.h) - -find_library( - RANDOMX_LIBRARY - NAMES librandomx.a randomx librandomx - PATHS "${XMRIG_DEPS}" ENV "XMRIG_DEPS" - PATH_SUFFIXES "lib" - NO_DEFAULT_PATH -) - -find_library(RANDOMX_LIBRARY NAMES librandomx.a randomx librandomx) - -set(RANDOMX_LIBRARIES ${RANDOMX_LIBRARY}) -set(RANDOMX_INCLUDE_DIRS ${RANDOMX_INCLUDE_DIR}) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(RANDOMX DEFAULT_MSG RANDOMX_LIBRARY RANDOMX_INCLUDE_DIR) +add_subdirectory(RandomX) +set(RANDOMX_INCLUDE_DIR RandomX/src) +set(RANDOMX_LIBRARIES randomx) From 24d43c013facc25ac3063f75698615d097ac1714 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 15:11:45 -0700 Subject: [PATCH 122/141] Disabled UNICODE on Windows --- RandomX/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/RandomX/CMakeLists.txt b/RandomX/CMakeLists.txt index 3c248495..c0008321 100644 --- a/RandomX/CMakeLists.txt +++ b/RandomX/CMakeLists.txt @@ -78,3 +78,5 @@ set_property(TARGET randomx PROPERTY CXX_STANDARD 11) # cheat because cmake and ccache hate each other set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY LANGUAGE C) + +add_definitions(/UUNICODE) \ No newline at end of file From 3b8c1bdae2f1145c7b6cd306c0e3e299f914613b Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 15:16:42 
-0700 Subject: [PATCH 123/141] Disabled UNICODE on Windows --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d3ce86f2..564b3df8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -151,7 +151,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux") endif() add_definitions(/D__STDC_FORMAT_MACROS) -add_definitions(/DUNICODE) +add_definitions(/UUNICODE) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") From 0e3914e26d48dd70623d24a473c656ed3cee6db2 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 15:16:45 -0700 Subject: [PATCH 124/141] Disabled UNICODE on Windows --- RandomX/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/RandomX/CMakeLists.txt b/RandomX/CMakeLists.txt index c0008321..3c248495 100644 --- a/RandomX/CMakeLists.txt +++ b/RandomX/CMakeLists.txt @@ -78,5 +78,3 @@ set_property(TARGET randomx PROPERTY CXX_STANDARD 11) # cheat because cmake and ccache hate each other set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY LANGUAGE C) - -add_definitions(/UUNICODE) \ No newline at end of file From d4537f443eb02935edda8d6f1e42c23248fd1b5e Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 15:32:27 -0700 Subject: [PATCH 125/141] Fixed wchar case? 
--- src/Mem_win.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Mem_win.cpp b/src/Mem_win.cpp index e7afb5d3..b006a41c 100644 --- a/src/Mem_win.cpp +++ b/src/Mem_win.cpp @@ -82,7 +82,7 @@ static BOOL SetLockPagesPrivilege() { } -static LSA_UNICODE_STRING StringToLsaUnicodeString(LPCTSTR string) { +static LSA_UNICODE_STRING StringToLsaUnicodeString(LPWSTR string) { LSA_UNICODE_STRING lsaString; DWORD dwLen = (DWORD) wcslen(string); @@ -119,7 +119,13 @@ static BOOL ObtainLockPagesPrivilege() { BOOL result = FALSE; if (LsaOpenPolicy(nullptr, &attributes, POLICY_ALL_ACCESS, &handle) == 0) { - LSA_UNICODE_STRING str = StringToLsaUnicodeString(_T(SE_LOCK_MEMORY_NAME)); + + LPCTSTR string = _T(SE_LOCK_MEMORY_NAME); + DWORD dwLen = (DWORD) strlen(string); + std::wstring wc( dwLen + 1, L'#' ); + mbstowcs( &wc[0], string, dwLen + 1); + + LSA_UNICODE_STRING str = StringToLsaUnicodeString(wc.c_str()); if (LsaAddAccountRights(handle, user->User.Sid, &str, 1) == 0) { LOG_NOTICE("Huge pages support was successfully enabled, but reboot required to use it"); From 556c01d75ccab833e30eb1d49c00ca03c69ec7e8 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 15:39:34 -0700 Subject: [PATCH 126/141] Fixed wchar case? 
--- src/Mem_win.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/Mem_win.cpp b/src/Mem_win.cpp index b006a41c..2f376e04 100644 --- a/src/Mem_win.cpp +++ b/src/Mem_win.cpp @@ -119,13 +119,9 @@ static BOOL ObtainLockPagesPrivilege() { BOOL result = FALSE; if (LsaOpenPolicy(nullptr, &attributes, POLICY_ALL_ACCESS, &handle) == 0) { - - LPCTSTR string = _T(SE_LOCK_MEMORY_NAME); - DWORD dwLen = (DWORD) strlen(string); - std::wstring wc( dwLen + 1, L'#' ); - mbstowcs( &wc[0], string, dwLen + 1); - - LSA_UNICODE_STRING str = StringToLsaUnicodeString(wc.c_str()); + wchar_t wc[256]; + swprintf(wc, 256, L"%hs", T(SE_LOCK_MEMORY_NAME)); + LSA_UNICODE_STRING str = StringToLsaUnicodeString(wc); if (LsaAddAccountRights(handle, user->User.Sid, &str, 1) == 0) { LOG_NOTICE("Huge pages support was successfully enabled, but reboot required to use it"); From 9e4da80852ab4e394fa5f8f4ea9476ec18fc251d Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 15:44:29 -0700 Subject: [PATCH 127/141] Fixed wchar case? 
--- src/Mem_win.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Mem_win.cpp b/src/Mem_win.cpp index 2f376e04..ad1612e9 100644 --- a/src/Mem_win.cpp +++ b/src/Mem_win.cpp @@ -119,9 +119,10 @@ static BOOL ObtainLockPagesPrivilege() { BOOL result = FALSE; if (LsaOpenPolicy(nullptr, &attributes, POLICY_ALL_ACCESS, &handle) == 0) { - wchar_t wc[256]; - swprintf(wc, 256, L"%hs", T(SE_LOCK_MEMORY_NAME)); - LSA_UNICODE_STRING str = StringToLsaUnicodeString(wc); + char* s = T(SE_LOCK_MEMORY_NAME); + wchar_t ws[256]; + mbstowcs( &ws[0], s, strlen(s) + 1); + LSA_UNICODE_STRING str = StringToLsaUnicodeString(ws); if (LsaAddAccountRights(handle, user->User.Sid, &str, 1) == 0) { LOG_NOTICE("Huge pages support was successfully enabled, but reboot required to use it"); From 49775ee7c2ffbf61f48b6066ec4679a0a9b09015 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 15:45:35 -0700 Subject: [PATCH 128/141] Fixed wchar case? --- src/Mem_win.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Mem_win.cpp b/src/Mem_win.cpp index ad1612e9..8253ba5a 100644 --- a/src/Mem_win.cpp +++ b/src/Mem_win.cpp @@ -119,9 +119,8 @@ static BOOL ObtainLockPagesPrivilege() { BOOL result = FALSE; if (LsaOpenPolicy(nullptr, &attributes, POLICY_ALL_ACCESS, &handle) == 0) { - char* s = T(SE_LOCK_MEMORY_NAME); wchar_t ws[256]; - mbstowcs( &ws[0], s, strlen(s) + 1); + mbstowcs( &ws[0], SE_LOCK_MEMORY_NAME, strlen(SE_LOCK_MEMORY_NAME) + 1); LSA_UNICODE_STRING str = StringToLsaUnicodeString(ws); if (LsaAddAccountRights(handle, user->User.Sid, &str, 1) == 0) { From 571103d2ce6934700340837b697b28f54115b17f Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 15:50:03 -0700 Subject: [PATCH 129/141] Fixed wchar case? 
--- src/Mem_win.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Mem_win.cpp b/src/Mem_win.cpp index 8253ba5a..e7afb5d3 100644 --- a/src/Mem_win.cpp +++ b/src/Mem_win.cpp @@ -82,7 +82,7 @@ static BOOL SetLockPagesPrivilege() { } -static LSA_UNICODE_STRING StringToLsaUnicodeString(LPWSTR string) { +static LSA_UNICODE_STRING StringToLsaUnicodeString(LPCTSTR string) { LSA_UNICODE_STRING lsaString; DWORD dwLen = (DWORD) wcslen(string); @@ -119,9 +119,7 @@ static BOOL ObtainLockPagesPrivilege() { BOOL result = FALSE; if (LsaOpenPolicy(nullptr, &attributes, POLICY_ALL_ACCESS, &handle) == 0) { - wchar_t ws[256]; - mbstowcs( &ws[0], SE_LOCK_MEMORY_NAME, strlen(SE_LOCK_MEMORY_NAME) + 1); - LSA_UNICODE_STRING str = StringToLsaUnicodeString(ws); + LSA_UNICODE_STRING str = StringToLsaUnicodeString(_T(SE_LOCK_MEMORY_NAME)); if (LsaAddAccountRights(handle, user->User.Sid, &str, 1) == 0) { LOG_NOTICE("Huge pages support was successfully enabled, but reboot required to use it"); From d69c69700542debe995aadd9057399766bbeb3c0 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 15:51:08 -0700 Subject: [PATCH 130/141] Fixed wchar case? 
--- CMakeLists.txt | 2 +- RandomX/CMakeLists.txt | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 564b3df8..d3ce86f2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -151,7 +151,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux") endif() add_definitions(/D__STDC_FORMAT_MACROS) -add_definitions(/UUNICODE) +add_definitions(/DUNICODE) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") diff --git a/RandomX/CMakeLists.txt b/RandomX/CMakeLists.txt index 3c248495..c0008321 100644 --- a/RandomX/CMakeLists.txt +++ b/RandomX/CMakeLists.txt @@ -78,3 +78,5 @@ set_property(TARGET randomx PROPERTY CXX_STANDARD 11) # cheat because cmake and ccache hate each other set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY LANGUAGE C) + +add_definitions(/UUNICODE) \ No newline at end of file From 128f44c21a5ffc8e528897036c4d650515586d65 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 16:15:36 -0700 Subject: [PATCH 131/141] Windows build fix --- RandomX/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/RandomX/CMakeLists.txt b/RandomX/CMakeLists.txt index c0008321..8bdea600 100644 --- a/RandomX/CMakeLists.txt +++ b/RandomX/CMakeLists.txt @@ -79,4 +79,8 @@ set_property(TARGET randomx PROPERTY CXX_STANDARD 11) # cheat because cmake and ccache hate each other set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY LANGUAGE C) -add_definitions(/UUNICODE) \ No newline at end of file +if (CMAKE_CXX_COMPILER_ID MATCHES MSVC) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /MT") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MT") + add_definitions(/UUNICODE) +endif() From 922449118fea45d973b94707eb44e82fff612168 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 16:23:02 -0700 Subject: [PATCH 132/141] Windows build fix --- RandomX/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/RandomX/CMakeLists.txt b/RandomX/CMakeLists.txt index 
8bdea600..837de9f1 100644 --- a/RandomX/CMakeLists.txt +++ b/RandomX/CMakeLists.txt @@ -27,6 +27,7 @@ # THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cmake_minimum_required(VERSION 2.8.7) +project(RandomX) set (randomx_sources src/aes_hash.cpp @@ -79,6 +80,8 @@ set_property(TARGET randomx PROPERTY CXX_STANDARD 11) # cheat because cmake and ccache hate each other set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY LANGUAGE C) +message ("${CMAKE_CXX_COMPILER_ID}") + if (CMAKE_CXX_COMPILER_ID MATCHES MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /MT") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MT") From e7e844dfad39c3124684a5b8e84c684463fae9b2 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 16:35:39 -0700 Subject: [PATCH 133/141] Windows build fix --- RandomX/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/RandomX/CMakeLists.txt b/RandomX/CMakeLists.txt index 837de9f1..bfb4ca61 100644 --- a/RandomX/CMakeLists.txt +++ b/RandomX/CMakeLists.txt @@ -59,6 +59,8 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() +message ("${ARCH_ID}") + if (ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64") list(APPEND randomx_sources src/jit_compiler_x86_static.S From a7746af684e9552ae1a244d2b84c267e9cd1c7d8 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 16:40:31 -0700 Subject: [PATCH 134/141] Windows build fix --- RandomX/CMakeLists.txt | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/RandomX/CMakeLists.txt b/RandomX/CMakeLists.txt index bfb4ca61..04dd2c09 100644 --- a/RandomX/CMakeLists.txt +++ b/RandomX/CMakeLists.txt @@ -27,7 +27,6 @@ # THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
cmake_minimum_required(VERSION 2.8.7) -project(RandomX) set (randomx_sources src/aes_hash.cpp @@ -59,8 +58,6 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() -message ("${ARCH_ID}") - if (ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64") list(APPEND randomx_sources src/jit_compiler_x86_static.S @@ -69,6 +66,15 @@ if (ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL " set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") endif() +if (CMAKE_CXX_COMPILER_ID MATCHES MSVC) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /MT") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MT") + add_definitions(/UUNICODE) + list(APPEND randomx_sources + src/jit_compiler_x86_static.asm + src/jit_compiler_x86.cpp) +endif() + set(RANDOMX_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/src" CACHE STRING "RandomX Include path") add_library(randomx @@ -81,11 +87,3 @@ set_property(TARGET randomx PROPERTY CXX_STANDARD 11) # cheat because cmake and ccache hate each other set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY LANGUAGE C) - -message ("${CMAKE_CXX_COMPILER_ID}") - -if (CMAKE_CXX_COMPILER_ID MATCHES MSVC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /MT") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MT") - add_definitions(/UUNICODE) -endif() From 8bc932bd5b4394dcd62326db8f6ba3624016308b Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 16:49:41 -0700 Subject: [PATCH 135/141] Windows build fix --- RandomX/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RandomX/CMakeLists.txt b/RandomX/CMakeLists.txt index 04dd2c09..7a087d39 100644 --- a/RandomX/CMakeLists.txt +++ b/RandomX/CMakeLists.txt @@ -82,7 +82,7 @@ add_library(randomx target_link_libraries(randomx PRIVATE ${CMAKE_THREAD_LIBS_INIT}) -set_property(TARGET randomx PROPERTY POSITION_INDEPENDENT_CODE ON) +# set_property(TARGET randomx PROPERTY POSITION_INDEPENDENT_CODE ON) set_property(TARGET randomx PROPERTY CXX_STANDARD 11) # cheat because 
cmake and ccache hate each other From d9b8b54bfbc2f41e5db83a8567f298ff56bd98c1 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 16:56:04 -0700 Subject: [PATCH 136/141] Windows build fix --- RandomX/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/RandomX/CMakeLists.txt b/RandomX/CMakeLists.txt index 7a087d39..fa0967c1 100644 --- a/RandomX/CMakeLists.txt +++ b/RandomX/CMakeLists.txt @@ -67,8 +67,8 @@ if (ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL " endif() if (CMAKE_CXX_COMPILER_ID MATCHES MSVC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /MT") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MT") + set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS} /MT") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /MT") add_definitions(/UUNICODE) list(APPEND randomx_sources src/jit_compiler_x86_static.asm @@ -82,7 +82,7 @@ add_library(randomx target_link_libraries(randomx PRIVATE ${CMAKE_THREAD_LIBS_INIT}) -# set_property(TARGET randomx PROPERTY POSITION_INDEPENDENT_CODE ON) +set_property(TARGET randomx PROPERTY POSITION_INDEPENDENT_CODE ON) set_property(TARGET randomx PROPERTY CXX_STANDARD 11) # cheat because cmake and ccache hate each other From ca70cd4d7551bb15bd1b48e53563b46092fc0d07 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 17:18:59 -0700 Subject: [PATCH 137/141] Fixed 7z name --- build.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.bat b/build.bat index 60429c7f..22e15c30 100644 --- a/build.bat +++ b/build.bat @@ -13,6 +13,6 @@ cmake .. -G "Visual Studio 15 2017 Win64" -DXMRIG_DEPS=%~dp0\build\xmrig-deps\ms msbuild /p:Configuration=Release xmrig.sln &&^ cd Release &&^ copy ..\..\src\config.json . 
&&^ -7za a -tzip -mx %~dp0\xmrig-%1-win64.zip xmrig.exe config.json &&^ +7z a -tzip -mx %~dp0\xmrig-%1-win64.zip xmrig.exe config.json &&^ cd %~dp0 &&^ rmdir /S /Q build From 9499d03bf1a47658c9d1093976396e60611186f6 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 22:06:39 -0700 Subject: [PATCH 138/141] Added more detailed thread perf --- src/core/config/Config.cpp | 58 +++++++++++++++++++------------------- src/core/config/Config.h | 20 ++++++------- src/workers/Workers.cpp | 6 ++-- 3 files changed, 42 insertions(+), 42 deletions(-) diff --git a/src/core/config/Config.cpp b/src/core/config/Config.cpp index e85da6b5..732e91b0 100644 --- a/src/core/config/Config.cpp +++ b/src/core/config/Config.cpp @@ -157,12 +157,12 @@ void xmrig::Config::getJSON(rapidjson::Document &doc) const // save extended "threads" based on m_threads Value threads(kObjectType); - for (int a = 0; a != xmrig::Algo::ALGO_MAX; ++ a) { - const xmrig::Algo algo = static_cast(a); - Value key(xmrig::Algorithm::perfAlgoName(xmrig::Algorithm(algo).perf_algo()), allocator); - if (threadsMode(algo) != Simple) { + for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { + const xmrig::PerfAlgo pa = static_cast(a); + Value key(xmrig::Algorithm::perfAlgoName(pa), allocator); + if (threadsMode(pa) != Simple) { Value threads2(kArrayType); - for (const IThread *thread : m_threads[algo].list) { + for (const IThread *thread : m_threads[pa].list) { threads2.PushBack(thread->toConfig(doc), allocator); } @@ -194,40 +194,40 @@ void xmrig::Config::getJSON(rapidjson::Document &doc) const bool xmrig::Config::finalize() { - for (int a = 0; a != xmrig::Algo::ALGO_MAX; ++ a) { - const xmrig::Algo algo = static_cast(a); - if (!m_threads[algo].cpu.empty()) { - m_threads[algo].mode = Advanced; + for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { + const xmrig::PerfAlgo pa = static_cast(a); + if (!m_threads[pa].cpu.empty()) { + m_threads[pa].mode = Advanced; const bool softAES = (m_aesMode == AES_AUTO ? 
(Cpu::info()->hasAES() ? AES_HW : AES_SOFT) : m_aesMode) == AES_SOFT; - for (size_t i = 0; i < m_threads[algo].cpu.size(); ++i) { - m_threads[algo].list.push_back(CpuThread::createFromData(i, algo, m_threads[algo].cpu[i], m_priority, softAES)); + for (size_t i = 0; i < m_threads[pa].cpu.size(); ++i) { + m_threads[pa].list.push_back(CpuThread::createFromData(i, xmrig::Algorithm(pa), m_threads[pa].cpu[i], m_priority, softAES)); } continue; } const AlgoVariant av = getAlgoVariant(); - m_threads[algo].mode = m_threads[algo].count ? Simple : Automatic; + m_threads[pa].mode = m_threads[pa].count ? Simple : Automatic; const Variant v = m_algorithm.variant(); - const size_t size = CpuThread::multiway(av) * cn_select_memory(algo, v) / 1024; + const size_t size = CpuThread::multiway(av) * cn_select_memory(xmrig::Algorithm(pa), v) / 1024; - if (!m_threads[algo].count) { - m_threads[algo].count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); + if (!m_threads[pa].count) { + m_threads[pa].count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); } else if (m_safe) { const size_t count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); - if (m_threads[algo].count > count) { - m_threads[algo].count = count; + if (m_threads[pa].count > count) { + m_threads[pa].count = count; } } - for (size_t i = 0; i < m_threads[algo].count; ++i) { - m_threads[algo].list.push_back(CpuThread::createFromAV(i, algo, av, m_threads[algo].mask, m_priority, m_assembly)); + for (size_t i = 0; i < m_threads[pa].count; ++i) { + m_threads[pa].list.push_back(CpuThread::createFromAV(i, xmrig::Algorithm(pa), av, m_threads[pa].mask, m_priority, m_assembly)); } - m_shouldSave = m_shouldSave || m_threads[algo].mode == Automatic; + m_shouldSave = m_shouldSave || m_threads[pa].mode == Automatic; } return true; @@ -267,9 +267,9 @@ void xmrig::Config::setPriority(int priority) // parse specific perf algo (or generic) threads config -void xmrig::Config::setThread(const rapidjson::Value &threads, 
const xmrig::Algo algo) +void xmrig::Config::setThread(const rapidjson::Value &threads, const xmrig::PerfAlgo pa) { - m_threads[algo].cpu.clear(); + m_threads[pa].cpu.clear(); for (const rapidjson::Value &value : threads.GetArray()) { if (!value.IsObject()) { @@ -280,7 +280,7 @@ void xmrig::Config::setThread(const rapidjson::Value &threads, const xmrig::Algo auto data = CpuThread::parse(value); if (data.valid) { - m_threads[algo].cpu.push_back(std::move(data)); + m_threads[pa].cpu.push_back(std::move(data)); } } } @@ -290,22 +290,22 @@ void xmrig::Config::setThread(const rapidjson::Value &threads, const xmrig::Algo void xmrig::Config::setThreads(const rapidjson::Value &threads) { if (threads.IsArray()) { - setThread(threads, m_algorithm.algo()); + setThread(threads, m_algorithm.perf_algo()); } else if (threads.IsObject()) { // parse new specific perf algo threads - for (int a = 0; a != xmrig::Algo::ALGO_MAX; ++ a) { - const xmrig::Algo algo = static_cast(a); - const rapidjson::Value &threads2 = threads[xmrig::Algorithm::perfAlgoName(xmrig::Algorithm(algo).perf_algo())]; + for (int a = 0; a != xmrig::PerfAlgo::PA_MAX; ++ a) { + const xmrig::PerfAlgo pa = static_cast(a); + const rapidjson::Value &threads2 = threads[xmrig::Algorithm::perfAlgoName(pa)]; if (threads2.IsArray()) { - setThread(threads2, algo); + setThread(threads2, pa); } } } else if (threads.IsUint()) { const unsigned count = threads.GetUint(); if (count < 1024) { - m_threads[m_algorithm.algo()].count = count; + m_threads[m_algorithm.perf_algo()].count = count; } } } diff --git a/src/core/config/Config.h b/src/core/config/Config.h index 4716dd24..0451387a 100644 --- a/src/core/config/Config.h +++ b/src/core/config/Config.h @@ -80,17 +80,17 @@ public: inline int priority() const { return m_priority; } // access to m_threads taking into accoun that it is now separated for each perf algo - inline const std::vector &threads(const xmrig::Algo algo = INVALID_ALGO) const { - return m_threads[algo == 
INVALID_ALGO ? m_algorithm.algo() : algo].list; + inline const std::vector &threads(const xmrig::PerfAlgo pa = PA_INVALID) const { + return m_threads[pa == PA_INVALID ? m_algorithm.perf_algo() : pa].list; } - inline int threadsCount(const xmrig::Algo algo = INVALID_ALGO) const { - return m_threads[algo == INVALID_ALGO ? m_algorithm.algo() : algo].list.size(); + inline int threadsCount(const xmrig::PerfAlgo pa = PA_INVALID) const { + return m_threads[pa == PA_INVALID ? m_algorithm.perf_algo() : pa].list.size(); } - inline int64_t affinity(const xmrig::Algo algo = INVALID_ALGO) const { - return m_threads[algo == INVALID_ALGO ? m_algorithm.algo() : algo].mask; + inline int64_t affinity(const xmrig::PerfAlgo pa = PA_INVALID) const { + return m_threads[pa == PA_INVALID ? m_algorithm.perf_algo() : pa].mask; } - inline ThreadsMode threadsMode(const xmrig::Algo algo = INVALID_ALGO) const { - return m_threads[algo == INVALID_ALGO ? m_algorithm.algo() : algo].mode; + inline ThreadsMode threadsMode(const xmrig::PerfAlgo pa = PA_INVALID) const { + return m_threads[pa == PA_INVALID ? 
m_algorithm.perf_algo() : pa].mode; } // access to perf algo results @@ -104,7 +104,7 @@ private: void setMaxCpuUsage(int max); void setPriority(int priority); // parse specific perf algo (or generic) threads config - void setThread(const rapidjson::Value &threads, xmrig::Algo); + void setThread(const rapidjson::Value &threads, xmrig::PerfAlgo); void setThreads(const rapidjson::Value &threads); AlgoVariant getAlgoVariant() const; @@ -138,7 +138,7 @@ private: int m_maxCpuUsage; int m_priority; // threads config for each algo - Threads m_threads[xmrig::Algo::ALGO_MAX]; + Threads m_threads[xmrig::PerfAlgo::PA_MAX]; // perf algo hashrate results float m_algo_perf[xmrig::PerfAlgo::PA_MAX]; }; diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index 4dce6101..4a35db17 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -165,7 +165,6 @@ void Workers::setJob(const xmrig::Job &job, bool donate) m_paused = 0; } - void Workers::start(xmrig::Controller *controller) { # ifdef APP_DEBUG @@ -234,15 +233,16 @@ void Workers::soft_stop() // stop current workers leaving uv stuff intact (used // setups workers based on specified algorithm (or its basic perf algo more specifically) void Workers::switch_algo(const xmrig::Algorithm& algorithm) { - if (m_status.algo == algorithm.algo()) return; + if (xmrig::Algorithm(m_status.algo, m_status.variant).perf_algo() == algorithm.perf_algo()) return; soft_stop(); m_sequence = 1; m_paused = 1; - const std::vector &threads = m_controller->config()->threads(algorithm.algo()); + const std::vector &threads = m_controller->config()->threads(algorithm.perf_algo()); m_status.algo = algorithm.algo(); + m_status.variant = algorithm.variant(); m_status.threads = threads.size(); // string with multiway thread info From 4bc8afa4270297c780f4015dd988b088c103806d Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 22:20:39 -0700 Subject: [PATCH 139/141] Algo usage fix --- src/core/config/Config.cpp | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/config/Config.cpp b/src/core/config/Config.cpp index 732e91b0..aead28c7 100644 --- a/src/core/config/Config.cpp +++ b/src/core/config/Config.cpp @@ -201,7 +201,7 @@ bool xmrig::Config::finalize() const bool softAES = (m_aesMode == AES_AUTO ? (Cpu::info()->hasAES() ? AES_HW : AES_SOFT) : m_aesMode) == AES_SOFT; for (size_t i = 0; i < m_threads[pa].cpu.size(); ++i) { - m_threads[pa].list.push_back(CpuThread::createFromData(i, xmrig::Algorithm(pa), m_threads[pa].cpu[i], m_priority, softAES)); + m_threads[pa].list.push_back(CpuThread::createFromData(i, xmrig::Algorithm(pa).algo(), m_threads[pa].cpu[i], m_priority, softAES)); } continue; @@ -211,7 +211,7 @@ bool xmrig::Config::finalize() m_threads[pa].mode = m_threads[pa].count ? Simple : Automatic; const Variant v = m_algorithm.variant(); - const size_t size = CpuThread::multiway(av) * cn_select_memory(xmrig::Algorithm(pa), v) / 1024; + const size_t size = CpuThread::multiway(av) * cn_select_memory(xmrig::Algorithm(pa).algo(), v) / 1024; if (!m_threads[pa].count) { m_threads[pa].count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); @@ -224,7 +224,7 @@ bool xmrig::Config::finalize() } for (size_t i = 0; i < m_threads[pa].count; ++i) { - m_threads[pa].list.push_back(CpuThread::createFromAV(i, xmrig::Algorithm(pa), av, m_threads[pa].mask, m_priority, m_assembly)); + m_threads[pa].list.push_back(CpuThread::createFromAV(i, xmrig::Algorithm(pa).algo(), av, m_threads[pa].mask, m_priority, m_assembly)); } m_shouldSave = m_shouldSave || m_threads[pa].mode == Automatic; From 01b7084ea4995344ea6426d81d2cd38b26a8e9a9 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Tue, 18 Jun 2019 22:31:44 -0700 Subject: [PATCH 140/141] Updated version --- src/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.h b/src/version.h index 2f6a0d28..ac54bc1f 100644 --- a/src/version.h +++ b/src/version.h @@ -28,7 +28,7 @@ #define APP_ID "xmrig" 
#define APP_NAME "XMRig" #define APP_DESC "XMRig CPU miner" -#define APP_VERSION "2.15.4-beta-mo1" +#define APP_VERSION "2.15.4-beta-mo2" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com" From b68132f88416803954ec9555de7421e3e7fc0ca5 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Wed, 26 Jun 2019 09:03:04 -0700 Subject: [PATCH 141/141] Fixed Windows RandomX support --- RandomX/src/asm/configuration.asm | 10 +++++----- src/version.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/RandomX/src/asm/configuration.asm b/RandomX/src/asm/configuration.asm index f2f2069c..5192342b 100644 --- a/RandomX/src/asm/configuration.asm +++ b/RandomX/src/asm/configuration.asm @@ -2,16 +2,16 @@ RANDOMX_ARGON_MEMORY EQU 262144t RANDOMX_ARGON_ITERATIONS EQU 3t RANDOMX_ARGON_LANES EQU 1t -RANDOMX_ARGON_SALT TEXTEQU <"RandomX\x03"> +RANDOMX_ARGON_SALT TEXTEQU <"RandomWOW\x01"> RANDOMX_CACHE_ACCESSES EQU 8t RANDOMX_SUPERSCALAR_LATENCY EQU 170t RANDOMX_DATASET_BASE_SIZE EQU 2147483648t RANDOMX_DATASET_EXTRA_SIZE EQU 33554368t RANDOMX_PROGRAM_SIZE EQU 256t -RANDOMX_PROGRAM_ITERATIONS EQU 2048t -RANDOMX_PROGRAM_COUNT EQU 8t -RANDOMX_SCRATCHPAD_L3 EQU 2097152t -RANDOMX_SCRATCHPAD_L2 EQU 262144t +RANDOMX_PROGRAM_ITERATIONS EQU 1024t +RANDOMX_PROGRAM_COUNT EQU 16t +RANDOMX_SCRATCHPAD_L3 EQU 1048576t +RANDOMX_SCRATCHPAD_L2 EQU 131072t RANDOMX_SCRATCHPAD_L1 EQU 16384t RANDOMX_JUMP_BITS EQU 8t RANDOMX_JUMP_OFFSET EQU 8t diff --git a/src/version.h b/src/version.h index ac54bc1f..013885c1 100644 --- a/src/version.h +++ b/src/version.h @@ -28,7 +28,7 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig CPU miner" -#define APP_VERSION "2.15.4-beta-mo2" +#define APP_VERSION "2.15.4-beta-mo3" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com"