#2.2.1
- Integrated RandomSFX algo (rx/sfx) - Performance improvements for RandomX variants - Fixed some cc connection problems
This commit is contained in:
parent
621dffeafc
commit
23430259fa
24 changed files with 4340 additions and 3343 deletions
|
@ -1,3 +1,7 @@
|
||||||
|
# 2.2.1
|
||||||
|
* Integrated RandomSFX algo (rx/sfx)
|
||||||
|
* Performance improvements for RandomX variants
|
||||||
|
* Fixed some cc connection problems
|
||||||
# 2.2.0
|
# 2.2.0
|
||||||
* Integrated RandomxARQ algo (rx/arq)
|
* Integrated RandomxARQ algo (rx/arq)
|
||||||
* Dashboard:
|
* Dashboard:
|
||||||
|
|
|
@ -261,6 +261,8 @@ if (WITH_CC_SERVER OR WITH_CC_CLIENT)
|
||||||
src/cc/ClientStatus.cpp
|
src/cc/ClientStatus.cpp
|
||||||
src/cc/GPUInfo.cpp)
|
src/cc/GPUInfo.cpp)
|
||||||
|
|
||||||
|
add_definitions("/DCPPHTTPLIB_USE_POLL")
|
||||||
|
|
||||||
if (WITH_ZLIB)
|
if (WITH_ZLIB)
|
||||||
set(ZLIB_ROOT ${XMRIG_DEPS})
|
set(ZLIB_ROOT ${XMRIG_DEPS})
|
||||||
find_package(ZLIB)
|
find_package(ZLIB)
|
||||||
|
|
|
@ -29,6 +29,7 @@ Full Windows/Linux compatible, and you can mix Linux and Windows miner on one XM
|
||||||
## Additional features of XMRigCC (on top of XMRig)
|
## Additional features of XMRigCC (on top of XMRig)
|
||||||
|
|
||||||
Check the [Coin Configuration](https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations) guide
|
Check the [Coin Configuration](https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations) guide
|
||||||
|
* **Support of RandomxSFX variant (algo: "rx/sfx")**
|
||||||
* **Support of RandomxARQ variant (algo: "rx/arq")**
|
* **Support of RandomxARQ variant (algo: "rx/arq")**
|
||||||
* **Support of UPX2 variant (algo: "cn-extremelite/upx2")**
|
* **Support of UPX2 variant (algo: "cn-extremelite/upx2")**
|
||||||
* **Support of CN-Conceal variant (algo: "cn/conceal")**
|
* **Support of CN-Conceal variant (algo: "cn/conceal")**
|
||||||
|
@ -133,7 +134,7 @@ xmrigDaemon -o pool.hashvault.pro:5555 -u YOUR_WALLET -p x -k --cc-url=IP_OF_CC_
|
||||||
cn-pico
|
cn-pico
|
||||||
cn-extremelite
|
cn-extremelite
|
||||||
argon2/chukwa, argon2/wrkz
|
argon2/chukwa, argon2/wrkz
|
||||||
rx/wow, rx/loki, rx/arq
|
rx/wow, rx/loki, rx/arq, rx/sfx
|
||||||
--coin=COIN specify coin instead of algorithm
|
--coin=COIN specify coin instead of algorithm
|
||||||
-o, --url=URL URL of mining server
|
-o, --url=URL URL of mining server
|
||||||
-O, --userpass=U:P username:password pair for mining server
|
-O, --userpass=U:P username:password pair for mining server
|
||||||
|
|
1389
src/3rdparty/cpp-httplib/httplib.h
vendored
1389
src/3rdparty/cpp-httplib/httplib.h
vendored
File diff suppressed because it is too large
Load diff
|
@ -96,6 +96,11 @@ void xmrig::Workers<T>::start(const std::vector<T> &data)
|
||||||
|
|
||||||
for (Thread<T> *worker : m_workers) {
|
for (Thread<T> *worker : m_workers) {
|
||||||
worker->start(Workers<T>::onReady);
|
worker->start(Workers<T>::onReady);
|
||||||
|
|
||||||
|
// This sleep is important for optimal caching!
|
||||||
|
// Threads must allocate scratchpads in order so that adjacent cores will use adjacent scratchpads
|
||||||
|
// Sub-optimal caching can result in up to 0.5% hashrate penalty
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(20));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -65,6 +65,7 @@ static const char *kCnExtremelite = "cn-extremelite";
|
||||||
static const char *kRx = "rx";
|
static const char *kRx = "rx";
|
||||||
static const char *kRxWOW = "rx/wow";
|
static const char *kRxWOW = "rx/wow";
|
||||||
static const char *kRxARQ = "rx/arq";
|
static const char *kRxARQ = "rx/arq";
|
||||||
|
static const char *kRxSFX = "rx/sfx";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef XMRIG_ALGO_ARGON2
|
#ifdef XMRIG_ALGO_ARGON2
|
||||||
|
@ -200,6 +201,7 @@ void xmrig::CpuConfig::generate()
|
||||||
m_threads.move(kRx, cpu->threads(Algorithm::RX_0));
|
m_threads.move(kRx, cpu->threads(Algorithm::RX_0));
|
||||||
m_threads.move(kRxWOW, cpu->threads(Algorithm::RX_WOW));
|
m_threads.move(kRxWOW, cpu->threads(Algorithm::RX_WOW));
|
||||||
m_threads.move(kRxARQ, cpu->threads(Algorithm::RX_ARQ));
|
m_threads.move(kRxARQ, cpu->threads(Algorithm::RX_ARQ));
|
||||||
|
m_threads.move(kRxSFX, cpu->threads(Algorithm::RX_SFX));
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
generateArgon2();
|
generateArgon2();
|
||||||
|
|
|
@ -191,8 +191,17 @@ void xmrig::CpuWorker<N>::start()
|
||||||
consumeJob();
|
consumeJob();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t storeStatsMask = 7;
|
||||||
|
|
||||||
|
# ifdef XMRIG_ALGO_RANDOMX
|
||||||
|
// RandomX is faster, we don't need to store stats so often
|
||||||
|
if (m_job.currentJob().algorithm().family() == Algorithm::RANDOM_X) {
|
||||||
|
storeStatsMask = 63;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
while (!Nonce::isOutdated(Nonce::CPU, m_job.sequence())) {
|
while (!Nonce::isOutdated(Nonce::CPU, m_job.sequence())) {
|
||||||
if ((m_count & 0x7) == 0) {
|
if ((m_count & storeStatsMask) == 0) {
|
||||||
storeStats();
|
storeStats();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -379,6 +379,8 @@ std::shared_ptr<httplib::Response> xmrig::CCClient::performRequest(const std::st
|
||||||
|
|
||||||
auto res = std::make_shared<httplib::Response>();
|
auto res = std::make_shared<httplib::Response>();
|
||||||
|
|
||||||
|
cli->follow_location(false);
|
||||||
|
|
||||||
return cli->send(req, *res) ? res : nullptr;
|
return cli->send(req, *res) ? res : nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,6 @@
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <3rdparty/cpp-httplib/httplib.h>
|
#include <3rdparty/cpp-httplib/httplib.h>
|
||||||
#include <3rdparty/base64/base64.h>
|
|
||||||
|
|
||||||
#include "base/io/log/Log.h"
|
#include "base/io/log/Log.h"
|
||||||
|
|
||||||
|
@ -147,31 +146,33 @@ int Httpd::basicAuth(const httplib::Request& req, httplib::Response& res)
|
||||||
{
|
{
|
||||||
int result = HTTP_UNAUTHORIZED;
|
int result = HTTP_UNAUTHORIZED;
|
||||||
|
|
||||||
|
std::string removeAddr = req.get_header_value("REMOTE_ADDR");
|
||||||
|
|
||||||
if (m_config->adminUser().empty() || m_config->adminPass().empty())
|
if (m_config->adminUser().empty() || m_config->adminPass().empty())
|
||||||
{
|
{
|
||||||
res.set_content(std::string("<html><body\\>"
|
res.set_content(std::string("<html><body\\>"
|
||||||
"Please configure admin user and pass to view this Page."
|
"Please configure admin user and pass to view this Page."
|
||||||
"</body><html\\>"), CONTENT_TYPE_HTML);
|
"</body><html\\>"), CONTENT_TYPE_HTML);
|
||||||
|
|
||||||
LOG_ERR("[%s] 403 FORBIDDEN - Admin user/password not set!", req.remoteAddr.c_str());
|
LOG_ERR("[%s] 403 FORBIDDEN - Admin user/password not set!", removeAddr.c_str());
|
||||||
result = HTTP_FORBIDDEN;
|
result = HTTP_FORBIDDEN;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
auto authHeader = req.get_header_value("Authorization");
|
auto authHeader = req.get_header_value("Authorization");
|
||||||
auto credentials = std::string("Basic ") + Base64::Encode(m_config->adminUser() + std::string(":") + m_config->adminPass());
|
auto credentials = httplib::make_basic_authentication_header(m_config->adminUser(), m_config->adminPass());
|
||||||
|
|
||||||
if (!authHeader.empty() && credentials == authHeader)
|
if (!authHeader.empty() && credentials.second == authHeader)
|
||||||
{
|
{
|
||||||
result = HTTP_OK;
|
result = HTTP_OK;
|
||||||
}
|
}
|
||||||
else if (authHeader.empty())
|
else if (authHeader.empty())
|
||||||
{
|
{
|
||||||
LOG_WARN("[%s] 401 UNAUTHORIZED", req.remoteAddr.c_str());
|
LOG_WARN("[%s] 401 UNAUTHORIZED", removeAddr.c_str());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
LOG_ERR("[%s] 403 FORBIDDEN - Admin user/password wrong!", req.remoteAddr.c_str());
|
LOG_ERR("[%s] 403 FORBIDDEN - Admin user/password wrong!", removeAddr.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -184,9 +185,11 @@ int Httpd::bearerAuth(const httplib::Request& req, httplib::Response& res)
|
||||||
{
|
{
|
||||||
int result = HTTP_UNAUTHORIZED;
|
int result = HTTP_UNAUTHORIZED;
|
||||||
|
|
||||||
|
std::string removeAddr = req.get_header_value("REMOTE_ADDR");
|
||||||
|
|
||||||
if (m_config->token().empty())
|
if (m_config->token().empty())
|
||||||
{
|
{
|
||||||
LOG_WARN("[%s] 200 OK - WARNING AccessToken not set!", req.remoteAddr.c_str());
|
LOG_WARN("[%s] 200 OK - WARNING AccessToken not set!", removeAddr.c_str());
|
||||||
result = HTTP_OK;
|
result = HTTP_OK;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -200,11 +203,11 @@ int Httpd::bearerAuth(const httplib::Request& req, httplib::Response& res)
|
||||||
}
|
}
|
||||||
else if (authHeader.empty())
|
else if (authHeader.empty())
|
||||||
{
|
{
|
||||||
LOG_WARN("[%s] 401 UNAUTHORIZED", req.remoteAddr.c_str());
|
LOG_WARN("[%s] 401 UNAUTHORIZED", removeAddr.c_str());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
LOG_ERR("[%s] 403 FORBIDDEN - AccessToken wrong!", req.remoteAddr.c_str());
|
LOG_ERR("[%s] 403 FORBIDDEN - AccessToken wrong!", removeAddr.c_str());
|
||||||
result = HTTP_FORBIDDEN;
|
result = HTTP_FORBIDDEN;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -107,8 +107,9 @@ int Service::handleGET(const httplib::Request& req, httplib::Response& res)
|
||||||
int resultCode = HTTP_NOT_FOUND;
|
int resultCode = HTTP_NOT_FOUND;
|
||||||
|
|
||||||
std::string clientId = req.get_param_value("clientId");
|
std::string clientId = req.get_param_value("clientId");
|
||||||
|
std::string removeAddr = req.get_header_value("REMOTE_ADDR");
|
||||||
|
|
||||||
LOG_INFO("[%s] GET %s%s%s", req.remoteAddr.c_str(), req.path.c_str(), clientId.empty() ? "" : "/?clientId=", clientId.c_str());
|
LOG_INFO("[%s] GET %s%s%s", removeAddr.c_str(), req.path.c_str(), clientId.empty() ? "" : "/?clientId=", clientId.c_str());
|
||||||
|
|
||||||
if (req.path == "/")
|
if (req.path == "/")
|
||||||
{
|
{
|
||||||
|
@ -140,14 +141,14 @@ int Service::handleGET(const httplib::Request& req, httplib::Response& res)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
LOG_WARN("[%s] 404 NOT FOUND (%s)", req.remoteAddr.c_str(), req.path.c_str());
|
LOG_WARN("[%s] 404 NOT FOUND (%s)", removeAddr.c_str(), req.path.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
resultCode = HTTP_BAD_REQUEST;
|
resultCode = HTTP_BAD_REQUEST;
|
||||||
LOG_ERR("[%s] 400 BAD REQUEST - Request does not contain clientId (%s)",
|
LOG_ERR("[%s] 400 BAD REQUEST - Request does not contain clientId (%s)",
|
||||||
req.remoteAddr.c_str(), req.path.c_str());
|
removeAddr.c_str(), req.path.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -161,8 +162,9 @@ int Service::handlePOST(const httplib::Request& req, httplib::Response& res)
|
||||||
int resultCode = HTTP_NOT_FOUND;
|
int resultCode = HTTP_NOT_FOUND;
|
||||||
|
|
||||||
std::string clientId = req.get_param_value("clientId");
|
std::string clientId = req.get_param_value("clientId");
|
||||||
|
std::string removeAddr = req.get_header_value("REMOTE_ADDR");
|
||||||
|
|
||||||
LOG_INFO("[%s] POST %s%s%s", req.remoteAddr.c_str(), req.path.c_str(), clientId.empty() ? "" : "/?clientId=", clientId.c_str());
|
LOG_INFO("[%s] POST %s%s%s", removeAddr.c_str(), req.path.c_str(), clientId.empty() ? "" : "/?clientId=", clientId.c_str());
|
||||||
|
|
||||||
if (!clientId.empty())
|
if (!clientId.empty())
|
||||||
{
|
{
|
||||||
|
@ -185,7 +187,7 @@ int Service::handlePOST(const httplib::Request& req, httplib::Response& res)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
resultCode = HTTP_BAD_REQUEST;
|
resultCode = HTTP_BAD_REQUEST;
|
||||||
LOG_WARN("[%s] 400 BAD REQUEST - Request does not contain clientId (%s)", req.remoteAddr.c_str(), req.path.c_str());
|
LOG_WARN("[%s] 400 BAD REQUEST - Request does not contain clientId (%s)", removeAddr.c_str(), req.path.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -196,7 +198,7 @@ int Service::handlePOST(const httplib::Request& req, httplib::Response& res)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
LOG_WARN("[%s] 404 NOT FOUND (%s)", req.remoteAddr.c_str(), req.path.c_str());
|
LOG_WARN("[%s] 404 NOT FOUND (%s)", removeAddr.c_str(), req.path.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -271,12 +273,14 @@ int Service::setClientStatus(const httplib::Request& req, const std::string& cli
|
||||||
{
|
{
|
||||||
int resultCode = HTTP_BAD_REQUEST;
|
int resultCode = HTTP_BAD_REQUEST;
|
||||||
|
|
||||||
|
std::string removeAddr = req.get_header_value("REMOTE_ADDR");
|
||||||
|
|
||||||
rapidjson::Document document;
|
rapidjson::Document document;
|
||||||
if (!document.Parse(req.body.c_str()).HasParseError())
|
if (!document.Parse(req.body.c_str()).HasParseError())
|
||||||
{
|
{
|
||||||
ClientStatus clientStatus;
|
ClientStatus clientStatus;
|
||||||
clientStatus.parseFromJson(document);
|
clientStatus.parseFromJson(document);
|
||||||
clientStatus.setExternalIp(req.remoteAddr);
|
clientStatus.setExternalIp(removeAddr);
|
||||||
|
|
||||||
setClientLog(static_cast<size_t>(m_config->clientLogHistory()), clientId, clientStatus.getLog());
|
setClientLog(static_cast<size_t>(m_config->clientLogHistory()), clientId, clientStatus.getLog());
|
||||||
|
|
||||||
|
@ -294,7 +298,7 @@ int Service::setClientStatus(const httplib::Request& req, const std::string& cli
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
LOG_ERR("[%s] ClientStatus for client '%s' - Parse Error Occured: %d",
|
LOG_ERR("[%s] ClientStatus for client '%s' - Parse Error Occured: %d",
|
||||||
req.remoteAddr.c_str(), clientId.c_str(), document.GetParseError());
|
removeAddr.c_str(), clientId.c_str(), document.GetParseError());
|
||||||
}
|
}
|
||||||
|
|
||||||
return resultCode;
|
return resultCode;
|
||||||
|
|
|
@ -183,6 +183,7 @@ private:
|
||||||
0, // RX_WOW
|
0, // RX_WOW
|
||||||
0, // RX_LOKI
|
0, // RX_LOKI
|
||||||
0, // RX_ARQ
|
0, // RX_ARQ
|
||||||
|
0, // RX_SFX
|
||||||
# endif
|
# endif
|
||||||
# ifdef XMRIG_ALGO_ARGON2
|
# ifdef XMRIG_ALGO_ARGON2
|
||||||
0, // AR2_CHUKWA
|
0, // AR2_CHUKWA
|
||||||
|
@ -227,6 +228,7 @@ private:
|
||||||
0, // RX_WOW
|
0, // RX_WOW
|
||||||
0, // RX_LOKI
|
0, // RX_LOKI
|
||||||
0, // RX_ARQ
|
0, // RX_ARQ
|
||||||
|
0, // RX_SFX
|
||||||
# endif
|
# endif
|
||||||
# ifdef XMRIG_ALGO_ARGON2
|
# ifdef XMRIG_ALGO_ARGON2
|
||||||
0, // AR2_CHUKWA
|
0, // AR2_CHUKWA
|
||||||
|
@ -271,6 +273,7 @@ private:
|
||||||
Algorithm::INVALID, // RX_WOW
|
Algorithm::INVALID, // RX_WOW
|
||||||
Algorithm::INVALID, // RX_LOKI
|
Algorithm::INVALID, // RX_LOKI
|
||||||
Algorithm::INVALID, // RX_ARQ
|
Algorithm::INVALID, // RX_ARQ
|
||||||
|
Algorithm::INVALID, // RX_SFX
|
||||||
# endif
|
# endif
|
||||||
# ifdef XMRIG_ALGO_ARGON2
|
# ifdef XMRIG_ALGO_ARGON2
|
||||||
Algorithm::INVALID, // AR2_CHUKWA
|
Algorithm::INVALID, // AR2_CHUKWA
|
||||||
|
|
|
@ -125,6 +125,8 @@ static AlgoName const algorithm_names[] = {
|
||||||
{ "RandomXL", nullptr, Algorithm::RX_LOKI },
|
{ "RandomXL", nullptr, Algorithm::RX_LOKI },
|
||||||
{ "randomx/arq", "rx/arq", Algorithm::RX_ARQ },
|
{ "randomx/arq", "rx/arq", Algorithm::RX_ARQ },
|
||||||
{ "RandomARQ", nullptr, Algorithm::RX_ARQ },
|
{ "RandomARQ", nullptr, Algorithm::RX_ARQ },
|
||||||
|
{ "randomx/sfx", "rx/sfx", Algorithm::RX_SFX },
|
||||||
|
{ "RandomSFX", nullptr, Algorithm::RX_SFX },
|
||||||
# endif
|
# endif
|
||||||
# ifdef XMRIG_ALGO_ARGON2
|
# ifdef XMRIG_ALGO_ARGON2
|
||||||
{ "argon2/chukwa", nullptr, Algorithm::AR2_CHUKWA },
|
{ "argon2/chukwa", nullptr, Algorithm::AR2_CHUKWA },
|
||||||
|
@ -155,6 +157,7 @@ size_t xmrig::Algorithm::l2() const
|
||||||
switch (m_id) {
|
switch (m_id) {
|
||||||
case RX_0:
|
case RX_0:
|
||||||
case RX_LOKI:
|
case RX_LOKI:
|
||||||
|
case RX_SFX:
|
||||||
return 0x40000;
|
return 0x40000;
|
||||||
|
|
||||||
case RX_WOW:
|
case RX_WOW:
|
||||||
|
@ -188,6 +191,7 @@ size_t xmrig::Algorithm::l3() const
|
||||||
switch (m_id) {
|
switch (m_id) {
|
||||||
case RX_0:
|
case RX_0:
|
||||||
case RX_LOKI:
|
case RX_LOKI:
|
||||||
|
case RX_SFX:
|
||||||
return oneMiB * 2;
|
return oneMiB * 2;
|
||||||
|
|
||||||
case RX_WOW:
|
case RX_WOW:
|
||||||
|
@ -294,6 +298,7 @@ xmrig::Algorithm::Family xmrig::Algorithm::family(Id id)
|
||||||
case RX_WOW:
|
case RX_WOW:
|
||||||
case RX_LOKI:
|
case RX_LOKI:
|
||||||
case RX_ARQ:
|
case RX_ARQ:
|
||||||
|
case RX_SFX:
|
||||||
return RANDOM_X;
|
return RANDOM_X;
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
|
|
@ -77,6 +77,7 @@ public:
|
||||||
RX_WOW, // "rx/wow" RandomWOW (Wownero).
|
RX_WOW, // "rx/wow" RandomWOW (Wownero).
|
||||||
RX_LOKI, // "rx/loki" RandomXL (Loki).
|
RX_LOKI, // "rx/loki" RandomXL (Loki).
|
||||||
RX_ARQ, // "rx/arq" RandomARQ (Arqma).
|
RX_ARQ, // "rx/arq" RandomARQ (Arqma).
|
||||||
|
RX_SFX, // "rx/sfx" RandomSFX (Safex).
|
||||||
# endif
|
# endif
|
||||||
# ifdef XMRIG_ALGO_ARGON2
|
# ifdef XMRIG_ALGO_ARGON2
|
||||||
AR2_CHUKWA, // "argon2/chukwa"
|
AR2_CHUKWA, // "argon2/chukwa"
|
||||||
|
|
|
@ -212,3 +212,84 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
||||||
|
|
||||||
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||||
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||||
|
|
||||||
|
template<bool softAes>
|
||||||
|
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
||||||
|
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
|
||||||
|
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
|
||||||
|
|
||||||
|
// initial state
|
||||||
|
rx_vec_i128 hash_state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
|
||||||
|
rx_vec_i128 hash_state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
|
||||||
|
rx_vec_i128 hash_state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
|
||||||
|
rx_vec_i128 hash_state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3);
|
||||||
|
|
||||||
|
const rx_vec_i128 key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0);
|
||||||
|
const rx_vec_i128 key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1);
|
||||||
|
const rx_vec_i128 key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2);
|
||||||
|
const rx_vec_i128 key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3);
|
||||||
|
|
||||||
|
rx_vec_i128 fill_state0 = rx_load_vec_i128((rx_vec_i128*)fill_state + 0);
|
||||||
|
rx_vec_i128 fill_state1 = rx_load_vec_i128((rx_vec_i128*)fill_state + 1);
|
||||||
|
rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2);
|
||||||
|
rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3);
|
||||||
|
|
||||||
|
constexpr int PREFETCH_DISTANCE = 4096;
|
||||||
|
const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE;
|
||||||
|
scratchpadEnd -= PREFETCH_DISTANCE;
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; ++i) {
|
||||||
|
//process 64 bytes at a time in 4 lanes
|
||||||
|
while (scratchpadPtr < scratchpadEnd) {
|
||||||
|
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0));
|
||||||
|
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1));
|
||||||
|
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2));
|
||||||
|
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3));
|
||||||
|
|
||||||
|
fill_state0 = aesdec<softAes>(fill_state0, key0);
|
||||||
|
fill_state1 = aesenc<softAes>(fill_state1, key1);
|
||||||
|
fill_state2 = aesdec<softAes>(fill_state2, key2);
|
||||||
|
fill_state3 = aesenc<softAes>(fill_state3, key3);
|
||||||
|
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0);
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1);
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2);
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3);
|
||||||
|
|
||||||
|
rx_prefetch_t0(prefetchPtr);
|
||||||
|
|
||||||
|
scratchpadPtr += 64;
|
||||||
|
prefetchPtr += 64;
|
||||||
|
}
|
||||||
|
prefetchPtr = (const char*) scratchpad;
|
||||||
|
scratchpadEnd += PREFETCH_DISTANCE;
|
||||||
|
}
|
||||||
|
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)fill_state + 0, fill_state0);
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)fill_state + 1, fill_state1);
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)fill_state + 2, fill_state2);
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)fill_state + 3, fill_state3);
|
||||||
|
|
||||||
|
//two extra rounds to achieve full diffusion
|
||||||
|
rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0);
|
||||||
|
rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1);
|
||||||
|
|
||||||
|
hash_state0 = aesenc<softAes>(hash_state0, xkey0);
|
||||||
|
hash_state1 = aesdec<softAes>(hash_state1, xkey0);
|
||||||
|
hash_state2 = aesenc<softAes>(hash_state2, xkey0);
|
||||||
|
hash_state3 = aesdec<softAes>(hash_state3, xkey0);
|
||||||
|
|
||||||
|
hash_state0 = aesenc<softAes>(hash_state0, xkey1);
|
||||||
|
hash_state1 = aesdec<softAes>(hash_state1, xkey1);
|
||||||
|
hash_state2 = aesenc<softAes>(hash_state2, xkey1);
|
||||||
|
hash_state3 = aesdec<softAes>(hash_state3, xkey1);
|
||||||
|
|
||||||
|
//output hash
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)hash + 0, hash_state0);
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)hash + 1, hash_state1);
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2);
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
|
||||||
|
}
|
||||||
|
|
||||||
|
template void hashAndFillAes1Rx4<false>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||||
|
template void hashAndFillAes1Rx4<true>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
|
@ -38,3 +38,6 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
|
||||||
|
|
||||||
template<bool softAes>
|
template<bool softAes>
|
||||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
|
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
|
||||||
|
|
||||||
|
template<bool softAes>
|
||||||
|
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
|
@ -102,6 +102,7 @@ typedef __m128d rx_vec_f128;
|
||||||
#define rx_aligned_alloc(a, b) _mm_malloc(a,b)
|
#define rx_aligned_alloc(a, b) _mm_malloc(a,b)
|
||||||
#define rx_aligned_free(a) _mm_free(a)
|
#define rx_aligned_free(a) _mm_free(a)
|
||||||
#define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
|
#define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
|
||||||
|
#define rx_prefetch_t0(x) _mm_prefetch((const char *)(x), _MM_HINT_T0)
|
||||||
|
|
||||||
#define rx_load_vec_f128 _mm_load_pd
|
#define rx_load_vec_f128 _mm_load_pd
|
||||||
#define rx_store_vec_f128 _mm_store_pd
|
#define rx_store_vec_f128 _mm_store_pd
|
||||||
|
@ -201,6 +202,7 @@ typedef union{
|
||||||
#define rx_aligned_alloc(a, b) malloc(a)
|
#define rx_aligned_alloc(a, b) malloc(a)
|
||||||
#define rx_aligned_free(a) free(a)
|
#define rx_aligned_free(a) free(a)
|
||||||
#define rx_prefetch_nta(x)
|
#define rx_prefetch_nta(x)
|
||||||
|
#define rx_prefetch_t0(x)
|
||||||
|
|
||||||
/* Splat 64-bit long long to 2 64-bit long longs */
|
/* Splat 64-bit long long to 2 64-bit long longs */
|
||||||
FORCE_INLINE __m128i vec_splat2sd (int64_t scalar)
|
FORCE_INLINE __m128i vec_splat2sd (int64_t scalar)
|
||||||
|
@ -376,11 +378,142 @@ FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
|
||||||
|
|
||||||
#define RANDOMX_DEFAULT_FENV
|
#define RANDOMX_DEFAULT_FENV
|
||||||
|
|
||||||
void rx_reset_float_state();
|
#elif defined(__aarch64__)
|
||||||
|
|
||||||
void rx_set_rounding_mode(uint32_t mode);
|
#include <stdlib.h>
|
||||||
|
#include <arm_neon.h>
|
||||||
|
#include <arm_acle.h>
|
||||||
|
|
||||||
#else //end altivec
|
typedef uint8x16_t rx_vec_i128;
|
||||||
|
typedef float64x2_t rx_vec_f128;
|
||||||
|
|
||||||
|
inline void* rx_aligned_alloc(size_t size, size_t align) {
|
||||||
|
void* p;
|
||||||
|
if (posix_memalign(&p, align, size) == 0)
|
||||||
|
return p;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define rx_aligned_free(a) free(a)
|
||||||
|
|
||||||
|
inline void rx_prefetch_nta(void* ptr) {
|
||||||
|
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void rx_prefetch_t0(const void* ptr) {
|
||||||
|
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
|
||||||
|
}
|
||||||
|
|
||||||
|
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
|
||||||
|
return vld1q_f64((const float64_t*)pd);
|
||||||
|
}
|
||||||
|
|
||||||
|
FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 val) {
|
||||||
|
vst1q_f64((float64_t*)mem_addr, val);
|
||||||
|
}
|
||||||
|
|
||||||
|
FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
|
||||||
|
float64x2_t temp;
|
||||||
|
temp = vcopyq_laneq_f64(temp, 1, a, 1);
|
||||||
|
a = vcopyq_laneq_f64(a, 1, a, 0);
|
||||||
|
return vcopyq_laneq_f64(a, 0, temp, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
|
||||||
|
uint64x2_t temp0 = vdupq_n_u64(x0);
|
||||||
|
uint64x2_t temp1 = vdupq_n_u64(x1);
|
||||||
|
return vreinterpretq_f64_u64(vcopyq_laneq_u64(temp0, 1, temp1, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
|
||||||
|
return vreinterpretq_f64_u64(vdupq_n_u64(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
#define rx_add_vec_f128 vaddq_f64
|
||||||
|
#define rx_sub_vec_f128 vsubq_f64
|
||||||
|
#define rx_mul_vec_f128 vmulq_f64
|
||||||
|
#define rx_div_vec_f128 vdivq_f64
|
||||||
|
#define rx_sqrt_vec_f128 vsqrtq_f64
|
||||||
|
|
||||||
|
FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||||
|
return vreinterpretq_f64_u8(veorq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
|
||||||
|
}
|
||||||
|
|
||||||
|
FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||||
|
return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
|
||||||
|
}
|
||||||
|
|
||||||
|
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||||
|
return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef __ARM_FEATURE_CRYPTO
|
||||||
|
|
||||||
|
|
||||||
|
FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 a, rx_vec_i128 key) {
|
||||||
|
const uint8x16_t zero = { 0 };
|
||||||
|
return vaesmcq_u8(vaeseq_u8(a, zero)) ^ key;
|
||||||
|
}
|
||||||
|
|
||||||
|
FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 a, rx_vec_i128 key) {
|
||||||
|
const uint8x16_t zero = { 0 };
|
||||||
|
return vaesimcq_u8(vaesdq_u8(a, zero)) ^ key;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define HAVE_AES
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define rx_xor_vec_i128 veorq_u8
|
||||||
|
|
||||||
|
FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
|
||||||
|
return vgetq_lane_s32(vreinterpretq_s32_u8(a), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
|
||||||
|
return vgetq_lane_s32(vreinterpretq_s32_u8(a), 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
|
||||||
|
return vgetq_lane_s32(vreinterpretq_s32_u8(a), 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
|
||||||
|
return vgetq_lane_s32(vreinterpretq_s32_u8(a), 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) {
|
||||||
|
int32_t data[4];
|
||||||
|
data[0] = _I0;
|
||||||
|
data[1] = _I1;
|
||||||
|
data[2] = _I2;
|
||||||
|
data[3] = _I3;
|
||||||
|
return vreinterpretq_u8_s32(vld1q_s32(data));
|
||||||
|
};
|
||||||
|
|
||||||
|
#define rx_xor_vec_i128 veorq_u8
|
||||||
|
|
||||||
|
FORCE_INLINE rx_vec_i128 rx_load_vec_i128(const rx_vec_i128* mem_addr) {
|
||||||
|
return vld1q_u8((const uint8_t*)mem_addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
FORCE_INLINE void rx_store_vec_i128(rx_vec_i128* mem_addr, rx_vec_i128 val) {
|
||||||
|
vst1q_u8((uint8_t*)mem_addr, val);
|
||||||
|
}
|
||||||
|
|
||||||
|
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
|
||||||
|
double lo = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
|
||||||
|
double hi = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
|
||||||
|
rx_vec_f128 x;
|
||||||
|
x = vsetq_lane_f64(lo, x, 0);
|
||||||
|
x = vsetq_lane_f64(hi, x, 1);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define RANDOMX_DEFAULT_FENV
|
||||||
|
|
||||||
|
#else //portable fallback
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
@ -405,6 +538,7 @@ typedef union {
|
||||||
#define rx_aligned_alloc(a, b) malloc(a)
|
#define rx_aligned_alloc(a, b) malloc(a)
|
||||||
#define rx_aligned_free(a) free(a)
|
#define rx_aligned_free(a) free(a)
|
||||||
#define rx_prefetch_nta(x)
|
#define rx_prefetch_nta(x)
|
||||||
|
#define rx_prefetch_t0(x)
|
||||||
|
|
||||||
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
|
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
|
||||||
rx_vec_f128 x;
|
rx_vec_f128 x;
|
||||||
|
@ -487,7 +621,6 @@ FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||||
rx_vec_f128 x;
|
rx_vec_f128 x;
|
||||||
x.i.u64[0] = a.i.u64[0] ^ b.i.u64[0];
|
x.i.u64[0] = a.i.u64[0] ^ b.i.u64[0];
|
||||||
|
@ -578,10 +711,6 @@ FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
|
||||||
|
|
||||||
#define RANDOMX_DEFAULT_FENV
|
#define RANDOMX_DEFAULT_FENV
|
||||||
|
|
||||||
void rx_reset_float_state();
|
|
||||||
|
|
||||||
void rx_set_rounding_mode(uint32_t mode);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef HAVE_AES
|
#ifndef HAVE_AES
|
||||||
|
@ -598,6 +727,14 @@ FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef RANDOMX_DEFAULT_FENV
|
||||||
|
|
||||||
|
void rx_reset_float_state();
|
||||||
|
|
||||||
|
void rx_set_rounding_mode(uint32_t mode);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
double loadDoublePortable(const void* addr);
|
double loadDoublePortable(const void* addr);
|
||||||
uint64_t mulh(uint64_t, uint64_t);
|
uint64_t mulh(uint64_t, uint64_t);
|
||||||
int64_t smulh(int64_t, int64_t);
|
int64_t smulh(int64_t, int64_t);
|
||||||
|
|
|
@ -29,6 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <climits>
|
#include <climits>
|
||||||
|
#include <atomic>
|
||||||
#include "crypto/randomx/jit_compiler_x86.hpp"
|
#include "crypto/randomx/jit_compiler_x86.hpp"
|
||||||
#include "crypto/randomx/jit_compiler_x86_static.hpp"
|
#include "crypto/randomx/jit_compiler_x86_static.hpp"
|
||||||
#include "crypto/randomx/superscalar.hpp"
|
#include "crypto/randomx/superscalar.hpp"
|
||||||
|
@ -36,6 +37,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "crypto/randomx/reciprocal.h"
|
#include "crypto/randomx/reciprocal.h"
|
||||||
#include "crypto/randomx/virtual_memory.hpp"
|
#include "crypto/randomx/virtual_memory.hpp"
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
# include <intrin.h>
|
||||||
|
#else
|
||||||
|
# include <cpuid.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace randomx {
|
namespace randomx {
|
||||||
/*
|
/*
|
||||||
|
|
||||||
|
@ -108,7 +115,7 @@ namespace randomx {
|
||||||
const int32_t codeSshPrefetchSize = codeShhEnd - codeShhPrefetch;
|
const int32_t codeSshPrefetchSize = codeShhEnd - codeShhPrefetch;
|
||||||
const int32_t codeSshInitSize = codeProgramEnd - codeShhInit;
|
const int32_t codeSshInitSize = codeProgramEnd - codeShhInit;
|
||||||
|
|
||||||
const int32_t epilogueOffset = CodeSize - epilogueSize;
|
const int32_t epilogueOffset = (CodeSize - epilogueSize) & ~63;
|
||||||
constexpr int32_t superScalarHashOffset = 32768;
|
constexpr int32_t superScalarHashOffset = 32768;
|
||||||
|
|
||||||
static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 };
|
static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 };
|
||||||
|
@ -183,6 +190,7 @@ namespace randomx {
|
||||||
static const uint8_t REX_ADD_I[] = { 0x49, 0x81 };
|
static const uint8_t REX_ADD_I[] = { 0x49, 0x81 };
|
||||||
static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
|
static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
|
||||||
static const uint8_t JZ[] = { 0x0f, 0x84 };
|
static const uint8_t JZ[] = { 0x0f, 0x84 };
|
||||||
|
static const uint8_t JZ_SHORT = 0x74;
|
||||||
static const uint8_t RET = 0xc3;
|
static const uint8_t RET = 0xc3;
|
||||||
static const uint8_t LEA_32[] = { 0x41, 0x8d };
|
static const uint8_t LEA_32[] = { 0x41, 0x8d };
|
||||||
static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 };
|
static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 };
|
||||||
|
@ -197,20 +205,100 @@ namespace randomx {
|
||||||
static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 };
|
static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 };
|
||||||
static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
|
static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
|
||||||
|
|
||||||
// static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
|
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
|
||||||
|
|
||||||
|
static const uint8_t JMP_ALIGN_PREFIX[14][16] = {
|
||||||
|
{},
|
||||||
|
{0x2E},
|
||||||
|
{0x2E, 0x2E},
|
||||||
|
{0x2E, 0x2E, 0x2E},
|
||||||
|
{0x2E, 0x2E, 0x2E, 0x2E},
|
||||||
|
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||||
|
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||||
|
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||||
|
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||||
|
{0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||||
|
{0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||||
|
{0x66, 0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||||
|
{0x0F, 0x1F, 0x40, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||||
|
{0x0F, 0x1F, 0x44, 0x00, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||||
|
};
|
||||||
|
|
||||||
|
bool JitCompilerX86::BranchesWithin32B = false;
|
||||||
|
|
||||||
size_t JitCompilerX86::getCodeSize() {
|
size_t JitCompilerX86::getCodeSize() {
|
||||||
return codePos < prologueSize ? 0 : codePos - prologueSize;
|
return codePos < prologueSize ? 0 : codePos - prologueSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void cpuid(uint32_t level, int32_t output[4])
|
||||||
|
{
|
||||||
|
memset(output, 0, sizeof(int32_t) * 4);
|
||||||
|
|
||||||
|
# ifdef _MSC_VER
|
||||||
|
__cpuid(output, static_cast<int>(level));
|
||||||
|
# else
|
||||||
|
__cpuid_count(level, 0, output[0], output[1], output[2], output[3]);
|
||||||
|
# endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// CPU-specific tweaks
|
||||||
|
void JitCompilerX86::applyTweaks() {
|
||||||
|
int32_t info[4];
|
||||||
|
cpuid(0, info);
|
||||||
|
|
||||||
|
int32_t manufacturer[4];
|
||||||
|
manufacturer[0] = info[1];
|
||||||
|
manufacturer[1] = info[3];
|
||||||
|
manufacturer[2] = info[2];
|
||||||
|
manufacturer[3] = 0;
|
||||||
|
|
||||||
|
if (strcmp((const char*)manufacturer, "GenuineIntel") == 0) {
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
unsigned int stepping : 4;
|
||||||
|
unsigned int model : 4;
|
||||||
|
unsigned int family : 4;
|
||||||
|
unsigned int processor_type : 2;
|
||||||
|
unsigned int reserved1 : 2;
|
||||||
|
unsigned int ext_model : 4;
|
||||||
|
unsigned int ext_family : 8;
|
||||||
|
unsigned int reserved2 : 4;
|
||||||
|
} processor_info;
|
||||||
|
|
||||||
|
cpuid(1, info);
|
||||||
|
memcpy(&processor_info, info, sizeof(processor_info));
|
||||||
|
|
||||||
|
// Intel JCC erratum mitigation
|
||||||
|
if (processor_info.family == 6) {
|
||||||
|
const uint32_t model = processor_info.model | (processor_info.ext_model << 4);
|
||||||
|
const uint32_t stepping = processor_info.stepping;
|
||||||
|
|
||||||
|
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
|
||||||
|
BranchesWithin32B =
|
||||||
|
((model == 0x4E) && (stepping == 0x3)) ||
|
||||||
|
((model == 0x55) && (stepping == 0x4)) ||
|
||||||
|
((model == 0x5E) && (stepping == 0x3)) ||
|
||||||
|
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
|
||||||
|
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
|
||||||
|
((model == 0xA6) && (stepping == 0x0)) ||
|
||||||
|
((model == 0xAE) && (stepping == 0xA));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::atomic<size_t> codeOffset;
|
||||||
|
|
||||||
JitCompilerX86::JitCompilerX86() {
|
JitCompilerX86::JitCompilerX86() {
|
||||||
code = (uint8_t*)allocExecutableMemory(CodeSize);
|
applyTweaks();
|
||||||
|
allocatedCode = (uint8_t*)allocExecutableMemory(CodeSize * 2);
|
||||||
|
// Shift code base address to improve caching - all threads will use different L2/L3 cache sets
|
||||||
|
code = allocatedCode + (codeOffset.fetch_add(59 * 64) % CodeSize);
|
||||||
memcpy(code, codePrologue, prologueSize);
|
memcpy(code, codePrologue, prologueSize);
|
||||||
memcpy(code + epilogueOffset, codeEpilogue, epilogueSize);
|
memcpy(code + epilogueOffset, codeEpilogue, epilogueSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
JitCompilerX86::~JitCompilerX86() {
|
JitCompilerX86::~JitCompilerX86() {
|
||||||
freePagedMemory(code, CodeSize);
|
freePagedMemory(allocatedCode, CodeSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) {
|
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) {
|
||||||
|
@ -268,8 +356,6 @@ namespace randomx {
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
|
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
|
||||||
memset(registerUsage, -1, sizeof(registerUsage));
|
|
||||||
|
|
||||||
codePos = ((uint8_t*)randomx_program_prologue_first_load) - ((uint8_t*)randomx_program_prologue);
|
codePos = ((uint8_t*)randomx_program_prologue_first_load) - ((uint8_t*)randomx_program_prologue);
|
||||||
code[codePos + 2] = 0xc0 + pcfg.readReg0;
|
code[codePos + 2] = 0xc0 + pcfg.readReg0;
|
||||||
code[codePos + 5] = 0xc0 + pcfg.readReg1;
|
code[codePos + 5] = 0xc0 + pcfg.readReg1;
|
||||||
|
@ -280,13 +366,21 @@ namespace randomx {
|
||||||
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
|
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
|
||||||
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
|
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
|
||||||
codePos += loopLoadSize;
|
codePos += loopLoadSize;
|
||||||
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
|
||||||
Instruction& instr = prog(i);
|
//mark all registers as used
|
||||||
instr.src %= RegistersCount;
|
uint64_t* r = (uint64_t*)registerUsage;
|
||||||
instr.dst %= RegistersCount;
|
uint64_t k = codePos;
|
||||||
instructionOffsets[i] = codePos;
|
k |= k << 32;
|
||||||
(this->*(engine[instr.opcode]))(instr, i);
|
for (unsigned j = 0; j < RegistersCount / 2; ++j) {
|
||||||
|
r[j] = k;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (int i = 0, n = static_cast<int>(RandomX_CurrentConfig.ProgramSize); i < n; ++i) {
|
||||||
|
Instruction instr = prog(i);
|
||||||
|
*((uint64_t*)&instr) &= (uint64_t(-1) - (0xFFFF << 8)) | ((RegistersCount - 1) << 8) | ((RegistersCount - 1) << 16);
|
||||||
|
(this->*(engine[instr.opcode]))(instr);
|
||||||
|
}
|
||||||
|
|
||||||
emit(REX_MOV_RR, code, codePos);
|
emit(REX_MOV_RR, code, codePos);
|
||||||
emitByte(0xc0 + pcfg.readReg2, code, codePos);
|
emitByte(0xc0 + pcfg.readReg2, code, codePos);
|
||||||
emit(REX_XOR_EAX, code, codePos);
|
emit(REX_XOR_EAX, code, codePos);
|
||||||
|
@ -301,6 +395,22 @@ namespace randomx {
|
||||||
emit(RandomX_CurrentConfig.codePrefetchScratchpadTweaked, prefetchScratchpadSize, code, codePos);
|
emit(RandomX_CurrentConfig.codePrefetchScratchpadTweaked, prefetchScratchpadSize, code, codePos);
|
||||||
memcpy(code + codePos, codeLoopStore, loopStoreSize);
|
memcpy(code + codePos, codeLoopStore, loopStoreSize);
|
||||||
codePos += loopStoreSize;
|
codePos += loopStoreSize;
|
||||||
|
|
||||||
|
if (BranchesWithin32B) {
|
||||||
|
const uint32_t branch_begin = static_cast<uint32_t>(codePos);
|
||||||
|
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + 9);
|
||||||
|
|
||||||
|
// If the jump crosses or touches 32-byte boundary, align it
|
||||||
|
if ((branch_begin ^ branch_end) >= 32) {
|
||||||
|
uint32_t alignment_size = 32 - (branch_begin & 31);
|
||||||
|
if (alignment_size > 8) {
|
||||||
|
emit(NOPX[alignment_size - 9], alignment_size - 8, code, codePos);
|
||||||
|
alignment_size = 8;
|
||||||
|
}
|
||||||
|
emit(NOPX[alignment_size - 1], alignment_size, code, codePos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
emit(SUB_EBX, code, codePos);
|
emit(SUB_EBX, code, codePos);
|
||||||
emit(JNZ, code, codePos);
|
emit(JNZ, code, codePos);
|
||||||
emit32(prologueSize - codePos - 4, code, codePos);
|
emit32(prologueSize - codePos - 4, code, codePos);
|
||||||
|
@ -402,12 +512,13 @@ namespace randomx {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::genAddressReg(Instruction& instr, uint8_t* code, int& codePos, bool rax) {
|
template<bool rax>
|
||||||
emit(LEA_32, code, codePos);
|
FORCE_INLINE void JitCompilerX86::genAddressReg(const Instruction& instr, uint8_t* code, int& codePos) {
|
||||||
emitByte(0x80 + instr.src + (rax ? 0 : 8), code, codePos);
|
const uint32_t src = *((uint32_t*)&instr) & 0xFF0000;
|
||||||
if (instr.src == RegisterNeedsSib) {
|
|
||||||
emitByte(0x24, code, codePos);
|
*(uint32_t*)(code + codePos) = (rax ? 0x24808d41 : 0x24888d41) + src;
|
||||||
}
|
codePos += (src == (RegisterNeedsSib << 16)) ? 4 : 3;
|
||||||
|
|
||||||
emit32(instr.getImm32(), code, codePos);
|
emit32(instr.getImm32(), code, codePos);
|
||||||
if (rax)
|
if (rax)
|
||||||
emitByte(AND_EAX_I, code, codePos);
|
emitByte(AND_EAX_I, code, codePos);
|
||||||
|
@ -416,12 +527,14 @@ namespace randomx {
|
||||||
emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask, code, codePos);
|
emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask, code, codePos);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::genAddressRegDst(Instruction& instr, uint8_t* code, int& codePos) {
|
template void JitCompilerX86::genAddressReg<false>(const Instruction& instr, uint8_t* code, int& codePos);
|
||||||
emit(LEA_32, code, codePos);
|
template void JitCompilerX86::genAddressReg<true>(const Instruction& instr, uint8_t* code, int& codePos);
|
||||||
emitByte(0x80 + instr.dst, code, codePos);
|
|
||||||
if (instr.dst == RegisterNeedsSib) {
|
FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, int& codePos) {
|
||||||
emitByte(0x24, code, codePos);
|
const uint32_t dst = static_cast<uint32_t>(instr.dst) << 16;
|
||||||
}
|
*(uint32_t*)(code + codePos) = 0x24808d41 + dst;
|
||||||
|
codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3;
|
||||||
|
|
||||||
emit32(instr.getImm32(), code, codePos);
|
emit32(instr.getImm32(), code, codePos);
|
||||||
emitByte(AND_EAX_I, code, codePos);
|
emitByte(AND_EAX_I, code, codePos);
|
||||||
if (instr.getModCond() < StoreL3Condition) {
|
if (instr.getModCond() < StoreL3Condition) {
|
||||||
|
@ -432,7 +545,7 @@ namespace randomx {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::genAddressImm(Instruction& instr, uint8_t* code, int& codePos) {
|
FORCE_INLINE void JitCompilerX86::genAddressImm(const Instruction& instr, uint8_t* code, int& codePos) {
|
||||||
emit32(instr.getImm32() & ScratchpadL3Mask, code, codePos);
|
emit32(instr.getImm32() & ScratchpadL3Mask, code, codePos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -447,17 +560,18 @@ namespace randomx {
|
||||||
0x3c8d4f,
|
0x3c8d4f,
|
||||||
};
|
};
|
||||||
|
|
||||||
void JitCompilerX86::h_IADD_RS(Instruction& instr, int i) {
|
void JitCompilerX86::h_IADD_RS(const Instruction& instr) {
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
uint8_t* const p = code + pos;
|
uint8_t* const p = code + pos;
|
||||||
|
|
||||||
registerUsage[instr.dst] = i;
|
|
||||||
|
|
||||||
const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | instr.dst;
|
const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | instr.dst;
|
||||||
*(uint32_t*)(p) = template_IADD_RS[instr.dst] | (sib << 24);
|
*(uint32_t*)(p) = template_IADD_RS[instr.dst] | (sib << 24);
|
||||||
*(uint32_t*)(p + 4) = instr.getImm32();
|
*(uint32_t*)(p + 4) = instr.getImm32();
|
||||||
|
|
||||||
codePos = pos + ((instr.dst == RegisterNeedsDisplacement) ? 8 : 4);
|
pos += ((instr.dst == RegisterNeedsDisplacement) ? 8 : 4);
|
||||||
|
|
||||||
|
registerUsage[instr.dst] = pos;
|
||||||
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const uint32_t template_IADD_M[8] = {
|
static const uint32_t template_IADD_M[8] = {
|
||||||
|
@ -471,13 +585,12 @@ namespace randomx {
|
||||||
0x063c034c,
|
0x063c034c,
|
||||||
};
|
};
|
||||||
|
|
||||||
void JitCompilerX86::h_IADD_M(Instruction& instr, int i) {
|
void JitCompilerX86::h_IADD_M(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
registerUsage[instr.dst] = i;
|
|
||||||
if (instr.src != instr.dst) {
|
if (instr.src != instr.dst) {
|
||||||
genAddressReg(instr, p, pos);
|
genAddressReg<true>(instr, p, pos);
|
||||||
emit32(template_IADD_M[instr.dst], p, pos);
|
emit32(template_IADD_M[instr.dst], p, pos);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -486,6 +599,7 @@ namespace randomx {
|
||||||
genAddressImm(instr, p, pos);
|
genAddressImm(instr, p, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
registerUsage[instr.dst] = pos;
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -493,11 +607,10 @@ namespace randomx {
|
||||||
emitByte((scale << 6) | (index << 3) | base, code, codePos);
|
emitByte((scale << 6) | (index << 3) | base, code, codePos);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) {
|
void JitCompilerX86::h_ISUB_R(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
registerUsage[instr.dst] = i;
|
|
||||||
if (instr.src != instr.dst) {
|
if (instr.src != instr.dst) {
|
||||||
emit(REX_SUB_RR, p, pos);
|
emit(REX_SUB_RR, p, pos);
|
||||||
emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos);
|
emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos);
|
||||||
|
@ -508,16 +621,16 @@ namespace randomx {
|
||||||
emit32(instr.getImm32(), p, pos);
|
emit32(instr.getImm32(), p, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
registerUsage[instr.dst] = pos;
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_ISUB_M(Instruction& instr, int i) {
|
void JitCompilerX86::h_ISUB_M(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
registerUsage[instr.dst] = i;
|
|
||||||
if (instr.src != instr.dst) {
|
if (instr.src != instr.dst) {
|
||||||
genAddressReg(instr, p, pos);
|
genAddressReg<true>(instr, p, pos);
|
||||||
emit(REX_SUB_RM, p, pos);
|
emit(REX_SUB_RM, p, pos);
|
||||||
emitByte(0x04 + 8 * instr.dst, p, pos);
|
emitByte(0x04 + 8 * instr.dst, p, pos);
|
||||||
emitByte(0x06, p, pos);
|
emitByte(0x06, p, pos);
|
||||||
|
@ -528,14 +641,14 @@ namespace randomx {
|
||||||
genAddressImm(instr, p, pos);
|
genAddressImm(instr, p, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
registerUsage[instr.dst] = pos;
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) {
|
void JitCompilerX86::h_IMUL_R(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
registerUsage[instr.dst] = i;
|
|
||||||
if (instr.src != instr.dst) {
|
if (instr.src != instr.dst) {
|
||||||
emit(REX_IMUL_RR, p, pos);
|
emit(REX_IMUL_RR, p, pos);
|
||||||
emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos);
|
emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos);
|
||||||
|
@ -546,16 +659,16 @@ namespace randomx {
|
||||||
emit32(instr.getImm32(), p, pos);
|
emit32(instr.getImm32(), p, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
registerUsage[instr.dst] = pos;
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_IMUL_M(Instruction& instr, int i) {
|
void JitCompilerX86::h_IMUL_M(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
registerUsage[instr.dst] = i;
|
|
||||||
if (instr.src != instr.dst) {
|
if (instr.src != instr.dst) {
|
||||||
genAddressReg(instr, p, pos);
|
genAddressReg<true>(instr, p, pos);
|
||||||
emit(REX_IMUL_RM, p, pos);
|
emit(REX_IMUL_RM, p, pos);
|
||||||
emitByte(0x04 + 8 * instr.dst, p, pos);
|
emitByte(0x04 + 8 * instr.dst, p, pos);
|
||||||
emitByte(0x06, p, pos);
|
emitByte(0x06, p, pos);
|
||||||
|
@ -566,14 +679,14 @@ namespace randomx {
|
||||||
genAddressImm(instr, p, pos);
|
genAddressImm(instr, p, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
registerUsage[instr.dst] = pos;
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) {
|
void JitCompilerX86::h_IMULH_R(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
registerUsage[instr.dst] = i;
|
|
||||||
emit(REX_MOV_RR64, p, pos);
|
emit(REX_MOV_RR64, p, pos);
|
||||||
emitByte(0xc0 + instr.dst, p, pos);
|
emitByte(0xc0 + instr.dst, p, pos);
|
||||||
emit(REX_MUL_R, p, pos);
|
emit(REX_MUL_R, p, pos);
|
||||||
|
@ -581,16 +694,16 @@ namespace randomx {
|
||||||
emit(REX_MOV_R64R, p, pos);
|
emit(REX_MOV_R64R, p, pos);
|
||||||
emitByte(0xc2 + 8 * instr.dst, p, pos);
|
emitByte(0xc2 + 8 * instr.dst, p, pos);
|
||||||
|
|
||||||
|
registerUsage[instr.dst] = pos;
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_IMULH_M(Instruction& instr, int i) {
|
void JitCompilerX86::h_IMULH_M(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
registerUsage[instr.dst] = i;
|
|
||||||
if (instr.src != instr.dst) {
|
if (instr.src != instr.dst) {
|
||||||
genAddressReg(instr, p, pos, false);
|
genAddressReg<false>(instr, p, pos);
|
||||||
emit(REX_MOV_RR64, p, pos);
|
emit(REX_MOV_RR64, p, pos);
|
||||||
emitByte(0xc0 + instr.dst, p, pos);
|
emitByte(0xc0 + instr.dst, p, pos);
|
||||||
emit(REX_MUL_MEM, p, pos);
|
emit(REX_MUL_MEM, p, pos);
|
||||||
|
@ -605,14 +718,14 @@ namespace randomx {
|
||||||
emit(REX_MOV_R64R, p, pos);
|
emit(REX_MOV_R64R, p, pos);
|
||||||
emitByte(0xc2 + 8 * instr.dst, p, pos);
|
emitByte(0xc2 + 8 * instr.dst, p, pos);
|
||||||
|
|
||||||
|
registerUsage[instr.dst] = pos;
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) {
|
void JitCompilerX86::h_ISMULH_R(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
registerUsage[instr.dst] = i;
|
|
||||||
emit(REX_MOV_RR64, p, pos);
|
emit(REX_MOV_RR64, p, pos);
|
||||||
emitByte(0xc0 + instr.dst, p, pos);
|
emitByte(0xc0 + instr.dst, p, pos);
|
||||||
emit(REX_MUL_R, p, pos);
|
emit(REX_MUL_R, p, pos);
|
||||||
|
@ -620,16 +733,16 @@ namespace randomx {
|
||||||
emit(REX_MOV_R64R, p, pos);
|
emit(REX_MOV_R64R, p, pos);
|
||||||
emitByte(0xc2 + 8 * instr.dst, p, pos);
|
emitByte(0xc2 + 8 * instr.dst, p, pos);
|
||||||
|
|
||||||
|
registerUsage[instr.dst] = pos;
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_ISMULH_M(Instruction& instr, int i) {
|
void JitCompilerX86::h_ISMULH_M(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
registerUsage[instr.dst] = i;
|
|
||||||
if (instr.src != instr.dst) {
|
if (instr.src != instr.dst) {
|
||||||
genAddressReg(instr, p, pos, false);
|
genAddressReg<false>(instr, p, pos);
|
||||||
emit(REX_MOV_RR64, p, pos);
|
emit(REX_MOV_RR64, p, pos);
|
||||||
emitByte(0xc0 + instr.dst, p, pos);
|
emitByte(0xc0 + instr.dst, p, pos);
|
||||||
emit(REX_IMUL_MEM, p, pos);
|
emit(REX_IMUL_MEM, p, pos);
|
||||||
|
@ -644,41 +757,41 @@ namespace randomx {
|
||||||
emit(REX_MOV_R64R, p, pos);
|
emit(REX_MOV_R64R, p, pos);
|
||||||
emitByte(0xc2 + 8 * instr.dst, p, pos);
|
emitByte(0xc2 + 8 * instr.dst, p, pos);
|
||||||
|
|
||||||
|
registerUsage[instr.dst] = pos;
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
|
void JitCompilerX86::h_IMUL_RCP(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
uint64_t divisor = instr.getImm32();
|
uint64_t divisor = instr.getImm32();
|
||||||
if (!isZeroOrPowerOf2(divisor)) {
|
if (!isZeroOrPowerOf2(divisor)) {
|
||||||
registerUsage[instr.dst] = i;
|
|
||||||
emit(MOV_RAX_I, p, pos);
|
emit(MOV_RAX_I, p, pos);
|
||||||
emit64(randomx_reciprocal_fast(divisor), p, pos);
|
emit64(randomx_reciprocal_fast(divisor), p, pos);
|
||||||
emit(REX_IMUL_RM, p, pos);
|
emit(REX_IMUL_RM, p, pos);
|
||||||
emitByte(0xc0 + 8 * instr.dst, p, pos);
|
emitByte(0xc0 + 8 * instr.dst, p, pos);
|
||||||
|
registerUsage[instr.dst] = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_INEG_R(Instruction& instr, int i) {
|
void JitCompilerX86::h_INEG_R(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
registerUsage[instr.dst] = i;
|
|
||||||
emit(REX_NEG, p, pos);
|
emit(REX_NEG, p, pos);
|
||||||
emitByte(0xd8 + instr.dst, p, pos);
|
emitByte(0xd8 + instr.dst, p, pos);
|
||||||
|
|
||||||
|
registerUsage[instr.dst] = pos;
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) {
|
void JitCompilerX86::h_IXOR_R(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
registerUsage[instr.dst] = i;
|
|
||||||
if (instr.src != instr.dst) {
|
if (instr.src != instr.dst) {
|
||||||
emit(REX_XOR_RR, p, pos);
|
emit(REX_XOR_RR, p, pos);
|
||||||
emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos);
|
emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos);
|
||||||
|
@ -689,16 +802,16 @@ namespace randomx {
|
||||||
emit32(instr.getImm32(), p, pos);
|
emit32(instr.getImm32(), p, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
registerUsage[instr.dst] = pos;
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_IXOR_M(Instruction& instr, int i) {
|
void JitCompilerX86::h_IXOR_M(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
registerUsage[instr.dst] = i;
|
|
||||||
if (instr.src != instr.dst) {
|
if (instr.src != instr.dst) {
|
||||||
genAddressReg(instr, p, pos);
|
genAddressReg<true>(instr, p, pos);
|
||||||
emit(REX_XOR_RM, p, pos);
|
emit(REX_XOR_RM, p, pos);
|
||||||
emitByte(0x04 + 8 * instr.dst, p, pos);
|
emitByte(0x04 + 8 * instr.dst, p, pos);
|
||||||
emitByte(0x06, p, pos);
|
emitByte(0x06, p, pos);
|
||||||
|
@ -709,14 +822,14 @@ namespace randomx {
|
||||||
genAddressImm(instr, p, pos);
|
genAddressImm(instr, p, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
registerUsage[instr.dst] = pos;
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_IROR_R(Instruction& instr, int i) {
|
void JitCompilerX86::h_IROR_R(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
registerUsage[instr.dst] = i;
|
|
||||||
if (instr.src != instr.dst) {
|
if (instr.src != instr.dst) {
|
||||||
emit(REX_MOV_RR, p, pos);
|
emit(REX_MOV_RR, p, pos);
|
||||||
emitByte(0xc8 + instr.src, p, pos);
|
emitByte(0xc8 + instr.src, p, pos);
|
||||||
|
@ -729,14 +842,14 @@ namespace randomx {
|
||||||
emitByte(instr.getImm32() & 63, p, pos);
|
emitByte(instr.getImm32() & 63, p, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
registerUsage[instr.dst] = pos;
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_IROL_R(Instruction& instr, int i) {
|
void JitCompilerX86::h_IROL_R(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
registerUsage[instr.dst] = i;
|
|
||||||
if (instr.src != instr.dst) {
|
if (instr.src != instr.dst) {
|
||||||
emit(REX_MOV_RR, p, pos);
|
emit(REX_MOV_RR, p, pos);
|
||||||
emitByte(0xc8 + instr.src, p, pos);
|
emitByte(0xc8 + instr.src, p, pos);
|
||||||
|
@ -749,24 +862,25 @@ namespace randomx {
|
||||||
emitByte(instr.getImm32() & 63, p, pos);
|
emitByte(instr.getImm32() & 63, p, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
registerUsage[instr.dst] = pos;
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_ISWAP_R(Instruction& instr, int i) {
|
void JitCompilerX86::h_ISWAP_R(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
if (instr.src != instr.dst) {
|
if (instr.src != instr.dst) {
|
||||||
registerUsage[instr.dst] = i;
|
|
||||||
registerUsage[instr.src] = i;
|
|
||||||
emit(REX_XCHG, p, pos);
|
emit(REX_XCHG, p, pos);
|
||||||
emitByte(0xc0 + instr.src + 8 * instr.dst, p, pos);
|
emitByte(0xc0 + instr.src + 8 * instr.dst, p, pos);
|
||||||
|
registerUsage[instr.dst] = pos;
|
||||||
|
registerUsage[instr.src] = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FSWAP_R(Instruction& instr, int i) {
|
void JitCompilerX86::h_FSWAP_R(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
|
@ -777,105 +891,105 @@ namespace randomx {
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FADD_R(Instruction& instr, int i) {
|
void JitCompilerX86::h_FADD_R(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
instr.dst %= RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
instr.src %= RegisterCountFlt;
|
const uint32_t src = instr.src % RegisterCountFlt;
|
||||||
emit(REX_ADDPD, p, pos);
|
emit(REX_ADDPD, p, pos);
|
||||||
emitByte(0xc0 + instr.src + 8 * instr.dst, p, pos);
|
emitByte(0xc0 + src + 8 * dst, p, pos);
|
||||||
|
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FADD_M(Instruction& instr, int i) {
|
void JitCompilerX86::h_FADD_M(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
instr.dst %= RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
genAddressReg(instr, p, pos);
|
genAddressReg<true>(instr, p, pos);
|
||||||
emit(REX_CVTDQ2PD_XMM12, p, pos);
|
emit(REX_CVTDQ2PD_XMM12, p, pos);
|
||||||
emit(REX_ADDPD, p, pos);
|
emit(REX_ADDPD, p, pos);
|
||||||
emitByte(0xc4 + 8 * instr.dst, p, pos);
|
emitByte(0xc4 + 8 * dst, p, pos);
|
||||||
|
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) {
|
void JitCompilerX86::h_FSUB_R(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
instr.dst %= RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
instr.src %= RegisterCountFlt;
|
const uint32_t src = instr.src % RegisterCountFlt;
|
||||||
emit(REX_SUBPD, p, pos);
|
emit(REX_SUBPD, p, pos);
|
||||||
emitByte(0xc0 + instr.src + 8 * instr.dst, p, pos);
|
emitByte(0xc0 + src + 8 * dst, p, pos);
|
||||||
|
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) {
|
void JitCompilerX86::h_FSUB_M(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
instr.dst %= RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
genAddressReg(instr, p, pos);
|
genAddressReg<true>(instr, p, pos);
|
||||||
emit(REX_CVTDQ2PD_XMM12, p, pos);
|
emit(REX_CVTDQ2PD_XMM12, p, pos);
|
||||||
emit(REX_SUBPD, p, pos);
|
emit(REX_SUBPD, p, pos);
|
||||||
emitByte(0xc4 + 8 * instr.dst, p, pos);
|
emitByte(0xc4 + 8 * dst, p, pos);
|
||||||
|
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) {
|
void JitCompilerX86::h_FSCAL_R(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
instr.dst %= RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
emit(REX_XORPS, p, pos);
|
emit(REX_XORPS, p, pos);
|
||||||
emitByte(0xc7 + 8 * instr.dst, p, pos);
|
emitByte(0xc7 + 8 * dst, p, pos);
|
||||||
|
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) {
|
void JitCompilerX86::h_FMUL_R(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
instr.dst %= RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
instr.src %= RegisterCountFlt;
|
const uint32_t src = instr.src % RegisterCountFlt;
|
||||||
emit(REX_MULPD, p, pos);
|
emit(REX_MULPD, p, pos);
|
||||||
emitByte(0xe0 + instr.src + 8 * instr.dst, p, pos);
|
emitByte(0xe0 + src + 8 * dst, p, pos);
|
||||||
|
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) {
|
void JitCompilerX86::h_FDIV_M(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
instr.dst %= RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
genAddressReg(instr, p, pos);
|
genAddressReg<true>(instr, p, pos);
|
||||||
emit(REX_CVTDQ2PD_XMM12, p, pos);
|
emit(REX_CVTDQ2PD_XMM12, p, pos);
|
||||||
emit(REX_ANDPS_XMM12, p, pos);
|
emit(REX_ANDPS_XMM12, p, pos);
|
||||||
emit(REX_DIVPD, p, pos);
|
emit(REX_DIVPD, p, pos);
|
||||||
emitByte(0xe4 + 8 * instr.dst, p, pos);
|
emitByte(0xe4 + 8 * dst, p, pos);
|
||||||
|
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) {
|
void JitCompilerX86::h_FSQRT_R(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
instr.dst %= RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
emit(SQRTPD, p, pos);
|
emit(SQRTPD, p, pos);
|
||||||
emitByte(0xe4 + 9 * instr.dst, p, pos);
|
emitByte(0xe4 + 9 * dst, p, pos);
|
||||||
|
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_CFROUND(Instruction& instr, int i) {
|
void JitCompilerX86::h_CFROUND(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
|
@ -891,27 +1005,46 @@ namespace randomx {
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_CBRANCH(Instruction& instr, int i) {
|
void JitCompilerX86::h_CBRANCH(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
int reg = instr.dst;
|
const int reg = instr.dst;
|
||||||
int target = registerUsage[reg] + 1;
|
int32_t jmp_offset = registerUsage[reg] - (pos + 16);
|
||||||
|
|
||||||
|
if (BranchesWithin32B) {
|
||||||
|
const uint32_t branch_begin = static_cast<uint32_t>(pos + 7);
|
||||||
|
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + ((jmp_offset >= -128) ? 9 : 13));
|
||||||
|
|
||||||
|
// If the jump crosses or touches 32-byte boundary, align it
|
||||||
|
if ((branch_begin ^ branch_end) >= 32) {
|
||||||
|
const uint32_t alignment_size = 32 - (branch_begin & 31);
|
||||||
|
jmp_offset -= alignment_size;
|
||||||
|
emit(JMP_ALIGN_PREFIX[alignment_size], alignment_size, p, pos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
emit(REX_ADD_I, p, pos);
|
emit(REX_ADD_I, p, pos);
|
||||||
emitByte(0xc0 + reg, p, pos);
|
emitByte(0xc0 + reg, p, pos);
|
||||||
int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
|
const int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
|
||||||
uint32_t imm = instr.getImm32() | (1UL << shift);
|
const uint32_t imm = (instr.getImm32() | (1UL << shift)) & ~(1UL << (shift - 1));
|
||||||
if (RandomX_CurrentConfig.JumpOffset > 0 || shift > 0)
|
|
||||||
imm &= ~(1UL << (shift - 1));
|
|
||||||
emit32(imm, p, pos);
|
emit32(imm, p, pos);
|
||||||
emit(REX_TEST, p, pos);
|
emit(REX_TEST, p, pos);
|
||||||
emitByte(0xc0 + reg, p, pos);
|
emitByte(0xc0 + reg, p, pos);
|
||||||
emit32(RandomX_CurrentConfig.ConditionMask_Calculated << shift, p, pos);
|
emit32(RandomX_CurrentConfig.ConditionMask_Calculated << shift, p, pos);
|
||||||
|
|
||||||
|
if (jmp_offset >= -128) {
|
||||||
|
emitByte(JZ_SHORT, p, pos);
|
||||||
|
emitByte(jmp_offset, p, pos);
|
||||||
|
}
|
||||||
|
else {
|
||||||
emit(JZ, p, pos);
|
emit(JZ, p, pos);
|
||||||
emit32(instructionOffsets[target] - (pos + 4), p, pos);
|
emit32(jmp_offset - 4, p, pos);
|
||||||
|
}
|
||||||
|
|
||||||
//mark all registers as used
|
//mark all registers as used
|
||||||
uint64_t* r = (uint64_t*) registerUsage;
|
uint64_t* r = (uint64_t*) registerUsage;
|
||||||
uint64_t k = i;
|
uint64_t k = pos;
|
||||||
k |= k << 32;
|
k |= k << 32;
|
||||||
for (unsigned j = 0; j < RegistersCount / 2; ++j) {
|
for (unsigned j = 0; j < RegistersCount / 2; ++j) {
|
||||||
r[j] = k;
|
r[j] = k;
|
||||||
|
@ -920,7 +1053,7 @@ namespace randomx {
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_ISTORE(Instruction& instr, int i) {
|
void JitCompilerX86::h_ISTORE(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
int pos = codePos;
|
int pos = codePos;
|
||||||
|
|
||||||
|
@ -932,7 +1065,7 @@ namespace randomx {
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_NOP(Instruction& instr, int i) {
|
void JitCompilerX86::h_NOP(const Instruction& instr) {
|
||||||
emit(NOP1, code, codePos);
|
emit(NOP1, code, codePos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -36,12 +36,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
namespace randomx {
|
namespace randomx {
|
||||||
|
|
||||||
class Program;
|
class Program;
|
||||||
class ProgramConfiguration;
|
struct ProgramConfiguration;
|
||||||
class SuperscalarProgram;
|
class SuperscalarProgram;
|
||||||
class JitCompilerX86;
|
class JitCompilerX86;
|
||||||
class Instruction;
|
class Instruction;
|
||||||
|
|
||||||
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
|
typedef void(JitCompilerX86::*InstructionGeneratorX86)(const Instruction&);
|
||||||
|
|
||||||
constexpr uint32_t CodeSize = 64 * 1024;
|
constexpr uint32_t CodeSize = 64 * 1024;
|
||||||
|
|
||||||
|
@ -66,16 +66,20 @@ namespace randomx {
|
||||||
size_t getCodeSize();
|
size_t getCodeSize();
|
||||||
|
|
||||||
static InstructionGeneratorX86 engine[256];
|
static InstructionGeneratorX86 engine[256];
|
||||||
int32_t instructionOffsets[512];
|
|
||||||
int registerUsage[RegistersCount];
|
int registerUsage[RegistersCount];
|
||||||
|
uint8_t* allocatedCode;
|
||||||
uint8_t* code;
|
uint8_t* code;
|
||||||
int32_t codePos;
|
int32_t codePos;
|
||||||
|
|
||||||
|
static bool BranchesWithin32B;
|
||||||
|
|
||||||
|
static void applyTweaks();
|
||||||
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
||||||
void generateProgramEpilogue(Program&, ProgramConfiguration&);
|
void generateProgramEpilogue(Program&, ProgramConfiguration&);
|
||||||
static void genAddressReg(Instruction&, uint8_t* code, int& codePos, bool rax = true);
|
template<bool rax>
|
||||||
static void genAddressRegDst(Instruction&, uint8_t* code, int& codePos);
|
static void genAddressReg(const Instruction&, uint8_t* code, int& codePos);
|
||||||
static void genAddressImm(Instruction&, uint8_t* code, int& codePos);
|
static void genAddressRegDst(const Instruction&, uint8_t* code, int& codePos);
|
||||||
|
static void genAddressImm(const Instruction&, uint8_t* code, int& codePos);
|
||||||
static void genSIB(int scale, int index, int base, uint8_t* code, int& codePos);
|
static void genSIB(int scale, int index, int base, uint8_t* code, int& codePos);
|
||||||
|
|
||||||
void generateSuperscalarCode(Instruction &, std::vector<uint64_t> &);
|
void generateSuperscalarCode(Instruction &, std::vector<uint64_t> &);
|
||||||
|
@ -105,36 +109,36 @@ namespace randomx {
|
||||||
codePos += count;
|
codePos += count;
|
||||||
}
|
}
|
||||||
|
|
||||||
void h_IADD_RS(Instruction&, int);
|
void h_IADD_RS(const Instruction&);
|
||||||
void h_IADD_M(Instruction&, int);
|
void h_IADD_M(const Instruction&);
|
||||||
void h_ISUB_R(Instruction&, int);
|
void h_ISUB_R(const Instruction&);
|
||||||
void h_ISUB_M(Instruction&, int);
|
void h_ISUB_M(const Instruction&);
|
||||||
void h_IMUL_R(Instruction&, int);
|
void h_IMUL_R(const Instruction&);
|
||||||
void h_IMUL_M(Instruction&, int);
|
void h_IMUL_M(const Instruction&);
|
||||||
void h_IMULH_R(Instruction&, int);
|
void h_IMULH_R(const Instruction&);
|
||||||
void h_IMULH_M(Instruction&, int);
|
void h_IMULH_M(const Instruction&);
|
||||||
void h_ISMULH_R(Instruction&, int);
|
void h_ISMULH_R(const Instruction&);
|
||||||
void h_ISMULH_M(Instruction&, int);
|
void h_ISMULH_M(const Instruction&);
|
||||||
void h_IMUL_RCP(Instruction&, int);
|
void h_IMUL_RCP(const Instruction&);
|
||||||
void h_INEG_R(Instruction&, int);
|
void h_INEG_R(const Instruction&);
|
||||||
void h_IXOR_R(Instruction&, int);
|
void h_IXOR_R(const Instruction&);
|
||||||
void h_IXOR_M(Instruction&, int);
|
void h_IXOR_M(const Instruction&);
|
||||||
void h_IROR_R(Instruction&, int);
|
void h_IROR_R(const Instruction&);
|
||||||
void h_IROL_R(Instruction&, int);
|
void h_IROL_R(const Instruction&);
|
||||||
void h_ISWAP_R(Instruction&, int);
|
void h_ISWAP_R(const Instruction&);
|
||||||
void h_FSWAP_R(Instruction&, int);
|
void h_FSWAP_R(const Instruction&);
|
||||||
void h_FADD_R(Instruction&, int);
|
void h_FADD_R(const Instruction&);
|
||||||
void h_FADD_M(Instruction&, int);
|
void h_FADD_M(const Instruction&);
|
||||||
void h_FSUB_R(Instruction&, int);
|
void h_FSUB_R(const Instruction&);
|
||||||
void h_FSUB_M(Instruction&, int);
|
void h_FSUB_M(const Instruction&);
|
||||||
void h_FSCAL_R(Instruction&, int);
|
void h_FSCAL_R(const Instruction&);
|
||||||
void h_FMUL_R(Instruction&, int);
|
void h_FMUL_R(const Instruction&);
|
||||||
void h_FDIV_M(Instruction&, int);
|
void h_FDIV_M(const Instruction&);
|
||||||
void h_FSQRT_R(Instruction&, int);
|
void h_FSQRT_R(const Instruction&);
|
||||||
void h_CBRANCH(Instruction&, int);
|
void h_CBRANCH(const Instruction&);
|
||||||
void h_CFROUND(Instruction&, int);
|
void h_CFROUND(const Instruction&);
|
||||||
void h_ISTORE(Instruction&, int);
|
void h_ISTORE(const Instruction&);
|
||||||
void h_NOP(Instruction&, int);
|
void h_NOP(const Instruction&);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
|
@ -26,6 +26,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "crypto/randomx/common.hpp"
|
||||||
#include "crypto/randomx/randomx.h"
|
#include "crypto/randomx/randomx.h"
|
||||||
#include "crypto/randomx/dataset.hpp"
|
#include "crypto/randomx/dataset.hpp"
|
||||||
#include "crypto/randomx/vm_interpreted.hpp"
|
#include "crypto/randomx/vm_interpreted.hpp"
|
||||||
|
@ -33,7 +34,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "crypto/randomx/vm_compiled.hpp"
|
#include "crypto/randomx/vm_compiled.hpp"
|
||||||
#include "crypto/randomx/vm_compiled_light.hpp"
|
#include "crypto/randomx/vm_compiled_light.hpp"
|
||||||
#include "crypto/randomx/blake2/blake2.h"
|
#include "crypto/randomx/blake2/blake2.h"
|
||||||
|
|
||||||
|
#if defined(_M_X64) || defined(__x86_64__)
|
||||||
#include "crypto/randomx/jit_compiler_x86_static.hpp"
|
#include "crypto/randomx/jit_compiler_x86_static.hpp"
|
||||||
|
#elif defined(XMRIG_ARMv8)
|
||||||
|
#include "crypto/randomx/jit_compiler_a64_static.hpp"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
|
RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
|
||||||
|
@ -85,6 +92,16 @@ RandomX_ConfigurationArqma::RandomX_ConfigurationArqma()
|
||||||
ScratchpadL3_Size = 262144;
|
ScratchpadL3_Size = 262144;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
RandomX_ConfigurationSafex::RandomX_ConfigurationSafex()
|
||||||
|
{
|
||||||
|
ArgonIterations = 3;
|
||||||
|
ArgonSalt = "RandomSFX\x01";
|
||||||
|
ProgramIterations = 2048;
|
||||||
|
ProgramCount = 8;
|
||||||
|
ScratchpadL2_Size = 262144;
|
||||||
|
ScratchpadL3_Size = 2097152;
|
||||||
|
}
|
||||||
|
|
||||||
RandomX_ConfigurationBase::RandomX_ConfigurationBase()
|
RandomX_ConfigurationBase::RandomX_ConfigurationBase()
|
||||||
: ArgonMemory(262144)
|
: ArgonMemory(262144)
|
||||||
, ArgonIterations(3)
|
, ArgonIterations(3)
|
||||||
|
@ -166,19 +183,10 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint32_t Log2(size_t value) { return (value > 1) ? (Log2(value / 2) + 1) : 0; }
|
||||||
|
|
||||||
void RandomX_ConfigurationBase::Apply()
|
void RandomX_ConfigurationBase::Apply()
|
||||||
{
|
{
|
||||||
#if defined(_M_X64) || defined(__x86_64__)
|
|
||||||
*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
|
|
||||||
const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE;
|
|
||||||
*(uint32_t*)(codeReadDatasetTweaked + 7) = DatasetBaseMask;
|
|
||||||
*(uint32_t*)(codeReadDatasetTweaked + 23) = DatasetBaseMask;
|
|
||||||
*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1);
|
|
||||||
DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE;
|
|
||||||
|
|
||||||
ScratchpadL1Mask_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) - 1) * 8;
|
ScratchpadL1Mask_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) - 1) * 8;
|
||||||
ScratchpadL1Mask16_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) / 2 - 1) * 16;
|
ScratchpadL1Mask16_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) / 2 - 1) * 16;
|
||||||
ScratchpadL2Mask_Calculated = (ScratchpadL2_Size / sizeof(uint64_t) - 1) * 8;
|
ScratchpadL2Mask_Calculated = (ScratchpadL2_Size / sizeof(uint64_t) - 1) * 8;
|
||||||
|
@ -186,22 +194,40 @@ void RandomX_ConfigurationBase::Apply()
|
||||||
ScratchpadL3Mask_Calculated = (((ScratchpadL3_Size / sizeof(uint64_t)) - 1) * 8);
|
ScratchpadL3Mask_Calculated = (((ScratchpadL3_Size / sizeof(uint64_t)) - 1) * 8);
|
||||||
ScratchpadL3Mask64_Calculated = ((ScratchpadL3_Size / sizeof(uint64_t)) / 8 - 1) * 64;
|
ScratchpadL3Mask64_Calculated = ((ScratchpadL3_Size / sizeof(uint64_t)) / 8 - 1) * 64;
|
||||||
|
|
||||||
#if defined(_M_X64) || defined(__x86_64__)
|
CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1);
|
||||||
*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated;
|
DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE;
|
||||||
*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
ConditionMask_Calculated = (1 << JumpBits) - 1;
|
ConditionMask_Calculated = (1 << JumpBits) - 1;
|
||||||
|
|
||||||
constexpr int CEIL_NULL = 0;
|
|
||||||
int k = 0;
|
|
||||||
|
|
||||||
#if defined(_M_X64) || defined(__x86_64__)
|
#if defined(_M_X64) || defined(__x86_64__)
|
||||||
|
*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
|
||||||
|
const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE;
|
||||||
|
*(uint32_t*)(codeReadDatasetTweaked + 7) = DatasetBaseMask;
|
||||||
|
*(uint32_t*)(codeReadDatasetTweaked + 23) = DatasetBaseMask;
|
||||||
|
*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
|
||||||
|
|
||||||
|
*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated;
|
||||||
|
*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
|
||||||
|
|
||||||
#define JIT_HANDLE(x, prev) randomx::JitCompilerX86::engine[k] = &randomx::JitCompilerX86::h_##x
|
#define JIT_HANDLE(x, prev) randomx::JitCompilerX86::engine[k] = &randomx::JitCompilerX86::h_##x
|
||||||
|
|
||||||
|
#elif defined(XMRIG_ARMv8)
|
||||||
|
|
||||||
|
Log2_ScratchpadL1 = Log2(ScratchpadL1_Size);
|
||||||
|
Log2_ScratchpadL2 = Log2(ScratchpadL2_Size);
|
||||||
|
Log2_ScratchpadL3 = Log2(ScratchpadL3_Size);
|
||||||
|
Log2_DatasetBaseSize = Log2(DatasetBaseSize);
|
||||||
|
Log2_CacheSize = Log2((ArgonMemory * randomx::ArgonBlockSize) / randomx::CacheLineSize);
|
||||||
|
|
||||||
|
#define JIT_HANDLE(x, prev) randomx::JitCompilerA64::engine[k] = &randomx::JitCompilerA64::h_##x
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define JIT_HANDLE(x, prev)
|
#define JIT_HANDLE(x, prev)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
constexpr int CEIL_NULL = 0;
|
||||||
|
int k = 0;
|
||||||
|
|
||||||
#define INST_HANDLE(x, prev) \
|
#define INST_HANDLE(x, prev) \
|
||||||
CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \
|
CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \
|
||||||
for (; k < CEIL_##x; ++k) { JIT_HANDLE(x, prev); }
|
for (; k < CEIL_##x; ++k) { JIT_HANDLE(x, prev); }
|
||||||
|
@ -243,7 +269,7 @@ RandomX_ConfigurationMonero RandomX_MoneroConfig;
|
||||||
RandomX_ConfigurationWownero RandomX_WowneroConfig;
|
RandomX_ConfigurationWownero RandomX_WowneroConfig;
|
||||||
RandomX_ConfigurationLoki RandomX_LokiConfig;
|
RandomX_ConfigurationLoki RandomX_LokiConfig;
|
||||||
RandomX_ConfigurationArqma RandomX_ArqmaConfig;
|
RandomX_ConfigurationArqma RandomX_ArqmaConfig;
|
||||||
|
RandomX_ConfigurationSafex RandomX_SafexConfig;
|
||||||
RandomX_ConfigurationBase RandomX_CurrentConfig;
|
RandomX_ConfigurationBase RandomX_CurrentConfig;
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
@ -457,4 +483,22 @@ extern "C" {
|
||||||
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
|
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize) {
|
||||||
|
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
|
||||||
|
machine->initScratchpad(tempHash);
|
||||||
|
}
|
||||||
|
|
||||||
|
void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output) {
|
||||||
|
machine->resetRoundingMode();
|
||||||
|
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
|
||||||
|
machine->run(&tempHash);
|
||||||
|
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
|
||||||
|
}
|
||||||
|
machine->run(&tempHash);
|
||||||
|
|
||||||
|
// Finish current hash and fill the scratchpad for the next hash at the same time
|
||||||
|
rx_blake2b(tempHash, sizeof(tempHash), nextInput, nextInputSize, nullptr, 0);
|
||||||
|
machine->hashAndFill(output, RANDOMX_HASH_SIZE, tempHash);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -29,8 +29,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#ifndef RANDOMX_H
|
#ifndef RANDOMX_H
|
||||||
#define RANDOMX_H
|
#define RANDOMX_H
|
||||||
|
|
||||||
#include <stddef.h>
|
#include <cstddef>
|
||||||
#include <stdint.h>
|
#include <cstdint>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include "crypto/randomx/intrin_portable.h"
|
#include "crypto/randomx/intrin_portable.h"
|
||||||
|
|
||||||
|
@ -41,17 +41,20 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define RANDOMX_EXPORT
|
#define RANDOMX_EXPORT
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
|
enum randomx_flags {
|
||||||
RANDOMX_FLAG_DEFAULT = 0,
|
RANDOMX_FLAG_DEFAULT = 0,
|
||||||
RANDOMX_FLAG_LARGE_PAGES = 1,
|
RANDOMX_FLAG_LARGE_PAGES = 1,
|
||||||
RANDOMX_FLAG_HARD_AES = 2,
|
RANDOMX_FLAG_HARD_AES = 2,
|
||||||
RANDOMX_FLAG_FULL_MEM = 4,
|
RANDOMX_FLAG_FULL_MEM = 4,
|
||||||
RANDOMX_FLAG_JIT = 8,
|
RANDOMX_FLAG_JIT = 8,
|
||||||
} randomx_flags;
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct randomx_dataset;
|
||||||
|
struct randomx_cache;
|
||||||
|
class randomx_vm;
|
||||||
|
|
||||||
typedef struct randomx_dataset randomx_dataset;
|
|
||||||
typedef struct randomx_cache randomx_cache;
|
|
||||||
typedef struct randomx_vm randomx_vm;
|
|
||||||
|
|
||||||
struct RandomX_ConfigurationBase
|
struct RandomX_ConfigurationBase
|
||||||
{
|
{
|
||||||
|
@ -130,6 +133,14 @@ struct RandomX_ConfigurationBase
|
||||||
|
|
||||||
uint32_t ConditionMask_Calculated;
|
uint32_t ConditionMask_Calculated;
|
||||||
|
|
||||||
|
#if defined(XMRIG_ARMv8)
|
||||||
|
uint32_t Log2_ScratchpadL1;
|
||||||
|
uint32_t Log2_ScratchpadL2;
|
||||||
|
uint32_t Log2_ScratchpadL3;
|
||||||
|
uint32_t Log2_DatasetBaseSize;
|
||||||
|
uint32_t Log2_CacheSize;
|
||||||
|
#endif
|
||||||
|
|
||||||
int CEIL_IADD_RS;
|
int CEIL_IADD_RS;
|
||||||
int CEIL_IADD_M;
|
int CEIL_IADD_M;
|
||||||
int CEIL_ISUB_R;
|
int CEIL_ISUB_R;
|
||||||
|
@ -166,11 +177,13 @@ struct RandomX_ConfigurationMonero : public RandomX_ConfigurationBase {};
|
||||||
struct RandomX_ConfigurationWownero : public RandomX_ConfigurationBase { RandomX_ConfigurationWownero(); };
|
struct RandomX_ConfigurationWownero : public RandomX_ConfigurationBase { RandomX_ConfigurationWownero(); };
|
||||||
struct RandomX_ConfigurationLoki : public RandomX_ConfigurationBase { RandomX_ConfigurationLoki(); };
|
struct RandomX_ConfigurationLoki : public RandomX_ConfigurationBase { RandomX_ConfigurationLoki(); };
|
||||||
struct RandomX_ConfigurationArqma : public RandomX_ConfigurationBase { RandomX_ConfigurationArqma(); };
|
struct RandomX_ConfigurationArqma : public RandomX_ConfigurationBase { RandomX_ConfigurationArqma(); };
|
||||||
|
struct RandomX_ConfigurationSafex : public RandomX_ConfigurationBase { RandomX_ConfigurationSafex(); };
|
||||||
|
|
||||||
extern RandomX_ConfigurationMonero RandomX_MoneroConfig;
|
extern RandomX_ConfigurationMonero RandomX_MoneroConfig;
|
||||||
extern RandomX_ConfigurationWownero RandomX_WowneroConfig;
|
extern RandomX_ConfigurationWownero RandomX_WowneroConfig;
|
||||||
extern RandomX_ConfigurationLoki RandomX_LokiConfig;
|
extern RandomX_ConfigurationLoki RandomX_LokiConfig;
|
||||||
extern RandomX_ConfigurationArqma RandomX_ArqmaConfig;
|
extern RandomX_ConfigurationArqma RandomX_ArqmaConfig;
|
||||||
|
extern RandomX_ConfigurationSafex RandomX_SafexConfig;
|
||||||
|
|
||||||
extern RandomX_ConfigurationBase RandomX_CurrentConfig;
|
extern RandomX_ConfigurationBase RandomX_CurrentConfig;
|
||||||
|
|
||||||
|
@ -327,6 +340,9 @@ RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine);
|
||||||
*/
|
*/
|
||||||
RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output);
|
RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output);
|
||||||
|
|
||||||
|
RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize);
|
||||||
|
RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output);
|
||||||
|
|
||||||
#if defined(__cplusplus)
|
#if defined(__cplusplus)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -114,6 +114,12 @@ namespace randomx {
|
||||||
rx_blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0);
|
rx_blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<bool softAes>
|
||||||
|
void VmBase<softAes>::hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) {
|
||||||
|
hashAndFillAes1Rx4<softAes>(scratchpad, ScratchpadSize, ®.a, fill_state);
|
||||||
|
rx_blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
template<bool softAes>
|
||||||
void VmBase<softAes>::initScratchpad(void* seed) {
|
void VmBase<softAes>::initScratchpad(void* seed) {
|
||||||
fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad);
|
fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad);
|
||||||
|
|
|
@ -39,6 +39,7 @@ public:
|
||||||
virtual ~randomx_vm() = 0;
|
virtual ~randomx_vm() = 0;
|
||||||
virtual void setScratchpad(uint8_t *scratchpad) = 0;
|
virtual void setScratchpad(uint8_t *scratchpad) = 0;
|
||||||
virtual void getFinalResult(void* out, size_t outSize) = 0;
|
virtual void getFinalResult(void* out, size_t outSize) = 0;
|
||||||
|
virtual void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) = 0;
|
||||||
virtual void setDataset(randomx_dataset* dataset) { }
|
virtual void setDataset(randomx_dataset* dataset) { }
|
||||||
virtual void setCache(randomx_cache* cache) { }
|
virtual void setCache(randomx_cache* cache) { }
|
||||||
virtual void initScratchpad(void* seed) = 0;
|
virtual void initScratchpad(void* seed) = 0;
|
||||||
|
@ -64,7 +65,7 @@ protected:
|
||||||
alignas(64) randomx::RegisterFile reg;
|
alignas(64) randomx::RegisterFile reg;
|
||||||
alignas(16) randomx::ProgramConfiguration config;
|
alignas(16) randomx::ProgramConfiguration config;
|
||||||
randomx::MemoryRegisters mem;
|
randomx::MemoryRegisters mem;
|
||||||
uint8_t* scratchpad;
|
uint8_t* scratchpad = nullptr;
|
||||||
union {
|
union {
|
||||||
randomx_cache* cachePtr = nullptr;
|
randomx_cache* cachePtr = nullptr;
|
||||||
randomx_dataset* datasetPtr;
|
randomx_dataset* datasetPtr;
|
||||||
|
@ -82,6 +83,7 @@ namespace randomx {
|
||||||
void setScratchpad(uint8_t *scratchpad) override;
|
void setScratchpad(uint8_t *scratchpad) override;
|
||||||
void initScratchpad(void* seed) override;
|
void initScratchpad(void* seed) override;
|
||||||
void getFinalResult(void* out, size_t outSize) override;
|
void getFinalResult(void* out, size_t outSize) override;
|
||||||
|
void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void generateProgram(void* seed);
|
void generateProgram(void* seed);
|
||||||
|
|
|
@ -49,6 +49,9 @@ const RandomX_ConfigurationBase *xmrig::RxAlgo::base(Algorithm::Id algorithm)
|
||||||
case Algorithm::RX_ARQ:
|
case Algorithm::RX_ARQ:
|
||||||
return &RandomX_ArqmaConfig;
|
return &RandomX_ArqmaConfig;
|
||||||
|
|
||||||
|
case Algorithm::RX_SFX:
|
||||||
|
return &RandomX_SafexConfig;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
#define APP_ID "XMRigCC"
|
#define APP_ID "XMRigCC"
|
||||||
#define APP_NAME "XMRigCC"
|
#define APP_NAME "XMRigCC"
|
||||||
#define APP_DESC "XMRigCC CPU miner"
|
#define APP_DESC "XMRigCC CPU miner"
|
||||||
#define APP_VERSION "2.2.0"
|
#define APP_VERSION "2.2.1"
|
||||||
#define APP_DOMAIN ""
|
#define APP_DOMAIN ""
|
||||||
#define APP_SITE "https://github.com/BenDr0id/xmrigCC/"
|
#define APP_SITE "https://github.com/BenDr0id/xmrigCC/"
|
||||||
#define APP_COPYRIGHT "Copyright (C) 2017- XMRigCC"
|
#define APP_COPYRIGHT "Copyright (C) 2017- XMRigCC"
|
||||||
|
@ -36,7 +36,7 @@
|
||||||
|
|
||||||
#define APP_VER_MAJOR 2
|
#define APP_VER_MAJOR 2
|
||||||
#define APP_VER_MINOR 2
|
#define APP_VER_MINOR 2
|
||||||
#define APP_VER_PATCH 0
|
#define APP_VER_PATCH 1
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
#define BUILD_TYPE "DEBUG"
|
#define BUILD_TYPE "DEBUG"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue