- Integrated RandomSFX algo (rx/sfx)
- Performance improvements for RandomX variants
- Fixed some cc connection problems
This commit is contained in:
Ben Gräf 2019-12-09 23:01:37 +01:00
parent 621dffeafc
commit 23430259fa
24 changed files with 4340 additions and 3343 deletions

View file

@ -1,3 +1,7 @@
# 2.2.1
* Integrated RandomSFX algo (rx/sfx)
* Performance improvements for RandomX variants
* Fixed some cc connection problems
# 2.2.0 # 2.2.0
* Integrated RandomxARQ algo (rx/arq) * Integrated RandomxARQ algo (rx/arq)
* Dashboard: * Dashboard:

View file

@ -261,6 +261,8 @@ if (WITH_CC_SERVER OR WITH_CC_CLIENT)
src/cc/ClientStatus.cpp src/cc/ClientStatus.cpp
src/cc/GPUInfo.cpp) src/cc/GPUInfo.cpp)
add_definitions("/DCPPHTTPLIB_USE_POLL")
if (WITH_ZLIB) if (WITH_ZLIB)
set(ZLIB_ROOT ${XMRIG_DEPS}) set(ZLIB_ROOT ${XMRIG_DEPS})
find_package(ZLIB) find_package(ZLIB)

View file

@ -29,6 +29,7 @@ Full Windows/Linux compatible, and you can mix Linux and Windows miner on one XM
## Additional features of XMRigCC (on top of XMRig) ## Additional features of XMRigCC (on top of XMRig)
Check the [Coin Configuration](https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations) guide Check the [Coin Configuration](https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations) guide
* **Support of RandomxSFX variant (algo: "rx/sfx")**
* **Support of RandomxARQ variant (algo: "rx/arq")** * **Support of RandomxARQ variant (algo: "rx/arq")**
* **Support of UPX2 variant (algo: "cn-extremelite/upx2")** * **Support of UPX2 variant (algo: "cn-extremelite/upx2")**
* **Support of CN-Conceal variant (algo: "cn/conceal")** * **Support of CN-Conceal variant (algo: "cn/conceal")**
@ -133,7 +134,7 @@ xmrigDaemon -o pool.hashvault.pro:5555 -u YOUR_WALLET -p x -k --cc-url=IP_OF_CC_
cn-pico cn-pico
cn-extremelite cn-extremelite
argon2/chukwa, argon2/wrkz argon2/chukwa, argon2/wrkz
rx/wow, rx/loki, rx/arq rx/wow, rx/loki, rx/arq, rx/sfx
--coin=COIN specify coin instead of algorithm --coin=COIN specify coin instead of algorithm
-o, --url=URL URL of mining server -o, --url=URL URL of mining server
-O, --userpass=U:P username:password pair for mining server -O, --userpass=U:P username:password pair for mining server

File diff suppressed because it is too large Load diff

View file

@ -96,6 +96,11 @@ void xmrig::Workers<T>::start(const std::vector<T> &data)
for (Thread<T> *worker : m_workers) { for (Thread<T> *worker : m_workers) {
worker->start(Workers<T>::onReady); worker->start(Workers<T>::onReady);
// This sleep is important for optimal caching!
// Threads must allocate scratchpads in order so that adjacent cores will use adjacent scratchpads
// Sub-optimal caching can result in up to 0.5% hashrate penalty
std::this_thread::sleep_for(std::chrono::milliseconds(20));
} }
} }

View file

@ -65,6 +65,7 @@ static const char *kCnExtremelite = "cn-extremelite";
static const char *kRx = "rx"; static const char *kRx = "rx";
static const char *kRxWOW = "rx/wow"; static const char *kRxWOW = "rx/wow";
static const char *kRxARQ = "rx/arq"; static const char *kRxARQ = "rx/arq";
static const char *kRxSFX = "rx/sfx";
#endif #endif
#ifdef XMRIG_ALGO_ARGON2 #ifdef XMRIG_ALGO_ARGON2
@ -200,6 +201,7 @@ void xmrig::CpuConfig::generate()
m_threads.move(kRx, cpu->threads(Algorithm::RX_0)); m_threads.move(kRx, cpu->threads(Algorithm::RX_0));
m_threads.move(kRxWOW, cpu->threads(Algorithm::RX_WOW)); m_threads.move(kRxWOW, cpu->threads(Algorithm::RX_WOW));
m_threads.move(kRxARQ, cpu->threads(Algorithm::RX_ARQ)); m_threads.move(kRxARQ, cpu->threads(Algorithm::RX_ARQ));
m_threads.move(kRxSFX, cpu->threads(Algorithm::RX_SFX));
# endif # endif
generateArgon2(); generateArgon2();

View file

@ -191,8 +191,17 @@ void xmrig::CpuWorker<N>::start()
consumeJob(); consumeJob();
} }
uint64_t storeStatsMask = 7;
# ifdef XMRIG_ALGO_RANDOMX
// RandomX is faster, we don't need to store stats so often
if (m_job.currentJob().algorithm().family() == Algorithm::RANDOM_X) {
storeStatsMask = 63;
}
# endif
while (!Nonce::isOutdated(Nonce::CPU, m_job.sequence())) { while (!Nonce::isOutdated(Nonce::CPU, m_job.sequence())) {
if ((m_count & 0x7) == 0) { if ((m_count & storeStatsMask) == 0) {
storeStats(); storeStats();
} }

View file

@ -379,6 +379,8 @@ std::shared_ptr<httplib::Response> xmrig::CCClient::performRequest(const std::st
auto res = std::make_shared<httplib::Response>(); auto res = std::make_shared<httplib::Response>();
cli->follow_location(false);
return cli->send(req, *res) ? res : nullptr; return cli->send(req, *res) ? res : nullptr;
} }

View file

@ -20,7 +20,6 @@
#include <fstream> #include <fstream>
#include <memory> #include <memory>
#include <3rdparty/cpp-httplib/httplib.h> #include <3rdparty/cpp-httplib/httplib.h>
#include <3rdparty/base64/base64.h>
#include "base/io/log/Log.h" #include "base/io/log/Log.h"
@ -147,31 +146,33 @@ int Httpd::basicAuth(const httplib::Request& req, httplib::Response& res)
{ {
int result = HTTP_UNAUTHORIZED; int result = HTTP_UNAUTHORIZED;
std::string removeAddr = req.get_header_value("REMOTE_ADDR");
if (m_config->adminUser().empty() || m_config->adminPass().empty()) if (m_config->adminUser().empty() || m_config->adminPass().empty())
{ {
res.set_content(std::string("<html><body\\>" res.set_content(std::string("<html><body\\>"
"Please configure admin user and pass to view this Page." "Please configure admin user and pass to view this Page."
"</body><html\\>"), CONTENT_TYPE_HTML); "</body><html\\>"), CONTENT_TYPE_HTML);
LOG_ERR("[%s] 403 FORBIDDEN - Admin user/password not set!", req.remoteAddr.c_str()); LOG_ERR("[%s] 403 FORBIDDEN - Admin user/password not set!", removeAddr.c_str());
result = HTTP_FORBIDDEN; result = HTTP_FORBIDDEN;
} }
else else
{ {
auto authHeader = req.get_header_value("Authorization"); auto authHeader = req.get_header_value("Authorization");
auto credentials = std::string("Basic ") + Base64::Encode(m_config->adminUser() + std::string(":") + m_config->adminPass()); auto credentials = httplib::make_basic_authentication_header(m_config->adminUser(), m_config->adminPass());
if (!authHeader.empty() && credentials == authHeader) if (!authHeader.empty() && credentials.second == authHeader)
{ {
result = HTTP_OK; result = HTTP_OK;
} }
else if (authHeader.empty()) else if (authHeader.empty())
{ {
LOG_WARN("[%s] 401 UNAUTHORIZED", req.remoteAddr.c_str()); LOG_WARN("[%s] 401 UNAUTHORIZED", removeAddr.c_str());
} }
else else
{ {
LOG_ERR("[%s] 403 FORBIDDEN - Admin user/password wrong!", req.remoteAddr.c_str()); LOG_ERR("[%s] 403 FORBIDDEN - Admin user/password wrong!", removeAddr.c_str());
} }
} }
@ -184,9 +185,11 @@ int Httpd::bearerAuth(const httplib::Request& req, httplib::Response& res)
{ {
int result = HTTP_UNAUTHORIZED; int result = HTTP_UNAUTHORIZED;
std::string removeAddr = req.get_header_value("REMOTE_ADDR");
if (m_config->token().empty()) if (m_config->token().empty())
{ {
LOG_WARN("[%s] 200 OK - WARNING AccessToken not set!", req.remoteAddr.c_str()); LOG_WARN("[%s] 200 OK - WARNING AccessToken not set!", removeAddr.c_str());
result = HTTP_OK; result = HTTP_OK;
} }
else else
@ -200,11 +203,11 @@ int Httpd::bearerAuth(const httplib::Request& req, httplib::Response& res)
} }
else if (authHeader.empty()) else if (authHeader.empty())
{ {
LOG_WARN("[%s] 401 UNAUTHORIZED", req.remoteAddr.c_str()); LOG_WARN("[%s] 401 UNAUTHORIZED", removeAddr.c_str());
} }
else else
{ {
LOG_ERR("[%s] 403 FORBIDDEN - AccessToken wrong!", req.remoteAddr.c_str()); LOG_ERR("[%s] 403 FORBIDDEN - AccessToken wrong!", removeAddr.c_str());
result = HTTP_FORBIDDEN; result = HTTP_FORBIDDEN;
} }
} }

View file

@ -107,8 +107,9 @@ int Service::handleGET(const httplib::Request& req, httplib::Response& res)
int resultCode = HTTP_NOT_FOUND; int resultCode = HTTP_NOT_FOUND;
std::string clientId = req.get_param_value("clientId"); std::string clientId = req.get_param_value("clientId");
std::string removeAddr = req.get_header_value("REMOTE_ADDR");
LOG_INFO("[%s] GET %s%s%s", req.remoteAddr.c_str(), req.path.c_str(), clientId.empty() ? "" : "/?clientId=", clientId.c_str()); LOG_INFO("[%s] GET %s%s%s", removeAddr.c_str(), req.path.c_str(), clientId.empty() ? "" : "/?clientId=", clientId.c_str());
if (req.path == "/") if (req.path == "/")
{ {
@ -140,14 +141,14 @@ int Service::handleGET(const httplib::Request& req, httplib::Response& res)
} }
else else
{ {
LOG_WARN("[%s] 404 NOT FOUND (%s)", req.remoteAddr.c_str(), req.path.c_str()); LOG_WARN("[%s] 404 NOT FOUND (%s)", removeAddr.c_str(), req.path.c_str());
} }
} }
else else
{ {
resultCode = HTTP_BAD_REQUEST; resultCode = HTTP_BAD_REQUEST;
LOG_ERR("[%s] 400 BAD REQUEST - Request does not contain clientId (%s)", LOG_ERR("[%s] 400 BAD REQUEST - Request does not contain clientId (%s)",
req.remoteAddr.c_str(), req.path.c_str()); removeAddr.c_str(), req.path.c_str());
} }
} }
@ -161,8 +162,9 @@ int Service::handlePOST(const httplib::Request& req, httplib::Response& res)
int resultCode = HTTP_NOT_FOUND; int resultCode = HTTP_NOT_FOUND;
std::string clientId = req.get_param_value("clientId"); std::string clientId = req.get_param_value("clientId");
std::string removeAddr = req.get_header_value("REMOTE_ADDR");
LOG_INFO("[%s] POST %s%s%s", req.remoteAddr.c_str(), req.path.c_str(), clientId.empty() ? "" : "/?clientId=", clientId.c_str()); LOG_INFO("[%s] POST %s%s%s", removeAddr.c_str(), req.path.c_str(), clientId.empty() ? "" : "/?clientId=", clientId.c_str());
if (!clientId.empty()) if (!clientId.empty())
{ {
@ -185,7 +187,7 @@ int Service::handlePOST(const httplib::Request& req, httplib::Response& res)
else else
{ {
resultCode = HTTP_BAD_REQUEST; resultCode = HTTP_BAD_REQUEST;
LOG_WARN("[%s] 400 BAD REQUEST - Request does not contain clientId (%s)", req.remoteAddr.c_str(), req.path.c_str()); LOG_WARN("[%s] 400 BAD REQUEST - Request does not contain clientId (%s)", removeAddr.c_str(), req.path.c_str());
} }
} }
else else
@ -196,7 +198,7 @@ int Service::handlePOST(const httplib::Request& req, httplib::Response& res)
} }
else else
{ {
LOG_WARN("[%s] 404 NOT FOUND (%s)", req.remoteAddr.c_str(), req.path.c_str()); LOG_WARN("[%s] 404 NOT FOUND (%s)", removeAddr.c_str(), req.path.c_str());
} }
} }
@ -271,12 +273,14 @@ int Service::setClientStatus(const httplib::Request& req, const std::string& cli
{ {
int resultCode = HTTP_BAD_REQUEST; int resultCode = HTTP_BAD_REQUEST;
std::string removeAddr = req.get_header_value("REMOTE_ADDR");
rapidjson::Document document; rapidjson::Document document;
if (!document.Parse(req.body.c_str()).HasParseError()) if (!document.Parse(req.body.c_str()).HasParseError())
{ {
ClientStatus clientStatus; ClientStatus clientStatus;
clientStatus.parseFromJson(document); clientStatus.parseFromJson(document);
clientStatus.setExternalIp(req.remoteAddr); clientStatus.setExternalIp(removeAddr);
setClientLog(static_cast<size_t>(m_config->clientLogHistory()), clientId, clientStatus.getLog()); setClientLog(static_cast<size_t>(m_config->clientLogHistory()), clientId, clientStatus.getLog());
@ -294,7 +298,7 @@ int Service::setClientStatus(const httplib::Request& req, const std::string& cli
else else
{ {
LOG_ERR("[%s] ClientStatus for client '%s' - Parse Error Occured: %d", LOG_ERR("[%s] ClientStatus for client '%s' - Parse Error Occured: %d",
req.remoteAddr.c_str(), clientId.c_str(), document.GetParseError()); removeAddr.c_str(), clientId.c_str(), document.GetParseError());
} }
return resultCode; return resultCode;

View file

@ -183,6 +183,7 @@ private:
0, // RX_WOW 0, // RX_WOW
0, // RX_LOKI 0, // RX_LOKI
0, // RX_ARQ 0, // RX_ARQ
0, // RX_SFX
# endif # endif
# ifdef XMRIG_ALGO_ARGON2 # ifdef XMRIG_ALGO_ARGON2
0, // AR2_CHUKWA 0, // AR2_CHUKWA
@ -227,6 +228,7 @@ private:
0, // RX_WOW 0, // RX_WOW
0, // RX_LOKI 0, // RX_LOKI
0, // RX_ARQ 0, // RX_ARQ
0, // RX_SFX
# endif # endif
# ifdef XMRIG_ALGO_ARGON2 # ifdef XMRIG_ALGO_ARGON2
0, // AR2_CHUKWA 0, // AR2_CHUKWA
@ -271,6 +273,7 @@ private:
Algorithm::INVALID, // RX_WOW Algorithm::INVALID, // RX_WOW
Algorithm::INVALID, // RX_LOKI Algorithm::INVALID, // RX_LOKI
Algorithm::INVALID, // RX_ARQ Algorithm::INVALID, // RX_ARQ
Algorithm::INVALID, // RX_SFX
# endif # endif
# ifdef XMRIG_ALGO_ARGON2 # ifdef XMRIG_ALGO_ARGON2
Algorithm::INVALID, // AR2_CHUKWA Algorithm::INVALID, // AR2_CHUKWA

View file

@ -125,6 +125,8 @@ static AlgoName const algorithm_names[] = {
{ "RandomXL", nullptr, Algorithm::RX_LOKI }, { "RandomXL", nullptr, Algorithm::RX_LOKI },
{ "randomx/arq", "rx/arq", Algorithm::RX_ARQ }, { "randomx/arq", "rx/arq", Algorithm::RX_ARQ },
{ "RandomARQ", nullptr, Algorithm::RX_ARQ }, { "RandomARQ", nullptr, Algorithm::RX_ARQ },
{ "randomx/sfx", "rx/sfx", Algorithm::RX_SFX },
{ "RandomSFX", nullptr, Algorithm::RX_SFX },
# endif # endif
# ifdef XMRIG_ALGO_ARGON2 # ifdef XMRIG_ALGO_ARGON2
{ "argon2/chukwa", nullptr, Algorithm::AR2_CHUKWA }, { "argon2/chukwa", nullptr, Algorithm::AR2_CHUKWA },
@ -155,6 +157,7 @@ size_t xmrig::Algorithm::l2() const
switch (m_id) { switch (m_id) {
case RX_0: case RX_0:
case RX_LOKI: case RX_LOKI:
case RX_SFX:
return 0x40000; return 0x40000;
case RX_WOW: case RX_WOW:
@ -188,6 +191,7 @@ size_t xmrig::Algorithm::l3() const
switch (m_id) { switch (m_id) {
case RX_0: case RX_0:
case RX_LOKI: case RX_LOKI:
case RX_SFX:
return oneMiB * 2; return oneMiB * 2;
case RX_WOW: case RX_WOW:
@ -294,6 +298,7 @@ xmrig::Algorithm::Family xmrig::Algorithm::family(Id id)
case RX_WOW: case RX_WOW:
case RX_LOKI: case RX_LOKI:
case RX_ARQ: case RX_ARQ:
case RX_SFX:
return RANDOM_X; return RANDOM_X;
# endif # endif

View file

@ -77,6 +77,7 @@ public:
RX_WOW, // "rx/wow" RandomWOW (Wownero). RX_WOW, // "rx/wow" RandomWOW (Wownero).
RX_LOKI, // "rx/loki" RandomXL (Loki). RX_LOKI, // "rx/loki" RandomXL (Loki).
RX_ARQ, // "rx/arq" RandomARQ (Arqma). RX_ARQ, // "rx/arq" RandomARQ (Arqma).
RX_SFX, // "rx/sfx" RandomSFX (Safex).
# endif # endif
# ifdef XMRIG_ALGO_ARGON2 # ifdef XMRIG_ALGO_ARGON2
AR2_CHUKWA, // "argon2/chukwa" AR2_CHUKWA, // "argon2/chukwa"

View file

@ -51,52 +51,52 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
template<bool softAes> template<bool softAes>
void hashAes1Rx4(const void *input, size_t inputSize, void *hash) { void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
const uint8_t* inptr = (uint8_t*)input; const uint8_t* inptr = (uint8_t*)input;
const uint8_t* inputEnd = inptr + inputSize; const uint8_t* inputEnd = inptr + inputSize;
rx_vec_i128 state0, state1, state2, state3; rx_vec_i128 state0, state1, state2, state3;
rx_vec_i128 in0, in1, in2, in3; rx_vec_i128 in0, in1, in2, in3;
//intial state //intial state
state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0); state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1); state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2); state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3); state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3);
//process 64 bytes at a time in 4 lanes //process 64 bytes at a time in 4 lanes
while (inptr < inputEnd) { while (inptr < inputEnd) {
in0 = rx_load_vec_i128((rx_vec_i128*)inptr + 0); in0 = rx_load_vec_i128((rx_vec_i128*)inptr + 0);
in1 = rx_load_vec_i128((rx_vec_i128*)inptr + 1); in1 = rx_load_vec_i128((rx_vec_i128*)inptr + 1);
in2 = rx_load_vec_i128((rx_vec_i128*)inptr + 2); in2 = rx_load_vec_i128((rx_vec_i128*)inptr + 2);
in3 = rx_load_vec_i128((rx_vec_i128*)inptr + 3); in3 = rx_load_vec_i128((rx_vec_i128*)inptr + 3);
state0 = aesenc<softAes>(state0, in0); state0 = aesenc<softAes>(state0, in0);
state1 = aesdec<softAes>(state1, in1); state1 = aesdec<softAes>(state1, in1);
state2 = aesenc<softAes>(state2, in2); state2 = aesenc<softAes>(state2, in2);
state3 = aesdec<softAes>(state3, in3); state3 = aesdec<softAes>(state3, in3);
inptr += 64; inptr += 64;
} }
//two extra rounds to achieve full diffusion //two extra rounds to achieve full diffusion
rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0); rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0);
rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1); rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1);
state0 = aesenc<softAes>(state0, xkey0); state0 = aesenc<softAes>(state0, xkey0);
state1 = aesdec<softAes>(state1, xkey0); state1 = aesdec<softAes>(state1, xkey0);
state2 = aesenc<softAes>(state2, xkey0); state2 = aesenc<softAes>(state2, xkey0);
state3 = aesdec<softAes>(state3, xkey0); state3 = aesdec<softAes>(state3, xkey0);
state0 = aesenc<softAes>(state0, xkey1); state0 = aesenc<softAes>(state0, xkey1);
state1 = aesdec<softAes>(state1, xkey1); state1 = aesdec<softAes>(state1, xkey1);
state2 = aesenc<softAes>(state2, xkey1); state2 = aesenc<softAes>(state2, xkey1);
state3 = aesdec<softAes>(state3, xkey1); state3 = aesdec<softAes>(state3, xkey1);
//output hash //output hash
rx_store_vec_i128((rx_vec_i128*)hash + 0, state0); rx_store_vec_i128((rx_vec_i128*)hash + 0, state0);
rx_store_vec_i128((rx_vec_i128*)hash + 1, state1); rx_store_vec_i128((rx_vec_i128*)hash + 1, state1);
rx_store_vec_i128((rx_vec_i128*)hash + 2, state2); rx_store_vec_i128((rx_vec_i128*)hash + 2, state2);
rx_store_vec_i128((rx_vec_i128*)hash + 3, state3); rx_store_vec_i128((rx_vec_i128*)hash + 3, state3);
} }
template void hashAes1Rx4<false>(const void *input, size_t inputSize, void *hash); template void hashAes1Rx4<false>(const void *input, size_t inputSize, void *hash);
@ -119,40 +119,40 @@ template void hashAes1Rx4<true>(const void *input, size_t inputSize, void *hash)
*/ */
template<bool softAes> template<bool softAes>
void fillAes1Rx4(void *state, size_t outputSize, void *buffer) { void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
const uint8_t* outptr = (uint8_t*)buffer; const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize; const uint8_t* outputEnd = outptr + outputSize;
rx_vec_i128 state0, state1, state2, state3; rx_vec_i128 state0, state1, state2, state3;
rx_vec_i128 key0, key1, key2, key3; rx_vec_i128 key0, key1, key2, key3;
key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0); key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0);
key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1); key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1);
key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2); key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2);
key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3); key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3);
state0 = rx_load_vec_i128((rx_vec_i128*)state + 0); state0 = rx_load_vec_i128((rx_vec_i128*)state + 0);
state1 = rx_load_vec_i128((rx_vec_i128*)state + 1); state1 = rx_load_vec_i128((rx_vec_i128*)state + 1);
state2 = rx_load_vec_i128((rx_vec_i128*)state + 2); state2 = rx_load_vec_i128((rx_vec_i128*)state + 2);
state3 = rx_load_vec_i128((rx_vec_i128*)state + 3); state3 = rx_load_vec_i128((rx_vec_i128*)state + 3);
while (outptr < outputEnd) { while (outptr < outputEnd) {
state0 = aesdec<softAes>(state0, key0); state0 = aesdec<softAes>(state0, key0);
state1 = aesenc<softAes>(state1, key1); state1 = aesenc<softAes>(state1, key1);
state2 = aesdec<softAes>(state2, key2); state2 = aesdec<softAes>(state2, key2);
state3 = aesenc<softAes>(state3, key3); state3 = aesenc<softAes>(state3, key3);
rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0); rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1); rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2); rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2);
rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3); rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3);
outptr += 64; outptr += 64;
} }
rx_store_vec_i128((rx_vec_i128*)state + 0, state0); rx_store_vec_i128((rx_vec_i128*)state + 0, state0);
rx_store_vec_i128((rx_vec_i128*)state + 1, state1); rx_store_vec_i128((rx_vec_i128*)state + 1, state1);
rx_store_vec_i128((rx_vec_i128*)state + 2, state2); rx_store_vec_i128((rx_vec_i128*)state + 2, state2);
rx_store_vec_i128((rx_vec_i128*)state + 3, state3); rx_store_vec_i128((rx_vec_i128*)state + 3, state3);
} }
template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer); template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer);
@ -160,55 +160,136 @@ template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
template<bool softAes> template<bool softAes>
void fillAes4Rx4(void *state, size_t outputSize, void *buffer) { void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
const uint8_t* outptr = (uint8_t*)buffer; const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize; const uint8_t* outputEnd = outptr + outputSize;
rx_vec_i128 state0, state1, state2, state3; rx_vec_i128 state0, state1, state2, state3;
rx_vec_i128 key0, key1, key2, key3, key4, key5, key6, key7; rx_vec_i128 key0, key1, key2, key3, key4, key5, key6, key7;
key0 = RandomX_CurrentConfig.fillAes4Rx4_Key[0]; key0 = RandomX_CurrentConfig.fillAes4Rx4_Key[0];
key1 = RandomX_CurrentConfig.fillAes4Rx4_Key[1]; key1 = RandomX_CurrentConfig.fillAes4Rx4_Key[1];
key2 = RandomX_CurrentConfig.fillAes4Rx4_Key[2]; key2 = RandomX_CurrentConfig.fillAes4Rx4_Key[2];
key3 = RandomX_CurrentConfig.fillAes4Rx4_Key[3]; key3 = RandomX_CurrentConfig.fillAes4Rx4_Key[3];
key4 = RandomX_CurrentConfig.fillAes4Rx4_Key[4]; key4 = RandomX_CurrentConfig.fillAes4Rx4_Key[4];
key5 = RandomX_CurrentConfig.fillAes4Rx4_Key[5]; key5 = RandomX_CurrentConfig.fillAes4Rx4_Key[5];
key6 = RandomX_CurrentConfig.fillAes4Rx4_Key[6]; key6 = RandomX_CurrentConfig.fillAes4Rx4_Key[6];
key7 = RandomX_CurrentConfig.fillAes4Rx4_Key[7]; key7 = RandomX_CurrentConfig.fillAes4Rx4_Key[7];
state0 = rx_load_vec_i128((rx_vec_i128*)state + 0); state0 = rx_load_vec_i128((rx_vec_i128*)state + 0);
state1 = rx_load_vec_i128((rx_vec_i128*)state + 1); state1 = rx_load_vec_i128((rx_vec_i128*)state + 1);
state2 = rx_load_vec_i128((rx_vec_i128*)state + 2); state2 = rx_load_vec_i128((rx_vec_i128*)state + 2);
state3 = rx_load_vec_i128((rx_vec_i128*)state + 3); state3 = rx_load_vec_i128((rx_vec_i128*)state + 3);
while (outptr < outputEnd) { while (outptr < outputEnd) {
state0 = aesdec<softAes>(state0, key0); state0 = aesdec<softAes>(state0, key0);
state1 = aesenc<softAes>(state1, key0); state1 = aesenc<softAes>(state1, key0);
state2 = aesdec<softAes>(state2, key4); state2 = aesdec<softAes>(state2, key4);
state3 = aesenc<softAes>(state3, key4); state3 = aesenc<softAes>(state3, key4);
state0 = aesdec<softAes>(state0, key1); state0 = aesdec<softAes>(state0, key1);
state1 = aesenc<softAes>(state1, key1); state1 = aesenc<softAes>(state1, key1);
state2 = aesdec<softAes>(state2, key5); state2 = aesdec<softAes>(state2, key5);
state3 = aesenc<softAes>(state3, key5); state3 = aesenc<softAes>(state3, key5);
state0 = aesdec<softAes>(state0, key2); state0 = aesdec<softAes>(state0, key2);
state1 = aesenc<softAes>(state1, key2); state1 = aesenc<softAes>(state1, key2);
state2 = aesdec<softAes>(state2, key6); state2 = aesdec<softAes>(state2, key6);
state3 = aesenc<softAes>(state3, key6); state3 = aesenc<softAes>(state3, key6);
state0 = aesdec<softAes>(state0, key3); state0 = aesdec<softAes>(state0, key3);
state1 = aesenc<softAes>(state1, key3); state1 = aesenc<softAes>(state1, key3);
state2 = aesdec<softAes>(state2, key7); state2 = aesdec<softAes>(state2, key7);
state3 = aesenc<softAes>(state3, key7); state3 = aesenc<softAes>(state3, key7);
rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0); rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1); rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2); rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2);
rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3); rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3);
outptr += 64; outptr += 64;
} }
} }
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer); template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer); template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
template<bool softAes>
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
// initial state
rx_vec_i128 hash_state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
rx_vec_i128 hash_state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
rx_vec_i128 hash_state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
rx_vec_i128 hash_state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3);
const rx_vec_i128 key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0);
const rx_vec_i128 key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1);
const rx_vec_i128 key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2);
const rx_vec_i128 key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3);
rx_vec_i128 fill_state0 = rx_load_vec_i128((rx_vec_i128*)fill_state + 0);
rx_vec_i128 fill_state1 = rx_load_vec_i128((rx_vec_i128*)fill_state + 1);
rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2);
rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3);
constexpr int PREFETCH_DISTANCE = 4096;
const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE;
scratchpadEnd -= PREFETCH_DISTANCE;
for (int i = 0; i < 2; ++i) {
//process 64 bytes at a time in 4 lanes
while (scratchpadPtr < scratchpadEnd) {
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0));
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1));
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2));
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3));
fill_state0 = aesdec<softAes>(fill_state0, key0);
fill_state1 = aesenc<softAes>(fill_state1, key1);
fill_state2 = aesdec<softAes>(fill_state2, key2);
fill_state3 = aesenc<softAes>(fill_state3, key3);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3);
rx_prefetch_t0(prefetchPtr);
scratchpadPtr += 64;
prefetchPtr += 64;
}
prefetchPtr = (const char*) scratchpad;
scratchpadEnd += PREFETCH_DISTANCE;
}
rx_store_vec_i128((rx_vec_i128*)fill_state + 0, fill_state0);
rx_store_vec_i128((rx_vec_i128*)fill_state + 1, fill_state1);
rx_store_vec_i128((rx_vec_i128*)fill_state + 2, fill_state2);
rx_store_vec_i128((rx_vec_i128*)fill_state + 3, fill_state3);
//two extra rounds to achieve full diffusion
rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0);
rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1);
hash_state0 = aesenc<softAes>(hash_state0, xkey0);
hash_state1 = aesdec<softAes>(hash_state1, xkey0);
hash_state2 = aesenc<softAes>(hash_state2, xkey0);
hash_state3 = aesdec<softAes>(hash_state3, xkey0);
hash_state0 = aesenc<softAes>(hash_state0, xkey1);
hash_state1 = aesdec<softAes>(hash_state1, xkey1);
hash_state2 = aesenc<softAes>(hash_state2, xkey1);
hash_state3 = aesdec<softAes>(hash_state3, xkey1);
//output hash
rx_store_vec_i128((rx_vec_i128*)hash + 0, hash_state0);
rx_store_vec_i128((rx_vec_i128*)hash + 1, hash_state1);
rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2);
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
}
template void hashAndFillAes1Rx4<false>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
template void hashAndFillAes1Rx4<true>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);

View file

@ -38,3 +38,6 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
template<bool softAes> template<bool softAes>
void fillAes4Rx4(void *state, size_t outputSize, void *buffer); void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
template<bool softAes>
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);

View file

@ -102,6 +102,7 @@ typedef __m128d rx_vec_f128;
#define rx_aligned_alloc(a, b) _mm_malloc(a,b) #define rx_aligned_alloc(a, b) _mm_malloc(a,b)
#define rx_aligned_free(a) _mm_free(a) #define rx_aligned_free(a) _mm_free(a)
#define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA) #define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
#define rx_prefetch_t0(x) _mm_prefetch((const char *)(x), _MM_HINT_T0)
#define rx_load_vec_f128 _mm_load_pd #define rx_load_vec_f128 _mm_load_pd
#define rx_store_vec_f128 _mm_store_pd #define rx_store_vec_f128 _mm_store_pd
@ -201,6 +202,7 @@ typedef union{
#define rx_aligned_alloc(a, b) malloc(a) #define rx_aligned_alloc(a, b) malloc(a)
#define rx_aligned_free(a) free(a) #define rx_aligned_free(a) free(a)
#define rx_prefetch_nta(x) #define rx_prefetch_nta(x)
#define rx_prefetch_t0(x)
/* Splat 64-bit long long to 2 64-bit long longs */ /* Splat 64-bit long long to 2 64-bit long longs */
FORCE_INLINE __m128i vec_splat2sd (int64_t scalar) FORCE_INLINE __m128i vec_splat2sd (int64_t scalar)
@ -376,11 +378,142 @@ FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
#define RANDOMX_DEFAULT_FENV #define RANDOMX_DEFAULT_FENV
void rx_reset_float_state(); #elif defined(__aarch64__)
void rx_set_rounding_mode(uint32_t mode); #include <stdlib.h>
#include <arm_neon.h>
#include <arm_acle.h>
#else //end altivec typedef uint8x16_t rx_vec_i128;
typedef float64x2_t rx_vec_f128;
inline void* rx_aligned_alloc(size_t size, size_t align) {
void* p;
if (posix_memalign(&p, align, size) == 0)
return p;
return 0;
};
#define rx_aligned_free(a) free(a)
inline void rx_prefetch_nta(void* ptr) {
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
}
inline void rx_prefetch_t0(const void* ptr) {
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
}
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
return vld1q_f64((const float64_t*)pd);
}
FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 val) {
vst1q_f64((float64_t*)mem_addr, val);
}
FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
float64x2_t temp;
temp = vcopyq_laneq_f64(temp, 1, a, 1);
a = vcopyq_laneq_f64(a, 1, a, 0);
return vcopyq_laneq_f64(a, 0, temp, 1);
}
FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
uint64x2_t temp0 = vdupq_n_u64(x0);
uint64x2_t temp1 = vdupq_n_u64(x1);
return vreinterpretq_f64_u64(vcopyq_laneq_u64(temp0, 1, temp1, 0));
}
FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
return vreinterpretq_f64_u64(vdupq_n_u64(x));
}
#define rx_add_vec_f128 vaddq_f64
#define rx_sub_vec_f128 vsubq_f64
#define rx_mul_vec_f128 vmulq_f64
#define rx_div_vec_f128 vdivq_f64
#define rx_sqrt_vec_f128 vsqrtq_f64
FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return vreinterpretq_f64_u8(veorq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
}
FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
}
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
}
#ifdef __ARM_FEATURE_CRYPTO
FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 a, rx_vec_i128 key) {
const uint8x16_t zero = { 0 };
return vaesmcq_u8(vaeseq_u8(a, zero)) ^ key;
}
FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 a, rx_vec_i128 key) {
const uint8x16_t zero = { 0 };
return vaesimcq_u8(vaesdq_u8(a, zero)) ^ key;
}
#define HAVE_AES
#endif
#define rx_xor_vec_i128 veorq_u8
FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
return vgetq_lane_s32(vreinterpretq_s32_u8(a), 0);
}
FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
return vgetq_lane_s32(vreinterpretq_s32_u8(a), 1);
}
FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
return vgetq_lane_s32(vreinterpretq_s32_u8(a), 2);
}
FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
return vgetq_lane_s32(vreinterpretq_s32_u8(a), 3);
}
FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) {
int32_t data[4];
data[0] = _I0;
data[1] = _I1;
data[2] = _I2;
data[3] = _I3;
return vreinterpretq_u8_s32(vld1q_s32(data));
};
#define rx_xor_vec_i128 veorq_u8
FORCE_INLINE rx_vec_i128 rx_load_vec_i128(const rx_vec_i128* mem_addr) {
return vld1q_u8((const uint8_t*)mem_addr);
}
FORCE_INLINE void rx_store_vec_i128(rx_vec_i128* mem_addr, rx_vec_i128 val) {
vst1q_u8((uint8_t*)mem_addr, val);
}
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
double lo = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
double hi = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
rx_vec_f128 x;
x = vsetq_lane_f64(lo, x, 0);
x = vsetq_lane_f64(hi, x, 1);
return x;
}
#define RANDOMX_DEFAULT_FENV
#else //portable fallback
#include <cstdint> #include <cstdint>
#include <stdexcept> #include <stdexcept>
@ -405,6 +538,7 @@ typedef union {
#define rx_aligned_alloc(a, b) malloc(a) #define rx_aligned_alloc(a, b) malloc(a)
#define rx_aligned_free(a) free(a) #define rx_aligned_free(a) free(a)
#define rx_prefetch_nta(x) #define rx_prefetch_nta(x)
#define rx_prefetch_t0(x)
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) { FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
rx_vec_f128 x; rx_vec_f128 x;
@ -487,7 +621,6 @@ FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
return v; return v;
} }
FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
rx_vec_f128 x; rx_vec_f128 x;
x.i.u64[0] = a.i.u64[0] ^ b.i.u64[0]; x.i.u64[0] = a.i.u64[0] ^ b.i.u64[0];
@ -578,10 +711,6 @@ FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
#define RANDOMX_DEFAULT_FENV #define RANDOMX_DEFAULT_FENV
void rx_reset_float_state();
void rx_set_rounding_mode(uint32_t mode);
#endif #endif
#ifndef HAVE_AES #ifndef HAVE_AES
@ -598,8 +727,16 @@ FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
} }
#endif #endif
#ifdef RANDOMX_DEFAULT_FENV
void rx_reset_float_state();
void rx_set_rounding_mode(uint32_t mode);
#endif
double loadDoublePortable(const void* addr); double loadDoublePortable(const void* addr);
uint64_t mulh(uint64_t, uint64_t); uint64_t mulh(uint64_t, uint64_t);
int64_t smulh(int64_t, int64_t); int64_t smulh(int64_t, int64_t);
uint64_t rotl64(uint64_t, unsigned int); uint64_t rotl64(uint64_t, unsigned int);
uint64_t rotr64(uint64_t, unsigned int); uint64_t rotr64(uint64_t, unsigned int);

View file

@ -29,6 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdexcept> #include <stdexcept>
#include <cstring> #include <cstring>
#include <climits> #include <climits>
#include <atomic>
#include "crypto/randomx/jit_compiler_x86.hpp" #include "crypto/randomx/jit_compiler_x86.hpp"
#include "crypto/randomx/jit_compiler_x86_static.hpp" #include "crypto/randomx/jit_compiler_x86_static.hpp"
#include "crypto/randomx/superscalar.hpp" #include "crypto/randomx/superscalar.hpp"
@ -36,6 +37,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/reciprocal.h" #include "crypto/randomx/reciprocal.h"
#include "crypto/randomx/virtual_memory.hpp" #include "crypto/randomx/virtual_memory.hpp"
#ifdef _MSC_VER
# include <intrin.h>
#else
# include <cpuid.h>
#endif
namespace randomx { namespace randomx {
/* /*
@ -108,7 +115,7 @@ namespace randomx {
const int32_t codeSshPrefetchSize = codeShhEnd - codeShhPrefetch; const int32_t codeSshPrefetchSize = codeShhEnd - codeShhPrefetch;
const int32_t codeSshInitSize = codeProgramEnd - codeShhInit; const int32_t codeSshInitSize = codeProgramEnd - codeShhInit;
const int32_t epilogueOffset = CodeSize - epilogueSize; const int32_t epilogueOffset = (CodeSize - epilogueSize) & ~63;
constexpr int32_t superScalarHashOffset = 32768; constexpr int32_t superScalarHashOffset = 32768;
static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 }; static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 };
@ -183,6 +190,7 @@ namespace randomx {
static const uint8_t REX_ADD_I[] = { 0x49, 0x81 }; static const uint8_t REX_ADD_I[] = { 0x49, 0x81 };
static const uint8_t REX_TEST[] = { 0x49, 0xF7 }; static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
static const uint8_t JZ[] = { 0x0f, 0x84 }; static const uint8_t JZ[] = { 0x0f, 0x84 };
static const uint8_t JZ_SHORT = 0x74;
static const uint8_t RET = 0xc3; static const uint8_t RET = 0xc3;
static const uint8_t LEA_32[] = { 0x41, 0x8d }; static const uint8_t LEA_32[] = { 0x41, 0x8d };
static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 }; static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 };
@ -197,20 +205,100 @@ namespace randomx {
static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 }; static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 };
static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }; static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
// static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 }; static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
static const uint8_t JMP_ALIGN_PREFIX[14][16] = {
{},
{0x2E},
{0x2E, 0x2E},
{0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x66, 0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x0F, 0x1F, 0x40, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
{0x0F, 0x1F, 0x44, 0x00, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
};
bool JitCompilerX86::BranchesWithin32B = false;
size_t JitCompilerX86::getCodeSize() { size_t JitCompilerX86::getCodeSize() {
return codePos < prologueSize ? 0 : codePos - prologueSize; return codePos < prologueSize ? 0 : codePos - prologueSize;
} }
static inline void cpuid(uint32_t level, int32_t output[4])
{
memset(output, 0, sizeof(int32_t) * 4);
# ifdef _MSC_VER
__cpuid(output, static_cast<int>(level));
# else
__cpuid_count(level, 0, output[0], output[1], output[2], output[3]);
# endif
}
// CPU-specific tweaks
void JitCompilerX86::applyTweaks() {
int32_t info[4];
cpuid(0, info);
int32_t manufacturer[4];
manufacturer[0] = info[1];
manufacturer[1] = info[3];
manufacturer[2] = info[2];
manufacturer[3] = 0;
if (strcmp((const char*)manufacturer, "GenuineIntel") == 0) {
struct
{
unsigned int stepping : 4;
unsigned int model : 4;
unsigned int family : 4;
unsigned int processor_type : 2;
unsigned int reserved1 : 2;
unsigned int ext_model : 4;
unsigned int ext_family : 8;
unsigned int reserved2 : 4;
} processor_info;
cpuid(1, info);
memcpy(&processor_info, info, sizeof(processor_info));
// Intel JCC erratum mitigation
if (processor_info.family == 6) {
const uint32_t model = processor_info.model | (processor_info.ext_model << 4);
const uint32_t stepping = processor_info.stepping;
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
BranchesWithin32B =
((model == 0x4E) && (stepping == 0x3)) ||
((model == 0x55) && (stepping == 0x4)) ||
((model == 0x5E) && (stepping == 0x3)) ||
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
((model == 0xA6) && (stepping == 0x0)) ||
((model == 0xAE) && (stepping == 0xA));
}
}
}
static std::atomic<size_t> codeOffset;
JitCompilerX86::JitCompilerX86() { JitCompilerX86::JitCompilerX86() {
code = (uint8_t*)allocExecutableMemory(CodeSize); applyTweaks();
allocatedCode = (uint8_t*)allocExecutableMemory(CodeSize * 2);
// Shift code base address to improve caching - all threads will use different L2/L3 cache sets
code = allocatedCode + (codeOffset.fetch_add(59 * 64) % CodeSize);
memcpy(code, codePrologue, prologueSize); memcpy(code, codePrologue, prologueSize);
memcpy(code + epilogueOffset, codeEpilogue, epilogueSize); memcpy(code + epilogueOffset, codeEpilogue, epilogueSize);
} }
JitCompilerX86::~JitCompilerX86() { JitCompilerX86::~JitCompilerX86() {
freePagedMemory(code, CodeSize); freePagedMemory(allocatedCode, CodeSize);
} }
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) { void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) {
@ -268,8 +356,6 @@ namespace randomx {
} }
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) { void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
memset(registerUsage, -1, sizeof(registerUsage));
codePos = ((uint8_t*)randomx_program_prologue_first_load) - ((uint8_t*)randomx_program_prologue); codePos = ((uint8_t*)randomx_program_prologue_first_load) - ((uint8_t*)randomx_program_prologue);
code[codePos + 2] = 0xc0 + pcfg.readReg0; code[codePos + 2] = 0xc0 + pcfg.readReg0;
code[codePos + 5] = 0xc0 + pcfg.readReg1; code[codePos + 5] = 0xc0 + pcfg.readReg1;
@ -280,13 +366,21 @@ namespace randomx {
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask)); memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
memcpy(code + codePos, codeLoopLoad, loopLoadSize); memcpy(code + codePos, codeLoopLoad, loopLoadSize);
codePos += loopLoadSize; codePos += loopLoadSize;
for (unsigned i = 0; i < prog.getSize(); ++i) {
Instruction& instr = prog(i); //mark all registers as used
instr.src %= RegistersCount; uint64_t* r = (uint64_t*)registerUsage;
instr.dst %= RegistersCount; uint64_t k = codePos;
instructionOffsets[i] = codePos; k |= k << 32;
(this->*(engine[instr.opcode]))(instr, i); for (unsigned j = 0; j < RegistersCount / 2; ++j) {
r[j] = k;
} }
for (int i = 0, n = static_cast<int>(RandomX_CurrentConfig.ProgramSize); i < n; ++i) {
Instruction instr = prog(i);
*((uint64_t*)&instr) &= (uint64_t(-1) - (0xFFFF << 8)) | ((RegistersCount - 1) << 8) | ((RegistersCount - 1) << 16);
(this->*(engine[instr.opcode]))(instr);
}
emit(REX_MOV_RR, code, codePos); emit(REX_MOV_RR, code, codePos);
emitByte(0xc0 + pcfg.readReg2, code, codePos); emitByte(0xc0 + pcfg.readReg2, code, codePos);
emit(REX_XOR_EAX, code, codePos); emit(REX_XOR_EAX, code, codePos);
@ -301,6 +395,22 @@ namespace randomx {
emit(RandomX_CurrentConfig.codePrefetchScratchpadTweaked, prefetchScratchpadSize, code, codePos); emit(RandomX_CurrentConfig.codePrefetchScratchpadTweaked, prefetchScratchpadSize, code, codePos);
memcpy(code + codePos, codeLoopStore, loopStoreSize); memcpy(code + codePos, codeLoopStore, loopStoreSize);
codePos += loopStoreSize; codePos += loopStoreSize;
if (BranchesWithin32B) {
const uint32_t branch_begin = static_cast<uint32_t>(codePos);
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + 9);
// If the jump crosses or touches 32-byte boundary, align it
if ((branch_begin ^ branch_end) >= 32) {
uint32_t alignment_size = 32 - (branch_begin & 31);
if (alignment_size > 8) {
emit(NOPX[alignment_size - 9], alignment_size - 8, code, codePos);
alignment_size = 8;
}
emit(NOPX[alignment_size - 1], alignment_size, code, codePos);
}
}
emit(SUB_EBX, code, codePos); emit(SUB_EBX, code, codePos);
emit(JNZ, code, codePos); emit(JNZ, code, codePos);
emit32(prologueSize - codePos - 4, code, codePos); emit32(prologueSize - codePos - 4, code, codePos);
@ -311,103 +421,104 @@ namespace randomx {
void JitCompilerX86::generateSuperscalarCode(Instruction& instr, std::vector<uint64_t> &reciprocalCache) { void JitCompilerX86::generateSuperscalarCode(Instruction& instr, std::vector<uint64_t> &reciprocalCache) {
switch ((SuperscalarInstructionType)instr.opcode) switch ((SuperscalarInstructionType)instr.opcode)
{ {
case randomx::SuperscalarInstructionType::ISUB_R: case randomx::SuperscalarInstructionType::ISUB_R:
emit(REX_SUB_RR, code, codePos); emit(REX_SUB_RR, code, codePos);
emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos); emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos);
break; break;
case randomx::SuperscalarInstructionType::IXOR_R: case randomx::SuperscalarInstructionType::IXOR_R:
emit(REX_XOR_RR, code, codePos); emit(REX_XOR_RR, code, codePos);
emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos); emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos);
break; break;
case randomx::SuperscalarInstructionType::IADD_RS: case randomx::SuperscalarInstructionType::IADD_RS:
emit(REX_LEA, code, codePos); emit(REX_LEA, code, codePos);
emitByte(0x04 + 8 * instr.dst, code, codePos); emitByte(0x04 + 8 * instr.dst, code, codePos);
genSIB(instr.getModShift(), instr.src, instr.dst, code, codePos); genSIB(instr.getModShift(), instr.src, instr.dst, code, codePos);
break; break;
case randomx::SuperscalarInstructionType::IMUL_R: case randomx::SuperscalarInstructionType::IMUL_R:
emit(REX_IMUL_RR, code, codePos); emit(REX_IMUL_RR, code, codePos);
emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos); emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos);
break; break;
case randomx::SuperscalarInstructionType::IROR_C: case randomx::SuperscalarInstructionType::IROR_C:
emit(REX_ROT_I8, code, codePos); emit(REX_ROT_I8, code, codePos);
emitByte(0xc8 + instr.dst, code, codePos); emitByte(0xc8 + instr.dst, code, codePos);
emitByte(instr.getImm32() & 63, code, codePos); emitByte(instr.getImm32() & 63, code, codePos);
break; break;
case randomx::SuperscalarInstructionType::IADD_C7: case randomx::SuperscalarInstructionType::IADD_C7:
emit(REX_81, code, codePos); emit(REX_81, code, codePos);
emitByte(0xc0 + instr.dst, code, codePos); emitByte(0xc0 + instr.dst, code, codePos);
emit32(instr.getImm32(), code, codePos); emit32(instr.getImm32(), code, codePos);
break; break;
case randomx::SuperscalarInstructionType::IXOR_C7: case randomx::SuperscalarInstructionType::IXOR_C7:
emit(REX_XOR_RI, code, codePos); emit(REX_XOR_RI, code, codePos);
emitByte(0xf0 + instr.dst, code, codePos); emitByte(0xf0 + instr.dst, code, codePos);
emit32(instr.getImm32(), code, codePos); emit32(instr.getImm32(), code, codePos);
break; break;
case randomx::SuperscalarInstructionType::IADD_C8: case randomx::SuperscalarInstructionType::IADD_C8:
emit(REX_81, code, codePos); emit(REX_81, code, codePos);
emitByte(0xc0 + instr.dst, code, codePos); emitByte(0xc0 + instr.dst, code, codePos);
emit32(instr.getImm32(), code, codePos); emit32(instr.getImm32(), code, codePos);
#ifdef RANDOMX_ALIGN #ifdef RANDOMX_ALIGN
emit(NOP1, code, codePos); emit(NOP1, code, codePos);
#endif #endif
break; break;
case randomx::SuperscalarInstructionType::IXOR_C8: case randomx::SuperscalarInstructionType::IXOR_C8:
emit(REX_XOR_RI, code, codePos); emit(REX_XOR_RI, code, codePos);
emitByte(0xf0 + instr.dst, code, codePos); emitByte(0xf0 + instr.dst, code, codePos);
emit32(instr.getImm32(), code, codePos); emit32(instr.getImm32(), code, codePos);
#ifdef RANDOMX_ALIGN #ifdef RANDOMX_ALIGN
emit(NOP1, code, codePos); emit(NOP1, code, codePos);
#endif #endif
break; break;
case randomx::SuperscalarInstructionType::IADD_C9: case randomx::SuperscalarInstructionType::IADD_C9:
emit(REX_81, code, codePos); emit(REX_81, code, codePos);
emitByte(0xc0 + instr.dst, code, codePos); emitByte(0xc0 + instr.dst, code, codePos);
emit32(instr.getImm32(), code, codePos); emit32(instr.getImm32(), code, codePos);
#ifdef RANDOMX_ALIGN #ifdef RANDOMX_ALIGN
emit(NOP2, code, codePos); emit(NOP2, code, codePos);
#endif #endif
break; break;
case randomx::SuperscalarInstructionType::IXOR_C9: case randomx::SuperscalarInstructionType::IXOR_C9:
emit(REX_XOR_RI, code, codePos); emit(REX_XOR_RI, code, codePos);
emitByte(0xf0 + instr.dst, code, codePos); emitByte(0xf0 + instr.dst, code, codePos);
emit32(instr.getImm32(), code, codePos); emit32(instr.getImm32(), code, codePos);
#ifdef RANDOMX_ALIGN #ifdef RANDOMX_ALIGN
emit(NOP2, code, codePos); emit(NOP2, code, codePos);
#endif #endif
break; break;
case randomx::SuperscalarInstructionType::IMULH_R: case randomx::SuperscalarInstructionType::IMULH_R:
emit(REX_MOV_RR64, code, codePos); emit(REX_MOV_RR64, code, codePos);
emitByte(0xc0 + instr.dst, code, codePos); emitByte(0xc0 + instr.dst, code, codePos);
emit(REX_MUL_R, code, codePos); emit(REX_MUL_R, code, codePos);
emitByte(0xe0 + instr.src, code, codePos); emitByte(0xe0 + instr.src, code, codePos);
emit(REX_MOV_R64R, code, codePos); emit(REX_MOV_R64R, code, codePos);
emitByte(0xc2 + 8 * instr.dst, code, codePos); emitByte(0xc2 + 8 * instr.dst, code, codePos);
break; break;
case randomx::SuperscalarInstructionType::ISMULH_R: case randomx::SuperscalarInstructionType::ISMULH_R:
emit(REX_MOV_RR64, code, codePos); emit(REX_MOV_RR64, code, codePos);
emitByte(0xc0 + instr.dst, code, codePos); emitByte(0xc0 + instr.dst, code, codePos);
emit(REX_MUL_R, code, codePos); emit(REX_MUL_R, code, codePos);
emitByte(0xe8 + instr.src, code, codePos); emitByte(0xe8 + instr.src, code, codePos);
emit(REX_MOV_R64R, code, codePos); emit(REX_MOV_R64R, code, codePos);
emitByte(0xc2 + 8 * instr.dst, code, codePos); emitByte(0xc2 + 8 * instr.dst, code, codePos);
break; break;
case randomx::SuperscalarInstructionType::IMUL_RCP: case randomx::SuperscalarInstructionType::IMUL_RCP:
emit(MOV_RAX_I, code, codePos); emit(MOV_RAX_I, code, codePos);
emit64(reciprocalCache[instr.getImm32()], code, codePos); emit64(reciprocalCache[instr.getImm32()], code, codePos);
emit(REX_IMUL_RM, code, codePos); emit(REX_IMUL_RM, code, codePos);
emitByte(0xc0 + 8 * instr.dst, code, codePos); emitByte(0xc0 + 8 * instr.dst, code, codePos);
break; break;
default: default:
UNREACHABLE; UNREACHABLE;
} }
} }
void JitCompilerX86::genAddressReg(Instruction& instr, uint8_t* code, int& codePos, bool rax) { template<bool rax>
emit(LEA_32, code, codePos); FORCE_INLINE void JitCompilerX86::genAddressReg(const Instruction& instr, uint8_t* code, int& codePos) {
emitByte(0x80 + instr.src + (rax ? 0 : 8), code, codePos); const uint32_t src = *((uint32_t*)&instr) & 0xFF0000;
if (instr.src == RegisterNeedsSib) {
emitByte(0x24, code, codePos); *(uint32_t*)(code + codePos) = (rax ? 0x24808d41 : 0x24888d41) + src;
} codePos += (src == (RegisterNeedsSib << 16)) ? 4 : 3;
emit32(instr.getImm32(), code, codePos); emit32(instr.getImm32(), code, codePos);
if (rax) if (rax)
emitByte(AND_EAX_I, code, codePos); emitByte(AND_EAX_I, code, codePos);
@ -416,12 +527,14 @@ namespace randomx {
emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask, code, codePos); emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask, code, codePos);
} }
void JitCompilerX86::genAddressRegDst(Instruction& instr, uint8_t* code, int& codePos) { template void JitCompilerX86::genAddressReg<false>(const Instruction& instr, uint8_t* code, int& codePos);
emit(LEA_32, code, codePos); template void JitCompilerX86::genAddressReg<true>(const Instruction& instr, uint8_t* code, int& codePos);
emitByte(0x80 + instr.dst, code, codePos);
if (instr.dst == RegisterNeedsSib) { FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, int& codePos) {
emitByte(0x24, code, codePos); const uint32_t dst = static_cast<uint32_t>(instr.dst) << 16;
} *(uint32_t*)(code + codePos) = 0x24808d41 + dst;
codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3;
emit32(instr.getImm32(), code, codePos); emit32(instr.getImm32(), code, codePos);
emitByte(AND_EAX_I, code, codePos); emitByte(AND_EAX_I, code, codePos);
if (instr.getModCond() < StoreL3Condition) { if (instr.getModCond() < StoreL3Condition) {
@ -432,7 +545,7 @@ namespace randomx {
} }
} }
void JitCompilerX86::genAddressImm(Instruction& instr, uint8_t* code, int& codePos) { FORCE_INLINE void JitCompilerX86::genAddressImm(const Instruction& instr, uint8_t* code, int& codePos) {
emit32(instr.getImm32() & ScratchpadL3Mask, code, codePos); emit32(instr.getImm32() & ScratchpadL3Mask, code, codePos);
} }
@ -447,17 +560,18 @@ namespace randomx {
0x3c8d4f, 0x3c8d4f,
}; };
void JitCompilerX86::h_IADD_RS(Instruction& instr, int i) { void JitCompilerX86::h_IADD_RS(const Instruction& instr) {
int pos = codePos; int pos = codePos;
uint8_t* const p = code + pos; uint8_t* const p = code + pos;
registerUsage[instr.dst] = i;
const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | instr.dst; const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | instr.dst;
*(uint32_t*)(p) = template_IADD_RS[instr.dst] | (sib << 24); *(uint32_t*)(p) = template_IADD_RS[instr.dst] | (sib << 24);
*(uint32_t*)(p + 4) = instr.getImm32(); *(uint32_t*)(p + 4) = instr.getImm32();
codePos = pos + ((instr.dst == RegisterNeedsDisplacement) ? 8 : 4); pos += ((instr.dst == RegisterNeedsDisplacement) ? 8 : 4);
registerUsage[instr.dst] = pos;
codePos = pos;
} }
static const uint32_t template_IADD_M[8] = { static const uint32_t template_IADD_M[8] = {
@ -471,13 +585,12 @@ namespace randomx {
0x063c034c, 0x063c034c,
}; };
void JitCompilerX86::h_IADD_M(Instruction& instr, int i) { void JitCompilerX86::h_IADD_M(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) { if (instr.src != instr.dst) {
genAddressReg(instr, p, pos); genAddressReg<true>(instr, p, pos);
emit32(template_IADD_M[instr.dst], p, pos); emit32(template_IADD_M[instr.dst], p, pos);
} }
else { else {
@ -486,6 +599,7 @@ namespace randomx {
genAddressImm(instr, p, pos); genAddressImm(instr, p, pos);
} }
registerUsage[instr.dst] = pos;
codePos = pos; codePos = pos;
} }
@ -493,11 +607,10 @@ namespace randomx {
emitByte((scale << 6) | (index << 3) | base, code, codePos); emitByte((scale << 6) | (index << 3) | base, code, codePos);
} }
void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) { void JitCompilerX86::h_ISUB_R(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) { if (instr.src != instr.dst) {
emit(REX_SUB_RR, p, pos); emit(REX_SUB_RR, p, pos);
emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos); emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos);
@ -508,16 +621,16 @@ namespace randomx {
emit32(instr.getImm32(), p, pos); emit32(instr.getImm32(), p, pos);
} }
registerUsage[instr.dst] = pos;
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_ISUB_M(Instruction& instr, int i) { void JitCompilerX86::h_ISUB_M(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) { if (instr.src != instr.dst) {
genAddressReg(instr, p, pos); genAddressReg<true>(instr, p, pos);
emit(REX_SUB_RM, p, pos); emit(REX_SUB_RM, p, pos);
emitByte(0x04 + 8 * instr.dst, p, pos); emitByte(0x04 + 8 * instr.dst, p, pos);
emitByte(0x06, p, pos); emitByte(0x06, p, pos);
@ -528,14 +641,14 @@ namespace randomx {
genAddressImm(instr, p, pos); genAddressImm(instr, p, pos);
} }
registerUsage[instr.dst] = pos;
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) { void JitCompilerX86::h_IMUL_R(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) { if (instr.src != instr.dst) {
emit(REX_IMUL_RR, p, pos); emit(REX_IMUL_RR, p, pos);
emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos); emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos);
@ -546,16 +659,16 @@ namespace randomx {
emit32(instr.getImm32(), p, pos); emit32(instr.getImm32(), p, pos);
} }
registerUsage[instr.dst] = pos;
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_IMUL_M(Instruction& instr, int i) { void JitCompilerX86::h_IMUL_M(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) { if (instr.src != instr.dst) {
genAddressReg(instr, p, pos); genAddressReg<true>(instr, p, pos);
emit(REX_IMUL_RM, p, pos); emit(REX_IMUL_RM, p, pos);
emitByte(0x04 + 8 * instr.dst, p, pos); emitByte(0x04 + 8 * instr.dst, p, pos);
emitByte(0x06, p, pos); emitByte(0x06, p, pos);
@ -566,14 +679,14 @@ namespace randomx {
genAddressImm(instr, p, pos); genAddressImm(instr, p, pos);
} }
registerUsage[instr.dst] = pos;
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) { void JitCompilerX86::h_IMULH_R(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
registerUsage[instr.dst] = i;
emit(REX_MOV_RR64, p, pos); emit(REX_MOV_RR64, p, pos);
emitByte(0xc0 + instr.dst, p, pos); emitByte(0xc0 + instr.dst, p, pos);
emit(REX_MUL_R, p, pos); emit(REX_MUL_R, p, pos);
@ -581,16 +694,16 @@ namespace randomx {
emit(REX_MOV_R64R, p, pos); emit(REX_MOV_R64R, p, pos);
emitByte(0xc2 + 8 * instr.dst, p, pos); emitByte(0xc2 + 8 * instr.dst, p, pos);
registerUsage[instr.dst] = pos;
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_IMULH_M(Instruction& instr, int i) { void JitCompilerX86::h_IMULH_M(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) { if (instr.src != instr.dst) {
genAddressReg(instr, p, pos, false); genAddressReg<false>(instr, p, pos);
emit(REX_MOV_RR64, p, pos); emit(REX_MOV_RR64, p, pos);
emitByte(0xc0 + instr.dst, p, pos); emitByte(0xc0 + instr.dst, p, pos);
emit(REX_MUL_MEM, p, pos); emit(REX_MUL_MEM, p, pos);
@ -605,14 +718,14 @@ namespace randomx {
emit(REX_MOV_R64R, p, pos); emit(REX_MOV_R64R, p, pos);
emitByte(0xc2 + 8 * instr.dst, p, pos); emitByte(0xc2 + 8 * instr.dst, p, pos);
registerUsage[instr.dst] = pos;
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) { void JitCompilerX86::h_ISMULH_R(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
registerUsage[instr.dst] = i;
emit(REX_MOV_RR64, p, pos); emit(REX_MOV_RR64, p, pos);
emitByte(0xc0 + instr.dst, p, pos); emitByte(0xc0 + instr.dst, p, pos);
emit(REX_MUL_R, p, pos); emit(REX_MUL_R, p, pos);
@ -620,16 +733,16 @@ namespace randomx {
emit(REX_MOV_R64R, p, pos); emit(REX_MOV_R64R, p, pos);
emitByte(0xc2 + 8 * instr.dst, p, pos); emitByte(0xc2 + 8 * instr.dst, p, pos);
registerUsage[instr.dst] = pos;
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_ISMULH_M(Instruction& instr, int i) { void JitCompilerX86::h_ISMULH_M(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) { if (instr.src != instr.dst) {
genAddressReg(instr, p, pos, false); genAddressReg<false>(instr, p, pos);
emit(REX_MOV_RR64, p, pos); emit(REX_MOV_RR64, p, pos);
emitByte(0xc0 + instr.dst, p, pos); emitByte(0xc0 + instr.dst, p, pos);
emit(REX_IMUL_MEM, p, pos); emit(REX_IMUL_MEM, p, pos);
@ -644,41 +757,41 @@ namespace randomx {
emit(REX_MOV_R64R, p, pos); emit(REX_MOV_R64R, p, pos);
emitByte(0xc2 + 8 * instr.dst, p, pos); emitByte(0xc2 + 8 * instr.dst, p, pos);
registerUsage[instr.dst] = pos;
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) { void JitCompilerX86::h_IMUL_RCP(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
uint64_t divisor = instr.getImm32(); uint64_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) { if (!isZeroOrPowerOf2(divisor)) {
registerUsage[instr.dst] = i;
emit(MOV_RAX_I, p, pos); emit(MOV_RAX_I, p, pos);
emit64(randomx_reciprocal_fast(divisor), p, pos); emit64(randomx_reciprocal_fast(divisor), p, pos);
emit(REX_IMUL_RM, p, pos); emit(REX_IMUL_RM, p, pos);
emitByte(0xc0 + 8 * instr.dst, p, pos); emitByte(0xc0 + 8 * instr.dst, p, pos);
registerUsage[instr.dst] = pos;
} }
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_INEG_R(Instruction& instr, int i) { void JitCompilerX86::h_INEG_R(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
registerUsage[instr.dst] = i;
emit(REX_NEG, p, pos); emit(REX_NEG, p, pos);
emitByte(0xd8 + instr.dst, p, pos); emitByte(0xd8 + instr.dst, p, pos);
registerUsage[instr.dst] = pos;
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) { void JitCompilerX86::h_IXOR_R(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) { if (instr.src != instr.dst) {
emit(REX_XOR_RR, p, pos); emit(REX_XOR_RR, p, pos);
emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos); emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos);
@ -689,16 +802,16 @@ namespace randomx {
emit32(instr.getImm32(), p, pos); emit32(instr.getImm32(), p, pos);
} }
registerUsage[instr.dst] = pos;
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_IXOR_M(Instruction& instr, int i) { void JitCompilerX86::h_IXOR_M(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) { if (instr.src != instr.dst) {
genAddressReg(instr, p, pos); genAddressReg<true>(instr, p, pos);
emit(REX_XOR_RM, p, pos); emit(REX_XOR_RM, p, pos);
emitByte(0x04 + 8 * instr.dst, p, pos); emitByte(0x04 + 8 * instr.dst, p, pos);
emitByte(0x06, p, pos); emitByte(0x06, p, pos);
@ -709,14 +822,14 @@ namespace randomx {
genAddressImm(instr, p, pos); genAddressImm(instr, p, pos);
} }
registerUsage[instr.dst] = pos;
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_IROR_R(Instruction& instr, int i) { void JitCompilerX86::h_IROR_R(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) { if (instr.src != instr.dst) {
emit(REX_MOV_RR, p, pos); emit(REX_MOV_RR, p, pos);
emitByte(0xc8 + instr.src, p, pos); emitByte(0xc8 + instr.src, p, pos);
@ -729,14 +842,14 @@ namespace randomx {
emitByte(instr.getImm32() & 63, p, pos); emitByte(instr.getImm32() & 63, p, pos);
} }
registerUsage[instr.dst] = pos;
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_IROL_R(Instruction& instr, int i) { void JitCompilerX86::h_IROL_R(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) { if (instr.src != instr.dst) {
emit(REX_MOV_RR, p, pos); emit(REX_MOV_RR, p, pos);
emitByte(0xc8 + instr.src, p, pos); emitByte(0xc8 + instr.src, p, pos);
@ -749,27 +862,28 @@ namespace randomx {
emitByte(instr.getImm32() & 63, p, pos); emitByte(instr.getImm32() & 63, p, pos);
} }
registerUsage[instr.dst] = pos;
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_ISWAP_R(Instruction& instr, int i) { void JitCompilerX86::h_ISWAP_R(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
if (instr.src != instr.dst) { if (instr.src != instr.dst) {
registerUsage[instr.dst] = i;
registerUsage[instr.src] = i;
emit(REX_XCHG, p, pos); emit(REX_XCHG, p, pos);
emitByte(0xc0 + instr.src + 8 * instr.dst, p, pos); emitByte(0xc0 + instr.src + 8 * instr.dst, p, pos);
registerUsage[instr.dst] = pos;
registerUsage[instr.src] = pos;
} }
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_FSWAP_R(Instruction& instr, int i) { void JitCompilerX86::h_FSWAP_R(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
emit(SHUFPD, p, pos); emit(SHUFPD, p, pos);
emitByte(0xc0 + 9 * instr.dst, p, pos); emitByte(0xc0 + 9 * instr.dst, p, pos);
emitByte(1, p, pos); emitByte(1, p, pos);
@ -777,105 +891,105 @@ namespace randomx {
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_FADD_R(Instruction& instr, int i) { void JitCompilerX86::h_FADD_R(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
instr.dst %= RegisterCountFlt; const uint32_t dst = instr.dst % RegisterCountFlt;
instr.src %= RegisterCountFlt; const uint32_t src = instr.src % RegisterCountFlt;
emit(REX_ADDPD, p, pos); emit(REX_ADDPD, p, pos);
emitByte(0xc0 + instr.src + 8 * instr.dst, p, pos); emitByte(0xc0 + src + 8 * dst, p, pos);
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_FADD_M(Instruction& instr, int i) { void JitCompilerX86::h_FADD_M(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
instr.dst %= RegisterCountFlt; const uint32_t dst = instr.dst % RegisterCountFlt;
genAddressReg(instr, p, pos); genAddressReg<true>(instr, p, pos);
emit(REX_CVTDQ2PD_XMM12, p, pos); emit(REX_CVTDQ2PD_XMM12, p, pos);
emit(REX_ADDPD, p, pos); emit(REX_ADDPD, p, pos);
emitByte(0xc4 + 8 * instr.dst, p, pos); emitByte(0xc4 + 8 * dst, p, pos);
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) { void JitCompilerX86::h_FSUB_R(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
instr.dst %= RegisterCountFlt; const uint32_t dst = instr.dst % RegisterCountFlt;
instr.src %= RegisterCountFlt; const uint32_t src = instr.src % RegisterCountFlt;
emit(REX_SUBPD, p, pos); emit(REX_SUBPD, p, pos);
emitByte(0xc0 + instr.src + 8 * instr.dst, p, pos); emitByte(0xc0 + src + 8 * dst, p, pos);
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) { void JitCompilerX86::h_FSUB_M(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
instr.dst %= RegisterCountFlt; const uint32_t dst = instr.dst % RegisterCountFlt;
genAddressReg(instr, p, pos); genAddressReg<true>(instr, p, pos);
emit(REX_CVTDQ2PD_XMM12, p, pos); emit(REX_CVTDQ2PD_XMM12, p, pos);
emit(REX_SUBPD, p, pos); emit(REX_SUBPD, p, pos);
emitByte(0xc4 + 8 * instr.dst, p, pos); emitByte(0xc4 + 8 * dst, p, pos);
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) { void JitCompilerX86::h_FSCAL_R(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
instr.dst %= RegisterCountFlt; const uint32_t dst = instr.dst % RegisterCountFlt;
emit(REX_XORPS, p, pos); emit(REX_XORPS, p, pos);
emitByte(0xc7 + 8 * instr.dst, p, pos); emitByte(0xc7 + 8 * dst, p, pos);
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) { void JitCompilerX86::h_FMUL_R(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
instr.dst %= RegisterCountFlt; const uint32_t dst = instr.dst % RegisterCountFlt;
instr.src %= RegisterCountFlt; const uint32_t src = instr.src % RegisterCountFlt;
emit(REX_MULPD, p, pos); emit(REX_MULPD, p, pos);
emitByte(0xe0 + instr.src + 8 * instr.dst, p, pos); emitByte(0xe0 + src + 8 * dst, p, pos);
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) { void JitCompilerX86::h_FDIV_M(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
instr.dst %= RegisterCountFlt; const uint32_t dst = instr.dst % RegisterCountFlt;
genAddressReg(instr, p, pos); genAddressReg<true>(instr, p, pos);
emit(REX_CVTDQ2PD_XMM12, p, pos); emit(REX_CVTDQ2PD_XMM12, p, pos);
emit(REX_ANDPS_XMM12, p, pos); emit(REX_ANDPS_XMM12, p, pos);
emit(REX_DIVPD, p, pos); emit(REX_DIVPD, p, pos);
emitByte(0xe4 + 8 * instr.dst, p, pos); emitByte(0xe4 + 8 * dst, p, pos);
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) { void JitCompilerX86::h_FSQRT_R(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
instr.dst %= RegisterCountFlt; const uint32_t dst = instr.dst % RegisterCountFlt;
emit(SQRTPD, p, pos); emit(SQRTPD, p, pos);
emitByte(0xe4 + 9 * instr.dst, p, pos); emitByte(0xe4 + 9 * dst, p, pos);
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_CFROUND(Instruction& instr, int i) { void JitCompilerX86::h_CFROUND(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
@ -891,27 +1005,46 @@ namespace randomx {
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_CBRANCH(Instruction& instr, int i) { void JitCompilerX86::h_CBRANCH(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
int reg = instr.dst; const int reg = instr.dst;
int target = registerUsage[reg] + 1; int32_t jmp_offset = registerUsage[reg] - (pos + 16);
if (BranchesWithin32B) {
const uint32_t branch_begin = static_cast<uint32_t>(pos + 7);
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + ((jmp_offset >= -128) ? 9 : 13));
// If the jump crosses or touches 32-byte boundary, align it
if ((branch_begin ^ branch_end) >= 32) {
const uint32_t alignment_size = 32 - (branch_begin & 31);
jmp_offset -= alignment_size;
emit(JMP_ALIGN_PREFIX[alignment_size], alignment_size, p, pos);
}
}
emit(REX_ADD_I, p, pos); emit(REX_ADD_I, p, pos);
emitByte(0xc0 + reg, p, pos); emitByte(0xc0 + reg, p, pos);
int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset; const int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
uint32_t imm = instr.getImm32() | (1UL << shift); const uint32_t imm = (instr.getImm32() | (1UL << shift)) & ~(1UL << (shift - 1));
if (RandomX_CurrentConfig.JumpOffset > 0 || shift > 0)
imm &= ~(1UL << (shift - 1));
emit32(imm, p, pos); emit32(imm, p, pos);
emit(REX_TEST, p, pos); emit(REX_TEST, p, pos);
emitByte(0xc0 + reg, p, pos); emitByte(0xc0 + reg, p, pos);
emit32(RandomX_CurrentConfig.ConditionMask_Calculated << shift, p, pos); emit32(RandomX_CurrentConfig.ConditionMask_Calculated << shift, p, pos);
emit(JZ, p, pos);
emit32(instructionOffsets[target] - (pos + 4), p, pos); if (jmp_offset >= -128) {
emitByte(JZ_SHORT, p, pos);
emitByte(jmp_offset, p, pos);
}
else {
emit(JZ, p, pos);
emit32(jmp_offset - 4, p, pos);
}
//mark all registers as used //mark all registers as used
uint64_t* r = (uint64_t*) registerUsage; uint64_t* r = (uint64_t*) registerUsage;
uint64_t k = i; uint64_t k = pos;
k |= k << 32; k |= k << 32;
for (unsigned j = 0; j < RegistersCount / 2; ++j) { for (unsigned j = 0; j < RegistersCount / 2; ++j) {
r[j] = k; r[j] = k;
@ -920,7 +1053,7 @@ namespace randomx {
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_ISTORE(Instruction& instr, int i) { void JitCompilerX86::h_ISTORE(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
int pos = codePos; int pos = codePos;
@ -932,10 +1065,10 @@ namespace randomx {
codePos = pos; codePos = pos;
} }
void JitCompilerX86::h_NOP(Instruction& instr, int i) { void JitCompilerX86::h_NOP(const Instruction& instr) {
emit(NOP1, code, codePos); emit(NOP1, code, codePos);
} }
InstructionGeneratorX86 JitCompilerX86::engine[256] = {}; InstructionGeneratorX86 JitCompilerX86::engine[256] = {};
} }

View file

@ -36,12 +36,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx { namespace randomx {
class Program; class Program;
class ProgramConfiguration; struct ProgramConfiguration;
class SuperscalarProgram; class SuperscalarProgram;
class JitCompilerX86; class JitCompilerX86;
class Instruction; class Instruction;
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int); typedef void(JitCompilerX86::*InstructionGeneratorX86)(const Instruction&);
constexpr uint32_t CodeSize = 64 * 1024; constexpr uint32_t CodeSize = 64 * 1024;
@ -66,16 +66,20 @@ namespace randomx {
size_t getCodeSize(); size_t getCodeSize();
static InstructionGeneratorX86 engine[256]; static InstructionGeneratorX86 engine[256];
int32_t instructionOffsets[512];
int registerUsage[RegistersCount]; int registerUsage[RegistersCount];
uint8_t* allocatedCode;
uint8_t* code; uint8_t* code;
int32_t codePos; int32_t codePos;
static bool BranchesWithin32B;
static void applyTweaks();
void generateProgramPrologue(Program&, ProgramConfiguration&); void generateProgramPrologue(Program&, ProgramConfiguration&);
void generateProgramEpilogue(Program&, ProgramConfiguration&); void generateProgramEpilogue(Program&, ProgramConfiguration&);
static void genAddressReg(Instruction&, uint8_t* code, int& codePos, bool rax = true); template<bool rax>
static void genAddressRegDst(Instruction&, uint8_t* code, int& codePos); static void genAddressReg(const Instruction&, uint8_t* code, int& codePos);
static void genAddressImm(Instruction&, uint8_t* code, int& codePos); static void genAddressRegDst(const Instruction&, uint8_t* code, int& codePos);
static void genAddressImm(const Instruction&, uint8_t* code, int& codePos);
static void genSIB(int scale, int index, int base, uint8_t* code, int& codePos); static void genSIB(int scale, int index, int base, uint8_t* code, int& codePos);
void generateSuperscalarCode(Instruction &, std::vector<uint64_t> &); void generateSuperscalarCode(Instruction &, std::vector<uint64_t> &);
@ -105,36 +109,36 @@ namespace randomx {
codePos += count; codePos += count;
} }
void h_IADD_RS(Instruction&, int); void h_IADD_RS(const Instruction&);
void h_IADD_M(Instruction&, int); void h_IADD_M(const Instruction&);
void h_ISUB_R(Instruction&, int); void h_ISUB_R(const Instruction&);
void h_ISUB_M(Instruction&, int); void h_ISUB_M(const Instruction&);
void h_IMUL_R(Instruction&, int); void h_IMUL_R(const Instruction&);
void h_IMUL_M(Instruction&, int); void h_IMUL_M(const Instruction&);
void h_IMULH_R(Instruction&, int); void h_IMULH_R(const Instruction&);
void h_IMULH_M(Instruction&, int); void h_IMULH_M(const Instruction&);
void h_ISMULH_R(Instruction&, int); void h_ISMULH_R(const Instruction&);
void h_ISMULH_M(Instruction&, int); void h_ISMULH_M(const Instruction&);
void h_IMUL_RCP(Instruction&, int); void h_IMUL_RCP(const Instruction&);
void h_INEG_R(Instruction&, int); void h_INEG_R(const Instruction&);
void h_IXOR_R(Instruction&, int); void h_IXOR_R(const Instruction&);
void h_IXOR_M(Instruction&, int); void h_IXOR_M(const Instruction&);
void h_IROR_R(Instruction&, int); void h_IROR_R(const Instruction&);
void h_IROL_R(Instruction&, int); void h_IROL_R(const Instruction&);
void h_ISWAP_R(Instruction&, int); void h_ISWAP_R(const Instruction&);
void h_FSWAP_R(Instruction&, int); void h_FSWAP_R(const Instruction&);
void h_FADD_R(Instruction&, int); void h_FADD_R(const Instruction&);
void h_FADD_M(Instruction&, int); void h_FADD_M(const Instruction&);
void h_FSUB_R(Instruction&, int); void h_FSUB_R(const Instruction&);
void h_FSUB_M(Instruction&, int); void h_FSUB_M(const Instruction&);
void h_FSCAL_R(Instruction&, int); void h_FSCAL_R(const Instruction&);
void h_FMUL_R(Instruction&, int); void h_FMUL_R(const Instruction&);
void h_FDIV_M(Instruction&, int); void h_FDIV_M(const Instruction&);
void h_FSQRT_R(Instruction&, int); void h_FSQRT_R(const Instruction&);
void h_CBRANCH(Instruction&, int); void h_CBRANCH(const Instruction&);
void h_CFROUND(Instruction&, int); void h_CFROUND(const Instruction&);
void h_ISTORE(Instruction&, int); void h_ISTORE(const Instruction&);
void h_NOP(Instruction&, int); void h_NOP(const Instruction&);
}; };
} }

View file

@ -26,6 +26,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "crypto/randomx/common.hpp"
#include "crypto/randomx/randomx.h" #include "crypto/randomx/randomx.h"
#include "crypto/randomx/dataset.hpp" #include "crypto/randomx/dataset.hpp"
#include "crypto/randomx/vm_interpreted.hpp" #include "crypto/randomx/vm_interpreted.hpp"
@ -33,7 +34,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/vm_compiled.hpp" #include "crypto/randomx/vm_compiled.hpp"
#include "crypto/randomx/vm_compiled_light.hpp" #include "crypto/randomx/vm_compiled_light.hpp"
#include "crypto/randomx/blake2/blake2.h" #include "crypto/randomx/blake2/blake2.h"
#if defined(_M_X64) || defined(__x86_64__)
#include "crypto/randomx/jit_compiler_x86_static.hpp" #include "crypto/randomx/jit_compiler_x86_static.hpp"
#elif defined(XMRIG_ARMv8)
#include "crypto/randomx/jit_compiler_a64_static.hpp"
#endif
#include <cassert> #include <cassert>
RandomX_ConfigurationWownero::RandomX_ConfigurationWownero() RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
@ -85,6 +92,16 @@ RandomX_ConfigurationArqma::RandomX_ConfigurationArqma()
ScratchpadL3_Size = 262144; ScratchpadL3_Size = 262144;
} }
RandomX_ConfigurationSafex::RandomX_ConfigurationSafex()
{
ArgonIterations = 3;
ArgonSalt = "RandomSFX\x01";
ProgramIterations = 2048;
ProgramCount = 8;
ScratchpadL2_Size = 262144;
ScratchpadL3_Size = 2097152;
}
RandomX_ConfigurationBase::RandomX_ConfigurationBase() RandomX_ConfigurationBase::RandomX_ConfigurationBase()
: ArgonMemory(262144) : ArgonMemory(262144)
, ArgonIterations(3) , ArgonIterations(3)
@ -166,19 +183,10 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
#endif #endif
} }
static uint32_t Log2(size_t value) { return (value > 1) ? (Log2(value / 2) + 1) : 0; }
void RandomX_ConfigurationBase::Apply() void RandomX_ConfigurationBase::Apply()
{ {
#if defined(_M_X64) || defined(__x86_64__)
*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE;
*(uint32_t*)(codeReadDatasetTweaked + 7) = DatasetBaseMask;
*(uint32_t*)(codeReadDatasetTweaked + 23) = DatasetBaseMask;
*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
#endif
CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1);
DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE;
ScratchpadL1Mask_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) - 1) * 8; ScratchpadL1Mask_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) - 1) * 8;
ScratchpadL1Mask16_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) / 2 - 1) * 16; ScratchpadL1Mask16_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) / 2 - 1) * 16;
ScratchpadL2Mask_Calculated = (ScratchpadL2_Size / sizeof(uint64_t) - 1) * 8; ScratchpadL2Mask_Calculated = (ScratchpadL2_Size / sizeof(uint64_t) - 1) * 8;
@ -186,22 +194,40 @@ void RandomX_ConfigurationBase::Apply()
ScratchpadL3Mask_Calculated = (((ScratchpadL3_Size / sizeof(uint64_t)) - 1) * 8); ScratchpadL3Mask_Calculated = (((ScratchpadL3_Size / sizeof(uint64_t)) - 1) * 8);
ScratchpadL3Mask64_Calculated = ((ScratchpadL3_Size / sizeof(uint64_t)) / 8 - 1) * 64; ScratchpadL3Mask64_Calculated = ((ScratchpadL3_Size / sizeof(uint64_t)) / 8 - 1) * 64;
#if defined(_M_X64) || defined(__x86_64__) CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1);
*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated; DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE;
*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
#endif
ConditionMask_Calculated = (1 << JumpBits) - 1; ConditionMask_Calculated = (1 << JumpBits) - 1;
constexpr int CEIL_NULL = 0;
int k = 0;
#if defined(_M_X64) || defined(__x86_64__) #if defined(_M_X64) || defined(__x86_64__)
*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE;
*(uint32_t*)(codeReadDatasetTweaked + 7) = DatasetBaseMask;
*(uint32_t*)(codeReadDatasetTweaked + 23) = DatasetBaseMask;
*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated;
*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
#define JIT_HANDLE(x, prev) randomx::JitCompilerX86::engine[k] = &randomx::JitCompilerX86::h_##x #define JIT_HANDLE(x, prev) randomx::JitCompilerX86::engine[k] = &randomx::JitCompilerX86::h_##x
#elif defined(XMRIG_ARMv8)
Log2_ScratchpadL1 = Log2(ScratchpadL1_Size);
Log2_ScratchpadL2 = Log2(ScratchpadL2_Size);
Log2_ScratchpadL3 = Log2(ScratchpadL3_Size);
Log2_DatasetBaseSize = Log2(DatasetBaseSize);
Log2_CacheSize = Log2((ArgonMemory * randomx::ArgonBlockSize) / randomx::CacheLineSize);
#define JIT_HANDLE(x, prev) randomx::JitCompilerA64::engine[k] = &randomx::JitCompilerA64::h_##x
#else #else
#define JIT_HANDLE(x, prev) #define JIT_HANDLE(x, prev)
#endif #endif
constexpr int CEIL_NULL = 0;
int k = 0;
#define INST_HANDLE(x, prev) \ #define INST_HANDLE(x, prev) \
CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \ CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \
for (; k < CEIL_##x; ++k) { JIT_HANDLE(x, prev); } for (; k < CEIL_##x; ++k) { JIT_HANDLE(x, prev); }
@ -243,218 +269,236 @@ RandomX_ConfigurationMonero RandomX_MoneroConfig;
RandomX_ConfigurationWownero RandomX_WowneroConfig; RandomX_ConfigurationWownero RandomX_WowneroConfig;
RandomX_ConfigurationLoki RandomX_LokiConfig; RandomX_ConfigurationLoki RandomX_LokiConfig;
RandomX_ConfigurationArqma RandomX_ArqmaConfig; RandomX_ConfigurationArqma RandomX_ArqmaConfig;
RandomX_ConfigurationSafex RandomX_SafexConfig;
RandomX_ConfigurationBase RandomX_CurrentConfig; RandomX_ConfigurationBase RandomX_CurrentConfig;
extern "C" { extern "C" {
randomx_cache *randomx_alloc_cache(randomx_flags flags) { randomx_cache *randomx_alloc_cache(randomx_flags flags) {
randomx_cache *cache = nullptr; randomx_cache *cache = nullptr;
try { try {
cache = new randomx_cache(); cache = new randomx_cache();
switch (flags & (RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES)) { switch (flags & (RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES)) {
case RANDOMX_FLAG_DEFAULT: case RANDOMX_FLAG_DEFAULT:
cache->dealloc = &randomx::deallocCache<randomx::DefaultAllocator>; cache->dealloc = &randomx::deallocCache<randomx::DefaultAllocator>;
cache->jit = nullptr; cache->jit = nullptr;
cache->initialize = &randomx::initCache; cache->initialize = &randomx::initCache;
cache->datasetInit = &randomx::initDataset; cache->datasetInit = &randomx::initDataset;
cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE); cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE);
break; break;
case RANDOMX_FLAG_JIT: case RANDOMX_FLAG_JIT:
cache->dealloc = &randomx::deallocCache<randomx::DefaultAllocator>; cache->dealloc = &randomx::deallocCache<randomx::DefaultAllocator>;
cache->jit = new randomx::JitCompiler(); cache->jit = new randomx::JitCompiler();
cache->initialize = &randomx::initCacheCompile; cache->initialize = &randomx::initCacheCompile;
cache->datasetInit = cache->jit->getDatasetInitFunc(); cache->datasetInit = cache->jit->getDatasetInitFunc();
cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE); cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE);
break; break;
case RANDOMX_FLAG_LARGE_PAGES: case RANDOMX_FLAG_LARGE_PAGES:
cache->dealloc = &randomx::deallocCache<randomx::LargePageAllocator>; cache->dealloc = &randomx::deallocCache<randomx::LargePageAllocator>;
cache->jit = nullptr; cache->jit = nullptr;
cache->initialize = &randomx::initCache; cache->initialize = &randomx::initCache;
cache->datasetInit = &randomx::initDataset; cache->datasetInit = &randomx::initDataset;
cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE); cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE);
break; break;
case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES: case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES:
cache->dealloc = &randomx::deallocCache<randomx::LargePageAllocator>; cache->dealloc = &randomx::deallocCache<randomx::LargePageAllocator>;
cache->jit = new randomx::JitCompiler(); cache->jit = new randomx::JitCompiler();
cache->initialize = &randomx::initCacheCompile; cache->initialize = &randomx::initCacheCompile;
cache->datasetInit = cache->jit->getDatasetInitFunc(); cache->datasetInit = cache->jit->getDatasetInitFunc();
cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE); cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE);
break; break;
default: default:
UNREACHABLE; UNREACHABLE;
}
} }
catch (std::exception &ex) { }
if (cache != nullptr) { catch (std::exception &ex) {
randomx_release_cache(cache); if (cache != nullptr) {
cache = nullptr; randomx_release_cache(cache);
} cache = nullptr;
} }
return cache;
}
void randomx_init_cache(randomx_cache *cache, const void *key, size_t keySize) {
assert(cache != nullptr);
assert(keySize == 0 || key != nullptr);
cache->initialize(cache, key, keySize);
}
void randomx_release_cache(randomx_cache* cache) {
assert(cache != nullptr);
cache->dealloc(cache);
delete cache;
}
randomx_dataset *randomx_alloc_dataset(randomx_flags flags) {
randomx_dataset *dataset = nullptr;
try {
dataset = new randomx_dataset();
if (flags & RANDOMX_FLAG_LARGE_PAGES) {
dataset->dealloc = &randomx::deallocDataset<randomx::LargePageAllocator>;
dataset->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_DATASET_MAX_SIZE);
}
else {
dataset->dealloc = &randomx::deallocDataset<randomx::DefaultAllocator>;
dataset->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_DATASET_MAX_SIZE);
}
}
catch (std::exception &ex) {
if (dataset != nullptr) {
randomx_release_dataset(dataset);
dataset = nullptr;
}
}
return dataset;
}
#define DatasetItemCount ((RandomX_CurrentConfig.DatasetBaseSize + RandomX_CurrentConfig.DatasetExtraSize) / RANDOMX_DATASET_ITEM_SIZE)
unsigned long randomx_dataset_item_count() {
return DatasetItemCount;
}
void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount) {
assert(dataset != nullptr);
assert(cache != nullptr);
assert(startItem < DatasetItemCount && itemCount <= DatasetItemCount);
assert(startItem + itemCount <= DatasetItemCount);
cache->datasetInit(cache, dataset->memory + startItem * randomx::CacheLineSize, startItem, startItem + itemCount);
}
void *randomx_get_dataset_memory(randomx_dataset *dataset) {
assert(dataset != nullptr);
return dataset->memory;
}
void randomx_release_dataset(randomx_dataset *dataset) {
assert(dataset != nullptr);
dataset->dealloc(dataset);
delete dataset;
}
randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset, uint8_t *scratchpad) {
assert(cache != nullptr || (flags & RANDOMX_FLAG_FULL_MEM));
assert(cache == nullptr || cache->isInitialized());
assert(dataset != nullptr || !(flags & RANDOMX_FLAG_FULL_MEM));
randomx_vm *vm = nullptr;
try {
switch (flags & (RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES)) {
case RANDOMX_FLAG_DEFAULT:
vm = new randomx::InterpretedLightVmDefault();
break;
case RANDOMX_FLAG_FULL_MEM:
vm = new randomx::InterpretedVmDefault();
break;
case RANDOMX_FLAG_JIT:
vm = new randomx::CompiledLightVmDefault();
break;
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT:
vm = new randomx::CompiledVmDefault();
break;
case RANDOMX_FLAG_HARD_AES:
vm = new randomx::InterpretedLightVmHardAes();
break;
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES:
vm = new randomx::InterpretedVmHardAes();
break;
case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES:
vm = new randomx::CompiledLightVmHardAes();
break;
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES:
vm = new randomx::CompiledVmHardAes();
break;
default:
UNREACHABLE;
}
if (cache != nullptr) {
vm->setCache(cache);
}
if (dataset != nullptr) {
vm->setDataset(dataset);
}
vm->setScratchpad(scratchpad);
}
catch (std::exception &ex) {
delete vm;
vm = nullptr;
}
return vm;
}
void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache) {
assert(machine != nullptr);
assert(cache != nullptr && cache->isInitialized());
machine->setCache(cache);
}
void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset) {
assert(machine != nullptr);
assert(dataset != nullptr);
machine->setDataset(dataset);
}
void randomx_destroy_vm(randomx_vm *machine) {
assert(machine != nullptr);
delete machine;
}
void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output) {
assert(machine != nullptr);
assert(inputSize == 0 || input != nullptr);
assert(output != nullptr);
alignas(16) uint64_t tempHash[8];
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
machine->initScratchpad(&tempHash);
machine->resetRoundingMode();
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
machine->run(&tempHash);
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
}
machine->run(&tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
} }
return cache;
} }
void randomx_init_cache(randomx_cache *cache, const void *key, size_t keySize) {
assert(cache != nullptr);
assert(keySize == 0 || key != nullptr);
cache->initialize(cache, key, keySize);
}
void randomx_release_cache(randomx_cache* cache) {
assert(cache != nullptr);
cache->dealloc(cache);
delete cache;
}
randomx_dataset *randomx_alloc_dataset(randomx_flags flags) {
randomx_dataset *dataset = nullptr;
try {
dataset = new randomx_dataset();
if (flags & RANDOMX_FLAG_LARGE_PAGES) {
dataset->dealloc = &randomx::deallocDataset<randomx::LargePageAllocator>;
dataset->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_DATASET_MAX_SIZE);
}
else {
dataset->dealloc = &randomx::deallocDataset<randomx::DefaultAllocator>;
dataset->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_DATASET_MAX_SIZE);
}
}
catch (std::exception &ex) {
if (dataset != nullptr) {
randomx_release_dataset(dataset);
dataset = nullptr;
}
}
return dataset;
}
#define DatasetItemCount ((RandomX_CurrentConfig.DatasetBaseSize + RandomX_CurrentConfig.DatasetExtraSize) / RANDOMX_DATASET_ITEM_SIZE)
unsigned long randomx_dataset_item_count() {
return DatasetItemCount;
}
void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount) {
assert(dataset != nullptr);
assert(cache != nullptr);
assert(startItem < DatasetItemCount && itemCount <= DatasetItemCount);
assert(startItem + itemCount <= DatasetItemCount);
cache->datasetInit(cache, dataset->memory + startItem * randomx::CacheLineSize, startItem, startItem + itemCount);
}
void *randomx_get_dataset_memory(randomx_dataset *dataset) {
assert(dataset != nullptr);
return dataset->memory;
}
void randomx_release_dataset(randomx_dataset *dataset) {
assert(dataset != nullptr);
dataset->dealloc(dataset);
delete dataset;
}
randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset, uint8_t *scratchpad) {
assert(cache != nullptr || (flags & RANDOMX_FLAG_FULL_MEM));
assert(cache == nullptr || cache->isInitialized());
assert(dataset != nullptr || !(flags & RANDOMX_FLAG_FULL_MEM));
randomx_vm *vm = nullptr;
try {
switch (flags & (RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES)) {
case RANDOMX_FLAG_DEFAULT:
vm = new randomx::InterpretedLightVmDefault();
break;
case RANDOMX_FLAG_FULL_MEM:
vm = new randomx::InterpretedVmDefault();
break;
case RANDOMX_FLAG_JIT:
vm = new randomx::CompiledLightVmDefault();
break;
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT:
vm = new randomx::CompiledVmDefault();
break;
case RANDOMX_FLAG_HARD_AES:
vm = new randomx::InterpretedLightVmHardAes();
break;
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES:
vm = new randomx::InterpretedVmHardAes();
break;
case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES:
vm = new randomx::CompiledLightVmHardAes();
break;
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES:
vm = new randomx::CompiledVmHardAes();
break;
default:
UNREACHABLE;
}
if (cache != nullptr) {
vm->setCache(cache);
}
if (dataset != nullptr) {
vm->setDataset(dataset);
}
vm->setScratchpad(scratchpad);
}
catch (std::exception &ex) {
delete vm;
vm = nullptr;
}
return vm;
}
void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache) {
assert(machine != nullptr);
assert(cache != nullptr && cache->isInitialized());
machine->setCache(cache);
}
void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset) {
assert(machine != nullptr);
assert(dataset != nullptr);
machine->setDataset(dataset);
}
void randomx_destroy_vm(randomx_vm *machine) {
assert(machine != nullptr);
delete machine;
}
void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output) {
assert(machine != nullptr);
assert(inputSize == 0 || input != nullptr);
assert(output != nullptr);
alignas(16) uint64_t tempHash[8];
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
machine->initScratchpad(&tempHash);
machine->resetRoundingMode();
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
machine->run(&tempHash);
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
}
machine->run(&tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
}
void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize) {
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
machine->initScratchpad(tempHash);
}
void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output) {
machine->resetRoundingMode();
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
machine->run(&tempHash);
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
}
machine->run(&tempHash);
// Finish current hash and fill the scratchpad for the next hash at the same time
rx_blake2b(tempHash, sizeof(tempHash), nextInput, nextInputSize, nullptr, 0);
machine->hashAndFill(output, RANDOMX_HASH_SIZE, tempHash);
}
}

View file

@ -29,8 +29,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef RANDOMX_H #ifndef RANDOMX_H
#define RANDOMX_H #define RANDOMX_H
#include <stddef.h> #include <cstddef>
#include <stdint.h> #include <cstdint>
#include <type_traits> #include <type_traits>
#include "crypto/randomx/intrin_portable.h" #include "crypto/randomx/intrin_portable.h"
@ -41,17 +41,20 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RANDOMX_EXPORT #define RANDOMX_EXPORT
#endif #endif
typedef enum {
RANDOMX_FLAG_DEFAULT = 0,
RANDOMX_FLAG_LARGE_PAGES = 1,
RANDOMX_FLAG_HARD_AES = 2,
RANDOMX_FLAG_FULL_MEM = 4,
RANDOMX_FLAG_JIT = 8,
} randomx_flags;
typedef struct randomx_dataset randomx_dataset; enum randomx_flags {
typedef struct randomx_cache randomx_cache; RANDOMX_FLAG_DEFAULT = 0,
typedef struct randomx_vm randomx_vm; RANDOMX_FLAG_LARGE_PAGES = 1,
RANDOMX_FLAG_HARD_AES = 2,
RANDOMX_FLAG_FULL_MEM = 4,
RANDOMX_FLAG_JIT = 8,
};
struct randomx_dataset;
struct randomx_cache;
class randomx_vm;
struct RandomX_ConfigurationBase struct RandomX_ConfigurationBase
{ {
@ -130,6 +133,14 @@ struct RandomX_ConfigurationBase
uint32_t ConditionMask_Calculated; uint32_t ConditionMask_Calculated;
#if defined(XMRIG_ARMv8)
uint32_t Log2_ScratchpadL1;
uint32_t Log2_ScratchpadL2;
uint32_t Log2_ScratchpadL3;
uint32_t Log2_DatasetBaseSize;
uint32_t Log2_CacheSize;
#endif
int CEIL_IADD_RS; int CEIL_IADD_RS;
int CEIL_IADD_M; int CEIL_IADD_M;
int CEIL_ISUB_R; int CEIL_ISUB_R;
@ -166,11 +177,13 @@ struct RandomX_ConfigurationMonero : public RandomX_ConfigurationBase {};
struct RandomX_ConfigurationWownero : public RandomX_ConfigurationBase { RandomX_ConfigurationWownero(); }; struct RandomX_ConfigurationWownero : public RandomX_ConfigurationBase { RandomX_ConfigurationWownero(); };
struct RandomX_ConfigurationLoki : public RandomX_ConfigurationBase { RandomX_ConfigurationLoki(); }; struct RandomX_ConfigurationLoki : public RandomX_ConfigurationBase { RandomX_ConfigurationLoki(); };
struct RandomX_ConfigurationArqma : public RandomX_ConfigurationBase { RandomX_ConfigurationArqma(); }; struct RandomX_ConfigurationArqma : public RandomX_ConfigurationBase { RandomX_ConfigurationArqma(); };
struct RandomX_ConfigurationSafex : public RandomX_ConfigurationBase { RandomX_ConfigurationSafex(); };
extern RandomX_ConfigurationMonero RandomX_MoneroConfig; extern RandomX_ConfigurationMonero RandomX_MoneroConfig;
extern RandomX_ConfigurationWownero RandomX_WowneroConfig; extern RandomX_ConfigurationWownero RandomX_WowneroConfig;
extern RandomX_ConfigurationLoki RandomX_LokiConfig; extern RandomX_ConfigurationLoki RandomX_LokiConfig;
extern RandomX_ConfigurationArqma RandomX_ArqmaConfig; extern RandomX_ConfigurationArqma RandomX_ArqmaConfig;
extern RandomX_ConfigurationSafex RandomX_SafexConfig;
extern RandomX_ConfigurationBase RandomX_CurrentConfig; extern RandomX_ConfigurationBase RandomX_CurrentConfig;
@ -327,8 +340,11 @@ RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine);
*/ */
RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output); RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output);
RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize);
RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output);
#if defined(__cplusplus) #if defined(__cplusplus)
} }
#endif #endif
#endif #endif

View file

@ -111,7 +111,13 @@ namespace randomx {
template<bool softAes> template<bool softAes>
void VmBase<softAes>::getFinalResult(void* out, size_t outSize) { void VmBase<softAes>::getFinalResult(void* out, size_t outSize) {
hashAes1Rx4<softAes>(scratchpad, ScratchpadSize, &reg.a); hashAes1Rx4<softAes>(scratchpad, ScratchpadSize, &reg.a);
rx_blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0); rx_blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0);
}
template<bool softAes>
void VmBase<softAes>::hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) {
hashAndFillAes1Rx4<softAes>(scratchpad, ScratchpadSize, &reg.a, fill_state);
rx_blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0);
} }
template<bool softAes> template<bool softAes>
@ -126,4 +132,4 @@ namespace randomx {
template class VmBase<false>; template class VmBase<false>;
template class VmBase<true>; template class VmBase<true>;
} }

View file

@ -39,6 +39,7 @@ public:
virtual ~randomx_vm() = 0; virtual ~randomx_vm() = 0;
virtual void setScratchpad(uint8_t *scratchpad) = 0; virtual void setScratchpad(uint8_t *scratchpad) = 0;
virtual void getFinalResult(void* out, size_t outSize) = 0; virtual void getFinalResult(void* out, size_t outSize) = 0;
virtual void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) = 0;
virtual void setDataset(randomx_dataset* dataset) { } virtual void setDataset(randomx_dataset* dataset) { }
virtual void setCache(randomx_cache* cache) { } virtual void setCache(randomx_cache* cache) { }
virtual void initScratchpad(void* seed) = 0; virtual void initScratchpad(void* seed) = 0;
@ -64,7 +65,7 @@ protected:
alignas(64) randomx::RegisterFile reg; alignas(64) randomx::RegisterFile reg;
alignas(16) randomx::ProgramConfiguration config; alignas(16) randomx::ProgramConfiguration config;
randomx::MemoryRegisters mem; randomx::MemoryRegisters mem;
uint8_t* scratchpad; uint8_t* scratchpad = nullptr;
union { union {
randomx_cache* cachePtr = nullptr; randomx_cache* cachePtr = nullptr;
randomx_dataset* datasetPtr; randomx_dataset* datasetPtr;
@ -82,9 +83,10 @@ namespace randomx {
void setScratchpad(uint8_t *scratchpad) override; void setScratchpad(uint8_t *scratchpad) override;
void initScratchpad(void* seed) override; void initScratchpad(void* seed) override;
void getFinalResult(void* out, size_t outSize) override; void getFinalResult(void* out, size_t outSize) override;
void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) override;
protected: protected:
void generateProgram(void* seed); void generateProgram(void* seed);
}; };
} }

View file

@ -49,6 +49,9 @@ const RandomX_ConfigurationBase *xmrig::RxAlgo::base(Algorithm::Id algorithm)
case Algorithm::RX_ARQ: case Algorithm::RX_ARQ:
return &RandomX_ArqmaConfig; return &RandomX_ArqmaConfig;
case Algorithm::RX_SFX:
return &RandomX_SafexConfig;
default: default:
break; break;
} }

View file

@ -28,7 +28,7 @@
#define APP_ID "XMRigCC" #define APP_ID "XMRigCC"
#define APP_NAME "XMRigCC" #define APP_NAME "XMRigCC"
#define APP_DESC "XMRigCC CPU miner" #define APP_DESC "XMRigCC CPU miner"
#define APP_VERSION "2.2.0" #define APP_VERSION "2.2.1"
#define APP_DOMAIN "" #define APP_DOMAIN ""
#define APP_SITE "https://github.com/BenDr0id/xmrigCC/" #define APP_SITE "https://github.com/BenDr0id/xmrigCC/"
#define APP_COPYRIGHT "Copyright (C) 2017- XMRigCC" #define APP_COPYRIGHT "Copyright (C) 2017- XMRigCC"
@ -36,7 +36,7 @@
#define APP_VER_MAJOR 2 #define APP_VER_MAJOR 2
#define APP_VER_MINOR 2 #define APP_VER_MINOR 2
#define APP_VER_PATCH 0 #define APP_VER_PATCH 1
#ifndef NDEBUG #ifndef NDEBUG
#define BUILD_TYPE "DEBUG" #define BUILD_TYPE "DEBUG"