RandomX: added cache QoS support
Disabled by default (the `cache_qos` option is false). When it is set to true, CPU cores that are not running mining threads are denied access to the L3 cache.
This commit is contained in:
parent
e5a2689052
commit
c83429c55c
10 changed files with 126 additions and 19 deletions
|
@ -28,6 +28,7 @@
|
|||
#include "crypto/rx/Rx.h"
|
||||
#include "backend/common/Tags.h"
|
||||
#include "backend/cpu/CpuConfig.h"
|
||||
#include "backend/cpu/CpuThreads.h"
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/io/log/Tags.h"
|
||||
#include "crypto/rx/RxConfig.h"
|
||||
|
@ -78,7 +79,7 @@ bool xmrig::Rx::init(const Job &job, const RxConfig &config, const CpuConfig &cp
|
|||
}
|
||||
|
||||
if (!msrInitialized) {
|
||||
msrInit(config);
|
||||
msrInit(config, cpu.threads().get(job.algorithm()).data());
|
||||
msrInitialized = true;
|
||||
}
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
|
||||
#include "crypto/common/HugePagesInfo.h"
|
||||
|
@ -41,6 +42,7 @@ namespace xmrig
|
|||
|
||||
class Algorithm;
|
||||
class CpuConfig;
|
||||
class CpuThread;
|
||||
class IRxListener;
|
||||
class Job;
|
||||
class RxConfig;
|
||||
|
@ -62,7 +64,7 @@ public:
|
|||
# endif
|
||||
|
||||
private:
|
||||
static void msrInit(const RxConfig &config);
|
||||
static void msrInit(const RxConfig &config, const std::vector<CpuThread>& threads);
|
||||
static void msrDestroy();
|
||||
static void setupMainLoopExceptionFrame();
|
||||
};
|
||||
|
|
|
@ -51,6 +51,7 @@ static const char *kMode = "mode";
|
|||
static const char *kOneGbPages = "1gb-pages";
|
||||
static const char *kRdmsr = "rdmsr";
|
||||
static const char *kWrmsr = "wrmsr";
|
||||
static const char *kCacheQoS = "cache_qos";
|
||||
|
||||
#ifdef XMRIG_FEATURE_HWLOC
|
||||
static const char *kNUMA = "numa";
|
||||
|
@ -89,6 +90,8 @@ bool xmrig::RxConfig::read(const rapidjson::Value &value)
|
|||
readMSR(Json::getValue(value, kWrmsr));
|
||||
# endif
|
||||
|
||||
m_cacheQoS = Json::getBool(value, kCacheQoS, m_cacheQoS);
|
||||
|
||||
# ifdef XMRIG_OS_LINUX
|
||||
m_oneGbPages = Json::getBool(value, kOneGbPages, m_oneGbPages);
|
||||
# endif
|
||||
|
@ -151,6 +154,8 @@ rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const
|
|||
obj.AddMember(StringRef(kWrmsr), false, allocator);
|
||||
# endif
|
||||
|
||||
obj.AddMember(StringRef(kCacheQoS), m_cacheQoS, allocator);
|
||||
|
||||
# ifdef XMRIG_FEATURE_HWLOC
|
||||
if (!m_nodeset.empty()) {
|
||||
Value numa(kArrayType);
|
||||
|
|
|
@ -65,6 +65,7 @@ public:
|
|||
// Returns the stored 1GB-pages flag (m_oneGbPages).
inline bool isOneGbPages() const { return m_oneGbPages; }
|
||||
// Returns the stored rdmsr flag (m_rdmsr).
inline bool rdmsr() const { return m_rdmsr; }
|
||||
// Returns the stored wrmsr flag (m_wrmsr).
inline bool wrmsr() const { return m_wrmsr; }
|
||||
// Returns the cache QoS flag (m_cacheQoS), read from the "cache_qos" config key; false unless set.
inline bool cacheQoS() const { return m_cacheQoS; }
|
||||
// Returns the stored Mode value (m_mode).
inline Mode mode() const { return m_mode; }
|
||||
|
||||
# ifdef XMRIG_FEATURE_MSR
|
||||
|
@ -83,6 +84,8 @@ private:
|
|||
bool m_wrmsr = false;
|
||||
# endif
|
||||
|
||||
bool m_cacheQoS = false;
|
||||
|
||||
Mode readMode(const rapidjson::Value &value) const;
|
||||
|
||||
bool m_numa = true;
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
|
||||
#include "crypto/rx/Rx.h"
|
||||
#include "backend/cpu/Cpu.h"
|
||||
#include "backend/cpu/CpuThread.h"
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/tools/Chrono.h"
|
||||
#include "crypto/rx/RxConfig.h"
|
||||
|
@ -123,14 +124,15 @@ static bool wrmsr_on_cpu(uint32_t reg, uint32_t cpu, uint64_t value, uint64_t ma
|
|||
}
|
||||
|
||||
|
||||
static bool wrmsr_on_all_cpus(uint32_t reg, uint64_t value, uint64_t mask)
|
||||
template<typename T>
|
||||
static bool wrmsr_on_all_cpus(uint32_t reg, uint64_t value, uint64_t mask, T&& callback)
|
||||
{
|
||||
struct dirent **namelist;
|
||||
int dir_entries = scandir("/dev/cpu", &namelist, dir_filter, 0);
|
||||
int errors = 0;
|
||||
|
||||
while (dir_entries--) {
|
||||
if (!wrmsr_on_cpu(reg, strtoul(namelist[dir_entries]->d_name, nullptr, 10), value, mask)) {
|
||||
if (!callback(reg, strtoul(namelist[dir_entries]->d_name, nullptr, 10), value, mask)) {
|
||||
++errors;
|
||||
}
|
||||
|
||||
|
@ -159,7 +161,7 @@ static bool wrmsr_modprobe()
|
|||
}
|
||||
|
||||
|
||||
static bool wrmsr(const MsrItems &preset, bool save)
|
||||
static bool wrmsr(const MsrItems& preset, const std::vector<CpuThread>& threads, bool cache_qos, bool save)
|
||||
{
|
||||
if (!wrmsr_modprobe()) {
|
||||
return false;
|
||||
|
@ -177,12 +179,61 @@ static bool wrmsr(const MsrItems &preset, bool save)
|
|||
}
|
||||
|
||||
for (const auto &i : preset) {
|
||||
if (!wrmsr_on_all_cpus(i.reg(), i.value(), i.mask())) {
|
||||
if (!wrmsr_on_all_cpus(i.reg(), i.value(), i.mask(), [](uint32_t reg, uint32_t cpu, uint64_t value, uint64_t mask) { return wrmsr_on_cpu(reg, cpu, value, mask); })) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
const uint32_t n = Cpu::info()->threads();
|
||||
|
||||
// Which CPU cores will have access to the full L3 cache
|
||||
std::vector<bool> cacheEnabled(n, false);
|
||||
bool cacheQoSDisabled = threads.empty();
|
||||
|
||||
for (const CpuThread& t : threads) {
|
||||
// If some thread has no affinity or wrong affinity, disable cache QoS
|
||||
if ((t.affinity() < 0) || (t.affinity() >= n)) {
|
||||
cacheQoSDisabled = true;
|
||||
if (cache_qos) {
|
||||
LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "Cache QoS can only be enabled when all mining threads have affinity set", tag);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
cacheEnabled[t.affinity()] = true;
|
||||
}
|
||||
|
||||
if (cache_qos && !Cpu::info()->hasCatL3()) {
|
||||
LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "This CPU doesn't support cat_l3, cache QoS is unavailable", tag);
|
||||
cache_qos = false;
|
||||
}
|
||||
|
||||
bool result = true;
|
||||
|
||||
if (cache_qos) {
|
||||
result = wrmsr_on_all_cpus(0xC8F, 0, MsrItem::kNoMask, [&cacheEnabled, cacheQoSDisabled](uint32_t, uint32_t cpu, uint64_t, uint64_t) {
|
||||
if (cacheQoSDisabled || (cpu >= cacheEnabled.size()) || cacheEnabled[cpu]) {
|
||||
// Assign Class Of Service 0 to current CPU core (default, full L3 cache available)
|
||||
if (!wrmsr_on_cpu(0xC8F, cpu, 0, MsrItem::kNoMask)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Disable L3 cache for Class Of Service 1
|
||||
if (!wrmsr_on_cpu(0xC91, cpu, 0, MsrItem::kNoMask)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Assign Class Of Service 1 to current CPU core
|
||||
if (!wrmsr_on_cpu(0xC8F, cpu, 1ULL << 32, MsrItem::kNoMask)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
@ -216,7 +267,7 @@ void Rx::setMainLoopBounds(const std::pair<const void*, const void*>& bounds)
|
|||
} // namespace xmrig
|
||||
|
||||
|
||||
void xmrig::Rx::msrInit(const RxConfig &config)
|
||||
void xmrig::Rx::msrInit(const RxConfig &config, const std::vector<CpuThread>& threads)
|
||||
{
|
||||
const auto &preset = config.msrPreset();
|
||||
if (preset.empty()) {
|
||||
|
@ -225,7 +276,7 @@ void xmrig::Rx::msrInit(const RxConfig &config)
|
|||
|
||||
const uint64_t ts = Chrono::steadyMSecs();
|
||||
|
||||
if (wrmsr(preset, config.rdmsr())) {
|
||||
if (wrmsr(preset, threads, config.cacheQoS(), config.rdmsr())) {
|
||||
LOG_NOTICE(CLEAR "%s" GREEN_BOLD_S "register values for \"%s\" preset has been set successfully" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, config.msrPresetName(), Chrono::steadyMSecs() - ts);
|
||||
}
|
||||
else {
|
||||
|
@ -242,7 +293,7 @@ void xmrig::Rx::msrDestroy()
|
|||
|
||||
const uint64_t ts = Chrono::steadyMSecs();
|
||||
|
||||
if (!wrmsr(savedState, false)) {
|
||||
if (!wrmsr(savedState, std::vector<CpuThread>(), true, false)) {
|
||||
LOG_ERR(CLEAR "%s" RED_BOLD_S "failed to restore initial state" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, Chrono::steadyMSecs() - ts);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
|
||||
#include "crypto/rx/Rx.h"
|
||||
#include "backend/cpu/Cpu.h"
|
||||
#include "backend/cpu/CpuThread.h"
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/kernel/Platform.h"
|
||||
#include "base/tools/Chrono.h"
|
||||
|
@ -256,7 +257,7 @@ static bool wrmsr(HANDLE driver, uint32_t reg, uint64_t value, uint64_t mask)
|
|||
}
|
||||
|
||||
|
||||
static bool wrmsr(const MsrItems &preset, bool save)
|
||||
static bool wrmsr(const MsrItems &preset, const std::vector<CpuThread>& threads, bool cache_qos, bool save)
|
||||
{
|
||||
bool success = true;
|
||||
|
||||
|
@ -282,14 +283,52 @@ static bool wrmsr(const MsrItems &preset, bool save)
|
|||
}
|
||||
}
|
||||
|
||||
std::thread wrmsr_thread([driver, &preset, &success]() {
|
||||
for (uint32_t i = 0, n = Cpu::info()->threads(); i < n; ++i) {
|
||||
const uint32_t n = Cpu::info()->threads();
|
||||
|
||||
// Which CPU cores will have access to the full L3 cache
|
||||
std::vector<bool> cacheEnabled(n, false);
|
||||
bool cacheQoSDisabled = threads.empty();
|
||||
|
||||
for (const CpuThread& t : threads) {
|
||||
// If some thread has no affinity or wrong affinity, disable cache QoS
|
||||
if ((t.affinity() < 0) || (t.affinity() >= n)) {
|
||||
cacheQoSDisabled = true;
|
||||
if (cache_qos) {
|
||||
LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "Cache QoS can only be enabled when all mining threads have affinity set", tag);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
cacheEnabled[t.affinity()] = true;
|
||||
}
|
||||
|
||||
if (cache_qos && !Cpu::info()->hasCatL3()) {
|
||||
LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "This CPU doesn't support cat_l3, cache QoS is unavailable", tag);
|
||||
cache_qos = false;
|
||||
}
|
||||
|
||||
std::thread wrmsr_thread([n, driver, &preset, &cacheEnabled, cache_qos, cacheQoSDisabled, &success]() {
|
||||
for (uint32_t i = 0; i < n; ++i) {
|
||||
if (!Platform::setThreadAffinity(i)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const auto &i : preset) {
|
||||
success = wrmsr(driver, i.reg(), i.value(), i.mask());
|
||||
success &= wrmsr(driver, i.reg(), i.value(), i.mask());
|
||||
}
|
||||
|
||||
if (cache_qos) {
|
||||
if (cacheQoSDisabled || cacheEnabled[i]) {
|
||||
// Assign Class Of Service 0 to current CPU core (default, full L3 cache available)
|
||||
success &= wrmsr(driver, 0xC8F, 0, MsrItem::kNoMask);
|
||||
}
|
||||
else {
|
||||
// Disable L3 cache for Class Of Service 1
|
||||
success &= wrmsr(driver, 0xC91, 0, MsrItem::kNoMask);
|
||||
|
||||
// Assign Class Of Service 1 to current CPU core
|
||||
success &= wrmsr(driver, 0xC8F, 1ULL << 32, MsrItem::kNoMask);
|
||||
}
|
||||
}
|
||||
|
||||
if (!success) {
|
||||
|
@ -349,7 +388,7 @@ void Rx::setMainLoopBounds(const std::pair<const void*, const void*>& bounds)
|
|||
} // namespace xmrig
|
||||
|
||||
|
||||
void xmrig::Rx::msrInit(const RxConfig &config)
|
||||
void xmrig::Rx::msrInit(const RxConfig &config, const std::vector<CpuThread>& threads)
|
||||
{
|
||||
const auto &preset = config.msrPreset();
|
||||
if (preset.empty()) {
|
||||
|
@ -358,7 +397,7 @@ void xmrig::Rx::msrInit(const RxConfig &config)
|
|||
|
||||
const uint64_t ts = Chrono::steadyMSecs();
|
||||
|
||||
if (wrmsr(preset, config.rdmsr())) {
|
||||
if (wrmsr(preset, threads, config.cacheQoS(), config.rdmsr())) {
|
||||
LOG_NOTICE(CLEAR "%s" GREEN_BOLD_S "register values for \"%s\" preset has been set successfully" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, config.msrPresetName(), Chrono::steadyMSecs() - ts);
|
||||
}
|
||||
else {
|
||||
|
@ -375,7 +414,7 @@ void xmrig::Rx::msrDestroy()
|
|||
|
||||
const uint64_t ts = Chrono::steadyMSecs();
|
||||
|
||||
if (!wrmsr(savedState, false)) {
|
||||
if (!wrmsr(savedState, std::vector<CpuThread>(), true, false)) {
|
||||
LOG_ERR(CLEAR "%s" RED_BOLD_S "failed to restore initial state" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, Chrono::steadyMSecs() - ts);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue