AstroBWT 20-50% speedup

Skips hashes whose stage 2 size is too large. Adds a configurable `astrobwt-max-size` parameter: the default value is 550, the minimum is 400, and the maximum is 1200; the optimal value ranges from 500 to 600 depending on the CPU.

- Intel CPUs get a 20-25% speedup
- 1st- and 2nd-gen Ryzens get a 30% speedup
- 3rd-gen Ryzens get up to a 50% speedup
This commit is contained in:
SChernykh 2020-03-05 12:20:21 +01:00
parent c80ef54b60
commit eeadea53e2
11 changed files with 63 additions and 10 deletions

View file

@ -416,6 +416,10 @@ rapidjson::Value xmrig::CpuBackend::toJSON(rapidjson::Document &doc) const
out.AddMember("argon2-impl", argon2::Impl::name().toJSON(), allocator);
# endif
# ifdef XMRIG_ALGO_ASTROBWT
out.AddMember("astrobwt-max-size", cpu.astrobwtMaxSize(), allocator);
# endif
out.AddMember("hugepages", d_ptr->hugePages(2, doc), allocator);
out.AddMember("memory", static_cast<uint64_t>(d_ptr->algo.isValid() ? (d_ptr->ways() * d_ptr->algo.l3()) : 0), allocator);

View file

@ -28,6 +28,7 @@
#include "backend/cpu/Cpu.h"
#include "base/io/json/Json.h"
#include "rapidjson/document.h"
#include <algorithm>
namespace xmrig {
@ -48,6 +49,11 @@ static const char *kAsm = "asm";
static const char *kArgon2Impl = "argon2-impl";
#endif
#ifdef XMRIG_ALGO_ASTROBWT
static const char* kAstroBWTMaxSize = "astrobwt-max-size";
#endif
extern template class Threads<CpuThreads>;
}
@ -85,6 +91,10 @@ rapidjson::Value xmrig::CpuConfig::toJSON(rapidjson::Document &doc) const
obj.AddMember(StringRef(kArgon2Impl), m_argon2Impl.toJSON(), allocator);
# endif
# ifdef XMRIG_ALGO_ASTROBWT
obj.AddMember(StringRef(kAstroBWTMaxSize), m_astrobwtMaxSize, allocator);
# endif
m_threads.toJSON(obj, doc);
return obj;
@ -136,6 +146,16 @@ void xmrig::CpuConfig::read(const rapidjson::Value &value)
m_argon2Impl = Json::getString(value, kArgon2Impl);
# endif
# ifdef XMRIG_ALGO_ASTROBWT
const auto& obj = Json::getValue(value, kAstroBWTMaxSize);
if (obj.IsNull() || !obj.IsInt()) {
m_shouldSave = true;
}
else {
m_astrobwtMaxSize = std::min(std::max(obj.GetInt(), 400), 1200);
}
# endif
m_threads.read(value);
generate();
@ -167,7 +187,7 @@ void xmrig::CpuConfig::generate()
count += xmrig::generate<Algorithm::ARGON2>(m_threads, m_limit);
count += xmrig::generate<Algorithm::ASTROBWT>(m_threads, m_limit);
m_shouldSave = count > 0;
m_shouldSave |= count > 0;
}

View file

@ -58,6 +58,7 @@ public:
inline bool isYield() const { return m_yield; }
inline const Assembly &assembly() const { return m_assembly; }
inline const String &argon2Impl() const { return m_argon2Impl; }
inline int astrobwtMaxSize() const { return m_astrobwtMaxSize; }
inline const Threads<CpuThreads> &threads() const { return m_threads; }
inline int priority() const { return m_priority; }
inline uint32_t limit() const { return m_limit; }
@ -78,6 +79,7 @@ private:
int m_memoryPool = 0;
int m_priority = -1;
String m_argon2Impl;
int m_astrobwtMaxSize = 550;
Threads<CpuThreads> m_threads;
uint32_t m_limit = 100;
};

View file

@ -42,7 +42,8 @@ xmrig::CpuLaunchData::CpuLaunchData(const Miner *miner, const Algorithm &algorit
priority(config.priority()),
affinity(thread.affinity()),
miner(miner),
intensity(std::min<uint32_t>(thread.intensity(), algorithm.maxIntensity()))
intensity(std::min<uint32_t>(thread.intensity(), algorithm.maxIntensity())),
astrobwtMaxSize(config.astrobwtMaxSize())
{
}

View file

@ -65,6 +65,7 @@ public:
const int64_t affinity;
const Miner *miner;
const uint32_t intensity;
const int astrobwtMaxSize;
};

View file

@ -32,6 +32,7 @@
#include "core/Miner.h"
#include "crypto/cn/CnCtx.h"
#include "crypto/cn/CryptoNight_test.h"
#include "crypto/cn/CryptoNight.h"
#include "crypto/common/Nonce.h"
#include "crypto/common/VirtualMemory.h"
#include "crypto/rx/Rx.h"
@ -76,6 +77,7 @@ xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
Worker(id, data.affinity, data.priority),
m_algorithm(data.algorithm),
m_assembly(data.assembly),
m_astrobwtMaxSize(data.astrobwtMaxSize * 1000),
m_hwAES(data.hwAES),
m_yield(data.yield),
m_av(data.av()),
@ -240,6 +242,8 @@ void xmrig::CpuWorker<N>::start()
current_job_nonces[i] = *m_job.nonce(i);
}
bool valid = true;
# ifdef XMRIG_ALGO_RANDOMX
if (job.algorithm().family() == Algorithm::RANDOM_X) {
if (first) {
@ -256,20 +260,31 @@ void xmrig::CpuWorker<N>::start()
else
# endif
{
fn(job.algorithm())(m_job.blob(), job.size(), m_hash, m_ctx, job.height());
# ifdef XMRIG_ALGO_ASTROBWT
if (job.algorithm().family() == Algorithm::ASTROBWT) {
if (!astrobwt::astrobwt_dero(m_job.blob(), job.size(), m_ctx[0]->memory, m_hash, m_astrobwtMaxSize))
valid = false;
}
else
# endif
{
fn(job.algorithm())(m_job.blob(), job.size(), m_hash, m_ctx, job.height());
}
if (!nextRound(m_job)) {
break;
};
}
for (size_t i = 0; i < N; ++i) {
if (*reinterpret_cast<uint64_t*>(m_hash + (i * 32) + 24) < job.target()) {
JobResults::submit(job, current_job_nonces[i], m_hash + (i * 32));
if (valid) {
for (size_t i = 0; i < N; ++i) {
if (*reinterpret_cast<uint64_t*>(m_hash + (i * 32) + 24) < job.target()) {
JobResults::submit(job, current_job_nonces[i], m_hash + (i * 32));
}
}
m_count += N;
}
m_count += N;
if (m_yield) {
std::this_thread::yield();
}

View file

@ -70,6 +70,7 @@ private:
const Algorithm m_algorithm;
const Assembly m_assembly;
const int m_astrobwtMaxSize;
const bool m_hwAES;
const bool m_yield;
const CnHash::AlgoVariant m_av;