AstroBWT 20-50% speedup

Skips hashes with large stage 2 size. Added configurable `astrobwt-max-size` parameter, default value is 550, min 400, max 1200, optimal value ranges from 500 to 600 depending on CPU.

- Intel CPUs get 20-25% speedup
- 1st- and 2nd-gen Ryzens get 30% speedup
- 3rd-gen Ryzens get up to 50% speedup
This commit is contained in:
SChernykh 2020-03-05 12:20:21 +01:00
parent c80ef54b60
commit eeadea53e2
11 changed files with 63 additions and 10 deletions

View file

@ -32,6 +32,7 @@
#include "core/Miner.h"
#include "crypto/cn/CnCtx.h"
#include "crypto/cn/CryptoNight_test.h"
#include "crypto/cn/CryptoNight.h"
#include "crypto/common/Nonce.h"
#include "crypto/common/VirtualMemory.h"
#include "crypto/rx/Rx.h"
@ -76,6 +77,7 @@ xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
Worker(id, data.affinity, data.priority),
m_algorithm(data.algorithm),
m_assembly(data.assembly),
m_astrobwtMaxSize(data.astrobwtMaxSize * 1000),
m_hwAES(data.hwAES),
m_yield(data.yield),
m_av(data.av()),
@ -240,6 +242,8 @@ void xmrig::CpuWorker<N>::start()
current_job_nonces[i] = *m_job.nonce(i);
}
bool valid = true;
# ifdef XMRIG_ALGO_RANDOMX
if (job.algorithm().family() == Algorithm::RANDOM_X) {
if (first) {
@ -256,20 +260,31 @@ void xmrig::CpuWorker<N>::start()
else
# endif
{
fn(job.algorithm())(m_job.blob(), job.size(), m_hash, m_ctx, job.height());
# ifdef XMRIG_ALGO_ASTROBWT
if (job.algorithm().family() == Algorithm::ASTROBWT) {
if (!astrobwt::astrobwt_dero(m_job.blob(), job.size(), m_ctx[0]->memory, m_hash, m_astrobwtMaxSize))
valid = false;
}
else
# endif
{
fn(job.algorithm())(m_job.blob(), job.size(), m_hash, m_ctx, job.height());
}
if (!nextRound(m_job)) {
break;
};
}
for (size_t i = 0; i < N; ++i) {
if (*reinterpret_cast<uint64_t*>(m_hash + (i * 32) + 24) < job.target()) {
JobResults::submit(job, current_job_nonces[i], m_hash + (i * 32));
if (valid) {
for (size_t i = 0; i < N; ++i) {
if (*reinterpret_cast<uint64_t*>(m_hash + (i * 32) + 24) < job.target()) {
JobResults::submit(job, current_job_nonces[i], m_hash + (i * 32));
}
}
m_count += N;
}
m_count += N;
if (m_yield) {
std::this_thread::yield();
}