More precise hashrate calculation

- Use only steady timestamp counters to guarantee correctness
- CPU backend: directly measure total hashrate using raw hash counters from each thread; update data more often on ARM CPUs because they're slower
- GPU backends: directly measure total hashrate too, but use an interpolator with a 4-second lag to smooth out the variance caused by hashes being reported in batches

Total hashrate is now measured directly (in real time for CPU, with a 4-second lag for GPU), so it might differ slightly from the sum of all thread hashrates because the data points are taken at different moments in time.

Overhead is greatly reduced because the code no longer has to go through all threads to calculate the max total hashrate on every timer tick (twice a second).
This commit is contained in:
SChernykh 2020-10-10 11:18:01 +02:00
parent 0a2fe5caa7
commit 3fbf2ac3d4
17 changed files with 192 additions and 39 deletions

View file

@ -409,9 +409,9 @@ void xmrig::CudaBackend::printHashrate(bool details)
Log::print("| %8zu | %8" PRId64 " | %8s | %8s | %8s |" CYAN_BOLD(" #%u") YELLOW(" %s") GREEN(" %s"),
i,
data.thread.affinity(),
Hashrate::format(hashrate()->calc(i, Hashrate::ShortInterval) * scale, num, sizeof num / 3),
Hashrate::format(hashrate()->calc(i, Hashrate::MediumInterval) * scale, num + 16, sizeof num / 3),
Hashrate::format(hashrate()->calc(i, Hashrate::LargeInterval) * scale, num + 16 * 2, sizeof num / 3),
Hashrate::format(hashrate()->calc(i + 1, Hashrate::ShortInterval) * scale, num, sizeof num / 3),
Hashrate::format(hashrate()->calc(i + 1, Hashrate::MediumInterval) * scale, num + 16, sizeof num / 3),
Hashrate::format(hashrate()->calc(i + 1, Hashrate::LargeInterval) * scale, num + 16 * 2, sizeof num / 3),
data.device.index(),
data.device.topology().toString().data(),
data.device.name().data()
@ -421,9 +421,9 @@ void xmrig::CudaBackend::printHashrate(bool details)
}
Log::print(WHITE_BOLD_S "| - | - | %8s | %8s | %8s |",
Hashrate::format(hashrate()->calc(Hashrate::ShortInterval) * scale, num, sizeof num / 3),
Hashrate::format(hashrate()->calc(Hashrate::MediumInterval) * scale, num + 16, sizeof num / 3),
Hashrate::format(hashrate()->calc(Hashrate::LargeInterval) * scale, num + 16 * 2, sizeof num / 3)
Hashrate::format(hashrate_short * scale, num, sizeof num / 3),
Hashrate::format(hashrate_medium * scale, num + 16, sizeof num / 3),
Hashrate::format(hashrate_large * scale, num + 16 * 2, sizeof num / 3)
);
}

View file

@ -120,6 +120,12 @@ xmrig::CudaWorker::~CudaWorker()
}
// Reports this worker's raw hash counter as seen by the hashrate interpolator:
// the current steady-clock timestamp (milliseconds) is passed to
// m_hashrateData.interpolate() and its result is returned unchanged.
// NOTE(review): the commit description says the GPU interpolator lags by about
// 4 seconds; that lag is implemented inside HashrateInterpolator, not here.
uint64_t xmrig::CudaWorker::rawHashes() const
{
    const auto now = Chrono::steadyMSecs();

    return m_hashrateData.interpolate(now);
}
void xmrig::CudaWorker::jobEarlyNotification(const Job& job)
{
if (m_runner) {
@ -207,5 +213,8 @@ void xmrig::CudaWorker::storeStats()
m_count += m_runner ? m_runner->processedHashes() : 0;
const uint64_t timeStamp = Chrono::steadyMSecs();
m_hashrateData.addDataPoint(m_count, timeStamp);
Worker::storeStats();
}

View file

@ -27,6 +27,7 @@
#define XMRIG_CUDAWORKER_H
#include "backend/common/HashrateInterpolator.h"
#include "backend/common/Worker.h"
#include "backend/common/WorkerJob.h"
#include "backend/cuda/CudaLaunchData.h"
@ -49,6 +50,7 @@ public:
~CudaWorker() override;
uint64_t rawHashes() const override;
void jobEarlyNotification(const Job&) override;
static std::atomic<bool> ready;
@ -67,6 +69,8 @@ private:
ICudaRunner *m_runner = nullptr;
WorkerJob<1> m_job;
uint32_t m_deviceIndex;
HashrateInterpolator m_hashrateData;
};