Implemented VM mode for OpenCL RandomX.
This commit is contained in:
parent
4c90f9960e
commit
95daab4bc0
42 changed files with 450 additions and 165 deletions
|
@ -105,3 +105,14 @@ void xmrig::OclBaseRunner::enqueueWriteBuffer(cl_mem buffer, cl_bool blocking_wr
|
|||
throw std::runtime_error(OclError::toString(ret));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void xmrig::OclBaseRunner::finalize(uint32_t *hashOutput)
|
||||
{
|
||||
enqueueReadBuffer(m_output, CL_TRUE, 0, sizeof(cl_uint) * 0x100, hashOutput);
|
||||
|
||||
uint32_t &results = hashOutput[0xFF];
|
||||
if (results > 0xFF) {
|
||||
results = 0xFF;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -64,6 +64,7 @@ protected:
|
|||
protected:
|
||||
void enqueueReadBuffer(cl_mem buffer, cl_bool blocking_read, size_t offset, size_t size, void *ptr);
|
||||
void enqueueWriteBuffer(cl_mem buffer, cl_bool blocking_write, size_t offset, size_t size, const void *ptr);
|
||||
void finalize(uint32_t *hashOutput);
|
||||
|
||||
Algorithm m_algorithm;
|
||||
cl_command_queue m_queue = nullptr;
|
||||
|
|
|
@ -81,7 +81,7 @@ xmrig::OclCnRunner::~OclCnRunner()
|
|||
}
|
||||
|
||||
|
||||
bool xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
||||
void xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
||||
{
|
||||
static const cl_uint zero = 0;
|
||||
|
||||
|
@ -105,18 +105,11 @@ bool xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
|||
kernel->enqueue(m_queue, nonce, g_thd, w_size);
|
||||
}
|
||||
|
||||
enqueueReadBuffer(m_output, CL_TRUE, 0, sizeof(cl_uint) * 0x100, hashOutput);
|
||||
|
||||
uint32_t &results = hashOutput[0xFF];
|
||||
if (results > 0xFF) {
|
||||
results = 0xFF;
|
||||
}
|
||||
|
||||
return true;
|
||||
finalize(hashOutput);
|
||||
}
|
||||
|
||||
|
||||
bool xmrig::OclCnRunner::set(const Job &job, uint8_t *blob)
|
||||
void xmrig::OclCnRunner::set(const Job &job, uint8_t *blob)
|
||||
{
|
||||
if (job.size() > (Job::kMaxBlobSize - 4)) {
|
||||
throw std::length_error("job size too big");
|
||||
|
@ -139,8 +132,6 @@ bool xmrig::OclCnRunner::set(const Job &job, uint8_t *blob)
|
|||
for (auto kernel : m_branchKernels) {
|
||||
kernel->setTarget(job.target());
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -47,8 +47,8 @@ public:
|
|||
~OclCnRunner() override;
|
||||
|
||||
protected:
|
||||
bool run(uint32_t nonce, uint32_t *hashOutput) override;
|
||||
bool set(const Job &job, uint8_t *blob) override;
|
||||
void run(uint32_t nonce, uint32_t *hashOutput) override;
|
||||
void set(const Job &job, uint8_t *blob) override;
|
||||
void build() override;
|
||||
void init() override;
|
||||
|
||||
|
|
|
@ -31,7 +31,10 @@
|
|||
#include "backend/opencl/kernels/rx/HashAesKernel.h"
|
||||
#include "backend/opencl/OclLaunchData.h"
|
||||
#include "backend/opencl/wrappers/OclLib.h"
|
||||
#include "base/net/stratum/Job.h"
|
||||
#include "crypto/rx/Rx.h"
|
||||
#include "crypto/rx/RxAlgo.h"
|
||||
#include "crypto/rx/RxDataset.h"
|
||||
|
||||
|
||||
xmrig::OclRxBaseRunner::OclRxBaseRunner(size_t index, const OclLaunchData &data) : OclBaseRunner(index, data)
|
||||
|
@ -78,15 +81,61 @@ xmrig::OclRxBaseRunner::~OclRxBaseRunner()
|
|||
}
|
||||
|
||||
|
||||
bool xmrig::OclRxBaseRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
||||
void xmrig::OclRxBaseRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
||||
{
|
||||
return false;
|
||||
static const uint32_t zero = 0;
|
||||
|
||||
m_blake2b_initial_hash->setNonce(nonce);
|
||||
m_find_shares->setNonce(nonce);
|
||||
|
||||
enqueueWriteBuffer(m_output, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(uint32_t), &zero);
|
||||
|
||||
const uint32_t g_intensity = data().thread.intensity();
|
||||
|
||||
m_blake2b_initial_hash->enqueue(m_queue, g_intensity);
|
||||
m_fillAes1Rx4_scratchpad->enqueue(m_queue, g_intensity);
|
||||
|
||||
const uint32_t programCount = RxAlgo::programCount(m_algorithm);
|
||||
|
||||
for (uint32_t i = 0; i < programCount; ++i) {
|
||||
m_fillAes4Rx4_entropy->enqueue(m_queue, g_intensity);
|
||||
|
||||
execute(i);
|
||||
|
||||
if (i == programCount - 1) {
|
||||
m_hashAes1Rx4->enqueue(m_queue, g_intensity);
|
||||
m_blake2b_hash_registers_32->enqueue(m_queue, g_intensity);
|
||||
}
|
||||
else {
|
||||
m_blake2b_hash_registers_64->enqueue(m_queue, g_intensity);
|
||||
}
|
||||
}
|
||||
|
||||
m_find_shares->enqueue(m_queue, g_intensity);
|
||||
|
||||
finalize(hashOutput);
|
||||
|
||||
OclLib::finish(m_queue);
|
||||
}
|
||||
|
||||
|
||||
bool xmrig::OclRxBaseRunner::set(const Job &job, uint8_t *blob)
|
||||
void xmrig::OclRxBaseRunner::set(const Job &job, uint8_t *blob)
|
||||
{
|
||||
return false;
|
||||
if (!data().thread.isDatasetHost() && m_seed != job.seed()) {
|
||||
m_seed = job.seed();
|
||||
|
||||
auto dataset = Rx::dataset(job, 0);
|
||||
enqueueWriteBuffer(data().dataset->get(), CL_TRUE, 0, dataset->size(), dataset->raw());
|
||||
}
|
||||
|
||||
if (job.size() < Job::kMaxBlobSize) {
|
||||
memset(blob + job.size(), 0, Job::kMaxBlobSize - job.size());
|
||||
}
|
||||
|
||||
enqueueWriteBuffer(m_input, CL_TRUE, 0, Job::kMaxBlobSize, blob);
|
||||
|
||||
m_blake2b_initial_hash->setBlobSize(job.size());
|
||||
m_find_shares->setTarget(job.target());
|
||||
}
|
||||
|
||||
|
||||
|
@ -101,7 +150,7 @@ void xmrig::OclRxBaseRunner::build()
|
|||
m_fillAes1Rx4_scratchpad->setArgs(m_hashes, m_scratchpads, batch_size, rx_version);
|
||||
|
||||
m_fillAes4Rx4_entropy = new FillAesKernel(m_program, "fillAes4Rx4_entropy");
|
||||
m_fillAes1Rx4_scratchpad->setArgs(m_hashes, m_entropy, batch_size, rx_version);
|
||||
m_fillAes4Rx4_entropy->setArgs(m_hashes, m_entropy, batch_size, rx_version);
|
||||
|
||||
m_hashAes1Rx4 = new HashAesKernel(m_program);
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
|
||||
|
||||
#include "backend/opencl/runners/OclBaseRunner.h"
|
||||
#include "base/tools/Buffer.h"
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
@ -48,15 +49,18 @@ public:
|
|||
~OclRxBaseRunner() override;
|
||||
|
||||
protected:
|
||||
bool run(uint32_t nonce, uint32_t *hashOutput) override;
|
||||
bool set(const Job &job, uint8_t *blob) override;
|
||||
void run(uint32_t nonce, uint32_t *hashOutput) override;
|
||||
void set(const Job &job, uint8_t *blob) override;
|
||||
void build() override;
|
||||
void init() override;
|
||||
|
||||
protected:
|
||||
virtual void execute(uint32_t iteration) = 0;
|
||||
|
||||
Blake2bHashRegistersKernel *m_blake2b_hash_registers_32 = nullptr;
|
||||
Blake2bHashRegistersKernel *m_blake2b_hash_registers_64 = nullptr;
|
||||
Blake2bInitialHashKernel *m_blake2b_initial_hash = nullptr;
|
||||
Buffer m_seed;
|
||||
cl_mem m_entropy = nullptr;
|
||||
cl_mem m_hashes = nullptr;
|
||||
cl_mem m_rounding = nullptr;
|
||||
|
|
|
@ -32,27 +32,6 @@
|
|||
|
||||
xmrig::OclRxJitRunner::OclRxJitRunner(size_t index, const OclLaunchData &data) : OclRxBaseRunner(index, data)
|
||||
{
|
||||
if (m_rounding == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t g_thd = data.thread.intensity();
|
||||
cl_int ret;
|
||||
|
||||
m_registers = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 256 * g_thd, nullptr, &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
m_intermediate_programs = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 5120 * g_thd, nullptr, &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
m_programs = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 10048 * g_thd, nullptr, &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -74,3 +53,20 @@ void xmrig::OclRxJitRunner::build()
|
|||
m_blake2b_hash_registers_32->setArgs(m_hashes, m_registers, 256);
|
||||
m_blake2b_hash_registers_64->setArgs(m_hashes, m_registers, 256);
|
||||
}
|
||||
|
||||
|
||||
void xmrig::OclRxJitRunner::execute(uint32_t iteration)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void xmrig::OclRxJitRunner::init()
|
||||
{
|
||||
OclRxBaseRunner::init();
|
||||
|
||||
const size_t g_thd = data().thread.intensity();
|
||||
|
||||
m_registers = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 256 * g_thd, nullptr);
|
||||
m_intermediate_programs = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 5120 * g_thd, nullptr);
|
||||
m_programs = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 10048 * g_thd, nullptr);
|
||||
}
|
||||
|
|
|
@ -42,6 +42,8 @@ public:
|
|||
|
||||
protected:
|
||||
void build() override;
|
||||
void execute(uint32_t iteration) override;
|
||||
void init() override;
|
||||
|
||||
private:
|
||||
cl_mem m_intermediate_programs = nullptr;
|
||||
|
|
|
@ -32,20 +32,11 @@
|
|||
#include "backend/opencl/wrappers/OclLib.h"
|
||||
#include "crypto/rx/RxAlgo.h"
|
||||
|
||||
#include "base/io/log/Log.h"
|
||||
|
||||
|
||||
xmrig::OclRxVmRunner::OclRxVmRunner(size_t index, const OclLaunchData &data) : OclRxBaseRunner(index, data)
|
||||
{
|
||||
if (m_rounding == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t g_thd = data.thread.intensity();
|
||||
cl_int ret;
|
||||
|
||||
m_vm_states = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 2560 * g_thd, nullptr, &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -75,3 +66,44 @@ void xmrig::OclRxVmRunner::build()
|
|||
m_execute_vm = new ExecuteVmKernel(m_program);
|
||||
m_execute_vm->setArgs(m_vm_states, m_rounding, m_scratchpads, data().dataset->get(), batch_size);
|
||||
}
|
||||
|
||||
|
||||
void xmrig::OclRxVmRunner::execute(uint32_t iteration)
|
||||
{
|
||||
const uint32_t bfactor = std::min(data().thread.bfactor(), 8u);
|
||||
const uint32_t num_iterations = RxAlgo::programIterations(m_algorithm) >> bfactor;
|
||||
const uint32_t g_intensity = data().thread.intensity();
|
||||
|
||||
m_init_vm->enqueue(m_queue, g_intensity, iteration);
|
||||
|
||||
// LOG_WARN("bfactor:%u %u %u", bfactor, RxAlgo::programIterations(m_algorithm), num_iterations);
|
||||
|
||||
uint32_t first = 1;
|
||||
uint32_t last = 0;
|
||||
|
||||
m_execute_vm->setIterations(num_iterations);
|
||||
m_execute_vm->setFirst(first);
|
||||
m_execute_vm->setLast(last);
|
||||
|
||||
for (int j = 0, n = 1 << bfactor; j < n; ++j) {
|
||||
if (j == n - 1) {
|
||||
last = 1;
|
||||
m_execute_vm->setLast(last);
|
||||
}
|
||||
|
||||
m_execute_vm->enqueue(m_queue, g_intensity, data().thread.worksize());
|
||||
|
||||
if (j == 0) {
|
||||
first = 0;
|
||||
m_execute_vm->setFirst(first);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void xmrig::OclRxVmRunner::init()
|
||||
{
|
||||
OclRxBaseRunner::init();
|
||||
|
||||
m_vm_states = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 2560 * data().thread.intensity());
|
||||
}
|
||||
|
|
|
@ -46,6 +46,8 @@ public:
|
|||
|
||||
protected:
|
||||
void build() override;
|
||||
void execute(uint32_t iteration) override;
|
||||
void init() override;
|
||||
|
||||
private:
|
||||
cl_mem m_vm_states = nullptr;
|
||||
|
|
|
@ -61,7 +61,7 @@ xmrig::OclRyoRunner::~OclRyoRunner()
|
|||
}
|
||||
|
||||
|
||||
bool xmrig::OclRyoRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
||||
void xmrig::OclRyoRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
||||
{
|
||||
static const cl_uint zero = 0;
|
||||
|
||||
|
@ -78,21 +78,14 @@ bool xmrig::OclRyoRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
|||
m_cn1->enqueue(m_queue, g_thd, w_size);
|
||||
m_cn2->enqueue(m_queue, nonce, g_thd);
|
||||
|
||||
enqueueReadBuffer(m_output, CL_TRUE, 0, sizeof(cl_uint) * 0x100, hashOutput);
|
||||
|
||||
uint32_t &results = hashOutput[0xFF];
|
||||
if (results > 0xFF) {
|
||||
results = 0xFF;
|
||||
}
|
||||
|
||||
return true;
|
||||
finalize(hashOutput);
|
||||
}
|
||||
|
||||
|
||||
bool xmrig::OclRyoRunner::set(const Job &job, uint8_t *blob)
|
||||
void xmrig::OclRyoRunner::set(const Job &job, uint8_t *blob)
|
||||
{
|
||||
if (job.size() > (Job::kMaxBlobSize - 4)) {
|
||||
return false;
|
||||
throw std::length_error("job size too big");
|
||||
}
|
||||
|
||||
blob[job.size()] = 0x01;
|
||||
|
@ -101,8 +94,6 @@ bool xmrig::OclRyoRunner::set(const Job &job, uint8_t *blob)
|
|||
enqueueWriteBuffer(m_input, CL_TRUE, 0, Job::kMaxBlobSize, blob);
|
||||
|
||||
m_cn2->setTarget(job.target());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -48,8 +48,8 @@ public:
|
|||
~OclRyoRunner() override;
|
||||
|
||||
protected:
|
||||
bool run(uint32_t nonce, uint32_t *hashOutput) override;
|
||||
bool set(const Job &job, uint8_t *blob) override;
|
||||
void run(uint32_t nonce, uint32_t *hashOutput) override;
|
||||
void set(const Job &job, uint8_t *blob) override;
|
||||
void build() override;
|
||||
void init() override;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue