Use sub buffers.

This commit is contained in:
XMRig 2019-09-13 16:10:12 +07:00
parent 5a91552060
commit 9399491a64
15 changed files with 160 additions and 18 deletions

View file

@ -27,10 +27,11 @@
#include "backend/opencl/OclCache.h"
#include "backend/opencl/OclLaunchData.h"
#include "backend/opencl/runners/OclBaseRunner.h"
#include "backend/opencl/wrappers/OclError.h"
#include "backend/opencl/wrappers/OclLib.h"
#include "base/io/log/Log.h"
#include "base/net/stratum/Job.h"
#include "backend/opencl/wrappers/OclError.h"
#include "crypto/common/VirtualMemory.h"
xmrig::OclBaseRunner::OclBaseRunner(size_t id, const OclLaunchData &data) :
@ -38,6 +39,7 @@ xmrig::OclBaseRunner::OclBaseRunner(size_t id, const OclLaunchData &data) :
m_ctx(data.ctx),
m_source(OclSource::get(data.algorithm)),
m_data(data),
m_align(OclLib::getUint(data.device.id(), CL_DEVICE_MEM_BASE_ADDR_ALIGN)),
m_threadId(id)
{
m_deviceKey = data.device.name();
@ -61,10 +63,17 @@ xmrig::OclBaseRunner::~OclBaseRunner()
OclLib::release(m_program);
OclLib::release(m_input);
OclLib::release(m_output);
OclLib::release(m_buffer);
OclLib::release(m_queue);
}
// Bytes of parent buffer the base runner needs: one region of Job::kMaxBlobSize and one region of
// 0x100 cl_uint values, each rounded up with align(). These are the same two sizes init() passes to
// createSubBuffer() for m_input and m_output.
size_t xmrig::OclBaseRunner::bufferSize() const
{
    const size_t inputBytes  = align(Job::kMaxBlobSize);
    const size_t outputBytes = align(sizeof(cl_uint) * 0x100);

    return inputBytes + outputBytes;
}
uint32_t xmrig::OclBaseRunner::deviceIndex() const
{
return data().thread.index();
@ -84,8 +93,33 @@ void xmrig::OclBaseRunner::build()
// Creates the command queue and a single parent buffer (m_buffer), then takes the input and output
// regions from that parent via createSubBuffer().
void xmrig::OclBaseRunner::init()
{
m_queue = OclLib::createCommandQueue(m_ctx, data().device.id());
// NOTE(review): m_input and m_output are assigned here and again further down. These two standalone
// allocations look like the pre-change lines of the diff this text was extracted from — confirm which
// pair is live before reusing this block as-is.
m_input = OclLib::createBuffer(m_ctx, CL_MEM_READ_ONLY, Job::kMaxBlobSize);
m_output = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100);
constexpr size_t oneGiB = 1024 * 1024 * 1024;
// bufferSize() is an override point, so the value includes whatever regions the concrete runner adds.
size_t size = bufferSize();
// When less than 1 GiB is required and the device reports at least 1 GiB free, allocate 1 GiB instead.
if (size < oneGiB && data().device.freeMemSize() >= oneGiB) {
size = oneGiB;
}
m_buffer = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, size);
// Sub-buffers are handed out sequentially from m_buffer; createSubBuffer() advances m_offset each time.
m_input = createSubBuffer(CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, Job::kMaxBlobSize);
m_output = createSubBuffer(CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100);
}
// Creates a sub-buffer of `size` bytes inside m_buffer, starting at the current m_offset, and then
// moves m_offset forward by align(size) so the next region starts on an aligned boundary.
//
// flags  memory flags forwarded to OclLib::createSubBuffer
// size   requested size in bytes (the unaligned value is what gets passed to OpenCL)
//
// Returns the handle produced by OclLib::createSubBuffer.
cl_mem xmrig::OclBaseRunner::createSubBuffer(cl_mem_flags flags, size_t size)
{
    cl_mem region = OclLib::createSubBuffer(m_buffer, flags, m_offset, size);

    // Advance only after the call above has returned.
    m_offset += align(size);

    return region;
}
// Rounds `size` via VirtualMemory::align, using m_align as the alignment argument. m_align is
// the device's CL_DEVICE_MEM_BASE_ADDR_ALIGN value read in the constructor.
// NOTE(review): OpenCL documents CL_DEVICE_MEM_BASE_ADDR_ALIGN in bits; presumably it is treated
// as a byte count here, which would over-align rather than under-align — confirm this is intended.
size_t xmrig::OclBaseRunner::align(size_t size) const
{
    const size_t rounded = VirtualMemory::align(size, m_align);

    return rounded;
}

View file

@ -57,11 +57,14 @@ protected:
// Returns the launch data reference stored in m_data.
inline const OclLaunchData &data() const override { return m_data; }
// Returns the thread id stored in m_threadId.
inline size_t threadId() const override { return m_threadId; }
size_t bufferSize() const override;
uint32_t deviceIndex() const override;
void build() override;
void init() override;
protected:
cl_mem createSubBuffer(cl_mem_flags flags, size_t size);
size_t align(size_t size) const;
void enqueueReadBuffer(cl_mem buffer, cl_bool blocking_read, size_t offset, size_t size, void *ptr);
void enqueueWriteBuffer(cl_mem buffer, cl_bool blocking_write, size_t offset, size_t size, const void *ptr);
void finalize(uint32_t *hashOutput);
@ -69,12 +72,15 @@ protected:
Algorithm m_algorithm;
cl_command_queue m_queue = nullptr;
cl_context m_ctx;
cl_mem m_buffer = nullptr;
cl_mem m_input = nullptr;
cl_mem m_output = nullptr;
cl_program m_program = nullptr;
const char *m_source;
const OclLaunchData &m_data;
const size_t m_align;
const size_t m_threadId;
size_t m_offset = 0;
std::string m_deviceKey;
std::string m_options;
};

View file

@ -81,6 +81,17 @@ xmrig::OclCnRunner::~OclCnRunner()
}
// Parent-buffer size for this runner: the base requirement plus one aligned region for each size
// expression that also appears in this runner's createSubBuffer() calls (scratchpads, states, and
// BRANCH_MAX branch buffers).
size_t xmrig::OclCnRunner::bufferSize() const
{
    const size_t intensity = data().thread.intensity();

    const size_t scratchpads = align(m_algorithm.l3() * intensity);
    const size_t states      = align(200 * intensity);
    const size_t branch      = align(sizeof(cl_uint) * (intensity + 2));

    return OclBaseRunner::bufferSize() + scratchpads + states + (branch * BRANCH_MAX);
}
void xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput)
{
static const cl_uint zero = 0;
@ -167,10 +178,10 @@ void xmrig::OclCnRunner::init()
const size_t g_thd = data().thread.intensity();
m_scratchpads = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, m_algorithm.l3() * g_thd);
m_states = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 200 * g_thd);
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, m_algorithm.l3() * g_thd);
m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * g_thd);
for (size_t i = 0; i < BRANCH_MAX; ++i) {
m_branches[i] = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2));
m_branches[i] = createSubBuffer(CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2));
}
}

View file

@ -47,6 +47,7 @@ public:
~OclCnRunner() override;
protected:
size_t bufferSize() const override;
void run(uint32_t nonce, uint32_t *hashOutput) override;
void set(const Job &job, uint8_t *blob) override;
void build() override;

View file

@ -136,6 +136,18 @@ void xmrig::OclRxBaseRunner::set(const Job &job, uint8_t *blob)
}
// Parent-buffer size for this runner: the base requirement plus one aligned region for each size
// expression that also appears in this runner's createSubBuffer() calls (scratchpads, hashes,
// entropy, rounding).
size_t xmrig::OclRxBaseRunner::bufferSize() const
{
    const size_t intensity = data().thread.intensity();

    const size_t scratchpads = align((m_algorithm.l3() + 64) * intensity);
    const size_t hashes      = align(64 * intensity);
    const size_t entropy     = align((128 + 2560) * intensity);
    const size_t rounding    = align(sizeof(uint32_t) * intensity);

    return OclBaseRunner::bufferSize() + scratchpads + hashes + entropy + rounding;
}
void xmrig::OclRxBaseRunner::build()
{
OclBaseRunner::build();
@ -168,8 +180,8 @@ void xmrig::OclRxBaseRunner::init()
const size_t g_thd = data().thread.intensity();
m_scratchpads = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, (m_algorithm.l3() + 64) * g_thd);
m_hashes = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 64 * g_thd);
m_entropy = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, (128 + 2560) * g_thd);
m_rounding = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(uint32_t) * g_thd);
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (m_algorithm.l3() + 64) * g_thd);
m_hashes = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 64 * g_thd);
m_entropy = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (128 + 2560) * g_thd);
m_rounding = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, sizeof(uint32_t) * g_thd);
}

View file

@ -49,10 +49,11 @@ public:
~OclRxBaseRunner() override;
protected:
void run(uint32_t nonce, uint32_t *hashOutput) override;
void set(const Job &job, uint8_t *blob) override;
size_t bufferSize() const override;
void build() override;
void init() override;
void run(uint32_t nonce, uint32_t *hashOutput) override;
void set(const Job &job, uint8_t *blob) override;
protected:
virtual void execute(uint32_t iteration) = 0;

View file

@ -52,6 +52,14 @@ xmrig::OclRxJitRunner::~OclRxJitRunner()
}
// Parent-buffer size for this runner: everything OclRxBaseRunner needs plus one aligned region for
// each size expression that also appears in this runner's createSubBuffer() calls (registers,
// intermediate programs, programs).
size_t xmrig::OclRxJitRunner::bufferSize() const
{
    const size_t intensity = data().thread.intensity();

    const size_t registers    = align(256 * intensity);
    const size_t intermediate = align(5120 * intensity);
    const size_t programs     = align(10048 * intensity);

    return OclRxBaseRunner::bufferSize() + registers + intermediate + programs;
}
void xmrig::OclRxJitRunner::build()
{
OclRxBaseRunner::build();
@ -92,9 +100,9 @@ void xmrig::OclRxJitRunner::init()
const size_t g_thd = data().thread.intensity();
m_registers = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 256 * g_thd, nullptr);
m_intermediate_programs = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 5120 * g_thd, nullptr);
m_programs = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 10048 * g_thd, nullptr);
m_registers = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 256 * g_thd);
m_intermediate_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 5120 * g_thd);
m_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 10048 * g_thd);
}

View file

@ -45,6 +45,7 @@ public:
~OclRxJitRunner() override;
protected:
size_t bufferSize() const override;
void build() override;
void execute(uint32_t iteration) override;
void init() override;

View file

@ -47,6 +47,12 @@ xmrig::OclRxVmRunner::~OclRxVmRunner()
}
// Parent-buffer size for this runner: everything OclRxBaseRunner needs plus one aligned region for
// the VM states. The size expression is the same one used in the m_vm_states createSubBuffer() call.
size_t xmrig::OclRxVmRunner::bufferSize() const
{
    // The multiplication is written exactly as in the createSubBuffer() call so both yield the same value.
    const size_t vmStates = align(2560 * data().thread.intensity());

    return OclRxBaseRunner::bufferSize() + vmStates;
}
void xmrig::OclRxVmRunner::build()
{
OclRxBaseRunner::build();
@ -94,5 +100,5 @@ void xmrig::OclRxVmRunner::init()
{
OclRxBaseRunner::init();
m_vm_states = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 2560 * data().thread.intensity());
m_vm_states = createSubBuffer(CL_MEM_READ_WRITE, 2560 * data().thread.intensity());
}

View file

@ -45,6 +45,7 @@ public:
~OclRxVmRunner() override;
protected:
size_t bufferSize() const override;
void build() override;
void execute(uint32_t iteration) override;
void init() override;

View file

@ -61,6 +61,14 @@ xmrig::OclRyoRunner::~OclRyoRunner()
}
// Parent-buffer size for this runner: the base requirement plus one aligned region for each size
// expression that also appears in this runner's createSubBuffer() calls (scratchpads, states).
size_t xmrig::OclRyoRunner::bufferSize() const
{
    const size_t intensity = data().thread.intensity();

    const size_t scratchpads = align(data().algorithm.l3() * intensity);
    const size_t states      = align(200 * intensity);

    return OclBaseRunner::bufferSize() + scratchpads + states;
}
void xmrig::OclRyoRunner::run(uint32_t nonce, uint32_t *hashOutput)
{
static const cl_uint zero = 0;
@ -123,6 +131,6 @@ void xmrig::OclRyoRunner::init()
const size_t g_thd = data().thread.intensity();
m_scratchpads = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, data().algorithm.l3() * g_thd);
m_states = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 200 * g_thd);
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, data().algorithm.l3() * g_thd);
m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * g_thd);
}

View file

@ -48,6 +48,7 @@ public:
~OclRyoRunner() override;
protected:
size_t bufferSize() const override;
void run(uint32_t nonce, uint32_t *hashOutput) override;
void set(const Job &job, uint8_t *blob) override;
void build() override;