From e8acb8a2a944d5a92875b028381036ddcf0c5632 Mon Sep 17 00:00:00 2001 From: XMRig Date: Mon, 16 Sep 2019 23:53:39 +0700 Subject: [PATCH] Simplify code. --- src/backend/opencl/runners/OclBaseRunner.cpp | 3 +- src/backend/opencl/runners/OclBaseRunner.h | 1 + src/backend/opencl/runners/OclCnRunner.cpp | 35 +++++++--------- .../opencl/runners/OclRxBaseRunner.cpp | 41 ++++++++----------- src/backend/opencl/runners/OclRxJitRunner.cpp | 26 ++++-------- src/backend/opencl/runners/OclRxVmRunner.cpp | 16 ++++---- src/backend/opencl/runners/OclRyoRunner.cpp | 23 ++++------- 7 files changed, 58 insertions(+), 87 deletions(-) diff --git a/src/backend/opencl/runners/OclBaseRunner.cpp b/src/backend/opencl/runners/OclBaseRunner.cpp index 43cffab2..b0149f34 100644 --- a/src/backend/opencl/runners/OclBaseRunner.cpp +++ b/src/backend/opencl/runners/OclBaseRunner.cpp @@ -40,7 +40,8 @@ xmrig::OclBaseRunner::OclBaseRunner(size_t id, const OclLaunchData &data) : m_source(OclSource::get(data.algorithm)), m_data(data), m_align(OclLib::getUint(data.device.id(), CL_DEVICE_MEM_BASE_ADDR_ALIGN)), - m_threadId(id) + m_threadId(id), + m_intensity(data.thread.intensity()) { m_deviceKey = data.device.name(); diff --git a/src/backend/opencl/runners/OclBaseRunner.h b/src/backend/opencl/runners/OclBaseRunner.h index a6ab0587..ea115957 100644 --- a/src/backend/opencl/runners/OclBaseRunner.h +++ b/src/backend/opencl/runners/OclBaseRunner.h @@ -83,6 +83,7 @@ protected: size_t m_offset = 0; std::string m_deviceKey; std::string m_options; + uint32_t m_intensity; }; diff --git a/src/backend/opencl/runners/OclCnRunner.cpp b/src/backend/opencl/runners/OclCnRunner.cpp index 65e9611f..8e58ac54 100644 --- a/src/backend/opencl/runners/OclCnRunner.cpp +++ b/src/backend/opencl/runners/OclCnRunner.cpp @@ -83,12 +83,10 @@ xmrig::OclCnRunner::~OclCnRunner() size_t xmrig::OclCnRunner::bufferSize() const { - const size_t g_thd = data().thread.intensity(); - return OclBaseRunner::bufferSize() + - align(m_algorithm.l3() * g_thd) + - align(200 * g_thd) + - (align(sizeof(cl_uint) * (g_thd + 2)) * BRANCH_MAX); + align(m_algorithm.l3() * m_intensity) + + align(200 * m_intensity) + + (align(sizeof(cl_uint) * (m_intensity + 2)) * BRANCH_MAX); } @@ -96,14 +94,13 @@ void xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput) { static const cl_uint zero = 0; - const size_t g_intensity = data().thread.intensity(); - const size_t w_size = data().thread.worksize(); - const size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size; + const size_t w_size = data().thread.worksize(); + const size_t g_thd = ((m_intensity + w_size - 1u) / w_size) * w_size; assert(g_thd % w_size == 0); for (size_t i = 0; i < BRANCH_MAX; ++i) { - enqueueWriteBuffer(m_branches[i], CL_FALSE, sizeof(cl_uint) * g_intensity, sizeof(cl_uint), &zero); + enqueueWriteBuffer(m_branches[i], CL_FALSE, sizeof(cl_uint) * m_intensity, sizeof(cl_uint), &zero); } enqueueWriteBuffer(m_output, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(cl_uint), &zero); @@ -137,7 +134,7 @@ void xmrig::OclCnRunner::set(const Job &job, uint8_t *blob) m_height = job.height(); m_cnr = OclCnR::get(*this, m_height); m_cn1 = new Cn1Kernel(m_cnr, m_height); - m_cn1->setArgs(m_input, m_scratchpads, m_states, data().thread.intensity()); + m_cn1->setArgs(m_input, m_scratchpads, m_states, m_intensity); } for (auto kernel : m_branchKernels) { @@ -150,22 +147,20 @@ void xmrig::OclCnRunner::build() { OclBaseRunner::build(); - const uint32_t intensity = data().thread.intensity(); - m_cn0 = new Cn0Kernel(m_program); - m_cn0->setArgs(m_input, m_scratchpads, m_states, intensity); + m_cn0->setArgs(m_input, m_scratchpads, m_states, m_intensity); m_cn2 = new Cn2Kernel(m_program); - m_cn2->setArgs(m_scratchpads, m_states, m_branches, intensity); + m_cn2->setArgs(m_scratchpads, m_states, m_branches, m_intensity); if (m_algorithm != Algorithm::CN_R) { m_cn1 = new Cn1Kernel(m_program); - m_cn1->setArgs(m_input, m_scratchpads, m_states, intensity); + m_cn1->setArgs(m_input, m_scratchpads, m_states, m_intensity); } for (size_t i = 0; i < BRANCH_MAX; ++i) { auto kernel = new CnBranchKernel(i, m_program); - kernel->setArgs(m_states, m_branches[i], m_output, intensity); + kernel->setArgs(m_states, m_branches[i], m_output, m_intensity); m_branchKernels[i] = kernel; } @@ -176,12 +171,10 @@ void xmrig::OclCnRunner::init() { OclBaseRunner::init(); - const size_t g_thd = data().thread.intensity(); - - m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, m_algorithm.l3() * g_thd); - m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * g_thd); + m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, m_algorithm.l3() * m_intensity); + m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * m_intensity); for (size_t i = 0; i < BRANCH_MAX; ++i) { - m_branches[i] = createSubBuffer(CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2)); + m_branches[i] = createSubBuffer(CL_MEM_READ_WRITE, sizeof(cl_uint) * (m_intensity + 2)); } } diff --git a/src/backend/opencl/runners/OclRxBaseRunner.cpp b/src/backend/opencl/runners/OclRxBaseRunner.cpp index 640803da..767b1083 100644 --- a/src/backend/opencl/runners/OclRxBaseRunner.cpp +++ b/src/backend/opencl/runners/OclRxBaseRunner.cpp @@ -87,28 +87,26 @@ void xmrig::OclRxBaseRunner::run(uint32_t nonce, uint32_t *hashOutput) enqueueWriteBuffer(m_output, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(uint32_t), &zero); - const uint32_t g_intensity = data().thread.intensity(); - - m_blake2b_initial_hash->enqueue(m_queue, g_intensity); - m_fillAes1Rx4_scratchpad->enqueue(m_queue, g_intensity); + m_blake2b_initial_hash->enqueue(m_queue, m_intensity); + m_fillAes1Rx4_scratchpad->enqueue(m_queue, m_intensity); const uint32_t programCount = RxAlgo::programCount(m_algorithm); for (uint32_t i = 0; i < programCount; ++i) { - m_fillAes4Rx4_entropy->enqueue(m_queue, g_intensity); + m_fillAes4Rx4_entropy->enqueue(m_queue, m_intensity); execute(i); if (i == programCount - 1) { - m_hashAes1Rx4->enqueue(m_queue, g_intensity); - m_blake2b_hash_registers_32->enqueue(m_queue, g_intensity); + m_hashAes1Rx4->enqueue(m_queue, m_intensity); + m_blake2b_hash_registers_32->enqueue(m_queue, m_intensity); } else { - m_blake2b_hash_registers_64->enqueue(m_queue, g_intensity); + m_blake2b_hash_registers_64->enqueue(m_queue, m_intensity); } } - m_find_shares->enqueue(m_queue, g_intensity); + m_find_shares->enqueue(m_queue, m_intensity); finalize(hashOutput); @@ -138,13 +136,11 @@ void xmrig::OclRxBaseRunner::set(const Job &job, uint8_t *blob) size_t xmrig::OclRxBaseRunner::bufferSize() const { - const size_t g_thd = data().thread.intensity(); - return OclBaseRunner::bufferSize() + - align((m_algorithm.l3() + 64) * g_thd) + - align(64 * g_thd) + - align((128 + 2560) * g_thd) + - align(sizeof(uint32_t) * g_thd); + align((m_algorithm.l3() + 64) * m_intensity) + + align(64 * m_intensity) + + align((128 + 2560) * m_intensity) + + align(sizeof(uint32_t) * m_intensity); } @@ -152,14 +148,13 @@ void xmrig::OclRxBaseRunner::build() { OclBaseRunner::build(); - const uint32_t batch_size = data().thread.intensity(); const uint32_t rx_version = RxAlgo::version(m_algorithm); m_fillAes1Rx4_scratchpad = new FillAesKernel(m_program, "fillAes1Rx4_scratchpad"); - m_fillAes1Rx4_scratchpad->setArgs(m_hashes, m_scratchpads, batch_size, rx_version); + m_fillAes1Rx4_scratchpad->setArgs(m_hashes, m_scratchpads, m_intensity, rx_version); m_fillAes4Rx4_entropy = new FillAesKernel(m_program, "fillAes4Rx4_entropy"); - m_fillAes4Rx4_entropy->setArgs(m_hashes, m_entropy, batch_size, rx_version); + m_fillAes4Rx4_entropy->setArgs(m_hashes, m_entropy, m_intensity, rx_version); m_hashAes1Rx4 = new HashAesKernel(m_program); @@ -178,10 +173,8 @@ void xmrig::OclRxBaseRunner::init() { OclBaseRunner::init(); - const size_t g_thd = data().thread.intensity(); - - m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (m_algorithm.l3() + 64) * g_thd); - m_hashes = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 64 * g_thd); - m_entropy = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (128 + 2560) * g_thd); - m_rounding = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, sizeof(uint32_t) * g_thd); + m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (m_algorithm.l3() + 64) * m_intensity); + m_hashes = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 64 * m_intensity); + m_entropy = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (128 + 2560) * m_intensity); + m_rounding = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, sizeof(uint32_t) * m_intensity); } diff --git a/src/backend/opencl/runners/OclRxJitRunner.cpp b/src/backend/opencl/runners/OclRxJitRunner.cpp index 0bf39437..29020fe1 100644 --- a/src/backend/opencl/runners/OclRxJitRunner.cpp +++ b/src/backend/opencl/runners/OclRxJitRunner.cpp @@ -54,9 +54,7 @@ xmrig::OclRxJitRunner::~OclRxJitRunner() size_t xmrig::OclRxJitRunner::bufferSize() const { - const size_t g_thd = data().thread.intensity(); - - return OclRxBaseRunner::bufferSize() + align(256 * g_thd) + align(5120 * g_thd) + align(10048 * g_thd); + return OclRxBaseRunner::bufferSize() + align(256 * m_intensity) + align(5120 * m_intensity) + align(10048 * m_intensity); } @@ -64,33 +62,29 @@ void xmrig::OclRxJitRunner::build() { OclRxBaseRunner::build(); - const uint32_t batch_size = data().thread.intensity(); - - m_hashAes1Rx4->setArgs(m_scratchpads, m_registers, 256, batch_size); + m_hashAes1Rx4->setArgs(m_scratchpads, m_registers, 256, m_intensity); m_blake2b_hash_registers_32->setArgs(m_hashes, m_registers, 256); m_blake2b_hash_registers_64->setArgs(m_hashes, m_registers, 256); m_randomx_jit = new RxJitKernel(m_program); - m_randomx_jit->setArgs(m_entropy, m_registers, m_intermediate_programs, m_programs, batch_size, m_rounding); + m_randomx_jit->setArgs(m_entropy, m_registers, m_intermediate_programs, m_programs, m_intensity, m_rounding); if (!loadAsmProgram()) { throw std::runtime_error(OclError::toString(CL_INVALID_PROGRAM)); } m_randomx_run = new RxRunKernel(m_asmProgram); - m_randomx_run->setArgs(data().dataset->get(), m_scratchpads, m_registers, m_rounding, m_programs, batch_size, m_algorithm); + m_randomx_run->setArgs(data().dataset->get(), m_scratchpads, m_registers, m_rounding, m_programs, m_intensity, m_algorithm); } void xmrig::OclRxJitRunner::execute(uint32_t iteration) { - const uint32_t g_intensity = data().thread.intensity(); - - m_randomx_jit->enqueue(m_queue, g_intensity, iteration); + m_randomx_jit->enqueue(m_queue, m_intensity, iteration); OclLib::finish(m_queue); - m_randomx_run->enqueue(m_queue, g_intensity); + m_randomx_run->enqueue(m_queue, m_intensity); } @@ -98,11 +92,9 @@ void xmrig::OclRxJitRunner::init() { OclRxBaseRunner::init(); - const size_t g_thd = data().thread.intensity(); - - m_registers = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 256 * g_thd); - m_intermediate_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 5120 * g_thd); - m_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 10048 * g_thd); + m_registers = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 256 * m_intensity); + m_intermediate_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 5120 * m_intensity); + m_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 10048 * m_intensity); } diff --git a/src/backend/opencl/runners/OclRxVmRunner.cpp b/src/backend/opencl/runners/OclRxVmRunner.cpp index 57f5be3c..193b68f0 100644 --- a/src/backend/opencl/runners/OclRxVmRunner.cpp +++ b/src/backend/opencl/runners/OclRxVmRunner.cpp @@ -52,7 +52,7 @@ xmrig::OclRxVmRunner::~OclRxVmRunner() size_t xmrig::OclRxVmRunner::bufferSize() const { - return OclRxBaseRunner::bufferSize() + (align(2560 * data().thread.intensity())); + return OclRxBaseRunner::bufferSize() + (align(2560 * m_intensity)); } @@ -60,10 +60,9 @@ void xmrig::OclRxVmRunner::build() { OclRxBaseRunner::build(); - const uint32_t batch_size = data().thread.intensity(); - const uint32_t hashStrideBytes = RxAlgo::programSize(m_algorithm) * 8; + const uint32_t hashStrideBytes = RxAlgo::programSize(m_algorithm) * 8; - m_hashAes1Rx4->setArgs(m_scratchpads, m_vm_states, hashStrideBytes, batch_size); + m_hashAes1Rx4->setArgs(m_scratchpads, m_vm_states, hashStrideBytes, m_intensity); m_blake2b_hash_registers_32->setArgs(m_hashes, m_vm_states, hashStrideBytes); m_blake2b_hash_registers_64->setArgs(m_hashes, m_vm_states, hashStrideBytes); @@ -71,7 +70,7 @@ void xmrig::OclRxVmRunner::build() m_init_vm->setArgs(m_entropy, m_vm_states, m_rounding); m_execute_vm = new ExecuteVmKernel(m_program); - m_execute_vm->setArgs(m_vm_states, m_rounding, m_scratchpads, data().dataset->get(), batch_size); + m_execute_vm->setArgs(m_vm_states, m_rounding, m_scratchpads, data().dataset->get(), m_intensity); } @@ -79,9 +78,8 @@ void xmrig::OclRxVmRunner::execute(uint32_t iteration) { const uint32_t bfactor = std::min(data().thread.bfactor(), 8u); const uint32_t num_iterations = RxAlgo::programIterations(m_algorithm) >> bfactor; - const uint32_t g_intensity = data().thread.intensity(); - m_init_vm->enqueue(m_queue, g_intensity, iteration); + m_init_vm->enqueue(m_queue, m_intensity, iteration); m_execute_vm->setIterations(num_iterations); @@ -90,7 +88,7 @@ void xmrig::OclRxVmRunner::execute(uint32_t iteration) m_execute_vm->setLast(1); } - m_execute_vm->enqueue(m_queue, g_intensity, m_worksize); + m_execute_vm->enqueue(m_queue, m_intensity, m_worksize); if (j == 0) { m_execute_vm->setFirst(0); @@ -103,5 +101,5 @@ void xmrig::OclRxVmRunner::init() { OclRxBaseRunner::init(); - m_vm_states = createSubBuffer(CL_MEM_READ_WRITE, 2560 * data().thread.intensity()); + m_vm_states = createSubBuffer(CL_MEM_READ_WRITE, 2560 * m_intensity); } diff --git a/src/backend/opencl/runners/OclRyoRunner.cpp b/src/backend/opencl/runners/OclRyoRunner.cpp index 47e9098d..0907903e 100644 --- a/src/backend/opencl/runners/OclRyoRunner.cpp +++ b/src/backend/opencl/runners/OclRyoRunner.cpp @@ -63,9 +63,7 @@ xmrig::OclRyoRunner::~OclRyoRunner() size_t xmrig::OclRyoRunner::bufferSize() const { - const size_t g_thd = data().thread.intensity(); - - return OclBaseRunner::bufferSize() + align(data().algorithm.l3() * g_thd) + align(200 * g_thd); + return OclBaseRunner::bufferSize() + align(data().algorithm.l3() * m_intensity) + align(200 * m_intensity); } @@ -73,9 +71,8 @@ void xmrig::OclRyoRunner::run(uint32_t nonce, uint32_t *hashOutput) { static const cl_uint zero = 0; - const size_t g_intensity = data().thread.intensity(); - const size_t w_size = data().thread.worksize(); - const size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size; + const size_t w_size = data().thread.worksize(); + const size_t g_thd = ((m_intensity + w_size - 1u) / w_size) * w_size; assert(g_thd % w_size == 0); @@ -109,19 +106,17 @@ void xmrig::OclRyoRunner::build() { OclBaseRunner::build(); - const uint32_t intensity = data().thread.intensity(); - m_cn00 = new Cn00RyoKernel(m_program); m_cn00->setArgs(m_scratchpads, m_states); m_cn0 = new Cn0Kernel(m_program); - m_cn0->setArgs(m_input, m_scratchpads, m_states, intensity); + m_cn0->setArgs(m_input, m_scratchpads, m_states, m_intensity); m_cn1 = new Cn1RyoKernel(m_program); - m_cn1->setArgs(m_scratchpads, m_states, intensity); + m_cn1->setArgs(m_scratchpads, m_states, m_intensity); m_cn2 = new Cn2RyoKernel(m_program); - m_cn2->setArgs(m_scratchpads, m_states, m_output, intensity); + m_cn2->setArgs(m_scratchpads, m_states, m_output, m_intensity); } @@ -129,8 +124,6 @@ void xmrig::OclRyoRunner::init() { OclBaseRunner::init(); - const size_t g_thd = data().thread.intensity(); - - m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, data().algorithm.l3() * g_thd); - m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * g_thd); + m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, data().algorithm.l3() * m_intensity); + m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * m_intensity); }