KawPow WIP
This commit is contained in:
parent
07025dc41b
commit
22b937cc1c
88 changed files with 11004 additions and 8383 deletions
180
src/backend/opencl/runners/OclKawPowRunner.cpp
Normal file
180
src/backend/opencl/runners/OclKawPowRunner.cpp
Normal file
|
@ -0,0 +1,180 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "backend/common/Tags.h"
|
||||
#include "backend/opencl/kernels/kawpow/KawPow_CalculateDAGKernel.h"
|
||||
#include "backend/opencl/runners/OclKawPowRunner.h"
|
||||
#include "backend/opencl/runners/tools/OclKawPow.h"
|
||||
#include "backend/opencl/OclLaunchData.h"
|
||||
#include "backend/opencl/wrappers/OclError.h"
|
||||
#include "backend/opencl/wrappers/OclLib.h"
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/net/stratum/Job.h"
|
||||
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/tools/Chrono.h"
|
||||
#include "crypto/common/VirtualMemory.h"
|
||||
#include "crypto/kawpow/KPHash.h"
|
||||
|
||||
#include "3rdparty/libethash/ethash_internal.h"
|
||||
#include <Windows.h>
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
/**
 * Creates a KawPow runner for the device described by `data`.
 *
 * NVIDIA devices get the NVIDIA platform define appended to the OpenCL build
 * options and use a work-group size of 32 (m_workGroupSize is the size used
 * when the DAG kernel is enqueued in set()). Other vendors keep the defaults.
 */
OclKawPowRunner::OclKawPowRunner(size_t index, const OclLaunchData &data) : OclBaseRunner(index, data)
{
    const bool isNvidia = (data.device.vendorId() == OclVendor::OCL_VENDOR_NVIDIA);
    if (!isNvidia) {
        return;
    }

    m_options += " -DPLATFORM=OPENCL_PLATFORM_NVIDIA";
    m_workGroupSize = 32;
}
|
||||
|
||||
|
||||
/**
 * Releases everything this runner created: the light-cache and DAG buffers,
 * the DAG-calculation kernel wrapper and the search kernel. Finally empties
 * the KawPow program cache via OclKawPow::clear().
 */
OclKawPowRunner::~OclKawPowRunner()
{
    // Device buffers.
    OclLib::release(m_lightCache);
    OclLib::release(m_dag);

    // Kernels: the DAG kernel is a heap-allocated wrapper (see build()),
    // the search kernel is a plain cl_kernel handle created in set().
    delete m_calculateDagKernel;
    OclLib::release(m_searchKernel);

    // Drops every cached search program.
    OclKawPow::clear();
}
|
||||
|
||||
|
||||
void OclKawPowRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
||||
{
|
||||
const size_t local_work_size = 128;
|
||||
const size_t global_work_offset = nonce;
|
||||
const size_t global_work_size = m_intensity - (m_intensity % local_work_size);
|
||||
|
||||
enqueueWriteBuffer(m_input, CL_FALSE, 0, 40, m_blob);
|
||||
|
||||
const uint32_t zero = 0;
|
||||
enqueueWriteBuffer(m_output, CL_FALSE, 0, sizeof(uint32_t), &zero);
|
||||
|
||||
const cl_int ret = OclLib::enqueueNDRangeKernel(m_queue, m_searchKernel, 1, &global_work_offset, &global_work_size, &local_work_size, 0, nullptr, nullptr);
|
||||
if (ret != CL_SUCCESS) {
|
||||
LOG_ERR("%s" RED(" error ") RED_BOLD("%s") RED(" when calling ") RED_BOLD("clEnqueueNDRangeKernel") RED(" for kernel ") RED_BOLD("progpow_search"),
|
||||
ocl_tag(), OclError::toString(ret));
|
||||
|
||||
throw std::runtime_error(OclError::toString(ret));
|
||||
}
|
||||
|
||||
uint32_t output[16] = {};
|
||||
enqueueReadBuffer(m_output, CL_TRUE, 0, sizeof(output), output);
|
||||
|
||||
if (output[0] > 15) {
|
||||
output[0] = 15;
|
||||
}
|
||||
|
||||
hashOutput[0xFF] = output[0];
|
||||
memcpy(hashOutput, output + 1, output[0] * sizeof(uint32_t));
|
||||
}
|
||||
|
||||
|
||||
void OclKawPowRunner::set(const Job &job, uint8_t *blob)
|
||||
{
|
||||
m_blockHeight = static_cast<uint32_t>(job.height());
|
||||
m_searchProgram = OclKawPow::get(*this, m_blockHeight);
|
||||
m_searchKernel = OclLib::createKernel(m_searchProgram, "progpow_search");
|
||||
|
||||
const uint32_t epoch = m_blockHeight / KPHash::EPOCH_LENGTH;
|
||||
|
||||
const uint64_t dag_size = KPCache::dag_size(epoch);
|
||||
if (dag_size > m_dagCapacity) {
|
||||
OclLib::release(m_dag);
|
||||
|
||||
m_dagCapacity = VirtualMemory::align(dag_size, 16 * 1024 * 1024);
|
||||
m_dag = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, m_dagCapacity);
|
||||
}
|
||||
|
||||
if (epoch != m_epoch) {
|
||||
m_epoch = epoch;
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(KPCache::s_cacheMutex);
|
||||
|
||||
KPCache::s_cache.init(epoch);
|
||||
|
||||
if (KPCache::s_cache.size() > m_lightCacheCapacity) {
|
||||
OclLib::release(m_lightCache);
|
||||
|
||||
m_lightCacheCapacity = VirtualMemory::align(KPCache::s_cache.size());
|
||||
m_lightCache = OclLib::createBuffer(m_ctx, CL_MEM_READ_ONLY, m_lightCacheCapacity);
|
||||
}
|
||||
|
||||
m_lightCacheSize = KPCache::s_cache.size();
|
||||
enqueueWriteBuffer(m_lightCache, CL_TRUE, 0, m_lightCacheSize, KPCache::s_cache.data());
|
||||
}
|
||||
|
||||
const uint64_t start_ms = Chrono::steadyMSecs();
|
||||
|
||||
const uint32_t dag_words = dag_size / sizeof(node);
|
||||
m_calculateDagKernel->setArgs(0, m_lightCache, m_dag, dag_words, m_lightCacheSize / sizeof(node));
|
||||
|
||||
constexpr uint32_t N = 1 << 18;
|
||||
|
||||
for (uint32_t start = 0; start < dag_words; start += N) {
|
||||
m_calculateDagKernel->setArg(0, sizeof(start), &start);
|
||||
m_calculateDagKernel->enqueue(m_queue, N, m_workGroupSize);
|
||||
}
|
||||
|
||||
OclLib::finish(m_queue);
|
||||
|
||||
LOG_INFO("KawPow DAG for epoch %u calculated (%" PRIu64 " ms)", epoch, Chrono::steadyMSecs() - start_ms);
|
||||
}
|
||||
|
||||
const uint64_t target = job.target();
|
||||
const uint32_t hack_false = 0;
|
||||
|
||||
OclLib::setKernelArg(m_searchKernel, 0, sizeof(m_dag), &m_dag);
|
||||
OclLib::setKernelArg(m_searchKernel, 1, sizeof(m_input), &m_input);
|
||||
OclLib::setKernelArg(m_searchKernel, 2, sizeof(target), &target);
|
||||
OclLib::setKernelArg(m_searchKernel, 3, sizeof(hack_false), &hack_false);
|
||||
OclLib::setKernelArg(m_searchKernel, 4, sizeof(m_output), &m_output);
|
||||
|
||||
m_blob = blob;
|
||||
enqueueWriteBuffer(m_input, CL_TRUE, 0, sizeof(m_blob), m_blob);
|
||||
}
|
||||
|
||||
|
||||
void xmrig::OclKawPowRunner::build()
|
||||
{
|
||||
OclBaseRunner::build();
|
||||
|
||||
m_calculateDagKernel = new KawPow_CalculateDAGKernel(m_program);
|
||||
}
|
||||
|
||||
|
||||
void xmrig::OclKawPowRunner::init()
|
||||
{
|
||||
OclBaseRunner::init();
|
||||
}
|
||||
|
||||
} // namespace xmrig
|
|
@ -22,49 +22,58 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_OCLRYORUNNER_H
|
||||
#define XMRIG_OCLRYORUNNER_H
|
||||
#ifndef XMRIG_OCLKAWPOWRUNNER_H
|
||||
#define XMRIG_OCLKAWPOWRUNNER_H
|
||||
|
||||
|
||||
#include "backend/opencl/runners/OclBaseRunner.h"
|
||||
#include "crypto/kawpow/KPCache.h"
|
||||
|
||||
#include <mutex>
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
class Cn00RyoKernel;
|
||||
class Cn0Kernel;
|
||||
class Cn1RyoKernel;
|
||||
class Cn2RyoKernel;
|
||||
class KawPow_CalculateDAGKernel;
|
||||
|
||||
|
||||
class OclRyoRunner : public OclBaseRunner
|
||||
class OclKawPowRunner : public OclBaseRunner
|
||||
{
|
||||
public:
|
||||
XMRIG_DISABLE_COPY_MOVE_DEFAULT(OclRyoRunner)
|
||||
XMRIG_DISABLE_COPY_MOVE_DEFAULT(OclKawPowRunner)
|
||||
|
||||
OclRyoRunner(size_t index, const OclLaunchData &data);
|
||||
|
||||
~OclRyoRunner() override;
|
||||
OclKawPowRunner(size_t index, const OclLaunchData &data);
|
||||
~OclKawPowRunner() override;
|
||||
|
||||
protected:
|
||||
size_t bufferSize() const override;
|
||||
void run(uint32_t nonce, uint32_t *hashOutput) override;
|
||||
void set(const Job &job, uint8_t *blob) override;
|
||||
void build() override;
|
||||
void init() override;
|
||||
|
||||
private:
|
||||
cl_mem m_scratchpads = nullptr;
|
||||
cl_mem m_states = nullptr;
|
||||
Cn00RyoKernel *m_cn00 = nullptr;
|
||||
Cn0Kernel *m_cn0 = nullptr;
|
||||
Cn1RyoKernel *m_cn1 = nullptr;
|
||||
Cn2RyoKernel *m_cn2 = nullptr;
|
||||
uint8_t* m_blob;
|
||||
|
||||
uint32_t m_blockHeight = 0;
|
||||
uint32_t m_epoch = 0xFFFFFFFFUL;
|
||||
|
||||
cl_mem m_lightCache = nullptr;
|
||||
size_t m_lightCacheSize = 0;
|
||||
size_t m_lightCacheCapacity = 0;
|
||||
|
||||
cl_mem m_dag = nullptr;
|
||||
size_t m_dagCapacity = 0;
|
||||
|
||||
KawPow_CalculateDAGKernel* m_calculateDagKernel = nullptr;
|
||||
|
||||
cl_program m_searchProgram = nullptr;
|
||||
cl_kernel m_searchKernel = nullptr;
|
||||
|
||||
size_t m_workGroupSize = 64;
|
||||
};
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
|
||||
|
||||
#endif // XMRIG_OCLRYORUNNER_H
|
||||
#endif // XMRIG_OCLKAWPOWRUNNER_H
|
|
@ -1,128 +0,0 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "backend/opencl/runners/OclRyoRunner.h"
|
||||
#include "backend/opencl/kernels/Cn00RyoKernel.h"
|
||||
#include "backend/opencl/kernels/Cn0Kernel.h"
|
||||
#include "backend/opencl/kernels/Cn1RyoKernel.h"
|
||||
#include "backend/opencl/kernels/Cn2RyoKernel.h"
|
||||
#include "backend/opencl/kernels/CnBranchKernel.h"
|
||||
#include "backend/opencl/OclLaunchData.h"
|
||||
#include "backend/opencl/wrappers/OclLib.h"
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/net/stratum/Job.h"
|
||||
#include "crypto/cn/CnAlgo.h"
|
||||
|
||||
|
||||
xmrig::OclRyoRunner::OclRyoRunner(size_t index, const OclLaunchData &data) : OclBaseRunner(index, data)
|
||||
{
|
||||
m_options += " -DITERATIONS=" + std::to_string(CnAlgo<>::iterations(m_algorithm)) + "U";
|
||||
m_options += " -DMASK=" + std::to_string(CnAlgo<>::mask(m_algorithm)) + "U";
|
||||
m_options += " -DWORKSIZE=" + std::to_string(data.thread.worksize()) + "U";
|
||||
m_options += " -DMEMORY=" + std::to_string(m_algorithm.l3()) + "LU";
|
||||
m_options += " -DCN_UNROLL=" + std::to_string(data.thread.unrollFactor());
|
||||
|
||||
m_options += " -cl-fp32-correctly-rounded-divide-sqrt";
|
||||
}
|
||||
|
||||
|
||||
xmrig::OclRyoRunner::~OclRyoRunner()
|
||||
{
|
||||
delete m_cn00;
|
||||
delete m_cn0;
|
||||
delete m_cn1;
|
||||
delete m_cn2;
|
||||
|
||||
OclLib::release(m_scratchpads);
|
||||
OclLib::release(m_states);
|
||||
}
|
||||
|
||||
|
||||
size_t xmrig::OclRyoRunner::bufferSize() const
|
||||
{
|
||||
return OclBaseRunner::bufferSize() + align(data().algorithm.l3() * m_intensity) + align(200 * m_intensity);
|
||||
}
|
||||
|
||||
|
||||
void xmrig::OclRyoRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
||||
{
|
||||
static const cl_uint zero = 0;
|
||||
|
||||
const size_t w_size = data().thread.worksize();
|
||||
const size_t g_thd = ((m_intensity + w_size - 1u) / w_size) * w_size;
|
||||
|
||||
assert(g_thd % w_size == 0);
|
||||
|
||||
enqueueWriteBuffer(m_output, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(cl_uint), &zero);
|
||||
|
||||
m_cn0->enqueue(m_queue, nonce, g_thd);
|
||||
m_cn00->enqueue(m_queue, g_thd);
|
||||
m_cn1->enqueue(m_queue, g_thd, w_size);
|
||||
m_cn2->enqueue(m_queue, nonce, g_thd);
|
||||
|
||||
finalize(hashOutput);
|
||||
}
|
||||
|
||||
|
||||
void xmrig::OclRyoRunner::set(const Job &job, uint8_t *blob)
|
||||
{
|
||||
if (job.size() > (Job::kMaxBlobSize - 4)) {
|
||||
throw std::length_error("job size too big");
|
||||
}
|
||||
|
||||
blob[job.size()] = 0x01;
|
||||
memset(blob + job.size() + 1, 0, Job::kMaxBlobSize - job.size() - 1);
|
||||
|
||||
enqueueWriteBuffer(m_input, CL_TRUE, 0, Job::kMaxBlobSize, blob);
|
||||
|
||||
m_cn2->setTarget(job.target());
|
||||
}
|
||||
|
||||
|
||||
void xmrig::OclRyoRunner::build()
|
||||
{
|
||||
OclBaseRunner::build();
|
||||
|
||||
m_cn00 = new Cn00RyoKernel(m_program);
|
||||
m_cn00->setArgs(m_scratchpads, m_states);
|
||||
|
||||
m_cn0 = new Cn0Kernel(m_program);
|
||||
m_cn0->setArgs(m_input, m_scratchpads, m_states, m_intensity);
|
||||
|
||||
m_cn1 = new Cn1RyoKernel(m_program);
|
||||
m_cn1->setArgs(m_scratchpads, m_states, m_intensity);
|
||||
|
||||
m_cn2 = new Cn2RyoKernel(m_program);
|
||||
m_cn2->setArgs(m_scratchpads, m_states, m_output, m_intensity);
|
||||
}
|
||||
|
||||
|
||||
void xmrig::OclRyoRunner::init()
|
||||
{
|
||||
OclBaseRunner::init();
|
||||
|
||||
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, data().algorithm.l3() * m_intensity);
|
||||
m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * m_intensity);
|
||||
}
|
|
@ -67,9 +67,9 @@ public:
|
|||
cl_program program;
|
||||
|
||||
private:
|
||||
const Algorithm m_algo;
|
||||
const uint32_t m_index;
|
||||
const uint64_t m_offset;
|
||||
Algorithm m_algo;
|
||||
uint32_t m_index;
|
||||
uint64_t m_offset;
|
||||
};
|
||||
|
||||
|
||||
|
@ -126,10 +126,11 @@ private:
|
|||
void gc(uint64_t offset)
|
||||
{
|
||||
for (size_t i = 0; i < m_data.size();) {
|
||||
const auto &entry = m_data[i];
|
||||
auto &entry = m_data[i];
|
||||
|
||||
if (entry.isExpired(offset)) {
|
||||
m_data.back().release();
|
||||
entry.release();
|
||||
entry = m_data.back();
|
||||
m_data.pop_back();
|
||||
}
|
||||
else {
|
||||
|
|
406
src/backend/opencl/runners/tools/OclKawPow.cpp
Normal file
406
src/backend/opencl/runners/tools/OclKawPow.cpp
Normal file
|
@ -0,0 +1,406 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "backend/opencl/runners/tools/OclKawPow.h"
|
||||
|
||||
#include "backend/opencl/cl/kawpow/kawpow_cl.h"
|
||||
#include "backend/opencl/interfaces/IOclRunner.h"
|
||||
#include "backend/opencl/OclCache.h"
|
||||
#include "backend/opencl/OclLaunchData.h"
|
||||
#include "backend/opencl/OclThread.h"
|
||||
#include "backend/opencl/wrappers/OclError.h"
|
||||
#include "backend/opencl/wrappers/OclLib.h"
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/tools/Baton.h"
|
||||
#include "base/tools/Chrono.h"
|
||||
#include "crypto/kawpow/KPHash.h"
|
||||
|
||||
#include "3rdparty/libethash/ethash_internal.h"
|
||||
#include "3rdparty/libethash/data_sizes.h"
|
||||
|
||||
|
||||
#include <cstring>
|
||||
#include <mutex>
|
||||
#include <regex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <uv.h>
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
class KawPowCacheEntry
|
||||
{
|
||||
public:
|
||||
inline KawPowCacheEntry(const Algorithm &algo, uint64_t period, uint32_t index, cl_program program) :
|
||||
program(program),
|
||||
m_algo(algo),
|
||||
m_index(index),
|
||||
m_period(period)
|
||||
{}
|
||||
|
||||
inline bool isExpired(uint64_t period) const { return m_period + 1 < period; }
|
||||
inline bool match(const Algorithm &algo, uint64_t period, uint32_t index) const { return m_algo == algo && m_period == period && m_index == index; }
|
||||
inline bool match(const IOclRunner &runner, uint64_t period) const { return match(runner.algorithm(), period, runner.deviceIndex()); }
|
||||
inline void release() { OclLib::release(program); }
|
||||
|
||||
cl_program program;
|
||||
|
||||
private:
|
||||
Algorithm m_algo;
|
||||
uint32_t m_index;
|
||||
uint64_t m_period;
|
||||
};
|
||||
|
||||
|
||||
class KawPowCache
|
||||
{
|
||||
public:
|
||||
KawPowCache() = default;
|
||||
|
||||
inline cl_program search(const IOclRunner &runner, uint64_t period) { return search(runner.algorithm(), period, runner.deviceIndex()); }
|
||||
|
||||
|
||||
inline cl_program search(const Algorithm &algo, uint64_t period, uint32_t index)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
|
||||
for (const auto &entry : m_data) {
|
||||
if (entry.match(algo, period, index)) {
|
||||
return entry.program;
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
||||
void add(const Algorithm &algo, uint64_t period, uint32_t index, cl_program program)
|
||||
{
|
||||
if (search(algo, period, index)) {
|
||||
OclLib::release(program);
|
||||
return;
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
|
||||
gc(period);
|
||||
m_data.emplace_back(algo, period, index, program);
|
||||
}
|
||||
|
||||
|
||||
void clear()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
|
||||
for (auto &entry : m_data) {
|
||||
entry.release();
|
||||
}
|
||||
|
||||
m_data.clear();
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
void gc(uint64_t period)
|
||||
{
|
||||
for (size_t i = 0; i < m_data.size();) {
|
||||
auto& entry = m_data[i];
|
||||
|
||||
if (entry.isExpired(period)) {
|
||||
entry.release();
|
||||
entry = m_data.back();
|
||||
m_data.pop_back();
|
||||
}
|
||||
else {
|
||||
++i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
std::mutex m_mutex;
|
||||
std::vector<KawPowCacheEntry> m_data;
|
||||
};
|
||||
|
||||
|
||||
static KawPowCache cache;
|
||||
|
||||
|
||||
#define rnd() (kiss99(rnd_state))
|
||||
#define mix_src() ("mix[" + std::to_string(rnd() % KPHash::REGS) + "]")
|
||||
#define mix_dst() ("mix[" + std::to_string(mix_seq_dst[(mix_seq_dst_cnt++) % KPHash::REGS]) + "]")
|
||||
#define mix_cache() ("mix[" + std::to_string(mix_seq_cache[(mix_seq_cache_cnt++) % KPHash::REGS]) + "]")
|
||||
|
||||
class KawPowBuilder
|
||||
{
|
||||
public:
|
||||
cl_program build(const IOclRunner &runner, uint64_t period)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
|
||||
const uint64_t ts = Chrono::steadyMSecs();
|
||||
|
||||
cl_program program = cache.search(runner, period);
|
||||
if (program) {
|
||||
return program;
|
||||
}
|
||||
|
||||
cl_int ret;
|
||||
const std::string source = getSource(period);
|
||||
cl_device_id device = runner.data().device.id();
|
||||
const char *s = source.c_str();
|
||||
|
||||
program = OclLib::createProgramWithSource(runner.ctx(), 1, &s, nullptr, &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::string options = " -DPROGPOW_DAG_ELEMENTS=";
|
||||
|
||||
const uint64_t epoch = (period * KPHash::PERIOD_LENGTH) / KPHash::EPOCH_LENGTH;
|
||||
const uint64_t dag_elements = dag_sizes[epoch] / 256;
|
||||
|
||||
options += std::to_string(dag_elements);
|
||||
options += runner.buildOptions();
|
||||
|
||||
if (OclLib::buildProgram(program, 1, &device, options.c_str()) != CL_SUCCESS) {
|
||||
printf("BUILD LOG:\n%s\n", OclLib::getProgramBuildLog(program, device).data());
|
||||
|
||||
OclLib::release(program);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
LOG_INFO("KawPow program for period %" PRIu64 " compiled. (%" PRIu64 "ms)", period, Chrono::steadyMSecs() - ts);
|
||||
|
||||
cache.add(runner.algorithm(), period, runner.deviceIndex(), program);
|
||||
|
||||
return program;
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
std::mutex m_mutex;
|
||||
|
||||
typedef struct {
|
||||
uint32_t z, w, jsr, jcong;
|
||||
} kiss99_t;
|
||||
|
||||
|
||||
std::string getSource(uint64_t prog_seed) const
|
||||
{
|
||||
std::stringstream ret;
|
||||
|
||||
uint32_t seed0 = static_cast<uint32_t>(prog_seed);
|
||||
uint32_t seed1 = static_cast<uint32_t>(prog_seed >> 32);
|
||||
|
||||
kiss99_t rnd_state;
|
||||
uint32_t fnv_hash = 0x811c9dc5;
|
||||
rnd_state.z = fnv1a(fnv_hash, seed0);
|
||||
rnd_state.w = fnv1a(fnv_hash, seed1);
|
||||
rnd_state.jsr = fnv1a(fnv_hash, seed0);
|
||||
rnd_state.jcong = fnv1a(fnv_hash, seed1);
|
||||
|
||||
// Create a random sequence of mix destinations and cache sources
|
||||
// Merge is a read-modify-write, guaranteeing every mix element is modified every loop
|
||||
// Guarantee no cache load is duplicated and can be optimized away
|
||||
int mix_seq_dst[KPHash::REGS];
|
||||
int mix_seq_cache[KPHash::REGS];
|
||||
int mix_seq_dst_cnt = 0;
|
||||
int mix_seq_cache_cnt = 0;
|
||||
|
||||
for (int i = 0; i < KPHash::REGS; i++) {
|
||||
mix_seq_dst[i] = i;
|
||||
mix_seq_cache[i] = i;
|
||||
}
|
||||
|
||||
for (int i = KPHash::REGS - 1; i > 0; i--) {
|
||||
int j;
|
||||
j = rnd() % (i + 1);
|
||||
std::swap(mix_seq_dst[i], mix_seq_dst[j]);
|
||||
j = rnd() % (i + 1);
|
||||
std::swap(mix_seq_cache[i], mix_seq_cache[j]);
|
||||
}
|
||||
|
||||
for (int i = 0; (i < KPHash::CNT_CACHE) || (i < KPHash::CNT_MATH); ++i) {
|
||||
if (i < KPHash::CNT_CACHE) {
|
||||
// Cached memory access
|
||||
// lanes access random locations
|
||||
std::string src = mix_cache();
|
||||
std::string dest = mix_dst();
|
||||
uint32_t r = rnd();
|
||||
ret << "offset = " << src << " % PROGPOW_CACHE_WORDS;\n";
|
||||
ret << "data = c_dag[offset];\n";
|
||||
ret << merge(dest, "data", r);
|
||||
}
|
||||
|
||||
if (i < KPHash::CNT_MATH) {
|
||||
// Random Math
|
||||
// Generate 2 unique sources
|
||||
int src_rnd = rnd() % ((KPHash::REGS - 1) * KPHash::REGS);
|
||||
int src1 = src_rnd % KPHash::REGS; // 0 <= src1 < KPHash::REGS
|
||||
int src2 = src_rnd / KPHash::REGS; // 0 <= src2 < KPHash::REGS - 1
|
||||
if (src2 >= src1) ++src2; // src2 is now any reg other than src1
|
||||
std::string src1_str = "mix[" + std::to_string(src1) + "]";
|
||||
std::string src2_str = "mix[" + std::to_string(src2) + "]";
|
||||
uint32_t r1 = rnd();
|
||||
std::string dest = mix_dst();
|
||||
uint32_t r2 = rnd();
|
||||
ret << math("data", src1_str, src2_str, r1);
|
||||
ret << merge(dest, "data", r2);
|
||||
}
|
||||
}
|
||||
|
||||
std::string kernel = std::regex_replace(std::string(kawpow_cl), std::regex("XMRIG_INCLUDE_PROGPOW_RANDOM_MATH"), ret.str());
|
||||
ret.str(std::string());
|
||||
|
||||
ret << merge("mix[0]", "data_dag.s[0]", rnd());
|
||||
|
||||
constexpr size_t num_words_per_lane = 256 / (sizeof(uint32_t) * KPHash::LANES);
|
||||
for (int i = 1; i < num_words_per_lane; i++)
|
||||
{
|
||||
std::string dest = mix_dst();
|
||||
uint32_t r = rnd();
|
||||
ret << merge(dest, "data_dag.s[" + std::to_string(i) + "]", r);
|
||||
}
|
||||
|
||||
kernel = std::regex_replace(kernel, std::regex("XMRIG_INCLUDE_PROGPOW_DATA_LOADS"), ret.str());
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
||||
static std::string merge(std::string a, std::string b, uint32_t r)
|
||||
{
|
||||
switch (r % 4)
|
||||
{
|
||||
case 0:
|
||||
return a + " = (" + a + " * 33) + " + b + ";\n";
|
||||
case 1:
|
||||
return a + " = (" + a + " ^ " + b + ") * 33;\n";
|
||||
case 2:
|
||||
return a + " = ROTL32(" + a + ", " + std::to_string(((r >> 16) % 31) + 1) + ") ^ " + b + ";\n";
|
||||
case 3:
|
||||
return a + " = ROTR32(" + a + ", " + std::to_string(((r >> 16) % 31) + 1) + ") ^ " + b + ";\n";
|
||||
}
|
||||
return "#error\n";
|
||||
}
|
||||
|
||||
|
||||
static std::string math(std::string d, std::string a, std::string b, uint32_t r)
|
||||
{
|
||||
switch (r % 11)
|
||||
{
|
||||
case 0:
|
||||
return d + " = " + a + " + " + b + ";\n";
|
||||
case 1:
|
||||
return d + " = " + a + " * " + b + ";\n";
|
||||
case 2:
|
||||
return d + " = mul_hi(" + a + ", " + b + ");\n";
|
||||
case 3:
|
||||
return d + " = min(" + a + ", " + b + ");\n";
|
||||
case 4:
|
||||
return d + " = ROTL32(" + a + ", " + b + " % 32);\n";
|
||||
case 5:
|
||||
return d + " = ROTR32(" + a + ", " + b + " % 32);\n";
|
||||
case 6:
|
||||
return d + " = " + a + " & " + b + ";\n";
|
||||
case 7:
|
||||
return d + " = " + a + " | " + b + ";\n";
|
||||
case 8:
|
||||
return d + " = " + a + " ^ " + b + ";\n";
|
||||
case 9:
|
||||
return d + " = clz(" + a + ") + clz(" + b + ");\n";
|
||||
case 10:
|
||||
return d + " = popcount(" + a + ") + popcount(" + b + ");\n";
|
||||
}
|
||||
return "#error\n";
|
||||
}
|
||||
|
||||
|
||||
static uint32_t fnv1a(uint32_t& h, uint32_t d)
|
||||
{
|
||||
return h = (h ^ d) * 0x1000193;
|
||||
}
|
||||
|
||||
static uint32_t kiss99(kiss99_t& st)
|
||||
{
|
||||
st.z = 36969 * (st.z & 65535) + (st.z >> 16);
|
||||
st.w = 18000 * (st.w & 65535) + (st.w >> 16);
|
||||
uint32_t MWC = ((st.z << 16) + st.w);
|
||||
st.jsr ^= (st.jsr << 17);
|
||||
st.jsr ^= (st.jsr >> 13);
|
||||
st.jsr ^= (st.jsr << 5);
|
||||
st.jcong = 69069 * st.jcong + 1234567;
|
||||
return ((MWC ^ st.jcong) + st.jsr);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
 * Work item handed to uv_queue_work() for a background program build.
 *
 * Only a reference to the runner is stored, so the runner has to outlive the
 * queued work.
 */
class KawPowBaton : public Baton<uv_work_t>
{
public:
    KawPowBaton(const IOclRunner &runner, uint64_t period) :
        runner(runner),
        period(period)
    {}

    const IOclRunner &runner;   // runner to build for (not owned)
    const uint64_t period;      // period whose program is to be built
};
|
||||
|
||||
|
||||
static KawPowBuilder builder;
|
||||
|
||||
|
||||
/**
 * Returns the search program for the period that contains `height`.
 *
 * The program for the following period is compiled in the background on the
 * libuv thread pool, unless it is already cached, so that it is likely to be
 * ready when the period changes. The current period's program is then taken
 * from the cache, or compiled synchronously when it is missing.
 *
 * @param runner  runner whose algorithm, device and context are used.
 * @param height  block height of the job.
 * @return the program handle, or nullptr when compilation fails.
 */
cl_program OclKawPow::get(const IOclRunner &runner, uint64_t height)
{
    const uint64_t period = height / KPHash::PERIOD_LENGTH;

    // Skip the background job when there is nothing left to pre-build;
    // otherwise every call would allocate a baton and queue a no-op task.
    if (!cache.search(runner, period + 1)) {
        KawPowBaton *baton = new KawPowBaton(runner, period + 1);

        const int rc = uv_queue_work(uv_default_loop(), &baton->req,
            [](uv_work_t *req) {
                KawPowBaton *baton = static_cast<KawPowBaton*>(req->data);
                builder.build(baton->runner, baton->period);
            },
            [](uv_work_t *req, int) { delete static_cast<KawPowBaton*>(req->data); }
        );

        if (rc != 0) {
            // The completion callback will never run, so free the baton here.
            delete baton;
        }
    }

    cl_program program = cache.search(runner, period);
    if (program) {
        return program;
    }

    return builder.build(runner, period);
}
|
||||
|
||||
|
||||
void OclKawPow::clear()
|
||||
{
|
||||
cache.clear();
|
||||
}
|
||||
|
||||
} // namespace xmrig
|
53
src/backend/opencl/runners/tools/OclKawPow.h
Normal file
53
src/backend/opencl/runners/tools/OclKawPow.h
Normal file
|
@ -0,0 +1,53 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_OCLKAWPOW_H
|
||||
#define XMRIG_OCLKAWPOW_H
|
||||
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
|
||||
using cl_program = struct _cl_program *;
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
class IOclRunner;
|
||||
|
||||
|
||||
class OclKawPow
|
||||
{
|
||||
public:
|
||||
static cl_program get(const IOclRunner &runner, uint64_t height);
|
||||
static void clear();
|
||||
};
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
#endif /* XMRIG_OCLKAWPOW_H */
|
Loading…
Add table
Add a link
Reference in a new issue