REDACTED-rig/src/backend/opencl/runners/tools/OclKawPow.cpp
2020-05-28 10:58:06 +02:00

414 lines
13 KiB
C++

/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "backend/opencl/runners/tools/OclKawPow.h"
#include "backend/opencl/cl/kawpow/kawpow_cl.h"
#include "backend/opencl/interfaces/IOclRunner.h"
#include "backend/opencl/OclCache.h"
#include "backend/opencl/OclLaunchData.h"
#include "backend/opencl/OclThread.h"
#include "backend/opencl/wrappers/OclError.h"
#include "backend/opencl/wrappers/OclLib.h"
#include "base/io/log/Log.h"
#include "base/tools/Baton.h"
#include "base/tools/Chrono.h"
#include "crypto/kawpow/KPHash.h"
#include "3rdparty/libethash/ethash_internal.h"
#include "3rdparty/libethash/data_sizes.h"
#include <cstring>
#include <mutex>
#include <regex>
#include <sstream>
#include <string>
#include <thread>
#include <uv.h>
namespace xmrig {
/**
 * A compiled KawPow OpenCL program together with the key it was compiled for.
 *
 * The key is the tuple (algorithm, period, work size, device index). The entry
 * has no destructor: the program handle is only released when release() is
 * called explicitly.
 */
class KawPowCacheEntry
{
public:
    KawPowCacheEntry(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index, cl_program program) :
        program(program),
        m_algo(algo),
        m_index(index),
        m_period(period),
        m_worksize(worksize)
    {
    }

    /// Returns true when this entry's period is more than one period behind @p period.
    bool isExpired(uint64_t period) const
    {
        return period > m_period + 1;
    }

    /// Returns true when every component of the key equals the stored one.
    bool match(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index) const
    {
        if (!(m_algo == algo)) {
            return false;
        }

        if (m_period != period) {
            return false;
        }

        if (m_worksize != worksize) {
            return false;
        }

        return m_index == index;
    }

    /// Convenience overload: algorithm and device index are taken from @p runner.
    bool match(const IOclRunner &runner, uint64_t period, uint32_t worksize) const
    {
        return match(runner.algorithm(), period, worksize, runner.deviceIndex());
    }

    /// Hands the stored program to OclLib::release(). The handle itself is left unchanged.
    void release()
    {
        OclLib::release(program);
    }

    cl_program program;     ///< Program handle, read directly by KawPowCache.

private:
    Algorithm m_algo;       ///< Algorithm component of the key.
    uint32_t m_index;       ///< Device index component of the key.
    uint64_t m_period;      ///< Period component of the key; also drives isExpired().
    uint32_t m_worksize;    ///< Work size component of the key.
};
class KawPowCache
{
public:
KawPowCache() = default;
inline cl_program search(const IOclRunner &runner, uint64_t period, uint32_t worksize) { return search(runner.algorithm(), period, worksize, runner.deviceIndex()); }
inline cl_program search(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index)
{
std::lock_guard<std::mutex> lock(m_mutex);
for (const auto &entry : m_data) {
if (entry.match(algo, period, worksize, index)) {
return entry.program;
}
}
return nullptr;
}
void add(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index, cl_program program)
{
if (search(algo, period, worksize, index)) {
OclLib::release(program);
return;
}
std::lock_guard<std::mutex> lock(m_mutex);
gc(period);
m_data.emplace_back(algo, period, worksize, index, program);
}
void clear()
{
std::lock_guard<std::mutex> lock(m_mutex);
for (auto &entry : m_data) {
entry.release();
}
m_data.clear();
}
private:
void gc(uint64_t period)
{
for (size_t i = 0; i < m_data.size();) {
auto& entry = m_data[i];
if (entry.isExpired(period)) {
entry.release();
entry = m_data.back();
m_data.pop_back();
}
else {
++i;
}
}
}
std::mutex m_mutex;
std::vector<KawPowCacheEntry> m_data;
};
// File-local cache of compiled KawPow programs; used by KawPowBuilder::build(), OclKawPow::get() and OclKawPow::clear().
static KawPowCache cache;
// Helper macros for kernel source generation. They expand in place and therefore rely on locals of the
// expanding function being named exactly: rnd_state, mix_seq_dst, mix_seq_dst_cnt, mix_seq_cache, mix_seq_cache_cnt.
// rnd(): next value of the kiss99 generator; every use advances rnd_state.
#define rnd() (kiss99(rnd_state))
// mix_src(): the text "mix[N]" with N = rnd() % KPHash::REGS. Not referenced in the visible part of this file.
#define mix_src() ("mix[" + std::to_string(rnd() % KPHash::REGS) + "]")
// mix_dst(): the text "mix[N]" for the next element of mix_seq_dst; post-increments mix_seq_dst_cnt and wraps at KPHash::REGS.
#define mix_dst() ("mix[" + std::to_string(mix_seq_dst[(mix_seq_dst_cnt++) % KPHash::REGS]) + "]")
// mix_cache(): the text "mix[N]" for the next element of mix_seq_cache; post-increments mix_seq_cache_cnt and wraps at KPHash::REGS.
#define mix_cache() ("mix[" + std::to_string(mix_seq_cache[(mix_seq_cache_cnt++) % KPHash::REGS]) + "]")
class KawPowBuilder
{
public:
/**
 * Returns the KawPow program for (runner, period, worksize), compiling it when it is not cached yet.
 *
 * The whole call runs under m_mutex, so only one compilation is in flight at a time.
 * A freshly compiled program is handed to the cache before it is returned.
 *
 * @param runner    supplies the OpenCL context, device, algorithm, device index and extra build options.
 * @param period    period the random program is generated for; also passed to getSource() as seed.
 * @param worksize  value emitted as -DGROUP_SIZE.
 * @return the program, or nullptr when creating or building it failed.
 *
 * NOTE(review): dag_sizes is indexed with the computed epoch without a bounds check — confirm the table covers all expected epochs.
 */
cl_program build(const IOclRunner &runner, uint64_t period, uint32_t worksize)
{
    std::lock_guard<std::mutex> guard(m_mutex);

    const uint64_t startedAt = Chrono::steadyMSecs();

    // Another caller may have compiled the same program while this one waited for the mutex.
    if (cl_program cached = cache.search(runner, period, worksize)) {
        return cached;
    }

    const std::string source = getSource(period);
    const char *sourceText   = source.c_str();
    cl_device_id device      = runner.data().device.id();

    cl_int status;
    cl_program program = OclLib::createProgramWithSource(runner.ctx(), 1, &sourceText, nullptr, &status);
    if (status != CL_SUCCESS) {
        return nullptr;
    }

    const uint64_t epoch        = (period * KPHash::PERIOD_LENGTH) / KPHash::EPOCH_LENGTH;
    const uint64_t dag_elements = dag_sizes[epoch] / 256;

    std::string options;
    options.append(" -DPROGPOW_DAG_ELEMENTS=").append(std::to_string(dag_elements));
    options.append(" -DGROUP_SIZE=").append(std::to_string(worksize));
    options += runner.buildOptions();

    const bool built = OclLib::buildProgram(program, 1, &device, options.c_str()) == CL_SUCCESS;
    if (!built) {
        printf("BUILD LOG:\n%s\n", OclLib::getProgramBuildLog(program, device).data());

        OclLib::release(program);
        return nullptr;
    }

    LOG_INFO("KawPow program for period %" PRIu64 " compiled. (%" PRIu64 "ms)", period, Chrono::steadyMSecs() - startedAt);

    cache.add(runner.algorithm(), period, worksize, runner.deviceIndex(), program);

    return program;
}
private:
// Held for the full duration of build(), so program compilation is serialized.
std::mutex m_mutex;
// State of the kiss99() generator: z and w feed the two multiply-with-carry steps,
// jsr the shift/xor step and jcong the linear congruential step.
typedef struct {
uint32_t z, w, jsr, jcong;
} kiss99_t;
// Generates the OpenCL kernel source for one program seed (build() passes the period).
// The template text kawpow_cl contains two placeholder tokens, XMRIG_INCLUDE_PROGPOW_RANDOM_MATH and
// XMRIG_INCLUDE_PROGPOW_DATA_LOADS, which are replaced by code generated from a kiss99 random stream
// seeded from prog_seed. Returns the complete kernel source.
// The output depends on the exact order of rnd()/mix_dst()/mix_cache() uses below, since each one advances
// shared state; statements must not be reordered. The macros also require the locals to keep their names.
std::string getSource(uint64_t prog_seed) const
{
std::stringstream ret;
// Split the 64-bit seed into its low and high 32-bit halves.
uint32_t seed0 = static_cast<uint32_t>(prog_seed);
uint32_t seed1 = static_cast<uint32_t>(prog_seed >> 32);
kiss99_t rnd_state;
// 0x811c9dc5 is the 32-bit FNV offset basis; fnv1a() updates fnv_hash in place, so the four
// state words are derived from a running hash over seed0, seed1, seed0, seed1.
uint32_t fnv_hash = 0x811c9dc5;
rnd_state.z = fnv1a(fnv_hash, seed0);
rnd_state.w = fnv1a(fnv_hash, seed1);
rnd_state.jsr = fnv1a(fnv_hash, seed0);
rnd_state.jcong = fnv1a(fnv_hash, seed1);
// Create a random sequence of mix destinations and cache sources
// Merge is a read-modify-write, guaranteeing every mix element is modified every loop
// Guarantee no cache load is duplicated and can be optimized away
int mix_seq_dst[KPHash::REGS];
int mix_seq_cache[KPHash::REGS];
int mix_seq_dst_cnt = 0;
int mix_seq_cache_cnt = 0;
// Start from the identity permutation for both sequences.
for (int i = 0; i < KPHash::REGS; i++) {
mix_seq_dst[i] = i;
mix_seq_cache[i] = i;
}
// Fisher-Yates shuffle of both sequences; each step draws one random value for the destination
// sequence and then one for the cache sequence.
for (int i = KPHash::REGS - 1; i > 0; i--) {
int j;
j = rnd() % (i + 1);
std::swap(mix_seq_dst[i], mix_seq_dst[j]);
j = rnd() % (i + 1);
std::swap(mix_seq_cache[i], mix_seq_cache[j]);
}
// Emit cache accesses and math operations interleaved, until both CNT_CACHE and CNT_MATH are exhausted.
for (int i = 0; (i < KPHash::CNT_CACHE) || (i < KPHash::CNT_MATH); ++i) {
if (i < KPHash::CNT_CACHE) {
// Cached memory access
// lanes access random locations
std::string src = mix_cache();
std::string dest = mix_dst();
uint32_t r = rnd();
ret << "offset = " << src << " % PROGPOW_CACHE_WORDS;\n";
ret << "data = c_dag[offset];\n";
ret << merge(dest, "data", r);
}
if (i < KPHash::CNT_MATH) {
// Random Math
// Generate 2 unique sources
int src_rnd = rnd() % ((KPHash::REGS - 1) * KPHash::REGS);
int src1 = src_rnd % KPHash::REGS; // 0 <= src1 < KPHash::REGS
int src2 = src_rnd / KPHash::REGS; // 0 <= src2 < KPHash::REGS - 1
if (src2 >= src1) ++src2; // src2 is now any reg other than src1
std::string src1_str = "mix[" + std::to_string(src1) + "]";
std::string src2_str = "mix[" + std::to_string(src2) + "]";
// r1 selects the math operation, r2 the merge operation; the destination is taken between the two draws.
uint32_t r1 = rnd();
std::string dest = mix_dst();
uint32_t r2 = rnd();
ret << math("data", src1_str, src2_str, r1);
ret << merge(dest, "data", r2);
}
}
// Substitute the generated math block into the kernel template, then reuse the stream for the data loads.
std::string kernel = std::regex_replace(std::string(kawpow_cl), std::regex("XMRIG_INCLUDE_PROGPOW_RANDOM_MATH"), ret.str());
ret.str(std::string());
// The first DAG word is always merged into mix[0]; the remaining words go to shuffled destinations.
ret << merge("mix[0]", "data_dag.s[0]", rnd());
constexpr size_t num_words_per_lane = 256 / (sizeof(uint32_t) * KPHash::LANES);
// NOTE(review): i is int while num_words_per_lane is size_t, so this comparison mixes signed and unsigned.
for (int i = 1; i < num_words_per_lane; i++)
{
std::string dest = mix_dst();
uint32_t r = rnd();
ret << merge(dest, "data_dag.s[" + std::to_string(i) + "]", r);
}
kernel = std::regex_replace(kernel, std::regex("XMRIG_INCLUDE_PROGPOW_DATA_LOADS"), ret.str());
return kernel;
}
/**
 * Produces one OpenCL statement that folds @p b into @p a.
 *
 * The low two bits of @p r pick the form:
 *   0: a = (a * 33) + b
 *   1: a = (a ^ b) * 33
 *   2: a = ROTL32(a, n) ^ b
 *   3: a = ROTR32(a, n) ^ b
 * where n = ((r >> 16) % 31) + 1, i.e. a rotation amount in 1..31.
 *
 * @return the statement text, terminated by ";\n".
 */
static std::string merge(std::string a, std::string b, uint32_t r)
{
    const uint32_t form = r % 4;

    if (form == 0) {
        return a + " = (" + a + " * 33) + " + b + ";\n";
    }

    if (form == 1) {
        return a + " = (" + a + " ^ " + b + ") * 33;\n";
    }

    // Only the two rotate forms remain; they differ solely in the direction.
    const std::string amount = std::to_string(((r >> 16) % 31) + 1);
    const char *rotate       = (form == 2) ? "ROTL32(" : "ROTR32(";

    return a + " = " + rotate + a + ", " + amount + ") ^ " + b + ";\n";
}
/**
 * Produces one OpenCL statement assigning a combination of @p a and @p b to @p d.
 *
 * The operation is chosen by r % 11: add, multiply, mul_hi, min, rotate left,
 * rotate right, and, or, xor, sum of clz, sum of popcount.
 *
 * @return the statement text, terminated by ";\n".
 */
static std::string math(std::string d, std::string a, std::string b, uint32_t r)
{
    const std::string assign = d + " = ";

    // a <op> b
    const auto infix = [&](const char *op) {
        return assign + a + " " + op + " " + b + ";\n";
    };

    // fn(a, b)
    const auto binary = [&](const char *fn) {
        return assign + fn + "(" + a + ", " + b + ");\n";
    };

    // fn(a, b % 32)
    const auto rotate = [&](const char *fn) {
        return assign + fn + "(" + a + ", " + b + " % 32);\n";
    };

    // fn(a) + fn(b)
    const auto unarySum = [&](const char *fn) {
        return assign + fn + "(" + a + ") + " + fn + "(" + b + ");\n";
    };

    switch (r % 11)
    {
    case 0:  return infix("+");
    case 1:  return infix("*");
    case 2:  return binary("mul_hi");
    case 3:  return binary("min");
    case 4:  return rotate("ROTL32");
    case 5:  return rotate("ROTR32");
    case 6:  return infix("&");
    case 7:  return infix("|");
    case 8:  return infix("^");
    case 9:  return unarySum("clz");
    case 10: return unarySum("popcount");
    }

    return "#error\n";
}
/**
 * One FNV-1a style mixing step on 32-bit words: xor @p d into @p h, then multiply by 0x1000193.
 *
 * @param h  running hash; updated in place.
 * @param d  word to mix in.
 * @return the updated value of @p h.
 */
static uint32_t fnv1a(uint32_t& h, uint32_t d)
{
    h ^= d;
    h *= 0x1000193;

    return h;
}
/**
 * Advances the generator state and returns the next 32-bit pseudo-random value.
 *
 * Three sub-generators are stepped and combined: two 16-bit multiply-with-carry
 * steps (z, w), a shift/xor step (jsr) and a linear congruential step (jcong).
 *
 * @param st  generator state; all four words are updated.
 */
static uint32_t kiss99(kiss99_t& st)
{
    // Multiply-with-carry on z and w, combined into one 32-bit word.
    st.z = 36969 * (st.z & 0xFFFF) + (st.z >> 16);
    st.w = 18000 * (st.w & 0xFFFF) + (st.w >> 16);
    const uint32_t mwc = (st.z << 16) + st.w;

    // Three-stage shift/xor register.
    uint32_t shifted = st.jsr;
    shifted ^= shifted << 17;
    shifted ^= shifted >> 13;
    shifted ^= shifted << 5;
    st.jsr = shifted;

    // Linear congruential step.
    st.jcong = 69069 * st.jcong + 1234567;

    return (mwc ^ st.jcong) + st.jsr;
}
};
/**
 * Work request that carries the arguments of a background KawPowBuilder::build() call.
 *
 * NOTE(review): the runner is stored by reference, so it presumably has to outlive
 * the queued work — confirm against the callers of OclKawPow::get().
 */
class KawPowBaton : public Baton<uv_work_t>
{
public:
    const IOclRunner &runner;   ///< Runner the program is compiled for.
    const uint64_t period;      ///< Period to compile.
    const uint32_t worksize;    ///< Work size to compile for.

    KawPowBaton(const IOclRunner &runner, uint64_t period, uint32_t worksize) :
        runner(runner),
        period(period),
        worksize(worksize)
    {
    }
};
// File-local builder shared by OclKawPow::get() and the background work it queues; build() locks the builder's own mutex.
static KawPowBuilder builder;
/**
 * Returns the KawPow program for the period containing @p height.
 *
 * Before doing so, a background compilation of the NEXT period's program is queued
 * on the default libuv loop. The current period's program is taken from the cache
 * when present and compiled synchronously otherwise.
 *
 * @param runner    runner the program is compiled for.
 * @param height    block height; the period is height / KPHash::PERIOD_LENGTH.
 * @param worksize  work size the program is compiled for.
 * @return the program, or nullptr when compilation failed.
 *
 * NOTE(review): uv_queue_work is called from whichever thread calls get(); confirm
 * that this is the thread running the default loop.
 */
cl_program OclKawPow::get(const IOclRunner &runner, uint64_t height, uint32_t worksize)
{
    const uint64_t period = height / KPHash::PERIOD_LENGTH;

    KawPowBaton* baton = new KawPowBaton(runner, period + 1, worksize);

    const int rc = uv_queue_work(uv_default_loop(), &baton->req,
        [](uv_work_t *req) {
            KawPowBaton* baton = static_cast<KawPowBaton*>(req->data);
            builder.build(baton->runner, baton->period, baton->worksize);
        },
        [](uv_work_t *req, int) { delete static_cast<KawPowBaton*>(req->data); }
    );

    if (rc != 0) {
        // The completion callback is the only other place that frees the baton, and it
        // never runs for a request that was not queued.
        delete baton;
    }

    cl_program program = cache.search(runner, period, worksize);
    if (program) {
        return program;
    }

    return builder.build(runner, period, worksize);
}
// Releases every cached KawPow program and empties the cache.
// NOTE(review): the cache returns raw program handles, so handles obtained earlier from get()
// presumably must not be used after this call — confirm against callers.
void OclKawPow::clear()
{
cache.clear();
}
} // namespace xmrig