// REDACTED-rig/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp
//
// Created by Haifa Bogdan Adnan on 03/08/2018.
//
#include <crypto/Argon2_constants.h>
#include "../../../common/common.h"
#include "crypto/argon2_hasher/hash/Hasher.h"
#include "crypto/argon2_hasher/hash/argon2/Argon2.h"
#include "OpenCLHasher.h"
#include "OpenCLKernel.h"
#include "crypto/argon2_hasher/common/DLLExport.h"
#if defined(WITH_OPENCL)
#ifndef CL_DEVICE_BOARD_NAME_AMD
#define CL_DEVICE_BOARD_NAME_AMD 0x4038
#endif
#ifndef CL_DEVICE_TOPOLOGY_AMD
#define CL_DEVICE_TOPOLOGY_AMD 0x4037
#endif
#ifndef CL_DEVICE_PCI_BUS_ID_NV
#define CL_DEVICE_PCI_BUS_ID_NV 0x4008
#endif
#ifndef CL_DEVICE_PCI_SLOT_ID_NV
#define CL_DEVICE_PCI_SLOT_ID_NV 0x4009
#endif
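// Layout of the data returned by the CL_DEVICE_TOPOLOGY_AMD query; the pcie view exposes
// the bus/device/function triple used below to build the PCI bus id string.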
typedef union
{
struct { cl_uint type; cl_uint data[5]; } raw;
struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
} device_topology_amd;
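// Work-group width used when launching the fill_blocks kernel; combined with the lane
// count (thrCost) it forms the local work size.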
#define KERNEL_WORKGROUP_SIZE 32
OpenCLHasher::OpenCLHasher() {
m_type = "GPU";
m_subType = "OPENCL";
m_shortSubType = "OCL";
m_intensity = 0;
m_description = "";
m_computingThreads = 0;
}
OpenCLHasher::~OpenCLHasher() {
// this->cleanup();
}
bool OpenCLHasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) {
cl_int error = CL_SUCCESS;
string error_message;
m_profile = getArgon2Profile(algorithm, variant);
m_devices = queryOpenCLDevices(error, error_message);
if(error != CL_SUCCESS) {
m_description = "No compatible GPU detected: " + error_message;
return false;
}
if (m_devices.empty()) {
m_description = "No compatible GPU detected.";
return false;
}
return true;
}
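// Enumerate all OpenCL platforms and collect every GPU device whose properties can be
// queried successfully; the error code/message of the last failing device is propagated
// to the caller through the output parameters.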
vector<OpenCLDeviceInfo*> OpenCLHasher::queryOpenCLDevices(cl_int &error, string &error_message) {
cl_int err;
cl_uint platform_count = 0;
cl_uint device_count = 0;
vector<OpenCLDeviceInfo*> result;
clGetPlatformIDs(0, NULL, &platform_count);
if(platform_count == 0) {
return result;
}
cl_platform_id *platforms = (cl_platform_id*)malloc(platform_count * sizeof(cl_platform_id));
err=clGetPlatformIDs(platform_count, platforms, &platform_count);
if(err != CL_SUCCESS) {
free(platforms);
error = err;
error_message = "Error querying for opencl platforms.";
return result;
}
int counter = 0;
for(uint32_t i=0; i < platform_count; i++) {
device_count = 0;
clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &device_count);
if(device_count == 0) {
continue;
}
cl_device_id * devices = (cl_device_id*)malloc(device_count * sizeof(cl_device_id));
err=clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, device_count, devices, &device_count);
if(err != CL_SUCCESS) {
free(devices);
error = err;
error_message = "Error querying for opencl devices.";
continue;
}
for(uint32_t j=0; j < device_count; j++) {
OpenCLDeviceInfo *info = getDeviceInfo(platforms[i], devices[j]);
if(info->error != CL_SUCCESS) {
    error = info->error;
    error_message = info->errorMessage;
    delete info; // don't leak the partially filled device info
}
else {
info->deviceIndex = counter;
result.push_back(info);
counter++;
}
}
free(devices);
}
free(platforms);
return result;
}
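// Query vendor, marketing name (board name on AMD), version and memory limits of a single
// device and pack them into an OpenCLDeviceInfo; any failed query is recorded in the
// returned object's error fields.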
OpenCLDeviceInfo *OpenCLHasher::getDeviceInfo(cl_platform_id platform, cl_device_id device) {
OpenCLDeviceInfo *device_info = new OpenCLDeviceInfo(CL_SUCCESS, "");
device_info->platform = platform;
device_info->device = device;
char *buffer;
size_t sz;
// device name
string device_vendor;
sz = 0;
clGetDeviceInfo(device, CL_DEVICE_VENDOR, 0, NULL, &sz);
buffer = (char *)malloc(sz + 1);
device_info->error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sz, buffer, &sz);
if(device_info->error != CL_SUCCESS) {
free(buffer);
device_info->errorMessage = "Error querying device vendor.";
return device_info;
}
else {
buffer[sz] = 0;
device_vendor = buffer;
free(buffer);
}
string device_name;
cl_device_info query_type = CL_DEVICE_NAME;
if(device_vendor.find("Advanced Micro Devices") != string::npos)
query_type = CL_DEVICE_BOARD_NAME_AMD;
sz = 0;
clGetDeviceInfo(device, query_type, 0, NULL, &sz);
buffer = (char *) malloc(sz + 1);
device_info->error = clGetDeviceInfo(device, query_type, sz, buffer, &sz);
if (device_info->error != CL_SUCCESS) {
free(buffer);
device_info->errorMessage = "Error querying device name.";
return device_info;
} else {
buffer[sz] = 0;
device_name = buffer;
free(buffer);
}
string device_version;
sz = 0;
clGetDeviceInfo(device, CL_DEVICE_VERSION, 0, NULL, &sz);
buffer = (char *)malloc(sz + 1);
device_info->error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sz, buffer, &sz);
if(device_info->error != CL_SUCCESS) {
free(buffer);
device_info->errorMessage = "Error querying device version.";
return device_info;
}
else {
buffer[sz] = 0;
device_version = buffer;
free(buffer);
}
device_info->deviceString = device_vendor + " - " + device_name/* + " : " + device_version*/;
device_info->error = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(device_info->maxMemSize), &(device_info->maxMemSize), NULL);
if(device_info->error != CL_SUCCESS) {
device_info->errorMessage = "Error querying device global memory size.";
return device_info;
}
device_info->error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(device_info->maxAllocableMemSize), &(device_info->maxAllocableMemSize), NULL);
if(device_info->error != CL_SUCCESS) {
device_info->errorMessage = "Error querying device max memory allocation.";
return device_info;
}
double mem_in_gb = device_info->maxMemSize / 1073741824.0;
stringstream ss;
ss << setprecision(2) << mem_in_gb;
device_info->deviceString += (" (" + ss.str() + "GB)");
return device_info;
}
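// Apply the user configuration: resolve per-device intensity, honor the GPU filter list,
// allocate device resources via setupDeviceInfo() and build the per-thread work data.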
bool OpenCLHasher::configure(xmrig::HasherConfig &config) {
int deviceOffset = config.getGPUCardsCount();
int index = deviceOffset;
double intensity = 0;
int total_threads = 0;
intensity = config.getAverageGPUIntensity();
if (intensity == 0) {
m_intensity = 0;
m_description = "Status: DISABLED - by user.";
return false;
}
bool cards_selected = false;
intensity = 0;
for(vector<OpenCLDeviceInfo *>::iterator d = m_devices.begin(); d != m_devices.end(); d++, index++) {
stringstream ss;
ss << "["<< (index + 1) << "] " << (*d)->deviceString;
string device_description = ss.str();
(*d)->deviceIndex = index;
(*d)->profileInfo.profile = m_profile;
if(config.gpuFilter().size() > 0) {
bool found = false;
for(xmrig::GPUFilter fit : config.gpuFilter()) {
if(device_description.find(fit.filter) != string::npos) {
found = true;
break;
}
}
if(!found) {
(*d)->profileInfo.threads = 0;
ss << " - DISABLED" << endl;
m_description += ss.str();
continue;
}
else {
cards_selected = true;
}
}
else {
cards_selected = true;
}
ss << endl;
double device_intensity = config.getGPUIntensity(deviceOffset + m_enabledDevices.size());
m_description += ss.str();
if(!(setupDeviceInfo((*d), device_intensity))) {
m_description += (*d)->errorMessage;
m_description += "\n";
continue;
}
DeviceInfo device;
if((*d)->deviceString.find("Advanced Micro Devices") != string::npos) {
device_topology_amd amdtopo;
if(clGetDeviceInfo((*d)->device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL) == CL_SUCCESS) {
char bus_id[50];
snprintf(bus_id, sizeof(bus_id), "%02x:%02x.%x",
         (unsigned)(uint8_t)amdtopo.pcie.bus, (unsigned)(uint8_t)amdtopo.pcie.device, (unsigned)(uint8_t)amdtopo.pcie.function);
device.bus_id = bus_id;
}
}
else if((*d)->deviceString.find("NVIDIA") != string::npos) {
cl_uint bus;
cl_uint slot;
if(clGetDeviceInfo ((*d)->device, CL_DEVICE_PCI_BUS_ID_NV, sizeof(bus), &bus, NULL) == CL_SUCCESS) {
if(clGetDeviceInfo ((*d)->device, CL_DEVICE_PCI_SLOT_ID_NV, sizeof(slot), &slot, NULL) == CL_SUCCESS) {
char bus_id[50];
snprintf(bus_id, sizeof(bus_id), "%02x:%02x.0", bus, slot);
device.bus_id = bus_id;
}
}
}
device.name = (*d)->deviceString;
device.intensity = device_intensity;
storeDeviceInfo((*d)->deviceIndex, device);
m_enabledDevices.push_back(*d);
total_threads += (*d)->profileInfo.threads;
intensity += device_intensity;
}
config.addGPUCardsCount(index - deviceOffset);
if(!cards_selected) {
m_intensity = 0;
m_description += "Status: DISABLED - no card enabled because of filtering.";
return false;
}
if (total_threads == 0) {
m_intensity = 0;
m_description += "Status: DISABLED - not enough resources.";
return false;
}
buildThreadData();
m_intensity = intensity / m_enabledDevices.size();
m_computingThreads = m_enabledDevices.size() * 2; // 2 computing threads for each device
m_description += "Status: ENABLED - with " + to_string(total_threads) + " threads.";
return true;
}
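// Create the OpenCL context, command queue, program and kernels for one device, size the
// Argon2 memory chunks according to the requested intensity and upload the precomputed
// block reference and segment tables.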
bool OpenCLHasher::setupDeviceInfo(OpenCLDeviceInfo *device, double intensity) {
cl_int error;
cl_context_properties properties[] = {
CL_CONTEXT_PLATFORM, (cl_context_properties) device->platform,
0};
device->context = clCreateContext(properties, 1, &(device->device), NULL, NULL, &error);
if (error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error getting device context.";
return false;
}
device->queue = clCreateCommandQueue(device->context, device->device, CL_QUEUE_PROFILING_ENABLE, &error);
if (error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error getting device command queue.";
return false;
}
const char *srcptr[] = {OpenCLKernel.c_str()};
size_t srcsize = OpenCLKernel.size();
device->program = clCreateProgramWithSource(device->context, 1, srcptr, &srcsize, &error);
if (error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error creating opencl program for device.";
return false;
}
error = clBuildProgram(device->program, 1, &device->device, "", NULL, NULL);
if (error != CL_SUCCESS) {
size_t log_size;
clGetProgramBuildInfo(device->program, device->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
char *log = (char *) malloc(log_size + 1);
clGetProgramBuildInfo(device->program, device->device, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
log[log_size] = 0;
string build_log = log;
free(log);
device->error = error;
device->errorMessage = "Error building opencl program for device: " + build_log;
return false;
}
device->kernelPrehash = clCreateKernel(device->program, "prehash", &error);
if (error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error creating opencl prehash kernel for device.";
return false;
}
device->kernelFillBlocks = clCreateKernel(device->program, "fill_blocks", &error);
if (error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error creating opencl main kernel for device.";
return false;
}
device->kernelPosthash = clCreateKernel(device->program, "posthash", &error);
if (error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error creating opencl posthash kernel for device.";
return false;
}
device->profileInfo.threads_per_chunk = (uint32_t) (device->maxAllocableMemSize / device->profileInfo.profile->memSize);
size_t chunk_size = device->profileInfo.threads_per_chunk * device->profileInfo.profile->memSize;
if (chunk_size == 0) {
device->error = -1;
device->errorMessage = "Not enough memory on GPU.";
return false;
}
uint64_t usable_memory = device->maxMemSize;
double chunks = (double) usable_memory / (double) chunk_size;
uint32_t max_threads = (uint32_t) (device->profileInfo.threads_per_chunk * chunks);
if (max_threads == 0) {
device->error = -1;
device->errorMessage = "Not enough memory on GPU.";
return false;
}
device->profileInfo.threads = (uint32_t) (max_threads * intensity / 100.0);
device->profileInfo.threads = (device->profileInfo.threads / 4) * 4; // make it divisible by 4
if (max_threads > 0 && device->profileInfo.threads == 0 && intensity > 0)
device->profileInfo.threads = 4;
double counter = (double) device->profileInfo.threads / (double) device->profileInfo.threads_per_chunk;
size_t allocated_mem_for_current_chunk = 0;
// The six chunk buffers are filled in order: full chunks while more than one chunk of
// work remains, a partial chunk for the remainder, and 1-byte placeholders for the rest.
cl_mem *memory_chunks[] = {
    &device->arguments.memoryChunk_0, &device->arguments.memoryChunk_1,
    &device->arguments.memoryChunk_2, &device->arguments.memoryChunk_3,
    &device->arguments.memoryChunk_4, &device->arguments.memoryChunk_5
};
for (int chunk = 0; chunk < 6; chunk++) {
    if (counter > 0) {
        if (counter > 1) {
            allocated_mem_for_current_chunk = chunk_size;
        } else {
            allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter);
        }
        counter -= 1;
    } else {
        allocated_mem_for_current_chunk = 1;
    }
    *memory_chunks[chunk] = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
                                           allocated_mem_for_current_chunk, NULL, &error);
    if (error != CL_SUCCESS) {
        device->error = error;
        device->errorMessage = "Error creating memory buffer.";
        return false;
    }
}
device->arguments.refs = clCreateBuffer(device->context, CL_MEM_READ_ONLY,
device->profileInfo.profile->blockRefsSize * sizeof(uint32_t), NULL,
&error);
if (error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error creating memory buffer.";
return false;
}
if (device->profileInfo.profile->succesiveIdxs == 1) {
device->arguments.idxs = NULL;
}
else {
device->arguments.idxs = clCreateBuffer(device->context, CL_MEM_READ_ONLY,
device->profileInfo.profile->blockRefsSize * sizeof(uint32_t), NULL,
&error);
if (error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error creating memory buffer.";
return false;
}
}
device->arguments.segments = clCreateBuffer(device->context, CL_MEM_READ_ONLY, device->profileInfo.profile->segCount * 3 * sizeof(uint32_t), NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error creating memory buffer.";
return false;
}
size_t preseed_memory_size = device->profileInfo.profile->pwdLen * 4;
size_t seed_memory_size = device->profileInfo.threads * (device->profileInfo.profile->thrCost * 2) * ARGON2_BLOCK_SIZE;
size_t out_memory_size = device->profileInfo.threads * ARGON2_BLOCK_SIZE;
size_t hash_memory_size = device->profileInfo.threads * (xmrig::ARGON2_HASHLEN + 4);
device->arguments.preseedMemory[0] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error creating memory buffer.";
return false;
}
device->arguments.preseedMemory[1] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error creating memory buffer.";
return false;
}
device->arguments.seedMemory[0] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error creating memory buffer.";
return false;
}
device->arguments.seedMemory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error creating memory buffer.";
return false;
}
device->arguments.outMemory[0] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, out_memory_size, NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error creating memory buffer.";
return false;
}
device->arguments.outMemory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, out_memory_size, NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error creating memory buffer.";
return false;
}
device->arguments.hashMemory[0] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error creating memory buffer.";
return false;
}
device->arguments.hashMemory[1] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error creating memory buffer.";
return false;
}
//optimise address sizes
uint32_t *refs = (uint32_t *)malloc(device->profileInfo.profile->blockRefsSize * sizeof(uint32_t));
for(int i=0;i<device->profileInfo.profile->blockRefsSize; i++) {
refs[i] = device->profileInfo.profile->blockRefs[i * 3 + 1];
}
error = clEnqueueWriteBuffer(device->queue, device->arguments.refs, CL_TRUE, 0, device->profileInfo.profile->blockRefsSize * sizeof(uint32_t), refs, 0, NULL, NULL);
free(refs); // blocking write has completed, the host copy is no longer needed
if(error != CL_SUCCESS) {
    device->error = error;
    device->errorMessage = "Error writing to gpu memory.";
    return false;
}
if(device->profileInfo.profile->succesiveIdxs == 0) {
uint32_t *idxs = (uint32_t *) malloc(device->profileInfo.profile->blockRefsSize * sizeof(uint32_t));
for (int i = 0; i < device->profileInfo.profile->blockRefsSize; i++) {
idxs[i] = device->profileInfo.profile->blockRefs[i * 3];
if (device->profileInfo.profile->blockRefs[i * 3 + 2] == 1) {
idxs[i] |= 0x80000000;
}
}
    error = clEnqueueWriteBuffer(device->queue, device->arguments.idxs, CL_TRUE, 0, device->profileInfo.profile->blockRefsSize * sizeof(uint32_t), idxs, 0, NULL, NULL);
    free(idxs); // blocking write has completed, the host copy is no longer needed
    if(error != CL_SUCCESS) {
        device->error = error;
        device->errorMessage = "Error writing to gpu memory.";
        return false;
    }
}
error=clEnqueueWriteBuffer(device->queue, device->arguments.segments, CL_TRUE, 0, device->profileInfo.profile->segCount * 3 * sizeof(uint32_t), device->profileInfo.profile->segments, 0, NULL, NULL);
if(error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error writing to gpu memory.";
return false;
}
clSetKernelArg(device->kernelFillBlocks, 0, sizeof(device->arguments.memoryChunk_0), &device->arguments.memoryChunk_0);
clSetKernelArg(device->kernelFillBlocks, 1, sizeof(device->arguments.memoryChunk_1), &device->arguments.memoryChunk_1);
clSetKernelArg(device->kernelFillBlocks, 2, sizeof(device->arguments.memoryChunk_2), &device->arguments.memoryChunk_2);
clSetKernelArg(device->kernelFillBlocks, 3, sizeof(device->arguments.memoryChunk_3), &device->arguments.memoryChunk_3);
clSetKernelArg(device->kernelFillBlocks, 4, sizeof(device->arguments.memoryChunk_4), &device->arguments.memoryChunk_4);
clSetKernelArg(device->kernelFillBlocks, 5, sizeof(device->arguments.memoryChunk_5), &device->arguments.memoryChunk_5);
clSetKernelArg(device->kernelFillBlocks, 8, sizeof(device->arguments.refs), &device->arguments.refs);
if(device->profileInfo.profile->succesiveIdxs == 0)
clSetKernelArg(device->kernelFillBlocks, 9, sizeof(device->arguments.idxs), &device->arguments.idxs);
else
clSetKernelArg(device->kernelFillBlocks, 9, sizeof(cl_mem), NULL);
clSetKernelArg(device->kernelFillBlocks, 10, sizeof(device->arguments.segments), &device->arguments.segments);
clSetKernelArg(device->kernelFillBlocks, 11, sizeof(int32_t), &device->profileInfo.profile->memSize);
clSetKernelArg(device->kernelFillBlocks, 12, sizeof(int32_t), &device->profileInfo.profile->thrCost);
clSetKernelArg(device->kernelFillBlocks, 13, sizeof(int32_t), &device->profileInfo.profile->segSize);
clSetKernelArg(device->kernelFillBlocks, 14, sizeof(int32_t), &device->profileInfo.profile->segCount);
clSetKernelArg(device->kernelFillBlocks, 15, sizeof(int32_t), &device->profileInfo.threads_per_chunk);
clSetKernelArg(device->kernelPrehash, 2, sizeof(int32_t), &device->profileInfo.profile->memCost);
clSetKernelArg(device->kernelPrehash, 3, sizeof(int32_t), &device->profileInfo.profile->thrCost);
int passes = device->profileInfo.profile->segCount / (4 * device->profileInfo.profile->thrCost);
clSetKernelArg(device->kernelPrehash, 4, sizeof(int32_t), &passes);
clSetKernelArg(device->kernelPrehash, 6, sizeof(int32_t), &device->profileInfo.profile->saltLen);
return true;
}
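// Callback invoked by the Argon2 pipeline before the main kernel: uploads the preseed data
// and launches the prehash kernel. The device lock taken here stays held through the filler
// and posthasher callbacks and is released once the results are read back, or on the first error.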
bool opencl_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
OpenCLGpuMgmtThreadData *gpumgmt_thread = (OpenCLGpuMgmtThreadData *)user_data;
OpenCLDeviceInfo *device = gpumgmt_thread->device;
cl_int error;
int sessions = max(profile->thrCost * 2, (uint32_t)16);
double hashes_per_block = sessions / (profile->thrCost * 2.0);
size_t total_work_items = sessions * 4 * ceil(threads / hashes_per_block);
size_t local_work_items = sessions * 4;
device->deviceLock.lock();
error = clEnqueueWriteBuffer(device->queue, device->arguments.preseedMemory[gpumgmt_thread->threadId],
CL_FALSE, 0, gpumgmt_thread->hashData.inSize, memory, 0, NULL, NULL);
if (error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error writing to gpu memory.";
device->deviceLock.unlock();
return false;
}
int inSizeInInt = gpumgmt_thread->hashData.inSize / 4;
clSetKernelArg(device->kernelPrehash, 0, sizeof(device->arguments.preseedMemory[gpumgmt_thread->threadId]), &device->arguments.preseedMemory[gpumgmt_thread->threadId]);
clSetKernelArg(device->kernelPrehash, 1, sizeof(device->arguments.seedMemory[gpumgmt_thread->threadId]), &device->arguments.seedMemory[gpumgmt_thread->threadId]);
clSetKernelArg(device->kernelPrehash, 5, sizeof(int), &inSizeInInt);
clSetKernelArg(device->kernelPrehash, 7, sizeof(int), &threads);
clSetKernelArg(device->kernelPrehash, 8, sessions * sizeof(cl_ulong) * 76, NULL); // (preseed size is 16 ulongs = 128 bytes)
error=clEnqueueNDRangeKernel(device->queue, device->kernelPrehash, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL);
if(error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error running the kernel.";
device->deviceLock.unlock();
return false;
}
return true;
}
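// Callback that launches the fill_blocks kernel, which performs the Argon2 memory-filling passes.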
void *opencl_kernel_filler(int threads, Argon2Profile *profile, void *user_data) {
OpenCLGpuMgmtThreadData *gpumgmt_thread = (OpenCLGpuMgmtThreadData *)user_data;
OpenCLDeviceInfo *device = gpumgmt_thread->device;
cl_int error;
size_t total_work_items = threads * KERNEL_WORKGROUP_SIZE * profile->thrCost;
size_t local_work_items = KERNEL_WORKGROUP_SIZE * profile->thrCost;
size_t shared_mem = profile->thrCost * ARGON2_QWORDS_IN_BLOCK;
clSetKernelArg(device->kernelFillBlocks, 6, sizeof(device->arguments.seedMemory[gpumgmt_thread->threadId]), &device->arguments.seedMemory[gpumgmt_thread->threadId]);
clSetKernelArg(device->kernelFillBlocks, 7, sizeof(device->arguments.outMemory[gpumgmt_thread->threadId]), &device->arguments.outMemory[gpumgmt_thread->threadId]);
clSetKernelArg(device->kernelFillBlocks, 16, sizeof(cl_ulong) * shared_mem, NULL);
error=clEnqueueNDRangeKernel(device->queue, device->kernelFillBlocks, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL);
if(error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error running the kernel.";
device->deviceLock.unlock();
return NULL;
}
return (void *)1;
}
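// Callback that runs the posthash kernel, reads the resulting hashes back to host memory
// and releases the device lock taken by the prehasher.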
bool opencl_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
OpenCLGpuMgmtThreadData *gpumgmt_thread = (OpenCLGpuMgmtThreadData *)user_data;
OpenCLDeviceInfo *device = gpumgmt_thread->device;
cl_int error;
size_t total_work_items = threads * 4;
size_t local_work_items = 4;
clSetKernelArg(device->kernelPosthash, 0, sizeof(device->arguments.hashMemory[gpumgmt_thread->threadId]), &device->arguments.hashMemory[gpumgmt_thread->threadId]);
clSetKernelArg(device->kernelPosthash, 1, sizeof(device->arguments.outMemory[gpumgmt_thread->threadId]), &device->arguments.outMemory[gpumgmt_thread->threadId]);
clSetKernelArg(device->kernelPosthash, 2, sizeof(device->arguments.preseedMemory[gpumgmt_thread->threadId]), &device->arguments.preseedMemory[gpumgmt_thread->threadId]);
clSetKernelArg(device->kernelPosthash, 3, sizeof(cl_ulong) * 60, NULL);
error=clEnqueueNDRangeKernel(device->queue, device->kernelPosthash, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL);
if(error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error running the kernel.";
device->deviceLock.unlock();
return false;
}
error = clEnqueueReadBuffer(device->queue, device->arguments.hashMemory[gpumgmt_thread->threadId], CL_FALSE, 0, threads * (xmrig::ARGON2_HASHLEN + 4), memory, 0, NULL, NULL);
if (error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error reading gpu memory.";
device->deviceLock.unlock();
return false;
}
error=clFinish(device->queue);
if(error != CL_SUCCESS) {
device->error = error;
device->errorMessage = "Error flushing GPU queue.";
device->deviceLock.unlock();
return false;
}
device->deviceLock.unlock();
return true;
}
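// Allocate two management threads per enabled device, each with its own Argon2 pipeline
// wired to the OpenCL kernel callbacks above.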
void OpenCLHasher::buildThreadData() {
m_threadData = new OpenCLGpuMgmtThreadData[m_enabledDevices.size() * 2];
for(int i=0; i < m_enabledDevices.size(); i++) {
OpenCLDeviceInfo *device = m_enabledDevices[i];
for(int threadId = 0; threadId < 2; threadId ++) {
OpenCLGpuMgmtThreadData &thread_data = m_threadData[i * 2 + threadId];
thread_data.device = device;
thread_data.threadId = threadId;
thread_data.argon2 = new Argon2(opencl_kernel_prehasher, opencl_kernel_filler, opencl_kernel_posthasher,
nullptr, &thread_data);
thread_data.argon2->setThreads(device->profileInfo.threads);
thread_data.hashData.outSize = xmrig::ARGON2_HASHLEN + 4;
}
}
}
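// Run one hashing round on the given worker thread and advance the nonce in the input
// blob by the number of hashes processed in parallel by its device.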
int OpenCLHasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) {
OpenCLGpuMgmtThreadData &threadData = m_threadData[threadIdx];
threadData.hashData.input = input;
threadData.hashData.inSize = size;
threadData.hashData.output = output;
int hashCount = threadData.argon2->generateHashes(*m_profile, threadData.hashData);
if(threadData.device->error != CL_SUCCESS) {
LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->errorMessage);
return 0;
}
uint32_t *nonce = ((uint32_t *)(((uint8_t*)threadData.hashData.input) + 39));
(*nonce) += threadData.device->profileInfo.threads;
return hashCount;
}
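// Release all OpenCL buffers, kernels, programs, queues and contexts created during setup.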
void OpenCLHasher::cleanup() {
vector<cl_platform_id> platforms;
for(vector<OpenCLDeviceInfo *>::iterator it=m_devices.begin(); it != m_devices.end(); it++) {
if ((*it)->profileInfo.threads != 0) {
clReleaseMemObject((*it)->arguments.memoryChunk_0);
clReleaseMemObject((*it)->arguments.memoryChunk_1);
clReleaseMemObject((*it)->arguments.memoryChunk_2);
clReleaseMemObject((*it)->arguments.memoryChunk_3);
clReleaseMemObject((*it)->arguments.memoryChunk_4);
clReleaseMemObject((*it)->arguments.memoryChunk_5);
clReleaseMemObject((*it)->arguments.refs);
clReleaseMemObject((*it)->arguments.segments);
clReleaseMemObject((*it)->arguments.preseedMemory[0]);
clReleaseMemObject((*it)->arguments.preseedMemory[1]);
clReleaseMemObject((*it)->arguments.seedMemory[0]);
clReleaseMemObject((*it)->arguments.seedMemory[1]);
clReleaseMemObject((*it)->arguments.outMemory[0]);
clReleaseMemObject((*it)->arguments.outMemory[1]);
clReleaseMemObject((*it)->arguments.hashMemory[0]);
clReleaseMemObject((*it)->arguments.hashMemory[1]);
clReleaseKernel((*it)->kernelPrehash);
clReleaseKernel((*it)->kernelFillBlocks);
clReleaseKernel((*it)->kernelPosthash);
clReleaseProgram((*it)->program);
clReleaseCommandQueue((*it)->queue);
clReleaseContext((*it)->context);
}
clReleaseDevice((*it)->device);
delete (*it);
}
m_devices.clear();
}
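// Number of hashes computed per round by the device backing the given worker thread.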
size_t OpenCLHasher::parallelism(int workerIdx) {
// there are 2 computing threads per device, so divide by 2 to get device index
workerIdx /= 2;
if(workerIdx < 0 || workerIdx >= (int)m_enabledDevices.size())
return 0;
return m_enabledDevices[workerIdx]->profileInfo.threads;
}
size_t OpenCLHasher::deviceCount() {
return m_enabledDevices.size();
}
DeviceInfo &OpenCLHasher::device(int workerIdx) {
workerIdx /= 2;
if(workerIdx < 0 || workerIdx >= (int)m_enabledDevices.size())
return devices().begin()->second;
return devices()[m_enabledDevices[workerIdx]->deviceIndex];
}
REGISTER_HASHER(OpenCLHasher);
#endif // WITH_OPENCL