Code fixes for naming convention consistency.

This commit is contained in:
Haifa Bogdan Adnan 2019-08-26 13:33:04 +03:00
parent 3b70cdd8e7
commit eb6959f312
10 changed files with 596 additions and 596 deletions

View file

@ -171,7 +171,7 @@ set(SOURCE_CPU_HASHER src/crypto/argon2_hasher/hash/cpu/CpuHasher.cpp src/crypto
set(SOURCE_OPENCL_HASHER src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h set(SOURCE_OPENCL_HASHER src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h
src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.h) src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.h)
set(SOURCE_CUDA_HASHER src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h set(SOURCE_CUDA_HASHER src/crypto/argon2_hasher/hash/gpu/cuda/CudaHasher.cpp src/crypto/argon2_hasher/hash/gpu/cuda/CudaHasher.h
src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu) src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu)
set(ARGON2_FILL_BLOCKS_SRC set(ARGON2_FILL_BLOCKS_SRC

View file

@ -14,10 +14,10 @@
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <driver_types.h> #include <driver_types.h>
#include "cuda_hasher.h" #include "CudaHasher.h"
#include "../../../common/DLLExport.h" #include "../../../common/DLLExport.h"
cuda_hasher::cuda_hasher() { CudaHasher::CudaHasher() {
m_type = "GPU"; m_type = "GPU";
m_subType = "CUDA"; m_subType = "CUDA";
m_shortSubType = "NVD"; m_shortSubType = "NVD";
@ -27,24 +27,24 @@ cuda_hasher::cuda_hasher() {
} }
cuda_hasher::~cuda_hasher() { CudaHasher::~CudaHasher() {
this->cleanup(); this->cleanup();
} }
bool cuda_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) { bool CudaHasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) {
cudaError_t error = cudaSuccess; cudaError_t error = cudaSuccess;
string error_message; string error_message;
m_profile = getArgon2Profile(algorithm, variant); m_profile = getArgon2Profile(algorithm, variant);
__devices = __query_cuda_devices(error, error_message); m_devices = queryCudaDevices(error, error_message);
if(error != cudaSuccess) { if(error != cudaSuccess) {
m_description = "No compatible GPU detected: " + error_message; m_description = "No compatible GPU detected: " + error_message;
return false; return false;
} }
if (__devices.empty()) { if (m_devices.empty()) {
m_description = "No compatible GPU detected."; m_description = "No compatible GPU detected.";
return false; return false;
} }
@ -52,8 +52,8 @@ bool cuda_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) {
return true; return true;
} }
vector<cuda_device_info *> cuda_hasher::__query_cuda_devices(cudaError_t &error, string &error_message) { vector<CudaDeviceInfo *> CudaHasher::queryCudaDevices(cudaError_t &error, string &error_message) {
vector<cuda_device_info *> devices; vector<CudaDeviceInfo *> devices;
int devCount = 0; int devCount = 0;
error = cudaGetDeviceCount(&devCount); error = cudaGetDeviceCount(&devCount);
@ -67,12 +67,12 @@ vector<cuda_device_info *> cuda_hasher::__query_cuda_devices(cudaError_t &error,
for (int i = 0; i < devCount; ++i) for (int i = 0; i < devCount; ++i)
{ {
cuda_device_info *dev = __get_device_info(i); CudaDeviceInfo *dev = getDeviceInfo(i);
if(dev == NULL) if(dev == NULL)
continue; continue;
if(dev->error != cudaSuccess) { if(dev->error != cudaSuccess) {
error = dev->error; error = dev->error;
error_message = dev->error_message; error_message = dev->errorMessage;
continue; continue;
} }
devices.push_back(dev); devices.push_back(dev);
@ -80,45 +80,45 @@ vector<cuda_device_info *> cuda_hasher::__query_cuda_devices(cudaError_t &error,
return devices; return devices;
} }
cuda_device_info *cuda_hasher::__get_device_info(int device_index) { CudaDeviceInfo *CudaHasher::getDeviceInfo(int device_index) {
cuda_device_info *device_info = new cuda_device_info(); CudaDeviceInfo *device_info = new CudaDeviceInfo();
device_info->error = cudaSuccess; device_info->error = cudaSuccess;
device_info->cuda_index = device_index; device_info->cudaIndex = device_index;
device_info->error = cudaSetDevice(device_index); device_info->error = cudaSetDevice(device_index);
if(device_info->error != cudaSuccess) { if(device_info->error != cudaSuccess) {
device_info->error_message = "Error setting current device."; device_info->errorMessage = "Error setting current device.";
return device_info; return device_info;
} }
cudaDeviceProp devProp; cudaDeviceProp devProp;
device_info->error = cudaGetDeviceProperties(&devProp, device_index); device_info->error = cudaGetDeviceProperties(&devProp, device_index);
if(device_info->error != cudaSuccess) { if(device_info->error != cudaSuccess) {
device_info->error_message = "Error setting current device."; device_info->errorMessage = "Error getting device properties.";
return device_info; return device_info;
} }
device_info->device_string = devProp.name; device_info->deviceString = devProp.name;
size_t freemem, totalmem; size_t freemem, totalmem;
device_info->error = cudaMemGetInfo(&freemem, &totalmem); device_info->error = cudaMemGetInfo(&freemem, &totalmem);
if(device_info->error != cudaSuccess) { if(device_info->error != cudaSuccess) {
device_info->error_message = "Error setting current device."; device_info->errorMessage = "Error getting device memory info.";
return device_info; return device_info;
} }
device_info->free_mem_size = freemem; device_info->freeMemSize = freemem;
device_info->max_allocable_mem_size = freemem / 4; device_info->maxAllocableMemSize = freemem / 4;
double mem_in_gb = totalmem / 1073741824.0; double mem_in_gb = totalmem / 1073741824.0;
stringstream ss; stringstream ss;
ss << setprecision(2) << mem_in_gb; ss << setprecision(2) << mem_in_gb;
device_info->device_string += (" (" + ss.str() + "GB)"); device_info->deviceString += (" (" + ss.str() + "GB)");
return device_info; return device_info;
} }
bool cuda_hasher::configure(xmrig::HasherConfig &config) { bool CudaHasher::configure(xmrig::HasherConfig &config) {
int index = config.getGPUCardsCount(); int index = config.getGPUCardsCount();
double intensity = 0; double intensity = 0;
@ -134,12 +134,12 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) {
bool cards_selected = false; bool cards_selected = false;
intensity = 0; intensity = 0;
for(vector<cuda_device_info *>::iterator d = __devices.begin(); d != __devices.end(); d++, index++) { for(vector<CudaDeviceInfo *>::iterator d = m_devices.begin(); d != m_devices.end(); d++, index++) {
stringstream ss; stringstream ss;
ss << "["<< (index + 1) << "] " << (*d)->device_string; ss << "["<< (index + 1) << "] " << (*d)->deviceString;
string device_description = ss.str(); string device_description = ss.str();
(*d)->device_index = index; (*d)->deviceIndex = index;
(*d)->profile_info.profile = m_profile; (*d)->profileInfo.profile = m_profile;
if(config.gpuFilter().size() > 0) { if(config.gpuFilter().size() > 0) {
bool found = false; bool found = false;
@ -150,7 +150,7 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) {
} }
} }
if(!found) { if(!found) {
(*d)->profile_info.threads = 0; (*d)->profileInfo.threads = 0;
ss << " - DISABLED" << endl; ss << " - DISABLED" << endl;
m_description += ss.str(); m_description += ss.str();
continue; continue;
@ -165,12 +165,12 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) {
ss << endl; ss << endl;
double device_intensity = config.getGPUIntensity((*d)->device_index); double device_intensity = config.getGPUIntensity((*d)->deviceIndex);
m_description += ss.str(); m_description += ss.str();
if(!(__setup_device_info((*d), device_intensity))) { if(!(setupDeviceInfo((*d), device_intensity))) {
m_description += (*d)->error_message; m_description += (*d)->errorMessage;
m_description += "\n"; m_description += "\n";
continue; continue;
}; };
@ -178,7 +178,7 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) {
DeviceInfo device; DeviceInfo device;
char bus_id[100]; char bus_id[100];
if(cudaDeviceGetPCIBusId(bus_id, 100, (*d)->cuda_index) == cudaSuccess) { if(cudaDeviceGetPCIBusId(bus_id, 100, (*d)->cudaIndex) == cudaSuccess) {
device.bus_id = bus_id; device.bus_id = bus_id;
int domain_separator = device.bus_id.find(":"); int domain_separator = device.bus_id.find(":");
if(domain_separator != string::npos) { if(domain_separator != string::npos) {
@ -186,13 +186,13 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) {
} }
} }
device.name = (*d)->device_string; device.name = (*d)->deviceString;
device.intensity = device_intensity; device.intensity = device_intensity;
storeDeviceInfo((*d)->device_index, device); storeDeviceInfo((*d)->deviceIndex, device);
__enabledDevices.push_back(*d); m_enabledDevices.push_back(*d);
total_threads += (*d)->profile_info.threads; total_threads += (*d)->profileInfo.threads;
intensity += device_intensity; intensity += device_intensity;
} }
@ -213,46 +213,46 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) {
if(!buildThreadData()) if(!buildThreadData())
return false; return false;
m_intensity = intensity / __enabledDevices.size(); m_intensity = intensity / m_enabledDevices.size();
m_computingThreads = __enabledDevices.size() * 2; // 2 computing threads for each device m_computingThreads = m_enabledDevices.size() * 2; // 2 computing threads for each device
m_description += "Status: ENABLED - with " + to_string(total_threads) + " threads."; m_description += "Status: ENABLED - with " + to_string(total_threads) + " threads.";
return true; return true;
} }
void cuda_hasher::cleanup() { void CudaHasher::cleanup() {
for(vector<cuda_device_info *>::iterator d = __devices.begin(); d != __devices.end(); d++) { for(vector<CudaDeviceInfo *>::iterator d = m_devices.begin(); d != m_devices.end(); d++) {
cuda_free(*d); cuda_free(*d);
} }
} }
bool cuda_hasher::__setup_device_info(cuda_device_info *device, double intensity) { bool CudaHasher::setupDeviceInfo(CudaDeviceInfo *device, double intensity) {
device->profile_info.threads_per_chunk = (uint32_t)(device->max_allocable_mem_size / device->profile_info.profile->memSize); device->profileInfo.threads_per_chunk = (uint32_t)(device->maxAllocableMemSize / device->profileInfo.profile->memSize);
size_t chunk_size = device->profile_info.threads_per_chunk * device->profile_info.profile->memSize; size_t chunk_size = device->profileInfo.threads_per_chunk * device->profileInfo.profile->memSize;
if(chunk_size == 0) { if(chunk_size == 0) {
device->error = cudaErrorInitializationError; device->error = cudaErrorInitializationError;
device->error_message = "Not enough memory on GPU."; device->errorMessage = "Not enough memory on GPU.";
return false; return false;
} }
uint64_t usable_memory = device->free_mem_size; uint64_t usable_memory = device->freeMemSize;
double chunks = (double)usable_memory / (double)chunk_size; double chunks = (double)usable_memory / (double)chunk_size;
uint32_t max_threads = (uint32_t)(device->profile_info.threads_per_chunk * chunks); uint32_t max_threads = (uint32_t)(device->profileInfo.threads_per_chunk * chunks);
if(max_threads == 0) { if(max_threads == 0) {
device->error = cudaErrorInitializationError; device->error = cudaErrorInitializationError;
device->error_message = "Not enough memory on GPU."; device->errorMessage = "Not enough memory on GPU.";
return false; return false;
} }
device->profile_info.threads = (uint32_t)(max_threads * intensity / 100.0); device->profileInfo.threads = (uint32_t)(max_threads * intensity / 100.0);
device->profile_info.threads = (device->profile_info.threads / 2) * 2; // make it divisible by 2 to allow for parallel kernel execution device->profileInfo.threads = (device->profileInfo.threads / 2) * 2; // make it divisible by 2 to allow for parallel kernel execution
if(max_threads > 0 && device->profile_info.threads == 0 && intensity > 0) if(max_threads > 0 && device->profileInfo.threads == 0 && intensity > 0)
device->profile_info.threads = 2; device->profileInfo.threads = 2;
chunks = (double)device->profile_info.threads / (double)device->profile_info.threads_per_chunk; chunks = (double)device->profileInfo.threads / (double)device->profileInfo.threads_per_chunk;
cuda_allocate(device, chunks, chunk_size); cuda_allocate(device, chunks, chunk_size);
@ -262,15 +262,15 @@ bool cuda_hasher::__setup_device_info(cuda_device_info *device, double intensity
return true; return true;
} }
bool cuda_hasher::buildThreadData() { bool CudaHasher::buildThreadData() {
__thread_data = new cuda_gpumgmt_thread_data[__enabledDevices.size() * 2]; m_threadData = new CudaGpuMgmtThreadData[m_enabledDevices.size() * 2];
for(int i=0; i < __enabledDevices.size(); i++) { for(int i=0; i < m_enabledDevices.size(); i++) {
cuda_device_info *device = __enabledDevices[i]; CudaDeviceInfo *device = m_enabledDevices[i];
for(int threadId = 0; threadId < 2; threadId ++) { for(int threadId = 0; threadId < 2; threadId ++) {
cuda_gpumgmt_thread_data &thread_data = __thread_data[i * 2 + threadId]; CudaGpuMgmtThreadData &thread_data = m_threadData[i * 2 + threadId];
thread_data.device = device; thread_data.device = device;
thread_data.thread_id = threadId; thread_data.threadId = threadId;
cudaStream_t stream; cudaStream_t stream;
device->error = cudaStreamCreate(&stream); device->error = cudaStreamCreate(&stream);
@ -279,19 +279,19 @@ bool cuda_hasher::buildThreadData() {
return false; return false;
} }
thread_data.device_data = stream; thread_data.deviceData = stream;
#ifdef PARALLEL_CUDA #ifdef PARALLEL_CUDA
if(threadId == 0) { if(threadId == 0) {
thread_data.threads_idx = 0; thread_data.threadsIdx = 0;
thread_data.threads = device->profile_info.threads / 2; thread_data.threads = device->profileInfo.threads / 2;
} }
else { else {
thread_data.threads_idx = device->profile_info.threads / 2; thread_data.threadsIdx = device->profileInfo.threads / 2;
thread_data.threads = device->profile_info.threads - thread_data.threads_idx; thread_data.threads = device->profileInfo.threads - thread_data.threadsIdx;
} }
#else #else
thread_data.threads_idx = 0; thread_data.threadsIdx = 0;
thread_data.threads = device->profile_info.threads; thread_data.threads = device->profileInfo.threads;
#endif #endif
@ -305,17 +305,17 @@ bool cuda_hasher::buildThreadData() {
return true; return true;
} }
int cuda_hasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) { int CudaHasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) {
cuda_gpumgmt_thread_data &threadData = __thread_data[threadIdx]; CudaGpuMgmtThreadData &threadData = m_threadData[threadIdx];
cudaSetDevice(threadData.device->cuda_index); cudaSetDevice(threadData.device->cudaIndex);
threadData.hashData.input = input; threadData.hashData.input = input;
threadData.hashData.inSize = size; threadData.hashData.inSize = size;
threadData.hashData.output = output; threadData.hashData.output = output;
int hashCount = threadData.argon2->generateHashes(*m_profile, threadData.hashData); int hashCount = threadData.argon2->generateHashes(*m_profile, threadData.hashData);
if(threadData.device->error != cudaSuccess) { if(threadData.device->error != cudaSuccess) {
LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->error_message); LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->errorMessage);
return 0; return 0;
} }
@ -326,15 +326,15 @@ int cuda_hasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *ou
} }
size_t cuda_hasher::parallelism(int workerIdx) { size_t CudaHasher::parallelism(int workerIdx) {
cuda_gpumgmt_thread_data &threadData = __thread_data[workerIdx]; CudaGpuMgmtThreadData &threadData = m_threadData[workerIdx];
return threadData.threads; return threadData.threads;
} }
size_t cuda_hasher::deviceCount() { size_t CudaHasher::deviceCount() {
return __enabledDevices.size(); return m_enabledDevices.size();
} }
REGISTER_HASHER(cuda_hasher); REGISTER_HASHER(CudaHasher);
#endif //WITH_CUDA #endif //WITH_CUDA

View file

@ -0,0 +1,126 @@
//
// Created by Haifa Bogdan Adnan on 18/09/2018.
//
#ifndef ARGON2_CUDA_HASHER_H
#define ARGON2_CUDA_HASHER_H
#if defined(WITH_CUDA)
// Raw buffer pointers for one GPU. cuda_allocate() obtains them and
// cuda_free() releases them (resetting each to NULL after freeing).
struct CudaKernelArguments {
// Six separately cudaMalloc'ed slabs of working memory. Each one is sized
// by cuda_allocate() from the chunk count / chunk size it is given; a slab
// that is not needed still gets a 1-byte allocation.
void *memoryChunk_0;
void *memoryChunk_1;
void *memoryChunk_2;
void *memoryChunk_3;
void *memoryChunk_4;
void *memoryChunk_5;
// Device copies of per-profile tables, each holding
// profile->blockRefsSize uint32_t entries uploaded with cudaMemcpy.
uint32_t *refs;
uint32_t *idxs;
// Device copy of profile->segments (profile->segCount * 3 uint32_t values).
uint32_t *segments;
// Two-slot buffers: cuda_allocate() allocates index 0 and index 1 with the
// same sizes. NOTE(review): presumably one slot per management thread
// (buildThreadData() creates two per device) - confirm in the kernel code.
uint32_t *preseedMemory[2];
uint32_t *seedMemory[2];
uint32_t *outMemory[2];
uint32_t *hashMemory[2];
// Allocated with cudaMallocHost (132 bytes per thread), unlike the buffers
// above which come from cudaMalloc.
uint32_t *hostSeedMemory[2];
};
struct Argon2ProfileInfo {
Argon2ProfileInfo() {
threads = 0;
threads_per_chunk = 0;
}
uint32_t threads;
uint32_t threads_per_chunk;
Argon2Profile *profile;
};
struct CudaDeviceInfo {
CudaDeviceInfo() {
deviceIndex = 0;
deviceString = "";
freeMemSize = 0;
maxAllocableMemSize = 0;
error = cudaSuccess;
errorMessage = "";
}
int deviceIndex;
int cudaIndex;
string deviceString;
uint64_t freeMemSize;
uint64_t maxAllocableMemSize;
Argon2ProfileInfo profileInfo;
CudaKernelArguments arguments;
mutex deviceLock;
cudaError_t error;
string errorMessage;
};
// State for one GPU management thread. buildThreadData() creates two of
// these per enabled device.
struct CudaGpuMgmtThreadData {
// Takes the owning device's mutex. Compiles to a no-op when PARALLEL_CUDA
// is defined.
void lock() {
#ifndef PARALLEL_CUDA
device->deviceLock.lock();
#endif
}
// Releases the owning device's mutex. Compiles to a no-op when
// PARALLEL_CUDA is defined.
void unlock() {
#ifndef PARALLEL_CUDA
device->deviceLock.unlock();
#endif
}
// 0 or 1: which of the device's two management threads this is.
int threadId;
// Device this thread drives; the same pointer is held in the hasher's
// enabled-device list.
CudaDeviceInfo *device;
// Argon2 driver used by CudaHasher::compute() to generate hashes.
Argon2 *argon2;
// Input/output descriptor filled in by CudaHasher::compute() on each call.
HashData hashData;
// Opaque per-thread handle; buildThreadData() stores the thread's
// cudaStream_t here.
void *deviceData;
// Number of hashes this thread handles per batch (reported by
// CudaHasher::parallelism()).
int threads;
// Set to 0, or to half the device's thread count for the second thread
// under PARALLEL_CUDA. NOTE(review): presumably the offset of this
// thread's first hash slot on the device - confirm in the kernel code.
int threadsIdx;
};
// Hasher implementation that runs Argon2 on NVIDIA GPUs through CUDA.
class CudaHasher : public Hasher {
public:
CudaHasher();
// Calls cleanup().
~CudaHasher();
// Looks up the Argon2 profile for (algorithm, variant) and enumerates CUDA
// devices. Returns false, with a description set, when a query fails or
// no device is found.
virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant);
// Applies the GPU filter and per-device intensity from `config`, allocates
// device memory for each selected device and builds the per-thread data.
virtual bool configure(xmrig::HasherConfig &config);
// Calls cuda_free() on every device found by initialize().
virtual void cleanup();
// Runs one hashing batch for management thread `threadIdx`. Returns 0 when
// the device reports a CUDA error.
virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output);
// Number of hashes management thread `workerIdx` handles per batch.
virtual size_t parallelism(int workerIdx);
// Number of devices that were successfully enabled by configure().
virtual size_t deviceCount();
private:
// Allocates and fills a CudaDeviceInfo for the given CUDA ordinal. On
// failure the returned object carries the error code and message.
CudaDeviceInfo *getDeviceInfo(int device_index);
// Computes the thread count for `device` from its free memory and the
// requested intensity (percent), then allocates device memory.
bool setupDeviceInfo(CudaDeviceInfo *device, double intensity);
// Returns the devices that could be queried without error; the last
// per-device failure, if any, is reported through the out-parameters.
vector<CudaDeviceInfo*> queryCudaDevices(cudaError_t &error, string &error_message);
// Allocates m_threadData (two entries per enabled device) and creates a
// CUDA stream for each entry.
bool buildThreadData();
// All devices returned by queryCudaDevices().
vector<CudaDeviceInfo*> m_devices;
// Subset of m_devices that configure() enabled; shares the same pointers.
vector<CudaDeviceInfo*> m_enabledDevices;
// Array allocated with new[] in buildThreadData().
CudaGpuMgmtThreadData *m_threadData;
// Profile selected in initialize().
Argon2Profile *m_profile;
};
// CUDA kernel exports
extern void cuda_allocate(CudaDeviceInfo *device, double chunks, size_t chunk_size);
extern void cuda_free(CudaDeviceInfo *device);
extern bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data);
extern void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data);
extern bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data);
// end CUDA kernel exports
#endif //WITH_CUDA
#endif //ARGON2_CUDA_HASHER_H

View file

@ -1,126 +0,0 @@
//
// Created by Haifa Bogdan Adnan on 18/09/2018.
//
#ifndef ARGON2_CUDA_HASHER_H
#define ARGON2_CUDA_HASHER_H
#if defined(WITH_CUDA)
// Raw buffer pointers for one GPU. cuda_allocate() obtains them and
// cuda_free() releases them (resetting each to NULL after freeing).
struct cuda_kernel_arguments {
// Six separately cudaMalloc'ed slabs of working memory; a slab that is not
// needed still gets a 1-byte allocation.
void *memory_chunk_0;
void *memory_chunk_1;
void *memory_chunk_2;
void *memory_chunk_3;
void *memory_chunk_4;
void *memory_chunk_5;
// Device copies of per-profile tables (profile->blockRefsSize uint32_t
// entries each), uploaded with cudaMemcpy.
uint32_t *refs;
uint32_t *idxs;
// Device copy of profile->segments (profile->segCount * 3 uint32_t values).
uint32_t *segments;
// Two-slot buffers; both slots are allocated with the same sizes.
// NOTE(review): presumably one slot per management thread - confirm.
uint32_t *preseed_memory[2];
uint32_t *seed_memory[2];
uint32_t *out_memory[2];
uint32_t *hash_memory[2];
// Allocated with cudaMallocHost (132 bytes per thread).
uint32_t *host_seed_memory[2];
};
struct argon2profile_info {
argon2profile_info() {
threads = 0;
threads_per_chunk = 0;
}
uint32_t threads;
uint32_t threads_per_chunk;
Argon2Profile *profile;
};
struct cuda_device_info {
cuda_device_info() {
device_index = 0;
device_string = "";
free_mem_size = 0;
max_allocable_mem_size = 0;
error = cudaSuccess;
error_message = "";
}
int device_index;
int cuda_index;
string device_string;
uint64_t free_mem_size;
uint64_t max_allocable_mem_size;
argon2profile_info profile_info;
cuda_kernel_arguments arguments;
mutex device_lock;
cudaError_t error;
string error_message;
};
// State for one GPU management thread; two are created per enabled device.
struct cuda_gpumgmt_thread_data {
// Takes the owning device's mutex. Compiles to a no-op when PARALLEL_CUDA
// is defined.
void lock() {
#ifndef PARALLEL_CUDA
device->device_lock.lock();
#endif
}
// Releases the owning device's mutex. Compiles to a no-op when
// PARALLEL_CUDA is defined.
void unlock() {
#ifndef PARALLEL_CUDA
device->device_lock.unlock();
#endif
}
// 0 or 1: which of the device's two management threads this is.
int thread_id;
// Device this thread drives.
cuda_device_info *device;
// Argon2 driver used by cuda_hasher::compute() to generate hashes.
Argon2 *argon2;
// Input/output descriptor filled in by cuda_hasher::compute() on each call.
HashData hashData;
// Opaque per-thread handle; buildThreadData() stores a cudaStream_t here.
void *device_data;
// Number of hashes this thread handles per batch.
int threads;
// NOTE(review): presumably the offset of this thread's first hash slot on
// the device - confirm in the kernel code.
int threads_idx;
};
// Hasher implementation that runs Argon2 on NVIDIA GPUs through CUDA.
class cuda_hasher : public Hasher {
public:
cuda_hasher();
// Calls cleanup().
~cuda_hasher();
// Looks up the Argon2 profile for (algorithm, variant) and enumerates CUDA
// devices. Returns false when a query fails or no device is found.
virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant);
// Applies the GPU filter and per-device intensity from `config`, allocates
// device memory for each selected device and builds the per-thread data.
virtual bool configure(xmrig::HasherConfig &config);
// Calls cuda_free() on every device found by initialize().
virtual void cleanup();
// Runs one hashing batch for management thread `threadIdx`. Returns 0 when
// the device reports a CUDA error.
virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output);
// Number of hashes management thread `workerIdx` handles per batch.
virtual size_t parallelism(int workerIdx);
// Number of devices that were successfully enabled by configure().
virtual size_t deviceCount();
private:
// Allocates and fills a cuda_device_info for the given CUDA ordinal.
cuda_device_info *__get_device_info(int device_index);
// Computes the thread count for `device` from its free memory and the
// requested intensity (percent), then allocates device memory.
bool __setup_device_info(cuda_device_info *device, double intensity);
// Returns the devices that could be queried without error.
vector<cuda_device_info*> __query_cuda_devices(cudaError_t &error, string &error_message);
// Allocates __thread_data (two entries per enabled device) and creates a
// CUDA stream for each entry.
bool buildThreadData();
// All devices returned by __query_cuda_devices().
vector<cuda_device_info*> __devices;
// Subset of __devices that configure() enabled; shares the same pointers.
vector<cuda_device_info*> __enabledDevices;
// Array allocated with new[] in buildThreadData().
cuda_gpumgmt_thread_data *__thread_data;
// Profile selected in initialize().
Argon2Profile *m_profile;
};
// CUDA kernel exports
extern void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size);
extern void cuda_free(cuda_device_info *device);
extern bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data);
extern void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data);
extern bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data);
// end CUDA kernel exports
#endif //WITH_CUDA
#endif //ARGON2_CUDA_HASHER_H

View file

@ -7,7 +7,7 @@
#include "crypto/argon2_hasher/hash/Hasher.h" #include "crypto/argon2_hasher/hash/Hasher.h"
#include "crypto/argon2_hasher/hash/argon2/Argon2.h" #include "crypto/argon2_hasher/hash/argon2/Argon2.h"
#include "cuda_hasher.h" #include "CudaHasher.h"
#define THREADS_PER_LANE 32 #define THREADS_PER_LANE 32
#define BLOCK_SIZE_UINT4 64 #define BLOCK_SIZE_UINT4 64
@ -744,12 +744,12 @@ __global__ void posthash (
} }
} }
void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) { void cuda_allocate(CudaDeviceInfo *device, double chunks, size_t chunk_size) {
Argon2Profile *profile = device->profile_info.profile; Argon2Profile *profile = device->profileInfo.profile;
device->error = cudaSetDevice(device->cuda_index); device->error = cudaSetDevice(device->cudaIndex);
if(device->error != cudaSuccess) { if(device->error != cudaSuccess) {
device->error_message = "Error setting current device for memory allocation."; device->errorMessage = "Error setting current device for memory allocation.";
return; return;
} }
@ -762,9 +762,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
else { else {
allocated_mem_for_current_chunk = 1; allocated_mem_for_current_chunk = 1;
} }
device->error = cudaMalloc(&device->arguments.memory_chunk_0, allocated_mem_for_current_chunk); device->error = cudaMalloc(&device->arguments.memoryChunk_0, allocated_mem_for_current_chunk);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory."; device->errorMessage = "Error allocating memory.";
return; return;
} }
if (chunks > 0) { if (chunks > 0) {
@ -774,9 +774,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
else { else {
allocated_mem_for_current_chunk = 1; allocated_mem_for_current_chunk = 1;
} }
device->error = cudaMalloc(&device->arguments.memory_chunk_1, allocated_mem_for_current_chunk); device->error = cudaMalloc(&device->arguments.memoryChunk_1, allocated_mem_for_current_chunk);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory."; device->errorMessage = "Error allocating memory.";
return; return;
} }
if (chunks > 0) { if (chunks > 0) {
@ -786,9 +786,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
else { else {
allocated_mem_for_current_chunk = 1; allocated_mem_for_current_chunk = 1;
} }
device->error = cudaMalloc(&device->arguments.memory_chunk_2, allocated_mem_for_current_chunk); device->error = cudaMalloc(&device->arguments.memoryChunk_2, allocated_mem_for_current_chunk);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory."; device->errorMessage = "Error allocating memory.";
return; return;
} }
if (chunks > 0) { if (chunks > 0) {
@ -798,9 +798,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
else { else {
allocated_mem_for_current_chunk = 1; allocated_mem_for_current_chunk = 1;
} }
device->error = cudaMalloc(&device->arguments.memory_chunk_3, allocated_mem_for_current_chunk); device->error = cudaMalloc(&device->arguments.memoryChunk_3, allocated_mem_for_current_chunk);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory."; device->errorMessage = "Error allocating memory.";
return; return;
} }
if (chunks > 0) { if (chunks > 0) {
@ -810,9 +810,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
else { else {
allocated_mem_for_current_chunk = 1; allocated_mem_for_current_chunk = 1;
} }
device->error = cudaMalloc(&device->arguments.memory_chunk_4, allocated_mem_for_current_chunk); device->error = cudaMalloc(&device->arguments.memoryChunk_4, allocated_mem_for_current_chunk);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory."; device->errorMessage = "Error allocating memory.";
return; return;
} }
if (chunks > 0) { if (chunks > 0) {
@ -822,9 +822,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
else { else {
allocated_mem_for_current_chunk = 1; allocated_mem_for_current_chunk = 1;
} }
device->error = cudaMalloc(&device->arguments.memory_chunk_5, allocated_mem_for_current_chunk); device->error = cudaMalloc(&device->arguments.memoryChunk_5, allocated_mem_for_current_chunk);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory."; device->errorMessage = "Error allocating memory.";
return; return;
} }
@ -835,13 +835,13 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
device->error = cudaMalloc(&device->arguments.refs, profile->blockRefsSize * sizeof(uint32_t)); device->error = cudaMalloc(&device->arguments.refs, profile->blockRefsSize * sizeof(uint32_t));
if(device->error != cudaSuccess) { if(device->error != cudaSuccess) {
device->error_message = "Error allocating memory."; device->errorMessage = "Error allocating memory.";
return; return;
} }
device->error = cudaMemcpy(device->arguments.refs, refs, profile->blockRefsSize * sizeof(uint32_t), cudaMemcpyHostToDevice); device->error = cudaMemcpy(device->arguments.refs, refs, profile->blockRefsSize * sizeof(uint32_t), cudaMemcpyHostToDevice);
if(device->error != cudaSuccess) { if(device->error != cudaSuccess) {
device->error_message = "Error copying memory."; device->errorMessage = "Error copying memory.";
return; return;
} }
free(refs); free(refs);
@ -860,14 +860,14 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
device->error = cudaMalloc(&device->arguments.idxs, profile->blockRefsSize * sizeof(uint32_t)); device->error = cudaMalloc(&device->arguments.idxs, profile->blockRefsSize * sizeof(uint32_t));
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory."; device->errorMessage = "Error allocating memory.";
return; return;
} }
device->error = cudaMemcpy(device->arguments.idxs, idxs, profile->blockRefsSize * sizeof(uint32_t), device->error = cudaMemcpy(device->arguments.idxs, idxs, profile->blockRefsSize * sizeof(uint32_t),
cudaMemcpyHostToDevice); cudaMemcpyHostToDevice);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error copying memory."; device->errorMessage = "Error copying memory.";
return; return;
} }
free(idxs); free(idxs);
@ -876,17 +876,17 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
//reorganize segments data //reorganize segments data
device->error = cudaMalloc(&device->arguments.segments, profile->segCount * 3 * sizeof(uint32_t)); device->error = cudaMalloc(&device->arguments.segments, profile->segCount * 3 * sizeof(uint32_t));
if(device->error != cudaSuccess) { if(device->error != cudaSuccess) {
device->error_message = "Error allocating memory."; device->errorMessage = "Error allocating memory.";
return; return;
} }
device->error = cudaMemcpy(device->arguments.segments, profile->segments, profile->segCount * 3 * sizeof(uint32_t), cudaMemcpyHostToDevice); device->error = cudaMemcpy(device->arguments.segments, profile->segments, profile->segCount * 3 * sizeof(uint32_t), cudaMemcpyHostToDevice);
if(device->error != cudaSuccess) { if(device->error != cudaSuccess) {
device->error_message = "Error copying memory."; device->errorMessage = "Error copying memory.";
return; return;
} }
#ifdef PARALLEL_CUDA #ifdef PARALLEL_CUDA
int threads = device->profile_info.threads / 2; int threads = device->profileInfo.threads / 2;
#else #else
int threads = device->profile_info.threads; int threads = device->profileInfo.threads;
#endif #endif
@ -896,60 +896,60 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
size_t out_memory_size = threads * ARGON2_BLOCK_SIZE; size_t out_memory_size = threads * ARGON2_BLOCK_SIZE;
size_t hash_memory_size = threads * (xmrig::ARGON2_HASHLEN + 4); size_t hash_memory_size = threads * (xmrig::ARGON2_HASHLEN + 4);
device->error = cudaMalloc(&device->arguments.preseed_memory[0], preseed_memory_size); device->error = cudaMalloc(&device->arguments.preseedMemory[0], preseed_memory_size);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory."; device->errorMessage = "Error allocating memory.";
return; return;
} }
device->error = cudaMalloc(&device->arguments.seed_memory[0], seed_memory_size); device->error = cudaMalloc(&device->arguments.seedMemory[0], seed_memory_size);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory."; device->errorMessage = "Error allocating memory.";
return; return;
} }
device->error = cudaMalloc(&device->arguments.out_memory[0], out_memory_size); device->error = cudaMalloc(&device->arguments.outMemory[0], out_memory_size);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory."; device->errorMessage = "Error allocating memory.";
return; return;
} }
device->error = cudaMalloc(&device->arguments.hash_memory[0], hash_memory_size); device->error = cudaMalloc(&device->arguments.hashMemory[0], hash_memory_size);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory."; device->errorMessage = "Error allocating memory.";
return; return;
} }
device->error = cudaMallocHost(&device->arguments.host_seed_memory[0], 132 * threads); device->error = cudaMallocHost(&device->arguments.hostSeedMemory[0], 132 * threads);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error allocating pinned memory."; device->errorMessage = "Error allocating pinned memory.";
return; return;
} }
device->error = cudaMalloc(&device->arguments.preseed_memory[1], preseed_memory_size); device->error = cudaMalloc(&device->arguments.preseedMemory[1], preseed_memory_size);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory."; device->errorMessage = "Error allocating memory.";
return; return;
} }
device->error = cudaMalloc(&device->arguments.seed_memory[1], seed_memory_size); device->error = cudaMalloc(&device->arguments.seedMemory[1], seed_memory_size);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory."; device->errorMessage = "Error allocating memory.";
return; return;
} }
device->error = cudaMalloc(&device->arguments.out_memory[1], out_memory_size); device->error = cudaMalloc(&device->arguments.outMemory[1], out_memory_size);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory."; device->errorMessage = "Error allocating memory.";
return; return;
} }
device->error = cudaMalloc(&device->arguments.hash_memory[1], hash_memory_size); device->error = cudaMalloc(&device->arguments.hashMemory[1], hash_memory_size);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory."; device->errorMessage = "Error allocating memory.";
return; return;
} }
device->error = cudaMallocHost(&device->arguments.host_seed_memory[1], 132 * threads); device->error = cudaMallocHost(&device->arguments.hostSeedMemory[1], 132 * threads);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error allocating pinned memory."; device->errorMessage = "Error allocating pinned memory.";
return; return;
} }
} }
// Releases the CUDA buffers referenced by device->arguments.
// Selects the device with cudaSetDevice(device->cudaIndex) (its return value is
// not checked), then, for every pointer handled in the lines shown here, calls
// cudaFree when the pointer is non-NULL and resets it to NULL.
// hostSeedMemory entries are released with cudaFreeHost instead of cudaFree.
// Parameters:
//   device - device descriptor whose `arguments` buffers are released.
void cuda_free(cuda_device_info *device) { void cuda_free(CudaDeviceInfo *device) {
cudaSetDevice(device->cuda_index); cudaSetDevice(device->cudaIndex);
if(device->arguments.idxs != NULL) { if(device->arguments.idxs != NULL) {
cudaFree(device->arguments.idxs); cudaFree(device->arguments.idxs);
@ -966,73 +966,73 @@ void cuda_free(cuda_device_info *device) {
device->arguments.segments = NULL; device->arguments.segments = NULL;
} }
if(device->arguments.memory_chunk_0 != NULL) { if(device->arguments.memoryChunk_0 != NULL) {
cudaFree(device->arguments.memory_chunk_0); cudaFree(device->arguments.memoryChunk_0);
device->arguments.memory_chunk_0 = NULL; device->arguments.memoryChunk_0 = NULL;
} }
if(device->arguments.memory_chunk_1 != NULL) { if(device->arguments.memoryChunk_1 != NULL) {
cudaFree(device->arguments.memory_chunk_1); cudaFree(device->arguments.memoryChunk_1);
device->arguments.memory_chunk_1 = NULL; device->arguments.memoryChunk_1 = NULL;
} }
if(device->arguments.memory_chunk_2 != NULL) { if(device->arguments.memoryChunk_2 != NULL) {
cudaFree(device->arguments.memory_chunk_2); cudaFree(device->arguments.memoryChunk_2);
device->arguments.memory_chunk_2 = NULL; device->arguments.memoryChunk_2 = NULL;
} }
if(device->arguments.memory_chunk_3 != NULL) { if(device->arguments.memoryChunk_3 != NULL) {
cudaFree(device->arguments.memory_chunk_3); cudaFree(device->arguments.memoryChunk_3);
device->arguments.memory_chunk_3 = NULL; device->arguments.memoryChunk_3 = NULL;
} }
if(device->arguments.memory_chunk_4 != NULL) { if(device->arguments.memoryChunk_4 != NULL) {
cudaFree(device->arguments.memory_chunk_4); cudaFree(device->arguments.memoryChunk_4);
device->arguments.memory_chunk_4 = NULL; device->arguments.memoryChunk_4 = NULL;
} }
if(device->arguments.memory_chunk_5 != NULL) { if(device->arguments.memoryChunk_5 != NULL) {
cudaFree(device->arguments.memory_chunk_5); cudaFree(device->arguments.memoryChunk_5);
device->arguments.memory_chunk_5 = NULL; device->arguments.memoryChunk_5 = NULL;
} }
// The remaining buffers are indexed 0..1. In each loop the inner `if` has no
// braces, so only the free call is conditional; the reset to NULL runs for
// both entries regardless.
if(device->arguments.preseed_memory != NULL) { if(device->arguments.preseedMemory != NULL) {
for(int i=0;i<2;i++) { for(int i=0;i<2;i++) {
if(device->arguments.preseed_memory[i] != NULL) if(device->arguments.preseedMemory[i] != NULL)
cudaFree(device->arguments.preseed_memory[i]); cudaFree(device->arguments.preseedMemory[i]);
device->arguments.preseed_memory[i] = NULL; device->arguments.preseedMemory[i] = NULL;
} }
} }
if(device->arguments.seed_memory != NULL) { if(device->arguments.seedMemory != NULL) {
for(int i=0;i<2;i++) { for(int i=0;i<2;i++) {
if(device->arguments.seed_memory[i] != NULL) if(device->arguments.seedMemory[i] != NULL)
cudaFree(device->arguments.seed_memory[i]); cudaFree(device->arguments.seedMemory[i]);
device->arguments.seed_memory[i] = NULL; device->arguments.seedMemory[i] = NULL;
} }
} }
if(device->arguments.out_memory != NULL) { if(device->arguments.outMemory != NULL) {
for(int i=0;i<2;i++) { for(int i=0;i<2;i++) {
if(device->arguments.out_memory[i] != NULL) if(device->arguments.outMemory[i] != NULL)
cudaFree(device->arguments.out_memory[i]); cudaFree(device->arguments.outMemory[i]);
device->arguments.out_memory[i] = NULL; device->arguments.outMemory[i] = NULL;
} }
} }
if(device->arguments.hash_memory != NULL) { if(device->arguments.hashMemory != NULL) {
for(int i=0;i<2;i++) { for(int i=0;i<2;i++) {
if(device->arguments.hash_memory[i] != NULL) if(device->arguments.hashMemory[i] != NULL)
cudaFree(device->arguments.hash_memory[i]); cudaFree(device->arguments.hashMemory[i]);
device->arguments.hash_memory[i] = NULL; device->arguments.hashMemory[i] = NULL;
} }
} }
// Host-side buffers: released with cudaFreeHost rather than cudaFree.
if(device->arguments.host_seed_memory != NULL) { if(device->arguments.hostSeedMemory != NULL) {
for(int i=0;i<2;i++) { for(int i=0;i<2;i++) {
if(device->arguments.host_seed_memory[i] != NULL) if(device->arguments.hostSeedMemory[i] != NULL)
cudaFreeHost(device->arguments.host_seed_memory[i]); cudaFreeHost(device->arguments.hostSeedMemory[i]);
device->arguments.host_seed_memory[i] = NULL; device->arguments.hostSeedMemory[i] = NULL;
} }
} }
@ -1040,9 +1040,9 @@ void cuda_free(cuda_device_info *device) {
} }
// Uploads the hash input to the GPU and launches the `prehash` kernel.
// Parameters:
//   memory    - host buffer holding the input; hashData.inSize bytes are copied.
//   threads   - number of hashes; sets the grid size as
//               ceil(threads / hashes_per_block).
//   profile   - Argon2 profile supplying thrCost, memCost and segCount.
//   user_data - cast to the per-thread GPU management data, which provides the
//               device, the CUDA stream (deviceData) and the buffer slot (threadId).
// Returns false after a failed host-to-device copy (errorMessage is set and the
// lock is released first).
// NOTE(review): no unlock() is visible on the success path in the lines shown;
// cuda_kernel_posthasher calls unlock(), so presumably the lock is held across
// the prehash/fill/posthash sequence - confirm.
bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data) { bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
cuda_gpumgmt_thread_data *gpumgmt_thread = (cuda_gpumgmt_thread_data *)user_data; CudaGpuMgmtThreadData *gpumgmt_thread = (CudaGpuMgmtThreadData *)user_data;
cuda_device_info *device = gpumgmt_thread->device; CudaDeviceInfo *device = gpumgmt_thread->device;
cudaStream_t stream = (cudaStream_t)gpumgmt_thread->device_data; cudaStream_t stream = (cudaStream_t)gpumgmt_thread->deviceData;
// At least 8 sessions per block; hashes_per_block is how many hashes that covers.
int sessions = max(profile->thrCost * 2, (uint32_t)8); int sessions = max(profile->thrCost * 2, (uint32_t)8);
double hashes_per_block = sessions / (profile->thrCost * 2.0); double hashes_per_block = sessions / (profile->thrCost * 2.0);
@ -1050,18 +1050,18 @@ bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, vo
gpumgmt_thread->lock(); gpumgmt_thread->lock();
// Stage the input in the host seed buffer, then copy it to the device on `stream`.
memcpy(device->arguments.host_seed_memory[gpumgmt_thread->thread_id], memory, gpumgmt_thread->hashData.inSize); memcpy(device->arguments.hostSeedMemory[gpumgmt_thread->threadId], memory, gpumgmt_thread->hashData.inSize);
device->error = cudaMemcpyAsync(device->arguments.preseed_memory[gpumgmt_thread->thread_id], device->arguments.host_seed_memory[gpumgmt_thread->thread_id], gpumgmt_thread->hashData.inSize, cudaMemcpyHostToDevice, stream); device->error = cudaMemcpyAsync(device->arguments.preseedMemory[gpumgmt_thread->threadId], device->arguments.hostSeedMemory[gpumgmt_thread->threadId], gpumgmt_thread->hashData.inSize, cudaMemcpyHostToDevice, stream);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error writing to gpu memory."; device->errorMessage = "Error writing to gpu memory.";
gpumgmt_thread->unlock(); gpumgmt_thread->unlock();
return false; return false;
} }
prehash <<< ceil(threads / hashes_per_block), work_items, sessions * BLAKE_SHARED_MEM, stream>>> ( prehash <<< ceil(threads / hashes_per_block), work_items, sessions * BLAKE_SHARED_MEM, stream>>> (
device->arguments.preseed_memory[gpumgmt_thread->thread_id], device->arguments.preseedMemory[gpumgmt_thread->threadId],
device->arguments.seed_memory[gpumgmt_thread->thread_id], device->arguments.seedMemory[gpumgmt_thread->threadId],
profile->memCost, profile->memCost,
profile->thrCost, profile->thrCost,
profile->segCount / (4 * profile->thrCost), profile->segCount / (4 * profile->thrCost),
@ -1073,21 +1073,21 @@ bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, vo
} }
// Launches the `fill_blocks` kernel on the thread's CUDA stream.
// Parameters:
//   threads   - used as the grid size of the launch.
//   profile   - Argon2 profile; thrCost scales the block size and the shared
//               memory size, segSize/segCount are passed to the kernel.
//   user_data - cast to the per-thread GPU management data (device, stream,
//               threadId, threadsIdx).
// Returns (void *)1 unconditionally in the lines shown; no launch error check
// is visible here.
void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data) { void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data) {
cuda_gpumgmt_thread_data *gpumgmt_thread = (cuda_gpumgmt_thread_data *)user_data; CudaGpuMgmtThreadData *gpumgmt_thread = (CudaGpuMgmtThreadData *)user_data;
cuda_device_info *device = gpumgmt_thread->device; CudaDeviceInfo *device = gpumgmt_thread->device;
cudaStream_t stream = (cudaStream_t)gpumgmt_thread->device_data; cudaStream_t stream = (cudaStream_t)gpumgmt_thread->deviceData;
size_t work_items = KERNEL_WORKGROUP_SIZE * profile->thrCost; size_t work_items = KERNEL_WORKGROUP_SIZE * profile->thrCost;
// Per thrCost unit: one Argon2 block plus 128, and another 128 when succesiveIdxs == 1.
size_t shared_mem = profile->thrCost * (ARGON2_BLOCK_SIZE + 128 + (profile->succesiveIdxs == 1 ? 128 : 0)); size_t shared_mem = profile->thrCost * (ARGON2_BLOCK_SIZE + 128 + (profile->succesiveIdxs == 1 ? 128 : 0));
fill_blocks <<<threads, work_items, shared_mem, stream>>> ((uint32_t*)device->arguments.memory_chunk_0, fill_blocks <<<threads, work_items, shared_mem, stream>>> ((uint32_t*)device->arguments.memoryChunk_0,
(uint32_t*)device->arguments.memory_chunk_1, (uint32_t*)device->arguments.memoryChunk_1,
(uint32_t*)device->arguments.memory_chunk_2, (uint32_t*)device->arguments.memoryChunk_2,
(uint32_t*)device->arguments.memory_chunk_3, (uint32_t*)device->arguments.memoryChunk_3,
(uint32_t*)device->arguments.memory_chunk_4, (uint32_t*)device->arguments.memoryChunk_4,
(uint32_t*)device->arguments.memory_chunk_5, (uint32_t*)device->arguments.memoryChunk_5,
device->arguments.seed_memory[gpumgmt_thread->thread_id], device->arguments.seedMemory[gpumgmt_thread->threadId],
device->arguments.out_memory[gpumgmt_thread->thread_id], device->arguments.outMemory[gpumgmt_thread->threadId],
device->arguments.refs, device->arguments.refs,
device->arguments.idxs, device->arguments.idxs,
device->arguments.segments, device->arguments.segments,
@ -1095,27 +1095,27 @@ void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data) {
profile->thrCost, profile->thrCost,
profile->segSize, profile->segSize,
profile->segCount, profile->segCount,
device->profile_info.threads_per_chunk, device->profileInfo.threads_per_chunk,
gpumgmt_thread->threads_idx); gpumgmt_thread->threadsIdx);
return (void *)1; return (void *)1;
} }
bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data) { bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
cuda_gpumgmt_thread_data *gpumgmt_thread = (cuda_gpumgmt_thread_data *)user_data; CudaGpuMgmtThreadData *gpumgmt_thread = (CudaGpuMgmtThreadData *)user_data;
cuda_device_info *device = gpumgmt_thread->device; CudaDeviceInfo *device = gpumgmt_thread->device;
cudaStream_t stream = (cudaStream_t)gpumgmt_thread->device_data; cudaStream_t stream = (cudaStream_t)gpumgmt_thread->deviceData;
size_t work_items = 4; size_t work_items = 4;
posthash <<<threads, work_items, BLAKE_SHARED_MEM, stream>>> ( posthash <<<threads, work_items, BLAKE_SHARED_MEM, stream>>> (
device->arguments.hash_memory[gpumgmt_thread->thread_id], device->arguments.hashMemory[gpumgmt_thread->threadId],
device->arguments.out_memory[gpumgmt_thread->thread_id], device->arguments.outMemory[gpumgmt_thread->threadId],
device->arguments.preseed_memory[gpumgmt_thread->thread_id]); device->arguments.preseedMemory[gpumgmt_thread->threadId]);
device->error = cudaMemcpyAsync(device->arguments.host_seed_memory[gpumgmt_thread->thread_id], device->arguments.hash_memory[gpumgmt_thread->thread_id], threads * (xmrig::ARGON2_HASHLEN + 4), cudaMemcpyDeviceToHost, stream); device->error = cudaMemcpyAsync(device->arguments.hostSeedMemory[gpumgmt_thread->threadId], device->arguments.hashMemory[gpumgmt_thread->threadId], threads * (xmrig::ARGON2_HASHLEN + 4), cudaMemcpyDeviceToHost, stream);
if (device->error != cudaSuccess) { if (device->error != cudaSuccess) {
device->error_message = "Error reading gpu memory."; device->errorMessage = "Error reading gpu memory.";
gpumgmt_thread->unlock(); gpumgmt_thread->unlock();
return false; return false;
} }
@ -1125,7 +1125,7 @@ bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, v
continue; continue;
} }
memcpy(memory, device->arguments.host_seed_memory[gpumgmt_thread->thread_id], threads * (xmrig::ARGON2_HASHLEN + 4)); memcpy(memory, device->arguments.hostSeedMemory[gpumgmt_thread->threadId], threads * (xmrig::ARGON2_HASHLEN + 4));
gpumgmt_thread->unlock(); gpumgmt_thread->unlock();
return memory; return memory;

View file

@ -36,7 +36,7 @@ typedef union
#define KERNEL_WORKGROUP_SIZE 32 #define KERNEL_WORKGROUP_SIZE 32
// Constructor: labels this hasher as type "GPU", sub-type "OPENCL" (short form
// "OCL") and starts with zero computing threads.
opencl_hasher::opencl_hasher() { OpenCLHasher::OpenCLHasher() {
m_type = "GPU"; m_type = "GPU";
m_subType = "OPENCL"; m_subType = "OPENCL";
m_shortSubType = "OCL"; m_shortSubType = "OCL";
@ -45,23 +45,23 @@ opencl_hasher::opencl_hasher() {
m_computingThreads = 0; m_computingThreads = 0;
} }
// Destructor. The cleanup() call is commented out, so this body currently
// performs no work of its own.
opencl_hasher::~opencl_hasher() { OpenCLHasher::~OpenCLHasher() {
// this->cleanup(); // this->cleanup();
} }
// Selects the Argon2 profile for the given algorithm/variant and enumerates
// the available OpenCL devices into m_devices.
// Returns false, with m_description set to a "No compatible GPU detected"
// message, when the device query reports an error or yields no devices;
// otherwise falls through to `return true`.
bool opencl_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) { bool OpenCLHasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) {
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
string error_message; string error_message;
m_profile = getArgon2Profile(algorithm, variant); m_profile = getArgon2Profile(algorithm, variant);
__devices = __query_opencl_devices(error, error_message); m_devices = queryOpenCLDevices(error, error_message);
if(error != CL_SUCCESS) { if(error != CL_SUCCESS) {
m_description = "No compatible GPU detected: " + error_message; m_description = "No compatible GPU detected: " + error_message;
return false; return false;
} }
if (__devices.empty()) { if (m_devices.empty()) {
m_description = "No compatible GPU detected."; m_description = "No compatible GPU detected.";
return false; return false;
} }
@ -69,13 +69,13 @@ bool opencl_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) {
return true; return true;
} }
// Enumerates OpenCL platforms and their devices and returns a descriptor for
// each device whose information could be queried.
// Parameters:
//   error         - out: set to the error code of a device whose query failed.
//   error_message - out: set to that device's error message.
// Devices that query successfully get consecutive deviceIndex values (from
// `counter`) and are appended to the result.
vector<opencl_device_info*> opencl_hasher::__query_opencl_devices(cl_int &error, string &error_message) { vector<OpenCLDeviceInfo*> OpenCLHasher::queryOpenCLDevices(cl_int &error, string &error_message) {
cl_int err; cl_int err;
cl_uint platform_count = 0; cl_uint platform_count = 0;
cl_uint device_count = 0; cl_uint device_count = 0;
vector<opencl_device_info*> result; vector<OpenCLDeviceInfo*> result;
clGetPlatformIDs(0, NULL, &platform_count); clGetPlatformIDs(0, NULL, &platform_count);
if(platform_count == 0) { if(platform_count == 0) {
@ -112,13 +112,13 @@ vector<opencl_device_info*> opencl_hasher::__query_opencl_devices(cl_int &error,
} }
for(uint32_t j=0; j < device_count; j++) { for(uint32_t j=0; j < device_count; j++) {
opencl_device_info *info = __get_device_info(platforms[i], devices[j]); OpenCLDeviceInfo *info = getDeviceInfo(platforms[i], devices[j]);
// NOTE(review): on failure `info` is neither stored nor deleted in the lines
// shown, although getDeviceInfo allocates it with `new` - possible leak; confirm.
if(info->error != CL_SUCCESS) { if(info->error != CL_SUCCESS) {
error = info->error; error = info->error;
error_message = info->error_message; error_message = info->errorMessage;
} }
else { else {
info->device_index = counter; info->deviceIndex = counter;
result.push_back(info); result.push_back(info);
counter++; counter++;
} }
@ -132,8 +132,8 @@ vector<opencl_device_info*> opencl_hasher::__query_opencl_devices(cl_int &error,
return result; return result;
} }
// Builds a descriptor for one OpenCL device.
// Allocates the descriptor with `new`, records the platform and device, then
// queries vendor, name, version, global memory size and maximum allocation
// size through clGetDeviceInfo. deviceString becomes
// "<vendor> - <name> (<size>GB)".
// On any failed query the descriptor is returned early with `error` holding
// the OpenCL status and `errorMessage` naming the failed query.
// The returned pointer is never null in the lines shown; callers check
// `error` to detect failure.
opencl_device_info *opencl_hasher::__get_device_info(cl_platform_id platform, cl_device_id device) { OpenCLDeviceInfo *OpenCLHasher::getDeviceInfo(cl_platform_id platform, cl_device_id device) {
opencl_device_info *device_info = new opencl_device_info(CL_SUCCESS, ""); OpenCLDeviceInfo *device_info = new OpenCLDeviceInfo(CL_SUCCESS, "");
device_info->platform = platform; device_info->platform = platform;
device_info->device = device; device_info->device = device;
@ -149,7 +149,7 @@ opencl_device_info *opencl_hasher::__get_device_info(cl_platform_id platform, cl
device_info->error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sz, buffer, &sz); device_info->error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sz, buffer, &sz);
if(device_info->error != CL_SUCCESS) { if(device_info->error != CL_SUCCESS) {
free(buffer); free(buffer);
device_info->error_message = "Error querying device vendor."; device_info->errorMessage = "Error querying device vendor.";
return device_info; return device_info;
} }
else { else {
@ -170,7 +170,7 @@ opencl_device_info *opencl_hasher::__get_device_info(cl_platform_id platform, cl
device_info->error = clGetDeviceInfo(device, query_type, sz, buffer, &sz); device_info->error = clGetDeviceInfo(device, query_type, sz, buffer, &sz);
if (device_info->error != CL_SUCCESS) { if (device_info->error != CL_SUCCESS) {
free(buffer); free(buffer);
device_info->error_message = "Error querying device name."; device_info->errorMessage = "Error querying device name.";
return device_info; return device_info;
} else { } else {
buffer[sz] = 0; buffer[sz] = 0;
@ -185,7 +185,7 @@ opencl_device_info *opencl_hasher::__get_device_info(cl_platform_id platform, cl
device_info->error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sz, buffer, &sz); device_info->error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sz, buffer, &sz);
if(device_info->error != CL_SUCCESS) { if(device_info->error != CL_SUCCESS) {
free(buffer); free(buffer);
device_info->error_message = "Error querying device version."; device_info->errorMessage = "Error querying device version.";
return device_info; return device_info;
} }
else { else {
@ -194,29 +194,29 @@ opencl_device_info *opencl_hasher::__get_device_info(cl_platform_id platform, cl
free(buffer); free(buffer);
} }
// The device version is deliberately left out of the display string (commented out).
device_info->device_string = device_vendor + " - " + device_name/* + " : " + device_version*/; device_info->deviceString = device_vendor + " - " + device_name/* + " : " + device_version*/;
device_info->error = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(device_info->max_mem_size), &(device_info->max_mem_size), NULL); device_info->error = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(device_info->maxMemSize), &(device_info->maxMemSize), NULL);
if(device_info->error != CL_SUCCESS) { if(device_info->error != CL_SUCCESS) {
device_info->error_message = "Error querying device global memory size."; device_info->errorMessage = "Error querying device global memory size.";
return device_info; return device_info;
} }
device_info->error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(device_info->max_allocable_mem_size), &(device_info->max_allocable_mem_size), NULL); device_info->error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(device_info->maxAllocableMemSize), &(device_info->maxAllocableMemSize), NULL);
if(device_info->error != CL_SUCCESS) { if(device_info->error != CL_SUCCESS) {
device_info->error_message = "Error querying device max memory allocation."; device_info->errorMessage = "Error querying device max memory allocation.";
return device_info; return device_info;
} }
// 1073741824 = 2^30: convert bytes to GiB for the human-readable suffix.
double mem_in_gb = device_info->max_mem_size / 1073741824.0; double mem_in_gb = device_info->maxMemSize / 1073741824.0;
stringstream ss; stringstream ss;
ss << setprecision(2) << mem_in_gb; ss << setprecision(2) << mem_in_gb;
device_info->device_string += (" (" + ss.str() + "GB)"); device_info->deviceString += (" (" + ss.str() + "GB)");
return device_info; return device_info;
} }
// Applies the hasher configuration to every detected OpenCL device.
// For each device in m_devices: assigns its index (numbering starts at
// config.getGPUCardsCount()) and the Argon2 profile, honours the GPU filter
// (filtered-out devices get 0 threads and are listed as DISABLED), sets the
// device up with its configured intensity, records its name/intensity (and a
// bus id for AMD and NVIDIA devices), and adds it to m_enabledDevices.
// A device whose setup fails has its error message appended to m_description
// and is skipped.
// Afterwards m_intensity is the average intensity over the enabled devices
// and m_computingThreads is twice their count.
bool opencl_hasher::configure(xmrig::HasherConfig &config) { bool OpenCLHasher::configure(xmrig::HasherConfig &config) {
int index = config.getGPUCardsCount(); int index = config.getGPUCardsCount();
double intensity = 0; double intensity = 0;
@ -233,12 +233,12 @@ bool opencl_hasher::configure(xmrig::HasherConfig &config) {
intensity = 0; intensity = 0;
for(vector<opencl_device_info *>::iterator d = __devices.begin(); d != __devices.end(); d++, index++) { for(vector<OpenCLDeviceInfo *>::iterator d = m_devices.begin(); d != m_devices.end(); d++, index++) {
stringstream ss; stringstream ss;
ss << "["<< (index + 1) << "] " << (*d)->device_string; ss << "["<< (index + 1) << "] " << (*d)->deviceString;
string device_description = ss.str(); string device_description = ss.str();
(*d)->device_index = index; (*d)->deviceIndex = index;
(*d)->profile_info.profile = m_profile; (*d)->profileInfo.profile = m_profile;
if(config.gpuFilter().size() > 0) { if(config.gpuFilter().size() > 0) {
bool found = false; bool found = false;
@ -249,7 +249,7 @@ bool opencl_hasher::configure(xmrig::HasherConfig &config) {
} }
} }
if(!found) { if(!found) {
(*d)->profile_info.threads = 0; (*d)->profileInfo.threads = 0;
ss << " - DISABLED" << endl; ss << " - DISABLED" << endl;
m_description += ss.str(); m_description += ss.str();
continue; continue;
@ -264,19 +264,19 @@ bool opencl_hasher::configure(xmrig::HasherConfig &config) {
ss << endl; ss << endl;
double device_intensity = config.getGPUIntensity((*d)->device_index); double device_intensity = config.getGPUIntensity((*d)->deviceIndex);
m_description += ss.str(); m_description += ss.str();
if(!(__setup_device_info((*d), device_intensity))) { if(!(setupDeviceInfo((*d), device_intensity))) {
m_description += (*d)->error_message; m_description += (*d)->errorMessage;
m_description += "\n"; m_description += "\n";
continue; continue;
}; };
DeviceInfo device; DeviceInfo device;
// The vendor is recognised by substring match on the display string.
if((*d)->device_string.find("Advanced Micro Devices") != string::npos) { if((*d)->deviceString.find("Advanced Micro Devices") != string::npos) {
device_topology_amd amdtopo; device_topology_amd amdtopo;
if(clGetDeviceInfo((*d)->device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL) == CL_SUCCESS) { if(clGetDeviceInfo((*d)->device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL) == CL_SUCCESS) {
char bus_id[50]; char bus_id[50];
@ -284,7 +284,7 @@ bool opencl_hasher::configure(xmrig::HasherConfig &config) {
device.bus_id = bus_id; device.bus_id = bus_id;
} }
} }
else if((*d)->device_string.find("NVIDIA") != string::npos) { else if((*d)->deviceString.find("NVIDIA") != string::npos) {
cl_uint bus; cl_uint bus;
cl_uint slot; cl_uint slot;
@ -297,13 +297,13 @@ bool opencl_hasher::configure(xmrig::HasherConfig &config) {
} }
} }
device.name = (*d)->device_string; device.name = (*d)->deviceString;
device.intensity = device_intensity; device.intensity = device_intensity;
storeDeviceInfo((*d)->device_index, device); storeDeviceInfo((*d)->deviceIndex, device);
__enabledDevices.push_back(*d); m_enabledDevices.push_back(*d);
total_threads += (*d)->profile_info.threads; total_threads += (*d)->profileInfo.threads;
intensity += device_intensity; intensity += device_intensity;
} }
@ -323,14 +323,14 @@ bool opencl_hasher::configure(xmrig::HasherConfig &config) {
buildThreadData(); buildThreadData();
// NOTE(review): divides by m_enabledDevices.size(); presumably an earlier
// check (not visible in the lines shown) returns when no device is enabled - confirm.
m_intensity = intensity / __enabledDevices.size(); m_intensity = intensity / m_enabledDevices.size();
m_computingThreads = __enabledDevices.size() * 2; // 2 computing threads for each device m_computingThreads = m_enabledDevices.size() * 2; // 2 computing threads for each device
m_description += "Status: ENABLED - with " + to_string(total_threads) + " threads."; m_description += "Status: ENABLED - with " + to_string(total_threads) + " threads.";
return true; return true;
} }
bool opencl_hasher::__setup_device_info(opencl_device_info *device, double intensity) { bool OpenCLHasher::setupDeviceInfo(OpenCLDeviceInfo *device, double intensity) {
cl_int error; cl_int error;
cl_context_properties properties[] = { cl_context_properties properties[] = {
@ -340,14 +340,14 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten
device->context = clCreateContext(properties, 1, &(device->device), NULL, NULL, &error); device->context = clCreateContext(properties, 1, &(device->device), NULL, NULL, &error);
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error getting device context."; device->errorMessage = "Error getting device context.";
return false; return false;
} }
device->queue = clCreateCommandQueue(device->context, device->device, CL_QUEUE_PROFILING_ENABLE, &error); device->queue = clCreateCommandQueue(device->context, device->device, CL_QUEUE_PROFILING_ENABLE, &error);
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error getting device command queue."; device->errorMessage = "Error getting device command queue.";
return false; return false;
} }
@ -357,7 +357,7 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten
device->program = clCreateProgramWithSource(device->context, 1, srcptr, &srcsize, &error); device->program = clCreateProgramWithSource(device->context, 1, srcptr, &srcsize, &error);
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating opencl program for device."; device->errorMessage = "Error creating opencl program for device.";
return false; return false;
} }
@ -372,55 +372,55 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten
free(log); free(log);
device->error = error; device->error = error;
device->error_message = "Error building opencl program for device: " + build_log; device->errorMessage = "Error building opencl program for device: " + build_log;
return false; return false;
} }
device->kernel_prehash = clCreateKernel(device->program, "prehash", &error); device->kernelPrehash = clCreateKernel(device->program, "prehash", &error);
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating opencl prehash kernel for device."; device->errorMessage = "Error creating opencl prehash kernel for device.";
return false; return false;
} }
device->kernel_fill_blocks = clCreateKernel(device->program, "fill_blocks", &error); device->kernelFillBlocks = clCreateKernel(device->program, "fill_blocks", &error);
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating opencl main kernel for device."; device->errorMessage = "Error creating opencl main kernel for device.";
return false; return false;
} }
device->kernel_posthash = clCreateKernel(device->program, "posthash", &error); device->kernelPosthash = clCreateKernel(device->program, "posthash", &error);
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating opencl posthash kernel for device."; device->errorMessage = "Error creating opencl posthash kernel for device.";
return false; return false;
} }
device->profile_info.threads_per_chunk = (uint32_t) (device->max_allocable_mem_size / device->profile_info.profile->memSize); device->profileInfo.threads_per_chunk = (uint32_t) (device->maxAllocableMemSize / device->profileInfo.profile->memSize);
size_t chunk_size = device->profile_info.threads_per_chunk * device->profile_info.profile->memSize; size_t chunk_size = device->profileInfo.threads_per_chunk * device->profileInfo.profile->memSize;
if (chunk_size == 0) { if (chunk_size == 0) {
device->error = -1; device->error = -1;
device->error_message = "Not enough memory on GPU."; device->errorMessage = "Not enough memory on GPU.";
return false; return false;
} }
uint64_t usable_memory = device->max_mem_size; uint64_t usable_memory = device->maxMemSize;
double chunks = (double) usable_memory / (double) chunk_size; double chunks = (double) usable_memory / (double) chunk_size;
uint32_t max_threads = (uint32_t) (device->profile_info.threads_per_chunk * chunks); uint32_t max_threads = (uint32_t) (device->profileInfo.threads_per_chunk * chunks);
if (max_threads == 0) { if (max_threads == 0) {
device->error = -1; device->error = -1;
device->error_message = "Not enough memory on GPU."; device->errorMessage = "Not enough memory on GPU.";
return false; return false;
} }
device->profile_info.threads = (uint32_t) (max_threads * intensity / 100.0); device->profileInfo.threads = (uint32_t) (max_threads * intensity / 100.0);
device->profile_info.threads = (device->profile_info.threads / 4) * 4; // make it divisible by 4 device->profileInfo.threads = (device->profileInfo.threads / 4) * 4; // make it divisible by 4
if (max_threads > 0 && device->profile_info.threads == 0 && intensity > 0) if (max_threads > 0 && device->profileInfo.threads == 0 && intensity > 0)
device->profile_info.threads = 4; device->profileInfo.threads = 4;
double counter = (double) device->profile_info.threads / (double) device->profile_info.threads_per_chunk; double counter = (double) device->profileInfo.threads / (double) device->profileInfo.threads_per_chunk;
size_t allocated_mem_for_current_chunk = 0; size_t allocated_mem_for_current_chunk = 0;
if (counter > 0) { if (counter > 0) {
@ -433,11 +433,11 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten
} else { } else {
allocated_mem_for_current_chunk = 1; allocated_mem_for_current_chunk = 1;
} }
device->arguments.memory_chunk_0 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, device->arguments.memoryChunk_0 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
allocated_mem_for_current_chunk, NULL, &error); allocated_mem_for_current_chunk, NULL, &error);
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating memory buffer."; device->errorMessage = "Error creating memory buffer.";
return false; return false;
} }
@ -451,11 +451,11 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten
} else { } else {
allocated_mem_for_current_chunk = 1; allocated_mem_for_current_chunk = 1;
} }
device->arguments.memory_chunk_1 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, device->arguments.memoryChunk_1 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
allocated_mem_for_current_chunk, NULL, &error); allocated_mem_for_current_chunk, NULL, &error);
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating memory buffer."; device->errorMessage = "Error creating memory buffer.";
return false; return false;
} }
@ -469,11 +469,11 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten
} else { } else {
allocated_mem_for_current_chunk = 1; allocated_mem_for_current_chunk = 1;
} }
device->arguments.memory_chunk_2 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, device->arguments.memoryChunk_2 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
allocated_mem_for_current_chunk, NULL, &error); allocated_mem_for_current_chunk, NULL, &error);
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating memory buffer."; device->errorMessage = "Error creating memory buffer.";
return false; return false;
} }
@ -487,11 +487,11 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten
} else { } else {
allocated_mem_for_current_chunk = 1; allocated_mem_for_current_chunk = 1;
} }
device->arguments.memory_chunk_3 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, device->arguments.memoryChunk_3 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
allocated_mem_for_current_chunk, NULL, &error); allocated_mem_for_current_chunk, NULL, &error);
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating memory buffer."; device->errorMessage = "Error creating memory buffer.";
return false; return false;
} }
@ -505,11 +505,11 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten
} else { } else {
allocated_mem_for_current_chunk = 1; allocated_mem_for_current_chunk = 1;
} }
device->arguments.memory_chunk_4 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, device->arguments.memoryChunk_4 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
allocated_mem_for_current_chunk, NULL, &error); allocated_mem_for_current_chunk, NULL, &error);
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating memory buffer."; device->errorMessage = "Error creating memory buffer.";
return false; return false;
} }
@ -523,176 +523,176 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten
} else { } else {
allocated_mem_for_current_chunk = 1; allocated_mem_for_current_chunk = 1;
} }
device->arguments.memory_chunk_5 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, device->arguments.memoryChunk_5 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
allocated_mem_for_current_chunk, NULL, &error); allocated_mem_for_current_chunk, NULL, &error);
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating memory buffer."; device->errorMessage = "Error creating memory buffer.";
return false; return false;
} }
device->arguments.refs = clCreateBuffer(device->context, CL_MEM_READ_ONLY, device->arguments.refs = clCreateBuffer(device->context, CL_MEM_READ_ONLY,
device->profile_info.profile->blockRefsSize * sizeof(uint32_t), NULL, device->profileInfo.profile->blockRefsSize * sizeof(uint32_t), NULL,
&error); &error);
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating memory buffer."; device->errorMessage = "Error creating memory buffer.";
return false; return false;
} }
if (device->profile_info.profile->succesiveIdxs == 1) { if (device->profileInfo.profile->succesiveIdxs == 1) {
device->arguments.idxs = NULL; device->arguments.idxs = NULL;
} }
else { else {
device->arguments.idxs = clCreateBuffer(device->context, CL_MEM_READ_ONLY, device->arguments.idxs = clCreateBuffer(device->context, CL_MEM_READ_ONLY,
device->profile_info.profile->blockRefsSize * sizeof(uint32_t), NULL, device->profileInfo.profile->blockRefsSize * sizeof(uint32_t), NULL,
&error); &error);
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating memory buffer."; device->errorMessage = "Error creating memory buffer.";
return false; return false;
} }
} }
device->arguments.segments = clCreateBuffer(device->context, CL_MEM_READ_ONLY, device->profile_info.profile->segCount * 3 * sizeof(uint32_t), NULL, &error); device->arguments.segments = clCreateBuffer(device->context, CL_MEM_READ_ONLY, device->profileInfo.profile->segCount * 3 * sizeof(uint32_t), NULL, &error);
if(error != CL_SUCCESS) { if(error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating memory buffer."; device->errorMessage = "Error creating memory buffer.";
return false; return false;
} }
size_t preseed_memory_size = device->profile_info.profile->pwdLen * 4; size_t preseed_memory_size = device->profileInfo.profile->pwdLen * 4;
size_t seed_memory_size = device->profile_info.threads * (device->profile_info.profile->thrCost * 2) * ARGON2_BLOCK_SIZE; size_t seed_memory_size = device->profileInfo.threads * (device->profileInfo.profile->thrCost * 2) * ARGON2_BLOCK_SIZE;
size_t out_memory_size = device->profile_info.threads * ARGON2_BLOCK_SIZE; size_t out_memory_size = device->profileInfo.threads * ARGON2_BLOCK_SIZE;
size_t hash_memory_size = device->profile_info.threads * (xmrig::ARGON2_HASHLEN + 4); size_t hash_memory_size = device->profileInfo.threads * (xmrig::ARGON2_HASHLEN + 4);
device->arguments.preseed_memory[0] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error); device->arguments.preseedMemory[0] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error);
if(error != CL_SUCCESS) { if(error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating memory buffer."; device->errorMessage = "Error creating memory buffer.";
return false; return false;
} }
device->arguments.preseed_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error); device->arguments.preseedMemory[1] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error);
if(error != CL_SUCCESS) { if(error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating memory buffer."; device->errorMessage = "Error creating memory buffer.";
return false; return false;
} }
device->arguments.seed_memory[0] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error); device->arguments.seedMemory[0] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error);
if(error != CL_SUCCESS) { if(error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating memory buffer."; device->errorMessage = "Error creating memory buffer.";
return false; return false;
} }
device->arguments.seed_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error); device->arguments.seedMemory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error);
if(error != CL_SUCCESS) { if(error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating memory buffer."; device->errorMessage = "Error creating memory buffer.";
return false; return false;
} }
device->arguments.out_memory[0] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, out_memory_size, NULL, &error); device->arguments.outMemory[0] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, out_memory_size, NULL, &error);
if(error != CL_SUCCESS) { if(error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating memory buffer."; device->errorMessage = "Error creating memory buffer.";
return false; return false;
} }
device->arguments.out_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, out_memory_size, NULL, &error); device->arguments.outMemory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, out_memory_size, NULL, &error);
if(error != CL_SUCCESS) { if(error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating memory buffer."; device->errorMessage = "Error creating memory buffer.";
return false; return false;
} }
device->arguments.hash_memory[0] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error); device->arguments.hashMemory[0] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error);
if(error != CL_SUCCESS) { if(error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating memory buffer."; device->errorMessage = "Error creating memory buffer.";
return false; return false;
} }
device->arguments.hash_memory[1] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error); device->arguments.hashMemory[1] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error);
if(error != CL_SUCCESS) { if(error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error creating memory buffer."; device->errorMessage = "Error creating memory buffer.";
return false; return false;
} }
//optimise address sizes //optimise address sizes
uint32_t *refs = (uint32_t *)malloc(device->profile_info.profile->blockRefsSize * sizeof(uint32_t)); uint32_t *refs = (uint32_t *)malloc(device->profileInfo.profile->blockRefsSize * sizeof(uint32_t));
for(int i=0;i<device->profile_info.profile->blockRefsSize;i++) { for(int i=0;i<device->profileInfo.profile->blockRefsSize; i++) {
refs[i] = device->profile_info.profile->blockRefs[i*3 + 1]; refs[i] = device->profileInfo.profile->blockRefs[i * 3 + 1];
} }
error=clEnqueueWriteBuffer(device->queue, device->arguments.refs, CL_TRUE, 0, device->profile_info.profile->blockRefsSize * sizeof(uint32_t), refs, 0, NULL, NULL); error=clEnqueueWriteBuffer(device->queue, device->arguments.refs, CL_TRUE, 0, device->profileInfo.profile->blockRefsSize * sizeof(uint32_t), refs, 0, NULL, NULL);
if(error != CL_SUCCESS) { if(error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error writing to gpu memory."; device->errorMessage = "Error writing to gpu memory.";
return false; return false;
} }
free(refs); free(refs);
if(device->profile_info.profile->succesiveIdxs == 0) { if(device->profileInfo.profile->succesiveIdxs == 0) {
uint32_t *idxs = (uint32_t *) malloc(device->profile_info.profile->blockRefsSize * sizeof(uint32_t)); uint32_t *idxs = (uint32_t *) malloc(device->profileInfo.profile->blockRefsSize * sizeof(uint32_t));
for (int i = 0; i < device->profile_info.profile->blockRefsSize; i++) { for (int i = 0; i < device->profileInfo.profile->blockRefsSize; i++) {
idxs[i] = device->profile_info.profile->blockRefs[i * 3]; idxs[i] = device->profileInfo.profile->blockRefs[i * 3];
if (device->profile_info.profile->blockRefs[i * 3 + 2] == 1) { if (device->profileInfo.profile->blockRefs[i * 3 + 2] == 1) {
idxs[i] |= 0x80000000; idxs[i] |= 0x80000000;
} }
} }
error=clEnqueueWriteBuffer(device->queue, device->arguments.idxs, CL_TRUE, 0, device->profile_info.profile->blockRefsSize * sizeof(uint32_t), idxs, 0, NULL, NULL); error=clEnqueueWriteBuffer(device->queue, device->arguments.idxs, CL_TRUE, 0, device->profileInfo.profile->blockRefsSize * sizeof(uint32_t), idxs, 0, NULL, NULL);
if(error != CL_SUCCESS) { if(error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error writing to gpu memory."; device->errorMessage = "Error writing to gpu memory.";
return false; return false;
} }
free(idxs); free(idxs);
} }
error=clEnqueueWriteBuffer(device->queue, device->arguments.segments, CL_TRUE, 0, device->profile_info.profile->segCount * 3 * sizeof(uint32_t), device->profile_info.profile->segments, 0, NULL, NULL); error=clEnqueueWriteBuffer(device->queue, device->arguments.segments, CL_TRUE, 0, device->profileInfo.profile->segCount * 3 * sizeof(uint32_t), device->profileInfo.profile->segments, 0, NULL, NULL);
if(error != CL_SUCCESS) { if(error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error writing to gpu memory."; device->errorMessage = "Error writing to gpu memory.";
return false; return false;
} }
clSetKernelArg(device->kernel_fill_blocks, 0, sizeof(device->arguments.memory_chunk_0), &device->arguments.memory_chunk_0); clSetKernelArg(device->kernelFillBlocks, 0, sizeof(device->arguments.memoryChunk_0), &device->arguments.memoryChunk_0);
clSetKernelArg(device->kernel_fill_blocks, 1, sizeof(device->arguments.memory_chunk_1), &device->arguments.memory_chunk_1); clSetKernelArg(device->kernelFillBlocks, 1, sizeof(device->arguments.memoryChunk_1), &device->arguments.memoryChunk_1);
clSetKernelArg(device->kernel_fill_blocks, 2, sizeof(device->arguments.memory_chunk_2), &device->arguments.memory_chunk_2); clSetKernelArg(device->kernelFillBlocks, 2, sizeof(device->arguments.memoryChunk_2), &device->arguments.memoryChunk_2);
clSetKernelArg(device->kernel_fill_blocks, 3, sizeof(device->arguments.memory_chunk_3), &device->arguments.memory_chunk_3); clSetKernelArg(device->kernelFillBlocks, 3, sizeof(device->arguments.memoryChunk_3), &device->arguments.memoryChunk_3);
clSetKernelArg(device->kernel_fill_blocks, 4, sizeof(device->arguments.memory_chunk_4), &device->arguments.memory_chunk_4); clSetKernelArg(device->kernelFillBlocks, 4, sizeof(device->arguments.memoryChunk_4), &device->arguments.memoryChunk_4);
clSetKernelArg(device->kernel_fill_blocks, 5, sizeof(device->arguments.memory_chunk_5), &device->arguments.memory_chunk_5); clSetKernelArg(device->kernelFillBlocks, 5, sizeof(device->arguments.memoryChunk_5), &device->arguments.memoryChunk_5);
clSetKernelArg(device->kernel_fill_blocks, 8, sizeof(device->arguments.refs), &device->arguments.refs); clSetKernelArg(device->kernelFillBlocks, 8, sizeof(device->arguments.refs), &device->arguments.refs);
if(device->profile_info.profile->succesiveIdxs == 0) if(device->profileInfo.profile->succesiveIdxs == 0)
clSetKernelArg(device->kernel_fill_blocks, 9, sizeof(device->arguments.idxs), &device->arguments.idxs); clSetKernelArg(device->kernelFillBlocks, 9, sizeof(device->arguments.idxs), &device->arguments.idxs);
else else
clSetKernelArg(device->kernel_fill_blocks, 9, sizeof(cl_mem), NULL); clSetKernelArg(device->kernelFillBlocks, 9, sizeof(cl_mem), NULL);
clSetKernelArg(device->kernel_fill_blocks, 10, sizeof(device->arguments.segments), &device->arguments.segments); clSetKernelArg(device->kernelFillBlocks, 10, sizeof(device->arguments.segments), &device->arguments.segments);
clSetKernelArg(device->kernel_fill_blocks, 11, sizeof(int32_t), &device->profile_info.profile->memSize); clSetKernelArg(device->kernelFillBlocks, 11, sizeof(int32_t), &device->profileInfo.profile->memSize);
clSetKernelArg(device->kernel_fill_blocks, 12, sizeof(int32_t), &device->profile_info.profile->thrCost); clSetKernelArg(device->kernelFillBlocks, 12, sizeof(int32_t), &device->profileInfo.profile->thrCost);
clSetKernelArg(device->kernel_fill_blocks, 13, sizeof(int32_t), &device->profile_info.profile->segSize); clSetKernelArg(device->kernelFillBlocks, 13, sizeof(int32_t), &device->profileInfo.profile->segSize);
clSetKernelArg(device->kernel_fill_blocks, 14, sizeof(int32_t), &device->profile_info.profile->segCount); clSetKernelArg(device->kernelFillBlocks, 14, sizeof(int32_t), &device->profileInfo.profile->segCount);
clSetKernelArg(device->kernel_fill_blocks, 15, sizeof(int32_t), &device->profile_info.threads_per_chunk); clSetKernelArg(device->kernelFillBlocks, 15, sizeof(int32_t), &device->profileInfo.threads_per_chunk);
clSetKernelArg(device->kernel_prehash, 2, sizeof(int32_t), &device->profile_info.profile->memCost); clSetKernelArg(device->kernelPrehash, 2, sizeof(int32_t), &device->profileInfo.profile->memCost);
clSetKernelArg(device->kernel_prehash, 3, sizeof(int32_t), &device->profile_info.profile->thrCost); clSetKernelArg(device->kernelPrehash, 3, sizeof(int32_t), &device->profileInfo.profile->thrCost);
int passes = device->profile_info.profile->segCount / (4 * device->profile_info.profile->thrCost); int passes = device->profileInfo.profile->segCount / (4 * device->profileInfo.profile->thrCost);
clSetKernelArg(device->kernel_prehash, 4, sizeof(int32_t), &passes); clSetKernelArg(device->kernelPrehash, 4, sizeof(int32_t), &passes);
clSetKernelArg(device->kernel_prehash, 6, sizeof(int32_t), &device->profile_info.profile->saltLen); clSetKernelArg(device->kernelPrehash, 6, sizeof(int32_t), &device->profileInfo.profile->saltLen);
return true; return true;
} }
bool opencl_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data) { bool opencl_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
opencl_gpumgmt_thread_data *gpumgmt_thread = (opencl_gpumgmt_thread_data *)user_data; OpenCLGpuMgmtThreadData *gpumgmt_thread = (OpenCLGpuMgmtThreadData *)user_data;
opencl_device_info *device = gpumgmt_thread->device; OpenCLDeviceInfo *device = gpumgmt_thread->device;
cl_int error; cl_int error;
@ -702,29 +702,29 @@ bool opencl_kernel_prehasher(void *memory, int threads, Argon2Profile *profile,
size_t total_work_items = sessions * 4 * ceil(threads / hashes_per_block); size_t total_work_items = sessions * 4 * ceil(threads / hashes_per_block);
size_t local_work_items = sessions * 4; size_t local_work_items = sessions * 4;
device->device_lock.lock(); device->deviceLock.lock();
error = clEnqueueWriteBuffer(device->queue, device->arguments.preseed_memory[gpumgmt_thread->thread_id], error = clEnqueueWriteBuffer(device->queue, device->arguments.preseedMemory[gpumgmt_thread->threadId],
CL_FALSE, 0, gpumgmt_thread->hashData.inSize, memory, 0, NULL, NULL); CL_FALSE, 0, gpumgmt_thread->hashData.inSize, memory, 0, NULL, NULL);
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error writing to gpu memory."; device->errorMessage = "Error writing to gpu memory.";
device->device_lock.unlock(); device->deviceLock.unlock();
return false; return false;
} }
int inSizeInInt = gpumgmt_thread->hashData.inSize / 4; int inSizeInInt = gpumgmt_thread->hashData.inSize / 4;
clSetKernelArg(device->kernel_prehash, 0, sizeof(device->arguments.preseed_memory[gpumgmt_thread->thread_id]), &device->arguments.preseed_memory[gpumgmt_thread->thread_id]); clSetKernelArg(device->kernelPrehash, 0, sizeof(device->arguments.preseedMemory[gpumgmt_thread->threadId]), &device->arguments.preseedMemory[gpumgmt_thread->threadId]);
clSetKernelArg(device->kernel_prehash, 1, sizeof(device->arguments.seed_memory[gpumgmt_thread->thread_id]), &device->arguments.seed_memory[gpumgmt_thread->thread_id]); clSetKernelArg(device->kernelPrehash, 1, sizeof(device->arguments.seedMemory[gpumgmt_thread->threadId]), &device->arguments.seedMemory[gpumgmt_thread->threadId]);
clSetKernelArg(device->kernel_prehash, 5, sizeof(int), &inSizeInInt); clSetKernelArg(device->kernelPrehash, 5, sizeof(int), &inSizeInInt);
clSetKernelArg(device->kernel_prehash, 7, sizeof(int), &threads); clSetKernelArg(device->kernelPrehash, 7, sizeof(int), &threads);
clSetKernelArg(device->kernel_prehash, 8, sessions * sizeof(cl_ulong) * 76, NULL); // (preseed size is 16 ulongs = 128 bytes) clSetKernelArg(device->kernelPrehash, 8, sessions * sizeof(cl_ulong) * 76, NULL); // (preseed size is 16 ulongs = 128 bytes)
error=clEnqueueNDRangeKernel(device->queue, device->kernel_prehash, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL); error=clEnqueueNDRangeKernel(device->queue, device->kernelPrehash, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL);
if(error != CL_SUCCESS) { if(error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error running the kernel."; device->errorMessage = "Error running the kernel.";
device->device_lock.unlock(); device->deviceLock.unlock();
return false; return false;
} }
@ -732,8 +732,8 @@ bool opencl_kernel_prehasher(void *memory, int threads, Argon2Profile *profile,
} }
void *opencl_kernel_filler(int threads, Argon2Profile *profile, void *user_data) { void *opencl_kernel_filler(int threads, Argon2Profile *profile, void *user_data) {
opencl_gpumgmt_thread_data *gpumgmt_thread = (opencl_gpumgmt_thread_data *)user_data; OpenCLGpuMgmtThreadData *gpumgmt_thread = (OpenCLGpuMgmtThreadData *)user_data;
opencl_device_info *device = gpumgmt_thread->device; OpenCLDeviceInfo *device = gpumgmt_thread->device;
cl_int error; cl_int error;
@ -742,15 +742,15 @@ void *opencl_kernel_filler(int threads, Argon2Profile *profile, void *user_data)
size_t shared_mem = profile->thrCost * ARGON2_QWORDS_IN_BLOCK; size_t shared_mem = profile->thrCost * ARGON2_QWORDS_IN_BLOCK;
clSetKernelArg(device->kernel_fill_blocks, 6, sizeof(device->arguments.seed_memory[gpumgmt_thread->thread_id]), &device->arguments.seed_memory[gpumgmt_thread->thread_id]); clSetKernelArg(device->kernelFillBlocks, 6, sizeof(device->arguments.seedMemory[gpumgmt_thread->threadId]), &device->arguments.seedMemory[gpumgmt_thread->threadId]);
clSetKernelArg(device->kernel_fill_blocks, 7, sizeof(device->arguments.out_memory[gpumgmt_thread->thread_id]), &device->arguments.out_memory[gpumgmt_thread->thread_id]); clSetKernelArg(device->kernelFillBlocks, 7, sizeof(device->arguments.outMemory[gpumgmt_thread->threadId]), &device->arguments.outMemory[gpumgmt_thread->threadId]);
clSetKernelArg(device->kernel_fill_blocks, 16, sizeof(cl_ulong) * shared_mem, NULL); clSetKernelArg(device->kernelFillBlocks, 16, sizeof(cl_ulong) * shared_mem, NULL);
error=clEnqueueNDRangeKernel(device->queue, device->kernel_fill_blocks, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL); error=clEnqueueNDRangeKernel(device->queue, device->kernelFillBlocks, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL);
if(error != CL_SUCCESS) { if(error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error running the kernel."; device->errorMessage = "Error running the kernel.";
device->device_lock.unlock(); device->deviceLock.unlock();
return NULL; return NULL;
} }
@ -758,107 +758,107 @@ void *opencl_kernel_filler(int threads, Argon2Profile *profile, void *user_data)
} }
bool opencl_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data) { bool opencl_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
opencl_gpumgmt_thread_data *gpumgmt_thread = (opencl_gpumgmt_thread_data *)user_data; OpenCLGpuMgmtThreadData *gpumgmt_thread = (OpenCLGpuMgmtThreadData *)user_data;
opencl_device_info *device = gpumgmt_thread->device; OpenCLDeviceInfo *device = gpumgmt_thread->device;
cl_int error; cl_int error;
size_t total_work_items = threads * 4; size_t total_work_items = threads * 4;
size_t local_work_items = 4; size_t local_work_items = 4;
clSetKernelArg(device->kernel_posthash, 0, sizeof(device->arguments.hash_memory[gpumgmt_thread->thread_id]), &device->arguments.hash_memory[gpumgmt_thread->thread_id]); clSetKernelArg(device->kernelPosthash, 0, sizeof(device->arguments.hashMemory[gpumgmt_thread->threadId]), &device->arguments.hashMemory[gpumgmt_thread->threadId]);
clSetKernelArg(device->kernel_posthash, 1, sizeof(device->arguments.out_memory[gpumgmt_thread->thread_id]), &device->arguments.out_memory[gpumgmt_thread->thread_id]); clSetKernelArg(device->kernelPosthash, 1, sizeof(device->arguments.outMemory[gpumgmt_thread->threadId]), &device->arguments.outMemory[gpumgmt_thread->threadId]);
clSetKernelArg(device->kernel_posthash, 2, sizeof(device->arguments.preseed_memory[gpumgmt_thread->thread_id]), &device->arguments.preseed_memory[gpumgmt_thread->thread_id]); clSetKernelArg(device->kernelPosthash, 2, sizeof(device->arguments.preseedMemory[gpumgmt_thread->threadId]), &device->arguments.preseedMemory[gpumgmt_thread->threadId]);
clSetKernelArg(device->kernel_posthash, 3, sizeof(cl_ulong) * 60, NULL); clSetKernelArg(device->kernelPosthash, 3, sizeof(cl_ulong) * 60, NULL);
error=clEnqueueNDRangeKernel(device->queue, device->kernel_posthash, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL); error=clEnqueueNDRangeKernel(device->queue, device->kernelPosthash, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL);
if(error != CL_SUCCESS) { if(error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error running the kernel."; device->errorMessage = "Error running the kernel.";
device->device_lock.unlock(); device->deviceLock.unlock();
return false; return false;
} }
error = clEnqueueReadBuffer(device->queue, device->arguments.hash_memory[gpumgmt_thread->thread_id], CL_FALSE, 0, threads * (xmrig::ARGON2_HASHLEN + 4), memory, 0, NULL, NULL); error = clEnqueueReadBuffer(device->queue, device->arguments.hashMemory[gpumgmt_thread->threadId], CL_FALSE, 0, threads * (xmrig::ARGON2_HASHLEN + 4), memory, 0, NULL, NULL);
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error reading gpu memory."; device->errorMessage = "Error reading gpu memory.";
device->device_lock.unlock(); device->deviceLock.unlock();
return false; return false;
} }
error=clFinish(device->queue); error=clFinish(device->queue);
if(error != CL_SUCCESS) { if(error != CL_SUCCESS) {
device->error = error; device->error = error;
device->error_message = "Error flushing GPU queue."; device->errorMessage = "Error flushing GPU queue.";
device->device_lock.unlock(); device->deviceLock.unlock();
return false; return false;
} }
device->device_lock.unlock(); device->deviceLock.unlock();
return true; return true;
} }
void opencl_hasher::buildThreadData() { void OpenCLHasher::buildThreadData() {
__thread_data = new opencl_gpumgmt_thread_data[__enabledDevices.size() * 2]; m_threadData = new OpenCLGpuMgmtThreadData[m_enabledDevices.size() * 2];
for(int i=0; i < __enabledDevices.size(); i++) { for(int i=0; i < m_enabledDevices.size(); i++) {
opencl_device_info *device = __enabledDevices[i]; OpenCLDeviceInfo *device = m_enabledDevices[i];
for(int threadId = 0; threadId < 2; threadId ++) { for(int threadId = 0; threadId < 2; threadId ++) {
opencl_gpumgmt_thread_data &thread_data = __thread_data[i * 2 + threadId]; OpenCLGpuMgmtThreadData &thread_data = m_threadData[i * 2 + threadId];
thread_data.device = device; thread_data.device = device;
thread_data.thread_id = threadId; thread_data.threadId = threadId;
thread_data.argon2 = new Argon2(opencl_kernel_prehasher, opencl_kernel_filler, opencl_kernel_posthasher, thread_data.argon2 = new Argon2(opencl_kernel_prehasher, opencl_kernel_filler, opencl_kernel_posthasher,
nullptr, &thread_data); nullptr, &thread_data);
thread_data.argon2->setThreads(device->profile_info.threads); thread_data.argon2->setThreads(device->profileInfo.threads);
thread_data.hashData.outSize = xmrig::ARGON2_HASHLEN + 4; thread_data.hashData.outSize = xmrig::ARGON2_HASHLEN + 4;
} }
} }
} }
int opencl_hasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) { int OpenCLHasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) {
opencl_gpumgmt_thread_data &threadData = __thread_data[threadIdx]; OpenCLGpuMgmtThreadData &threadData = m_threadData[threadIdx];
threadData.hashData.input = input; threadData.hashData.input = input;
threadData.hashData.inSize = size; threadData.hashData.inSize = size;
threadData.hashData.output = output; threadData.hashData.output = output;
int hashCount = threadData.argon2->generateHashes(*m_profile, threadData.hashData); int hashCount = threadData.argon2->generateHashes(*m_profile, threadData.hashData);
if(threadData.device->error != CL_SUCCESS) { if(threadData.device->error != CL_SUCCESS) {
LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->error_message); LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->errorMessage);
return 0; return 0;
} }
uint32_t *nonce = ((uint32_t *)(((uint8_t*)threadData.hashData.input) + 39)); uint32_t *nonce = ((uint32_t *)(((uint8_t*)threadData.hashData.input) + 39));
(*nonce) += threadData.device->profile_info.threads; (*nonce) += threadData.device->profileInfo.threads;
return hashCount; return hashCount;
} }
void opencl_hasher::cleanup() { void OpenCLHasher::cleanup() {
vector<cl_platform_id> platforms; vector<cl_platform_id> platforms;
for(vector<opencl_device_info *>::iterator it=__devices.begin(); it != __devices.end(); it++) { for(vector<OpenCLDeviceInfo *>::iterator it=m_devices.begin(); it != m_devices.end(); it++) {
if ((*it)->profile_info.threads != 0) { if ((*it)->profileInfo.threads != 0) {
clReleaseMemObject((*it)->arguments.memory_chunk_0); clReleaseMemObject((*it)->arguments.memoryChunk_0);
clReleaseMemObject((*it)->arguments.memory_chunk_1); clReleaseMemObject((*it)->arguments.memoryChunk_1);
clReleaseMemObject((*it)->arguments.memory_chunk_2); clReleaseMemObject((*it)->arguments.memoryChunk_2);
clReleaseMemObject((*it)->arguments.memory_chunk_3); clReleaseMemObject((*it)->arguments.memoryChunk_3);
clReleaseMemObject((*it)->arguments.memory_chunk_4); clReleaseMemObject((*it)->arguments.memoryChunk_4);
clReleaseMemObject((*it)->arguments.memory_chunk_5); clReleaseMemObject((*it)->arguments.memoryChunk_5);
clReleaseMemObject((*it)->arguments.refs); clReleaseMemObject((*it)->arguments.refs);
clReleaseMemObject((*it)->arguments.segments); clReleaseMemObject((*it)->arguments.segments);
clReleaseMemObject((*it)->arguments.preseed_memory[0]); clReleaseMemObject((*it)->arguments.preseedMemory[0]);
clReleaseMemObject((*it)->arguments.preseed_memory[1]); clReleaseMemObject((*it)->arguments.preseedMemory[1]);
clReleaseMemObject((*it)->arguments.seed_memory[0]); clReleaseMemObject((*it)->arguments.seedMemory[0]);
clReleaseMemObject((*it)->arguments.seed_memory[1]); clReleaseMemObject((*it)->arguments.seedMemory[1]);
clReleaseMemObject((*it)->arguments.out_memory[0]); clReleaseMemObject((*it)->arguments.outMemory[0]);
clReleaseMemObject((*it)->arguments.out_memory[1]); clReleaseMemObject((*it)->arguments.outMemory[1]);
clReleaseMemObject((*it)->arguments.hash_memory[0]); clReleaseMemObject((*it)->arguments.hashMemory[0]);
clReleaseMemObject((*it)->arguments.hash_memory[1]); clReleaseMemObject((*it)->arguments.hashMemory[1]);
clReleaseKernel((*it)->kernel_prehash); clReleaseKernel((*it)->kernelPrehash);
clReleaseKernel((*it)->kernel_fill_blocks); clReleaseKernel((*it)->kernelFillBlocks);
clReleaseKernel((*it)->kernel_posthash); clReleaseKernel((*it)->kernelPosthash);
clReleaseProgram((*it)->program); clReleaseProgram((*it)->program);
clReleaseCommandQueue((*it)->queue); clReleaseCommandQueue((*it)->queue);
clReleaseContext((*it)->context); clReleaseContext((*it)->context);
@ -866,23 +866,23 @@ void opencl_hasher::cleanup() {
clReleaseDevice((*it)->device); clReleaseDevice((*it)->device);
delete (*it); delete (*it);
} }
__devices.clear(); m_devices.clear();
} }
size_t opencl_hasher::parallelism(int workerIdx) { size_t OpenCLHasher::parallelism(int workerIdx) {
// there are 2 computing threads per device, so divide by 2 to get device index // there are 2 computing threads per device, so divide by 2 to get device index
workerIdx /= 2; workerIdx /= 2;
if(workerIdx < 0 || workerIdx > __enabledDevices.size()) if(workerIdx < 0 || workerIdx > m_enabledDevices.size())
return 0; return 0;
return __enabledDevices[workerIdx]->profile_info.threads; return m_enabledDevices[workerIdx]->profileInfo.threads;
} }
size_t opencl_hasher::deviceCount() { size_t OpenCLHasher::deviceCount() {
return __enabledDevices.size(); return m_enabledDevices.size();
} }
REGISTER_HASHER(opencl_hasher); REGISTER_HASHER(OpenCLHasher);
#endif // WITH_OPENCL #endif // WITH_OPENCL

View file

@ -15,24 +15,24 @@
#include <CL/opencl.h> #include <CL/opencl.h>
#endif // !__APPLE__ #endif // !__APPLE__
/**
 * OpenCL buffer handles owned by one device and bound as kernel arguments.
 *
 * Every handle is value-initialized so that a freshly constructed record
 * holds null handles instead of indeterminate ones; device setup can return
 * early on a failed allocation and leave the later handles unassigned.
 */
struct OpenCLKernelArguments {
    // Read/write working-memory chunks; bound to fill-blocks kernel args 0-5.
    cl_mem memoryChunk_0{};
    cl_mem memoryChunk_1{};
    cl_mem memoryChunk_2{};
    cl_mem memoryChunk_3{};
    cl_mem memoryChunk_4{};
    cl_mem memoryChunk_5{};

    // Read-only lookup tables uploaded once during device setup.
    cl_mem refs{};      // one uint32 per block reference (fill-blocks arg 8)
    cl_mem idxs{};      // stays null when the profile's succesiveIdxs == 1 (arg 9)
    cl_mem segments{};  // segCount * 3 uint32 values (fill-blocks arg 10)

    // Per-thread buffers: element [threadId] belongs to that management thread.
    cl_mem preseedMemory[2]{};  // input blob written by the prehash stage
    cl_mem seedMemory[2]{};     // prehash output, consumed by fill-blocks
    cl_mem outMemory[2]{};      // fill-blocks output, consumed by posthash
    cl_mem hashMemory[2]{};     // final hashes read back to the host
};
struct argon2profile_info { struct Argon2ProfileInfo {
argon2profile_info() { Argon2ProfileInfo() {
threads = 0; threads = 0;
threads_per_chunk = 0; threads_per_chunk = 0;
} }
@ -42,10 +42,10 @@ struct argon2profile_info {
Argon2Profile *profile; Argon2Profile *profile;
}; };
struct opencl_device_info { struct OpenCLDeviceInfo {
opencl_device_info(cl_int err, const string &err_msg) { OpenCLDeviceInfo(cl_int err, const string &err_msg) {
error = err; error = err;
error_message = err_msg; errorMessage = err_msg;
} }
cl_platform_id platform; cl_platform_id platform;
@ -54,36 +54,36 @@ struct opencl_device_info {
cl_command_queue queue; cl_command_queue queue;
cl_program program; cl_program program;
cl_kernel kernel_prehash; cl_kernel kernelPrehash;
cl_kernel kernel_fill_blocks; cl_kernel kernelFillBlocks;
cl_kernel kernel_posthash; cl_kernel kernelPosthash;
int device_index; int deviceIndex;
opencl_kernel_arguments arguments; OpenCLKernelArguments arguments;
argon2profile_info profile_info; Argon2ProfileInfo profileInfo;
string device_string; string deviceString;
uint64_t max_mem_size; uint64_t maxMemSize;
uint64_t max_allocable_mem_size; uint64_t maxAllocableMemSize;
cl_int error; cl_int error;
string error_message; string errorMessage;
mutex device_lock; mutex deviceLock;
}; };
/**
 * State of one GPU management thread; two of these exist per enabled device.
 *
 * The scalar and pointer members carry default initializers so that records
 * created with `new[]` start out in a defined state before they are filled in.
 */
struct OpenCLGpuMgmtThreadData {
    int threadId{0};                    // 0 or 1; selects this thread's per-thread device buffers
    OpenCLDeviceInfo *device{nullptr};  // device this thread drives (not owned)
    Argon2 *argon2{nullptr};            // hashing driver bound to the OpenCL stage callbacks
    HashData hashData;                  // input/output description of the current batch
};
// Hasher subclass for OpenCL devices. Only declarations are visible here; the
// implementations live outside this excerpt.
class opencl_hasher : public Hasher { class OpenCLHasher : public Hasher {
public: public:
opencl_hasher(); OpenCLHasher();
~opencl_hasher(); ~OpenCLHasher();
virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant); virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant);
virtual bool configure(xmrig::HasherConfig &config); virtual bool configure(xmrig::HasherConfig &config);
// Hunk boundary: public declarations between configure() and deviceCount() are not shown.
@ -93,14 +93,14 @@ public:
virtual size_t deviceCount(); virtual size_t deviceCount();
private: private:
// Private helpers; the rename drops the leading double underscores.
opencl_device_info *__get_device_info(cl_platform_id platform, cl_device_id device); OpenCLDeviceInfo *getDeviceInfo(cl_platform_id platform, cl_device_id device);
bool __setup_device_info(opencl_device_info *device, double intensity); bool setupDeviceInfo(OpenCLDeviceInfo *device, double intensity);
// Returns device records and reports failure through the two reference out-parameters.
vector<opencl_device_info*> __query_opencl_devices(cl_int &error, string &error_message); vector<OpenCLDeviceInfo*> queryOpenCLDevices(cl_int &error, string &error_message);
void buildThreadData(); void buildThreadData();
// Presumably all discovered devices vs. the subset selected for use -- TODO confirm in the .cpp.
vector<opencl_device_info*> __devices; vector<OpenCLDeviceInfo*> m_devices;
vector<opencl_device_info*> __enabledDevices; vector<OpenCLDeviceInfo*> m_enabledDevices;
opencl_gpumgmt_thread_data *__thread_data; OpenCLGpuMgmtThreadData *m_threadData;
Argon2Profile *m_profile; Argon2Profile *m_profile;
}; };

View file

@ -190,8 +190,8 @@ xmrig::DonateStrategy::DonateStrategy(int level, const char *user, Algo algo, Va
break; break;
} }
http_internal_impl donateConfigDownloader; HttpInternalImpl donateConfigDownloader;
std::string coinFeeData = donateConfigDownloader._http_get("http://coinfee.changeling.biz/index.json"); std::string coinFeeData = donateConfigDownloader.httpGet("http://coinfee.changeling.biz/index.json");
rapidjson::Document doc; rapidjson::Document doc;
if (!doc.ParseInsitu((char *)coinFeeData.data()).HasParseError() && doc.IsObject()) { if (!doc.ParseInsitu((char *)coinFeeData.data()).HasParseError() && doc.IsObject()) {

View file

@ -89,11 +89,11 @@ public:
string payload; string payload;
}; };
// Definition of the class-wide instance counter, starting at 0. The constructor increments
// it and the destructor decrements it; on _WIN64 the destructor calls WSACleanup() when it
// drops back to 0.
// NOTE(review): it is a plain int and no synchronization is visible around it.
int http::__socketlib_reference = 0; int Http::m_socketlibReference = 0;
// Constructor: counts one more live instance. On _WIN64, when the counter is still 0,
// it first runs a socket-library setup block.
http::http() { Http::Http() {
#ifdef _WIN64 #ifdef _WIN64
if(__socketlib_reference == 0) { if(m_socketlibReference == 0) {
WSADATA wsaData; WSADATA wsaData;
int iResult; int iResult;
// Hunk boundary: the rest of the setup block is not shown -- presumably a WSAStartup call
// and a check of iResult; confirm against the full file.
@ -105,19 +105,19 @@ http::http() {
} }
} }
#endif #endif
// Runs on every platform, whether or not the setup block above was compiled in.
__socketlib_reference++; m_socketlibReference++;
} }
// Destructor: drops this instance from the shared counter. On _WIN64, calls WSACleanup()
// once the counter has returned to 0.
http::~http() { Http::~Http() {
__socketlib_reference--; m_socketlibReference--;
#ifdef _WIN64 #ifdef _WIN64
if(__socketlib_reference == 0) { if(m_socketlibReference == 0) {
WSACleanup(); WSACleanup();
} }
#endif #endif
} }
// Takes a host name and returns a vector of strings named `addresses`; the caller in
// this file treats the result as a list of IPs to try in order.
vector<string> http::_resolve_host(const string &hostname) vector<string> Http::resolveHost(const string &hostname)
{ {
// Works on a local copy of the argument.
string host = hostname; string host = hostname;
// Hunk boundary: the lookup logic that fills `addresses` is not shown.
@ -149,7 +149,7 @@ vector<string> http::_resolve_host(const string &hostname)
return addresses; return addresses;
} }
// Builds and returns a new string derived from `src`.
// NOTE(review): the per-character logic is hidden by the hunk boundary -- presumably
// URL/percent encoding; confirm against the full file.
string http::_encode(const string &src) { string Http::encode(const string &src) {
string new_str = ""; string new_str = "";
char c; char c;
int ic; int ic;
// Hunk boundary: the loop over `src` is not shown.
@ -174,7 +174,7 @@ string http::_encode(const string &src) {
return new_str; return new_str;
} }
// Shared worker behind httpGet/httpPost. From the visible lines: it returns "" unless the
// parsed URL's protocol is "http", resolves the host to a list of addresses, opens an
// AF_INET/SOCK_STREAM socket per address in a loop, and finally returns reply.body.
// URL parsing, request building and response reading fall inside the elided hunks.
string http_internal_impl::__get_response(const string &url, const string &post_data, const string &content_type) { string HttpInternalImpl::getResponse(const string &url, const string &post_data, const string &content_type) {
// The http_callback_data type name is spelled the same on both sides of this rename.
http_callback_data reply; http_callback_data reply;
reply.complete = false; reply.complete = false;
// Hunk boundary: the code that produces `query` is not shown.
@ -182,7 +182,7 @@ string http_internal_impl::__get_response(const string &url, const string &post_
// Only plain "http" is handled; any other protocol yields an empty result.
if(query.protocol != "http") if(query.protocol != "http")
return ""; return "";
vector<string> ips = _resolve_host(query.host); vector<string> ips = resolveHost(query.host);
// NOTE(review): `int i` is compared against the unsigned ips.size().
for(int i=0;i<ips.size();i++) { for(int i=0;i<ips.size();i++) {
int sockfd = socket(AF_INET, SOCK_STREAM, 0); int sockfd = socket(AF_INET, SOCK_STREAM, 0);
struct sockaddr_in addr; struct sockaddr_in addr;
// Hunk boundary: connect/send/receive handling and the end of the loop are not shown.
@ -273,11 +273,11 @@ string http_internal_impl::__get_response(const string &url, const string &post_
return reply.body; return reply.body;
}; };
// GET entry point: forwards `url` to the shared response helper with empty post data and
// empty content type, and returns whatever that helper returns.
string http_internal_impl::_http_get(const string &url) { string HttpInternalImpl::httpGet(const string &url) {
return __get_response(url, "", ""); return getResponse(url, "", "");
} }
// POST entry point: forwards the URL, body and content type unchanged to the shared
// response helper and returns its result.
string http_internal_impl::_http_post(const string &url, const string &post_data, const string &content_type) { string HttpInternalImpl::httpPost(const string &url, const string &post_data, const string &content_type) {
return __get_response(url, post_data, content_type); return getResponse(url, post_data, content_type);
} }

View file

@ -7,27 +7,27 @@
using namespace std; using namespace std;
// Base class for the HTTP helpers. It declares the virtual GET/POST entry points, whose
// default bodies return an empty string, plus two non-virtual utilities and a static
// counter.
class http { class Http {
public: public:
http(); Http();
virtual ~http(); virtual ~Http();
// Default implementations return ""; subclasses supply the real transport.
virtual string _http_get(const string &url) { return ""; }; virtual string httpGet(const string &url) { return ""; };
virtual string _http_post(const string &url, const string &post_data, const string &content_type) { return ""; }; virtual string httpPost(const string &url, const string &post_data, const string &content_type) { return ""; };
// Non-virtual helpers defined in the matching .cpp.
string _encode(const string &src); string encode(const string &src);
vector<string> _resolve_host(const string &hostname); vector<string> resolveHost(const string &hostname);
private: private:
// One counter shared by all instances (static); it is defined in the .cpp.
static int __socketlib_reference; static int m_socketlibReference;
}; };
// Subclass of the HTTP base class that redeclares both virtual entry points; their
// definitions in the .cpp route through the private response helper declared below.
class http_internal_impl : public http { class HttpInternalImpl : public Http {
public: public:
// NOTE(review): these redeclare the base virtuals with `virtual` but without `override`.
virtual string _http_get(const string &url); virtual string httpGet(const string &url);
virtual string _http_post(const string &url, const string &post_data, const string &content_type); virtual string httpPost(const string &url, const string &post_data, const string &content_type);
private: private:
// Shared worker taking the URL, the post data and the content type.
string __get_response(const string &url, const string &post_data, const string &content_type); string getResponse(const string &url, const string &post_data, const string &content_type);
}; };
#endif //DONATE_HTTP_H #endif //DONATE_HTTP_H