Code fixes for naming convention consistency.

This commit is contained in:
Haifa Bogdan Adnan 2019-08-26 13:33:04 +03:00
parent 3b70cdd8e7
commit eb6959f312
10 changed files with 596 additions and 596 deletions

View file

@ -14,10 +14,10 @@
#include <cuda_runtime.h>
#include <driver_types.h>
#include "cuda_hasher.h"
#include "CudaHasher.h"
#include "../../../common/DLLExport.h"
cuda_hasher::cuda_hasher() {
CudaHasher::CudaHasher() {
m_type = "GPU";
m_subType = "CUDA";
m_shortSubType = "NVD";
@ -27,24 +27,24 @@ cuda_hasher::cuda_hasher() {
}
cuda_hasher::~cuda_hasher() {
CudaHasher::~CudaHasher() {
this->cleanup();
}
bool cuda_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) {
bool CudaHasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) {
cudaError_t error = cudaSuccess;
string error_message;
m_profile = getArgon2Profile(algorithm, variant);
__devices = __query_cuda_devices(error, error_message);
m_devices = queryCudaDevices(error, error_message);
if(error != cudaSuccess) {
m_description = "No compatible GPU detected: " + error_message;
return false;
}
if (__devices.empty()) {
if (m_devices.empty()) {
m_description = "No compatible GPU detected.";
return false;
}
@ -52,8 +52,8 @@ bool cuda_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) {
return true;
}
vector<cuda_device_info *> cuda_hasher::__query_cuda_devices(cudaError_t &error, string &error_message) {
vector<cuda_device_info *> devices;
vector<CudaDeviceInfo *> CudaHasher::queryCudaDevices(cudaError_t &error, string &error_message) {
vector<CudaDeviceInfo *> devices;
int devCount = 0;
error = cudaGetDeviceCount(&devCount);
@ -67,12 +67,12 @@ vector<cuda_device_info *> cuda_hasher::__query_cuda_devices(cudaError_t &error,
for (int i = 0; i < devCount; ++i)
{
cuda_device_info *dev = __get_device_info(i);
CudaDeviceInfo *dev = getDeviceInfo(i);
if(dev == NULL)
continue;
if(dev->error != cudaSuccess) {
error = dev->error;
error_message = dev->error_message;
error_message = dev->errorMessage;
continue;
}
devices.push_back(dev);
@ -80,45 +80,45 @@ vector<cuda_device_info *> cuda_hasher::__query_cuda_devices(cudaError_t &error,
return devices;
}
cuda_device_info *cuda_hasher::__get_device_info(int device_index) {
cuda_device_info *device_info = new cuda_device_info();
CudaDeviceInfo *CudaHasher::getDeviceInfo(int device_index) {
CudaDeviceInfo *device_info = new CudaDeviceInfo();
device_info->error = cudaSuccess;
device_info->cuda_index = device_index;
device_info->cudaIndex = device_index;
device_info->error = cudaSetDevice(device_index);
if(device_info->error != cudaSuccess) {
device_info->error_message = "Error setting current device.";
device_info->errorMessage = "Error setting current device.";
return device_info;
}
cudaDeviceProp devProp;
device_info->error = cudaGetDeviceProperties(&devProp, device_index);
if(device_info->error != cudaSuccess) {
device_info->error_message = "Error setting current device.";
device_info->errorMessage = "Error setting current device.";
return device_info;
}
device_info->device_string = devProp.name;
device_info->deviceString = devProp.name;
size_t freemem, totalmem;
device_info->error = cudaMemGetInfo(&freemem, &totalmem);
if(device_info->error != cudaSuccess) {
device_info->error_message = "Error setting current device.";
device_info->errorMessage = "Error setting current device.";
return device_info;
}
device_info->free_mem_size = freemem;
device_info->max_allocable_mem_size = freemem / 4;
device_info->freeMemSize = freemem;
device_info->maxAllocableMemSize = freemem / 4;
double mem_in_gb = totalmem / 1073741824.0;
stringstream ss;
ss << setprecision(2) << mem_in_gb;
device_info->device_string += (" (" + ss.str() + "GB)");
device_info->deviceString += (" (" + ss.str() + "GB)");
return device_info;
}
bool cuda_hasher::configure(xmrig::HasherConfig &config) {
bool CudaHasher::configure(xmrig::HasherConfig &config) {
int index = config.getGPUCardsCount();
double intensity = 0;
@ -134,12 +134,12 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) {
bool cards_selected = false;
intensity = 0;
for(vector<cuda_device_info *>::iterator d = __devices.begin(); d != __devices.end(); d++, index++) {
for(vector<CudaDeviceInfo *>::iterator d = m_devices.begin(); d != m_devices.end(); d++, index++) {
stringstream ss;
ss << "["<< (index + 1) << "] " << (*d)->device_string;
ss << "["<< (index + 1) << "] " << (*d)->deviceString;
string device_description = ss.str();
(*d)->device_index = index;
(*d)->profile_info.profile = m_profile;
(*d)->deviceIndex = index;
(*d)->profileInfo.profile = m_profile;
if(config.gpuFilter().size() > 0) {
bool found = false;
@ -150,7 +150,7 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) {
}
}
if(!found) {
(*d)->profile_info.threads = 0;
(*d)->profileInfo.threads = 0;
ss << " - DISABLED" << endl;
m_description += ss.str();
continue;
@ -165,12 +165,12 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) {
ss << endl;
double device_intensity = config.getGPUIntensity((*d)->device_index);
double device_intensity = config.getGPUIntensity((*d)->deviceIndex);
m_description += ss.str();
if(!(__setup_device_info((*d), device_intensity))) {
m_description += (*d)->error_message;
if(!(setupDeviceInfo((*d), device_intensity))) {
m_description += (*d)->errorMessage;
m_description += "\n";
continue;
};
@ -178,7 +178,7 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) {
DeviceInfo device;
char bus_id[100];
if(cudaDeviceGetPCIBusId(bus_id, 100, (*d)->cuda_index) == cudaSuccess) {
if(cudaDeviceGetPCIBusId(bus_id, 100, (*d)->cudaIndex) == cudaSuccess) {
device.bus_id = bus_id;
int domain_separator = device.bus_id.find(":");
if(domain_separator != string::npos) {
@ -186,13 +186,13 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) {
}
}
device.name = (*d)->device_string;
device.name = (*d)->deviceString;
device.intensity = device_intensity;
storeDeviceInfo((*d)->device_index, device);
storeDeviceInfo((*d)->deviceIndex, device);
__enabledDevices.push_back(*d);
m_enabledDevices.push_back(*d);
total_threads += (*d)->profile_info.threads;
total_threads += (*d)->profileInfo.threads;
intensity += device_intensity;
}
@ -213,46 +213,46 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) {
if(!buildThreadData())
return false;
m_intensity = intensity / __enabledDevices.size();
m_computingThreads = __enabledDevices.size() * 2; // 2 computing threads for each device
m_intensity = intensity / m_enabledDevices.size();
m_computingThreads = m_enabledDevices.size() * 2; // 2 computing threads for each device
m_description += "Status: ENABLED - with " + to_string(total_threads) + " threads.";
return true;
}
void cuda_hasher::cleanup() {
for(vector<cuda_device_info *>::iterator d = __devices.begin(); d != __devices.end(); d++) {
void CudaHasher::cleanup() {
for(vector<CudaDeviceInfo *>::iterator d = m_devices.begin(); d != m_devices.end(); d++) {
cuda_free(*d);
}
}
bool cuda_hasher::__setup_device_info(cuda_device_info *device, double intensity) {
device->profile_info.threads_per_chunk = (uint32_t)(device->max_allocable_mem_size / device->profile_info.profile->memSize);
size_t chunk_size = device->profile_info.threads_per_chunk * device->profile_info.profile->memSize;
bool CudaHasher::setupDeviceInfo(CudaDeviceInfo *device, double intensity) {
device->profileInfo.threads_per_chunk = (uint32_t)(device->maxAllocableMemSize / device->profileInfo.profile->memSize);
size_t chunk_size = device->profileInfo.threads_per_chunk * device->profileInfo.profile->memSize;
if(chunk_size == 0) {
device->error = cudaErrorInitializationError;
device->error_message = "Not enough memory on GPU.";
device->errorMessage = "Not enough memory on GPU.";
return false;
}
uint64_t usable_memory = device->free_mem_size;
uint64_t usable_memory = device->freeMemSize;
double chunks = (double)usable_memory / (double)chunk_size;
uint32_t max_threads = (uint32_t)(device->profile_info.threads_per_chunk * chunks);
uint32_t max_threads = (uint32_t)(device->profileInfo.threads_per_chunk * chunks);
if(max_threads == 0) {
device->error = cudaErrorInitializationError;
device->error_message = "Not enough memory on GPU.";
device->errorMessage = "Not enough memory on GPU.";
return false;
}
device->profile_info.threads = (uint32_t)(max_threads * intensity / 100.0);
device->profile_info.threads = (device->profile_info.threads / 2) * 2; // make it divisible by 2 to allow for parallel kernel execution
if(max_threads > 0 && device->profile_info.threads == 0 && intensity > 0)
device->profile_info.threads = 2;
device->profileInfo.threads = (uint32_t)(max_threads * intensity / 100.0);
device->profileInfo.threads = (device->profileInfo.threads / 2) * 2; // make it divisible by 2 to allow for parallel kernel execution
if(max_threads > 0 && device->profileInfo.threads == 0 && intensity > 0)
device->profileInfo.threads = 2;
chunks = (double)device->profile_info.threads / (double)device->profile_info.threads_per_chunk;
chunks = (double)device->profileInfo.threads / (double)device->profileInfo.threads_per_chunk;
cuda_allocate(device, chunks, chunk_size);
@ -262,15 +262,15 @@ bool cuda_hasher::__setup_device_info(cuda_device_info *device, double intensity
return true;
}
bool cuda_hasher::buildThreadData() {
__thread_data = new cuda_gpumgmt_thread_data[__enabledDevices.size() * 2];
bool CudaHasher::buildThreadData() {
m_threadData = new CudaGpuMgmtThreadData[m_enabledDevices.size() * 2];
for(int i=0; i < __enabledDevices.size(); i++) {
cuda_device_info *device = __enabledDevices[i];
for(int i=0; i < m_enabledDevices.size(); i++) {
CudaDeviceInfo *device = m_enabledDevices[i];
for(int threadId = 0; threadId < 2; threadId ++) {
cuda_gpumgmt_thread_data &thread_data = __thread_data[i * 2 + threadId];
CudaGpuMgmtThreadData &thread_data = m_threadData[i * 2 + threadId];
thread_data.device = device;
thread_data.thread_id = threadId;
thread_data.threadId = threadId;
cudaStream_t stream;
device->error = cudaStreamCreate(&stream);
@ -279,19 +279,19 @@ bool cuda_hasher::buildThreadData() {
return false;
}
thread_data.device_data = stream;
thread_data.deviceData = stream;
#ifdef PARALLEL_CUDA
if(threadId == 0) {
thread_data.threads_idx = 0;
thread_data.threads = device->profile_info.threads / 2;
thread_data.threadsIdx = 0;
thread_data.threads = device->profileInfo.threads / 2;
}
else {
thread_data.threads_idx = device->profile_info.threads / 2;
thread_data.threads = device->profile_info.threads - thread_data.threads_idx;
thread_data.threadsIdx = device->profileInfo.threads / 2;
thread_data.threads = device->profileInfo.threads - thread_data.threadsIdx;
}
#else
thread_data.threads_idx = 0;
thread_data.threadsIdx = 0;
thread_data.threads = device->profile_info.threads;
#endif
@ -305,17 +305,17 @@ bool cuda_hasher::buildThreadData() {
return true;
}
int cuda_hasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) {
cuda_gpumgmt_thread_data &threadData = __thread_data[threadIdx];
int CudaHasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) {
CudaGpuMgmtThreadData &threadData = m_threadData[threadIdx];
cudaSetDevice(threadData.device->cuda_index);
cudaSetDevice(threadData.device->cudaIndex);
threadData.hashData.input = input;
threadData.hashData.inSize = size;
threadData.hashData.output = output;
int hashCount = threadData.argon2->generateHashes(*m_profile, threadData.hashData);
if(threadData.device->error != cudaSuccess) {
LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->error_message);
LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->errorMessage);
return 0;
}
@ -326,15 +326,15 @@ int cuda_hasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *ou
}
size_t cuda_hasher::parallelism(int workerIdx) {
cuda_gpumgmt_thread_data &threadData = __thread_data[workerIdx];
size_t CudaHasher::parallelism(int workerIdx) {
CudaGpuMgmtThreadData &threadData = m_threadData[workerIdx];
return threadData.threads;
}
size_t cuda_hasher::deviceCount() {
return __enabledDevices.size();
size_t CudaHasher::deviceCount() {
return m_enabledDevices.size();
}
REGISTER_HASHER(cuda_hasher);
REGISTER_HASHER(CudaHasher);
#endif //WITH_CUDA

View file

@ -0,0 +1,126 @@
//
// Created by Haifa Bogdan Adnan on 18/09/2018.
//
#ifndef ARGON2_CUDA_HASHER_H
#define ARGON2_CUDA_HASHER_H
#if defined(WITH_CUDA)
// Buffer pointers for one CUDA device: cuda_allocate() fills them in and
// cuda_free() releases them and resets them to NULL.
// The struct declares no constructor, so the pointers hold no defined value
// until the owner initializes them.
struct CudaKernelArguments {
// Six device memory chunks, each obtained with its own cudaMalloc call in cuda_allocate().
void *memoryChunk_0;
void *memoryChunk_1;
void *memoryChunk_2;
void *memoryChunk_3;
void *memoryChunk_4;
void *memoryChunk_5;
// Device tables uploaded from host memory with cudaMemcpy in cuda_allocate():
// refs and idxs are sized profile->blockRefsSize uint32_t entries each,
// segments is sized profile->segCount * 3 uint32_t entries.
uint32_t *refs;
uint32_t *idxs;
uint32_t *segments;
// Two-slot device buffers (cudaMalloc); the kernel wrappers pick the slot with
// CudaGpuMgmtThreadData::threadId.
uint32_t *preseedMemory[2];
uint32_t *seedMemory[2];
uint32_t *outMemory[2];
uint32_t *hashMemory[2];
// Two-slot pinned host buffers (cudaMallocHost, 132 * threads bytes each);
// cuda_kernel_prehasher() copies the input here before the asynchronous
// host-to-device transfer into preseedMemory.
uint32_t *hostSeedMemory[2];
};
struct Argon2ProfileInfo {
Argon2ProfileInfo() {
threads = 0;
threads_per_chunk = 0;
}
uint32_t threads;
uint32_t threads_per_chunk;
Argon2Profile *profile;
};
struct CudaDeviceInfo {
CudaDeviceInfo() {
deviceIndex = 0;
deviceString = "";
freeMemSize = 0;
maxAllocableMemSize = 0;
error = cudaSuccess;
errorMessage = "";
}
int deviceIndex;
int cudaIndex;
string deviceString;
uint64_t freeMemSize;
uint64_t maxAllocableMemSize;
Argon2ProfileInfo profileInfo;
CudaKernelArguments arguments;
mutex deviceLock;
cudaError_t error;
string errorMessage;
};
// Per-worker state. CudaHasher::buildThreadData() creates two of these for
// every enabled device.
struct CudaGpuMgmtThreadData {
// Acquires the owning device's mutex. Compiles to a no-op when PARALLEL_CUDA
// is defined. `device` must already be assigned: it is dereferenced here.
void lock() {
#ifndef PARALLEL_CUDA
device->deviceLock.lock();
#endif
}
// Releases the mutex taken by lock(); also a no-op when PARALLEL_CUDA is defined.
void unlock() {
#ifndef PARALLEL_CUDA
device->deviceLock.unlock();
#endif
}
// Worker slot on its device (0 or 1); selects the slot of the two-element
// buffers in CudaKernelArguments.
int threadId;
// Device this worker submits work to.
CudaDeviceInfo *device;
// Hashing front end called from CudaHasher::compute().
// NOTE(review): where this pointer is assigned is not visible in this excerpt.
Argon2 *argon2;
// Input pointer, input size and output pointer for the current compute() call.
HashData hashData;
// The worker's cudaStream_t, stored as an opaque pointer.
void *deviceData;
// Thread count handled by this worker and the index of its first thread.
// With PARALLEL_CUDA each worker gets half of the device's threads;
// otherwise threadsIdx is 0 and threads is the full device thread count.
int threads;
int threadsIdx;
};
// Hasher implementation that runs Argon2 on CUDA devices.
class CudaHasher : public Hasher {
public:
CudaHasher();
// Calls cleanup().
~CudaHasher();
// Looks up the Argon2 profile for (algorithm, variant) and enumerates CUDA
// devices. Returns false, with the reason in the description, when a CUDA
// error is reported or no device is found.
virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant);
// Applies the GPU filter and per-device intensity from `config`, allocates
// device buffers for each selected device and builds the worker data.
virtual bool configure(xmrig::HasherConfig &config);
// Calls cuda_free() for every discovered device (enabled or not).
// NOTE(review): no matching delete[] for m_threadData is visible in this excerpt.
virtual void cleanup();
// Hashes `size` bytes of `input` on worker `threadIdx`, writing to `output`.
// Returns 0 when the worker's device reports a CUDA error.
virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output);
// Number of threads assigned to worker `workerIdx`.
virtual size_t parallelism(int workerIdx);
// Number of enabled devices.
virtual size_t deviceCount();
private:
// Allocates and fills a CudaDeviceInfo for the given CUDA ordinal. On a CUDA
// failure the object is still returned, with error/errorMessage set.
CudaDeviceInfo *getDeviceInfo(int device_index);
// Derives the thread count for `device` from `intensity` (a percentage) and
// calls cuda_allocate(). Returns false when the device lacks memory.
bool setupDeviceInfo(CudaDeviceInfo *device, double intensity);
// Returns the devices that were queried without error; the last error seen
// is reported through `error` and `error_message`.
vector<CudaDeviceInfo*> queryCudaDevices(cudaError_t &error, string &error_message);
// Allocates m_threadData (two entries per enabled device) and creates one
// CUDA stream per entry. Returns false if stream creation fails.
bool buildThreadData();
// All devices returned by queryCudaDevices().
vector<CudaDeviceInfo*> m_devices;
// Subset of m_devices that passed the filter and setup in configure().
vector<CudaDeviceInfo*> m_enabledDevices;
// Array of m_enabledDevices.size() * 2 entries, allocated with new[].
CudaGpuMgmtThreadData *m_threadData;
// Profile selected in initialize().
Argon2Profile *m_profile;
};
// CUDA kernel exports
// Allocates all device and pinned-host buffers in device->arguments.
// Returns nothing: on failure it stops early and leaves the CUDA status in
// device->error with a description in device->errorMessage.
extern void cuda_allocate(CudaDeviceInfo *device, double chunks, size_t chunk_size);
// Releases the buffers in device->arguments and resets the pointers to NULL.
extern void cuda_free(CudaDeviceInfo *device);
// `user_data` is a CudaGpuMgmtThreadData*. Copies the input from `memory` to the
// device and launches the prehash kernel; returns false if the copy fails.
extern bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data);
// `user_data` is a CudaGpuMgmtThreadData*.
// NOTE(review): the meaning of the returned pointer is not visible in this excerpt.
extern void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data);
// NOTE(review): definition not visible in this excerpt; presumably `user_data`
// is a CudaGpuMgmtThreadData* like the other two wrappers - confirm.
extern bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data);
// end CUDA kernel exports
#endif //WITH_CUDA
#endif //ARGON2_CUDA_HASHER_H

View file

@ -1,126 +0,0 @@
//
// Created by Haifa Bogdan Adnan on 18/09/2018.
//
#ifndef ARGON2_CUDA_HASHER_H
#define ARGON2_CUDA_HASHER_H
#if defined(WITH_CUDA)
// Buffer pointers for one CUDA device: cuda_allocate() fills them in and
// cuda_free() releases them and resets them to NULL.
// The struct declares no constructor, so the pointers hold no defined value
// until the owner initializes them.
struct cuda_kernel_arguments {
// Six device memory chunks, each obtained with its own cudaMalloc call in cuda_allocate().
void *memory_chunk_0;
void *memory_chunk_1;
void *memory_chunk_2;
void *memory_chunk_3;
void *memory_chunk_4;
void *memory_chunk_5;
// Device tables uploaded from host memory with cudaMemcpy in cuda_allocate():
// refs and idxs are sized profile->blockRefsSize uint32_t entries each,
// segments is sized profile->segCount * 3 uint32_t entries.
uint32_t *refs;
uint32_t *idxs;
uint32_t *segments;
// Two-slot device buffers (cudaMalloc); the kernel wrappers pick the slot with
// cuda_gpumgmt_thread_data::thread_id.
uint32_t *preseed_memory[2];
uint32_t *seed_memory[2];
uint32_t *out_memory[2];
uint32_t *hash_memory[2];
// Two-slot pinned host buffers (cudaMallocHost, 132 * threads bytes each);
// cuda_kernel_prehasher() copies the input here before the asynchronous
// host-to-device transfer into preseed_memory.
uint32_t *host_seed_memory[2];
};
struct argon2profile_info {
argon2profile_info() {
threads = 0;
threads_per_chunk = 0;
}
uint32_t threads;
uint32_t threads_per_chunk;
Argon2Profile *profile;
};
struct cuda_device_info {
cuda_device_info() {
device_index = 0;
device_string = "";
free_mem_size = 0;
max_allocable_mem_size = 0;
error = cudaSuccess;
error_message = "";
}
int device_index;
int cuda_index;
string device_string;
uint64_t free_mem_size;
uint64_t max_allocable_mem_size;
argon2profile_info profile_info;
cuda_kernel_arguments arguments;
mutex device_lock;
cudaError_t error;
string error_message;
};
// Per-worker state. cuda_hasher::buildThreadData() creates two of these for
// every enabled device.
struct cuda_gpumgmt_thread_data {
// Acquires the owning device's mutex. Compiles to a no-op when PARALLEL_CUDA
// is defined. `device` must already be assigned: it is dereferenced here.
void lock() {
#ifndef PARALLEL_CUDA
device->device_lock.lock();
#endif
}
// Releases the mutex taken by lock(); also a no-op when PARALLEL_CUDA is defined.
void unlock() {
#ifndef PARALLEL_CUDA
device->device_lock.unlock();
#endif
}
// Worker slot on its device (0 or 1); selects the slot of the two-element
// buffers in cuda_kernel_arguments.
int thread_id;
// Device this worker submits work to.
cuda_device_info *device;
// Hashing front end called from cuda_hasher::compute().
// NOTE(review): where this pointer is assigned is not visible in this excerpt.
Argon2 *argon2;
// Input pointer, input size and output pointer for the current compute() call.
HashData hashData;
// The worker's cudaStream_t, stored as an opaque pointer.
void *device_data;
// Thread count handled by this worker and the index of its first thread.
// With PARALLEL_CUDA each worker gets half of the device's threads;
// otherwise threads_idx is 0 and threads is the full device thread count.
int threads;
int threads_idx;
};
// Hasher implementation that runs Argon2 on CUDA devices.
class cuda_hasher : public Hasher {
public:
cuda_hasher();
// Calls cleanup().
~cuda_hasher();
// Looks up the Argon2 profile for (algorithm, variant) and enumerates CUDA
// devices. Returns false, with the reason in the description, when a CUDA
// error is reported or no device is found.
virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant);
// Applies the GPU filter and per-device intensity from `config`, allocates
// device buffers for each selected device and builds the worker data.
virtual bool configure(xmrig::HasherConfig &config);
// Calls cuda_free() for every discovered device (enabled or not).
// NOTE(review): no matching delete[] for __thread_data is visible in this excerpt.
virtual void cleanup();
// Hashes `size` bytes of `input` on worker `threadIdx`, writing to `output`.
// Returns 0 when the worker's device reports a CUDA error.
virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output);
// Number of threads assigned to worker `workerIdx`.
virtual size_t parallelism(int workerIdx);
// Number of enabled devices.
virtual size_t deviceCount();
private:
// Allocates and fills a cuda_device_info for the given CUDA ordinal. On a CUDA
// failure the object is still returned, with error/error_message set.
cuda_device_info *__get_device_info(int device_index);
// Derives the thread count for `device` from `intensity` (a percentage) and
// calls cuda_allocate(). Returns false when the device lacks memory.
bool __setup_device_info(cuda_device_info *device, double intensity);
// Returns the devices that were queried without error; the last error seen
// is reported through `error` and `error_message`.
vector<cuda_device_info*> __query_cuda_devices(cudaError_t &error, string &error_message);
// Allocates __thread_data (two entries per enabled device) and creates one
// CUDA stream per entry. Returns false if stream creation fails.
bool buildThreadData();
// All devices returned by __query_cuda_devices().
vector<cuda_device_info*> __devices;
// Subset of __devices that passed the filter and setup in configure().
vector<cuda_device_info*> __enabledDevices;
// Array of __enabledDevices.size() * 2 entries, allocated with new[].
cuda_gpumgmt_thread_data *__thread_data;
// Profile selected in initialize().
Argon2Profile *m_profile;
};
// CUDA kernel exports
// Allocates all device and pinned-host buffers in device->arguments.
// Returns nothing: on failure it stops early and leaves the CUDA status in
// device->error with a description in device->error_message.
extern void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size);
// Releases the buffers in device->arguments and resets the pointers to NULL.
extern void cuda_free(cuda_device_info *device);
// `user_data` is a cuda_gpumgmt_thread_data*. Copies the input from `memory` to
// the device and launches the prehash kernel; returns false if the copy fails.
extern bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data);
// `user_data` is a cuda_gpumgmt_thread_data*.
// NOTE(review): the meaning of the returned pointer is not visible in this excerpt.
extern void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data);
// NOTE(review): definition not visible in this excerpt; presumably `user_data`
// is a cuda_gpumgmt_thread_data* like the other two wrappers - confirm.
extern bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data);
// end CUDA kernel exports
#endif //WITH_CUDA
#endif //ARGON2_CUDA_HASHER_H

View file

@ -7,7 +7,7 @@
#include "crypto/argon2_hasher/hash/Hasher.h"
#include "crypto/argon2_hasher/hash/argon2/Argon2.h"
#include "cuda_hasher.h"
#include "CudaHasher.h"
#define THREADS_PER_LANE 32
#define BLOCK_SIZE_UINT4 64
@ -744,12 +744,12 @@ __global__ void posthash (
}
}
void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
Argon2Profile *profile = device->profile_info.profile;
void cuda_allocate(CudaDeviceInfo *device, double chunks, size_t chunk_size) {
Argon2Profile *profile = device->profileInfo.profile;
device->error = cudaSetDevice(device->cuda_index);
device->error = cudaSetDevice(device->cudaIndex);
if(device->error != cudaSuccess) {
device->error_message = "Error setting current device for memory allocation.";
device->errorMessage = "Error setting current device for memory allocation.";
return;
}
@ -762,9 +762,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
else {
allocated_mem_for_current_chunk = 1;
}
device->error = cudaMalloc(&device->arguments.memory_chunk_0, allocated_mem_for_current_chunk);
device->error = cudaMalloc(&device->arguments.memoryChunk_0, allocated_mem_for_current_chunk);
if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory.";
device->errorMessage = "Error allocating memory.";
return;
}
if (chunks > 0) {
@ -774,9 +774,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
else {
allocated_mem_for_current_chunk = 1;
}
device->error = cudaMalloc(&device->arguments.memory_chunk_1, allocated_mem_for_current_chunk);
device->error = cudaMalloc(&device->arguments.memoryChunk_1, allocated_mem_for_current_chunk);
if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory.";
device->errorMessage = "Error allocating memory.";
return;
}
if (chunks > 0) {
@ -786,9 +786,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
else {
allocated_mem_for_current_chunk = 1;
}
device->error = cudaMalloc(&device->arguments.memory_chunk_2, allocated_mem_for_current_chunk);
device->error = cudaMalloc(&device->arguments.memoryChunk_2, allocated_mem_for_current_chunk);
if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory.";
device->errorMessage = "Error allocating memory.";
return;
}
if (chunks > 0) {
@ -798,9 +798,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
else {
allocated_mem_for_current_chunk = 1;
}
device->error = cudaMalloc(&device->arguments.memory_chunk_3, allocated_mem_for_current_chunk);
device->error = cudaMalloc(&device->arguments.memoryChunk_3, allocated_mem_for_current_chunk);
if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory.";
device->errorMessage = "Error allocating memory.";
return;
}
if (chunks > 0) {
@ -810,9 +810,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
else {
allocated_mem_for_current_chunk = 1;
}
device->error = cudaMalloc(&device->arguments.memory_chunk_4, allocated_mem_for_current_chunk);
device->error = cudaMalloc(&device->arguments.memoryChunk_4, allocated_mem_for_current_chunk);
if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory.";
device->errorMessage = "Error allocating memory.";
return;
}
if (chunks > 0) {
@ -822,9 +822,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
else {
allocated_mem_for_current_chunk = 1;
}
device->error = cudaMalloc(&device->arguments.memory_chunk_5, allocated_mem_for_current_chunk);
device->error = cudaMalloc(&device->arguments.memoryChunk_5, allocated_mem_for_current_chunk);
if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory.";
device->errorMessage = "Error allocating memory.";
return;
}
@ -835,13 +835,13 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
device->error = cudaMalloc(&device->arguments.refs, profile->blockRefsSize * sizeof(uint32_t));
if(device->error != cudaSuccess) {
device->error_message = "Error allocating memory.";
device->errorMessage = "Error allocating memory.";
return;
}
device->error = cudaMemcpy(device->arguments.refs, refs, profile->blockRefsSize * sizeof(uint32_t), cudaMemcpyHostToDevice);
if(device->error != cudaSuccess) {
device->error_message = "Error copying memory.";
device->errorMessage = "Error copying memory.";
return;
}
free(refs);
@ -860,14 +860,14 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
device->error = cudaMalloc(&device->arguments.idxs, profile->blockRefsSize * sizeof(uint32_t));
if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory.";
device->errorMessage = "Error allocating memory.";
return;
}
device->error = cudaMemcpy(device->arguments.idxs, idxs, profile->blockRefsSize * sizeof(uint32_t),
cudaMemcpyHostToDevice);
if (device->error != cudaSuccess) {
device->error_message = "Error copying memory.";
device->errorMessage = "Error copying memory.";
return;
}
free(idxs);
@ -876,17 +876,17 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
//reorganize segments data
device->error = cudaMalloc(&device->arguments.segments, profile->segCount * 3 * sizeof(uint32_t));
if(device->error != cudaSuccess) {
device->error_message = "Error allocating memory.";
device->errorMessage = "Error allocating memory.";
return;
}
device->error = cudaMemcpy(device->arguments.segments, profile->segments, profile->segCount * 3 * sizeof(uint32_t), cudaMemcpyHostToDevice);
if(device->error != cudaSuccess) {
device->error_message = "Error copying memory.";
device->errorMessage = "Error copying memory.";
return;
}
#ifdef PARALLEL_CUDA
int threads = device->profile_info.threads / 2;
int threads = device->profileInfo.threads / 2;
#else
int threads = device->profile_info.threads;
#endif
@ -896,60 +896,60 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
size_t out_memory_size = threads * ARGON2_BLOCK_SIZE;
size_t hash_memory_size = threads * (xmrig::ARGON2_HASHLEN + 4);
device->error = cudaMalloc(&device->arguments.preseed_memory[0], preseed_memory_size);
device->error = cudaMalloc(&device->arguments.preseedMemory[0], preseed_memory_size);
if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory.";
device->errorMessage = "Error allocating memory.";
return;
}
device->error = cudaMalloc(&device->arguments.seed_memory[0], seed_memory_size);
device->error = cudaMalloc(&device->arguments.seedMemory[0], seed_memory_size);
if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory.";
device->errorMessage = "Error allocating memory.";
return;
}
device->error = cudaMalloc(&device->arguments.out_memory[0], out_memory_size);
device->error = cudaMalloc(&device->arguments.outMemory[0], out_memory_size);
if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory.";
device->errorMessage = "Error allocating memory.";
return;
}
device->error = cudaMalloc(&device->arguments.hash_memory[0], hash_memory_size);
device->error = cudaMalloc(&device->arguments.hashMemory[0], hash_memory_size);
if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory.";
device->errorMessage = "Error allocating memory.";
return;
}
device->error = cudaMallocHost(&device->arguments.host_seed_memory[0], 132 * threads);
device->error = cudaMallocHost(&device->arguments.hostSeedMemory[0], 132 * threads);
if (device->error != cudaSuccess) {
device->error_message = "Error allocating pinned memory.";
device->errorMessage = "Error allocating pinned memory.";
return;
}
device->error = cudaMalloc(&device->arguments.preseed_memory[1], preseed_memory_size);
device->error = cudaMalloc(&device->arguments.preseedMemory[1], preseed_memory_size);
if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory.";
device->errorMessage = "Error allocating memory.";
return;
}
device->error = cudaMalloc(&device->arguments.seed_memory[1], seed_memory_size);
device->error = cudaMalloc(&device->arguments.seedMemory[1], seed_memory_size);
if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory.";
device->errorMessage = "Error allocating memory.";
return;
}
device->error = cudaMalloc(&device->arguments.out_memory[1], out_memory_size);
device->error = cudaMalloc(&device->arguments.outMemory[1], out_memory_size);
if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory.";
device->errorMessage = "Error allocating memory.";
return;
}
device->error = cudaMalloc(&device->arguments.hash_memory[1], hash_memory_size);
device->error = cudaMalloc(&device->arguments.hashMemory[1], hash_memory_size);
if (device->error != cudaSuccess) {
device->error_message = "Error allocating memory.";
device->errorMessage = "Error allocating memory.";
return;
}
device->error = cudaMallocHost(&device->arguments.host_seed_memory[1], 132 * threads);
device->error = cudaMallocHost(&device->arguments.hostSeedMemory[1], 132 * threads);
if (device->error != cudaSuccess) {
device->error_message = "Error allocating pinned memory.";
device->errorMessage = "Error allocating pinned memory.";
return;
}
}
void cuda_free(cuda_device_info *device) {
cudaSetDevice(device->cuda_index);
void cuda_free(CudaDeviceInfo *device) {
cudaSetDevice(device->cudaIndex);
if(device->arguments.idxs != NULL) {
cudaFree(device->arguments.idxs);
@ -966,73 +966,73 @@ void cuda_free(cuda_device_info *device) {
device->arguments.segments = NULL;
}
if(device->arguments.memory_chunk_0 != NULL) {
cudaFree(device->arguments.memory_chunk_0);
device->arguments.memory_chunk_0 = NULL;
if(device->arguments.memoryChunk_0 != NULL) {
cudaFree(device->arguments.memoryChunk_0);
device->arguments.memoryChunk_0 = NULL;
}
if(device->arguments.memory_chunk_1 != NULL) {
cudaFree(device->arguments.memory_chunk_1);
device->arguments.memory_chunk_1 = NULL;
if(device->arguments.memoryChunk_1 != NULL) {
cudaFree(device->arguments.memoryChunk_1);
device->arguments.memoryChunk_1 = NULL;
}
if(device->arguments.memory_chunk_2 != NULL) {
cudaFree(device->arguments.memory_chunk_2);
device->arguments.memory_chunk_2 = NULL;
if(device->arguments.memoryChunk_2 != NULL) {
cudaFree(device->arguments.memoryChunk_2);
device->arguments.memoryChunk_2 = NULL;
}
if(device->arguments.memory_chunk_3 != NULL) {
cudaFree(device->arguments.memory_chunk_3);
device->arguments.memory_chunk_3 = NULL;
if(device->arguments.memoryChunk_3 != NULL) {
cudaFree(device->arguments.memoryChunk_3);
device->arguments.memoryChunk_3 = NULL;
}
if(device->arguments.memory_chunk_4 != NULL) {
cudaFree(device->arguments.memory_chunk_4);
device->arguments.memory_chunk_4 = NULL;
if(device->arguments.memoryChunk_4 != NULL) {
cudaFree(device->arguments.memoryChunk_4);
device->arguments.memoryChunk_4 = NULL;
}
if(device->arguments.memory_chunk_5 != NULL) {
cudaFree(device->arguments.memory_chunk_5);
device->arguments.memory_chunk_5 = NULL;
if(device->arguments.memoryChunk_5 != NULL) {
cudaFree(device->arguments.memoryChunk_5);
device->arguments.memoryChunk_5 = NULL;
}
if(device->arguments.preseed_memory != NULL) {
if(device->arguments.preseedMemory != NULL) {
for(int i=0;i<2;i++) {
if(device->arguments.preseed_memory[i] != NULL)
cudaFree(device->arguments.preseed_memory[i]);
device->arguments.preseed_memory[i] = NULL;
if(device->arguments.preseedMemory[i] != NULL)
cudaFree(device->arguments.preseedMemory[i]);
device->arguments.preseedMemory[i] = NULL;
}
}
if(device->arguments.seed_memory != NULL) {
if(device->arguments.seedMemory != NULL) {
for(int i=0;i<2;i++) {
if(device->arguments.seed_memory[i] != NULL)
cudaFree(device->arguments.seed_memory[i]);
device->arguments.seed_memory[i] = NULL;
if(device->arguments.seedMemory[i] != NULL)
cudaFree(device->arguments.seedMemory[i]);
device->arguments.seedMemory[i] = NULL;
}
}
if(device->arguments.out_memory != NULL) {
if(device->arguments.outMemory != NULL) {
for(int i=0;i<2;i++) {
if(device->arguments.out_memory[i] != NULL)
cudaFree(device->arguments.out_memory[i]);
device->arguments.out_memory[i] = NULL;
if(device->arguments.outMemory[i] != NULL)
cudaFree(device->arguments.outMemory[i]);
device->arguments.outMemory[i] = NULL;
}
}
if(device->arguments.hash_memory != NULL) {
if(device->arguments.hashMemory != NULL) {
for(int i=0;i<2;i++) {
if(device->arguments.hash_memory[i] != NULL)
cudaFree(device->arguments.hash_memory[i]);
device->arguments.hash_memory[i] = NULL;
if(device->arguments.hashMemory[i] != NULL)
cudaFree(device->arguments.hashMemory[i]);
device->arguments.hashMemory[i] = NULL;
}
}
if(device->arguments.host_seed_memory != NULL) {
if(device->arguments.hostSeedMemory != NULL) {
for(int i=0;i<2;i++) {
if(device->arguments.host_seed_memory[i] != NULL)
cudaFreeHost(device->arguments.host_seed_memory[i]);
device->arguments.host_seed_memory[i] = NULL;
if(device->arguments.hostSeedMemory[i] != NULL)
cudaFreeHost(device->arguments.hostSeedMemory[i]);
device->arguments.hostSeedMemory[i] = NULL;
}
}
@ -1040,9 +1040,9 @@ void cuda_free(cuda_device_info *device) {
}
// NOTE(review): this span is a rendered diff. Every renamed statement appears twice
// (old snake_case spelling first, new camelCase spelling second), and the
// "@ -N,M +N,M @" lines mark places where unchanged lines were elided.
//
// Stages one batch of input and launches the `prehash` kernel on the calling
// GPU-management thread's stream.
//   memory    - host buffer; gpumgmt_thread->hashData.inSize bytes are copied from it.
//   threads   - batch size; only used to derive the grid dimension of the launch.
//   profile   - Argon2 profile; thrCost, memCost and segCount are read here.
//   user_data - cast to the per-thread management struct that carries the device,
//               the stream (deviceData) and the buffer index (threadId).
// Returns false when the asynchronous host-to-device copy reports an error; in that
// case device->error / errorMessage are set and the thread lock is released first.
bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
cuda_gpumgmt_thread_data *gpumgmt_thread = (cuda_gpumgmt_thread_data *)user_data;
cuda_device_info *device = gpumgmt_thread->device;
cudaStream_t stream = (cudaStream_t)gpumgmt_thread->device_data;
CudaGpuMgmtThreadData *gpumgmt_thread = (CudaGpuMgmtThreadData *)user_data;
CudaDeviceInfo *device = gpumgmt_thread->device;
cudaStream_t stream = (cudaStream_t)gpumgmt_thread->deviceData;
// At least 8 sessions per block; hashes_per_block is a double, so the grid size
// computed below goes through floating-point division and ceil().
int sessions = max(profile->thrCost * 2, (uint32_t)8);
double hashes_per_block = sessions / (profile->thrCost * 2.0);
@ -1050,18 +1050,18 @@ bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, vo
// The lock taken here is released on the failure path below; the success-path
// release is not visible in this span (presumably done by the posthasher - confirm).
gpumgmt_thread->lock();
// Copy the caller's input into this thread's host seed buffer, then queue the
// host-to-device transfer into the preseed buffer on the thread's stream.
memcpy(device->arguments.host_seed_memory[gpumgmt_thread->thread_id], memory, gpumgmt_thread->hashData.inSize);
memcpy(device->arguments.hostSeedMemory[gpumgmt_thread->threadId], memory, gpumgmt_thread->hashData.inSize);
device->error = cudaMemcpyAsync(device->arguments.preseed_memory[gpumgmt_thread->thread_id], device->arguments.host_seed_memory[gpumgmt_thread->thread_id], gpumgmt_thread->hashData.inSize, cudaMemcpyHostToDevice, stream);
device->error = cudaMemcpyAsync(device->arguments.preseedMemory[gpumgmt_thread->threadId], device->arguments.hostSeedMemory[gpumgmt_thread->threadId], gpumgmt_thread->hashData.inSize, cudaMemcpyHostToDevice, stream);
if (device->error != cudaSuccess) {
device->error_message = "Error writing to gpu memory.";
device->errorMessage = "Error writing to gpu memory.";
gpumgmt_thread->unlock();
return false;
}
// Launch configuration: ceil(threads / hashes_per_block) blocks, `work_items` threads
// per block (declared in the elided lines above), sessions * BLAKE_SHARED_MEM bytes
// of dynamic shared memory, queued on the same stream as the copy.
prehash <<< ceil(threads / hashes_per_block), work_items, sessions * BLAKE_SHARED_MEM, stream>>> (
device->arguments.preseed_memory[gpumgmt_thread->thread_id],
device->arguments.seed_memory[gpumgmt_thread->thread_id],
device->arguments.preseedMemory[gpumgmt_thread->threadId],
device->arguments.seedMemory[gpumgmt_thread->threadId],
profile->memCost,
profile->thrCost,
profile->segCount / (4 * profile->thrCost),
@ -1073,21 +1073,21 @@ bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, vo
}
// NOTE(review): this span is a rendered diff. Every renamed statement appears twice
// (old snake_case spelling first, new camelCase spelling second), and the
// "@ -N,M +N,M @" line marks a place where unchanged lines were elided.
//
// Launches the `fill_blocks` kernel on the calling GPU-management thread's stream.
//   threads   - used directly as the grid dimension (one block per unit of `threads`).
//   profile   - Argon2 profile; thrCost, succesiveIdxs, segSize and segCount are read here.
//   user_data - cast to the per-thread management struct that carries the device,
//               the stream (deviceData), the buffer index (threadId) and threadsIdx.
// Always returns (void *)1. The launch result is not checked in this function.
// NOTE(review): consider cudaGetLastError() after the launch so that a bad launch
// configuration is reported instead of surfacing later.
void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data) {
cuda_gpumgmt_thread_data *gpumgmt_thread = (cuda_gpumgmt_thread_data *)user_data;
cuda_device_info *device = gpumgmt_thread->device;
cudaStream_t stream = (cudaStream_t)gpumgmt_thread->device_data;
CudaGpuMgmtThreadData *gpumgmt_thread = (CudaGpuMgmtThreadData *)user_data;
CudaDeviceInfo *device = gpumgmt_thread->device;
cudaStream_t stream = (cudaStream_t)gpumgmt_thread->deviceData;
// Block size scales with thrCost; the dynamic shared-memory request per thrCost unit
// is ARGON2_BLOCK_SIZE + 128 bytes, plus another 128 when profile->succesiveIdxs == 1.
size_t work_items = KERNEL_WORKGROUP_SIZE * profile->thrCost;
size_t shared_mem = profile->thrCost * (ARGON2_BLOCK_SIZE + 128 + (profile->succesiveIdxs == 1 ? 128 : 0));
// The six memory chunk pointers are passed individually, each cast to uint32_t*,
// followed by this thread's seed and out buffers selected by threadId.
fill_blocks <<<threads, work_items, shared_mem, stream>>> ((uint32_t*)device->arguments.memory_chunk_0,
(uint32_t*)device->arguments.memory_chunk_1,
(uint32_t*)device->arguments.memory_chunk_2,
(uint32_t*)device->arguments.memory_chunk_3,
(uint32_t*)device->arguments.memory_chunk_4,
(uint32_t*)device->arguments.memory_chunk_5,
device->arguments.seed_memory[gpumgmt_thread->thread_id],
device->arguments.out_memory[gpumgmt_thread->thread_id],
fill_blocks <<<threads, work_items, shared_mem, stream>>> ((uint32_t*)device->arguments.memoryChunk_0,
(uint32_t*)device->arguments.memoryChunk_1,
(uint32_t*)device->arguments.memoryChunk_2,
(uint32_t*)device->arguments.memoryChunk_3,
(uint32_t*)device->arguments.memoryChunk_4,
(uint32_t*)device->arguments.memoryChunk_5,
device->arguments.seedMemory[gpumgmt_thread->threadId],
device->arguments.outMemory[gpumgmt_thread->threadId],
device->arguments.refs,
device->arguments.idxs,
device->arguments.segments,
@ -1095,27 +1095,27 @@ void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data) {
profile->thrCost,
profile->segSize,
profile->segCount,
device->profile_info.threads_per_chunk,
gpumgmt_thread->threads_idx);
device->profileInfo.threads_per_chunk,
gpumgmt_thread->threadsIdx);
return (void *)1;
}
bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
cuda_gpumgmt_thread_data *gpumgmt_thread = (cuda_gpumgmt_thread_data *)user_data;
cuda_device_info *device = gpumgmt_thread->device;
cudaStream_t stream = (cudaStream_t)gpumgmt_thread->device_data;
CudaGpuMgmtThreadData *gpumgmt_thread = (CudaGpuMgmtThreadData *)user_data;
CudaDeviceInfo *device = gpumgmt_thread->device;
cudaStream_t stream = (cudaStream_t)gpumgmt_thread->deviceData;
size_t work_items = 4;
posthash <<<threads, work_items, BLAKE_SHARED_MEM, stream>>> (
device->arguments.hash_memory[gpumgmt_thread->thread_id],
device->arguments.out_memory[gpumgmt_thread->thread_id],
device->arguments.preseed_memory[gpumgmt_thread->thread_id]);
device->arguments.hashMemory[gpumgmt_thread->threadId],
device->arguments.outMemory[gpumgmt_thread->threadId],
device->arguments.preseedMemory[gpumgmt_thread->threadId]);
device->error = cudaMemcpyAsync(device->arguments.host_seed_memory[gpumgmt_thread->thread_id], device->arguments.hash_memory[gpumgmt_thread->thread_id], threads * (xmrig::ARGON2_HASHLEN + 4), cudaMemcpyDeviceToHost, stream);
device->error = cudaMemcpyAsync(device->arguments.hostSeedMemory[gpumgmt_thread->threadId], device->arguments.hashMemory[gpumgmt_thread->threadId], threads * (xmrig::ARGON2_HASHLEN + 4), cudaMemcpyDeviceToHost, stream);
if (device->error != cudaSuccess) {
device->error_message = "Error reading gpu memory.";
device->errorMessage = "Error reading gpu memory.";
gpumgmt_thread->unlock();
return false;
}
@ -1125,7 +1125,7 @@ bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, v
continue;
}
memcpy(memory, device->arguments.host_seed_memory[gpumgmt_thread->thread_id], threads * (xmrig::ARGON2_HASHLEN + 4));
memcpy(memory, device->arguments.hostSeedMemory[gpumgmt_thread->threadId], threads * (xmrig::ARGON2_HASHLEN + 4));
gpumgmt_thread->unlock();
return memory;