REDACTED-rig/src/crypto/argon2_hasher/hash/gpu/cuda/CudaHasher.cpp
2019-08-27 11:32:23 +03:00

340 lines
9.5 KiB
C++

//
// Created by Haifa Bogdan Adnan on 03/08/2018.
//
#include <crypto/Argon2_constants.h>
#include "../../../common/common.h"
#include "crypto/argon2_hasher/hash/Hasher.h"
#include "crypto/argon2_hasher/hash/argon2/Argon2.h"
#if defined(WITH_CUDA)
#include <cuda_runtime.h>
#include <driver_types.h>
#include "CudaHasher.h"
#include "../../../common/DLLExport.h"
CudaHasher::CudaHasher() {
m_type = "GPU";
m_subType = "CUDA";
m_shortSubType = "NVD";
m_intensity = 0;
m_description = "";
m_computingThreads = 0;
}
CudaHasher::~CudaHasher() {
this->cleanup();
}
bool CudaHasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) {
cudaError_t error = cudaSuccess;
string error_message;
m_profile = getArgon2Profile(algorithm, variant);
m_devices = queryCudaDevices(error, error_message);
if(error != cudaSuccess) {
m_description = "No compatible GPU detected: " + error_message;
return false;
}
if (m_devices.empty()) {
m_description = "No compatible GPU detected.";
return false;
}
return true;
}
vector<CudaDeviceInfo *> CudaHasher::queryCudaDevices(cudaError_t &error, string &error_message) {
vector<CudaDeviceInfo *> devices;
int devCount = 0;
error = cudaGetDeviceCount(&devCount);
if(error != cudaSuccess) {
error_message = "Error querying CUDA device count.";
return devices;
}
if(devCount == 0)
return devices;
for (int i = 0; i < devCount; ++i)
{
CudaDeviceInfo *dev = getDeviceInfo(i);
if(dev == NULL)
continue;
if(dev->error != cudaSuccess) {
error = dev->error;
error_message = dev->errorMessage;
continue;
}
devices.push_back(dev);
}
return devices;
}
CudaDeviceInfo *CudaHasher::getDeviceInfo(int device_index) {
CudaDeviceInfo *device_info = new CudaDeviceInfo();
device_info->error = cudaSuccess;
device_info->cudaIndex = device_index;
device_info->error = cudaSetDevice(device_index);
if(device_info->error != cudaSuccess) {
device_info->errorMessage = "Error setting current device.";
return device_info;
}
cudaDeviceProp devProp;
device_info->error = cudaGetDeviceProperties(&devProp, device_index);
if(device_info->error != cudaSuccess) {
device_info->errorMessage = "Error setting current device.";
return device_info;
}
device_info->deviceString = devProp.name;
size_t freemem, totalmem;
device_info->error = cudaMemGetInfo(&freemem, &totalmem);
if(device_info->error != cudaSuccess) {
device_info->errorMessage = "Error setting current device.";
return device_info;
}
device_info->freeMemSize = freemem;
device_info->maxAllocableMemSize = freemem / 4;
double mem_in_gb = totalmem / 1073741824.0;
stringstream ss;
ss << setprecision(2) << mem_in_gb;
device_info->deviceString += (" (" + ss.str() + "GB)");
return device_info;
}
bool CudaHasher::configure(xmrig::HasherConfig &config) {
int index = config.getGPUCardsCount();
double intensity = 0;
int total_threads = 0;
intensity = config.getAverageGPUIntensity();
if (intensity == 0) {
m_intensity = 0;
m_description = "Status: DISABLED - by user.";
return false;
}
bool cards_selected = false;
intensity = 0;
for(vector<CudaDeviceInfo *>::iterator d = m_devices.begin(); d != m_devices.end(); d++, index++) {
stringstream ss;
ss << "["<< (index + 1) << "] " << (*d)->deviceString;
string device_description = ss.str();
(*d)->deviceIndex = index;
(*d)->profileInfo.profile = m_profile;
if(config.gpuFilter().size() > 0) {
bool found = false;
for(xmrig::GPUFilter fit : config.gpuFilter()) {
if(device_description.find(fit.filter) != string::npos) {
found = true;
break;
}
}
if(!found) {
(*d)->profileInfo.threads = 0;
ss << " - DISABLED" << endl;
m_description += ss.str();
continue;
}
else {
cards_selected = true;
}
}
else {
cards_selected = true;
}
ss << endl;
double device_intensity = config.getGPUIntensity((*d)->deviceIndex);
m_description += ss.str();
if(!(setupDeviceInfo((*d), device_intensity))) {
m_description += (*d)->errorMessage;
m_description += "\n";
continue;
};
DeviceInfo device;
char bus_id[100];
if(cudaDeviceGetPCIBusId(bus_id, 100, (*d)->cudaIndex) == cudaSuccess) {
device.bus_id = bus_id;
int domain_separator = device.bus_id.find(":");
if(domain_separator != string::npos) {
device.bus_id.erase(0, domain_separator + 1);
}
}
device.name = (*d)->deviceString;
device.intensity = device_intensity;
storeDeviceInfo((*d)->deviceIndex, device);
m_enabledDevices.push_back(*d);
total_threads += (*d)->profileInfo.threads;
intensity += device_intensity;
}
config.addGPUCardsCount(index - config.getGPUCardsCount());
if(!cards_selected) {
m_intensity = 0;
m_description += "Status: DISABLED - no card enabled because of filtering.";
return false;
}
if (total_threads == 0) {
m_intensity = 0;
m_description += "Status: DISABLED - not enough resources.";
return false;
}
if(!buildThreadData())
return false;
m_intensity = intensity / m_enabledDevices.size();
m_computingThreads = m_enabledDevices.size() * 2; // 2 computing threads for each device
m_description += "Status: ENABLED - with " + to_string(total_threads) + " threads.";
return true;
}
void CudaHasher::cleanup() {
for(vector<CudaDeviceInfo *>::iterator d = m_devices.begin(); d != m_devices.end(); d++) {
cuda_free(*d);
}
}
bool CudaHasher::setupDeviceInfo(CudaDeviceInfo *device, double intensity) {
device->profileInfo.threads_per_chunk = (uint32_t)(device->maxAllocableMemSize / device->profileInfo.profile->memSize);
size_t chunk_size = device->profileInfo.threads_per_chunk * device->profileInfo.profile->memSize;
if(chunk_size == 0) {
device->error = cudaErrorInitializationError;
device->errorMessage = "Not enough memory on GPU.";
return false;
}
uint64_t usable_memory = device->freeMemSize;
double chunks = (double)usable_memory / (double)chunk_size;
uint32_t max_threads = (uint32_t)(device->profileInfo.threads_per_chunk * chunks);
if(max_threads == 0) {
device->error = cudaErrorInitializationError;
device->errorMessage = "Not enough memory on GPU.";
return false;
}
device->profileInfo.threads = (uint32_t)(max_threads * intensity / 100.0);
device->profileInfo.threads = (device->profileInfo.threads / 2) * 2; // make it divisible by 2 to allow for parallel kernel execution
if(max_threads > 0 && device->profileInfo.threads == 0 && intensity > 0)
device->profileInfo.threads = 2;
chunks = (double)device->profileInfo.threads / (double)device->profileInfo.threads_per_chunk;
cuda_allocate(device, chunks, chunk_size);
if(device->error != cudaSuccess)
return false;
return true;
}
bool CudaHasher::buildThreadData() {
m_threadData = new CudaGpuMgmtThreadData[m_enabledDevices.size() * 2];
for(int i=0; i < m_enabledDevices.size(); i++) {
CudaDeviceInfo *device = m_enabledDevices[i];
for(int threadId = 0; threadId < 2; threadId ++) {
CudaGpuMgmtThreadData &thread_data = m_threadData[i * 2 + threadId];
thread_data.device = device;
thread_data.threadId = threadId;
cudaStream_t stream;
device->error = cudaStreamCreate(&stream);
if(device->error != cudaSuccess) {
LOG("Error running kernel: (" + to_string(device->error) + ") cannot create cuda stream.");
return false;
}
thread_data.deviceData = stream;
#ifdef PARALLEL_CUDA
if(threadId == 0) {
thread_data.threadsIdx = 0;
thread_data.threads = device->profileInfo.threads / 2;
}
else {
thread_data.threadsIdx = device->profileInfo.threads / 2;
thread_data.threads = device->profileInfo.threads - thread_data.threadsIdx;
}
#else
thread_data.threadsIdx = 0;
thread_data.threads = device->profileInfo.threads;
#endif
thread_data.argon2 = new Argon2(cuda_kernel_prehasher, cuda_kernel_filler, cuda_kernel_posthasher,
nullptr, &thread_data);
thread_data.argon2->setThreads(thread_data.threads);
thread_data.hashData.outSize = xmrig::ARGON2_HASHLEN + 4;
}
}
return true;
}
int CudaHasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) {
CudaGpuMgmtThreadData &threadData = m_threadData[threadIdx];
cudaSetDevice(threadData.device->cudaIndex);
threadData.hashData.input = input;
threadData.hashData.inSize = size;
threadData.hashData.output = output;
int hashCount = threadData.argon2->generateHashes(*m_profile, threadData.hashData);
if(threadData.device->error != cudaSuccess) {
LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->errorMessage);
return 0;
}
uint32_t *nonce = ((uint32_t *)(((uint8_t*)threadData.hashData.input) + 39));
(*nonce) += threadData.threads;
return hashCount;
}
size_t CudaHasher::parallelism(int workerIdx) {
CudaGpuMgmtThreadData &threadData = m_threadData[workerIdx];
return threadData.threads;
}
size_t CudaHasher::deviceCount() {
return m_enabledDevices.size();
}
REGISTER_HASHER(CudaHasher);
#endif //WITH_CUDA