Add cudaSyncronizeStream and cuda error check for kernel run.

2019-09-06 12:29:30 +03:00 · 2019-09-06 12:29:30 +03:00 · 05758cda36
commit 05758cda36
parent 54f7b97c44
2 changed files with 40 additions and 6 deletions
--- a/src/crypto/argon2_hasher/hash/gpu/cuda/CudaHasher.cpp
+++ b/src/crypto/argon2_hasher/hash/gpu/cuda/CudaHasher.cpp
@ -53,6 +53,8 @@ bool CudaHasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) {
 }
 vector<CudaDeviceInfo *> CudaHasher::queryCudaDevices(cudaError_t &error, string &error_message) {
    cudaSetDeviceFlags(cudaDeviceBlockingSync);
 	vector<CudaDeviceInfo *> devices;
 	int devCount = 0;
 	error = cudaGetDeviceCount(&devCount);
--- a/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu
+++ b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu
@ -1039,6 +1039,18 @@ void cuda_free(CudaDeviceInfo *device) {
 	cudaDeviceReset();
 }
 inline bool cudaCheckError(cudaError &err, string &errStr)
 {
    err = cudaGetLastError();
    if ( cudaSuccess != err )
    {
        errStr = string("CUDA error: ") + cudaGetErrorString( err );
        return false;
    }
    return true;
 }
 bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
    CudaGpuMgmtThreadData *gpumgmt_thread = (CudaGpuMgmtThreadData *)user_data;
    CudaDeviceInfo *device = gpumgmt_thread->device;
@ -1069,6 +1081,12 @@ bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, vo
 			profile->saltLen,
            threads);
    bool success = cudaCheckError(device->error, device->errorMessage);
    if(!success) {
        gpumgmt_thread->unlock();
        return false;
    }
    return true;
 }
@ -1098,7 +1116,13 @@ void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data) {
 			device->profileInfo.threads_per_chunk,
            gpumgmt_thread->threadsIdx);
-	return (void *)1;
+	bool success = cudaCheckError(device->error, device->errorMessage);
 	if(!success) {
        gpumgmt_thread->unlock();
        return NULL;
    }
 	return  (void *)1;
 }
 bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
@ -1113,6 +1137,11 @@ bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, v
            device->arguments.outMemory[gpumgmt_thread->threadId],
            device->arguments.preseedMemory[gpumgmt_thread->threadId]);
    if(!cudaCheckError(device->error, device->errorMessage)) {
        gpumgmt_thread->unlock();
        return false;
    }
 	device->error = cudaMemcpyAsync(device->arguments.hostSeedMemory[gpumgmt_thread->threadId], device->arguments.hashMemory[gpumgmt_thread->threadId], threads * (xmrig::ARGON2_HASHLEN + 4), cudaMemcpyDeviceToHost, stream);
 	if (device->error != cudaSuccess) {
 		device->errorMessage = "Error reading gpu memory.";
@ -1120,13 +1149,16 @@ bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, v
 		return false;
 	}
-	while(cudaStreamQuery(stream) != cudaSuccess) {
+    cudaStreamSynchronize(stream);
-		this_thread::sleep_for(chrono::milliseconds(10));
+
-		continue;
+    bool success = cudaCheckError(device->error, device->errorMessage);
-	}
+    if(!success) {
        gpumgmt_thread->unlock();
        return false;
    }
    memcpy(memory, device->arguments.hostSeedMemory[gpumgmt_thread->threadId], threads * (xmrig::ARGON2_HASHLEN + 4));
 	gpumgmt_thread->unlock();
-	return memory;
+	return true;
 }