diff --git a/CMakeLists.txt b/CMakeLists.txt index 60651d62..0ff926ab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ option(WITH_TLS "Enable OpenSSL support" ON) option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF) option(WITH_CUDA "Enable CUDA support" ON) option(WITH_OPENCL "Enable OpenCL support" ON) -option(WITH_OPENCL_DOUBLE_THREADS "Enable dual threads for OpenCL jobs" ON) +option(WITH_OPENCL_DOUBLE_THREADS "Enable dual threads for OpenCL jobs" OFF) option(WITH_CUDA_DOUBLE_THREADS "Enable dual threads for CUDA jobs" ON) include (CheckIncludeFile) diff --git a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp index 0f14e1cf..cbd2d11d 100644 --- a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp +++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp @@ -862,7 +862,7 @@ __kernel void fill_blocks(__global ulong *chunk_0, cur_idx = seg_idxs[0]; } - ulong4 nextref = vload4(wave_id, memory + ref_idx * 2 * BLOCK_SIZE_ULONG); + ref = vload4(wave_id, memory + ref_idx * 2 * BLOCK_SIZE_ULONG); for (int i=0;idx < seg_length;i++, idx++) { next_block = memory + (cur_idx & 0x7FFFFFFF) * 2 * BLOCK_SIZE_ULONG; @@ -870,7 +870,7 @@ __kernel void fill_blocks(__global ulong *chunk_0, if(with_xor == 1) next = vload4(wave_id, next_block); - ref = nextref; + tmp ^= ref; if (idx < seg_length - 1) { ref_idx = seg_refs[i + 1]; @@ -882,11 +882,9 @@ __kernel void fill_blocks(__global ulong *chunk_0, else cur_idx++; - nextref = vload4(wave_id, memory + ref_idx * 2 * BLOCK_SIZE_ULONG); + ref = vload4(wave_id, memory + ref_idx * 2 * BLOCK_SIZE_ULONG); } - tmp ^= ref; - vstore4(tmp, id, state); G1(state);