From f4de892742bf0e434e4e3f2d69cc6e0a8b8d930f Mon Sep 17 00:00:00 2001 From: Haifa Bogdan Adnan Date: Sun, 8 Sep 2019 01:18:03 +0300 Subject: [PATCH] OpenCL kernel optimization - next block preloading optimization. --- .../argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp index cbd2d11d..cf4497b2 100644 --- a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp +++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp @@ -908,6 +908,11 @@ __kernel void fill_blocks(__global ulong *chunk_0, barrier(CLK_LOCAL_MEM_FENCE); for (int i=0;idx < seg_length;i++, idx++, cur_idx++) { + next_block = memory + cur_idx * 2 * BLOCK_SIZE_ULONG; + + if(with_xor == 1) + next = vload4(wave_id, next_block); + ulong pseudo_rand = state[0]; if(lanes == 1) { @@ -957,11 +962,6 @@ __kernel void fill_blocks(__global ulong *chunk_0, ref = vload4(wave_id, memory + ref_idx * 2 * BLOCK_SIZE_ULONG); - next_block = memory + cur_idx * 2 * BLOCK_SIZE_ULONG; - - if(with_xor == 1) - next = vload4(wave_id, next_block); - tmp ^= ref; vstore4(tmp, id, state);