From f4de892742bf0e434e4e3f2d69cc6e0a8b8d930f Mon Sep 17 00:00:00 2001
From: Haifa Bogdan Adnan <bogdan_adnan@yahoo.com>
Date: Sun, 8 Sep 2019 01:18:03 +0300
Subject: [PATCH] OpenCL kernel optimization - next block preloading

---
 .../argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp     | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp
index cbd2d11d..cf4497b2 100644
--- a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp
+++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp
@@ -908,6 +908,11 @@ __kernel void fill_blocks(__global ulong *chunk_0,
             barrier(CLK_LOCAL_MEM_FENCE);
 
             for (int i=0;idx < seg_length;i++, idx++, cur_idx++) {
+    			next_block = memory + cur_idx * 2 * BLOCK_SIZE_ULONG;
+
+                if(with_xor == 1)
+                    next = vload4(wave_id, next_block);
+
                 ulong pseudo_rand = state[0];
 
                 if(lanes == 1) {
@@ -957,11 +962,6 @@ __kernel void fill_blocks(__global ulong *chunk_0,
 
         		ref = vload4(wave_id, memory + ref_idx * 2 * BLOCK_SIZE_ULONG);
 
-    			next_block = memory + cur_idx * 2 * BLOCK_SIZE_ULONG;
-
-                if(with_xor == 1)
-                    next = vload4(wave_id, next_block);
-
                 tmp ^= ref;
 
                 vstore4(tmp, id, state);