OpenCL kernel optimization: preload the next block earlier in the fill_blocks loop.

2019-09-08 01:18:03 +03:00 · 2019-09-08 01:18:03 +03:00 · f4de892742
commit f4de892742
parent f787b9f2cc
1 changed file with 5 additions and 5 deletions
--- a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp
+++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp
@@ -908,6 +908,11 @@ __kernel void fill_blocks(__global ulong *chunk_0,
            barrier(CLK_LOCAL_MEM_FENCE);
            for (int i=0;idx < seg_length;i++, idx++, cur_idx++) {
    			next_block = memory + cur_idx * 2 * BLOCK_SIZE_ULONG;
                if(with_xor == 1)
                    next = vload4(wave_id, next_block);
                ulong pseudo_rand = state[0];
                if(lanes == 1) {
@@ -957,11 +962,6 @@ __kernel void fill_blocks(__global ulong *chunk_0,
        		ref = vload4(wave_id, memory + ref_idx * 2 * BLOCK_SIZE_ULONG);
    			next_block = memory + cur_idx * 2 * BLOCK_SIZE_ULONG;
                if(with_xor == 1)
                    next = vload4(wave_id, next_block);
                tmp ^= ref;
                vstore4(tmp, id, state);