OpenCL kernel optimization - preload the next block.

This commit is contained in:
Haifa Bogdan Adnan 2019-09-08 01:18:03 +03:00
parent f787b9f2cc
commit f4de892742

View file

@@ -908,6 +908,11 @@ __kernel void fill_blocks(__global ulong *chunk_0,
barrier(CLK_LOCAL_MEM_FENCE);
for (int i=0;idx < seg_length;i++, idx++, cur_idx++) {
next_block = memory + cur_idx * 2 * BLOCK_SIZE_ULONG;
if(with_xor == 1)
next = vload4(wave_id, next_block);
ulong pseudo_rand = state[0];
if(lanes == 1) {
@@ -957,11 +962,6 @@ __kernel void fill_blocks(__global ulong *chunk_0,
ref = vload4(wave_id, memory + ref_idx * 2 * BLOCK_SIZE_ULONG);
next_block = memory + cur_idx * 2 * BLOCK_SIZE_ULONG;
if(with_xor == 1)
next = vload4(wave_id, next_block);
tmp ^= ref;
vstore4(tmp, id, state);