OpenCL kernel optimization: preload the next block at the start of each loop iteration.
This commit is contained in:
parent
f787b9f2cc
commit
f4de892742
1 changed file with 5 additions and 5 deletions
|
@@ -908,6 +908,11 @@ __kernel void fill_blocks(__global ulong *chunk_0,
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
for (int i=0;idx < seg_length;i++, idx++, cur_idx++) {
|
for (int i=0;idx < seg_length;i++, idx++, cur_idx++) {
|
||||||
|
next_block = memory + cur_idx * 2 * BLOCK_SIZE_ULONG;
|
||||||
|
|
||||||
|
if(with_xor == 1)
|
||||||
|
next = vload4(wave_id, next_block);
|
||||||
|
|
||||||
ulong pseudo_rand = state[0];
|
ulong pseudo_rand = state[0];
|
||||||
|
|
||||||
if(lanes == 1) {
|
if(lanes == 1) {
|
||||||
|
@@ -957,11 +962,6 @@ __kernel void fill_blocks(__global ulong *chunk_0,
|
||||||
|
|
||||||
ref = vload4(wave_id, memory + ref_idx * 2 * BLOCK_SIZE_ULONG);
|
ref = vload4(wave_id, memory + ref_idx * 2 * BLOCK_SIZE_ULONG);
|
||||||
|
|
||||||
next_block = memory + cur_idx * 2 * BLOCK_SIZE_ULONG;
|
|
||||||
|
|
||||||
if(with_xor == 1)
|
|
||||||
next = vload4(wave_id, next_block);
|
|
||||||
|
|
||||||
tmp ^= ref;
|
tmp ^= ref;
|
||||||
|
|
||||||
vstore4(tmp, id, state);
|
vstore4(tmp, id, state);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue