Fix for ROCm.
This commit is contained in:
parent
002fd008a6
commit
3d3a32087f
2 changed files with 880 additions and 885 deletions
|
@ -161,29 +161,27 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
|
||||||
mem_fence(CLK_LOCAL_MEM_FENCE);
|
mem_fence(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
# if (ALGO_FAMILY == FAMILY_CN_HEAVY)
|
# if (ALGO_FAMILY == FAMILY_CN_HEAVY)
|
||||||
{
|
__local uint4 xin[8][8];
|
||||||
__local uint4 xin[8][8];
|
|
||||||
|
|
||||||
/* Also left over threads perform this loop.
|
/* Also left over threads perform this loop.
|
||||||
* The left over thread results will be ignored
|
* The left over thread results will be ignored
|
||||||
*/
|
*/
|
||||||
#pragma unroll 16
|
#pragma unroll 16
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
#pragma unroll 10
|
#pragma unroll 10
|
||||||
for (int j = 0; j < 10; ++j) {
|
for (int j = 0; j < 10; ++j) {
|
||||||
uint4 t = ((uint4 *)ExpandedKey1)[j];
|
uint4 t = ((uint4 *)ExpandedKey1)[j];
|
||||||
t.s0 ^= AES0[BYTE(text.s0, 0)] ^ AES1[BYTE(text.s1, 1)] ^ AES2[BYTE(text.s2, 2)] ^ AES3[BYTE(text.s3, 3)];
|
t.s0 ^= AES0[BYTE(text.s0, 0)] ^ AES1[BYTE(text.s1, 1)] ^ AES2[BYTE(text.s2, 2)] ^ AES3[BYTE(text.s3, 3)];
|
||||||
t.s1 ^= AES0[BYTE(text.s1, 0)] ^ AES1[BYTE(text.s2, 1)] ^ AES2[BYTE(text.s3, 2)] ^ AES3[BYTE(text.s0, 3)];
|
t.s1 ^= AES0[BYTE(text.s1, 0)] ^ AES1[BYTE(text.s2, 1)] ^ AES2[BYTE(text.s3, 2)] ^ AES3[BYTE(text.s0, 3)];
|
||||||
t.s2 ^= AES0[BYTE(text.s2, 0)] ^ AES1[BYTE(text.s3, 1)] ^ AES2[BYTE(text.s0, 2)] ^ AES3[BYTE(text.s1, 3)];
|
t.s2 ^= AES0[BYTE(text.s2, 0)] ^ AES1[BYTE(text.s3, 1)] ^ AES2[BYTE(text.s0, 2)] ^ AES3[BYTE(text.s1, 3)];
|
||||||
t.s3 ^= AES0[BYTE(text.s3, 0)] ^ AES1[BYTE(text.s0, 1)] ^ AES2[BYTE(text.s1, 2)] ^ AES3[BYTE(text.s2, 3)];
|
t.s3 ^= AES0[BYTE(text.s3, 0)] ^ AES1[BYTE(text.s0, 1)] ^ AES2[BYTE(text.s1, 2)] ^ AES3[BYTE(text.s2, 3)];
|
||||||
text = t;
|
text = t;
|
||||||
}
|
|
||||||
|
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
|
||||||
xin[get_local_id(1)][get_local_id(0)] = text;
|
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
|
||||||
text = mix_and_propagate(xin);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
xin[get_local_id(1)][get_local_id(0)] = text;
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
text = mix_and_propagate(xin);
|
||||||
}
|
}
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue