Cryptonight OpenCL: fix for long input data
This commit is contained in:
parent
e0eed7d5d6
commit
bd8cf54a0b
6 changed files with 700 additions and 704 deletions
|
@ -71,7 +71,7 @@ inline ulong getIdx()
|
|||
|
||||
|
||||
__attribute__((reqd_work_group_size(8, 8, 1)))
|
||||
__kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
|
||||
__kernel void cn0(__global ulong *input, int inlen, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
|
||||
{
|
||||
uint ExpandedKey1[40];
|
||||
__local uint AES0[256], AES1[256], AES2[256], AES3[256];
|
||||
|
@ -109,34 +109,25 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
|
|||
if (get_local_id(1) == 0) {
|
||||
__local ulong* State = State_buf + get_local_id(0) * 25;
|
||||
|
||||
((__local ulong8 *)State)[0] = vload8(0, input);
|
||||
State[8] = input[8];
|
||||
State[9] = input[9];
|
||||
State[10] = input[10];
|
||||
State[11] = input[11];
|
||||
State[12] = input[12];
|
||||
State[13] = input[13];
|
||||
State[14] = input[14];
|
||||
State[15] = input[15];
|
||||
|
||||
((__local uint *)State)[9] &= 0x00FFFFFFU;
|
||||
((__local uint *)State)[9] |= (((uint)get_global_id(0)) & 0xFF) << 24;
|
||||
((__local uint *)State)[10] &= 0xFF000000U;
|
||||
/* explicit cast to `uint` is required because some OpenCL implementations (e.g. NVIDIA)
|
||||
* handle get_global_id and get_global_offset as signed long long int and add
|
||||
* 0xFFFFFFFF... to `get_global_id` if we set on host side a 32bit offset where the first bit is `1`
|
||||
* (even if it is correct casted to unsigned on the host)
|
||||
*/
|
||||
((__local uint *)State)[10] |= (((uint)get_global_id(0) >> 8));
|
||||
|
||||
// Last bit of padding
|
||||
State[16] = 0x8000000000000000UL;
|
||||
|
||||
for (int i = 17; i < 25; ++i) {
|
||||
State[i] = 0x00UL;
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 25; ++i) {
|
||||
State[i] = 0;
|
||||
}
|
||||
|
||||
keccakf1600_2(State);
|
||||
// Input length must be a multiple of 136 and padded on the host side
|
||||
for (int i = 0; inlen > 0; i += 17, inlen -= 136) {
|
||||
#pragma unroll
|
||||
for (int j = 0; j < 17; ++j) {
|
||||
State[j] ^= input[i + j];
|
||||
}
|
||||
if (i == 0) {
|
||||
((__local uint *)State)[9] &= 0x00FFFFFFU;
|
||||
((__local uint *)State)[9] |= (((uint)get_global_id(0)) & 0xFF) << 24;
|
||||
((__local uint *)State)[10] &= 0xFF000000U;
|
||||
((__local uint *)State)[10] |= (((uint)get_global_id(0) >> 8));
|
||||
}
|
||||
keccakf1600_2(State);
|
||||
}
|
||||
|
||||
#pragma unroll 1
|
||||
for (int i = 0; i < 25; ++i) {
|
||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue