REDACTED-rig/src/backend/opencl/cl/rx/blake2b.cl

157 lines
6 KiB
Common Lisp

/*
Copyright (c) 2019 SChernykh
This file is part of RandomX OpenCL.
RandomX OpenCL is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX OpenCL is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX OpenCL. If not, see <http://www.gnu.org/licenses/>.
*/
/*
 * Message word schedule used by the G macro: 12 rows (one per round) of 16
 * indices into the 16-word message block m[]. Stored flat, so the entry for
 * round r, position k is blake2b_sigma[r * 16 + k].
 * Rows 10 and 11 repeat rows 0 and 1.
 */
__constant static const uchar blake2b_sigma[12 * 16] = {
    /* round  0 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    /* round  1 */ 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3,
    /* round  2 */ 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4,
    /* round  3 */  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8,
    /* round  4 */  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13,
    /* round  5 */  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9,
    /* round  6 */ 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11,
    /* round  7 */ 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10,
    /* round  8 */  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5,
    /* round  9 */ 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0,
    /* round 10 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    /* round 11 */ 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3,
};
/*
 * The eight 64-bit BLAKE2b initialization-vector words. They seed the
 * working state in blake2b_512_process_single_block and are XORed back in
 * when the final hash words are produced.
 */
enum Blake2b_IV
{
    iv0 = 0x6a09e667f3bcc908UL,
    iv1 = 0xbb67ae8584caa73bUL,
    iv2 = 0x3c6ef372fe94f82bUL,
    iv3 = 0xa54ff53a5f1d36f1UL,
    iv4 = 0x510e527fade682d1UL,
    iv5 = 0x9b05688c2b3e6c1fUL,
    iv6 = 0x1f83d9abfb41bd6bUL,
    iv7 = 0x5be0cd19137e2179UL,
};
/*
 * Rotates the 64-bit value `a` right by `shift` bits.
 * Implemented with the OpenCL rotate() built-in, which rotates left, so the
 * right rotation is expressed as a left rotation by (64 - shift).
 */
ulong rotr64(ulong a, ulong shift)
{
    const ulong left_amount = 64 - shift;
    return rotate(a, left_amount);
}
/*
 * BLAKE2b mixing step.
 *
 *   r          - round number; selects a 16-entry row of blake2b_sigma
 *   i          - step number within the round; selects the pair of message
 *                words at positions 2*i and 2*i + 1 of that row
 *   a, b, c, d - the four state words to mix; updated in place
 *
 * The expansion refers to `m` (message words), `blake2b_sigma` and `rotr64`
 * by name, so all three must be visible where the macro is used.
 * Every parameter is parenthesized so that expression arguments (for example
 * `r + 1`) index the schedule correctly. Arguments are still evaluated more
 * than once, so a..d must be side-effect-free lvalues and r, i must be
 * side-effect-free expressions.
 */
#define G(r, i, a, b, c, d) \
    do { \
        (a) = (a) + (b) + m[blake2b_sigma[(r) * 16 + 2 * (i) + 0]]; \
        (d) = rotr64((d) ^ (a), 32); \
        (c) = (c) + (d); \
        (b) = rotr64((b) ^ (c), 24); \
        (a) = (a) + (b) + m[blake2b_sigma[(r) * 16 + 2 * (i) + 1]]; \
        (d) = rotr64((d) ^ (a), 16); \
        (c) = (c) + (d); \
        (b) = rotr64((b) ^ (c), 63); \
    } while (0)
/*
 * One full round (round number r) over the 16-word working state `v`,
 * viewed as a 4x4 matrix stored row by row: steps 0-3 mix the four columns,
 * steps 4-7 mix the four diagonals.
 * The expansion refers to `v` by name (and, through G, to `m`), so both must
 * be visible where the macro is used.
 */
#define ROUND(r) \
    do { \
        G(r, 0, v[ 0], v[ 4], v[ 8], v[12]); \
        G(r, 1, v[ 1], v[ 5], v[ 9], v[13]); \
        G(r, 2, v[ 2], v[ 6], v[10], v[14]); \
        G(r, 3, v[ 3], v[ 7], v[11], v[15]); \
        G(r, 4, v[ 0], v[ 5], v[10], v[15]); \
        G(r, 5, v[ 1], v[ 6], v[11], v[12]); \
        G(r, 6, v[ 2], v[ 7], v[ 8], v[13]); \
        G(r, 7, v[ 3], v[ 4], v[ 9], v[14]); \
    } while (0)
/*
 * Runs rounds 0 through 11 in order.
 * NOTE: this expands to twelve separate statements ending in ';' and is not
 * wrapped in do { } while (0), so it must not be used as the unbraced body
 * of an if/else/loop.
 */
#define BLAKE2B_ROUNDS() \
    ROUND(0); ROUND(1); ROUND(2);  ROUND(3); \
    ROUND(4); ROUND(5); ROUND(6);  ROUND(7); \
    ROUND(8); ROUND(9); ROUND(10); ROUND(11);
/*
 * Hashes a message that fits in a single 16-word block.
 *
 *   h                 - receives the 8 output hash words
 *   m                 - the 16 message words (read by the ROUND/G macros)
 *   blockTemplateSize - message length in bytes; XORed into state word 12
 *
 * In BLAKE2b terms (RFC 7693) the constant 0x01010040 corresponds to the
 * parameter word for a 64-byte digest with no key, the length XORed into
 * word 12 is the byte counter, and the inverted iv6 in word 14 marks the
 * block as the final one.
 */
void blake2b_512_process_single_block(ulong *h, const ulong* m, uint blockTemplateSize)
{
    /* First chaining word; reused below when the result is folded. */
    const ulong h0_init = iv0 ^ 0x01010040;

    /* Must be named `v`: the ROUND macro accesses it by name. */
    ulong v[16];

    /* Lower half: initial chaining value. */
    v[ 0] = h0_init;
    v[ 1] = iv1;
    v[ 2] = iv2;
    v[ 3] = iv3;
    v[ 4] = iv4;
    v[ 5] = iv5;
    v[ 6] = iv6;
    v[ 7] = iv7;

    /* Upper half: IV words, with the length and the inversion mixed in. */
    v[ 8] = iv0;
    v[ 9] = iv1;
    v[10] = iv2;
    v[11] = iv3;
    v[12] = iv4 ^ blockTemplateSize;
    v[13] = iv5;
    v[14] = ~iv6;
    v[15] = iv7;

    BLAKE2B_ROUNDS();

    /* Fold both halves of the state back into the initial chaining value. */
    h[0] = h0_init ^ v[0] ^ v[ 8];
    h[1] = iv1     ^ v[1] ^ v[ 9];
    h[2] = iv2     ^ v[2] ^ v[10];
    h[3] = iv3     ^ v[3] ^ v[11];
    h[4] = iv4     ^ v[4] ^ v[12];
    h[5] = iv5     ^ v[5] ^ v[13];
    h[6] = iv6     ^ v[6] ^ v[14];
    h[7] = iv7     ^ v[7] ^ v[15];
}
/*
 * Computes one 64-byte hash per work-item from the block template with that
 * work-item's nonce patched in.
 *
 *   out               - output buffer; work-item g writes 8 ulong words
 *                       starting at word index g * 8
 *   blockTemplate     - input message, read as 64-bit words
 *   blockTemplateSize - message length in bytes; only the first 128 bytes
 *                       (16 words) are ever loaded, so sizes above 128 are
 *                       not hashed correctly by this single-block kernel
 *   start_nonce       - nonce of work-item 0; work-item g uses
 *                       start_nonce + g (32-bit wrap-around)
 *
 * The kernel requires a work-group size of exactly 64x1x1.
 */
__attribute__((reqd_work_group_size(64, 1, 1)))
__kernel void blake2b_initial_hash(__global void *out, __global const void* blockTemplate, uint blockTemplateSize, uint start_nonce)
{
    const uint global_index = get_global_id(0);
    __global const ulong* p = (__global const ulong*) blockTemplate;

    /* Load every word that contains at least one message byte; zero the rest. */
    ulong m[16] = {
        (blockTemplateSize >   0) ? p[ 0] : 0,
        (blockTemplateSize >   8) ? p[ 1] : 0,
        (blockTemplateSize >  16) ? p[ 2] : 0,
        (blockTemplateSize >  24) ? p[ 3] : 0,
        (blockTemplateSize >  32) ? p[ 4] : 0,
        (blockTemplateSize >  40) ? p[ 5] : 0,
        (blockTemplateSize >  48) ? p[ 6] : 0,
        (blockTemplateSize >  56) ? p[ 7] : 0,
        (blockTemplateSize >  64) ? p[ 8] : 0,
        (blockTemplateSize >  72) ? p[ 9] : 0,
        (blockTemplateSize >  80) ? p[10] : 0,
        (blockTemplateSize >  88) ? p[11] : 0,
        (blockTemplateSize >  96) ? p[12] : 0,
        (blockTemplateSize > 104) ? p[13] : 0,
        (blockTemplateSize > 112) ? p[14] : 0,
        (blockTemplateSize > 120) ? p[15] : 0,
    };

    /*
     * If the message ends inside a word, clear that word's bytes past the
     * end of the message. The index is bounds-checked so that an oversized
     * blockTemplateSize (> 128, not a multiple of 8) can no longer write
     * past the end of m[].
     */
    const uint tail_bytes = blockTemplateSize % sizeof(ulong);
    const uint tail_word  = blockTemplateSize / sizeof(ulong);
    if (tail_bytes && (tail_word < 16))
        m[tail_word] &= (ulong)(-1) >> (64 - tail_bytes * 8);

    /*
     * Patch the 32-bit nonce into the message: its lowest byte replaces the
     * top byte of m[4] and its upper three bytes replace the low three bytes
     * of m[5].
     */
    const ulong nonce = start_nonce + global_index;
    m[4] = (m[4] & ((ulong)(-1) >> 8)) | (nonce << 56);
    m[5] = (m[5] & ((ulong)(-1) << 24)) | (nonce >> 8);

    ulong hash[8];
    blake2b_512_process_single_block(hash, m, blockTemplateSize);

    /* Store this work-item's 8 hash words. */
    __global ulong* t = ((__global ulong*) out) + global_index * 8;
    t[0] = hash[0];
    t[1] = hash[1];
    t[2] = hash[2];
    t[3] = hash[3];
    t[4] = hash[4];
    t[5] = hash[5];
    t[6] = hash[6];
    t[7] = hash[7];
}
/*
 * blake2b_double_block.cl is included twice below, each time with a
 * different out_len and a different pair of *_name macros. Presumably the
 * included file uses those macros as the names of the functions it defines,
 * yielding a "_32" and a "_64" variant -- confirm against that file.
 * in_len is defined once, is shared by both inclusions, and is not
 * undefined within this section.
 */
#define in_len 256
/* First inclusion: out_len = 32, names suffixed with _32. */
#define out_len 32
#define blake2b_512_process_double_block_name blake2b_512_process_double_block_32
#define blake2b_hash_registers_name blake2b_hash_registers_32
#include "blake2b_double_block.cl"
/* Clear the per-inclusion macros so they can be redefined. */
#undef blake2b_hash_registers_name
#undef blake2b_512_process_double_block_name
#undef out_len
/* Second inclusion: out_len = 64, names suffixed with _64. */
#define out_len 64
#define blake2b_512_process_double_block_name blake2b_512_process_double_block_64
#define blake2b_hash_registers_name blake2b_hash_registers_64
#include "blake2b_double_block.cl"
/* Clear the per-inclusion macros again after the second inclusion. */
#undef blake2b_hash_registers_name
#undef blake2b_512_process_double_block_name
#undef out_len