Adds Monero v1 support (untested so far)

This commit is contained in:
Unknown 2018-03-08 23:46:25 +01:00
parent 3f46dfeb08
commit aee09415c5
5 changed files with 177 additions and 16 deletions

2
.gitignore vendored
View file

@ -5,7 +5,7 @@
/.idea /.idea
/CMakeFiles /CMakeFiles
/src/3rdparty /src/3rdparty
/cmake-build-debug /cmake-build-*
CMakeCache.txt CMakeCache.txt
cmake_install.cmake cmake_install.cmake
Makefile Makefile

View file

@ -115,19 +115,19 @@ bool CryptoNight::selfTest(int algo)
ctx->memory = (uint8_t *) _mm_malloc(MEMORY * 6, 16); ctx->memory = (uint8_t *) _mm_malloc(MEMORY * 6, 16);
cryptonight_hash_ctx[0](test_input, 76, output, ctx); cryptonight_hash_ctx[0](test_input, 76, output, ctx);
bool resultSingle = memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output1 : test_output0, 32) == 0; bool resultSingle = memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output_light : test_output, 32) == 0;
cryptonight_hash_ctx[1](test_input, 76, output, ctx); cryptonight_hash_ctx[1](test_input, 76, output, ctx);
bool resultDouble = memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output1 : test_output0, 64) == 0; bool resultDouble = memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output_light : test_output, 64) == 0;
cryptonight_hash_ctx[2](test_input, 76, output, ctx); cryptonight_hash_ctx[2](test_input, 76, output, ctx);
bool resultTriple = memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output1 : test_output0, 96) == 0; bool resultTriple = memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output_light : test_output, 96) == 0;
cryptonight_hash_ctx[3](test_input, 76, output, ctx); cryptonight_hash_ctx[3](test_input, 76, output, ctx);
bool resultQuadruple = memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output1 : test_output0, 128) == 0; bool resultQuadruple = memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output_light : test_output, 128) == 0;
cryptonight_hash_ctx[4](test_input, 76, output, ctx); cryptonight_hash_ctx[4](test_input, 76, output, ctx);
bool resultQuintuple = memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output1 : test_output0, 160) == 0; bool resultQuintuple = memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output_light : test_output, 160) == 0;
_mm_free(ctx->memory); _mm_free(ctx->memory);
_mm_free(ctx); _mm_free(ctx);

View file

@ -0,0 +1,57 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __CRYPTONIGHT_MONERO_H__
#define __CRYPTONIGHT_MONERO_H__


#include <stdint.h>
#include <string.h>


// VARIANT ALTERATIONS
//
// Helper macros for the Monero-specific tweaks applied inside the CryptoNight
// hashing routines. They are textual macros on purpose: each one refers to
// names that must already exist at the expansion site.
//
//   input  - pointer to the first blob (must be static_cast-able to
//            const uint8_t*); blob number `part` starts at input + part * size
//   size   - length in bytes of ONE blob
//   ctx    - pointer to the hashing context; ctx->state[part] must hold the
//            200-byte Keccak state already computed for blob `part`
//
// The tweaks are only active when the first byte of the blob (its version
// byte) is greater than 6.
//
// TODO (carried over from the original code): the whole set is meant to be
// gated on a MONERO algorithm switch ("if (MONERO)"), which does not exist yet.


/*
 * VARIANT1_INIT(part)
 *
 * Declares, in the ENCLOSING scope (so it cannot be wrapped in a block):
 *   const uint8_t version<part>  - first byte of blob `part`
 *   uint64_t      tweak1_2_<part> - 0, or, when version<part> > 6, the 8 bytes
 *                                   at blob offset 35 XORed with the 64-bit
 *                                   word at index 24 (byte offset 192) of
 *                                   ctx->state[part]
 *
 * Precondition: when the version byte is > 6 the macro reads blob bytes
 * 35..42, so `size` must be at least 43. This is NOT checked here.
 *
 * Both 64-bit values are fetched with memcpy: offset 35 is never 8-byte
 * aligned, so dereferencing a casted uint64_t pointer would be a misaligned
 * (and aliasing-violating) access. The byte order is the host's, exactly as
 * with a direct load.
 */
#define VARIANT1_INIT(part) \
    const uint8_t version##part = static_cast<const uint8_t*>(input)[part * size]; \
    uint64_t tweak1_2_##part = 0; \
    if (version##part > 6) { \
        uint64_t variant1_blob_word; \
        uint64_t variant1_state_word; \
        memcpy(&variant1_blob_word, \
               static_cast<const uint8_t*>(input) + 35 + part * size, \
               sizeof(variant1_blob_word)); \
        memcpy(&variant1_state_word, \
               reinterpret_cast<const uint8_t*>(ctx->state[part]) + 24 * sizeof(uint64_t), \
               sizeof(variant1_state_word)); \
        tweak1_2_##part = variant1_blob_word ^ variant1_state_word; \
    }


/*
 * VARIANT1_1(p, part)
 *
 * When version<part> > 6, rewrites the byte at offset 11 of the memory that
 * `p` points to: bits 4, 5 and 0 of that byte select a 2-bit entry from the
 * packed table 0x75310, and the entry is XORed into bits 4-5 of the byte.
 *
 * `p` may be a pointer to const (the callers index scratchpads declared as
 * const uint8_t*); constness is removed explicitly. `p` is evaluated once.
 * Expands to a single statement, so it is safe inside if/else.
 * Requires VARIANT1_INIT(part) earlier in the same scope.
 */
#define VARIANT1_1(p, part) \
    do { \
        if (version##part > 6) { \
            uint8_t* const variant1_byte = \
                const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(p)) + 11; \
            const uint8_t  variant1_tmp   = *variant1_byte; \
            const uint32_t variant1_table = 0x75310; \
            const uint8_t  variant1_index = (((variant1_tmp >> 3) & 6) | (variant1_tmp & 1)) << 1; \
            *variant1_byte = static_cast<uint8_t>(variant1_tmp ^ ((variant1_table >> variant1_index) & 0x30)); \
        } \
    } while (0)


/*
 * VARIANT1_2(p, part)
 *
 * When version<part> > 6, XORs tweak1_2_<part> into the lvalue `p`.
 * The callers apply it once before storing `p` to the scratchpad and once
 * after, so the tweak affects only the stored value.
 * Expands to a single statement, so it is safe inside if/else.
 * Requires VARIANT1_INIT(part) earlier in the same scope.
 */
#define VARIANT1_2(p, part) \
    do { \
        if (version##part > 6) { \
            (p) ^= tweak1_2_##part; \
        } \
    } while (0)


#endif /* __CRYPTONIGHT_MONERO_H__ */

View file

@ -25,7 +25,7 @@
#define __CRYPTONIGHT_TEST_H__ #define __CRYPTONIGHT_TEST_H__
const static uint8_t test_input[456] = { const static uint8_t test_input[] = {
0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19, 0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9, 0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F, 0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
@ -59,7 +59,7 @@ const static uint8_t test_input[456] = {
}; };
const static uint8_t test_output0[192] = { const static uint8_t test_output[] = {
0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66, 0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F, 0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F,
0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7, 0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
@ -75,7 +75,7 @@ const static uint8_t test_output0[192] = {
}; };
const static uint8_t test_output1[192] = { const static uint8_t test_output_light[] = {
0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE, 0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD, 0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD,
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E, 0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
@ -91,4 +91,9 @@ const static uint8_t test_output1[192] = {
}; };
/*
 * 32-byte test vector added together with the Monero variant support.
 *
 * NOTE(review): nothing in the visible code reads this array yet. Its length
 * (32 bytes) equals the digest length that selfTest compares, so it may be an
 * expected hash rather than an input blob, despite the name - confirm.
 */
static const uint8_t test_input_monero_slow[] = {
    0xB5, 0xA7, 0xF6, 0x3A, 0xBB, 0x94, 0xD0, 0x7D,
    0x1A, 0x64, 0x45, 0xC3, 0x6C, 0x07, 0xC7, 0xE8,
    0x32, 0x7F, 0xE6, 0x1B, 0x16, 0x47, 0xE3, 0x91,
    0xB4, 0xC7, 0xED, 0xAE, 0x5D, 0xE5, 0x7A, 0x3D
};
#endif /* __CRYPTONIGHT_TEST_H__ */ #endif /* __CRYPTONIGHT_TEST_H__ */

View file

@ -36,6 +36,7 @@
#include "crypto/CryptoNight.h" #include "crypto/CryptoNight.h"
#include "crypto/CryptoNight_monero.h"
#include "crypto/soft_aes.h" #include "crypto/soft_aes.h"
@ -48,7 +49,6 @@ extern "C"
#include "crypto/c_skein.h" #include "crypto/c_skein.h"
} }
static inline void do_blake_hash(const void* input, size_t len, char* output) static inline void do_blake_hash(const void* input, size_t len, char* output)
{ {
blake256_hash(reinterpret_cast<uint8_t*>(output), static_cast<const uint8_t*>(input), len); blake256_hash(reinterpret_cast<uint8_t*>(output), static_cast<const uint8_t*>(input), len);
@ -332,10 +332,18 @@ public:
uint64_t ah[NUM_HASH_BLOCKS]; uint64_t ah[NUM_HASH_BLOCKS];
__m128i bx[NUM_HASH_BLOCKS]; __m128i bx[NUM_HASH_BLOCKS];
uint64_t idx[NUM_HASH_BLOCKS]; uint64_t idx[NUM_HASH_BLOCKS];
uint64_t tweak1_2[NUM_HASH_BLOCKS];
uint64_t version[NUM_HASH_BLOCKS];
for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) { for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size, keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size, ctx->state[hashBlock], 200);
ctx->state[hashBlock], 200); version[hashBlock] = static_cast<const uint8_t*>(input)[hashBlock * size];
/*if (MONERO)*/ {
if (version[hashBlock] > 6) {
tweak1_2[hashBlock] = (*reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35 + hashBlock * size) ^
*(reinterpret_cast<const uint64_t*>(ctx->state[hashBlock]) + 24));
}
}
} }
for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) { for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
@ -363,6 +371,16 @@ public:
_mm_store_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK], _mm_store_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK],
_mm_xor_si128(bx[hashBlock], cx)); _mm_xor_si128(bx[hashBlock], cx));
/*if (MONERO)*/ {
if (version[hashBlock] > 6) {
const uint8_t tmp = reinterpret_cast<const uint8_t*>(&l[hashBlock][idx[hashBlock] & MASK])[11];
static const uint32_t table = 0x75310;
const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1;
((uint8_t*)(&l[hashBlock][idx[hashBlock] & MASK]))[11] = tmp ^ ((table >> index) & 0x30);
}
}
idx[hashBlock] = EXTRACT64(cx); idx[hashBlock] = EXTRACT64(cx);
bx[hashBlock] = cx; bx[hashBlock] = cx;
@ -374,9 +392,21 @@ public:
al[hashBlock] += hi; al[hashBlock] += hi;
ah[hashBlock] += lo; ah[hashBlock] += lo;
/*if (MONERO)*/ {
if (version[hashBlock] > 6) {
ah[hashBlock] ^= tweak1_2[hashBlock];
}
}
((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock]; ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock]; ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
/*if (MONERO)*/ {
if (version[hashBlock] > 6) {
ah[hashBlock] ^= tweak1_2[hashBlock];
}
}
ah[hashBlock] ^= ch; ah[hashBlock] ^= ch;
al[hashBlock] ^= cl; al[hashBlock] ^= cl;
idx[hashBlock] = al[hashBlock]; idx[hashBlock] = al[hashBlock];
@ -410,6 +440,8 @@ public:
keccak(static_cast<const uint8_t*>(input), (int) size, ctx->state[0], 200); keccak(static_cast<const uint8_t*>(input), (int) size, ctx->state[0], 200);
VARIANT1_INIT(0);
l = ctx->memory; l = ctx->memory;
h = reinterpret_cast<uint64_t*>(ctx->state[0]); h = reinterpret_cast<uint64_t*>(ctx->state[0]);
@ -431,6 +463,7 @@ public:
} }
_mm_store_si128((__m128i*) &l[idx & MASK], _mm_xor_si128(bx, cx)); _mm_store_si128((__m128i*) &l[idx & MASK], _mm_xor_si128(bx, cx));
VARIANT1_1(&l[idx & MASK], 0);
idx = EXTRACT64(cx); idx = EXTRACT64(cx);
bx = cx; bx = cx;
@ -442,8 +475,10 @@ public:
al += hi; al += hi;
ah += lo; ah += lo;
VARIANT1_2(ah, 0);
((uint64_t*) &l[idx & MASK])[0] = al; ((uint64_t*) &l[idx & MASK])[0] = al;
((uint64_t*) &l[idx & MASK])[1] = ah; ((uint64_t*) &l[idx & MASK])[1] = ah;
VARIANT1_2(ah, 0);
ah ^= ch; ah ^= ch;
al ^= cl; al ^= cl;
@ -468,6 +503,9 @@ public:
keccak((const uint8_t*) input, (int) size, ctx->state[0], 200); keccak((const uint8_t*) input, (int) size, ctx->state[0], 200);
keccak((const uint8_t*) input + size, (int) size, ctx->state[1], 200); keccak((const uint8_t*) input + size, (int) size, ctx->state[1], 200);
VARIANT1_INIT(0);
VARIANT1_INIT(1);
const uint8_t* l0 = ctx->memory; const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEM; const uint8_t* l1 = ctx->memory + MEM;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state[0]); uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state[0]);
@ -505,6 +543,9 @@ public:
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0)); _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1)); _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
VARIANT1_1(&l0[idx0 & MASK], 0);
VARIANT1_1(&l1[idx1 & MASK], 1);
idx0 = EXTRACT64(cx0); idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1); idx1 = EXTRACT64(cx1);
@ -519,8 +560,10 @@ public:
al0 += hi; al0 += hi;
ah0 += lo; ah0 += lo;
VARIANT1_2(ah0, 0);
((uint64_t*) &l0[idx0 & MASK])[0] = al0; ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
((uint64_t*) &l0[idx0 & MASK])[1] = ah0; ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
VARIANT1_2(ah0, 0);
ah0 ^= ch; ah0 ^= ch;
al0 ^= cl; al0 ^= cl;
@ -533,8 +576,10 @@ public:
al1 += hi; al1 += hi;
ah1 += lo; ah1 += lo;
VARIANT1_2(ah1, 1);
((uint64_t*) &l1[idx1 & MASK])[0] = al1; ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
((uint64_t*) &l1[idx1 & MASK])[1] = ah1; ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
VARIANT1_2(ah1, 1);
ah1 ^= ch; ah1 ^= ch;
al1 ^= cl; al1 ^= cl;
@ -565,6 +610,10 @@ public:
keccak((const uint8_t*) input + size, (int) size, ctx->state[1], 200); keccak((const uint8_t*) input + size, (int) size, ctx->state[1], 200);
keccak((const uint8_t*) input + 2 * size, (int) size, ctx->state[2], 200); keccak((const uint8_t*) input + 2 * size, (int) size, ctx->state[2], 200);
VARIANT1_INIT(0);
VARIANT1_INIT(1);
VARIANT1_INIT(2);
const uint8_t* l0 = ctx->memory; const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEM; const uint8_t* l1 = ctx->memory + MEM;
const uint8_t* l2 = ctx->memory + 2 * MEM; const uint8_t* l2 = ctx->memory + 2 * MEM;
@ -614,6 +663,10 @@ public:
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1)); _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
_mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx2, cx2)); _mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx2, cx2));
VARIANT1_1(&l0[idx0 & MASK], 0);
VARIANT1_1(&l1[idx1 & MASK], 1);
VARIANT1_1(&l2[idx2 & MASK], 2);
idx0 = EXTRACT64(cx0); idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1); idx1 = EXTRACT64(cx1);
idx2 = EXTRACT64(cx2); idx2 = EXTRACT64(cx2);
@ -631,8 +684,10 @@ public:
al0 += hi; al0 += hi;
ah0 += lo; ah0 += lo;
VARIANT1_2(ah0, 0);
((uint64_t*) &l0[idx0 & MASK])[0] = al0; ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
((uint64_t*) &l0[idx0 & MASK])[1] = ah0; ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
VARIANT1_2(ah0, 0);
ah0 ^= ch; ah0 ^= ch;
al0 ^= cl; al0 ^= cl;
@ -646,8 +701,10 @@ public:
al1 += hi; al1 += hi;
ah1 += lo; ah1 += lo;
VARIANT1_2(ah1, 1);
((uint64_t*) &l1[idx1 & MASK])[0] = al1; ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
((uint64_t*) &l1[idx1 & MASK])[1] = ah1; ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
VARIANT1_2(ah1, 1);
ah1 ^= ch; ah1 ^= ch;
al1 ^= cl; al1 ^= cl;
@ -661,8 +718,10 @@ public:
al2 += hi; al2 += hi;
ah2 += lo; ah2 += lo;
VARIANT1_2(ah2, 2);
((uint64_t*) &l2[idx2 & MASK])[0] = al2; ((uint64_t*) &l2[idx2 & MASK])[0] = al2;
((uint64_t*) &l2[idx2 & MASK])[1] = ah2; ((uint64_t*) &l2[idx2 & MASK])[1] = ah2;
VARIANT1_2(ah2, 2);
ah2 ^= ch; ah2 ^= ch;
al2 ^= cl; al2 ^= cl;
@ -697,6 +756,11 @@ public:
keccak((const uint8_t*) input + 2 * size, (int) size, ctx->state[2], 200); keccak((const uint8_t*) input + 2 * size, (int) size, ctx->state[2], 200);
keccak((const uint8_t*) input + 3 * size, (int) size, ctx->state[3], 200); keccak((const uint8_t*) input + 3 * size, (int) size, ctx->state[3], 200);
VARIANT1_INIT(0);
VARIANT1_INIT(1);
VARIANT1_INIT(2);
VARIANT1_INIT(3);
const uint8_t* l0 = ctx->memory; const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEM; const uint8_t* l1 = ctx->memory + MEM;
const uint8_t* l2 = ctx->memory + 2 * MEM; const uint8_t* l2 = ctx->memory + 2 * MEM;
@ -758,6 +822,11 @@ public:
_mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx2, cx2)); _mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx2, cx2));
_mm_store_si128((__m128i*) &l3[idx3 & MASK], _mm_xor_si128(bx3, cx3)); _mm_store_si128((__m128i*) &l3[idx3 & MASK], _mm_xor_si128(bx3, cx3));
VARIANT1_1(&l0[idx0 & MASK], 0);
VARIANT1_1(&l1[idx1 & MASK], 1);
VARIANT1_1(&l2[idx2 & MASK], 2);
VARIANT1_1(&l3[idx3 & MASK], 3);
idx0 = EXTRACT64(cx0); idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1); idx1 = EXTRACT64(cx1);
idx2 = EXTRACT64(cx2); idx2 = EXTRACT64(cx2);
@ -777,8 +846,10 @@ public:
al0 += hi; al0 += hi;
ah0 += lo; ah0 += lo;
VARIANT1_2(ah0, 0);
((uint64_t*) &l0[idx0 & MASK])[0] = al0; ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
((uint64_t*) &l0[idx0 & MASK])[1] = ah0; ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
VARIANT1_2(ah0, 0);
ah0 ^= ch; ah0 ^= ch;
al0 ^= cl; al0 ^= cl;
@ -792,8 +863,10 @@ public:
al1 += hi; al1 += hi;
ah1 += lo; ah1 += lo;
VARIANT1_2(ah1, 1);
((uint64_t*) &l1[idx1 & MASK])[0] = al1; ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
((uint64_t*) &l1[idx1 & MASK])[1] = ah1; ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
VARIANT1_2(ah1, 1);
ah1 ^= ch; ah1 ^= ch;
al1 ^= cl; al1 ^= cl;
@ -807,8 +880,10 @@ public:
al2 += hi; al2 += hi;
ah2 += lo; ah2 += lo;
VARIANT1_2(ah2, 2);
((uint64_t*) &l2[idx2 & MASK])[0] = al2; ((uint64_t*) &l2[idx2 & MASK])[0] = al2;
((uint64_t*) &l2[idx2 & MASK])[1] = ah2; ((uint64_t*) &l2[idx2 & MASK])[1] = ah2;
VARIANT1_2(ah2, 2);
ah2 ^= ch; ah2 ^= ch;
al2 ^= cl; al2 ^= cl;
@ -822,8 +897,10 @@ public:
al3 += hi; al3 += hi;
ah3 += lo; ah3 += lo;
VARIANT1_2(ah3, 3);
((uint64_t*) &l3[idx3 & MASK])[0] = al3; ((uint64_t*) &l3[idx3 & MASK])[0] = al3;
((uint64_t*) &l3[idx3 & MASK])[1] = ah3; ((uint64_t*) &l3[idx3 & MASK])[1] = ah3;
VARIANT1_2(ah3, 3);
ah3 ^= ch; ah3 ^= ch;
al3 ^= cl; al3 ^= cl;
@ -862,6 +939,12 @@ public:
keccak((const uint8_t*) input + 3 * size, (int) size, ctx->state[3], 200); keccak((const uint8_t*) input + 3 * size, (int) size, ctx->state[3], 200);
keccak((const uint8_t*) input + 4 * size, (int) size, ctx->state[4], 200); keccak((const uint8_t*) input + 4 * size, (int) size, ctx->state[4], 200);
VARIANT1_INIT(0);
VARIANT1_INIT(1);
VARIANT1_INIT(2);
VARIANT1_INIT(3);
VARIANT1_INIT(4);
const uint8_t* l0 = ctx->memory; const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEM; const uint8_t* l1 = ctx->memory + MEM;
const uint8_t* l2 = ctx->memory + 2 * MEM; const uint8_t* l2 = ctx->memory + 2 * MEM;
@ -935,6 +1018,12 @@ public:
_mm_store_si128((__m128i*) &l3[idx3 & MASK], _mm_xor_si128(bx3, cx3)); _mm_store_si128((__m128i*) &l3[idx3 & MASK], _mm_xor_si128(bx3, cx3));
_mm_store_si128((__m128i*) &l4[idx4 & MASK], _mm_xor_si128(bx4, cx4)); _mm_store_si128((__m128i*) &l4[idx4 & MASK], _mm_xor_si128(bx4, cx4));
VARIANT1_1(&l0[idx0 & MASK], 0);
VARIANT1_1(&l1[idx1 & MASK], 1);
VARIANT1_1(&l2[idx2 & MASK], 2);
VARIANT1_1(&l3[idx3 & MASK], 3);
VARIANT1_1(&l4[idx4 & MASK], 4);
idx0 = EXTRACT64(cx0); idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1); idx1 = EXTRACT64(cx1);
idx2 = EXTRACT64(cx2); idx2 = EXTRACT64(cx2);
@ -955,8 +1044,10 @@ public:
al0 += hi; al0 += hi;
ah0 += lo; ah0 += lo;
VARIANT1_2(ah0, 0);
((uint64_t*) &l0[idx0 & MASK])[0] = al0; ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
((uint64_t*) &l0[idx0 & MASK])[1] = ah0; ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
VARIANT1_2(ah0, 0);
ah0 ^= ch; ah0 ^= ch;
al0 ^= cl; al0 ^= cl;
@ -970,8 +1061,10 @@ public:
al1 += hi; al1 += hi;
ah1 += lo; ah1 += lo;
VARIANT1_2(ah1, 1);
((uint64_t*) &l1[idx1 & MASK])[0] = al1; ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
((uint64_t*) &l1[idx1 & MASK])[1] = ah1; ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
VARIANT1_2(ah1, 1);
ah1 ^= ch; ah1 ^= ch;
al1 ^= cl; al1 ^= cl;
@ -985,8 +1078,10 @@ public:
al2 += hi; al2 += hi;
ah2 += lo; ah2 += lo;
VARIANT1_2(ah2, 2);
((uint64_t*) &l2[idx2 & MASK])[0] = al2; ((uint64_t*) &l2[idx2 & MASK])[0] = al2;
((uint64_t*) &l2[idx2 & MASK])[1] = ah2; ((uint64_t*) &l2[idx2 & MASK])[1] = ah2;
VARIANT1_2(ah2, 2);
ah2 ^= ch; ah2 ^= ch;
al2 ^= cl; al2 ^= cl;
@ -1000,8 +1095,10 @@ public:
al3 += hi; al3 += hi;
ah3 += lo; ah3 += lo;
VARIANT1_2(ah3, 3);
((uint64_t*) &l3[idx3 & MASK])[0] = al3; ((uint64_t*) &l3[idx3 & MASK])[0] = al3;
((uint64_t*) &l3[idx3 & MASK])[1] = ah3; ((uint64_t*) &l3[idx3 & MASK])[1] = ah3;
VARIANT1_2(ah3, 3);
ah3 ^= ch; ah3 ^= ch;
al3 ^= cl; al3 ^= cl;
@ -1015,8 +1112,10 @@ public:
al4 += hi; al4 += hi;
ah4 += lo; ah4 += lo;
VARIANT1_2(ah4, 4);
((uint64_t*) &l4[idx4 & MASK])[0] = al4; ((uint64_t*) &l4[idx4 & MASK])[0] = al4;
((uint64_t*) &l4[idx4 & MASK])[1] = ah4; ((uint64_t*) &l4[idx4 & MASK])[1] = ah4;
VARIANT1_2(ah4, 4);
ah4 ^= ch; ah4 ^= ch;
al4 ^= cl; al4 ^= cl;