Added VAES support for Cryptonight variants

This commit is contained in:
SChernykh 2021-11-28 16:10:09 +01:00
parent c40f1f9f66
commit a4d5d0a75a
14 changed files with 749 additions and 40 deletions

View file

@ -40,6 +40,7 @@
#include "crypto/cn/CnAlgo.h"
#include "crypto/cn/CryptoNight_monero.h"
#include "crypto/cn/CryptoNight.h"
#include "crypto/cn/CryptoNight_x86_vaes.h"
#include "crypto/cn/soft_aes.h"
@ -289,6 +290,11 @@ static NOINLINE void cn_explode_scratchpad(cryptonight_ctx *ctx)
{
constexpr CnAlgo<ALGO> props;
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
cn_explode_scratchpad_vaes<ALGO>(ctx);
return;
}
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
@ -341,7 +347,7 @@ static NOINLINE void cn_explode_scratchpad(cryptonight_ctx *ctx)
constexpr int output_increment = (64 << interleave) / sizeof(__m128i);
constexpr int prefetch_dist = 2048 / sizeof(__m128i);
__m128i* e = output + N - prefetch_dist;
__m128i* e = output + (N << interleave) - prefetch_dist;
__m128i* prefetch_ptr = output + prefetch_dist;
for (int i = 0; i < 2; ++i) {
@ -396,6 +402,11 @@ static NOINLINE void cn_implode_scratchpad(cryptonight_ctx *ctx)
{
constexpr CnAlgo<ALGO> props;
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
cn_implode_scratchpad_vaes<ALGO>(ctx);
return;
}
constexpr bool IS_HEAVY = props.isHeavy();
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
@ -996,8 +1007,14 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
ctx[0]->first_half = true;
ctx[1]->first_half = true;
}
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
if (!props.isHeavy() && Cpu::info()->hasVAES()) {
cn_explode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
}
else {
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
}
if (ALGO == Algorithm::CN_2) {
cnv2_double_mainloop_sandybridge_asm(ctx);
@ -1036,8 +1053,13 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
ctx[0]->generated_code(ctx);
}
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
if (!props.isHeavy() && Cpu::info()->hasVAES()) {
cn_implode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
}
else {
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
}
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
@ -1092,8 +1114,14 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
ctx[0]->first_half = true;
ctx[1]->first_half = true;
}
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
cn_explode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
}
else {
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
}
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
@ -1288,8 +1316,13 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
bx10 = cx1;
}
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
cn_implode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
}
else {
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
}
keccakf(h0, 24);
keccakf(h1, 24);
@ -1350,10 +1383,16 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
ctx[3]->first_half = true;
}
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
cn_explode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
cn_explode_scratchpad_vaes_double<ALGO>(ctx[2], ctx[3]);
}
else {
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
}
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
@ -1474,10 +1513,16 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
if (!SOFT_AES) cx3 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l3[idx3 & MASK]));
}
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
cn_implode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
cn_implode_scratchpad_vaes_double<ALGO>(ctx[2], ctx[3]);
}
else {
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
}
keccakf(h0, 24);
keccakf(h1, 24);
@ -1714,7 +1759,17 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
if (props.half_mem()) {
ctx[i]->first_half = true;
}
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
}
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
cn_explode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
cn_explode_scratchpad_vaes_double<ALGO>(ctx[2], ctx[3]);
}
else {
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
}
uint8_t* l0 = ctx[0]->memory;
@ -1766,8 +1821,18 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
CN_STEP4(3, ax3, bx30, bx31, cx3, l3, mc3, ptr3, idx3);
}
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
cn_implode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
cn_implode_scratchpad_vaes_double<ALGO>(ctx[2], ctx[3]);
}
else {
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
}
for (size_t i = 0; i < 4; i++) {
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
}