From 5d9ebb0b917550d09e6f0f26bf5b1f02060f3427 Mon Sep 17 00:00:00 2001 From: XMRig Date: Thu, 7 Mar 2019 17:51:54 +0700 Subject: [PATCH 01/18] v2.14.2-dev --- src/version.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/version.h b/src/version.h index ff00eca3..4e8fee45 100644 --- a/src/version.h +++ b/src/version.h @@ -28,7 +28,7 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig CPU miner" -#define APP_VERSION "2.14.1" +#define APP_VERSION "2.14.2-dev" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com" @@ -36,7 +36,7 @@ #define APP_VER_MAJOR 2 #define APP_VER_MINOR 14 -#define APP_VER_PATCH 1 +#define APP_VER_PATCH 2 #ifdef _MSC_VER # if (_MSC_VER >= 1910) From 1bb8f77b527b02271c0645ced7063522322d628c Mon Sep 17 00:00:00 2001 From: SChernykh Date: Wed, 13 Mar 2019 22:00:44 +0100 Subject: [PATCH 02/18] Unified ASM functions signature --- src/Mem.cpp | 2 +- src/crypto/CryptoNight.h | 5 +- src/crypto/CryptoNight_x86.h | 70 +++++++++---------- .../asm/CryptonightR_soft_aes_template.inc | 2 + .../CryptonightR_soft_aes_template_win.inc | 2 + src/crypto/asm/CryptonightR_template.inc | 5 ++ src/crypto/asm/CryptonightR_template_win.inc | 5 ++ .../asm/CryptonightWOW_soft_aes_template.inc | 2 + .../CryptonightWOW_soft_aes_template_win.inc | 2 + src/crypto/asm/CryptonightWOW_template.inc | 5 ++ .../asm/CryptonightWOW_template_win.inc | 5 ++ .../cn2/cnv2_double_main_loop_sandybridge.inc | 3 + .../asm/cn2/cnv2_main_loop_bulldozer.inc | 2 + .../asm/cn2/cnv2_main_loop_ivybridge.inc | 2 + src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc | 2 + .../asm/cn2/cnv2_rwz_double_main_loop.inc | 3 + src/crypto/asm/cn2/cnv2_rwz_main_loop.inc | 2 + src/crypto/asm/cn_main_loop.S | 2 - .../CryptonightR_soft_aes_template_win.inc | 2 + .../asm/win64/CryptonightR_template_win.inc | 5 ++ .../CryptonightWOW_soft_aes_template_win.inc | 2 + .../asm/win64/CryptonightWOW_template_win.inc 
| 5 ++ .../cn2/cnv2_double_main_loop_sandybridge.inc | 3 + .../win64/cn2/cnv2_main_loop_bulldozer.inc | 2 + .../win64/cn2/cnv2_main_loop_ivybridge.inc | 2 + .../asm/win64/cn2/cnv2_main_loop_ryzen.inc | 2 + .../win64/cn2/cnv2_rwz_double_main_loop.inc | 3 + .../asm/win64/cn2/cnv2_rwz_main_loop.inc | 2 + src/workers/CpuThread.cpp | 24 +++---- src/workers/CpuThread.h | 3 +- 30 files changed, 121 insertions(+), 55 deletions(-) diff --git a/src/Mem.cpp b/src/Mem.cpp index 4fa794d6..01a2157b 100644 --- a/src/Mem.cpp +++ b/src/Mem.cpp @@ -53,7 +53,7 @@ MemInfo Mem::create(cryptonight_ctx **ctx, xmrig::Algo algorithm, size_t count) uint8_t* p = reinterpret_cast(allocateExecutableMemory(0x4000)); c->generated_code = reinterpret_cast(p); - c->generated_code_double = reinterpret_cast(p + 0x2000); + c->generated_code_double = reinterpret_cast(p + 0x2000); c->generated_code_data.variant = xmrig::VARIANT_MAX; c->generated_code_data.height = (uint64_t)(-1); diff --git a/src/crypto/CryptoNight.h b/src/crypto/CryptoNight.h index 91a4c7b7..b1ec2371 100644 --- a/src/crypto/CryptoNight.h +++ b/src/crypto/CryptoNight.h @@ -36,8 +36,7 @@ #endif struct cryptonight_ctx; -typedef void(*cn_mainloop_fun_ms_abi)(cryptonight_ctx*) ABI_ATTRIBUTE; -typedef void(*cn_mainloop_double_fun_ms_abi)(cryptonight_ctx*, cryptonight_ctx*) ABI_ATTRIBUTE; +typedef void(*cn_mainloop_fun_ms_abi)(cryptonight_ctx**) ABI_ATTRIBUTE; struct cryptonight_r_data { int variant; @@ -54,7 +53,7 @@ struct cryptonight_ctx { const uint32_t* saes_table; cn_mainloop_fun_ms_abi generated_code; - cn_mainloop_double_fun_ms_abi generated_code_double; + cn_mainloop_fun_ms_abi generated_code_double; cryptonight_r_data generated_code_data; cryptonight_r_data generated_code_double_data; }; diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h index 8b9ea783..202b662a 100644 --- a/src/crypto/CryptoNight_x86.h +++ b/src/crypto/CryptoNight_x86.h @@ -590,7 +590,7 @@ inline void cryptonight_single_hash(const uint8_t 
*__restrict__ input, size_t si } ctx[0]->saes_table = (const uint32_t*)saes_table; - ctx[0]->generated_code(ctx[0]); + ctx[0]->generated_code(ctx); } else { #endif @@ -750,32 +750,32 @@ inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_ #ifndef XMRIG_NO_ASM -extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1); -extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1); +extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx); extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun 
cn_zls_mainloop_ivybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ryzen_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_double_fun cn_zls_double_mainloop_sandybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ivybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ryzen_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_double_fun cn_double_double_mainloop_sandybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm; void wow_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); @@ -824,64 +824,64 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_ if (VARIANT == xmrig::VARIANT_2) { if (ASM == xmrig::ASM_INTEL) { - cnv2_mainloop_ivybridge_asm(ctx[0]); + cnv2_mainloop_ivybridge_asm(ctx); } else if (ASM == xmrig::ASM_RYZEN) { - cnv2_mainloop_ryzen_asm(ctx[0]); + cnv2_mainloop_ryzen_asm(ctx); } else { - cnv2_mainloop_bulldozer_asm(ctx[0]); + cnv2_mainloop_bulldozer_asm(ctx); } } else if (VARIANT == xmrig::VARIANT_HALF) { if (ASM == xmrig::ASM_INTEL) { - cn_half_mainloop_ivybridge_asm(ctx[0]); + cn_half_mainloop_ivybridge_asm(ctx); } else if (ASM == xmrig::ASM_RYZEN) { - cn_half_mainloop_ryzen_asm(ctx[0]); + cn_half_mainloop_ryzen_asm(ctx); } else { - cn_half_mainloop_bulldozer_asm(ctx[0]); + cn_half_mainloop_bulldozer_asm(ctx); } } else if (VARIANT == xmrig::VARIANT_TRTL) { if (ASM == xmrig::ASM_INTEL) { - cn_trtl_mainloop_ivybridge_asm(ctx[0]); + cn_trtl_mainloop_ivybridge_asm(ctx); } else if (ASM == xmrig::ASM_RYZEN) { - cn_trtl_mainloop_ryzen_asm(ctx[0]); + 
cn_trtl_mainloop_ryzen_asm(ctx); } else { - cn_trtl_mainloop_bulldozer_asm(ctx[0]); + cn_trtl_mainloop_bulldozer_asm(ctx); } } else if (VARIANT == xmrig::VARIANT_RWZ) { - cnv2_rwz_mainloop_asm(ctx[0]); + cnv2_rwz_mainloop_asm(ctx); } else if (VARIANT == xmrig::VARIANT_ZLS) { if (ASM == xmrig::ASM_INTEL) { - cn_zls_mainloop_ivybridge_asm(ctx[0]); + cn_zls_mainloop_ivybridge_asm(ctx); } else if (ASM == xmrig::ASM_RYZEN) { - cn_zls_mainloop_ryzen_asm(ctx[0]); + cn_zls_mainloop_ryzen_asm(ctx); } else { - cn_zls_mainloop_bulldozer_asm(ctx[0]); + cn_zls_mainloop_bulldozer_asm(ctx); } } else if (VARIANT == xmrig::VARIANT_DOUBLE) { if (ASM == xmrig::ASM_INTEL) { - cn_double_mainloop_ivybridge_asm(ctx[0]); + cn_double_mainloop_ivybridge_asm(ctx); } else if (ASM == xmrig::ASM_RYZEN) { - cn_double_mainloop_ryzen_asm(ctx[0]); + cn_double_mainloop_ryzen_asm(ctx); } else { - cn_double_mainloop_bulldozer_asm(ctx[0]); + cn_double_mainloop_bulldozer_asm(ctx); } } else if (xmrig::cn_is_cryptonight_r()) { - ctx[0]->generated_code(ctx[0]); + ctx[0]->generated_code(ctx); } cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state)); @@ -910,25 +910,25 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_ cn_explode_scratchpad(reinterpret_cast<__m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory)); if (VARIANT == xmrig::VARIANT_2) { - cnv2_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + cnv2_double_mainloop_sandybridge_asm(ctx); } else if (VARIANT == xmrig::VARIANT_HALF) { - cn_half_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + cn_half_double_mainloop_sandybridge_asm(ctx); } else if (VARIANT == xmrig::VARIANT_TRTL) { - cn_trtl_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + cn_trtl_double_mainloop_sandybridge_asm(ctx); } else if (VARIANT == xmrig::VARIANT_RWZ) { - cnv2_rwz_double_mainloop_asm(ctx[0], ctx[1]); + cnv2_rwz_double_mainloop_asm(ctx); } else if (VARIANT == 
xmrig::VARIANT_ZLS) { - cn_zls_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + cn_zls_double_mainloop_sandybridge_asm(ctx); } else if (VARIANT == xmrig::VARIANT_DOUBLE) { - cn_double_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + cn_double_double_mainloop_sandybridge_asm(ctx); } else if (xmrig::cn_is_cryptonight_r()) { - ctx[0]->generated_code_double(ctx[0], ctx[1]); + ctx[0]->generated_code_double(ctx); } cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state)); diff --git a/src/crypto/asm/CryptonightR_soft_aes_template.inc b/src/crypto/asm/CryptonightR_soft_aes_template.inc index 40c7874d..e9e1bb4f 100644 --- a/src/crypto/asm/CryptonightR_soft_aes_template.inc +++ b/src/crypto/asm/CryptonightR_soft_aes_template.inc @@ -6,6 +6,8 @@ PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end) ALIGN(64) FN_PREFIX(CryptonightR_soft_aes_template_part1): + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp diff --git a/src/crypto/asm/CryptonightR_soft_aes_template_win.inc b/src/crypto/asm/CryptonightR_soft_aes_template_win.inc index d771f69c..589192ca 100644 --- a/src/crypto/asm/CryptonightR_soft_aes_template_win.inc +++ b/src/crypto/asm/CryptonightR_soft_aes_template_win.inc @@ -6,6 +6,8 @@ PUBLIC CryptonightR_soft_aes_template_end ALIGN(64) CryptonightR_soft_aes_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp diff --git a/src/crypto/asm/CryptonightR_template.inc b/src/crypto/asm/CryptonightR_template.inc index 8ecab724..61b6b985 100644 --- a/src/crypto/asm/CryptonightR_template.inc +++ b/src/crypto/asm/CryptonightR_template.inc @@ -12,6 +12,8 @@ PUBLIC FN_PREFIX(CryptonightR_template_double_end) ALIGN(64) FN_PREFIX(CryptonightR_template_part1): + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -183,6 +185,9 @@ FN_PREFIX(CryptonightR_template_end): ALIGN(64) FN_PREFIX(CryptonightR_template_double_part1): + mov rdx, 
[rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/CryptonightR_template_win.inc b/src/crypto/asm/CryptonightR_template_win.inc index a170f2d2..1bb89eb1 100644 --- a/src/crypto/asm/CryptonightR_template_win.inc +++ b/src/crypto/asm/CryptonightR_template_win.inc @@ -12,6 +12,8 @@ PUBLIC CryptonightR_template_double_end ALIGN(64) CryptonightR_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -183,6 +185,9 @@ CryptonightR_template_end: ALIGN(64) CryptonightR_template_double_part1: + mov rdx, [rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/CryptonightWOW_soft_aes_template.inc b/src/crypto/asm/CryptonightWOW_soft_aes_template.inc index feea3949..53b7016a 100644 --- a/src/crypto/asm/CryptonightWOW_soft_aes_template.inc +++ b/src/crypto/asm/CryptonightWOW_soft_aes_template.inc @@ -6,6 +6,8 @@ PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_end) ALIGN(64) FN_PREFIX(CryptonightWOW_soft_aes_template_part1): + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp diff --git a/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc b/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc index 6ebad99f..b3202b78 100644 --- a/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc +++ b/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc @@ -6,6 +6,8 @@ PUBLIC CryptonightWOW_soft_aes_template_end ALIGN(64) CryptonightWOW_soft_aes_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp diff --git a/src/crypto/asm/CryptonightWOW_template.inc b/src/crypto/asm/CryptonightWOW_template.inc index 7183a659..82d455f6 100644 --- a/src/crypto/asm/CryptonightWOW_template.inc +++ b/src/crypto/asm/CryptonightWOW_template.inc @@ -12,6 +12,8 @@ PUBLIC FN_PREFIX(CryptonightWOW_template_double_end) ALIGN(64) FN_PREFIX(CryptonightWOW_template_part1): + mov rcx, [rcx] + mov QWORD PTR 
[rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -165,6 +167,9 @@ FN_PREFIX(CryptonightWOW_template_end): ALIGN(64) FN_PREFIX(CryptonightWOW_template_double_part1): + mov rdx, [rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/CryptonightWOW_template_win.inc b/src/crypto/asm/CryptonightWOW_template_win.inc index c5652e27..644c01f1 100644 --- a/src/crypto/asm/CryptonightWOW_template_win.inc +++ b/src/crypto/asm/CryptonightWOW_template_win.inc @@ -12,6 +12,8 @@ PUBLIC CryptonightWOW_template_double_end ALIGN(64) CryptonightWOW_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -165,6 +167,9 @@ CryptonightWOW_template_end: ALIGN(64) CryptonightWOW_template_double_part1: + mov rdx, [rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc index aa5101a8..1710cac7 100644 --- a/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc +++ b/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc @@ -1,3 +1,6 @@ + mov rdx, [rcx+8] + mov rcx, [rcx] + mov rax, rsp push rbx push rbp diff --git a/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc index c764501d..b881b669 100644 --- a/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc +++ b/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi diff --git a/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc index 06f1d28b..863673de 100644 --- a/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc +++ b/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git 
a/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc index 5dbf5917..8ccc5e17 100644 --- a/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc +++ b/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi diff --git a/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc b/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc index d2d87173..d9bfc9c1 100644 --- a/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc +++ b/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc @@ -1,3 +1,6 @@ + mov rdx, [rcx+8] + mov rcx, [rcx] + mov rax, rsp push rbx push rbp diff --git a/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc b/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc index 021f787e..b59c02d6 100644 --- a/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc +++ b/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/cn_main_loop.S b/src/crypto/asm/cn_main_loop.S index 347f0e08..7aed6c20 100644 --- a/src/crypto/asm/cn_main_loop.S +++ b/src/crypto/asm/cn_main_loop.S @@ -49,7 +49,6 @@ ALIGN(64) FN_PREFIX(cnv2_double_mainloop_sandybridge_asm): sub rsp, 48 mov rcx, rdi - mov rdx, rsi #include "cn2/cnv2_double_main_loop_sandybridge.inc" add rsp, 48 ret 0 @@ -68,7 +67,6 @@ ALIGN(64) FN_PREFIX(cnv2_rwz_double_mainloop_asm): sub rsp, 48 mov rcx, rdi - mov rdx, rsi #include "cn2/cnv2_rwz_double_main_loop.inc" add rsp, 48 ret 0 diff --git a/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc b/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc index d6d393a9..6898a604 100644 --- a/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc +++ b/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc @@ -6,6 +6,8 @@ PUBLIC CryptonightR_soft_aes_template_end ALIGN(64) CryptonightR_soft_aes_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp 
diff --git a/src/crypto/asm/win64/CryptonightR_template_win.inc b/src/crypto/asm/win64/CryptonightR_template_win.inc index 60ee3441..d24eedaa 100644 --- a/src/crypto/asm/win64/CryptonightR_template_win.inc +++ b/src/crypto/asm/win64/CryptonightR_template_win.inc @@ -12,6 +12,8 @@ PUBLIC CryptonightR_template_double_end ALIGN(64) CryptonightR_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -183,6 +185,9 @@ CryptonightR_template_end: ALIGN(64) CryptonightR_template_double_part1: + mov rdx, [rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc b/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc index 68209036..1c73f77c 100644 --- a/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc +++ b/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc @@ -6,6 +6,8 @@ PUBLIC CryptonightWOW_soft_aes_template_end ALIGN(64) CryptonightWOW_soft_aes_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp diff --git a/src/crypto/asm/win64/CryptonightWOW_template_win.inc b/src/crypto/asm/win64/CryptonightWOW_template_win.inc index 9db2cf39..55c8c8df 100644 --- a/src/crypto/asm/win64/CryptonightWOW_template_win.inc +++ b/src/crypto/asm/win64/CryptonightWOW_template_win.inc @@ -12,6 +12,8 @@ PUBLIC CryptonightWOW_template_double_end ALIGN(64) CryptonightWOW_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -165,6 +167,9 @@ CryptonightWOW_template_end: ALIGN(64) CryptonightWOW_template_double_part1: + mov rdx, [rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc index 05af9393..85077a20 100644 --- 
a/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc +++ b/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc @@ -1,3 +1,6 @@ + mov rdx, [rcx+8] + mov rcx, [rcx] + mov rax, rsp push rbx push rbp diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc index 03a36f48..f17017a0 100644 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc index 77e28f80..a12ac35c 100644 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc index 7e5c127f..044235d8 100644 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi diff --git a/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc b/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc index 69ca8793..97fb691b 100644 --- a/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc +++ b/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc @@ -1,3 +1,6 @@ + mov rdx, [rcx+8] + mov rcx, [rcx] + mov rax, rsp push rbx push rbp diff --git a/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc b/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc index 99317730..e2b7a5fc 100644 --- a/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc +++ b/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + 
mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/workers/CpuThread.cpp b/src/workers/CpuThread.cpp index c98b730a..6548b461 100644 --- a/src/workers/CpuThread.cpp +++ b/src/workers/CpuThread.cpp @@ -90,31 +90,31 @@ static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t ma } -extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1); +extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx); xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ivybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ryzen_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_double_fun 
cn_zls_double_mainloop_sandybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ivybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ryzen_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_double_fun cn_double_double_mainloop_sandybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr; void xmrig::CpuThread::patchAsmVariants() @@ -125,22 +125,22 @@ void xmrig::CpuThread::patchAsmVariants() cn_half_mainloop_ivybridge_asm = reinterpret_cast (base + 0x0000); cn_half_mainloop_ryzen_asm = reinterpret_cast (base + 0x1000); cn_half_mainloop_bulldozer_asm = reinterpret_cast (base + 0x2000); - cn_half_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x3000); + cn_half_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x3000); cn_trtl_mainloop_ivybridge_asm = reinterpret_cast (base + 0x4000); cn_trtl_mainloop_ryzen_asm = reinterpret_cast (base + 0x5000); cn_trtl_mainloop_bulldozer_asm = reinterpret_cast (base + 0x6000); - cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x7000); + cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x7000); cn_zls_mainloop_ivybridge_asm = reinterpret_cast (base + 0x8000); cn_zls_mainloop_ryzen_asm = reinterpret_cast (base + 0x9000); cn_zls_mainloop_bulldozer_asm = reinterpret_cast (base + 0xA000); - cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0xB000); + cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0xB000); cn_double_mainloop_ivybridge_asm = reinterpret_cast (base + 0xC000); cn_double_mainloop_ryzen_asm = reinterpret_cast (base + 0xD000); cn_double_mainloop_bulldozer_asm = reinterpret_cast (base + 0xE000); - cn_double_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0xF000); + 
cn_double_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0xF000); patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); diff --git a/src/workers/CpuThread.h b/src/workers/CpuThread.h index 54e98cde..05d4a066 100644 --- a/src/workers/CpuThread.h +++ b/src/workers/CpuThread.h @@ -61,8 +61,7 @@ public: CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch, Assembly assembly); typedef void (*cn_hash_fun)(const uint8_t *input, size_t size, uint8_t *output, cryptonight_ctx **ctx, uint64_t height); - typedef void (*cn_mainloop_fun)(cryptonight_ctx *ctx); - typedef void (*cn_mainloop_double_fun)(cryptonight_ctx *ctx1, cryptonight_ctx *ctx2); + typedef void (*cn_mainloop_fun)(cryptonight_ctx **ctx); # ifndef XMRIG_NO_ASM static void patchAsmVariants(); From 29373c4226dbd72b37be63e77cadb90d1f129b55 Mon Sep 17 00:00:00 2001 From: Tony Butler Date: Fri, 15 Mar 2019 07:13:14 -0600 Subject: [PATCH 03/18] Fix compilation with Clang 3.5 for those with limited compiler choices Performance similar to gcc7+ on systems where gcc4 is the only alternative --- src/crypto/cn_gpu_avx.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/crypto/cn_gpu_avx.cpp b/src/crypto/cn_gpu_avx.cpp index 3dc7cacb..9f801c80 100644 --- a/src/crypto/cn_gpu_avx.cpp +++ b/src/crypto/cn_gpu_avx.cpp @@ -30,6 +30,12 @@ # include # define __restrict__ __restrict #endif +#ifndef _mm256_bslli_epi128 + #define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count)) +#endif +#ifndef _mm256_bsrli_epi128 + #define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count)) +#endif inline void prep_dv_avx(__m256i* idx, __m256i& v, __m256& n01) { From cf7ce13986e81b7c359a08149bb97b4d78901c04 Mon Sep 17 00:00:00 2001 From: 
stoffu Date: Thu, 11 Apr 2019 14:01:07 +0900 Subject: [PATCH 04/18] Config: fix std::max issue with msvc2017 to fix the following errors: xmrig\src\core\Config.cpp(165): error C2065: 'max': undeclared identifier xmrig\src\core\Config.cpp(165): error C2275: 'size_t': illegal use of this type as an expression --- src/core/Config.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/Config.cpp b/src/core/Config.cpp index 82a96117..9216027a 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -22,6 +22,7 @@ * along with this program. If not, see . */ +#include #include #include #include From 32c3d4b9f6469d57ab4d5fbbb007f3463c2ccac5 Mon Sep 17 00:00:00 2001 From: Tony Butler Date: Mon, 15 Apr 2019 10:39:16 -0600 Subject: [PATCH 05/18] Repair compilation with Clang 9.0.0 (which now includes its own _rotr intrinsic) --- src/crypto/soft_aes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crypto/soft_aes.h b/src/crypto/soft_aes.h index 26c1b06a..52fd9b7a 100644 --- a/src/crypto/soft_aes.h +++ b/src/crypto/soft_aes.h @@ -130,7 +130,7 @@ static inline uint32_t sub_word(uint32_t key) saes_sbox[key & 0xff]; } -#if defined(__clang__) || defined(XMRIG_ARM) +#if (defined(__clang__) && __clang_major__ != 9) || defined(XMRIG_ARM) static inline uint32_t _rotr(uint32_t value, uint32_t amount) { return (value >> amount) | (value << ((32 - amount) & 31)); From f9f7ef26b8c9757bb5e6733f420f880fbdb0464a Mon Sep 17 00:00:00 2001 From: XMRig Date: Tue, 16 Apr 2019 01:20:50 +0700 Subject: [PATCH 06/18] #1012 Added checks for _rotr. 
--- cmake/flags.cmake | 7 +++++++ src/crypto/soft_aes.h | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 3989cb71..2f3eb208 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -10,6 +10,8 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release") add_definitions(/DNDEBUG) endif() +include(CheckSymbolExists) + if (CMAKE_CXX_COMPILER_ID MATCHES GNU) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-strict-aliasing") @@ -27,6 +29,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU) else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") + + add_definitions(/DHAVE_ROTR) endif() if (WIN32) @@ -50,6 +54,7 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC) add_definitions(/D_CRT_SECURE_NO_WARNINGS) add_definitions(/D_CRT_NONSTDC_NO_WARNINGS) add_definitions(/DNOMINMAX) + add_definitions(/DHAVE_ROTR) elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang) @@ -68,6 +73,8 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang) else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") + + check_symbol_exists("_rotr" "x86intrin.h" HAVE_ROTR) endif() endif() diff --git a/src/crypto/soft_aes.h b/src/crypto/soft_aes.h index 26c1b06a..4ad9bdd9 100644 --- a/src/crypto/soft_aes.h +++ b/src/crypto/soft_aes.h @@ -130,7 +130,7 @@ static inline uint32_t sub_word(uint32_t key) saes_sbox[key & 0xff]; } -#if defined(__clang__) || defined(XMRIG_ARM) +#ifndef HAVE_ROTR static inline uint32_t _rotr(uint32_t value, uint32_t amount) { return (value >> amount) | (value << ((32 - amount) & 31)); From 9137f59ec1216ceef8e4569640748d8bdc452593 Mon Sep 17 00:00:00 2001 From: XMRig Date: Tue, 16 Apr 2019 02:00:27 +0700 Subject: [PATCH 07/18] #1012 Fixed _rotr detection. 
--- cmake/flags.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 2f3eb208..d50b5c84 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -75,6 +75,9 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") check_symbol_exists("_rotr" "x86intrin.h" HAVE_ROTR) + if (HAVE_ROTR) + add_definitions(/DHAVE_ROTR) + endif() endif() endif() From 699269bbae408001f19c53f5e130d93f67d06e0b Mon Sep 17 00:00:00 2001 From: Tony Butler Date: Mon, 15 Apr 2019 13:26:55 -0600 Subject: [PATCH 08/18] Upstream fixed it for good with [Added checks for _rotr.] and [Fixed _rotr detection.] --- cmake/flags.cmake | 10 ++++++++++ src/crypto/soft_aes.h | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 3989cb71..d50b5c84 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -10,6 +10,8 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release") add_definitions(/DNDEBUG) endif() +include(CheckSymbolExists) + if (CMAKE_CXX_COMPILER_ID MATCHES GNU) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-strict-aliasing") @@ -27,6 +29,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU) else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") + + add_definitions(/DHAVE_ROTR) endif() if (WIN32) @@ -50,6 +54,7 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC) add_definitions(/D_CRT_SECURE_NO_WARNINGS) add_definitions(/D_CRT_NONSTDC_NO_WARNINGS) add_definitions(/DNOMINMAX) + add_definitions(/DHAVE_ROTR) elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang) @@ -68,6 +73,11 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang) else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") + + check_symbol_exists("_rotr" "x86intrin.h" HAVE_ROTR) + if (HAVE_ROTR) + add_definitions(/DHAVE_ROTR) + endif() endif() endif() diff --git a/src/crypto/soft_aes.h b/src/crypto/soft_aes.h index 52fd9b7a..4ad9bdd9 100644 --- 
a/src/crypto/soft_aes.h +++ b/src/crypto/soft_aes.h @@ -130,7 +130,7 @@ static inline uint32_t sub_word(uint32_t key) saes_sbox[key & 0xff]; } -#if (defined(__clang__) && __clang_major__ != 9) || defined(XMRIG_ARM) +#ifndef HAVE_ROTR static inline uint32_t _rotr(uint32_t value, uint32_t amount) { return (value >> amount) | (value << ((32 - amount) & 31)); From 51b92f66cfddf151570c643cb0407dd3720cdc65 Mon Sep 17 00:00:00 2001 From: XMRig Date: Sat, 25 May 2019 10:58:35 +0700 Subject: [PATCH 09/18] Sync changes. --- src/api/ApiRouter.cpp | 8 +++++--- src/version.h | 4 +++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/api/ApiRouter.cpp b/src/api/ApiRouter.cpp index bdedac4d..beee8fd3 100644 --- a/src/api/ApiRouter.cpp +++ b/src/api/ApiRouter.cpp @@ -51,13 +51,15 @@ #include "workers/Workers.h" -static inline double normalize(double d) +static inline rapidjson::Value normalize(double d) { + using namespace rapidjson; + if (!isnormal(d)) { - return 0.0; + return Value(kNullType); } - return floor(d * 100.0) / 100.0; + return Value(floor(d * 100.0) / 100.0); } diff --git a/src/version.h b/src/version.h index 4e8fee45..b6ae7102 100644 --- a/src/version.h +++ b/src/version.h @@ -39,7 +39,9 @@ #define APP_VER_PATCH 2 #ifdef _MSC_VER -# if (_MSC_VER >= 1910) +# if (_MSC_VER >= 1920) +# define MSVC_VERSION 2019 +# elif (_MSC_VER >= 1910 && _MSC_VER < 1920) # define MSVC_VERSION 2017 # elif _MSC_VER == 1900 # define MSVC_VERSION 2015 From 10165da53efcad10c3d9a56f01b58f9471ac5172 Mon Sep 17 00:00:00 2001 From: XMRig Date: Sun, 26 May 2019 18:54:47 +0700 Subject: [PATCH 10/18] Removed obsolete automatic variants. 
--- src/common/net/Job.cpp | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/common/net/Job.cpp b/src/common/net/Job.cpp index 851507ad..cb6be4e6 100644 --- a/src/common/net/Job.cpp +++ b/src/common/net/Job.cpp @@ -127,24 +127,6 @@ bool xmrig::Job::setBlob(const char *blob) m_algorithm.setVariant(variant()); } - if (!m_algorithm.isForced()) { - if (m_algorithm.variant() == VARIANT_XTL && m_blob[0] >= 9) { - m_algorithm.setVariant(VARIANT_HALF); - } - else if (m_algorithm.variant() == VARIANT_MSR && m_blob[0] >= 8) { - m_algorithm.setVariant(VARIANT_HALF); - } - else if (m_algorithm.variant() == VARIANT_WOW && m_blob[0] < 11) { - m_algorithm.setVariant(VARIANT_2); - } - else if (m_algorithm.variant() == VARIANT_RWZ && m_blob[0] < 12) { - m_algorithm.setVariant(VARIANT_2); - } - else if (m_algorithm.variant() == VARIANT_ZLS && m_blob[0] < 8) { - m_algorithm.setVariant(VARIANT_2); - } - } - # ifdef XMRIG_PROXY_PROJECT memset(m_rawBlob, 0, sizeof(m_rawBlob)); memcpy(m_rawBlob, blob, m_size * 2); From 809efb4700dc9f14e78eea7f7f1a5c38c6d37cfa Mon Sep 17 00:00:00 2001 From: xmrig Date: Sun, 26 May 2019 19:21:13 +0700 Subject: [PATCH 11/18] Update CHANGELOG.md --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 79c8bb4a..be30f774 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +# v2.14.4 +- [#992](https://github.com/xmrig/xmrig/pull/992) Fixed compilation with Clang 3.5. +- [#1012](https://github.com/xmrig/xmrig/pull/1012) Fixed compilation with Clang 9.0. +- In HTTP API for unknown hashrate now used `null` instead of `0.0`. +- Fixed MSVC 2019 version detection. +- Removed obsolete automatic variants. + # v2.14.1 * [#975](https://github.com/xmrig/xmrig/issues/975) Fixed crash on Linux if double thread mode used. 
From 1d4bc030fb4ccb366765cd4feb31b95ebd9db9a5 Mon Sep 17 00:00:00 2001 From: XMRig Date: Sun, 26 May 2019 19:28:28 +0700 Subject: [PATCH 12/18] v2.14.4-dev --- src/version.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/version.h b/src/version.h index b6ae7102..33be0af8 100644 --- a/src/version.h +++ b/src/version.h @@ -28,7 +28,7 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig CPU miner" -#define APP_VERSION "2.14.2-dev" +#define APP_VERSION "2.14.4-dev" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com" @@ -36,7 +36,7 @@ #define APP_VER_MAJOR 2 #define APP_VER_MINOR 14 -#define APP_VER_PATCH 2 +#define APP_VER_PATCH 4 #ifdef _MSC_VER # if (_MSC_VER >= 1920) From dcf9c6833428f527a4734bbed22c095064626ede Mon Sep 17 00:00:00 2001 From: Brandon Lehmann Date: Mon, 27 May 2019 11:46:09 -0400 Subject: [PATCH 13/18] Add Chukwa support --- CMakeLists.txt | 14 +- src/3rdparty/argon2/.gitattributes | 2 + src/3rdparty/argon2/.gitignore | 70 ++ src/3rdparty/argon2/.travis.yml | 23 + src/3rdparty/argon2/CMakeLists.txt | 203 ++++++ src/3rdparty/argon2/LICENSE | 21 + src/3rdparty/argon2/Makefile.am | 125 ++++ src/3rdparty/argon2/README.md | 58 ++ .../argon2/arch/generic/lib/argon2-arch.c | 20 + .../argon2/arch/x86_64/lib/argon2-arch.c | 38 ++ .../argon2/arch/x86_64/lib/argon2-avx2.c | 341 ++++++++++ .../argon2/arch/x86_64/lib/argon2-avx2.h | 11 + .../argon2/arch/x86_64/lib/argon2-avx512f.c | 326 +++++++++ .../argon2/arch/x86_64/lib/argon2-avx512f.h | 11 + .../argon2/arch/x86_64/lib/argon2-sse2.c | 122 ++++ .../argon2/arch/x86_64/lib/argon2-sse2.h | 11 + .../argon2/arch/x86_64/lib/argon2-ssse3.c | 134 ++++ .../argon2/arch/x86_64/lib/argon2-ssse3.h | 11 + .../arch/x86_64/lib/argon2-template-128.h | 164 +++++ .../argon2/arch/x86_64/lib/argon2-xop.c | 122 ++++ .../argon2/arch/x86_64/lib/argon2-xop.h | 11 + src/3rdparty/argon2/configure.ac | 108 +++ 
src/3rdparty/argon2/include/argon2.h | 478 +++++++++++++ src/3rdparty/argon2/lib/argon2-template-64.h | 193 ++++++ src/3rdparty/argon2/lib/argon2.c | 476 +++++++++++++ src/3rdparty/argon2/lib/blake2/blake2-impl.h | 90 +++ src/3rdparty/argon2/lib/blake2/blake2.c | 225 +++++++ src/3rdparty/argon2/lib/blake2/blake2.h | 30 + src/3rdparty/argon2/lib/core.c | 633 ++++++++++++++++++ src/3rdparty/argon2/lib/core.h | 226 +++++++ src/3rdparty/argon2/lib/encoding.c | 432 ++++++++++++ src/3rdparty/argon2/lib/encoding.h | 40 ++ src/3rdparty/argon2/lib/genkat.c | 117 ++++ src/3rdparty/argon2/lib/genkat.h | 47 ++ src/3rdparty/argon2/lib/impl-select.c | 120 ++++ src/3rdparty/argon2/lib/impl-select.h | 23 + src/3rdparty/argon2/lib/thread.c | 36 + src/3rdparty/argon2/lib/thread.h | 47 ++ .../argon2/m4/ax_check_compile_flag.m4 | 74 ++ src/3rdparty/argon2/m4/ax_pthread.m4 | 485 ++++++++++++++ src/3rdparty/argon2/qmake/arch/arch.pro | 3 + .../argon2/qmake/arch/generic/generic.pro | 1 + .../x86_64/libargon2-avx2/libargon2-avx2.pro | 23 + .../libargon2-avx512f/libargon2-avx512f.pro | 23 + .../x86_64/libargon2-sse2/libargon2-sse2.pro | 24 + .../libargon2-ssse3/libargon2-ssse3.pro | 24 + .../x86_64/libargon2-xop/libargon2-xop.pro | 24 + .../argon2/qmake/arch/x86_64/x86_64.pro | 8 + .../qmake/argon2-bench2/argon2-bench2.pro | 19 + .../qmake/argon2-genkat/argon2-genkat.pro | 16 + .../argon2/qmake/argon2-test/argon2-test.pro | 16 + src/3rdparty/argon2/qmake/argon2.pro | 9 + src/3rdparty/argon2/qmake/argon2/argon2.pro | 18 + .../argon2/qmake/libargon2/libargon2.pro | 119 ++++ .../metacentrum/start-all-benchmarks.sh | 12 + .../scripts/metacentrum/start-benchmark.sh | 75 +++ src/3rdparty/argon2/scripts/run-benchmark.sh | 40 ++ src/3rdparty/argon2/src/bench2.c | 179 +++++ src/3rdparty/argon2/src/genkat.c | 90 +++ src/3rdparty/argon2/src/run.c | 315 +++++++++ src/3rdparty/argon2/src/timing.h | 41 ++ src/3rdparty/argon2/tests/test.c | 239 +++++++ src/common/crypto/Algorithm.cpp | 7 +- 
src/common/xmrig.h | 4 +- src/core/Config.cpp | 12 +- src/crypto/Argon2.h | 45 ++ src/crypto/Argon2_constants.h | 72 ++ src/crypto/Argon2_test.h | 0 src/workers/CpuThread.cpp | 61 ++ src/workers/MultiWorker.cpp | 7 + 70 files changed, 7240 insertions(+), 4 deletions(-) create mode 100644 src/3rdparty/argon2/.gitattributes create mode 100644 src/3rdparty/argon2/.gitignore create mode 100644 src/3rdparty/argon2/.travis.yml create mode 100644 src/3rdparty/argon2/CMakeLists.txt create mode 100644 src/3rdparty/argon2/LICENSE create mode 100644 src/3rdparty/argon2/Makefile.am create mode 100644 src/3rdparty/argon2/README.md create mode 100644 src/3rdparty/argon2/arch/generic/lib/argon2-arch.c create mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-arch.c create mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.c create mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.h create mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.c create mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.h create mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.c create mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.h create mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.c create mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.h create mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-template-128.h create mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.c create mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.h create mode 100644 src/3rdparty/argon2/configure.ac create mode 100644 src/3rdparty/argon2/include/argon2.h create mode 100644 src/3rdparty/argon2/lib/argon2-template-64.h create mode 100644 src/3rdparty/argon2/lib/argon2.c create mode 100644 src/3rdparty/argon2/lib/blake2/blake2-impl.h create mode 100644 src/3rdparty/argon2/lib/blake2/blake2.c create mode 100644 src/3rdparty/argon2/lib/blake2/blake2.h create mode 100644 src/3rdparty/argon2/lib/core.c 
create mode 100644 src/3rdparty/argon2/lib/core.h create mode 100644 src/3rdparty/argon2/lib/encoding.c create mode 100644 src/3rdparty/argon2/lib/encoding.h create mode 100644 src/3rdparty/argon2/lib/genkat.c create mode 100644 src/3rdparty/argon2/lib/genkat.h create mode 100644 src/3rdparty/argon2/lib/impl-select.c create mode 100644 src/3rdparty/argon2/lib/impl-select.h create mode 100644 src/3rdparty/argon2/lib/thread.c create mode 100644 src/3rdparty/argon2/lib/thread.h create mode 100644 src/3rdparty/argon2/m4/ax_check_compile_flag.m4 create mode 100644 src/3rdparty/argon2/m4/ax_pthread.m4 create mode 100644 src/3rdparty/argon2/qmake/arch/arch.pro create mode 100644 src/3rdparty/argon2/qmake/arch/generic/generic.pro create mode 100644 src/3rdparty/argon2/qmake/arch/x86_64/libargon2-avx2/libargon2-avx2.pro create mode 100644 src/3rdparty/argon2/qmake/arch/x86_64/libargon2-avx512f/libargon2-avx512f.pro create mode 100644 src/3rdparty/argon2/qmake/arch/x86_64/libargon2-sse2/libargon2-sse2.pro create mode 100644 src/3rdparty/argon2/qmake/arch/x86_64/libargon2-ssse3/libargon2-ssse3.pro create mode 100644 src/3rdparty/argon2/qmake/arch/x86_64/libargon2-xop/libargon2-xop.pro create mode 100644 src/3rdparty/argon2/qmake/arch/x86_64/x86_64.pro create mode 100644 src/3rdparty/argon2/qmake/argon2-bench2/argon2-bench2.pro create mode 100644 src/3rdparty/argon2/qmake/argon2-genkat/argon2-genkat.pro create mode 100644 src/3rdparty/argon2/qmake/argon2-test/argon2-test.pro create mode 100644 src/3rdparty/argon2/qmake/argon2.pro create mode 100644 src/3rdparty/argon2/qmake/argon2/argon2.pro create mode 100644 src/3rdparty/argon2/qmake/libargon2/libargon2.pro create mode 100644 src/3rdparty/argon2/scripts/metacentrum/start-all-benchmarks.sh create mode 100644 src/3rdparty/argon2/scripts/metacentrum/start-benchmark.sh create mode 100644 src/3rdparty/argon2/scripts/run-benchmark.sh create mode 100644 src/3rdparty/argon2/src/bench2.c create mode 100644 
src/3rdparty/argon2/src/genkat.c create mode 100644 src/3rdparty/argon2/src/run.c create mode 100644 src/3rdparty/argon2/src/timing.h create mode 100644 src/3rdparty/argon2/tests/test.c create mode 100644 src/crypto/Argon2.h create mode 100644 src/crypto/Argon2_constants.h create mode 100644 src/crypto/Argon2_test.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 14dcc931..eb09197b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,7 @@ option(WITH_HTTPD "HTTP REST API" ON) option(WITH_DEBUG_LOG "Enable debug log output" OFF) option(WITH_TLS "Enable OpenSSL support" ON) option(WITH_ASM "Enable ASM PoW implementations" ON) +option(WITH_ARGON2 "Enable Argon2 Support" ON) option(BUILD_STATIC "Build static binary" OFF) option(ARM_TARGET "Force use specific ARM target 8 or 7" 0) option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF) @@ -86,6 +87,8 @@ set(HEADERS ) set(HEADERS_CRYPTO + src/crypto/Argon2.h + src/crypto/Argon2_constants.h src/crypto/c_blake256.h src/crypto/c_groestl.h src/crypto/c_jh.h @@ -281,6 +284,15 @@ else() add_definitions(/DXMRIG_NO_API) endif() +if (WITH_ARGON2) + add_subdirectory(src/3rdparty/argon2) + include_directories(src/3rdparty/argon2/include) + set(ARGON2_LIBRARY argon2) +else() + add_definitions(/DXMRIG_NO_ARGON2) + set(ARGON2_LIBRARY "") +endif() + include_directories(src) include_directories(src/3rdparty) include_directories(${UV_INCLUDE_DIR}) @@ -294,4 +306,4 @@ if (WITH_DEBUG_LOG) endif() add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES} ${CN_GPU_SOURCES}) -target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB}) +target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY}) 
diff --git a/src/3rdparty/argon2/.gitattributes b/src/3rdparty/argon2/.gitattributes new file mode 100644 index 00000000..69755b35 --- /dev/null +++ b/src/3rdparty/argon2/.gitattributes @@ -0,0 +1,2 @@ +*.h linguist-language=C +*.pro linguist-language=QMake diff --git a/src/3rdparty/argon2/.gitignore b/src/3rdparty/argon2/.gitignore new file mode 100644 index 00000000..5bff3d06 --- /dev/null +++ b/src/3rdparty/argon2/.gitignore @@ -0,0 +1,70 @@ +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +# Autotools + Libtool +/aclocal.m4 +/config.status +/config +/install-sh +**/Makefile +**/Makefile.in +/autom4te.cache/ +/compile +/config.guess +/config.log +/config.sub +/configure +/depcomp +/libtool +/ltmain.sh +/m4/libtool.m4 +/m4/lt*.m4 +/missing +/test-driver +**/.deps/ +**/.dirstamp +**/.libs/ + +# Qt Creator +**/*.user +**/*.user.* +**/build-*/ + +# KDE +**/.directory + +# Vim +*.swp + +# CMake +CMakeFiles/ +*.cmake +CMakeCache.txt +Makefile + diff --git a/src/3rdparty/argon2/.travis.yml b/src/3rdparty/argon2/.travis.yml new file mode 100644 index 00000000..0298ff65 --- /dev/null +++ b/src/3rdparty/argon2/.travis.yml @@ -0,0 +1,23 @@ +language: c + +dist: trusty +sudo: false + +compiler: + - clang + - gcc + +env: + - BUILD=cmake BUILD_TYPE=Debug + - BUILD=cmake BUILD_TYPE=Release + - BUILD=autotools + +script: | + case $BUILD in + cmake) + cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE . 
&& make && make test + ;; + autotools) + autoreconf -i && ./configure && make && make check + ;; + esac diff --git a/src/3rdparty/argon2/CMakeLists.txt b/src/3rdparty/argon2/CMakeLists.txt new file mode 100644 index 00000000..a8be1bed --- /dev/null +++ b/src/3rdparty/argon2/CMakeLists.txt @@ -0,0 +1,203 @@ +cmake_minimum_required(VERSION 2.6) + +find_program(CCACHE_PROGRAM ccache) +if(CCACHE_PROGRAM) + message(STATUS "-- Argon2: Found ccache package... Activating...") + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}") +endif() + +project(Argon2 C) +set(ARGON2_VERSION 1.0) +set(CMAKE_C_STANDARD 90) +set(CMAKE_C_STANDARD_REQUIRED ON) + +include(CheckCSourceCompiles) +find_package(Threads REQUIRED) + +add_library(argon2-interface INTERFACE) +target_include_directories(argon2-interface INTERFACE + $ + $ +) + +add_library(argon2-internal INTERFACE) +target_include_directories(argon2-internal INTERFACE lib lib/blake2) +target_link_libraries(argon2-internal INTERFACE argon2-interface) + +set(ARGON2_SRC + lib/argon2.c + lib/core.c + lib/encoding.c + lib/genkat.c + lib/impl-select.c + lib/thread.c + lib/blake2/blake2.c +) + +message("-- Argon2: Processor: ${CMAKE_SYSTEM_PROCESSOR}") +message("-- Argon2: Build Type: ${ARCH}") + +if((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64") AND NOT "${ARCH}" STREQUAL "default") + include(CheckCXXSourceRuns) + + # Check for AVX2 + check_cxx_source_runs(" + #include + int main() + { + __m256i a, b, c; + const int src[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }; + int dst[8]; + a = _mm256_loadu_si256( (__m256i*)src ); + b = _mm256_loadu_si256( (__m256i*)src ); + c = _mm256_add_epi32( a, b ); + _mm256_storeu_si256( (__m256i*)dst, c ); + for( int i = 0; i < 8; i++ ){ + if( ( src[i] + src[i] ) != dst[i] ){ + return -1; + } + } + return 0; + }" + HAVE_AVX2_EXTENSIONS) + + if(HAVE_AVX2_EXTENSIONS) + message("-- Argon2: AVX2 Extensions - Enabled") + add_definitions(-DHAVE_AVX2) + if(MSVC) + 
add_definitions(/arch:AVX2) + endif() + else() + message("-- Argon2: AVX2 Extensions - Disabled") + endif() + + # Check for AVX512 + check_cxx_source_runs(" + #include + int main() + { + __m512i a, b, c; + const int src[16] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; /* 16 x 32-bit = one full 512-bit vector */ + int dst[16]; + a = _mm512_loadu_si512( (__m512i*)src ); + b = _mm512_loadu_si512( (__m512i*)src ); + c = _mm512_add_epi32( a, b ); + _mm512_storeu_si512( (__m512i*)dst, c ); + for( int i = 0; i < 16; i++ ){ + if( ( src[i] + src[i] ) != dst[i] ){ + return -1; + } + } + return 0; + }" + HAVE_AVX512F_EXTENSIONS) + + if(HAVE_AVX512F_EXTENSIONS) + message("-- Argon2: AVX512 Extensions - Enabled") + add_definitions(-DHAVE_AVX512F) + else() + message("-- Argon2: AVX512 Extensions - Disabled") + endif() + + # Check for SSE2 + check_cxx_source_runs(" + #include + int main() + { + __m128d a, b; + double vals[2] = {0}; + a = _mm_loadu_pd(vals); + b = _mm_add_pd(a,a); + _mm_storeu_pd(vals,b); + return 0; + }" + HAVE_SSE2_EXTENSIONS) + + if(HAVE_SSE2_EXTENSIONS) + message("-- Argon2: SSE2 Extensions - Enabled") + add_definitions(-DHAVE_SSE2) + if(MSVC) + add_definitions(/arch:SSE2) + endif() + else() + message("-- Argon2: SSE2 Extensions - Disabled") + endif() + + # Check for SSE3 + check_cxx_source_runs(" + #include + int main() + { + __m128d a, b; + double vals[2] = {0}; + a = _mm_loadu_pd(vals); + b = _mm_hadd_pd(a,a); + _mm_storeu_pd(vals, b); + return 0; + }" + HAVE_SSE3_EXTENSIONS) + + if(HAVE_SSE3_EXTENSIONS) + message("-- Argon2: SSE3 Extensions - Enabled") + add_definitions(-DHAVE_SSE3) + if(MSVC) + add_definitions(/arch:SSE3) + endif() + else() + message("-- Argon2: SSE3 Extensions - Disabled") + endif() + + # Check for XOP + check_cxx_source_runs(" + #include + int main() + { + __m128i a, b, c; + const int src[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }; + a = _mm_load_si128( (__m128i*)src ); + b = _mm_load_si128( (__m128i*)src ); + c = _mm_roti_epi64(*a, b); + return 0; + }" + HAVE_XOP_EXTENSIONS) + + if(HAVE_XOP_EXTENSIONS) + 
message("-- Argon2: XOP Extensions - Enabled") + add_definitions(-DHAVE_XOP) + else() + message("-- Argon2: XOP Extensions - Disabled") + endif() + + list(APPEND ARGON2_SRC + arch/x86_64/lib/argon2-sse2.c + arch/x86_64/lib/argon2-ssse3.c + arch/x86_64/lib/argon2-xop.c + arch/x86_64/lib/argon2-avx2.c + arch/x86_64/lib/argon2-avx512f.c + arch/x86_64/lib/argon2-arch.c + ) +else() + list(APPEND ARGON2_SRC + arch/generic/lib/argon2-arch.c + ) +endif() + +add_library(argon2 STATIC ${ARGON2_SRC}) + +target_compile_definitions(argon2 + PUBLIC "A2_VISCTL" +) + +target_link_libraries(argon2 + PUBLIC argon2-interface ${CMAKE_THREAD_LIBS_INIT} + PRIVATE argon2-internal +) + +set_property(TARGET argon2 PROPERTY C_STANDARD 90) +set_property(TARGET argon2 PROPERTY VERSION ${ARGON2_VERSION}) +set_property(TARGET argon2 PROPERTY SOVERSION 1) +set_property(TARGET argon2 PROPERTY INTERFACE_ARGON2_MAJOR_VERSION 1) +set_property(TARGET argon2 APPEND PROPERTY + COMPATIBLE_INTERFACE_STRING ARGON2_MAJOR_VERSION +) + diff --git a/src/3rdparty/argon2/LICENSE b/src/3rdparty/argon2/LICENSE new file mode 100644 index 00000000..f9b00035 --- /dev/null +++ b/src/3rdparty/argon2/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2016 Ondrej Mosnáček + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/src/3rdparty/argon2/Makefile.am b/src/3rdparty/argon2/Makefile.am new file mode 100644 index 00000000..fffb45ca --- /dev/null +++ b/src/3rdparty/argon2/Makefile.am @@ -0,0 +1,125 @@ +ACLOCAL_AMFLAGS = -I m4 + +include_HEADERS = include/argon2.h +lib_LTLIBRARIES = libargon2.la +noinst_LTLIBRARIES = + +bin_PROGRAMS = argon2 +noinst_PROGRAMS = argon2-genkat argon2-bench2 argon2-test + +TESTS = argon2-test + +AM_CPPFLAGS = \ + -I$(srcdir)/include \ + -I$(srcdir)/lib \ + -I$(srcdir)/arch/@ARCH@/include \ + -I$(srcdir)/arch/@ARCH@/lib + +libargon2_la_CFLAGS = @PTHREAD_CFLAGS@ +libargon2_la_LIBADD = @PTHREAD_LIBS@ +libargon2_la_SOURCES = \ + lib/argon2.c \ + lib/core.c \ + lib/encoding.c \ + lib/genkat.c \ + lib/impl-select.c \ + lib/thread.c \ + lib/blake2/blake2.c \ + lib/argon2-template-64.h \ + lib/core.h \ + lib/encoding.h \ + lib/genkat.h \ + lib/impl-select.h \ + lib/thread.h \ + lib/blake2/blake2.h \ + lib/blake2/blake2-impl.h + +if ARCH_X86_64 +noinst_LTLIBRARIES += \ + libargon2-sse2.la \ + libargon2-ssse3.la \ + libargon2-xop.la \ + libargon2-avx2.la \ + libargon2-avx512f.la +libargon2_la_LIBADD += \ + libargon2-sse2.la \ + libargon2-ssse3.la \ + libargon2-xop.la \ + libargon2-avx2.la \ + libargon2-avx512f.la + +libargon2_sse2_la_SOURCES = \ + arch/@ARCH@/lib/argon2-sse2.c \ + arch/@ARCH@/lib/argon2-sse2.h \ + arch/@ARCH@/lib/argon2-template-128.h +libargon2_sse2_la_CPPFLAGS = $(AM_CPPFLAGS) +libargon2_sse2_la_CFLAGS = $(AM_CFLAGS) +if HAVE_SSE2 +libargon2_sse2_la_CPPFLAGS += -DHAVE_SSE2 
+libargon2_sse2_la_CFLAGS += -msse2 +endif # HAVE_SSE2 + +libargon2_ssse3_la_SOURCES = \ + arch/@ARCH@/lib/argon2-ssse3.c \ + arch/@ARCH@/lib/argon2-ssse3.h \ + arch/@ARCH@/lib/argon2-template-128.h +libargon2_ssse3_la_CPPFLAGS = $(AM_CPPFLAGS) +libargon2_ssse3_la_CFLAGS = $(AM_CFLAGS) +if HAVE_SSSE3 +libargon2_ssse3_la_CPPFLAGS += -DHAVE_SSSE3 +libargon2_ssse3_la_CFLAGS += -mssse3 +endif # HAVE_SSSE3 + +libargon2_xop_la_SOURCES = \ + arch/@ARCH@/lib/argon2-xop.c \ + arch/@ARCH@/lib/argon2-xop.h \ + arch/@ARCH@/lib/argon2-template-128.h +libargon2_xop_la_CPPFLAGS = $(AM_CPPFLAGS) +libargon2_xop_la_CFLAGS = $(AM_CFLAGS) +if HAVE_XOP +libargon2_xop_la_CPPFLAGS += -DHAVE_XOP +libargon2_xop_la_CFLAGS += -mxop +endif # HAVE_XOP + +libargon2_avx2_la_SOURCES = \ + arch/@ARCH@/lib/argon2-avx2.c \ + arch/@ARCH@/lib/argon2-avx2.h +libargon2_avx2_la_CPPFLAGS = $(AM_CPPFLAGS) +libargon2_avx2_la_CFLAGS = $(AM_CFLAGS) +if HAVE_AVX2 +libargon2_avx2_la_CPPFLAGS += -DHAVE_AVX2 +libargon2_avx2_la_CFLAGS += -mavx2 +endif # HAVE_AVX2 + +libargon2_avx512f_la_SOURCES = \ + arch/@ARCH@/lib/argon2-avx512f.c \ + arch/@ARCH@/lib/argon2-avx512f.h +libargon2_avx512f_la_CPPFLAGS = $(AM_CPPFLAGS) +libargon2_avx512f_la_CFLAGS = $(AM_CFLAGS) +if HAVE_AVX512F +libargon2_avx512f_la_CPPFLAGS += -DHAVE_AVX512F +libargon2_avx512f_la_CFLAGS += -mavx512f +endif # HAVE_AVX512F + +libargon2_la_SOURCES += \ + arch/@ARCH@/lib/argon2-arch.c \ + arch/@ARCH@/lib/cpu-flags.c \ + arch/@ARCH@/lib/cpu-flags.h +endif # ARCH_X86_64 + +if ARCH_GENERIC +libargon2_la_SOURCES += \ + arch/@ARCH@/lib/argon2-arch.c +endif # ARCH_GENERIC + +argon2_LDADD = libargon2.la +argon2_SOURCES = src/run.c lib/core.h + +argon2_genkat_LDADD = libargon2.la +argon2_genkat_SOURCES = src/genkat.c + +argon2_bench2_LDADD = libargon2.la -lrt +argon2_bench2_SOURCES = src/bench2.c src/timing.h + +argon2_test_LDADD = libargon2.la +argon2_test_SOURCES = tests/test.c diff --git a/src/3rdparty/argon2/README.md b/src/3rdparty/argon2/README.md new 
file mode 100644 index 00000000..254e26da --- /dev/null +++ b/src/3rdparty/argon2/README.md @@ -0,0 +1,58 @@ +# Argon2 [![Build Status](https://travis-ci.org/WOnder93/argon2.svg?branch=master)](https://travis-ci.org/WOnder93/argon2) +A multi-arch library implementing the Argon2 password hashing algorithm. + +This project is based on the [original source code](https://github.com/P-H-C/phc-winner-argon2) by the Argon2 authors. The goal of this project is to provide efficient Argon2 implementations for various HW architectures (x86, SSE, ARM, PowerPC, ...). + +For the x86_64 architecture, the library implements a simple CPU dispatch which automatically selects the best implementation based on CPU flags and quick benchmarks. + +# Building +## Using GNU autotools + +To prepare the build environment, run: +```bash +autoreconf -i +./configure +``` + +After that, just run `make` to build the library. + +### Running tests +After configuring the build environment, run `make check` to run the tests. + +### Architecture options +You can specify the target architecture by passing the `--host=...` flag to `./configure`. + +Supported architectures: + * `x86_64` – 64-bit x86 architecture + * `generic` – use generic C implementation + +## Using CMake + +To prepare the build environment, run: +```bash +cmake -DCMAKE_BUILD_TYPE=Release . +``` + +Then you can run `make` to build the library. + +## Using QMake/Qt Creator +A [QMake](http://doc.qt.io/qt-4.8/qmake-manual.html) project is also available in the `qmake` directory. You can open it in the [Qt Creator IDE](http://wiki.qt.io/Category:Tools::QtCreator) or build it from terminal: +```bash +cd qmake +# see table below for the list of possible ARCH and CONFIG values +qmake ARCH=... CONFIG+=... +make +``` + +### Architecture options +For QMake builds you can configure support for different architectures. Use the `ARCH` variable to choose the architecture and the `CONFIG` variable to set additional options. 
+ +Supported architectures: + * `x86_64` – 64-bit x86 architecture + * QMake config flags: + * `USE_SSE2` – use SSE2 instructions + * `USE_SSSE3` – use SSSE3 instructions + * `USE_XOP` – use XOP instructions + * `USE_AVX2` – use AVX2 instructions + * `USE_AVX512F` – use AVX-512F instructions + * `generic` – use generic C implementation diff --git a/src/3rdparty/argon2/arch/generic/lib/argon2-arch.c b/src/3rdparty/argon2/arch/generic/lib/argon2-arch.c new file mode 100644 index 00000000..39abadee --- /dev/null +++ b/src/3rdparty/argon2/arch/generic/lib/argon2-arch.c @@ -0,0 +1,20 @@ +#include +#include +#include + +#include "impl-select.h" + +#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) + +#include "argon2-template-64.h" + +void fill_segment_default(const argon2_instance_t *instance, + argon2_position_t position) +{ + fill_segment_64(instance, position); +} + +void argon2_get_impl_list(argon2_impl_list *list) +{ + list->count = 0; +} diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-arch.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-arch.c new file mode 100644 index 00000000..1d54b657 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-arch.c @@ -0,0 +1,38 @@ +#include +#include +#include + +#include "impl-select.h" + +#include "argon2-sse2.h" +#include "argon2-ssse3.h" +#include "argon2-xop.h" +#include "argon2-avx2.h" +#include "argon2-avx512f.h" + +/* NOTE: there is no portable intrinsic for 64-bit rotate, but any + * sane compiler should be able to compile this into a ROR instruction: */ +#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) + +#include "argon2-template-64.h" + +void fill_segment_default(const argon2_instance_t *instance, + argon2_position_t position) +{ + fill_segment_64(instance, position); +} + +void argon2_get_impl_list(argon2_impl_list *list) +{ + static const argon2_impl IMPLS[] = { + { "x86_64", NULL, fill_segment_default }, + { "SSE2", check_sse2, fill_segment_sse2 }, + { "SSSE3", check_ssse3, 
fill_segment_ssse3 }, + { "XOP", check_xop, fill_segment_xop }, + { "AVX2", check_avx2, fill_segment_avx2 }, + { "AVX-512F", check_avx512f, fill_segment_avx512f }, + }; + + list->count = sizeof(IMPLS) / sizeof(IMPLS[0]); + list->entries = IMPLS; +} diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.c new file mode 100644 index 00000000..5dc41979 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.c @@ -0,0 +1,341 @@ +#include "argon2-avx2.h" + +#ifdef HAVE_AVX2 +#include + +#include + +#define r16 (_mm256_setr_epi8( \ + 2, 3, 4, 5, 6, 7, 0, 1, \ + 10, 11, 12, 13, 14, 15, 8, 9, \ + 18, 19, 20, 21, 22, 23, 16, 17, \ + 26, 27, 28, 29, 30, 31, 24, 25)) + +#define r24 (_mm256_setr_epi8( \ + 3, 4, 5, 6, 7, 0, 1, 2, \ + 11, 12, 13, 14, 15, 8, 9, 10, \ + 19, 20, 21, 22, 23, 16, 17, 18, \ + 27, 28, 29, 30, 31, 24, 25, 26)) + +#define ror64_16(x) _mm256_shuffle_epi8((x), r16) +#define ror64_24(x) _mm256_shuffle_epi8((x), r24) +#define ror64_32(x) _mm256_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) +#define ror64_63(x) \ + _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x))) + +static __m256i f(__m256i x, __m256i y) +{ + __m256i z = _mm256_mul_epu32(x, y); + return _mm256_add_epi64(_mm256_add_epi64(x, y), _mm256_add_epi64(z, z)); +} + +#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm256_xor_si256(D0, A0); \ + D1 = _mm256_xor_si256(D1, A1); \ +\ + D0 = ror64_32(D0); \ + D1 = ror64_32(D1); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm256_xor_si256(B0, C0); \ + B1 = _mm256_xor_si256(B1, C1); \ +\ + B0 = ror64_24(B0); \ + B1 = ror64_24(B1); \ + } while ((void)0, 0) + +#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm256_xor_si256(D0, A0); \ + D1 = _mm256_xor_si256(D1, A1); \ +\ + D0 = ror64_16(D0); \ + D1 = ror64_16(D1); \ +\ + C0 = f(C0, D0); \ + C1 = 
f(C1, D1); \ +\ + B0 = _mm256_xor_si256(B0, C0); \ + B1 = _mm256_xor_si256(B1, C1); \ +\ + B0 = ror64_63(B0); \ + B1 = ror64_63(B1); \ + } while ((void)0, 0) + +#define DIAGONALIZE1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \ + B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \ +\ + C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ + C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ +\ + D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \ + D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \ + } while ((void)0, 0) + +#define UNDIAGONALIZE1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \ + B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \ +\ + C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ + C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ +\ + D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \ + D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \ + } while ((void)0, 0) + +#define DIAGONALIZE2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m256i tmp1, tmp2; \ + tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \ + tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \ + B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ + B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ +\ + tmp1 = C0; \ + C0 = C1; \ + C1 = tmp1; \ +\ + tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \ + tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \ + D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ + D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ + } while ((void)0, 0) + +#define UNDIAGONALIZE2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m256i tmp1, tmp2; \ + tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \ + tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \ + B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ + B1 = _mm256_permute4x64_epi64(tmp2, 
_MM_SHUFFLE(2,3,0,1)); \ +\ + tmp1 = C0; \ + C0 = C1; \ + C1 = tmp1; \ +\ + tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \ + tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \ + D1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ + D0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + DIAGONALIZE1(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + UNDIAGONALIZE1(A0, B0, C0, D0, A1, B1, C1, D1); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND2(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + DIAGONALIZE2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + UNDIAGONALIZE2(A0, B0, C0, D0, A1, B1, C1, D1); \ + } while ((void)0, 0) + +enum { + ARGON2_HWORDS_IN_BLOCK = ARGON2_OWORDS_IN_BLOCK / 2, +}; + +static void fill_block(__m256i *s, const block *ref_block, block *next_block, + int with_xor) +{ + __m256i block_XY[ARGON2_HWORDS_IN_BLOCK]; + unsigned int i; + + if (with_xor) { + for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { + s[i] =_mm256_xor_si256( + s[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i)); + block_XY[i] = _mm256_xor_si256( + s[i], _mm256_loadu_si256((const __m256i *)next_block->v + i)); + } + + } else { + for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { + block_XY[i] = s[i] =_mm256_xor_si256( + s[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i)); + } + } + + for (i = 0; i < 4; ++i) { + BLAKE2_ROUND1( + s[8 * i + 0], s[8 * i + 1], s[8 * i + 2], s[8 * i + 3], + s[8 * i + 4], s[8 * i + 5], s[8 * i + 6], s[8 * i + 7]); + } + + for (i = 0; i < 4; ++i) { + BLAKE2_ROUND2( + s[4 * 0 + i], s[4 * 1 + i], s[4 * 2 + i], s[4 * 3 + i], + s[4 * 4 + i], s[4 * 
5 + i], s[4 * 6 + i], s[4 * 7 + i]); + } + + for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { + s[i] = _mm256_xor_si256(s[i], block_XY[i]); + _mm256_storeu_si256((__m256i *)next_block->v + i, s[i]); + } +} + +static void next_addresses(block *address_block, block *input_block) +{ + /*Temporary zero-initialized blocks*/ + __m256i zero_block[ARGON2_HWORDS_IN_BLOCK]; + __m256i zero2_block[ARGON2_HWORDS_IN_BLOCK]; + + memset(zero_block, 0, sizeof(zero_block)); + memset(zero2_block, 0, sizeof(zero2_block)); + + /*Increasing index counter*/ + input_block->v[6]++; + + /*First iteration of G*/ + fill_block(zero_block, input_block, address_block, 0); + + /*Second iteration of G*/ + fill_block(zero2_block, address_block, address_block, 0); +} + +void fill_segment_avx2(const argon2_instance_t *instance, + argon2_position_t position) +{ + block *ref_block = NULL, *curr_block = NULL; + block address_block, input_block; + uint64_t pseudo_rand, ref_index, ref_lane; + uint32_t prev_offset, curr_offset; + uint32_t starting_index, i; + __m256i state[ARGON2_HWORDS_IN_BLOCK]; + int data_independent_addressing; + + if (instance == NULL) { + return; + } + + data_independent_addressing = (instance->type == Argon2_i) || + (instance->type == Argon2_id && (position.pass == 0) && + (position.slice < ARGON2_SYNC_POINTS / 2)); + + if (data_independent_addressing) { + init_block_value(&input_block, 0); + + input_block.v[0] = position.pass; + input_block.v[1] = position.lane; + input_block.v[2] = position.slice; + input_block.v[3] = instance->memory_blocks; + input_block.v[4] = instance->passes; + input_block.v[5] = instance->type; + } + + starting_index = 0; + + if ((0 == position.pass) && (0 == position.slice)) { + starting_index = 2; /* we have already generated the first two blocks */ + + /* Don't forget to generate the first block of addresses: */ + if (data_independent_addressing) { + next_addresses(&address_block, &input_block); + } + } + + /* Offset of the current block */ + curr_offset = 
position.lane * instance->lane_length + + position.slice * instance->segment_length + starting_index; + + if (0 == curr_offset % instance->lane_length) { + /* Last block in this lane */ + prev_offset = curr_offset + instance->lane_length - 1; + } else { + /* Previous block */ + prev_offset = curr_offset - 1; + } + + memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE); + + for (i = starting_index; i < instance->segment_length; + ++i, ++curr_offset, ++prev_offset) { + /*1.1 Rotating prev_offset if needed */ + if (curr_offset % instance->lane_length == 1) { + prev_offset = curr_offset - 1; + } + + /* 1.2 Computing the index of the reference block */ + /* 1.2.1 Taking pseudo-random value from the previous block */ + if (data_independent_addressing) { + if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { + next_addresses(&address_block, &input_block); + } + pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; + } else { + pseudo_rand = instance->memory[prev_offset].v[0]; + } + + /* 1.2.2 Computing the lane of the reference block */ + ref_lane = ((pseudo_rand >> 32)) % instance->lanes; + + if ((position.pass == 0) && (position.slice == 0)) { + /* Can not reference other lanes yet */ + ref_lane = position.lane; + } + + /* 1.2.3 Computing the number of possible reference block within the + * lane. 
+ */ + position.index = i; + ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, + ref_lane == position.lane); + + /* 2 Creating a new block */ + ref_block = + instance->memory + instance->lane_length * ref_lane + ref_index; + curr_block = instance->memory + curr_offset; + + /* version 1.2.1 and earlier: overwrite, not XOR */ + if (0 == position.pass || ARGON2_VERSION_10 == instance->version) { + fill_block(state, ref_block, curr_block, 0); + } else { + fill_block(state, ref_block, curr_block, 1); + } + } +} + +int check_avx2(void) +{ + return 1; +} + +#else + +void fill_segment_avx2(const argon2_instance_t *instance, + argon2_position_t position) +{ +} + +int check_avx2(void) +{ + return 0; +} + +#endif diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.h b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.h new file mode 100644 index 00000000..8abdb8a5 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.h @@ -0,0 +1,11 @@ +#ifndef ARGON2_AVX2_H +#define ARGON2_AVX2_H + +#include "core.h" + +void fill_segment_avx2(const argon2_instance_t *instance, + argon2_position_t position); + +int check_avx2(void); + +#endif // ARGON2_AVX2_H diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.c new file mode 100644 index 00000000..f6de135b --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.c @@ -0,0 +1,326 @@ +#include "argon2-avx512f.h" + +#ifdef HAVE_AVX512F +#include +#include + +#include + +#define ror64(x, n) _mm512_ror_epi64((x), (n)) + +static __m512i f(__m512i x, __m512i y) +{ + __m512i z = _mm512_mul_epu32(x, y); + return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z)); +} + +#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm512_xor_si512(D0, A0); \ + D1 = _mm512_xor_si512(D1, A1); \ +\ + D0 = ror64(D0, 32); \ + D1 = ror64(D1, 32); \ +\ + C0 = f(C0, D0); \ + C1 = 
f(C1, D1); \ +\ + B0 = _mm512_xor_si512(B0, C0); \ + B1 = _mm512_xor_si512(B1, C1); \ +\ + B0 = ror64(B0, 24); \ + B1 = ror64(B1, 24); \ + } while ((void)0, 0) + +#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm512_xor_si512(D0, A0); \ + D1 = _mm512_xor_si512(D1, A1); \ +\ + D0 = ror64(D0, 16); \ + D1 = ror64(D1, 16); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm512_xor_si512(B0, C0); \ + B1 = _mm512_xor_si512(B1, C1); \ +\ + B0 = ror64(B0, 63); \ + B1 = ror64(B1, 63); \ + } while ((void)0, 0) + +#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \ + B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \ +\ + C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ + C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ +\ + D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \ + D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \ + } while ((void)0, 0) + +#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \ + B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \ +\ + C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ + C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ +\ + D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \ + D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ + } while ((void)0, 0) + +#define SWAP_HALVES(A0, A1) \ + do { \ + __m512i t0, t1; \ + t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \ + t1 = 
_mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \ + A0 = t0; \ + A1 = t1; \ + } while((void)0, 0) + +#define SWAP_QUARTERS(A0, A1) \ + do { \ + SWAP_HALVES(A0, A1); \ + A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \ + A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \ + } while((void)0, 0) + +#define UNSWAP_QUARTERS(A0, A1) \ + do { \ + A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \ + A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \ + SWAP_HALVES(A0, A1); \ + } while((void)0, 0) + +#define BLAKE2_ROUND1(A0, C0, B0, D0, A1, C1, B1, D1) \ + do { \ + SWAP_HALVES(A0, B0); \ + SWAP_HALVES(C0, D0); \ + SWAP_HALVES(A1, B1); \ + SWAP_HALVES(C1, D1); \ + BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \ + SWAP_HALVES(A0, B0); \ + SWAP_HALVES(C0, D0); \ + SWAP_HALVES(A1, B1); \ + SWAP_HALVES(C1, D1); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND2(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + SWAP_QUARTERS(A0, A1); \ + SWAP_QUARTERS(B0, B1); \ + SWAP_QUARTERS(C0, C1); \ + SWAP_QUARTERS(D0, D1); \ + BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \ + UNSWAP_QUARTERS(A0, A1); \ + UNSWAP_QUARTERS(B0, B1); \ + UNSWAP_QUARTERS(C0, C1); \ + UNSWAP_QUARTERS(D0, D1); \ + } while ((void)0, 0) + +enum { + ARGON2_VECS_IN_BLOCK = ARGON2_OWORDS_IN_BLOCK / 4, +}; + +static void fill_block(__m512i *s, const block *ref_block, block *next_block, + int with_xor) +{ + __m512i block_XY[ARGON2_VECS_IN_BLOCK]; + unsigned int i; + + if (with_xor) { + for (i = 0; i < ARGON2_VECS_IN_BLOCK; i++) { + s[i] =_mm512_xor_si512( + s[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i)); + block_XY[i] = _mm512_xor_si512( + s[i], _mm512_loadu_si512((const __m512i *)next_block->v + i)); + } + + } else { + for (i = 0; i < ARGON2_VECS_IN_BLOCK; i++) { + block_XY[i] = s[i] =_mm512_xor_si512( + s[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i)); + } + } + + for (i = 0; i 
< 2; ++i) { + BLAKE2_ROUND1( + s[8 * i + 0], s[8 * i + 1], s[8 * i + 2], s[8 * i + 3], + s[8 * i + 4], s[8 * i + 5], s[8 * i + 6], s[8 * i + 7]); + } + + for (i = 0; i < 2; ++i) { + BLAKE2_ROUND2( + s[2 * 0 + i], s[2 * 1 + i], s[2 * 2 + i], s[2 * 3 + i], + s[2 * 4 + i], s[2 * 5 + i], s[2 * 6 + i], s[2 * 7 + i]); + } + + for (i = 0; i < ARGON2_VECS_IN_BLOCK; i++) { + s[i] = _mm512_xor_si512(s[i], block_XY[i]); + _mm512_storeu_si512((__m512i *)next_block->v + i, s[i]); + } +} + +static void next_addresses(block *address_block, block *input_block) +{ + /*Temporary zero-initialized blocks*/ + __m512i zero_block[ARGON2_VECS_IN_BLOCK]; + __m512i zero2_block[ARGON2_VECS_IN_BLOCK]; + + memset(zero_block, 0, sizeof(zero_block)); + memset(zero2_block, 0, sizeof(zero2_block)); + + /*Increasing index counter*/ + input_block->v[6]++; + + /*First iteration of G*/ + fill_block(zero_block, input_block, address_block, 0); + + /*Second iteration of G*/ + fill_block(zero2_block, address_block, address_block, 0); +} + +void fill_segment_avx512f(const argon2_instance_t *instance, + argon2_position_t position) +{ + block *ref_block = NULL, *curr_block = NULL; + block address_block, input_block; + uint64_t pseudo_rand, ref_index, ref_lane; + uint32_t prev_offset, curr_offset; + uint32_t starting_index, i; + __m512i state[ARGON2_VECS_IN_BLOCK]; + int data_independent_addressing; + + if (instance == NULL) { + return; + } + + data_independent_addressing = (instance->type == Argon2_i) || + (instance->type == Argon2_id && (position.pass == 0) && + (position.slice < ARGON2_SYNC_POINTS / 2)); + + if (data_independent_addressing) { + init_block_value(&input_block, 0); + + input_block.v[0] = position.pass; + input_block.v[1] = position.lane; + input_block.v[2] = position.slice; + input_block.v[3] = instance->memory_blocks; + input_block.v[4] = instance->passes; + input_block.v[5] = instance->type; + } + + starting_index = 0; + + if ((0 == position.pass) && (0 == position.slice)) { + 
starting_index = 2; /* we have already generated the first two blocks */ + + /* Don't forget to generate the first block of addresses: */ + if (data_independent_addressing) { + next_addresses(&address_block, &input_block); + } + } + + /* Offset of the current block */ + curr_offset = position.lane * instance->lane_length + + position.slice * instance->segment_length + starting_index; + + if (0 == curr_offset % instance->lane_length) { + /* Last block in this lane */ + prev_offset = curr_offset + instance->lane_length - 1; + } else { + /* Previous block */ + prev_offset = curr_offset - 1; + } + + memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE); + + for (i = starting_index; i < instance->segment_length; + ++i, ++curr_offset, ++prev_offset) { + /*1.1 Rotating prev_offset if needed */ + if (curr_offset % instance->lane_length == 1) { + prev_offset = curr_offset - 1; + } + + /* 1.2 Computing the index of the reference block */ + /* 1.2.1 Taking pseudo-random value from the previous block */ + if (data_independent_addressing) { + if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { + next_addresses(&address_block, &input_block); + } + pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; + } else { + pseudo_rand = instance->memory[prev_offset].v[0]; + } + + /* 1.2.2 Computing the lane of the reference block */ + ref_lane = ((pseudo_rand >> 32)) % instance->lanes; + + if ((position.pass == 0) && (position.slice == 0)) { + /* Can not reference other lanes yet */ + ref_lane = position.lane; + } + + /* 1.2.3 Computing the number of possible reference block within the + * lane. 
+ */ + position.index = i; + ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, + ref_lane == position.lane); + + /* 2 Creating a new block */ + ref_block = + instance->memory + instance->lane_length * ref_lane + ref_index; + curr_block = instance->memory + curr_offset; + + /* version 1.2.1 and earlier: overwrite, not XOR */ + if (0 == position.pass || ARGON2_VERSION_10 == instance->version) { + fill_block(state, ref_block, curr_block, 0); + } else { + fill_block(state, ref_block, curr_block, 1); + } + } +} + +int check_avx512f(void) +{ + return 1; +} + +#else + +void fill_segment_avx512f(const argon2_instance_t *instance, + argon2_position_t position) +{ +} + +int check_avx512f(void) +{ + return 0; +} + +#endif diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.h b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.h new file mode 100644 index 00000000..ba431114 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.h @@ -0,0 +1,11 @@ +#ifndef ARGON2_AVX512F_H +#define ARGON2_AVX512F_H + +#include "core.h" + +void fill_segment_avx512f(const argon2_instance_t *instance, + argon2_position_t position); + +int check_avx512f(void); + +#endif // ARGON2_AVX512F_H diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.c new file mode 100644 index 00000000..60ffb7bb --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.c @@ -0,0 +1,122 @@ +#include "argon2-sse2.h" + +#ifdef HAVE_SSE2 +#include + +#define ror64_16(x) \ + _mm_shufflehi_epi16( \ + _mm_shufflelo_epi16((x), _MM_SHUFFLE(0, 3, 2, 1)), \ + _MM_SHUFFLE(0, 3, 2, 1)) +#define ror64_24(x) \ + _mm_xor_si128(_mm_srli_epi64((x), 24), _mm_slli_epi64((x), 40)) +#define ror64_32(x) _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) +#define ror64_63(x) \ + _mm_xor_si128(_mm_srli_epi64((x), 63), _mm_add_epi64((x), (x))) + +static __m128i f(__m128i x, __m128i y) +{ + __m128i z = _mm_mul_epu32(x, y); + return 
_mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z)); +} + +#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ +\ + D0 = ror64_32(D0); \ + D1 = ror64_32(D1); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm_xor_si128(B0, C0); \ + B1 = _mm_xor_si128(B1, C1); \ +\ + B0 = ror64_24(B0); \ + B1 = ror64_24(B1); \ + } while ((void)0, 0) + +#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ +\ + D0 = ror64_16(D0); \ + D1 = ror64_16(D1); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm_xor_si128(B0, C0); \ + B1 = _mm_xor_si128(B1, C1); \ +\ + B0 = ror64_63(B0); \ + B1 = ror64_63(B1); \ + } while ((void)0, 0) + +#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = D0; \ + __m128i t1 = B0; \ + D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \ + D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \ + B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \ + B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \ + } while ((void)0, 0) + +#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = B0; \ + __m128i t1 = D0; \ + B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \ + B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \ + D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \ + D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + G1(A0, B0, C1, D0, A1, B1, C0, D1); \ + G2(A0, B0, C1, D0, A1, B1, C0, D1); \ +\ + UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ + } while ((void)0, 0) + +#include "argon2-template-128.h" + 
+void fill_segment_sse2(const argon2_instance_t *instance, + argon2_position_t position) +{ + fill_segment_128(instance, position); +} + +int check_sse2(void) +{ + return 1; +} + +#else + +void fill_segment_sse2(const argon2_instance_t *instance, + argon2_position_t position) +{ +} + +int check_sse2(void) +{ + return 0; +} + +#endif diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.h b/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.h new file mode 100644 index 00000000..024d503d --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.h @@ -0,0 +1,11 @@ +#ifndef ARGON2_SSE2_H +#define ARGON2_SSE2_H + +#include "core.h" + +void fill_segment_sse2(const argon2_instance_t *instance, + argon2_position_t position); + +int check_sse2(void); + +#endif // ARGON2_SSE2_H diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.c new file mode 100644 index 00000000..7098ab22 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.c @@ -0,0 +1,134 @@ +#include "argon2-ssse3.h" + +#ifdef HAVE_SSSE3 +#include + +#include + +#define r16 (_mm_setr_epi8( \ + 2, 3, 4, 5, 6, 7, 0, 1, \ + 10, 11, 12, 13, 14, 15, 8, 9)) + +#define r24 (_mm_setr_epi8( \ + 3, 4, 5, 6, 7, 0, 1, 2, \ + 11, 12, 13, 14, 15, 8, 9, 10)) + +#define ror64_16(x) _mm_shuffle_epi8((x), r16) +#define ror64_24(x) _mm_shuffle_epi8((x), r24) +#define ror64_32(x) _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) +#define ror64_63(x) \ + _mm_xor_si128(_mm_srli_epi64((x), 63), _mm_add_epi64((x), (x))) + +static __m128i f(__m128i x, __m128i y) +{ + __m128i z = _mm_mul_epu32(x, y); + return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z)); +} + +#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ +\ + D0 = ror64_32(D0); \ + D1 = ror64_32(D1); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm_xor_si128(B0, C0); \ 
+ B1 = _mm_xor_si128(B1, C1); \ +\ + B0 = ror64_24(B0); \ + B1 = ror64_24(B1); \ + } while ((void)0, 0) + +#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ +\ + D0 = ror64_16(D0); \ + D1 = ror64_16(D1); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm_xor_si128(B0, C0); \ + B1 = _mm_xor_si128(B1, C1); \ +\ + B0 = ror64_63(B0); \ + B1 = ror64_63(B1); \ + } while ((void)0, 0) + +#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \ + __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \ + B0 = t0; \ + B1 = t1; \ +\ + t0 = _mm_alignr_epi8(D1, D0, 8); \ + t1 = _mm_alignr_epi8(D0, D1, 8); \ + D0 = t1; \ + D1 = t0; \ + } while ((void)0, 0) + +#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \ + __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \ + B0 = t0; \ + B1 = t1; \ +\ + t0 = _mm_alignr_epi8(D0, D1, 8); \ + t1 = _mm_alignr_epi8(D1, D0, 8); \ + D0 = t1; \ + D1 = t0; \ + } while ((void)0, 0) + +#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + G1(A0, B0, C1, D0, A1, B1, C0, D1); \ + G2(A0, B0, C1, D0, A1, B1, C0, D1); \ +\ + UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ + } while ((void)0, 0) + +#include "argon2-template-128.h" + +void fill_segment_ssse3(const argon2_instance_t *instance, + argon2_position_t position) +{ + fill_segment_128(instance, position); +} + +int check_ssse3(void) +{ + return 1; +} + +#else + +void fill_segment_ssse3(const argon2_instance_t *instance, + argon2_position_t position) +{ +} + +int check_ssse3(void) +{ + return 0; +} + +#endif diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.h b/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.h new file mode 100644 index 
00000000..139fdacc --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.h @@ -0,0 +1,11 @@ +#ifndef ARGON2_SSSE3_H +#define ARGON2_SSSE3_H + +#include "core.h" + +void fill_segment_ssse3(const argon2_instance_t *instance, + argon2_position_t position); + +int check_ssse3(void); + +#endif // ARGON2_SSSE3_H diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-template-128.h b/src/3rdparty/argon2/arch/x86_64/lib/argon2-template-128.h new file mode 100644 index 00000000..3062ec00 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-template-128.h @@ -0,0 +1,164 @@ +#include + +#include + +#include "core.h" + +static void fill_block(__m128i *s, const block *ref_block, block *next_block, + int with_xor) +{ + __m128i block_XY[ARGON2_OWORDS_IN_BLOCK]; + unsigned int i; + + if (with_xor) { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + s[i] = _mm_xor_si128( + s[i], _mm_loadu_si128((const __m128i *)ref_block->v + i)); + block_XY[i] = _mm_xor_si128( + s[i], _mm_loadu_si128((const __m128i *)next_block->v + i)); + } + } else { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + block_XY[i] = s[i] = _mm_xor_si128( + s[i], _mm_loadu_si128((const __m128i *)ref_block->v + i)); + } + } + + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND( + s[8 * i + 0], s[8 * i + 1], s[8 * i + 2], s[8 * i + 3], + s[8 * i + 4], s[8 * i + 5], s[8 * i + 6], s[8 * i + 7]); + } + + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND( + s[8 * 0 + i], s[8 * 1 + i], s[8 * 2 + i], s[8 * 3 + i], + s[8 * 4 + i], s[8 * 5 + i], s[8 * 6 + i], s[8 * 7 + i]); + } + + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + s[i] = _mm_xor_si128(s[i], block_XY[i]); + _mm_storeu_si128((__m128i *)next_block->v + i, s[i]); + } +} + +static void next_addresses(block *address_block, block *input_block) +{ + /*Temporary zero-initialized blocks*/ + __m128i zero_block[ARGON2_OWORDS_IN_BLOCK]; + __m128i zero2_block[ARGON2_OWORDS_IN_BLOCK]; + + memset(zero_block, 0, sizeof(zero_block)); + memset(zero2_block, 0, 
sizeof(zero2_block)); + + /*Increasing index counter*/ + input_block->v[6]++; + + /*First iteration of G*/ + fill_block(zero_block, input_block, address_block, 0); + + /*Second iteration of G*/ + fill_block(zero2_block, address_block, address_block, 0); +} + +static void fill_segment_128(const argon2_instance_t *instance, + argon2_position_t position) +{ + block *ref_block = NULL, *curr_block = NULL; + block address_block, input_block; + uint64_t pseudo_rand, ref_index, ref_lane; + uint32_t prev_offset, curr_offset; + uint32_t starting_index, i; + __m128i state[ARGON2_OWORDS_IN_BLOCK]; + int data_independent_addressing; + + if (instance == NULL) { + return; + } + + data_independent_addressing = (instance->type == Argon2_i) || + (instance->type == Argon2_id && (position.pass == 0) && + (position.slice < ARGON2_SYNC_POINTS / 2)); + + if (data_independent_addressing) { + init_block_value(&input_block, 0); + + input_block.v[0] = position.pass; + input_block.v[1] = position.lane; + input_block.v[2] = position.slice; + input_block.v[3] = instance->memory_blocks; + input_block.v[4] = instance->passes; + input_block.v[5] = instance->type; + } + + starting_index = 0; + + if ((0 == position.pass) && (0 == position.slice)) { + starting_index = 2; /* we have already generated the first two blocks */ + + /* Don't forget to generate the first block of addresses: */ + if (data_independent_addressing) { + next_addresses(&address_block, &input_block); + } + } + + /* Offset of the current block */ + curr_offset = position.lane * instance->lane_length + + position.slice * instance->segment_length + starting_index; + + if (0 == curr_offset % instance->lane_length) { + /* Last block in this lane */ + prev_offset = curr_offset + instance->lane_length - 1; + } else { + /* Previous block */ + prev_offset = curr_offset - 1; + } + + memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE); + + for (i = starting_index; i < instance->segment_length; + ++i, ++curr_offset, 
++prev_offset) { + /*1.1 Rotating prev_offset if needed */ + if (curr_offset % instance->lane_length == 1) { + prev_offset = curr_offset - 1; + } + + /* 1.2 Computing the index of the reference block */ + /* 1.2.1 Taking pseudo-random value from the previous block */ + if (data_independent_addressing) { + if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { + next_addresses(&address_block, &input_block); + } + pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; + } else { + pseudo_rand = instance->memory[prev_offset].v[0]; + } + + /* 1.2.2 Computing the lane of the reference block */ + ref_lane = ((pseudo_rand >> 32)) % instance->lanes; + + if ((position.pass == 0) && (position.slice == 0)) { + /* Can not reference other lanes yet */ + ref_lane = position.lane; + } + + /* 1.2.3 Computing the number of possible reference block within the + * lane. + */ + position.index = i; + ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, + ref_lane == position.lane); + + /* 2 Creating a new block */ + ref_block = + instance->memory + instance->lane_length * ref_lane + ref_index; + curr_block = instance->memory + curr_offset; + + /* version 1.2.1 and earlier: overwrite, not XOR */ + if (0 == position.pass || ARGON2_VERSION_10 == instance->version) { + fill_block(state, ref_block, curr_block, 0); + } else { + fill_block(state, ref_block, curr_block, 1); + } + } +} diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.c new file mode 100644 index 00000000..a7f6e399 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.c @@ -0,0 +1,122 @@ +#include "argon2-xop.h" + +#ifdef HAVE_XOP +#include + +#include + +#define ror64(x, c) _mm_roti_epi64((x), -(c)) + +static __m128i f(__m128i x, __m128i y) +{ + __m128i z = _mm_mul_epu32(x, y); + return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z)); +} + +#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ 
+\ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ +\ + D0 = ror64(D0, 32); \ + D1 = ror64(D1, 32); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm_xor_si128(B0, C0); \ + B1 = _mm_xor_si128(B1, C1); \ +\ + B0 = ror64(B0, 24); \ + B1 = ror64(B1, 24); \ + } while ((void)0, 0) + +#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ +\ + D0 = ror64(D0, 16); \ + D1 = ror64(D1, 16); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm_xor_si128(B0, C0); \ + B1 = _mm_xor_si128(B1, C1); \ +\ + B0 = ror64(B0, 63); \ + B1 = ror64(B1, 63); \ + } while ((void)0, 0) + +#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \ + __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \ + B0 = t0; \ + B1 = t1; \ +\ + t0 = _mm_alignr_epi8(D1, D0, 8); \ + t1 = _mm_alignr_epi8(D0, D1, 8); \ + D0 = t1; \ + D1 = t0; \ + } while ((void)0, 0) + +#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \ + __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \ + B0 = t0; \ + B1 = t1; \ +\ + t0 = _mm_alignr_epi8(D0, D1, 8); \ + t1 = _mm_alignr_epi8(D1, D0, 8); \ + D0 = t1; \ + D1 = t0; \ + } while ((void)0, 0) + +#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + G1(A0, B0, C1, D0, A1, B1, C0, D1); \ + G2(A0, B0, C1, D0, A1, B1, C0, D1); \ +\ + UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ + } while ((void)0, 0) + +#include "argon2-template-128.h" + +void fill_segment_xop(const argon2_instance_t *instance, + argon2_position_t position) +{ + fill_segment_128(instance, position); +} + +int check_xop(void) +{ + return 1; +} + +#else + +void fill_segment_xop(const argon2_instance_t *instance, + argon2_position_t position) +{ +} + 
+int check_xop(void) +{ + return 0; +} + +#endif diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.h b/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.h new file mode 100644 index 00000000..1474a11c --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.h @@ -0,0 +1,11 @@ +#ifndef ARGON2_XOP_H +#define ARGON2_XOP_H + +#include "core.h" + +void fill_segment_xop(const argon2_instance_t *instance, + argon2_position_t position); + +int check_xop(void); + +#endif // ARGON2_XOP_H diff --git a/src/3rdparty/argon2/configure.ac b/src/3rdparty/argon2/configure.ac new file mode 100644 index 00000000..81607a97 --- /dev/null +++ b/src/3rdparty/argon2/configure.ac @@ -0,0 +1,108 @@ +dnl --------------------------------------------------------------------- +dnl Copyright (C) 2015, Ondrej Mosnacek +dnl +dnl This program is free software: you can redistribute it and/or +dnl modify it under the terms of the GNU General Public License +dnl as published by the Free Software Foundation: either version 2 +dnl of the License, or (at your option) any later version. +dnl +dnl This program is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +dnl GNU General Public License for more details. +dnl +dnl You should have received a copy of the GNU General Public License +dnl along with this program. If not, see <http://www.gnu.org/licenses/>. 
+dnl --------------------------------------------------------------------- + +AC_CONFIG_MACRO_DIR([m4]) + +AC_INIT([argon2], [0.1], []) +LT_INIT +AM_INIT_AUTOMAKE([foreign subdir-objects]) +AM_SILENT_RULES([yes]) + +AC_PROG_CC +AC_PROG_CC_C89 +AM_PROG_AS +AX_PTHREAD + +AC_CANONICAL_HOST + +AS_CASE([$host_cpu], + dnl [i?86], [ARCH=i386], + [x86_64], [ARCH=x86_64], + [ARCH=generic + AC_MSG_WARN("No code for architecture $host_cpu; using generic implementation")] +) +AC_SUBST([ARCH]) + +AM_CONDITIONAL([ARCH_X86_64], [test "$ARCH" = 'x86_64']) +AM_CONDITIONAL([ARCH_GENERIC], [test "$ARCH" = 'generic']) + +# AX_CHECK_COMPILER_FEATURE(NAME, FLAG, TEST_SOURCE) +# -------------------------- +AC_DEFUN([AX_CHECK_COMPILER_FEATURE], [{ + AX_CHECK_COMPILE_FLAG([-m$2], [HAVE_FLAG=1], [HAVE_FLAG=0]) + HAVE_FEATURE=0 + AS_IF([test "$HAVE_FLAG" = '1'], [{ + AC_MSG_CHECKING("whether C compiler supports $1 with -m$2...") + + CFLAGS_BACKUP="$CFLAGS" + CFLAGS="-m$2" + + AC_COMPILE_IFELSE([AC_LANG_SOURCE([$3])], [HAVE_FEATURE=1]) + + CFLAGS="$CFLAGS_BACKUP" + AS_IF([test "$HAVE_FEATURE" = '1'], [RESULT='yes'], [RESULT='no']) + AC_MSG_RESULT([$RESULT]) + + }]) + HAVE_$1=$HAVE_FEATURE + AM_CONDITIONAL([HAVE_$1], [test "$HAVE_FEATURE" = '1']) +}]) + +AX_CHECK_COMPILER_FEATURE([SSE2], [sse2], [[ +#include + +void function_sse2(__m128i *dst, const __m128i *a, const __m128i *b) +{ + *dst = _mm_xor_si128(*a, *b); +} +]]) +AX_CHECK_COMPILER_FEATURE([SSSE3], [ssse3], [[ +#include + +void function_ssse3(__m128i *dst, const __m128i *a, const __m128i *b) +{ + *dst = _mm_shuffle_epi8(*a, *b); +} +]]) +AX_CHECK_COMPILER_FEATURE([XOP], [xop], [[ +#include + +void function_xop(__m128i *dst, const __m128i *a, int b) +{ + *dst = _mm_roti_epi64(*a, b); +} +]]) +AX_CHECK_COMPILER_FEATURE([AVX2], [avx2], [[ +#include + +void function_avx2(__m256i *dst, const __m256i *a, const __m256i *b) +{ + *dst = _mm256_xor_si256(*a, *b); +} +]]) +AX_CHECK_COMPILER_FEATURE([AVX512F], [avx512f], [[ +#include + +void
function_avx512f(__m512i *dst, const __m512i *a) +{ + *dst = _mm512_ror_epi64(*a, 57); +} +]]) + +AC_CONFIG_FILES([Makefile]) + +AC_OUTPUT diff --git a/src/3rdparty/argon2/include/argon2.h b/src/3rdparty/argon2/include/argon2.h new file mode 100644 index 00000000..59df71aa --- /dev/null +++ b/src/3rdparty/argon2/include/argon2.h @@ -0,0 +1,478 @@ +/* + * Argon2 source code package + * + * Written by Daniel Dinu and Dmitry Khovratovich, 2015 + * + * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. + * + * You should have received a copy of the CC0 Public Domain Dedication + * along with this software. If not, see + * . + */ + +#ifndef ARGON2_H +#define ARGON2_H + +#include +#include +#include +#include + +/* Symbols visibility control */ +#if defined(_WIN32) || defined(__CYGWIN__) + #if defined(A2_VISCTL) + #if defined(_MSC_VER) + #define ARGON2_PUBLIC __declspec(dllexport) + #else + #define ARGON2_PUBLIC __attribute__ ((dllexport)) + #endif + #else + #if defined(_MSC_VER) + #define ARGON2_PUBLIC __declspec(dllimport) + #else + #define ARGON2_PUBLIC /*__attribute__ ((dllimport))*/ + #endif + #endif + #define ARGON2_LOCAL +#else + #if defined(A2_VISCTL) + #define ARGON2_PUBLIC __attribute__ ((visibility ("default"))) + #define ARGON2_LOCAL __attribute__ ((visibility ("hidden"))) + #else + #define ARGON2_PUBLIC + #define ARGON2_LOCAL + #endif +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * Argon2 input parameter restrictions + */ + +/* Minimum and maximum number of lanes (degree of parallelism) */ +#define ARGON2_MIN_LANES UINT32_C(1) +#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF) + +/* Minimum and maximum number of threads */ +#define ARGON2_MIN_THREADS UINT32_C(1) +#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF) + +/* Number of synchronization points between lanes per pass */ +#define ARGON2_SYNC_POINTS UINT32_C(4) + +/* Minimum and maximum digest size in bytes */ +#define ARGON2_MIN_OUTLEN UINT32_C(4) +#define ARGON2_MAX_OUTLEN 
UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */ +#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */ + +#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b)) +/* Max memory size is addressing-space/2, topping at 2^32 blocks (4 TB) */ +#define ARGON2_MAX_MEMORY_BITS \ + ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1)) +#define ARGON2_MAX_MEMORY \ + ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS) + +/* Minimum and maximum number of passes */ +#define ARGON2_MIN_TIME UINT32_C(1) +#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum password length in bytes */ +#define ARGON2_MIN_PWD_LENGTH UINT32_C(0) +#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum associated data length in bytes */ +#define ARGON2_MIN_AD_LENGTH UINT32_C(0) +#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum salt length in bytes */ +#define ARGON2_MIN_SALT_LENGTH UINT32_C(8) +#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum key length in bytes */ +#define ARGON2_MIN_SECRET UINT32_C(0) +#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF) + +/* Flags to determine which fields are securely wiped (default = no wipe). */ +#define ARGON2_DEFAULT_FLAGS UINT32_C(0) +#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0) +#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1) +#define ARGON2_FLAG_GENKAT (UINT32_C(1) << 3) + +/* Global flag to determine if we are wiping internal memory buffers. This flag + * is defined in core.c and defaults to 1 (wipe internal memory).
*/ +extern int FLAG_clear_internal_memory; + +/* Error codes */ +typedef enum Argon2_ErrorCodes { + ARGON2_OK = 0, + + ARGON2_OUTPUT_PTR_NULL = -1, + + ARGON2_OUTPUT_TOO_SHORT = -2, + ARGON2_OUTPUT_TOO_LONG = -3, + + ARGON2_PWD_TOO_SHORT = -4, + ARGON2_PWD_TOO_LONG = -5, + + ARGON2_SALT_TOO_SHORT = -6, + ARGON2_SALT_TOO_LONG = -7, + + ARGON2_AD_TOO_SHORT = -8, + ARGON2_AD_TOO_LONG = -9, + + ARGON2_SECRET_TOO_SHORT = -10, + ARGON2_SECRET_TOO_LONG = -11, + + ARGON2_TIME_TOO_SMALL = -12, + ARGON2_TIME_TOO_LARGE = -13, + + ARGON2_MEMORY_TOO_LITTLE = -14, + ARGON2_MEMORY_TOO_MUCH = -15, + + ARGON2_LANES_TOO_FEW = -16, + ARGON2_LANES_TOO_MANY = -17, + + ARGON2_PWD_PTR_MISMATCH = -18, /* NULL ptr with non-zero length */ + ARGON2_SALT_PTR_MISMATCH = -19, /* NULL ptr with non-zero length */ + ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */ + ARGON2_AD_PTR_MISMATCH = -21, /* NULL ptr with non-zero length */ + + ARGON2_MEMORY_ALLOCATION_ERROR = -22, + + ARGON2_FREE_MEMORY_CBK_NULL = -23, + ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24, + + ARGON2_INCORRECT_PARAMETER = -25, + ARGON2_INCORRECT_TYPE = -26, + + ARGON2_OUT_PTR_MISMATCH = -27, + + ARGON2_THREADS_TOO_FEW = -28, + ARGON2_THREADS_TOO_MANY = -29, + + ARGON2_MISSING_ARGS = -30, + + ARGON2_ENCODING_FAIL = -31, + + ARGON2_DECODING_FAIL = -32, + + ARGON2_THREAD_FAIL = -33, + + ARGON2_DECODING_LENGTH_FAIL = -34, + + ARGON2_VERIFY_MISMATCH = -35 +} argon2_error_codes; + +/* Memory allocator types --- for external allocation */ +typedef int (*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate); +typedef void (*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate); + +/* Argon2 external data structures */ + +/* + ***** + * Context: structure to hold Argon2 inputs: + * output array and its length, + * password and its length, + * salt and its length, + * secret and its length, + * associated data and its length, + * number of passes, amount of used memory (in KBytes, can be rounded up a bit) + * 
number of parallel threads that will be run. + * All the parameters above affect the output hash value. + * Additionally, two function pointers can be provided to allocate and + * deallocate the memory (if NULL, memory will be allocated internally). + * Also, three flags indicate whether to erase password, secret as soon as they + * are pre-hashed (and thus not needed anymore), and the entire memory + ***** + * Simplest situation: you have output array out[8], password is stored in + * pwd[32], salt is stored in salt[16], you do not have keys nor associated + * data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with + * 4 parallel lanes. + * You want to erase the password, but you're OK with last pass not being + * erased. You want to use the default memory allocator. + * Then you initialize: + Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false) + */ +typedef struct Argon2_Context { + uint8_t *out; /* output array */ + uint32_t outlen; /* digest length */ + + uint8_t *pwd; /* password array */ + uint32_t pwdlen; /* password length */ + + uint8_t *salt; /* salt array */ + uint32_t saltlen; /* salt length */ + + uint8_t *secret; /* key array */ + uint32_t secretlen; /* key length */ + + uint8_t *ad; /* associated data array */ + uint32_t adlen; /* associated data length */ + + uint32_t t_cost; /* number of passes */ + uint32_t m_cost; /* amount of memory requested (KB) */ + uint32_t lanes; /* number of lanes */ + uint32_t threads; /* maximum number of threads */ + + uint32_t version; /* version number */ + + allocate_fptr allocate_cbk; /* pointer to memory allocator */ + deallocate_fptr free_cbk; /* pointer to memory deallocator */ + + uint32_t flags; /* array of bool options */ +} argon2_context; + +/* Argon2 primitive type */ +typedef enum Argon2_type { + Argon2_d = 0, + Argon2_i = 1, + Argon2_id = 2 +} argon2_type; + +/* Version of the algorithm */ +typedef enum Argon2_version { + ARGON2_VERSION_10 = 
0x10, + ARGON2_VERSION_13 = 0x13, + ARGON2_VERSION_NUMBER = ARGON2_VERSION_13 +} argon2_version; + +/* + * Function that gives the string representation of an argon2_type. + * @param type The argon2_type that we want the string for + * @param uppercase Whether the string should have the first letter uppercase + * @return NULL if invalid type, otherwise the string representation. + */ +ARGON2_PUBLIC const char *argon2_type2string(argon2_type type, int uppercase); + +/* + * Function that performs memory-hard hashing with certain degree of parallelism + * @param context Pointer to the Argon2 internal structure + * @return Error code if smth is wrong, ARGON2_OK otherwise + */ +ARGON2_PUBLIC int argon2_ctx(argon2_context *context, argon2_type type); + +/** + * Hashes a password with Argon2i, producing an encoded hash + * @param t_cost Number of iterations + * @param m_cost Sets memory usage to m_cost kibibytes + * @param parallelism Number of threads and compute lanes + * @param pwd Pointer to password + * @param pwdlen Password size in bytes + * @param salt Pointer to salt + * @param saltlen Salt size in bytes + * @param hashlen Desired length of the hash in bytes + * @param encoded Buffer where to write the encoded hash + * @param encodedlen Size of the buffer (thus max size of the encoded hash) + * @pre Different parallelism levels will give different results + * @pre Returns ARGON2_OK if successful + */ +ARGON2_PUBLIC int argon2i_hash_encoded(const uint32_t t_cost, + const uint32_t m_cost, + const uint32_t parallelism, + const void *pwd, const size_t pwdlen, + const void *salt, const size_t saltlen, + const size_t hashlen, char *encoded, + const size_t encodedlen); + +/** + * Hashes a password with Argon2i, producing a raw hash by allocating memory at + * @hash + * @param t_cost Number of iterations + * @param m_cost Sets memory usage to m_cost kibibytes + * @param parallelism Number of threads and compute lanes + * @param pwd Pointer to password + * @param pwdlen 
Password size in bytes + * @param salt Pointer to salt + * @param saltlen Salt size in bytes + * @param hash Buffer where to write the raw hash - updated by the function + * @param hashlen Desired length of the hash in bytes + * @pre Different parallelism levels will give different results + * @pre Returns ARGON2_OK if successful + */ +ARGON2_PUBLIC int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, + const size_t hashlen); + +ARGON2_PUBLIC int argon2d_hash_encoded(const uint32_t t_cost, + const uint32_t m_cost, + const uint32_t parallelism, + const void *pwd, const size_t pwdlen, + const void *salt, const size_t saltlen, + const size_t hashlen, char *encoded, + const size_t encodedlen); + +ARGON2_PUBLIC int argon2d_hash_raw(const uint32_t t_cost, + const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, + const size_t hashlen); + +ARGON2_PUBLIC int argon2id_hash_encoded(const uint32_t t_cost, + const uint32_t m_cost, + const uint32_t parallelism, + const void *pwd, const size_t pwdlen, + const void *salt, const size_t saltlen, + const size_t hashlen, char *encoded, + const size_t encodedlen); + +ARGON2_PUBLIC int argon2id_hash_raw(const uint32_t t_cost, + const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, + const size_t hashlen); + +/* generic function underlying the above ones */ +ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, + const size_t hashlen, char *encoded, + const size_t encodedlen, argon2_type type, + const uint32_t version); + +/** + * Verifies a password against an encoded string 
+ * Encoded string is restricted as in validate_inputs() + * @param encoded String encoding parameters, salt, hash + * @param pwd Pointer to password + * @pre Returns ARGON2_OK if successful + */ +ARGON2_PUBLIC int argon2i_verify(const char *encoded, const void *pwd, + const size_t pwdlen); + +ARGON2_PUBLIC int argon2d_verify(const char *encoded, const void *pwd, + const size_t pwdlen); + +ARGON2_PUBLIC int argon2id_verify(const char *encoded, const void *pwd, + const size_t pwdlen); + +/* generic function underlying the above ones */ +ARGON2_PUBLIC int argon2_verify(const char *encoded, const void *pwd, + const size_t pwdlen, argon2_type type); + +/** + * Argon2d: Version of Argon2 that picks memory blocks depending + * on the password and salt. Only for side-channel-free + * environment!! + ***** + * @param context Pointer to current Argon2 context + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int argon2d_ctx(argon2_context *context); + +/** + * Argon2i: Version of Argon2 that picks memory blocks + * independent on the password and salt. Good for side-channels, + * but worse w.r.t. tradeoff attacks if only one pass is used. + ***** + * @param context Pointer to current Argon2 context + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int argon2i_ctx(argon2_context *context); + +/** + * Argon2id: Version of Argon2 where the first half-pass over memory is + * password-independent, the rest are password-dependent (on the password and + * salt). OK against side channels (they reduce to 1/2-pass Argon2i), and + * better with w.r.t. tradeoff attacks (similar to Argon2d). 
+ ***** + * @param context Pointer to current Argon2 context + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int argon2id_ctx(argon2_context *context); + +/** + * Verify if a given password is correct for Argon2d hashing + * @param context Pointer to current Argon2 context + * @param hash The password hash to verify. The length of the hash is + * specified by the context outlen member + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int argon2d_verify_ctx(argon2_context *context, const char *hash); + +/** + * Verify if a given password is correct for Argon2i hashing + * @param context Pointer to current Argon2 context + * @param hash The password hash to verify. The length of the hash is + * specified by the context outlen member + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int argon2i_verify_ctx(argon2_context *context, const char *hash); + +/** + * Verify if a given password is correct for Argon2id hashing + * @param context Pointer to current Argon2 context + * @param hash The password hash to verify. 
The length of the hash is + * specified by the context outlen member + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int argon2id_verify_ctx(argon2_context *context, + const char *hash); + +/* generic function underlying the above ones */ +ARGON2_PUBLIC int argon2_verify_ctx(argon2_context *context, const char *hash, + argon2_type type); + +/** + * Get the associated error message for given error code + * @return The error message associated with the given error code + */ +ARGON2_PUBLIC const char *argon2_error_message(int error_code); + +/** + * Returns the encoded hash length for the given input parameters + * @param t_cost Number of iterations + * @param m_cost Memory usage in kibibytes + * @param parallelism Number of threads; used to compute lanes + * @param saltlen Salt size in bytes + * @param hashlen Hash size in bytes + * @param type The argon2_type that we want the encoded length for + * @return The encoded hash length in bytes + */ +ARGON2_PUBLIC size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, + uint32_t parallelism, uint32_t saltlen, + uint32_t hashlen, argon2_type type); + +/* signals availability of argon2_select_impl: */ +#define ARGON2_SELECTABLE_IMPL + +/** + * Selects the fastest available optimized implementation. + * @param out The file for debug output (e. g. 
stderr; pass NULL for no + * debug output) + * @param prefix What to print before each line; NULL is equivalent to empty + * string + */ +ARGON2_PUBLIC void argon2_select_impl(FILE *out, const char *prefix); + +/* signals support for passing preallocated memory: */ +#define ARGON2_PREALLOCATED_MEMORY + +ARGON2_PUBLIC size_t argon2_memory_size(uint32_t m_cost, uint32_t parallelism); + +/** + * Function that performs memory-hard hashing with certain degree of parallelism + * @param context Pointer to the Argon2 internal structure + * @param type The Argon2 type + * @param memory Preallocated memory for blocks (or NULL) + * @param memory_size The size of preallocated memory + * @return Error code if smth is wrong, ARGON2_OK otherwise + */ +ARGON2_PUBLIC int argon2_ctx_mem(argon2_context *context, argon2_type type, + void *memory, size_t memory_size); + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/src/3rdparty/argon2/lib/argon2-template-64.h b/src/3rdparty/argon2/lib/argon2-template-64.h new file mode 100644 index 00000000..16ddbd35 --- /dev/null +++ b/src/3rdparty/argon2/lib/argon2-template-64.h @@ -0,0 +1,193 @@ +#include + +#include "core.h" + +#define MASK_32 UINT64_C(0xFFFFFFFF) + +#define F(x, y) ((x) + (y) + 2 * ((x) & MASK_32) * ((y) & MASK_32)) + +#define G(a, b, c, d) \ + do { \ + a = F(a, b); \ + d = rotr64(d ^ a, 32); \ + c = F(c, d); \ + b = rotr64(b ^ c, 24); \ + a = F(a, b); \ + d = rotr64(d ^ a, 16); \ + c = F(c, d); \ + b = rotr64(b ^ c, 63); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, \ + v8, v9, v10, v11, v12, v13, v14, v15) \ + do { \ + G(v0, v4, v8, v12); \ + G(v1, v5, v9, v13); \ + G(v2, v6, v10, v14); \ + G(v3, v7, v11, v15); \ + G(v0, v5, v10, v15); \ + G(v1, v6, v11, v12); \ + G(v2, v7, v8, v13); \ + G(v3, v4, v9, v14); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND_NOMSG1(v) \ + BLAKE2_ROUND_NOMSG( \ + (v)[ 0], (v)[ 1], (v)[ 2], (v)[ 3], \ + (v)[ 4], (v)[ 5], (v)[ 6], (v)[ 7], \ + 
(v)[ 8], (v)[ 9], (v)[10], (v)[11], \ + (v)[12], (v)[13], (v)[14], (v)[15]) + +#define BLAKE2_ROUND_NOMSG2(v) \ + BLAKE2_ROUND_NOMSG( \ + (v)[ 0], (v)[ 1], (v)[ 16], (v)[ 17], \ + (v)[ 32], (v)[ 33], (v)[ 48], (v)[ 49], \ + (v)[ 64], (v)[ 65], (v)[ 80], (v)[ 81], \ + (v)[ 96], (v)[ 97], (v)[112], (v)[113]) + +static void fill_block(const block *prev_block, const block *ref_block, + block *next_block, int with_xor) +{ + block blockR, block_tmp; + + copy_block(&blockR, ref_block); + xor_block(&blockR, prev_block); + copy_block(&block_tmp, &blockR); + if (with_xor) { + xor_block(&block_tmp, next_block); + } + + /* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then + (16,17,..31)... finally (112,113,...127) */ + BLAKE2_ROUND_NOMSG1(blockR.v + 0 * 16); + BLAKE2_ROUND_NOMSG1(blockR.v + 1 * 16); + BLAKE2_ROUND_NOMSG1(blockR.v + 2 * 16); + BLAKE2_ROUND_NOMSG1(blockR.v + 3 * 16); + BLAKE2_ROUND_NOMSG1(blockR.v + 4 * 16); + BLAKE2_ROUND_NOMSG1(blockR.v + 5 * 16); + BLAKE2_ROUND_NOMSG1(blockR.v + 6 * 16); + BLAKE2_ROUND_NOMSG1(blockR.v + 7 * 16); + + /* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then + (2,3,18,19,...,114,115).. 
finally (14,15,30,31,...,126,127) */ + BLAKE2_ROUND_NOMSG2(blockR.v + 0 * 2); + BLAKE2_ROUND_NOMSG2(blockR.v + 1 * 2); + BLAKE2_ROUND_NOMSG2(blockR.v + 2 * 2); + BLAKE2_ROUND_NOMSG2(blockR.v + 3 * 2); + BLAKE2_ROUND_NOMSG2(blockR.v + 4 * 2); + BLAKE2_ROUND_NOMSG2(blockR.v + 5 * 2); + BLAKE2_ROUND_NOMSG2(blockR.v + 6 * 2); + BLAKE2_ROUND_NOMSG2(blockR.v + 7 * 2); + + copy_block(next_block, &block_tmp); + xor_block(next_block, &blockR); +} + +static void next_addresses(block *address_block, block *input_block, + const block *zero_block) +{ + input_block->v[6]++; + fill_block(zero_block, input_block, address_block, 0); + fill_block(zero_block, address_block, address_block, 0); +} + +static void fill_segment_64(const argon2_instance_t *instance, + argon2_position_t position) +{ + block *ref_block, *curr_block, *prev_block; + block address_block, input_block, zero_block; + uint64_t pseudo_rand, ref_index, ref_lane; + uint32_t prev_offset, curr_offset; + uint32_t starting_index, i; + int data_independent_addressing; + + if (instance == NULL) { + return; + } + + data_independent_addressing = (instance->type == Argon2_i) || + (instance->type == Argon2_id && (position.pass == 0) && + (position.slice < ARGON2_SYNC_POINTS / 2)); + + if (data_independent_addressing) { + init_block_value(&zero_block, 0); + init_block_value(&input_block, 0); + + input_block.v[0] = position.pass; + input_block.v[1] = position.lane; + input_block.v[2] = position.slice; + input_block.v[3] = instance->memory_blocks; + input_block.v[4] = instance->passes; + input_block.v[5] = instance->type; + } + + starting_index = 0; + + if ((0 == position.pass) && (0 == position.slice)) { + starting_index = 2; /* we have already generated the first two blocks */ + + /* Don't forget to generate the first block of addresses: */ + if (data_independent_addressing) { + next_addresses(&address_block, &input_block, &zero_block); + } + } + + /* Offset of the current block */ + curr_offset = position.lane * 
instance->lane_length + + position.slice * instance->segment_length + starting_index; + + if (0 == curr_offset % instance->lane_length) { + /* Last block in this lane */ + prev_offset = curr_offset + instance->lane_length - 1; + } else { + /* Previous block */ + prev_offset = curr_offset - 1; + } + + for (i = starting_index; i < instance->segment_length; + ++i, ++curr_offset, ++prev_offset) { + /*1.1 Rotating prev_offset if needed */ + if (curr_offset % instance->lane_length == 1) { + prev_offset = curr_offset - 1; + } + + /* 1.2 Computing the index of the reference block */ + /* 1.2.1 Taking pseudo-random value from the previous block */ + if (data_independent_addressing) { + if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { + next_addresses(&address_block, &input_block, &zero_block); + } + pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; + } else { + pseudo_rand = instance->memory[prev_offset].v[0]; + } + + /* 1.2.2 Computing the lane of the reference block */ + ref_lane = ((pseudo_rand >> 32)) % instance->lanes; + + if ((position.pass == 0) && (position.slice == 0)) { + /* Can not reference other lanes yet */ + ref_lane = position.lane; + } + + /* 1.2.3 Computing the number of possible reference block within the + * lane. 
+ */ + position.index = i; + ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, + ref_lane == position.lane); + + /* 2 Creating a new block */ + ref_block = + instance->memory + instance->lane_length * ref_lane + ref_index; + curr_block = instance->memory + curr_offset; + prev_block = instance->memory + prev_offset; + + /* version 1.2.1 and earlier: overwrite, not XOR */ + if (0 == position.pass || ARGON2_VERSION_10 == instance->version) { + fill_block(prev_block, ref_block, curr_block, 0); + } else { + fill_block(prev_block, ref_block, curr_block, 1); + } + } +} diff --git a/src/3rdparty/argon2/lib/argon2.c b/src/3rdparty/argon2/lib/argon2.c new file mode 100644 index 00000000..28d3d402 --- /dev/null +++ b/src/3rdparty/argon2/lib/argon2.c @@ -0,0 +1,476 @@ +/* + * Argon2 source code package + * + * Written by Daniel Dinu and Dmitry Khovratovich, 2015 + * + * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. + * + * You should have received a copy of the CC0 Public Domain Dedication along + * with + * this software. If not, see + * . + */ + +#include +#include +#include + +#include "argon2.h" +#include "encoding.h" +#include "core.h" + +const char *argon2_type2string(argon2_type type, int uppercase) { + switch (type) { + case Argon2_d: + return uppercase ? "Argon2d" : "argon2d"; + case Argon2_i: + return uppercase ? "Argon2i" : "argon2i"; + case Argon2_id: + return uppercase ?
"Argon2id" : "argon2id"; + } + + return NULL; +} + +static void argon2_compute_memory_blocks(uint32_t *memory_blocks, + uint32_t *segment_length, + uint32_t m_cost, uint32_t lanes) +{ + /* Minimum memory_blocks = 8L blocks, where L is the number of lanes */ + *memory_blocks = m_cost; + if (*memory_blocks < 2 * ARGON2_SYNC_POINTS * lanes) { + *memory_blocks = 2 * ARGON2_SYNC_POINTS * lanes; + } + + *segment_length = *memory_blocks / (lanes * ARGON2_SYNC_POINTS); + /* Ensure that all segments have equal length */ + *memory_blocks = *segment_length * (lanes * ARGON2_SYNC_POINTS); +} + +size_t argon2_memory_size(uint32_t m_cost, uint32_t parallelism) { + uint32_t memory_blocks, segment_length; + argon2_compute_memory_blocks(&memory_blocks, &segment_length, m_cost, + parallelism); + return (size_t)memory_blocks * ARGON2_BLOCK_SIZE; /* multiply in size_t, not uint32_t, so large m_cost does not wrap */ +} + +int argon2_ctx_mem(argon2_context *context, argon2_type type, void *memory, + size_t memory_size) { + /* 1. Validate all inputs */ + int result = validate_inputs(context); + uint32_t memory_blocks, segment_length; + argon2_instance_t instance; + + if (ARGON2_OK != result) { + return result; + } + + if (Argon2_d != type && Argon2_i != type && Argon2_id != type) { + return ARGON2_INCORRECT_TYPE; + } + + /* 2.
Align memory size */ + argon2_compute_memory_blocks(&memory_blocks, &segment_length, + context->m_cost, context->lanes); + + /* check for sufficient memory size: */ + if (memory != NULL && (memory_size % ARGON2_BLOCK_SIZE != 0 || + memory_size / ARGON2_BLOCK_SIZE < memory_blocks)) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + instance.version = context->version; + instance.memory = (block *)memory; + instance.passes = context->t_cost; + instance.memory_blocks = memory_blocks; + instance.segment_length = segment_length; + instance.lane_length = segment_length * ARGON2_SYNC_POINTS; + instance.lanes = context->lanes; + instance.threads = context->threads; + instance.type = type; + instance.print_internals = !!(context->flags & ARGON2_FLAG_GENKAT); + instance.keep_memory = memory != NULL; + + if (instance.threads > instance.lanes) { + instance.threads = instance.lanes; + } + + /* 3. Initialization: Hashing inputs, allocating memory, filling first + * blocks + */ + result = initialize(&instance, context); + + if (ARGON2_OK != result) { + return result; + } + + /* 4. Filling memory */ + result = fill_memory_blocks(&instance); + + if (ARGON2_OK != result) { + return result; + } + /* 5. 
Finalization */ + finalize(context, &instance); + + return ARGON2_OK; +} + +int argon2_ctx(argon2_context *context, argon2_type type) { + return argon2_ctx_mem(context, type, NULL, 0); +} + +int argon2_hash(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, const size_t saltlen, + void *hash, const size_t hashlen, char *encoded, + const size_t encodedlen, argon2_type type, + const uint32_t version){ + + argon2_context context; + int result; + uint8_t *out; + + if (pwdlen > ARGON2_MAX_PWD_LENGTH) { + return ARGON2_PWD_TOO_LONG; + } + + if (saltlen > ARGON2_MAX_SALT_LENGTH) { + return ARGON2_SALT_TOO_LONG; + } + + if (hashlen > ARGON2_MAX_OUTLEN) { + return ARGON2_OUTPUT_TOO_LONG; + } + + if (hashlen < ARGON2_MIN_OUTLEN) { + return ARGON2_OUTPUT_TOO_SHORT; + } + + out = malloc(hashlen); + if (!out) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + context.out = (uint8_t *)out; + context.outlen = (uint32_t)hashlen; + context.pwd = CONST_CAST(uint8_t *)pwd; + context.pwdlen = (uint32_t)pwdlen; + context.salt = CONST_CAST(uint8_t *)salt; + context.saltlen = (uint32_t)saltlen; + context.secret = NULL; + context.secretlen = 0; + context.ad = NULL; + context.adlen = 0; + context.t_cost = t_cost; + context.m_cost = m_cost; + context.lanes = parallelism; + context.threads = parallelism; + context.allocate_cbk = NULL; + context.free_cbk = NULL; + context.flags = ARGON2_DEFAULT_FLAGS; + context.version = version; + + result = argon2_ctx(&context, type); + + if (result != ARGON2_OK) { + clear_internal_memory(out, hashlen); + free(out); + return result; + } + + /* if raw hash requested, write it */ + if (hash) { + memcpy(hash, out, hashlen); + } + + /* if encoding requested, write it */ + if (encoded && encodedlen) { + if (encode_string(encoded, encodedlen, &context, type) != ARGON2_OK) { + clear_internal_memory(out, hashlen); /* wipe buffers if error */ + clear_internal_memory(encoded, 
encodedlen); + free(out); + return ARGON2_ENCODING_FAIL; + } + } + clear_internal_memory(out, hashlen); + free(out); + + return ARGON2_OK; +} + +int argon2i_hash_encoded(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, const size_t hashlen, + char *encoded, const size_t encodedlen) { + + return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + NULL, hashlen, encoded, encodedlen, Argon2_i, + ARGON2_VERSION_NUMBER); +} + +int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, const size_t hashlen) { + + return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + hash, hashlen, NULL, 0, Argon2_i, ARGON2_VERSION_NUMBER); +} + +int argon2d_hash_encoded(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, const size_t hashlen, + char *encoded, const size_t encodedlen) { + + return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + NULL, hashlen, encoded, encodedlen, Argon2_d, + ARGON2_VERSION_NUMBER); +} + +int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, const size_t hashlen) { + + return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + hash, hashlen, NULL, 0, Argon2_d, ARGON2_VERSION_NUMBER); +} + +int argon2id_hash_encoded(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, const size_t hashlen, + char *encoded, const size_t encodedlen) { + + return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, 
salt, saltlen, + NULL, hashlen, encoded, encodedlen, Argon2_id, + ARGON2_VERSION_NUMBER); +} + +int argon2id_hash_raw(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, const size_t hashlen) { + return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + hash, hashlen, NULL, 0, Argon2_id, + ARGON2_VERSION_NUMBER); +} + +static int argon2_compare(const uint8_t *b1, const uint8_t *b2, size_t len) { + size_t i; + uint8_t d = 0U; + + for (i = 0U; i < len; i++) { + d |= b1[i] ^ b2[i]; + } + return (int)((1 & ((d - 1) >> 8)) - 1); +} + +int argon2_verify(const char *encoded, const void *pwd, const size_t pwdlen, + argon2_type type) { + + argon2_context ctx; + uint8_t *desired_result = NULL; + + int ret = ARGON2_OK; + + size_t encoded_len; + uint32_t max_field_len; + + if (pwdlen > ARGON2_MAX_PWD_LENGTH) { + return ARGON2_PWD_TOO_LONG; + } + + if (encoded == NULL) { + return ARGON2_DECODING_FAIL; + } + + encoded_len = strlen(encoded); + if (encoded_len > UINT32_MAX) { + return ARGON2_DECODING_FAIL; + } + + /* No field can be longer than the encoded length */ + max_field_len = (uint32_t)encoded_len; + + ctx.saltlen = max_field_len; + ctx.outlen = max_field_len; + + ctx.salt = malloc(ctx.saltlen); + ctx.out = malloc(ctx.outlen); + if (!ctx.salt || !ctx.out) { + ret = ARGON2_MEMORY_ALLOCATION_ERROR; + goto fail; + } + + ctx.pwd = (uint8_t *)pwd; + ctx.pwdlen = (uint32_t)pwdlen; + + ret = decode_string(&ctx, encoded, type); + if (ret != ARGON2_OK) { + goto fail; + } + + /* Set aside the desired result, and get a new buffer. 
*/ + desired_result = ctx.out; + ctx.out = malloc(ctx.outlen); + if (!ctx.out) { + ret = ARGON2_MEMORY_ALLOCATION_ERROR; + goto fail; + } + + ret = argon2_verify_ctx(&ctx, (char *)desired_result, type); + if (ret != ARGON2_OK) { + goto fail; + } + +fail: + free(ctx.salt); + free(ctx.out); + free(desired_result); + + return ret; +} + +int argon2i_verify(const char *encoded, const void *pwd, const size_t pwdlen) { + + return argon2_verify(encoded, pwd, pwdlen, Argon2_i); +} + +int argon2d_verify(const char *encoded, const void *pwd, const size_t pwdlen) { + + return argon2_verify(encoded, pwd, pwdlen, Argon2_d); +} + +int argon2id_verify(const char *encoded, const void *pwd, const size_t pwdlen) { + + return argon2_verify(encoded, pwd, pwdlen, Argon2_id); +} + +int argon2d_ctx(argon2_context *context) { + return argon2_ctx(context, Argon2_d); +} + +int argon2i_ctx(argon2_context *context) { + return argon2_ctx(context, Argon2_i); +} + +int argon2id_ctx(argon2_context *context) { + return argon2_ctx(context, Argon2_id); +} + +int argon2_verify_ctx(argon2_context *context, const char *hash, + argon2_type type) { + int ret = argon2_ctx(context, type); + if (ret != ARGON2_OK) { + return ret; + } + + if (argon2_compare((uint8_t *)hash, context->out, context->outlen)) { + return ARGON2_VERIFY_MISMATCH; + } + + return ARGON2_OK; +} + +int argon2d_verify_ctx(argon2_context *context, const char *hash) { + return argon2_verify_ctx(context, hash, Argon2_d); +} + +int argon2i_verify_ctx(argon2_context *context, const char *hash) { + return argon2_verify_ctx(context, hash, Argon2_i); +} + +int argon2id_verify_ctx(argon2_context *context, const char *hash) { + return argon2_verify_ctx(context, hash, Argon2_id); +} + +const char *argon2_error_message(int error_code) { + switch (error_code) { + case ARGON2_OK: + return "OK"; + case ARGON2_OUTPUT_PTR_NULL: + return "Output pointer is NULL"; + case ARGON2_OUTPUT_TOO_SHORT: + return "Output is too short"; + case ARGON2_OUTPUT_TOO_LONG: 
+ return "Output is too long"; + case ARGON2_PWD_TOO_SHORT: + return "Password is too short"; + case ARGON2_PWD_TOO_LONG: + return "Password is too long"; + case ARGON2_SALT_TOO_SHORT: + return "Salt is too short"; + case ARGON2_SALT_TOO_LONG: + return "Salt is too long"; + case ARGON2_AD_TOO_SHORT: + return "Associated data is too short"; + case ARGON2_AD_TOO_LONG: + return "Associated data is too long"; + case ARGON2_SECRET_TOO_SHORT: + return "Secret is too short"; + case ARGON2_SECRET_TOO_LONG: + return "Secret is too long"; + case ARGON2_TIME_TOO_SMALL: + return "Time cost is too small"; + case ARGON2_TIME_TOO_LARGE: + return "Time cost is too large"; + case ARGON2_MEMORY_TOO_LITTLE: + return "Memory cost is too small"; + case ARGON2_MEMORY_TOO_MUCH: + return "Memory cost is too large"; + case ARGON2_LANES_TOO_FEW: + return "Too few lanes"; + case ARGON2_LANES_TOO_MANY: + return "Too many lanes"; + case ARGON2_PWD_PTR_MISMATCH: + return "Password pointer is NULL, but password length is not 0"; + case ARGON2_SALT_PTR_MISMATCH: + return "Salt pointer is NULL, but salt length is not 0"; + case ARGON2_SECRET_PTR_MISMATCH: + return "Secret pointer is NULL, but secret length is not 0"; + case ARGON2_AD_PTR_MISMATCH: + return "Associated data pointer is NULL, but ad length is not 0"; + case ARGON2_MEMORY_ALLOCATION_ERROR: + return "Memory allocation error"; + case ARGON2_FREE_MEMORY_CBK_NULL: + return "The free memory callback is NULL"; + case ARGON2_ALLOCATE_MEMORY_CBK_NULL: + return "The allocate memory callback is NULL"; + case ARGON2_INCORRECT_PARAMETER: + return "Argon2_Context context is NULL"; + case ARGON2_INCORRECT_TYPE: + return "There is no such version of Argon2"; + case ARGON2_OUT_PTR_MISMATCH: + return "Output pointer mismatch"; + case ARGON2_THREADS_TOO_FEW: + return "Not enough threads"; + case ARGON2_THREADS_TOO_MANY: + return "Too many threads"; + case ARGON2_MISSING_ARGS: + return "Missing arguments"; + case ARGON2_ENCODING_FAIL: + return "Encoding 
failed"; + case ARGON2_DECODING_FAIL: + return "Decoding failed"; + case ARGON2_THREAD_FAIL: + return "Threading failure"; + case ARGON2_DECODING_LENGTH_FAIL: + return "Some of encoded parameters are too long or too short"; + case ARGON2_VERIFY_MISMATCH: + return "The password does not match the supplied hash"; + default: + return "Unknown error code"; + } +} + +size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, uint32_t parallelism, + uint32_t saltlen, uint32_t hashlen, argon2_type type) { + return strlen("$$v=$m=,t=,p=$$") + strlen(argon2_type2string(type, 0)) + + numlen(t_cost) + numlen(m_cost) + numlen(parallelism) + + b64len(saltlen) + b64len(hashlen) + numlen(ARGON2_VERSION_NUMBER) + + 1; +} diff --git a/src/3rdparty/argon2/lib/blake2/blake2-impl.h b/src/3rdparty/argon2/lib/blake2/blake2-impl.h new file mode 100644 index 00000000..e6cdf7c4 --- /dev/null +++ b/src/3rdparty/argon2/lib/blake2/blake2-impl.h @@ -0,0 +1,90 @@ +#ifndef ARGON2_BLAKE2_IMPL_H +#define ARGON2_BLAKE2_IMPL_H + +#include + +/* Argon2 Team - Begin Code */ +/* + Not an exhaustive list, but should cover the majority of modern platforms + Additionally, the code will always be correct---this is only a performance + tweak. 
+*/ +#if (defined(__BYTE_ORDER__) && \ + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \ + defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \ + defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \ + defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \ + defined(_M_ARM) +#define NATIVE_LITTLE_ENDIAN +#endif +/* Argon2 Team - End Code */ + +static inline uint32_t load32(const void *src) { +#if defined(NATIVE_LITTLE_ENDIAN) + return *(const uint32_t *)src; +#else + const uint8_t *p = (const uint8_t *)src; + uint32_t w = *p++; + w |= (uint32_t)(*p++) << 8; + w |= (uint32_t)(*p++) << 16; + w |= (uint32_t)(*p++) << 24; + return w; +#endif +} + +static inline uint64_t load64(const void *src) { +#if defined(NATIVE_LITTLE_ENDIAN) + return *(const uint64_t *)src; +#else + const uint8_t *p = (const uint8_t *)src; + uint64_t w = *p++; + w |= (uint64_t)(*p++) << 8; + w |= (uint64_t)(*p++) << 16; + w |= (uint64_t)(*p++) << 24; + w |= (uint64_t)(*p++) << 32; + w |= (uint64_t)(*p++) << 40; + w |= (uint64_t)(*p++) << 48; + w |= (uint64_t)(*p++) << 56; + return w; +#endif +} + +static inline void store32(void *dst, uint32_t w) { +#if defined(NATIVE_LITTLE_ENDIAN) + *(uint32_t *)dst = w; +#else + uint8_t *p = (uint8_t *)dst; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; +#endif +} + +static inline void store64(void *dst, uint64_t w) { +#if defined(NATIVE_LITTLE_ENDIAN) + *(uint64_t *)dst = w; +#else + uint8_t *p = (uint8_t *)dst; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; +#endif +} + +#endif // ARGON2_BLAKE2_IMPL_H diff --git a/src/3rdparty/argon2/lib/blake2/blake2.c b/src/3rdparty/argon2/lib/blake2/blake2.c new file mode 100644 index 00000000..d32028ed --- 
/dev/null +++ b/src/3rdparty/argon2/lib/blake2/blake2.c @@ -0,0 +1,225 @@ +#include + +#include "blake2/blake2.h" +#include "blake2/blake2-impl.h" + +#include "core.h" + +static const uint64_t blake2b_IV[8] = { + UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b), + UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1), + UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f), + UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) +}; + +#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) + +static const unsigned int blake2b_sigma[12][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, + {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, +}; + +#define G(m, r, i, a, b, c, d) \ + do { \ + a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while ((void)0, 0) + +#define ROUND(m, v, r) \ + do { \ + G(m, r, 0, v[0], v[4], v[ 8], v[12]); \ + G(m, r, 1, v[1], v[5], v[ 9], v[13]); \ + G(m, r, 2, v[2], v[6], v[10], v[14]); \ + G(m, r, 3, v[3], v[7], v[11], v[15]); \ + G(m, r, 4, v[0], v[5], v[10], v[15]); \ + G(m, r, 5, v[1], v[6], v[11], v[12]); \ + G(m, r, 6, v[2], v[7], v[ 8], v[13]); \ + G(m, r, 7, v[3], v[4], v[ 9], v[14]); \ + } while ((void)0, 0) + +void 
blake2b_compress(blake2b_state *S, const void *block, uint64_t f0) +{ + uint64_t m[16]; + uint64_t v[16]; + + m[ 0] = load64((const uint64_t *)block + 0); + m[ 1] = load64((const uint64_t *)block + 1); + m[ 2] = load64((const uint64_t *)block + 2); + m[ 3] = load64((const uint64_t *)block + 3); + m[ 4] = load64((const uint64_t *)block + 4); + m[ 5] = load64((const uint64_t *)block + 5); + m[ 6] = load64((const uint64_t *)block + 6); + m[ 7] = load64((const uint64_t *)block + 7); + m[ 8] = load64((const uint64_t *)block + 8); + m[ 9] = load64((const uint64_t *)block + 9); + m[10] = load64((const uint64_t *)block + 10); + m[11] = load64((const uint64_t *)block + 11); + m[12] = load64((const uint64_t *)block + 12); + m[13] = load64((const uint64_t *)block + 13); + m[14] = load64((const uint64_t *)block + 14); + m[15] = load64((const uint64_t *)block + 15); + + v[ 0] = S->h[0]; + v[ 1] = S->h[1]; + v[ 2] = S->h[2]; + v[ 3] = S->h[3]; + v[ 4] = S->h[4]; + v[ 5] = S->h[5]; + v[ 6] = S->h[6]; + v[ 7] = S->h[7]; + v[ 8] = blake2b_IV[0]; + v[ 9] = blake2b_IV[1]; + v[10] = blake2b_IV[2]; + v[11] = blake2b_IV[3]; + v[12] = blake2b_IV[4] ^ S->t[0]; + v[13] = blake2b_IV[5] ^ S->t[1]; + v[14] = blake2b_IV[6] ^ f0; + v[15] = blake2b_IV[7]; + + ROUND(m, v, 0); + ROUND(m, v, 1); + ROUND(m, v, 2); + ROUND(m, v, 3); + ROUND(m, v, 4); + ROUND(m, v, 5); + ROUND(m, v, 6); + ROUND(m, v, 7); + ROUND(m, v, 8); + ROUND(m, v, 9); + ROUND(m, v, 10); + ROUND(m, v, 11); + + S->h[0] ^= v[0] ^ v[ 8]; + S->h[1] ^= v[1] ^ v[ 9]; + S->h[2] ^= v[2] ^ v[10]; + S->h[3] ^= v[3] ^ v[11]; + S->h[4] ^= v[4] ^ v[12]; + S->h[5] ^= v[5] ^ v[13]; + S->h[6] ^= v[6] ^ v[14]; + S->h[7] ^= v[7] ^ v[15]; +} + +static void blake2b_increment_counter(blake2b_state *S, uint64_t inc) +{ + S->t[0] += inc; + S->t[1] += (S->t[0] < inc); +} + +static void blake2b_init_state(blake2b_state *S) +{ + memcpy(S->h, blake2b_IV, sizeof(S->h)); + S->t[1] = S->t[0] = 0; + S->buflen = 0; +} + +void blake2b_init(blake2b_state *S, 
size_t outlen) +{ + blake2b_init_state(S); + /* XOR initial state with param block: */ + S->h[0] ^= (uint64_t)outlen | (UINT64_C(1) << 16) | (UINT64_C(1) << 24); +} + +void blake2b_update(blake2b_state *S, const void *in, size_t inlen) +{ + const uint8_t *pin = (const uint8_t *)in; + + if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) { + size_t left = S->buflen; + size_t fill = BLAKE2B_BLOCKBYTES - left; + memcpy(&S->buf[left], pin, fill); + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress(S, S->buf, 0); + S->buflen = 0; + inlen -= fill; + pin += fill; + /* Avoid buffer copies when possible */ + while (inlen > BLAKE2B_BLOCKBYTES) { + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress(S, pin, 0); + inlen -= BLAKE2B_BLOCKBYTES; + pin += BLAKE2B_BLOCKBYTES; + } + } + memcpy(&S->buf[S->buflen], pin, inlen); + S->buflen += inlen; +} + +void blake2b_final(blake2b_state *S, void *out, size_t outlen) +{ + uint8_t buffer[BLAKE2B_OUTBYTES] = {0}; + unsigned int i; + + blake2b_increment_counter(S, S->buflen); + memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */ + blake2b_compress(S, S->buf, UINT64_C(0xFFFFFFFFFFFFFFFF)); + + for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */ + store64(buffer + i * sizeof(uint64_t), S->h[i]); + } + + memcpy(out, buffer, outlen); + clear_internal_memory(buffer, sizeof(buffer)); + clear_internal_memory(S->buf, sizeof(S->buf)); + clear_internal_memory(S->h, sizeof(S->h)); +} + +void blake2b_long(void *out, size_t outlen, const void *in, size_t inlen) +{ + uint8_t *pout = (uint8_t *)out; + blake2b_state blake_state; + uint8_t outlen_bytes[sizeof(uint32_t)] = {0}; + + store32(outlen_bytes, (uint32_t)outlen); + if (outlen <= BLAKE2B_OUTBYTES) { + blake2b_init(&blake_state, outlen); + blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)); + blake2b_update(&blake_state, in, inlen); + blake2b_final(&blake_state, pout, outlen); + } else { + uint32_t toproduce; + uint8_t 
out_buffer[BLAKE2B_OUTBYTES]; + + blake2b_init(&blake_state, BLAKE2B_OUTBYTES); + blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)); + blake2b_update(&blake_state, in, inlen); + blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES); + + memcpy(pout, out_buffer, BLAKE2B_OUTBYTES / 2); + pout += BLAKE2B_OUTBYTES / 2; + toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2; + + while (toproduce > BLAKE2B_OUTBYTES) { + blake2b_init(&blake_state, BLAKE2B_OUTBYTES); + blake2b_update(&blake_state, out_buffer, BLAKE2B_OUTBYTES); + blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES); + + memcpy(pout, out_buffer, BLAKE2B_OUTBYTES / 2); + pout += BLAKE2B_OUTBYTES / 2; + toproduce -= BLAKE2B_OUTBYTES / 2; + } + + blake2b_init(&blake_state, toproduce); + blake2b_update(&blake_state, out_buffer, BLAKE2B_OUTBYTES); + blake2b_final(&blake_state, out_buffer, toproduce); + + memcpy(pout, out_buffer, toproduce); + + clear_internal_memory(out_buffer, sizeof(out_buffer)); + } +} diff --git a/src/3rdparty/argon2/lib/blake2/blake2.h b/src/3rdparty/argon2/lib/blake2/blake2.h new file mode 100644 index 00000000..7deeaa1f --- /dev/null +++ b/src/3rdparty/argon2/lib/blake2/blake2.h @@ -0,0 +1,30 @@ +#ifndef ARGON2_BLAKE2_H +#define ARGON2_BLAKE2_H + +#include +#include + +enum blake2b_constant { + BLAKE2B_BLOCKBYTES = 128, + BLAKE2B_OUTBYTES = 64, + BLAKE2B_KEYBYTES = 64, + BLAKE2B_SALTBYTES = 16, + BLAKE2B_PERSONALBYTES = 16 +}; + +typedef struct __blake2b_state { + uint64_t h[8]; + uint64_t t[2]; + uint8_t buf[BLAKE2B_BLOCKBYTES]; + size_t buflen; +} blake2b_state; + +/* Streaming API */ +void blake2b_init(blake2b_state *S, size_t outlen); +void blake2b_update(blake2b_state *S, const void *in, size_t inlen); +void blake2b_final(blake2b_state *S, void *out, size_t outlen); + +void blake2b_long(void *out, size_t outlen, const void *in, size_t inlen); + +#endif // ARGON2_BLAKE2_H + diff --git a/src/3rdparty/argon2/lib/core.c b/src/3rdparty/argon2/lib/core.c new file 
mode 100644 index 00000000..d6592a6a --- /dev/null +++ b/src/3rdparty/argon2/lib/core.c @@ -0,0 +1,633 @@ +/* + * Argon2 source code package + * + * Written by Daniel Dinu and Dmitry Khovratovich, 2015 + * + * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. + * + * You should have received a copy of the CC0 Public Domain Dedication along + * with + * this software. If not, see + * . + */ + +/*For memory wiping*/ +#ifdef _MSC_VER +#include +#include /* For SecureZeroMemory */ +#endif +#if defined __STDC_LIB_EXT1__ +#define __STDC_WANT_LIB_EXT1__ 1 +#endif +#define VC_GE_2005(version) (version >= 1400) + +#include +#include +#include +#include + +#include "core.h" +#include "thread.h" +#include "blake2/blake2.h" +#include "blake2/blake2-impl.h" + +#include "genkat.h" + +#if defined(__clang__) +#if __has_attribute(optnone) +#define NOT_OPTIMIZED __attribute__((optnone)) +#endif +#elif defined(__GNUC__) +#define GCC_VERSION \ + (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#if GCC_VERSION >= 40400 +#define NOT_OPTIMIZED __attribute__((optimize("O0"))) +#endif +#endif +#ifndef NOT_OPTIMIZED +#define NOT_OPTIMIZED +#endif + +/***************Instance and Position constructors**********/ +void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); } + +void copy_block(block *dst, const block *src) { + memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK); +} + +void xor_block(block *dst, const block *src) { + int i; + for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + dst->v[i] ^= src->v[i]; + } +} + +static void load_block(block *dst, const void *input) { + unsigned i; + for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i])); + } +} + +static void store_block(void *output, const block *src) { + unsigned i; + for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]); + } +} + 
+/***************Memory functions*****************/ + +int allocate_memory(const argon2_context *context, + argon2_instance_t *instance) { + size_t blocks = instance->memory_blocks; + size_t memory_size = blocks * ARGON2_BLOCK_SIZE; + + /* 0. Check for memory supplied by user: */ + /* NOTE: Sufficient memory size is already checked in argon2_ctx_mem() */ + if (instance->memory != NULL) { + return ARGON2_OK; + } + + /* 1. Check for multiplication overflow */ + if (blocks != 0 && memory_size / ARGON2_BLOCK_SIZE != blocks) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + /* 2. Try to allocate with appropriate allocator */ + if (context->allocate_cbk) { + (context->allocate_cbk)((uint8_t **)&instance->memory, memory_size); + } else { + instance->memory = malloc(memory_size); + } + + if (instance->memory == NULL) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + return ARGON2_OK; +} + +void free_memory(const argon2_context *context, + const argon2_instance_t *instance) { + size_t memory_size = instance->memory_blocks * ARGON2_BLOCK_SIZE; + + clear_internal_memory(instance->memory, memory_size); + + if (instance->keep_memory) { + /* user-supplied memory -- do not free */ + return; + } + + if (context->free_cbk) { + (context->free_cbk)((uint8_t *)instance->memory, memory_size); + } else { + free(instance->memory); + } +} + +void NOT_OPTIMIZED secure_wipe_memory(void *v, size_t n) { +#if defined(_MSC_VER) && VC_GE_2005(_MSC_VER) + SecureZeroMemory(v, n); +#elif defined memset_s + memset_s(v, n, 0, n); +#elif defined(__OpenBSD__) + explicit_bzero(v, n); +#else + static void *(*const volatile memset_sec)(void *, int, size_t) = &memset; + memset_sec(v, 0, n); +#endif +} + +/* Memory clear flag defaults to true. 
*/ +int FLAG_clear_internal_memory = 1; +void clear_internal_memory(void *v, size_t n) { + if (FLAG_clear_internal_memory && v) { + secure_wipe_memory(v, n); + } +} + +void finalize(const argon2_context *context, argon2_instance_t *instance) { + if (context != NULL && instance != NULL) { + block blockhash; + uint32_t l; + + copy_block(&blockhash, instance->memory + instance->lane_length - 1); + + /* XOR the last blocks */ + for (l = 1; l < instance->lanes; ++l) { + uint32_t last_block_in_lane = + l * instance->lane_length + (instance->lane_length - 1); + xor_block(&blockhash, instance->memory + last_block_in_lane); + } + + /* Hash the result */ + { + uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; + store_block(blockhash_bytes, &blockhash); + blake2b_long(context->out, context->outlen, blockhash_bytes, + ARGON2_BLOCK_SIZE); + /* clear blockhash and blockhash_bytes */ + clear_internal_memory(blockhash.v, ARGON2_BLOCK_SIZE); + clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); + } + + if (instance->print_internals) { + print_tag(context->out, context->outlen); + } + + free_memory(context, instance); + } +} + +uint32_t index_alpha(const argon2_instance_t *instance, + const argon2_position_t *position, uint32_t pseudo_rand, + int same_lane) { + /* + * Pass 0: + * This lane : all already finished segments plus already constructed + * blocks in this segment + * Other lanes : all already finished segments + * Pass 1+: + * This lane : (SYNC_POINTS - 1) last segments plus already constructed + * blocks in this segment + * Other lanes : (SYNC_POINTS - 1) last segments + */ + uint32_t reference_area_size; + uint64_t relative_position; + uint32_t start_position, absolute_position; + + if (0 == position->pass) { + /* First pass */ + if (0 == position->slice) { + /* First slice */ + reference_area_size = + position->index - 1; /* all but the previous */ + } else { + if (same_lane) { + /* The same lane => add current segment */ + reference_area_size = + position->slice * 
instance->segment_length + + position->index - 1; + } else { + reference_area_size = + position->slice * instance->segment_length + + ((position->index == 0) ? (-1) : 0); + } + } + } else { + /* Second pass */ + if (same_lane) { + reference_area_size = instance->lane_length - + instance->segment_length + position->index - + 1; + } else { + reference_area_size = instance->lane_length - + instance->segment_length + + ((position->index == 0) ? (-1) : 0); + } + } + + /* 1.2.4. Mapping pseudo_rand to 0.. and produce + * relative position */ + relative_position = pseudo_rand; + relative_position = relative_position * relative_position >> 32; + relative_position = reference_area_size - 1 - + (reference_area_size * relative_position >> 32); + + /* 1.2.5 Computing starting position */ + start_position = 0; + + if (0 != position->pass) { + start_position = (position->slice == ARGON2_SYNC_POINTS - 1) + ? 0 + : (position->slice + 1) * instance->segment_length; + } + + /* 1.2.6. Computing absolute position */ + absolute_position = (start_position + relative_position) % + instance->lane_length; /* absolute position */ + return absolute_position; +} + +#ifdef _WIN32 +static unsigned __stdcall fill_segment_thr(void *thread_data) +#else +static void *fill_segment_thr(void *thread_data) +#endif +{ + argon2_thread_data *my_data = thread_data; + fill_segment(my_data->instance_ptr, my_data->pos); + argon2_thread_exit(); + return 0; +} + +/* Single-threaded version for p=1 case */ +static int fill_memory_blocks_st(argon2_instance_t *instance) { + uint32_t r, s, l; + + for (r = 0; r < instance->passes; ++r) { + for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { + for (l = 0; l < instance->lanes; ++l) { + argon2_position_t position = { r, l, (uint8_t)s, 0 }; + fill_segment(instance, position); + } + } + + if (instance->print_internals) { + internal_kat(instance, r); /* Print all memory blocks */ + } + } + return ARGON2_OK; +} + +/* Multi-threaded version for p > 1 case */ +static int 
fill_memory_blocks_mt(argon2_instance_t *instance) { + uint32_t r, s; + argon2_thread_handle_t *thread = NULL; + argon2_thread_data *thr_data = NULL; + int rc = ARGON2_OK; + + /* 1. Allocating space for threads */ + thread = calloc(instance->lanes, sizeof(argon2_thread_handle_t)); + if (thread == NULL) { + rc = ARGON2_MEMORY_ALLOCATION_ERROR; + goto fail; + } + + thr_data = calloc(instance->lanes, sizeof(argon2_thread_data)); + if (thr_data == NULL) { + rc = ARGON2_MEMORY_ALLOCATION_ERROR; + goto fail; + } + + for (r = 0; r < instance->passes; ++r) { + for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { + uint32_t l; + + /* 2. Calling threads */ + for (l = 0; l < instance->lanes; ++l) { + argon2_position_t position; + + /* 2.1 Join a thread if limit is exceeded */ + if (l >= instance->threads) { + if (argon2_thread_join(thread[l - instance->threads])) { + rc = ARGON2_THREAD_FAIL; + goto fail; + } + } + + /* 2.2 Create thread */ + position.pass = r; + position.lane = l; + position.slice = (uint8_t)s; + position.index = 0; + thr_data[l].instance_ptr = + instance; /* preparing the thread input */ + memcpy(&(thr_data[l].pos), &position, + sizeof(argon2_position_t)); + if (argon2_thread_create(&thread[l], &fill_segment_thr, + (void *)&thr_data[l])) { + rc = ARGON2_THREAD_FAIL; + goto fail; + } + + /* fill_segment(instance, position); */ + /*Non-thread equivalent of the lines above */ + } + + /* 3. 
Joining remaining threads */ + for (l = instance->lanes - instance->threads; l < instance->lanes; + ++l) { + if (argon2_thread_join(thread[l])) { + rc = ARGON2_THREAD_FAIL; + goto fail; + } + } + } + + if (instance->print_internals) { + internal_kat(instance, r); /* Print all memory blocks */ + } + } + +fail: + if (thread != NULL) { + free(thread); + } + if (thr_data != NULL) { + free(thr_data); + } + return rc; +} + +int fill_memory_blocks(argon2_instance_t *instance) { + if (instance == NULL || instance->lanes == 0) { + return ARGON2_INCORRECT_PARAMETER; + } + + return instance->threads == 1 ? + fill_memory_blocks_st(instance) : fill_memory_blocks_mt(instance); +} + +int validate_inputs(const argon2_context *context) { + if (NULL == context) { + return ARGON2_INCORRECT_PARAMETER; + } + + if (NULL == context->out) { + return ARGON2_OUTPUT_PTR_NULL; + } + + /* Validate output length */ + if (ARGON2_MIN_OUTLEN > context->outlen) { + return ARGON2_OUTPUT_TOO_SHORT; + } + + if (ARGON2_MAX_OUTLEN < context->outlen) { + return ARGON2_OUTPUT_TOO_LONG; + } + + /* Validate password (required param) */ + if (NULL == context->pwd) { + if (0 != context->pwdlen) { + return ARGON2_PWD_PTR_MISMATCH; + } + } + + if (ARGON2_MIN_PWD_LENGTH > context->pwdlen) { + return ARGON2_PWD_TOO_SHORT; + } + + if (ARGON2_MAX_PWD_LENGTH < context->pwdlen) { + return ARGON2_PWD_TOO_LONG; + } + + /* Validate salt (required param) */ + if (NULL == context->salt) { + if (0 != context->saltlen) { + return ARGON2_SALT_PTR_MISMATCH; + } + } + + if (ARGON2_MIN_SALT_LENGTH > context->saltlen) { + return ARGON2_SALT_TOO_SHORT; + } + + if (ARGON2_MAX_SALT_LENGTH < context->saltlen) { + return ARGON2_SALT_TOO_LONG; + } + + /* Validate secret (optional param) */ + if (NULL == context->secret) { + if (0 != context->secretlen) { + return ARGON2_SECRET_PTR_MISMATCH; + } + } else { + if (ARGON2_MIN_SECRET > context->secretlen) { + return ARGON2_SECRET_TOO_SHORT; + } + if (ARGON2_MAX_SECRET < context->secretlen) 
{ + return ARGON2_SECRET_TOO_LONG; + } + } + + /* Validate associated data (optional param) */ + if (NULL == context->ad) { + if (0 != context->adlen) { + return ARGON2_AD_PTR_MISMATCH; + } + } else { + if (ARGON2_MIN_AD_LENGTH > context->adlen) { + return ARGON2_AD_TOO_SHORT; + } + if (ARGON2_MAX_AD_LENGTH < context->adlen) { + return ARGON2_AD_TOO_LONG; + } + } + + /* Validate memory cost */ + if (ARGON2_MIN_MEMORY > context->m_cost) { + return ARGON2_MEMORY_TOO_LITTLE; + } + + if (ARGON2_MAX_MEMORY < context->m_cost) { + return ARGON2_MEMORY_TOO_MUCH; + } + + if (context->m_cost < 8 * context->lanes) { + return ARGON2_MEMORY_TOO_LITTLE; + } + + /* Validate time cost */ + if (ARGON2_MIN_TIME > context->t_cost) { + return ARGON2_TIME_TOO_SMALL; + } + + if (ARGON2_MAX_TIME < context->t_cost) { + return ARGON2_TIME_TOO_LARGE; + } + + /* Validate lanes */ + if (ARGON2_MIN_LANES > context->lanes) { + return ARGON2_LANES_TOO_FEW; + } + + if (ARGON2_MAX_LANES < context->lanes) { + return ARGON2_LANES_TOO_MANY; + } + + /* Validate threads */ + if (ARGON2_MIN_THREADS > context->threads) { + return ARGON2_THREADS_TOO_FEW; + } + + if (ARGON2_MAX_THREADS < context->threads) { + return ARGON2_THREADS_TOO_MANY; + } + + if (NULL != context->allocate_cbk && NULL == context->free_cbk) { + return ARGON2_FREE_MEMORY_CBK_NULL; + } + + if (NULL == context->allocate_cbk && NULL != context->free_cbk) { + return ARGON2_ALLOCATE_MEMORY_CBK_NULL; + } + + return ARGON2_OK; +} + +void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) { + uint32_t l; + /* Make the first and second block in each lane as G(H0||0||i) or + G(H0||1||i) */ + uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; + for (l = 0; l < instance->lanes; ++l) { + + store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0); + store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l); + blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, + ARGON2_PREHASH_SEED_LENGTH); + load_block(&instance->memory[l * 
instance->lane_length + 0], + blockhash_bytes); + + store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1); + blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, + ARGON2_PREHASH_SEED_LENGTH); + load_block(&instance->memory[l * instance->lane_length + 1], + blockhash_bytes); + } + clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); +} + +void initial_hash(uint8_t *blockhash, argon2_context *context, + argon2_type type) { + blake2b_state BlakeHash; + uint8_t value[sizeof(uint32_t)]; + + if (NULL == context || NULL == blockhash) { + return; + } + + blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH); + + store32(&value, context->lanes); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->outlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->m_cost); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->t_cost); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->version); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, (uint32_t)type); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->pwdlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->pwd != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->pwd, + context->pwdlen); + + if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) { + secure_wipe_memory(context->pwd, context->pwdlen); + context->pwdlen = 0; + } + } + + store32(&value, context->saltlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->salt != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->salt, + context->saltlen); + } + + store32(&value, context->secretlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->secret != NULL) 
{ + blake2b_update(&BlakeHash, (const uint8_t *)context->secret, + context->secretlen); + + if (context->flags & ARGON2_FLAG_CLEAR_SECRET) { + secure_wipe_memory(context->secret, context->secretlen); + context->secretlen = 0; + } + } + + store32(&value, context->adlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->ad != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->ad, + context->adlen); + } + + blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); +} + +int initialize(argon2_instance_t *instance, argon2_context *context) { + uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; + int result = ARGON2_OK; + + if (instance == NULL || context == NULL) + return ARGON2_INCORRECT_PARAMETER; + instance->context_ptr = context; + + /* 1. Memory allocation */ + + result = allocate_memory(context, instance); + if (result != ARGON2_OK) { + return result; + } + + /* 2. Initial hashing */ + /* H_0 + 8 extra bytes to produce the first blocks */ + /* uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; */ + /* Hashing all inputs */ + initial_hash(blockhash, context, instance->type); + /* Zeroing 8 extra bytes */ + clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, + ARGON2_PREHASH_SEED_LENGTH - + ARGON2_PREHASH_DIGEST_LENGTH); + + if (instance->print_internals) { + initial_kat(blockhash, context, instance->type); + } + + /* 3. 
/*
 * ALIGN(x): alignment request placed in front of a declaration.
 *
 * - MSVC:       expands to __declspec(align(16)).
 *               NOTE(review): the argument is ignored and 16 is always used;
 *               left unchanged here because the callers are not visible --
 *               confirm whether any caller relies on a larger alignment.
 * - GCC/Clang:  expands to __attribute__((__aligned__(x))).
 * - otherwise:  expands to nothing (no alignment guarantee).
 *
 * The Clang test uses __clang__, which is the macro Clang actually
 * predefines (the previous spelling, __clang, is never defined).
 */
#if defined(_MSC_VER)
#define ALIGN(n) __declspec(align(16))
#elif defined(__GNUC__) || defined(__clang__)
#define ALIGN(x) __attribute__((__aligned__(x)))
#else
#define ALIGN(x)
#endif
+ */ +typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block; + +/*****************Functions that work with the block******************/ + +/* Initialize each byte of the block with @in */ +void init_block_value(block *b, uint8_t in); + +/* Copy block @src to block @dst */ +void copy_block(block *dst, const block *src); + +/* XOR @src onto @dst bytewise */ +void xor_block(block *dst, const block *src); + +/* + * Argon2 instance: memory pointer, number of passes, amount of memory, type, + * and derived values. + * Used to evaluate the number and location of blocks to construct in each + * thread + */ +typedef struct Argon2_instance_t { + block *memory; /* Memory pointer */ + uint32_t version; + uint32_t passes; /* Number of passes */ + uint32_t memory_blocks; /* Number of blocks in memory */ + uint32_t segment_length; + uint32_t lane_length; + uint32_t lanes; + uint32_t threads; + argon2_type type; + int print_internals; /* whether to print the memory blocks */ + int keep_memory; + argon2_context *context_ptr; /* points back to original context */ +} argon2_instance_t; + +/* + * Argon2 position: where we construct the block right now. Used to distribute + * work between threads. + */ +typedef struct Argon2_position_t { + uint32_t pass; + uint32_t lane; + uint8_t slice; + uint32_t index; +} argon2_position_t; + +/*Struct that holds the inputs for thread handling FillSegment*/ +typedef struct Argon2_thread_data { + argon2_instance_t *instance_ptr; + argon2_position_t pos; +} argon2_thread_data; + +/*************************Argon2 core functions********************************/ + +/* Allocates memory to the given pointer, uses the appropriate allocator as + * specified in the context. Total allocated memory is num*size. 
+ * @param context argon2_context which specifies the allocator + * @param instance the Argon2 instance + * @return ARGON2_OK if memory is allocated successfully + */ +int allocate_memory(const argon2_context *context, + argon2_instance_t *instance); + +/* + * Frees memory at the given pointer, uses the appropriate deallocator as + * specified in the context. Also cleans the memory using clear_internal_memory. + * @param context argon2_context which specifies the deallocator + * @param instance the Argon2 instance + */ +void free_memory(const argon2_context *context, + const argon2_instance_t *instance); + +/* Function that securely cleans the memory. This ignores any flags set + * regarding clearing memory. Usually one just calls clear_internal_memory. + * @param mem Pointer to the memory + * @param s Memory size in bytes + */ +void secure_wipe_memory(void *v, size_t n); + +/* Function that securely clears the memory if FLAG_clear_internal_memory is + * set. If the flag isn't set, this function does nothing. + * @param mem Pointer to the memory + * @param s Memory size in bytes + */ +ARGON2_PUBLIC void clear_internal_memory(void *v, size_t n); + +/* + * Computes absolute position of reference block in the lane following a skewed + * distribution and using a pseudo-random value as input + * @param instance Pointer to the current instance + * @param position Pointer to the current position + * @param pseudo_rand 32-bit pseudo-random value used to determine the position + * @param same_lane Indicates if the block will be taken from the current lane. 
+ * If so we can reference the current segment + * @pre All pointers must be valid + */ +uint32_t index_alpha(const argon2_instance_t *instance, + const argon2_position_t *position, uint32_t pseudo_rand, + int same_lane); + +/* + * Function that validates all inputs against predefined restrictions and return + * an error code + * @param context Pointer to current Argon2 context + * @return ARGON2_OK if everything is all right, otherwise one of error codes + * (all defined in + */ +int validate_inputs(const argon2_context *context); + +/* + * Hashes all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clears + * password and secret if needed + * @param context Pointer to the Argon2 internal structure containing memory + * pointer, and parameters for time and space requirements. + * @param blockhash Buffer for pre-hashing digest + * @param type Argon2 type + * @pre @a blockhash must have at least @a PREHASH_DIGEST_LENGTH bytes + * allocated + */ +void initial_hash(uint8_t *blockhash, argon2_context *context, + argon2_type type); + +/* + * Function creates first 2 blocks per lane + * @param instance Pointer to the current instance + * @param blockhash Pointer to the pre-hashing digest + * @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values + */ +void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance); + +/* + * Function allocates memory, hashes the inputs with Blake, and creates first + * two blocks. Returns the pointer to the main memory with 2 blocks per lane + * initialized + * @param context Pointer to the Argon2 internal structure containing memory + * pointer, and parameters for time and space requirements. + * @param instance Current Argon2 instance + * @return Zero if successful, -1 if memory failed to allocate. @context->state + * will be modified if successful. + */ +int initialize(argon2_instance_t *instance, argon2_context *context); + +/* + * XORing the last block of each lane, hashing it, making the tag. 
Deallocates + * the memory. + * @param context Pointer to current Argon2 context (use only the out parameters + * from it) + * @param instance Pointer to current instance of Argon2 + * @pre instance->state must point to necessary amount of memory + * @pre context->out must point to outlen bytes of memory + * @pre if context->free_cbk is not NULL, it should point to a function that + * deallocates memory + */ +void finalize(const argon2_context *context, argon2_instance_t *instance); + +/* + * Function that fills the segment using previous segments also from other + * threads + * @param instance Pointer to the current instance + * @param position Current position + * @pre all block pointers must be valid + */ +void fill_segment(const argon2_instance_t *instance, + argon2_position_t position); + +/* + * Function that fills the entire memory t_cost times based on the first two + * blocks in each lane + * @param instance Pointer to the current instance + * @return ARGON2_OK if successful, @context->state + */ +int fill_memory_blocks(argon2_instance_t *instance); + +#endif diff --git a/src/3rdparty/argon2/lib/encoding.c b/src/3rdparty/argon2/lib/encoding.c new file mode 100644 index 00000000..af56e447 --- /dev/null +++ b/src/3rdparty/argon2/lib/encoding.c @@ -0,0 +1,432 @@ +#include +#include +#include +#include +#include "encoding.h" +#include "core.h" + +/* + * Example code for a decoder and encoder of "hash strings", with Argon2 + * parameters. + * + * This code comprises three sections: + * + * -- The first section contains generic Base64 encoding and decoding + * functions. It is conceptually applicable to any hash function + * implementation that uses Base64 to encode and decode parameters, + * salts and outputs. It could be made into a library, provided that + * the relevant functions are made public (non-static) and be given + * reasonable names to avoid collisions with other functions. + * + * -- The second section is specific to Argon2. 
/*
 * Branch-free comparison helpers. Operands are expected to lie in 0..255;
 * each macro evaluates to 0xFF when the relation holds and 0x00 otherwise,
 * so the result can be used directly as a byte mask.
 */
#define EQ(x, y) ((((0U - ((unsigned)(x) ^ (unsigned)(y))) >> 8) & 0xFF) ^ 0xFF)
#define GT(x, y) ((((unsigned)(y) - (unsigned)(x)) >> 8) & 0xFF)
#define GE(x, y) (GT(y, x) ^ 0xFF)
#define LT(x, y) GT(y, x)
#define LE(x, y) GE(y, x)

/*
 * Maps a 6-bit value (0..63) to its Base64 character.
 * Every alphabet range contributes either 0 or the character; the ranges are
 * disjoint, so OR-ing the contributions yields the single matching character.
 */
static int b64_byte_to_char(unsigned x) {
    unsigned upper = LT(x, 26) & (x + 'A');
    unsigned lower = GE(x, 26) & LT(x, 52) & (x + ('a' - 26));
    unsigned digit = GE(x, 52) & LT(x, 62) & (x + ('0' - 52));
    unsigned plus = EQ(x, 62) & '+';
    unsigned slash = EQ(x, 63) & '/';

    return (int)(upper | lower | digit | plus | slash);
}

/*
 * Maps a Base64 character to its 6-bit value.
 * Returns 0xFF (255) when c is not part of the Base64 alphabet.
 */
static unsigned b64_char_to_byte(int c) {
    unsigned upper = GE(c, 'A') & LE(c, 'Z') & (c - 'A');
    unsigned lower = GE(c, 'a') & LE(c, 'z') & (c - ('a' - 26));
    unsigned digit = GE(c, '0') & LE(c, '9') & (c - ('0' - 52));
    unsigned plus = EQ(c, '+') & 62;
    unsigned slash = EQ(c, '/') & 63;
    unsigned value = upper | lower | digit | plus | slash;

    /* A zero result is only legitimate for 'A'; anything else is invalid. */
    unsigned invalid = EQ(value, 0) & (EQ(c, 'A') ^ 0xFF);

    return value | invalid;
}

/*
 * Encodes src_len bytes from src as unpadded Base64 into dst.
 *
 * dst_len is the capacity of dst in characters and must leave room for the
 * terminating zero. Returns the number of characters written, not counting
 * the terminator, or (size_t)-1 when dst is too small (nothing is written in
 * that case).
 */
static size_t to_base64(char *dst, size_t dst_len, const void *src,
                        size_t src_len) {
    const unsigned char *in = (const unsigned char *)src;
    size_t remainder = src_len % 3;
    size_t olen = (src_len / 3) * 4;
    unsigned acc = 0;
    unsigned pending_bits = 0;
    size_t i;

    /* A trailing 1- or 2-byte group needs 2 or 3 characters respectively. */
    if (remainder == 2) {
        olen += 3;
    } else if (remainder == 1) {
        olen += 2;
    }

    if (olen >= dst_len) {
        return (size_t)-1;
    }

    for (i = 0; i < src_len; ++i) {
        acc = (acc << 8) + in[i];
        pending_bits += 8;

        while (pending_bits >= 6) {
            pending_bits -= 6;
            *dst++ = (char)b64_byte_to_char((acc >> pending_bits) & 0x3F);
        }
    }

    /* Flush the leftover bits, left-aligned inside a final 6-bit group. */
    if (pending_bits > 0) {
        *dst++ = (char)b64_byte_to_char((acc << (6 - pending_bits)) & 0x3F);
    }

    *dst++ = 0;
    return olen;
}
/*
 * Decodes unpadded Base64 text from src into dst.
 *
 * On entry *dst_len holds the capacity of dst in bytes; on success it is
 * overwritten with the number of bytes produced. Decoding consumes characters
 * until b64_char_to_byte() reports a non-Base64 character (which may be the
 * terminating zero).
 *
 * Returns a pointer to that first non-Base64 character, or NULL when the
 * output buffer is too small or the input ends with left-over bits that are
 * too many (more than 4) or not all zero. *dst_len is left untouched on
 * failure.
 */
static const char *from_base64(void *dst, size_t *dst_len, const char *src) {
    unsigned char *out = (unsigned char *)dst;
    size_t written = 0;
    unsigned acc = 0;
    unsigned pending_bits = 0;
    unsigned sextet;

    while ((sextet = b64_char_to_byte(*src)) != 0xFF) {
        ++src;
        acc = (acc << 6) + sextet;
        pending_bits += 6;

        if (pending_bits < 8) {
            continue; /* not enough bits for a whole byte yet */
        }

        pending_bits -= 8;
        if (written >= *dst_len) {
            return NULL;
        }
        out[written++] = (unsigned char)((acc >> pending_bits) & 0xFF);
    }

    /*
     * An input length of 1 modulo 4 leaves 6 buffered bits, which is
     * invalid; valid inputs leave 0, 2 or 4 bits, and those must be zero.
     */
    if (pending_bits > 4) {
        return NULL;
    }
    if ((acc & (((unsigned)1 << pending_bits) - 1)) != 0) {
        return NULL;
    }

    *dst_len = written;
    return src;
}
/*
 * Parses an unsigned decimal integer at the start of str.
 *
 * On success stores the value in *v and returns a pointer to the first
 * character after the digits. Returns NULL, leaving *v untouched, when
 *   - str does not start with a digit,
 *   - the number has superfluous leading zeros (only a lone "0" may start
 *     with '0'), or
 *   - the value does not fit in an unsigned long.
 */
static const char *decode_decimal(const char *str, unsigned long *v) {
    const char *cursor = str;
    unsigned long value = 0;

    while (*cursor >= '0' && *cursor <= '9') {
        unsigned long digit = (unsigned long)(*cursor - '0');

        /* Reject before multiplying/adding so the accumulator never wraps. */
        if (value > (ULONG_MAX / 10)) {
            return NULL;
        }
        value *= 10;
        if (digit > (ULONG_MAX - value)) {
            return NULL;
        }
        value += digit;
        ++cursor;
    }

    if (cursor == str) {
        return NULL; /* no digits at all */
    }
    if (*str == '0' && cursor != str + 1) {
        return NULL; /* non-minimal encoding such as "007" */
    }

    *v = value;
    return cursor;
}
+ */ + +int decode_string(argon2_context *ctx, const char *str, argon2_type type) { + +/* check for prefix */ +#define CC(prefix) \ + do { \ + size_t cc_len = strlen(prefix); \ + if (strncmp(str, prefix, cc_len) != 0) { \ + return ARGON2_DECODING_FAIL; \ + } \ + str += cc_len; \ + } while ((void)0, 0) + +/* optional prefix checking with supplied code */ +#define CC_opt(prefix, code) \ + do { \ + size_t cc_len = strlen(prefix); \ + if (strncmp(str, prefix, cc_len) == 0) { \ + str += cc_len; \ + { code; } \ + } \ + } while ((void)0, 0) + +/* Decoding prefix into uint32_t decimal */ +#define DECIMAL_U32(x) \ + do { \ + unsigned long dec_x; \ + str = decode_decimal(str, &dec_x); \ + if (str == NULL || dec_x > UINT32_MAX) { \ + return ARGON2_DECODING_FAIL; \ + } \ + (x) = (uint32_t)dec_x; \ + } while ((void)0, 0) + +/* Decoding base64 into a binary buffer */ +#define BIN(buf, max_len, len) \ + do { \ + size_t bin_len = (max_len); \ + str = from_base64(buf, &bin_len, str); \ + if (str == NULL || bin_len > UINT32_MAX) { \ + return ARGON2_DECODING_FAIL; \ + } \ + (len) = (uint32_t)bin_len; \ + } while ((void)0, 0) + + size_t maxsaltlen = ctx->saltlen; + size_t maxoutlen = ctx->outlen; + int validation_result; + const char* type_string; + + /* We should start with the argon2_type we are using */ + type_string = argon2_type2string(type, 0); + if (!type_string) { + return ARGON2_INCORRECT_TYPE; + } + + CC("$"); + CC(type_string); + + /* Reading the version number if the default is suppressed */ + ctx->version = ARGON2_VERSION_10; + CC_opt("$v=", DECIMAL_U32(ctx->version)); + + CC("$m="); + DECIMAL_U32(ctx->m_cost); + CC(",t="); + DECIMAL_U32(ctx->t_cost); + CC(",p="); + DECIMAL_U32(ctx->lanes); + ctx->threads = ctx->lanes; + + CC("$"); + BIN(ctx->salt, maxsaltlen, ctx->saltlen); + CC("$"); + BIN(ctx->out, maxoutlen, ctx->outlen); + + /* The rest of the fields get the default values */ + ctx->secret = NULL; + ctx->secretlen = 0; + ctx->ad = NULL; + ctx->adlen = 0; + 
/*
 * Returns the number of characters needed to encode len bytes as unpadded
 * Base64 (terminating zero not included).
 */
size_t b64len(uint32_t len) {
    size_t full_groups = ((size_t)len / 3) * 4;
    uint32_t leftover = len % 3;

    if (leftover == 1) {
        return full_groups + 2;
    }
    if (leftover == 2) {
        return full_groups + 3;
    }
    return full_groups;
}

/* Returns the number of decimal digits needed to print num (at least 1). */
size_t numlen(uint32_t num) {
    size_t digits = 1;

    for (; num >= 10; num /= 10) {
        ++digits;
    }
    return digits;
}
a/src/3rdparty/argon2/lib/encoding.h b/src/3rdparty/argon2/lib/encoding.h new file mode 100644 index 00000000..e7834e4f --- /dev/null +++ b/src/3rdparty/argon2/lib/encoding.h @@ -0,0 +1,40 @@ +#ifndef ENCODING_H +#define ENCODING_H +#include "argon2.h" + +#define ARGON2_MAX_DECODED_LANES UINT32_C(255) +#define ARGON2_MIN_DECODED_SALT_LEN UINT32_C(8) +#define ARGON2_MIN_DECODED_OUT_LEN UINT32_C(12) + +/* +* encode an Argon2 hash string into the provided buffer. 'dst_len' +* contains the size, in characters, of the 'dst' buffer; if 'dst_len' +* is less than the number of required characters (including the +* terminating 0), then this function returns ARGON2_ENCODING_FAIL. +* +* on success, ARGON2_OK is returned. +*/ +int encode_string(char *dst, size_t dst_len, argon2_context *ctx, + argon2_type type); + +/* +* Decodes an Argon2 hash string into the provided structure 'ctx'. +* The only fields that must be set prior to this call are ctx.saltlen and +* ctx.outlen (which must be the maximal salt and out length values that are +* allowed), ctx.salt and ctx.out (which must be buffers of the specified +* length), and ctx.pwd and ctx.pwdlen which must hold a valid password. +* +* Invalid input string causes an error. On success, the ctx is valid and all +* fields have been initialized. +* +* Returned value is ARGON2_OK on success, other ARGON2_ codes on error.
+*/ +int decode_string(argon2_context *ctx, const char *str, argon2_type type); + +/* Returns the length of the encoded byte stream with length len */ +size_t b64len(uint32_t len); + +/* Returns the length of the encoded number num */ +size_t numlen(uint32_t num); + +#endif diff --git a/src/3rdparty/argon2/lib/genkat.c b/src/3rdparty/argon2/lib/genkat.c new file mode 100644 index 00000000..fd5663bf --- /dev/null +++ b/src/3rdparty/argon2/lib/genkat.c @@ -0,0 +1,117 @@ +/* + * Argon2 source code package + * + * Written by Daniel Dinu and Dmitry Khovratovich, 2015 + * + * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. + * + * You should have received a copy of the CC0 Public Domain Dedication along + * with + * this software. If not, see + * . + */ + +#include +#include + +#include "genkat.h" + +void initial_kat(const uint8_t *blockhash, const argon2_context *context, + argon2_type type) { + unsigned i; + + if (blockhash != NULL && context != NULL) { + printf("=======================================\n"); + + printf("%s version number %d\n", argon2_type2string(type, 1), + context->version); + + printf("=======================================\n"); + + + printf("Memory: %u KiB, Iterations: %u, Parallelism: %u lanes, Tag " + "length: %u bytes\n", + context->m_cost, context->t_cost, context->lanes, + context->outlen); + + printf("Password[%u]: ", context->pwdlen); + + if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) { + printf("CLEARED\n"); + } else { + for (i = 0; i < context->pwdlen; ++i) { + printf("%2.2x ", ((unsigned char *)context->pwd)[i]); + } + + printf("\n"); + } + + printf("Salt[%u]: ", context->saltlen); + + for (i = 0; i < context->saltlen; ++i) { + printf("%2.2x ", ((unsigned char *)context->salt)[i]); + } + + printf("\n"); + + printf("Secret[%u]: ", context->secretlen); + + if (context->flags & ARGON2_FLAG_CLEAR_SECRET) { + printf("CLEARED\n"); + } else { + for (i = 0; i < context->secretlen; ++i) { + printf("%2.2x ", ((unsigned char 
*)context->secret)[i]); + } + + printf("\n"); + } + + printf("Associated data[%u]: ", context->adlen); + + for (i = 0; i < context->adlen; ++i) { + printf("%2.2x ", ((unsigned char *)context->ad)[i]); + } + + printf("\n"); + + printf("Pre-hashing digest: "); + + for (i = 0; i < ARGON2_PREHASH_DIGEST_LENGTH; ++i) { + printf("%2.2x ", ((unsigned char *)blockhash)[i]); + } + + printf("\n"); + } +} + +void print_tag(const void *out, uint32_t outlen) { + unsigned i; + if (out != NULL) { + printf("Tag: "); + + for (i = 0; i < outlen; ++i) { + printf("%2.2x ", ((uint8_t *)out)[i]); + } + + printf("\n"); + } +} + +void internal_kat(const argon2_instance_t *instance, uint32_t pass) { + + if (instance != NULL) { + uint32_t i, j; + printf("\n After pass %u:\n", pass); + + for (i = 0; i < instance->memory_blocks; ++i) { + uint32_t how_many_words = + (instance->memory_blocks > ARGON2_QWORDS_IN_BLOCK) + ? 1 + : ARGON2_QWORDS_IN_BLOCK; + + for (j = 0; j < how_many_words; ++j) + printf("Block %.4u [%3u]: %016" PRIx64 "\n", i, j, + instance->memory[i].v[j]); + } + } +} diff --git a/src/3rdparty/argon2/lib/genkat.h b/src/3rdparty/argon2/lib/genkat.h new file mode 100644 index 00000000..815c09b5 --- /dev/null +++ b/src/3rdparty/argon2/lib/genkat.h @@ -0,0 +1,47 @@ +/* + * Argon2 source code package + * + * Written by Daniel Dinu and Dmitry Khovratovich, 2015 + * + * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. + * + * You should have received a copy of the CC0 Public Domain Dedication along + * with + * this software. If not, see + * . 
+ */ + +#ifndef ARGON2_KAT_H +#define ARGON2_KAT_H + +#include "core.h" + +/* + * Initial KAT function that prints the inputs to the file + * @param blockhash Array that contains pre-hashing digest + * @param context Holds inputs + * @param type Argon2 type + * @pre blockhash must point to INPUT_INITIAL_HASH_LENGTH bytes + * @pre context member pointers must point to allocated memory of size according + * to the length values + */ +void initial_kat(const uint8_t *blockhash, const argon2_context *context, + argon2_type type); + +/* + * Function that prints the output tag + * @param out output array pointer + * @param outlen digest length + * @pre out must point to @a outlen bytes + **/ +void print_tag(const void *out, uint32_t outlen); + +/* + * Function that prints the internal state at given moment + * @param instance pointer to the current instance + * @param pass current pass number + * @pre instance must have necessary memory allocated + **/ +void internal_kat(const argon2_instance_t *instance, uint32_t pass); + +#endif diff --git a/src/3rdparty/argon2/lib/impl-select.c b/src/3rdparty/argon2/lib/impl-select.c new file mode 100644 index 00000000..84c62aec --- /dev/null +++ b/src/3rdparty/argon2/lib/impl-select.c @@ -0,0 +1,120 @@ +#include +#include + +#include "impl-select.h" + +#include "argon2.h" + +#define log_maybe(file, ...) 
\ + do { \ + if (file) { \ + fprintf(file, __VA_ARGS__); \ + } \ + } while((void)0, 0) + +#define BENCH_SAMPLES 512 +#define BENCH_MEM_BLOCKS 512 + +static argon2_impl selected_argon_impl = { + "(default)", NULL, fill_segment_default +}; + +/* the benchmark routine is not thread-safe, so we can use a global var here: */ +static block memory[BENCH_MEM_BLOCKS]; + +static uint64_t benchmark_impl(const argon2_impl *impl) { + clock_t time; + unsigned int i; + uint64_t bench; + argon2_instance_t instance; + argon2_position_t pos; + + memset(memory, 0, sizeof(memory)); + + instance.version = ARGON2_VERSION_NUMBER; + instance.memory = memory; + instance.passes = 1; + instance.memory_blocks = BENCH_MEM_BLOCKS; + instance.segment_length = BENCH_MEM_BLOCKS / ARGON2_SYNC_POINTS; + instance.lane_length = instance.segment_length * ARGON2_SYNC_POINTS; + instance.lanes = 1; + instance.threads = 1; + instance.type = Argon2_i; + + pos.lane = 0; + pos.pass = 0; + pos.slice = 0; + pos.index = 0; + + /* warm-up cache: */ + impl->fill_segment(&instance, pos); + + /* OK, now measure: */ + bench = 0; + time = clock(); + for (i = 0; i < BENCH_SAMPLES; i++) { + impl->fill_segment(&instance, pos); + } + time = clock() - time; + bench = (uint64_t)time; + return bench; +} + +static void select_impl(FILE *out, const char *prefix) +{ + argon2_impl_list impls; + unsigned int i; + const argon2_impl *best_impl = NULL; + uint64_t best_bench = UINT_MAX; + + log_maybe(out, "%sSelecting best fill_segment implementation...\n", prefix); + + argon2_get_impl_list(&impls); + + for (i = 0; i < impls.count; i++) { + const argon2_impl *impl = &impls.entries[i]; + uint64_t bench; + + log_maybe(out, "%s%s: Checking availability... 
", prefix, impl->name); + if (impl->check != NULL && !impl->check()) { + log_maybe(out, "FAILED!\n"); + continue; + } + log_maybe(out, "OK!\n"); + + log_maybe(out, "%s%s: Benchmarking...\n", prefix, impl->name); + bench = benchmark_impl(impl); + log_maybe(out, "%s%s: Benchmark result: %llu\n", prefix, impl->name, + (unsigned long long)bench); + + if (bench < best_bench) { + best_bench = bench; + best_impl = impl; + } + } + + if (best_impl != NULL) { + log_maybe(out, + "%sBest implementation: '%s' (bench %llu)\n", prefix, + best_impl->name, (unsigned long long)best_bench); + + selected_argon_impl = *best_impl; + } else { + log_maybe(out, + "%sNo optimized implementation available, using default!\n", + prefix); + } +} + +void fill_segment(const argon2_instance_t *instance, argon2_position_t position) +{ + selected_argon_impl.fill_segment(instance, position); +} + +void argon2_select_impl(FILE *out, const char *prefix) +{ + if (prefix == NULL) { + prefix = ""; + } + select_impl(out, prefix); +} diff --git a/src/3rdparty/argon2/lib/impl-select.h b/src/3rdparty/argon2/lib/impl-select.h new file mode 100644 index 00000000..e4acbd1f --- /dev/null +++ b/src/3rdparty/argon2/lib/impl-select.h @@ -0,0 +1,23 @@ +#ifndef ARGON2_IMPL_SELECT_H +#define ARGON2_IMPL_SELECT_H + +#include "core.h" + +typedef struct Argon2_impl { + const char *name; + int (*check)(void); + void (*fill_segment)(const argon2_instance_t *instance, + argon2_position_t position); +} argon2_impl; + +typedef struct Argon2_impl_list { + const argon2_impl *entries; + size_t count; +} argon2_impl_list; + +void argon2_get_impl_list(argon2_impl_list *list); +void fill_segment_default(const argon2_instance_t *instance, + argon2_position_t position); + +#endif // ARGON2_IMPL_SELECT_H + diff --git a/src/3rdparty/argon2/lib/thread.c b/src/3rdparty/argon2/lib/thread.c new file mode 100644 index 00000000..412261f1 --- /dev/null +++ b/src/3rdparty/argon2/lib/thread.c @@ -0,0 +1,36 @@ +#include "thread.h" +#if 
defined(_WIN32) +#include +#endif + +int argon2_thread_create(argon2_thread_handle_t *handle, + argon2_thread_func_t func, void *args) { + if (NULL == handle || func == NULL) { + return -1; + } +#if defined(_WIN32) + *handle = _beginthreadex(NULL, 0, func, args, 0, NULL); + return *handle != 0 ? 0 : -1; +#else + return pthread_create(handle, NULL, func, args); +#endif +} + +int argon2_thread_join(argon2_thread_handle_t handle) { +#if defined(_WIN32) + if (WaitForSingleObject((HANDLE)handle, INFINITE) == WAIT_OBJECT_0) { + return CloseHandle((HANDLE)handle) != 0 ? 0 : -1; + } + return -1; +#else + return pthread_join(handle, NULL); +#endif +} + +void argon2_thread_exit(void) { +#if defined(_WIN32) + _endthreadex(0); +#else + pthread_exit(NULL); +#endif +} diff --git a/src/3rdparty/argon2/lib/thread.h b/src/3rdparty/argon2/lib/thread.h new file mode 100644 index 00000000..f1ef5191 --- /dev/null +++ b/src/3rdparty/argon2/lib/thread.h @@ -0,0 +1,47 @@ +#ifndef ARGON2_THREAD_H +#define ARGON2_THREAD_H +/* + Here we implement an abstraction layer for the simpĺe requirements + of the Argon2 code. We only require 3 primitives---thread creation, + joining, and termination---so full emulation of the pthreads API + is unwarranted. Currently we wrap pthreads and Win32 threads. + + The API defines 2 types: the function pointer type, + argon2_thread_func_t, + and the type of the thread handle---argon2_thread_handle_t. +*/ +#if defined(_WIN32) +#include +#include +typedef unsigned(__stdcall *argon2_thread_func_t)(void *); +typedef uintptr_t argon2_thread_handle_t; +#else +#include +typedef void *(*argon2_thread_func_t)(void *); +typedef pthread_t argon2_thread_handle_t; +#endif + +/* Creates a thread + * @param handle pointer to a thread handle, which is the output of this + * function. Must not be NULL. + * @param func A function pointer for the thread's entry point. Must not be + * NULL. + * @param args Pointer that is passed as an argument to @func. May be NULL. 
+ * @return 0 if @handle and @func are valid pointers and a thread is successfully + * created. + */ +int argon2_thread_create(argon2_thread_handle_t *handle, + argon2_thread_func_t func, void *args); + +/* Waits for a thread to terminate + * @param handle Handle to a thread created with argon2_thread_create. + * @return 0 if @handle is a valid handle, and joining completed successfully. +*/ +int argon2_thread_join(argon2_thread_handle_t handle); + +/* Terminate the current thread. Must be run inside a thread created by + * argon2_thread_create. +*/ +void argon2_thread_exit(void); + +#endif diff --git a/src/3rdparty/argon2/m4/ax_check_compile_flag.m4 b/src/3rdparty/argon2/m4/ax_check_compile_flag.m4 new file mode 100644 index 00000000..ca363971 --- /dev/null +++ b/src/3rdparty/argon2/m4/ax_check_compile_flag.m4 @@ -0,0 +1,74 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) +# +# DESCRIPTION +# +# Check whether the given FLAG works with the current language's compiler +# or gives an error. (Warnings, however, are ignored) +# +# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on +# success/failure. +# +# If EXTRA-FLAGS is defined, it is added to the current language's default +# flags (e.g. CFLAGS) when the check is done. The check is thus made with +# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to +# force the compiler to issue an error when a bad flag is given. +# +# INPUT gives an alternative input source to AC_COMPILE_IFELSE. +# +# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this +# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG. +# +# LICENSE +# +# Copyright (c) 2008 Guido U.
Draheim +# Copyright (c) 2011 Maarten Bosmans +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. 
+ +#serial 4 + +AC_DEFUN([AX_CHECK_COMPILE_FLAG], +[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF +AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl +AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ + ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS + _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" + AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], + [AS_VAR_SET(CACHEVAR,[yes])], + [AS_VAR_SET(CACHEVAR,[no])]) + _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) +AS_VAR_IF(CACHEVAR,yes, + [m4_default([$2], :)], + [m4_default([$3], :)]) +AS_VAR_POPDEF([CACHEVAR])dnl +])dnl AX_CHECK_COMPILE_FLAGS diff --git a/src/3rdparty/argon2/m4/ax_pthread.m4 b/src/3rdparty/argon2/m4/ax_pthread.m4 new file mode 100644 index 00000000..4c4051ea --- /dev/null +++ b/src/3rdparty/argon2/m4/ax_pthread.m4 @@ -0,0 +1,485 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_pthread.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) +# +# DESCRIPTION +# +# This macro figures out how to build C programs using POSIX threads. It +# sets the PTHREAD_LIBS output variable to the threads library and linker +# flags, and the PTHREAD_CFLAGS output variable to any special C compiler +# flags that are needed. (The user can also force certain compiler +# flags/libs to be tested by setting these environment variables.) +# +# Also sets PTHREAD_CC to any special C compiler that is needed for +# multi-threaded programs (defaults to the value of CC otherwise). (This +# is necessary on AIX to use the special cc_r compiler alias.) +# +# NOTE: You are assumed to not only compile your program with these flags, +# but also to link with them as well. For example, you might link with +# $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... 
$PTHREAD_LIBS $LIBS +# +# If you are only building threaded programs, you may wish to use these +# variables in your default LIBS, CFLAGS, and CC: +# +# LIBS="$PTHREAD_LIBS $LIBS" +# CFLAGS="$CFLAGS $PTHREAD_CFLAGS" +# CC="$PTHREAD_CC" +# +# In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant +# has a nonstandard name, this macro defines PTHREAD_CREATE_JOINABLE to +# that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX). +# +# Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the +# PTHREAD_PRIO_INHERIT symbol is defined when compiling with +# PTHREAD_CFLAGS. +# +# ACTION-IF-FOUND is a list of shell commands to run if a threads library +# is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it +# is not found. If ACTION-IF-FOUND is not specified, the default action +# will define HAVE_PTHREAD. +# +# Please let the authors know if this macro fails on any platform, or if +# you have any other suggestions or comments. This macro was based on work +# by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help +# from M. Frigo), as well as ac_pthread and hb_pthread macros posted by +# Alejandro Forero Cuervo to the autoconf macro repository. We are also +# grateful for the helpful feedback of numerous users. +# +# Updated for Autoconf 2.68 by Daniel Richard G. +# +# LICENSE +# +# Copyright (c) 2008 Steven G. Johnson +# Copyright (c) 2011 Daniel Richard G. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 23 + +AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD]) +AC_DEFUN([AX_PTHREAD], [ +AC_REQUIRE([AC_CANONICAL_HOST]) +AC_REQUIRE([AC_PROG_CC]) +AC_REQUIRE([AC_PROG_SED]) +AC_LANG_PUSH([C]) +ax_pthread_ok=no + +# We used to check for pthread.h first, but this fails if pthread.h +# requires special compiler flags (e.g. on Tru64 or Sequent). +# It gets checked for in the link test anyway. 
+ +# First of all, check if the user has set any of the PTHREAD_LIBS, +# etcetera environment variables, and if threads linking works using +# them: +if test "x$PTHREAD_CFLAGS$PTHREAD_LIBS" != "x"; then + ax_pthread_save_CC="$CC" + ax_pthread_save_CFLAGS="$CFLAGS" + ax_pthread_save_LIBS="$LIBS" + AS_IF([test "x$PTHREAD_CC" != "x"], [CC="$PTHREAD_CC"]) + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + AC_MSG_CHECKING([for pthread_join using $CC $PTHREAD_CFLAGS $PTHREAD_LIBS]) + AC_LINK_IFELSE([AC_LANG_CALL([], [pthread_join])], [ax_pthread_ok=yes]) + AC_MSG_RESULT([$ax_pthread_ok]) + if test "x$ax_pthread_ok" = "xno"; then + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" + fi + CC="$ax_pthread_save_CC" + CFLAGS="$ax_pthread_save_CFLAGS" + LIBS="$ax_pthread_save_LIBS" +fi + +# We must check for the threads library under a number of different +# names; the ordering is very important because some systems +# (e.g. DEC) have both -lpthread and -lpthreads, where one of the +# libraries is broken (non-POSIX). + +# Create a list of thread flags to try. Items starting with a "-" are +# C compiler flags, and other items are library names, except for "none" +# which indicates that we try without any flags at all, and "pthread-config" +# which is a program returning the flags for the Pth emulation library. + +ax_pthread_flags="pthreads none -Kthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" + +# The ordering *is* (sometimes) important. 
Some notes on the +# individual items follow: + +# pthreads: AIX (must check this before -lpthread) +# none: in case threads are in libc; should be tried before -Kthread and +# other compiler flags to prevent continual compiler warnings +# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) +# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads), Tru64 +# (Note: HP C rejects this with "bad form for `-t' option") +# -pthreads: Solaris/gcc (Note: HP C also rejects) +# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it +# doesn't hurt to check since this sometimes defines pthreads and +# -D_REENTRANT too), HP C (must be checked before -lpthread, which +# is present but should not be used directly; and before -mthreads, +# because the compiler interprets this as "-mt" + "-hreads") +# -mthreads: Mingw32/gcc, Lynx/gcc +# pthread: Linux, etcetera +# --thread-safe: KAI C++ +# pthread-config: use pthread-config program (for GNU Pth library) + +case $host_os in + + freebsd*) + + # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) + # lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) + + ax_pthread_flags="-kthread lthread $ax_pthread_flags" + ;; + + hpux*) + + # From the cc(1) man page: "[-mt] Sets various -D flags to enable + # multi-threading and also sets -lpthread." + + ax_pthread_flags="-mt -pthread pthread $ax_pthread_flags" + ;; + + openedition*) + + # IBM z/OS requires a feature-test macro to be defined in order to + # enable POSIX threads at all, so give the user a hint if this is + # not set. (We don't define these ourselves, as they can affect + # other portions of the system API in unpredictable ways.) 
+ + AC_EGREP_CPP([AX_PTHREAD_ZOS_MISSING], + [ +# if !defined(_OPEN_THREADS) && !defined(_UNIX03_THREADS) + AX_PTHREAD_ZOS_MISSING +# endif + ], + [AC_MSG_WARN([IBM z/OS requires -D_OPEN_THREADS or -D_UNIX03_THREADS to enable pthreads support.])]) + ;; + + solaris*) + + # On Solaris (at least, for some versions), libc contains stubbed + # (non-functional) versions of the pthreads routines, so link-based + # tests will erroneously succeed. (N.B.: The stubs are missing + # pthread_cleanup_push, or rather a function called by this macro, + # so we could check for that, but who knows whether they'll stub + # that too in a future libc.) So we'll check first for the + # standard Solaris way of linking pthreads (-mt -lpthread). + + ax_pthread_flags="-mt,pthread pthread $ax_pthread_flags" + ;; +esac + +# GCC generally uses -pthread, or -pthreads on some platforms (e.g. SPARC) + +AS_IF([test "x$GCC" = "xyes"], + [ax_pthread_flags="-pthread -pthreads $ax_pthread_flags"]) + +# The presence of a feature test macro requesting re-entrant function +# definitions is, on some systems, a strong hint that pthreads support is +# correctly enabled + +case $host_os in + darwin* | hpux* | linux* | osf* | solaris*) + ax_pthread_check_macro="_REENTRANT" + ;; + + aix*) + ax_pthread_check_macro="_THREAD_SAFE" + ;; + + *) + ax_pthread_check_macro="--" + ;; +esac +AS_IF([test "x$ax_pthread_check_macro" = "x--"], + [ax_pthread_check_cond=0], + [ax_pthread_check_cond="!defined($ax_pthread_check_macro)"]) + +# Are we compiling with Clang? 
+ +AC_CACHE_CHECK([whether $CC is Clang], + [ax_cv_PTHREAD_CLANG], + [ax_cv_PTHREAD_CLANG=no + # Note that Autoconf sets GCC=yes for Clang as well as GCC + if test "x$GCC" = "xyes"; then + AC_EGREP_CPP([AX_PTHREAD_CC_IS_CLANG], + [/* Note: Clang 2.7 lacks __clang_[a-z]+__ */ +# if defined(__clang__) && defined(__llvm__) + AX_PTHREAD_CC_IS_CLANG +# endif + ], + [ax_cv_PTHREAD_CLANG=yes]) + fi + ]) +ax_pthread_clang="$ax_cv_PTHREAD_CLANG" + +ax_pthread_clang_warning=no + +# Clang needs special handling, because older versions handle the -pthread +# option in a rather... idiosyncratic way + +if test "x$ax_pthread_clang" = "xyes"; then + + # Clang takes -pthread; it has never supported any other flag + + # (Note 1: This will need to be revisited if a system that Clang + # supports has POSIX threads in a separate library. This tends not + # to be the way of modern systems, but it's conceivable.) + + # (Note 2: On some systems, notably Darwin, -pthread is not needed + # to get POSIX threads support; the API is always present and + # active. We could reasonably leave PTHREAD_CFLAGS empty. But + # -pthread does define _REENTRANT, and while the Darwin headers + # ignore this macro, third-party headers might not.) + + PTHREAD_CFLAGS="-pthread" + PTHREAD_LIBS= + + ax_pthread_ok=yes + + # However, older versions of Clang make a point of warning the user + # that, in an invocation where only linking and no compilation is + # taking place, the -pthread option has no effect ("argument unused + # during compilation"). They expect -pthread to be passed in only + # when source code is being compiled. + # + # Problem is, this is at odds with the way Automake and most other + # C build frameworks function, which is that the same flags used in + # compilation (CFLAGS) are also used in linking. 
Many systems + # supported by AX_PTHREAD require exactly this for POSIX threads + # support, and in fact it is often not straightforward to specify a + # flag that is used only in the compilation phase and not in + # linking. Such a scenario is extremely rare in practice. + # + # Even though use of the -pthread flag in linking would only print + # a warning, this can be a nuisance for well-run software projects + # that build with -Werror. So if the active version of Clang has + # this misfeature, we search for an option to squash it. + + AC_CACHE_CHECK([whether Clang needs flag to prevent "argument unused" warning when linking with -pthread], + [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG], + [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG=unknown + # Create an alternate version of $ac_link that compiles and + # links in two steps (.c -> .o, .o -> exe) instead of one + # (.c -> exe), because the warning occurs only in the second + # step + ax_pthread_save_ac_link="$ac_link" + ax_pthread_sed='s/conftest\.\$ac_ext/conftest.$ac_objext/g' + ax_pthread_link_step=`$as_echo "$ac_link" | sed "$ax_pthread_sed"` + ax_pthread_2step_ac_link="($ac_compile) && (echo ==== >&5) && ($ax_pthread_link_step)" + ax_pthread_save_CFLAGS="$CFLAGS" + for ax_pthread_try in '' -Qunused-arguments -Wno-unused-command-line-argument unknown; do + AS_IF([test "x$ax_pthread_try" = "xunknown"], [break]) + CFLAGS="-Werror -Wunknown-warning-option $ax_pthread_try -pthread $ax_pthread_save_CFLAGS" + ac_link="$ax_pthread_save_ac_link" + AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])], + [ac_link="$ax_pthread_2step_ac_link" + AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])], + [break]) + ]) + done + ac_link="$ax_pthread_save_ac_link" + CFLAGS="$ax_pthread_save_CFLAGS" + AS_IF([test "x$ax_pthread_try" = "x"], [ax_pthread_try=no]) + ax_cv_PTHREAD_CLANG_NO_WARN_FLAG="$ax_pthread_try" + ]) + + case "$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG" in + no | unknown) ;; + *) 
PTHREAD_CFLAGS="$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG $PTHREAD_CFLAGS" ;; + esac + +fi # $ax_pthread_clang = yes + +if test "x$ax_pthread_ok" = "xno"; then +for ax_pthread_try_flag in $ax_pthread_flags; do + + case $ax_pthread_try_flag in + none) + AC_MSG_CHECKING([whether pthreads work without any flags]) + ;; + + -mt,pthread) + AC_MSG_CHECKING([whether pthreads work with -mt -lpthread]) + PTHREAD_CFLAGS="-mt" + PTHREAD_LIBS="-lpthread" + ;; + + -*) + AC_MSG_CHECKING([whether pthreads work with $ax_pthread_try_flag]) + PTHREAD_CFLAGS="$ax_pthread_try_flag" + ;; + + pthread-config) + AC_CHECK_PROG([ax_pthread_config], [pthread-config], [yes], [no]) + AS_IF([test "x$ax_pthread_config" = "xno"], [continue]) + PTHREAD_CFLAGS="`pthread-config --cflags`" + PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" + ;; + + *) + AC_MSG_CHECKING([for the pthreads library -l$ax_pthread_try_flag]) + PTHREAD_LIBS="-l$ax_pthread_try_flag" + ;; + esac + + ax_pthread_save_CFLAGS="$CFLAGS" + ax_pthread_save_LIBS="$LIBS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + + # Check for various functions. We must include pthread.h, + # since some functions may be macros. (On the Sequent, we + # need a special flag -Kthread to make this header compile.) + # We check for pthread_join because it is in -lpthread on IRIX + # while pthread_create is in libc. We check for pthread_attr_init + # due to DEC craziness with -lpthreads. We check for + # pthread_cleanup_push because it is one of the few pthread + # functions on Solaris that doesn't have a non-functional libc stub. + # We try pthread_create on general principles. 
+ + AC_LINK_IFELSE([AC_LANG_PROGRAM([#include +# if $ax_pthread_check_cond +# error "$ax_pthread_check_macro must be defined" +# endif + static void routine(void *a) { a = 0; } + static void *start_routine(void *a) { return a; }], + [pthread_t th; pthread_attr_t attr; + pthread_create(&th, 0, start_routine, 0); + pthread_join(th, 0); + pthread_attr_init(&attr); + pthread_cleanup_push(routine, 0); + pthread_cleanup_pop(0) /* ; */])], + [ax_pthread_ok=yes], + []) + + CFLAGS="$ax_pthread_save_CFLAGS" + LIBS="$ax_pthread_save_LIBS" + + AC_MSG_RESULT([$ax_pthread_ok]) + AS_IF([test "x$ax_pthread_ok" = "xyes"], [break]) + + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" +done +fi + +# Various other checks: +if test "x$ax_pthread_ok" = "xyes"; then + ax_pthread_save_CFLAGS="$CFLAGS" + ax_pthread_save_LIBS="$LIBS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + + # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. + AC_CACHE_CHECK([for joinable pthread attribute], + [ax_cv_PTHREAD_JOINABLE_ATTR], + [ax_cv_PTHREAD_JOINABLE_ATTR=unknown + for ax_pthread_attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do + AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], + [int attr = $ax_pthread_attr; return attr /* ; */])], + [ax_cv_PTHREAD_JOINABLE_ATTR=$ax_pthread_attr; break], + []) + done + ]) + AS_IF([test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xunknown" && \ + test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xPTHREAD_CREATE_JOINABLE" && \ + test "x$ax_pthread_joinable_attr_defined" != "xyes"], + [AC_DEFINE_UNQUOTED([PTHREAD_CREATE_JOINABLE], + [$ax_cv_PTHREAD_JOINABLE_ATTR], + [Define to necessary symbol if this constant + uses a non-standard name on your system.]) + ax_pthread_joinable_attr_defined=yes + ]) + + AC_CACHE_CHECK([whether more special flags are required for pthreads], + [ax_cv_PTHREAD_SPECIAL_FLAGS], + [ax_cv_PTHREAD_SPECIAL_FLAGS=no + case $host_os in + solaris*) + ax_cv_PTHREAD_SPECIAL_FLAGS="-D_POSIX_PTHREAD_SEMANTICS" + ;; + esac + ]) + AS_IF([test 
"x$ax_cv_PTHREAD_SPECIAL_FLAGS" != "xno" && \ + test "x$ax_pthread_special_flags_added" != "xyes"], + [PTHREAD_CFLAGS="$ax_cv_PTHREAD_SPECIAL_FLAGS $PTHREAD_CFLAGS" + ax_pthread_special_flags_added=yes]) + + AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT], + [ax_cv_PTHREAD_PRIO_INHERIT], + [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include ]], + [[int i = PTHREAD_PRIO_INHERIT;]])], + [ax_cv_PTHREAD_PRIO_INHERIT=yes], + [ax_cv_PTHREAD_PRIO_INHERIT=no]) + ]) + AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes" && \ + test "x$ax_pthread_prio_inherit_defined" != "xyes"], + [AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], [1], [Have PTHREAD_PRIO_INHERIT.]) + ax_pthread_prio_inherit_defined=yes + ]) + + CFLAGS="$ax_pthread_save_CFLAGS" + LIBS="$ax_pthread_save_LIBS" + + # More AIX lossage: compile with *_r variant + if test "x$GCC" != "xyes"; then + case $host_os in + aix*) + AS_CASE(["x/$CC"], + [x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6], + [#handle absolute path differently from PATH based program lookup + AS_CASE(["x$CC"], + [x/*], + [AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"])], + [AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC])])]) + ;; + esac + fi +fi + +test -n "$PTHREAD_CC" || PTHREAD_CC="$CC" + +AC_SUBST([PTHREAD_LIBS]) +AC_SUBST([PTHREAD_CFLAGS]) +AC_SUBST([PTHREAD_CC]) + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test "x$ax_pthread_ok" = "xyes"; then + ifelse([$1],,[AC_DEFINE([HAVE_PTHREAD],[1],[Define if you have POSIX threads libraries and header files.])],[$1]) + : +else + ax_pthread_ok=no + $2 +fi +AC_LANG_POP +])dnl AX_PTHREAD diff --git a/src/3rdparty/argon2/qmake/arch/arch.pro b/src/3rdparty/argon2/qmake/arch/arch.pro new file mode 100644 index 00000000..b1a83150 --- /dev/null +++ b/src/3rdparty/argon2/qmake/arch/arch.pro @@ -0,0 +1,3 @@ +TEMPLATE = subdirs + +SUBDIRS += $$ARCH diff --git a/src/3rdparty/argon2/qmake/arch/generic/generic.pro b/src/3rdparty/argon2/qmake/arch/generic/generic.pro 
new file mode 100644 index 00000000..96710850 --- /dev/null +++ b/src/3rdparty/argon2/qmake/arch/generic/generic.pro @@ -0,0 +1 @@ +TEMPLATE = subdirs diff --git a/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-avx2/libargon2-avx2.pro b/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-avx2/libargon2-avx2.pro new file mode 100644 index 00000000..449dc508 --- /dev/null +++ b/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-avx2/libargon2-avx2.pro @@ -0,0 +1,23 @@ +QT -= core gui + +TARGET = argon2-avx2 +TEMPLATE = lib +CONFIG += staticlib + +ARGON2_ROOT = ../../../.. + +INCLUDEPATH += \ + $$ARGON2_ROOT/include \ + $$ARGON2_ROOT/lib \ + $$ARGON2_ROOT/arch/$$ARCH/lib + +USE_AVX2 { + DEFINES += HAVE_AVX2 + QMAKE_CFLAGS += -mavx2 +} + +SOURCES += \ + $$ARGON2_ROOT/arch/x86_64/lib/argon2-avx2.c + +HEADERS += \ + $$ARGON2_ROOT/arch/x86_64/lib/argon2-avx2.h diff --git a/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-avx512f/libargon2-avx512f.pro b/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-avx512f/libargon2-avx512f.pro new file mode 100644 index 00000000..a4a32e06 --- /dev/null +++ b/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-avx512f/libargon2-avx512f.pro @@ -0,0 +1,23 @@ +QT -= core gui + +TARGET = argon2-avx512f +TEMPLATE = lib +CONFIG += staticlib + +ARGON2_ROOT = ../../../.. 
+ +INCLUDEPATH += \ + $$ARGON2_ROOT/include \ + $$ARGON2_ROOT/lib \ + $$ARGON2_ROOT/arch/$$ARCH/lib + +USE_AVX512F { + DEFINES += HAVE_AVX512F + QMAKE_CFLAGS += -mavx512f +} + +SOURCES += \ + $$ARGON2_ROOT/arch/x86_64/lib/argon2-avx512f.c + +HEADERS += \ + $$ARGON2_ROOT/arch/x86_64/lib/argon2-avx512f.h diff --git a/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-sse2/libargon2-sse2.pro b/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-sse2/libargon2-sse2.pro new file mode 100644 index 00000000..49e7deee --- /dev/null +++ b/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-sse2/libargon2-sse2.pro @@ -0,0 +1,24 @@ +QT -= core gui + +TARGET = argon2-sse2 +TEMPLATE = lib +CONFIG += staticlib + +ARGON2_ROOT = ../../../.. + +INCLUDEPATH += \ + $$ARGON2_ROOT/include \ + $$ARGON2_ROOT/lib \ + $$ARGON2_ROOT/arch/$$ARCH/lib + +USE_SSE2 | USE_SSSE3 | USE_XOP | USE_AVX2 { + DEFINES += HAVE_SSE2 + QMAKE_CFLAGS += -msse2 +} + +SOURCES += \ + $$ARGON2_ROOT/arch/x86_64/lib/argon2-sse2.c + +HEADERS += \ + $$ARGON2_ROOT/arch/x86_64/lib/argon2-sse2.h \ + $$ARGON2_ROOT/arch/x86_64/lib/argon2-template-128.h diff --git a/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-ssse3/libargon2-ssse3.pro b/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-ssse3/libargon2-ssse3.pro new file mode 100644 index 00000000..53ebe6e5 --- /dev/null +++ b/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-ssse3/libargon2-ssse3.pro @@ -0,0 +1,24 @@ +QT -= core gui + +TARGET = argon2-ssse3 +TEMPLATE = lib +CONFIG += staticlib + +ARGON2_ROOT = ../../../.. 
+ +INCLUDEPATH += \ + $$ARGON2_ROOT/include \ + $$ARGON2_ROOT/lib \ + $$ARGON2_ROOT/arch/$$ARCH/lib + +USE_SSSE3 | USE_XOP | USE_AVX2 { + DEFINES += HAVE_SSSE3 + QMAKE_CFLAGS += -mssse3 +} + +SOURCES += \ + $$ARGON2_ROOT/arch/x86_64/lib/argon2-ssse3.c + +HEADERS += \ + $$ARGON2_ROOT/arch/x86_64/lib/argon2-ssse3.h \ + $$ARGON2_ROOT/arch/x86_64/lib/argon2-template-128.h diff --git a/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-xop/libargon2-xop.pro b/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-xop/libargon2-xop.pro new file mode 100644 index 00000000..85517dce --- /dev/null +++ b/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-xop/libargon2-xop.pro @@ -0,0 +1,24 @@ +QT -= core gui + +TARGET = argon2-xop +TEMPLATE = lib +CONFIG += staticlib + +ARGON2_ROOT = ../../../.. + +INCLUDEPATH += \ + $$ARGON2_ROOT/include \ + $$ARGON2_ROOT/lib \ + $$ARGON2_ROOT/arch/$$ARCH/lib + +USE_XOP { + DEFINES += HAVE_XOP + QMAKE_CFLAGS += -mxop +} + +SOURCES += \ + $$ARGON2_ROOT/arch/x86_64/lib/argon2-xop.c + +HEADERS += \ + $$ARGON2_ROOT/arch/x86_64/lib/argon2-xop.h \ + $$ARGON2_ROOT/arch/x86_64/lib/argon2-template-128.h diff --git a/src/3rdparty/argon2/qmake/arch/x86_64/x86_64.pro b/src/3rdparty/argon2/qmake/arch/x86_64/x86_64.pro new file mode 100644 index 00000000..b3cfe029 --- /dev/null +++ b/src/3rdparty/argon2/qmake/arch/x86_64/x86_64.pro @@ -0,0 +1,8 @@ +TEMPLATE = subdirs + +SUBDIRS += \ + libargon2-sse2 \ + libargon2-ssse3 \ + libargon2-xop \ + libargon2-avx2 \ + libargon2-avx512f diff --git a/src/3rdparty/argon2/qmake/argon2-bench2/argon2-bench2.pro b/src/3rdparty/argon2/qmake/argon2-bench2/argon2-bench2.pro new file mode 100644 index 00000000..1e858d42 --- /dev/null +++ b/src/3rdparty/argon2/qmake/argon2-bench2/argon2-bench2.pro @@ -0,0 +1,19 @@ +TEMPLATE = app +CONFIG += console c++11 +CONFIG -= app_bundle +CONFIG -= qt + +ARGON2_ROOT = ../.. 
+ +SOURCES += \ + $$ARGON2_ROOT/src/bench2.c + +HEADERS += \ + $$ARGON2_ROOT/src/timing.h + +win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../libargon2/release/ -largon2 +else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../libargon2/debug/ -largon2 +else:unix: LIBS += -L$$OUT_PWD/../libargon2/ -largon2 + +INCLUDEPATH += $$PWD/../../include +DEPENDPATH += $$PWD/../../include diff --git a/src/3rdparty/argon2/qmake/argon2-genkat/argon2-genkat.pro b/src/3rdparty/argon2/qmake/argon2-genkat/argon2-genkat.pro new file mode 100644 index 00000000..c397efec --- /dev/null +++ b/src/3rdparty/argon2/qmake/argon2-genkat/argon2-genkat.pro @@ -0,0 +1,16 @@ +TEMPLATE = app +CONFIG += console c++11 +CONFIG -= app_bundle +CONFIG -= qt + +ARGON2_ROOT = ../.. + +SOURCES += \ + $$ARGON2_ROOT/src/genkat.c + +win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../libargon2/release/ -largon2 +else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../libargon2/debug/ -largon2 +else:unix: LIBS += -L$$OUT_PWD/../libargon2/ -largon2 + +INCLUDEPATH += $$PWD/../../include +DEPENDPATH += $$PWD/../../include diff --git a/src/3rdparty/argon2/qmake/argon2-test/argon2-test.pro b/src/3rdparty/argon2/qmake/argon2-test/argon2-test.pro new file mode 100644 index 00000000..e0d23c45 --- /dev/null +++ b/src/3rdparty/argon2/qmake/argon2-test/argon2-test.pro @@ -0,0 +1,16 @@ +TEMPLATE = app +CONFIG += console c++11 +CONFIG -= app_bundle +CONFIG -= qt + +ARGON2_ROOT = ../.. 
+ +SOURCES += \ + $$ARGON2_ROOT/tests/test.c + +win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../libargon2/release/ -largon2 +else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../libargon2/debug/ -largon2 +else:unix: LIBS += -L$$OUT_PWD/../libargon2/ -largon2 + +INCLUDEPATH += $$PWD/../../include +DEPENDPATH += $$PWD/../../include diff --git a/src/3rdparty/argon2/qmake/argon2.pro b/src/3rdparty/argon2/qmake/argon2.pro new file mode 100644 index 00000000..0949d39c --- /dev/null +++ b/src/3rdparty/argon2/qmake/argon2.pro @@ -0,0 +1,9 @@ +TEMPLATE = subdirs + +SUBDIRS += \ + arch \ + libargon2 \ + argon2 \ + argon2-genkat \ + argon2-bench2 \ + argon2-test diff --git a/src/3rdparty/argon2/qmake/argon2/argon2.pro b/src/3rdparty/argon2/qmake/argon2/argon2.pro new file mode 100644 index 00000000..ff8c2049 --- /dev/null +++ b/src/3rdparty/argon2/qmake/argon2/argon2.pro @@ -0,0 +1,18 @@ +TEMPLATE = app +CONFIG += console c++11 +CONFIG -= app_bundle +CONFIG -= qt + +ARGON2_ROOT = ../.. + +SOURCES += \ + $$ARGON2_ROOT/src/run.c + +win32: DEFINES += argon2_EXPORT + +win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../libargon2/release/ -largon2 +else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../libargon2/debug/ -largon2 +else:unix: LIBS += -L$$OUT_PWD/../libargon2/ -largon2 + +INCLUDEPATH += $$PWD/../../include $$PWD/../../lib +DEPENDPATH += $$PWD/../../include $$PWD/../../lib diff --git a/src/3rdparty/argon2/qmake/libargon2/libargon2.pro b/src/3rdparty/argon2/qmake/libargon2/libargon2.pro new file mode 100644 index 00000000..77846168 --- /dev/null +++ b/src/3rdparty/argon2/qmake/libargon2/libargon2.pro @@ -0,0 +1,119 @@ +#------------------------------------------------- +# +# Project created by QtCreator 2016-08-08T17:43:00 +# +#------------------------------------------------- + +QT -= core gui + +TARGET = argon2 +TEMPLATE = lib + +ARGON2_ROOT = ../.. 
+ +INCLUDEPATH += \ + $$ARGON2_ROOT/include \ + $$ARGON2_ROOT/lib + +SOURCES += \ + $$ARGON2_ROOT/lib/argon2.c \ + $$ARGON2_ROOT/lib/core.c \ + $$ARGON2_ROOT/lib/encoding.c \ + $$ARGON2_ROOT/lib/genkat.c \ + $$ARGON2_ROOT/lib/impl-select.c \ + $$ARGON2_ROOT/lib/thread.c \ + $$ARGON2_ROOT/lib/blake2/blake2.c + +HEADERS += \ + $$ARGON2_ROOT/include/argon2.h \ + $$ARGON2_ROOT/lib/argon2-template-64.h \ + $$ARGON2_ROOT/lib/core.h \ + $$ARGON2_ROOT/lib/encoding.h \ + $$ARGON2_ROOT/lib/genkat.h \ + $$ARGON2_ROOT/lib/impl-select.h \ + $$ARGON2_ROOT/lib/thread.h \ + $$ARGON2_ROOT/lib/blake2/blake2.h \ + $$ARGON2_ROOT/lib/blake2/blake2-impl.h + +equals(ARCH, x86_64) { + SOURCES += \ + $$ARGON2_ROOT/arch/$$ARCH/lib/cpu-flags.c \ + $$ARGON2_ROOT/arch/$$ARCH/lib/argon2-arch.c + + HEADERS += \ + $$ARGON2_ROOT/arch/$$ARCH/lib/cpu-flags.h + + # libargon2-sse2.a: + win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-sse2/release/ -largon2-sse2 + else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-sse2/debug/ -largon2-sse2 + else:unix: LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-sse2/ -largon2-sse2 + + DEPENDPATH += $$PWD/../arch/x86_64/libargon2-sse2 + + win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-sse2/release/libargon2-sse2.a + else:win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-sse2/debug/libargon2-sse2.a + else:win32:!win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-sse2/release/argon2-sse2.lib + else:win32:!win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-sse2/debug/argon2-sse2.lib + else:unix: PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-sse2/libargon2-sse2.a + + # libargon2-ssse3.a: + win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-ssse3/release/ -largon2-ssse3 + 
else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-ssse3/debug/ -largon2-ssse3 + else:unix: LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-ssse3/ -largon2-ssse3 + + DEPENDPATH += $$PWD/../arch/x86_64/libargon2-ssse3 + + win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-ssse3/release/libargon2-ssse3.a + else:win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-ssse3/debug/libargon2-ssse3.a + else:win32:!win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-ssse3/release/argon2-ssse3.lib + else:win32:!win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-ssse3/debug/argon2-ssse3.lib + else:unix: PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-ssse3/libargon2-ssse3.a + + # libargon2-xop.a: + win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-xop/release/ -largon2-xop + else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-xop/debug/ -largon2-xop + else:unix: LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-xop/ -largon2-xop + + DEPENDPATH += $$PWD/../arch/x86_64/libargon2-xop + + win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-xop/release/libargon2-xop.a + else:win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-xop/debug/libargon2-xop.a + else:win32:!win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-xop/release/argon2-xop.lib + else:win32:!win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-xop/debug/argon2-xop.lib + else:unix: PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-xop/libargon2-xop.a + + # libargon2-avx2.a: + win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-avx2/release/ -largon2-avx2 + 
else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-avx2/debug/ -largon2-avx2 + else:unix: LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-avx2/ -largon2-avx2 + + DEPENDPATH += $$PWD/../arch/x86_64/libargon2-avx2 + + win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx2/release/libargon2-avx2.a + else:win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx2/debug/libargon2-avx2.a + else:win32:!win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx2/release/argon2-avx2.lib + else:win32:!win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx2/debug/argon2-avx2.lib + else:unix: PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx2/libargon2-avx2.a + + # libargon2-avx512f.a: + win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-avx512f/release/ -largon2-avx512f + else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-avx512f/debug/ -largon2-avx512f + else:unix: LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-avx512f/ -largon2-avx512f + + DEPENDPATH += $$PWD/../arch/x86_64/libargon2-avx512f + + win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx512f/release/libargon2-avx512f.a + else:win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx512f/debug/libargon2-avx512f.a + else:win32:!win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx512f/release/argon2-avx512f.lib + else:win32:!win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx512f/debug/argon2-avx512f.lib + else:unix: PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx512f/libargon2-avx512f.a +} +equals(ARCH, generic) { + SOURCES += \ + 
$$ARGON2_ROOT/arch/$$ARCH/lib/argon2-arch.c +} + +unix { + target.path = /usr/lib + INSTALLS += target +} diff --git a/src/3rdparty/argon2/scripts/metacentrum/start-all-benchmarks.sh b/src/3rdparty/argon2/scripts/metacentrum/start-all-benchmarks.sh new file mode 100644 index 00000000..47e925c6 --- /dev/null +++ b/src/3rdparty/argon2/scripts/metacentrum/start-all-benchmarks.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +dirname="$(dirname "$0")" + +cd "$dirname" || exit 1 + +./start-benchmark.sh luna +./start-benchmark.sh lex '' '' '' '' '' backfill +./start-benchmark.sh mandos +./start-benchmark.sh zubat +PBS_SERVER=wagap.cerit-sc.cz \ + ./start-benchmark.sh zapat '' '' '' '' '' default@wagap.cerit-sc.cz diff --git a/src/3rdparty/argon2/scripts/metacentrum/start-benchmark.sh b/src/3rdparty/argon2/scripts/metacentrum/start-benchmark.sh new file mode 100644 index 00000000..beedf748 --- /dev/null +++ b/src/3rdparty/argon2/scripts/metacentrum/start-benchmark.sh @@ -0,0 +1,75 @@ +#!/bin/bash + +machine="$1" +max_t_cost="$2" +max_m_cost="$3" +max_lanes="$4" +branch="$5" +duration="$6" +queue="$7" +run_tests="$8" + +if [ -z "$machine" ]; then + echo "ERROR: Machine must be specified!" 
1>&2 + exit 1 +fi + +if [ -z "$max_t_cost" ]; then + max_t_cost=16 +fi + +if [ -z "$max_m_cost" ]; then + max_m_cost=$((8 * 1024 * 1024)) +fi + +if [ -z "$max_lanes" ]; then + max_lanes=16 +fi + +if [ -z "$branch" ]; then + branch='master' +fi + +if [ -z "$duration" ]; then + duration=2h +fi + +REPO_URL='https://github.com/WOnder93/argon2.git' + +dest_dir="$(pwd)" + +task_file="$(mktemp)" + +cat >$task_file <"$dest_dir/\$PBS_JOBID/benchmark-$machine-$branch.csv" +EOF + +qsub "$task_file" + +rm -f "$task_file" diff --git a/src/3rdparty/argon2/scripts/run-benchmark.sh b/src/3rdparty/argon2/scripts/run-benchmark.sh new file mode 100644 index 00000000..f023b8e8 --- /dev/null +++ b/src/3rdparty/argon2/scripts/run-benchmark.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +max_t_cost="$1" +max_m_cost="$2" +max_lanes="$3" + +if [ -z "$max_t_cost" ]; then + echo "ERROR: Maximum time cost must be specified!" 1>&2 + exit 1 +fi + +if [ -z "$max_m_cost" ]; then + echo "ERROR: Maximum memory cost must be specified!" 1>&2 + exit 1 +fi + +if [ -z "$max_lanes" ]; then + echo "ERROR: Maximum number of lanes must be specified!" 1>&2 + exit 1 +fi + +dirname="$(dirname "$0")" + +cd "$dirname/.." 
/*
 * Returns the smallest value among the first `count` entries of `samples`.
 * With count == 0 nothing is read and INFINITY is returned.  Entries that do
 * not compare less than the running minimum (including NaN) are ignored.
 */
static double pick_min(const double *samples, size_t count)
{
    const double *cursor = samples;
    double smallest = INFINITY;
    size_t remaining;

    for (remaining = count; remaining > 0; --remaining, ++cursor) {
        if (*cursor < smallest) {
            smallest = *cursor;
        }
    }
    return smallest;
}
+ ctx.flags = ARGON2_DEFAULT_FLAGS; + + bench_samples = (BENCH_MIN_PASSES * p) / (t_cost * m_cost); + bench_samples += (BENCH_MIN_PASSES * p) % (t_cost * m_cost) != 0; + + if (bench_samples > BENCH_MAX_SAMPLES) { + bench_samples = BENCH_MAX_SAMPLES; + } + for (i = 0; i < bench_samples; i++) { + timestamp_store(&start); + res = argon2_ctx_mem(&ctx, Argon2_d, memory, memory_size); + timestamp_store(&end); + if (res != ARGON2_OK) { + return res; + } + + ms_d[i] = timestamp_span_ms(&start, &end); + } + + for (i = 0; i < bench_samples; i++) { + timestamp_store(&start); + res = argon2_ctx_mem(&ctx, Argon2_i, memory, memory_size); + timestamp_store(&end); + if (res != ARGON2_OK) { + return res; + } + + ms_i[i] = timestamp_span_ms(&start, &end); + } + + for (i = 0; i < bench_samples; i++) { + timestamp_store(&start); + res = argon2_ctx_mem(&ctx, Argon2_id, memory, memory_size); + timestamp_store(&end); + if (res != ARGON2_OK) { + return res; + } + + ms_id[i] = timestamp_span_ms(&start, &end); + } + + ms_d_final = pick_min(ms_d, bench_samples); + ms_i_final = pick_min(ms_i, bench_samples); + ms_id_final = pick_min(ms_id, bench_samples); + + printf("%8lu%16lu%8lu%16.6lf%16.6lf%16.6lf\n", + (unsigned long)t_cost, (unsigned long)m_cost, (unsigned long)p, + ms_d_final, ms_i_final, ms_id_final); + return 0; +} + +int main(int argc, const char * const *argv) +{ + uint32_t max_t_cost = BENCH_MAX_T_COST; + uint32_t max_m_cost = BENCH_MAX_M_COST; + uint32_t max_p = BENCH_MAX_THREADS; + uint32_t t_cost, m_cost, p; + char *end; + int res; + + if (argc >= 2) { + max_t_cost = strtoul(argv[1], &end, 10); + if (end == argv[1]) { + fprintf(stderr, "ERROR: Invalid number format!\n"); + return 1; + } + } + + if (argc >= 3) { + max_m_cost = strtoul(argv[2], &end, 10); + if (end == argv[2]) { + fprintf(stderr, "ERROR: Invalid number format!\n"); + return 1; + } + } + + if (argc >= 4) { + max_p = strtoul(argv[3], &end, 10); + if (end == argv[3]) { + fprintf(stderr, "ERROR: Invalid number 
format!\n"); + return 1; + } + } + + argon2_select_impl(stderr, "[libargon2] "); + + size_t memory_size = (size_t)max_m_cost * (size_t)ARGON2_BLOCK_SIZE; + void *memory = malloc(memory_size); + if (memory == NULL) { + fprintf(stderr, "ERROR: Memory allocation failed!\n"); + return 1; + } + /* make sure the whole memory gets mapped to physical pages: */ + memset(memory, 0xAB, memory_size); + + printf("%8s%16s%8s%16s%16s%16s\n", "t_cost", "m_cost", "threads", + "Argon2d (ms)", "Argon2i (ms)", "Argon2id (ms)"); + for (t_cost = 1; t_cost <= max_t_cost; t_cost *= 2) { + uint32_t min_m_cost = max_p * ARGON2_SYNC_POINTS * 2; + for (m_cost = min_m_cost; m_cost <= max_m_cost; m_cost *= 2) { + for (p = 1; p <= max_p; p *= 2) { + res = benchmark(memory, memory_size, t_cost, m_cost, p); + if (res != 0) { + free(memory); + return res; + } + } + } + } + free(memory); + return 0; +} diff --git a/src/3rdparty/argon2/src/genkat.c b/src/3rdparty/argon2/src/genkat.c new file mode 100644 index 00000000..7295c985 --- /dev/null +++ b/src/3rdparty/argon2/src/genkat.c @@ -0,0 +1,90 @@ +#include +#include +#include + +#include "argon2.h" + +static void fatal(const char *error) { + fprintf(stderr, "Error: %s\n", error); + exit(1); +} + +static void generate_testvectors(argon2_type type, const uint32_t version) { +#define TEST_OUTLEN 32 +#define TEST_PWDLEN 32 +#define TEST_SALTLEN 16 +#define TEST_SECRETLEN 8 +#define TEST_ADLEN 12 + argon2_context context; + + unsigned char out[TEST_OUTLEN]; + unsigned char pwd[TEST_PWDLEN]; + unsigned char salt[TEST_SALTLEN]; + unsigned char secret[TEST_SECRETLEN]; + unsigned char ad[TEST_ADLEN]; + const allocate_fptr myown_allocator = NULL; + const deallocate_fptr myown_deallocator = NULL; + + unsigned t_cost = 3; + unsigned m_cost = 32; + unsigned lanes = 4; + + memset(pwd, 1, TEST_OUTLEN); + memset(salt, 2, TEST_SALTLEN); + memset(secret, 3, TEST_SECRETLEN); + memset(ad, 4, TEST_ADLEN); + + context.out = out; + context.outlen = TEST_OUTLEN; + 
context.version = version; + context.pwd = pwd; + context.pwdlen = TEST_PWDLEN; + context.salt = salt; + context.saltlen = TEST_SALTLEN; + context.secret = secret; + context.secretlen = TEST_SECRETLEN; + context.ad = ad; + context.adlen = TEST_ADLEN; + context.t_cost = t_cost; + context.m_cost = m_cost; + context.lanes = lanes; + context.threads = lanes; + context.allocate_cbk = myown_allocator; + context.free_cbk = myown_deallocator; + context.flags = ARGON2_DEFAULT_FLAGS | ARGON2_FLAG_GENKAT; + +#undef TEST_OUTLEN +#undef TEST_PWDLEN +#undef TEST_SALTLEN +#undef TEST_SECRETLEN +#undef TEST_ADLEN + + argon2_ctx(&context, type); +} + +int main(int argc, char *argv[]) { + /* Get and check Argon2 type */ + const char *type_str = (argc > 1) ? argv[1] : "i"; + argon2_type type = Argon2_i; + uint32_t version = ARGON2_VERSION_NUMBER; + if (!strcmp(type_str, "d")) { + type = Argon2_d; + } else if (!strcmp(type_str, "i")) { + type = Argon2_i; + } else if (!strcmp(type_str, "id")) { + type = Argon2_id; + } else { + fatal("wrong Argon2 type"); + } + + /* Get and check Argon2 version number */ + if(argc > 2) { + version = strtoul(argv[2], NULL, 10); + } + if (ARGON2_VERSION_10 != version && ARGON2_VERSION_NUMBER != version) { + fatal("wrong Argon2 version number"); + } + + generate_testvectors(type, version); + return ARGON2_OK; +} diff --git a/src/3rdparty/argon2/src/run.c b/src/3rdparty/argon2/src/run.c new file mode 100644 index 00000000..9588a632 --- /dev/null +++ b/src/3rdparty/argon2/src/run.c @@ -0,0 +1,315 @@ +/* + * Argon2 source code package + * + * Written by Daniel Dinu and Dmitry Khovratovich, 2015 + * + * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. + * + * You should have received a copy of the CC0 Public Domain Dedication along + * with + * this software. If not, see + * . 
+ */ + +#define _GNU_SOURCE 1 + +#include +#include +#include +#include +#include +#include + +#include "argon2.h" +#include "core.h" + +#define T_COST_DEF 3 +#define LOG_M_COST_DEF 12 /* 2^12 = 4 MiB */ +#define LANES_DEF 1 +#define THREADS_DEF 1 +#define OUTLEN_DEF 32 +#define MAX_PASS_LEN 128 + +#define UNUSED_PARAMETER(x) (void)(x) + +static void usage(const char *cmd) { + printf("Usage: %s [-h] salt [-i|-d|-id] [-t iterations] [-m memory] " + "[-p parallelism] [-l hash length] [-e|-r] [-v (10|13)]\n", + cmd); + printf("\tPassword is read from stdin\n"); + printf("Parameters:\n"); + printf("\tsalt\t\tThe salt to use, at least 8 characters\n"); + printf("\t-i\t\tUse Argon2i (this is the default)\n"); + printf("\t-d\t\tUse Argon2d instead of Argon2i\n"); + printf("\t-id\t\tUse Argon2id instead of Argon2i\n"); + printf("\t-t N\t\tSets the number of iterations to N (default = %d)\n", + T_COST_DEF); + printf("\t-m N\t\tSets the memory usage of 2^N KiB (default %d)\n", + LOG_M_COST_DEF); + printf("\t-p N\t\tSets parallelism to N threads (default %d)\n", + THREADS_DEF); + printf("\t-l N\t\tSets hash output length to N bytes (default %d)\n", + OUTLEN_DEF); + printf("\t-e\t\tOutput only encoded hash\n"); + printf("\t-r\t\tOutput only the raw bytes of the hash\n"); + printf("\t-v (10|13)\tArgon2 version (defaults to the most recent version, " + "currently %x)\n", ARGON2_VERSION_NUMBER); + printf("\t-h\t\tPrint %s usage\n", cmd); +} + +static void fatal(const char *error) { + fprintf(stderr, "Error: %s\n", error); + exit(1); +} + +static void print_hex(uint8_t *bytes, size_t bytes_len) { + size_t i; + for (i = 0; i < bytes_len; ++i) { + printf("%02x", bytes[i]); + } + printf("\n"); +} + +/* +Runs Argon2 with certain inputs and parameters, inputs not cleared. 
Prints the +Base64-encoded hash string +@out output array with at least 32 bytes allocated +@pwd NULL-terminated string, presumably from argv[] +@salt salt array +@t_cost number of iterations +@m_cost amount of requested memory in KB +@lanes amount of requested parallelism +@threads actual parallelism +@type Argon2 type we want to run +@encoded_only display only the encoded hash +@raw_only display only the hexadecimal of the hash +@version Argon2 version +*/ +static void run(uint32_t outlen, char *pwd, char *salt, uint32_t t_cost, + uint32_t m_cost, uint32_t lanes, uint32_t threads, + argon2_type type, int encoded_only, int raw_only, + uint32_t version) { + clock_t start_time, stop_time; + size_t pwdlen, saltlen, encodedlen; + int result; + unsigned char * out = NULL; + char * encoded = NULL; + + start_time = clock(); + + if (!pwd) { + fatal("password missing"); + } + + if (!salt) { + clear_internal_memory(pwd, strlen(pwd)); + fatal("salt missing"); + } + + pwdlen = strlen(pwd); + saltlen = strlen(salt); + if(UINT32_MAX < saltlen) { + fatal("salt is too long"); + } + + UNUSED_PARAMETER(lanes); + + out = malloc(outlen + 1); + if (!out) { + clear_internal_memory(pwd, strlen(pwd)); + fatal("could not allocate memory for output"); + } + + encodedlen = argon2_encodedlen(t_cost, m_cost, lanes, (uint32_t)saltlen, outlen, type); + encoded = malloc(encodedlen + 1); + if (!encoded) { + clear_internal_memory(pwd, strlen(pwd)); + fatal("could not allocate memory for hash"); + } + + result = argon2_hash(t_cost, m_cost, threads, pwd, pwdlen, salt, saltlen, + out, outlen, encoded, encodedlen, type, version); + if (result != ARGON2_OK) + fatal(argon2_error_message(result)); + + stop_time = clock(); + + if (encoded_only) + puts(encoded); + + if (raw_only) + print_hex(out, outlen); + + if (encoded_only || raw_only) { + free(out); + free(encoded); + return; + } + + printf("Hash:\t\t"); + print_hex(out, outlen); + free(out); + + printf("Encoded:\t%s\n", encoded); + + printf("%2.3f 
seconds\n", + ((double)stop_time - start_time) / (CLOCKS_PER_SEC)); + + result = argon2_verify(encoded, pwd, pwdlen, type); + if (result != ARGON2_OK) + fatal(argon2_error_message(result)); + printf("Verification ok\n"); + free(encoded); +} + +int main(int argc, char *argv[]) { + uint32_t outlen = OUTLEN_DEF; + uint32_t m_cost = 1 << LOG_M_COST_DEF; + uint32_t t_cost = T_COST_DEF; + uint32_t lanes = LANES_DEF; + uint32_t threads = THREADS_DEF; + argon2_type type = Argon2_i; /* Argon2i is the default type */ + int types_specified = 0; + int encoded_only = 0; + int raw_only = 0; + uint32_t version = ARGON2_VERSION_NUMBER; + int i; + size_t n; + char pwd[MAX_PASS_LEN], *salt; + + if (argc < 2) { + usage(argv[0]); + return ARGON2_MISSING_ARGS; + } else if (argc >= 2 && strcmp(argv[1], "-h") == 0) { + usage(argv[0]); + return 1; + } + + argon2_select_impl(stderr, "[libargon2] "); + + /* get password from stdin */ + n = fread(pwd, 1, sizeof pwd - 1, stdin); + if(n < 1) { + fatal("no password read"); + } + if(n == MAX_PASS_LEN-1) { + fatal("Provided password longer than supported in command line utility"); + } + + pwd[n] = '\0'; + if (pwd[n - 1] == '\n') { + pwd[n - 1] = '\0'; + } + + salt = argv[1]; + + /* parse options */ + for (i = 2; i < argc; i++) { + const char *a = argv[i]; + unsigned long input = 0; + if (!strcmp(a, "-h")) { + usage(argv[0]); + return 1; + } else if (!strcmp(a, "-m")) { + if (i < argc - 1) { + i++; + input = strtoul(argv[i], NULL, 10); + if (input == 0 || input == ULONG_MAX || + input > ARGON2_MAX_MEMORY_BITS) { + fatal("bad numeric input for -m"); + } + m_cost = ARGON2_MIN(UINT64_C(1) << input, UINT32_C(0xFFFFFFFF)); + if (m_cost > ARGON2_MAX_MEMORY) { + fatal("m_cost overflow"); + } + continue; + } else { + fatal("missing -m argument"); + } + } else if (!strcmp(a, "-t")) { + if (i < argc - 1) { + i++; + input = strtoul(argv[i], NULL, 10); + if (input == 0 || input == ULONG_MAX || + input > ARGON2_MAX_TIME) { + fatal("bad numeric input for -t"); 
+ } + t_cost = input; + continue; + } else { + fatal("missing -t argument"); + } + } else if (!strcmp(a, "-p")) { + if (i < argc - 1) { + i++; + input = strtoul(argv[i], NULL, 10); + if (input == 0 || input == ULONG_MAX || + input > ARGON2_MAX_THREADS || input > ARGON2_MAX_LANES) { + fatal("bad numeric input for -p"); + } + threads = input; + lanes = threads; + continue; + } else { + fatal("missing -p argument"); + } + } else if (!strcmp(a, "-l")) { + if (i < argc - 1) { + i++; + input = strtoul(argv[i], NULL, 10); + outlen = input; + continue; + } else { + fatal("missing -l argument"); + } + } else if (!strcmp(a, "-i")) { + type = Argon2_i; + ++types_specified; + } else if (!strcmp(a, "-d")) { + type = Argon2_d; + ++types_specified; + } else if (!strcmp(a, "-id")) { + type = Argon2_id; + ++types_specified; + } else if (!strcmp(a, "-e")) { + encoded_only = 1; + } else if (!strcmp(a, "-r")) { + raw_only = 1; + } else if (!strcmp(a, "-v")) { + if (i < argc - 1) { + i++; + if (!strcmp(argv[i], "10")) { + version = ARGON2_VERSION_10; + } else if (!strcmp(argv[i], "13")) { + version = ARGON2_VERSION_13; + } else { + fatal("invalid Argon2 version"); + } + } else { + fatal("missing -v argument"); + } + } else { + fatal("unknown argument"); + } + } + + if (types_specified > 1) { + fatal("cannot specify multiple Argon2 types"); + } + + if(encoded_only && raw_only) + fatal("cannot provide both -e and -r"); + + if(!encoded_only && !raw_only) { + printf("Type:\t\t%s\n", argon2_type2string(type, 1)); + printf("Iterations:\t%" PRIu32 " \n", t_cost); + printf("Memory:\t\t%" PRIu32 " KiB\n", m_cost); + printf("Parallelism:\t%" PRIu32 " \n", lanes); + } + + run(outlen, pwd, salt, t_cost, m_cost, lanes, threads, type, + encoded_only, raw_only, version); + + return ARGON2_OK; +} + diff --git a/src/3rdparty/argon2/src/timing.h b/src/3rdparty/argon2/src/timing.h new file mode 100644 index 00000000..0e39a1f3 --- /dev/null +++ b/src/3rdparty/argon2/src/timing.h @@ -0,0 +1,41 @@ +#include 
/*
 * Minimal timing helpers for the benchmark tools.
 *
 * When _POSIX_SOURCE is defined, timestamps are read with
 * clock_gettime(CLOCK_MONOTONIC); otherwise they fall back to clock().
 * Both variants expose the same three names: struct timestamp,
 * timestamp_store() and timestamp_span_ms().
 */
#ifdef _POSIX_SOURCE
#include <time.h>

struct timestamp {
    struct timespec time;
};

/* Records the current CLOCK_MONOTONIC reading in *out. */
static inline void timestamp_store(struct timestamp *out)
{
    clock_gettime(CLOCK_MONOTONIC, &out->time);
}

/* Returns the time from *start to *end in milliseconds (negative when
 * *end precedes *start). */
static inline double timestamp_span_ms(const struct timestamp *start,
                                       const struct timestamp *end)
{
    double res = 0.0;
    res += (end->time.tv_sec - start->time.tv_sec) * 1000.0;
    res += (end->time.tv_nsec - start->time.tv_nsec) / 1000000.0;
    return res;
}
#else
#include <time.h>

struct timestamp {
    clock_t time;
};

/* Records the current clock() reading in *out. */
static inline void timestamp_store(struct timestamp *out)
{
    out->time = clock();
}

/* Returns the clock() ticks from *start to *end, converted to
 * milliseconds. */
static inline double timestamp_span_ms(const struct timestamp *start,
                                       const struct timestamp *end)
{
    /* Convert to double before scaling: multiplying by 1000 in clock_t
     * arithmetic can overflow where clock_t is a 32-bit integer. */
    const double ticks = (double)(end->time - start->time);
    return ticks * 1000.0 / CLOCKS_PER_SEC;
}
#endif
(ARGON2_VERSION_NUMBER == version) { + assert(memcmp(encoded, mcfref, strlen(mcfref)) == 0); + } + + ret = argon2_verify(encoded, pwd, strlen(pwd), Argon2_i); + assert(ret == ARGON2_OK); + ret = argon2_verify(mcfref, pwd, strlen(pwd), Argon2_i); + assert(ret == ARGON2_OK); + + printf("PASS\n"); +} + +int main() { + int ret; + unsigned char out[OUT_LEN]; + char const *msg; + int version; + + argon2_select_impl(stderr, "[libargon2] "); + + version = ARGON2_VERSION_10; + printf("Test Argon2i version number: %02x\n", version); + + /* Multiple test cases for various input values */ + hashtest(version, 2, 16, 1, "password", "somesalt", + "f6c4db4a54e2a370627aff3db6176b94a2a209a62c8e36152711802f7b30c694", + "$argon2i$m=65536,t=2,p=1$c29tZXNhbHQ" + "$9sTbSlTio3Biev89thdrlKKiCaYsjjYVJxGAL3swxpQ"); +#ifdef TEST_LARGE_RAM + hashtest(version, 2, 20, 1, "password", "somesalt", + "9690ec55d28d3ed32562f2e73ea62b02b018757643a2ae6e79528459de8106e9", + "$argon2i$m=1048576,t=2,p=1$c29tZXNhbHQ" + "$lpDsVdKNPtMlYvLnPqYrArAYdXZDoq5ueVKEWd6BBuk"); +#endif + hashtest(version, 2, 18, 1, "password", "somesalt", + "3e689aaa3d28a77cf2bc72a51ac53166761751182f1ee292e3f677a7da4c2467", + "$argon2i$m=262144,t=2,p=1$c29tZXNhbHQ" + "$Pmiaqj0op3zyvHKlGsUxZnYXURgvHuKS4/Z3p9pMJGc"); + hashtest(version, 2, 8, 1, "password", "somesalt", + "fd4dd83d762c49bdeaf57c47bdcd0c2f1babf863fdeb490df63ede9975fccf06", + "$argon2i$m=256,t=2,p=1$c29tZXNhbHQ" + "$/U3YPXYsSb3q9XxHvc0MLxur+GP960kN9j7emXX8zwY"); + hashtest(version, 2, 8, 2, "password", "somesalt", + "b6c11560a6a9d61eac706b79a2f97d68b4463aa3ad87e00c07e2b01e90c564fb", + "$argon2i$m=256,t=2,p=2$c29tZXNhbHQ" + "$tsEVYKap1h6scGt5ovl9aLRGOqOth+AMB+KwHpDFZPs"); + hashtest(version, 1, 16, 1, "password", "somesalt", + "81630552b8f3b1f48cdb1992c4c678643d490b2b5eb4ff6c4b3438b5621724b2", + "$argon2i$m=65536,t=1,p=1$c29tZXNhbHQ" + "$gWMFUrjzsfSM2xmSxMZ4ZD1JCytetP9sSzQ4tWIXJLI"); + hashtest(version, 4, 16, 1, "password", "somesalt", + 
"f212f01615e6eb5d74734dc3ef40ade2d51d052468d8c69440a3a1f2c1c2847b", + "$argon2i$m=65536,t=4,p=1$c29tZXNhbHQ" + "$8hLwFhXm6110c03D70Ct4tUdBSRo2MaUQKOh8sHChHs"); + hashtest(version, 2, 16, 1, "differentpassword", "somesalt", + "e9c902074b6754531a3a0be519e5baf404b30ce69b3f01ac3bf21229960109a3", + "$argon2i$m=65536,t=2,p=1$c29tZXNhbHQ" + "$6ckCB0tnVFMaOgvlGeW69ASzDOabPwGsO/ISKZYBCaM"); + hashtest(version, 2, 16, 1, "password", "diffsalt", + "79a103b90fe8aef8570cb31fc8b22259778916f8336b7bdac3892569d4f1c497", + "$argon2i$m=65536,t=2,p=1$ZGlmZnNhbHQ" + "$eaEDuQ/orvhXDLMfyLIiWXeJFvgza3vaw4kladTxxJc"); + + /* Error state tests */ + + /* Handle an invalid encoding correctly (it is missing a $) */ + ret = argon2_verify("$argon2i$m=65536,t=2,p=1c29tZXNhbHQ" + "$9sTbSlTio3Biev89thdrlKKiCaYsjjYVJxGAL3swxpQ", + "password", strlen("password"), Argon2_i); + assert(ret == ARGON2_DECODING_FAIL); + printf("Recognise an invalid encoding: PASS\n"); + + /* Handle an invalid encoding correctly (it is missing a $) */ + ret = argon2_verify("$argon2i$m=65536,t=2,p=1$c29tZXNhbHQ" + "9sTbSlTio3Biev89thdrlKKiCaYsjjYVJxGAL3swxpQ", + "password", strlen("password"), Argon2_i); + assert(ret == ARGON2_DECODING_FAIL); + printf("Recognise an invalid encoding: PASS\n"); + + /* Handle an invalid encoding correctly (salt is too short) */ + ret = argon2_verify("$argon2i$m=65536,t=2,p=1$" + "$9sTbSlTio3Biev89thdrlKKiCaYsjjYVJxGAL3swxpQ", + "password", strlen("password"), Argon2_i); + assert(ret == ARGON2_SALT_TOO_SHORT); + printf("Recognise an invalid salt in encoding: PASS\n"); + + /* Handle an mismatching hash (the encoded password is "passwore") */ + ret = argon2_verify("$argon2i$m=65536,t=2,p=1$c29tZXNhbHQ" + "$b2G3seW+uPzerwQQC+/E1K50CLLO7YXy0JRcaTuswRo", + "password", strlen("password"), Argon2_i); + assert(ret == ARGON2_VERIFY_MISMATCH); + printf("Verify with mismatched password: PASS\n"); + + msg = argon2_error_message(ARGON2_DECODING_FAIL); + assert(strcmp(msg, "Decoding failed") == 0); + 
printf("Decode an error message: PASS\n"); + + printf("\n"); + + version = ARGON2_VERSION_NUMBER; + printf("Test Argon2i version number: %02x\n", version); + + /* Multiple test cases for various input values */ + hashtest(version, 2, 16, 1, "password", "somesalt", + "c1628832147d9720c5bd1cfd61367078729f6dfb6f8fea9ff98158e0d7816ed0", + "$argon2i$v=19$m=65536,t=2,p=1$c29tZXNhbHQ" + "$wWKIMhR9lyDFvRz9YTZweHKfbftvj+qf+YFY4NeBbtA"); +#ifdef TEST_LARGE_RAM + hashtest(version, 2, 20, 1, "password", "somesalt", + "d1587aca0922c3b5d6a83edab31bee3c4ebaef342ed6127a55d19b2351ad1f41", + "$argon2i$v=19$m=1048576,t=2,p=1$c29tZXNhbHQ" + "$0Vh6ygkiw7XWqD7asxvuPE667zQu1hJ6VdGbI1GtH0E"); +#endif + hashtest(version, 2, 18, 1, "password", "somesalt", + "296dbae80b807cdceaad44ae741b506f14db0959267b183b118f9b24229bc7cb", + "$argon2i$v=19$m=262144,t=2,p=1$c29tZXNhbHQ" + "$KW266AuAfNzqrUSudBtQbxTbCVkmexg7EY+bJCKbx8s"); + hashtest(version, 2, 8, 1, "password", "somesalt", + "89e9029f4637b295beb027056a7336c414fadd43f6b208645281cb214a56452f", + "$argon2i$v=19$m=256,t=2,p=1$c29tZXNhbHQ" + "$iekCn0Y3spW+sCcFanM2xBT63UP2sghkUoHLIUpWRS8"); + hashtest(version, 2, 8, 2, "password", "somesalt", + "4ff5ce2769a1d7f4c8a491df09d41a9fbe90e5eb02155a13e4c01e20cd4eab61", + "$argon2i$v=19$m=256,t=2,p=2$c29tZXNhbHQ" + "$T/XOJ2mh1/TIpJHfCdQan76Q5esCFVoT5MAeIM1Oq2E"); + hashtest(version, 1, 16, 1, "password", "somesalt", + "d168075c4d985e13ebeae560cf8b94c3b5d8a16c51916b6f4ac2da3ac11bbecf", + "$argon2i$v=19$m=65536,t=1,p=1$c29tZXNhbHQ" + "$0WgHXE2YXhPr6uVgz4uUw7XYoWxRkWtvSsLaOsEbvs8"); + hashtest(version, 4, 16, 1, "password", "somesalt", + "aaa953d58af3706ce3df1aefd4a64a84e31d7f54175231f1285259f88174ce5b", + "$argon2i$v=19$m=65536,t=4,p=1$c29tZXNhbHQ" + "$qqlT1YrzcGzj3xrv1KZKhOMdf1QXUjHxKFJZ+IF0zls"); + hashtest(version, 2, 16, 1, "differentpassword", "somesalt", + "14ae8da01afea8700c2358dcef7c5358d9021282bd88663a4562f59fb74d22ee", + "$argon2i$v=19$m=65536,t=2,p=1$c29tZXNhbHQ" + 
"$FK6NoBr+qHAMI1jc73xTWNkCEoK9iGY6RWL1n7dNIu4"); + hashtest(version, 2, 16, 1, "password", "diffsalt", + "b0357cccfbef91f3860b0dba447b2348cbefecadaf990abfe9cc40726c521271", + "$argon2i$v=19$m=65536,t=2,p=1$ZGlmZnNhbHQ" + "$sDV8zPvvkfOGCw26RHsjSMvv7K2vmQq/6cxAcmxSEnE"); + + /* Error state tests */ + + /* Handle an invalid encoding correctly (it is missing a $) */ + ret = argon2_verify("$argon2i$v=19$m=65536,t=2,p=1c29tZXNhbHQ" + "$wWKIMhR9lyDFvRz9YTZweHKfbftvj+qf+YFY4NeBbtA", + "password", strlen("password"), Argon2_i); + assert(ret == ARGON2_DECODING_FAIL); + printf("Recognise an invalid encoding: PASS\n"); + + /* Handle an invalid encoding correctly (it is missing a $) */ + ret = argon2_verify("$argon2i$v=19$m=65536,t=2,p=1$c29tZXNhbHQ" + "wWKIMhR9lyDFvRz9YTZweHKfbftvj+qf+YFY4NeBbtA", + "password", strlen("password"), Argon2_i); + assert(ret == ARGON2_DECODING_FAIL); + printf("Recognise an invalid encoding: PASS\n"); + + /* Handle an invalid encoding correctly (salt is too short) */ + ret = argon2_verify("$argon2i$v=19$m=65536,t=2,p=1$" + "$9sTbSlTio3Biev89thdrlKKiCaYsjjYVJxGAL3swxpQ", + "password", strlen("password"), Argon2_i); + assert(ret == ARGON2_SALT_TOO_SHORT); + printf("Recognise an invalid salt in encoding: PASS\n"); + + /* Handle an mismatching hash (the encoded password is "passwore") */ + ret = argon2_verify("$argon2i$v=19$m=65536,t=2,p=1$c29tZXNhbHQ" + "$8iIuixkI73Js3G1uMbezQXD0b8LG4SXGsOwoQkdAQIM", + "password", strlen("password"), Argon2_i); + assert(ret == ARGON2_VERIFY_MISMATCH); + printf("Verify with mismatched password: PASS\n"); + + msg = argon2_error_message(ARGON2_DECODING_FAIL); + assert(strcmp(msg, "Decoding failed") == 0); + printf("Decode an error message: PASS\n"); + + /* Common error state tests */ + + printf("\n"); + printf("Common error state tests\n"); + + ret = argon2_hash(2, 1, 1, "password", strlen("password"), + "diffsalt", strlen("diffsalt"), + out, OUT_LEN, NULL, 0, Argon2_i, version); + assert(ret == 
ARGON2_MEMORY_TOO_LITTLE); + printf("Fail on invalid memory: PASS\n"); + + ret = argon2_hash(2, 1 << 12, 1, NULL, strlen("password"), + "diffsalt", strlen("diffsalt"), + out, OUT_LEN, NULL, 0, Argon2_i, version); + assert(ret == ARGON2_PWD_PTR_MISMATCH); + printf("Fail on invalid null pointer: PASS\n"); + + ret = argon2_hash(2, 1 << 12, 1, "password", strlen("password"), "s", 1, + out, OUT_LEN, NULL, 0, Argon2_i, version); + assert(ret == ARGON2_SALT_TOO_SHORT); + printf("Fail on salt too short: PASS\n"); + + return 0; +} diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp index f14d034d..0c2c0c80 100644 --- a/src/common/crypto/Algorithm.cpp +++ b/src/common/crypto/Algorithm.cpp @@ -92,6 +92,10 @@ static AlgoData const algorithms[] = { { "cryptonight_turtle", "cn_turtle", xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL }, # endif +# ifndef XMRIG_NO_ARGON2 + { "chukwa", "trtl-chukwa", xmrig::ARGON2, xmrig::VARIANT_CHUKWA }, +# endif + # ifndef XMRIG_NO_CN_GPU { "cryptonight/gpu", "cn/gpu", xmrig::CRYPTONIGHT, xmrig::VARIANT_GPU }, # endif @@ -138,7 +142,8 @@ static const char *variants[] = { "r", "rwz", "zls", - "double" + "double", + "chukwa" }; diff --git a/src/common/xmrig.h b/src/common/xmrig.h index e8ca8857..440afe29 100644 --- a/src/common/xmrig.h +++ b/src/common/xmrig.h @@ -36,6 +36,7 @@ enum Algo { CRYPTONIGHT_LITE, /* CryptoNight (1 MB) */ CRYPTONIGHT_HEAVY, /* CryptoNight (4 MB) */ CRYPTONIGHT_PICO, /* CryptoNight (256 KB) */ + ARGON2, /* Argon2 */ ALGO_MAX }; @@ -79,7 +80,8 @@ enum Variant { VARIANT_RWZ = 14, // CryptoNight variant 2 with 3/4 iterations and reversed shuffle operation (Graft) VARIANT_ZLS = 15, // CryptoNight variant 2 with 3/4 iterations (Zelerius) VARIANT_DOUBLE = 16, // CryptoNight variant 2 with double iterations (X-CASH) - VARIANT_MAX + VARIANT_CHUKWA = 17, // Argon2 Chukwa for TurtleCoin + VARIANT_MAX }; diff --git a/src/core/Config.cpp b/src/core/Config.cpp index 9216027a..b6f65b28 100644 --- 
a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -34,6 +34,7 @@ #include "core/ConfigCreator.h" #include "crypto/Asm.h" #include "crypto/CryptoNight_constants.h" +#include "crypto/Argon2_constants.h" #include "rapidjson/document.h" #include "rapidjson/filewritestream.h" #include "rapidjson/prettywriter.h" @@ -163,7 +164,16 @@ bool xmrig::Config::finalize() const AlgoVariant av = getAlgoVariant(); m_threads.mode = m_threads.count ? Simple : Automatic; - const size_t size = CpuThread::multiway(av) * cn_select_memory(m_algorithm.algo()) / 1024; + size_t size; + + if (m_algorithm.algo() == xmrig::ARGON2) + { + size = CpuThread::multiway(av) * argon2_select_memory(m_algorithm.variant()); + } + else + { + size = CpuThread::multiway(av) * cn_select_memory(m_algorithm.algo()) / 1024; + } if (!m_threads.count) { m_threads.count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); diff --git a/src/crypto/Argon2.h b/src/crypto/Argon2.h new file mode 100644 index 00000000..7ea62db1 --- /dev/null +++ b/src/crypto/Argon2.h @@ -0,0 +1,45 @@ +#ifndef XMRIG_ARGON2_H +#define XMRIG_ARGON2_H + +#include + +#include "crypto/Argon2_constants.h" + +static bool argon_optimization_selected = false; + +template +inline void argon2_hash_function(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) +{ + /* If this is the first time we've called this hash function then + we need to have the Argon2 library check to see if any of the + available CPU instruction sets are going to help us out */ + if (!argon_optimization_selected) + { + /* Call the library quick benchmark test to set which CPU + instruction sets will be used */ + argon2_select_impl(NULL, NULL); + + argon_optimization_selected = true; + } + + uint8_t salt[xmrig::ARGON2_SALTLEN]; + + memcpy(salt, input, sizeof(salt)); + + const uint32_t ITERS = xmrig::argon2_select_iters(VARIANT); + const uint32_t MEMORY = xmrig::argon2_select_memory(VARIANT); + 
const uint32_t PARALLELISM = xmrig::argon2_select_parallelism(VARIANT); + const int ALGO = xmrig::argon2_select_algo(VARIANT); + + switch (ALGO) + { + case xmrig::Argon2Algo::I: + argon2i_hash_raw(ITERS, MEMORY, PARALLELISM, input, size, salt, xmrig::ARGON2_SALTLEN, output, xmrig::ARGON2_HASHLEN); + case xmrig::Argon2Algo::D: + argon2d_hash_raw(ITERS, MEMORY, PARALLELISM, input, size, salt, xmrig::ARGON2_SALTLEN, output, xmrig::ARGON2_HASHLEN); + case xmrig::Argon2Algo::ID: + argon2id_hash_raw(ITERS, MEMORY, PARALLELISM, input, size, salt, xmrig::ARGON2_SALTLEN, output, xmrig::ARGON2_HASHLEN); + } +} + +#endif \ No newline at end of file diff --git a/src/crypto/Argon2_constants.h b/src/crypto/Argon2_constants.h new file mode 100644 index 00000000..b2d6de4c --- /dev/null +++ b/src/crypto/Argon2_constants.h @@ -0,0 +1,72 @@ +#ifndef XMRIG_ARGON2_CONSTANTS_H +#define XMRIG_ARGON2_CONSTANTS_H + + +#include +#include + + +#include "common/xmrig.h" + +namespace xmrig +{ + enum Argon2Algo { + I = 0, + D = 1, + ID = 2 + }; + + constexpr const size_t ARGON2_SALTLEN = 16; + constexpr const size_t ARGON2_HASHLEN = 32; + + constexpr const size_t ARGON2_MEMORY_CHUKWA = 512; + constexpr const size_t ARGON2_ITERS_CHUKWA = 3; + constexpr const size_t ARGON2_PARALLELISM_CHUKWA = 1; + constexpr const int ARGON2_ALGO_CHUKWA = Argon2Algo::ID; + + inline int argon2_select_algo(Variant variant) + { + switch (variant) + { + case VARIANT_CHUKWA: + return ARGON2_ALGO_CHUKWA; + } + + return 0; + } + + inline uint32_t argon2_select_memory(Variant variant) + { + switch (variant) + { + case VARIANT_CHUKWA: + return ARGON2_MEMORY_CHUKWA; + } + + return 0; + } + + inline uint32_t argon2_select_iters(Variant variant) + { + switch (variant) + { + case VARIANT_CHUKWA: + return ARGON2_ITERS_CHUKWA; + } + + return 0; + } + + inline uint32_t argon2_select_parallelism(Variant variant) + { + switch (variant) + { + case VARIANT_CHUKWA: + return ARGON2_PARALLELISM_CHUKWA; + } + + return 0; + } +} + 
+#endif \ No newline at end of file diff --git a/src/crypto/Argon2_test.h b/src/crypto/Argon2_test.h new file mode 100644 index 00000000..e69de29b diff --git a/src/workers/CpuThread.cpp b/src/workers/CpuThread.cpp index 6548b461..ad06be63 100644 --- a/src/workers/CpuThread.cpp +++ b/src/workers/CpuThread.cpp @@ -39,6 +39,9 @@ # include "crypto/CryptoNight_x86.h" #endif +#ifndef XMRIG_NO_ARGON2 +# include "crypto/Argon2.h" +#endif xmrig::CpuThread::CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch, Assembly assembly) : m_algorithm(algorithm), @@ -389,6 +392,8 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a cryptonight_triple_hash, cryptonight_quad_hash, cryptonight_penta_hash, + + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA # ifndef XMRIG_NO_AEON cryptonight_single_hash, @@ -428,6 +433,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA # else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 @@ -446,6 +452,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 
nullptr, // VARIANT_ZLS nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA # endif # ifndef XMRIG_NO_SUMO @@ -498,6 +505,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA # else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 @@ -516,6 +524,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA # endif # ifndef XMRIG_NO_CN_PICO @@ -547,6 +556,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE + 
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA # else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 @@ -565,6 +575,57 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA +# endif + +# ifndef XMRIG_NO_ARGON2 + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2 + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF + nullptr, nullptr, nullptr, nullptr, 
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4 + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE + + argon2_hash_function, + argon2_hash_function, + argon2_hash_function, + argon2_hash_function, + argon2_hash_function, + argon2_hash_function, + argon2_hash_function, + argon2_hash_function, + argon2_hash_function, + argon2_hash_function, +# else + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2 + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 
nullptr, nullptr, // VARIANT_HALF + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4 + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA # endif }; diff --git a/src/workers/MultiWorker.cpp b/src/workers/MultiWorker.cpp index 02eec378..8045e200 100644 --- a/src/workers/MultiWorker.cpp +++ b/src/workers/MultiWorker.cpp @@ -101,6 +101,13 @@ bool MultiWorker::selfTest() } # endif +# ifndef XMRIG_NO_ARGON2 + if (m_thread->algorithm() == ARGON2) { + return true; + // return verify(VARIANT_CHUKWA, test_output_pico_trtl); + } +# endif + return false; } From 1a13b3d59bfc5c6558bb5c85b9a84f63dad7d2e1 Mon Sep 17 00:00:00 2001 From: wrkzdev <40448869+wrkzdev@users.noreply.github.com> Date: Thu, 6 Jun 2019 10:53:11 +0700 Subject: [PATCH 14/18] WRKZ support --- src/common/crypto/Algorithm.cpp | 4 +++- src/common/xmrig.h | 1 + src/crypto/Argon2_constants.h | 13 +++++++++++++ src/workers/CpuThread.cpp | 20 ++++++++++++++++++++ 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp index 0c2c0c80..6313ee60 100644 --- a/src/common/crypto/Algorithm.cpp +++ b/src/common/crypto/Algorithm.cpp @@ -94,6 +94,7 @@ static AlgoData const algorithms[] = { # ifndef XMRIG_NO_ARGON2 
{ "chukwa", "trtl-chukwa", xmrig::ARGON2, xmrig::VARIANT_CHUKWA }, + { "chukwa/wrkz", "wrkz-chukwa", xmrig::ARGON2, xmrig::VARIANT_CHUKWA_LITE }, # endif # ifndef XMRIG_NO_CN_GPU @@ -143,7 +144,8 @@ static const char *variants[] = { "rwz", "zls", "double", - "chukwa" + "chukwa", + "wrkz", }; diff --git a/src/common/xmrig.h b/src/common/xmrig.h index 440afe29..32e8f66a 100644 --- a/src/common/xmrig.h +++ b/src/common/xmrig.h @@ -81,6 +81,7 @@ enum Variant { VARIANT_ZLS = 15, // CryptoNight variant 2 with 3/4 iterations (Zelerius) VARIANT_DOUBLE = 16, // CryptoNight variant 2 with double iterations (X-CASH) VARIANT_CHUKWA = 17, // Argon2 Chukwa for TurtleCoin + VARIANT_CHUKWA_LITE = 18, // Argon2 Chukwa Lite for WrkzCoin VARIANT_MAX }; diff --git a/src/crypto/Argon2_constants.h b/src/crypto/Argon2_constants.h index b2d6de4c..d5a29da4 100644 --- a/src/crypto/Argon2_constants.h +++ b/src/crypto/Argon2_constants.h @@ -22,6 +22,11 @@ namespace xmrig constexpr const size_t ARGON2_MEMORY_CHUKWA = 512; constexpr const size_t ARGON2_ITERS_CHUKWA = 3; constexpr const size_t ARGON2_PARALLELISM_CHUKWA = 1; + + constexpr const size_t ARGON2_MEMORY_CHUKWA_LITE = 256; + constexpr const size_t ARGON2_ITERS_CHUKWA_LITE = 4; + constexpr const size_t ARGON2_PARALLELISM_CHUKWA_LITE = 1; + constexpr const int ARGON2_ALGO_CHUKWA = Argon2Algo::ID; inline int argon2_select_algo(Variant variant) @@ -30,6 +35,8 @@ namespace xmrig { case VARIANT_CHUKWA: return ARGON2_ALGO_CHUKWA; + case VARIANT_CHUKWA_LITE: + return ARGON2_ALGO_CHUKWA; } return 0; @@ -41,6 +48,8 @@ namespace xmrig { case VARIANT_CHUKWA: return ARGON2_MEMORY_CHUKWA; + case VARIANT_CHUKWA_LITE: + return ARGON2_MEMORY_CHUKWA_LITE; } return 0; @@ -52,6 +61,8 @@ namespace xmrig { case VARIANT_CHUKWA: return ARGON2_ITERS_CHUKWA; + case VARIANT_CHUKWA_LITE: + return ARGON2_ITERS_CHUKWA_LITE; } return 0; @@ -63,6 +74,8 @@ namespace xmrig { case VARIANT_CHUKWA: return ARGON2_PARALLELISM_CHUKWA; + case VARIANT_CHUKWA_LITE: + return 
ARGON2_PARALLELISM_CHUKWA_LITE; } return 0; diff --git a/src/workers/CpuThread.cpp b/src/workers/CpuThread.cpp index ad06be63..c4981e93 100644 --- a/src/workers/CpuThread.cpp +++ b/src/workers/CpuThread.cpp @@ -394,6 +394,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a cryptonight_penta_hash, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA_LITE # ifndef XMRIG_NO_AEON cryptonight_single_hash, @@ -434,6 +435,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA_LITE # else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 @@ -453,6 +455,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA_LITE # endif # ifndef XMRIG_NO_SUMO @@ -506,6 +509,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo 
algorithm, AlgoVariant a nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA_LITE # else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 @@ -525,6 +529,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA_LITE # endif # ifndef XMRIG_NO_CN_PICO @@ -557,6 +562,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA_LITE # else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 @@ 
-576,6 +582,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA_LITE # endif # ifndef XMRIG_NO_ARGON2 @@ -607,6 +614,18 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a argon2_hash_function, argon2_hash_function, argon2_hash_function, + + argon2_hash_function, + argon2_hash_function, + argon2_hash_function, + argon2_hash_function, + argon2_hash_function, + argon2_hash_function, + argon2_hash_function, + argon2_hash_function, + argon2_hash_function, + argon2_hash_function, + # else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 @@ -626,6 +645,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA_LITE # endif }; From 84f56f0a4e6ad53266f4f03393293e4070683a6e Mon Sep 17 00:00:00 2001 From: wrkzdev <40448869+wrkzdev@users.noreply.github.com> Date: Fri, 7 Jun 2019 10:10:42 +0700 Subject: [PATCH 15/18] Update sample config.json for chukwa wrkz --- src/config.json | 20 
+++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/config.json b/src/config.json index 5018db51..6843138a 100644 --- a/src/config.json +++ b/src/config.json @@ -1,5 +1,5 @@ { - "algo": "cryptonight", + "algo": "chukwa/wrkz", "api": { "port": 0, "access-token": null, @@ -15,20 +15,21 @@ "colors": true, "cpu-affinity": null, "cpu-priority": null, - "donate-level": 5, + "donate-level": 1, "huge-pages": true, "hw-aes": null, - "log-file": null, - "max-cpu-usage": 100, + "log-file": "./log.txt", + "max-cpu-usage": 95, "pools": [ { - "url": "donate.v2.xmrig.com:3333", - "user": "YOUR_WALLET_ADDRESS", + "url": "testnet.wrkz.work:5555", + "user": "WrkzRNDQDwFCBynKPc459v3LDa1gEGzG3j962tMUBko1fw9xgdaS9mNiGMgA9s1q7hS1Z8SGRVWzcGc8Sh8xsvfZ6u2wJEtoZB", "pass": "x", "rig-id": null, "nicehash": false, - "keepalive": false, - "variant": -1, + "keepalive": true, + "variant": "wrkz", + "enabled": true, "tls": false, "tls-fingerprint": null } @@ -37,7 +38,8 @@ "retries": 5, "retry-pause": 5, "safe": false, - "threads": null, + "threads": [ + ], "user-agent": null, "watch": true } \ No newline at end of file From 284534788177aa86a31ce5631b62109ad649c2e3 Mon Sep 17 00:00:00 2001 From: Haifa Bogdan Adnan Date: Mon, 26 Aug 2019 12:38:34 +0300 Subject: [PATCH 16/18] Conversion to NinjaRig. 
--- CMakeLists.txt | 255 +- cmake/TargetArch.cmake | 116 + cmake/asm.cmake | 45 - cmake/cn-gpu.cmake | 23 - doc/ALGORITHMS.md | 6 +- doc/api/1/config.json | 10 +- doc/api/1/summary.json | 9 +- doc/api/1/threads.json | 13 +- res/app.rc | 2 +- src/3rdparty/argon2/.gitattributes | 2 - src/3rdparty/argon2/.gitignore | 70 - src/3rdparty/argon2/.travis.yml | 23 - src/3rdparty/argon2/CMakeLists.txt | 203 -- src/3rdparty/argon2/Makefile.am | 125 - src/3rdparty/argon2/README.md | 58 - .../argon2/arch/generic/lib/argon2-arch.c | 20 - .../argon2/arch/x86_64/lib/argon2-arch.c | 38 - .../argon2/arch/x86_64/lib/argon2-avx2.c | 341 --- .../argon2/arch/x86_64/lib/argon2-avx2.h | 11 - .../argon2/arch/x86_64/lib/argon2-avx512f.c | 326 --- .../argon2/arch/x86_64/lib/argon2-avx512f.h | 11 - .../argon2/arch/x86_64/lib/argon2-sse2.c | 122 - .../argon2/arch/x86_64/lib/argon2-sse2.h | 11 - .../argon2/arch/x86_64/lib/argon2-ssse3.c | 134 - .../argon2/arch/x86_64/lib/argon2-ssse3.h | 11 - .../arch/x86_64/lib/argon2-template-128.h | 164 -- .../argon2/arch/x86_64/lib/argon2-xop.c | 122 - .../argon2/arch/x86_64/lib/argon2-xop.h | 11 - src/3rdparty/argon2/configure.ac | 108 - src/3rdparty/argon2/include/argon2.h | 478 ---- src/3rdparty/argon2/lib/argon2-template-64.h | 193 -- src/3rdparty/argon2/lib/argon2.c | 476 ---- src/3rdparty/argon2/lib/blake2/blake2-impl.h | 90 - src/3rdparty/argon2/lib/blake2/blake2.c | 225 -- src/3rdparty/argon2/lib/blake2/blake2.h | 30 - src/3rdparty/argon2/lib/core.c | 633 ----- src/3rdparty/argon2/lib/core.h | 226 -- src/3rdparty/argon2/lib/encoding.c | 432 --- src/3rdparty/argon2/lib/encoding.h | 40 - src/3rdparty/argon2/lib/genkat.c | 117 - src/3rdparty/argon2/lib/genkat.h | 47 - src/3rdparty/argon2/lib/impl-select.c | 120 - src/3rdparty/argon2/lib/impl-select.h | 23 - src/3rdparty/argon2/lib/thread.c | 36 - src/3rdparty/argon2/lib/thread.h | 47 - .../argon2/m4/ax_check_compile_flag.m4 | 74 - src/3rdparty/argon2/m4/ax_pthread.m4 | 485 ---- 
src/3rdparty/argon2/qmake/arch/arch.pro | 3 - .../argon2/qmake/arch/generic/generic.pro | 1 - .../x86_64/libargon2-avx2/libargon2-avx2.pro | 23 - .../libargon2-avx512f/libargon2-avx512f.pro | 23 - .../x86_64/libargon2-sse2/libargon2-sse2.pro | 24 - .../libargon2-ssse3/libargon2-ssse3.pro | 24 - .../x86_64/libargon2-xop/libargon2-xop.pro | 24 - .../argon2/qmake/arch/x86_64/x86_64.pro | 8 - .../qmake/argon2-bench2/argon2-bench2.pro | 19 - .../qmake/argon2-genkat/argon2-genkat.pro | 16 - .../argon2/qmake/argon2-test/argon2-test.pro | 16 - src/3rdparty/argon2/qmake/argon2.pro | 9 - src/3rdparty/argon2/qmake/argon2/argon2.pro | 18 - .../argon2/qmake/libargon2/libargon2.pro | 119 - .../metacentrum/start-all-benchmarks.sh | 12 - .../scripts/metacentrum/start-benchmark.sh | 75 - src/3rdparty/argon2/scripts/run-benchmark.sh | 40 - src/3rdparty/argon2/src/bench2.c | 179 -- src/3rdparty/argon2/src/genkat.c | 90 - src/3rdparty/argon2/src/run.c | 315 --- src/3rdparty/argon2/src/timing.h | 41 - src/3rdparty/argon2/tests/test.c | 239 -- src/App.cpp | 18 +- src/App.h | 1 + src/Mem.cpp | 77 - src/Mem.h | 78 - src/Mem_unix.cpp | 114 - src/Mem_win.cpp | 204 -- src/Summary.cpp | 120 +- src/api/ApiRouter.cpp | 52 +- src/api/ApiRouter.h | 2 + src/base/net/Pool.cpp | 103 +- src/base/tools/String.cpp | 14 +- src/base/tools/String.h | 4 +- src/common/config/CommonConfig.cpp | 4 +- src/common/cpu/BasicCpuInfo.cpp | 12 +- src/common/cpu/BasicCpuInfo.h | 4 +- src/common/cpu/BasicCpuInfo_arm.cpp | 2 +- src/common/crypto/Algorithm.cpp | 91 +- src/common/interfaces/IConfig.h | 36 +- src/common/interfaces/ICpuInfo.h | 3 +- src/common/net/Job.cpp | 14 - src/common/xmrig.h | 84 +- src/config.json | 10 +- src/core/Config.cpp | 370 +-- src/core/Config.h | 81 +- src/core/ConfigLoader_default.h | 5 +- src/core/ConfigLoader_platform.h | 29 +- src/core/HasherConfig.cpp | 112 + src/core/HasherConfig.h | 98 + src/core/cpu/AdvancedCpuInfo.cpp | 14 +- src/core/cpu/AdvancedCpuInfo.h | 4 +- src/core/usage.h | 
27 +- src/crypto/Argon2.h | 45 - src/crypto/Argon2_constants.h | 2 +- src/crypto/Asm.cpp | 102 - src/crypto/Asm.h | 50 - src/crypto/CryptoNight.h | 62 - src/crypto/CryptoNight_arm.h | 844 ------ src/crypto/CryptoNight_constants.h | 225 -- src/crypto/CryptoNight_monero.h | 206 -- src/crypto/CryptoNight_test.h | 388 --- src/crypto/CryptoNight_x86.h | 1481 ---------- src/crypto/CryptonightR_gen.cpp | 187 -- src/crypto/SSE2NEON.h | 1497 ---------- src/crypto/argon2_hasher/common/DLLExport.h | 16 + src/crypto/argon2_hasher/common/DLLImport.h | 16 + src/crypto/argon2_hasher/common/common.cpp | 21 + src/crypto/argon2_hasher/common/common.h | 56 + src/crypto/argon2_hasher/crypt/base64.cpp | 103 + src/crypto/argon2_hasher/crypt/base64.h | 14 + src/crypto/argon2_hasher/crypt/hex.cpp | 30 + src/crypto/argon2_hasher/crypt/hex.h | 14 + .../argon2_hasher/crypt/random_generator.cpp | 27 + .../argon2_hasher/crypt/random_generator.h | 24 + src/crypto/argon2_hasher/crypt/sha512.cpp | 152 + src/crypto/argon2_hasher/crypt/sha512.h | 70 + src/crypto/argon2_hasher/hash/Hasher.cpp | 132 + src/crypto/argon2_hasher/hash/Hasher.h | 63 + .../argon2_hasher/hash/argon2/Argon2.cpp | 143 + src/crypto/argon2_hasher/hash/argon2/Argon2.h | 56 + src/crypto/argon2_hasher/hash/argon2/Defs.h | 50 + .../hash/argon2/argon2profile_3_1_512.c | 292 ++ .../hash/argon2/argon2profile_4_1_256.c | 168 ++ .../hash/argon2/blake2/blake2-config.h | 76 + .../hash/argon2/blake2/blake2-impl.h | 154 ++ .../argon2_hasher/hash/argon2/blake2/blake2.h | 90 + .../hash/argon2/blake2/blake2b-load-sse2.h | 68 + .../hash/argon2/blake2/blake2b-load-sse41.h | 402 +++ .../hash/argon2/blake2/blake2b-round.h | 154 ++ .../hash/argon2/blake2/blake2b.c | 514 ++++ .../argon2_hasher/hash/cpu/CpuHasher.cpp | 227 ++ src/crypto/argon2_hasher/hash/cpu/CpuHasher.h | 41 + .../hash/cpu/argon2_opt/blamka-round-opt.h | 567 ++++ .../hash/cpu/argon2_opt/blamka-round-ref.h | 55 + .../hash/cpu/argon2_opt/implementation.c | 448 +++ 
.../hash/cpu/cpu_features/.clang-format | 4 + .../hash/cpu/cpu_features/.gitignore | 1 + .../hash/cpu/cpu_features/.travis.yml | 91 + .../hash/cpu/cpu_features/CMakeLists.txt | 165 ++ .../hash/cpu/cpu_features/CONTRIBUTING.md | 23 + .../hash/cpu/cpu_features/LICENSE | 202 ++ .../hash/cpu/cpu_features/README.md | 165 ++ .../hash/cpu/cpu_features/WORKSPACE | 7 + .../hash/cpu/cpu_features/appveyor.yml | 24 + .../cmake/CpuFeaturesConfig.cmake.in | 3 + .../hash/cpu/cpu_features/cmake/README.md | 28 + .../cmake/googletest.CMakeLists.txt.in | 15 + .../cpu_features/cmake/mips32-linux-gcc.cmake | 34 + .../include/cpu_features_macros.h | 125 + .../cpu_features/include/cpuinfo_aarch64.h | 65 + .../cpu/cpu_features/include/cpuinfo_arm.h | 80 + .../cpu/cpu_features/include/cpuinfo_mips.h | 53 + .../cpu/cpu_features/include/cpuinfo_ppc.h | 141 + .../cpu/cpu_features/include/cpuinfo_x86.h | 154 ++ .../cpu_features/include/internal/bit_utils.h | 39 + .../cpu_features/include/internal/cpuid_x86.h | 37 + .../include/internal/filesystem.h | 38 + .../cpu_features/include/internal/hwcaps.h | 131 + .../internal/linux_features_aggregator.h | 60 + .../include/internal/stack_line_reader.h | 49 + .../include/internal/string_view.h | 108 + .../cpu_features/scripts/run_integration.sh | 173 ++ .../cpu_features/scripts/test_integration.sh | 80 + .../cpu_features/src/cpuid_x86_clang_gcc.c | 36 + .../cpu/cpu_features/src/cpuid_x86_msvc.c | 34 + .../cpu/cpu_features/src/cpuinfo_aarch64.c | 141 + .../hash/cpu/cpu_features/src/cpuinfo_arm.c | 259 ++ .../hash/cpu/cpu_features/src/cpuinfo_mips.c | 98 + .../hash/cpu/cpu_features/src/cpuinfo_ppc.c | 358 +++ .../hash/cpu/cpu_features/src/cpuinfo_x86.c | 447 +++ .../hash/cpu/cpu_features/src/filesystem.c | 57 + .../hash/cpu/cpu_features/src/hwcaps.c | 194 ++ .../src/linux_features_aggregator.c | 51 + .../cpu/cpu_features/src/stack_line_reader.c | 131 + .../hash/cpu/cpu_features/src/string_view.c | 182 ++ .../src/utils/list_cpu_features.c | 237 ++ 
.../hash/cpu/cpu_features/test/CMakeLists.txt | 79 + .../cpu/cpu_features/test/bit_utils_test.cc | 53 + .../cpu_features/test/cpuinfo_aarch64_test.cc | 74 + .../cpu/cpu_features/test/cpuinfo_arm_test.cc | 182 ++ .../cpu_features/test/cpuinfo_mips_test.cc | 125 + .../cpu/cpu_features/test/cpuinfo_ppc_test.cc | 119 + .../cpu/cpu_features/test/cpuinfo_x86_test.cc | 172 ++ .../test/filesystem_for_testing.cc | 103 + .../test/filesystem_for_testing.h | 61 + .../cpu_features/test/hwcaps_for_testing.cc | 45 + .../cpu_features/test/hwcaps_for_testing.h | 27 + .../test/linux_features_aggregator_test.cc | 95 + .../test/stack_line_reader_test.cc | 132 + .../cpu/cpu_features/test/string_view_test.cc | 144 + .../argon2_hasher/hash/gpu/cuda/blake2b.cu | 353 +++ .../hash/gpu/cuda/cuda_hasher.cpp | 340 +++ .../argon2_hasher/hash/gpu/cuda/cuda_hasher.h | 126 + .../hash/gpu/cuda/cuda_kernel.cu | 1132 ++++++++ .../hash/gpu/opencl/OpenCLHasher.cpp | 888 ++++++ .../hash/gpu/opencl/OpenCLHasher.h | 110 + .../hash/gpu/opencl/OpenCLKernel.cpp | 1085 ++++++++ .../hash/gpu/opencl/OpenCLKernel.h | 10 + .../asm/CryptonightR_soft_aes_template.inc | 281 -- .../CryptonightR_soft_aes_template_win.inc | 281 -- src/crypto/asm/CryptonightR_template.S | 1595 ----------- src/crypto/asm/CryptonightR_template.asm | 1585 ----------- src/crypto/asm/CryptonightR_template.h | 1087 -------- src/crypto/asm/CryptonightR_template.inc | 536 ---- src/crypto/asm/CryptonightR_template_win.inc | 536 ---- .../asm/CryptonightWOW_soft_aes_template.inc | 268 -- .../CryptonightWOW_soft_aes_template_win.inc | 268 -- src/crypto/asm/CryptonightWOW_template.inc | 491 ---- .../asm/CryptonightWOW_template_win.inc | 491 ---- .../cn2/cnv2_double_main_loop_sandybridge.inc | 413 --- .../asm/cn2/cnv2_main_loop_bulldozer.inc | 182 -- .../asm/cn2/cnv2_main_loop_ivybridge.inc | 188 -- src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc | 181 -- .../asm/cn2/cnv2_rwz_double_main_loop.inc | 413 --- src/crypto/asm/cn2/cnv2_rwz_main_loop.inc | 188 
-- src/crypto/asm/cn_main_loop.S | 73 - src/crypto/asm/cn_main_loop.asm | 52 - .../CryptonightR_soft_aes_template_win.inc | 281 -- .../asm/win64/CryptonightR_template.asm | 1585 ----------- .../asm/win64/CryptonightR_template_win.inc | 536 ---- .../CryptonightWOW_soft_aes_template_win.inc | 268 -- .../asm/win64/CryptonightWOW_template_win.inc | 491 ---- .../cn2/cnv2_double_main_loop_sandybridge.inc | 413 --- .../win64/cn2/cnv2_main_loop_bulldozer.inc | 182 -- .../win64/cn2/cnv2_main_loop_ivybridge.inc | 188 -- .../asm/win64/cn2/cnv2_main_loop_ryzen.inc | 181 -- .../win64/cn2/cnv2_rwz_double_main_loop.inc | 413 --- .../asm/win64/cn2/cnv2_rwz_main_loop.inc | 188 -- src/crypto/asm/win64/cn_main_loop.S | 45 - src/crypto/asm/win64/cn_main_loop.asm | 52 - src/crypto/c_blake256.c | 326 --- src/crypto/c_blake256.h | 43 - src/crypto/c_groestl.c | 360 --- src/crypto/c_groestl.h | 60 - src/crypto/c_jh.c | 367 --- src/crypto/c_jh.h | 19 - src/crypto/c_skein.c | 701 ----- src/crypto/c_skein.h | 49 - src/crypto/cn_gpu_arm.cpp | 240 -- src/crypto/cn_gpu_avx.cpp | 209 -- src/crypto/cn_gpu_ssse3.cpp | 210 -- src/crypto/groestl_tables.h | 38 - src/crypto/hash.h | 5 - src/crypto/skein_port.h | 187 -- src/crypto/soft_aes.h | 146 - src/crypto/variant4_random_math.h | 448 --- src/donate.h | 5 +- src/interfaces/IThread.h | 77 - src/interfaces/IWorker.h | 1 + src/net/Network.cpp | 3 +- src/net/strategies/DonateStrategy.cpp | 174 +- src/net/strategies/DonateStrategy.h | 3 +- src/net/strategies/Http.cpp | 283 ++ src/net/strategies/Http.h | 33 + src/net/strategies/http_parser/AUTHORS | 68 + .../strategies/http_parser/LICENSE-MIT} | 22 +- src/net/strategies/http_parser/README.md | 246 ++ src/net/strategies/http_parser/http_parser.c | 2462 +++++++++++++++++ src/net/strategies/http_parser/http_parser.h | 436 +++ src/version.h | 22 +- src/workers/CpuThread.cpp | 825 ------ src/workers/CpuThread.h | 115 - src/workers/Handle.cpp | 56 +- src/workers/Handle.h | 39 +- src/workers/Hashrate.cpp | 72 +- 
src/workers/Hashrate.h | 18 +- src/workers/MultiWorker.cpp | 280 -- src/workers/MultiWorker.h | 76 - src/workers/Worker.cpp | 128 +- src/workers/Worker.h | 35 +- src/workers/Workers.cpp | 221 +- src/workers/Workers.h | 26 +- 280 files changed, 18971 insertions(+), 32469 deletions(-) create mode 100644 cmake/TargetArch.cmake delete mode 100644 cmake/asm.cmake delete mode 100644 cmake/cn-gpu.cmake delete mode 100644 src/3rdparty/argon2/.gitattributes delete mode 100644 src/3rdparty/argon2/.gitignore delete mode 100644 src/3rdparty/argon2/.travis.yml delete mode 100644 src/3rdparty/argon2/CMakeLists.txt delete mode 100644 src/3rdparty/argon2/Makefile.am delete mode 100644 src/3rdparty/argon2/README.md delete mode 100644 src/3rdparty/argon2/arch/generic/lib/argon2-arch.c delete mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-arch.c delete mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.c delete mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.h delete mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.c delete mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.h delete mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.c delete mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.h delete mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.c delete mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.h delete mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-template-128.h delete mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.c delete mode 100644 src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.h delete mode 100644 src/3rdparty/argon2/configure.ac delete mode 100644 src/3rdparty/argon2/include/argon2.h delete mode 100644 src/3rdparty/argon2/lib/argon2-template-64.h delete mode 100644 src/3rdparty/argon2/lib/argon2.c delete mode 100644 src/3rdparty/argon2/lib/blake2/blake2-impl.h delete mode 100644 src/3rdparty/argon2/lib/blake2/blake2.c delete mode 100644 
src/3rdparty/argon2/lib/blake2/blake2.h delete mode 100644 src/3rdparty/argon2/lib/core.c delete mode 100644 src/3rdparty/argon2/lib/core.h delete mode 100644 src/3rdparty/argon2/lib/encoding.c delete mode 100644 src/3rdparty/argon2/lib/encoding.h delete mode 100644 src/3rdparty/argon2/lib/genkat.c delete mode 100644 src/3rdparty/argon2/lib/genkat.h delete mode 100644 src/3rdparty/argon2/lib/impl-select.c delete mode 100644 src/3rdparty/argon2/lib/impl-select.h delete mode 100644 src/3rdparty/argon2/lib/thread.c delete mode 100644 src/3rdparty/argon2/lib/thread.h delete mode 100644 src/3rdparty/argon2/m4/ax_check_compile_flag.m4 delete mode 100644 src/3rdparty/argon2/m4/ax_pthread.m4 delete mode 100644 src/3rdparty/argon2/qmake/arch/arch.pro delete mode 100644 src/3rdparty/argon2/qmake/arch/generic/generic.pro delete mode 100644 src/3rdparty/argon2/qmake/arch/x86_64/libargon2-avx2/libargon2-avx2.pro delete mode 100644 src/3rdparty/argon2/qmake/arch/x86_64/libargon2-avx512f/libargon2-avx512f.pro delete mode 100644 src/3rdparty/argon2/qmake/arch/x86_64/libargon2-sse2/libargon2-sse2.pro delete mode 100644 src/3rdparty/argon2/qmake/arch/x86_64/libargon2-ssse3/libargon2-ssse3.pro delete mode 100644 src/3rdparty/argon2/qmake/arch/x86_64/libargon2-xop/libargon2-xop.pro delete mode 100644 src/3rdparty/argon2/qmake/arch/x86_64/x86_64.pro delete mode 100644 src/3rdparty/argon2/qmake/argon2-bench2/argon2-bench2.pro delete mode 100644 src/3rdparty/argon2/qmake/argon2-genkat/argon2-genkat.pro delete mode 100644 src/3rdparty/argon2/qmake/argon2-test/argon2-test.pro delete mode 100644 src/3rdparty/argon2/qmake/argon2.pro delete mode 100644 src/3rdparty/argon2/qmake/argon2/argon2.pro delete mode 100644 src/3rdparty/argon2/qmake/libargon2/libargon2.pro delete mode 100644 src/3rdparty/argon2/scripts/metacentrum/start-all-benchmarks.sh delete mode 100644 src/3rdparty/argon2/scripts/metacentrum/start-benchmark.sh delete mode 100644 src/3rdparty/argon2/scripts/run-benchmark.sh delete 
mode 100644 src/3rdparty/argon2/src/bench2.c delete mode 100644 src/3rdparty/argon2/src/genkat.c delete mode 100644 src/3rdparty/argon2/src/run.c delete mode 100644 src/3rdparty/argon2/src/timing.h delete mode 100644 src/3rdparty/argon2/tests/test.c delete mode 100644 src/Mem.cpp delete mode 100644 src/Mem.h delete mode 100644 src/Mem_unix.cpp delete mode 100644 src/Mem_win.cpp create mode 100644 src/core/HasherConfig.cpp create mode 100644 src/core/HasherConfig.h delete mode 100644 src/crypto/Argon2.h delete mode 100644 src/crypto/Asm.cpp delete mode 100644 src/crypto/Asm.h delete mode 100644 src/crypto/CryptoNight.h delete mode 100644 src/crypto/CryptoNight_arm.h delete mode 100644 src/crypto/CryptoNight_constants.h delete mode 100644 src/crypto/CryptoNight_monero.h delete mode 100644 src/crypto/CryptoNight_test.h delete mode 100644 src/crypto/CryptoNight_x86.h delete mode 100644 src/crypto/CryptonightR_gen.cpp delete mode 100644 src/crypto/SSE2NEON.h create mode 100644 src/crypto/argon2_hasher/common/DLLExport.h create mode 100644 src/crypto/argon2_hasher/common/DLLImport.h create mode 100644 src/crypto/argon2_hasher/common/common.cpp create mode 100755 src/crypto/argon2_hasher/common/common.h create mode 100644 src/crypto/argon2_hasher/crypt/base64.cpp create mode 100644 src/crypto/argon2_hasher/crypt/base64.h create mode 100644 src/crypto/argon2_hasher/crypt/hex.cpp create mode 100644 src/crypto/argon2_hasher/crypt/hex.h create mode 100644 src/crypto/argon2_hasher/crypt/random_generator.cpp create mode 100644 src/crypto/argon2_hasher/crypt/random_generator.h create mode 100644 src/crypto/argon2_hasher/crypt/sha512.cpp create mode 100644 src/crypto/argon2_hasher/crypt/sha512.h create mode 100755 src/crypto/argon2_hasher/hash/Hasher.cpp create mode 100755 src/crypto/argon2_hasher/hash/Hasher.h create mode 100755 src/crypto/argon2_hasher/hash/argon2/Argon2.cpp create mode 100644 src/crypto/argon2_hasher/hash/argon2/Argon2.h create mode 100755 
src/crypto/argon2_hasher/hash/argon2/Defs.h create mode 100644 src/crypto/argon2_hasher/hash/argon2/argon2profile_3_1_512.c create mode 100644 src/crypto/argon2_hasher/hash/argon2/argon2profile_4_1_256.c create mode 100644 src/crypto/argon2_hasher/hash/argon2/blake2/blake2-config.h create mode 100644 src/crypto/argon2_hasher/hash/argon2/blake2/blake2-impl.h create mode 100644 src/crypto/argon2_hasher/hash/argon2/blake2/blake2.h create mode 100644 src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse2.h create mode 100644 src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse41.h create mode 100644 src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-round.h create mode 100644 src/crypto/argon2_hasher/hash/argon2/blake2/blake2b.c create mode 100755 src/crypto/argon2_hasher/hash/cpu/CpuHasher.cpp create mode 100644 src/crypto/argon2_hasher/hash/cpu/CpuHasher.h create mode 100644 src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-opt.h create mode 100644 src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-ref.h create mode 100755 src/crypto/argon2_hasher/hash/cpu/argon2_opt/implementation.c create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/.clang-format create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/.gitignore create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/.travis.yml create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/CMakeLists.txt create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/CONTRIBUTING.md create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/LICENSE create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/README.md create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/WORKSPACE create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/appveyor.yml create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/CpuFeaturesConfig.cmake.in create mode 100755 
src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/README.md create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/googletest.CMakeLists.txt.in create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/mips32-linux-gcc.cmake create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpu_features_macros.h create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_aarch64.h create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_arm.h create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_mips.h create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_ppc.h create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_x86.h create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/bit_utils.h create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/cpuid_x86.h create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/filesystem.h create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/hwcaps.h create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/linux_features_aggregator.h create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/stack_line_reader.h create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/string_view.h create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/run_integration.sh create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/test_integration.sh create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_clang_gcc.c create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_msvc.c create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_aarch64.c create mode 100755 
src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_arm.c create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_mips.c create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_ppc.c create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_x86.c create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/src/filesystem.c create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/src/hwcaps.c create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/src/linux_features_aggregator.c create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/src/stack_line_reader.c create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/src/string_view.c create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/src/utils/list_cpu_features.c create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/test/CMakeLists.txt create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/test/bit_utils_test.cc create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_aarch64_test.cc create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_arm_test.cc create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_mips_test.cc create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_ppc_test.cc create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_x86_test.cc create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/test/filesystem_for_testing.cc create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/test/filesystem_for_testing.h create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/test/hwcaps_for_testing.cc create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/test/hwcaps_for_testing.h create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/test/linux_features_aggregator_test.cc create mode 100755 
src/crypto/argon2_hasher/hash/cpu/cpu_features/test/stack_line_reader_test.cc create mode 100755 src/crypto/argon2_hasher/hash/cpu/cpu_features/test/string_view_test.cc create mode 100644 src/crypto/argon2_hasher/hash/gpu/cuda/blake2b.cu create mode 100644 src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp create mode 100644 src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h create mode 100644 src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu create mode 100755 src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp create mode 100755 src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h create mode 100644 src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp create mode 100644 src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.h delete mode 100644 src/crypto/asm/CryptonightR_soft_aes_template.inc delete mode 100644 src/crypto/asm/CryptonightR_soft_aes_template_win.inc delete mode 100644 src/crypto/asm/CryptonightR_template.S delete mode 100644 src/crypto/asm/CryptonightR_template.asm delete mode 100644 src/crypto/asm/CryptonightR_template.h delete mode 100644 src/crypto/asm/CryptonightR_template.inc delete mode 100644 src/crypto/asm/CryptonightR_template_win.inc delete mode 100644 src/crypto/asm/CryptonightWOW_soft_aes_template.inc delete mode 100644 src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc delete mode 100644 src/crypto/asm/CryptonightWOW_template.inc delete mode 100644 src/crypto/asm/CryptonightWOW_template_win.inc delete mode 100644 src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc delete mode 100644 src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc delete mode 100644 src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc delete mode 100644 src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc delete mode 100644 src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc delete mode 100644 src/crypto/asm/cn2/cnv2_rwz_main_loop.inc delete mode 100644 src/crypto/asm/cn_main_loop.S delete mode 100644 src/crypto/asm/cn_main_loop.asm delete mode 100644 
src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc delete mode 100644 src/crypto/asm/win64/CryptonightR_template.asm delete mode 100644 src/crypto/asm/win64/CryptonightR_template_win.inc delete mode 100644 src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc delete mode 100644 src/crypto/asm/win64/CryptonightWOW_template_win.inc delete mode 100644 src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc delete mode 100644 src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc delete mode 100644 src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc delete mode 100644 src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc delete mode 100644 src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc delete mode 100644 src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc delete mode 100644 src/crypto/asm/win64/cn_main_loop.S delete mode 100644 src/crypto/asm/win64/cn_main_loop.asm delete mode 100644 src/crypto/c_blake256.c delete mode 100644 src/crypto/c_blake256.h delete mode 100644 src/crypto/c_groestl.c delete mode 100644 src/crypto/c_groestl.h delete mode 100644 src/crypto/c_jh.c delete mode 100644 src/crypto/c_jh.h delete mode 100644 src/crypto/c_skein.c delete mode 100644 src/crypto/c_skein.h delete mode 100644 src/crypto/cn_gpu_arm.cpp delete mode 100644 src/crypto/cn_gpu_avx.cpp delete mode 100644 src/crypto/cn_gpu_ssse3.cpp delete mode 100644 src/crypto/groestl_tables.h delete mode 100644 src/crypto/hash.h delete mode 100644 src/crypto/skein_port.h delete mode 100644 src/crypto/soft_aes.h delete mode 100644 src/crypto/variant4_random_math.h delete mode 100644 src/interfaces/IThread.h create mode 100755 src/net/strategies/Http.cpp create mode 100644 src/net/strategies/Http.h create mode 100755 src/net/strategies/http_parser/AUTHORS rename src/{3rdparty/argon2/LICENSE => net/strategies/http_parser/LICENSE-MIT} (57%) mode change 100644 => 100755 create mode 100755 src/net/strategies/http_parser/README.md create mode 100755 
src/net/strategies/http_parser/http_parser.c create mode 100755 src/net/strategies/http_parser/http_parser.h delete mode 100644 src/workers/CpuThread.cpp delete mode 100644 src/workers/CpuThread.h delete mode 100644 src/workers/MultiWorker.cpp delete mode 100644 src/workers/MultiWorker.h diff --git a/CMakeLists.txt b/CMakeLists.txt index eb09197b..1f328ccc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,23 +1,26 @@ cmake_minimum_required(VERSION 2.8) -project(xmrig) +project(ninjarig) option(WITH_LIBCPUID "Use Libcpuid" ON) -option(WITH_AEON "CryptoNight-Lite support" ON) -option(WITH_SUMO "CryptoNight-Heavy support" ON) -option(WITH_CN_PICO "CryptoNight-Pico support" ON) -option(WITH_CN_GPU "CryptoNight-GPU support" ON) option(WITH_HTTPD "HTTP REST API" ON) option(WITH_DEBUG_LOG "Enable debug log output" OFF) option(WITH_TLS "Enable OpenSSL support" ON) -option(WITH_ASM "Enable ASM PoW implementations" ON) -option(WITH_ARGON2 "Enable Argon2 Support" ON) -option(BUILD_STATIC "Build static binary" OFF) -option(ARM_TARGET "Force use specific ARM target 8 or 7" 0) option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF) +option(WITH_CUDA "Enable CUDA support" ON) +option(WITH_OPENCL "Enable OpenCL support" ON) include (CheckIncludeFile) include (cmake/cpu.cmake) +include (cmake/TargetArch.cmake) +target_architecture (ARCH) +MESSAGE( STATUS "Target architecture is: " ${ARCH} ) + +SET(CMAKE_SKIP_BUILD_RPATH FALSE) +SET(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) +SET(CMAKE_INSTALL_RPATH "./") +SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) +set (CMAKE_MACOSX_RPATH 0) set(HEADERS src/api/NetworkState.h @@ -70,45 +73,23 @@ set(HEADERS src/core/ConfigLoader_default.h src/core/Controller.h src/interfaces/IJobResultListener.h - src/interfaces/IThread.h src/interfaces/IWorker.h - src/Mem.h src/net/JobResult.h src/net/Network.h src/net/strategies/DonateStrategy.h + src/net/strategies/Http.h src/Summary.h src/version.h - src/workers/CpuThread.h + 
src/core/HasherConfig.h src/workers/Handle.h src/workers/Hashrate.h - src/workers/MultiWorker.h src/workers/Worker.h src/workers/Workers.h ) set(HEADERS_CRYPTO - src/crypto/Argon2.h - src/crypto/Argon2_constants.h - src/crypto/c_blake256.h - src/crypto/c_groestl.h - src/crypto/c_jh.h - src/crypto/c_skein.h - src/crypto/CryptoNight.h - src/crypto/CryptoNight_constants.h - src/crypto/CryptoNight_monero.h - src/crypto/CryptoNight_test.h - src/crypto/groestl_tables.h - src/crypto/hash.h - src/crypto/skein_port.h - src/crypto/soft_aes.h - src/crypto/asm/CryptonightR_template.h - ) - -if (XMRIG_ARM) - set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/CryptoNight_arm.h) -else() - set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/CryptoNight_x86.h) -endif() + src/crypto/Argon2_constants.h + ) set(SOURCES src/api/NetworkState.cpp @@ -141,25 +122,64 @@ set(SOURCES src/common/Platform.cpp src/core/Config.cpp src/core/Controller.cpp - src/Mem.cpp src/net/Network.cpp src/net/strategies/DonateStrategy.cpp + src/net/strategies/Http.cpp + src/net/strategies/http_parser/http_parser.c src/Summary.cpp - src/workers/CpuThread.cpp src/workers/Handle.cpp src/workers/Hashrate.cpp - src/workers/MultiWorker.cpp src/workers/Worker.cpp src/workers/Workers.cpp src/xmrig.cpp ) -set(SOURCES_CRYPTO - src/crypto/c_groestl.c - src/crypto/c_blake256.c - src/crypto/c_jh.c - src/crypto/c_skein.c - ) +set(HEADERS_COMMON + src/crypto/argon2_hasher/common/common.h + src/crypto/argon2_hasher/common/DLLExport.h + src/crypto/argon2_hasher/common/DLLImport.h + src/crypto/argon2_hasher/crypt/base64.h + src/crypto/argon2_hasher/crypt/hex.h + src/crypto/argon2_hasher/crypt/random_generator.h + src/crypto/argon2_hasher/crypt/sha512.h + src/crypto/argon2_hasher/hash/argon2/blake2/blake2.h + src/crypto/argon2_hasher/hash/argon2/blake2/blake2-config.h + src/crypto/argon2_hasher/hash/argon2/blake2/blake2-impl.h + src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse2.h + 
src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse41.h + src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-round.h + src/crypto/argon2_hasher/hash/argon2/Argon2.h + src/crypto/argon2_hasher/hash/argon2/Defs.h + src/crypto/argon2_hasher/hash/Hasher.h + ) + +set(SOURCES_COMMON + src/crypto/argon2_hasher/common/common.cpp + src/crypto/argon2_hasher/crypt/base64.cpp + src/crypto/argon2_hasher/crypt/hex.cpp + src/crypto/argon2_hasher/crypt/random_generator.cpp + src/crypto/argon2_hasher/crypt/sha512.cpp + src/crypto/argon2_hasher/hash/argon2/blake2/blake2b.c + src/crypto/argon2_hasher/hash/argon2/Argon2.cpp + src/crypto/argon2_hasher/hash/argon2/argon2profile_4_1_256.c + src/crypto/argon2_hasher/hash/argon2/argon2profile_3_1_512.c + src/crypto/argon2_hasher/hash/Hasher.cpp + src/core/HasherConfig.cpp) + +set(SOURCE_CPU_HASHER src/crypto/argon2_hasher/hash/cpu/CpuHasher.cpp src/crypto/argon2_hasher/hash/cpu/CpuHasher.h) + +set(SOURCE_OPENCL_HASHER src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h + src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.h) + +set(SOURCE_CUDA_HASHER src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h + src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu) + +set(ARGON2_FILL_BLOCKS_SRC + src/crypto/argon2_hasher/hash/cpu/argon2_opt/implementation.c + src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-opt.h + src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-ref.h + src/crypto/argon2_hasher/hash/argon2/Defs.h + src/crypto/argon2_hasher/hash/argon2/blake2/blake2-impl.h) if (WIN32) set(SOURCES_OS @@ -167,8 +187,7 @@ if (WIN32) src/App_win.cpp src/base/io/Json_win.cpp src/common/Platform_win.cpp - src/Mem_win.cpp - ) + ) add_definitions(/DWIN32) set(EXTRA_LIBS ws2_32 psapi iphlpapi userenv) @@ -177,15 +196,13 @@ elseif (APPLE) src/App_unix.cpp 
src/base/io/Json_unix.cpp src/common/Platform_mac.cpp - src/Mem_unix.cpp - ) + ) else() set(SOURCES_OS src/App_unix.cpp src/base/io/Json_unix.cpp src/common/Platform_unix.cpp - src/Mem_unix.cpp - ) + ) if (CMAKE_SYSTEM_NAME STREQUAL FreeBSD) set(EXTRA_LIBS kvm pthread) @@ -228,8 +245,6 @@ else() endif() include(cmake/OpenSSL.cmake) -include(cmake/asm.cmake) -include(cmake/cn-gpu.cmake) CHECK_INCLUDE_FILE (syslog.h HAVE_SYSLOG_H) if (HAVE_SYSLOG_H) @@ -237,22 +252,6 @@ if (HAVE_SYSLOG_H) set(SOURCES_SYSLOG src/common/log/SysLog.h src/common/log/SysLog.cpp) endif() -if (NOT WITH_AEON) - add_definitions(/DXMRIG_NO_AEON) -endif() - -if (NOT WITH_SUMO) - add_definitions(/DXMRIG_NO_SUMO) -endif() - -if (NOT WITH_IPBC) - add_definitions(/DXMRIG_NO_IPBC) -endif() - -if (NOT WITH_CN_PICO) - add_definitions(/DXMRIG_NO_CN_PICO) -endif() - if (WITH_EMBEDDED_CONFIG) add_definitions(/DXMRIG_FEATURE_EMBEDDED_CONFIG) endif() @@ -284,26 +283,118 @@ else() add_definitions(/DXMRIG_NO_API) endif() -if (WITH_ARGON2) - add_subdirectory(src/3rdparty/argon2) - include_directories(src/3rdparty/argon2/include) - set(ARGON2_LIBRARY argon2) -else() - add_definitions(/DXMRIG_NO_ARGON2) - set(ARGON2_LIBRARY "") -endif() - include_directories(src) include_directories(src/3rdparty) include_directories(${UV_INCLUDE_DIR}) +include_directories(src/crypto/argon2_hasher/hash/cpu/cpu_features/include) -if (BUILD_STATIC) - set(CMAKE_EXE_LINKER_FLAGS " -static") -endif() +add_subdirectory(src/crypto/argon2_hasher/hash/cpu/cpu_features) +set_property(TARGET cpu_features PROPERTY POSITION_INDEPENDENT_CODE ON) if (WITH_DEBUG_LOG) add_definitions(/DAPP_DEBUG) endif() -add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES} ${CN_GPU_SOURCES}) -target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} 
${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY}) +add_library(argon2_common SHARED ${HEADERS_COMMON} ${SOURCES_COMMON}) +target_link_libraries(argon2_common ${CMAKE_DL_LIBS}) + +add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES}) +target_link_libraries(${CMAKE_PROJECT_NAME} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB} argon2_common) + +add_library(cpu_hasher MODULE ${SOURCE_CPU_HASHER}) +set_target_properties(cpu_hasher + PROPERTIES + PREFIX "" + SUFFIX ".hsh" + LIBRARY_OUTPUT_DIRECTORY modules + ) +target_link_libraries(cpu_hasher argon2_common cpu_features) +add_dependencies(${CMAKE_PROJECT_NAME} cpu_hasher) + +add_library(argon2_fill_blocks_REF MODULE ${ARGON2_FILL_BLOCKS_SRC}) +set_target_properties(argon2_fill_blocks_REF + PROPERTIES + PREFIX "" + SUFFIX ".opt" + LIBRARY_OUTPUT_DIRECTORY modules + ) +target_compile_definitions(argon2_fill_blocks_REF PRIVATE BUILD_REF=1) +add_dependencies(cpu_hasher argon2_fill_blocks_REF) + +if(ARCH STREQUAL "x86_64") + add_library(argon2_fill_blocks_SSE2 MODULE ${ARGON2_FILL_BLOCKS_SRC}) + add_library(argon2_fill_blocks_SSSE3 MODULE ${ARGON2_FILL_BLOCKS_SRC}) + add_library(argon2_fill_blocks_AVX MODULE ${ARGON2_FILL_BLOCKS_SRC}) + add_library(argon2_fill_blocks_AVX2 MODULE ${ARGON2_FILL_BLOCKS_SRC}) + add_library(argon2_fill_blocks_AVX512F MODULE ${ARGON2_FILL_BLOCKS_SRC}) + set_target_properties(argon2_fill_blocks_SSE2 argon2_fill_blocks_SSSE3 argon2_fill_blocks_AVX argon2_fill_blocks_AVX2 argon2_fill_blocks_AVX512F + PROPERTIES + PREFIX "" + SUFFIX ".opt" + LIBRARY_OUTPUT_DIRECTORY modules + ) + target_compile_options(argon2_fill_blocks_SSE2 PRIVATE -msse2) + target_compile_options(argon2_fill_blocks_SSSE3 PRIVATE -mssse3) + target_compile_options(argon2_fill_blocks_AVX PRIVATE -mavx) + target_compile_options(argon2_fill_blocks_AVX2 PRIVATE -mavx2) + 
target_compile_options(argon2_fill_blocks_AVX512F PRIVATE -mavx512f) + add_dependencies(cpu_hasher argon2_fill_blocks_SSE2 argon2_fill_blocks_SSSE3 argon2_fill_blocks_AVX argon2_fill_blocks_AVX2 argon2_fill_blocks_AVX512F) +endif() + +if(ARCH STREQUAL "arm" OR ARCH STREQUAL "aarch64") + add_library(argon2_fill_blocks_NEON MODULE ${ARGON2_FILL_BLOCKS_SRC}) + set_target_properties(argon2_fill_blocks_NEON + PROPERTIES + PREFIX "" + SUFFIX ".opt" + LIBRARY_OUTPUT_DIRECTORY modules + ) + target_compile_options(argon2_common PRIVATE -D__NEON__) + if(ARCH STREQUAL "arm") + target_compile_options(argon2_fill_blocks_NEON PRIVATE -D__NEON__ -mfpu=neon -funsafe-math-optimizations) + else() + target_compile_options(argon2_fill_blocks_NEON PRIVATE -D__NEON__) + endif(ARCH STREQUAL "arm") + + add_dependencies(cpu_hasher argon2_fill_blocks_NEON) +endif(ARCH STREQUAL "arm" OR ARCH STREQUAL "aarch64") + +if(WITH_OPENCL) + add_definitions(-DWITH_OPENCL) + find_package(OpenCL REQUIRED) + include_directories(${OpenCL_INCLUDE_DIR}) + add_library(opencl_hasher MODULE ${SOURCE_OPENCL_HASHER}) + set_target_properties(opencl_hasher + PROPERTIES + PREFIX "" + SUFFIX ".hsh" + LIBRARY_OUTPUT_DIRECTORY modules + ) + target_link_libraries(opencl_hasher argon2_common ${OpenCL_LIBRARY}) + add_dependencies(${CMAKE_PROJECT_NAME} opencl_hasher) +endif() + +if(WITH_CUDA) + add_definitions(-DWITH_CUDA) + find_package(CUDA REQUIRED) + if(NOT WIN32) + add_definitions(-DPARALLEL_CUDA) + endif() + set( + CUDA_NVCC_FLAGS + ${CUDA_NVCC_FLAGS}; + -O3 -arch=compute_35 -std=c++11 + ) + cuda_add_library(cuda_hasher MODULE ${SOURCE_CUDA_HASHER}) + set_target_properties(cuda_hasher + PROPERTIES + PREFIX "" + SUFFIX ".hsh" + LIBRARY_OUTPUT_DIRECTORY modules + ) + target_link_libraries(cuda_hasher argon2_common) + add_dependencies(${CMAKE_PROJECT_NAME} cuda_hasher) +endif() + + + diff --git a/cmake/TargetArch.cmake b/cmake/TargetArch.cmake new file mode 100644 index 00000000..be66b82f --- /dev/null +++ 
b/cmake/TargetArch.cmake @@ -0,0 +1,116 @@ +# Based on the Qt 5 processor detection code, so should be very accurate +# https://qt.gitorious.org/qt/qtbase/blobs/master/src/corelib/global/qprocessordetection.h +# Currently handles arm (v5, v6, v7), x86 (32/64), ia64, and ppc (32/64) + +# Regarding POWER/PowerPC, just as is noted in the Qt source, +# "There are many more known variants/revisions that we do not handle/detect." + +set(archdetect_c_code " +#if defined(__arm__) || defined(__TARGET_ARCH_ARM) + #error cmake_ARCH arm +#elif defined(__aarch64__) + #error cmake_ARCH aarch64 +#elif defined(__i386) || defined(__i386__) || defined(_M_IX86) + #error cmake_ARCH i386 +#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64) + #error cmake_ARCH x86_64 +#elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64) + #error cmake_ARCH ia64 +#elif defined(__ppc__) || defined(__ppc) || defined(__powerpc__) \\ + || defined(_ARCH_COM) || defined(_ARCH_PWR) || defined(_ARCH_PPC) \\ + || defined(_M_MPPC) || defined(_M_PPC) + #if defined(__ppc64__) || defined(__powerpc64__) || defined(__64BIT__) + #error cmake_ARCH ppc64 + #else + #error cmake_ARCH ppc + #endif +#endif + +#error cmake_ARCH unknown +") + +# Set ppc_support to TRUE before including this file or ppc and ppc64 +# will be treated as invalid architectures since they are no longer supported by Apple + +function(target_architecture output_var) + if(APPLE AND CMAKE_OSX_ARCHITECTURES) + # On OS X we use CMAKE_OSX_ARCHITECTURES *if* it was set + # First let's normalize the order of the values + + # Note that it's not possible to compile PowerPC applications if you are using + # the OS X SDK version 10.6 or later - you'll need 10.4/10.5 for that, so we + # disable it by default + # See this page for more information: + # http://stackoverflow.com/questions/5333490/how-can-we-restore-ppc-ppc64-as-well-as-full-10-4-10-5-sdk-support-to-xcode-4 + + # Architecture defaults to i386 or ppc on OS X 
10.5 and earlier, depending on the CPU type detected at runtime. + # On OS X 10.6+ the default is x86_64 if the CPU supports it, i386 otherwise. + + foreach(osx_arch ${CMAKE_OSX_ARCHITECTURES}) + if("${osx_arch}" STREQUAL "ppc" AND ppc_support) + set(osx_arch_ppc TRUE) + elseif("${osx_arch}" STREQUAL "i386") + set(osx_arch_i386 TRUE) + elseif("${osx_arch}" STREQUAL "x86_64") + set(osx_arch_x86_64 TRUE) + elseif("${osx_arch}" STREQUAL "ppc64" AND ppc_support) + set(osx_arch_ppc64 TRUE) + else() + message(FATAL_ERROR "Invalid OS X arch name: ${osx_arch}") + endif() + endforeach() + + # Now add all the architectures in our normalized order + if(osx_arch_ppc) + list(APPEND ARCH ppc) + endif() + + if(osx_arch_i386) + list(APPEND ARCH i386) + endif() + + if(osx_arch_x86_64) + list(APPEND ARCH x86_64) + endif() + + if(osx_arch_ppc64) + list(APPEND ARCH ppc64) + endif() + else() + file(WRITE "${CMAKE_BINARY_DIR}/arch.c" "${archdetect_c_code}") + + enable_language(C) + + # Detect the architecture in a rather creative way... + # This compiles a small C program which is a series of ifdefs that selects a + # particular #error preprocessor directive whose message string contains the + # target architecture. The program will always fail to compile (both because + # file is not a valid C program, and obviously because of the presence of the + # #error preprocessor directives... 
but by exploiting the preprocessor in this + # way, we can detect the correct target architecture even when cross-compiling, + # since the program itself never needs to be run (only the compiler/preprocessor) + try_run( + run_result_unused + compile_result_unused + "${CMAKE_BINARY_DIR}" + "${CMAKE_BINARY_DIR}/arch.c" + COMPILE_OUTPUT_VARIABLE ARCH + CMAKE_FLAGS CMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} + ) + + # Parse the architecture name from the compiler output + string(REGEX MATCH "cmake_ARCH ([a-zA-Z0-9_]+)" ARCH "${ARCH}") + + # Get rid of the value marker leaving just the architecture name + string(REPLACE "cmake_ARCH " "" ARCH "${ARCH}") + + # If we are compiling with an unknown architecture this variable should + # already be set to "unknown" but in the case that it's empty (i.e. due + # to a typo in the code), then set it to unknown + if (NOT ARCH) + set(ARCH unknown) + endif() + endif() + + set(${output_var} "${ARCH}" PARENT_SCOPE) +endfunction() diff --git a/cmake/asm.cmake b/cmake/asm.cmake deleted file mode 100644 index 389f6723..00000000 --- a/cmake/asm.cmake +++ /dev/null @@ -1,45 +0,0 @@ -if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) - set(XMRIG_ASM_LIBRARY "xmrig-asm") - - if (CMAKE_C_COMPILER_ID MATCHES MSVC) - enable_language(ASM_MASM) - - if (MSVC_TOOLSET_VERSION GREATER_EQUAL 141) - set(XMRIG_ASM_FILES - "src/crypto/asm/cn_main_loop.asm" - "src/crypto/asm/CryptonightR_template.asm" - ) - else() - set(XMRIG_ASM_FILES - "src/crypto/asm/win64/cn_main_loop.asm" - "src/crypto/asm/win64/CryptonightR_template.asm" - ) - endif() - - set_property(SOURCE ${XMRIG_ASM_FILES} PROPERTY ASM_MASM) - else() - enable_language(ASM) - - if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU) - set(XMRIG_ASM_FILES - "src/crypto/asm/win64/cn_main_loop.S" - "src/crypto/asm/CryptonightR_template.S" - ) - else() - set(XMRIG_ASM_FILES - "src/crypto/asm/cn_main_loop.S" - "src/crypto/asm/CryptonightR_template.S" - ) - endif() - - set_property(SOURCE 
${XMRIG_ASM_FILES} PROPERTY C) - endif() - - add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILES}) - set(XMRIG_ASM_SOURCES src/crypto/Asm.h src/crypto/Asm.cpp src/crypto/CryptonightR_gen.cpp) - set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C) -else() - set(XMRIG_ASM_SOURCES "") - set(XMRIG_ASM_LIBRARY "") - add_definitions(/DXMRIG_NO_ASM) -endif() diff --git a/cmake/cn-gpu.cmake b/cmake/cn-gpu.cmake deleted file mode 100644 index b529f0b2..00000000 --- a/cmake/cn-gpu.cmake +++ /dev/null @@ -1,23 +0,0 @@ -if (WITH_CN_GPU AND CMAKE_SIZEOF_VOID_P EQUAL 8) - - if (XMRIG_ARM) - set(CN_GPU_SOURCES src/crypto/cn_gpu_arm.cpp) - - if (CMAKE_CXX_COMPILER_ID MATCHES GNU OR CMAKE_CXX_COMPILER_ID MATCHES Clang) - set_source_files_properties(src/crypto/cn_gpu_arm.cpp PROPERTIES COMPILE_FLAGS "-O3") - endif() - else() - set(CN_GPU_SOURCES src/crypto/cn_gpu_avx.cpp src/crypto/cn_gpu_ssse3.cpp) - - if (CMAKE_CXX_COMPILER_ID MATCHES GNU OR CMAKE_CXX_COMPILER_ID MATCHES Clang) - set_source_files_properties(src/crypto/cn_gpu_avx.cpp PROPERTIES COMPILE_FLAGS "-O3 -mavx2") - set_source_files_properties(src/crypto/cn_gpu_ssse3.cpp PROPERTIES COMPILE_FLAGS "-O3") - elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC) - set_source_files_properties(src/crypto/cn_gpu_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX") - endif() - endif() -else() - set(CN_GPU_SOURCES "") - - add_definitions(/DXMRIG_NO_CN_GPU) -endif() diff --git a/doc/ALGORITHMS.md b/doc/ALGORITHMS.md index 835a1d49..9b42ead1 100644 --- a/doc/ALGORITHMS.md +++ b/doc/ALGORITHMS.md @@ -1,17 +1,17 @@ # Algorithms -XMRig uses a different way to specify algorithms, compared to other miners. +NinjaRig uses a different way to specify algorithms, compared to other miners. Algorithm selection splitted to 2 parts: - * Global base algorithm per miner or proxy instance, `algo` option. Possible values: `cryptonight`, `cryptonight-lite`, `cryptonight-heavy`. + * Global base algorithm per miner or proxy instance, `algo` option. 
Possible values: `argon2id`. * Algorithm variant specified separately for each pool, `variant` option. * [Full table for supported algorithm and variants.](https://github.com/xmrig/xmrig-proxy/blob/master/doc/STRATUM_EXT.md#14-algorithm-names-and-variants) #### Example ```json { - "algo": "cryptonight", + "algo": "argon2id", ... "pools": [ { diff --git a/doc/api/1/config.json b/doc/api/1/config.json index 2c74cfba..560ff810 100644 --- a/doc/api/1/config.json +++ b/doc/api/1/config.json @@ -1,5 +1,5 @@ { - "algo": "cryptonight", + "algo": "chukwa", "api": { "port": 44444, "access-token": "TOKEN", @@ -19,16 +19,16 @@ "max-cpu-usage": 75, "pools": [ { - "url": "pool.monero.hashvault.pro:3333", - "user": "48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD", + "url": "publicnode.ydns.eu:4666", + "user": "WrkzZon3ZArBkZVqAH9n6MM2eq2tV6sN9GwqD73hTKuYAyhMYK48ukQPFQssEMXnFMFs3nwekTLiXa9obkxM6f1KA2i73gEcq8", "pass": "x", "keepalive": false, "nicehash": false, "variant": -1 }, { - "url": "pool.supportxmr.com:3333", - "user": "48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD", + "url": "testnet.wrkz.work:5555", + "user": "WrkzZon3ZArBkZVqAH9n6MM2eq2tV6sN9GwqD73hTKuYAyhMYK48ukQPFQssEMXnFMFs3nwekTLiXa9obkxM6f1KA2i73gEcq8", "pass": "x", "keepalive": false, "nicehash": false, diff --git a/doc/api/1/summary.json b/doc/api/1/summary.json index ed3cd128..95519d56 100644 --- a/doc/api/1/summary.json +++ b/doc/api/1/summary.json @@ -1,17 +1,16 @@ { "id": "92f3104f9a2ee78c", "worker_id": "Ubuntu-1604-xenial-64-minimal", - "version": "2.6.0-beta3", + "version": "1.0.0-alpha", "kind": "cpu", - "ua": "XMRig/2.6.0-beta3 (Linux x86_64) libuv/1.8.0 gcc/5.4.0", + "ua": "NinjaRig/1.0.0-alpha (Linux x86_64) libuv/1.8.0 gcc/5.4.0", "cpu": { "brand": "Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz", "aes": true, "x64": true, "sockets": 1 }, - "algo": "cryptonight", - "hugepages": true, + "algo": "chukwa", 
"donate_level": 5, "hashrate": { "total": [ @@ -64,7 +63,7 @@ "error_log": [] }, "connection": { - "pool": "pool.monero.hashvault.pro:3333", + "pool": "publicnode.ydns.eu:4666", "uptime": 953, "ping": 35, "failures": 0, diff --git a/doc/api/1/threads.json b/doc/api/1/threads.json index e536883d..5b302af6 100644 --- a/doc/api/1/threads.json +++ b/doc/api/1/threads.json @@ -1,14 +1,9 @@ { - "hugepages": [ - 4, - 4 - ], "memory": 8388608, "threads": [ { "type": "cpu", - "algo": "cryptonight", - "av": 1, + "algo": "chukwa", "low_power_mode": 1, "affine_to_cpu": 0, "priority": -1, @@ -21,7 +16,7 @@ }, { "type": "cpu", - "algo": "cryptonight", + "algo": "chukwa", "av": 1, "low_power_mode": 1, "affine_to_cpu": 1, @@ -35,7 +30,7 @@ }, { "type": "cpu", - "algo": "cryptonight", + "algo": "chukwa", "av": 1, "low_power_mode": 1, "affine_to_cpu": 2, @@ -49,7 +44,7 @@ }, { "type": "cpu", - "algo": "cryptonight", + "algo": "chukwa", "av": 1, "low_power_mode": 1, "affine_to_cpu": 3, diff --git a/res/app.rc b/res/app.rc index 037d842a..84a9e90d 100644 --- a/res/app.rc +++ b/res/app.rc @@ -24,7 +24,7 @@ VS_VERSION_INFO VERSIONINFO VALUE "FileDescription", APP_DESC VALUE "FileVersion", APP_VERSION VALUE "LegalCopyright", APP_COPYRIGHT - VALUE "OriginalFilename", "xmrig.exe" + VALUE "OriginalFilename", "ninjarig.exe" VALUE "ProductName", APP_NAME VALUE "ProductVersion", APP_VERSION END diff --git a/src/3rdparty/argon2/.gitattributes b/src/3rdparty/argon2/.gitattributes deleted file mode 100644 index 69755b35..00000000 --- a/src/3rdparty/argon2/.gitattributes +++ /dev/null @@ -1,2 +0,0 @@ -*.h linguist-language=C -*.pro linguist-language=QMake diff --git a/src/3rdparty/argon2/.gitignore b/src/3rdparty/argon2/.gitignore deleted file mode 100644 index 5bff3d06..00000000 --- a/src/3rdparty/argon2/.gitignore +++ /dev/null @@ -1,70 +0,0 @@ -# Compiled Object files -*.slo -*.lo -*.o -*.obj - -# Precompiled Headers -*.gch -*.pch - -# Compiled Dynamic libraries -*.so -*.dylib -*.dll - -# 
Fortran module files -*.mod - -# Compiled Static libraries -*.lai -*.la -*.a -*.lib - -# Executables -*.exe -*.out -*.app - -# Autotools + Libtool -/aclocal.m4 -/config.status -/config -/install-sh -**/Makefile -**/Makefile.in -/autom4te.cache/ -/compile -/config.guess -/config.log -/config.sub -/configure -/depcomp -/libtool -/ltmain.sh -/m4/libtool.m4 -/m4/lt*.m4 -/missing -/test-driver -**/.deps/ -**/.dirstamp -**/.libs/ - -# Qt Creator -**/*.user -**/*.user.* -**/build-*/ - -# KDE -**/.directory - -# Vim -*.swp - -# CMake -CMakeFiles/ -*.cmake -CMakeCache.txt -Makefile - diff --git a/src/3rdparty/argon2/.travis.yml b/src/3rdparty/argon2/.travis.yml deleted file mode 100644 index 0298ff65..00000000 --- a/src/3rdparty/argon2/.travis.yml +++ /dev/null @@ -1,23 +0,0 @@ -language: c - -dist: trusty -sudo: false - -compiler: - - clang - - gcc - -env: - - BUILD=cmake BUILD_TYPE=Debug - - BUILD=cmake BUILD_TYPE=Release - - BUILD=autotools - -script: | - case $BUILD in - cmake) - cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE . && make && make test - ;; - autotools) - autoreconf -i && ./configure && make && make check - ;; - esac diff --git a/src/3rdparty/argon2/CMakeLists.txt b/src/3rdparty/argon2/CMakeLists.txt deleted file mode 100644 index a8be1bed..00000000 --- a/src/3rdparty/argon2/CMakeLists.txt +++ /dev/null @@ -1,203 +0,0 @@ -cmake_minimum_required(VERSION 2.6) - -find_program(CCACHE_PROGRAM ccache) -if(CCACHE_PROGRAM) - message(STATUS "-- Argon2: Found ccache package... 
Activating...") - set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}") -endif() - -project(Argon2 C) -set(ARGON2_VERSION 1.0) -set(CMAKE_C_STANDARD 90) -set(CMAKE_C_STANDARD_REQUIRED ON) - -include(CheckCSourceCompiles) -find_package(Threads REQUIRED) - -add_library(argon2-interface INTERFACE) -target_include_directories(argon2-interface INTERFACE - $ - $ -) - -add_library(argon2-internal INTERFACE) -target_include_directories(argon2-internal INTERFACE lib lib/blake2) -target_link_libraries(argon2-internal INTERFACE argon2-interface) - -set(ARGON2_SRC - lib/argon2.c - lib/core.c - lib/encoding.c - lib/genkat.c - lib/impl-select.c - lib/thread.c - lib/blake2/blake2.c -) - -message("-- Argon2: Processor: ${CMAKE_SYSTEM_PROCESSOR}") -message("-- Argon2: Build Type: ${ARCH}") - -if((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64") AND NOT "${ARCH}" STREQUAL "default") - include(CheckCXXSourceRuns) - - # Check for AVX2 - check_cxx_source_runs(" - #include - int main() - { - __m256i a, b, c; - const int src[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }; - int dst[8]; - a = _mm256_loadu_si256( (__m256i*)src ); - b = _mm256_loadu_si256( (__m256i*)src ); - c = _mm256_add_epi32( a, b ); - _mm256_storeu_si256( (__m256i*)dst, c ); - for( int i = 0; i < 8; i++ ){ - if( ( src[i] + src[i] ) != dst[i] ){ - return -1; - } - } - return 0; - }" - HAVE_AVX2_EXTENSIONS) - - if(HAVE_AVX2_EXTENSIONS) - message("-- Argon2: AVX2 Extensions - Enabled") - add_definitions(-DHAVE_AVX2) - if(MSVC) - add_definitions(/arch:AVX2) - endif() - else() - message("-- Argon2: AVX2 Extensions - Disabled") - endif() - - # Check for AVX512 - check_cxx_source_runs(" - #include - int main() - { - __m512i a, b, c; - const int src[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }; - int dst[8]; - a = _mm512_loadu_si512( (__m512i*)src ); - b = _mm512_loadu_si512( (__m512i*)src ); - c = _mm512_add_epi32( a, b ); - _mm512_storeu_si512( (__m512i*)dst, c ); - for( int i = 0; i < 8; i++ ){ - if( 
( src[i] + src[i] ) != dst[i] ){ - return -1; - } - } - return 0; - }" - HAVE_AVX512F_EXTENSIONS) - - if(HAVE_AVX512F_EXTENSIONS) - message("-- Argon2: AVX512 Extensions - Enabled") - add_definitions(-DHAVE_AVX512F) - else() - message("-- Argon2: AVX512 Extensions - Disabled") - endif() - - # Check for SSE2 - check_cxx_source_runs(" - #include - int main() - { - __m128d a, b; - double vals[2] = {0}; - a = _mm_loadu_pd(vals); - b = _mm_add_pd(a,a); - _mm_storeu_pd(vals,b); - return 0; - }" - HAVE_SSE2_EXTENSIONS) - - if(HAVE_SSE2_EXTENSIONS) - message("-- Argon2: SSE2 Extensions - Enabled") - add_definitions(-DHAVE_SSE2) - if(MSVC) - add_definitions(/arch:SSE2) - endif() - else() - message("-- Argon2: SSE2 Extensions - Disabled") - endif() - - # Check for SSE3 - check_cxx_source_runs(" - #include - int main() - { - __m128d a, b; - double vals[2] = {0}; - a = _mm_loadu_pd(vals); - b = _mm_hadd_pd(a,a); - _mm_storeu_pd(vals, b); - return 0; - }" - HAVE_SSE3_EXTENSIONS) - - if(HAVE_SSE3_EXTENSIONS) - message("-- Argon2: SSE3 Extensions - Enabled") - add_definitions(-DHAVE_SSE3) - if(MSVC) - add_definitions(/arch:SSE3) - endif() - else() - message("-- Argon2: SSE3 Extensions - Disabled") - endif() - - # Check for XOP - check_cxx_source_runs(" - #include - int main() - { - __m128i a, b, c; - const int src[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }; - a = _mm_load_si128( (__m128i*)src ); - b = _mm_load_si128( (__m128i*)src ); - c = _mm_roti_epi64(*a, b); - return 0; - }" - HAVE_XOP_EXTENSIONS) - - if(HAVE_XOP_EXTENSIONS) - message("-- Argon2: XOP Extensions - Enabled") - add_definitions(-DHAVE_XOP) - else() - message("-- Argon2: XOP Extensions - Disabled") - endif() - - list(APPEND ARGON2_SRC - arch/x86_64/lib/argon2-sse2.c - arch/x86_64/lib/argon2-ssse3.c - arch/x86_64/lib/argon2-xop.c - arch/x86_64/lib/argon2-avx2.c - arch/x86_64/lib/argon2-avx512f.c - arch/x86_64/lib/argon2-arch.c - ) -else() - list(APPEND ARGON2_SRC - arch/generic/lib/argon2-arch.c - ) -endif() - 
-add_library(argon2 STATIC ${ARGON2_SRC}) - -target_compile_definitions(argon2 - PUBLIC "A2_VISCTL" -) - -target_link_libraries(argon2 - PUBLIC argon2-interface ${CMAKE_THREAD_LIBS_INIT} - PRIVATE argon2-internal -) - -set_property(TARGET argon2 PROPERTY C_STANDARD 90) -set_property(TARGET argon2 PROPERTY VERSION ${Upstream_VERSION}) -set_property(TARGET argon2 PROPERTY SOVERSION 1) -set_property(TARGET argon2 PROPERTY INTERFACE_ARGON2_MAJOR_VERSION 1) -set_property(TARGET argon2 APPEND PROPERTY - COMPATIBLE_INTERFACE_STRING ARGON2_MAJOR_VERSION -) - diff --git a/src/3rdparty/argon2/Makefile.am b/src/3rdparty/argon2/Makefile.am deleted file mode 100644 index fffb45ca..00000000 --- a/src/3rdparty/argon2/Makefile.am +++ /dev/null @@ -1,125 +0,0 @@ -ACLOCAL_AMFLAGS = -I m4 - -include_HEADERS = include/argon2.h -lib_LTLIBRARIES = libargon2.la -noinst_LTLIBRARIES = - -bin_PROGRAMS = argon2 -noinst_PROGRAMS = argon2-genkat argon2-bench2 argon2-test - -TESTS = argon2-test - -AM_CPPFLAGS = \ - -I$(srcdir)/include \ - -I$(srcdir)/lib \ - -I$(srcdir)/arch/@ARCH@/include \ - -I$(srcdir)/arch/@ARCH@/lib - -libargon2_la_CFLAGS = @PTHREAD_CFLAGS@ -libargon2_la_LIBADD = @PTHREAD_LIBS@ -libargon2_la_SOURCES = \ - lib/argon2.c \ - lib/core.c \ - lib/encoding.c \ - lib/genkat.c \ - lib/impl-select.c \ - lib/thread.c \ - lib/blake2/blake2.c \ - lib/argon2-template-64.h \ - lib/core.h \ - lib/encoding.h \ - lib/genkat.h \ - lib/impl-select.h \ - lib/thread.h \ - lib/blake2/blake2.h \ - lib/blake2/blake2-impl.h - -if ARCH_X86_64 -noinst_LTLIBRARIES += \ - libargon2-sse2.la \ - libargon2-ssse3.la \ - libargon2-xop.la \ - libargon2-avx2.la \ - libargon2-avx512f.la -libargon2_la_LIBADD += \ - libargon2-sse2.la \ - libargon2-ssse3.la \ - libargon2-xop.la \ - libargon2-avx2.la \ - libargon2-avx512f.la - -libargon2_sse2_la_SOURCES = \ - arch/@ARCH@/lib/argon2-sse2.c \ - arch/@ARCH@/lib/argon2-sse2.h \ - arch/@ARCH@/lib/argon2-template-128.h -libargon2_sse2_la_CPPFLAGS = $(AM_CPPFLAGS) 
-libargon2_sse2_la_CFLAGS = $(AM_CFLAGS) -if HAVE_SSE2 -libargon2_sse2_la_CPPFLAGS += -DHAVE_SSE2 -libargon2_sse2_la_CFLAGS += -msse2 -endif # HAVE_SSE2 - -libargon2_ssse3_la_SOURCES = \ - arch/@ARCH@/lib/argon2-ssse3.c \ - arch/@ARCH@/lib/argon2-ssse3.h \ - arch/@ARCH@/lib/argon2-template-128.h -libargon2_ssse3_la_CPPFLAGS = $(AM_CPPFLAGS) -libargon2_ssse3_la_CFLAGS = $(AM_CFLAGS) -if HAVE_SSSE3 -libargon2_ssse3_la_CPPFLAGS += -DHAVE_SSSE3 -libargon2_ssse3_la_CFLAGS += -mssse3 -endif # HAVE_SSSE3 - -libargon2_xop_la_SOURCES = \ - arch/@ARCH@/lib/argon2-xop.c \ - arch/@ARCH@/lib/argon2-xop.h \ - arch/@ARCH@/lib/argon2-template-128.h -libargon2_xop_la_CPPFLAGS = $(AM_CPPFLAGS) -libargon2_xop_la_CFLAGS = $(AM_CFLAGS) -if HAVE_XOP -libargon2_xop_la_CPPFLAGS += -DHAVE_XOP -libargon2_xop_la_CFLAGS += -mxop -endif # HAVE_XOP - -libargon2_avx2_la_SOURCES = \ - arch/@ARCH@/lib/argon2-avx2.c \ - arch/@ARCH@/lib/argon2-avx2.h -libargon2_avx2_la_CPPFLAGS = $(AM_CPPFLAGS) -libargon2_avx2_la_CFLAGS = $(AM_CFLAGS) -if HAVE_AVX2 -libargon2_avx2_la_CPPFLAGS += -DHAVE_AVX2 -libargon2_avx2_la_CFLAGS += -mavx2 -endif # HAVE_AVX2 - -libargon2_avx512f_la_SOURCES = \ - arch/@ARCH@/lib/argon2-avx512f.c \ - arch/@ARCH@/lib/argon2-avx512f.h -libargon2_avx512f_la_CPPFLAGS = $(AM_CPPFLAGS) -libargon2_avx512f_la_CFLAGS = $(AM_CFLAGS) -if HAVE_AVX512F -libargon2_avx512f_la_CPPFLAGS += -DHAVE_AVX512F -libargon2_avx512f_la_CFLAGS += -mavx512f -endif # HAVE_AVX512F - -libargon2_la_SOURCES += \ - arch/@ARCH@/lib/argon2-arch.c \ - arch/@ARCH@/lib/cpu-flags.c \ - arch/@ARCH@/lib/cpu-flags.h -endif # ARCH_X86_64 - -if ARCH_GENERIC -libargon2_la_SOURCES += \ - arch/@ARCH@/lib/argon2-arch.c -endif # ARCH_GENERIC - -argon2_LDADD = libargon2.la -argon2_SOURCES = src/run.c lib/core.h - -argon2_genkat_LDADD = libargon2.la -argon2_genkat_SOURCES = src/genkat.c - -argon2_bench2_LDADD = libargon2.la -lrt -argon2_bench2_SOURCES = src/bench2.c src/timing.h - -argon2_test_LDADD = libargon2.la 
-argon2_test_SOURCES = tests/test.c diff --git a/src/3rdparty/argon2/README.md b/src/3rdparty/argon2/README.md deleted file mode 100644 index 254e26da..00000000 --- a/src/3rdparty/argon2/README.md +++ /dev/null @@ -1,58 +0,0 @@ -# Argon2 [![Build Status](https://travis-ci.org/WOnder93/argon2.svg?branch=master)](https://travis-ci.org/WOnder93/argon2) -A multi-arch library implementing the Argon2 password hashing algorithm. - -This project is based on the [original source code](https://github.com/P-H-C/phc-winner-argon2) by the Argon2 authors. The goal of this project is to provide efficient Argon2 implementations for various HW architectures (x86, SSE, ARM, PowerPC, ...). - -For the x86_64 architecture, the library implements a simple CPU dispatch which automatically selects the best implementation based on CPU flags and quick benchmarks. - -# Building -## Using GNU autotools - -To prepare the build environment, run: -```bash -autoreconf -i -./configure -``` - -After that, just run `make` to build the library. - -### Running tests -After configuring the build environment, run `make check` to run the tests. - -### Architecture options -You can specify the target architecture by passing the `--host=...` flag to `./configure`. - -Supported architectures: - * `x86_64` – 64-bit x86 architecture - * `generic` – use generic C impementation - -## Using CMake - -To prepare the build environment, run: -```bash -cmake -DCMAKE_BUILD_TYPE=Release . -``` - -Then you can run `make` to build the library. - -## Using QMake/Qt Creator -A [QMake](http://doc.qt.io/qt-4.8/qmake-manual.html) project is also available in the `qmake` directory. You can open it in the [Qt Creator IDE](http://wiki.qt.io/Category:Tools::QtCreator) or build it from terminal: -```bash -cd qmake -# see table below for the list of possible ARCH and CONFIG values -qmake ARCH=... CONFIG+=... -make -``` - -### Architecture options -For QMake builds you can configure support for different architectures. 
Use the `ARCH` variable to choose the architecture and the `CONFIG` variable to set additional options. - -Supported architectures: - * `x86_64` – 64-bit x86 architecture - * QMake config flags: - * `USE_SSE2` – use SSE2 instructions - * `USE_SSSE3` – use SSSE3 instructions - * `USE_XOP` – use XOP instructions - * `USE_AVX2` – use AVX2 instructions - * `USE_AVX512F` – use AVX-512F instructions - * `generic` – use generic C impementation diff --git a/src/3rdparty/argon2/arch/generic/lib/argon2-arch.c b/src/3rdparty/argon2/arch/generic/lib/argon2-arch.c deleted file mode 100644 index 39abadee..00000000 --- a/src/3rdparty/argon2/arch/generic/lib/argon2-arch.c +++ /dev/null @@ -1,20 +0,0 @@ -#include -#include -#include - -#include "impl-select.h" - -#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) - -#include "argon2-template-64.h" - -void fill_segment_default(const argon2_instance_t *instance, - argon2_position_t position) -{ - fill_segment_64(instance, position); -} - -void argon2_get_impl_list(argon2_impl_list *list) -{ - list->count = 0; -} diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-arch.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-arch.c deleted file mode 100644 index 1d54b657..00000000 --- a/src/3rdparty/argon2/arch/x86_64/lib/argon2-arch.c +++ /dev/null @@ -1,38 +0,0 @@ -#include -#include -#include - -#include "impl-select.h" - -#include "argon2-sse2.h" -#include "argon2-ssse3.h" -#include "argon2-xop.h" -#include "argon2-avx2.h" -#include "argon2-avx512f.h" - -/* NOTE: there is no portable intrinsic for 64-bit rotate, but any - * sane compiler should be able to compile this into a ROR instruction: */ -#define rotr64(x, n) ((x) >> (n)) | ((x) << (64 - (n))) - -#include "argon2-template-64.h" - -void fill_segment_default(const argon2_instance_t *instance, - argon2_position_t position) -{ - fill_segment_64(instance, position); -} - -void argon2_get_impl_list(argon2_impl_list *list) -{ - static const argon2_impl IMPLS[] = { - { "x86_64", 
NULL, fill_segment_default }, - { "SSE2", check_sse2, fill_segment_sse2 }, - { "SSSE3", check_ssse3, fill_segment_ssse3 }, - { "XOP", check_xop, fill_segment_xop }, - { "AVX2", check_avx2, fill_segment_avx2 }, - { "AVX-512F", check_avx512f, fill_segment_avx512f }, - }; - - list->count = sizeof(IMPLS) / sizeof(IMPLS[0]); - list->entries = IMPLS; -} diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.c deleted file mode 100644 index 5dc41979..00000000 --- a/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.c +++ /dev/null @@ -1,341 +0,0 @@ -#include "argon2-avx2.h" - -#ifdef HAVE_AVX2 -#include - -#include - -#define r16 (_mm256_setr_epi8( \ - 2, 3, 4, 5, 6, 7, 0, 1, \ - 10, 11, 12, 13, 14, 15, 8, 9, \ - 18, 19, 20, 21, 22, 23, 16, 17, \ - 26, 27, 28, 29, 30, 31, 24, 25)) - -#define r24 (_mm256_setr_epi8( \ - 3, 4, 5, 6, 7, 0, 1, 2, \ - 11, 12, 13, 14, 15, 8, 9, 10, \ - 19, 20, 21, 22, 23, 16, 17, 18, \ - 27, 28, 29, 30, 31, 24, 25, 26)) - -#define ror64_16(x) _mm256_shuffle_epi8((x), r16) -#define ror64_24(x) _mm256_shuffle_epi8((x), r24) -#define ror64_32(x) _mm256_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) -#define ror64_63(x) \ - _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x))) - -static __m256i f(__m256i x, __m256i y) -{ - __m256i z = _mm256_mul_epu32(x, y); - return _mm256_add_epi64(_mm256_add_epi64(x, y), _mm256_add_epi64(z, z)); -} - -#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = f(A0, B0); \ - A1 = f(A1, B1); \ -\ - D0 = _mm256_xor_si256(D0, A0); \ - D1 = _mm256_xor_si256(D1, A1); \ -\ - D0 = ror64_32(D0); \ - D1 = ror64_32(D1); \ -\ - C0 = f(C0, D0); \ - C1 = f(C1, D1); \ -\ - B0 = _mm256_xor_si256(B0, C0); \ - B1 = _mm256_xor_si256(B1, C1); \ -\ - B0 = ror64_24(B0); \ - B1 = ror64_24(B1); \ - } while ((void)0, 0) - -#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = f(A0, B0); \ - A1 = f(A1, B1); \ -\ - D0 = _mm256_xor_si256(D0, A0); \ - D1 = 
_mm256_xor_si256(D1, A1); \ -\ - D0 = ror64_16(D0); \ - D1 = ror64_16(D1); \ -\ - C0 = f(C0, D0); \ - C1 = f(C1, D1); \ -\ - B0 = _mm256_xor_si256(B0, C0); \ - B1 = _mm256_xor_si256(B1, C1); \ -\ - B0 = ror64_63(B0); \ - B1 = ror64_63(B1); \ - } while ((void)0, 0) - -#define DIAGONALIZE1(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \ - B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \ -\ - C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ - C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ -\ - D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \ - D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \ - } while ((void)0, 0) - -#define UNDIAGONALIZE1(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \ - B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \ -\ - C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ - C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ -\ - D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \ - D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \ - } while ((void)0, 0) - -#define DIAGONALIZE2(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m256i tmp1, tmp2; \ - tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \ - tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \ - B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ - B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ -\ - tmp1 = C0; \ - C0 = C1; \ - C1 = tmp1; \ -\ - tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \ - tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \ - D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ - D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ - } while ((void)0, 0) - -#define UNDIAGONALIZE2(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m256i tmp1, tmp2; \ - tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \ - tmp2 = _mm256_blend_epi32(B0, B1, 
0x33); \ - B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ - B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ -\ - tmp1 = C0; \ - C0 = C1; \ - C1 = tmp1; \ -\ - tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \ - tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \ - D1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ - D0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ - } while ((void)0, 0) - -#define BLAKE2_ROUND1(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - DIAGONALIZE1(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - UNDIAGONALIZE1(A0, B0, C0, D0, A1, B1, C1, D1); \ - } while ((void)0, 0) - -#define BLAKE2_ROUND2(A0, A1, B0, B1, C0, C1, D0, D1) \ - do { \ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - DIAGONALIZE2(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - UNDIAGONALIZE2(A0, B0, C0, D0, A1, B1, C1, D1); \ - } while ((void)0, 0) - -enum { - ARGON2_HWORDS_IN_BLOCK = ARGON2_OWORDS_IN_BLOCK / 2, -}; - -static void fill_block(__m256i *s, const block *ref_block, block *next_block, - int with_xor) -{ - __m256i block_XY[ARGON2_HWORDS_IN_BLOCK]; - unsigned int i; - - if (with_xor) { - for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { - s[i] =_mm256_xor_si256( - s[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i)); - block_XY[i] = _mm256_xor_si256( - s[i], _mm256_loadu_si256((const __m256i *)next_block->v + i)); - } - - } else { - for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { - block_XY[i] = s[i] =_mm256_xor_si256( - s[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i)); - } - } - - for (i = 0; i < 4; ++i) { - BLAKE2_ROUND1( - s[8 * i + 0], s[8 * i + 1], s[8 * i + 2], s[8 * i + 3], - s[8 * i + 4], s[8 * i + 5], s[8 * i + 6], s[8 * i + 7]); - } - - for (i = 0; i 
< 4; ++i) { - BLAKE2_ROUND2( - s[4 * 0 + i], s[4 * 1 + i], s[4 * 2 + i], s[4 * 3 + i], - s[4 * 4 + i], s[4 * 5 + i], s[4 * 6 + i], s[4 * 7 + i]); - } - - for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { - s[i] = _mm256_xor_si256(s[i], block_XY[i]); - _mm256_storeu_si256((__m256i *)next_block->v + i, s[i]); - } -} - -static void next_addresses(block *address_block, block *input_block) -{ - /*Temporary zero-initialized blocks*/ - __m256i zero_block[ARGON2_HWORDS_IN_BLOCK]; - __m256i zero2_block[ARGON2_HWORDS_IN_BLOCK]; - - memset(zero_block, 0, sizeof(zero_block)); - memset(zero2_block, 0, sizeof(zero2_block)); - - /*Increasing index counter*/ - input_block->v[6]++; - - /*First iteration of G*/ - fill_block(zero_block, input_block, address_block, 0); - - /*Second iteration of G*/ - fill_block(zero2_block, address_block, address_block, 0); -} - -void fill_segment_avx2(const argon2_instance_t *instance, - argon2_position_t position) -{ - block *ref_block = NULL, *curr_block = NULL; - block address_block, input_block; - uint64_t pseudo_rand, ref_index, ref_lane; - uint32_t prev_offset, curr_offset; - uint32_t starting_index, i; - __m256i state[ARGON2_HWORDS_IN_BLOCK]; - int data_independent_addressing; - - if (instance == NULL) { - return; - } - - data_independent_addressing = (instance->type == Argon2_i) || - (instance->type == Argon2_id && (position.pass == 0) && - (position.slice < ARGON2_SYNC_POINTS / 2)); - - if (data_independent_addressing) { - init_block_value(&input_block, 0); - - input_block.v[0] = position.pass; - input_block.v[1] = position.lane; - input_block.v[2] = position.slice; - input_block.v[3] = instance->memory_blocks; - input_block.v[4] = instance->passes; - input_block.v[5] = instance->type; - } - - starting_index = 0; - - if ((0 == position.pass) && (0 == position.slice)) { - starting_index = 2; /* we have already generated the first two blocks */ - - /* Don't forget to generate the first block of addresses: */ - if (data_independent_addressing) { - 
next_addresses(&address_block, &input_block); - } - } - - /* Offset of the current block */ - curr_offset = position.lane * instance->lane_length + - position.slice * instance->segment_length + starting_index; - - if (0 == curr_offset % instance->lane_length) { - /* Last block in this lane */ - prev_offset = curr_offset + instance->lane_length - 1; - } else { - /* Previous block */ - prev_offset = curr_offset - 1; - } - - memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE); - - for (i = starting_index; i < instance->segment_length; - ++i, ++curr_offset, ++prev_offset) { - /*1.1 Rotating prev_offset if needed */ - if (curr_offset % instance->lane_length == 1) { - prev_offset = curr_offset - 1; - } - - /* 1.2 Computing the index of the reference block */ - /* 1.2.1 Taking pseudo-random value from the previous block */ - if (data_independent_addressing) { - if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { - next_addresses(&address_block, &input_block); - } - pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; - } else { - pseudo_rand = instance->memory[prev_offset].v[0]; - } - - /* 1.2.2 Computing the lane of the reference block */ - ref_lane = ((pseudo_rand >> 32)) % instance->lanes; - - if ((position.pass == 0) && (position.slice == 0)) { - /* Can not reference other lanes yet */ - ref_lane = position.lane; - } - - /* 1.2.3 Computing the number of possible reference block within the - * lane. 
- */ - position.index = i; - ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, - ref_lane == position.lane); - - /* 2 Creating a new block */ - ref_block = - instance->memory + instance->lane_length * ref_lane + ref_index; - curr_block = instance->memory + curr_offset; - - /* version 1.2.1 and earlier: overwrite, not XOR */ - if (0 == position.pass || ARGON2_VERSION_10 == instance->version) { - fill_block(state, ref_block, curr_block, 0); - } else { - fill_block(state, ref_block, curr_block, 1); - } - } -} - -int check_avx2(void) -{ - return 1; -} - -#else - -void fill_segment_avx2(const argon2_instance_t *instance, - argon2_position_t position) -{ -} - -int check_avx2(void) -{ - return 0; -} - -#endif diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.h b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.h deleted file mode 100644 index 8abdb8a5..00000000 --- a/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef ARGON2_AVX2_H -#define ARGON2_AVX2_H - -#include "core.h" - -void fill_segment_avx2(const argon2_instance_t *instance, - argon2_position_t position); - -int check_avx2(void); - -#endif // ARGON2_AVX2_H diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.c deleted file mode 100644 index f6de135b..00000000 --- a/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.c +++ /dev/null @@ -1,326 +0,0 @@ -#include "argon2-avx512f.h" - -#ifdef HAVE_AVX512F -#include -#include - -#include - -#define ror64(x, n) _mm512_ror_epi64((x), (n)) - -static __m512i f(__m512i x, __m512i y) -{ - __m512i z = _mm512_mul_epu32(x, y); - return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z)); -} - -#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = f(A0, B0); \ - A1 = f(A1, B1); \ -\ - D0 = _mm512_xor_si512(D0, A0); \ - D1 = _mm512_xor_si512(D1, A1); \ -\ - D0 = ror64(D0, 32); \ - D1 = ror64(D1, 32); \ -\ - C0 = f(C0, D0); \ - 
C1 = f(C1, D1); \ -\ - B0 = _mm512_xor_si512(B0, C0); \ - B1 = _mm512_xor_si512(B1, C1); \ -\ - B0 = ror64(B0, 24); \ - B1 = ror64(B1, 24); \ - } while ((void)0, 0) - -#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = f(A0, B0); \ - A1 = f(A1, B1); \ -\ - D0 = _mm512_xor_si512(D0, A0); \ - D1 = _mm512_xor_si512(D1, A1); \ -\ - D0 = ror64(D0, 16); \ - D1 = ror64(D1, 16); \ -\ - C0 = f(C0, D0); \ - C1 = f(C1, D1); \ -\ - B0 = _mm512_xor_si512(B0, C0); \ - B1 = _mm512_xor_si512(B1, C1); \ -\ - B0 = ror64(B0, 63); \ - B1 = ror64(B1, 63); \ - } while ((void)0, 0) - -#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \ - B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \ -\ - C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ - C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ -\ - D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \ - D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \ - } while ((void)0, 0) - -#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \ - B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \ -\ - C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ - C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ -\ - D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \ - D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \ - } while ((void)0, 0) - -#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ - } while ((void)0, 0) - -#define SWAP_HALVES(A0, A1) \ - do { \ - __m512i t0, t1; \ - t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \ - t1 = 
_mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \ - A0 = t0; \ - A1 = t1; \ - } while((void)0, 0) - -#define SWAP_QUARTERS(A0, A1) \ - do { \ - SWAP_HALVES(A0, A1); \ - A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \ - A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \ - } while((void)0, 0) - -#define UNSWAP_QUARTERS(A0, A1) \ - do { \ - A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \ - A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \ - SWAP_HALVES(A0, A1); \ - } while((void)0, 0) - -#define BLAKE2_ROUND1(A0, C0, B0, D0, A1, C1, B1, D1) \ - do { \ - SWAP_HALVES(A0, B0); \ - SWAP_HALVES(C0, D0); \ - SWAP_HALVES(A1, B1); \ - SWAP_HALVES(C1, D1); \ - BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \ - SWAP_HALVES(A0, B0); \ - SWAP_HALVES(C0, D0); \ - SWAP_HALVES(A1, B1); \ - SWAP_HALVES(C1, D1); \ - } while ((void)0, 0) - -#define BLAKE2_ROUND2(A0, A1, B0, B1, C0, C1, D0, D1) \ - do { \ - SWAP_QUARTERS(A0, A1); \ - SWAP_QUARTERS(B0, B1); \ - SWAP_QUARTERS(C0, C1); \ - SWAP_QUARTERS(D0, D1); \ - BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \ - UNSWAP_QUARTERS(A0, A1); \ - UNSWAP_QUARTERS(B0, B1); \ - UNSWAP_QUARTERS(C0, C1); \ - UNSWAP_QUARTERS(D0, D1); \ - } while ((void)0, 0) - -enum { - ARGON2_VECS_IN_BLOCK = ARGON2_OWORDS_IN_BLOCK / 4, -}; - -static void fill_block(__m512i *s, const block *ref_block, block *next_block, - int with_xor) -{ - __m512i block_XY[ARGON2_VECS_IN_BLOCK]; - unsigned int i; - - if (with_xor) { - for (i = 0; i < ARGON2_VECS_IN_BLOCK; i++) { - s[i] =_mm512_xor_si512( - s[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i)); - block_XY[i] = _mm512_xor_si512( - s[i], _mm512_loadu_si512((const __m512i *)next_block->v + i)); - } - - } else { - for (i = 0; i < ARGON2_VECS_IN_BLOCK; i++) { - block_XY[i] = s[i] =_mm512_xor_si512( - s[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i)); - } - } - - for (i = 0; i 
< 2; ++i) { - BLAKE2_ROUND1( - s[8 * i + 0], s[8 * i + 1], s[8 * i + 2], s[8 * i + 3], - s[8 * i + 4], s[8 * i + 5], s[8 * i + 6], s[8 * i + 7]); - } - - for (i = 0; i < 2; ++i) { - BLAKE2_ROUND2( - s[2 * 0 + i], s[2 * 1 + i], s[2 * 2 + i], s[2 * 3 + i], - s[2 * 4 + i], s[2 * 5 + i], s[2 * 6 + i], s[2 * 7 + i]); - } - - for (i = 0; i < ARGON2_VECS_IN_BLOCK; i++) { - s[i] = _mm512_xor_si512(s[i], block_XY[i]); - _mm512_storeu_si512((__m512i *)next_block->v + i, s[i]); - } -} - -static void next_addresses(block *address_block, block *input_block) -{ - /*Temporary zero-initialized blocks*/ - __m512i zero_block[ARGON2_VECS_IN_BLOCK]; - __m512i zero2_block[ARGON2_VECS_IN_BLOCK]; - - memset(zero_block, 0, sizeof(zero_block)); - memset(zero2_block, 0, sizeof(zero2_block)); - - /*Increasing index counter*/ - input_block->v[6]++; - - /*First iteration of G*/ - fill_block(zero_block, input_block, address_block, 0); - - /*Second iteration of G*/ - fill_block(zero2_block, address_block, address_block, 0); -} - -void fill_segment_avx512f(const argon2_instance_t *instance, - argon2_position_t position) -{ - block *ref_block = NULL, *curr_block = NULL; - block address_block, input_block; - uint64_t pseudo_rand, ref_index, ref_lane; - uint32_t prev_offset, curr_offset; - uint32_t starting_index, i; - __m512i state[ARGON2_VECS_IN_BLOCK]; - int data_independent_addressing; - - if (instance == NULL) { - return; - } - - data_independent_addressing = (instance->type == Argon2_i) || - (instance->type == Argon2_id && (position.pass == 0) && - (position.slice < ARGON2_SYNC_POINTS / 2)); - - if (data_independent_addressing) { - init_block_value(&input_block, 0); - - input_block.v[0] = position.pass; - input_block.v[1] = position.lane; - input_block.v[2] = position.slice; - input_block.v[3] = instance->memory_blocks; - input_block.v[4] = instance->passes; - input_block.v[5] = instance->type; - } - - starting_index = 0; - - if ((0 == position.pass) && (0 == position.slice)) { - 
starting_index = 2; /* we have already generated the first two blocks */ - - /* Don't forget to generate the first block of addresses: */ - if (data_independent_addressing) { - next_addresses(&address_block, &input_block); - } - } - - /* Offset of the current block */ - curr_offset = position.lane * instance->lane_length + - position.slice * instance->segment_length + starting_index; - - if (0 == curr_offset % instance->lane_length) { - /* Last block in this lane */ - prev_offset = curr_offset + instance->lane_length - 1; - } else { - /* Previous block */ - prev_offset = curr_offset - 1; - } - - memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE); - - for (i = starting_index; i < instance->segment_length; - ++i, ++curr_offset, ++prev_offset) { - /*1.1 Rotating prev_offset if needed */ - if (curr_offset % instance->lane_length == 1) { - prev_offset = curr_offset - 1; - } - - /* 1.2 Computing the index of the reference block */ - /* 1.2.1 Taking pseudo-random value from the previous block */ - if (data_independent_addressing) { - if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { - next_addresses(&address_block, &input_block); - } - pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; - } else { - pseudo_rand = instance->memory[prev_offset].v[0]; - } - - /* 1.2.2 Computing the lane of the reference block */ - ref_lane = ((pseudo_rand >> 32)) % instance->lanes; - - if ((position.pass == 0) && (position.slice == 0)) { - /* Can not reference other lanes yet */ - ref_lane = position.lane; - } - - /* 1.2.3 Computing the number of possible reference block within the - * lane. 
- */ - position.index = i; - ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, - ref_lane == position.lane); - - /* 2 Creating a new block */ - ref_block = - instance->memory + instance->lane_length * ref_lane + ref_index; - curr_block = instance->memory + curr_offset; - - /* version 1.2.1 and earlier: overwrite, not XOR */ - if (0 == position.pass || ARGON2_VERSION_10 == instance->version) { - fill_block(state, ref_block, curr_block, 0); - } else { - fill_block(state, ref_block, curr_block, 1); - } - } -} - -int check_avx512f(void) -{ - return 1; -} - -#else - -void fill_segment_avx512f(const argon2_instance_t *instance, - argon2_position_t position) -{ -} - -int check_avx512f(void) -{ - return 0; -} - -#endif diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.h b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.h deleted file mode 100644 index ba431114..00000000 --- a/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef ARGON2_AVX512F_H -#define ARGON2_AVX512F_H - -#include "core.h" - -void fill_segment_avx512f(const argon2_instance_t *instance, - argon2_position_t position); - -int check_avx512f(void); - -#endif // ARGON2_AVX512F_H diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.c deleted file mode 100644 index 60ffb7bb..00000000 --- a/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.c +++ /dev/null @@ -1,122 +0,0 @@ -#include "argon2-sse2.h" - -#ifdef HAVE_SSE2 -#include - -#define ror64_16(x) \ - _mm_shufflehi_epi16( \ - _mm_shufflelo_epi16((x), _MM_SHUFFLE(0, 3, 2, 1)), \ - _MM_SHUFFLE(0, 3, 2, 1)) -#define ror64_24(x) \ - _mm_xor_si128(_mm_srli_epi64((x), 24), _mm_slli_epi64((x), 40)) -#define ror64_32(x) _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) -#define ror64_63(x) \ - _mm_xor_si128(_mm_srli_epi64((x), 63), _mm_add_epi64((x), (x))) - -static __m128i f(__m128i x, __m128i y) -{ - __m128i z = _mm_mul_epu32(x, y); - 
return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z)); -} - -#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = f(A0, B0); \ - A1 = f(A1, B1); \ -\ - D0 = _mm_xor_si128(D0, A0); \ - D1 = _mm_xor_si128(D1, A1); \ -\ - D0 = ror64_32(D0); \ - D1 = ror64_32(D1); \ -\ - C0 = f(C0, D0); \ - C1 = f(C1, D1); \ -\ - B0 = _mm_xor_si128(B0, C0); \ - B1 = _mm_xor_si128(B1, C1); \ -\ - B0 = ror64_24(B0); \ - B1 = ror64_24(B1); \ - } while ((void)0, 0) - -#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = f(A0, B0); \ - A1 = f(A1, B1); \ -\ - D0 = _mm_xor_si128(D0, A0); \ - D1 = _mm_xor_si128(D1, A1); \ -\ - D0 = ror64_16(D0); \ - D1 = ror64_16(D1); \ -\ - C0 = f(C0, D0); \ - C1 = f(C1, D1); \ -\ - B0 = _mm_xor_si128(B0, C0); \ - B1 = _mm_xor_si128(B1, C1); \ -\ - B0 = ror64_63(B0); \ - B1 = ror64_63(B1); \ - } while ((void)0, 0) - -#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = D0; \ - __m128i t1 = B0; \ - D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \ - D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \ - B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \ - B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \ - } while ((void)0, 0) - -#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = B0; \ - __m128i t1 = D0; \ - B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \ - B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \ - D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \ - D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \ - } while ((void)0, 0) - -#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \ - do { \ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - G1(A0, B0, C1, D0, A1, B1, C0, D1); \ - G2(A0, B0, C1, D0, A1, B1, C0, D1); \ -\ - UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ - } while ((void)0, 0) - -#include 
"argon2-template-128.h" - -void fill_segment_sse2(const argon2_instance_t *instance, - argon2_position_t position) -{ - fill_segment_128(instance, position); -} - -int check_sse2(void) -{ - return 1; -} - -#else - -void fill_segment_sse2(const argon2_instance_t *instance, - argon2_position_t position) -{ -} - -int check_sse2(void) -{ - return 0; -} - -#endif diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.h b/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.h deleted file mode 100644 index 024d503d..00000000 --- a/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef ARGON2_SSE2_H -#define ARGON2_SSE2_H - -#include "core.h" - -void fill_segment_sse2(const argon2_instance_t *instance, - argon2_position_t position); - -int check_sse2(void); - -#endif // ARGON2_SSE2_H diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.c deleted file mode 100644 index 7098ab22..00000000 --- a/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.c +++ /dev/null @@ -1,134 +0,0 @@ -#include "argon2-ssse3.h" - -#ifdef HAVE_SSSE3 -#include - -#include - -#define r16 (_mm_setr_epi8( \ - 2, 3, 4, 5, 6, 7, 0, 1, \ - 10, 11, 12, 13, 14, 15, 8, 9)) - -#define r24 (_mm_setr_epi8( \ - 3, 4, 5, 6, 7, 0, 1, 2, \ - 11, 12, 13, 14, 15, 8, 9, 10)) - -#define ror64_16(x) _mm_shuffle_epi8((x), r16) -#define ror64_24(x) _mm_shuffle_epi8((x), r24) -#define ror64_32(x) _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) -#define ror64_63(x) \ - _mm_xor_si128(_mm_srli_epi64((x), 63), _mm_add_epi64((x), (x))) - -static __m128i f(__m128i x, __m128i y) -{ - __m128i z = _mm_mul_epu32(x, y); - return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z)); -} - -#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = f(A0, B0); \ - A1 = f(A1, B1); \ -\ - D0 = _mm_xor_si128(D0, A0); \ - D1 = _mm_xor_si128(D1, A1); \ -\ - D0 = ror64_32(D0); \ - D1 = ror64_32(D1); \ -\ - C0 = f(C0, D0); \ - C1 = f(C1, D1); \ 
-\ - B0 = _mm_xor_si128(B0, C0); \ - B1 = _mm_xor_si128(B1, C1); \ -\ - B0 = ror64_24(B0); \ - B1 = ror64_24(B1); \ - } while ((void)0, 0) - -#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = f(A0, B0); \ - A1 = f(A1, B1); \ -\ - D0 = _mm_xor_si128(D0, A0); \ - D1 = _mm_xor_si128(D1, A1); \ -\ - D0 = ror64_16(D0); \ - D1 = ror64_16(D1); \ -\ - C0 = f(C0, D0); \ - C1 = f(C1, D1); \ -\ - B0 = _mm_xor_si128(B0, C0); \ - B1 = _mm_xor_si128(B1, C1); \ -\ - B0 = ror64_63(B0); \ - B1 = ror64_63(B1); \ - } while ((void)0, 0) - -#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \ - __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \ - B0 = t0; \ - B1 = t1; \ -\ - t0 = _mm_alignr_epi8(D1, D0, 8); \ - t1 = _mm_alignr_epi8(D0, D1, 8); \ - D0 = t1; \ - D1 = t0; \ - } while ((void)0, 0) - -#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \ - __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \ - B0 = t0; \ - B1 = t1; \ -\ - t0 = _mm_alignr_epi8(D0, D1, 8); \ - t1 = _mm_alignr_epi8(D1, D0, 8); \ - D0 = t1; \ - D1 = t0; \ - } while ((void)0, 0) - -#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \ - do { \ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - G1(A0, B0, C1, D0, A1, B1, C0, D1); \ - G2(A0, B0, C1, D0, A1, B1, C0, D1); \ -\ - UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ - } while ((void)0, 0) - -#include "argon2-template-128.h" - -void fill_segment_ssse3(const argon2_instance_t *instance, - argon2_position_t position) -{ - fill_segment_128(instance, position); -} - -int check_ssse3(void) -{ - return 1; -} - -#else - -void fill_segment_ssse3(const argon2_instance_t *instance, - argon2_position_t position) -{ -} - -int check_ssse3(void) -{ - return 0; -} - -#endif diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.h 
b/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.h deleted file mode 100644 index 139fdacc..00000000 --- a/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef ARGON2_SSSE3_H -#define ARGON2_SSSE3_H - -#include "core.h" - -void fill_segment_ssse3(const argon2_instance_t *instance, - argon2_position_t position); - -int check_ssse3(void); - -#endif // ARGON2_SSSE3_H diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-template-128.h b/src/3rdparty/argon2/arch/x86_64/lib/argon2-template-128.h deleted file mode 100644 index 3062ec00..00000000 --- a/src/3rdparty/argon2/arch/x86_64/lib/argon2-template-128.h +++ /dev/null @@ -1,164 +0,0 @@ -#include - -#include - -#include "core.h" - -static void fill_block(__m128i *s, const block *ref_block, block *next_block, - int with_xor) -{ - __m128i block_XY[ARGON2_OWORDS_IN_BLOCK]; - unsigned int i; - - if (with_xor) { - for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { - s[i] = _mm_xor_si128( - s[i], _mm_loadu_si128((const __m128i *)ref_block->v + i)); - block_XY[i] = _mm_xor_si128( - s[i], _mm_loadu_si128((const __m128i *)next_block->v + i)); - } - } else { - for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { - block_XY[i] = s[i] = _mm_xor_si128( - s[i], _mm_loadu_si128((const __m128i *)ref_block->v + i)); - } - } - - for (i = 0; i < 8; ++i) { - BLAKE2_ROUND( - s[8 * i + 0], s[8 * i + 1], s[8 * i + 2], s[8 * i + 3], - s[8 * i + 4], s[8 * i + 5], s[8 * i + 6], s[8 * i + 7]); - } - - for (i = 0; i < 8; ++i) { - BLAKE2_ROUND( - s[8 * 0 + i], s[8 * 1 + i], s[8 * 2 + i], s[8 * 3 + i], - s[8 * 4 + i], s[8 * 5 + i], s[8 * 6 + i], s[8 * 7 + i]); - } - - for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { - s[i] = _mm_xor_si128(s[i], block_XY[i]); - _mm_storeu_si128((__m128i *)next_block->v + i, s[i]); - } -} - -static void next_addresses(block *address_block, block *input_block) -{ - /*Temporary zero-initialized blocks*/ - __m128i zero_block[ARGON2_OWORDS_IN_BLOCK]; - __m128i 
zero2_block[ARGON2_OWORDS_IN_BLOCK]; - - memset(zero_block, 0, sizeof(zero_block)); - memset(zero2_block, 0, sizeof(zero2_block)); - - /*Increasing index counter*/ - input_block->v[6]++; - - /*First iteration of G*/ - fill_block(zero_block, input_block, address_block, 0); - - /*Second iteration of G*/ - fill_block(zero2_block, address_block, address_block, 0); -} - -static void fill_segment_128(const argon2_instance_t *instance, - argon2_position_t position) -{ - block *ref_block = NULL, *curr_block = NULL; - block address_block, input_block; - uint64_t pseudo_rand, ref_index, ref_lane; - uint32_t prev_offset, curr_offset; - uint32_t starting_index, i; - __m128i state[ARGON2_OWORDS_IN_BLOCK]; - int data_independent_addressing; - - if (instance == NULL) { - return; - } - - data_independent_addressing = (instance->type == Argon2_i) || - (instance->type == Argon2_id && (position.pass == 0) && - (position.slice < ARGON2_SYNC_POINTS / 2)); - - if (data_independent_addressing) { - init_block_value(&input_block, 0); - - input_block.v[0] = position.pass; - input_block.v[1] = position.lane; - input_block.v[2] = position.slice; - input_block.v[3] = instance->memory_blocks; - input_block.v[4] = instance->passes; - input_block.v[5] = instance->type; - } - - starting_index = 0; - - if ((0 == position.pass) && (0 == position.slice)) { - starting_index = 2; /* we have already generated the first two blocks */ - - /* Don't forget to generate the first block of addresses: */ - if (data_independent_addressing) { - next_addresses(&address_block, &input_block); - } - } - - /* Offset of the current block */ - curr_offset = position.lane * instance->lane_length + - position.slice * instance->segment_length + starting_index; - - if (0 == curr_offset % instance->lane_length) { - /* Last block in this lane */ - prev_offset = curr_offset + instance->lane_length - 1; - } else { - /* Previous block */ - prev_offset = curr_offset - 1; - } - - memcpy(state, ((instance->memory + 
prev_offset)->v), ARGON2_BLOCK_SIZE); - - for (i = starting_index; i < instance->segment_length; - ++i, ++curr_offset, ++prev_offset) { - /*1.1 Rotating prev_offset if needed */ - if (curr_offset % instance->lane_length == 1) { - prev_offset = curr_offset - 1; - } - - /* 1.2 Computing the index of the reference block */ - /* 1.2.1 Taking pseudo-random value from the previous block */ - if (data_independent_addressing) { - if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { - next_addresses(&address_block, &input_block); - } - pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; - } else { - pseudo_rand = instance->memory[prev_offset].v[0]; - } - - /* 1.2.2 Computing the lane of the reference block */ - ref_lane = ((pseudo_rand >> 32)) % instance->lanes; - - if ((position.pass == 0) && (position.slice == 0)) { - /* Can not reference other lanes yet */ - ref_lane = position.lane; - } - - /* 1.2.3 Computing the number of possible reference block within the - * lane. - */ - position.index = i; - ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, - ref_lane == position.lane); - - /* 2 Creating a new block */ - ref_block = - instance->memory + instance->lane_length * ref_lane + ref_index; - curr_block = instance->memory + curr_offset; - - /* version 1.2.1 and earlier: overwrite, not XOR */ - if (0 == position.pass || ARGON2_VERSION_10 == instance->version) { - fill_block(state, ref_block, curr_block, 0); - } else { - fill_block(state, ref_block, curr_block, 1); - } - } -} diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.c deleted file mode 100644 index a7f6e399..00000000 --- a/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.c +++ /dev/null @@ -1,122 +0,0 @@ -#include "argon2-xop.h" - -#ifdef HAVE_XOP -#include - -#include - -#define ror64(x, c) _mm_roti_epi64((x), -(c)) - -static __m128i f(__m128i x, __m128i y) -{ - __m128i z = _mm_mul_epu32(x, y); - return _mm_add_epi64(_mm_add_epi64(x, y), 
_mm_add_epi64(z, z)); -} - -#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = f(A0, B0); \ - A1 = f(A1, B1); \ -\ - D0 = _mm_xor_si128(D0, A0); \ - D1 = _mm_xor_si128(D1, A1); \ -\ - D0 = ror64(D0, 32); \ - D1 = ror64(D1, 32); \ -\ - C0 = f(C0, D0); \ - C1 = f(C1, D1); \ -\ - B0 = _mm_xor_si128(B0, C0); \ - B1 = _mm_xor_si128(B1, C1); \ -\ - B0 = ror64(B0, 24); \ - B1 = ror64(B1, 24); \ - } while ((void)0, 0) - -#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = f(A0, B0); \ - A1 = f(A1, B1); \ -\ - D0 = _mm_xor_si128(D0, A0); \ - D1 = _mm_xor_si128(D1, A1); \ -\ - D0 = ror64(D0, 16); \ - D1 = ror64(D1, 16); \ -\ - C0 = f(C0, D0); \ - C1 = f(C1, D1); \ -\ - B0 = _mm_xor_si128(B0, C0); \ - B1 = _mm_xor_si128(B1, C1); \ -\ - B0 = ror64(B0, 63); \ - B1 = ror64(B1, 63); \ - } while ((void)0, 0) - -#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \ - __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \ - B0 = t0; \ - B1 = t1; \ -\ - t0 = _mm_alignr_epi8(D1, D0, 8); \ - t1 = _mm_alignr_epi8(D0, D1, 8); \ - D0 = t1; \ - D1 = t0; \ - } while ((void)0, 0) - -#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \ - __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \ - B0 = t0; \ - B1 = t1; \ -\ - t0 = _mm_alignr_epi8(D0, D1, 8); \ - t1 = _mm_alignr_epi8(D1, D0, 8); \ - D0 = t1; \ - D1 = t0; \ - } while ((void)0, 0) - -#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \ - do { \ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ -\ - G1(A0, B0, C1, D0, A1, B1, C0, D1); \ - G2(A0, B0, C1, D0, A1, B1, C0, D1); \ -\ - UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ - } while ((void)0, 0) - -#include "argon2-template-128.h" - -void fill_segment_xop(const argon2_instance_t *instance, - argon2_position_t position) -{ - fill_segment_128(instance, position); -} - -int check_xop(void) -{ - 
return 1; -} - -#else - -void fill_segment_xop(const argon2_instance_t *instance, - argon2_position_t position) -{ -} - -int check_xop(void) -{ - return 0; -} - -#endif diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.h b/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.h deleted file mode 100644 index 1474a11c..00000000 --- a/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef ARGON2_XOP_H -#define ARGON2_XOP_H - -#include "core.h" - -void fill_segment_xop(const argon2_instance_t *instance, - argon2_position_t position); - -int check_xop(void); - -#endif // ARGON2_XOP_H diff --git a/src/3rdparty/argon2/configure.ac b/src/3rdparty/argon2/configure.ac deleted file mode 100644 index 81607a97..00000000 --- a/src/3rdparty/argon2/configure.ac +++ /dev/null @@ -1,108 +0,0 @@ -dnl --------------------------------------------------------------------- -dnl Copyright (C) 2015, Ondrej Mosnacek -dnl -dnl This program is free software: you can redistribute it and/or -dnl modify it under the terms of the GNU General Public License -dnl as published by the Free Software Foundation: either version 2 -dnl of the License, or (at your option) any later version. -dnl -dnl This program is distributed in the hope that it will be useful, -dnl but WITHOUT ANY WARRANTY; without even the implied warranty of -dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -dnl GNU General Public License for more details. -dnl -dnl You should have received a copy of the GNU General Public License -dnl along with this program. If not, see . 
-dnl --------------------------------------------------------------------- - -AC_CONFIG_MACRO_DIR([m4]) - -AC_INIT([argon2], [0.1], []) -LT_INIT -AM_INIT_AUTOMAKE([foreign subdir-objects]) -AM_SILENT_RULES([yes]) - -AC_PROG_CC -AC_PROG_CC_C89 -AM_PROG_AS -AX_PTHREAD - -AC_CANONICAL_HOST - -AS_CASE([$host_cpu], - dnl [i?86], [ARCH=i386], - [x86_64], [ARCH=x86_64], - [ARCH=generic - AC_MSG_WARN("No code for architecture $host_cpu; using generic implementation")] -) -AC_SUBST([ARCH]) - -AM_CONDITIONAL([ARCH_X86_64], [test "$ARCH" = 'x86_64']) -AM_CONDITIONAL([ARCH_GENERIC], [test "$ARCH" = 'generic']) - -# AX_CHECK_COMPILER_FEATURE(NAME, FLAG, TEST_SOURCE) -# -------------------------- -AC_DEFUN([AX_CHECK_COMPILER_FEATURE], [{ - AX_CHECK_COMPILE_FLAG([-m$2], [HAVE_FLAG=1], [HAVE_FLAG=0]) - HAVE_FEATURE=0 - AS_IF([test "$HAVE_FLAG" = '1'], [{ - AC_MSG_CHECKING("whether C compiler supports $1 with -m$2...") - - CFLAGS_BACKUP="$CFLAGS" - CFLAGS="-m$2" - - AC_COMPILE_IFELSE([AC_LANG_SOURCE([$3])], [HAVE_FEATURE=1]) - - CFLAGS="$CFLAGS_BACKUP" - AS_IF([test "$HAVE_FEATURE" = '1'], [RESULT='yes'], [RESULT='no']) - AC_MSG_RESULT([$RESULT]) - - }]) - HAVE_$1=HAVE_FEATURE - AM_CONDITIONAL([HAVE_$1], [test "$HAVE_FEATURE" = '1']) -}]) - -AX_CHECK_COMPILER_FEATURE([SSE2], [sse2], [[ -#include - -void function_sse2(__m128i *dst, const __m128i *a, const __m128i *b) -{ - *dst = _mm_xor_si128(*a, *b); -} -]]) -AX_CHECK_COMPILER_FEATURE([SSSE3], [ssse3], [[ -#include - -void function_ssse3(__m128i *dst, const __m128i *a, const __m128i *b) -{ - *dst = _mm_shuffle_epi8(*a, *b); -} -]]) -AX_CHECK_COMPILER_FEATURE([XOP], [xop], [[ -#include - -void function_xop(__m128i *dst, const __m128i *a, int b) -{ - *dst = _mm_roti_epi64(*a, b); -} -]]) -AX_CHECK_COMPILER_FEATURE([AVX2], [avx2], [[ -#include - -void function_avx2(__m256i *dst, const __m256i *a, const __m256i *b) -{ - *dst = _mm256_xor_si256(*a, *b); -} -]]) -AX_CHECK_COMPILER_FEATURE([AVX512F], [avx512f], [[ -#include - -void 
function_avx512f(__m512i *dst, const __m512i *a) -{ - *dst = _mm512_ror_epi64(*a, 57); -} -]]) - -AC_CONFIG_FILES([Makefile]) - -AC_OUTPUT diff --git a/src/3rdparty/argon2/include/argon2.h b/src/3rdparty/argon2/include/argon2.h deleted file mode 100644 index 59df71aa..00000000 --- a/src/3rdparty/argon2/include/argon2.h +++ /dev/null @@ -1,478 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication - * along with this software. If not, see - * . - */ - -#ifndef ARGON2_H -#define ARGON2_H - -#include -#include -#include -#include - -/* Symbols visibility control */ -#if defined(_WIN32) || defined(__CYGWIN__) - #if defined(A2_VISCTL) - #if defined(_MSC_VER) - #define ARGON2_PUBLIC __declspec(dllexport) - #else - #define ARGON2_PUBLIC __attribute__ ((dllexport)) - #endif - #else - #if defined(_MSC_VER) - #define ARGON2_PUBLIC __declspec(dllimport) - #else - #define ARGON2_PUBLIC /*__attribute__ ((dllimport))*/ - #endif - #endif - #define ARGON2_LOCAL -#else - #if defined(A2_VISCTL) - #define ARGON2_PUBLIC __attribute__ ((visibility ("default"))) - #define ARGON2_LOCAL __attribute__ ((visibility ("hidden"))) - #else - #define ARGON2_PUBLIC - #define ARGON2_LOCAL - #endif -#endif - -#if defined(__cplusplus) -extern "C" { -#endif - -/* - * Argon2 input parameter restrictions - */ - -/* Minimum and maximum number of lanes (degree of parallelism) */ -#define ARGON2_MIN_LANES UINT32_C(1) -#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF) - -/* Minimum and maximum number of threads */ -#define ARGON2_MIN_THREADS UINT32_C(1) -#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF) - -/* Number of synchronization points between lanes per pass */ -#define ARGON2_SYNC_POINTS UINT32_C(4) - -/* Minimum and maximum digest size in bytes */ -#define ARGON2_MIN_OUTLEN UINT32_C(4) -#define 
ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */ -#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */ - -#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b)) -/* Max memory size is addressing-space/2, topping at 2^32 blocks (4 TB) */ -#define ARGON2_MAX_MEMORY_BITS \ - ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1)) -#define ARGON2_MAX_MEMORY \ - ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS) - -/* Minimum and maximum number of passes */ -#define ARGON2_MIN_TIME UINT32_C(1) -#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum password length in bytes */ -#define ARGON2_MIN_PWD_LENGTH UINT32_C(0) -#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum associated data length in bytes */ -#define ARGON2_MIN_AD_LENGTH UINT32_C(0) -#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum salt length in bytes */ -#define ARGON2_MIN_SALT_LENGTH UINT32_C(8) -#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum key length in bytes */ -#define ARGON2_MIN_SECRET UINT32_C(0) -#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF) - -/* Flags to determine which fields are securely wiped (default = no wipe). */ -#define ARGON2_DEFAULT_FLAGS UINT32_C(0) -#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0) -#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1) -#define ARGON2_FLAG_GENKAT (UINT32_C(1) << 3) - -/* Global flag to determine if we are wiping internal memory buffers. This flag - * is defined in core.c and deafults to 1 (wipe internal memory). 
*/ -extern int FLAG_clear_internal_memory; - -/* Error codes */ -typedef enum Argon2_ErrorCodes { - ARGON2_OK = 0, - - ARGON2_OUTPUT_PTR_NULL = -1, - - ARGON2_OUTPUT_TOO_SHORT = -2, - ARGON2_OUTPUT_TOO_LONG = -3, - - ARGON2_PWD_TOO_SHORT = -4, - ARGON2_PWD_TOO_LONG = -5, - - ARGON2_SALT_TOO_SHORT = -6, - ARGON2_SALT_TOO_LONG = -7, - - ARGON2_AD_TOO_SHORT = -8, - ARGON2_AD_TOO_LONG = -9, - - ARGON2_SECRET_TOO_SHORT = -10, - ARGON2_SECRET_TOO_LONG = -11, - - ARGON2_TIME_TOO_SMALL = -12, - ARGON2_TIME_TOO_LARGE = -13, - - ARGON2_MEMORY_TOO_LITTLE = -14, - ARGON2_MEMORY_TOO_MUCH = -15, - - ARGON2_LANES_TOO_FEW = -16, - ARGON2_LANES_TOO_MANY = -17, - - ARGON2_PWD_PTR_MISMATCH = -18, /* NULL ptr with non-zero length */ - ARGON2_SALT_PTR_MISMATCH = -19, /* NULL ptr with non-zero length */ - ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */ - ARGON2_AD_PTR_MISMATCH = -21, /* NULL ptr with non-zero length */ - - ARGON2_MEMORY_ALLOCATION_ERROR = -22, - - ARGON2_FREE_MEMORY_CBK_NULL = -23, - ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24, - - ARGON2_INCORRECT_PARAMETER = -25, - ARGON2_INCORRECT_TYPE = -26, - - ARGON2_OUT_PTR_MISMATCH = -27, - - ARGON2_THREADS_TOO_FEW = -28, - ARGON2_THREADS_TOO_MANY = -29, - - ARGON2_MISSING_ARGS = -30, - - ARGON2_ENCODING_FAIL = -31, - - ARGON2_DECODING_FAIL = -32, - - ARGON2_THREAD_FAIL = -33, - - ARGON2_DECODING_LENGTH_FAIL = -34, - - ARGON2_VERIFY_MISMATCH = -35 -} argon2_error_codes; - -/* Memory allocator types --- for external allocation */ -typedef int (*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate); -typedef void (*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate); - -/* Argon2 external data structures */ - -/* - ***** - * Context: structure to hold Argon2 inputs: - * output array and its length, - * password and its length, - * salt and its length, - * secret and its length, - * associated data and its length, - * number of passes, amount of used memory (in KBytes, can be rounded up a bit) - * 
number of parallel threads that will be run. - * All the parameters above affect the output hash value. - * Additionally, two function pointers can be provided to allocate and - * deallocate the memory (if NULL, memory will be allocated internally). - * Also, three flags indicate whether to erase password, secret as soon as they - * are pre-hashed (and thus not needed anymore), and the entire memory - ***** - * Simplest situation: you have output array out[8], password is stored in - * pwd[32], salt is stored in salt[16], you do not have keys nor associated - * data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with - * 4 parallel lanes. - * You want to erase the password, but you're OK with last pass not being - * erased. You want to use the default memory allocator. - * Then you initialize: - Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false) - */ -typedef struct Argon2_Context { - uint8_t *out; /* output array */ - uint32_t outlen; /* digest length */ - - uint8_t *pwd; /* password array */ - uint32_t pwdlen; /* password length */ - - uint8_t *salt; /* salt array */ - uint32_t saltlen; /* salt length */ - - uint8_t *secret; /* key array */ - uint32_t secretlen; /* key length */ - - uint8_t *ad; /* associated data array */ - uint32_t adlen; /* associated data length */ - - uint32_t t_cost; /* number of passes */ - uint32_t m_cost; /* amount of memory requested (KB) */ - uint32_t lanes; /* number of lanes */ - uint32_t threads; /* maximum number of threads */ - - uint32_t version; /* version number */ - - allocate_fptr allocate_cbk; /* pointer to memory allocator */ - deallocate_fptr free_cbk; /* pointer to memory deallocator */ - - uint32_t flags; /* array of bool options */ -} argon2_context; - -/* Argon2 primitive type */ -typedef enum Argon2_type { - Argon2_d = 0, - Argon2_i = 1, - Argon2_id = 2 -} argon2_type; - -/* Version of the algorithm */ -typedef enum Argon2_version { - ARGON2_VERSION_10 = 
0x10, - ARGON2_VERSION_13 = 0x13, - ARGON2_VERSION_NUMBER = ARGON2_VERSION_13 -} argon2_version; - -/* - * Function that gives the string representation of an argon2_type. - * @param type The argon2_type that we want the string for - * @param uppercase Whether the string should have the first letter uppercase - * @return NULL if invalid type, otherwise the string representation. - */ -ARGON2_PUBLIC const char *argon2_type2string(argon2_type type, int uppercase); - -/* - * Function that performs memory-hard hashing with certain degree of parallelism - * @param context Pointer to the Argon2 internal structure - * @return Error code if smth is wrong, ARGON2_OK otherwise - */ -ARGON2_PUBLIC int argon2_ctx(argon2_context *context, argon2_type type); - -/** - * Hashes a password with Argon2i, producing an encoded hash - * @param t_cost Number of iterations - * @param m_cost Sets memory usage to m_cost kibibytes - * @param parallelism Number of threads and compute lanes - * @param pwd Pointer to password - * @param pwdlen Password size in bytes - * @param salt Pointer to salt - * @param saltlen Salt size in bytes - * @param hashlen Desired length of the hash in bytes - * @param encoded Buffer where to write the encoded hash - * @param encodedlen Size of the buffer (thus max size of the encoded hash) - * @pre Different parallelism levels will give different results - * @pre Returns ARGON2_OK if successful - */ -ARGON2_PUBLIC int argon2i_hash_encoded(const uint32_t t_cost, - const uint32_t m_cost, - const uint32_t parallelism, - const void *pwd, const size_t pwdlen, - const void *salt, const size_t saltlen, - const size_t hashlen, char *encoded, - const size_t encodedlen); - -/** - * Hashes a password with Argon2i, producing a raw hash by allocating memory at - * @hash - * @param t_cost Number of iterations - * @param m_cost Sets memory usage to m_cost kibibytes - * @param parallelism Number of threads and compute lanes - * @param pwd Pointer to password - * @param pwdlen 
Password size in bytes - * @param salt Pointer to salt - * @param saltlen Salt size in bytes - * @param hash Buffer where to write the raw hash - updated by the function - * @param hashlen Desired length of the hash in bytes - * @pre Different parallelism levels will give different results - * @pre Returns ARGON2_OK if successful - */ -ARGON2_PUBLIC int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, void *hash, - const size_t hashlen); - -ARGON2_PUBLIC int argon2d_hash_encoded(const uint32_t t_cost, - const uint32_t m_cost, - const uint32_t parallelism, - const void *pwd, const size_t pwdlen, - const void *salt, const size_t saltlen, - const size_t hashlen, char *encoded, - const size_t encodedlen); - -ARGON2_PUBLIC int argon2d_hash_raw(const uint32_t t_cost, - const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, void *hash, - const size_t hashlen); - -ARGON2_PUBLIC int argon2id_hash_encoded(const uint32_t t_cost, - const uint32_t m_cost, - const uint32_t parallelism, - const void *pwd, const size_t pwdlen, - const void *salt, const size_t saltlen, - const size_t hashlen, char *encoded, - const size_t encodedlen); - -ARGON2_PUBLIC int argon2id_hash_raw(const uint32_t t_cost, - const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, void *hash, - const size_t hashlen); - -/* generic function underlying the above ones */ -ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, void *hash, - const size_t hashlen, char *encoded, - const size_t encodedlen, argon2_type type, - const uint32_t version); - -/** - * Verifies a password against an encoded string 
- * Encoded string is restricted as in validate_inputs() - * @param encoded String encoding parameters, salt, hash - * @param pwd Pointer to password - * @pre Returns ARGON2_OK if successful - */ -ARGON2_PUBLIC int argon2i_verify(const char *encoded, const void *pwd, - const size_t pwdlen); - -ARGON2_PUBLIC int argon2d_verify(const char *encoded, const void *pwd, - const size_t pwdlen); - -ARGON2_PUBLIC int argon2id_verify(const char *encoded, const void *pwd, - const size_t pwdlen); - -/* generic function underlying the above ones */ -ARGON2_PUBLIC int argon2_verify(const char *encoded, const void *pwd, - const size_t pwdlen, argon2_type type); - -/** - * Argon2d: Version of Argon2 that picks memory blocks depending - * on the password and salt. Only for side-channel-free - * environment!! - ***** - * @param context Pointer to current Argon2 context - * @return Zero if successful, a non zero error code otherwise - */ -ARGON2_PUBLIC int argon2d_ctx(argon2_context *context); - -/** - * Argon2i: Version of Argon2 that picks memory blocks - * independent on the password and salt. Good for side-channels, - * but worse w.r.t. tradeoff attacks if only one pass is used. - ***** - * @param context Pointer to current Argon2 context - * @return Zero if successful, a non zero error code otherwise - */ -ARGON2_PUBLIC int argon2i_ctx(argon2_context *context); - -/** - * Argon2id: Version of Argon2 where the first half-pass over memory is - * password-independent, the rest are password-dependent (on the password and - * salt). OK against side channels (they reduce to 1/2-pass Argon2i), and - * better with w.r.t. tradeoff attacks (similar to Argon2d). 
- ***** - * @param context Pointer to current Argon2 context - * @return Zero if successful, a non zero error code otherwise - */ -ARGON2_PUBLIC int argon2id_ctx(argon2_context *context); - -/** - * Verify if a given password is correct for Argon2d hashing - * @param context Pointer to current Argon2 context - * @param hash The password hash to verify. The length of the hash is - * specified by the context outlen member - * @return Zero if successful, a non zero error code otherwise - */ -ARGON2_PUBLIC int argon2d_verify_ctx(argon2_context *context, const char *hash); - -/** - * Verify if a given password is correct for Argon2i hashing - * @param context Pointer to current Argon2 context - * @param hash The password hash to verify. The length of the hash is - * specified by the context outlen member - * @return Zero if successful, a non zero error code otherwise - */ -ARGON2_PUBLIC int argon2i_verify_ctx(argon2_context *context, const char *hash); - -/** - * Verify if a given password is correct for Argon2id hashing - * @param context Pointer to current Argon2 context - * @param hash The password hash to verify. 
The length of the hash is - * specified by the context outlen member - * @return Zero if successful, a non zero error code otherwise - */ -ARGON2_PUBLIC int argon2id_verify_ctx(argon2_context *context, - const char *hash); - -/* generic function underlying the above ones */ -ARGON2_PUBLIC int argon2_verify_ctx(argon2_context *context, const char *hash, - argon2_type type); - -/** - * Get the associated error message for given error code - * @return The error message associated with the given error code - */ -ARGON2_PUBLIC const char *argon2_error_message(int error_code); - -/** - * Returns the encoded hash length for the given input parameters - * @param t_cost Number of iterations - * @param m_cost Memory usage in kibibytes - * @param parallelism Number of threads; used to compute lanes - * @param saltlen Salt size in bytes - * @param hashlen Hash size in bytes - * @param type The argon2_type that we want the encoded length for - * @return The encoded hash length in bytes - */ -ARGON2_PUBLIC size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, - uint32_t parallelism, uint32_t saltlen, - uint32_t hashlen, argon2_type type); - -/* signals availability of argon2_select_impl: */ -#define ARGON2_SELECTABLE_IMPL - -/** - * Selects the fastest available optimized implementation. - * @param out The file for debug output (e. g. 
stderr; pass NULL for no - * debug output) - * @param prefix What to print before each line; NULL is equivalent to empty - * string - */ -ARGON2_PUBLIC void argon2_select_impl(FILE *out, const char *prefix); - -/* signals support for passing preallocated memory: */ -#define ARGON2_PREALLOCATED_MEMORY - -ARGON2_PUBLIC size_t argon2_memory_size(uint32_t m_cost, uint32_t parallelism); - -/** - * Function that performs memory-hard hashing with certain degree of parallelism - * @param context Pointer to the Argon2 internal structure - * @param type The Argon2 type - * @param memory Preallocated memory for blocks (or NULL) - * @param memory_size The size of preallocated memory - * @return Error code if smth is wrong, ARGON2_OK otherwise - */ -ARGON2_PUBLIC int argon2_ctx_mem(argon2_context *context, argon2_type type, - void *memory, size_t memory_size); - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/src/3rdparty/argon2/lib/argon2-template-64.h b/src/3rdparty/argon2/lib/argon2-template-64.h deleted file mode 100644 index 16ddbd35..00000000 --- a/src/3rdparty/argon2/lib/argon2-template-64.h +++ /dev/null @@ -1,193 +0,0 @@ -#include - -#include "core.h" - -#define MASK_32 UINT64_C(0xFFFFFFFF) - -#define F(x, y) ((x) + (y) + 2 * ((x) & MASK_32) * ((y) & MASK_32)) - -#define G(a, b, c, d) \ - do { \ - a = F(a, b); \ - d = rotr64(d ^ a, 32); \ - c = F(c, d); \ - b = rotr64(b ^ c, 24); \ - a = F(a, b); \ - d = rotr64(d ^ a, 16); \ - c = F(c, d); \ - b = rotr64(b ^ c, 63); \ - } while ((void)0, 0) - -#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, \ - v8, v9, v10, v11, v12, v13, v14, v15) \ - do { \ - G(v0, v4, v8, v12); \ - G(v1, v5, v9, v13); \ - G(v2, v6, v10, v14); \ - G(v3, v7, v11, v15); \ - G(v0, v5, v10, v15); \ - G(v1, v6, v11, v12); \ - G(v2, v7, v8, v13); \ - G(v3, v4, v9, v14); \ - } while ((void)0, 0) - -#define BLAKE2_ROUND_NOMSG1(v) \ - BLAKE2_ROUND_NOMSG( \ - (v)[ 0], (v)[ 1], (v)[ 2], (v)[ 3], \ - (v)[ 4], (v)[ 5], (v)[ 6], (v)[ 7], \ - 
(v)[ 8], (v)[ 9], (v)[10], (v)[11], \ - (v)[12], (v)[13], (v)[14], (v)[15]) - -#define BLAKE2_ROUND_NOMSG2(v) \ - BLAKE2_ROUND_NOMSG( \ - (v)[ 0], (v)[ 1], (v)[ 16], (v)[ 17], \ - (v)[ 32], (v)[ 33], (v)[ 48], (v)[ 49], \ - (v)[ 64], (v)[ 65], (v)[ 80], (v)[ 81], \ - (v)[ 96], (v)[ 97], (v)[112], (v)[113]) - -static void fill_block(const block *prev_block, const block *ref_block, - block *next_block, int with_xor) -{ - block blockR, block_tmp; - - copy_block(&blockR, ref_block); - xor_block(&blockR, prev_block); - copy_block(&block_tmp, &blockR); - if (with_xor) { - xor_block(&block_tmp, next_block); - } - - /* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then - (16,17,..31)... finally (112,113,...127) */ - BLAKE2_ROUND_NOMSG1(blockR.v + 0 * 16); - BLAKE2_ROUND_NOMSG1(blockR.v + 1 * 16); - BLAKE2_ROUND_NOMSG1(blockR.v + 2 * 16); - BLAKE2_ROUND_NOMSG1(blockR.v + 3 * 16); - BLAKE2_ROUND_NOMSG1(blockR.v + 4 * 16); - BLAKE2_ROUND_NOMSG1(blockR.v + 5 * 16); - BLAKE2_ROUND_NOMSG1(blockR.v + 6 * 16); - BLAKE2_ROUND_NOMSG1(blockR.v + 7 * 16); - - /* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then - (2,3,18,19,...,114,115).. 
finally (14,15,30,31,...,126,127) */ - BLAKE2_ROUND_NOMSG2(blockR.v + 0 * 2); - BLAKE2_ROUND_NOMSG2(blockR.v + 1 * 2); - BLAKE2_ROUND_NOMSG2(blockR.v + 2 * 2); - BLAKE2_ROUND_NOMSG2(blockR.v + 3 * 2); - BLAKE2_ROUND_NOMSG2(blockR.v + 4 * 2); - BLAKE2_ROUND_NOMSG2(blockR.v + 5 * 2); - BLAKE2_ROUND_NOMSG2(blockR.v + 6 * 2); - BLAKE2_ROUND_NOMSG2(blockR.v + 7 * 2); - - copy_block(next_block, &block_tmp); - xor_block(next_block, &blockR); -} - -static void next_addresses(block *address_block, block *input_block, - const block *zero_block) -{ - input_block->v[6]++; - fill_block(zero_block, input_block, address_block, 0); - fill_block(zero_block, address_block, address_block, 0); -} - -static void fill_segment_64(const argon2_instance_t *instance, - argon2_position_t position) -{ - block *ref_block, *curr_block, *prev_block; - block address_block, input_block, zero_block; - uint64_t pseudo_rand, ref_index, ref_lane; - uint32_t prev_offset, curr_offset; - uint32_t starting_index, i; - int data_independent_addressing; - - if (instance == NULL) { - return; - } - - data_independent_addressing = (instance->type == Argon2_i) || - (instance->type == Argon2_id && (position.pass == 0) && - (position.slice < ARGON2_SYNC_POINTS / 2)); - - if (data_independent_addressing) { - init_block_value(&zero_block, 0); - init_block_value(&input_block, 0); - - input_block.v[0] = position.pass; - input_block.v[1] = position.lane; - input_block.v[2] = position.slice; - input_block.v[3] = instance->memory_blocks; - input_block.v[4] = instance->passes; - input_block.v[5] = instance->type; - } - - starting_index = 0; - - if ((0 == position.pass) && (0 == position.slice)) { - starting_index = 2; /* we have already generated the first two blocks */ - - /* Don't forget to generate the first block of addresses: */ - if (data_independent_addressing) { - next_addresses(&address_block, &input_block, &zero_block); - } - } - - /* Offset of the current block */ - curr_offset = position.lane * 
instance->lane_length + - position.slice * instance->segment_length + starting_index; - - if (0 == curr_offset % instance->lane_length) { - /* Last block in this lane */ - prev_offset = curr_offset + instance->lane_length - 1; - } else { - /* Previous block */ - prev_offset = curr_offset - 1; - } - - for (i = starting_index; i < instance->segment_length; - ++i, ++curr_offset, ++prev_offset) { - /*1.1 Rotating prev_offset if needed */ - if (curr_offset % instance->lane_length == 1) { - prev_offset = curr_offset - 1; - } - - /* 1.2 Computing the index of the reference block */ - /* 1.2.1 Taking pseudo-random value from the previous block */ - if (data_independent_addressing) { - if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { - next_addresses(&address_block, &input_block, &zero_block); - } - pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; - } else { - pseudo_rand = instance->memory[prev_offset].v[0]; - } - - /* 1.2.2 Computing the lane of the reference block */ - ref_lane = ((pseudo_rand >> 32)) % instance->lanes; - - if ((position.pass == 0) && (position.slice == 0)) { - /* Can not reference other lanes yet */ - ref_lane = position.lane; - } - - /* 1.2.3 Computing the number of possible reference block within the - * lane. 
- */ - position.index = i; - ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, - ref_lane == position.lane); - - /* 2 Creating a new block */ - ref_block = - instance->memory + instance->lane_length * ref_lane + ref_index; - curr_block = instance->memory + curr_offset; - prev_block = instance->memory + prev_offset; - - /* version 1.2.1 and earlier: overwrite, not XOR */ - if (0 == position.pass || ARGON2_VERSION_10 == instance->version) { - fill_block(prev_block, ref_block, curr_block, 0); - } else { - fill_block(prev_block, ref_block, curr_block, 1); - } - } -} diff --git a/src/3rdparty/argon2/lib/argon2.c b/src/3rdparty/argon2/lib/argon2.c deleted file mode 100644 index 28d3d402..00000000 --- a/src/3rdparty/argon2/lib/argon2.c +++ /dev/null @@ -1,476 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . - */ - -#include -#include -#include - -#include "argon2.h" -#include "encoding.h" -#include "core.h" - -const char *argon2_type2string(argon2_type type, int uppercase) { - switch (type) { - case Argon2_d: - return uppercase ? "Argon2d" : "argon2d"; - case Argon2_i: - return uppercase ? "Argon2i" : "argon2i"; - case Argon2_id: - return uppercase ? 
"Argon2id" : "argon2id"; - } - - return NULL; -} - -static void argon2_compute_memory_blocks(uint32_t *memory_blocks, - uint32_t *segment_length, - uint32_t m_cost, uint32_t lanes) -{ - /* Minimum memory_blocks = 8L blocks, where L is the number of lanes */ - *memory_blocks = m_cost; - if (*memory_blocks < 2 * ARGON2_SYNC_POINTS * lanes) { - *memory_blocks = 2 * ARGON2_SYNC_POINTS * lanes; - } - - *segment_length = *memory_blocks / (lanes * ARGON2_SYNC_POINTS); - /* Ensure that all segments have equal length */ - *memory_blocks = *segment_length * (lanes * ARGON2_SYNC_POINTS); -} - -size_t argon2_memory_size(uint32_t m_cost, uint32_t parallelism) { - uint32_t memory_blocks, segment_length; - argon2_compute_memory_blocks(&memory_blocks, &segment_length, m_cost, - parallelism); - return memory_blocks * ARGON2_BLOCK_SIZE; -} - -int argon2_ctx_mem(argon2_context *context, argon2_type type, void *memory, - size_t memory_size) { - /* 1. Validate all inputs */ - int result = validate_inputs(context); - uint32_t memory_blocks, segment_length; - argon2_instance_t instance; - - if (ARGON2_OK != result) { - return result; - } - - if (Argon2_d != type && Argon2_i != type && Argon2_id != type) { - return ARGON2_INCORRECT_TYPE; - } - - /* 2. 
Align memory size */ - argon2_compute_memory_blocks(&memory_blocks, &segment_length, - context->m_cost, context->lanes); - - /* check for sufficient memory size: */ - if (memory != NULL && (memory_size % ARGON2_BLOCK_SIZE != 0 || - memory_size / ARGON2_BLOCK_SIZE < memory_blocks)) { - return ARGON2_MEMORY_ALLOCATION_ERROR; - } - - instance.version = context->version; - instance.memory = (block *)memory; - instance.passes = context->t_cost; - instance.memory_blocks = memory_blocks; - instance.segment_length = segment_length; - instance.lane_length = segment_length * ARGON2_SYNC_POINTS; - instance.lanes = context->lanes; - instance.threads = context->threads; - instance.type = type; - instance.print_internals = !!(context->flags & ARGON2_FLAG_GENKAT); - instance.keep_memory = memory != NULL; - - if (instance.threads > instance.lanes) { - instance.threads = instance.lanes; - } - - /* 3. Initialization: Hashing inputs, allocating memory, filling first - * blocks - */ - result = initialize(&instance, context); - - if (ARGON2_OK != result) { - return result; - } - - /* 4. Filling memory */ - result = fill_memory_blocks(&instance); - - if (ARGON2_OK != result) { - return result; - } - /* 5. 
Finalization */ - finalize(context, &instance); - - return ARGON2_OK; -} - -int argon2_ctx(argon2_context *context, argon2_type type) { - return argon2_ctx_mem(context, type, NULL, 0); -} - -int argon2_hash(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, const size_t saltlen, - void *hash, const size_t hashlen, char *encoded, - const size_t encodedlen, argon2_type type, - const uint32_t version){ - - argon2_context context; - int result; - uint8_t *out; - - if (pwdlen > ARGON2_MAX_PWD_LENGTH) { - return ARGON2_PWD_TOO_LONG; - } - - if (saltlen > ARGON2_MAX_SALT_LENGTH) { - return ARGON2_SALT_TOO_LONG; - } - - if (hashlen > ARGON2_MAX_OUTLEN) { - return ARGON2_OUTPUT_TOO_LONG; - } - - if (hashlen < ARGON2_MIN_OUTLEN) { - return ARGON2_OUTPUT_TOO_SHORT; - } - - out = malloc(hashlen); - if (!out) { - return ARGON2_MEMORY_ALLOCATION_ERROR; - } - - context.out = (uint8_t *)out; - context.outlen = (uint32_t)hashlen; - context.pwd = CONST_CAST(uint8_t *)pwd; - context.pwdlen = (uint32_t)pwdlen; - context.salt = CONST_CAST(uint8_t *)salt; - context.saltlen = (uint32_t)saltlen; - context.secret = NULL; - context.secretlen = 0; - context.ad = NULL; - context.adlen = 0; - context.t_cost = t_cost; - context.m_cost = m_cost; - context.lanes = parallelism; - context.threads = parallelism; - context.allocate_cbk = NULL; - context.free_cbk = NULL; - context.flags = ARGON2_DEFAULT_FLAGS; - context.version = version; - - result = argon2_ctx(&context, type); - - if (result != ARGON2_OK) { - clear_internal_memory(out, hashlen); - free(out); - return result; - } - - /* if raw hash requested, write it */ - if (hash) { - memcpy(hash, out, hashlen); - } - - /* if encoding requested, write it */ - if (encoded && encodedlen) { - if (encode_string(encoded, encodedlen, &context, type) != ARGON2_OK) { - clear_internal_memory(out, hashlen); /* wipe buffers if error */ - clear_internal_memory(encoded, 
encodedlen); - free(out); - return ARGON2_ENCODING_FAIL; - } - } - clear_internal_memory(out, hashlen); - free(out); - - return ARGON2_OK; -} - -int argon2i_hash_encoded(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, const size_t hashlen, - char *encoded, const size_t encodedlen) { - - return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, - NULL, hashlen, encoded, encodedlen, Argon2_i, - ARGON2_VERSION_NUMBER); -} - -int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, void *hash, const size_t hashlen) { - - return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, - hash, hashlen, NULL, 0, Argon2_i, ARGON2_VERSION_NUMBER); -} - -int argon2d_hash_encoded(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, const size_t hashlen, - char *encoded, const size_t encodedlen) { - - return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, - NULL, hashlen, encoded, encodedlen, Argon2_d, - ARGON2_VERSION_NUMBER); -} - -int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, void *hash, const size_t hashlen) { - - return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, - hash, hashlen, NULL, 0, Argon2_d, ARGON2_VERSION_NUMBER); -} - -int argon2id_hash_encoded(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, const size_t hashlen, - char *encoded, const size_t encodedlen) { - - return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, 
salt, saltlen, - NULL, hashlen, encoded, encodedlen, Argon2_id, - ARGON2_VERSION_NUMBER); -} - -int argon2id_hash_raw(const uint32_t t_cost, const uint32_t m_cost, - const uint32_t parallelism, const void *pwd, - const size_t pwdlen, const void *salt, - const size_t saltlen, void *hash, const size_t hashlen) { - return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, - hash, hashlen, NULL, 0, Argon2_id, - ARGON2_VERSION_NUMBER); -} - -static int argon2_compare(const uint8_t *b1, const uint8_t *b2, size_t len) { - size_t i; - uint8_t d = 0U; - - for (i = 0U; i < len; i++) { - d |= b1[i] ^ b2[i]; - } - return (int)((1 & ((d - 1) >> 8)) - 1); -} - -int argon2_verify(const char *encoded, const void *pwd, const size_t pwdlen, - argon2_type type) { - - argon2_context ctx; - uint8_t *desired_result = NULL; - - int ret = ARGON2_OK; - - size_t encoded_len; - uint32_t max_field_len; - - if (pwdlen > ARGON2_MAX_PWD_LENGTH) { - return ARGON2_PWD_TOO_LONG; - } - - if (encoded == NULL) { - return ARGON2_DECODING_FAIL; - } - - encoded_len = strlen(encoded); - if (encoded_len > UINT32_MAX) { - return ARGON2_DECODING_FAIL; - } - - /* No field can be longer than the encoded length */ - max_field_len = (uint32_t)encoded_len; - - ctx.saltlen = max_field_len; - ctx.outlen = max_field_len; - - ctx.salt = malloc(ctx.saltlen); - ctx.out = malloc(ctx.outlen); - if (!ctx.salt || !ctx.out) { - ret = ARGON2_MEMORY_ALLOCATION_ERROR; - goto fail; - } - - ctx.pwd = (uint8_t *)pwd; - ctx.pwdlen = (uint32_t)pwdlen; - - ret = decode_string(&ctx, encoded, type); - if (ret != ARGON2_OK) { - goto fail; - } - - /* Set aside the desired result, and get a new buffer. 
*/ - desired_result = ctx.out; - ctx.out = malloc(ctx.outlen); - if (!ctx.out) { - ret = ARGON2_MEMORY_ALLOCATION_ERROR; - goto fail; - } - - ret = argon2_verify_ctx(&ctx, (char *)desired_result, type); - if (ret != ARGON2_OK) { - goto fail; - } - -fail: - free(ctx.salt); - free(ctx.out); - free(desired_result); - - return ret; -} - -int argon2i_verify(const char *encoded, const void *pwd, const size_t pwdlen) { - - return argon2_verify(encoded, pwd, pwdlen, Argon2_i); -} - -int argon2d_verify(const char *encoded, const void *pwd, const size_t pwdlen) { - - return argon2_verify(encoded, pwd, pwdlen, Argon2_d); -} - -int argon2id_verify(const char *encoded, const void *pwd, const size_t pwdlen) { - - return argon2_verify(encoded, pwd, pwdlen, Argon2_id); -} - -int argon2d_ctx(argon2_context *context) { - return argon2_ctx(context, Argon2_d); -} - -int argon2i_ctx(argon2_context *context) { - return argon2_ctx(context, Argon2_i); -} - -int argon2id_ctx(argon2_context *context) { - return argon2_ctx(context, Argon2_id); -} - -int argon2_verify_ctx(argon2_context *context, const char *hash, - argon2_type type) { - int ret = argon2_ctx(context, type); - if (ret != ARGON2_OK) { - return ret; - } - - if (argon2_compare((uint8_t *)hash, context->out, context->outlen)) { - return ARGON2_VERIFY_MISMATCH; - } - - return ARGON2_OK; -} - -int argon2d_verify_ctx(argon2_context *context, const char *hash) { - return argon2_verify_ctx(context, hash, Argon2_d); -} - -int argon2i_verify_ctx(argon2_context *context, const char *hash) { - return argon2_verify_ctx(context, hash, Argon2_i); -} - -int argon2id_verify_ctx(argon2_context *context, const char *hash) { - return argon2_verify_ctx(context, hash, Argon2_id); -} - -const char *argon2_error_message(int error_code) { - switch (error_code) { - case ARGON2_OK: - return "OK"; - case ARGON2_OUTPUT_PTR_NULL: - return "Output pointer is NULL"; - case ARGON2_OUTPUT_TOO_SHORT: - return "Output is too short"; - case ARGON2_OUTPUT_TOO_LONG: 
- return "Output is too long"; - case ARGON2_PWD_TOO_SHORT: - return "Password is too short"; - case ARGON2_PWD_TOO_LONG: - return "Password is too long"; - case ARGON2_SALT_TOO_SHORT: - return "Salt is too short"; - case ARGON2_SALT_TOO_LONG: - return "Salt is too long"; - case ARGON2_AD_TOO_SHORT: - return "Associated data is too short"; - case ARGON2_AD_TOO_LONG: - return "Associated data is too long"; - case ARGON2_SECRET_TOO_SHORT: - return "Secret is too short"; - case ARGON2_SECRET_TOO_LONG: - return "Secret is too long"; - case ARGON2_TIME_TOO_SMALL: - return "Time cost is too small"; - case ARGON2_TIME_TOO_LARGE: - return "Time cost is too large"; - case ARGON2_MEMORY_TOO_LITTLE: - return "Memory cost is too small"; - case ARGON2_MEMORY_TOO_MUCH: - return "Memory cost is too large"; - case ARGON2_LANES_TOO_FEW: - return "Too few lanes"; - case ARGON2_LANES_TOO_MANY: - return "Too many lanes"; - case ARGON2_PWD_PTR_MISMATCH: - return "Password pointer is NULL, but password length is not 0"; - case ARGON2_SALT_PTR_MISMATCH: - return "Salt pointer is NULL, but salt length is not 0"; - case ARGON2_SECRET_PTR_MISMATCH: - return "Secret pointer is NULL, but secret length is not 0"; - case ARGON2_AD_PTR_MISMATCH: - return "Associated data pointer is NULL, but ad length is not 0"; - case ARGON2_MEMORY_ALLOCATION_ERROR: - return "Memory allocation error"; - case ARGON2_FREE_MEMORY_CBK_NULL: - return "The free memory callback is NULL"; - case ARGON2_ALLOCATE_MEMORY_CBK_NULL: - return "The allocate memory callback is NULL"; - case ARGON2_INCORRECT_PARAMETER: - return "Argon2_Context context is NULL"; - case ARGON2_INCORRECT_TYPE: - return "There is no such version of Argon2"; - case ARGON2_OUT_PTR_MISMATCH: - return "Output pointer mismatch"; - case ARGON2_THREADS_TOO_FEW: - return "Not enough threads"; - case ARGON2_THREADS_TOO_MANY: - return "Too many threads"; - case ARGON2_MISSING_ARGS: - return "Missing arguments"; - case ARGON2_ENCODING_FAIL: - return "Encoding 
failed"; - case ARGON2_DECODING_FAIL: - return "Decoding failed"; - case ARGON2_THREAD_FAIL: - return "Threading failure"; - case ARGON2_DECODING_LENGTH_FAIL: - return "Some of encoded parameters are too long or too short"; - case ARGON2_VERIFY_MISMATCH: - return "The password does not match the supplied hash"; - default: - return "Unknown error code"; - } -} - -size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, uint32_t parallelism, - uint32_t saltlen, uint32_t hashlen, argon2_type type) { - return strlen("$$v=$m=,t=,p=$$") + strlen(argon2_type2string(type, 0)) + - numlen(t_cost) + numlen(m_cost) + numlen(parallelism) + - b64len(saltlen) + b64len(hashlen) + numlen(ARGON2_VERSION_NUMBER) + - 1; -} diff --git a/src/3rdparty/argon2/lib/blake2/blake2-impl.h b/src/3rdparty/argon2/lib/blake2/blake2-impl.h deleted file mode 100644 index e6cdf7c4..00000000 --- a/src/3rdparty/argon2/lib/blake2/blake2-impl.h +++ /dev/null @@ -1,90 +0,0 @@ -#ifndef ARGON2_BLAKE2_IMPL_H -#define ARGON2_BLAKE2_IMPL_H - -#include - -/* Argon2 Team - Begin Code */ -/* - Not an exhaustive list, but should cover the majority of modern platforms - Additionally, the code will always be correct---this is only a performance - tweak. 
-*/ -#if (defined(__BYTE_ORDER__) && \ - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \ - defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \ - defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \ - defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \ - defined(_M_ARM) -#define NATIVE_LITTLE_ENDIAN -#endif -/* Argon2 Team - End Code */ - -static inline uint32_t load32(const void *src) { -#if defined(NATIVE_LITTLE_ENDIAN) - return *(const uint32_t *)src; -#else - const uint8_t *p = (const uint8_t *)src; - uint32_t w = *p++; - w |= (uint32_t)(*p++) << 8; - w |= (uint32_t)(*p++) << 16; - w |= (uint32_t)(*p++) << 24; - return w; -#endif -} - -static inline uint64_t load64(const void *src) { -#if defined(NATIVE_LITTLE_ENDIAN) - return *(const uint64_t *)src; -#else - const uint8_t *p = (const uint8_t *)src; - uint64_t w = *p++; - w |= (uint64_t)(*p++) << 8; - w |= (uint64_t)(*p++) << 16; - w |= (uint64_t)(*p++) << 24; - w |= (uint64_t)(*p++) << 32; - w |= (uint64_t)(*p++) << 40; - w |= (uint64_t)(*p++) << 48; - w |= (uint64_t)(*p++) << 56; - return w; -#endif -} - -static inline void store32(void *dst, uint32_t w) { -#if defined(NATIVE_LITTLE_ENDIAN) - *(uint32_t *)dst = w; -#else - uint8_t *p = (uint8_t *)dst; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; -#endif -} - -static inline void store64(void *dst, uint64_t w) { -#if defined(NATIVE_LITTLE_ENDIAN) - *(uint64_t *)dst = w; -#else - uint8_t *p = (uint8_t *)dst; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; -#endif -} - -#endif // ARGON2_BLAKE2_IMPL_H diff --git a/src/3rdparty/argon2/lib/blake2/blake2.c b/src/3rdparty/argon2/lib/blake2/blake2.c deleted file mode 100644 index d32028ed..00000000 --- 
a/src/3rdparty/argon2/lib/blake2/blake2.c +++ /dev/null @@ -1,225 +0,0 @@ -#include - -#include "blake2/blake2.h" -#include "blake2/blake2-impl.h" - -#include "core.h" - -static const uint64_t blake2b_IV[8] = { - UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b), - UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1), - UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f), - UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) -}; - -#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) - -static const unsigned int blake2b_sigma[12][16] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, - {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, - {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, - {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, - {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, - {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, - {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, - {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, - {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, -}; - -#define G(m, r, i, a, b, c, d) \ - do { \ - a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \ - d = rotr64(d ^ a, 32); \ - c = c + d; \ - b = rotr64(b ^ c, 24); \ - a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \ - d = rotr64(d ^ a, 16); \ - c = c + d; \ - b = rotr64(b ^ c, 63); \ - } while ((void)0, 0) - -#define ROUND(m, v, r) \ - do { \ - G(m, r, 0, v[0], v[4], v[ 8], v[12]); \ - G(m, r, 1, v[1], v[5], v[ 9], v[13]); \ - G(m, r, 2, v[2], v[6], v[10], v[14]); \ - G(m, r, 3, v[3], v[7], v[11], v[15]); \ - G(m, r, 4, v[0], v[5], v[10], v[15]); \ - G(m, r, 5, v[1], v[6], v[11], v[12]); \ - G(m, r, 6, v[2], v[7], v[ 8], v[13]); \ - G(m, r, 7, v[3], v[4], v[ 9], v[14]); \ - } while ((void)0, 0) - -void 
blake2b_compress(blake2b_state *S, const void *block, uint64_t f0) -{ - uint64_t m[16]; - uint64_t v[16]; - - m[ 0] = load64((const uint64_t *)block + 0); - m[ 1] = load64((const uint64_t *)block + 1); - m[ 2] = load64((const uint64_t *)block + 2); - m[ 3] = load64((const uint64_t *)block + 3); - m[ 4] = load64((const uint64_t *)block + 4); - m[ 5] = load64((const uint64_t *)block + 5); - m[ 6] = load64((const uint64_t *)block + 6); - m[ 7] = load64((const uint64_t *)block + 7); - m[ 8] = load64((const uint64_t *)block + 8); - m[ 9] = load64((const uint64_t *)block + 9); - m[10] = load64((const uint64_t *)block + 10); - m[11] = load64((const uint64_t *)block + 11); - m[12] = load64((const uint64_t *)block + 12); - m[13] = load64((const uint64_t *)block + 13); - m[14] = load64((const uint64_t *)block + 14); - m[15] = load64((const uint64_t *)block + 15); - - v[ 0] = S->h[0]; - v[ 1] = S->h[1]; - v[ 2] = S->h[2]; - v[ 3] = S->h[3]; - v[ 4] = S->h[4]; - v[ 5] = S->h[5]; - v[ 6] = S->h[6]; - v[ 7] = S->h[7]; - v[ 8] = blake2b_IV[0]; - v[ 9] = blake2b_IV[1]; - v[10] = blake2b_IV[2]; - v[11] = blake2b_IV[3]; - v[12] = blake2b_IV[4] ^ S->t[0]; - v[13] = blake2b_IV[5] ^ S->t[1]; - v[14] = blake2b_IV[6] ^ f0; - v[15] = blake2b_IV[7]; - - ROUND(m, v, 0); - ROUND(m, v, 1); - ROUND(m, v, 2); - ROUND(m, v, 3); - ROUND(m, v, 4); - ROUND(m, v, 5); - ROUND(m, v, 6); - ROUND(m, v, 7); - ROUND(m, v, 8); - ROUND(m, v, 9); - ROUND(m, v, 10); - ROUND(m, v, 11); - - S->h[0] ^= v[0] ^ v[ 8]; - S->h[1] ^= v[1] ^ v[ 9]; - S->h[2] ^= v[2] ^ v[10]; - S->h[3] ^= v[3] ^ v[11]; - S->h[4] ^= v[4] ^ v[12]; - S->h[5] ^= v[5] ^ v[13]; - S->h[6] ^= v[6] ^ v[14]; - S->h[7] ^= v[7] ^ v[15]; -} - -static void blake2b_increment_counter(blake2b_state *S, uint64_t inc) -{ - S->t[0] += inc; - S->t[1] += (S->t[0] < inc); -} - -static void blake2b_init_state(blake2b_state *S) -{ - memcpy(S->h, blake2b_IV, sizeof(S->h)); - S->t[1] = S->t[0] = 0; - S->buflen = 0; -} - -void blake2b_init(blake2b_state *S, 
size_t outlen) -{ - blake2b_init_state(S); - /* XOR initial state with param block: */ - S->h[0] ^= (uint64_t)outlen | (UINT64_C(1) << 16) | (UINT64_C(1) << 24); -} - -void blake2b_update(blake2b_state *S, const void *in, size_t inlen) -{ - const uint8_t *pin = (const uint8_t *)in; - - if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) { - size_t left = S->buflen; - size_t fill = BLAKE2B_BLOCKBYTES - left; - memcpy(&S->buf[left], pin, fill); - blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); - blake2b_compress(S, S->buf, 0); - S->buflen = 0; - inlen -= fill; - pin += fill; - /* Avoid buffer copies when possible */ - while (inlen > BLAKE2B_BLOCKBYTES) { - blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); - blake2b_compress(S, pin, 0); - inlen -= BLAKE2B_BLOCKBYTES; - pin += BLAKE2B_BLOCKBYTES; - } - } - memcpy(&S->buf[S->buflen], pin, inlen); - S->buflen += inlen; -} - -void blake2b_final(blake2b_state *S, void *out, size_t outlen) -{ - uint8_t buffer[BLAKE2B_OUTBYTES] = {0}; - unsigned int i; - - blake2b_increment_counter(S, S->buflen); - memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */ - blake2b_compress(S, S->buf, UINT64_C(0xFFFFFFFFFFFFFFFF)); - - for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */ - store64(buffer + i * sizeof(uint64_t), S->h[i]); - } - - memcpy(out, buffer, outlen); - clear_internal_memory(buffer, sizeof(buffer)); - clear_internal_memory(S->buf, sizeof(S->buf)); - clear_internal_memory(S->h, sizeof(S->h)); -} - -void blake2b_long(void *out, size_t outlen, const void *in, size_t inlen) -{ - uint8_t *pout = (uint8_t *)out; - blake2b_state blake_state; - uint8_t outlen_bytes[sizeof(uint32_t)] = {0}; - - store32(outlen_bytes, (uint32_t)outlen); - if (outlen <= BLAKE2B_OUTBYTES) { - blake2b_init(&blake_state, outlen); - blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)); - blake2b_update(&blake_state, in, inlen); - blake2b_final(&blake_state, pout, outlen); - } else { - uint32_t toproduce; - uint8_t 
out_buffer[BLAKE2B_OUTBYTES]; - - blake2b_init(&blake_state, BLAKE2B_OUTBYTES); - blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)); - blake2b_update(&blake_state, in, inlen); - blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES); - - memcpy(pout, out_buffer, BLAKE2B_OUTBYTES / 2); - pout += BLAKE2B_OUTBYTES / 2; - toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2; - - while (toproduce > BLAKE2B_OUTBYTES) { - blake2b_init(&blake_state, BLAKE2B_OUTBYTES); - blake2b_update(&blake_state, out_buffer, BLAKE2B_OUTBYTES); - blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES); - - memcpy(pout, out_buffer, BLAKE2B_OUTBYTES / 2); - pout += BLAKE2B_OUTBYTES / 2; - toproduce -= BLAKE2B_OUTBYTES / 2; - } - - blake2b_init(&blake_state, toproduce); - blake2b_update(&blake_state, out_buffer, BLAKE2B_OUTBYTES); - blake2b_final(&blake_state, out_buffer, toproduce); - - memcpy(pout, out_buffer, toproduce); - - clear_internal_memory(out_buffer, sizeof(out_buffer)); - } -} diff --git a/src/3rdparty/argon2/lib/blake2/blake2.h b/src/3rdparty/argon2/lib/blake2/blake2.h deleted file mode 100644 index 7deeaa1f..00000000 --- a/src/3rdparty/argon2/lib/blake2/blake2.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef ARGON2_BLAKE2_H -#define ARGON2_BLAKE2_H - -#include -#include - -enum blake2b_constant { - BLAKE2B_BLOCKBYTES = 128, - BLAKE2B_OUTBYTES = 64, - BLAKE2B_KEYBYTES = 64, - BLAKE2B_SALTBYTES = 16, - BLAKE2B_PERSONALBYTES = 16 -}; - -typedef struct __blake2b_state { - uint64_t h[8]; - uint64_t t[2]; - uint8_t buf[BLAKE2B_BLOCKBYTES]; - size_t buflen; -} blake2b_state; - -/* Streaming API */ -void blake2b_init(blake2b_state *S, size_t outlen); -void blake2b_update(blake2b_state *S, const void *in, size_t inlen); -void blake2b_final(blake2b_state *S, void *out, size_t outlen); - -void blake2b_long(void *out, size_t outlen, const void *in, size_t inlen); - -#endif // ARGON2_BLAKE2_H - diff --git a/src/3rdparty/argon2/lib/core.c b/src/3rdparty/argon2/lib/core.c deleted 
file mode 100644 index d6592a6a..00000000 --- a/src/3rdparty/argon2/lib/core.c +++ /dev/null @@ -1,633 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . - */ - -/*For memory wiping*/ -#ifdef _MSC_VER -#include -#include /* For SecureZeroMemory */ -#endif -#if defined __STDC_LIB_EXT1__ -#define __STDC_WANT_LIB_EXT1__ 1 -#endif -#define VC_GE_2005(version) (version >= 1400) - -#include -#include -#include -#include - -#include "core.h" -#include "thread.h" -#include "blake2/blake2.h" -#include "blake2/blake2-impl.h" - -#include "genkat.h" - -#if defined(__clang__) -#if __has_attribute(optnone) -#define NOT_OPTIMIZED __attribute__((optnone)) -#endif -#elif defined(__GNUC__) -#define GCC_VERSION \ - (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) -#if GCC_VERSION >= 40400 -#define NOT_OPTIMIZED __attribute__((optimize("O0"))) -#endif -#endif -#ifndef NOT_OPTIMIZED -#define NOT_OPTIMIZED -#endif - -/***************Instance and Position constructors**********/ -void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); } - -void copy_block(block *dst, const block *src) { - memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK); -} - -void xor_block(block *dst, const block *src) { - int i; - for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { - dst->v[i] ^= src->v[i]; - } -} - -static void load_block(block *dst, const void *input) { - unsigned i; - for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { - dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i])); - } -} - -static void store_block(void *output, const block *src) { - unsigned i; - for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { - store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]); - } -} - 
-/***************Memory functions*****************/ - -int allocate_memory(const argon2_context *context, - argon2_instance_t *instance) { - size_t blocks = instance->memory_blocks; - size_t memory_size = blocks * ARGON2_BLOCK_SIZE; - - /* 0. Check for memory supplied by user: */ - /* NOTE: Sufficient memory size is already checked in argon2_ctx_mem() */ - if (instance->memory != NULL) { - return ARGON2_OK; - } - - /* 1. Check for multiplication overflow */ - if (blocks != 0 && memory_size / ARGON2_BLOCK_SIZE != blocks) { - return ARGON2_MEMORY_ALLOCATION_ERROR; - } - - /* 2. Try to allocate with appropriate allocator */ - if (context->allocate_cbk) { - (context->allocate_cbk)((uint8_t **)&instance->memory, memory_size); - } else { - instance->memory = malloc(memory_size); - } - - if (instance->memory == NULL) { - return ARGON2_MEMORY_ALLOCATION_ERROR; - } - - return ARGON2_OK; -} - -void free_memory(const argon2_context *context, - const argon2_instance_t *instance) { - size_t memory_size = instance->memory_blocks * ARGON2_BLOCK_SIZE; - - clear_internal_memory(instance->memory, memory_size); - - if (instance->keep_memory) { - /* user-supplied memory -- do not free */ - return; - } - - if (context->free_cbk) { - (context->free_cbk)((uint8_t *)instance->memory, memory_size); - } else { - free(instance->memory); - } -} - -void NOT_OPTIMIZED secure_wipe_memory(void *v, size_t n) { -#if defined(_MSC_VER) && VC_GE_2005(_MSC_VER) - SecureZeroMemory(v, n); -#elif defined memset_s - memset_s(v, n, 0, n); -#elif defined(__OpenBSD__) - explicit_bzero(v, n); -#else - static void *(*const volatile memset_sec)(void *, int, size_t) = &memset; - memset_sec(v, 0, n); -#endif -} - -/* Memory clear flag defaults to true. 
*/ -int FLAG_clear_internal_memory = 1; -void clear_internal_memory(void *v, size_t n) { - if (FLAG_clear_internal_memory && v) { - secure_wipe_memory(v, n); - } -} - -void finalize(const argon2_context *context, argon2_instance_t *instance) { - if (context != NULL && instance != NULL) { - block blockhash; - uint32_t l; - - copy_block(&blockhash, instance->memory + instance->lane_length - 1); - - /* XOR the last blocks */ - for (l = 1; l < instance->lanes; ++l) { - uint32_t last_block_in_lane = - l * instance->lane_length + (instance->lane_length - 1); - xor_block(&blockhash, instance->memory + last_block_in_lane); - } - - /* Hash the result */ - { - uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; - store_block(blockhash_bytes, &blockhash); - blake2b_long(context->out, context->outlen, blockhash_bytes, - ARGON2_BLOCK_SIZE); - /* clear blockhash and blockhash_bytes */ - clear_internal_memory(blockhash.v, ARGON2_BLOCK_SIZE); - clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); - } - - if (instance->print_internals) { - print_tag(context->out, context->outlen); - } - - free_memory(context, instance); - } -} - -uint32_t index_alpha(const argon2_instance_t *instance, - const argon2_position_t *position, uint32_t pseudo_rand, - int same_lane) { - /* - * Pass 0: - * This lane : all already finished segments plus already constructed - * blocks in this segment - * Other lanes : all already finished segments - * Pass 1+: - * This lane : (SYNC_POINTS - 1) last segments plus already constructed - * blocks in this segment - * Other lanes : (SYNC_POINTS - 1) last segments - */ - uint32_t reference_area_size; - uint64_t relative_position; - uint32_t start_position, absolute_position; - - if (0 == position->pass) { - /* First pass */ - if (0 == position->slice) { - /* First slice */ - reference_area_size = - position->index - 1; /* all but the previous */ - } else { - if (same_lane) { - /* The same lane => add current segment */ - reference_area_size = - position->slice * 
instance->segment_length + - position->index - 1; - } else { - reference_area_size = - position->slice * instance->segment_length + - ((position->index == 0) ? (-1) : 0); - } - } - } else { - /* Second pass */ - if (same_lane) { - reference_area_size = instance->lane_length - - instance->segment_length + position->index - - 1; - } else { - reference_area_size = instance->lane_length - - instance->segment_length + - ((position->index == 0) ? (-1) : 0); - } - } - - /* 1.2.4. Mapping pseudo_rand to 0.. and produce - * relative position */ - relative_position = pseudo_rand; - relative_position = relative_position * relative_position >> 32; - relative_position = reference_area_size - 1 - - (reference_area_size * relative_position >> 32); - - /* 1.2.5 Computing starting position */ - start_position = 0; - - if (0 != position->pass) { - start_position = (position->slice == ARGON2_SYNC_POINTS - 1) - ? 0 - : (position->slice + 1) * instance->segment_length; - } - - /* 1.2.6. Computing absolute position */ - absolute_position = (start_position + relative_position) % - instance->lane_length; /* absolute position */ - return absolute_position; -} - -#ifdef _WIN32 -static unsigned __stdcall fill_segment_thr(void *thread_data) -#else -static void *fill_segment_thr(void *thread_data) -#endif -{ - argon2_thread_data *my_data = thread_data; - fill_segment(my_data->instance_ptr, my_data->pos); - argon2_thread_exit(); - return 0; -} - -/* Single-threaded version for p=1 case */ -static int fill_memory_blocks_st(argon2_instance_t *instance) { - uint32_t r, s, l; - - for (r = 0; r < instance->passes; ++r) { - for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { - for (l = 0; l < instance->lanes; ++l) { - argon2_position_t position = { r, l, (uint8_t)s, 0 }; - fill_segment(instance, position); - } - } - - if (instance->print_internals) { - internal_kat(instance, r); /* Print all memory blocks */ - } - } - return ARGON2_OK; -} - -/* Multi-threaded version for p > 1 case */ -static int 
fill_memory_blocks_mt(argon2_instance_t *instance) { - uint32_t r, s; - argon2_thread_handle_t *thread = NULL; - argon2_thread_data *thr_data = NULL; - int rc = ARGON2_OK; - - /* 1. Allocating space for threads */ - thread = calloc(instance->lanes, sizeof(argon2_thread_handle_t)); - if (thread == NULL) { - rc = ARGON2_MEMORY_ALLOCATION_ERROR; - goto fail; - } - - thr_data = calloc(instance->lanes, sizeof(argon2_thread_data)); - if (thr_data == NULL) { - rc = ARGON2_MEMORY_ALLOCATION_ERROR; - goto fail; - } - - for (r = 0; r < instance->passes; ++r) { - for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { - uint32_t l; - - /* 2. Calling threads */ - for (l = 0; l < instance->lanes; ++l) { - argon2_position_t position; - - /* 2.1 Join a thread if limit is exceeded */ - if (l >= instance->threads) { - if (argon2_thread_join(thread[l - instance->threads])) { - rc = ARGON2_THREAD_FAIL; - goto fail; - } - } - - /* 2.2 Create thread */ - position.pass = r; - position.lane = l; - position.slice = (uint8_t)s; - position.index = 0; - thr_data[l].instance_ptr = - instance; /* preparing the thread input */ - memcpy(&(thr_data[l].pos), &position, - sizeof(argon2_position_t)); - if (argon2_thread_create(&thread[l], &fill_segment_thr, - (void *)&thr_data[l])) { - rc = ARGON2_THREAD_FAIL; - goto fail; - } - - /* fill_segment(instance, position); */ - /*Non-thread equivalent of the lines above */ - } - - /* 3. 
Joining remaining threads */ - for (l = instance->lanes - instance->threads; l < instance->lanes; - ++l) { - if (argon2_thread_join(thread[l])) { - rc = ARGON2_THREAD_FAIL; - goto fail; - } - } - } - - if (instance->print_internals) { - internal_kat(instance, r); /* Print all memory blocks */ - } - } - -fail: - if (thread != NULL) { - free(thread); - } - if (thr_data != NULL) { - free(thr_data); - } - return rc; -} - -int fill_memory_blocks(argon2_instance_t *instance) { - if (instance == NULL || instance->lanes == 0) { - return ARGON2_INCORRECT_PARAMETER; - } - - return instance->threads == 1 ? - fill_memory_blocks_st(instance) : fill_memory_blocks_mt(instance); -} - -int validate_inputs(const argon2_context *context) { - if (NULL == context) { - return ARGON2_INCORRECT_PARAMETER; - } - - if (NULL == context->out) { - return ARGON2_OUTPUT_PTR_NULL; - } - - /* Validate output length */ - if (ARGON2_MIN_OUTLEN > context->outlen) { - return ARGON2_OUTPUT_TOO_SHORT; - } - - if (ARGON2_MAX_OUTLEN < context->outlen) { - return ARGON2_OUTPUT_TOO_LONG; - } - - /* Validate password (required param) */ - if (NULL == context->pwd) { - if (0 != context->pwdlen) { - return ARGON2_PWD_PTR_MISMATCH; - } - } - - if (ARGON2_MIN_PWD_LENGTH > context->pwdlen) { - return ARGON2_PWD_TOO_SHORT; - } - - if (ARGON2_MAX_PWD_LENGTH < context->pwdlen) { - return ARGON2_PWD_TOO_LONG; - } - - /* Validate salt (required param) */ - if (NULL == context->salt) { - if (0 != context->saltlen) { - return ARGON2_SALT_PTR_MISMATCH; - } - } - - if (ARGON2_MIN_SALT_LENGTH > context->saltlen) { - return ARGON2_SALT_TOO_SHORT; - } - - if (ARGON2_MAX_SALT_LENGTH < context->saltlen) { - return ARGON2_SALT_TOO_LONG; - } - - /* Validate secret (optional param) */ - if (NULL == context->secret) { - if (0 != context->secretlen) { - return ARGON2_SECRET_PTR_MISMATCH; - } - } else { - if (ARGON2_MIN_SECRET > context->secretlen) { - return ARGON2_SECRET_TOO_SHORT; - } - if (ARGON2_MAX_SECRET < context->secretlen) 
{ - return ARGON2_SECRET_TOO_LONG; - } - } - - /* Validate associated data (optional param) */ - if (NULL == context->ad) { - if (0 != context->adlen) { - return ARGON2_AD_PTR_MISMATCH; - } - } else { - if (ARGON2_MIN_AD_LENGTH > context->adlen) { - return ARGON2_AD_TOO_SHORT; - } - if (ARGON2_MAX_AD_LENGTH < context->adlen) { - return ARGON2_AD_TOO_LONG; - } - } - - /* Validate memory cost */ - if (ARGON2_MIN_MEMORY > context->m_cost) { - return ARGON2_MEMORY_TOO_LITTLE; - } - - if (ARGON2_MAX_MEMORY < context->m_cost) { - return ARGON2_MEMORY_TOO_MUCH; - } - - if (context->m_cost < 8 * context->lanes) { - return ARGON2_MEMORY_TOO_LITTLE; - } - - /* Validate time cost */ - if (ARGON2_MIN_TIME > context->t_cost) { - return ARGON2_TIME_TOO_SMALL; - } - - if (ARGON2_MAX_TIME < context->t_cost) { - return ARGON2_TIME_TOO_LARGE; - } - - /* Validate lanes */ - if (ARGON2_MIN_LANES > context->lanes) { - return ARGON2_LANES_TOO_FEW; - } - - if (ARGON2_MAX_LANES < context->lanes) { - return ARGON2_LANES_TOO_MANY; - } - - /* Validate threads */ - if (ARGON2_MIN_THREADS > context->threads) { - return ARGON2_THREADS_TOO_FEW; - } - - if (ARGON2_MAX_THREADS < context->threads) { - return ARGON2_THREADS_TOO_MANY; - } - - if (NULL != context->allocate_cbk && NULL == context->free_cbk) { - return ARGON2_FREE_MEMORY_CBK_NULL; - } - - if (NULL == context->allocate_cbk && NULL != context->free_cbk) { - return ARGON2_ALLOCATE_MEMORY_CBK_NULL; - } - - return ARGON2_OK; -} - -void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) { - uint32_t l; - /* Make the first and second block in each lane as G(H0||0||i) or - G(H0||1||i) */ - uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; - for (l = 0; l < instance->lanes; ++l) { - - store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0); - store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l); - blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, - ARGON2_PREHASH_SEED_LENGTH); - load_block(&instance->memory[l * 
instance->lane_length + 0], - blockhash_bytes); - - store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1); - blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, - ARGON2_PREHASH_SEED_LENGTH); - load_block(&instance->memory[l * instance->lane_length + 1], - blockhash_bytes); - } - clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); -} - -void initial_hash(uint8_t *blockhash, argon2_context *context, - argon2_type type) { - blake2b_state BlakeHash; - uint8_t value[sizeof(uint32_t)]; - - if (NULL == context || NULL == blockhash) { - return; - } - - blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH); - - store32(&value, context->lanes); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, context->outlen); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, context->m_cost); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, context->t_cost); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, context->version); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, (uint32_t)type); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, context->pwdlen); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - if (context->pwd != NULL) { - blake2b_update(&BlakeHash, (const uint8_t *)context->pwd, - context->pwdlen); - - if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) { - secure_wipe_memory(context->pwd, context->pwdlen); - context->pwdlen = 0; - } - } - - store32(&value, context->saltlen); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - if (context->salt != NULL) { - blake2b_update(&BlakeHash, (const uint8_t *)context->salt, - context->saltlen); - } - - store32(&value, context->secretlen); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - if (context->secret != NULL) 
{ - blake2b_update(&BlakeHash, (const uint8_t *)context->secret, - context->secretlen); - - if (context->flags & ARGON2_FLAG_CLEAR_SECRET) { - secure_wipe_memory(context->secret, context->secretlen); - context->secretlen = 0; - } - } - - store32(&value, context->adlen); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - if (context->ad != NULL) { - blake2b_update(&BlakeHash, (const uint8_t *)context->ad, - context->adlen); - } - - blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); -} - -int initialize(argon2_instance_t *instance, argon2_context *context) { - uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; - int result = ARGON2_OK; - - if (instance == NULL || context == NULL) - return ARGON2_INCORRECT_PARAMETER; - instance->context_ptr = context; - - /* 1. Memory allocation */ - - result = allocate_memory(context, instance); - if (result != ARGON2_OK) { - return result; - } - - /* 2. Initial hashing */ - /* H_0 + 8 extra bytes to produce the first blocks */ - /* uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; */ - /* Hashing all inputs */ - initial_hash(blockhash, context, instance->type); - /* Zeroing 8 extra bytes */ - clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, - ARGON2_PREHASH_SEED_LENGTH - - ARGON2_PREHASH_DIGEST_LENGTH); - - if (instance->print_internals) { - initial_kat(blockhash, context, instance->type); - } - - /* 3. 
Creating first blocks, we always have at least two blocks in a slice - */ - fill_first_blocks(blockhash, instance); - /* Clearing the hash */ - clear_internal_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH); - - return ARGON2_OK; -} diff --git a/src/3rdparty/argon2/lib/core.h b/src/3rdparty/argon2/lib/core.h deleted file mode 100644 index 5c67fa36..00000000 --- a/src/3rdparty/argon2/lib/core.h +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . - */ - -#ifndef ARGON2_CORE_H -#define ARGON2_CORE_H - -#include "argon2.h" - -#if defined(_MSC_VER) -#define ALIGN(n) __declspec(align(16)) -#elif defined(__GNUC__) || defined(__clang) -#define ALIGN(x) __attribute__((__aligned__(x))) -#else -#define ALIGN(x) -#endif - -#define CONST_CAST(x) (x)(uintptr_t) - -/**********************Argon2 internal constants*******************************/ - -enum argon2_core_constants { - /* Memory block size in bytes */ - ARGON2_BLOCK_SIZE = 1024, - ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8, - ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16, - - /* Number of pseudo-random values generated by one call to Blake in Argon2i - to - generate reference block positions */ - ARGON2_ADDRESSES_IN_BLOCK = 128, - - /* Pre-hashing digest length and its extension*/ - ARGON2_PREHASH_DIGEST_LENGTH = 64, - ARGON2_PREHASH_SEED_LENGTH = 72 -}; - -/*************************Argon2 internal data types***********************/ - -/* - * Structure for the (1KB) memory block implemented as 128 64-bit words. - * Memory blocks can be copied, XORed. Internal words can be accessed by [] (no - * bounds checking). 
- */ -typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block; - -/*****************Functions that work with the block******************/ - -/* Initialize each byte of the block with @in */ -void init_block_value(block *b, uint8_t in); - -/* Copy block @src to block @dst */ -void copy_block(block *dst, const block *src); - -/* XOR @src onto @dst bytewise */ -void xor_block(block *dst, const block *src); - -/* - * Argon2 instance: memory pointer, number of passes, amount of memory, type, - * and derived values. - * Used to evaluate the number and location of blocks to construct in each - * thread - */ -typedef struct Argon2_instance_t { - block *memory; /* Memory pointer */ - uint32_t version; - uint32_t passes; /* Number of passes */ - uint32_t memory_blocks; /* Number of blocks in memory */ - uint32_t segment_length; - uint32_t lane_length; - uint32_t lanes; - uint32_t threads; - argon2_type type; - int print_internals; /* whether to print the memory blocks */ - int keep_memory; - argon2_context *context_ptr; /* points back to original context */ -} argon2_instance_t; - -/* - * Argon2 position: where we construct the block right now. Used to distribute - * work between threads. - */ -typedef struct Argon2_position_t { - uint32_t pass; - uint32_t lane; - uint8_t slice; - uint32_t index; -} argon2_position_t; - -/*Struct that holds the inputs for thread handling FillSegment*/ -typedef struct Argon2_thread_data { - argon2_instance_t *instance_ptr; - argon2_position_t pos; -} argon2_thread_data; - -/*************************Argon2 core functions********************************/ - -/* Allocates memory to the given pointer, uses the appropriate allocator as - * specified in the context. Total allocated memory is num*size. 
- * @param context argon2_context which specifies the allocator - * @param instance the Argon2 instance - * @return ARGON2_OK if memory is allocated successfully - */ -int allocate_memory(const argon2_context *context, - argon2_instance_t *instance); - -/* - * Frees memory at the given pointer, uses the appropriate deallocator as - * specified in the context. Also cleans the memory using clear_internal_memory. - * @param context argon2_context which specifies the deallocator - * @param instance the Argon2 instance - */ -void free_memory(const argon2_context *context, - const argon2_instance_t *instance); - -/* Function that securely cleans the memory. This ignores any flags set - * regarding clearing memory. Usually one just calls clear_internal_memory. - * @param mem Pointer to the memory - * @param s Memory size in bytes - */ -void secure_wipe_memory(void *v, size_t n); - -/* Function that securely clears the memory if FLAG_clear_internal_memory is - * set. If the flag isn't set, this function does nothing. - * @param mem Pointer to the memory - * @param s Memory size in bytes - */ -ARGON2_PUBLIC void clear_internal_memory(void *v, size_t n); - -/* - * Computes absolute position of reference block in the lane following a skewed - * distribution and using a pseudo-random value as input - * @param instance Pointer to the current instance - * @param position Pointer to the current position - * @param pseudo_rand 32-bit pseudo-random value used to determine the position - * @param same_lane Indicates if the block will be taken from the current lane. 
- * If so we can reference the current segment - * @pre All pointers must be valid - */ -uint32_t index_alpha(const argon2_instance_t *instance, - const argon2_position_t *position, uint32_t pseudo_rand, - int same_lane); - -/* - * Function that validates all inputs against predefined restrictions and return - * an error code - * @param context Pointer to current Argon2 context - * @return ARGON2_OK if everything is all right, otherwise one of error codes - * (all defined in - */ -int validate_inputs(const argon2_context *context); - -/* - * Hashes all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clears - * password and secret if needed - * @param context Pointer to the Argon2 internal structure containing memory - * pointer, and parameters for time and space requirements. - * @param blockhash Buffer for pre-hashing digest - * @param type Argon2 type - * @pre @a blockhash must have at least @a PREHASH_DIGEST_LENGTH bytes - * allocated - */ -void initial_hash(uint8_t *blockhash, argon2_context *context, - argon2_type type); - -/* - * Function creates first 2 blocks per lane - * @param instance Pointer to the current instance - * @param blockhash Pointer to the pre-hashing digest - * @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values - */ -void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance); - -/* - * Function allocates memory, hashes the inputs with Blake, and creates first - * two blocks. Returns the pointer to the main memory with 2 blocks per lane - * initialized - * @param context Pointer to the Argon2 internal structure containing memory - * pointer, and parameters for time and space requirements. - * @param instance Current Argon2 instance - * @return Zero if successful, -1 if memory failed to allocate. @context->state - * will be modified if successful. - */ -int initialize(argon2_instance_t *instance, argon2_context *context); - -/* - * XORing the last block of each lane, hashing it, making the tag. 
Deallocates - * the memory. - * @param context Pointer to current Argon2 context (use only the out parameters - * from it) - * @param instance Pointer to current instance of Argon2 - * @pre instance->state must point to necessary amount of memory - * @pre context->out must point to outlen bytes of memory - * @pre if context->free_cbk is not NULL, it should point to a function that - * deallocates memory - */ -void finalize(const argon2_context *context, argon2_instance_t *instance); - -/* - * Function that fills the segment using previous segments also from other - * threads - * @param instance Pointer to the current instance - * @param position Current position - * @pre all block pointers must be valid - */ -void fill_segment(const argon2_instance_t *instance, - argon2_position_t position); - -/* - * Function that fills the entire memory t_cost times based on the first two - * blocks in each lane - * @param instance Pointer to the current instance - * @return ARGON2_OK if successful, @context->state - */ -int fill_memory_blocks(argon2_instance_t *instance); - -#endif diff --git a/src/3rdparty/argon2/lib/encoding.c b/src/3rdparty/argon2/lib/encoding.c deleted file mode 100644 index af56e447..00000000 --- a/src/3rdparty/argon2/lib/encoding.c +++ /dev/null @@ -1,432 +0,0 @@ -#include -#include -#include -#include -#include "encoding.h" -#include "core.h" - -/* - * Example code for a decoder and encoder of "hash strings", with Argon2 - * parameters. - * - * This code comprises three sections: - * - * -- The first section contains generic Base64 encoding and decoding - * functions. It is conceptually applicable to any hash function - * implementation that uses Base64 to encode and decode parameters, - * salts and outputs. It could be made into a library, provided that - * the relevant functions are made public (non-static) and be given - * reasonable names to avoid collisions with other functions. - * - * -- The second section is specific to Argon2. 
It encodes and decodes - * the parameters, salts and outputs. It does not compute the hash - * itself. - * - * The code was originally written by Thomas Pornin , - * to whom comments and remarks may be sent. It is released under what - * should amount to Public Domain or its closest equivalent; the - * following mantra is supposed to incarnate that fact with all the - * proper legal rituals: - * - * --------------------------------------------------------------------- - * This file is provided under the terms of Creative Commons CC0 1.0 - * Public Domain Dedication. To the extent possible under law, the - * author (Thomas Pornin) has waived all copyright and related or - * neighboring rights to this file. This work is published from: Canada. - * --------------------------------------------------------------------- - * - * Copyright (c) 2015 Thomas Pornin - */ - -/* ==================================================================== */ -/* - * Common code; could be shared between different hash functions. - * - * Note: the Base64 functions below assume that uppercase letters (resp. - * lowercase letters) have consecutive numerical codes, that fit on 8 - * bits. All modern systems use ASCII-compatible charsets, where these - * properties are true. If you are stuck with a dinosaur of a system - * that still defaults to EBCDIC then you already have much bigger - * interoperability issues to deal with. - */ - -/* - * Some macros for constant-time comparisons. These work over values in - * the 0..255 range. Returned value is 0x00 on "false", 0xFF on "true". - */ -#define EQ(x, y) ((((0U - ((unsigned)(x) ^ (unsigned)(y))) >> 8) & 0xFF) ^ 0xFF) -#define GT(x, y) ((((unsigned)(y) - (unsigned)(x)) >> 8) & 0xFF) -#define GE(x, y) (GT(y, x) ^ 0xFF) -#define LT(x, y) GT(y, x) -#define LE(x, y) GE(y, x) - -/* - * Convert value x (0..63) to corresponding Base64 character. 
- */ -static int b64_byte_to_char(unsigned x) { - return (LT(x, 26) & (x + 'A')) | - (GE(x, 26) & LT(x, 52) & (x + ('a' - 26))) | - (GE(x, 52) & LT(x, 62) & (x + ('0' - 52))) | (EQ(x, 62) & '+') | - (EQ(x, 63) & '/'); -} - -/* - * Convert character c to the corresponding 6-bit value. If character c - * is not a Base64 character, then 0xFF (255) is returned. - */ -static unsigned b64_char_to_byte(int c) { - unsigned x; - - x = (GE(c, 'A') & LE(c, 'Z') & (c - 'A')) | - (GE(c, 'a') & LE(c, 'z') & (c - ('a' - 26))) | - (GE(c, '0') & LE(c, '9') & (c - ('0' - 52))) | (EQ(c, '+') & 62) | - (EQ(c, '/') & 63); - return x | (EQ(x, 0) & (EQ(c, 'A') ^ 0xFF)); -} - -/* - * Convert some bytes to Base64. 'dst_len' is the length (in characters) - * of the output buffer 'dst'; if that buffer is not large enough to - * receive the result (including the terminating 0), then (size_t)-1 - * is returned. Otherwise, the zero-terminated Base64 string is written - * in the buffer, and the output length (counted WITHOUT the terminating - * zero) is returned. - */ -static size_t to_base64(char *dst, size_t dst_len, const void *src, - size_t src_len) { - size_t olen; - const unsigned char *buf; - unsigned acc, acc_len; - - olen = (src_len / 3) << 2; - switch (src_len % 3) { - case 2: - olen++; - /* fall through */ - case 1: - olen += 2; - break; - } - if (dst_len <= olen) { - return (size_t)-1; - } - acc = 0; - acc_len = 0; - buf = (const unsigned char *)src; - while (src_len-- > 0) { - acc = (acc << 8) + (*buf++); - acc_len += 8; - while (acc_len >= 6) { - acc_len -= 6; - *dst++ = (char)b64_byte_to_char((acc >> acc_len) & 0x3F); - } - } - if (acc_len > 0) { - *dst++ = (char)b64_byte_to_char((acc << (6 - acc_len)) & 0x3F); - } - *dst++ = 0; - return olen; -} - -/* - * Decode Base64 chars into bytes. The '*dst_len' value must initially - * contain the length of the output buffer '*dst'; when the decoding - * ends, the actual number of decoded bytes is written back in - * '*dst_len'. 
- * - * Decoding stops when a non-Base64 character is encountered, or when - * the output buffer capacity is exceeded. If an error occurred (output - * buffer is too small, invalid last characters leading to unprocessed - * buffered bits), then NULL is returned; otherwise, the returned value - * points to the first non-Base64 character in the source stream, which - * may be the terminating zero. - */ -static const char *from_base64(void *dst, size_t *dst_len, const char *src) { - size_t len; - unsigned char *buf; - unsigned acc, acc_len; - - buf = (unsigned char *)dst; - len = 0; - acc = 0; - acc_len = 0; - for (;;) { - unsigned d; - - d = b64_char_to_byte(*src); - if (d == 0xFF) { - break; - } - src++; - acc = (acc << 6) + d; - acc_len += 6; - if (acc_len >= 8) { - acc_len -= 8; - if ((len++) >= *dst_len) { - return NULL; - } - *buf++ = (acc >> acc_len) & 0xFF; - } - } - - /* - * If the input length is equal to 1 modulo 4 (which is - * invalid), then there will remain 6 unprocessed bits; - * otherwise, only 0, 2 or 4 bits are buffered. The buffered - * bits must also all be zero. - */ - if (acc_len > 4 || (acc & (((unsigned)1 << acc_len) - 1)) != 0) { - return NULL; - } - *dst_len = len; - return src; -} - -/* - * Decode decimal integer from 'str'; the value is written in '*v'. - * Returned value is a pointer to the next non-decimal character in the - * string. If there is no digit at all, or the value encoding is not - * minimal (extra leading zeros), or the value does not fit in an - * 'unsigned long', then NULL is returned. 
- */ -static const char *decode_decimal(const char *str, unsigned long *v) { - const char *orig; - unsigned long acc; - - acc = 0; - for (orig = str;; str++) { - int c; - - c = *str; - if (c < '0' || c > '9') { - break; - } - c -= '0'; - if (acc > (ULONG_MAX / 10)) { - return NULL; - } - acc *= 10; - if ((unsigned long)c > (ULONG_MAX - acc)) { - return NULL; - } - acc += (unsigned long)c; - } - if (str == orig || (*orig == '0' && str != (orig + 1))) { - return NULL; - } - *v = acc; - return str; -} - -/* ==================================================================== */ -/* - * Code specific to Argon2. - * - * The code below applies the following format: - * - * $argon2[$v=]$m=,t=,p=$$ - * - * where is either 'd', 'id', or 'i', is a decimal integer (positive, - * fits in an 'unsigned long'), and is Base64-encoded data (no '=' padding - * characters, no newline or whitespace). - * - * The last two binary chunks (encoded in Base64) are, in that order, - * the salt and the output. Both are required. The binary salt length and the - * output length must be in the allowed ranges defined in argon2.h. - * - * The ctx struct must contain buffers large enough to hold the salt and pwd - * when it is fed into decode_string. 
- */ - -int decode_string(argon2_context *ctx, const char *str, argon2_type type) { - -/* check for prefix */ -#define CC(prefix) \ - do { \ - size_t cc_len = strlen(prefix); \ - if (strncmp(str, prefix, cc_len) != 0) { \ - return ARGON2_DECODING_FAIL; \ - } \ - str += cc_len; \ - } while ((void)0, 0) - -/* optional prefix checking with supplied code */ -#define CC_opt(prefix, code) \ - do { \ - size_t cc_len = strlen(prefix); \ - if (strncmp(str, prefix, cc_len) == 0) { \ - str += cc_len; \ - { code; } \ - } \ - } while ((void)0, 0) - -/* Decoding prefix into uint32_t decimal */ -#define DECIMAL_U32(x) \ - do { \ - unsigned long dec_x; \ - str = decode_decimal(str, &dec_x); \ - if (str == NULL || dec_x > UINT32_MAX) { \ - return ARGON2_DECODING_FAIL; \ - } \ - (x) = (uint32_t)dec_x; \ - } while ((void)0, 0) - -/* Decoding base64 into a binary buffer */ -#define BIN(buf, max_len, len) \ - do { \ - size_t bin_len = (max_len); \ - str = from_base64(buf, &bin_len, str); \ - if (str == NULL || bin_len > UINT32_MAX) { \ - return ARGON2_DECODING_FAIL; \ - } \ - (len) = (uint32_t)bin_len; \ - } while ((void)0, 0) - - size_t maxsaltlen = ctx->saltlen; - size_t maxoutlen = ctx->outlen; - int validation_result; - const char* type_string; - - /* We should start with the argon2_type we are using */ - type_string = argon2_type2string(type, 0); - if (!type_string) { - return ARGON2_INCORRECT_TYPE; - } - - CC("$"); - CC(type_string); - - /* Reading the version number if the default is suppressed */ - ctx->version = ARGON2_VERSION_10; - CC_opt("$v=", DECIMAL_U32(ctx->version)); - - CC("$m="); - DECIMAL_U32(ctx->m_cost); - CC(",t="); - DECIMAL_U32(ctx->t_cost); - CC(",p="); - DECIMAL_U32(ctx->lanes); - ctx->threads = ctx->lanes; - - CC("$"); - BIN(ctx->salt, maxsaltlen, ctx->saltlen); - CC("$"); - BIN(ctx->out, maxoutlen, ctx->outlen); - - /* The rest of the fields get the default values */ - ctx->secret = NULL; - ctx->secretlen = 0; - ctx->ad = NULL; - ctx->adlen = 0; - 
ctx->allocate_cbk = NULL; - ctx->free_cbk = NULL; - ctx->flags = ARGON2_DEFAULT_FLAGS; - - /* On return, must have valid context */ - validation_result = validate_inputs(ctx); - if (validation_result != ARGON2_OK) { - return validation_result; - } - - /* Can't have any additional characters */ - if (*str == 0) { - return ARGON2_OK; - } else { - return ARGON2_DECODING_FAIL; - } -#undef CC -#undef CC_opt -#undef DECIMAL_U32 -#undef BIN -} - -int encode_string(char *dst, size_t dst_len, argon2_context *ctx, - argon2_type type) { -#define SS(str) \ - do { \ - size_t pp_len = strlen(str); \ - if (pp_len >= dst_len) { \ - return ARGON2_ENCODING_FAIL; \ - } \ - memcpy(dst, str, pp_len + 1); \ - dst += pp_len; \ - dst_len -= pp_len; \ - } while ((void)0, 0) - -#define SX(x) \ - do { \ - char tmp[30]; \ - sprintf(tmp, "%lu", (unsigned long)(x)); \ - SS(tmp); \ - } while ((void)0, 0) - -#define SB(buf, len) \ - do { \ - size_t sb_len = to_base64(dst, dst_len, buf, len); \ - if (sb_len == (size_t)-1) { \ - return ARGON2_ENCODING_FAIL; \ - } \ - dst += sb_len; \ - dst_len -= sb_len; \ - } while ((void)0, 0) - - const char* type_string = argon2_type2string(type, 0); - int validation_result = validate_inputs(ctx); - - if (!type_string) { - return ARGON2_ENCODING_FAIL; - } - - if (validation_result != ARGON2_OK) { - return validation_result; - } - - SS("$"); - SS(type_string); - - SS("$v="); - SX(ctx->version); - - SS("$m="); - SX(ctx->m_cost); - SS(",t="); - SX(ctx->t_cost); - SS(",p="); - SX(ctx->lanes); - - SS("$"); - SB(ctx->salt, ctx->saltlen); - - SS("$"); - SB(ctx->out, ctx->outlen); - return ARGON2_OK; - -#undef SS -#undef SX -#undef SB -} - -size_t b64len(uint32_t len) { - size_t olen = ((size_t)len / 3) << 2; - - switch (len % 3) { - case 2: - olen++; - /* fall through */ - case 1: - olen += 2; - break; - } - - return olen; -} - -size_t numlen(uint32_t num) { - size_t len = 1; - while (num >= 10) { - ++len; - num = num / 10; - } - return len; -} - diff --git 
a/src/3rdparty/argon2/lib/encoding.h b/src/3rdparty/argon2/lib/encoding.h deleted file mode 100644 index e7834e4f..00000000 --- a/src/3rdparty/argon2/lib/encoding.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef ENCODING_H -#define ENCODING_H -#include "argon2.h" - -#define ARGON2_MAX_DECODED_LANES UINT32_C(255) -#define ARGON2_MIN_DECODED_SALT_LEN UINT32_C(8) -#define ARGON2_MIN_DECODED_OUT_LEN UINT32_C(12) - -/* -* encode an Argon2 hash string into the provided buffer. 'dst_len' -* contains the size, in characters, of the 'dst' buffer; if 'dst_len' -* is less than the number of required characters (including the -* terminating 0), then this function returns ARGON2_ENCODING_ERROR. -* -* on success, ARGON2_OK is returned. -*/ -int encode_string(char *dst, size_t dst_len, argon2_context *ctx, - argon2_type type); - -/* -* Decodes an Argon2 hash string into the provided structure 'ctx'. -* The only fields that must be set prior to this call are ctx.saltlen and -* ctx.outlen (which must be the maximal salt and out length values that are -* allowed), ctx.salt and ctx.out (which must be buffers of the specified -* length), and ctx.pwd and ctx.pwdlen which must hold a valid password. -* -* Invalid input string causes an error. On success, the ctx is valid and all -* fields have been initialized. -* -* Returned value is ARGON2_OK on success, other ARGON2_ codes on error. 
-*/ -int decode_string(argon2_context *ctx, const char *str, argon2_type type); - -/* Returns the length of the encoded byte stream with length len */ -size_t b64len(uint32_t len); - -/* Returns the length of the encoded number num */ -size_t numlen(uint32_t num); - -#endif diff --git a/src/3rdparty/argon2/lib/genkat.c b/src/3rdparty/argon2/lib/genkat.c deleted file mode 100644 index fd5663bf..00000000 --- a/src/3rdparty/argon2/lib/genkat.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . - */ - -#include -#include - -#include "genkat.h" - -void initial_kat(const uint8_t *blockhash, const argon2_context *context, - argon2_type type) { - unsigned i; - - if (blockhash != NULL && context != NULL) { - printf("=======================================\n"); - - printf("%s version number %d\n", argon2_type2string(type, 1), - context->version); - - printf("=======================================\n"); - - - printf("Memory: %u KiB, Iterations: %u, Parallelism: %u lanes, Tag " - "length: %u bytes\n", - context->m_cost, context->t_cost, context->lanes, - context->outlen); - - printf("Password[%u]: ", context->pwdlen); - - if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) { - printf("CLEARED\n"); - } else { - for (i = 0; i < context->pwdlen; ++i) { - printf("%2.2x ", ((unsigned char *)context->pwd)[i]); - } - - printf("\n"); - } - - printf("Salt[%u]: ", context->saltlen); - - for (i = 0; i < context->saltlen; ++i) { - printf("%2.2x ", ((unsigned char *)context->salt)[i]); - } - - printf("\n"); - - printf("Secret[%u]: ", context->secretlen); - - if (context->flags & ARGON2_FLAG_CLEAR_SECRET) { - printf("CLEARED\n"); - } else { - for (i = 0; i < context->secretlen; ++i) { - printf("%2.2x ", ((unsigned 
char *)context->secret)[i]); - } - - printf("\n"); - } - - printf("Associated data[%u]: ", context->adlen); - - for (i = 0; i < context->adlen; ++i) { - printf("%2.2x ", ((unsigned char *)context->ad)[i]); - } - - printf("\n"); - - printf("Pre-hashing digest: "); - - for (i = 0; i < ARGON2_PREHASH_DIGEST_LENGTH; ++i) { - printf("%2.2x ", ((unsigned char *)blockhash)[i]); - } - - printf("\n"); - } -} - -void print_tag(const void *out, uint32_t outlen) { - unsigned i; - if (out != NULL) { - printf("Tag: "); - - for (i = 0; i < outlen; ++i) { - printf("%2.2x ", ((uint8_t *)out)[i]); - } - - printf("\n"); - } -} - -void internal_kat(const argon2_instance_t *instance, uint32_t pass) { - - if (instance != NULL) { - uint32_t i, j; - printf("\n After pass %u:\n", pass); - - for (i = 0; i < instance->memory_blocks; ++i) { - uint32_t how_many_words = - (instance->memory_blocks > ARGON2_QWORDS_IN_BLOCK) - ? 1 - : ARGON2_QWORDS_IN_BLOCK; - - for (j = 0; j < how_many_words; ++j) - printf("Block %.4u [%3u]: %016" PRIx64 "\n", i, j, - instance->memory[i].v[j]); - } - } -} diff --git a/src/3rdparty/argon2/lib/genkat.h b/src/3rdparty/argon2/lib/genkat.h deleted file mode 100644 index 815c09b5..00000000 --- a/src/3rdparty/argon2/lib/genkat.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . 
- */ - -#ifndef ARGON2_KAT_H -#define ARGON2_KAT_H - -#include "core.h" - -/* - * Initial KAT function that prints the inputs to the file - * @param blockhash Array that contains pre-hashing digest - * @param context Holds inputs - * @param type Argon2 type - * @pre blockhash must point to INPUT_INITIAL_HASH_LENGTH bytes - * @pre context member pointers must point to allocated memory of size according - * to the length values - */ -void initial_kat(const uint8_t *blockhash, const argon2_context *context, - argon2_type type); - -/* - * Function that prints the output tag - * @param out output array pointer - * @param outlen digest length - * @pre out must point to @a outlen bytes - **/ -void print_tag(const void *out, uint32_t outlen); - -/* - * Function that prints the internal state at given moment - * @param instance pointer to the current instance - * @param pass current pass number - * @pre instance must have necessary memory allocated - **/ -void internal_kat(const argon2_instance_t *instance, uint32_t pass); - -#endif diff --git a/src/3rdparty/argon2/lib/impl-select.c b/src/3rdparty/argon2/lib/impl-select.c deleted file mode 100644 index 84c62aec..00000000 --- a/src/3rdparty/argon2/lib/impl-select.c +++ /dev/null @@ -1,120 +0,0 @@ -#include -#include - -#include "impl-select.h" - -#include "argon2.h" - -#define log_maybe(file, ...) 
\ - do { \ - if (file) { \ - fprintf(file, __VA_ARGS__); \ - } \ - } while((void)0, 0) - -#define BENCH_SAMPLES 512 -#define BENCH_MEM_BLOCKS 512 - -static argon2_impl selected_argon_impl = { - "(default)", NULL, fill_segment_default -}; - -/* the benchmark routine is not thread-safe, so we can use a global var here: */ -static block memory[BENCH_MEM_BLOCKS]; - -static uint64_t benchmark_impl(const argon2_impl *impl) { - clock_t time; - unsigned int i; - uint64_t bench; - argon2_instance_t instance; - argon2_position_t pos; - - memset(memory, 0, sizeof(memory)); - - instance.version = ARGON2_VERSION_NUMBER; - instance.memory = memory; - instance.passes = 1; - instance.memory_blocks = BENCH_MEM_BLOCKS; - instance.segment_length = BENCH_MEM_BLOCKS / ARGON2_SYNC_POINTS; - instance.lane_length = instance.segment_length * ARGON2_SYNC_POINTS; - instance.lanes = 1; - instance.threads = 1; - instance.type = Argon2_i; - - pos.lane = 0; - pos.pass = 0; - pos.slice = 0; - pos.index = 0; - - /* warm-up cache: */ - impl->fill_segment(&instance, pos); - - /* OK, now measure: */ - bench = 0; - time = clock(); - for (i = 0; i < BENCH_SAMPLES; i++) { - impl->fill_segment(&instance, pos); - } - time = clock() - time; - bench = (uint64_t)time; - return bench; -} - -static void select_impl(FILE *out, const char *prefix) -{ - argon2_impl_list impls; - unsigned int i; - const argon2_impl *best_impl = NULL; - uint64_t best_bench = UINT_MAX; - - log_maybe(out, "%sSelecting best fill_segment implementation...\n", prefix); - - argon2_get_impl_list(&impls); - - for (i = 0; i < impls.count; i++) { - const argon2_impl *impl = &impls.entries[i]; - uint64_t bench; - - log_maybe(out, "%s%s: Checking availability... 
", prefix, impl->name); - if (impl->check != NULL && !impl->check()) { - log_maybe(out, "FAILED!\n"); - continue; - } - log_maybe(out, "OK!\n"); - - log_maybe(out, "%s%s: Benchmarking...\n", prefix, impl->name); - bench = benchmark_impl(impl); - log_maybe(out, "%s%s: Benchmark result: %llu\n", prefix, impl->name, - (unsigned long long)bench); - - if (bench < best_bench) { - best_bench = bench; - best_impl = impl; - } - } - - if (best_impl != NULL) { - log_maybe(out, - "%sBest implementation: '%s' (bench %llu)\n", prefix, - best_impl->name, (unsigned long long)best_bench); - - selected_argon_impl = *best_impl; - } else { - log_maybe(out, - "%sNo optimized implementation available, using default!\n", - prefix); - } -} - -void fill_segment(const argon2_instance_t *instance, argon2_position_t position) -{ - selected_argon_impl.fill_segment(instance, position); -} - -void argon2_select_impl(FILE *out, const char *prefix) -{ - if (prefix == NULL) { - prefix = ""; - } - select_impl(out, prefix); -} diff --git a/src/3rdparty/argon2/lib/impl-select.h b/src/3rdparty/argon2/lib/impl-select.h deleted file mode 100644 index e4acbd1f..00000000 --- a/src/3rdparty/argon2/lib/impl-select.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef ARGON2_IMPL_SELECT_H -#define ARGON2_IMPL_SELECT_H - -#include "core.h" - -typedef struct Argon2_impl { - const char *name; - int (*check)(void); - void (*fill_segment)(const argon2_instance_t *instance, - argon2_position_t position); -} argon2_impl; - -typedef struct Argon2_impl_list { - const argon2_impl *entries; - size_t count; -} argon2_impl_list; - -void argon2_get_impl_list(argon2_impl_list *list); -void fill_segment_default(const argon2_instance_t *instance, - argon2_position_t position); - -#endif // ARGON2_IMPL_SELECT_H - diff --git a/src/3rdparty/argon2/lib/thread.c b/src/3rdparty/argon2/lib/thread.c deleted file mode 100644 index 412261f1..00000000 --- a/src/3rdparty/argon2/lib/thread.c +++ /dev/null @@ -1,36 +0,0 @@ -#include "thread.h" -#if 
defined(_WIN32) -#include -#endif - -int argon2_thread_create(argon2_thread_handle_t *handle, - argon2_thread_func_t func, void *args) { - if (NULL == handle || func == NULL) { - return -1; - } -#if defined(_WIN32) - *handle = _beginthreadex(NULL, 0, func, args, 0, NULL); - return *handle != 0 ? 0 : -1; -#else - return pthread_create(handle, NULL, func, args); -#endif -} - -int argon2_thread_join(argon2_thread_handle_t handle) { -#if defined(_WIN32) - if (WaitForSingleObject((HANDLE)handle, INFINITE) == WAIT_OBJECT_0) { - return CloseHandle((HANDLE)handle) != 0 ? 0 : -1; - } - return -1; -#else - return pthread_join(handle, NULL); -#endif -} - -void argon2_thread_exit(void) { -#if defined(_WIN32) - _endthreadex(0); -#else - pthread_exit(NULL); -#endif -} diff --git a/src/3rdparty/argon2/lib/thread.h b/src/3rdparty/argon2/lib/thread.h deleted file mode 100644 index f1ef5191..00000000 --- a/src/3rdparty/argon2/lib/thread.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef ARGON2_THREAD_H -#define ARGON2_THREAD_H -/* - Here we implement an abstraction layer for the simpĺe requirements - of the Argon2 code. We only require 3 primitives---thread creation, - joining, and termination---so full emulation of the pthreads API - is unwarranted. Currently we wrap pthreads and Win32 threads. - - The API defines 2 types: the function pointer type, - argon2_thread_func_t, - and the type of the thread handle---argon2_thread_handle_t. -*/ -#if defined(_WIN32) -#include -#include -typedef unsigned(__stdcall *argon2_thread_func_t)(void *); -typedef uintptr_t argon2_thread_handle_t; -#else -#include -typedef void *(*argon2_thread_func_t)(void *); -typedef pthread_t argon2_thread_handle_t; -#endif - -/* Creates a thread - * @param handle pointer to a thread handle, which is the output of this - * function. Must not be NULL. - * @param func A function pointer for the thread's entry point. Must not be - * NULL. - * @param args Pointer that is passed as an argument to @func. May be NULL. 
- * @return 0 if @handle and @func are valid pointers and a thread is successfuly - * created. - */ -int argon2_thread_create(argon2_thread_handle_t *handle, - argon2_thread_func_t func, void *args); - -/* Waits for a thread to terminate - * @param handle Handle to a thread created with argon2_thread_create. - * @return 0 if @handle is a valid handle, and joining completed successfully. -*/ -int argon2_thread_join(argon2_thread_handle_t handle); - -/* Terminate the current thread. Must be run inside a thread created by - * argon2_thread_create. -*/ -void argon2_thread_exit(void); - -#endif diff --git a/src/3rdparty/argon2/m4/ax_check_compile_flag.m4 b/src/3rdparty/argon2/m4/ax_check_compile_flag.m4 deleted file mode 100644 index ca363971..00000000 --- a/src/3rdparty/argon2/m4/ax_check_compile_flag.m4 +++ /dev/null @@ -1,74 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) -# -# DESCRIPTION -# -# Check whether the given FLAG works with the current language's compiler -# or gives an error. (Warnings, however, are ignored) -# -# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on -# success/failure. -# -# If EXTRA-FLAGS is defined, it is added to the current language's default -# flags (e.g. CFLAGS) when the check is done. The check is thus made with -# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to -# force the compiler to issue an error when a bad flag is given. -# -# INPUT gives an alternative input source to AC_COMPILE_IFELSE. -# -# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this -# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG. -# -# LICENSE -# -# Copyright (c) 2008 Guido U. 
Draheim -# Copyright (c) 2011 Maarten Bosmans -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. 
- -#serial 4 - -AC_DEFUN([AX_CHECK_COMPILE_FLAG], -[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF -AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl -AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ - ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS - _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" - AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], - [AS_VAR_SET(CACHEVAR,[yes])], - [AS_VAR_SET(CACHEVAR,[no])]) - _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) -AS_VAR_IF(CACHEVAR,yes, - [m4_default([$2], :)], - [m4_default([$3], :)]) -AS_VAR_POPDEF([CACHEVAR])dnl -])dnl AX_CHECK_COMPILE_FLAGS diff --git a/src/3rdparty/argon2/m4/ax_pthread.m4 b/src/3rdparty/argon2/m4/ax_pthread.m4 deleted file mode 100644 index 4c4051ea..00000000 --- a/src/3rdparty/argon2/m4/ax_pthread.m4 +++ /dev/null @@ -1,485 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_pthread.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) -# -# DESCRIPTION -# -# This macro figures out how to build C programs using POSIX threads. It -# sets the PTHREAD_LIBS output variable to the threads library and linker -# flags, and the PTHREAD_CFLAGS output variable to any special C compiler -# flags that are needed. (The user can also force certain compiler -# flags/libs to be tested by setting these environment variables.) -# -# Also sets PTHREAD_CC to any special C compiler that is needed for -# multi-threaded programs (defaults to the value of CC otherwise). (This -# is necessary on AIX to use the special cc_r compiler alias.) -# -# NOTE: You are assumed to not only compile your program with these flags, -# but also to link with them as well. For example, you might link with -# $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... 
$PTHREAD_LIBS $LIBS -# -# If you are only building threaded programs, you may wish to use these -# variables in your default LIBS, CFLAGS, and CC: -# -# LIBS="$PTHREAD_LIBS $LIBS" -# CFLAGS="$CFLAGS $PTHREAD_CFLAGS" -# CC="$PTHREAD_CC" -# -# In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant -# has a nonstandard name, this macro defines PTHREAD_CREATE_JOINABLE to -# that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX). -# -# Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the -# PTHREAD_PRIO_INHERIT symbol is defined when compiling with -# PTHREAD_CFLAGS. -# -# ACTION-IF-FOUND is a list of shell commands to run if a threads library -# is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it -# is not found. If ACTION-IF-FOUND is not specified, the default action -# will define HAVE_PTHREAD. -# -# Please let the authors know if this macro fails on any platform, or if -# you have any other suggestions or comments. This macro was based on work -# by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help -# from M. Frigo), as well as ac_pthread and hb_pthread macros posted by -# Alejandro Forero Cuervo to the autoconf macro repository. We are also -# grateful for the helpful feedback of numerous users. -# -# Updated for Autoconf 2.68 by Daniel Richard G. -# -# LICENSE -# -# Copyright (c) 2008 Steven G. Johnson -# Copyright (c) 2011 Daniel Richard G. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. 
-# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. - -#serial 23 - -AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD]) -AC_DEFUN([AX_PTHREAD], [ -AC_REQUIRE([AC_CANONICAL_HOST]) -AC_REQUIRE([AC_PROG_CC]) -AC_REQUIRE([AC_PROG_SED]) -AC_LANG_PUSH([C]) -ax_pthread_ok=no - -# We used to check for pthread.h first, but this fails if pthread.h -# requires special compiler flags (e.g. on Tru64 or Sequent). -# It gets checked for in the link test anyway. 
- -# First of all, check if the user has set any of the PTHREAD_LIBS, -# etcetera environment variables, and if threads linking works using -# them: -if test "x$PTHREAD_CFLAGS$PTHREAD_LIBS" != "x"; then - ax_pthread_save_CC="$CC" - ax_pthread_save_CFLAGS="$CFLAGS" - ax_pthread_save_LIBS="$LIBS" - AS_IF([test "x$PTHREAD_CC" != "x"], [CC="$PTHREAD_CC"]) - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - LIBS="$PTHREAD_LIBS $LIBS" - AC_MSG_CHECKING([for pthread_join using $CC $PTHREAD_CFLAGS $PTHREAD_LIBS]) - AC_LINK_IFELSE([AC_LANG_CALL([], [pthread_join])], [ax_pthread_ok=yes]) - AC_MSG_RESULT([$ax_pthread_ok]) - if test "x$ax_pthread_ok" = "xno"; then - PTHREAD_LIBS="" - PTHREAD_CFLAGS="" - fi - CC="$ax_pthread_save_CC" - CFLAGS="$ax_pthread_save_CFLAGS" - LIBS="$ax_pthread_save_LIBS" -fi - -# We must check for the threads library under a number of different -# names; the ordering is very important because some systems -# (e.g. DEC) have both -lpthread and -lpthreads, where one of the -# libraries is broken (non-POSIX). - -# Create a list of thread flags to try. Items starting with a "-" are -# C compiler flags, and other items are library names, except for "none" -# which indicates that we try without any flags at all, and "pthread-config" -# which is a program returning the flags for the Pth emulation library. - -ax_pthread_flags="pthreads none -Kthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" - -# The ordering *is* (sometimes) important. 
Some notes on the -# individual items follow: - -# pthreads: AIX (must check this before -lpthread) -# none: in case threads are in libc; should be tried before -Kthread and -# other compiler flags to prevent continual compiler warnings -# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) -# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads), Tru64 -# (Note: HP C rejects this with "bad form for `-t' option") -# -pthreads: Solaris/gcc (Note: HP C also rejects) -# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it -# doesn't hurt to check since this sometimes defines pthreads and -# -D_REENTRANT too), HP C (must be checked before -lpthread, which -# is present but should not be used directly; and before -mthreads, -# because the compiler interprets this as "-mt" + "-hreads") -# -mthreads: Mingw32/gcc, Lynx/gcc -# pthread: Linux, etcetera -# --thread-safe: KAI C++ -# pthread-config: use pthread-config program (for GNU Pth library) - -case $host_os in - - freebsd*) - - # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) - # lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) - - ax_pthread_flags="-kthread lthread $ax_pthread_flags" - ;; - - hpux*) - - # From the cc(1) man page: "[-mt] Sets various -D flags to enable - # multi-threading and also sets -lpthread." - - ax_pthread_flags="-mt -pthread pthread $ax_pthread_flags" - ;; - - openedition*) - - # IBM z/OS requires a feature-test macro to be defined in order to - # enable POSIX threads at all, so give the user a hint if this is - # not set. (We don't define these ourselves, as they can affect - # other portions of the system API in unpredictable ways.) 
- - AC_EGREP_CPP([AX_PTHREAD_ZOS_MISSING], - [ -# if !defined(_OPEN_THREADS) && !defined(_UNIX03_THREADS) - AX_PTHREAD_ZOS_MISSING -# endif - ], - [AC_MSG_WARN([IBM z/OS requires -D_OPEN_THREADS or -D_UNIX03_THREADS to enable pthreads support.])]) - ;; - - solaris*) - - # On Solaris (at least, for some versions), libc contains stubbed - # (non-functional) versions of the pthreads routines, so link-based - # tests will erroneously succeed. (N.B.: The stubs are missing - # pthread_cleanup_push, or rather a function called by this macro, - # so we could check for that, but who knows whether they'll stub - # that too in a future libc.) So we'll check first for the - # standard Solaris way of linking pthreads (-mt -lpthread). - - ax_pthread_flags="-mt,pthread pthread $ax_pthread_flags" - ;; -esac - -# GCC generally uses -pthread, or -pthreads on some platforms (e.g. SPARC) - -AS_IF([test "x$GCC" = "xyes"], - [ax_pthread_flags="-pthread -pthreads $ax_pthread_flags"]) - -# The presence of a feature test macro requesting re-entrant function -# definitions is, on some systems, a strong hint that pthreads support is -# correctly enabled - -case $host_os in - darwin* | hpux* | linux* | osf* | solaris*) - ax_pthread_check_macro="_REENTRANT" - ;; - - aix*) - ax_pthread_check_macro="_THREAD_SAFE" - ;; - - *) - ax_pthread_check_macro="--" - ;; -esac -AS_IF([test "x$ax_pthread_check_macro" = "x--"], - [ax_pthread_check_cond=0], - [ax_pthread_check_cond="!defined($ax_pthread_check_macro)"]) - -# Are we compiling with Clang? 
- -AC_CACHE_CHECK([whether $CC is Clang], - [ax_cv_PTHREAD_CLANG], - [ax_cv_PTHREAD_CLANG=no - # Note that Autoconf sets GCC=yes for Clang as well as GCC - if test "x$GCC" = "xyes"; then - AC_EGREP_CPP([AX_PTHREAD_CC_IS_CLANG], - [/* Note: Clang 2.7 lacks __clang_[a-z]+__ */ -# if defined(__clang__) && defined(__llvm__) - AX_PTHREAD_CC_IS_CLANG -# endif - ], - [ax_cv_PTHREAD_CLANG=yes]) - fi - ]) -ax_pthread_clang="$ax_cv_PTHREAD_CLANG" - -ax_pthread_clang_warning=no - -# Clang needs special handling, because older versions handle the -pthread -# option in a rather... idiosyncratic way - -if test "x$ax_pthread_clang" = "xyes"; then - - # Clang takes -pthread; it has never supported any other flag - - # (Note 1: This will need to be revisited if a system that Clang - # supports has POSIX threads in a separate library. This tends not - # to be the way of modern systems, but it's conceivable.) - - # (Note 2: On some systems, notably Darwin, -pthread is not needed - # to get POSIX threads support; the API is always present and - # active. We could reasonably leave PTHREAD_CFLAGS empty. But - # -pthread does define _REENTRANT, and while the Darwin headers - # ignore this macro, third-party headers might not.) - - PTHREAD_CFLAGS="-pthread" - PTHREAD_LIBS= - - ax_pthread_ok=yes - - # However, older versions of Clang make a point of warning the user - # that, in an invocation where only linking and no compilation is - # taking place, the -pthread option has no effect ("argument unused - # during compilation"). They expect -pthread to be passed in only - # when source code is being compiled. - # - # Problem is, this is at odds with the way Automake and most other - # C build frameworks function, which is that the same flags used in - # compilation (CFLAGS) are also used in linking. 
Many systems - # supported by AX_PTHREAD require exactly this for POSIX threads - # support, and in fact it is often not straightforward to specify a - # flag that is used only in the compilation phase and not in - # linking. Such a scenario is extremely rare in practice. - # - # Even though use of the -pthread flag in linking would only print - # a warning, this can be a nuisance for well-run software projects - # that build with -Werror. So if the active version of Clang has - # this misfeature, we search for an option to squash it. - - AC_CACHE_CHECK([whether Clang needs flag to prevent "argument unused" warning when linking with -pthread], - [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG], - [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG=unknown - # Create an alternate version of $ac_link that compiles and - # links in two steps (.c -> .o, .o -> exe) instead of one - # (.c -> exe), because the warning occurs only in the second - # step - ax_pthread_save_ac_link="$ac_link" - ax_pthread_sed='s/conftest\.\$ac_ext/conftest.$ac_objext/g' - ax_pthread_link_step=`$as_echo "$ac_link" | sed "$ax_pthread_sed"` - ax_pthread_2step_ac_link="($ac_compile) && (echo ==== >&5) && ($ax_pthread_link_step)" - ax_pthread_save_CFLAGS="$CFLAGS" - for ax_pthread_try in '' -Qunused-arguments -Wno-unused-command-line-argument unknown; do - AS_IF([test "x$ax_pthread_try" = "xunknown"], [break]) - CFLAGS="-Werror -Wunknown-warning-option $ax_pthread_try -pthread $ax_pthread_save_CFLAGS" - ac_link="$ax_pthread_save_ac_link" - AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])], - [ac_link="$ax_pthread_2step_ac_link" - AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])], - [break]) - ]) - done - ac_link="$ax_pthread_save_ac_link" - CFLAGS="$ax_pthread_save_CFLAGS" - AS_IF([test "x$ax_pthread_try" = "x"], [ax_pthread_try=no]) - ax_cv_PTHREAD_CLANG_NO_WARN_FLAG="$ax_pthread_try" - ]) - - case "$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG" in - no | unknown) ;; - *) 
PTHREAD_CFLAGS="$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG $PTHREAD_CFLAGS" ;; - esac - -fi # $ax_pthread_clang = yes - -if test "x$ax_pthread_ok" = "xno"; then -for ax_pthread_try_flag in $ax_pthread_flags; do - - case $ax_pthread_try_flag in - none) - AC_MSG_CHECKING([whether pthreads work without any flags]) - ;; - - -mt,pthread) - AC_MSG_CHECKING([whether pthreads work with -mt -lpthread]) - PTHREAD_CFLAGS="-mt" - PTHREAD_LIBS="-lpthread" - ;; - - -*) - AC_MSG_CHECKING([whether pthreads work with $ax_pthread_try_flag]) - PTHREAD_CFLAGS="$ax_pthread_try_flag" - ;; - - pthread-config) - AC_CHECK_PROG([ax_pthread_config], [pthread-config], [yes], [no]) - AS_IF([test "x$ax_pthread_config" = "xno"], [continue]) - PTHREAD_CFLAGS="`pthread-config --cflags`" - PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" - ;; - - *) - AC_MSG_CHECKING([for the pthreads library -l$ax_pthread_try_flag]) - PTHREAD_LIBS="-l$ax_pthread_try_flag" - ;; - esac - - ax_pthread_save_CFLAGS="$CFLAGS" - ax_pthread_save_LIBS="$LIBS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - LIBS="$PTHREAD_LIBS $LIBS" - - # Check for various functions. We must include pthread.h, - # since some functions may be macros. (On the Sequent, we - # need a special flag -Kthread to make this header compile.) - # We check for pthread_join because it is in -lpthread on IRIX - # while pthread_create is in libc. We check for pthread_attr_init - # due to DEC craziness with -lpthreads. We check for - # pthread_cleanup_push because it is one of the few pthread - # functions on Solaris that doesn't have a non-functional libc stub. - # We try pthread_create on general principles. 
- - AC_LINK_IFELSE([AC_LANG_PROGRAM([#include -# if $ax_pthread_check_cond -# error "$ax_pthread_check_macro must be defined" -# endif - static void routine(void *a) { a = 0; } - static void *start_routine(void *a) { return a; }], - [pthread_t th; pthread_attr_t attr; - pthread_create(&th, 0, start_routine, 0); - pthread_join(th, 0); - pthread_attr_init(&attr); - pthread_cleanup_push(routine, 0); - pthread_cleanup_pop(0) /* ; */])], - [ax_pthread_ok=yes], - []) - - CFLAGS="$ax_pthread_save_CFLAGS" - LIBS="$ax_pthread_save_LIBS" - - AC_MSG_RESULT([$ax_pthread_ok]) - AS_IF([test "x$ax_pthread_ok" = "xyes"], [break]) - - PTHREAD_LIBS="" - PTHREAD_CFLAGS="" -done -fi - -# Various other checks: -if test "x$ax_pthread_ok" = "xyes"; then - ax_pthread_save_CFLAGS="$CFLAGS" - ax_pthread_save_LIBS="$LIBS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - LIBS="$PTHREAD_LIBS $LIBS" - - # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. - AC_CACHE_CHECK([for joinable pthread attribute], - [ax_cv_PTHREAD_JOINABLE_ATTR], - [ax_cv_PTHREAD_JOINABLE_ATTR=unknown - for ax_pthread_attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do - AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], - [int attr = $ax_pthread_attr; return attr /* ; */])], - [ax_cv_PTHREAD_JOINABLE_ATTR=$ax_pthread_attr; break], - []) - done - ]) - AS_IF([test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xunknown" && \ - test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xPTHREAD_CREATE_JOINABLE" && \ - test "x$ax_pthread_joinable_attr_defined" != "xyes"], - [AC_DEFINE_UNQUOTED([PTHREAD_CREATE_JOINABLE], - [$ax_cv_PTHREAD_JOINABLE_ATTR], - [Define to necessary symbol if this constant - uses a non-standard name on your system.]) - ax_pthread_joinable_attr_defined=yes - ]) - - AC_CACHE_CHECK([whether more special flags are required for pthreads], - [ax_cv_PTHREAD_SPECIAL_FLAGS], - [ax_cv_PTHREAD_SPECIAL_FLAGS=no - case $host_os in - solaris*) - ax_cv_PTHREAD_SPECIAL_FLAGS="-D_POSIX_PTHREAD_SEMANTICS" - ;; - esac - ]) - AS_IF([test 
"x$ax_cv_PTHREAD_SPECIAL_FLAGS" != "xno" && \ - test "x$ax_pthread_special_flags_added" != "xyes"], - [PTHREAD_CFLAGS="$ax_cv_PTHREAD_SPECIAL_FLAGS $PTHREAD_CFLAGS" - ax_pthread_special_flags_added=yes]) - - AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT], - [ax_cv_PTHREAD_PRIO_INHERIT], - [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include ]], - [[int i = PTHREAD_PRIO_INHERIT;]])], - [ax_cv_PTHREAD_PRIO_INHERIT=yes], - [ax_cv_PTHREAD_PRIO_INHERIT=no]) - ]) - AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes" && \ - test "x$ax_pthread_prio_inherit_defined" != "xyes"], - [AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], [1], [Have PTHREAD_PRIO_INHERIT.]) - ax_pthread_prio_inherit_defined=yes - ]) - - CFLAGS="$ax_pthread_save_CFLAGS" - LIBS="$ax_pthread_save_LIBS" - - # More AIX lossage: compile with *_r variant - if test "x$GCC" != "xyes"; then - case $host_os in - aix*) - AS_CASE(["x/$CC"], - [x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6], - [#handle absolute path differently from PATH based program lookup - AS_CASE(["x$CC"], - [x/*], - [AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"])], - [AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC])])]) - ;; - esac - fi -fi - -test -n "$PTHREAD_CC" || PTHREAD_CC="$CC" - -AC_SUBST([PTHREAD_LIBS]) -AC_SUBST([PTHREAD_CFLAGS]) -AC_SUBST([PTHREAD_CC]) - -# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: -if test "x$ax_pthread_ok" = "xyes"; then - ifelse([$1],,[AC_DEFINE([HAVE_PTHREAD],[1],[Define if you have POSIX threads libraries and header files.])],[$1]) - : -else - ax_pthread_ok=no - $2 -fi -AC_LANG_POP -])dnl AX_PTHREAD diff --git a/src/3rdparty/argon2/qmake/arch/arch.pro b/src/3rdparty/argon2/qmake/arch/arch.pro deleted file mode 100644 index b1a83150..00000000 --- a/src/3rdparty/argon2/qmake/arch/arch.pro +++ /dev/null @@ -1,3 +0,0 @@ -TEMPLATE = subdirs - -SUBDIRS += $$ARCH diff --git a/src/3rdparty/argon2/qmake/arch/generic/generic.pro 
b/src/3rdparty/argon2/qmake/arch/generic/generic.pro deleted file mode 100644 index 96710850..00000000 --- a/src/3rdparty/argon2/qmake/arch/generic/generic.pro +++ /dev/null @@ -1 +0,0 @@ -TEMPLATE = subdirs diff --git a/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-avx2/libargon2-avx2.pro b/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-avx2/libargon2-avx2.pro deleted file mode 100644 index 449dc508..00000000 --- a/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-avx2/libargon2-avx2.pro +++ /dev/null @@ -1,23 +0,0 @@ -QT -= core gui - -TARGET = argon2-avx2 -TEMPLATE = lib -CONFIG += staticlib - -ARGON2_ROOT = ../../../.. - -INCLUDEPATH += \ - $$ARGON2_ROOT/include \ - $$ARGON2_ROOT/lib \ - $$ARGON2_ROOT/arch/$$ARCH/lib - -USE_AVX2 { - DEFINES += HAVE_AVX2 - QMAKE_CFLAGS += -mavx2 -} - -SOURCES += \ - $$ARGON2_ROOT/arch/x86_64/lib/argon2-avx2.c - -HEADERS += \ - $$ARGON2_ROOT/arch/x86_64/lib/argon2-avx2.h diff --git a/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-avx512f/libargon2-avx512f.pro b/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-avx512f/libargon2-avx512f.pro deleted file mode 100644 index a4a32e06..00000000 --- a/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-avx512f/libargon2-avx512f.pro +++ /dev/null @@ -1,23 +0,0 @@ -QT -= core gui - -TARGET = argon2-avx512f -TEMPLATE = lib -CONFIG += staticlib - -ARGON2_ROOT = ../../../.. 
- -INCLUDEPATH += \ - $$ARGON2_ROOT/include \ - $$ARGON2_ROOT/lib \ - $$ARGON2_ROOT/arch/$$ARCH/lib - -USE_AVX512F { - DEFINES += HAVE_AVX512F - QMAKE_CFLAGS += -mavx512f -} - -SOURCES += \ - $$ARGON2_ROOT/arch/x86_64/lib/argon2-avx512f.c - -HEADERS += \ - $$ARGON2_ROOT/arch/x86_64/lib/argon2-avx512f.h diff --git a/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-sse2/libargon2-sse2.pro b/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-sse2/libargon2-sse2.pro deleted file mode 100644 index 49e7deee..00000000 --- a/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-sse2/libargon2-sse2.pro +++ /dev/null @@ -1,24 +0,0 @@ -QT -= core gui - -TARGET = argon2-sse2 -TEMPLATE = lib -CONFIG += staticlib - -ARGON2_ROOT = ../../../.. - -INCLUDEPATH += \ - $$ARGON2_ROOT/include \ - $$ARGON2_ROOT/lib \ - $$ARGON2_ROOT/arch/$$ARCH/lib - -USE_SSE2 | USE_SSSE3 | USE_XOP | USE_AVX2 { - DEFINES += HAVE_SSE2 - QMAKE_CFLAGS += -msse2 -} - -SOURCES += \ - $$ARGON2_ROOT/arch/x86_64/lib/argon2-sse2.c - -HEADERS += \ - $$ARGON2_ROOT/arch/x86_64/lib/argon2-sse2.h \ - $$ARGON2_ROOT/arch/x86_64/lib/argon2-template-128.h diff --git a/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-ssse3/libargon2-ssse3.pro b/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-ssse3/libargon2-ssse3.pro deleted file mode 100644 index 53ebe6e5..00000000 --- a/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-ssse3/libargon2-ssse3.pro +++ /dev/null @@ -1,24 +0,0 @@ -QT -= core gui - -TARGET = argon2-ssse3 -TEMPLATE = lib -CONFIG += staticlib - -ARGON2_ROOT = ../../../.. 
- -INCLUDEPATH += \ - $$ARGON2_ROOT/include \ - $$ARGON2_ROOT/lib \ - $$ARGON2_ROOT/arch/$$ARCH/lib - -USE_SSSE3 | USE_XOP | USE_AVX2 { - DEFINES += HAVE_SSSE3 - QMAKE_CFLAGS += -mssse3 -} - -SOURCES += \ - $$ARGON2_ROOT/arch/x86_64/lib/argon2-ssse3.c - -HEADERS += \ - $$ARGON2_ROOT/arch/x86_64/lib/argon2-ssse3.h \ - $$ARGON2_ROOT/arch/x86_64/lib/argon2-template-128.h diff --git a/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-xop/libargon2-xop.pro b/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-xop/libargon2-xop.pro deleted file mode 100644 index 85517dce..00000000 --- a/src/3rdparty/argon2/qmake/arch/x86_64/libargon2-xop/libargon2-xop.pro +++ /dev/null @@ -1,24 +0,0 @@ -QT -= core gui - -TARGET = argon2-xop -TEMPLATE = lib -CONFIG += staticlib - -ARGON2_ROOT = ../../../.. - -INCLUDEPATH += \ - $$ARGON2_ROOT/include \ - $$ARGON2_ROOT/lib \ - $$ARGON2_ROOT/arch/$$ARCH/lib - -USE_XOP { - DEFINES += HAVE_XOP - QMAKE_CFLAGS += -mxop -} - -SOURCES += \ - $$ARGON2_ROOT/arch/x86_64/lib/argon2-xop.c - -HEADERS += \ - $$ARGON2_ROOT/arch/x86_64/lib/argon2-xop.h \ - $$ARGON2_ROOT/arch/x86_64/lib/argon2-template-128.h diff --git a/src/3rdparty/argon2/qmake/arch/x86_64/x86_64.pro b/src/3rdparty/argon2/qmake/arch/x86_64/x86_64.pro deleted file mode 100644 index b3cfe029..00000000 --- a/src/3rdparty/argon2/qmake/arch/x86_64/x86_64.pro +++ /dev/null @@ -1,8 +0,0 @@ -TEMPLATE = subdirs - -SUBDIRS += \ - libargon2-sse2 \ - libargon2-ssse3 \ - libargon2-xop \ - libargon2-avx2 \ - libargon2-avx512f diff --git a/src/3rdparty/argon2/qmake/argon2-bench2/argon2-bench2.pro b/src/3rdparty/argon2/qmake/argon2-bench2/argon2-bench2.pro deleted file mode 100644 index 1e858d42..00000000 --- a/src/3rdparty/argon2/qmake/argon2-bench2/argon2-bench2.pro +++ /dev/null @@ -1,19 +0,0 @@ -TEMPLATE = app -CONFIG += console c++11 -CONFIG -= app_bundle -CONFIG -= qt - -ARGON2_ROOT = ../.. 
- -SOURCES += \ - $$ARGON2_ROOT/src/bench2.c - -HEADERS += \ - $$ARGON2_ROOT/src/timing.h - -win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../libargon2/release/ -largon2 -else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../libargon2/debug/ -largon2 -else:unix: LIBS += -L$$OUT_PWD/../libargon2/ -largon2 - -INCLUDEPATH += $$PWD/../../include -DEPENDPATH += $$PWD/../../include diff --git a/src/3rdparty/argon2/qmake/argon2-genkat/argon2-genkat.pro b/src/3rdparty/argon2/qmake/argon2-genkat/argon2-genkat.pro deleted file mode 100644 index c397efec..00000000 --- a/src/3rdparty/argon2/qmake/argon2-genkat/argon2-genkat.pro +++ /dev/null @@ -1,16 +0,0 @@ -TEMPLATE = app -CONFIG += console c++11 -CONFIG -= app_bundle -CONFIG -= qt - -ARGON2_ROOT = ../.. - -SOURCES += \ - $$ARGON2_ROOT/src/genkat.c - -win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../libargon2/release/ -largon2 -else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../libargon2/debug/ -largon2 -else:unix: LIBS += -L$$OUT_PWD/../libargon2/ -largon2 - -INCLUDEPATH += $$PWD/../../include -DEPENDPATH += $$PWD/../../include diff --git a/src/3rdparty/argon2/qmake/argon2-test/argon2-test.pro b/src/3rdparty/argon2/qmake/argon2-test/argon2-test.pro deleted file mode 100644 index e0d23c45..00000000 --- a/src/3rdparty/argon2/qmake/argon2-test/argon2-test.pro +++ /dev/null @@ -1,16 +0,0 @@ -TEMPLATE = app -CONFIG += console c++11 -CONFIG -= app_bundle -CONFIG -= qt - -ARGON2_ROOT = ../.. 
- -SOURCES += \ - $$ARGON2_ROOT/tests/test.c - -win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../libargon2/release/ -largon2 -else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../libargon2/debug/ -largon2 -else:unix: LIBS += -L$$OUT_PWD/../libargon2/ -largon2 - -INCLUDEPATH += $$PWD/../../include -DEPENDPATH += $$PWD/../../include diff --git a/src/3rdparty/argon2/qmake/argon2.pro b/src/3rdparty/argon2/qmake/argon2.pro deleted file mode 100644 index 0949d39c..00000000 --- a/src/3rdparty/argon2/qmake/argon2.pro +++ /dev/null @@ -1,9 +0,0 @@ -TEMPLATE = subdirs - -SUBDIRS += \ - arch \ - libargon2 \ - argon2 \ - argon2-genkat \ - argon2-bench2 \ - argon2-test diff --git a/src/3rdparty/argon2/qmake/argon2/argon2.pro b/src/3rdparty/argon2/qmake/argon2/argon2.pro deleted file mode 100644 index ff8c2049..00000000 --- a/src/3rdparty/argon2/qmake/argon2/argon2.pro +++ /dev/null @@ -1,18 +0,0 @@ -TEMPLATE = app -CONFIG += console c++11 -CONFIG -= app_bundle -CONFIG -= qt - -ARGON2_ROOT = ../.. - -SOURCES += \ - $$ARGON2_ROOT/src/run.c - -win32: DEFINES += argon2_EXPORT - -win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../libargon2/release/ -largon2 -else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../libargon2/debug/ -largon2 -else:unix: LIBS += -L$$OUT_PWD/../libargon2/ -largon2 - -INCLUDEPATH += $$PWD/../../include $$PWD/../../lib -DEPENDPATH += $$PWD/../../include $$PWD/../../lib diff --git a/src/3rdparty/argon2/qmake/libargon2/libargon2.pro b/src/3rdparty/argon2/qmake/libargon2/libargon2.pro deleted file mode 100644 index 77846168..00000000 --- a/src/3rdparty/argon2/qmake/libargon2/libargon2.pro +++ /dev/null @@ -1,119 +0,0 @@ -#------------------------------------------------- -# -# Project created by QtCreator 2016-08-08T17:43:00 -# -#------------------------------------------------- - -QT -= core gui - -TARGET = argon2 -TEMPLATE = lib - -ARGON2_ROOT = ../.. 
- -INCLUDEPATH += \ - $$ARGON2_ROOT/include \ - $$ARGON2_ROOT/lib - -SOURCES += \ - $$ARGON2_ROOT/lib/argon2.c \ - $$ARGON2_ROOT/lib/core.c \ - $$ARGON2_ROOT/lib/encoding.c \ - $$ARGON2_ROOT/lib/genkat.c \ - $$ARGON2_ROOT/lib/impl-select.c \ - $$ARGON2_ROOT/lib/thread.c \ - $$ARGON2_ROOT/lib/blake2/blake2.c - -HEADERS += \ - $$ARGON2_ROOT/include/argon2.h \ - $$ARGON2_ROOT/lib/argon2-template-64.h \ - $$ARGON2_ROOT/lib/core.h \ - $$ARGON2_ROOT/lib/encoding.h \ - $$ARGON2_ROOT/lib/genkat.h \ - $$ARGON2_ROOT/lib/impl-select.h \ - $$ARGON2_ROOT/lib/thread.h \ - $$ARGON2_ROOT/lib/blake2/blake2.h \ - $$ARGON2_ROOT/lib/blake2/blake2-impl.h - -equals(ARCH, x86_64) { - SOURCES += \ - $$ARGON2_ROOT/arch/$$ARCH/lib/cpu-flags.c \ - $$ARGON2_ROOT/arch/$$ARCH/lib/argon2-arch.c - - HEADERS += \ - $$ARGON2_ROOT/arch/$$ARCH/lib/cpu-flags.h - - # libargon2-sse2.a: - win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-sse2/release/ -largon2-sse2 - else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-sse2/debug/ -largon2-sse2 - else:unix: LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-sse2/ -largon2-sse2 - - DEPENDPATH += $$PWD/../arch/x86_64/libargon2-sse2 - - win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-sse2/release/libargon2-sse2.a - else:win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-sse2/debug/libargon2-sse2.a - else:win32:!win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-sse2/release/argon2-sse2.lib - else:win32:!win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-sse2/debug/argon2-sse2.lib - else:unix: PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-sse2/libargon2-sse2.a - - # libargon2-ssse3.a: - win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-ssse3/release/ -largon2-ssse3 - 
else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-ssse3/debug/ -largon2-ssse3 - else:unix: LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-ssse3/ -largon2-ssse3 - - DEPENDPATH += $$PWD/../arch/x86_64/libargon2-ssse3 - - win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-ssse3/release/libargon2-ssse3.a - else:win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-ssse3/debug/libargon2-ssse3.a - else:win32:!win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-ssse3/release/argon2-ssse3.lib - else:win32:!win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-ssse3/debug/argon2-ssse3.lib - else:unix: PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-ssse3/libargon2-ssse3.a - - # libargon2-xop.a: - win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-xop/release/ -largon2-xop - else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-xop/debug/ -largon2-xop - else:unix: LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-xop/ -largon2-xop - - DEPENDPATH += $$PWD/../arch/x86_64/libargon2-xop - - win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-xop/release/libargon2-xop.a - else:win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-xop/debug/libargon2-xop.a - else:win32:!win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-xop/release/argon2-xop.lib - else:win32:!win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-xop/debug/argon2-xop.lib - else:unix: PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-xop/libargon2-xop.a - - # libargon2-avx2.a: - win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-avx2/release/ -largon2-avx2 - 
else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-avx2/debug/ -largon2-avx2 - else:unix: LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-avx2/ -largon2-avx2 - - DEPENDPATH += $$PWD/../arch/x86_64/libargon2-avx2 - - win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx2/release/libargon2-avx2.a - else:win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx2/debug/libargon2-avx2.a - else:win32:!win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx2/release/argon2-avx2.lib - else:win32:!win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx2/debug/argon2-avx2.lib - else:unix: PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx2/libargon2-avx2.a - - # libargon2-avx512f.a: - win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-avx512f/release/ -largon2-avx512f - else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-avx512f/debug/ -largon2-avx512f - else:unix: LIBS += -L$$OUT_PWD/../arch/x86_64/libargon2-avx512f/ -largon2-avx512f - - DEPENDPATH += $$PWD/../arch/x86_64/libargon2-avx512f - - win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx512f/release/libargon2-avx512f.a - else:win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx512f/debug/libargon2-avx512f.a - else:win32:!win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx512f/release/argon2-avx512f.lib - else:win32:!win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx512f/debug/argon2-avx512f.lib - else:unix: PRE_TARGETDEPS += $$OUT_PWD/../arch/x86_64/libargon2-avx512f/libargon2-avx512f.a -} -equals(ARCH, generic) { - SOURCES += \ - 
$$ARGON2_ROOT/arch/$$ARCH/lib/argon2-arch.c -} - -unix { - target.path = /usr/lib - INSTALLS += target -} diff --git a/src/3rdparty/argon2/scripts/metacentrum/start-all-benchmarks.sh b/src/3rdparty/argon2/scripts/metacentrum/start-all-benchmarks.sh deleted file mode 100644 index 47e925c6..00000000 --- a/src/3rdparty/argon2/scripts/metacentrum/start-all-benchmarks.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -dirname="$(dirname "$0")" - -cd "$dirname" || exit 1 - -./start-benchmark.sh luna -./start-benchmark.sh lex '' '' '' '' '' backfill -./start-benchmark.sh mandos -./start-benchmark.sh zubat -PBS_SERVER=wagap.cerit-sc.cz \ - ./start-benchmark.sh zapat '' '' '' '' '' default@wagap.cerit-sc.cz diff --git a/src/3rdparty/argon2/scripts/metacentrum/start-benchmark.sh b/src/3rdparty/argon2/scripts/metacentrum/start-benchmark.sh deleted file mode 100644 index beedf748..00000000 --- a/src/3rdparty/argon2/scripts/metacentrum/start-benchmark.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash - -machine="$1" -max_t_cost="$2" -max_m_cost="$3" -max_lanes="$4" -branch="$5" -duration="$6" -queue="$7" -run_tests="$8" - -if [ -z "$machine" ]; then - echo "ERROR: Machine must be specified!" 
1>&2 - exit 1 -fi - -if [ -z "$max_t_cost" ]; then - max_t_cost=16 -fi - -if [ -z "$max_m_cost" ]; then - max_m_cost=$((8 * 1024 * 1024)) -fi - -if [ -z "$max_lanes" ]; then - max_lanes=16 -fi - -if [ -z "$branch" ]; then - branch='master' -fi - -if [ -z "$duration" ]; then - duration=2h -fi - -REPO_URL='https://github.com/WOnder93/argon2.git' - -dest_dir="$(pwd)" - -task_file="$(mktemp)" - -cat >$task_file <"$dest_dir/\$PBS_JOBID/benchmark-$machine-$branch.csv" -EOF - -qsub "$task_file" - -rm -f "$task_file" diff --git a/src/3rdparty/argon2/scripts/run-benchmark.sh b/src/3rdparty/argon2/scripts/run-benchmark.sh deleted file mode 100644 index f023b8e8..00000000 --- a/src/3rdparty/argon2/scripts/run-benchmark.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash - -max_t_cost="$1" -max_m_cost="$2" -max_lanes="$3" - -if [ -z "$max_t_cost" ]; then - echo "ERROR: Maximum time cost must be specified!" 1>&2 - exit 1 -fi - -if [ -z "$max_m_cost" ]; then - echo "ERROR: Maximum memory cost must be specified!" 1>&2 - exit 1 -fi - -if [ -z "$max_lanes" ]; then - echo "ERROR: Maximum number of lanes must be specified!" 1>&2 - exit 1 -fi - -dirname="$(dirname "$0")" - -cd "$dirname/.." 
|| exit 1 - -echo "t_cost,m_cost,lanes,ms_i,ms_d,ms_id" -stdbuf -oL ./argon2-bench2 $max_t_cost $max_m_cost $max_lanes | -stdbuf -oL tail -n +2 | -while read line; do - print_comma=0 - for x in $line; do - if [ $print_comma -eq 1 ]; then - echo -n "," - else - print_comma=1 - fi - echo -n "$x" - done - echo -done diff --git a/src/3rdparty/argon2/src/bench2.c b/src/3rdparty/argon2/src/bench2.c deleted file mode 100644 index 0d7d7fca..00000000 --- a/src/3rdparty/argon2/src/bench2.c +++ /dev/null @@ -1,179 +0,0 @@ -#include -#include -#include -#include -#include - -#include "argon2.h" - -#include "timing.h" - -#define ARGON2_BLOCK_SIZE 1024 - -#define BENCH_MAX_T_COST 16 -#define BENCH_MAX_M_COST (1024 * 1024) -#define BENCH_MAX_THREADS 8 -#define BENCH_MIN_PASSES (1024 * 1024) -#define BENCH_MAX_SAMPLES 128 - -#define BENCH_OUTLEN 16 -#define BENCH_INLEN 16 - -static double pick_min(const double *samples, size_t count) -{ - size_t i; - double min = INFINITY; - for (i = 0; i < count; i++) { - if (samples[i] < min) { - min = samples[i]; - } - } - return min; -} - -static int benchmark(void *memory, size_t memory_size, - uint32_t t_cost, uint32_t m_cost, uint32_t p) -{ - static const unsigned char PASSWORD[BENCH_OUTLEN] = { 0 }; - static const unsigned char SALT[BENCH_INLEN] = { 1 }; - - unsigned char out[BENCH_OUTLEN]; - struct timestamp start, end; - double ms_d[BENCH_MAX_SAMPLES]; - double ms_i[BENCH_MAX_SAMPLES]; - double ms_id[BENCH_MAX_SAMPLES]; - - double ms_d_final, ms_i_final, ms_id_final; - unsigned int i, bench_samples; - argon2_context ctx; - - int res; - - ctx.out = out; - ctx.outlen = sizeof(out); - ctx.pwd = (uint8_t *)PASSWORD; - ctx.pwdlen = sizeof(PASSWORD); - ctx.salt = (uint8_t *)SALT; - ctx.saltlen = sizeof(SALT); - ctx.secret = NULL; - ctx.secretlen = 0; - ctx.ad = NULL; - ctx.adlen = 0; - ctx.t_cost = t_cost; - ctx.m_cost = m_cost; - ctx.lanes = ctx.threads = p; - ctx.version = ARGON2_VERSION_NUMBER; - ctx.allocate_cbk = NULL; - ctx.free_cbk = 
NULL; - ctx.flags = ARGON2_DEFAULT_FLAGS; - - bench_samples = (BENCH_MIN_PASSES * p) / (t_cost * m_cost); - bench_samples += (BENCH_MIN_PASSES * p) % (t_cost * m_cost) != 0; - - if (bench_samples > BENCH_MAX_SAMPLES) { - bench_samples = BENCH_MAX_SAMPLES; - } - for (i = 0; i < bench_samples; i++) { - timestamp_store(&start); - res = argon2_ctx_mem(&ctx, Argon2_d, memory, memory_size); - timestamp_store(&end); - if (res != ARGON2_OK) { - return res; - } - - ms_d[i] = timestamp_span_ms(&start, &end); - } - - for (i = 0; i < bench_samples; i++) { - timestamp_store(&start); - res = argon2_ctx_mem(&ctx, Argon2_i, memory, memory_size); - timestamp_store(&end); - if (res != ARGON2_OK) { - return res; - } - - ms_i[i] = timestamp_span_ms(&start, &end); - } - - for (i = 0; i < bench_samples; i++) { - timestamp_store(&start); - res = argon2_ctx_mem(&ctx, Argon2_id, memory, memory_size); - timestamp_store(&end); - if (res != ARGON2_OK) { - return res; - } - - ms_id[i] = timestamp_span_ms(&start, &end); - } - - ms_d_final = pick_min(ms_d, bench_samples); - ms_i_final = pick_min(ms_i, bench_samples); - ms_id_final = pick_min(ms_id, bench_samples); - - printf("%8lu%16lu%8lu%16.6lf%16.6lf%16.6lf\n", - (unsigned long)t_cost, (unsigned long)m_cost, (unsigned long)p, - ms_d_final, ms_i_final, ms_id_final); - return 0; -} - -int main(int argc, const char * const *argv) -{ - uint32_t max_t_cost = BENCH_MAX_T_COST; - uint32_t max_m_cost = BENCH_MAX_M_COST; - uint32_t max_p = BENCH_MAX_THREADS; - uint32_t t_cost, m_cost, p; - char *end; - int res; - - if (argc >= 2) { - max_t_cost = strtoul(argv[1], &end, 10); - if (end == argv[1]) { - fprintf(stderr, "ERROR: Invalid number format!\n"); - return 1; - } - } - - if (argc >= 3) { - max_m_cost = strtoul(argv[2], &end, 10); - if (end == argv[2]) { - fprintf(stderr, "ERROR: Invalid number format!\n"); - return 1; - } - } - - if (argc >= 4) { - max_p = strtoul(argv[3], &end, 10); - if (end == argv[3]) { - fprintf(stderr, "ERROR: Invalid number 
format!\n"); - return 1; - } - } - - argon2_select_impl(stderr, "[libargon2] "); - - size_t memory_size = (size_t)max_m_cost * (size_t)ARGON2_BLOCK_SIZE; - void *memory = malloc(memory_size); - if (memory == NULL) { - fprintf(stderr, "ERROR: Memory allocation failed!\n"); - return 1; - } - /* make sure the whole memory gets mapped to physical pages: */ - memset(memory, 0xAB, memory_size); - - printf("%8s%16s%8s%16s%16s%16s\n", "t_cost", "m_cost", "threads", - "Argon2d (ms)", "Argon2i (ms)", "Argon2id (ms)"); - for (t_cost = 1; t_cost <= max_t_cost; t_cost *= 2) { - uint32_t min_m_cost = max_p * ARGON2_SYNC_POINTS * 2; - for (m_cost = min_m_cost; m_cost <= max_m_cost; m_cost *= 2) { - for (p = 1; p <= max_p; p *= 2) { - res = benchmark(memory, memory_size, t_cost, m_cost, p); - if (res != 0) { - free(memory); - return res; - } - } - } - } - free(memory); - return 0; -} diff --git a/src/3rdparty/argon2/src/genkat.c b/src/3rdparty/argon2/src/genkat.c deleted file mode 100644 index 7295c985..00000000 --- a/src/3rdparty/argon2/src/genkat.c +++ /dev/null @@ -1,90 +0,0 @@ -#include -#include -#include - -#include "argon2.h" - -static void fatal(const char *error) { - fprintf(stderr, "Error: %s\n", error); - exit(1); -} - -static void generate_testvectors(argon2_type type, const uint32_t version) { -#define TEST_OUTLEN 32 -#define TEST_PWDLEN 32 -#define TEST_SALTLEN 16 -#define TEST_SECRETLEN 8 -#define TEST_ADLEN 12 - argon2_context context; - - unsigned char out[TEST_OUTLEN]; - unsigned char pwd[TEST_PWDLEN]; - unsigned char salt[TEST_SALTLEN]; - unsigned char secret[TEST_SECRETLEN]; - unsigned char ad[TEST_ADLEN]; - const allocate_fptr myown_allocator = NULL; - const deallocate_fptr myown_deallocator = NULL; - - unsigned t_cost = 3; - unsigned m_cost = 32; - unsigned lanes = 4; - - memset(pwd, 1, TEST_OUTLEN); - memset(salt, 2, TEST_SALTLEN); - memset(secret, 3, TEST_SECRETLEN); - memset(ad, 4, TEST_ADLEN); - - context.out = out; - context.outlen = TEST_OUTLEN; - 
context.version = version; - context.pwd = pwd; - context.pwdlen = TEST_PWDLEN; - context.salt = salt; - context.saltlen = TEST_SALTLEN; - context.secret = secret; - context.secretlen = TEST_SECRETLEN; - context.ad = ad; - context.adlen = TEST_ADLEN; - context.t_cost = t_cost; - context.m_cost = m_cost; - context.lanes = lanes; - context.threads = lanes; - context.allocate_cbk = myown_allocator; - context.free_cbk = myown_deallocator; - context.flags = ARGON2_DEFAULT_FLAGS | ARGON2_FLAG_GENKAT; - -#undef TEST_OUTLEN -#undef TEST_PWDLEN -#undef TEST_SALTLEN -#undef TEST_SECRETLEN -#undef TEST_ADLEN - - argon2_ctx(&context, type); -} - -int main(int argc, char *argv[]) { - /* Get and check Argon2 type */ - const char *type_str = (argc > 1) ? argv[1] : "i"; - argon2_type type = Argon2_i; - uint32_t version = ARGON2_VERSION_NUMBER; - if (!strcmp(type_str, "d")) { - type = Argon2_d; - } else if (!strcmp(type_str, "i")) { - type = Argon2_i; - } else if (!strcmp(type_str, "id")) { - type = Argon2_id; - } else { - fatal("wrong Argon2 type"); - } - - /* Get and check Argon2 version number */ - if(argc > 2) { - version = strtoul(argv[2], NULL, 10); - } - if (ARGON2_VERSION_10 != version && ARGON2_VERSION_NUMBER != version) { - fatal("wrong Argon2 version number"); - } - - generate_testvectors(type, version); - return ARGON2_OK; -} diff --git a/src/3rdparty/argon2/src/run.c b/src/3rdparty/argon2/src/run.c deleted file mode 100644 index 9588a632..00000000 --- a/src/3rdparty/argon2/src/run.c +++ /dev/null @@ -1,315 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . 
- */ - -#define _GNU_SOURCE 1 - -#include -#include -#include -#include -#include -#include - -#include "argon2.h" -#include "core.h" - -#define T_COST_DEF 3 -#define LOG_M_COST_DEF 12 /* 2^12 = 4 MiB */ -#define LANES_DEF 1 -#define THREADS_DEF 1 -#define OUTLEN_DEF 32 -#define MAX_PASS_LEN 128 - -#define UNUSED_PARAMETER(x) (void)(x) - -static void usage(const char *cmd) { - printf("Usage: %s [-h] salt [-i|-d|-id] [-t iterations] [-m memory] " - "[-p parallelism] [-l hash length] [-e|-r] [-v (10|13)]\n", - cmd); - printf("\tPassword is read from stdin\n"); - printf("Parameters:\n"); - printf("\tsalt\t\tThe salt to use, at least 8 characters\n"); - printf("\t-i\t\tUse Argon2i (this is the default)\n"); - printf("\t-d\t\tUse Argon2d instead of Argon2i\n"); - printf("\t-id\t\tUse Argon2id instead of Argon2i\n"); - printf("\t-t N\t\tSets the number of iterations to N (default = %d)\n", - T_COST_DEF); - printf("\t-m N\t\tSets the memory usage of 2^N KiB (default %d)\n", - LOG_M_COST_DEF); - printf("\t-p N\t\tSets parallelism to N threads (default %d)\n", - THREADS_DEF); - printf("\t-l N\t\tSets hash output length to N bytes (default %d)\n", - OUTLEN_DEF); - printf("\t-e\t\tOutput only encoded hash\n"); - printf("\t-r\t\tOutput only the raw bytes of the hash\n"); - printf("\t-v (10|13)\tArgon2 version (defaults to the most recent version, " - "currently %x)\n", ARGON2_VERSION_NUMBER); - printf("\t-h\t\tPrint %s usage\n", cmd); -} - -static void fatal(const char *error) { - fprintf(stderr, "Error: %s\n", error); - exit(1); -} - -static void print_hex(uint8_t *bytes, size_t bytes_len) { - size_t i; - for (i = 0; i < bytes_len; ++i) { - printf("%02x", bytes[i]); - } - printf("\n"); -} - -/* -Runs Argon2 with certain inputs and parameters, inputs not cleared. 
Prints the -Base64-encoded hash string -@out output array with at least 32 bytes allocated -@pwd NULL-terminated string, presumably from argv[] -@salt salt array -@t_cost number of iterations -@m_cost amount of requested memory in KB -@lanes amount of requested parallelism -@threads actual parallelism -@type Argon2 type we want to run -@encoded_only display only the encoded hash -@raw_only display only the hexadecimal of the hash -@version Argon2 version -*/ -static void run(uint32_t outlen, char *pwd, char *salt, uint32_t t_cost, - uint32_t m_cost, uint32_t lanes, uint32_t threads, - argon2_type type, int encoded_only, int raw_only, - uint32_t version) { - clock_t start_time, stop_time; - size_t pwdlen, saltlen, encodedlen; - int result; - unsigned char * out = NULL; - char * encoded = NULL; - - start_time = clock(); - - if (!pwd) { - fatal("password missing"); - } - - if (!salt) { - clear_internal_memory(pwd, strlen(pwd)); - fatal("salt missing"); - } - - pwdlen = strlen(pwd); - saltlen = strlen(salt); - if(UINT32_MAX < saltlen) { - fatal("salt is too long"); - } - - UNUSED_PARAMETER(lanes); - - out = malloc(outlen + 1); - if (!out) { - clear_internal_memory(pwd, strlen(pwd)); - fatal("could not allocate memory for output"); - } - - encodedlen = argon2_encodedlen(t_cost, m_cost, lanes, (uint32_t)saltlen, outlen, type); - encoded = malloc(encodedlen + 1); - if (!encoded) { - clear_internal_memory(pwd, strlen(pwd)); - fatal("could not allocate memory for hash"); - } - - result = argon2_hash(t_cost, m_cost, threads, pwd, pwdlen, salt, saltlen, - out, outlen, encoded, encodedlen, type, version); - if (result != ARGON2_OK) - fatal(argon2_error_message(result)); - - stop_time = clock(); - - if (encoded_only) - puts(encoded); - - if (raw_only) - print_hex(out, outlen); - - if (encoded_only || raw_only) { - free(out); - free(encoded); - return; - } - - printf("Hash:\t\t"); - print_hex(out, outlen); - free(out); - - printf("Encoded:\t%s\n", encoded); - - printf("%2.3f 
seconds\n", - ((double)stop_time - start_time) / (CLOCKS_PER_SEC)); - - result = argon2_verify(encoded, pwd, pwdlen, type); - if (result != ARGON2_OK) - fatal(argon2_error_message(result)); - printf("Verification ok\n"); - free(encoded); -} - -int main(int argc, char *argv[]) { - uint32_t outlen = OUTLEN_DEF; - uint32_t m_cost = 1 << LOG_M_COST_DEF; - uint32_t t_cost = T_COST_DEF; - uint32_t lanes = LANES_DEF; - uint32_t threads = THREADS_DEF; - argon2_type type = Argon2_i; /* Argon2i is the default type */ - int types_specified = 0; - int encoded_only = 0; - int raw_only = 0; - uint32_t version = ARGON2_VERSION_NUMBER; - int i; - size_t n; - char pwd[MAX_PASS_LEN], *salt; - - if (argc < 2) { - usage(argv[0]); - return ARGON2_MISSING_ARGS; - } else if (argc >= 2 && strcmp(argv[1], "-h") == 0) { - usage(argv[0]); - return 1; - } - - argon2_select_impl(stderr, "[libargon2] "); - - /* get password from stdin */ - n = fread(pwd, 1, sizeof pwd - 1, stdin); - if(n < 1) { - fatal("no password read"); - } - if(n == MAX_PASS_LEN-1) { - fatal("Provided password longer than supported in command line utility"); - } - - pwd[n] = '\0'; - if (pwd[n - 1] == '\n') { - pwd[n - 1] = '\0'; - } - - salt = argv[1]; - - /* parse options */ - for (i = 2; i < argc; i++) { - const char *a = argv[i]; - unsigned long input = 0; - if (!strcmp(a, "-h")) { - usage(argv[0]); - return 1; - } else if (!strcmp(a, "-m")) { - if (i < argc - 1) { - i++; - input = strtoul(argv[i], NULL, 10); - if (input == 0 || input == ULONG_MAX || - input > ARGON2_MAX_MEMORY_BITS) { - fatal("bad numeric input for -m"); - } - m_cost = ARGON2_MIN(UINT64_C(1) << input, UINT32_C(0xFFFFFFFF)); - if (m_cost > ARGON2_MAX_MEMORY) { - fatal("m_cost overflow"); - } - continue; - } else { - fatal("missing -m argument"); - } - } else if (!strcmp(a, "-t")) { - if (i < argc - 1) { - i++; - input = strtoul(argv[i], NULL, 10); - if (input == 0 || input == ULONG_MAX || - input > ARGON2_MAX_TIME) { - fatal("bad numeric input for -t"); 
- } - t_cost = input; - continue; - } else { - fatal("missing -t argument"); - } - } else if (!strcmp(a, "-p")) { - if (i < argc - 1) { - i++; - input = strtoul(argv[i], NULL, 10); - if (input == 0 || input == ULONG_MAX || - input > ARGON2_MAX_THREADS || input > ARGON2_MAX_LANES) { - fatal("bad numeric input for -p"); - } - threads = input; - lanes = threads; - continue; - } else { - fatal("missing -p argument"); - } - } else if (!strcmp(a, "-l")) { - if (i < argc - 1) { - i++; - input = strtoul(argv[i], NULL, 10); - outlen = input; - continue; - } else { - fatal("missing -l argument"); - } - } else if (!strcmp(a, "-i")) { - type = Argon2_i; - ++types_specified; - } else if (!strcmp(a, "-d")) { - type = Argon2_d; - ++types_specified; - } else if (!strcmp(a, "-id")) { - type = Argon2_id; - ++types_specified; - } else if (!strcmp(a, "-e")) { - encoded_only = 1; - } else if (!strcmp(a, "-r")) { - raw_only = 1; - } else if (!strcmp(a, "-v")) { - if (i < argc - 1) { - i++; - if (!strcmp(argv[i], "10")) { - version = ARGON2_VERSION_10; - } else if (!strcmp(argv[i], "13")) { - version = ARGON2_VERSION_13; - } else { - fatal("invalid Argon2 version"); - } - } else { - fatal("missing -v argument"); - } - } else { - fatal("unknown argument"); - } - } - - if (types_specified > 1) { - fatal("cannot specify multiple Argon2 types"); - } - - if(encoded_only && raw_only) - fatal("cannot provide both -e and -r"); - - if(!encoded_only && !raw_only) { - printf("Type:\t\t%s\n", argon2_type2string(type, 1)); - printf("Iterations:\t%" PRIu32 " \n", t_cost); - printf("Memory:\t\t%" PRIu32 " KiB\n", m_cost); - printf("Parallelism:\t%" PRIu32 " \n", lanes); - } - - run(outlen, pwd, salt, t_cost, m_cost, lanes, threads, type, - encoded_only, raw_only, version); - - return ARGON2_OK; -} - diff --git a/src/3rdparty/argon2/src/timing.h b/src/3rdparty/argon2/src/timing.h deleted file mode 100644 index 0e39a1f3..00000000 --- a/src/3rdparty/argon2/src/timing.h +++ /dev/null @@ -1,41 +0,0 @@ 
-#include - -#ifdef _POSIX_SOURCE -#include - -struct timestamp { - struct timespec time; -}; - -static inline void timestamp_store(struct timestamp *out) -{ - clock_gettime(CLOCK_MONOTONIC, &out->time); -} - -static inline double timestamp_span_ms(const struct timestamp *start, - const struct timestamp *end) -{ - double res = 0.0; - res += (end->time.tv_sec - start->time.tv_sec) * 1000.0; - res += (end->time.tv_nsec - start->time.tv_nsec) / 1000000.0; - return res; -} -#else -#include - -struct timestamp { - clock_t time; -}; - -static inline void timestamp_store(struct timestamp *out) -{ - out->time = clock(); -} - -static inline double timestamp_span_ms(const struct timestamp *start, - const struct timestamp *end) -{ - double res = (end->time - start->time) * 1000; - return res / CLOCKS_PER_SEC; -} -#endif diff --git a/src/3rdparty/argon2/tests/test.c b/src/3rdparty/argon2/tests/test.c deleted file mode 100644 index 1fe9f0e2..00000000 --- a/src/3rdparty/argon2/tests/test.c +++ /dev/null @@ -1,239 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include "argon2.h" - -#define OUT_LEN 32 -#define ENCODED_LEN 108 - -/* Test harness will assert: - * argon2_hash() returns ARGON2_OK - * HEX output matches expected - * encoded output matches expected - * argon2_verify() correctly verifies value - */ - -void hashtest(uint32_t version, uint32_t t, uint32_t m, uint32_t p, char *pwd, - char *salt, char *hexref, char *mcfref) { - unsigned char out[OUT_LEN]; - unsigned char hex_out[OUT_LEN * 2 + 4]; - char encoded[ENCODED_LEN]; - int ret, i; - - printf("Hash test: $v=%d t=%d, m=%d, p=%d, pass=%s, salt=%s: ", version, - t, m, p, pwd, salt); - - ret = argon2_hash(t, 1 << m, p, pwd, strlen(pwd), salt, strlen(salt), out, - OUT_LEN, encoded, ENCODED_LEN, Argon2_i, version); - assert(ret == ARGON2_OK); - - for (i = 0; i < OUT_LEN; ++i) - sprintf((char *)(hex_out + i * 2), "%02x", out[i]); - - assert(memcmp(hex_out, hexref, OUT_LEN * 2) == 0); - - if 
(ARGON2_VERSION_NUMBER == version) { - assert(memcmp(encoded, mcfref, strlen(mcfref)) == 0); - } - - ret = argon2_verify(encoded, pwd, strlen(pwd), Argon2_i); - assert(ret == ARGON2_OK); - ret = argon2_verify(mcfref, pwd, strlen(pwd), Argon2_i); - assert(ret == ARGON2_OK); - - printf("PASS\n"); -} - -int main() { - int ret; - unsigned char out[OUT_LEN]; - char const *msg; - int version; - - argon2_select_impl(stderr, "[libargon2] "); - - version = ARGON2_VERSION_10; - printf("Test Argon2i version number: %02x\n", version); - - /* Multiple test cases for various input values */ - hashtest(version, 2, 16, 1, "password", "somesalt", - "f6c4db4a54e2a370627aff3db6176b94a2a209a62c8e36152711802f7b30c694", - "$argon2i$m=65536,t=2,p=1$c29tZXNhbHQ" - "$9sTbSlTio3Biev89thdrlKKiCaYsjjYVJxGAL3swxpQ"); -#ifdef TEST_LARGE_RAM - hashtest(version, 2, 20, 1, "password", "somesalt", - "9690ec55d28d3ed32562f2e73ea62b02b018757643a2ae6e79528459de8106e9", - "$argon2i$m=1048576,t=2,p=1$c29tZXNhbHQ" - "$lpDsVdKNPtMlYvLnPqYrArAYdXZDoq5ueVKEWd6BBuk"); -#endif - hashtest(version, 2, 18, 1, "password", "somesalt", - "3e689aaa3d28a77cf2bc72a51ac53166761751182f1ee292e3f677a7da4c2467", - "$argon2i$m=262144,t=2,p=1$c29tZXNhbHQ" - "$Pmiaqj0op3zyvHKlGsUxZnYXURgvHuKS4/Z3p9pMJGc"); - hashtest(version, 2, 8, 1, "password", "somesalt", - "fd4dd83d762c49bdeaf57c47bdcd0c2f1babf863fdeb490df63ede9975fccf06", - "$argon2i$m=256,t=2,p=1$c29tZXNhbHQ" - "$/U3YPXYsSb3q9XxHvc0MLxur+GP960kN9j7emXX8zwY"); - hashtest(version, 2, 8, 2, "password", "somesalt", - "b6c11560a6a9d61eac706b79a2f97d68b4463aa3ad87e00c07e2b01e90c564fb", - "$argon2i$m=256,t=2,p=2$c29tZXNhbHQ" - "$tsEVYKap1h6scGt5ovl9aLRGOqOth+AMB+KwHpDFZPs"); - hashtest(version, 1, 16, 1, "password", "somesalt", - "81630552b8f3b1f48cdb1992c4c678643d490b2b5eb4ff6c4b3438b5621724b2", - "$argon2i$m=65536,t=1,p=1$c29tZXNhbHQ" - "$gWMFUrjzsfSM2xmSxMZ4ZD1JCytetP9sSzQ4tWIXJLI"); - hashtest(version, 4, 16, 1, "password", "somesalt", - 
"f212f01615e6eb5d74734dc3ef40ade2d51d052468d8c69440a3a1f2c1c2847b", - "$argon2i$m=65536,t=4,p=1$c29tZXNhbHQ" - "$8hLwFhXm6110c03D70Ct4tUdBSRo2MaUQKOh8sHChHs"); - hashtest(version, 2, 16, 1, "differentpassword", "somesalt", - "e9c902074b6754531a3a0be519e5baf404b30ce69b3f01ac3bf21229960109a3", - "$argon2i$m=65536,t=2,p=1$c29tZXNhbHQ" - "$6ckCB0tnVFMaOgvlGeW69ASzDOabPwGsO/ISKZYBCaM"); - hashtest(version, 2, 16, 1, "password", "diffsalt", - "79a103b90fe8aef8570cb31fc8b22259778916f8336b7bdac3892569d4f1c497", - "$argon2i$m=65536,t=2,p=1$ZGlmZnNhbHQ" - "$eaEDuQ/orvhXDLMfyLIiWXeJFvgza3vaw4kladTxxJc"); - - /* Error state tests */ - - /* Handle an invalid encoding correctly (it is missing a $) */ - ret = argon2_verify("$argon2i$m=65536,t=2,p=1c29tZXNhbHQ" - "$9sTbSlTio3Biev89thdrlKKiCaYsjjYVJxGAL3swxpQ", - "password", strlen("password"), Argon2_i); - assert(ret == ARGON2_DECODING_FAIL); - printf("Recognise an invalid encoding: PASS\n"); - - /* Handle an invalid encoding correctly (it is missing a $) */ - ret = argon2_verify("$argon2i$m=65536,t=2,p=1$c29tZXNhbHQ" - "9sTbSlTio3Biev89thdrlKKiCaYsjjYVJxGAL3swxpQ", - "password", strlen("password"), Argon2_i); - assert(ret == ARGON2_DECODING_FAIL); - printf("Recognise an invalid encoding: PASS\n"); - - /* Handle an invalid encoding correctly (salt is too short) */ - ret = argon2_verify("$argon2i$m=65536,t=2,p=1$" - "$9sTbSlTio3Biev89thdrlKKiCaYsjjYVJxGAL3swxpQ", - "password", strlen("password"), Argon2_i); - assert(ret == ARGON2_SALT_TOO_SHORT); - printf("Recognise an invalid salt in encoding: PASS\n"); - - /* Handle an mismatching hash (the encoded password is "passwore") */ - ret = argon2_verify("$argon2i$m=65536,t=2,p=1$c29tZXNhbHQ" - "$b2G3seW+uPzerwQQC+/E1K50CLLO7YXy0JRcaTuswRo", - "password", strlen("password"), Argon2_i); - assert(ret == ARGON2_VERIFY_MISMATCH); - printf("Verify with mismatched password: PASS\n"); - - msg = argon2_error_message(ARGON2_DECODING_FAIL); - assert(strcmp(msg, "Decoding failed") == 0); - 
printf("Decode an error message: PASS\n"); - - printf("\n"); - - version = ARGON2_VERSION_NUMBER; - printf("Test Argon2i version number: %02x\n", version); - - /* Multiple test cases for various input values */ - hashtest(version, 2, 16, 1, "password", "somesalt", - "c1628832147d9720c5bd1cfd61367078729f6dfb6f8fea9ff98158e0d7816ed0", - "$argon2i$v=19$m=65536,t=2,p=1$c29tZXNhbHQ" - "$wWKIMhR9lyDFvRz9YTZweHKfbftvj+qf+YFY4NeBbtA"); -#ifdef TEST_LARGE_RAM - hashtest(version, 2, 20, 1, "password", "somesalt", - "d1587aca0922c3b5d6a83edab31bee3c4ebaef342ed6127a55d19b2351ad1f41", - "$argon2i$v=19$m=1048576,t=2,p=1$c29tZXNhbHQ" - "$0Vh6ygkiw7XWqD7asxvuPE667zQu1hJ6VdGbI1GtH0E"); -#endif - hashtest(version, 2, 18, 1, "password", "somesalt", - "296dbae80b807cdceaad44ae741b506f14db0959267b183b118f9b24229bc7cb", - "$argon2i$v=19$m=262144,t=2,p=1$c29tZXNhbHQ" - "$KW266AuAfNzqrUSudBtQbxTbCVkmexg7EY+bJCKbx8s"); - hashtest(version, 2, 8, 1, "password", "somesalt", - "89e9029f4637b295beb027056a7336c414fadd43f6b208645281cb214a56452f", - "$argon2i$v=19$m=256,t=2,p=1$c29tZXNhbHQ" - "$iekCn0Y3spW+sCcFanM2xBT63UP2sghkUoHLIUpWRS8"); - hashtest(version, 2, 8, 2, "password", "somesalt", - "4ff5ce2769a1d7f4c8a491df09d41a9fbe90e5eb02155a13e4c01e20cd4eab61", - "$argon2i$v=19$m=256,t=2,p=2$c29tZXNhbHQ" - "$T/XOJ2mh1/TIpJHfCdQan76Q5esCFVoT5MAeIM1Oq2E"); - hashtest(version, 1, 16, 1, "password", "somesalt", - "d168075c4d985e13ebeae560cf8b94c3b5d8a16c51916b6f4ac2da3ac11bbecf", - "$argon2i$v=19$m=65536,t=1,p=1$c29tZXNhbHQ" - "$0WgHXE2YXhPr6uVgz4uUw7XYoWxRkWtvSsLaOsEbvs8"); - hashtest(version, 4, 16, 1, "password", "somesalt", - "aaa953d58af3706ce3df1aefd4a64a84e31d7f54175231f1285259f88174ce5b", - "$argon2i$v=19$m=65536,t=4,p=1$c29tZXNhbHQ" - "$qqlT1YrzcGzj3xrv1KZKhOMdf1QXUjHxKFJZ+IF0zls"); - hashtest(version, 2, 16, 1, "differentpassword", "somesalt", - "14ae8da01afea8700c2358dcef7c5358d9021282bd88663a4562f59fb74d22ee", - "$argon2i$v=19$m=65536,t=2,p=1$c29tZXNhbHQ" - 
"$FK6NoBr+qHAMI1jc73xTWNkCEoK9iGY6RWL1n7dNIu4"); - hashtest(version, 2, 16, 1, "password", "diffsalt", - "b0357cccfbef91f3860b0dba447b2348cbefecadaf990abfe9cc40726c521271", - "$argon2i$v=19$m=65536,t=2,p=1$ZGlmZnNhbHQ" - "$sDV8zPvvkfOGCw26RHsjSMvv7K2vmQq/6cxAcmxSEnE"); - - /* Error state tests */ - - /* Handle an invalid encoding correctly (it is missing a $) */ - ret = argon2_verify("$argon2i$v=19$m=65536,t=2,p=1c29tZXNhbHQ" - "$wWKIMhR9lyDFvRz9YTZweHKfbftvj+qf+YFY4NeBbtA", - "password", strlen("password"), Argon2_i); - assert(ret == ARGON2_DECODING_FAIL); - printf("Recognise an invalid encoding: PASS\n"); - - /* Handle an invalid encoding correctly (it is missing a $) */ - ret = argon2_verify("$argon2i$v=19$m=65536,t=2,p=1$c29tZXNhbHQ" - "wWKIMhR9lyDFvRz9YTZweHKfbftvj+qf+YFY4NeBbtA", - "password", strlen("password"), Argon2_i); - assert(ret == ARGON2_DECODING_FAIL); - printf("Recognise an invalid encoding: PASS\n"); - - /* Handle an invalid encoding correctly (salt is too short) */ - ret = argon2_verify("$argon2i$v=19$m=65536,t=2,p=1$" - "$9sTbSlTio3Biev89thdrlKKiCaYsjjYVJxGAL3swxpQ", - "password", strlen("password"), Argon2_i); - assert(ret == ARGON2_SALT_TOO_SHORT); - printf("Recognise an invalid salt in encoding: PASS\n"); - - /* Handle an mismatching hash (the encoded password is "passwore") */ - ret = argon2_verify("$argon2i$v=19$m=65536,t=2,p=1$c29tZXNhbHQ" - "$8iIuixkI73Js3G1uMbezQXD0b8LG4SXGsOwoQkdAQIM", - "password", strlen("password"), Argon2_i); - assert(ret == ARGON2_VERIFY_MISMATCH); - printf("Verify with mismatched password: PASS\n"); - - msg = argon2_error_message(ARGON2_DECODING_FAIL); - assert(strcmp(msg, "Decoding failed") == 0); - printf("Decode an error message: PASS\n"); - - /* Common error state tests */ - - printf("\n"); - printf("Common error state tests\n"); - - ret = argon2_hash(2, 1, 1, "password", strlen("password"), - "diffsalt", strlen("diffsalt"), - out, OUT_LEN, NULL, 0, Argon2_i, version); - assert(ret == 
ARGON2_MEMORY_TOO_LITTLE); - printf("Fail on invalid memory: PASS\n"); - - ret = argon2_hash(2, 1 << 12, 1, NULL, strlen("password"), - "diffsalt", strlen("diffsalt"), - out, OUT_LEN, NULL, 0, Argon2_i, version); - assert(ret == ARGON2_PWD_PTR_MISMATCH); - printf("Fail on invalid null pointer: PASS\n"); - - ret = argon2_hash(2, 1 << 12, 1, "password", strlen("password"), "s", 1, - out, OUT_LEN, NULL, 0, Argon2_i, version); - assert(ret == ARGON2_SALT_TOO_SHORT); - printf("Fail on salt too short: PASS\n"); - - return 0; -} diff --git a/src/App.cpp b/src/App.cpp index e75766ac..0b69c884 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -27,23 +27,19 @@ #include #include - #include "api/Api.h" #include "App.h" #include "base/kernel/Signals.h" #include "common/Console.h" -#include "common/cpu/Cpu.h" #include "common/log/Log.h" #include "common/Platform.h" #include "core/Config.h" #include "core/Controller.h" -#include "crypto/CryptoNight.h" -#include "Mem.h" #include "net/Network.h" #include "Summary.h" -#include "version.h" #include "workers/Workers.h" - +#include +#include #ifndef XMRIG_NO_HTTPD # include "common/api/Httpd.h" @@ -55,6 +51,8 @@ xmrig::App::App(Process *process) : m_httpd(nullptr), m_signals(nullptr) { + srand(time(NULL)); + m_controller = new Controller(process); if (m_controller->init() != 0) { return; @@ -63,6 +61,8 @@ xmrig::App::App(Process *process) : if (!m_controller->config()->isBackground()) { m_console = new Console(this); } + + process->location(Process::ExeLocation, m_appFileName); } @@ -90,7 +90,8 @@ int xmrig::App::exec() background(); - Mem::init(m_controller->config()->isHugePages()); + // load hasher modules + Hasher::loadHashers(m_appFileName); Summary::print(m_controller); @@ -115,7 +116,8 @@ int xmrig::App::exec() m_httpd->start(); # endif - Workers::start(m_controller); + if(!Workers::start(m_controller)) + return 0; m_controller->network()->connect(); diff --git a/src/App.h b/src/App.h index fc944967..b1e9d8a3 100644 --- a/src/App.h 
+++ b/src/App.h @@ -64,6 +64,7 @@ private: Controller *m_controller; Httpd *m_httpd; Signals *m_signals; + char m_appFileName[512]; }; diff --git a/src/Mem.cpp b/src/Mem.cpp deleted file mode 100644 index 01a2157b..00000000 --- a/src/Mem.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - - -#include "common/utils/mm_malloc.h" -#include "crypto/CryptoNight.h" -#include "crypto/CryptoNight_constants.h" -#include "Mem.h" - - -bool Mem::m_enabled = true; -int Mem::m_flags = 0; - - -MemInfo Mem::create(cryptonight_ctx **ctx, xmrig::Algo algorithm, size_t count) -{ - using namespace xmrig; - - MemInfo info; - info.size = cn_select_memory(algorithm) * count; - - constexpr const size_t align_size = 2 * 1024 * 1024; - info.size = ((info.size + align_size - 1) / align_size) * align_size; - info.pages = info.size / align_size; - - allocate(info, m_enabled); - - for (size_t i = 0; i < count; ++i) { - cryptonight_ctx *c = static_cast(_mm_malloc(sizeof(cryptonight_ctx), 4096)); - c->memory = info.memory + (i * cn_select_memory(algorithm)); - - uint8_t* p = reinterpret_cast(allocateExecutableMemory(0x4000)); - c->generated_code = reinterpret_cast(p); - c->generated_code_double = reinterpret_cast(p + 0x2000); - - c->generated_code_data.variant = xmrig::VARIANT_MAX; - c->generated_code_data.height = (uint64_t)(-1); - c->generated_code_double_data = c->generated_code_data; - - ctx[i] = c; - } - - return info; -} - - -void Mem::release(cryptonight_ctx **ctx, size_t count, MemInfo &info) -{ - release(info); - - for (size_t i = 0; i < count; ++i) { - _mm_free(ctx[i]); - } -} - diff --git a/src/Mem.h b/src/Mem.h deleted file mode 100644 index 9e39e963..00000000 --- a/src/Mem.h +++ /dev/null @@ -1,78 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef XMRIG_MEM_H -#define XMRIG_MEM_H - - -#include -#include - - -#include "common/xmrig.h" - - -struct cryptonight_ctx; - - -struct MemInfo -{ - alignas(16) uint8_t *memory; - - size_t hugePages; - size_t pages; - size_t size; -}; - - -class Mem -{ -public: - enum Flags { - HugepagesAvailable = 1, - HugepagesEnabled = 2, - Lock = 4 - }; - - static MemInfo create(cryptonight_ctx **ctx, xmrig::Algo algorithm, size_t count); - static void init(bool enabled); - static void release(cryptonight_ctx **ctx, size_t count, MemInfo &info); - - static void *allocateExecutableMemory(size_t size); - static void protectExecutableMemory(void *p, size_t size); - static void flushInstructionCache(void *p, size_t size); - - static inline bool isHugepagesAvailable() { return (m_flags & HugepagesAvailable) != 0; } - -private: - static void allocate(MemInfo &info, bool enabled); - static void release(MemInfo &info); - - static int m_flags; - static bool m_enabled; -}; - - -#endif /* XMRIG_MEM_H */ diff --git a/src/Mem_unix.cpp b/src/Mem_unix.cpp deleted file mode 100644 index 833c200c..00000000 --- a/src/Mem_unix.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software 
Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - - -#include -#include - - -#include "common/log/Log.h" -#include "common/utils/mm_malloc.h" -#include "common/xmrig.h" -#include "crypto/CryptoNight.h" -#include "Mem.h" - - -void Mem::init(bool enabled) -{ - m_enabled = enabled; -} - - -void Mem::allocate(MemInfo &info, bool enabled) -{ - info.hugePages = 0; - - if (!enabled) { - info.memory = static_cast(_mm_malloc(info.size, 4096)); - - return; - } - -# if defined(__APPLE__) - info.memory = static_cast(mmap(0, info.size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0)); -# elif defined(__FreeBSD__) - info.memory = static_cast(mmap(0, info.size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0)); -# else - info.memory = static_cast(mmap(0, info.size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0)); -# endif - - if (info.memory == MAP_FAILED) { - return allocate(info, false);; - } - - info.hugePages = info.pages; - - if (madvise(info.memory, info.size, MADV_RANDOM | MADV_WILLNEED) != 0) { - LOG_ERR("madvise failed"); - } - - if (mlock(info.memory, info.size) == 0) { - m_flags |= Lock; - } -} - - -void Mem::release(MemInfo &info) -{ - if (info.hugePages) { - if (m_flags & Lock) { - munlock(info.memory, info.size); - } - - munmap(info.memory, info.size); - } - else { - _mm_free(info.memory); - } -} - - -void *Mem::allocateExecutableMemory(size_t size) -{ -# if defined(__APPLE__) - return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, 
MAP_PRIVATE | MAP_ANON, -1, 0); -# else - return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -# endif -} - - -void Mem::protectExecutableMemory(void *p, size_t size) -{ - mprotect(p, size, PROT_READ | PROT_EXEC); -} - - -void Mem::flushInstructionCache(void *p, size_t size) -{ -# ifndef __FreeBSD__ - __builtin___clear_cache(reinterpret_cast(p), reinterpret_cast(p) + size); -# endif -} diff --git a/src/Mem_win.cpp b/src/Mem_win.cpp deleted file mode 100644 index 27c1348b..00000000 --- a/src/Mem_win.cpp +++ /dev/null @@ -1,204 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - - -#include -#include -#include -#include - - -#include "common/log/Log.h" -#include "common/utils/mm_malloc.h" -#include "common/xmrig.h" -#include "crypto/CryptoNight.h" -#include "crypto/CryptoNight_constants.h" -#include "Mem.h" - - -/***************************************************************** -SetLockPagesPrivilege: a function to obtain or -release the privilege of locking physical pages. 
- -Inputs: - -HANDLE hProcess: Handle for the process for which the -privilege is needed - -BOOL bEnable: Enable (TRUE) or disable? - -Return value: TRUE indicates success, FALSE failure. - -*****************************************************************/ -/** - * AWE Example: https://msdn.microsoft.com/en-us/library/windows/desktop/aa366531(v=vs.85).aspx - * Creating a File Mapping Using Large Pages: https://msdn.microsoft.com/en-us/library/aa366543(VS.85).aspx - */ -static BOOL SetLockPagesPrivilege() { - HANDLE token; - - if (OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token) != TRUE) { - return FALSE; - } - - TOKEN_PRIVILEGES tp; - tp.PrivilegeCount = 1; - tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - - if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &(tp.Privileges[0].Luid)) != TRUE) { - return FALSE; - } - - BOOL rc = AdjustTokenPrivileges(token, FALSE, (PTOKEN_PRIVILEGES) &tp, 0, NULL, NULL); - if (rc != TRUE || GetLastError() != ERROR_SUCCESS) { - return FALSE; - } - - CloseHandle(token); - - return TRUE; -} - - -static LSA_UNICODE_STRING StringToLsaUnicodeString(LPCTSTR string) { - LSA_UNICODE_STRING lsaString; - - DWORD dwLen = (DWORD) wcslen(string); - lsaString.Buffer = (LPWSTR) string; - lsaString.Length = (USHORT)((dwLen) * sizeof(WCHAR)); - lsaString.MaximumLength = (USHORT)((dwLen + 1) * sizeof(WCHAR)); - return lsaString; -} - - -static BOOL ObtainLockPagesPrivilege() { - HANDLE token; - PTOKEN_USER user = NULL; - - if (OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &token) == TRUE) { - DWORD size = 0; - - GetTokenInformation(token, TokenUser, NULL, 0, &size); - if (size) { - user = (PTOKEN_USER) LocalAlloc(LPTR, size); - } - - GetTokenInformation(token, TokenUser, user, size, &size); - CloseHandle(token); - } - - if (!user) { - return FALSE; - } - - LSA_HANDLE handle; - LSA_OBJECT_ATTRIBUTES attributes; - ZeroMemory(&attributes, sizeof(attributes)); - - BOOL result = FALSE; - if 
(LsaOpenPolicy(NULL, &attributes, POLICY_ALL_ACCESS, &handle) == 0) { - LSA_UNICODE_STRING str = StringToLsaUnicodeString(_T(SE_LOCK_MEMORY_NAME)); - - if (LsaAddAccountRights(handle, user->User.Sid, &str, 1) == 0) { - LOG_NOTICE("Huge pages support was successfully enabled, but reboot required to use it"); - result = TRUE; - } - - LsaClose(handle); - } - - LocalFree(user); - return result; -} - - -static BOOL TrySetLockPagesPrivilege() { - if (SetLockPagesPrivilege()) { - return TRUE; - } - - return ObtainLockPagesPrivilege() && SetLockPagesPrivilege(); -} - - -void Mem::init(bool enabled) -{ - m_enabled = enabled; - - if (enabled && TrySetLockPagesPrivilege()) { - m_flags |= HugepagesAvailable; - } -} - - -void Mem::allocate(MemInfo &info, bool enabled) -{ - info.hugePages = 0; - - if (!enabled) { - info.memory = static_cast(_mm_malloc(info.size, 4096)); - - return; - } - - info.memory = static_cast(VirtualAlloc(nullptr, info.size, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE)); - if (info.memory) { - info.hugePages = info.pages; - - return; - } - - allocate(info, false); -} - - -void Mem::release(MemInfo &info) -{ - if (info.hugePages) { - VirtualFree(info.memory, 0, MEM_RELEASE); - } - else { - _mm_free(info.memory); - } -} - - -void *Mem::allocateExecutableMemory(size_t size) -{ - return VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); -} - - -void Mem::protectExecutableMemory(void *p, size_t size) -{ - DWORD oldProtect; - VirtualProtect(p, size, PAGE_EXECUTE_READ, &oldProtect); -} - - -void Mem::flushInstructionCache(void *p, size_t size) -{ - ::FlushInstructionCache(GetCurrentProcess(), p, size); -} diff --git a/src/Summary.cpp b/src/Summary.cpp index 60a9278f..f9e80d1b 100644 --- a/src/Summary.cpp +++ b/src/Summary.cpp @@ -33,115 +33,9 @@ #include "common/log/Log.h" #include "core/Config.h" #include "core/Controller.h" -#include "crypto/Asm.h" -#include "Mem.h" #include "Summary.h" #include "version.h" - -#ifndef 
XMRIG_NO_ASM -static const char *coloredAsmNames[] = { - "\x1B[1;31mnone\x1B[0m", - "auto", - "\x1B[1;32mintel\x1B[0m", - "\x1B[1;32mryzen\x1B[0m", - "\x1B[1;32mbulldozer\x1B[0m" -}; - - -inline static const char *asmName(xmrig::Assembly assembly, bool colors) -{ - return colors ? coloredAsmNames[assembly] : xmrig::Asm::toString(assembly); -} -#endif - - -static void print_memory(xmrig::Config *config) { -# ifdef _WIN32 - if (config->isColors()) { - Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", - "HUGE PAGES", Mem::isHugepagesAvailable() ? "\x1B[1;32mavailable" : "\x1B[01;31munavailable"); - } - else { - Log::i()->text(" * %-13s%s", "HUGE PAGES", Mem::isHugepagesAvailable() ? "available" : "unavailable"); - } -# endif -} - - -static void print_cpu(xmrig::Config *config) -{ - using namespace xmrig; - - if (config->isColors()) { - Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%d)") " %sx64 %sAES %sAVX2", - "CPU", - Cpu::info()->brand(), - Cpu::info()->sockets(), - Cpu::info()->isX64() ? "\x1B[1;32m" : "\x1B[1;31m-", - Cpu::info()->hasAES() ? "\x1B[1;32m" : "\x1B[1;31m-", - Cpu::info()->hasAVX2() ? "\x1B[1;32m" : "\x1B[1;31m-"); -# ifndef XMRIG_NO_LIBCPUID - Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%.1f MB/%.1f MB"), "CPU L2/L3", Cpu::info()->L2() / 1024.0, Cpu::info()->L3() / 1024.0); -# endif - } - else { - Log::i()->text(" * %-13s%s (%d) %sx64 %sAES %sAVX2", - "CPU", - Cpu::info()->brand(), - Cpu::info()->sockets(), - Cpu::info()->isX64() ? "" : "-", - Cpu::info()->hasAES() ? "" : "-", - Cpu::info()->hasAVX2() ? 
"" : "-"); -# ifndef XMRIG_NO_LIBCPUID - Log::i()->text(" * %-13s%.1f MB/%.1f MB", "CPU L2/L3", Cpu::info()->L2() / 1024.0, Cpu::info()->L3() / 1024.0); -# endif - } -} - - -static void print_threads(xmrig::Config *config) -{ - if (config->threadsMode() != xmrig::Config::Advanced) { - char buf[32] = { 0 }; - if (config->affinity() != -1L) { - snprintf(buf, sizeof buf, ", affinity=0x%" PRIX64, config->affinity()); - } - - Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("%-13s") CYAN_BOLD("%d") WHITE_BOLD(", %s, av=%d, %sdonate=%d%%") WHITE_BOLD("%s") - : " * %-13s%d, %s, av=%d, %sdonate=%d%%%s", - "THREADS", - config->threadsCount(), - config->algorithm().name(), - config->algoVariant(), - config->isColors() && config->donateLevel() == 0 ? "\x1B[1;31m" : "", - config->donateLevel(), - buf); - } - else { - Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("%-13s") CYAN_BOLD("%d") WHITE_BOLD(", %s, %sdonate=%d%%") - : " * %-13s%d, %s, %sdonate=%d%%", - "THREADS", - config->threadsCount(), - config->algorithm().name(), - config->isColors() && config->donateLevel() == 0 ? "\x1B[1;31m" : "", - config->donateLevel()); - } - -# ifndef XMRIG_NO_ASM - if (config->assembly() == xmrig::ASM_AUTO) { - const xmrig::Assembly assembly = xmrig::Cpu::info()->assembly(); - - Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("%-13sauto:%s") - : " * %-13sauto:%s", "ASSEMBLY", asmName(assembly, config->isColors())); - } - else { - Log::i()->text(config->isColors() ? 
GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s") : " * %-13s%s", "ASSEMBLY", asmName(config->assembly(), config->isColors())); - } -# endif -} - - static void print_commands(xmrig::Config *config) { if (config->isColors()) { @@ -154,16 +48,24 @@ static void print_commands(xmrig::Config *config) } } +static void print_donate(xmrig::Config *config) +{ + if (config->isColors()) { + Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("DONATE ") MAGENTA_BOLD("%d%%") WHITE_BOLD(" (change with --donate-level option)"), config->donateLevel()); + } + else { + Log::i()->text(" * DONATE %d%% (change with --donate-level option)", config->donateLevel()); + } +} void Summary::print(xmrig::Controller *controller) { controller->config()->printVersions(); - print_memory(controller->config()); - print_cpu(controller->config()); - print_threads(controller->config()); controller->config()->printPools(); controller->config()->printAPI(); + print_donate(controller->config()); + print_commands(controller->config()); } diff --git a/src/api/ApiRouter.cpp b/src/api/ApiRouter.cpp index beee8fd3..6cdd5f26 100644 --- a/src/api/ApiRouter.cpp +++ b/src/api/ApiRouter.cpp @@ -42,20 +42,20 @@ #include "common/Platform.h" #include "core/Config.h" #include "core/Controller.h" -#include "interfaces/IThread.h" #include "rapidjson/document.h" #include "rapidjson/prettywriter.h" #include "rapidjson/stringbuffer.h" #include "version.h" #include "workers/Hashrate.h" #include "workers/Workers.h" +#include "workers/Handle.h" -static inline rapidjson::Value normalize(double d) +rapidjson::Value ApiRouter::normalize(double d) { using namespace rapidjson; - if (!isnormal(d)) { + if (!std::isnormal(d)) { return Value(kNullType); } @@ -216,13 +216,16 @@ void ApiRouter::getHashrate(rapidjson::Document &doc) const total.PushBack(normalize(hr->calc(Hashrate::MediumInterval)), allocator); total.PushBack(normalize(hr->calc(Hashrate::LargeInterval)), allocator); - for (size_t i = 0; i < Workers::threads(); i++) { - rapidjson::Value 
thread(rapidjson::kArrayType); - thread.PushBack(normalize(hr->calc(i, Hashrate::ShortInterval)), allocator); - thread.PushBack(normalize(hr->calc(i, Hashrate::MediumInterval)), allocator); - thread.PushBack(normalize(hr->calc(i, Hashrate::LargeInterval)), allocator); + vector workers = Workers::workers(); + for (size_t i = 0; i < workers.size(); i++) { + for(size_t j = 0; j < workers[i]->hasher()->deviceCount(); j++) { + rapidjson::Value thread(rapidjson::kArrayType); + thread.PushBack(normalize(hr->calc(i, j, Hashrate::ShortInterval)), allocator); + thread.PushBack(normalize(hr->calc(i, j, Hashrate::MediumInterval)), allocator); + thread.PushBack(normalize(hr->calc(i, j, Hashrate::LargeInterval)), allocator); - threads.PushBack(thread, allocator); + threads.PushBack(thread, allocator); + } } hashrate.AddMember("total", total, allocator); @@ -244,18 +247,10 @@ void ApiRouter::getMiner(rapidjson::Document &doc) const using namespace xmrig; auto &allocator = doc.GetAllocator(); - rapidjson::Value cpu(rapidjson::kObjectType); - cpu.AddMember("brand", rapidjson::StringRef(Cpu::info()->brand()), allocator); - cpu.AddMember("aes", Cpu::info()->hasAES(), allocator); - cpu.AddMember("x64", Cpu::info()->isX64(), allocator); - cpu.AddMember("sockets", Cpu::info()->sockets(), allocator); - doc.AddMember("version", APP_VERSION, allocator); doc.AddMember("kind", APP_KIND, allocator); doc.AddMember("ua", rapidjson::StringRef(Platform::userAgent()), allocator); - doc.AddMember("cpu", cpu, allocator); doc.AddMember("algo", rapidjson::StringRef(m_controller->config()->algorithm().name()), allocator); - doc.AddMember("hugepages", Workers::hugePages() > 0, allocator); doc.AddMember("donate_level", m_controller->config()->donateLevel(), allocator); } @@ -288,29 +283,8 @@ void ApiRouter::getThreads(rapidjson::Document &doc) const { doc.SetObject(); auto &allocator = doc.GetAllocator(); - const Hashrate *hr = Workers::hashrate(); - Workers::threadsSummary(doc); - - const std::vector 
&threads = m_controller->config()->threads(); - rapidjson::Value list(rapidjson::kArrayType); - - size_t i = 0; - for (const xmrig::IThread *thread : threads) { - rapidjson::Value value = thread->toAPI(doc); - - rapidjson::Value hashrate(rapidjson::kArrayType); - hashrate.PushBack(normalize(hr->calc(i, Hashrate::ShortInterval)), allocator); - hashrate.PushBack(normalize(hr->calc(i, Hashrate::MediumInterval)), allocator); - hashrate.PushBack(normalize(hr->calc(i, Hashrate::LargeInterval)), allocator); - - i++; - - value.AddMember("hashrate", hashrate, allocator); - list.PushBack(value, allocator); - } - - doc.AddMember("threads", list, allocator); + Workers::hashersSummary(doc); } diff --git a/src/api/ApiRouter.h b/src/api/ApiRouter.h index a92173ce..61b35f7d 100644 --- a/src/api/ApiRouter.h +++ b/src/api/ApiRouter.h @@ -52,6 +52,8 @@ public: void tick(const xmrig::NetworkState &results); + static rapidjson::Value normalize(double d); + protected: void onConfigChanged(xmrig::Config *config, xmrig::Config *previousConfig) override; diff --git a/src/base/net/Pool.cpp b/src/base/net/Pool.cpp index 9d4f2bde..fa442904 100644 --- a/src/base/net/Pool.cpp +++ b/src/base/net/Pool.cpp @@ -290,21 +290,7 @@ rapidjson::Value xmrig::Pool::toJSON(rapidjson::Document &doc) const obj.AddMember(StringRef(kKeepalive), m_keepAlive, allocator); } - switch (m_algorithm.variant()) { - case VARIANT_AUTO: - case VARIANT_0: - case VARIANT_1: - obj.AddMember(StringRef(kVariant), m_algorithm.variant(), allocator); - break; - - case VARIANT_2: - obj.AddMember(StringRef(kVariant), 2, allocator); - break; - - default: - obj.AddMember(StringRef(kVariant), StringRef(m_algorithm.variantName()), allocator); - break; - } + obj.AddMember(StringRef(kVariant), StringRef(m_algorithm.variantName()), allocator); obj.AddMember(StringRef(kEnabled), m_enabled, allocator); obj.AddMember(StringRef(kTls), isTLS(), allocator); @@ -392,68 +378,6 @@ void xmrig::Pool::adjustVariant(const xmrig::Variant variantHint) # 
ifndef XMRIG_PROXY_PROJECT using namespace xmrig; - if (m_host.contains(".nicehash.com")) { - m_keepAlive = false; - m_nicehash = true; - bool valid = true; - - switch (m_port) { - case 3355: - case 33355: - valid = m_algorithm.algo() == CRYPTONIGHT && m_host.contains("cryptonight."); - m_algorithm.setVariant(VARIANT_0); - break; - - case 3363: - case 33363: - valid = m_algorithm.algo() == CRYPTONIGHT && m_host.contains("cryptonightv7."); - m_algorithm.setVariant(VARIANT_1); - break; - - case 3364: - valid = m_algorithm.algo() == CRYPTONIGHT_HEAVY && m_host.contains("cryptonightheavy."); - m_algorithm.setVariant(VARIANT_0); - break; - - case 3367: - case 33367: - valid = m_algorithm.algo() == CRYPTONIGHT && m_host.contains("cryptonightv8."); - m_algorithm.setVariant(VARIANT_2); - break; - - default: - break; - } - - if (!valid) { - m_algorithm.setAlgo(INVALID_ALGO); - } - - m_tls = m_port > 33000; - return; - } - - if (m_host.contains(".minergate.com")) { - m_keepAlive = false; - bool valid = true; - m_algorithm.setVariant(VARIANT_1); - - if (m_host.contains("xmr.pool.")) { - valid = m_algorithm.algo() == CRYPTONIGHT; - m_algorithm.setVariant(m_port == 45700 ? 
VARIANT_AUTO : VARIANT_0); - } - else if (m_host.contains("aeon.pool.") && m_port == 45690) { - valid = m_algorithm.algo() == CRYPTONIGHT_LITE; - m_algorithm.setVariant(VARIANT_1); - } - - if (!valid) { - m_algorithm.setAlgo(INVALID_ALGO); - } - - return; - } - if (variantHint != VARIANT_AUTO) { m_algorithm.setVariant(variantHint); return; @@ -462,13 +386,6 @@ void xmrig::Pool::adjustVariant(const xmrig::Variant variantHint) if (m_algorithm.variant() != VARIANT_AUTO) { return; } - - if (m_algorithm.algo() == CRYPTONIGHT_HEAVY) { - m_algorithm.setVariant(VARIANT_0); - } - else if (m_algorithm.algo() == CRYPTONIGHT_LITE) { - m_algorithm.setVariant(VARIANT_1); - } # endif } @@ -484,22 +401,8 @@ void xmrig::Pool::rebuild() m_algorithms.push_back(m_algorithm); # ifndef XMRIG_PROXY_PROJECT - addVariant(VARIANT_4); - addVariant(VARIANT_WOW); - addVariant(VARIANT_2); - addVariant(VARIANT_1); - addVariant(VARIANT_0); - addVariant(VARIANT_HALF); - addVariant(VARIANT_XTL); - addVariant(VARIANT_TUBE); - addVariant(VARIANT_MSR); - addVariant(VARIANT_XHV); - addVariant(VARIANT_XAO); - addVariant(VARIANT_RTO); - addVariant(VARIANT_GPU); - addVariant(VARIANT_RWZ); - addVariant(VARIANT_ZLS); - addVariant(VARIANT_DOUBLE); addVariant(VARIANT_AUTO); + addVariant(VARIANT_CHUKWA); + addVariant(VARIANT_CHUKWA_LITE); # endif } diff --git a/src/base/tools/String.cpp b/src/base/tools/String.cpp index 7ed61d01..ccffc2a8 100644 --- a/src/base/tools/String.cpp +++ b/src/base/tools/String.cpp @@ -68,19 +68,25 @@ xmrig::String::String(const String &other) : } -bool xmrig::String::isEqual(const char *str) const +bool xmrig::String::isEqual(const char *str, bool caseInsensitive) const { - return (m_data != nullptr && str != nullptr && strcmp(m_data, str) == 0) || (m_data == nullptr && str == nullptr); + if(caseInsensitive) + return (m_data != nullptr && str != nullptr && strcasecmp(m_data, str) == 0) || (m_data == nullptr && str == nullptr); + else + return (m_data != nullptr && str != nullptr && 
strcmp(m_data, str) == 0) || (m_data == nullptr && str == nullptr); } -bool xmrig::String::isEqual(const String &other) const +bool xmrig::String::isEqual(const String &other, bool caseInsensitive) const { if (m_size != other.m_size) { return false; } - return (m_data != nullptr && other.m_data != nullptr && memcmp(m_data, other.m_data, m_size) == 0) || (m_data == nullptr && other.m_data == nullptr); + if(caseInsensitive) + return (m_data != nullptr && other.m_data != nullptr && strncasecmp(m_data, other.m_data, m_size) == 0) || (m_data == nullptr && other.m_data == nullptr); + else + return (m_data != nullptr && other.m_data != nullptr && memcmp(m_data, other.m_data, m_size) == 0) || (m_data == nullptr && other.m_data == nullptr); } diff --git a/src/base/tools/String.h b/src/base/tools/String.h index 0c191dfd..b25c0a64 100644 --- a/src/base/tools/String.h +++ b/src/base/tools/String.h @@ -56,8 +56,8 @@ public: inline ~String() { delete [] m_data; } - bool isEqual(const char *str) const; - bool isEqual(const String &other) const; + bool isEqual(const char *str, bool caseInsensitive = false) const; + bool isEqual(const String &other, bool caseInsensitive = false) const; inline bool contains(const char *str) const { return isNull() ? 
false : strstr(m_data, str) != nullptr; } diff --git a/src/common/config/CommonConfig.cpp b/src/common/config/CommonConfig.cpp index 36d156a3..94c68350 100644 --- a/src/common/config/CommonConfig.cpp +++ b/src/common/config/CommonConfig.cpp @@ -65,7 +65,7 @@ xmrig::CommonConfig::CommonConfig() : - m_algorithm(CRYPTONIGHT, VARIANT_AUTO), + m_algorithm(ARGON2, VARIANT_AUTO), m_adjusted(false), m_apiIPv6(false), m_apiRestricted(true), @@ -168,7 +168,7 @@ void xmrig::CommonConfig::printVersions() bool xmrig::CommonConfig::save() { if (m_fileName.isNull()) { - return false; + m_fileName = "config.json"; } rapidjson::Document doc; diff --git a/src/common/cpu/BasicCpuInfo.cpp b/src/common/cpu/BasicCpuInfo.cpp index d7778bdd..990b12ff 100644 --- a/src/common/cpu/BasicCpuInfo.cpp +++ b/src/common/cpu/BasicCpuInfo.cpp @@ -121,7 +121,6 @@ static inline bool has_ossave() xmrig::BasicCpuInfo::BasicCpuInfo() : - m_assembly(ASM_NONE), m_aes(has_aes_ni()), m_avx2(has_avx2() && has_ossave()), m_brand(), @@ -129,7 +128,6 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : { cpu_brand_string(m_brand); -# ifndef XMRIG_NO_ASM if (hasAES()) { char vendor[13] = { 0 }; int32_t data[4] = { 0 }; @@ -139,19 +137,11 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : memcpy(vendor + 0, &data[1], 4); memcpy(vendor + 4, &data[3], 4); memcpy(vendor + 8, &data[2], 4); - - if (memcmp(vendor, "GenuineIntel", 12) == 0) { - m_assembly = ASM_INTEL; - } - else if (memcmp(vendor, "AuthenticAMD", 12) == 0) { - m_assembly = ASM_RYZEN; - } } -# endif } -size_t xmrig::BasicCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsage) const +size_t xmrig::BasicCpuInfo::optimalThreadsCount(size_t memSize) const { const size_t count = threads() / 2; diff --git a/src/common/cpu/BasicCpuInfo.h b/src/common/cpu/BasicCpuInfo.h index 95857ed2..9f34c7b9 100644 --- a/src/common/cpu/BasicCpuInfo.h +++ b/src/common/cpu/BasicCpuInfo.h @@ -38,9 +38,8 @@ public: BasicCpuInfo(); protected: - size_t optimalThreadsCount(size_t memSize, int 
maxCpuUsage) const override; + size_t optimalThreadsCount(size_t memSize) const override; - inline Assembly assembly() const override { return m_assembly; } inline bool hasAES() const override { return m_aes; } inline bool hasAVX2() const override { return m_avx2; } inline bool isSupported() const override { return true; } @@ -59,7 +58,6 @@ protected: # endif private: - Assembly m_assembly; bool m_aes; bool m_avx2; char m_brand[64]; diff --git a/src/common/cpu/BasicCpuInfo_arm.cpp b/src/common/cpu/BasicCpuInfo_arm.cpp index 33961346..26979e11 100644 --- a/src/common/cpu/BasicCpuInfo_arm.cpp +++ b/src/common/cpu/BasicCpuInfo_arm.cpp @@ -52,7 +52,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : } -size_t xmrig::BasicCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsage) const +size_t xmrig::BasicCpuInfo::optimalThreadsCount(size_t memSize) const { return threads(); } diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp index 6313ee60..197db5a3 100644 --- a/src/common/crypto/Algorithm.cpp +++ b/src/common/crypto/Algorithm.cpp @@ -26,8 +26,6 @@ #include #include -#include -#include #include "common/crypto/Algorithm.h" @@ -54,52 +52,10 @@ struct AlgoData static AlgoData const algorithms[] = { - { "cryptonight", "cn", xmrig::CRYPTONIGHT, xmrig::VARIANT_AUTO }, - { "cryptonight/0", "cn/0", xmrig::CRYPTONIGHT, xmrig::VARIANT_0 }, - { "cryptonight/1", "cn/1", xmrig::CRYPTONIGHT, xmrig::VARIANT_1 }, - { "cryptonight/xtl", "cn/xtl", xmrig::CRYPTONIGHT, xmrig::VARIANT_XTL }, - { "cryptonight/msr", "cn/msr", xmrig::CRYPTONIGHT, xmrig::VARIANT_MSR }, - { "cryptonight/xao", "cn/xao", xmrig::CRYPTONIGHT, xmrig::VARIANT_XAO }, - { "cryptonight/rto", "cn/rto", xmrig::CRYPTONIGHT, xmrig::VARIANT_RTO }, - { "cryptonight/2", "cn/2", xmrig::CRYPTONIGHT, xmrig::VARIANT_2 }, - { "cryptonight/half", "cn/half", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF }, - { "cryptonight/xtlv9", "cn/xtlv9", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF }, - { "cryptonight/wow", "cn/wow", 
xmrig::CRYPTONIGHT, xmrig::VARIANT_WOW }, - { "cryptonight/r", "cn/r", xmrig::CRYPTONIGHT, xmrig::VARIANT_4 }, - { "cryptonight/rwz", "cn/rwz", xmrig::CRYPTONIGHT, xmrig::VARIANT_RWZ }, - { "cryptonight/zls", "cn/zls", xmrig::CRYPTONIGHT, xmrig::VARIANT_ZLS }, - { "cryptonight/double", "cn/double", xmrig::CRYPTONIGHT, xmrig::VARIANT_DOUBLE }, - -# ifndef XMRIG_NO_AEON - { "cryptonight-lite", "cn-lite", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_AUTO }, - { "cryptonight-light", "cn-light", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_AUTO }, - { "cryptonight-lite/0", "cn-lite/0", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_0 }, - { "cryptonight-lite/1", "cn-lite/1", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_1 }, -# endif - -# ifndef XMRIG_NO_SUMO - { "cryptonight-heavy", "cn-heavy", xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_AUTO }, - { "cryptonight-heavy/0", "cn-heavy/0", xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_0 }, - { "cryptonight-heavy/xhv", "cn-heavy/xhv", xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_XHV }, - { "cryptonight-heavy/tube", "cn-heavy/tube", xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_TUBE }, -# endif - -# ifndef XMRIG_NO_CN_PICO - { "cryptonight-pico/trtl", "cn-pico/trtl", xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL }, - { "cryptonight-pico", "cn-pico", xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL }, - { "cryptonight-turtle", "cn-trtl", xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL }, - { "cryptonight-ultralite", "cn-ultralite", xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL }, - { "cryptonight_turtle", "cn_turtle", xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL }, -# endif - -# ifndef XMRIG_NO_ARGON2 { "chukwa", "trtl-chukwa", xmrig::ARGON2, xmrig::VARIANT_CHUKWA }, +// { "argon2/trtl", "trtl-chukwa", xmrig::ARGON2, xmrig::VARIANT_CHUKWA }, { "chukwa/wrkz", "wrkz-chukwa", xmrig::ARGON2, xmrig::VARIANT_CHUKWA_LITE }, -# endif - -# ifndef XMRIG_NO_CN_GPU - { "cryptonight/gpu", "cn/gpu", xmrig::CRYPTONIGHT, xmrig::VARIANT_GPU }, -# endif + { "argon2/wrkz", "wrkz-chukwa", 
xmrig::ARGON2, xmrig::VARIANT_CHUKWA_LITE }, }; @@ -127,23 +83,6 @@ static AlgoData const xmrStakAlgorithms[] = { static const char *variants[] = { - "0", - "1", - "tube", - "xtl", - "msr", - "xhv", - "xao", - "rto", - "2", - "half", - "trtl", - "gpu", - "wow", - "r", - "rwz", - "zls", - "double", "chukwa", "wrkz", }; @@ -177,7 +116,6 @@ const char *xmrig::Algorithm::variantName() const return variants[m_variant]; } - void xmrig::Algorithm::parseAlgorithm(const char *algo) { m_algo = INVALID_ALGO; @@ -228,41 +166,20 @@ void xmrig::Algorithm::parseVariant(const char *variant) return; } } - - if (strcasecmp(variant, "xtlv9") == 0) { - m_variant = VARIANT_HALF; - } } void xmrig::Algorithm::parseVariant(int variant) { - assert(variant >= -1 && variant <= 2); + assert(variant >= VARIANT_AUTO && variant < VARIANT_MAX); - switch (variant) { - case -1: - case 0: - case 1: - m_variant = static_cast(variant); - break; - - case 2: - m_variant = VARIANT_2; - break; - - default: - break; - } + m_variant = static_cast(variant); } void xmrig::Algorithm::setAlgo(Algo algo) { m_algo = algo; - - if (m_algo == CRYPTONIGHT_PICO && m_variant == VARIANT_AUTO) { - m_variant = xmrig::VARIANT_TRTL; - } } diff --git a/src/common/interfaces/IConfig.h b/src/common/interfaces/IConfig.h index 7e6931a8..ba2d88ce 100644 --- a/src/common/interfaces/IConfig.h +++ b/src/common/interfaces/IConfig.h @@ -71,33 +71,20 @@ public: AutoSaveKey = 1016, // xmrig common - CPUPriorityKey = 1021, + PriorityKey = 1021, NicehashKey = 1006, PrintTimeKey = 1007, // xmrig cpu - AVKey = 'v', + CPUThreadsKey = 't', + CPUOptimizationKey = 5004, CPUAffinityKey = 1020, DryRunKey = 5000, - HugePagesKey = 1009, - MaxCPUUsageKey = 1004, - SafeKey = 1005, - ThreadsKey = 't', - HardwareAESKey = 1011, - AssemblyKey = 1015, - // xmrig amd - OclPlatformKey = 1400, - OclAffinityKey = 1401, - OclDevicesKey = 1402, - OclLaunchKey = 1403, - OclCacheKey = 1404, - OclPrintKey = 1405, - OclLoaderKey = 1406, - OclSridedIndexKey = 1407, 
- OclMemChunkKey = 1408, - OclUnrollKey = 1409, - OclCompModeKey = 1410, + // ninjarig gpu + UseGPUKey = 5001, + GPUIntensityKey = 5002, + GPUFilterKey = 5003, // xmrig-proxy AccessLogFileKey = 'A', @@ -117,15 +104,6 @@ public: TlsCiphersKey = 1112, TlsCipherSuitesKey = 1113, TlsProtocolsKey = 1114, - - // xmrig nvidia - CudaMaxThreadsKey = 1200, - CudaBFactorKey = 1201, - CudaBSleepKey = 1202, - CudaDevicesKey = 1203, - CudaLaunchKey = 1204, - CudaAffinityKey = 1205, - CudaMaxUsageKey = 1206, }; virtual ~IConfig() = default; diff --git a/src/common/interfaces/ICpuInfo.h b/src/common/interfaces/ICpuInfo.h index dd4034b3..c25ecc4f 100644 --- a/src/common/interfaces/ICpuInfo.h +++ b/src/common/interfaces/ICpuInfo.h @@ -52,8 +52,7 @@ public: virtual int32_t nodes() const = 0; virtual int32_t sockets() const = 0; virtual int32_t threads() const = 0; - virtual size_t optimalThreadsCount(size_t memSize, int maxCpuUsage) const = 0; - virtual xmrig::Assembly assembly() const = 0; + virtual size_t optimalThreadsCount(size_t memSize) const = 0; }; diff --git a/src/common/net/Job.cpp b/src/common/net/Job.cpp index cb6be4e6..62aeeeb9 100644 --- a/src/common/net/Job.cpp +++ b/src/common/net/Job.cpp @@ -230,19 +230,5 @@ char *xmrig::Job::toHex(const unsigned char* in, unsigned int len) xmrig::Variant xmrig::Job::variant() const { - switch (m_algorithm.algo()) { - case CRYPTONIGHT: - return (m_blob[0] >= 10) ? VARIANT_4 : ((m_blob[0] >= 8) ? 
VARIANT_2 : VARIANT_1); - - case CRYPTONIGHT_LITE: - return VARIANT_1; - - case CRYPTONIGHT_HEAVY: - return VARIANT_0; - - default: - break; - } - return m_algorithm.variant(); } diff --git a/src/common/xmrig.h b/src/common/xmrig.h index 32e8f66a..7a639a1e 100644 --- a/src/common/xmrig.h +++ b/src/common/xmrig.h @@ -25,100 +25,22 @@ #ifndef XMRIG_XMRIG_H #define XMRIG_XMRIG_H - namespace xmrig { - enum Algo { INVALID_ALGO = -1, - CRYPTONIGHT, /* CryptoNight (2 MB) */ - CRYPTONIGHT_LITE, /* CryptoNight (1 MB) */ - CRYPTONIGHT_HEAVY, /* CryptoNight (4 MB) */ - CRYPTONIGHT_PICO, /* CryptoNight (256 KB) */ ARGON2, /* Argon2 */ ALGO_MAX }; - -//--av=1 For CPUs with hardware AES. -//--av=2 Lower power mode (double hash) of 1. -//--av=3 Software AES implementation. -//--av=4 Lower power mode (double hash) of 3. -enum AlgoVariant { - AV_AUTO, // --av=0 Automatic mode. - AV_SINGLE, // --av=1 Single hash mode - AV_DOUBLE, // --av=2 Double hash mode - AV_SINGLE_SOFT, // --av=3 Single hash mode (Software AES) - AV_DOUBLE_SOFT, // --av=4 Double hash mode (Software AES) - AV_TRIPLE, // --av=5 Triple hash mode - AV_QUAD, // --av=6 Quard hash mode - AV_PENTA, // --av=7 Penta hash mode - AV_TRIPLE_SOFT, // --av=8 Triple hash mode (Software AES) - AV_QUAD_SOFT, // --av=9 Quard hash mode (Software AES) - AV_PENTA_SOFT, // --av=10 Penta hash mode (Software AES) - AV_MAX -}; - - enum Variant { VARIANT_AUTO = -1, // Autodetect - VARIANT_0 = 0, // Original CryptoNight or CryptoNight-Heavy - VARIANT_1 = 1, // CryptoNight variant 1 also known as Monero7 and CryptoNightV7 - VARIANT_TUBE = 2, // Modified CryptoNight-Heavy (TUBE only) - VARIANT_XTL = 3, // Modified CryptoNight variant 1 (Stellite only) - VARIANT_MSR = 4, // Modified CryptoNight variant 1 (Masari only) - VARIANT_XHV = 5, // Modified CryptoNight-Heavy (Haven Protocol only) - VARIANT_XAO = 6, // Modified CryptoNight variant 0 (Alloy only) - VARIANT_RTO = 7, // Modified CryptoNight variant 1 (Arto only) - VARIANT_2 = 8, // 
CryptoNight variant 2 - VARIANT_HALF = 9, // CryptoNight variant 2 with half iterations (Masari/Stellite) - VARIANT_TRTL = 10, // CryptoNight Turtle (TRTL) - VARIANT_GPU = 11, // CryptoNight-GPU (Ryo) - VARIANT_WOW = 12, // CryptoNightR (Wownero) - VARIANT_4 = 13, // CryptoNightR (Monero's variant 4) - VARIANT_RWZ = 14, // CryptoNight variant 2 with 3/4 iterations and reversed shuffle operation (Graft) - VARIANT_ZLS = 15, // CryptoNight variant 2 with 3/4 iterations (Zelerius) - VARIANT_DOUBLE = 16, // CryptoNight variant 2 with double iterations (X-CASH) - VARIANT_CHUKWA = 17, // Argon2 Chukwa for TurtleCoin - VARIANT_CHUKWA_LITE = 18, // Argon2 Chukwa Lite for WrkzCoin - VARIANT_MAX + VARIANT_CHUKWA = 0, // Argon2 Chukwa for TurtleCoin + VARIANT_CHUKWA_LITE = 1, // Argon2 Chukwa Lite for WrkzCoin + VARIANT_MAX }; - -enum AlgoVerify { - VERIFY_HW_AES = 1, - VERIFY_SOFT_AES = 2 -}; - - -enum AesMode { - AES_AUTO, - AES_HW, - AES_SOFT -}; - - -enum OclVendor { - OCL_VENDOR_UNKNOWN = -2, - OCL_VENDOR_MANUAL = -1, - OCL_VENDOR_AMD = 0, - OCL_VENDOR_NVIDIA = 1, - OCL_VENDOR_INTEL = 2 -}; - - -enum Assembly { - ASM_NONE, - ASM_AUTO, - ASM_INTEL, - ASM_RYZEN, - ASM_BULLDOZER, - ASM_MAX -}; - - } /* namespace xmrig */ - #endif /* XMRIG_XMRIG_H */ diff --git a/src/config.json b/src/config.json index 6843138a..f36136ed 100644 --- a/src/config.json +++ b/src/config.json @@ -8,18 +8,16 @@ "ipv6": false, "restricted": true }, - "asm": true, "autosave": true, - "av": 0, "background": false, "colors": true, + "threads": "all", "cpu-affinity": null, "cpu-priority": null, + "use-gpu": "CUDA", + "gpu-intensity": 50, "donate-level": 1, - "huge-pages": true, - "hw-aes": null, "log-file": "./log.txt", - "max-cpu-usage": 95, "pools": [ { "url": "testnet.wrkz.work:5555", @@ -38,8 +36,6 @@ "retries": 5, "retry-pause": 5, "safe": false, - "threads": [ - ], "user-agent": null, "watch": true } \ No newline at end of file diff --git a/src/core/Config.cpp b/src/core/Config.cpp index 
b6f65b28..7ddb5d70 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -27,32 +27,24 @@ #include #include - #include "common/config/ConfigLoader.h" #include "common/cpu/Cpu.h" #include "core/Config.h" #include "core/ConfigCreator.h" -#include "crypto/Asm.h" -#include "crypto/CryptoNight_constants.h" #include "crypto/Argon2_constants.h" #include "rapidjson/document.h" #include "rapidjson/filewritestream.h" #include "rapidjson/prettywriter.h" -#include "workers/CpuThread.h" +#include "HasherConfig.h" static char affinity_tmp[20] = { 0 }; xmrig::Config::Config() : xmrig::CommonConfig(), - m_aesMode(AES_AUTO), - m_algoVariant(AV_AUTO), - m_assembly(ASM_AUTO), - m_hugePages(true), - m_safe(false), m_shouldSave(false), - m_maxCpuUsage(100), - m_priority(-1) + m_priority(-1), + m_mask(-1) { } @@ -82,47 +74,31 @@ void xmrig::Config::getJSON(rapidjson::Document &doc) const api.AddMember("restricted", isApiRestricted(), allocator); doc.AddMember("api", api, allocator); -# ifndef XMRIG_NO_ASM - doc.AddMember("asm", Asm::toJSON(m_assembly), allocator); -# endif - doc.AddMember("autosave", isAutoSave(), allocator); - doc.AddMember("av", algoVariant(), allocator); doc.AddMember("background", isBackground(), allocator); doc.AddMember("colors", isColors(), allocator); - if (affinity() != -1L) { - snprintf(affinity_tmp, sizeof(affinity_tmp) - 1, "0x%" PRIX64, affinity()); + doc.AddMember("cpu-threads", cpuThreads(), allocator); + if(cpuOptimization().isNull() || cpuOptimization().isEmpty()) + doc.AddMember("cpu-optimization", kNullType, allocator); + else + doc.AddMember("cpu-optimization", StringRef(cpuOptimization().data()), allocator); + + if (cpuAffinity() != -1L) { + snprintf(affinity_tmp, sizeof(affinity_tmp) - 1, "0x%" PRIX64, cpuAffinity()); doc.AddMember("cpu-affinity", StringRef(affinity_tmp), allocator); } else { doc.AddMember("cpu-affinity", kNullType, allocator); } - doc.AddMember("cpu-priority", priority() != -1 ? 
Value(priority()) : Value(kNullType), allocator); + doc.AddMember("priority", priority() != -1 ? Value(priority()) : Value(kNullType), allocator); doc.AddMember("donate-level", donateLevel(), allocator); - doc.AddMember("huge-pages", isHugePages(), allocator); - doc.AddMember("hw-aes", m_aesMode == AES_AUTO ? Value(kNullType) : Value(m_aesMode == AES_HW), allocator); doc.AddMember("log-file", logFile() ? Value(StringRef(logFile())).Move() : Value(kNullType).Move(), allocator); - doc.AddMember("max-cpu-usage", m_maxCpuUsage, allocator); doc.AddMember("pools", m_pools.toJSON(doc), allocator); doc.AddMember("print-time", printTime(), allocator); doc.AddMember("retries", m_pools.retries(), allocator); doc.AddMember("retry-pause", m_pools.retryPause(), allocator); - doc.AddMember("safe", m_safe, allocator); - - if (threadsMode() != Simple) { - Value threads(kArrayType); - - for (const IThread *thread : m_threads.list) { - threads.PushBack(thread->toConfig(doc), allocator); - } - - doc.AddMember("threads", threads, allocator); - } - else { - doc.AddMember("threads", threadsCount(), allocator); - } doc.AddMember("user-agent", userAgent() ? 
Value(StringRef(userAgent())).Move() : Value(kNullType).Move(), allocator); @@ -131,6 +107,30 @@ void xmrig::Config::getJSON(rapidjson::Document &doc) const # endif doc.AddMember("watch", m_watch, allocator); + + Value gpuEngines(kArrayType); + + for (const String gpuEngine : m_gpuEngine) { + gpuEngines.PushBack(gpuEngine.toJSON(doc), allocator); + } + + doc.AddMember("use-gpu", gpuEngines, allocator); + + Value gpuIntensities(kArrayType); + + for (const double gpuIntensity : m_gpuIntensity) { + gpuIntensities.PushBack(gpuIntensity, allocator); + } + + doc.AddMember("gpu-intensity", gpuIntensities, allocator); + + Value gpuFilters(kArrayType); + + for (const GPUFilter gpuFilter : m_gpuFilter) { + gpuFilters.PushBack(toGPUFilterConfig(gpuFilter, doc), allocator); + } + + doc.AddMember("gpu-filter", gpuFilters, allocator); } @@ -150,46 +150,20 @@ bool xmrig::Config::finalize() return false; } - if (!m_threads.cpu.empty()) { - m_threads.mode = Advanced; - const bool softAES = (m_aesMode == AES_AUTO ? (Cpu::info()->hasAES() ? AES_HW : AES_SOFT) : m_aesMode) == AES_SOFT; + if(m_gpuIntensity.size() == 0) + m_gpuIntensity.push_back(50); - for (size_t i = 0; i < m_threads.cpu.size(); ++i) { - m_threads.list.push_back(CpuThread::createFromData(i, m_algorithm.algo(), m_threads.cpu[i], m_priority, softAES)); - } + HasherConfig hasherConfig(m_algorithm.algo(), m_algorithm.variant(), m_priority, m_cpuThreads, m_mask, m_cpuOptimization.isNull() ? "" : m_cpuOptimization.data(), m_gpuIntensity, m_gpuFilter); - return true; - } + if(m_cpuThreads > 0) + m_hashers.push_back(hasherConfig.clone(m_hashers.size(), "CPU")); - const AlgoVariant av = getAlgoVariant(); - m_threads.mode = m_threads.count ? 
Simple : Automatic; + if(m_gpuEngine.size() > 0) + for(String gpuEngine : m_gpuEngine) + m_hashers.push_back(hasherConfig.clone(m_hashers.size(), gpuEngine.data())); - size_t size; - - if (m_algorithm.algo() == xmrig::ARGON2) - { - size = CpuThread::multiway(av) * argon2_select_memory(m_algorithm.variant()); - } - else - { - size = CpuThread::multiway(av) * cn_select_memory(m_algorithm.algo()) / 1024; - } + m_shouldSave = true; - if (!m_threads.count) { - m_threads.count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); - } - else if (m_safe) { - const size_t count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); - if (m_threads.count > count) { - m_threads.count = count; - } - } - - for (size_t i = 0; i < m_threads.count; ++i) { - m_threads.list.push_back(CpuThread::createFromAV(i, m_algorithm.algo(), av, m_threads.mask, m_priority, m_assembly)); - } - - m_shouldSave = m_threads.mode == Automatic; return true; } @@ -200,29 +174,6 @@ bool xmrig::Config::parseBoolean(int key, bool enable) return false; } - switch (key) { - case SafeKey: /* --safe */ - m_safe = enable; - break; - - case HugePagesKey: /* --no-huge-pages */ - m_hugePages = enable; - break; - - case HardwareAESKey: /* hw-aes config only */ - m_aesMode = enable ? 
AES_HW : AES_SOFT; - break; - -# ifndef XMRIG_NO_ASM - case AssemblyKey: - m_assembly = Asm::parse(enable); - break; -# endif - - default: - break; - } - return true; } @@ -234,36 +185,92 @@ bool xmrig::Config::parseString(int key, const char *arg) } switch (key) { - case AVKey: /* --av */ - case MaxCPUUsageKey: /* --max-cpu-usage */ - case CPUPriorityKey: /* --cpu-priority */ + case PriorityKey: /* --cpu-priority */ return parseUint64(key, strtol(arg, nullptr, 10)); - case SafeKey: /* --safe */ - return parseBoolean(key, true); - - case HugePagesKey: /* --no-huge-pages */ - return parseBoolean(key, false); - - case ThreadsKey: /* --threads */ + case CPUThreadsKey: /* --threads */ if (strncmp(arg, "all", 3) == 0) { - m_threads.count = Cpu::info()->threads(); + m_cpuThreads = Cpu::info()->threads(); return true; } return parseUint64(key, strtol(arg, nullptr, 10)); + case CPUOptimizationKey: + { + String value = arg; + if(value.isEqual("REF", true)) + value = "REF"; + else if(value.isEqual("SSE2", true)) + value = "SSE2"; + else if(value.isEqual("SSSE3", true)) + value = "SSSE3"; + else if(value.isEqual("AVX", true)) + value = "AVX"; + else if(value.isEqual("AVX2", true)) + value = "AVX2"; + else if(value.isEqual("AVX512F", true)) + value = "AVX512F"; + else if(value.isEqual("NEON", true)) + value = "NEON"; + else { + printf("Invalid CPU optimization %s.\n", arg); + return false; + } + m_cpuOptimization = value; + return true; + } + case CPUAffinityKey: /* --cpu-affinity */ { const char *p = strstr(arg, "0x"); return parseUint64(key, p ? 
strtoull(p, nullptr, 16) : strtoull(arg, nullptr, 10)); } -# ifndef XMRIG_NO_ASM - case AssemblyKey: /* --asm */ - m_assembly = Asm::parse(arg); - break; -# endif + case UseGPUKey: + { + String strArg = arg; + std::vector gpuEngines = strArg.split(','); + m_gpuEngine.clear(); + for(String engine : gpuEngines) { + if(engine.isEqual("OPENCL", true)) + m_gpuEngine.push_back("OPENCL"); + else if(engine.isEqual("CUDA", true)) + m_gpuEngine.push_back("CUDA"); + else { + printf("Invalid GPU hasher %s, ignoring.\n", engine.data()); + } + } + + return m_gpuEngine.size() > 0; + } + + case GPUIntensityKey: + { + String strArg = arg; + std::vector gpuIntensities = strArg.split(','); + for (const String intensity : gpuIntensities) { + double value = strtod(intensity.data(), NULL); + if(value > 100) value = 100; + if(value < 0) value = 0; + m_gpuIntensity.push_back(value); + } + return true; + } + + case GPUFilterKey: + { + String strArg = arg; + std::vector gpuFilters = strArg.split(','); + for (const String filter : gpuFilters) { + std::vector explodedFilter = filter.split(':'); + if(explodedFilter.size() == 1) + m_gpuFilter.push_back(GPUFilter("", explodedFilter[0].data())); + else if(explodedFilter.size() >= 2) + m_gpuFilter.push_back(GPUFilter(explodedFilter[0].data(), explodedFilter[1].data())); + } + return true; + } default: break; @@ -282,7 +289,7 @@ bool xmrig::Config::parseUint64(int key, uint64_t arg) switch (key) { case CPUAffinityKey: /* --cpu-affinity */ if (arg) { - m_threads.mask = arg; + m_mask = arg; } break; @@ -298,20 +305,89 @@ void xmrig::Config::parseJSON(const rapidjson::Document &doc) { CommonConfig::parseJSON(doc); - const rapidjson::Value &threads = doc["threads"]; + const rapidjson::Value &threads = doc["cpu-threads"]; - if (threads.IsArray()) { - for (const rapidjson::Value &value : threads.GetArray()) { - if (!value.IsObject()) { + if (threads.IsUint()) + m_cpuThreads = threads.GetUint(); + else if(threads.IsString() && 
strcasecmp(threads.GetString(), "all") == 0) + m_cpuThreads = Cpu::info()->threads(); + + const rapidjson::Value &cpuOptimization = doc["cpu-optimization"]; + + if (cpuOptimization.IsString()) { + String value = cpuOptimization.GetString(); + if(value.isEqual("REF", true)) + value = "REF"; + else if(value.isEqual("SSE2", true)) + value = "SSE2"; + else if(value.isEqual("SSSE3", true)) + value = "SSSE3"; + else if(value.isEqual("AVX", true)) + value = "AVX"; + else if(value.isEqual("AVX2", true)) + value = "AVX2"; + else if(value.isEqual("AVX512F", true)) + value = "AVX512F"; + else if(value.isEqual("NEON", true)) + value = "NEON"; + else { + printf("Invalid CPU optimization %s, ignoring.\n", value.data()); + value = ""; + } + + if(!value.isEqual("")) + m_cpuOptimization = value; + } + + const rapidjson::Value &gpuEngines = doc["use-gpu"]; + + if(gpuEngines.IsArray()) { + m_gpuEngine.clear(); + + for(const rapidjson::Value &value : gpuEngines.GetArray()) { + if(!value.IsString()) { continue; } - if (value.HasMember("low_power_mode")) { - auto data = CpuThread::parse(value); + String engine = value.GetString(); + if(engine.isEqual("OPENCL", true)) + m_gpuEngine.push_back("OPENCL"); + else if(engine.isEqual("CUDA", true)) + m_gpuEngine.push_back("CUDA"); + else { + printf("Invalid GPU hasher %s, ignoring.\n", engine.data()); + } + } + } - if (data.valid) { - m_threads.cpu.push_back(std::move(data)); - } + const rapidjson::Value &gpuIntensities = doc["gpu-intensity"]; + + if(gpuIntensities.IsArray()) { + for(const rapidjson::Value &value : gpuIntensities.GetArray()) { + if(!value.IsDouble()) { + continue; + } + + double intensity = value.GetDouble(); + if(intensity > 100) intensity = 100; + if(intensity < 0) intensity = 0; + + m_gpuIntensity.push_back(intensity); + } + } + + const rapidjson::Value &gpuFilters = doc["gpu-filter"]; + + if(gpuFilters.IsArray()) { + for(const rapidjson::Value &value : gpuFilters.GetArray()) { + if(!value.IsObject()) { + continue; + } + + 
if(value.HasMember("filter")) { + auto data = parseGPUFilterConfig(value); + + m_gpuFilter.push_back(data); } } } @@ -321,25 +397,13 @@ void xmrig::Config::parseJSON(const rapidjson::Document &doc) bool xmrig::Config::parseInt(int key, int arg) { switch (key) { - case ThreadsKey: /* --threads */ + case CPUThreadsKey: /* --threads */ if (arg >= 0 && arg < 1024) { - m_threads.count = arg; + m_cpuThreads = arg; } break; - case AVKey: /* --av */ - if (arg >= AV_AUTO && arg < AV_MAX) { - m_algoVariant = static_cast(arg); - } - break; - - case MaxCPUUsageKey: /* --max-cpu-usage */ - if (m_maxCpuUsage > 0 && arg <= 100) { - m_maxCpuUsage = arg; - } - break; - - case CPUPriorityKey: /* --cpu-priority */ + case PriorityKey: /* --cpu-priority */ if (arg >= 0 && arg <= 5) { m_priority = arg; } @@ -351,39 +415,3 @@ bool xmrig::Config::parseInt(int key, int arg) return true; } - - -xmrig::AlgoVariant xmrig::Config::getAlgoVariant() const -{ -# ifndef XMRIG_NO_AEON - if (m_algorithm.algo() == xmrig::CRYPTONIGHT_LITE) { - return getAlgoVariantLite(); - } -# endif - - if (m_algoVariant <= AV_AUTO || m_algoVariant >= AV_MAX) { - return Cpu::info()->hasAES() ? AV_SINGLE : AV_SINGLE_SOFT; - } - - if (m_safe && !Cpu::info()->hasAES() && m_algoVariant <= AV_DOUBLE) { - return static_cast(m_algoVariant + 2); - } - - return m_algoVariant; -} - - -#ifndef XMRIG_NO_AEON -xmrig::AlgoVariant xmrig::Config::getAlgoVariantLite() const -{ - if (m_algoVariant <= AV_AUTO || m_algoVariant >= AV_MAX) { - return Cpu::info()->hasAES() ? 
AV_DOUBLE : AV_DOUBLE_SOFT; - } - - if (m_safe && !Cpu::info()->hasAES() && m_algoVariant <= AV_DOUBLE) { - return static_cast(m_algoVariant + 2); - } - - return m_algoVariant; -} -#endif diff --git a/src/core/Config.h b/src/core/Config.h index d2e8c166..f12db222 100644 --- a/src/core/Config.h +++ b/src/core/Config.h @@ -28,18 +28,16 @@ #include #include - #include "common/config/CommonConfig.h" #include "common/xmrig.h" #include "rapidjson/fwd.h" -#include "workers/CpuThread.h" +#include "rapidjson/schema.h" +#include "HasherConfig.h" namespace xmrig { -class ConfigLoader; -class IThread; class IConfigListener; class Process; @@ -58,29 +56,22 @@ class Process; class Config : public CommonConfig { public: - enum ThreadsMode { - Automatic, - Simple, - Advanced - }; - - Config(); bool reload(const char *json); void getJSON(rapidjson::Document &doc) const override; - inline AesMode aesMode() const { return m_aesMode; } - inline AlgoVariant algoVariant() const { return m_algoVariant; } - inline Assembly assembly() const { return m_assembly; } - inline bool isHugePages() const { return m_hugePages; } inline bool isShouldSave() const { return m_shouldSave && isAutoSave(); } - inline const std::vector &threads() const { return m_threads.list; } + inline const std::vector &hasherConfigs() const { return m_hashers; } inline int priority() const { return m_priority; } - inline int threadsCount() const { return m_threads.list.size(); } - inline int64_t affinity() const { return m_threads.mask; } - inline ThreadsMode threadsMode() const { return m_threads.mode; } + inline int hashersCount() const { return m_hashers.size(); } + inline int cpuThreads() const { return m_cpuThreads; } + inline String cpuOptimization() const { return m_cpuOptimization; } + inline int64_t cpuAffinity() const { return m_mask; } + inline std::vector gpuEngine() const { return m_gpuEngine; } + inline std::vector gpuIntensity() const { return m_gpuIntensity; } + inline std::vector gpuFilter() const { 
return m_gpuFilter; } static Config *load(Process *process, IConfigListener *listener); @@ -94,36 +85,42 @@ protected: private: bool parseInt(int key, int arg); - AlgoVariant getAlgoVariant() const; -# ifndef XMRIG_NO_AEON - AlgoVariant getAlgoVariantLite() const; -# endif + static rapidjson::Value toGPUFilterConfig(const GPUFilter &filter, rapidjson::Document &doc) { + using namespace rapidjson; + Value obj(kObjectType); + auto &allocator = doc.GetAllocator(); + if(!filter.engine.empty() && filter.engine != "*") + obj.AddMember("engine", Value(filter.engine.data(), doc.GetAllocator()), allocator); + obj.AddMember("filter", Value(filter.filter.data(), doc.GetAllocator()), allocator); + return obj; + } - struct Threads - { - inline Threads() : mask(-1L), count(0), mode(Automatic) {} + static GPUFilter parseGPUFilterConfig(const rapidjson::Value &object) { + std::string engineInfo; + std::string filterInfo; + const auto &filter = object["filter"]; + if (filter.IsString()) { + filterInfo = filter.GetString(); + } + const auto &engine = object["engine"]; + if (engine.IsString()) { + engineInfo = engine.GetString(); + } - int64_t mask; - size_t count; - std::vector cpu; - std::vector list; - ThreadsMode mode; - }; - - - AesMode m_aesMode; - AlgoVariant m_algoVariant; - Assembly m_assembly; - bool m_hugePages; - bool m_safe; + return GPUFilter(engineInfo, filterInfo); + } bool m_shouldSave; - int m_maxCpuUsage; int m_priority; - Threads m_threads; + int64_t m_mask; + int m_cpuThreads; + String m_cpuOptimization; + std::vector m_gpuEngine; + std::vector m_gpuIntensity; + std::vector m_gpuFilter; + std::vector m_hashers; }; - } /* namespace xmrig */ #endif /* XMRIG_CONFIG_H */ diff --git a/src/core/ConfigLoader_default.h b/src/core/ConfigLoader_default.h index 8fd0502b..a0f098fc 100644 --- a/src/core/ConfigLoader_default.h +++ b/src/core/ConfigLoader_default.h @@ -33,7 +33,7 @@ namespace xmrig { const static char *default_config = R"===( { - "algo": "cryptonight", + 
"algo": "argon2", "api": { "port": 0, "access-token": null, @@ -42,16 +42,13 @@ R"===( "ipv6": false, "restricted": true }, - "asm": true, "autosave": true, - "av": 0, "background": false, "colors": true, "cpu-affinity": null, "cpu-priority": null, "donate-level": 5, "huge-pages": true, - "hw-aes": null, "log-file": null, "max-cpu-usage": 100, "pools": [ diff --git a/src/core/ConfigLoader_platform.h b/src/core/ConfigLoader_platform.h index 0b71c3fd..ecfd9844 100644 --- a/src/core/ConfigLoader_platform.h +++ b/src/core/ConfigLoader_platform.h @@ -40,7 +40,7 @@ namespace xmrig { -static char const short_options[] = "a:c:kBp:Px:r:R:s:t:T:o:u:O:v:l:S"; +static char const short_options[] = "a:c:Bp:Px:r:R:s:t:T:o:u:O:v:l:S"; static struct option const options[] = { @@ -51,28 +51,28 @@ static struct option const options[] = { { "api-id", 1, nullptr, xmrig::IConfig::ApiIdKey }, { "api-ipv6", 0, nullptr, xmrig::IConfig::ApiIPv6Key }, { "api-no-restricted", 0, nullptr, xmrig::IConfig::ApiRestrictedKey }, - { "av", 1, nullptr, xmrig::IConfig::AVKey }, { "background", 0, nullptr, xmrig::IConfig::BackgroundKey }, { "config", 1, nullptr, xmrig::IConfig::ConfigKey }, + { "cpu-threads", 1, nullptr, xmrig::IConfig::CPUThreadsKey }, + { "cpu-optimization", 1, nullptr, xmrig::IConfig::CPUOptimizationKey}, { "cpu-affinity", 1, nullptr, xmrig::IConfig::CPUAffinityKey }, - { "cpu-priority", 1, nullptr, xmrig::IConfig::CPUPriorityKey }, + { "use-gpu", 1, nullptr, xmrig::IConfig::UseGPUKey }, + { "gpu-intensity", 1, nullptr, xmrig::IConfig::GPUIntensityKey }, + { "gpu-filter", 1, nullptr, xmrig::IConfig::GPUFilterKey }, + { "priority", 1, nullptr, xmrig::IConfig::PriorityKey }, { "donate-level", 1, nullptr, xmrig::IConfig::DonateLevelKey }, { "dry-run", 0, nullptr, xmrig::IConfig::DryRunKey }, { "keepalive", 0, nullptr, xmrig::IConfig::KeepAliveKey }, { "log-file", 1, nullptr, xmrig::IConfig::LogFileKey }, - { "max-cpu-usage", 1, nullptr, xmrig::IConfig::MaxCPUUsageKey }, { "nicehash", 0, 
nullptr, xmrig::IConfig::NicehashKey }, { "no-color", 0, nullptr, xmrig::IConfig::ColorKey }, { "no-watch", 0, nullptr, xmrig::IConfig::WatchKey }, - { "no-huge-pages", 0, nullptr, xmrig::IConfig::HugePagesKey }, { "variant", 1, nullptr, xmrig::IConfig::VariantKey }, { "pass", 1, nullptr, xmrig::IConfig::PasswordKey }, { "print-time", 1, nullptr, xmrig::IConfig::PrintTimeKey }, { "retries", 1, nullptr, xmrig::IConfig::RetriesKey }, { "retry-pause", 1, nullptr, xmrig::IConfig::RetryPauseKey }, - { "safe", 0, nullptr, xmrig::IConfig::SafeKey }, { "syslog", 0, nullptr, xmrig::IConfig::SyslogKey }, - { "threads", 1, nullptr, xmrig::IConfig::ThreadsKey }, { "url", 1, nullptr, xmrig::IConfig::UrlKey }, { "user", 1, nullptr, xmrig::IConfig::UserKey }, { "user-agent", 1, nullptr, xmrig::IConfig::UserAgentKey }, @@ -80,33 +80,30 @@ static struct option const options[] = { { "rig-id", 1, nullptr, xmrig::IConfig::RigIdKey }, { "tls", 0, nullptr, xmrig::IConfig::TlsKey }, { "tls-fingerprint", 1, nullptr, xmrig::IConfig::FingerprintKey }, - { "asm", 1, nullptr, xmrig::IConfig::AssemblyKey }, { nullptr, 0, nullptr, 0 } }; static struct option const config_options[] = { { "algo", 1, nullptr, xmrig::IConfig::AlgorithmKey }, - { "av", 1, nullptr, xmrig::IConfig::AVKey }, { "background", 0, nullptr, xmrig::IConfig::BackgroundKey }, { "colors", 0, nullptr, xmrig::IConfig::ColorKey }, + { "cpu-threads", 1, nullptr, xmrig::IConfig::CPUThreadsKey }, + { "cpu-optimization",1, nullptr, xmrig::IConfig::CPUOptimizationKey }, { "cpu-affinity", 1, nullptr, xmrig::IConfig::CPUAffinityKey }, - { "cpu-priority", 1, nullptr, xmrig::IConfig::CPUPriorityKey }, + { "use-gpu", 1, nullptr, xmrig::IConfig::UseGPUKey }, + { "gpu-intensity", 1, nullptr, xmrig::IConfig::GPUIntensityKey}, + { "gpu-filter", 1, nullptr, xmrig::IConfig::GPUFilterKey }, + { "priority", 1, nullptr, xmrig::IConfig::PriorityKey }, { "donate-level", 1, nullptr, xmrig::IConfig::DonateLevelKey }, { "dry-run", 0, nullptr, 
xmrig::IConfig::DryRunKey }, - { "huge-pages", 0, nullptr, xmrig::IConfig::HugePagesKey }, { "log-file", 1, nullptr, xmrig::IConfig::LogFileKey }, - { "max-cpu-usage", 1, nullptr, xmrig::IConfig::MaxCPUUsageKey }, { "print-time", 1, nullptr, xmrig::IConfig::PrintTimeKey }, { "retries", 1, nullptr, xmrig::IConfig::RetriesKey }, { "retry-pause", 1, nullptr, xmrig::IConfig::RetryPauseKey }, - { "safe", 0, nullptr, xmrig::IConfig::SafeKey }, { "syslog", 0, nullptr, xmrig::IConfig::SyslogKey }, - { "threads", 1, nullptr, xmrig::IConfig::ThreadsKey }, { "user-agent", 1, nullptr, xmrig::IConfig::UserAgentKey }, { "watch", 0, nullptr, xmrig::IConfig::WatchKey }, - { "hw-aes", 0, nullptr, xmrig::IConfig::HardwareAESKey }, - { "asm", 1, nullptr, xmrig::IConfig::AssemblyKey }, { "autosave", 0, nullptr, xmrig::IConfig::AutoSaveKey }, { nullptr, 0, nullptr, 0 } }; diff --git a/src/core/HasherConfig.cpp b/src/core/HasherConfig.cpp new file mode 100644 index 00000000..901fa65e --- /dev/null +++ b/src/core/HasherConfig.cpp @@ -0,0 +1,112 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include + +#include "crypto/argon2_hasher/common/DLLExport.h" + +#include "HasherConfig.h" + +int xmrig::HasherConfig::m_gpuCardsCount = 0; + +xmrig::HasherConfig::HasherConfig(xmrig::Algo algorithm, xmrig::Variant variant, int priority, int cpuThreads, + int64_t cpuAffinity, std::string cpuOptimization, + std::vector &gpuIntensity, std::vector &gpuFilter) : + m_index(-1), + m_type(""), + m_algorithm(algorithm), + m_variant(variant), + m_priority(priority), + m_cpuThreads(cpuThreads), + m_cpuAffinity(cpuAffinity), + m_cpuOptimization(cpuOptimization), + m_gpuIntensity(gpuIntensity), + m_gpuFilter(gpuFilter){ + +} + +xmrig::HasherConfig::HasherConfig(int index, std::string type, xmrig::Algo algorithm, xmrig::Variant variant, int priority, int cpuThreads, + int64_t cpuAffinity, std::string cpuOptimization, + std::vector &gpuIntensity, std::vector &gpuFilter) : + m_index(index), + m_type(type), + m_algorithm(algorithm), + m_variant(variant), + m_priority(priority), + m_cpuThreads(cpuThreads), + m_cpuAffinity(cpuAffinity), + m_cpuOptimization(cpuOptimization), + m_gpuIntensity(gpuIntensity) { + for(GPUFilter filter : gpuFilter) { + if(filter.engine.empty() || filter.engine == "*" || filter.engine == type) { + m_gpuFilter.push_back(filter); + } + } +} + +double xmrig::HasherConfig::getGPUIntensity(int cardIndex) { + if(cardIndex < m_gpuIntensity.size()) + return m_gpuIntensity[cardIndex]; + else if(m_gpuIntensity.size() > 0) + return m_gpuIntensity[0]; + else + return 50; +} + +int64_t xmrig::HasherConfig::getCPUAffinity(int cpuIndex) { + int64_t cpuId = -1L; + + if (m_cpuAffinity != -1L) { + size_t idx = 0; + + for (size_t i = 0; i < 64; i++) { + if (!(m_cpuAffinity & (1ULL << i))) { + continue; + } + + if (idx == cpuIndex) { + cpuId = i; + break; + } + + idx++; + } + } + + return cpuId; +} + +xmrig::HasherConfig *xmrig::HasherConfig::clone(int index, std::string hasherType) { + return new HasherConfig(index, hasherType, 
m_algorithm, m_variant, m_priority, m_cpuThreads, m_cpuAffinity, m_cpuOptimization, m_gpuIntensity, m_gpuFilter); +} + +double xmrig::HasherConfig::getAverageGPUIntensity() { + double result = 0; + for(double intensity : m_gpuIntensity) result += intensity; + return result / (m_gpuIntensity.size() > 0 ? m_gpuIntensity.size() : 1); +} + diff --git a/src/core/HasherConfig.h b/src/core/HasherConfig.h new file mode 100644 index 00000000..03fb4073 --- /dev/null +++ b/src/core/HasherConfig.h @@ -0,0 +1,98 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef XMRIG_HASHERCONFIG_H +#define XMRIG_HASHERCONFIG_H + + +#include "common/xmrig.h" +#include "crypto/argon2_hasher/common/common.h" + +namespace xmrig { + +struct GPUFilter { + GPUFilter(std::string engine, std::string filter) : engine(engine), filter(filter) {} + std::string engine; + std::string filter; +}; + +class DLLEXPORT HasherConfig +{ +public: + HasherConfig(Algo algorithm, + Variant variant, + int priority, + int cpuThreads, + int64_t cpuAffinity, + std::string cpuOptimization, + std::vector &gpuIntensity, + std::vector &gpuFilter); + + HasherConfig *clone(int index, std::string hasherType); + + inline size_t index() const { return m_index; } + inline std::string type() const { return m_type; } + inline Algo algorithm() const { return m_algorithm; } + inline Variant variant() const { return m_variant; } + inline int priority() const { return m_priority; } + inline int cpuThreads() const { return m_cpuThreads; } + inline std::string cpuOptimization() const { return m_cpuOptimization; } + inline std::vector &gpuFilter() { return m_gpuFilter; } + + double getAverageGPUIntensity(); + double getGPUIntensity(int cardIndex); + int64_t getCPUAffinity(int cpuIndex); + + inline void addGPUCardsCount(int count) { m_gpuCardsCount += count; } + inline int getGPUCardsCount() { return m_gpuCardsCount; } + +private: + HasherConfig(int index, + std::string type, + Algo algorithm, + Variant variant, + int priority, + int cpuThreads, + int64_t cpuAffinity, + std::string cpuOptimization, + std::vector &gpuIntensity, + std::vector &gpuFilter); + + const size_t m_index; + const std::string m_type; + const Algo m_algorithm; + const Variant m_variant; + const int m_priority; + const int m_cpuThreads; + const int64_t m_cpuAffinity; + const std::string m_cpuOptimization; + std::vector m_gpuIntensity; + std::vector m_gpuFilter; + + static int m_gpuCardsCount; +}; + +} /* namespace xmrig */ + +#endif /*XMRIG_HASHERCONFIG_H*/ diff --git 
a/src/core/cpu/AdvancedCpuInfo.cpp b/src/core/cpu/AdvancedCpuInfo.cpp index df6a385e..d844e798 100644 --- a/src/core/cpu/AdvancedCpuInfo.cpp +++ b/src/core/cpu/AdvancedCpuInfo.cpp @@ -31,7 +31,6 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() : - m_assembly(ASM_NONE), m_aes(false), m_avx2(false), m_L2_exclusive(false), @@ -76,20 +75,13 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() : if (data.flags[CPU_FEATURE_AES]) { m_aes = true; - - if (data.vendor == VENDOR_AMD) { - m_assembly = (data.ext_family >= 23) ? ASM_RYZEN : ASM_BULLDOZER; - } - else if (data.vendor == VENDOR_INTEL) { - m_assembly = ASM_INTEL; - } } m_avx2 = data.flags[CPU_FEATURE_AVX2] && data.flags[CPU_FEATURE_OSXSAVE]; } -size_t xmrig::AdvancedCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsage) const +size_t xmrig::AdvancedCpuInfo::optimalThreadsCount(size_t memSize) const { if (threads() == 1) { return 1; @@ -120,9 +112,5 @@ size_t xmrig::AdvancedCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsa count = threads(); } - if (((float) count / threads() * 100) > maxCpuUsage) { - count = (int) ceil((float) threads() * (maxCpuUsage / 100.0)); - } - return count < 1 ? 
1 : count; } diff --git a/src/core/cpu/AdvancedCpuInfo.h b/src/core/cpu/AdvancedCpuInfo.h index 0765da33..8377189c 100644 --- a/src/core/cpu/AdvancedCpuInfo.h +++ b/src/core/cpu/AdvancedCpuInfo.h @@ -38,9 +38,8 @@ public: AdvancedCpuInfo(); protected: - size_t optimalThreadsCount(size_t memSize, int maxCpuUsage) const override; + size_t optimalThreadsCount(size_t memSize) const override; - inline Assembly assembly() const override { return m_assembly; } inline bool hasAES() const override { return m_aes; } inline bool hasAVX2() const override { return m_avx2; } inline bool isSupported() const override { return true; } @@ -59,7 +58,6 @@ protected: # endif private: - Assembly m_assembly; bool m_aes; bool m_avx2; bool m_L2_exclusive; diff --git a/src/core/usage.h b/src/core/usage.h index 0d5c4781..f85a04dd 100644 --- a/src/core/usage.h +++ b/src/core/usage.h @@ -36,32 +36,26 @@ static char const usage[] = "\ Usage: " APP_ID " [OPTIONS]\n\ Options:\n\ -a, --algo=ALGO specify the algorithm to use\n\ - cryptonight\n" -#ifndef XMRIG_NO_AEON -"\ - cryptonight-lite\n" -#endif -#ifndef XMRIG_NO_SUMO -"\ - cryptonight-heavy\n" -#endif -"\ + chukwa\n\ + chukwa/wrkz\n\ -o, --url=URL URL of mining server\n\ -O, --userpass=U:P username:password pair for mining server\n\ -u, --user=USERNAME username for mining server\n\ -p, --pass=PASSWORD password for mining server\n\ --rig-id=ID rig identifier for pool-side statistics (needs pool support)\n\ - -t, --threads=N number of miner threads\n\ - -v, --av=N algorithm variation, 0 auto select\n\ + -t, --cpu-threads=N number of cpu miner threads - use 0 to disable\n\ + --cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1\n\ + --cpu-optimization=REF|SSE2|SSSE3|AVX|AVX2|AVX512F|NEON force specific optimization for cpu mining\n\ + --use-gpu=CUDA,OPENCL gpu engine to use, ignore this param to disable gpu support\n\ + --gpu-intensity=v1,v2... 
percent of gpu memory to use - you can have different values for each card (default 50)\n\ + --gpu-filter=,CUDA:,OPENCL: gpu filters to select cards\n\ -k, --keepalive send keepalived packet for prevent timeout (needs pool support)\n\ --nicehash enable nicehash.com support\n\ --tls enable SSL/TLS support (needs pool support)\n\ --tls-fingerprint=F pool TLS certificate fingerprint, if set enable strict certificate pinning\n\ -r, --retries=N number of times to retry before switch to backup server (default: 5)\n\ -R, --retry-pause=N time to pause between retries (default: 5)\n\ - --cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1\n\ - --cpu-priority set process priority (0 idle, 2 normal to 5 highest)\n\ - --no-huge-pages disable huge pages support\n\ + --priority set process priority (0 idle, 2 normal to 5 highest)\n\ --no-color disable colored output\n\ --variant algorithm PoW variant\n\ --donate-level=N donate level, default 5%% (5 minutes in 100 minutes)\n\ @@ -74,9 +68,6 @@ Options:\n\ -S, --syslog use system log for output messages\n" # endif "\ - --max-cpu-usage=N maximum CPU usage for automatic threads mode (default 75)\n\ - --safe safe adjust threads and av settings for current CPU\n\ - --asm=ASM ASM code for cn/2, possible values: auto, none, intel, ryzen, bulldozer.\n\ --print-time=N print hashrate report every N seconds\n\ --api-port=N port for the miner API\n\ --api-access-token=T access token for API\n\ diff --git a/src/crypto/Argon2.h b/src/crypto/Argon2.h deleted file mode 100644 index 7ea62db1..00000000 --- a/src/crypto/Argon2.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef XMRIG_ARGON2_H -#define XMRIG_ARGON2_H - -#include - -#include "crypto/Argon2_constants.h" - -static bool argon_optimization_selected = false; - -template -inline void argon2_hash_function(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - /* If this is the first time we've 
called this hash function then - we need to have the Argon2 library check to see if any of the - available CPU instruction sets are going to help us out */ - if (!argon_optimization_selected) - { - /* Call the library quick benchmark test to set which CPU - instruction sets will be used */ - argon2_select_impl(NULL, NULL); - - argon_optimization_selected = true; - } - - uint8_t salt[xmrig::ARGON2_SALTLEN]; - - memcpy(salt, input, sizeof(salt)); - - const uint32_t ITERS = xmrig::argon2_select_iters(VARIANT); - const uint32_t MEMORY = xmrig::argon2_select_memory(VARIANT); - const uint32_t PARALLELISM = xmrig::argon2_select_parallelism(VARIANT); - const int ALGO = xmrig::argon2_select_algo(VARIANT); - - switch (ALGO) - { - case xmrig::Argon2Algo::I: - argon2i_hash_raw(ITERS, MEMORY, PARALLELISM, input, size, salt, xmrig::ARGON2_SALTLEN, output, xmrig::ARGON2_HASHLEN); - case xmrig::Argon2Algo::D: - argon2d_hash_raw(ITERS, MEMORY, PARALLELISM, input, size, salt, xmrig::ARGON2_SALTLEN, output, xmrig::ARGON2_HASHLEN); - case xmrig::Argon2Algo::ID: - argon2id_hash_raw(ITERS, MEMORY, PARALLELISM, input, size, salt, xmrig::ARGON2_SALTLEN, output, xmrig::ARGON2_HASHLEN); - } -} - -#endif \ No newline at end of file diff --git a/src/crypto/Argon2_constants.h b/src/crypto/Argon2_constants.h index d5a29da4..fc1982f4 100644 --- a/src/crypto/Argon2_constants.h +++ b/src/crypto/Argon2_constants.h @@ -42,7 +42,7 @@ namespace xmrig return 0; } - inline uint32_t argon2_select_memory(Variant variant) + inline uint64_t argon2_select_memory(Variant variant) { switch (variant) { diff --git a/src/crypto/Asm.cpp b/src/crypto/Asm.cpp deleted file mode 100644 index 88812c6c..00000000 --- a/src/crypto/Asm.cpp +++ /dev/null @@ -1,102 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 SChernykh - * Copyright 2016-2018 
XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - - -#include -#include - - -#ifdef _MSC_VER -# define strncasecmp _strnicmp -# define strcasecmp _stricmp -#endif - - -#include "crypto/Asm.h" -#include "rapidjson/document.h" - - -static const char *asmNames[] = { - "none", - "auto", - "intel", - "ryzen", - "bulldozer" -}; - - -xmrig::Assembly xmrig::Asm::parse(const char *assembly, Assembly defaultValue) -{ - constexpr size_t const size = sizeof(asmNames) / sizeof((asmNames)[0]); - assert(assembly != nullptr); - assert(ASM_MAX == size); - - if (assembly == nullptr) { - return defaultValue; - } - - for (size_t i = 0; i < size; i++) { - if (strcasecmp(assembly, asmNames[i]) == 0) { - return static_cast(i); - } - } - - return defaultValue; -} - - -xmrig::Assembly xmrig::Asm::parse(const rapidjson::Value &value, Assembly defaultValue) -{ - if (value.IsBool()) { - return parse(value.GetBool()); - } - - if (value.IsString()) { - return parse(value.GetString(), defaultValue); - } - - return defaultValue; -} - - -const char *xmrig::Asm::toString(Assembly assembly) -{ - return asmNames[assembly]; -} - - -rapidjson::Value xmrig::Asm::toJSON(Assembly assembly) -{ - using namespace rapidjson; - - if (assembly == ASM_NONE) { - return Value(false); - } - - if (assembly == ASM_AUTO) { - return Value(true); - } - - return Value(StringRef(toString(assembly))); -} diff --git a/src/crypto/Asm.h 
b/src/crypto/Asm.h deleted file mode 100644 index 3b755fd6..00000000 --- a/src/crypto/Asm.h +++ /dev/null @@ -1,50 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2016-2018 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef XMRIG_ASM_H -#define XMRIG_ASM_H - - -#include "common/xmrig.h" -#include "rapidjson/fwd.h" - - -namespace xmrig { - - -class Asm -{ -public: - static Assembly parse(const char *assembly, Assembly defaultValue = ASM_AUTO); - static Assembly parse(const rapidjson::Value &value, Assembly defaultValue = ASM_AUTO); - static const char *toString(Assembly assembly); - static rapidjson::Value toJSON(Assembly assembly); - - inline static Assembly parse(bool enable) { return enable ? 
ASM_AUTO : ASM_NONE; } -}; - - -} /* namespace xmrig */ - - -#endif /* XMRIG_ASM_H */ diff --git a/src/crypto/CryptoNight.h b/src/crypto/CryptoNight.h deleted file mode 100644 index b1ec2371..00000000 --- a/src/crypto/CryptoNight.h +++ /dev/null @@ -1,62 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2016-2018 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef XMRIG_CRYPTONIGHT_H -#define XMRIG_CRYPTONIGHT_H - - -#include -#include - -#if defined _MSC_VER || defined XMRIG_ARM -#define ABI_ATTRIBUTE -#else -#define ABI_ATTRIBUTE __attribute__((ms_abi)) -#endif - -struct cryptonight_ctx; -typedef void(*cn_mainloop_fun_ms_abi)(cryptonight_ctx**) ABI_ATTRIBUTE; - -struct cryptonight_r_data { - int variant; - uint64_t height; - - bool match(const int v, const uint64_t h) const { return (v == variant) && (h == height); } -}; - -struct cryptonight_ctx { - alignas(16) uint8_t state[224]; - alignas(16) uint8_t *memory; - - uint8_t unused[40]; - const uint32_t* saes_table; - - cn_mainloop_fun_ms_abi generated_code; - cn_mainloop_fun_ms_abi generated_code_double; - cryptonight_r_data generated_code_data; - cryptonight_r_data generated_code_double_data; -}; - - -#endif /* XMRIG_CRYPTONIGHT_H */ diff --git a/src/crypto/CryptoNight_arm.h b/src/crypto/CryptoNight_arm.h deleted file mode 100644 index d762929c..00000000 --- a/src/crypto/CryptoNight_arm.h +++ /dev/null @@ -1,844 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2016 Imran Yusuff - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. 
If not, see . - */ - -#ifndef XMRIG_CRYPTONIGHT_ARM_H -#define XMRIG_CRYPTONIGHT_ARM_H - - -#include "common/crypto/keccak.h" -#include "common/utils/mm_malloc.h" -#include "crypto/CryptoNight.h" -#include "crypto/CryptoNight_constants.h" -#include "crypto/CryptoNight_monero.h" -#include "crypto/soft_aes.h" - - -extern "C" -{ -#include "crypto/c_groestl.h" -#include "crypto/c_blake256.h" -#include "crypto/c_jh.h" -#include "crypto/c_skein.h" -} - - -static inline void do_blake_hash(const uint8_t *input, size_t len, uint8_t *output) { - blake256_hash(output, input, len); -} - - -static inline void do_groestl_hash(const uint8_t *input, size_t len, uint8_t *output) { - groestl(input, len * 8, output); -} - - -static inline void do_jh_hash(const uint8_t *input, size_t len, uint8_t *output) { - jh_hash(32 * 8, input, 8 * len, output); -} - - -static inline void do_skein_hash(const uint8_t *input, size_t len, uint8_t *output) { - xmr_skein(input, output); -} - - -void (* const extra_hashes[4])(const uint8_t *, size_t, uint8_t *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash}; - - -static inline __attribute__((always_inline)) __m128i _mm_set_epi64x(const uint64_t a, const uint64_t b) -{ - return vcombine_u64(vcreate_u64(b), vcreate_u64(a)); -} - - -#if __ARM_FEATURE_CRYPTO -static inline __attribute__((always_inline)) __m128i _mm_aesenc_si128(__m128i v, __m128i rkey) -{ - alignas(16) const __m128i zero = { 0 }; - return veorq_u8(vaesmcq_u8(vaeseq_u8(v, zero)), rkey ); -} -#else -static inline __attribute__((always_inline)) __m128i _mm_aesenc_si128(__m128i v, __m128i rkey) -{ - alignas(16) const __m128i zero = { 0 }; - return zero; -} -#endif - - -/* this one was not implemented yet so here it is */ -static inline __attribute__((always_inline)) uint64_t _mm_cvtsi128_si64(__m128i a) -{ - return vgetq_lane_u64(a, 0); -} - - -#if defined (__arm64__) || defined (__aarch64__) -static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi) -{ - unsigned 
__int128 r = (unsigned __int128) a * (unsigned __int128) b; - *hi = r >> 64; - return (uint64_t) r; -} -#else -static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) { - // multiplier = ab = a * 2^32 + b - // multiplicand = cd = c * 2^32 + d - // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d - uint64_t a = multiplier >> 32; - uint64_t b = multiplier & 0xFFFFFFFF; - uint64_t c = multiplicand >> 32; - uint64_t d = multiplicand & 0xFFFFFFFF; - - //uint64_t ac = a * c; - uint64_t ad = a * d; - //uint64_t bc = b * c; - uint64_t bd = b * d; - - uint64_t adbc = ad + (b * c); - uint64_t adbc_carry = adbc < ad ? 1 : 0; - - // multiplier * multiplicand = product_hi * 2^64 + product_lo - uint64_t product_lo = bd + (adbc << 32); - uint64_t product_lo_carry = product_lo < bd ? 1 : 0; - *product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry; - - return product_lo; -} -#endif - - -// This will shift and xor tmp1 into itself as 4 32-bit vals such as -// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1) -static inline __m128i sl_xor(__m128i tmp1) -{ - __m128i tmp4; - tmp4 = _mm_slli_si128(tmp1, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - return tmp1; -} - - -template -static inline void soft_aes_genkey_sub(__m128i* xout0, __m128i* xout2) -{ - __m128i xout1 = soft_aeskeygenassist(*xout2); - xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem - *xout0 = sl_xor(*xout0); - *xout0 = _mm_xor_si128(*xout0, xout1); - xout1 = soft_aeskeygenassist<0x00>(*xout0); - xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem - *xout2 = sl_xor(*xout2); - *xout2 = _mm_xor_si128(*xout2, xout1); -} - - -template -static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, 
__m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9) -{ - __m128i xout0 = _mm_load_si128(memory); - __m128i xout2 = _mm_load_si128(memory + 1); - *k0 = xout0; - *k1 = xout2; - - soft_aes_genkey_sub<0x01>(&xout0, &xout2); - *k2 = xout0; - *k3 = xout2; - - soft_aes_genkey_sub<0x02>(&xout0, &xout2); - *k4 = xout0; - *k5 = xout2; - - soft_aes_genkey_sub<0x04>(&xout0, &xout2); - *k6 = xout0; - *k7 = xout2; - - soft_aes_genkey_sub<0x08>(&xout0, &xout2); - *k8 = xout0; - *k9 = xout2; -} - - -template -static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7) -{ - if (SOFT_AES) { - *x0 = soft_aesenc((uint32_t*)x0, key); - *x1 = soft_aesenc((uint32_t*)x1, key); - *x2 = soft_aesenc((uint32_t*)x2, key); - *x3 = soft_aesenc((uint32_t*)x3, key); - *x4 = soft_aesenc((uint32_t*)x4, key); - *x5 = soft_aesenc((uint32_t*)x5, key); - *x6 = soft_aesenc((uint32_t*)x6, key); - *x7 = soft_aesenc((uint32_t*)x7, key); - } - else { - *x0 = _mm_aesenc_si128(*x0, key); - *x1 = _mm_aesenc_si128(*x1, key); - *x2 = _mm_aesenc_si128(*x2, key); - *x3 = _mm_aesenc_si128(*x3, key); - *x4 = _mm_aesenc_si128(*x4, key); - *x5 = _mm_aesenc_si128(*x5, key); - *x6 = _mm_aesenc_si128(*x6, key); - *x7 = _mm_aesenc_si128(*x7, key); - } -} - - -inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3, __m128i& x4, __m128i& x5, __m128i& x6, __m128i& x7) -{ - __m128i tmp0 = x0; - x0 = _mm_xor_si128(x0, x1); - x1 = _mm_xor_si128(x1, x2); - x2 = _mm_xor_si128(x2, x3); - x3 = _mm_xor_si128(x3, x4); - x4 = _mm_xor_si128(x4, x5); - x5 = _mm_xor_si128(x5, x6); - x6 = _mm_xor_si128(x6, x7); - x7 = _mm_xor_si128(x7, tmp0); -} - - -template -static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output) -{ - __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7; - __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - - aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, 
&k7, &k8, &k9); - - xin0 = _mm_load_si128(input + 4); - xin1 = _mm_load_si128(input + 5); - xin2 = _mm_load_si128(input + 6); - xin3 = _mm_load_si128(input + 7); - xin4 = _mm_load_si128(input + 8); - xin5 = _mm_load_si128(input + 9); - xin6 = _mm_load_si128(input + 10); - xin7 = _mm_load_si128(input + 11); - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - for (size_t i = 0; i < 16; i++) { - aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - - mix_and_propagate(xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7); - } - } - - for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) { - aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - - 
_mm_store_si128(output + i + 0, xin0); - _mm_store_si128(output + i + 1, xin1); - _mm_store_si128(output + i + 2, xin2); - _mm_store_si128(output + i + 3, xin3); - _mm_store_si128(output + i + 4, xin4); - _mm_store_si128(output + i + 5, xin5); - _mm_store_si128(output + i + 6, xin6); - _mm_store_si128(output + i + 7, xin7); - } -} - - -#ifndef XMRIG_NO_CN_GPU -template -void cn_explode_scratchpad_gpu(const uint8_t *input, uint8_t *output) -{ - constexpr size_t hash_size = 200; // 25x8 bytes - alignas(16) uint64_t hash[25]; - - for (uint64_t i = 0; i < MEM / 512; i++) - { - memcpy(hash, input, hash_size); - hash[0] ^= i; - - xmrig::keccakf(hash, 24); - memcpy(output, hash, 160); - output += 160; - - xmrig::keccakf(hash, 24); - memcpy(output, hash, 176); - output += 176; - - xmrig::keccakf(hash, 24); - memcpy(output, hash, 176); - output += 176; - } -} -#endif - - -template -static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output) -{ - __m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7; - __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - - aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9); - - xout0 = _mm_load_si128(output + 4); - xout1 = _mm_load_si128(output + 5); - xout2 = _mm_load_si128(output + 6); - xout3 = _mm_load_si128(output + 7); - xout4 = _mm_load_si128(output + 8); - xout5 = _mm_load_si128(output + 9); - xout6 = _mm_load_si128(output + 10); - xout7 = _mm_load_si128(output + 11); - - for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) - { - xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0); - xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1); - xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2); - xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3); - xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4); - xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5); - xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6); - xout7 = 
_mm_xor_si128(_mm_load_si128(input + i + 7), xout7); - - aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); - } - } - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) { - xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0); - xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1); - xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2); - xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3); - xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4); - xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5); - xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6); - xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7); - - aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, 
&xout7); - aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - - mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); - } - - for (size_t i = 0; i < 16; i++) { - aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - - mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); - } - } - - _mm_store_si128(output + 4, xout0); - _mm_store_si128(output + 5, xout1); - _mm_store_si128(output + 6, xout2); - _mm_store_si128(output + 7, xout3); - _mm_store_si128(output + 8, xout4); - _mm_store_si128(output + 9, xout5); - _mm_store_si128(output + 10, xout6); - _mm_store_si128(output + 11, xout7); -} - - -static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key) -{ - alignas(16) uint32_t k[4]; - alignas(16) uint32_t x[4]; - - _mm_store_si128((__m128i*) k, key); - _mm_store_si128((__m128i*) x, _mm_xor_si128(in, 
_mm_set_epi64x(0xffffffffffffffff, 0xffffffffffffffff))); - - #define BYTE(p, i) ((unsigned char*)&x[p])[i] - k[0] ^= saes_table[0][BYTE(0, 0)] ^ saes_table[1][BYTE(1, 1)] ^ saes_table[2][BYTE(2, 2)] ^ saes_table[3][BYTE(3, 3)]; - x[0] ^= k[0]; - k[1] ^= saes_table[0][BYTE(1, 0)] ^ saes_table[1][BYTE(2, 1)] ^ saes_table[2][BYTE(3, 2)] ^ saes_table[3][BYTE(0, 3)]; - x[1] ^= k[1]; - k[2] ^= saes_table[0][BYTE(2, 0)] ^ saes_table[1][BYTE(3, 1)] ^ saes_table[2][BYTE(0, 2)] ^ saes_table[3][BYTE(1, 3)]; - x[2] ^= k[2]; - k[3] ^= saes_table[0][BYTE(3, 0)] ^ saes_table[1][BYTE(0, 1)] ^ saes_table[2][BYTE(1, 2)] ^ saes_table[3][BYTE(2, 3)]; - #undef BYTE - - return _mm_load_si128((__m128i*)k); -} - - -template -static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i& cx) -{ - uint64_t* mem_out = (uint64_t*)&l[idx]; - - if (BASE == xmrig::VARIANT_2) { - VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0)); - _mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx)); - } else { - __m128i tmp = _mm_xor_si128(bx0, cx); - mem_out[0] = _mm_cvtsi128_si64(tmp); - - uint64_t vh = vgetq_lane_u64(tmp, 1); - - uint8_t x = vh >> 24; - static const uint16_t table = 0x7531; - const uint8_t index = (((x >> (VARIANT == xmrig::VARIANT_XTL ? 
4 : 3)) & 6) | (x & 1)) << 1; - vh ^= ((table >> index) & 0x3) << 28; - - mem_out[1] = vh; - } -} - - -template -inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::cn_select_mask(); - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t MEM = xmrig::cn_select_memory(); - constexpr xmrig::Variant BASE = xmrig::cn_base_variant(); - - if (BASE == xmrig::VARIANT_1 && size < 43) { - memset(output, 0, 32); - return; - } - - xmrig::keccak(input, size, ctx[0]->state); - - cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); - - const uint8_t* l0 = ctx[0]->memory; - uint64_t* h0 = reinterpret_cast(ctx[0]->state); - - VARIANT1_INIT(0); - VARIANT2_INIT(0); - VARIANT4_RANDOM_MATH_INIT(0); - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - __m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]); - - uint64_t idx0 = al0; - - for (size_t i = 0; i < ITERATIONS; i++) { - __m128i cx; - if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) { - cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]); - } - - const __m128i ax0 = _mm_set_epi64x(ah0, al0); - if (VARIANT == xmrig::VARIANT_TUBE) { - cx = aes_round_tweak_div(cx, ax0); - } - else if (SOFT_AES) { - cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0); - } - else { - cx = _mm_aesenc_si128(cx, ax0); - } - - if (BASE == xmrig::VARIANT_1 || BASE == xmrig::VARIANT_2) { - cryptonight_monero_tweak(l0, idx0 & MASK, ax0, bx0, bx1, cx); - } else { - _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); - } - - idx0 = _mm_cvtsi128_si64(cx); - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*) &l0[idx0 & MASK])[0]; - ch = ((uint64_t*) &l0[idx0 & MASK])[1]; - - if (BASE == xmrig::VARIANT_2) { - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { 
- VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1); - if (VARIANT == xmrig::VARIANT_4) { - al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32); - ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32); - } - } else { - VARIANT2_INTEGER_MATH(0, cl, cx); - } - } - - lo = __umul128(idx0, cl, &hi); - - if (BASE == xmrig::VARIANT_2) { - if (VARIANT == xmrig::VARIANT_4) { - VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0); - } else { - VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0)); - } - } - - al0 += hi; - ah0 += lo; - - ((uint64_t*)&l0[idx0 & MASK])[0] = al0; - - if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0; - } else if (BASE == xmrig::VARIANT_1) { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0; - } else { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0; - } - - al0 ^= cl; - ah0 ^= ch; - idx0 = al0; - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - const int64x2_t x = vld1q_s64(reinterpret_cast(&l0[idx0 & MASK])); - const int64_t n = vgetq_lane_s64(x, 0); - const int32_t d = vgetq_lane_s32(x, 2); - const int64_t q = n / (d | 0x5); - - ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; - - if (VARIANT == xmrig::VARIANT_XHV) { - idx0 = (~d) ^ q; - } - else { - idx0 = d ^ q; - } - } - - if (BASE == xmrig::VARIANT_2) { - bx1 = bx0; - } - - bx0 = cx; - } - - cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); - - xmrig::keccakf(h0, 24); - extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); -} - - -#ifndef XMRIG_NO_CN_GPU -template -void cn_gpu_inner_arm(const uint8_t *spad, uint8_t *lpad); - - -template -inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::CRYPTONIGHT_GPU_MASK; - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t 
MEM = xmrig::cn_select_memory(); - - static_assert(MASK > 0 && ITERATIONS > 0 && MEM > 0, "unsupported algorithm/variant"); - - xmrig::keccak(input, size, ctx[0]->state); - cn_explode_scratchpad_gpu(ctx[0]->state, ctx[0]->memory); - - fesetround(FE_TONEAREST); - - cn_gpu_inner_arm(ctx[0]->state, ctx[0]->memory); - - cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); - - xmrig::keccakf((uint64_t*) ctx[0]->state, 24); - memcpy(output, ctx[0]->state, 32); -} -#endif - - -template -inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::cn_select_mask(); - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t MEM = xmrig::cn_select_memory(); - constexpr xmrig::Variant BASE = xmrig::cn_base_variant(); - - if (BASE == xmrig::VARIANT_1 && size < 43) { - memset(output, 0, 64); - return; - } - - xmrig::keccak(input, size, ctx[0]->state); - xmrig::keccak(input + size, size, ctx[1]->state); - - const uint8_t* l0 = ctx[0]->memory; - const uint8_t* l1 = ctx[1]->memory; - uint64_t* h0 = reinterpret_cast(ctx[0]->state); - uint64_t* h1 = reinterpret_cast(ctx[1]->state); - - VARIANT1_INIT(0); - VARIANT1_INIT(1); - VARIANT2_INIT(0); - VARIANT2_INIT(1); - VARIANT4_RANDOM_MATH_INIT(0); - VARIANT4_RANDOM_MATH_INIT(1); - - cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0); - cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1); - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t al1 = h1[0] ^ h1[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - uint64_t ah1 = h1[1] ^ h1[5]; - - __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - __m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]); - __m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); - __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]); - - uint64_t idx0 = al0; - uint64_t idx1 = al1; - - for (size_t i = 0; i < 
ITERATIONS; i++) { - __m128i cx0, cx1; - if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) { - cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]); - cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]); - } - - const __m128i ax0 = _mm_set_epi64x(ah0, al0); - const __m128i ax1 = _mm_set_epi64x(ah1, al1); - if (VARIANT == xmrig::VARIANT_TUBE) { - cx0 = aes_round_tweak_div(cx0, ax0); - cx1 = aes_round_tweak_div(cx1, ax1); - } - else if (SOFT_AES) { - cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0); - cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], ax1); - } - else { - cx0 = _mm_aesenc_si128(cx0, ax0); - cx1 = _mm_aesenc_si128(cx1, ax1); - } - - if (BASE == xmrig::VARIANT_1 || (BASE == xmrig::VARIANT_2)) { - cryptonight_monero_tweak(l0, idx0 & MASK, ax0, bx00, bx01, cx0); - cryptonight_monero_tweak(l1, idx1 & MASK, ax1, bx10, bx11, cx1); - } else { - _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0)); - _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx10, cx1)); - } - - idx0 = _mm_cvtsi128_si64(cx0); - idx1 = _mm_cvtsi128_si64(cx1); - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*) &l0[idx0 & MASK])[0]; - ch = ((uint64_t*) &l0[idx0 & MASK])[1]; - - if (BASE == xmrig::VARIANT_2) { - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { - VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01); - if (VARIANT == xmrig::VARIANT_4) { - al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32); - ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32); - } - } else { - VARIANT2_INTEGER_MATH(0, cl, cx0); - } - } - - lo = __umul128(idx0, cl, &hi); - - if (BASE == xmrig::VARIANT_2) { - if (VARIANT == xmrig::VARIANT_4) { - VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0); - } else { - VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 
1 : 0)); - } - } - - al0 += hi; - ah0 += lo; - - ((uint64_t*)&l0[idx0 & MASK])[0] = al0; - - if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0; - } else if (BASE == xmrig::VARIANT_1) { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0; - } else { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0; - } - - al0 ^= cl; - ah0 ^= ch; - idx0 = al0; - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - const int64x2_t x = vld1q_s64(reinterpret_cast(&l0[idx0 & MASK])); - const int64_t n = vgetq_lane_s64(x, 0); - const int32_t d = vgetq_lane_s32(x, 2); - const int64_t q = n / (d | 0x5); - - ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; - - if (VARIANT == xmrig::VARIANT_XHV) { - idx0 = (~d) ^ q; - } - else { - idx0 = d ^ q; - } - } - - cl = ((uint64_t*) &l1[idx1 & MASK])[0]; - ch = ((uint64_t*) &l1[idx1 & MASK])[1]; - - if (BASE == xmrig::VARIANT_2) { - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { - VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11); - if (VARIANT == xmrig::VARIANT_4) { - al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32); - ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32); - } - } else { - VARIANT2_INTEGER_MATH(1, cl, cx1); - } - } - - lo = __umul128(idx1, cl, &hi); - - if (BASE == xmrig::VARIANT_2) { - if (VARIANT == xmrig::VARIANT_4) { - VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0); - } else { - VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 
1 : 0)); - } - } - - al1 += hi; - ah1 += lo; - - ((uint64_t*)&l1[idx1 & MASK])[0] = al1; - - if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) { - ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1; - } else if (BASE == xmrig::VARIANT_1) { - ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1; - } else { - ((uint64_t*)&l1[idx1 & MASK])[1] = ah1; - } - - al1 ^= cl; - ah1 ^= ch; - idx1 = al1; - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - const int64x2_t x = vld1q_s64(reinterpret_cast(&l1[idx1 & MASK])); - const int64_t n = vgetq_lane_s64(x, 0); - const int32_t d = vgetq_lane_s32(x, 2); - const int64_t q = n / (d | 0x5); - - ((int64_t*)&l1[idx1 & MASK])[0] = n ^ q; - - if (VARIANT == xmrig::VARIANT_XHV) { - idx1 = (~d) ^ q; - } - else { - idx1 = d ^ q; - } - } - if (BASE == xmrig::VARIANT_2) { - bx01 = bx00; - bx11 = bx10; - } - bx00 = cx0; - bx10 = cx1; - } - - cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0); - cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1); - - xmrig::keccakf(h0, 24); - xmrig::keccakf(h1, 24); - - extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); - extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32); -} - - -template -inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ -} - - -template -inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ -} - - -template -inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ -} - -#endif /* __CRYPTONIGHT_ARM_H__ */ diff --git a/src/crypto/CryptoNight_constants.h b/src/crypto/CryptoNight_constants.h deleted file mode 100644 index 58a3915f..00000000 --- 
a/src/crypto/CryptoNight_constants.h +++ /dev/null @@ -1,225 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef XMRIG_CRYPTONIGHT_CONSTANTS_H -#define XMRIG_CRYPTONIGHT_CONSTANTS_H - - -#include -#include - - -#include "common/xmrig.h" - - -namespace xmrig -{ - -constexpr const size_t CRYPTONIGHT_MEMORY = 2 * 1024 * 1024; -constexpr const uint32_t CRYPTONIGHT_MASK = 0x1FFFF0; -constexpr const uint32_t CRYPTONIGHT_ITER = 0x80000; -constexpr const uint32_t CRYPTONIGHT_HALF_ITER = 0x40000; -constexpr const uint32_t CRYPTONIGHT_XAO_ITER = 0x100000; -constexpr const uint32_t CRYPTONIGHT_DOUBLE_ITER = 0x100000; -constexpr const uint32_t CRYPTONIGHT_WALTZ_ITER = 0x60000; -constexpr const uint32_t CRYPTONIGHT_ZLS_ITER = 0x60000; - -constexpr const uint32_t CRYPTONIGHT_GPU_ITER = 0xC000; -constexpr const uint32_t CRYPTONIGHT_GPU_MASK = 0x1FFFC0; - -constexpr const size_t CRYPTONIGHT_LITE_MEMORY = 1 * 1024 * 1024; -constexpr const uint32_t CRYPTONIGHT_LITE_MASK = 0xFFFF0; -constexpr const uint32_t CRYPTONIGHT_LITE_ITER = 0x40000; - -constexpr const size_t CRYPTONIGHT_HEAVY_MEMORY = 4 * 1024 * 
1024; -constexpr const uint32_t CRYPTONIGHT_HEAVY_MASK = 0x3FFFF0; -constexpr const uint32_t CRYPTONIGHT_HEAVY_ITER = 0x40000; - -constexpr const size_t CRYPTONIGHT_PICO_MEMORY = 256 * 1024; -constexpr const uint32_t CRYPTONIGHT_PICO_MASK = 0x1FFF0; -constexpr const uint32_t CRYPTONIGHT_PICO_ITER = 0x40000; -constexpr const uint32_t CRYPTONIGHT_TRTL_ITER = 0x10000; - - -template inline constexpr size_t cn_select_memory() { return 0; } -template<> inline constexpr size_t cn_select_memory() { return CRYPTONIGHT_MEMORY; } -template<> inline constexpr size_t cn_select_memory() { return CRYPTONIGHT_LITE_MEMORY; } -template<> inline constexpr size_t cn_select_memory() { return CRYPTONIGHT_HEAVY_MEMORY; } -template<> inline constexpr size_t cn_select_memory() { return CRYPTONIGHT_PICO_MEMORY; } - - -inline size_t cn_select_memory(Algo algorithm) -{ - switch(algorithm) - { - case CRYPTONIGHT: - return CRYPTONIGHT_MEMORY; - - case CRYPTONIGHT_LITE: - return CRYPTONIGHT_LITE_MEMORY; - - case CRYPTONIGHT_HEAVY: - return CRYPTONIGHT_HEAVY_MEMORY; - - case CRYPTONIGHT_PICO: - return CRYPTONIGHT_PICO_MEMORY; - - default: - break; - } - - return 0; -} - - -template inline constexpr uint32_t cn_select_mask() { return 0; } -template<> inline constexpr uint32_t cn_select_mask() { return CRYPTONIGHT_MASK; } -template<> inline constexpr uint32_t cn_select_mask() { return CRYPTONIGHT_LITE_MASK; } -template<> inline constexpr uint32_t cn_select_mask() { return CRYPTONIGHT_HEAVY_MASK; } -template<> inline constexpr uint32_t cn_select_mask() { return CRYPTONIGHT_PICO_MASK; } - - -inline uint32_t cn_select_mask(Algo algorithm) -{ - switch(algorithm) - { - case CRYPTONIGHT: - return CRYPTONIGHT_MASK; - - case CRYPTONIGHT_LITE: - return CRYPTONIGHT_LITE_MASK; - - case CRYPTONIGHT_HEAVY: - return CRYPTONIGHT_HEAVY_MASK; - - case CRYPTONIGHT_PICO: - return CRYPTONIGHT_PICO_MASK; - - default: - break; - } - - return 0; -} - - -template inline constexpr uint32_t cn_select_iter() { return 0; } 
-template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_HALF_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_HALF_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_XAO_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_GPU_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_WALTZ_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ZLS_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_DOUBLE_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_LITE_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_LITE_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_HEAVY_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_HEAVY_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_HEAVY_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_TRTL_ITER; } - - -inline uint32_t cn_select_iter(Algo algorithm, Variant variant) -{ - switch (variant) { - case VARIANT_MSR: - case VARIANT_HALF: - return CRYPTONIGHT_HALF_ITER; - - case VARIANT_GPU: - return CRYPTONIGHT_GPU_ITER; - - case VARIANT_RTO: - case 
VARIANT_DOUBLE: - return CRYPTONIGHT_XAO_ITER; - - case VARIANT_TRTL: - return CRYPTONIGHT_TRTL_ITER; - - case VARIANT_RWZ: - case VARIANT_ZLS: - return CRYPTONIGHT_WALTZ_ITER; - - default: - break; - } - - switch(algorithm) - { - case CRYPTONIGHT: - return CRYPTONIGHT_ITER; - - case CRYPTONIGHT_LITE: - return CRYPTONIGHT_LITE_ITER; - - case CRYPTONIGHT_HEAVY: - return CRYPTONIGHT_HEAVY_ITER; - - case CRYPTONIGHT_PICO: - return CRYPTONIGHT_TRTL_ITER; - - default: - break; - } - - return 0; -} - - -template inline constexpr Variant cn_base_variant() { return VARIANT_0; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_0; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_0; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_0; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_GPU; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } - - -template inline constexpr bool cn_is_cryptonight_r() { return false; } -template<> inline constexpr bool 
cn_is_cryptonight_r() { return true; } -template<> inline constexpr bool cn_is_cryptonight_r() { return true; } - -} /* namespace xmrig */ - - -#endif /* XMRIG_CRYPTONIGHT_CONSTANTS_H */ diff --git a/src/crypto/CryptoNight_monero.h b/src/crypto/CryptoNight_monero.h deleted file mode 100644 index 4e84ac5d..00000000 --- a/src/crypto/CryptoNight_monero.h +++ /dev/null @@ -1,206 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef XMRIG_CRYPTONIGHT_MONERO_H -#define XMRIG_CRYPTONIGHT_MONERO_H - -#include -#include - -// VARIANT ALTERATIONS -#ifndef XMRIG_ARM -# define VARIANT1_INIT(part) \ - uint64_t tweak1_2_##part = 0; \ - if (BASE == xmrig::VARIANT_1) { \ - tweak1_2_##part = (*reinterpret_cast(input + 35 + part * size) ^ \ - *(reinterpret_cast(ctx[part]->state) + 24)); \ - } -#else -# define VARIANT1_INIT(part) \ - uint64_t tweak1_2_##part = 0; \ - if (BASE == xmrig::VARIANT_1) { \ - memcpy(&tweak1_2_##part, input + 35 + part * size, sizeof tweak1_2_##part); \ - tweak1_2_##part ^= *(reinterpret_cast(ctx[part]->state) + 24); \ - } -#endif - -#define VARIANT1_1(p) \ - if (BASE == xmrig::VARIANT_1) { \ - const uint8_t tmp = reinterpret_cast(p)[11]; \ - static const uint32_t table = 0x75310; \ - const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \ - ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \ - } - -#define VARIANT1_2(p, part) \ - if (BASE == xmrig::VARIANT_1) { \ - (p) ^= tweak1_2_##part; \ - } - - -#ifndef XMRIG_ARM -# define VARIANT2_INIT(part) \ - __m128i division_result_xmm_##part = _mm_cvtsi64_si128(h##part[12]); \ - __m128i sqrt_result_xmm_##part = _mm_cvtsi64_si128(h##part[13]); - -#ifdef _MSC_VER -# define VARIANT2_SET_ROUNDING_MODE() if (BASE == xmrig::VARIANT_2) { _control87(RC_DOWN, MCW_RC); } -#else -# define VARIANT2_SET_ROUNDING_MODE() if (BASE == xmrig::VARIANT_2) { fesetround(FE_DOWNWARD); } -#endif - -# define VARIANT2_INTEGER_MATH(part, cl, cx) \ - do { \ - const uint64_t sqrt_result = static_cast(_mm_cvtsi128_si64(sqrt_result_xmm_##part)); \ - const uint64_t cx_0 = _mm_cvtsi128_si64(cx); \ - cl ^= static_cast(_mm_cvtsi128_si64(division_result_xmm_##part)) ^ (sqrt_result << 32); \ - const uint32_t d = static_cast(cx_0 + (sqrt_result << 1)) | 0x80000001UL; \ - const uint64_t cx_1 = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \ - const uint64_t division_result = static_cast(cx_1 / d) + ((cx_1 % d) << 32); \ - division_result_xmm_##part = 
_mm_cvtsi64_si128(static_cast(division_result)); \ - sqrt_result_xmm_##part = int_sqrt_v2(cx_0 + division_result); \ - } while (0) - -# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c, reverse) \ - do { \ - const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ (reverse ? 0x30 : 0x10)))); \ - const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \ - const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ (reverse ? 0x10 : 0x30)))); \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \ - if (VARIANT == xmrig::VARIANT_4) { \ - _c = _mm_xor_si128(_mm_xor_si128(_c, chunk3), _mm_xor_si128(chunk1, chunk2)); \ - } \ - } while (0) - -# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo, reverse) \ - do { \ - const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))), _mm_set_epi64x(lo, hi)); \ - const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \ - hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \ - lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \ - const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \ - if (reverse) { \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk1, _b1)); \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk3, _b)); \ - } else { \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \ - } \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \ - } while (0) - -#else -# define 
VARIANT2_INIT(part) \ - uint64_t division_result_##part = h##part[12]; \ - uint64_t sqrt_result_##part = h##part[13]; - -# define VARIANT2_INTEGER_MATH(part, cl, cx) \ - do { \ - const uint64_t cx_0 = _mm_cvtsi128_si64(cx); \ - cl ^= division_result_##part ^ (sqrt_result_##part << 32); \ - const uint32_t d = static_cast(cx_0 + (sqrt_result_##part << 1)) | 0x80000001UL; \ - const uint64_t cx_1 = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \ - division_result_##part = static_cast(cx_1 / d) + ((cx_1 % d) << 32); \ - const uint64_t sqrt_input = cx_0 + division_result_##part; \ - sqrt_result_##part = sqrt(sqrt_input + 18446744073709551616.0) * 2.0 - 8589934592.0; \ - const uint64_t s = sqrt_result_##part >> 1; \ - const uint64_t b = sqrt_result_##part & 1; \ - const uint64_t r2 = (uint64_t)(s) * (s + b) + (sqrt_result_##part << 32); \ - sqrt_result_##part += ((r2 + b > sqrt_input) ? -1 : 0) + ((r2 + (1ULL << 32) < sqrt_input - s) ? 1 : 0); \ - } while (0) - -# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c, reverse) \ - do { \ - const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ (reverse ? 0x30 : 0x10)))); \ - const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \ - const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ (reverse ? 
0x10 : 0x30)))); \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \ - if (VARIANT == xmrig::VARIANT_4) { \ - _c = veorq_u64(veorq_u64(_c, chunk3), veorq_u64(chunk1, chunk2)); \ - } \ - } while (0) - -# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo, reverse) \ - do { \ - const uint64x2_t chunk1 = veorq_u64(vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))), vcombine_u64(vcreate_u64(hi), vcreate_u64(lo))); \ - const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \ - hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \ - lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \ - const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \ - if (reverse) { \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b1))); \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b))); \ - } else { \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \ - } \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \ - } while (0) -#endif - -#define SWAP32LE(x) x -#define SWAP64LE(x) x -#define hash_extra_blake(data, length, hash) blake256_hash((uint8_t*)(hash), (uint8_t*)(data), (length)) - -#ifndef NOINLINE -#ifdef __GNUC__ -#define NOINLINE __attribute__ ((noinline)) -#elif _MSC_VER -#define NOINLINE __declspec(noinline) -#else -#define NOINLINE -#endif -#endif - -#include "common/xmrig.h" -#include "variant4_random_math.h" - -#define 
VARIANT4_RANDOM_MATH_INIT(part) \ - uint32_t r##part[9]; \ - struct V4_Instruction code##part[256]; \ - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { \ - r##part[0] = (uint32_t)(h##part[12]); \ - r##part[1] = (uint32_t)(h##part[12] >> 32); \ - r##part[2] = (uint32_t)(h##part[13]); \ - r##part[3] = (uint32_t)(h##part[13] >> 32); \ - } \ - v4_random_math_init(code##part, height); - -#define VARIANT4_RANDOM_MATH(part, al, ah, cl, bx0, bx1) \ - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { \ - cl ^= (r##part[0] + r##part[1]) | ((uint64_t)(r##part[2] + r##part[3]) << 32); \ - r##part[4] = static_cast(al); \ - r##part[5] = static_cast(ah); \ - r##part[6] = static_cast(_mm_cvtsi128_si32(bx0)); \ - r##part[7] = static_cast(_mm_cvtsi128_si32(bx1)); \ - r##part[8] = static_cast(_mm_cvtsi128_si32(_mm_srli_si128(bx1, 8))); \ - v4_random_math(code##part, r##part); \ - } - -#endif /* XMRIG_CRYPTONIGHT_MONERO_H */ diff --git a/src/crypto/CryptoNight_test.h b/src/crypto/CryptoNight_test.h deleted file mode 100644 index 6fa9dd28..00000000 --- a/src/crypto/CryptoNight_test.h +++ /dev/null @@ -1,388 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef XMRIG_CRYPTONIGHT_TEST_H -#define XMRIG_CRYPTONIGHT_TEST_H - - -#include - - -const static uint8_t test_input[380] = { - 0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00, - 0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B, - 0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62, - 0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92, - 0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01, - 0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19, - 0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9, - 0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F, - 0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46, - 0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02, - 0x07, 0x07, 0xB4, 0x87, 0xD0, 0xD6, 0x05, 0x26, 0xE0, 0xC6, 0xDD, 0x9B, 0xC7, 0x18, 0xC3, 0xCF, - 0x52, 0x04, 0xBD, 0x4F, 0x9B, 0x27, 0xF6, 0x73, 0xB9, 0x3F, 0xEF, 0x7B, 0xB2, 0xF7, 0x2B, 0xBB, - 0x3F, 0x3E, 0x9C, 0x3E, 0x9D, 0x33, 0x1E, 0xDE, 0xAD, 0xBE, 0xEF, 0x4E, 0x00, 0x91, 0x81, 0x29, - 0x74, 0xB2, 0x70, 0xE7, 0x6D, 0xD2, 0x2A, 0x5F, 0x52, 0x04, 0x93, 0xE6, 0x18, 0x89, 0x40, 0xD8, - 0xC6, 0xE3, 0x90, 0x6E, 0xAA, 0x6A, 0xB7, 0xE2, 0x08, 0x7E, 0x78, 0x0E, - 0x01, 0x00, 0xEE, 0xB2, 0xD1, 0xD6, 0x05, 0xFF, 0x27, 0x7F, 0x26, 0xDB, 0xAA, 0xB2, 0xC9, 0x26, - 0x30, 0xC6, 0xCF, 0x11, 0x64, 0xEA, 0x6C, 0x8A, 0xE0, 0x98, 0x01, 0xF8, 0x75, 0x4B, 0x49, 0xAF, - 0x79, 0x70, 0xAE, 0xEE, 0xA7, 0x62, 0x2C, 0x00, 0x00, 0x00, 0x00, 0x47, 0x8C, 0x63, 0xE7, 0xD8, - 0x40, 0x02, 0x3C, 0xDA, 0xEA, 0x92, 0x52, 0x53, 0xAC, 
0xFD, 0xC7, 0x8A, 0x4C, 0x31, 0xB2, 0xF2, - 0xEC, 0x72, 0x7B, 0xFF, 0xCE, 0xC0, 0xE7, 0x12, 0xD4, 0xE9, 0x2A, 0x01, - 0x07, 0x07, 0xA9, 0xB7, 0xD1, 0xD6, 0x05, 0x3F, 0x0D, 0x5E, 0xFD, 0xC7, 0x03, 0xFC, 0xFC, 0xD2, - 0xCE, 0xBC, 0x44, 0xD8, 0xAB, 0x44, 0xA6, 0xA0, 0x3A, 0xE4, 0x4D, 0x8F, 0x15, 0xAF, 0x62, 0x17, - 0xD1, 0xE0, 0x92, 0x85, 0xE4, 0x73, 0xF9, 0x00, 0x00, 0x00, 0xA0, 0xFC, 0x09, 0xDE, 0xAB, 0xF5, - 0x8B, 0x6F, 0x1D, 0xCA, 0xA8, 0xBA, 0xAC, 0x74, 0xDD, 0x74, 0x19, 0xD5, 0xD6, 0x10, 0xEC, 0x38, - 0xCF, 0x50, 0x29, 0x6A, 0x07, 0x0B, 0x93, 0x8F, 0x8F, 0xA8, 0x10, 0x04 -}; - - -struct cn_r_test_input_data -{ - uint64_t height; - size_t size; - uint8_t data[64]; -}; - - -const static cn_r_test_input_data cn_r_test_input[] = { - { 1806260, 44, { 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74 } }, - { 1806261, 50, { 0x4c, 0x6f, 0x72, 0x65, 0x6d, 0x20, 0x69, 0x70, 0x73, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x73, 0x69, 0x74, 0x20, 0x61, 0x6d, 0x65, 0x74, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x63, 0x74, 0x65, 0x74, 0x75, 0x72, 0x20, 0x61, 0x64, 0x69, 0x70, 0x69, 0x73, 0x63, 0x69, 0x6e, 0x67 } }, - { 1806262, 48, { 0x65, 0x6c, 0x69, 0x74, 0x2c, 0x20, 0x73, 0x65, 0x64, 0x20, 0x64, 0x6f, 0x20, 0x65, 0x69, 0x75, 0x73, 0x6d, 0x6f, 0x64, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x63, 0x69, 0x64, 0x69, 0x64, 0x75, 0x6e, 0x74, 0x20, 0x75, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x65 } }, - { 1806263, 48, { 0x65, 0x74, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x6d, 0x61, 0x67, 0x6e, 0x61, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x61, 0x2e, 0x20, 0x55, 0x74, 0x20, 0x65, 0x6e, 0x69, 0x6d, 0x20, 0x61, 0x64, 0x20, 0x6d, 0x69, 0x6e, 0x69, 0x6d, 0x20, 0x76, 0x65, 0x6e, 0x69, 0x61, 0x6d, 0x2c } }, - { 1806264, 46, { 0x71, 
0x75, 0x69, 0x73, 0x20, 0x6e, 0x6f, 0x73, 0x74, 0x72, 0x75, 0x64, 0x20, 0x65, 0x78, 0x65, 0x72, 0x63, 0x69, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x75, 0x6c, 0x6c, 0x61, 0x6d, 0x63, 0x6f, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x69, 0x73, 0x20, 0x6e, 0x69, 0x73, 0x69 } }, - { 1806265, 45, { 0x75, 0x74, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x69, 0x70, 0x20, 0x65, 0x78, 0x20, 0x65, 0x61, 0x20, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x64, 0x6f, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x71, 0x75, 0x61, 0x74, 0x2e, 0x20, 0x44, 0x75, 0x69, 0x73, 0x20, 0x61, 0x75, 0x74, 0x65 } }, - { 1806266, 47, { 0x69, 0x72, 0x75, 0x72, 0x65, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x20, 0x72, 0x65, 0x70, 0x72, 0x65, 0x68, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x69, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x76, 0x6f, 0x6c, 0x75, 0x70, 0x74, 0x61, 0x74, 0x65, 0x20, 0x76, 0x65, 0x6c, 0x69, 0x74 } }, - { 1806267, 44, { 0x65, 0x73, 0x73, 0x65, 0x20, 0x63, 0x69, 0x6c, 0x6c, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x65, 0x75, 0x20, 0x66, 0x75, 0x67, 0x69, 0x61, 0x74, 0x20, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x20, 0x70, 0x61, 0x72, 0x69, 0x61, 0x74, 0x75, 0x72, 0x2e } }, - { 1806268, 47, { 0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x65, 0x75, 0x72, 0x20, 0x73, 0x69, 0x6e, 0x74, 0x20, 0x6f, 0x63, 0x63, 0x61, 0x65, 0x63, 0x61, 0x74, 0x20, 0x63, 0x75, 0x70, 0x69, 0x64, 0x61, 0x74, 0x61, 0x74, 0x20, 0x6e, 0x6f, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x2c } }, - { 1806269, 62, { 0x73, 0x75, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x63, 0x75, 0x6c, 0x70, 0x61, 0x20, 0x71, 0x75, 0x69, 0x20, 0x6f, 0x66, 0x66, 0x69, 0x63, 0x69, 0x61, 0x20, 0x64, 0x65, 0x73, 0x65, 0x72, 0x75, 0x6e, 0x74, 0x20, 0x6d, 0x6f, 0x6c, 0x6c, 0x69, 0x74, 0x20, 0x61, 0x6e, 0x69, 0x6d, 0x20, 0x69, 0x64, 0x20, 0x65, 0x73, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x75, 0x6d, 0x2e } }, -}; - - -// "cn/wow" -const static uint8_t test_output_wow[] = { - 0x9d, 0x47, 0xbf, 0x4c, 0x41, 0xb7, 0xe8, 0xe7, 0x27, 0xe6, 0x81, 0x71, 
0x5a, 0xcb, 0x47, 0xfa, 0x16, 0x77, 0xcd, 0xba, 0x9c, 0xa7, 0xbc, 0xb0, 0x5a, 0xd8, 0xcc, 0x8a, 0xbd, 0x5d, 0xaa, 0x66, - 0x0d, 0x4a, 0x49, 0x5c, 0xb8, 0x44, 0xa3, 0xca, 0x8b, 0xa4, 0xed, 0xb8, 0xe6, 0xbc, 0xf8, 0x29, 0xef, 0x1c, 0x06, 0xd9, 0xcd, 0xea, 0x2b, 0x62, 0xca, 0x46, 0xc2, 0xa2, 0x1b, 0x8b, 0x0a, 0x79, - 0xa1, 0xd6, 0xd8, 0x48, 0xb5, 0xc5, 0x91, 0x5f, 0xcc, 0xd2, 0xf6, 0x4c, 0xf2, 0x16, 0xc6, 0xb1, 0xa0, 0x2c, 0xf7, 0xc7, 0x7b, 0xc8, 0x0d, 0x8d, 0x4e, 0x51, 0xb4, 0x19, 0xe8, 0x8f, 0xf0, 0xdd, - 0xaf, 0x3a, 0x85, 0x44, 0xa0, 0x22, 0x1a, 0x14, 0x8c, 0x2a, 0xc9, 0x04, 0x84, 0xb1, 0x98, 0x61, 0xe3, 0xaf, 0xca, 0x33, 0xfe, 0x17, 0x02, 0x1e, 0xfb, 0x8a, 0xd6, 0x49, 0x6b, 0x56, 0x79, 0x15, - 0x31, 0x33, 0x99, 0xe0, 0x96, 0x3a, 0xe8, 0xa9, 0x9d, 0xab, 0x8a, 0xf6, 0x6d, 0x34, 0x3e, 0x09, 0x7d, 0xae, 0x0c, 0x0f, 0xeb, 0x08, 0xdb, 0xc4, 0x3c, 0xcd, 0xaf, 0xef, 0x55, 0x15, 0xf4, 0x13, - 0x60, 0x21, 0xc6, 0xef, 0x90, 0xbf, 0xf9, 0xae, 0x94, 0xa7, 0x50, 0x6d, 0x62, 0x3d, 0x3a, 0x7a, 0x86, 0xc1, 0x75, 0x6d, 0x65, 0x5f, 0x50, 0xdd, 0x55, 0x8f, 0x71, 0x6d, 0x64, 0x62, 0x2a, 0x34, - 0x2b, 0x13, 0x00, 0x05, 0x35, 0xf3, 0xdb, 0x5f, 0x9b, 0x9b, 0x84, 0xa6, 0x5c, 0x43, 0x51, 0xf3, 0x86, 0xcd, 0x2c, 0xde, 0xde, 0xbb, 0x8c, 0x3a, 0xd2, 0xea, 0xb0, 0x86, 0xe6, 0xa3, 0xfe, 0xe5, - 0xfc, 0x0e, 0x1d, 0xad, 0x8e, 0x89, 0x57, 0x49, 0xdc, 0x90, 0xeb, 0x69, 0x0b, 0xc1, 0xba, 0x05, 0x9a, 0x1c, 0xd7, 0x72, 0xaf, 0xaa, 0xf6, 0x5a, 0x10, 0x6b, 0xf9, 0xe5, 0xe6, 0xb8, 0x05, 0x03, - 0xb6, 0x0b, 0x0a, 0xfe, 0x14, 0x4d, 0xef, 0xf7, 0xd9, 0x03, 0xed, 0x2d, 0x55, 0x45, 0xe7, 0x7e, 0xbe, 0x66, 0xa3, 0xc5, 0x1f, 0xee, 0x70, 0x16, 0xee, 0xb8, 0xfe, 0xe9, 0xeb, 0x63, 0x0c, 0x0f, - 0x64, 0x77, 0x4b, 0x27, 0xe7, 0xd5, 0xfe, 0xc8, 0x62, 0xfc, 0x4c, 0x0c, 0x13, 0xac, 0x6b, 0xf0, 0x91, 0x23, 0xb6, 0xf0, 0x5b, 0xb0, 0xe4, 0xb7, 0x5c, 0x97, 0xf3, 0x79, 0xa2, 0xb3, 0xa6, 0x79, -}; - - -// "cn/r" -const static uint8_t test_output_r[] = { - 0xf7, 0x59, 0x58, 0x8a, 0xd5, 0x7e, 0x75, 0x84, 0x67, 0x29, 0x54, 
0x43, 0xa9, 0xbd, 0x71, 0x49, 0x0a, 0xbf, 0xf8, 0xe9, 0xda, 0xd1, 0xb9, 0x5b, 0x6b, 0xf2, 0xf5, 0xd0, 0xd7, 0x83, 0x87, 0xbc, - 0x5b, 0xb8, 0x33, 0xde, 0xca, 0x2b, 0xdd, 0x72, 0x52, 0xa9, 0xcc, 0xd7, 0xb4, 0xce, 0x0b, 0x6a, 0x48, 0x54, 0x51, 0x57, 0x94, 0xb5, 0x6c, 0x20, 0x72, 0x62, 0xf7, 0xa5, 0xb9, 0xbd, 0xb5, 0x66, - 0x1e, 0xe6, 0x72, 0x8d, 0xa6, 0x0f, 0xbd, 0x8d, 0x7d, 0x55, 0xb2, 0xb1, 0xad, 0xe4, 0x87, 0xa3, 0xcf, 0x52, 0xa2, 0xc3, 0xac, 0x6f, 0x52, 0x0d, 0xb1, 0x2c, 0x27, 0xd8, 0x92, 0x1f, 0x6c, 0xab, - 0x69, 0x69, 0xfe, 0x2d, 0xdf, 0xb7, 0x58, 0x43, 0x8d, 0x48, 0x04, 0x9f, 0x30, 0x2f, 0xc2, 0x10, 0x8a, 0x4f, 0xcc, 0x93, 0xe3, 0x76, 0x69, 0x17, 0x0e, 0x6d, 0xb4, 0xb0, 0xb9, 0xb4, 0xc4, 0xcb, - 0x7f, 0x30, 0x48, 0xb4, 0xe9, 0x0d, 0x0c, 0xbe, 0x7a, 0x57, 0xc0, 0x39, 0x4f, 0x37, 0x33, 0x8a, 0x01, 0xfa, 0xe3, 0xad, 0xfd, 0xc0, 0xe5, 0x12, 0x6d, 0x86, 0x3a, 0x89, 0x5e, 0xb0, 0x4e, 0x02, - 0x1d, 0x29, 0x04, 0x43, 0xa4, 0xb5, 0x42, 0xaf, 0x04, 0xa8, 0x2f, 0x6b, 0x24, 0x94, 0xa6, 0xee, 0x7f, 0x20, 0xf2, 0x75, 0x4c, 0x58, 0xe0, 0x84, 0x90, 0x32, 0x48, 0x3a, 0x56, 0xe8, 0xe2, 0xef, - 0xc4, 0x3c, 0xc6, 0x56, 0x74, 0x36, 0xa8, 0x6a, 0xfb, 0xd6, 0xaa, 0x9e, 0xaa, 0x7c, 0x27, 0x6e, 0x98, 0x06, 0x83, 0x03, 0x34, 0xb6, 0x14, 0xb2, 0xbe, 0xe2, 0x3c, 0xc7, 0x66, 0x34, 0xf6, 0xfd, - 0x87, 0xbe, 0x24, 0x79, 0xc0, 0xc4, 0xe8, 0xed, 0xfd, 0xfa, 0xa5, 0x60, 0x3e, 0x93, 0xf4, 0x26, 0x5b, 0x3f, 0x82, 0x24, 0xc1, 0xc5, 0x94, 0x6f, 0xeb, 0x42, 0x48, 0x19, 0xd1, 0x89, 0x90, 0xa4, - 0xdd, 0x9d, 0x6a, 0x6d, 0x8e, 0x47, 0x46, 0x5c, 0xce, 0xac, 0x08, 0x77, 0xef, 0x88, 0x9b, 0x93, 0xe7, 0xeb, 0xa9, 0x79, 0x55, 0x7e, 0x39, 0x35, 0xd7, 0xf8, 0x6d, 0xce, 0x11, 0xb0, 0x70, 0xf3, - 0x75, 0xc6, 0xf2, 0xae, 0x49, 0xa2, 0x05, 0x21, 0xde, 0x97, 0x28, 0x5b, 0x43, 0x1e, 0x71, 0x71, 0x25, 0x84, 0x7f, 0xb8, 0x93, 0x5e, 0xd8, 0x4a, 0x61, 0xe7, 0xf8, 0xd3, 0x6a, 0x2c, 0x3d, 0x8e, -}; - - -// "cn/0" -const static uint8_t test_output_v0[160] = { - 0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 
0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7, - 0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00, - 0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66, - 0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F, - 0xA1, 0xB4, 0xFA, 0xE3, 0xE5, 0x76, 0xCE, 0xCF, 0xB7, 0x9C, 0xAF, 0x3E, 0x29, 0x92, 0xE4, 0xE0, - 0x31, 0x24, 0x05, 0x48, 0xBF, 0x8D, 0x5F, 0x7B, 0x11, 0x03, 0x60, 0xAA, 0xD7, 0x50, 0x3F, 0x0C, - 0x2D, 0x30, 0xF3, 0x87, 0x4F, 0x86, 0xA1, 0x4A, 0xB5, 0xA2, 0x1A, 0x08, 0xD0, 0x44, 0x2C, 0x9D, - 0x16, 0xE9, 0x28, 0x49, 0xA1, 0xFF, 0x85, 0x6F, 0x12, 0xBB, 0x7D, 0xAB, 0x11, 0x1C, 0xE7, 0xF7, - 0x2D, 0x9D, 0x19, 0xE4, 0xD2, 0x26, 0x44, 0x1E, 0xCD, 0x22, 0x08, 0x24, 0xA8, 0x97, 0x46, 0x62, - 0x04, 0x84, 0x90, 0x4A, 0xEE, 0x99, 0x14, 0xED, 0xB8, 0xC6, 0x0D, 0x37, 0xA1, 0x66, 0x17, 0xB0 -}; - - -// "cn/1" Cryptonight variant 1 (Monero v7) -const static uint8_t test_output_v1[160] = { - 0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9, - 0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9, - 0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D, - 0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22, - 0xE7, 0x8C, 0x5A, 0x6E, 0x38, 0x30, 0x68, 0x4A, 0x73, 0xFC, 0x1B, 0xC6, 0x6D, 0xFC, 0x8D, 0x98, - 0xB4, 0xC2, 0x23, 0x39, 0xAD, 0xE0, 0x9D, 0xF6, 0x6D, 0x8C, 0x6A, 0xAA, 0xF9, 0xB2, 0xE3, 0x4C, - 0xB6, 0x90, 0x6C, 0xE6, 0x15, 0x5E, 0x46, 0x07, 0x9C, 0xB2, 0x6B, 0xAC, 0x3B, 0xAC, 0x1A, 0xDE, - 0x92, 0x2C, 0xD6, 0x0C, 0x46, 0x9D, 0x9B, 0xC2, 0x84, 0x52, 0x65, 0xF6, 0xBD, 0xFA, 0x0D, 0x74, - 0x00, 0x66, 0x10, 0x07, 0xF1, 0x19, 0x06, 0x3A, 0x6C, 0xFF, 0xEE, 0xB2, 0x40, 0xE5, 0x88, 0x2B, - 0x6C, 0xAB, 0x6B, 0x1D, 0x88, 0xB8, 0x44, 0x25, 0xF4, 0xEA, 0xB7, 0xEC, 0xBA, 0x12, 0x8A, 0x24 -}; 
- - -// "cn/2" Cryptonight variant 2 (Monero v8) -const static uint8_t test_output_v2[160] = { - 0x97, 0x37, 0x82, 0x82, 0xCF, 0x10, 0xE7, 0xAD, 0x03, 0x3F, 0x7B, 0x80, 0x74, 0xC4, 0x0E, 0x14, - 0xD0, 0x6E, 0x7F, 0x60, 0x9D, 0xDD, 0xDA, 0x78, 0x76, 0x80, 0xB5, 0x8C, 0x05, 0xF4, 0x3D, 0x21, - 0x87, 0x1F, 0xCD, 0x68, 0x23, 0xF6, 0xA8, 0x79, 0xBB, 0x3F, 0x33, 0x95, 0x1C, 0x8E, 0x8E, 0x89, - 0x1D, 0x40, 0x43, 0x88, 0x0B, 0x02, 0xDF, 0xA1, 0xBB, 0x3B, 0xE4, 0x98, 0xB5, 0x0E, 0x75, 0x78, - 0xE6, 0x0D, 0x24, 0x0F, 0x65, 0x85, 0x60, 0x3A, 0x4A, 0xE5, 0x5F, 0x54, 0x9B, 0xC8, 0x79, 0x93, - 0xEB, 0x3D, 0x98, 0x2C, 0xFE, 0x9B, 0xFB, 0x15, 0xB6, 0x88, 0x21, 0x94, 0xB0, 0x05, 0x86, 0x5C, - 0x59, 0x8B, 0x93, 0x7A, 0xDA, 0xD2, 0xA2, 0x14, 0xED, 0xB7, 0xC4, 0x5D, 0xA1, 0xEF, 0x26, 0xF3, - 0xC7, 0x73, 0x29, 0x4D, 0xF1, 0xC8, 0x2C, 0xE0, 0xD0, 0xE9, 0xED, 0x0C, 0x70, 0x75, 0x05, 0x3E, - 0x5B, 0xF6, 0xA0, 0x6E, 0xEA, 0xDE, 0x87, 0x0B, 0x06, 0x29, 0x03, 0xBF, 0xB4, 0x85, 0x9D, 0x04, - 0x75, 0x1A, 0xCD, 0x1E, 0xD6, 0xAA, 0x1B, 0x05, 0x24, 0x6A, 0x2C, 0x80, 0x69, 0x68, 0xDC, 0x97 -}; - - -// "cn/xtl" Stellite (XTL) -const static uint8_t test_output_xtl[160] = { - 0x8F, 0xE5, 0xF0, 0x5F, 0x02, 0x2A, 0x61, 0x7D, 0xE5, 0x3F, 0x79, 0x36, 0x4B, 0x25, 0xCB, 0xC3, - 0xC0, 0x8E, 0x0E, 0x1F, 0xE3, 0xBE, 0x48, 0x57, 0x07, 0x03, 0xFE, 0xE1, 0xEC, 0x0E, 0xB0, 0xB1, - 0x21, 0x26, 0xFF, 0x98, 0xE6, 0x86, 0x08, 0x5B, 0xC9, 0x96, 0x44, 0xA3, 0xB8, 0x4E, 0x28, 0x90, - 0x76, 0xED, 0xAD, 0xB9, 0xAA, 0xAC, 0x01, 0x94, 0x1D, 0xBE, 0x3E, 0xEA, 0xAD, 0xEE, 0xB2, 0xCF, - 0xB0, 0x43, 0x4B, 0x88, 0xFC, 0xB2, 0xF3, 0x82, 0x9D, 0xD7, 0xDF, 0x51, 0x97, 0x2C, 0x5A, 0xE3, - 0xC7, 0x16, 0x0B, 0xC8, 0x7C, 0xB7, 0x2F, 0x1C, 0x55, 0x33, 0xCA, 0xE1, 0xEE, 0x08, 0xA4, 0x86, - 0x60, 0xED, 0x6E, 0x9D, 0x2D, 0x05, 0x0D, 0x7D, 0x02, 0x49, 0x23, 0x39, 0x7C, 0xC3, 0x6D, 0x3D, - 0x05, 0x51, 0x28, 0xF1, 0x9B, 0x3C, 0xDF, 0xC4, 0xEA, 0x8A, 0xA6, 0x6A, 0x3C, 0x8B, 0xE2, 0xAF, - 0x47, 0x00, 0xFC, 0x36, 0xED, 0x50, 0xBB, 0xD2, 0x2E, 
0x63, 0x4B, 0x93, 0x11, 0x0C, 0xA7, 0xBA, - 0x32, 0x6E, 0x47, 0x4D, 0xCE, 0xCC, 0x82, 0x54, 0x1D, 0x06, 0xF8, 0x06, 0x86, 0xBD, 0x22, 0x48 -}; - - -// "cn/half" -const static uint8_t test_output_half[160] = { - 0x5D, 0x4F, 0xBC, 0x35, 0x60, 0x97, 0xEA, 0x64, 0x40, 0xB0, 0x88, 0x8E, 0xDE, 0xB6, 0x35, 0xDD, - 0xC8, 0x4A, 0x0E, 0x39, 0x7C, 0x86, 0x84, 0x56, 0x89, 0x5C, 0x3F, 0x29, 0xBE, 0x73, 0x12, 0xA7, - 0x02, 0xE6, 0x1D, 0x2B, 0xBC, 0x84, 0xB6, 0x71, 0x96, 0x71, 0xD5, 0x0C, 0xAC, 0x76, 0x0E, 0x6B, - 0xF1, 0xF0, 0x55, 0x34, 0x15, 0x29, 0x93, 0x04, 0x2D, 0xED, 0xD2, 0x33, 0x50, 0x6E, 0xBE, 0x25, - 0xD0, 0xFD, 0x8E, 0xC6, 0x15, 0xD5, 0x12, 0x53, 0x7B, 0x26, 0xF6, 0x01, 0xA5, 0xA8, 0xBE, 0x7C, - 0xCF, 0x5E, 0x19, 0xB7, 0x63, 0x0D, 0x0F, 0x02, 0x2B, 0xD7, 0xC4, 0x8C, 0x12, 0x24, 0x80, 0x02, - 0xE7, 0xB7, 0xA0, 0x4F, 0x94, 0xF9, 0x46, 0xB5, 0x18, 0x64, 0x7E, 0x4E, 0x9C, 0x81, 0x6C, 0x60, - 0x7D, 0x2E, 0xEA, 0xCF, 0x90, 0xCB, 0x68, 0x09, 0xC9, 0x53, 0xF6, 0xA9, 0xCA, 0x0C, 0xAC, 0xDC, - 0xFD, 0x07, 0xDA, 0x24, 0x1D, 0xD1, 0x35, 0x32, 0x3C, 0xE8, 0x64, 0x44, 0x5E, 0xCB, 0xB5, 0x00, - 0x69, 0xF4, 0x6F, 0xBB, 0x62, 0x0D, 0x25, 0xD8, 0xAC, 0x20, 0x90, 0xC5, 0x1B, 0xD3, 0x5F, 0xCA -}; - - -// "cn/msr" Masari (MSR) -const static uint8_t test_output_msr[160] = { - 0x3C, 0x7A, 0x61, 0x08, 0x4C, 0x5E, 0xB8, 0x65, 0xB4, 0x98, 0xAB, 0x2F, 0x5A, 0x1A, 0xC5, 0x2C, - 0x49, 0xC1, 0x77, 0xC2, 0xD0, 0x13, 0x34, 0x42, 0xD6, 0x5E, 0xD5, 0x14, 0x33, 0x5C, 0x82, 0xC5, - 0x69, 0xDF, 0x38, 0x51, 0x1B, 0xB3, 0xEB, 0x7D, 0xE7, 0x6B, 0x08, 0x8E, 0xB6, 0x7E, 0xB7, 0x1C, - 0x5F, 0x3C, 0x81, 0xC9, 0xF7, 0xCE, 0xAE, 0x28, 0xC0, 0xFE, 0xEB, 0xBA, 0x0B, 0x40, 0x38, 0x1D, - 0x44, 0xD0, 0xD5, 0xD3, 0x98, 0x1F, 0xA3, 0x0E, 0xE9, 0x89, 0x1A, 0xD7, 0x88, 0xCC, 0x25, 0x76, - 0x9C, 0xFF, 0x4D, 0x7F, 0x9C, 0xCF, 0x48, 0x07, 0x91, 0xF9, 0x82, 0xF5, 0x4C, 0xE9, 0xBD, 0x82, - 0x36, 0x36, 0x64, 0x14, 0xED, 0xB8, 0x54, 0xEE, 0x22, 0xA1, 0x66, 0xA3, 0x87, 0x10, 0x76, 0x1F, - 0x5A, 0xCD, 0x4C, 0x31, 0x4C, 0xBA, 0x41, 
0xD2, 0xDB, 0x6C, 0x31, 0x2E, 0x7A, 0x64, 0x15, 0xFF, - 0xA6, 0xD9, 0xB9, 0x7D, 0x1C, 0x3C, 0x98, 0xDD, 0x16, 0xE6, 0xD3, 0xAA, 0xEF, 0xB6, 0xB3, 0x53, - 0x74, 0xD1, 0xAC, 0x5C, 0x04, 0x26, 0x7D, 0x71, 0xDE, 0xAB, 0x66, 0x28, 0x91, 0x3A, 0x6F, 0x4F -}; - - -// "cn/xao" Alloy (XAO) -const static uint8_t test_output_xao[160] = { - 0x9A, 0x29, 0xD0, 0xC4, 0xAF, 0xDC, 0x63, 0x9B, 0x65, 0x53, 0xB1, 0xC8, 0x37, 0x35, 0x11, 0x4C, - 0x5D, 0x77, 0x16, 0x21, 0x42, 0x97, 0x5C, 0xB8, 0x50, 0xC0, 0xA5, 0x1F, 0x64, 0x07, 0xBD, 0x33, - 0xF1, 0xC9, 0x98, 0x40, 0x42, 0xDE, 0x39, 0xD1, 0xBA, 0x2D, 0xAD, 0xEC, 0xFE, 0xEA, 0xD8, 0x46, - 0x56, 0x1C, 0x32, 0x90, 0x42, 0x63, 0x10, 0x80, 0xD7, 0x01, 0xE4, 0xE6, 0x20, 0xB3, 0x60, 0x45, - 0x05, 0xE5, 0xC2, 0x18, 0xCD, 0x07, 0xA4, 0x40, 0x42, 0x91, 0xE2, 0xA4, 0x52, 0x54, 0x79, 0xBA, - 0xCD, 0x7E, 0x61, 0x2D, 0x7F, 0x7E, 0x69, 0x5E, 0xD7, 0xC0, 0x06, 0x65, 0xD7, 0xA1, 0xB8, 0xB8, - 0x1E, 0x31, 0x1C, 0xD3, 0xB7, 0xBC, 0x78, 0x3C, 0x01, 0xAF, 0x77, 0xAA, 0xF3, 0x0F, 0x4C, 0xF2, - 0xD1, 0x8B, 0x58, 0xC7, 0xEB, 0x99, 0x91, 0x53, 0x43, 0x71, 0x47, 0x99, 0x9E, 0x04, 0xA4, 0xEA, - 0xB8, 0xA3, 0xB0, 0x9E, 0x09, 0xF5, 0x57, 0x5C, 0xCF, 0x8A, 0xC6, 0xCA, 0x88, 0x51, 0x9A, 0x01, - 0x31, 0xCC, 0x0C, 0xA6, 0x53, 0xB5, 0x5F, 0xFD, 0x7D, 0x29, 0x3A, 0x35, 0xE9, 0x0E, 0x25, 0x6C -}; - - -// "cn/rto" Arto (RTO) -const static uint8_t test_output_rto[160] = { - 0x82, 0x66, 0x1E, 0x1C, 0x6E, 0x64, 0x36, 0x66, 0x84, 0x06, 0x32, 0x7A, 0x9B, 0xB1, 0x13, 0x19, - 0xA5, 0x56, 0x16, 0x15, 0xDF, 0xEC, 0x1C, 0x9E, 0xE3, 0x88, 0x4A, 0x6C, 0x1C, 0xEB, 0x76, 0xA5, - 0xB3, 0xFB, 0xF4, 0x3F, 0x2B, 0x6A, 0x3A, 0x39, 0xA3, 0x6E, 0x08, 0x33, 0x67, 0x90, 0x31, 0xB9, - 0x3F, 0x27, 0xE4, 0x79, 0x32, 0x61, 0x6B, 0x5C, 0x8A, 0xF8, 0xAF, 0xC0, 0x60, 0xFD, 0x83, 0xB7, - 0x11, 0x11, 0x89, 0xB4, 0xDC, 0xAE, 0x40, 0xC8, 0x64, 0xAA, 0x4D, 0x19, 0x23, 0x7B, 0xD3, 0x27, - 0xB2, 0x0F, 0xA7, 0x50, 0x7D, 0xCA, 0xF5, 0x03, 0x06, 0xB2, 0x26, 0x62, 0xF3, 0x68, 0x2D, 0x30, - 0x6F, 0x93, 0x1E, 
0xFF, 0xCD, 0x85, 0x40, 0x28, 0x5F, 0xC3, 0x8C, 0x76, 0x51, 0x9E, 0xD5, 0x06, - 0x32, 0xD6, 0x35, 0x83, 0xF6, 0x3B, 0x54, 0x4F, 0xA1, 0x9C, 0x13, 0xD8, 0xC4, 0x0E, 0x01, 0x2F, - 0x29, 0xDB, 0x8C, 0x1C, 0xB7, 0x06, 0x86, 0x79, 0x6D, 0xFF, 0x9F, 0x89, 0x3B, 0x3A, 0xA5, 0x79, - 0xE7, 0x81, 0x4E, 0x2A, 0xBD, 0x62, 0xC1, 0x1B, 0x7C, 0xB9, 0x33, 0x7B, 0xEE, 0x95, 0x80, 0xB3 -}; - -// "cn/rwz" -const static uint8_t test_output_rwz[160] = { - 0x5f, 0x56, 0xc6, 0xb0, 0x99, 0x6b, 0xa2, 0x3e, 0x0b, 0xba, 0x07, 0x29, 0xc9, 0x90, 0x74, 0x85, - 0x5a, 0x10, 0xe3, 0x08, 0x7f, 0xdb, 0xfe, 0x94, 0x75, 0x33, 0x54, 0x73, 0x76, 0xf0, 0x75, 0xb8, - 0x8b, 0x70, 0x43, 0x9a, 0xfc, 0xf5, 0xeb, 0x15, 0xbb, 0xf9, 0xad, 0x9d, 0x2a, 0xbd, 0x72, 0x52, - 0x49, 0x54, 0x0b, 0x91, 0xea, 0x61, 0x7f, 0x98, 0x7d, 0x39, 0x17, 0xb7, 0xd7, 0x65, 0xff, 0x75, - 0x13, 0x21, 0x1d, 0xce, 0x61, 0x5a, 0xdc, 0x5f, 0x8c, 0xcb, 0x1f, 0x6f, 0xbb, 0x92, 0x88, 0xc3, - 0xe3, 0xe2, 0xfc, 0x4f, 0x62, 0xfb, 0xf0, 0x48, 0x02, 0x01, 0xd3, 0xbe, 0x77, 0x6a, 0x40, 0xca, - 0x9a, 0xe9, 0xba, 0x0c, 0xc0, 0x2b, 0x11, 0xf6, 0x9b, 0xee, 0x24, 0x3a, 0xd8, 0x86, 0x18, 0xd0, - 0xe8, 0xeb, 0xcb, 0x38, 0x2c, 0xf5, 0x99, 0x83, 0x14, 0x7b, 0x0c, 0x20, 0xbe, 0x50, 0xf4, 0x87, - 0x83, 0x41, 0x75, 0xd8, 0xd1, 0xdd, 0x4b, 0x73, 0xb3, 0x92, 0x8f, 0xe6, 0x1c, 0x72, 0x70, 0xf5, - 0x7c, 0xf6, 0x23, 0x3a, 0xb4, 0x5f, 0xdf, 0xde, 0xa6, 0x5a, 0x58, 0xec, 0x13, 0x5a, 0x23, 0x2f -}; - -// "cn/zls" -const static uint8_t test_output_zls[160] = { - 0x51, 0x6E, 0x33, 0xC6, 0xE4, 0x46, 0xAB, 0xBC, 0xCD, 0xAD, 0x18, 0xC0, 0x4C, 0xD9, 0xA2, 0x5E, - 0x64, 0x10, 0x28, 0x53, 0xB2, 0x0A, 0x42, 0xDF, 0xDE, 0xAA, 0x8B, 0x59, 0x9E, 0xCF, 0x40, 0xE2, - 0x0D, 0x62, 0x5B, 0x42, 0x18, 0xE2, 0x76, 0xAD, 0xD0, 0x74, 0x90, 0x60, 0x8D, 0xC4, 0xC7, 0x80, - 0x17, 0xB5, 0x1B, 0x25, 0x31, 0x39, 0x87, 0xD2, 0x2D, 0x6A, 0x9D, 0x1C, 0x74, 0xF4, 0x43, 0x22, - 0x4B, 0x97, 0x1F, 0x6A, 0xD0, 0xBE, 0x00, 0x74, 0xEC, 0xC5, 0xD8, 0x3B, 0xE6, 0xF4, 0x03, 0x8A, - 0x7B, 0xBA, 0x80, 0xCC, 
0x9F, 0x00, 0xCB, 0xC2, 0x14, 0x8F, 0xF3, 0xD8, 0x92, 0x73, 0xBF, 0x17, - 0x3D, 0x9B, 0x22, 0xA3, 0x61, 0x94, 0x41, 0x9E, 0xF9, 0x68, 0x1D, 0x42, 0x48, 0x3B, 0x39, 0x45, - 0xE2, 0xE6, 0x16, 0x84, 0xFC, 0x21, 0xE6, 0xDA, 0x38, 0x7F, 0x17, 0xAB, 0xD3, 0xF2, 0xCE, 0x1A, - 0x2F, 0x35, 0xD5, 0x74, 0xFA, 0x45, 0x3B, 0x06, 0xD1, 0x4E, 0x84, 0x3A, 0x5D, 0xE3, 0x0E, 0xA5, - 0x00, 0x08, 0x64, 0xF0, 0xA6, 0xC8, 0x94, 0x45, 0x08, 0xED, 0x03, 0x95, 0x52, 0xE9, 0xBC, 0x5F -}; - -// "cn/double" -const static uint8_t test_output_double[160] = { - 0xAE, 0xFB, 0xB3, 0xF0, 0xCC, 0x88, 0x04, 0x6D, 0x11, 0x9F, 0x6C, 0x54, 0xB9, 0x6D, 0x90, 0xC9, - 0xE8, 0x84, 0xEA, 0x3B, 0x59, 0x83, 0xA6, 0x0D, 0x50, 0xA4, 0x2D, 0x7D, 0x3E, 0xBE, 0x48, 0x21, - 0x49, 0xCE, 0x8E, 0xF3, 0xBC, 0x8A, 0x36, 0xBF, 0x86, 0x37, 0x89, 0x55, 0x09, 0xBA, 0x22, 0xF8, - 0xEB, 0x3A, 0xE1, 0xDC, 0x91, 0xF7, 0x62, 0x4B, 0x9F, 0x48, 0xE6, 0x92, 0xBD, 0xE4, 0x5D, 0xC1, - 0xF1, 0x3C, 0x63, 0x1D, 0xEB, 0x0B, 0x04, 0xA3, 0x30, 0xD5, 0x11, 0x15, 0x4C, 0xCE, 0xEF, 0x4F, - 0xDF, 0x69, 0xE3, 0x9E, 0xD2, 0x68, 0xFC, 0x1B, 0x6F, 0xE8, 0x08, 0x9C, 0xBB, 0xA5, 0x2B, 0x60, - 0x52, 0x0F, 0xE5, 0xD2, 0xF3, 0x8A, 0xB3, 0xE1, 0x76, 0x7F, 0x44, 0x25, 0x76, 0xEC, 0xFF, 0xA2, - 0x0C, 0x64, 0xD0, 0x0E, 0x32, 0x33, 0x28, 0x20, 0x73, 0xE0, 0x31, 0x66, 0x4E, 0x54, 0x83, 0x49, - 0x51, 0x55, 0x4D, 0x2E, 0x22, 0xB7, 0x51, 0x09, 0x73, 0x61, 0x7E, 0x6A, 0x57, 0x0B, 0x28, 0x3C, - 0x5E, 0x2E, 0xC1, 0x80, 0x89, 0x39, 0xB3, 0x54, 0x39, 0x52, 0x0E, 0x69, 0x3D, 0xF6, 0xC5, 0x4A -}; - -#ifndef XMRIG_NO_AEON -// "cn-lite/0" -const static uint8_t test_output_v0_lite[160] = { - 0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E, - 0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88, - 0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE, - 0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 
0xDD, - 0x38, 0x08, 0xE1, 0x17, 0x0B, 0x99, 0x8D, 0x1A, 0x3C, 0xCE, 0x35, 0xC5, 0xC7, 0x3A, 0x00, 0x2E, - 0xCB, 0x54, 0xF0, 0x78, 0x2E, 0x9E, 0xDB, 0xC7, 0xDF, 0x2E, 0x71, 0x9A, 0x16, 0x97, 0xC4, 0x18, - 0x4B, 0x97, 0x07, 0xFE, 0x5D, 0x98, 0x9A, 0xD6, 0xD8, 0xE5, 0x92, 0x66, 0x87, 0x7F, 0x19, 0x37, - 0xA2, 0x5E, 0xE6, 0x96, 0xB5, 0x97, 0x33, 0x89, 0xE0, 0xA7, 0xC9, 0xDD, 0x4A, 0x7E, 0x9E, 0x53, - 0xBE, 0x91, 0x2B, 0xF5, 0xF5, 0xAF, 0xDD, 0x09, 0xA2, 0xF4, 0xA4, 0x56, 0xEB, 0x96, 0x22, 0xC9, - 0x94, 0xFB, 0x7B, 0x28, 0xC9, 0x97, 0x65, 0x04, 0xAC, 0x4F, 0x84, 0x71, 0xDA, 0x6E, 0xD8, 0xC5 -}; - - -// "cn-lite/1" AEON v7 -const static uint8_t test_output_v1_lite[160] = { - 0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD, 0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22, - 0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28, 0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41, - 0x87, 0xC4, 0xE5, 0x70, 0x65, 0x3E, 0xB4, 0xC2, 0xB4, 0x2B, 0x7A, 0x0D, 0x54, 0x65, 0x59, 0x45, - 0x2D, 0xFA, 0xB5, 0x73, 0xB8, 0x2E, 0xC5, 0x2F, 0x15, 0x2B, 0x7F, 0xF9, 0x8E, 0x79, 0x44, 0x6F, - 0x16, 0x08, 0x74, 0xC7, 0xA2, 0xD2, 0xA3, 0x97, 0x95, 0x76, 0xCA, 0x4D, 0x06, 0x39, 0x7A, 0xAB, - 0x6C, 0x87, 0x58, 0x33, 0x4D, 0xC8, 0x5A, 0xAB, 0x04, 0x27, 0xFE, 0x8B, 0x1C, 0x23, 0x2F, 0x32, - 0xC0, 0x44, 0xFF, 0x0D, 0xB5, 0x3B, 0x27, 0x96, 0x06, 0x89, 0x7B, 0xA3, 0x0B, 0xD0, 0xCE, 0x9E, - 0x90, 0x22, 0x77, 0x5A, 0xAD, 0xA1, 0xE5, 0xB6, 0xFC, 0xCB, 0x39, 0x7E, 0x2B, 0x10, 0xEE, 0xB4, - 0x8C, 0x2B, 0xA4, 0x1F, 0x60, 0x76, 0x39, 0xD7, 0xF6, 0x46, 0x77, 0x18, 0x20, 0xAD, 0xD4, 0xC9, - 0x87, 0xF7, 0x37, 0xDA, 0xFD, 0xBA, 0xBA, 0xD2, 0xF2, 0x68, 0xDC, 0x26, 0x8D, 0x1B, 0x08, 0xC6 -}; -#endif - - -#ifndef XMRIG_NO_SUMO -// "cn-heavy/0" -const static uint8_t test_output_v0_heavy[160] = { - 0x99, 0x83, 0xF2, 0x1B, 0xDF, 0x20, 0x10, 0xA8, 0xD7, 0x07, 0xBB, 0x2F, 0x14, 0xD7, 0x86, 0x64, - 0xBB, 0xE1, 0x18, 0x7F, 0x55, 0x01, 0x4B, 0x39, 0xE5, 0xF3, 0xD6, 0x93, 0x28, 0xE4, 0x8F, 0xC2, - 0x4D, 0x94, 0x7D, 0xD6, 0xDB, 0x6E, 
0x07, 0x48, 0x26, 0x4A, 0x51, 0x2E, 0xAC, 0xF3, 0x25, 0x4A, - 0x1F, 0x1A, 0xA2, 0x5B, 0xFC, 0x0A, 0xAD, 0x82, 0xDE, 0xA8, 0x99, 0x96, 0x88, 0x52, 0xD2, 0x7D, - 0x3E, 0xE1, 0x23, 0x03, 0x5A, 0x63, 0x7B, 0x66, 0xF6, 0xD7, 0xC2, 0x2A, 0x34, 0x5E, 0x88, 0xE7, - 0xFA, 0xC4, 0x25, 0x36, 0x54, 0xCB, 0xD2, 0x5C, 0x2F, 0x80, 0x2A, 0xF9, 0xCC, 0x43, 0xF7, 0xCD, - 0xE5, 0x18, 0xA8, 0x05, 0x60, 0x18, 0xA5, 0x73, 0x72, 0x9B, 0x32, 0xDC, 0x69, 0x83, 0xC1, 0xE1, - 0x1F, 0xDB, 0xDA, 0x6B, 0xAC, 0xEC, 0x9F, 0x67, 0xF8, 0x27, 0x1D, 0xC7, 0xE6, 0x46, 0x42, 0xF9, - 0x53, 0x62, 0x0A, 0x54, 0x7D, 0x43, 0xEA, 0x18, 0x94, 0xED, 0xD8, 0x92, 0x06, 0x6A, 0xA1, 0x51, - 0xAD, 0xB1, 0xFD, 0x89, 0xFB, 0x5C, 0xB4, 0x25, 0x6A, 0xDD, 0xB0, 0x09, 0xC5, 0x72, 0x87, 0xEB -}; - - -// "cn-heavy/xhv" -const static uint8_t test_output_xhv_heavy[160] = { - 0x5A, 0xC3, 0xF7, 0x85, 0xC4, 0x90, 0xC5, 0x85, 0x50, 0xEC, 0x95, 0xD2, 0x72, 0x65, 0x63, 0x57, - 0x7E, 0x7C, 0x1C, 0x21, 0x2D, 0x0C, 0xDE, 0x59, 0x12, 0x73, 0x20, 0x1E, 0x44, 0xFD, 0xD5, 0xB6, - 0x1F, 0x4E, 0xB2, 0x0A, 0x36, 0x51, 0x4B, 0xF5, 0x4D, 0xC9, 0xE0, 0x90, 0x2C, 0x16, 0x47, 0x3F, - 0xDE, 0x18, 0x29, 0x8E, 0xBB, 0x34, 0x2B, 0xEF, 0x7A, 0x04, 0x22, 0xD1, 0xB1, 0xF2, 0x48, 0xDA, - 0xE3, 0x7F, 0x4B, 0x4C, 0xB4, 0xDF, 0xE8, 0xD3, 0x70, 0xE2, 0xE7, 0x44, 0x25, 0x87, 0x12, 0xF9, - 0x8F, 0x28, 0x0B, 0xCE, 0x2C, 0xEE, 0xDD, 0x88, 0x94, 0x35, 0x48, 0x51, 0xAE, 0xC8, 0x9C, 0x0B, - 0xED, 0x2F, 0xE6, 0x0F, 0x39, 0x05, 0xB4, 0x4A, 0x8F, 0x38, 0x44, 0x2D, 0x4B, 0xE9, 0x7B, 0x81, - 0xC6, 0xB0, 0xE0, 0x0A, 0x39, 0x8C, 0x38, 0xFE, 0x63, 0x31, 0x47, 0x65, 0x0D, 0x2B, 0xF4, 0x96, - 0x13, 0x91, 0x89, 0xB4, 0x5B, 0xA9, 0x2A, 0x7A, 0x09, 0x65, 0x14, 0x20, 0x76, 0x24, 0x6C, 0x80, - 0x1D, 0x3F, 0x9F, 0xCD, 0x68, 0x39, 0xA9, 0x42, 0x27, 0xC1, 0x0C, 0x53, 0x98, 0x35, 0x60, 0x7A -}; - - -// "cn-heavy/tube" -const static uint8_t test_output_tube_heavy[160] = { - 0xFE, 0x53, 0x35, 0x20, 0x76, 0xEA, 0xE6, 0x89, 0xFA, 0x3B, 0x4F, 0xDA, 0x61, 0x46, 0x34, 0xCF, - 0xC3, 0x12, 
0xEE, 0x0C, 0x38, 0x7D, 0xF2, 0xB8, 0xB7, 0x4D, 0xA2, 0xA1, 0x59, 0x74, 0x12, 0x35, - 0xCD, 0x3F, 0x29, 0xDF, 0x07, 0x4A, 0x14, 0xAD, 0x0B, 0x98, 0x99, 0x37, 0xCA, 0x14, 0x68, 0xA3, - 0x8D, 0xAE, 0x86, 0xC1, 0xA3, 0x54, 0x05, 0xBE, 0xEA, 0x6D, 0x29, 0x24, 0x0C, 0x82, 0x97, 0x74, - 0xA0, 0x64, 0x77, 0xCD, 0x8D, 0x8A, 0xC3, 0x10, 0xB4, 0x89, 0x0E, 0xBB, 0x7D, 0xE6, 0x32, 0x8F, - 0xF4, 0x2D, 0xB6, 0x9E, 0x8A, 0xF9, 0xF8, 0xEE, 0x2C, 0xD0, 0x74, 0xED, 0xA9, 0xAA, 0xA1, 0xFB, - 0xE2, 0xC9, 0x89, 0x66, 0xD6, 0x66, 0x52, 0xA2, 0x16, 0xDA, 0x36, 0xA0, 0x10, 0x62, 0xD2, 0xB1, - 0x76, 0xD1, 0x31, 0xE9, 0x1C, 0x08, 0xB6, 0xCA, 0xAF, 0x89, 0xB9, 0x3D, 0x2C, 0xFA, 0x9A, 0x30, - 0x74, 0x6A, 0x96, 0xA1, 0x95, 0x6C, 0xBB, 0x46, 0x4D, 0xE0, 0xEB, 0x28, 0xBE, 0x2A, 0x8C, 0x34, - 0x57, 0x79, 0xBE, 0x52, 0xFB, 0xBC, 0x68, 0x43, 0x45, 0xF4, 0xDF, 0xA5, 0xA8, 0xFD, 0x55, 0xA6 -}; -#endif - - -#ifndef XMRIG_NO_CN_PICO -// "cn-pico/trtl" -const static uint8_t test_output_pico_trtl[160] = { - 0x08, 0xF4, 0x21, 0xD7, 0x83, 0x31, 0x17, 0x30, 0x0E, 0xDA, 0x66, 0xE9, 0x8F, 0x4A, 0x25, 0x69, - 0x09, 0x3D, 0xF3, 0x00, 0x50, 0x01, 0x73, 0x94, 0x4E, 0xFC, 0x40, 0x1E, 0x9A, 0x4A, 0x17, 0xAF, - 0xB2, 0x17, 0x2E, 0xC9, 0x46, 0x6E, 0x1A, 0xEE, 0x70, 0xEC, 0x85, 0x72, 0xA1, 0x4C, 0x23, 0x3E, - 0xE3, 0x54, 0x58, 0x2B, 0xCB, 0x93, 0xF8, 0x69, 0xD4, 0x29, 0x74, 0x4D, 0xE5, 0x72, 0x6A, 0x26, - 0x4E, 0xFD, 0x28, 0xFC, 0xD3, 0x74, 0x8A, 0x83, 0xF3, 0xCA, 0x92, 0x84, 0xE7, 0x4E, 0x10, 0xC2, - 0x05, 0x62, 0xC7, 0xBE, 0x99, 0x73, 0xED, 0x90, 0xB5, 0x6F, 0xDA, 0x64, 0x71, 0x2D, 0x99, 0x39, - 0x29, 0xDB, 0x22, 0x2B, 0x97, 0xB6, 0x37, 0x0E, 0x9A, 0x03, 0x65, 0xCC, 0xF7, 0xD0, 0x9A, 0xB7, - 0x68, 0xCE, 0x07, 0x3E, 0x15, 0x40, 0x3C, 0xCE, 0x8C, 0x63, 0x16, 0x72, 0xB5, 0x74, 0x84, 0xF4, - 0xA1, 0xE7, 0x53, 0x85, 0xFB, 0x72, 0xDD, 0x75, 0x90, 0x39, 0xB2, 0x3D, 0xC3, 0x08, 0x2C, 0xD5, - 0x01, 0x08, 0x27, 0x75, 0x86, 0xB9, 0xBB, 0x9B, 0xDF, 0xEA, 0x49, 0xDE, 0x46, 0xCB, 0x83, 0x45 -}; -#endif - - -#ifndef 
XMRIG_NO_CN_GPU -// "cn/gpu" -const static uint8_t test_output_gpu[160] = { - 0xE5, 0x5C, 0xB2, 0x3E, 0x51, 0x64, 0x9A, 0x59, 0xB1, 0x27, 0xB9, 0x6B, 0x51, 0x5F, 0x2B, 0xF7, - 0xBF, 0xEA, 0x19, 0x97, 0x41, 0xA0, 0x21, 0x6C, 0xF8, 0x38, 0xDE, 0xD0, 0x6E, 0xFF, 0x82, 0xDF, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -}; -#endif - - -#endif /* XMRIG_CRYPTONIGHT_TEST_H */ diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h deleted file mode 100644 index 202b662a..00000000 --- a/src/crypto/CryptoNight_x86.h +++ /dev/null @@ -1,1481 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef XMRIG_CRYPTONIGHT_X86_H -#define XMRIG_CRYPTONIGHT_X86_H - - -#ifdef __GNUC__ -# include -#else -# include -# define __restrict__ __restrict -#endif - - -#include "common/cpu/Cpu.h" -#include "common/crypto/keccak.h" -#include "crypto/CryptoNight.h" -#include "crypto/CryptoNight_constants.h" -#include "crypto/CryptoNight_monero.h" -#include "crypto/soft_aes.h" - - -extern "C" -{ -#include "crypto/c_groestl.h" -#include "crypto/c_blake256.h" -#include "crypto/c_jh.h" -#include "crypto/c_skein.h" -} - - -static inline void do_blake_hash(const uint8_t *input, size_t len, uint8_t *output) { - blake256_hash(output, input, len); -} - - -static inline void do_groestl_hash(const uint8_t *input, size_t len, uint8_t *output) { - groestl(input, len * 8, output); -} - - -static inline void do_jh_hash(const uint8_t *input, size_t len, uint8_t *output) { - jh_hash(32 * 8, input, 8 * len, output); -} - - -static inline void do_skein_hash(const uint8_t *input, size_t len, uint8_t *output) { - xmr_skein(input, output); -} - - -void (* const extra_hashes[4])(const uint8_t *, size_t, uint8_t *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash}; - - -#if defined(__x86_64__) || defined(_M_AMD64) -# ifdef __GNUC__ -static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi) -{ - unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b; - *hi = r >> 64; - return (uint64_t) r; -} -# else - #define __umul128 _umul128 -# endif -#elif defined(__i386__) || defined(_M_IX86) -static inline int64_t _mm_cvtsi128_si64(__m128i a) -{ - return ((uint64_t)(uint32_t)_mm_cvtsi128_si32(a) | 
((uint64_t)(uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(a, 4)) << 32)); -} - -static inline __m128i _mm_cvtsi64_si128(int64_t a) { - return _mm_set_epi64x(0, a); -} - -static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) { - // multiplier = ab = a * 2^32 + b - // multiplicand = cd = c * 2^32 + d - // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d - uint64_t a = multiplier >> 32; - uint64_t b = multiplier & 0xFFFFFFFF; - uint64_t c = multiplicand >> 32; - uint64_t d = multiplicand & 0xFFFFFFFF; - - //uint64_t ac = a * c; - uint64_t ad = a * d; - //uint64_t bc = b * c; - uint64_t bd = b * d; - - uint64_t adbc = ad + (b * c); - uint64_t adbc_carry = adbc < ad ? 1 : 0; - - // multiplier * multiplicand = product_hi * 2^64 + product_lo - uint64_t product_lo = bd + (adbc << 32); - uint64_t product_lo_carry = product_lo < bd ? 1 : 0; - *product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry; - - return product_lo; -} -#endif - - -// This will shift and xor tmp1 into itself as 4 32-bit vals such as -// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1) -static inline __m128i sl_xor(__m128i tmp1) -{ - __m128i tmp4; - tmp4 = _mm_slli_si128(tmp1, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - return tmp1; -} - - -template -static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2) -{ - __m128i xout1 = _mm_aeskeygenassist_si128(*xout2, rcon); - xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem - *xout0 = sl_xor(*xout0); - *xout0 = _mm_xor_si128(*xout0, xout1); - xout1 = _mm_aeskeygenassist_si128(*xout0, 0x00); - xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem - *xout2 = sl_xor(*xout2); - *xout2 = _mm_xor_si128(*xout2, xout1); -} - - -template -static inline void 
soft_aes_genkey_sub(__m128i* xout0, __m128i* xout2) -{ - __m128i xout1 = soft_aeskeygenassist(*xout2); - xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem - *xout0 = sl_xor(*xout0); - *xout0 = _mm_xor_si128(*xout0, xout1); - xout1 = soft_aeskeygenassist<0x00>(*xout0); - xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem - *xout2 = sl_xor(*xout2); - *xout2 = _mm_xor_si128(*xout2, xout1); -} - - -template -static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9) -{ - __m128i xout0 = _mm_load_si128(memory); - __m128i xout2 = _mm_load_si128(memory + 1); - *k0 = xout0; - *k1 = xout2; - - SOFT_AES ? soft_aes_genkey_sub<0x01>(&xout0, &xout2) : aes_genkey_sub<0x01>(&xout0, &xout2); - *k2 = xout0; - *k3 = xout2; - - SOFT_AES ? soft_aes_genkey_sub<0x02>(&xout0, &xout2) : aes_genkey_sub<0x02>(&xout0, &xout2); - *k4 = xout0; - *k5 = xout2; - - SOFT_AES ? soft_aes_genkey_sub<0x04>(&xout0, &xout2) : aes_genkey_sub<0x04>(&xout0, &xout2); - *k6 = xout0; - *k7 = xout2; - - SOFT_AES ? 
soft_aes_genkey_sub<0x08>(&xout0, &xout2) : aes_genkey_sub<0x08>(&xout0, &xout2); - *k8 = xout0; - *k9 = xout2; -} - - -static FORCEINLINE void soft_aesenc(void* __restrict ptr, const void* __restrict key, const uint32_t* __restrict t) -{ - uint32_t x0 = ((const uint32_t*)(ptr))[0]; - uint32_t x1 = ((const uint32_t*)(ptr))[1]; - uint32_t x2 = ((const uint32_t*)(ptr))[2]; - uint32_t x3 = ((const uint32_t*)(ptr))[3]; - - uint32_t y0 = t[x0 & 0xff]; x0 >>= 8; - uint32_t y1 = t[x1 & 0xff]; x1 >>= 8; - uint32_t y2 = t[x2 & 0xff]; x2 >>= 8; - uint32_t y3 = t[x3 & 0xff]; x3 >>= 8; - t += 256; - - y0 ^= t[x1 & 0xff]; x1 >>= 8; - y1 ^= t[x2 & 0xff]; x2 >>= 8; - y2 ^= t[x3 & 0xff]; x3 >>= 8; - y3 ^= t[x0 & 0xff]; x0 >>= 8; - t += 256; - - y0 ^= t[x2 & 0xff]; x2 >>= 8; - y1 ^= t[x3 & 0xff]; x3 >>= 8; - y2 ^= t[x0 & 0xff]; x0 >>= 8; - y3 ^= t[x1 & 0xff]; x1 >>= 8; - t += 256; - - y0 ^= t[x3]; - y1 ^= t[x0]; - y2 ^= t[x1]; - y3 ^= t[x2]; - - ((uint32_t*)ptr)[0] = y0 ^ ((uint32_t*)key)[0]; - ((uint32_t*)ptr)[1] = y1 ^ ((uint32_t*)key)[1]; - ((uint32_t*)ptr)[2] = y2 ^ ((uint32_t*)key)[2]; - ((uint32_t*)ptr)[3] = y3 ^ ((uint32_t*)key)[3]; -} - -static FORCEINLINE __m128i soft_aesenc(const void* __restrict ptr, const __m128i key, const uint32_t* __restrict t) -{ - uint32_t x0 = ((const uint32_t*)(ptr))[0]; - uint32_t x1 = ((const uint32_t*)(ptr))[1]; - uint32_t x2 = ((const uint32_t*)(ptr))[2]; - uint32_t x3 = ((const uint32_t*)(ptr))[3]; - - uint32_t y0 = t[x0 & 0xff]; x0 >>= 8; - uint32_t y1 = t[x1 & 0xff]; x1 >>= 8; - uint32_t y2 = t[x2 & 0xff]; x2 >>= 8; - uint32_t y3 = t[x3 & 0xff]; x3 >>= 8; - t += 256; - - y0 ^= t[x1 & 0xff]; x1 >>= 8; - y1 ^= t[x2 & 0xff]; x2 >>= 8; - y2 ^= t[x3 & 0xff]; x3 >>= 8; - y3 ^= t[x0 & 0xff]; x0 >>= 8; - t += 256; - - y0 ^= t[x2 & 0xff]; x2 >>= 8; - y1 ^= t[x3 & 0xff]; x3 >>= 8; - y2 ^= t[x0 & 0xff]; x0 >>= 8; - y3 ^= t[x1 & 0xff]; x1 >>= 8; - - y0 ^= t[x3 + 256]; - y1 ^= t[x0 + 256]; - y2 ^= t[x1 + 256]; - y3 ^= t[x2 + 256]; - - return 
_mm_xor_si128(_mm_set_epi32(y3, y2, y1, y0), key); -} - -template -void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7); - -template<> -NOINLINE void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7) -{ - *x0 = soft_aesenc((uint32_t*)x0, key, (const uint32_t*)saes_table); - *x1 = soft_aesenc((uint32_t*)x1, key, (const uint32_t*)saes_table); - *x2 = soft_aesenc((uint32_t*)x2, key, (const uint32_t*)saes_table); - *x3 = soft_aesenc((uint32_t*)x3, key, (const uint32_t*)saes_table); - *x4 = soft_aesenc((uint32_t*)x4, key, (const uint32_t*)saes_table); - *x5 = soft_aesenc((uint32_t*)x5, key, (const uint32_t*)saes_table); - *x6 = soft_aesenc((uint32_t*)x6, key, (const uint32_t*)saes_table); - *x7 = soft_aesenc((uint32_t*)x7, key, (const uint32_t*)saes_table); -} - -template<> -FORCEINLINE void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7) -{ - *x0 = _mm_aesenc_si128(*x0, key); - *x1 = _mm_aesenc_si128(*x1, key); - *x2 = _mm_aesenc_si128(*x2, key); - *x3 = _mm_aesenc_si128(*x3, key); - *x4 = _mm_aesenc_si128(*x4, key); - *x5 = _mm_aesenc_si128(*x5, key); - *x6 = _mm_aesenc_si128(*x6, key); - *x7 = _mm_aesenc_si128(*x7, key); -} - -inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3, __m128i& x4, __m128i& x5, __m128i& x6, __m128i& x7) -{ - __m128i tmp0 = x0; - x0 = _mm_xor_si128(x0, x1); - x1 = _mm_xor_si128(x1, x2); - x2 = _mm_xor_si128(x2, x3); - x3 = _mm_xor_si128(x3, x4); - x4 = _mm_xor_si128(x4, x5); - x5 = _mm_xor_si128(x5, x6); - x6 = _mm_xor_si128(x6, x7); - x7 = _mm_xor_si128(x7, tmp0); -} - - -template -static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output) -{ - __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7; - __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - - 
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9); - - xin0 = _mm_load_si128(input + 4); - xin1 = _mm_load_si128(input + 5); - xin2 = _mm_load_si128(input + 6); - xin3 = _mm_load_si128(input + 7); - xin4 = _mm_load_si128(input + 8); - xin5 = _mm_load_si128(input + 9); - xin6 = _mm_load_si128(input + 10); - xin7 = _mm_load_si128(input + 11); - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - for (size_t i = 0; i < 16; i++) { - aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - - mix_and_propagate(xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7); - } - } - - for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) { - aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k9, &xin0, &xin1, 
&xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - - _mm_store_si128(output + i + 0, xin0); - _mm_store_si128(output + i + 1, xin1); - _mm_store_si128(output + i + 2, xin2); - _mm_store_si128(output + i + 3, xin3); - _mm_store_si128(output + i + 4, xin4); - _mm_store_si128(output + i + 5, xin5); - _mm_store_si128(output + i + 6, xin6); - _mm_store_si128(output + i + 7, xin7); - } -} - - -#ifndef XMRIG_NO_CN_GPU -template -void cn_explode_scratchpad_gpu(const uint8_t *input, uint8_t *output) -{ - constexpr size_t hash_size = 200; // 25x8 bytes - alignas(16) uint64_t hash[25]; - - for (uint64_t i = 0; i < MEM / 512; i++) - { - memcpy(hash, input, hash_size); - hash[0] ^= i; - - xmrig::keccakf(hash, 24); - memcpy(output, hash, 160); - output += 160; - - xmrig::keccakf(hash, 24); - memcpy(output, hash, 176); - output += 176; - - xmrig::keccakf(hash, 24); - memcpy(output, hash, 176); - output += 176; - } -} -#endif - - -template -static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output) -{ - __m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7; - __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - - aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9); - - xout0 = _mm_load_si128(output + 4); - xout1 = _mm_load_si128(output + 5); - xout2 = _mm_load_si128(output + 6); - xout3 = _mm_load_si128(output + 7); - xout4 = _mm_load_si128(output + 8); - xout5 = _mm_load_si128(output + 9); - xout6 = _mm_load_si128(output + 10); - xout7 = _mm_load_si128(output + 11); - - for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) - { - xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0); - xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1); - xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2); - xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3); - xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4); - xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5); - xout6 = 
_mm_xor_si128(_mm_load_si128(input + i + 6), xout6); - xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7); - - aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); - } - } - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) { - xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0); - xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1); - xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2); - xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3); - xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4); - xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5); - xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6); - xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7); - - aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - 
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - - mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); - } - - for (size_t i = 0; i < 16; i++) { - aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - - mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); - } - } - - _mm_store_si128(output + 4, xout0); - _mm_store_si128(output + 5, xout1); - _mm_store_si128(output + 6, xout2); - _mm_store_si128(output + 7, xout3); - _mm_store_si128(output + 8, xout4); - _mm_store_si128(output + 9, xout5); - _mm_store_si128(output + 10, xout6); - _mm_store_si128(output + 11, xout7); -} - - -static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key) -{ - alignas(16) uint32_t k[4]; - alignas(16) uint32_t x[4]; - - _mm_store_si128((__m128i*) k, key); 
- _mm_store_si128((__m128i*) x, _mm_xor_si128(in, _mm_set_epi64x(0xffffffffffffffff, 0xffffffffffffffff))); - - #define BYTE(p, i) ((unsigned char*)&x[p])[i] - k[0] ^= saes_table[0][BYTE(0, 0)] ^ saes_table[1][BYTE(1, 1)] ^ saes_table[2][BYTE(2, 2)] ^ saes_table[3][BYTE(3, 3)]; - x[0] ^= k[0]; - k[1] ^= saes_table[0][BYTE(1, 0)] ^ saes_table[1][BYTE(2, 1)] ^ saes_table[2][BYTE(3, 2)] ^ saes_table[3][BYTE(0, 3)]; - x[1] ^= k[1]; - k[2] ^= saes_table[0][BYTE(2, 0)] ^ saes_table[1][BYTE(3, 1)] ^ saes_table[2][BYTE(0, 2)] ^ saes_table[3][BYTE(1, 3)]; - x[2] ^= k[2]; - k[3] ^= saes_table[0][BYTE(3, 0)] ^ saes_table[1][BYTE(0, 1)] ^ saes_table[2][BYTE(1, 2)] ^ saes_table[3][BYTE(2, 3)]; - #undef BYTE - - return _mm_load_si128((__m128i*)k); -} - - -static inline __m128i int_sqrt_v2(const uint64_t n0) -{ - __m128d x = _mm_castsi128_pd(_mm_add_epi64(_mm_cvtsi64_si128(n0 >> 12), _mm_set_epi64x(0, 1023ULL << 52))); - x = _mm_sqrt_sd(_mm_setzero_pd(), x); - uint64_t r = static_cast(_mm_cvtsi128_si64(_mm_castpd_si128(x))); - - const uint64_t s = r >> 20; - r >>= 19; - - uint64_t x2 = (s - (1022ULL << 32)) * (r - s - (1022ULL << 32) + 1); -# if (defined(_MSC_VER) || __GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ > 1)) && (defined(__x86_64__) || defined(_M_AMD64)) - _addcarry_u64(_subborrow_u64(0, x2, n0, (unsigned long long int*)&x2), r, 0, (unsigned long long int*)&r); -# else - if (x2 < n0) ++r; -# endif - - return _mm_cvtsi64_si128(r); -} - - -template -static inline void cryptonight_monero_tweak(uint64_t* mem_out, const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i& cx) -{ - if (BASE == xmrig::VARIANT_2) { - VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (VARIANT == xmrig::VARIANT_RWZ ? 
1 : 0)); - _mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx)); - } else { - __m128i tmp = _mm_xor_si128(bx0, cx); - mem_out[0] = _mm_cvtsi128_si64(tmp); - - tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp))); - uint64_t vh = _mm_cvtsi128_si64(tmp); - - uint8_t x = static_cast(vh >> 24); - static const uint16_t table = 0x7531; - const uint8_t index = (((x >> (VARIANT == xmrig::VARIANT_XTL ? 4 : 3)) & 6) | (x & 1)) << 1; - vh ^= ((table >> index) & 0x3) << 28; - - mem_out[1] = vh; - } -} - -void wow_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); -void v4_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); - -template -inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::cn_select_mask(); - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t MEM = xmrig::cn_select_memory(); - constexpr xmrig::Variant BASE = xmrig::cn_base_variant(); - - static_assert(MASK > 0 && ITERATIONS > 0 && MEM > 0, "unsupported algorithm/variant"); - - if (BASE == xmrig::VARIANT_1 && size < 43) { - memset(output, 0, 32); - return; - } - - xmrig::keccak(input, size, ctx[0]->state); - - cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); - - uint64_t* h0 = reinterpret_cast(ctx[0]->state); - -#ifndef XMRIG_NO_ASM - if (SOFT_AES && xmrig::cn_is_cryptonight_r()) - { - if (!ctx[0]->generated_code_data.match(VARIANT, height)) { - V4_Instruction code[256]; - const int code_size = v4_random_math_init(code, height); - - if (VARIANT == xmrig::VARIANT_WOW) - wow_soft_aes_compile_code(code, code_size, reinterpret_cast(ctx[0]->generated_code), xmrig::ASM_NONE); - else if (VARIANT == xmrig::VARIANT_4) - v4_soft_aes_compile_code(code, code_size, 
reinterpret_cast(ctx[0]->generated_code), xmrig::ASM_NONE); - - ctx[0]->generated_code_data.variant = VARIANT; - ctx[0]->generated_code_data.height = height; - } - - ctx[0]->saes_table = (const uint32_t*)saes_table; - ctx[0]->generated_code(ctx); - } else { -#endif - - const uint8_t* l0 = ctx[0]->memory; - - VARIANT1_INIT(0); - VARIANT2_INIT(0); - VARIANT2_SET_ROUNDING_MODE(); - VARIANT4_RANDOM_MATH_INIT(0); - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - __m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]); - - uint64_t idx0 = al0; - - for (size_t i = 0; i < ITERATIONS; i++) { - __m128i cx; - if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) { - cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]); - } - - const __m128i ax0 = _mm_set_epi64x(ah0, al0); - if (VARIANT == xmrig::VARIANT_TUBE) { - cx = aes_round_tweak_div(cx, ax0); - } - else if (SOFT_AES) { - cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0, (const uint32_t*)saes_table); - } - else { - cx = _mm_aesenc_si128(cx, ax0); - } - - if (BASE == xmrig::VARIANT_1 || BASE == xmrig::VARIANT_2) { - cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx0, bx1, cx); - } else { - _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); - } - - idx0 = _mm_cvtsi128_si64(cx); - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*) &l0[idx0 & MASK])[0]; - ch = ((uint64_t*) &l0[idx0 & MASK])[1]; - - if (BASE == xmrig::VARIANT_2) { - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { - VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1); - if (VARIANT == xmrig::VARIANT_4) { - al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32); - ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32); - } - } else { - VARIANT2_INTEGER_MATH(0, cl, cx); - } - } - - lo = __umul128(idx0, cl, &hi); - - if (BASE == xmrig::VARIANT_2) { - if (VARIANT == xmrig::VARIANT_4) { - VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0); - } else 
{ - VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0)); - } - } - - al0 += hi; - ah0 += lo; - - ((uint64_t*)&l0[idx0 & MASK])[0] = al0; - - if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0; - } else if (BASE == xmrig::VARIANT_1) { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0; - } else { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0; - } - - al0 ^= cl; - ah0 ^= ch; - idx0 = al0; - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - int64_t n = ((int64_t*)&l0[idx0 & MASK])[0]; - int32_t d = ((int32_t*)&l0[idx0 & MASK])[2]; - int64_t q = n / (d | 0x5); - - ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; - - if (VARIANT == xmrig::VARIANT_XHV) { - d = ~d; - } - - idx0 = d ^ q; - } - - if (BASE == xmrig::VARIANT_2) { - bx1 = bx0; - } - - bx0 = cx; - } - -#ifndef XMRIG_NO_ASM - } -#endif - - cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); - - xmrig::keccakf(h0, 24); - extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); -} - - -#ifndef XMRIG_NO_CN_GPU -template -void cn_gpu_inner_avx(const uint8_t *spad, uint8_t *lpad); - - -template -void cn_gpu_inner_ssse3(const uint8_t *spad, uint8_t *lpad); - - -template -inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::CRYPTONIGHT_GPU_MASK; - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t MEM = xmrig::cn_select_memory(); - - static_assert(MASK > 0 && ITERATIONS > 0 && MEM > 0, "unsupported algorithm/variant"); - - xmrig::keccak(input, size, ctx[0]->state); - cn_explode_scratchpad_gpu(ctx[0]->state, ctx[0]->memory); - -# ifdef _MSC_VER - _control87(RC_NEAR, MCW_RC); -# else - fesetround(FE_TONEAREST); -# endif - - if (xmrig::Cpu::info()->hasAVX2()) { - 
cn_gpu_inner_avx(ctx[0]->state, ctx[0]->memory); - } else { - cn_gpu_inner_ssse3(ctx[0]->state, ctx[0]->memory); - } - - cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); - - xmrig::keccakf((uint64_t*) ctx[0]->state, 24); - memcpy(output, ctx[0]->state, 32); -} -#endif - - -#ifndef XMRIG_NO_ASM -extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx); -extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx); -extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx); -extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx); -extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx); -extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx); - -extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm; - -extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm; - -extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ivybridge_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ryzen_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm; - -extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ivybridge_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ryzen_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm; - -void wow_compile_code(const V4_Instruction* 
code, int code_size, void* machine_code, xmrig::Assembly ASM); -void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); -void wow_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); -void v4_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); - -template -void cn_r_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - v4_compile_code(code, code_size, machine_code, ASM); -} - -template -void cn_r_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - v4_compile_code_double(code, code_size, machine_code, ASM); -} - -template<> -void cn_r_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - wow_compile_code(code, code_size, machine_code, ASM); -} - -template<> -void cn_r_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - wow_compile_code_double(code, code_size, machine_code, ASM); -} - -template -inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MEM = xmrig::cn_select_memory(); - - if (xmrig::cn_is_cryptonight_r() && !ctx[0]->generated_code_data.match(VARIANT, height)) { - V4_Instruction code[256]; - const int code_size = v4_random_math_init(code, height); - cn_r_compile_code(code, code_size, reinterpret_cast(ctx[0]->generated_code), ASM); - ctx[0]->generated_code_data.variant = VARIANT; - ctx[0]->generated_code_data.height = height; - } - - xmrig::keccak(input, size, ctx[0]->state); - cn_explode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory)); - - if (VARIANT == xmrig::VARIANT_2) { - if (ASM == xmrig::ASM_INTEL) { - 
cnv2_mainloop_ivybridge_asm(ctx); - } - else if (ASM == xmrig::ASM_RYZEN) { - cnv2_mainloop_ryzen_asm(ctx); - } - else { - cnv2_mainloop_bulldozer_asm(ctx); - } - } - else if (VARIANT == xmrig::VARIANT_HALF) { - if (ASM == xmrig::ASM_INTEL) { - cn_half_mainloop_ivybridge_asm(ctx); - } - else if (ASM == xmrig::ASM_RYZEN) { - cn_half_mainloop_ryzen_asm(ctx); - } - else { - cn_half_mainloop_bulldozer_asm(ctx); - } - } - else if (VARIANT == xmrig::VARIANT_TRTL) { - if (ASM == xmrig::ASM_INTEL) { - cn_trtl_mainloop_ivybridge_asm(ctx); - } - else if (ASM == xmrig::ASM_RYZEN) { - cn_trtl_mainloop_ryzen_asm(ctx); - } - else { - cn_trtl_mainloop_bulldozer_asm(ctx); - } - } - else if (VARIANT == xmrig::VARIANT_RWZ) { - cnv2_rwz_mainloop_asm(ctx); - } - else if (VARIANT == xmrig::VARIANT_ZLS) { - if (ASM == xmrig::ASM_INTEL) { - cn_zls_mainloop_ivybridge_asm(ctx); - } - else if (ASM == xmrig::ASM_RYZEN) { - cn_zls_mainloop_ryzen_asm(ctx); - } - else { - cn_zls_mainloop_bulldozer_asm(ctx); - } - } - else if (VARIANT == xmrig::VARIANT_DOUBLE) { - if (ASM == xmrig::ASM_INTEL) { - cn_double_mainloop_ivybridge_asm(ctx); - } - else if (ASM == xmrig::ASM_RYZEN) { - cn_double_mainloop_ryzen_asm(ctx); - } - else { - cn_double_mainloop_bulldozer_asm(ctx); - } - } - else if (xmrig::cn_is_cryptonight_r()) { - ctx[0]->generated_code(ctx); - } - - cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state)); - xmrig::keccakf(reinterpret_cast(ctx[0]->state), 24); - extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); -} - - -template -inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MEM = xmrig::cn_select_memory(); - - if (xmrig::cn_is_cryptonight_r() && !ctx[0]->generated_code_double_data.match(VARIANT, height)) { - V4_Instruction code[256]; - const int code_size = v4_random_math_init(code, 
height); - cn_r_compile_code_double(code, code_size, reinterpret_cast(ctx[0]->generated_code_double), ASM); - ctx[0]->generated_code_double_data.variant = VARIANT; - ctx[0]->generated_code_double_data.height = height; - } - - xmrig::keccak(input, size, ctx[0]->state); - xmrig::keccak(input + size, size, ctx[1]->state); - - cn_explode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory)); - cn_explode_scratchpad(reinterpret_cast<__m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory)); - - if (VARIANT == xmrig::VARIANT_2) { - cnv2_double_mainloop_sandybridge_asm(ctx); - } - else if (VARIANT == xmrig::VARIANT_HALF) { - cn_half_double_mainloop_sandybridge_asm(ctx); - } - else if (VARIANT == xmrig::VARIANT_TRTL) { - cn_trtl_double_mainloop_sandybridge_asm(ctx); - } - else if (VARIANT == xmrig::VARIANT_RWZ) { - cnv2_rwz_double_mainloop_asm(ctx); - } - else if (VARIANT == xmrig::VARIANT_ZLS) { - cn_zls_double_mainloop_sandybridge_asm(ctx); - } - else if (VARIANT == xmrig::VARIANT_DOUBLE) { - cn_double_double_mainloop_sandybridge_asm(ctx); - } - else if (xmrig::cn_is_cryptonight_r()) { - ctx[0]->generated_code_double(ctx); - } - - cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state)); - cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state)); - - xmrig::keccakf(reinterpret_cast(ctx[0]->state), 24); - xmrig::keccakf(reinterpret_cast(ctx[1]->state), 24); - - extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); - extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32); -} -#endif - - -template -inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::cn_select_mask(); - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t MEM 
= xmrig::cn_select_memory(); - constexpr xmrig::Variant BASE = xmrig::cn_base_variant(); - - if (BASE == xmrig::VARIANT_1 && size < 43) { - memset(output, 0, 64); - return; - } - - xmrig::keccak(input, size, ctx[0]->state); - xmrig::keccak(input + size, size, ctx[1]->state); - - const uint8_t* l0 = ctx[0]->memory; - const uint8_t* l1 = ctx[1]->memory; - uint64_t* h0 = reinterpret_cast(ctx[0]->state); - uint64_t* h1 = reinterpret_cast(ctx[1]->state); - - VARIANT1_INIT(0); - VARIANT1_INIT(1); - VARIANT2_INIT(0); - VARIANT2_INIT(1); - VARIANT2_SET_ROUNDING_MODE(); - VARIANT4_RANDOM_MATH_INIT(0); - VARIANT4_RANDOM_MATH_INIT(1); - - cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0); - cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1); - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t al1 = h1[0] ^ h1[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - uint64_t ah1 = h1[1] ^ h1[5]; - - __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - __m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]); - __m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); - __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]); - - uint64_t idx0 = al0; - uint64_t idx1 = al1; - - for (size_t i = 0; i < ITERATIONS; i++) { - __m128i cx0, cx1; - if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) { - cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]); - cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]); - } - - const __m128i ax0 = _mm_set_epi64x(ah0, al0); - const __m128i ax1 = _mm_set_epi64x(ah1, al1); - if (VARIANT == xmrig::VARIANT_TUBE) { - cx0 = aes_round_tweak_div(cx0, ax0); - cx1 = aes_round_tweak_div(cx1, ax1); - } - else if (SOFT_AES) { - cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0, (const uint32_t*)saes_table); - cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], ax1, (const uint32_t*)saes_table); - } - else { - cx0 = _mm_aesenc_si128(cx0, ax0); - cx1 = _mm_aesenc_si128(cx1, ax1); - } - - if (BASE == xmrig::VARIANT_1 || (BASE == xmrig::VARIANT_2)) { - 
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx00, bx01, cx0); - cryptonight_monero_tweak((uint64_t*)&l1[idx1 & MASK], l1, idx1 & MASK, ax1, bx10, bx11, cx1); - } else { - _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0)); - _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx10, cx1)); - } - - idx0 = _mm_cvtsi128_si64(cx0); - idx1 = _mm_cvtsi128_si64(cx1); - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*) &l0[idx0 & MASK])[0]; - ch = ((uint64_t*) &l0[idx0 & MASK])[1]; - - if (BASE == xmrig::VARIANT_2) { - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { - VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01); - if (VARIANT == xmrig::VARIANT_4) { - al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32); - ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32); - } - } else { - VARIANT2_INTEGER_MATH(0, cl, cx0); - } - } - - lo = __umul128(idx0, cl, &hi); - - if (BASE == xmrig::VARIANT_2) { - if (VARIANT == xmrig::VARIANT_4) { - VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0); - } else { - VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 
1 : 0)); - } - } - - al0 += hi; - ah0 += lo; - - ((uint64_t*)&l0[idx0 & MASK])[0] = al0; - - if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) { - ((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0; - } else if (BASE == xmrig::VARIANT_1) { - ((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0; - } else { - ((uint64_t*) &l0[idx0 & MASK])[1] = ah0; - } - - al0 ^= cl; - ah0 ^= ch; - idx0 = al0; - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - int64_t n = ((int64_t*)&l0[idx0 & MASK])[0]; - int32_t d = ((int32_t*)&l0[idx0 & MASK])[2]; - int64_t q = n / (d | 0x5); - - ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; - - if (VARIANT == xmrig::VARIANT_XHV) { - d = ~d; - } - - idx0 = d ^ q; - } - - cl = ((uint64_t*) &l1[idx1 & MASK])[0]; - ch = ((uint64_t*) &l1[idx1 & MASK])[1]; - - if (BASE == xmrig::VARIANT_2) { - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { - VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11); - if (VARIANT == xmrig::VARIANT_4) { - al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32); - ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32); - } - } else { - VARIANT2_INTEGER_MATH(1, cl, cx1); - } - } - - lo = __umul128(idx1, cl, &hi); - - if (BASE == xmrig::VARIANT_2) { - if (VARIANT == xmrig::VARIANT_4) { - VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0); - } else { - VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 
1 : 0)); - } - } - - al1 += hi; - ah1 += lo; - - ((uint64_t*)&l1[idx1 & MASK])[0] = al1; - - if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) { - ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1; - } else if (BASE == xmrig::VARIANT_1) { - ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1; - } else { - ((uint64_t*)&l1[idx1 & MASK])[1] = ah1; - } - - al1 ^= cl; - ah1 ^= ch; - idx1 = al1; - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - int64_t n = ((int64_t*)&l1[idx1 & MASK])[0]; - int32_t d = ((int32_t*)&l1[idx1 & MASK])[2]; - int64_t q = n / (d | 0x5); - - ((int64_t*)&l1[idx1 & MASK])[0] = n ^ q; - - if (VARIANT == xmrig::VARIANT_XHV) { - d = ~d; - } - - idx1 = d ^ q; - } - - if (BASE == xmrig::VARIANT_2) { - bx01 = bx00; - bx11 = bx10; - } - - bx00 = cx0; - bx10 = cx1; - } - - cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0); - cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1); - - xmrig::keccakf(h0, 24); - xmrig::keccakf(h1, 24); - - extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); - extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32); -} - - -#define CN_STEP1(a, b0, b1, c, l, ptr, idx) \ - ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \ - c = _mm_load_si128(ptr); - - -#define CN_STEP2(a, b0, b1, c, l, ptr, idx) \ - if (VARIANT == xmrig::VARIANT_TUBE) { \ - c = aes_round_tweak_div(c, a); \ - } \ - else if (SOFT_AES) { \ - c = soft_aesenc(&c, a, (const uint32_t*)saes_table); \ - } else { \ - c = _mm_aesenc_si128(c, a); \ - } \ - \ - if (BASE == xmrig::VARIANT_1 || BASE == xmrig::VARIANT_2) { \ - cryptonight_monero_tweak((uint64_t*)ptr, l, idx & MASK, a, b0, b1, c); \ - } else { \ - _mm_store_si128(ptr, _mm_xor_si128(b0, c)); \ - } - - -#define CN_STEP3(part, a, b0, b1, c, l, ptr, idx) \ - idx = _mm_cvtsi128_si64(c); \ - ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \ - uint64_t cl##part = ((uint64_t*)ptr)[0]; \ - uint64_t ch##part = ((uint64_t*)ptr)[1]; - - 
-#define CN_STEP4(part, a, b0, b1, c, l, mc, ptr, idx) \ - uint64_t al##part, ah##part; \ - if (BASE == xmrig::VARIANT_2) { \ - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { \ - al##part = _mm_cvtsi128_si64(a); \ - ah##part = _mm_cvtsi128_si64(_mm_srli_si128(a, 8)); \ - VARIANT4_RANDOM_MATH(part, al##part, ah##part, cl##part, b0, b1); \ - if (VARIANT == xmrig::VARIANT_4) { \ - al##part ^= r##part[2] | ((uint64_t)(r##part[3]) << 32); \ - ah##part ^= r##part[0] | ((uint64_t)(r##part[1]) << 32); \ - } \ - } else { \ - VARIANT2_INTEGER_MATH(part, cl##part, c); \ - } \ - } \ - lo = __umul128(idx, cl##part, &hi); \ - if (BASE == xmrig::VARIANT_2) { \ - if (VARIANT == xmrig::VARIANT_4) { \ - VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1, c, 0); \ - } else { \ - VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0)); \ - } \ - } \ - if (VARIANT == xmrig::VARIANT_4) { \ - a = _mm_set_epi64x(ah##part, al##part); \ - } \ - a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \ - \ - if (BASE == xmrig::VARIANT_1) { \ - _mm_store_si128(ptr, _mm_xor_si128(a, mc)); \ - \ - if (VARIANT == xmrig::VARIANT_TUBE || \ - VARIANT == xmrig::VARIANT_RTO) { \ - ((uint64_t*)ptr)[1] ^= ((uint64_t*)ptr)[0]; \ - } \ - } else { \ - _mm_store_si128(ptr, a); \ - } \ - \ - a = _mm_xor_si128(a, _mm_set_epi64x(ch##part, cl##part)); \ - idx = _mm_cvtsi128_si64(a); \ - \ - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { \ - int64_t n = ((int64_t*)&l[idx & MASK])[0]; \ - int32_t d = ((int32_t*)&l[idx & MASK])[2]; \ - int64_t q = n / (d | 0x5); \ - ((int64_t*)&l[idx & MASK])[0] = n ^ q; \ - if (VARIANT == xmrig::VARIANT_XHV) { \ - d = ~d; \ - } \ - \ - idx = d ^ q; \ - } \ - if (BASE == xmrig::VARIANT_2) { \ - b1 = b0; \ - } \ - b0 = c; - - -#define CONST_INIT(ctx, n) \ - __m128i mc##n; \ - __m128i division_result_xmm_##n; \ - __m128i sqrt_result_xmm_##n; \ - if (BASE == xmrig::VARIANT_1) { \ - mc##n = _mm_set_epi64x(*reinterpret_cast(input + n * size + 35) ^ 
\ - *(reinterpret_cast((ctx)->state) + 24), 0); \ - } \ - if (BASE == xmrig::VARIANT_2) { \ - division_result_xmm_##n = _mm_cvtsi64_si128(h##n[12]); \ - sqrt_result_xmm_##n = _mm_cvtsi64_si128(h##n[13]); \ - } \ - __m128i ax##n = _mm_set_epi64x(h##n[1] ^ h##n[5], h##n[0] ^ h##n[4]); \ - __m128i bx##n##0 = _mm_set_epi64x(h##n[3] ^ h##n[7], h##n[2] ^ h##n[6]); \ - __m128i bx##n##1 = _mm_set_epi64x(h##n[9] ^ h##n[11], h##n[8] ^ h##n[10]); \ - __m128i cx##n = _mm_setzero_si128(); \ - VARIANT4_RANDOM_MATH_INIT(n); - - -template -inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::cn_select_mask(); - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t MEM = xmrig::cn_select_memory(); - constexpr xmrig::Variant BASE = xmrig::cn_base_variant(); - - if (BASE == xmrig::VARIANT_1 && size < 43) { - memset(output, 0, 32 * 3); - return; - } - - for (size_t i = 0; i < 3; i++) { - xmrig::keccak(input + size * i, size, ctx[i]->state); - cn_explode_scratchpad(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory)); - } - - uint8_t* l0 = ctx[0]->memory; - uint8_t* l1 = ctx[1]->memory; - uint8_t* l2 = ctx[2]->memory; - uint64_t* h0 = reinterpret_cast(ctx[0]->state); - uint64_t* h1 = reinterpret_cast(ctx[1]->state); - uint64_t* h2 = reinterpret_cast(ctx[2]->state); - - CONST_INIT(ctx[0], 0); - CONST_INIT(ctx[1], 1); - CONST_INIT(ctx[2], 2); - VARIANT2_SET_ROUNDING_MODE(); - - uint64_t idx0, idx1, idx2; - idx0 = _mm_cvtsi128_si64(ax0); - idx1 = _mm_cvtsi128_si64(ax1); - idx2 = _mm_cvtsi128_si64(ax2); - - for (size_t i = 0; i < ITERATIONS; i++) { - uint64_t hi, lo; - __m128i *ptr0, *ptr1, *ptr2; - - CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0); - CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2); - - CN_STEP2(ax0, bx00, bx01, cx0, l0, 
ptr0, idx0); - CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP2(ax2, bx20, bx21, cx2, l2, ptr2, idx2); - - CN_STEP3(0, ax0, bx00, bx01, cx0, l0, ptr0, idx0); - CN_STEP3(1, ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP3(2, ax2, bx20, bx21, cx2, l2, ptr2, idx2); - - CN_STEP4(0, ax0, bx00, bx01, cx0, l0, mc0, ptr0, idx0); - CN_STEP4(1, ax1, bx10, bx11, cx1, l1, mc1, ptr1, idx1); - CN_STEP4(2, ax2, bx20, bx21, cx2, l2, mc2, ptr2, idx2); - } - - for (size_t i = 0; i < 3; i++) { - cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state)); - xmrig::keccakf(reinterpret_cast(ctx[i]->state), 24); - extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i); - } -} - - -template -inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::cn_select_mask(); - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t MEM = xmrig::cn_select_memory(); - constexpr xmrig::Variant BASE = xmrig::cn_base_variant(); - - if (BASE == xmrig::VARIANT_1 && size < 43) { - memset(output, 0, 32 * 4); - return; - } - - for (size_t i = 0; i < 4; i++) { - xmrig::keccak(input + size * i, size, ctx[i]->state); - cn_explode_scratchpad(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory)); - } - - uint8_t* l0 = ctx[0]->memory; - uint8_t* l1 = ctx[1]->memory; - uint8_t* l2 = ctx[2]->memory; - uint8_t* l3 = ctx[3]->memory; - uint64_t* h0 = reinterpret_cast(ctx[0]->state); - uint64_t* h1 = reinterpret_cast(ctx[1]->state); - uint64_t* h2 = reinterpret_cast(ctx[2]->state); - uint64_t* h3 = reinterpret_cast(ctx[3]->state); - - CONST_INIT(ctx[0], 0); - CONST_INIT(ctx[1], 1); - CONST_INIT(ctx[2], 2); - CONST_INIT(ctx[3], 3); - VARIANT2_SET_ROUNDING_MODE(); - - uint64_t idx0, idx1, idx2, idx3; - idx0 = _mm_cvtsi128_si64(ax0); - idx1 = 
_mm_cvtsi128_si64(ax1); - idx2 = _mm_cvtsi128_si64(ax2); - idx3 = _mm_cvtsi128_si64(ax3); - - for (size_t i = 0; i < ITERATIONS; i++) - { - uint64_t hi, lo; - __m128i *ptr0, *ptr1, *ptr2, *ptr3; - - CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0); - CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2); - CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3); - - CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0); - CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP2(ax2, bx20, bx21, cx2, l2, ptr2, idx2); - CN_STEP2(ax3, bx30, bx31, cx3, l3, ptr3, idx3); - - CN_STEP3(0, ax0, bx00, bx01, cx0, l0, ptr0, idx0); - CN_STEP3(1, ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP3(2, ax2, bx20, bx21, cx2, l2, ptr2, idx2); - CN_STEP3(3, ax3, bx30, bx31, cx3, l3, ptr3, idx3); - - CN_STEP4(0, ax0, bx00, bx01, cx0, l0, mc0, ptr0, idx0); - CN_STEP4(1, ax1, bx10, bx11, cx1, l1, mc1, ptr1, idx1); - CN_STEP4(2, ax2, bx20, bx21, cx2, l2, mc2, ptr2, idx2); - CN_STEP4(3, ax3, bx30, bx31, cx3, l3, mc3, ptr3, idx3); - } - - for (size_t i = 0; i < 4; i++) { - cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state)); - xmrig::keccakf(reinterpret_cast(ctx[i]->state), 24); - extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i); - } -} - - -template -inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::cn_select_mask(); - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t MEM = xmrig::cn_select_memory(); - constexpr xmrig::Variant BASE = xmrig::cn_base_variant(); - - if (BASE == xmrig::VARIANT_1 && size < 43) { - memset(output, 0, 32 * 5); - return; - } - - for (size_t i = 0; i < 5; i++) { - xmrig::keccak(input + size * i, size, ctx[i]->state); - cn_explode_scratchpad(reinterpret_cast<__m128i*>(ctx[i]->state), 
reinterpret_cast<__m128i*>(ctx[i]->memory)); - } - - uint8_t* l0 = ctx[0]->memory; - uint8_t* l1 = ctx[1]->memory; - uint8_t* l2 = ctx[2]->memory; - uint8_t* l3 = ctx[3]->memory; - uint8_t* l4 = ctx[4]->memory; - uint64_t* h0 = reinterpret_cast(ctx[0]->state); - uint64_t* h1 = reinterpret_cast(ctx[1]->state); - uint64_t* h2 = reinterpret_cast(ctx[2]->state); - uint64_t* h3 = reinterpret_cast(ctx[3]->state); - uint64_t* h4 = reinterpret_cast(ctx[4]->state); - - CONST_INIT(ctx[0], 0); - CONST_INIT(ctx[1], 1); - CONST_INIT(ctx[2], 2); - CONST_INIT(ctx[3], 3); - CONST_INIT(ctx[4], 4); - VARIANT2_SET_ROUNDING_MODE(); - - uint64_t idx0, idx1, idx2, idx3, idx4; - idx0 = _mm_cvtsi128_si64(ax0); - idx1 = _mm_cvtsi128_si64(ax1); - idx2 = _mm_cvtsi128_si64(ax2); - idx3 = _mm_cvtsi128_si64(ax3); - idx4 = _mm_cvtsi128_si64(ax4); - - for (size_t i = 0; i < ITERATIONS; i++) - { - uint64_t hi, lo; - __m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4; - - CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0); - CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2); - CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3); - CN_STEP1(ax4, bx40, bx41, cx4, l4, ptr4, idx4); - - CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0); - CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP2(ax2, bx20, bx21, cx2, l2, ptr2, idx2); - CN_STEP2(ax3, bx30, bx31, cx3, l3, ptr3, idx3); - CN_STEP2(ax4, bx40, bx41, cx4, l4, ptr4, idx4); - - CN_STEP3(0, ax0, bx00, bx01, cx0, l0, ptr0, idx0); - CN_STEP3(1, ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP3(2, ax2, bx20, bx21, cx2, l2, ptr2, idx2); - CN_STEP3(3, ax3, bx30, bx31, cx3, l3, ptr3, idx3); - CN_STEP3(4, ax4, bx40, bx41, cx4, l4, ptr4, idx4); - - CN_STEP4(0, ax0, bx00, bx01, cx0, l0, mc0, ptr0, idx0); - CN_STEP4(1, ax1, bx10, bx11, cx1, l1, mc1, ptr1, idx1); - CN_STEP4(2, ax2, bx20, bx21, cx2, l2, mc2, ptr2, idx2); - CN_STEP4(3, ax3, bx30, bx31, cx3, l3, mc3, ptr3, idx3); - CN_STEP4(4, ax4, bx40, bx41, cx4, l4, mc4, ptr4, 
idx4); - } - - for (size_t i = 0; i < 5; i++) { - cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state)); - xmrig::keccakf(reinterpret_cast(ctx[i]->state), 24); - extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i); - } -} - -#endif /* XMRIG_CRYPTONIGHT_X86_H */ diff --git a/src/crypto/CryptonightR_gen.cpp b/src/crypto/CryptonightR_gen.cpp deleted file mode 100644 index 3fba49cd..00000000 --- a/src/crypto/CryptonightR_gen.cpp +++ /dev/null @@ -1,187 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include -#include "crypto/CryptoNight_monero.h" - -typedef void(*void_func)(); - -#include "crypto/asm/CryptonightR_template.h" -#include "Mem.h" - - -static inline void add_code(uint8_t* &p, void (*p1)(), void (*p2)()) -{ - const ptrdiff_t size = reinterpret_cast(p2) - reinterpret_cast(p1); - if (size > 0) { - memcpy(p, reinterpret_cast(p1), size); - p += size; - } -} - -static inline void add_random_math(uint8_t* &p, const V4_Instruction* code, int code_size, const void_func* instructions, const void_func* instructions_mov, bool is_64_bit, xmrig::Assembly ASM) -{ - uint32_t prev_rot_src = (uint32_t)(-1); - - for (int i = 0;; ++i) { - const V4_Instruction inst = code[i]; - if (inst.opcode == RET) { - break; - } - - uint8_t opcode = (inst.opcode == MUL) ? inst.opcode : (inst.opcode + 2); - uint8_t dst_index = inst.dst_index; - uint8_t src_index = inst.src_index; - - const uint32_t a = inst.dst_index; - const uint32_t b = inst.src_index; - const uint8_t c = opcode | (dst_index << V4_OPCODE_BITS) | (((src_index == 8) ? dst_index : src_index) << (V4_OPCODE_BITS + V4_DST_INDEX_BITS)); - - switch (inst.opcode) { - case ROR: - case ROL: - if (b != prev_rot_src) { - prev_rot_src = b; - add_code(p, instructions_mov[c], instructions_mov[c + 1]); - } - break; - } - - if (a == prev_rot_src) { - prev_rot_src = (uint32_t)(-1); - } - - void_func begin = instructions[c]; - - if ((ASM = xmrig::ASM_BULLDOZER) && (inst.opcode == MUL) && !is_64_bit) { - // AMD Bulldozer has latency 4 for 32-bit IMUL and 6 for 64-bit IMUL - // Always use 32-bit IMUL for AMD Bulldozer in 32-bit mode - skip prefix 0x48 and change 0x49 to 0x41 - uint8_t* prefix = reinterpret_cast(begin); - - if (*prefix == 0x49) { - *(p++) = 0x41; - } - - begin = reinterpret_cast(prefix + 1); - } - - add_code(p, begin, instructions[c + 1]); - - if (inst.opcode == ADD) { - *(uint32_t*)(p - sizeof(uint32_t) - (is_64_bit ? 
3 : 0)) = inst.C; - if (is_64_bit) { - prev_rot_src = (uint32_t)(-1); - } - } - } -} - -void wow_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - uint8_t* p0 = reinterpret_cast(machine_code); - uint8_t* p = p0; - - add_code(p, CryptonightWOW_template_part1, CryptonightWOW_template_part2); - add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); - add_code(p, CryptonightWOW_template_part2, CryptonightWOW_template_part3); - *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightWOW_template_mainloop) - ((const uint8_t*)CryptonightWOW_template_part1)) - (p - p0)); - add_code(p, CryptonightWOW_template_part3, CryptonightWOW_template_end); - - Mem::flushInstructionCache(machine_code, p - p0); -} - -void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - uint8_t* p0 = reinterpret_cast(machine_code); - uint8_t* p = p0; - - add_code(p, CryptonightR_template_part1, CryptonightR_template_part2); - add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); - add_code(p, CryptonightR_template_part2, CryptonightR_template_part3); - *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0)); - add_code(p, CryptonightR_template_part3, CryptonightR_template_end); - - Mem::flushInstructionCache(machine_code, p - p0); -} - -void wow_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - uint8_t* p0 = reinterpret_cast(machine_code); - uint8_t* p = p0; - - add_code(p, CryptonightWOW_template_double_part1, CryptonightWOW_template_double_part2); - add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); - add_code(p, CryptonightWOW_template_double_part2, CryptonightWOW_template_double_part3); - add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); - 
add_code(p, CryptonightWOW_template_double_part3, CryptonightWOW_template_double_part4); - *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightWOW_template_double_mainloop) - ((const uint8_t*)CryptonightWOW_template_double_part1)) - (p - p0)); - add_code(p, CryptonightWOW_template_double_part4, CryptonightWOW_template_double_end); - - Mem::flushInstructionCache(machine_code, p - p0); -} - -void v4_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - uint8_t* p0 = reinterpret_cast(machine_code); - uint8_t* p = p0; - - add_code(p, CryptonightR_template_double_part1, CryptonightR_template_double_part2); - add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); - add_code(p, CryptonightR_template_double_part2, CryptonightR_template_double_part3); - add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); - add_code(p, CryptonightR_template_double_part3, CryptonightR_template_double_part4); - *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightR_template_double_mainloop) - ((const uint8_t*)CryptonightR_template_double_part1)) - (p - p0)); - add_code(p, CryptonightR_template_double_part4, CryptonightR_template_double_end); - - Mem::flushInstructionCache(machine_code, p - p0); -} - -void wow_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - uint8_t* p0 = reinterpret_cast(machine_code); - uint8_t* p = p0; - - add_code(p, CryptonightWOW_soft_aes_template_part1, CryptonightWOW_soft_aes_template_part2); - add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); - add_code(p, CryptonightWOW_soft_aes_template_part2, CryptonightWOW_soft_aes_template_part3); - *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightWOW_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightWOW_soft_aes_template_part1)) - (p - p0)); - add_code(p, CryptonightWOW_soft_aes_template_part3, 
CryptonightWOW_soft_aes_template_end); - - Mem::flushInstructionCache(machine_code, p - p0); -} - -void v4_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - uint8_t* p0 = reinterpret_cast(machine_code); - uint8_t* p = p0; - - add_code(p, CryptonightR_soft_aes_template_part1, CryptonightR_soft_aes_template_part2); - add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); - add_code(p, CryptonightR_soft_aes_template_part2, CryptonightR_soft_aes_template_part3); - *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightR_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightR_soft_aes_template_part1)) - (p - p0)); - add_code(p, CryptonightR_soft_aes_template_part3, CryptonightR_soft_aes_template_end); - - Mem::flushInstructionCache(machine_code, p - p0); -} diff --git a/src/crypto/SSE2NEON.h b/src/crypto/SSE2NEON.h deleted file mode 100644 index 6a00448d..00000000 --- a/src/crypto/SSE2NEON.h +++ /dev/null @@ -1,1497 +0,0 @@ -#ifndef SSE2NEON_H -#define SSE2NEON_H - -// This header file provides a simple API translation layer -// between SSE intrinsics to their corresponding ARM NEON versions -// -// This header file does not (yet) translate *all* of the SSE intrinsics. -// Since this is in support of a specific porting effort, I have only -// included the intrinsics I needed to get my port to work. -// -// Questions/Comments/Feedback send to: jratcliffscarab@gmail.com -// -// If you want to improve or add to this project, send me an -// email and I will probably approve your access to the depot. -// -// Project is located here: -// -// https://github.com/jratcliff63367/sse2neon -// -// Show your appreciation for open source by sending me a bitcoin tip to the following -// address. -// -// TipJar: 1PzgWDSyq4pmdAXRH8SPUtta4SWGrt4B1p : -// https://blockchain.info/address/1PzgWDSyq4pmdAXRH8SPUtta4SWGrt4B1p -// -// -// Contributors to this project are: -// -// John W. 
Ratcliff : jratcliffscarab@gmail.com -// Brandon Rowlett : browlett@nvidia.com -// Ken Fast : kfast@gdeb.com -// Eric van Beurden : evanbeurden@nvidia.com -// Alexander Potylitsin : apotylitsin@nvidia.com -// -// -// ********************************************************************************************************************* -// apoty: March 17, 2017 -// Current version was changed in most to fix issues and potential issues. -// All unit tests were rewritten as a part of forge lib project to cover all implemented functions. -// ********************************************************************************************************************* -// Release notes for January 20, 2017 version: -// -// The unit tests have been refactored. They no longer assert on an error, instead they return a pass/fail condition -// The unit-tests now test 10,000 random float and int values against each intrinsic. -// -// SSE2NEON now supports 95 SSE intrinsics. 39 of them have formal unit tests which have been implemented and -// fully tested on NEON/ARM. The remaining 56 still need unit tests implemented. -// -// A struct is now defined in this header file called 'SIMDVec' which can be used by applications which -// attempt to access the contents of an _m128 struct directly. It is important to note that accessing the __m128 -// struct directly is bad coding practice by Microsoft: @see: https://msdn.microsoft.com/en-us/library/ayeb3ayc.aspx -// -// However, some legacy source code may try to access the contents of an __m128 struct directly so the developer -// can use the SIMDVec as an alias for it. Any casting must be done manually by the developer, as you cannot -// cast or otherwise alias the base NEON data type for intrinsic operations. -// -// A bug was found with the _mm_shuffle_ps intrinsic. 
If the shuffle permutation was not one of the ones with -// a custom/unique implementation causing it to fall through to the default shuffle implementation it was failing -// to return the correct value. This is now fixed. -// -// A bug was found with the _mm_cvtps_epi32 intrinsic. This converts floating point values to integers. -// It was not honoring the correct rounding mode. In SSE the default rounding mode when converting from float to int -// is to use 'round to even' otherwise known as 'bankers rounding'. ARMv7 did not support this feature but ARMv8 does. -// As it stands today, this header file assumes ARMv8. If you are trying to target really old ARM devices, you may get -// a build error. -// -// Support for a number of new intrinsics was added, however, none of them yet have unit-tests to 100% confirm they are -// producing the correct results on NEON. These unit tests will be added as soon as possible. -// -// Here is the list of new instrinsics which have been added: -// -// _mm_cvtss_f32 : extracts the lower order floating point value from the parameter -// _mm_add_ss : adds the scalar single - precision floating point values of a and b -// _mm_div_ps : Divides the four single - precision, floating - point values of a and b. -// _mm_div_ss : Divides the scalar single - precision floating point value of a by b. -// _mm_sqrt_ss : Computes the approximation of the square root of the scalar single - precision floating point value of in. -// _mm_rsqrt_ps : Computes the approximations of the reciprocal square roots of the four single - precision floating point values of in. -// _mm_comilt_ss : Compares the lower single - precision floating point scalar values of a and b using a less than operation -// _mm_comigt_ss : Compares the lower single - precision floating point scalar values of a and b using a greater than operation. 
-// _mm_comile_ss : Compares the lower single - precision floating point scalar values of a and b using a less than or equal operation. -// _mm_comige_ss : Compares the lower single - precision floating point scalar values of a and b using a greater than or equal operation. -// _mm_comieq_ss : Compares the lower single - precision floating point scalar values of a and b using an equality operation. -// _mm_comineq_s : Compares the lower single - precision floating point scalar values of a and b using an inequality operation -// _mm_unpackhi_epi8 : Interleaves the upper 8 signed or unsigned 8 - bit integers in a with the upper 8 signed or unsigned 8 - bit integers in b. -// _mm_unpackhi_epi16: Interleaves the upper 4 signed or unsigned 16 - bit integers in a with the upper 4 signed or unsigned 16 - bit integers in b. -// -// ********************************************************************************************************************* -/* -** The MIT license: -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and associated documentation files (the "Software"), to deal -** in the Software without restriction, including without limitation the rights -** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -** copies of the Software, and to permit persons to whom the Software is furnished -** to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in all -** copies or substantial portions of the Software. - -** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#define ENABLE_CPP_VERSION 0 - -#if defined(__GNUC__) || defined(__clang__) -# pragma push_macro("FORCE_INLINE") -# pragma push_macro("ALIGN_STRUCT") -# define FORCE_INLINE static inline __attribute__((always_inline)) -# define ALIGN_STRUCT(x) __attribute__((aligned(x))) -#else -# error "Macro name collisions may happens with unknown compiler" -# define FORCE_INLINE static inline -# define ALIGN_STRUCT(x) __declspec(align(x)) -#endif - -#include -#include "arm_neon.h" - - -/*******************************************************/ -/* MACRO for shuffle parameter for _mm_shuffle_ps(). */ -/* Argument fp3 is a digit[0123] that represents the fp*/ -/* from argument "b" of mm_shuffle_ps that will be */ -/* placed in fp3 of result. fp2 is the same for fp2 in */ -/* result. fp1 is a digit[0123] that represents the fp */ -/* from argument "a" of mm_shuffle_ps that will be */ -/* places in fp1 of result. 
fp0 is the same for fp0 of */ -/* result */ -/*******************************************************/ -#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \ - (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) - -/* indicate immediate constant argument in a given range */ -#define __constrange(a,b) \ - const - -typedef float32x4_t __m128; -typedef int32x4_t __m128i; - - -// ****************************************** -// type-safe casting between types -// ****************************************** - -#define vreinterpretq_m128_f16(x) \ - vreinterpretq_f32_f16(x) - -#define vreinterpretq_m128_f32(x) \ - (x) - -#define vreinterpretq_m128_f64(x) \ - vreinterpretq_f32_f64(x) - - -#define vreinterpretq_m128_u8(x) \ - vreinterpretq_f32_u8(x) - -#define vreinterpretq_m128_u16(x) \ - vreinterpretq_f32_u16(x) - -#define vreinterpretq_m128_u32(x) \ - vreinterpretq_f32_u32(x) - -#define vreinterpretq_m128_u64(x) \ - vreinterpretq_f32_u64(x) - - -#define vreinterpretq_m128_s8(x) \ - vreinterpretq_f32_s8(x) - -#define vreinterpretq_m128_s16(x) \ - vreinterpretq_f32_s16(x) - -#define vreinterpretq_m128_s32(x) \ - vreinterpretq_f32_s32(x) - -#define vreinterpretq_m128_s64(x) \ - vreinterpretq_f32_s64(x) - - -#define vreinterpretq_f16_m128(x) \ - vreinterpretq_f16_f32(x) - -#define vreinterpretq_f32_m128(x) \ - (x) - -#define vreinterpretq_f64_m128(x) \ - vreinterpretq_f64_f32(x) - - -#define vreinterpretq_u8_m128(x) \ - vreinterpretq_u8_f32(x) - -#define vreinterpretq_u16_m128(x) \ - vreinterpretq_u16_f32(x) - -#define vreinterpretq_u32_m128(x) \ - vreinterpretq_u32_f32(x) - -#define vreinterpretq_u64_m128(x) \ - vreinterpretq_u64_f32(x) - - -#define vreinterpretq_s8_m128(x) \ - vreinterpretq_s8_f32(x) - -#define vreinterpretq_s16_m128(x) \ - vreinterpretq_s16_f32(x) - -#define vreinterpretq_s32_m128(x) \ - vreinterpretq_s32_f32(x) - -#define vreinterpretq_s64_m128(x) \ - vreinterpretq_s64_f32(x) - - -#define vreinterpretq_m128i_s8(x) \ - vreinterpretq_s32_s8(x) - -#define 
vreinterpretq_m128i_s16(x) \ - vreinterpretq_s32_s16(x) - -#define vreinterpretq_m128i_s32(x) \ - (x) - -#define vreinterpretq_m128i_s64(x) \ - vreinterpretq_s32_s64(x) - - -#define vreinterpretq_m128i_u8(x) \ - vreinterpretq_s32_u8(x) - -#define vreinterpretq_m128i_u16(x) \ - vreinterpretq_s32_u16(x) - -#define vreinterpretq_m128i_u32(x) \ - vreinterpretq_s32_u32(x) - -#define vreinterpretq_m128i_u64(x) \ - vreinterpretq_s32_u64(x) - - -#define vreinterpretq_s8_m128i(x) \ - vreinterpretq_s8_s32(x) - -#define vreinterpretq_s16_m128i(x) \ - vreinterpretq_s16_s32(x) - -#define vreinterpretq_s32_m128i(x) \ - (x) - -#define vreinterpretq_s64_m128i(x) \ - vreinterpretq_s64_s32(x) - - -#define vreinterpretq_u8_m128i(x) \ - vreinterpretq_u8_s32(x) - -#define vreinterpretq_u16_m128i(x) \ - vreinterpretq_u16_s32(x) - -#define vreinterpretq_u32_m128i(x) \ - vreinterpretq_u32_s32(x) - -#define vreinterpretq_u64_m128i(x) \ - vreinterpretq_u64_s32(x) - - -// union intended to allow direct access to an __m128 variable using the names that the MSVC -// compiler provides. This union should really only be used when trying to access the members -// of the vector as integer values. GCC/clang allow native access to the float members through -// a simple array access operator (in C since 4.6, in C++ since 4.8). -// -// Ideally direct accesses to SIMD vectors should not be used since it can cause a performance -// hit. If it really is needed however, the original __m128 variable can be aliased with a -// pointer to this union and used to access individual components. The use of this union should -// be hidden behind a macro that is used throughout the codebase to access the members instead -// of always declaring this type of variable. -typedef union ALIGN_STRUCT(16) SIMDVec -{ - float m128_f32[4]; // as floats - do not to use this. Added for convenience. - int8_t m128_i8[16]; // as signed 8-bit integers. - int16_t m128_i16[8]; // as signed 16-bit integers. 
- int32_t m128_i32[4]; // as signed 32-bit integers. - int64_t m128_i64[2]; // as signed 64-bit integers. - uint8_t m128_u8[16]; // as unsigned 8-bit integers. - uint16_t m128_u16[8]; // as unsigned 16-bit integers. - uint32_t m128_u32[4]; // as unsigned 32-bit integers. - uint64_t m128_u64[2]; // as unsigned 64-bit integers. -} SIMDVec; - - -// ****************************************** -// Set/get methods -// ****************************************** - -// extracts the lower order floating point value from the parameter : https://msdn.microsoft.com/en-us/library/bb514059%28v=vs.120%29.aspx?f=255&MSPPError=-2147217396 -FORCE_INLINE float _mm_cvtss_f32(__m128 a) -{ - return vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); -} - -// Sets the 128-bit value to zero https://msdn.microsoft.com/en-us/library/vstudio/ys7dw0kh(v=vs.100).aspx -FORCE_INLINE __m128i _mm_setzero_si128() -{ - return vreinterpretq_m128i_s32(vdupq_n_s32(0)); -} - -// Clears the four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/tk1t2tbz(v=vs.100).aspx -FORCE_INLINE __m128 _mm_setzero_ps(void) -{ - return vreinterpretq_m128_f32(vdupq_n_f32(0)); -} - -// Sets the four single-precision, floating-point values to w. https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx -FORCE_INLINE __m128 _mm_set1_ps(float _w) -{ - return vreinterpretq_m128_f32(vdupq_n_f32(_w)); -} - -// Sets the four single-precision, floating-point values to w. https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx -FORCE_INLINE __m128 _mm_set_ps1(float _w) -{ - return vreinterpretq_m128_f32(vdupq_n_f32(_w)); -} - -// Sets the four single-precision, floating-point values to the four inputs. 
https://msdn.microsoft.com/en-us/library/vstudio/afh0zf75(v=vs.100).aspx -FORCE_INLINE __m128 _mm_set_ps(float w, float z, float y, float x) -{ - float __attribute__((aligned(16))) data[4] = { x, y, z, w }; - return vreinterpretq_m128_f32(vld1q_f32(data)); -} - -// Sets the four single-precision, floating-point values to the four inputs in reverse order. https://msdn.microsoft.com/en-us/library/vstudio/d2172ct3(v=vs.100).aspx -FORCE_INLINE __m128 _mm_setr_ps(float w, float z , float y , float x ) -{ - float __attribute__ ((aligned (16))) data[4] = { w, z, y, x }; - return vreinterpretq_m128_f32(vld1q_f32(data)); -} - -// Sets the 4 signed 32-bit integer values to i. https://msdn.microsoft.com/en-us/library/vstudio/h4xscxat(v=vs.100).aspx -FORCE_INLINE __m128i _mm_set1_epi32(int _i) -{ - return vreinterpretq_m128i_s32(vdupq_n_s32(_i)); -} - -// Sets the 4 signed 32-bit integer values. https://msdn.microsoft.com/en-us/library/vstudio/019beekt(v=vs.100).aspx -FORCE_INLINE __m128i _mm_set_epi32(int i3, int i2, int i1, int i0) -{ - int32_t __attribute__((aligned(16))) data[4] = { i0, i1, i2, i3 }; - return vreinterpretq_m128i_s32(vld1q_s32(data)); -} - -// Stores four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/s3h4ay6y(v=vs.100).aspx -FORCE_INLINE void _mm_store_ps(float *p, __m128 a) -{ - vst1q_f32(p, vreinterpretq_f32_m128(a)); -} - -// Stores four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/44e30x22(v=vs.100).aspx -FORCE_INLINE void _mm_storeu_ps(float *p, __m128 a) -{ - vst1q_f32(p, vreinterpretq_f32_m128(a)); -} - -// Stores four 32-bit integer values as (as a __m128i value) at the address p. https://msdn.microsoft.com/en-us/library/vstudio/edk11s13(v=vs.100).aspx -FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a) -{ - vst1q_s32((int32_t*) p, vreinterpretq_s32_m128i(a)); -} - -// Stores the lower single - precision, floating - point value. 
https://msdn.microsoft.com/en-us/library/tzz10fbx(v=vs.100).aspx -FORCE_INLINE void _mm_store_ss(float *p, __m128 a) -{ - vst1q_lane_f32(p, vreinterpretq_f32_m128(a), 0); -} - -// Reads the lower 64 bits of b and stores them into the lower 64 bits of a. https://msdn.microsoft.com/en-us/library/hhwf428f%28v=vs.90%29.aspx -FORCE_INLINE void _mm_storel_epi64(__m128i* a, __m128i b) -{ - uint64x1_t hi = vget_high_u64(vreinterpretq_u64_m128i(*a)); - uint64x1_t lo = vget_low_u64(vreinterpretq_u64_m128i(b)); - *a = vreinterpretq_m128i_u64(vcombine_u64(lo, hi)); -} - -// Loads a single single-precision, floating-point value, copying it into all four words https://msdn.microsoft.com/en-us/library/vstudio/5cdkf716(v=vs.100).aspx -FORCE_INLINE __m128 _mm_load1_ps(const float * p) -{ - return vreinterpretq_m128_f32(vld1q_dup_f32(p)); -} - -// Loads four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/zzd50xxt(v=vs.100).aspx -FORCE_INLINE __m128 _mm_load_ps(const float * p) -{ - return vreinterpretq_m128_f32(vld1q_f32(p)); -} - -// Loads four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/x1b16s7z%28v=vs.90%29.aspx -FORCE_INLINE __m128 _mm_loadu_ps(const float * p) -{ - // for neon, alignment doesn't matter, so _mm_load_ps and _mm_loadu_ps are equivalent for neon - return vreinterpretq_m128_f32(vld1q_f32(p)); -} - -// Loads an single - precision, floating - point value into the low word and clears the upper three words. https://msdn.microsoft.com/en-us/library/548bb9h4%28v=vs.90%29.aspx -FORCE_INLINE __m128 _mm_load_ss(const float * p) -{ - return vreinterpretq_m128_f32(vsetq_lane_f32(*p, vdupq_n_f32(0), 0)); -} - - -// ****************************************** -// Logic/Binary operations -// ****************************************** - -// Compares for inequality. 
https://msdn.microsoft.com/en-us/library/sf44thbx(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmpneq_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_u32( vmvnq_u32( vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)) ) ); -} - -// Computes the bitwise AND-NOT of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/68h7wd02(v=vs.100).aspx -FORCE_INLINE __m128 _mm_andnot_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_s32( vbicq_s32(vreinterpretq_s32_m128(b), vreinterpretq_s32_m128(a)) ); // *NOTE* argument swap -} - -// Computes the bitwise AND of the 128-bit value in b and the bitwise NOT of the 128-bit value in a. https://msdn.microsoft.com/en-us/library/vstudio/1beaceh8(v=vs.100).aspx -FORCE_INLINE __m128i _mm_andnot_si128(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s32( vbicq_s32(vreinterpretq_s32_m128i(b), vreinterpretq_s32_m128i(a)) ); // *NOTE* argument swap -} - -// Computes the bitwise AND of the 128-bit value in a and the 128-bit value in b. https://msdn.microsoft.com/en-us/library/vstudio/6d1txsa8(v=vs.100).aspx -FORCE_INLINE __m128i _mm_and_si128(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s32( vandq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)) ); -} - -// Computes the bitwise AND of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/73ck1xc5(v=vs.100).aspx -FORCE_INLINE __m128 _mm_and_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_s32( vandq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b)) ); -} - -// Computes the bitwise OR of the four single-precision, floating-point values of a and b. 
https://msdn.microsoft.com/en-us/library/vstudio/7ctdsyy0(v=vs.100).aspx -FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_s32( vorrq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b)) ); -} - -// Computes bitwise EXOR (exclusive-or) of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/ss6k3wk8(v=vs.100).aspx -FORCE_INLINE __m128 _mm_xor_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_s32( veorq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b)) ); -} - -// Computes the bitwise OR of the 128-bit value in a and the 128-bit value in b. https://msdn.microsoft.com/en-us/library/vstudio/ew8ty0db(v=vs.100).aspx -FORCE_INLINE __m128i _mm_or_si128(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s32( vorrq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)) ); -} - -// Computes the bitwise XOR of the 128-bit value in a and the 128-bit value in b. https://msdn.microsoft.com/en-us/library/fzt08www(v=vs.100).aspx -FORCE_INLINE __m128i _mm_xor_si128(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s32( veorq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)) ); -} - -// NEON does not provide this method -// Creates a 4-bit mask from the most significant bits of the four single-precision, floating-point values. 
https://msdn.microsoft.com/en-us/library/vstudio/4490ys29(v=vs.100).aspx -FORCE_INLINE int _mm_movemask_ps(__m128 a) -{ -#if ENABLE_CPP_VERSION // I am not yet convinced that the NEON version is faster than the C version of this - uint32x4_t &ia = *(uint32x4_t *)&a; - return (ia[0] >> 31) | ((ia[1] >> 30) & 2) | ((ia[2] >> 29) & 4) | ((ia[3] >> 28) & 8); -#else - static const uint32x4_t movemask = { 1, 2, 4, 8 }; - static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; - uint32x4_t t0 = vreinterpretq_u32_m128(a); - uint32x4_t t1 = vtstq_u32(t0, highbit); - uint32x4_t t2 = vandq_u32(t1, movemask); - uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2)); - return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1); -#endif -} - -// Takes the upper 64 bits of a and places it in the low end of the result -// Takes the lower 64 bits of b and places it into the high end of the result. -FORCE_INLINE __m128 _mm_shuffle_ps_1032(__m128 a, __m128 b) -{ - float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(a)); - float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b)); - return vreinterpretq_m128_f32(vcombine_f32(a32, b10)); -} - -// takes the lower two 32-bit values from a and swaps them and places in high end of result -// takes the higher two 32 bit values from b and swaps them and places in low end of result. 
-FORCE_INLINE __m128 _mm_shuffle_ps_2301(__m128 a, __m128 b) -{ - float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a))); - float32x2_t b23 = vrev64_f32(vget_high_f32(vreinterpretq_f32_m128(b))); - return vreinterpretq_m128_f32(vcombine_f32(a01, b23)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_0321(__m128 a, __m128 b) -{ - float32x2_t a21 = vget_high_f32(vextq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a), 3)); - float32x2_t b03 = vget_low_f32(vextq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b), 3)); - return vreinterpretq_m128_f32(vcombine_f32(a21, b03)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_2103(__m128 a, __m128 b) -{ - float32x2_t a03 = vget_low_f32(vextq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a), 3)); - float32x2_t b21 = vget_high_f32(vextq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b), 3)); - return vreinterpretq_m128_f32(vcombine_f32(a03, b21)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_1010(__m128 a, __m128 b) -{ - float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a)); - float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b)); - return vreinterpretq_m128_f32(vcombine_f32(a10, b10)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_1001(__m128 a, __m128 b) -{ - float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a))); - float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b)); - return vreinterpretq_m128_f32(vcombine_f32(a01, b10)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_0101(__m128 a, __m128 b) -{ - float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a))); - float32x2_t b01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(b))); - return vreinterpretq_m128_f32(vcombine_f32(a01, b01)); -} - -// keeps the low 64 bits of b in the low and puts the high 64 bits of a in the high -FORCE_INLINE __m128 _mm_shuffle_ps_3210(__m128 a, __m128 b) -{ - float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a)); - float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(b)); - return 
vreinterpretq_m128_f32(vcombine_f32(a10, b32)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_0011(__m128 a, __m128 b) -{ - float32x2_t a11 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(a)), 1); - float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); - return vreinterpretq_m128_f32(vcombine_f32(a11, b00)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_0022(__m128 a, __m128 b) -{ - float32x2_t a22 = vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(a)), 0); - float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); - return vreinterpretq_m128_f32(vcombine_f32(a22, b00)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_2200(__m128 a, __m128 b) -{ - float32x2_t a00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(a)), 0); - float32x2_t b22 = vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(b)), 0); - return vreinterpretq_m128_f32(vcombine_f32(a00, b22)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_3202(__m128 a, __m128 b) -{ - float32_t a0 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); - float32x2_t a22 = vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(a)), 0); - float32x2_t a02 = vset_lane_f32(a0, a22, 1); /* apoty: TODO: use vzip ?*/ - float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(b)); - return vreinterpretq_m128_f32(vcombine_f32(a02, b32)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_1133(__m128 a, __m128 b) -{ - float32x2_t a33 = vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(a)), 1); - float32x2_t b11 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 1); - return vreinterpretq_m128_f32(vcombine_f32(a33, b11)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_2010(__m128 a, __m128 b) -{ - float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a)); - float32_t b2 = vgetq_lane_f32(vreinterpretq_f32_m128(b), 2); - float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); - float32x2_t b20 = vset_lane_f32(b2, b00, 1); - return vreinterpretq_m128_f32(vcombine_f32(a10, b20)); -} - -FORCE_INLINE __m128 
_mm_shuffle_ps_2001(__m128 a, __m128 b) -{ - float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a))); - float32_t b2 = vgetq_lane_f32(b, 2); - float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); - float32x2_t b20 = vset_lane_f32(b2, b00, 1); - return vreinterpretq_m128_f32(vcombine_f32(a01, b20)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_2032(__m128 a, __m128 b) -{ - float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(a)); - float32_t b2 = vgetq_lane_f32(b, 2); - float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); - float32x2_t b20 = vset_lane_f32(b2, b00, 1); - return vreinterpretq_m128_f32(vcombine_f32(a32, b20)); -} - -// NEON does not support a general purpose permute intrinsic -// Currently I am not sure whether the C implementation is faster or slower than the NEON version. -// Note, this has to be expanded as a template because the shuffle value must be an immediate value. -// The same is true on SSE as well. -// Selects four specific single-precision, floating-point values from a and b, based on the mask i. https://msdn.microsoft.com/en-us/library/vstudio/5f0858x0(v=vs.100).aspx -#if ENABLE_CPP_VERSION // I am not convinced that the NEON version is faster than the C version yet. 
-FORCE_INLINE __m128 _mm_shuffle_ps_default(__m128 a, __m128 b, __constrange(0,255) int imm) -{ - __m128 ret; - ret[0] = a[imm & 0x3]; - ret[1] = a[(imm >> 2) & 0x3]; - ret[2] = b[(imm >> 4) & 0x03]; - ret[3] = b[(imm >> 6) & 0x03]; - return ret; -} -#else -#define _mm_shuffle_ps_default(a, b, imm) \ -({ \ - float32x4_t ret; \ - ret = vmovq_n_f32(vgetq_lane_f32(vreinterpretq_f32_m128(a), (imm) & 0x3)); \ - ret = vsetq_lane_f32(vgetq_lane_f32(vreinterpretq_f32_m128(a), ((imm) >> 2) & 0x3), ret, 1); \ - ret = vsetq_lane_f32(vgetq_lane_f32(vreinterpretq_f32_m128(b), ((imm) >> 4) & 0x3), ret, 2); \ - ret = vsetq_lane_f32(vgetq_lane_f32(vreinterpretq_f32_m128(b), ((imm) >> 6) & 0x3), ret, 3); \ - vreinterpretq_m128_f32(ret); \ -}) -#endif - -//FORCE_INLINE __m128 _mm_shuffle_ps(__m128 a, __m128 b, __constrange(0,255) int imm) -#define _mm_shuffle_ps(a, b, imm) \ -({ \ - __m128 ret; \ - switch (imm) \ - { \ - case _MM_SHUFFLE(1, 0, 3, 2): ret = _mm_shuffle_ps_1032((a), (b)); break; \ - case _MM_SHUFFLE(2, 3, 0, 1): ret = _mm_shuffle_ps_2301((a), (b)); break; \ - case _MM_SHUFFLE(0, 3, 2, 1): ret = _mm_shuffle_ps_0321((a), (b)); break; \ - case _MM_SHUFFLE(2, 1, 0, 3): ret = _mm_shuffle_ps_2103((a), (b)); break; \ - case _MM_SHUFFLE(1, 0, 1, 0): ret = _mm_shuffle_ps_1010((a), (b)); break; \ - case _MM_SHUFFLE(1, 0, 0, 1): ret = _mm_shuffle_ps_1001((a), (b)); break; \ - case _MM_SHUFFLE(0, 1, 0, 1): ret = _mm_shuffle_ps_0101((a), (b)); break; \ - case _MM_SHUFFLE(3, 2, 1, 0): ret = _mm_shuffle_ps_3210((a), (b)); break; \ - case _MM_SHUFFLE(0, 0, 1, 1): ret = _mm_shuffle_ps_0011((a), (b)); break; \ - case _MM_SHUFFLE(0, 0, 2, 2): ret = _mm_shuffle_ps_0022((a), (b)); break; \ - case _MM_SHUFFLE(2, 2, 0, 0): ret = _mm_shuffle_ps_2200((a), (b)); break; \ - case _MM_SHUFFLE(3, 2, 0, 2): ret = _mm_shuffle_ps_3202((a), (b)); break; \ - case _MM_SHUFFLE(1, 1, 3, 3): ret = _mm_shuffle_ps_1133((a), (b)); break; \ - case _MM_SHUFFLE(2, 0, 1, 0): ret = _mm_shuffle_ps_2010((a), (b)); 
break; \ - case _MM_SHUFFLE(2, 0, 0, 1): ret = _mm_shuffle_ps_2001((a), (b)); break; \ - case _MM_SHUFFLE(2, 0, 3, 2): ret = _mm_shuffle_ps_2032((a), (b)); break; \ - default: ret = _mm_shuffle_ps_default((a), (b), (imm)); break; \ - } \ - ret; \ -}) - -// Takes the upper 64 bits of a and places it in the low end of the result -// Takes the lower 64 bits of a and places it into the high end of the result. -FORCE_INLINE __m128i _mm_shuffle_epi_1032(__m128i a) -{ - int32x2_t a32 = vget_high_s32(vreinterpretq_s32_m128i(a)); - int32x2_t a10 = vget_low_s32(vreinterpretq_s32_m128i(a)); - return vreinterpretq_m128i_s32(vcombine_s32(a32, a10)); -} - -// takes the lower two 32-bit values from a and swaps them and places in low end of result -// takes the higher two 32 bit values from a and swaps them and places in high end of result. -FORCE_INLINE __m128i _mm_shuffle_epi_2301(__m128i a) -{ - int32x2_t a01 = vrev64_s32(vget_low_s32(vreinterpretq_s32_m128i(a))); - int32x2_t a23 = vrev64_s32(vget_high_s32(vreinterpretq_s32_m128i(a))); - return vreinterpretq_m128i_s32(vcombine_s32(a01, a23)); -} - -// rotates the least significant 32 bits into the most signficant 32 bits, and shifts the rest down -FORCE_INLINE __m128i _mm_shuffle_epi_0321(__m128i a) -{ - return vreinterpretq_m128i_s32(vextq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(a), 1)); -} - -// rotates the most significant 32 bits into the least signficant 32 bits, and shifts the rest up -FORCE_INLINE __m128i _mm_shuffle_epi_2103(__m128i a) -{ - return vreinterpretq_m128i_s32(vextq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(a), 3)); -} - -// gets the lower 64 bits of a, and places it in the upper 64 bits -// gets the lower 64 bits of a and places it in the lower 64 bits -FORCE_INLINE __m128i _mm_shuffle_epi_1010(__m128i a) -{ - int32x2_t a10 = vget_low_s32(vreinterpretq_s32_m128i(a)); - return vreinterpretq_m128i_s32(vcombine_s32(a10, a10)); -} - -// gets the lower 64 bits of a, swaps the 0 and 1 
elements, and places it in the lower 64 bits -// gets the lower 64 bits of a, and places it in the upper 64 bits -FORCE_INLINE __m128i _mm_shuffle_epi_1001(__m128i a) -{ - int32x2_t a01 = vrev64_s32(vget_low_s32(vreinterpretq_s32_m128i(a))); - int32x2_t a10 = vget_low_s32(vreinterpretq_s32_m128i(a)); - return vreinterpretq_m128i_s32(vcombine_s32(a01, a10)); -} - -// gets the lower 64 bits of a, swaps the 0 and 1 elements and places it in the upper 64 bits -// gets the lower 64 bits of a, swaps the 0 and 1 elements, and places it in the lower 64 bits -FORCE_INLINE __m128i _mm_shuffle_epi_0101(__m128i a) -{ - int32x2_t a01 = vrev64_s32(vget_low_s32(vreinterpretq_s32_m128i(a))); - return vreinterpretq_m128i_s32(vcombine_s32(a01, a01)); -} - -FORCE_INLINE __m128i _mm_shuffle_epi_2211(__m128i a) -{ - int32x2_t a11 = vdup_lane_s32(vget_low_s32(vreinterpretq_s32_m128i(a)), 1); - int32x2_t a22 = vdup_lane_s32(vget_high_s32(vreinterpretq_s32_m128i(a)), 0); - return vreinterpretq_m128i_s32(vcombine_s32(a11, a22)); -} - -FORCE_INLINE __m128i _mm_shuffle_epi_0122(__m128i a) -{ - int32x2_t a22 = vdup_lane_s32(vget_high_s32(vreinterpretq_s32_m128i(a)), 0); - int32x2_t a01 = vrev64_s32(vget_low_s32(vreinterpretq_s32_m128i(a))); - return vreinterpretq_m128i_s32(vcombine_s32(a22, a01)); -} - -FORCE_INLINE __m128i _mm_shuffle_epi_3332(__m128i a) -{ - int32x2_t a32 = vget_high_s32(vreinterpretq_s32_m128i(a)); - int32x2_t a33 = vdup_lane_s32(vget_high_s32(vreinterpretq_s32_m128i(a)), 1); - return vreinterpretq_m128i_s32(vcombine_s32(a32, a33)); -} - -//FORCE_INLINE __m128i _mm_shuffle_epi32_default(__m128i a, __constrange(0,255) int imm) -#if ENABLE_CPP_VERSION -FORCE_INLINE __m128i _mm_shuffle_epi32_default(__m128i a, __constrange(0,255) int imm) -{ - __m128i ret; - ret[0] = a[imm & 0x3]; - ret[1] = a[(imm >> 2) & 0x3]; - ret[2] = a[(imm >> 4) & 0x03]; - ret[3] = a[(imm >> 6) & 0x03]; - return ret; -} -#else -#define _mm_shuffle_epi32_default(a, imm) \ -({ \ - int32x4_t ret; \ - ret 
= vmovq_n_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm) & 0x3)); \ - ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 2) & 0x3), ret, 1); \ - ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 4) & 0x3), ret, 2); \ - ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 6) & 0x3), ret, 3); \ - vreinterpretq_m128i_s32(ret); \ -}) -#endif - -//FORCE_INLINE __m128i _mm_shuffle_epi32_splat(__m128i a, __constrange(0,255) int imm) -#if defined(__aarch64__) -#define _mm_shuffle_epi32_splat(a, imm) \ -({ \ - vreinterpretq_m128i_s32(vdupq_laneq_s32(vreinterpretq_s32_m128i(a), (imm))); \ -}) -#else -#define _mm_shuffle_epi32_splat(a, imm) \ -({ \ - vreinterpretq_m128i_s32(vdupq_n_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm)))); \ -}) -#endif - -// Shuffles the 4 signed or unsigned 32-bit integers in a as specified by imm. https://msdn.microsoft.com/en-us/library/56f67xbk%28v=vs.90%29.aspx -//FORCE_INLINE __m128i _mm_shuffle_epi32(__m128i a, __constrange(0,255) int imm) -#define _mm_shuffle_epi32(a, imm) \ -({ \ - __m128i ret; \ - switch (imm) \ - { \ - case _MM_SHUFFLE(1, 0, 3, 2): ret = _mm_shuffle_epi_1032((a)); break; \ - case _MM_SHUFFLE(2, 3, 0, 1): ret = _mm_shuffle_epi_2301((a)); break; \ - case _MM_SHUFFLE(0, 3, 2, 1): ret = _mm_shuffle_epi_0321((a)); break; \ - case _MM_SHUFFLE(2, 1, 0, 3): ret = _mm_shuffle_epi_2103((a)); break; \ - case _MM_SHUFFLE(1, 0, 1, 0): ret = _mm_shuffle_epi_1010((a)); break; \ - case _MM_SHUFFLE(1, 0, 0, 1): ret = _mm_shuffle_epi_1001((a)); break; \ - case _MM_SHUFFLE(0, 1, 0, 1): ret = _mm_shuffle_epi_0101((a)); break; \ - case _MM_SHUFFLE(2, 2, 1, 1): ret = _mm_shuffle_epi_2211((a)); break; \ - case _MM_SHUFFLE(0, 1, 2, 2): ret = _mm_shuffle_epi_0122((a)); break; \ - case _MM_SHUFFLE(3, 3, 3, 2): ret = _mm_shuffle_epi_3332((a)); break; \ - case _MM_SHUFFLE(0, 0, 0, 0): ret = _mm_shuffle_epi32_splat((a),0); break; \ - case _MM_SHUFFLE(1, 1, 1, 1): ret = 
_mm_shuffle_epi32_splat((a),1); break; \ - case _MM_SHUFFLE(2, 2, 2, 2): ret = _mm_shuffle_epi32_splat((a),2); break; \ - case _MM_SHUFFLE(3, 3, 3, 3): ret = _mm_shuffle_epi32_splat((a),3); break; \ - default: ret = _mm_shuffle_epi32_default((a), (imm)); break; \ - } \ - ret; \ -}) - -// Shuffles the upper 4 signed or unsigned 16 - bit integers in a as specified by imm. https://msdn.microsoft.com/en-us/library/13ywktbs(v=vs.100).aspx -//FORCE_INLINE __m128i _mm_shufflehi_epi16_function(__m128i a, __constrange(0,255) int imm) -#define _mm_shufflehi_epi16_function(a, imm) \ -({ \ - int16x8_t ret = vreinterpretq_s16_s32(a); \ - int16x4_t highBits = vget_high_s16(ret); \ - ret = vsetq_lane_s16(vget_lane_s16(highBits, (imm) & 0x3), ret, 4); \ - ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 2) & 0x3), ret, 5); \ - ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 4) & 0x3), ret, 6); \ - ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 6) & 0x3), ret, 7); \ - vreinterpretq_s32_s16(ret); \ -}) - -//FORCE_INLINE __m128i _mm_shufflehi_epi16(__m128i a, __constrange(0,255) int imm) -#define _mm_shufflehi_epi16(a, imm) \ - _mm_shufflehi_epi16_function((a), (imm)) - - -// Shifts the 4 signed or unsigned 32-bit integers in a left by count bits while shifting in zeros. : https://msdn.microsoft.com/en-us/library/z2k3bbtb%28v=vs.90%29.aspx -//FORCE_INLINE __m128i _mm_slli_epi32(__m128i a, __constrange(0,255) int imm) -#define _mm_slli_epi32(a, imm) \ -({ \ - __m128i ret; \ - if ((imm) <= 0) {\ - ret = a; \ - } \ - else if ((imm) > 31) { \ - ret = _mm_setzero_si128(); \ - } \ - else { \ - ret = vreinterpretq_m128i_s32(vshlq_n_s32(vreinterpretq_s32_m128i(a), (imm))); \ - } \ - ret; \ -}) - -//Shifts the 4 signed or unsigned 32-bit integers in a right by count bits while shifting in zeros. 
https://msdn.microsoft.com/en-us/library/w486zcfa(v=vs.100).aspx -//FORCE_INLINE __m128i _mm_srli_epi32(__m128i a, __constrange(0,255) int imm) -#define _mm_srli_epi32(a, imm) \ -({ \ - __m128i ret; \ - if ((imm) <= 0) { \ - ret = a; \ - } \ - else if ((imm)> 31) { \ - ret = _mm_setzero_si128(); \ - } \ - else { \ - ret = vreinterpretq_m128i_u32(vshrq_n_u32(vreinterpretq_u32_m128i(a), (imm))); \ - } \ - ret; \ -}) - -// Shifts the 4 signed 32 - bit integers in a right by count bits while shifting in the sign bit. https://msdn.microsoft.com/en-us/library/z1939387(v=vs.100).aspx -//FORCE_INLINE __m128i _mm_srai_epi32(__m128i a, __constrange(0,255) int imm) -#define _mm_srai_epi32(a, imm) \ -({ \ - __m128i ret; \ - if ((imm) <= 0) { \ - ret = a; \ - } \ - else if ((imm) > 31) { \ - ret = vreinterpretq_m128i_s32(vshrq_n_s32(vreinterpretq_s32_m128i(a), 16)); \ - ret = vreinterpretq_m128i_s32(vshrq_n_s32(vreinterpretq_s32_m128i(ret), 16)); \ - } \ - else { \ - ret = vreinterpretq_m128i_s32(vshrq_n_s32(vreinterpretq_s32_m128i(a), (imm))); \ - } \ - ret; \ -}) - -// Shifts the 128 - bit value in a right by imm bytes while shifting in zeros.imm must be an immediate. https://msdn.microsoft.com/en-us/library/305w28yz(v=vs.100).aspx -//FORCE_INLINE _mm_srli_si128(__m128i a, __constrange(0,255) int imm) -#define _mm_srli_si128(a, imm) \ -({ \ - __m128i ret; \ - if ((imm) <= 0) { \ - ret = a; \ - } \ - else if ((imm) > 15) { \ - ret = _mm_setzero_si128(); \ - } \ - else { \ - ret = vreinterpretq_m128i_s8(vextq_s8(vreinterpretq_s8_m128i(a), vdupq_n_s8(0), (imm))); \ - } \ - ret; \ -}) - -// Shifts the 128-bit value in a left by imm bytes while shifting in zeros. imm must be an immediate. 
https://msdn.microsoft.com/en-us/library/34d3k2kt(v=vs.100).aspx -//FORCE_INLINE __m128i _mm_slli_si128(__m128i a, __constrange(0,255) int imm) -#define _mm_slli_si128(a, imm) \ -({ \ - __m128i ret; \ - if ((imm) <= 0) { \ - ret = a; \ - } \ - else if ((imm) > 15) { \ - ret = _mm_setzero_si128(); \ - } \ - else { \ - ret = vreinterpretq_m128i_s8(vextq_s8(vdupq_n_s8(0), vreinterpretq_s8_m128i(a), 16 - (imm))); \ - } \ - ret; \ -}) - -// NEON does not provide a version of this function, here is an article about some ways to repro the results. -// http://stackoverflow.com/questions/11870910/sse-mm-movemask-epi8-equivalent-method-for-arm-neon -// Creates a 16-bit mask from the most significant bits of the 16 signed or unsigned 8-bit integers in a and zero extends the upper bits. https://msdn.microsoft.com/en-us/library/vstudio/s090c8fk(v=vs.100).aspx -FORCE_INLINE int _mm_movemask_epi8(__m128i _a) -{ - uint8x16_t input = vreinterpretq_u8_m128i(_a); - static const int8_t __attribute__((aligned(16))) xr[8] = { -7, -6, -5, -4, -3, -2, -1, 0 }; - uint8x8_t mask_and = vdup_n_u8(0x80); - int8x8_t mask_shift = vld1_s8(xr); - - uint8x8_t lo = vget_low_u8(input); - uint8x8_t hi = vget_high_u8(input); - - lo = vand_u8(lo, mask_and); - lo = vshl_u8(lo, mask_shift); - - hi = vand_u8(hi, mask_and); - hi = vshl_u8(hi, mask_shift); - - lo = vpadd_u8(lo, lo); - lo = vpadd_u8(lo, lo); - lo = vpadd_u8(lo, lo); - - hi = vpadd_u8(hi, hi); - hi = vpadd_u8(hi, hi); - hi = vpadd_u8(hi, hi); - - return ((hi[0] << 8) | (lo[0] & 0xFF)); -} - - -// ****************************************** -// Math operations -// ****************************************** - -// Subtracts the four single-precision, floating-point values of a and b. 
https://msdn.microsoft.com/en-us/library/vstudio/1zad2k61(v=vs.100).aspx -FORCE_INLINE __m128 _mm_sub_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_f32(vsubq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Subtracts the 4 signed or unsigned 32-bit integers of b from the 4 signed or unsigned 32-bit integers of a. https://msdn.microsoft.com/en-us/library/vstudio/fhh866h0(v=vs.100).aspx -FORCE_INLINE __m128i _mm_sub_epi32(__m128i a, __m128i b) -{ - return vreinterpretq_m128_f32(vsubq_s32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -FORCE_INLINE __m128i _mm_sub_epi16(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s16(vsubq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); -} - -// Adds the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/c9848chc(v=vs.100).aspx -FORCE_INLINE __m128 _mm_add_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_f32(vaddq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// adds the scalar single-precision floating point values of a and b. https://msdn.microsoft.com/en-us/library/be94x2y6(v=vs.100).aspx -FORCE_INLINE __m128 _mm_add_ss(__m128 a, __m128 b) -{ - float32_t b0 = vgetq_lane_f32(vreinterpretq_f32_m128(b), 0); - float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); - //the upper values in the result must be the remnants of . - return vreinterpretq_m128_f32(vaddq_f32(a, value)); -} - -// Adds the 4 signed or unsigned 32-bit integers in a to the 4 signed or unsigned 32-bit integers in b. https://msdn.microsoft.com/en-us/library/vstudio/09xs4fkk(v=vs.100).aspx -FORCE_INLINE __m128i _mm_add_epi32(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s32(vaddq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); -} - -// Adds the 8 signed or unsigned 16-bit integers in a to the 8 signed or unsigned 16-bit integers in b. 
https://msdn.microsoft.com/en-us/library/fceha5k4(v=vs.100).aspx -FORCE_INLINE __m128i _mm_add_epi16(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s16(vaddq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); -} - -// Multiplies the 8 signed or unsigned 16-bit integers from a by the 8 signed or unsigned 16-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/9ks1472s(v=vs.100).aspx -FORCE_INLINE __m128i _mm_mullo_epi16(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s16(vmulq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); -} - -// Multiplies the 4 signed or unsigned 32-bit integers from a by the 4 signed or unsigned 32-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/bb531409(v=vs.100).aspx -FORCE_INLINE __m128i _mm_mullo_epi32(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s32(vmulq_s32(vreinterpretq_s32_m128i(a),vreinterpretq_s32_m128i(b))); -} - -// Multiplies the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/22kbk6t9(v=vs.100).aspx -FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_f32(vmulq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Divides the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/edaw8147(v=vs.100).aspx -FORCE_INLINE __m128 _mm_div_ps(__m128 a, __m128 b) -{ - float32x4_t recip0 = vrecpeq_f32(vreinterpretq_f32_m128(b)); - float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, vreinterpretq_f32_m128(b))); - return vreinterpretq_m128_f32(vmulq_f32(vreinterpretq_f32_m128(a), recip1)); -} - -// Divides the scalar single-precision floating point value of a by b. 
https://msdn.microsoft.com/en-us/library/4y73xa49(v=vs.100).aspx -FORCE_INLINE __m128 _mm_div_ss(__m128 a, __m128 b) -{ - float32_t value = vgetq_lane_f32(vreinterpretq_f32_m128(_mm_div_ps(a, b)), 0); - return vreinterpretq_m128_f32(vsetq_lane_f32(value, vreinterpretq_f32_m128(a), 0)); -} - -// This version does additional iterations to improve accuracy. Between 1 and 4 recommended. -// Computes the approximations of reciprocals of the four single-precision, floating-point values of a. https://msdn.microsoft.com/en-us/library/vstudio/796k1tty(v=vs.100).aspx -FORCE_INLINE __m128 recipq_newton(__m128 in, int n) -{ - int i; - float32x4_t recip = vrecpeq_f32(vreinterpretq_f32_m128(in)); - for (i = 0; i < n; ++i) - { - recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(in))); - } - return vreinterpretq_m128_f32(recip); -} - -// Computes the approximations of reciprocals of the four single-precision, floating-point values of a. https://msdn.microsoft.com/en-us/library/vstudio/796k1tty(v=vs.100).aspx -FORCE_INLINE __m128 _mm_rcp_ps(__m128 in) -{ - float32x4_t recip = vrecpeq_f32(vreinterpretq_f32_m128(in)); - recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(in))); - return vreinterpretq_m128_f32(recip); -} - -// Computes the approximations of square roots of the four single-precision, floating-point values of a. First computes reciprocal square roots and then reciprocals of the four values. https://msdn.microsoft.com/en-us/library/vstudio/8z67bwwk(v=vs.100).aspx -FORCE_INLINE __m128 _mm_sqrt_ps(__m128 in) -{ - float32x4_t recipsq = vrsqrteq_f32(vreinterpretq_f32_m128(in)); - float32x4_t sq = vrecpeq_f32(recipsq); - // ??? use step versions of both sqrt and recip for better accuracy? - return vreinterpretq_m128_f32(sq); -} - -// Computes the approximation of the square root of the scalar single-precision floating point value of in. 
https://msdn.microsoft.com/en-us/library/ahfsc22d(v=vs.100).aspx -FORCE_INLINE __m128 _mm_sqrt_ss(__m128 in) -{ - float32_t value = vgetq_lane_f32(vreinterpretq_f32_m128(_mm_sqrt_ps(in)), 0); - return vreinterpretq_m128_f32(vsetq_lane_f32(value, vreinterpretq_f32_m128(in), 0)); -} - -// Computes the approximations of the reciprocal square roots of the four single-precision floating point values of in. https://msdn.microsoft.com/en-us/library/22hfsh53(v=vs.100).aspx -FORCE_INLINE __m128 _mm_rsqrt_ps(__m128 in) -{ - return vreinterpretq_m128_f32(vrsqrteq_f32(vreinterpretq_f32_m128(in))); -} - -// Computes the maximums of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/ff5d607a(v=vs.100).aspx -FORCE_INLINE __m128 _mm_max_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_f32(vmaxq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Computes the minima of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/wh13kadz(v=vs.100).aspx -FORCE_INLINE __m128 _mm_min_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_f32(vminq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Computes the maximum of the two lower scalar single-precision floating point values of a and b. https://msdn.microsoft.com/en-us/library/s6db5esz(v=vs.100).aspx -FORCE_INLINE __m128 _mm_max_ss(__m128 a, __m128 b) -{ - float32_t value = vgetq_lane_f32(vmaxq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0); - return vreinterpretq_m128_f32(vsetq_lane_f32(value, vreinterpretq_f32_m128(a), 0)); -} - -// Computes the minimum of the two lower scalar single-precision floating point values of a and b. 
https://msdn.microsoft.com/en-us/library/0a9y7xaa(v=vs.100).aspx -FORCE_INLINE __m128 _mm_min_ss(__m128 a, __m128 b) -{ - float32_t value = vgetq_lane_f32(vminq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0); - return vreinterpretq_m128_f32(vsetq_lane_f32(value, vreinterpretq_f32_m128(a), 0)); -} - -// Computes the pairwise minima of the 8 signed 16-bit integers from a and the 8 signed 16-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/6te997ew(v=vs.100).aspx -FORCE_INLINE __m128i _mm_min_epi16(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s16(vminq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); -} - -// epi versions of min/max -// Computes the pariwise maximums of the four signed 32-bit integer values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/bb514055(v=vs.100).aspx -FORCE_INLINE __m128i _mm_max_epi32(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s32(vmaxq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); -} - -// Computes the pariwise minima of the four signed 32-bit integer values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/bb531476(v=vs.100).aspx -FORCE_INLINE __m128i _mm_min_epi32(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s32(vminq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); -} - -// Multiplies the 8 signed 16-bit integers from a by the 8 signed 16-bit integers from b. 
https://msdn.microsoft.com/en-us/library/vstudio/59hddw1d(v=vs.100).aspx -FORCE_INLINE __m128i _mm_mulhi_epi16(__m128i a, __m128i b) -{ - /* apoty: issue with large values because of result saturation */ - //int16x8_t ret = vqdmulhq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b)); /* =2*a*b */ - //return vreinterpretq_m128i_s16(vshrq_n_s16(ret, 1)); - int16x4_t a3210 = vget_low_s16(vreinterpretq_s16_m128i(a)); - int16x4_t b3210 = vget_low_s16(vreinterpretq_s16_m128i(b)); - int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ - int16x4_t a7654 = vget_high_s16(vreinterpretq_s16_m128i(a)); - int16x4_t b7654 = vget_high_s16(vreinterpretq_s16_m128i(b)); - int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t r = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); - return vreinterpretq_m128i_u16(r.val[1]); -} - -// Computes pairwise add of each argument as single-precision, floating-point values a and b. -//https://msdn.microsoft.com/en-us/library/yd9wecaa.aspx -FORCE_INLINE __m128 _mm_hadd_ps(__m128 a, __m128 b ) -{ -#if defined(__aarch64__) - return vreinterpretq_m128_f32(vpaddq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); //AArch64 -#else - float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a)); - float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(a)); - float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b)); - float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(b)); - return vreinterpretq_m128_f32(vcombine_f32(vpadd_f32(a10, a32), vpadd_f32(b10, b32))); -#endif -} - -// ****************************************** -// Compare operations -// ****************************************** - -// Compares for less than https://msdn.microsoft.com/en-us/library/vstudio/f330yhc8(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmplt_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_u32(vcltq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Compares for greater 
than. https://msdn.microsoft.com/en-us/library/vstudio/11dy102s(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmpgt_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_u32(vcgtq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Compares for greater than or equal. https://msdn.microsoft.com/en-us/library/vstudio/fs813y2t(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmpge_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_u32(vcgeq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Compares for less than or equal. https://msdn.microsoft.com/en-us/library/vstudio/1s75w83z(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmple_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_u32(vcleq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Compares for equality. https://msdn.microsoft.com/en-us/library/vstudio/36aectz5(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmpeq_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_u32(vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Compares the 4 signed 32-bit integers in a and the 4 signed 32-bit integers in b for less than. https://msdn.microsoft.com/en-us/library/vstudio/4ak0bf5d(v=vs.100).aspx -FORCE_INLINE __m128i _mm_cmplt_epi32(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_u32(vcltq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); -} - -// Compares the 4 signed 32-bit integers in a and the 4 signed 32-bit integers in b for greater than. https://msdn.microsoft.com/en-us/library/vstudio/1s9f2z0y(v=vs.100).aspx -FORCE_INLINE __m128i _mm_cmpgt_epi32(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_u32(vcgtq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); -} - -// Compares the four 32-bit floats in a and b to check if any values are NaN. Ordered compare between each value returns true for "orderable" and false for "not orderable" (NaN). 
https://msdn.microsoft.com/en-us/library/vstudio/0h9w00fx(v=vs.100).aspx -// see also: -// http://stackoverflow.com/questions/8627331/what-does-ordered-unordered-comparison-mean -// http://stackoverflow.com/questions/29349621/neon-isnanval-intrinsics -FORCE_INLINE __m128 _mm_cmpord_ps(__m128 a, __m128 b ) -{ - // Note: NEON does not have ordered compare builtin - // Need to compare a eq a and b eq b to check for NaN - // Do AND of results to get final - uint32x4_t ceqaa = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); - uint32x4_t ceqbb = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); - return vreinterpretq_m128_u32(vandq_u32(ceqaa, ceqbb)); -} - -// Compares the lower single-precision floating point scalar values of a and b using a less than operation. : https://msdn.microsoft.com/en-us/library/2kwe606b(v=vs.90).aspx -// Important note!! The documentation on MSDN is incorrect! If either of the values is a NAN the docs say you will get a one, but in fact, it will return a zero!! -FORCE_INLINE int _mm_comilt_ss(__m128 a, __m128 b) -{ - uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); - uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)); - return (vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0) ? 1 : 0; -} - -// Compares the lower single-precision floating point scalar values of a and b using a greater than operation. 
: https://msdn.microsoft.com/en-us/library/b0738e0t(v=vs.100).aspx -FORCE_INLINE int _mm_comigt_ss(__m128 a, __m128 b) -{ - //return vgetq_lane_u32(vcgtq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0); - uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); - uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)); - return (vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0) ? 1 : 0; -} - -// Compares the lower single-precision floating point scalar values of a and b using a less than or equal operation. : https://msdn.microsoft.com/en-us/library/1w4t7c57(v=vs.90).aspx -FORCE_INLINE int _mm_comile_ss(__m128 a, __m128 b) -{ - //return vgetq_lane_u32(vcleq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0); - uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); - uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)); - return (vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0) ? 1 : 0; -} - -// Compares the lower single-precision floating point scalar values of a and b using a greater than or equal operation. 
: https://msdn.microsoft.com/en-us/library/8t80des6(v=vs.100).aspx -FORCE_INLINE int _mm_comige_ss(__m128 a, __m128 b) -{ - //return vgetq_lane_u32(vcgeq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0); - uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); - uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)); - return (vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0) ? 1 : 0; -} - -// Compares the lower single-precision floating point scalar values of a and b using an equality operation. : https://msdn.microsoft.com/en-us/library/93yx2h2b(v=vs.100).aspx -FORCE_INLINE int _mm_comieq_ss(__m128 a, __m128 b) -{ - //return vgetq_lane_u32(vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0); - uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); - uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)); - return (vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0) ? 1 : 0; -} - -// Compares the lower single-precision floating point scalar values of a and b using an inequality operation. 
: https://msdn.microsoft.com/en-us/library/bafh5e0a(v=vs.90).aspx -FORCE_INLINE int _mm_comineq_ss(__m128 a, __m128 b) -{ - //return !vgetq_lane_u32(vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0); - uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); - uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); - return (vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0) ? 1 : 0; -} - -// according to the documentation, these intrinsics behave the same as the non-'u' versions. We'll just alias them here. -#define _mm_ucomilt_ss _mm_comilt_ss -#define _mm_ucomile_ss _mm_comile_ss -#define _mm_ucomigt_ss _mm_comigt_ss -#define _mm_ucomige_ss _mm_comige_ss -#define _mm_ucomieq_ss _mm_comieq_ss -#define _mm_ucomineq_ss _mm_comineq_ss - -// ****************************************** -// Conversions -// ****************************************** - -// Converts the four single-precision, floating-point values of a to signed 32-bit integer values using truncate. https://msdn.microsoft.com/en-us/library/vstudio/1h005y6x(v=vs.100).aspx -FORCE_INLINE __m128i _mm_cvttps_epi32(__m128 a) -{ - return vreinterpretq_m128i_s32(vcvtq_s32_f32(vreinterpretq_f32_m128(a))); -} - -// Converts the four signed 32-bit integer values of a to single-precision, floating-point values https://msdn.microsoft.com/en-us/library/vstudio/36bwxcx5(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cvtepi32_ps(__m128i a) -{ - return vreinterpretq_m128_f32(vcvtq_f32_s32(vreinterpretq_s32_m128i(a))); -} - -// Converts the four unsigned 8-bit integers in the lower 32 bits to four unsigned 32-bit integers. 
https://msdn.microsoft.com/en-us/library/bb531467%28v=vs.100%29.aspx -FORCE_INLINE __m128i _mm_cvtepu8_epi32(__m128i a) -{ - uint8x16_t u8x16 = vreinterpretq_u8_s32(a); /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ - return vreinterpretq_s32_u32(u32x4); -} - -// Converts the four signed 16-bit integers in the lower 64 bits to four signed 32-bit integers. https://msdn.microsoft.com/en-us/library/bb514079%28v=vs.100%29.aspx -FORCE_INLINE __m128i _mm_cvtepi16_epi32(__m128i a) -{ - return vreinterpretq_m128i_s32(vmovl_s16(vget_low_s16(vreinterpretq_s16_m128i(a)))); -} - -// Converts the four single-precision, floating-point values of a to signed 32-bit integer values. https://msdn.microsoft.com/en-us/library/vstudio/xdc42k5e(v=vs.100).aspx -// *NOTE*. The default rounding mode on SSE is 'round to even', which ArmV7 does not support! -// It is supported on ARMv8 however. 
-FORCE_INLINE __m128i _mm_cvtps_epi32(__m128 a) -{ -#if defined(__aarch64__) - return vcvtnq_s32_f32(a); -#else - uint32x4_t signmask = vdupq_n_u32(0x80000000); - float32x4_t half = vbslq_f32(signmask, vreinterpretq_f32_m128(a), vdupq_n_f32(0.5f)); /* +/- 0.5 */ - int32x4_t r_normal = vcvtq_s32_f32(vaddq_f32(vreinterpretq_f32_m128(a), half)); /* round to integer: [a + 0.5]*/ - int32x4_t r_trunc = vcvtq_s32_f32(vreinterpretq_f32_m128(a)); /* truncate to integer: [a] */ - int32x4_t plusone = vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(vnegq_s32(r_trunc)), 31)); /* 1 or 0 */ - int32x4_t r_even = vbicq_s32(vaddq_s32(r_trunc, plusone), vdupq_n_s32(1)); /* ([a] + {0,1}) & ~1 */ - float32x4_t delta = vsubq_f32(vreinterpretq_f32_m128(a), vcvtq_f32_s32(r_trunc)); /* compute delta: delta = (a - [a]) */ - uint32x4_t is_delta_half = vceqq_f32(delta, half); /* delta == +/- 0.5 */ - return vreinterpretq_m128i_s32(vbslq_s32(is_delta_half, r_even, r_normal)); -#endif -} - -// Moves the least significant 32 bits of a to a 32-bit integer. https://msdn.microsoft.com/en-us/library/5z7a9642%28v=vs.90%29.aspx -FORCE_INLINE int _mm_cvtsi128_si32(__m128i a) -{ - return vgetq_lane_s32(vreinterpretq_s32_m128i(a), 0); -} - -// Moves 32-bit integer a to the least significant 32 bits of an __m128 object, zero extending the upper bits. https://msdn.microsoft.com/en-us/library/ct3539ha%28v=vs.90%29.aspx -FORCE_INLINE __m128i _mm_cvtsi32_si128(int a) -{ - return vreinterpretq_m128i_s32(vsetq_lane_s32(a, vdupq_n_s32(0), 0)); -} - - -// Applies a type cast to reinterpret four 32-bit floating point values passed in as a 128-bit parameter as packed 32-bit integers. https://msdn.microsoft.com/en-us/library/bb514099.aspx -FORCE_INLINE __m128i _mm_castps_si128(__m128 a) -{ - return vreinterpretq_m128i_s32(vreinterpretq_s32_m128(a)); -} - -// Applies a type cast to reinterpret four 32-bit integers passed in as a 128-bit parameter as packed 32-bit floating point values. 
https://msdn.microsoft.com/en-us/library/bb514029.aspx -FORCE_INLINE __m128 _mm_castsi128_ps(__m128i a) -{ - return vreinterpretq_m128_s32(vreinterpretq_s32_m128i(a)); -} - -// Loads 128-bit value. : https://msdn.microsoft.com/en-us/library/atzzad1h(v=vs.80).aspx -FORCE_INLINE __m128i _mm_load_si128(const __m128i *p) -{ - return vreinterpretq_m128i_s32(vld1q_s32((int32_t *)p)); -} - -// ****************************************** -// Miscellaneous Operations -// ****************************************** - -// Packs the 16 signed 16-bit integers from a and b into 8-bit integers and saturates. https://msdn.microsoft.com/en-us/library/k4y4f7w5%28v=vs.90%29.aspx -FORCE_INLINE __m128i _mm_packs_epi16(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s8(vcombine_s8(vqmovn_s16(vreinterpretq_s16_m128i(a)), vqmovn_s16(vreinterpretq_s16_m128i(b)))); -} - -// Packs the 16 signed 16 - bit integers from a and b into 8 - bit unsigned integers and saturates. https://msdn.microsoft.com/en-us/library/07ad1wx4(v=vs.100).aspx -FORCE_INLINE __m128i _mm_packus_epi16(const __m128i a, const __m128i b) -{ - return vreinterpretq_m128i_u8(vcombine_u8(vqmovun_s16(vreinterpretq_s16_m128i(a)), vqmovun_s16(vreinterpretq_s16_m128i(b)))); -} - -// Packs the 8 signed 32-bit integers from a and b into signed 16-bit integers and saturates. https://msdn.microsoft.com/en-us/library/393t56f9%28v=vs.90%29.aspx -FORCE_INLINE __m128i _mm_packs_epi32(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s16(vcombine_s16(vqmovn_s32(vreinterpretq_s32_m128i(a)), vqmovn_s32(vreinterpretq_s32_m128i(b)))); -} - -// Interleaves the lower 8 signed or unsigned 8-bit integers in a with the lower 8 signed or unsigned 8-bit integers in b. 
https://msdn.microsoft.com/en-us/library/xf7k860c%28v=vs.90%29.aspx -FORCE_INLINE __m128i _mm_unpacklo_epi8(__m128i a, __m128i b) -{ - int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(vreinterpretq_s16_m128i(a))); - int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(vreinterpretq_s16_m128i(b))); - int8x8x2_t result = vzip_s8(a1, b1); - return vreinterpretq_m128i_s8(vcombine_s8(result.val[0], result.val[1])); -} - -// Interleaves the lower 4 signed or unsigned 16-bit integers in a with the lower 4 signed or unsigned 16-bit integers in b. https://msdn.microsoft.com/en-us/library/btxb17bw%28v=vs.90%29.aspx -FORCE_INLINE __m128i _mm_unpacklo_epi16(__m128i a, __m128i b) -{ - int16x4_t a1 = vget_low_s16(vreinterpretq_s16_m128i(a)); - int16x4_t b1 = vget_low_s16(vreinterpretq_s16_m128i(b)); - int16x4x2_t result = vzip_s16(a1, b1); - return vreinterpretq_m128i_s16(vcombine_s16(result.val[0], result.val[1])); -} - -// Interleaves the lower 2 signed or unsigned 32 - bit integers in a with the lower 2 signed or unsigned 32 - bit integers in b. https://msdn.microsoft.com/en-us/library/x8atst9d(v=vs.100).aspx -FORCE_INLINE __m128i _mm_unpacklo_epi32(__m128i a, __m128i b) -{ - int32x2_t a1 = vget_low_s32(vreinterpretq_s32_m128i(a)); - int32x2_t b1 = vget_low_s32(vreinterpretq_s32_m128i(b)); - int32x2x2_t result = vzip_s32(a1, b1); - return vreinterpretq_m128i_s32(vcombine_s32(result.val[0], result.val[1])); -} - -// Selects and interleaves the lower two single-precision, floating-point values from a and b. https://msdn.microsoft.com/en-us/library/25st103b%28v=vs.90%29.aspx -FORCE_INLINE __m128 _mm_unpacklo_ps(__m128 a, __m128 b) -{ - float32x2_t a1 = vget_low_f32(vreinterpretq_f32_m128(a)); - float32x2_t b1 = vget_low_f32(vreinterpretq_f32_m128(b)); - float32x2x2_t result = vzip_f32(a1, b1); - return vreinterpretq_m128_f32(vcombine_f32(result.val[0], result.val[1])); -} - -// Selects and interleaves the upper two single-precision, floating-point values from a and b. 
https://msdn.microsoft.com/en-us/library/skccxx7d%28v=vs.90%29.aspx -FORCE_INLINE __m128 _mm_unpackhi_ps(__m128 a, __m128 b) -{ - float32x2_t a1 = vget_high_f32(vreinterpretq_f32_m128(a)); - float32x2_t b1 = vget_high_f32(vreinterpretq_f32_m128(b)); - float32x2x2_t result = vzip_f32(a1, b1); - return vreinterpretq_m128_f32(vcombine_f32(result.val[0], result.val[1])); -} - -// Interleaves the upper 8 signed or unsigned 8-bit integers in a with the upper 8 signed or unsigned 8-bit integers in b. https://msdn.microsoft.com/en-us/library/t5h7783k(v=vs.100).aspx -FORCE_INLINE __m128i _mm_unpackhi_epi8(__m128i a, __m128i b) -{ - int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(vreinterpretq_s16_m128i(a))); - int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(vreinterpretq_s16_m128i(b))); - int8x8x2_t result = vzip_s8(a1, b1); - return vreinterpretq_m128i_s8(vcombine_s8(result.val[0], result.val[1])); -} - -// Interleaves the upper 4 signed or unsigned 16-bit integers in a with the upper 4 signed or unsigned 16-bit integers in b. https://msdn.microsoft.com/en-us/library/03196cz7(v=vs.100).aspx -FORCE_INLINE __m128i _mm_unpackhi_epi16(__m128i a, __m128i b) -{ - int16x4_t a1 = vget_high_s16(vreinterpretq_s16_m128i(a)); - int16x4_t b1 = vget_high_s16(vreinterpretq_s16_m128i(b)); - int16x4x2_t result = vzip_s16(a1, b1); - return vreinterpretq_m128i_s16(vcombine_s16(result.val[0], result.val[1])); -} - -// Interleaves the upper 2 signed or unsigned 32-bit integers in a with the upper 2 signed or unsigned 32-bit integers in b. https://msdn.microsoft.com/en-us/library/65sa7cbs(v=vs.100).aspx -FORCE_INLINE __m128i _mm_unpackhi_epi32(__m128i a, __m128i b) -{ - int32x2_t a1 = vget_high_s32(vreinterpretq_s32_m128i(a)); - int32x2_t b1 = vget_high_s32(vreinterpretq_s32_m128i(b)); - int32x2x2_t result = vzip_s32(a1, b1); - return vreinterpretq_m128i_s32(vcombine_s32(result.val[0], result.val[1])); -} - -// Extracts the selected signed or unsigned 16-bit integer from a and zero extends. 
https://msdn.microsoft.com/en-us/library/6dceta0c(v=vs.100).aspx -//FORCE_INLINE int _mm_extract_epi16(__m128i a, __constrange(0,8) int imm) -#define _mm_extract_epi16(a, imm) \ -({ \ - (vgetq_lane_s16(vreinterpretq_s16_m128i(a), (imm)) & 0x0000ffffUL); \ -}) - -// Inserts the least significant 16 bits of b into the selected 16-bit integer of a. https://msdn.microsoft.com/en-us/library/kaze8hz1%28v=vs.100%29.aspx -//FORCE_INLINE __m128i _mm_insert_epi16(__m128i a, const int b, __constrange(0,8) int imm) -#define _mm_insert_epi16(a, b, imm) \ -({ \ - vreinterpretq_m128i_s16(vsetq_lane_s16((b), vreinterpretq_s16_m128i(a), (imm))); \ -}) - -// ****************************************** -// Streaming Extensions -// ****************************************** - -// Guarantees that every preceding store is globally visible before any subsequent store. https://msdn.microsoft.com/en-us/library/5h2w73d1%28v=vs.90%29.aspx -FORCE_INLINE void _mm_sfence(void) -{ - __sync_synchronize(); -} - -// Stores the data in a to the address p without polluting the caches. If the cache line containing address p is already in the cache, the cache will be updated.Address p must be 16 - byte aligned. https://msdn.microsoft.com/en-us/library/ba08y07y%28v=vs.90%29.aspx -FORCE_INLINE void _mm_stream_si128(__m128i *p, __m128i a) -{ - *p = a; -} - -// Cache line containing p is flushed and invalidated from all caches in the coherency domain. : https://msdn.microsoft.com/en-us/library/ba08y07y(v=vs.100).aspx -FORCE_INLINE void _mm_clflush(void const*p) -{ - // no corollary for Neon? -} - -#if defined(__GNUC__) || defined(__clang__) -# pragma pop_macro("ALIGN_STRUCT") -# pragma pop_macro("FORCE_INLINE") -#endif - -#endif diff --git a/src/crypto/argon2_hasher/common/DLLExport.h b/src/crypto/argon2_hasher/common/DLLExport.h new file mode 100644 index 00000000..3019914f --- /dev/null +++ b/src/crypto/argon2_hasher/common/DLLExport.h @@ -0,0 +1,16 @@ +// +// Created by Haifa Bogdan Adnan on 04.11.2018. 
+// + +#ifndef ARGON2_DLLEXPORT_H +#define ARGON2_DLLEXPORT_H + +#undef DLLEXPORT + +#ifndef _WIN64 + #define DLLEXPORT +#else + #define DLLEXPORT __declspec(dllexport) +#endif + +#endif //ARGON2_DLLEXPORT_H diff --git a/src/crypto/argon2_hasher/common/DLLImport.h b/src/crypto/argon2_hasher/common/DLLImport.h new file mode 100644 index 00000000..1946a4a2 --- /dev/null +++ b/src/crypto/argon2_hasher/common/DLLImport.h @@ -0,0 +1,16 @@ +// +// Created by Haifa Bogdan Adnan on 04.11.2018. +// + +#ifndef ARGON2_DLLIMPORT_H +#define ARGON2_DLLIMPORT_H + +#ifndef DLLEXPORT + #ifndef _WIN64 + #define DLLEXPORT + #else + #define DLLEXPORT __declspec(dllimport) + #endif +#endif + +#endif //ARGON2_DLLIMPORT_H diff --git a/src/crypto/argon2_hasher/common/common.cpp b/src/crypto/argon2_hasher/common/common.cpp new file mode 100644 index 00000000..676e5a80 --- /dev/null +++ b/src/crypto/argon2_hasher/common/common.cpp @@ -0,0 +1,21 @@ +// +// Created by Haifa Bogdan Adnan on 05/08/2018. +// + +#include "DLLExport.h" +#include "common.h" +#include + +vector getFiles(const string &folder) { + vector result; + DIR *dir; + struct dirent *ent; + if ((dir = opendir (folder.c_str())) != NULL) { + while ((ent = readdir (dir)) != NULL) { + if(ent->d_type == DT_REG) + result.push_back(ent->d_name); + } + closedir (dir); + } + return result; +} diff --git a/src/crypto/argon2_hasher/common/common.h b/src/crypto/argon2_hasher/common/common.h new file mode 100755 index 00000000..753716a0 --- /dev/null +++ b/src/crypto/argon2_hasher/common/common.h @@ -0,0 +1,56 @@ +// +// Created by Haifa Bogdan Adnan on 04/08/2018. 
+// + +#ifndef ARGON2_COMMON_H +#define ARGON2_COMMON_H + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include "DLLImport.h" + +#ifndef _WIN64 +#include +#include + +#include +#include +#include +#include +#else +#include +#endif + +#ifdef __APPLE__ +#include "../macosx/cpu_affinity.h" +#endif + +using namespace std; + +#define LOG(msg) cout< getFiles(const string &folder); + +#endif //ARGON2_COMMON_H diff --git a/src/crypto/argon2_hasher/crypt/base64.cpp b/src/crypto/argon2_hasher/crypt/base64.cpp new file mode 100644 index 00000000..12975989 --- /dev/null +++ b/src/crypto/argon2_hasher/crypt/base64.cpp @@ -0,0 +1,103 @@ +// +// Created by Haifa Bogdan Adnan on 17/08/2018. +// + +#include "crypto/argon2_hasher/common/DLLExport.h" +#include "../common/common.h" +#include "base64.h" + +static const string base64_chars = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +static inline bool is_base64(unsigned char c) { + return (isalnum(c) || (c == '+') || (c == '/')); +} + +void base64::encode(const char *input, int input_size, char *output) { + char *ret = output; + int i = 0; + int j = 0; + unsigned char char_array_3[3]; + unsigned char char_array_4[4]; + + while (input_size--) { + char_array_3[i++] = *(input++); + if (i == 3) { + char_array_4[0] = (char_array_3[0] & 0xfc) >> 2; + char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4); + char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6); + char_array_4[3] = char_array_3[2] & 0x3f; + + for(i = 0; (i <4) ; i++) + *(ret++) = base64_chars[char_array_4[i]]; + i = 0; + } + } + + if (i) + { + for(j = i; j < 3; j++) + char_array_3[j] = '\0'; + + char_array_4[0] = (char_array_3[0] & 0xfc) >> 2; + char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + 
((char_array_3[1] & 0xf0) >> 4); + char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6); + char_array_4[3] = char_array_3[2] & 0x3f; + + for (j = 0; (j < i + 1); j++) + *(ret++) = base64_chars[char_array_4[j]]; + + while((i++ < 3)) + *(ret++) = '='; + + } +} + +int base64::decode(const char *input, char *output, int output_size) { + size_t in_len = strlen(input); + int i = 0; + int j = 0; + int in_ = 0; + unsigned char char_array_4[4], char_array_3[3]; + char *ret = output; + int out_size = 0; + + while (in_len-- && ( input[in_] != '=') && is_base64(input[in_])) { + char_array_4[i++] = input[in_]; in_++; + if (i ==4) { + for (i = 0; i <4; i++) + char_array_4[i] = base64_chars.find(char_array_4[i]); + + char_array_3[0] = ( char_array_4[0] << 2 ) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + + for (i = 0; (i < 3); i++) { + out_size ++; + if(output_size < out_size) + return -1; + *(ret++) = char_array_3[i]; + } + i = 0; + } + } + + if (i) { + for (j = 0; j < i; j++) + char_array_4[j] = base64_chars.find(char_array_4[j]); + + char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + + for (j = 0; (j < i - 1); j++) { + out_size ++; + if(output_size < out_size) + return -1; + *(ret++) = char_array_3[j]; + } + } + return out_size; +} + diff --git a/src/crypto/argon2_hasher/crypt/base64.h b/src/crypto/argon2_hasher/crypt/base64.h new file mode 100644 index 00000000..2ce74b88 --- /dev/null +++ b/src/crypto/argon2_hasher/crypt/base64.h @@ -0,0 +1,14 @@ +// +// Created by Haifa Bogdan Adnan on 17/08/2018. 
+// + +#ifndef ARGON2_BASE64_H +#define ARGON2_BASE64_H + +class DLLEXPORT base64 { +public: + static void encode(const char *input, int input_size, char *output); + static int decode(const char *input, char *output, int output_size); +}; + +#endif //ARGON2_BASE64_H diff --git a/src/crypto/argon2_hasher/crypt/hex.cpp b/src/crypto/argon2_hasher/crypt/hex.cpp new file mode 100644 index 00000000..e8a86312 --- /dev/null +++ b/src/crypto/argon2_hasher/crypt/hex.cpp @@ -0,0 +1,30 @@ +// +// Created by Haifa Bogdan Adnan on 30/05/2019. +// + +#include "crypto/argon2_hasher/common/DLLExport.h" +#include "../common/common.h" +#include "hex.h" + +void hex::encode(const unsigned char *input, int input_size, char *output) { + for ( int i=0; i> 4; // hi nybble + char b2= *input & 0x0f; // lo nybble + b1+='0'; if (b1>'9') b1 += 7; // gap between '9' and 'A' + b2+='0'; if (b2>'9') b2 += 7; + *(output++)= b1; + *(output++) = b2; + input++; + } + *output = 0; +} + +int hex::decode(const char *input, unsigned char *output, int output_size) { + size_t in_len = strlen(input); + for ( int i=0; i9) b1 -= 7; + unsigned char b2= input[i+1] -'0'; if (b2>9) b2 -= 7; + *(output++) = (b1<<4) + b2; // <<4 multiplies by 16 + } + return in_len / 2; +} diff --git a/src/crypto/argon2_hasher/crypt/hex.h b/src/crypto/argon2_hasher/crypt/hex.h new file mode 100644 index 00000000..038f2f8e --- /dev/null +++ b/src/crypto/argon2_hasher/crypt/hex.h @@ -0,0 +1,14 @@ +// +// Created by Haifa Bogdan Adnan on 30/05/2019. 
+// + +#ifndef ARGON2_HEX_H +#define ARGON2_HEX_H + +class DLLEXPORT hex { +public: + static void encode(const unsigned char *input, int input_size, char *output); + static int decode(const char *input, unsigned char *output, int output_size); +}; + +#endif //ARGON2_HEX_H diff --git a/src/crypto/argon2_hasher/crypt/random_generator.cpp b/src/crypto/argon2_hasher/crypt/random_generator.cpp new file mode 100644 index 00000000..a6801266 --- /dev/null +++ b/src/crypto/argon2_hasher/crypt/random_generator.cpp @@ -0,0 +1,27 @@ +// +// Created by Haifa Bogdan Adnan on 17/08/2018. +// + +#include "crypto/argon2_hasher/common/DLLExport.h" +#include "../common/common.h" + +#include "random_generator.h" + +random_generator::random_generator() : __mt19937Gen(__randomDevice()), __mt19937Distr(0, 255) { + +} + +random_generator &random_generator::instance() { + return __instance; +} + +void random_generator::get_random_data(unsigned char *buffer, int length) { +// __thread_lock.lock(); + for(int i=0;i __mt19937Distr; + mutex __thread_lock; + + static random_generator __instance; +}; + +#endif //ARGON2_RANDOM_GENERATOR_H diff --git a/src/crypto/argon2_hasher/crypt/sha512.cpp b/src/crypto/argon2_hasher/crypt/sha512.cpp new file mode 100644 index 00000000..d94ec1bb --- /dev/null +++ b/src/crypto/argon2_hasher/crypt/sha512.cpp @@ -0,0 +1,152 @@ +#include "crypto/argon2_hasher/common/DLLExport.h" + +#include +#include +#include "sha512.h" + +const unsigned long long SHA512::sha512_k[80] = //ULL = uint64 + {0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, + 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, + 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, + 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, + 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, + 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, + 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, + 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, + 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, + 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, + 
0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, + 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, + 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, + 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, + 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, + 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, + 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, + 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, + 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, + 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, + 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, + 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, + 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, + 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, + 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, + 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, + 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, + 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, + 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, + 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, + 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, + 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, + 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, + 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, + 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, + 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, + 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, + 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, + 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, + 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL}; + +void SHA512::transform(const unsigned char *message, unsigned int block_nb) +{ + uint64 w[80]; + uint64 wv[8]; + uint64 t1, t2; + const unsigned char *sub_block; + int i, j; + for (i = 0; i < (int) block_nb; i++) { + sub_block = message + (i << 7); + for (j = 0; j < 16; j++) { + SHA2_PACK64(&sub_block[j << 3], &w[j]); + } + for (j = 16; j < 80; j++) { + w[j] = SHA512_F4(w[j - 2]) + w[j - 7] + SHA512_F3(w[j - 15]) + w[j - 16]; + } + for (j = 0; j < 8; j++) { + wv[j] = m_h[j]; + } + for (j = 0; j < 80; j++) { + t1 = wv[7] + SHA512_F2(wv[4]) + 
SHA2_CH(wv[4], wv[5], wv[6]) + + sha512_k[j] + w[j]; + t2 = SHA512_F1(wv[0]) + SHA2_MAJ(wv[0], wv[1], wv[2]); + wv[7] = wv[6]; + wv[6] = wv[5]; + wv[5] = wv[4]; + wv[4] = wv[3] + t1; + wv[3] = wv[2]; + wv[2] = wv[1]; + wv[1] = wv[0]; + wv[0] = t1 + t2; + } + for (j = 0; j < 8; j++) { + m_h[j] += wv[j]; + } + + } +} + +void SHA512::init() +{ + m_h[0] = 0x6a09e667f3bcc908ULL; + m_h[1] = 0xbb67ae8584caa73bULL; + m_h[2] = 0x3c6ef372fe94f82bULL; + m_h[3] = 0xa54ff53a5f1d36f1ULL; + m_h[4] = 0x510e527fade682d1ULL; + m_h[5] = 0x9b05688c2b3e6c1fULL; + m_h[6] = 0x1f83d9abfb41bd6bULL; + m_h[7] = 0x5be0cd19137e2179ULL; + m_len = 0; + m_tot_len = 0; +} + +void SHA512::update(const unsigned char *message, unsigned int len) +{ + unsigned int block_nb; + unsigned int new_len, rem_len, tmp_len; + const unsigned char *shifted_message; + tmp_len = SHA384_512_BLOCK_SIZE - m_len; + rem_len = len < tmp_len ? len : tmp_len; + memcpy(&m_block[m_len], message, rem_len); + if (m_len + len < SHA384_512_BLOCK_SIZE) { + m_len += len; + return; + } + new_len = len - rem_len; + block_nb = new_len / SHA384_512_BLOCK_SIZE; + shifted_message = message + rem_len; + transform(m_block, 1); + transform(shifted_message, block_nb); + rem_len = new_len % SHA384_512_BLOCK_SIZE; + memcpy(m_block, &shifted_message[block_nb << 7], rem_len); + m_len = rem_len; + m_tot_len += (block_nb + 1) << 7; +} + +void SHA512::final(unsigned char *digest) +{ + unsigned int block_nb; + unsigned int pm_len; + unsigned int len_b; + int i; + block_nb = 1 + ((SHA384_512_BLOCK_SIZE - 17) + < (m_len % SHA384_512_BLOCK_SIZE)); + len_b = (m_tot_len + m_len) << 3; + pm_len = block_nb << 7; + memset(m_block + m_len, 0, pm_len - m_len); + m_block[m_len] = 0x80; + SHA2_UNPACK32(len_b, m_block + pm_len - 4); + transform(m_block, block_nb); + for (i = 0 ; i < 8; i++) { + SHA2_UNPACK64(m_h[i], &digest[i << 3]); + } +} + +unsigned char *SHA512::hash(unsigned char *input, size_t length) +{ + unsigned char *digest = (unsigned 
char*)malloc(SHA512::DIGEST_SIZE); + memset(digest,0,SHA512::DIGEST_SIZE); + SHA512 ctx = SHA512(); + ctx.init(); + ctx.update(input, length); + ctx.final(digest); + return digest; +} diff --git a/src/crypto/argon2_hasher/crypt/sha512.h b/src/crypto/argon2_hasher/crypt/sha512.h new file mode 100644 index 00000000..5eb28326 --- /dev/null +++ b/src/crypto/argon2_hasher/crypt/sha512.h @@ -0,0 +1,70 @@ +#ifndef SHA512_H +#define SHA512_H + +#include + +class DLLEXPORT SHA512 +{ +protected: + typedef unsigned char uint8; + typedef unsigned int uint32; + typedef unsigned long long uint64; + + const static uint64 sha512_k[]; + static const unsigned int SHA384_512_BLOCK_SIZE = (1024/8); + +public: + void init(); + void update(const unsigned char *message, unsigned int len); + void final(unsigned char *digest); + static const unsigned int DIGEST_SIZE = ( 512 / 8); + + static unsigned char *hash(unsigned char *input, size_t length); +protected: + void transform(const unsigned char *message, unsigned int block_nb); + unsigned int m_tot_len; + unsigned int m_len; + unsigned char m_block[2 * SHA384_512_BLOCK_SIZE]; + uint64 m_h[8]; +}; + +#define SHA2_SHFR(x, n) (x >> n) +#define SHA2_ROTR(x, n) ((x >> n) | (x << ((sizeof(x) << 3) - n))) +#define SHA2_ROTL(x, n) ((x << n) | (x >> ((sizeof(x) << 3) - n))) +#define SHA2_CH(x, y, z) ((x & y) ^ (~x & z)) +#define SHA2_MAJ(x, y, z) ((x & y) ^ (x & z) ^ (y & z)) +#define SHA512_F1(x) (SHA2_ROTR(x, 28) ^ SHA2_ROTR(x, 34) ^ SHA2_ROTR(x, 39)) +#define SHA512_F2(x) (SHA2_ROTR(x, 14) ^ SHA2_ROTR(x, 18) ^ SHA2_ROTR(x, 41)) +#define SHA512_F3(x) (SHA2_ROTR(x, 1) ^ SHA2_ROTR(x, 8) ^ SHA2_SHFR(x, 7)) +#define SHA512_F4(x) (SHA2_ROTR(x, 19) ^ SHA2_ROTR(x, 61) ^ SHA2_SHFR(x, 6)) +#define SHA2_UNPACK32(x, str) \ +{ \ +*((str) + 3) = (uint8) ((x) ); \ +*((str) + 2) = (uint8) ((x) >> 8); \ +*((str) + 1) = (uint8) ((x) >> 16); \ +*((str) + 0) = (uint8) ((x) >> 24); \ +} +#define SHA2_UNPACK64(x, str) \ +{ \ +*((str) + 7) = (uint8) ((x) ); \ 
+*((str) + 6) = (uint8) ((x) >> 8); \ +*((str) + 5) = (uint8) ((x) >> 16); \ +*((str) + 4) = (uint8) ((x) >> 24); \ +*((str) + 3) = (uint8) ((x) >> 32); \ +*((str) + 2) = (uint8) ((x) >> 40); \ +*((str) + 1) = (uint8) ((x) >> 48); \ +*((str) + 0) = (uint8) ((x) >> 56); \ +} +#define SHA2_PACK64(str, x) \ +{ \ +*(x) = ((uint64) *((str) + 7) ) \ +| ((uint64) *((str) + 6) << 8) \ +| ((uint64) *((str) + 5) << 16) \ +| ((uint64) *((str) + 4) << 24) \ +| ((uint64) *((str) + 3) << 32) \ +| ((uint64) *((str) + 2) << 40) \ +| ((uint64) *((str) + 1) << 48) \ +| ((uint64) *((str) + 0) << 56); \ +} + +#endif diff --git a/src/crypto/argon2_hasher/hash/Hasher.cpp b/src/crypto/argon2_hasher/hash/Hasher.cpp new file mode 100755 index 00000000..cea64052 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/Hasher.cpp @@ -0,0 +1,132 @@ +// +// Created by Haifa Bogdan Adnan on 03/08/2018. +// + +#include "../common/common.h" +#include "../crypt/base64.h" +#include "../crypt/hex.h" +#include "../crypt/random_generator.h" + +#include "crypto/argon2_hasher/common/DLLExport.h" +#include "crypto/argon2_hasher/hash/argon2/Argon2.h" +#include "Hasher.h" + +vector *Hasher::m_registeredHashers = NULL; +string Hasher::m_appFolder = ""; + +typedef void (*hasherLoader)(); + +Hasher::Hasher() { + m_intensity = 0; + m_type = ""; + m_subType = ""; + m_shortSubType = ""; + m_description = ""; + + m_computingThreads = 1; + + if(m_registeredHashers == NULL) { + m_registeredHashers = new vector(); + } + + m_registeredHashers->push_back(this); +} + +Hasher::~Hasher() {}; + +string Hasher::type() { + return m_type; +} + +string Hasher::subType(bool shortName) { + if(shortName && !(m_shortSubType.empty())) { + string shortVersion = m_shortSubType; + shortVersion.erase(3); + return shortVersion; + } + else + return m_subType; +} + +string Hasher::info() { + return m_description; +} + +int Hasher::computingThreads() { + return m_computingThreads; +} + +void Hasher::loadHashers(const string &appPath) { + 
m_registeredHashers = new vector(); + + string modulePath = "."; + + size_t lastSlash = appPath.find_last_of("/\\"); + if (lastSlash != string::npos) { + modulePath = appPath.substr(0, lastSlash); + if(modulePath.empty()) { + modulePath = "."; + } + } + + m_appFolder = modulePath; + + modulePath += "/modules/"; + + vector files = getFiles(modulePath); + for(string file : files) { + if(file.find(".hsh") != string::npos) { + void *dllHandle = dlopen((modulePath + file).c_str(), RTLD_LAZY); + if(dllHandle != NULL) { + hasherLoader hasherLoaderPtr = (hasherLoader) dlsym(dllHandle, "hasherLoader"); + (*hasherLoaderPtr)(); + } + } + } +} + +vector Hasher::getHashers() { + return *m_registeredHashers; +} + +vector Hasher::getActiveHashers() { + vector filtered; + for(Hasher *hasher : *m_registeredHashers) { + if(hasher->m_intensity != 0) + filtered.push_back(hasher); + } + return filtered; +} + +vector Hasher::getHashers(const string &type) { + vector filtered; + for(Hasher *hasher : *m_registeredHashers) { + if(hasher->m_type == type) + filtered.push_back(hasher); + } + return filtered; +} + +map &Hasher::devices() { + return m_deviceInfos; +} + +void Hasher::storeDeviceInfo(int deviceId, DeviceInfo device) { + m_deviceInfosMutex.lock(); + m_deviceInfos[deviceId] = device; + m_deviceInfosMutex.unlock(); +} + +Argon2Profile *Hasher::getArgon2Profile(xmrig::Algo algorithm, xmrig::Variant variant) { + if(algorithm == xmrig::ARGON2) { + switch(variant) { + case xmrig::VARIANT_CHUKWA: + return &argon2profile_3_1_512; + case xmrig::VARIANT_CHUKWA_LITE: + return &argon2profile_4_1_256; + default: + return nullptr; + } + } + return nullptr; +} diff --git a/src/crypto/argon2_hasher/hash/Hasher.h b/src/crypto/argon2_hasher/hash/Hasher.h new file mode 100755 index 00000000..3f0c1b86 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/Hasher.h @@ -0,0 +1,63 @@ +// +// Created by Haifa Bogdan Adnan on 03/08/2018. 
+// + +#ifndef ARGON2_HASHER_H +#define ARGON2_HASHER_H + +#include "crypto/argon2_hasher/hash/argon2/Defs.h" +#include "../../../core/HasherConfig.h" +#include "../../../common/xmrig.h" + +struct DeviceInfo { + string name; + string bus_id; + double intensity; +}; + +#define REGISTER_HASHER(x) extern "C" { DLLEXPORT void hasherLoader() { x *instance = new x(); } } + +class DLLEXPORT Hasher { +public: + Hasher(); + virtual ~Hasher(); + + virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant) = 0; + virtual bool configure(xmrig::HasherConfig &config) = 0; + virtual void cleanup() = 0; + virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) = 0; + virtual size_t parallelism(int workerIdx) = 0; + virtual size_t deviceCount() = 0; + + string type(); + string subType(bool shortName = false); + + string info(); + int computingThreads(); + + map &devices(); + + static vector getHashers(const string &type); + static vector getHashers(); + static vector getActiveHashers(); + static void loadHashers(const string &appPath); + +protected: + double m_intensity; + string m_type; + string m_subType; + string m_shortSubType; //max 3 characters + string m_description; + int m_computingThreads; + static string m_appFolder; + + void storeDeviceInfo(int deviceId, DeviceInfo device); + Argon2Profile *getArgon2Profile(xmrig::Algo algorithm, xmrig::Variant variant); + +private: + static vector *m_registeredHashers; + map m_deviceInfos; + mutex m_deviceInfosMutex; +}; + +#endif //ARGON2_HASHER_H diff --git a/src/crypto/argon2_hasher/hash/argon2/Argon2.cpp b/src/crypto/argon2_hasher/hash/argon2/Argon2.cpp new file mode 100755 index 00000000..7accf8c0 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/Argon2.cpp @@ -0,0 +1,143 @@ +// +// Created by Haifa Bogdan Adnan on 05/08/2018. 
+// + +#include "../../common/common.h" +#include "../../crypt/base64.h" +#include "../../crypt/hex.h" +#include "../../crypt/random_generator.h" + +#include "blake2/blake2.h" +#include "../../common/DLLExport.h" +#include "../../../Argon2_constants.h" +#include "Argon2.h" +#include "Defs.h" + +Argon2::Argon2(argon2BlocksPrehash prehash, argon2BlocksFillerPtr filler, argon2BlocksPosthash posthash, void *memory, void *userData) { + m_prehash = prehash; + m_filler = filler; + m_posthash = posthash; + m_outputMemory = m_seedMemory = (uint8_t*)memory; + m_userData = userData; + m_threads = 1; +} + +int Argon2::generateHashes(const Argon2Profile &profile, HashData &hashData) { + if(initializeSeeds(profile, hashData)) { + if(fillBlocks(profile)) { + return encodeHashes(profile, hashData); + } + } + + return 0; +} + +bool Argon2::initializeSeeds(const Argon2Profile &profile, HashData &hashData) { + if(m_prehash != NULL) { + return (*m_prehash)(hashData.input, m_threads, (Argon2Profile*)&profile, m_userData); + } + else { + uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; + + for (int i = 0; i < m_threads; i++, (*(nonce(hashData)))++) { + initialHash(profile, blockhash, (char *) hashData.input, hashData.inSize, xmrig::ARGON2_HASHLEN); + + memset(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0, + ARGON2_PREHASH_SEED_LENGTH - + ARGON2_PREHASH_DIGEST_LENGTH); + + fillFirstBlocks(profile, blockhash, i); + } + + return true; + } +} + +bool Argon2::fillBlocks(const Argon2Profile &profile) { + m_outputMemory = (uint8_t *)(*m_filler) (m_threads, (Argon2Profile*)&profile, m_userData); + return m_outputMemory != NULL; +} + +int Argon2::encodeHashes(const Argon2Profile &profile, HashData &hashData) { + if(m_posthash != NULL) { + if((*m_posthash)(hashData.output, m_threads, (Argon2Profile*)&profile, m_userData)) { + return m_threads; + } + return 0; + } + else { + if (m_outputMemory != NULL) { + uint32_t nonceInfo = *(nonce(hashData)) - m_threads; + + for (int i = 0; i < m_threads; i++, 
nonceInfo++) { + blake2b_long((void *) (hashData.output + i * hashData.outSize), xmrig::ARGON2_HASHLEN, + (void *) (m_outputMemory + i * profile.memSize), ARGON2_BLOCK_SIZE); + memcpy(hashData.output + i * hashData.outSize + xmrig::ARGON2_HASHLEN, &nonceInfo, 4); + } + return m_threads; + } + else + return 0; + } +} + +void Argon2::initialHash(const Argon2Profile &profile, uint8_t *blockhash, const char *data, size_t dataSz,size_t outSz) { + blake2b_state BlakeHash; + uint32_t value; + + blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH); + + value = profile.thrCost; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + value = outSz; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + value = profile.memCost; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + value = profile.tmCost; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + value = ARGON2_VERSION; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + value = ARGON2_TYPE_VALUE; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + value = (uint32_t)dataSz; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + blake2b_update(&BlakeHash, (const uint8_t *)data, dataSz); + + value = xmrig::ARGON2_SALTLEN; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + blake2b_update(&BlakeHash, (const uint8_t *)data, xmrig::ARGON2_SALTLEN); + + value = 0; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); +} + +void Argon2::fillFirstBlocks(const Argon2Profile &profile, uint8_t *blockhash, int thread) { + block *blocks = (block *)(m_seedMemory + thread * profile.memSize); + size_t lane_length = profile.memCost / profile.thrCost; + + for (uint32_t l = 0; l < profile.thrCost; ++l) { + 
*((uint32_t*)(blockhash + ARGON2_PREHASH_DIGEST_LENGTH)) = 0; + *((uint32_t*)(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4)) = l; + + blake2b_long((void *)(blocks + l * lane_length), ARGON2_BLOCK_SIZE, blockhash, + ARGON2_PREHASH_SEED_LENGTH); + + *((uint32_t*)(blockhash + ARGON2_PREHASH_DIGEST_LENGTH)) = 1; + + blake2b_long((void *)(blocks + l * lane_length + 1), ARGON2_BLOCK_SIZE, blockhash, + ARGON2_PREHASH_SEED_LENGTH); + } +} + +void Argon2::setThreads(int threads) { + m_threads = threads; +} diff --git a/src/crypto/argon2_hasher/hash/argon2/Argon2.h b/src/crypto/argon2_hasher/hash/argon2/Argon2.h new file mode 100644 index 00000000..90e72d53 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/Argon2.h @@ -0,0 +1,56 @@ +// +// Created by Haifa Bogdan Adnan on 05/08/2018. +// + +#ifndef ARIOMINER_ARGON2_H +#define ARIOMINER_ARGON2_H + +#include "Defs.h" +#include "crypto/argon2_hasher/hash/Hasher.h" + +typedef bool (*argon2BlocksPrehash)(void *, int, Argon2Profile *, void *); // data_memory +typedef void *(*argon2BlocksFillerPtr)(int, Argon2Profile *, void *); +typedef bool (*argon2BlocksPosthash)(void *, int, Argon2Profile *, void *); // raw_hash_mem + +struct HashData { + uint8_t *input; + uint8_t *output; + size_t inSize; + size_t outSize; +}; + +class DLLEXPORT Argon2 { +public: + Argon2(argon2BlocksPrehash prehash, argon2BlocksFillerPtr filler, argon2BlocksPosthash posthash, void *memory, void *userData); + + int generateHashes(const Argon2Profile &profile, HashData &hashData); + + bool initializeSeeds(const Argon2Profile &profile, HashData &hashData); + bool fillBlocks(const Argon2Profile &profile); + int encodeHashes(const Argon2Profile &profile, HashData &hashData); + + void setThreads(int threads); + +private: + void initialHash(const Argon2Profile &profile, uint8_t *blockhash, const char *data, size_t dataSz, size_t outSz); + void fillFirstBlocks(const Argon2Profile &profile, uint8_t *blockhash, int thread); + + inline uint32_t *nonce(HashData 
&hashData) + { + return reinterpret_cast<uint32_t *>(hashData.input + 39); // the 32-bit nonce is addressed in place at byte offset 39 of the input buffer + } + + argon2BlocksPrehash m_prehash; + argon2BlocksFillerPtr m_filler; + argon2BlocksPosthash m_posthash; + + int m_threads; + + uint8_t *m_seedMemory; + uint8_t *m_outputMemory; + + void *m_userData; +}; + + +#endif //ARIOMINER_ARGON2_H diff --git a/src/crypto/argon2_hasher/hash/argon2/Defs.h b/src/crypto/argon2_hasher/hash/argon2/Defs.h new file mode 100755 index 00000000..3f6b7181 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/Defs.h @@ -0,0 +1,50 @@ +// +// Created by Haifa Bogdan Adnan on 06/08/2018. +// + +#ifndef ARIOMINER_DEFS_H +#define ARIOMINER_DEFS_H + +#define ARGON2_RAW_LENGTH 32 +#define ARGON2_TYPE_VALUE 2 +#define ARGON2_VERSION 0x13 + +#define ARGON2_BLOCK_SIZE 1024 +#define ARGON2_DWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 4) // parenthesized so the quotient stays a single operand inside larger expressions +#define ARGON2_QWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 8) +#define ARGON2_OWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 16) +#define ARGON2_HWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 32) +#define ARGON2_512BIT_WORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 64) +#define ARGON2_PREHASH_DIGEST_LENGTH 64 +#define ARGON2_PREHASH_SEED_LENGTH 72 + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block; + +typedef struct Argon2Profile_ { + uint32_t memCost; + uint32_t thrCost; + uint32_t tmCost; + size_t memSize; + int32_t *blockRefs; + size_t blockRefsSize; + char profileName[15]; + int32_t *segments; // { start segment / current block, stop segment (excluding) / previous block, addressing type = 0 -> i, 1 -> d } + uint32_t segSize; + uint32_t segCount; + uint32_t succesiveIdxs; // 0 - idx are precalculated, 1 - idx are successive + int pwdLen; // in dwords + int saltLen; // in dwords +} Argon2Profile; + +extern DLLEXPORT Argon2Profile argon2profile_3_1_512; +extern DLLEXPORT Argon2Profile argon2profile_4_1_256; + +#ifdef __cplusplus +} +#endif + +#endif //ARIOMINER_DEFS_H diff --git
a/src/crypto/argon2_hasher/hash/argon2/argon2profile_3_1_512.c b/src/crypto/argon2_hasher/hash/argon2/argon2profile_3_1_512.c new file mode 100644 index 00000000..9a0cbfa3 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/argon2profile_3_1_512.c @@ -0,0 +1,292 @@ +#include +#include +#include "../../common/DLLExport.h" +#include "Defs.h" + +int32_t blocks_refs_3_1_512[] = { + 2, 0, 1, + 3, 1, 1, + 4, 2, 1, + 5, 3, 1, + 6, 3, 1, + 7, 3, 1, + 8, 2, 1, + 9, 5, 1, + 10, 0, 1, + 11, 9, 1, + 12, 10, 1, + 13, 9, 1, + 14, 12, 1, + 15, 8, 1, + 16, 5, 1, + 17, 15, 1, + 18, 10, 1, + 19, 14, 1, + 20, 7, 1, + 21, 19, 1, + 22, 14, 1, + 23, 7, 1, + 24, 14, 1, + 25, 23, 1, + 26, 24, 1, + 27, 0, 1, + 28, 9, 1, + 29, 11, 1, + 30, 12, 1, + 31, 29, 1, + 32, 12, 1, + 33, 23, 1, + 34, 30, 1, + 35, 1, 1, + 36, 32, 1, + 37, 8, 1, + 38, 30, 1, + 39, 31, 1, + 40, 15, 1, + 41, 38, 1, + 42, 29, 1, + 43, 18, 1, + 44, 33, 1, + 45, 18, 1, + 46, 39, 1, + 47, 43, 1, + 48, 40, 1, + 49, 38, 1, + 50, 5, 1, + 51, 47, 1, + 52, 14, 1, + 53, 45, 1, + 54, 30, 1, + 55, 13, 1, + 56, 47, 1, + 57, 30, 1, + 58, 21, 1, + 59, 18, 1, + 60, 36, 1, + 61, 58, 1, + 62, 58, 1, + 63, 19, 1, + 64, 59, 1, + 65, 29, 1, + 66, 10, 1, + 67, 48, 1, + 68, 39, 1, + 69, 25, 1, + 70, 63, 1, + 71, 57, 1, + 72, 70, 1, + 73, 16, 1, + 74, 20, 1, + 75, 72, 1, + 76, 67, 1, + 77, 61, 1, + 78, 49, 1, + 79, 63, 1, + 80, 9, 1, + 81, 19, 1, + 82, 80, 1, + 83, 36, 1, + 84, 20, 1, + 85, 23, 1, + 86, 52, 1, + 87, 85, 1, + 88, 75, 1, + 89, 18, 1, + 90, 85, 1, + 91, 2, 1, + 92, 81, 1, + 93, 91, 1, + 94, 91, 1, + 95, 3, 1, + 96, 45, 1, + 97, 16, 1, + 98, 11, 1, + 99, 60, 1, + 100, 89, 1, + 101, 65, 1, + 102, 39, 1, + 103, 63, 1, + 104, 66, 1, + 105, 74, 1, + 106, 54, 1, + 107, 88, 1, + 108, 106, 1, + 109, 107, 1, + 110, 47, 1, + 111, 8, 1, + 112, 95, 1, + 113, 66, 1, + 114, 1, 1, + 115, 2, 1, + 116, 20, 1, + 117, 110, 1, + 118, 47, 1, + 119, 117, 1, + 120, 114, 1, + 121, 37, 1, + 122, 71, 1, + 123, 51, 1, + 124, 122, 1, + 125, 44, 1, + 
126, 92, 1, + 127, 120, 1, + 128, 123, 1, + 129, 127, 1, + 130, 11, 1, + 131, 110, 1, + 132, 93, 1, + 133, 20, 1, + 134, 58, 1, + 135, 13, 1, + 136, 73, 1, + 137, 27, 1, + 138, 94, 1, + 139, 110, 1, + 140, 96, 1, + 141, 57, 1, + 142, 137, 1, + 143, 116, 1, + 144, 119, 1, + 145, 141, 1, + 146, 73, 1, + 147, 26, 1, + 148, 103, 1, + 149, 125, 1, + 150, 146, 1, + 151, 149, 1, + 152, 28, 1, + 153, 149, 1, + 154, 125, 1, + 155, 104, 1, + 156, 61, 1, + 157, 128, 1, + 158, 156, 1, + 159, 122, 1, + 160, 96, 1, + 161, 92, 1, + 162, 160, 1, + 163, 154, 1, + 164, 88, 1, + 165, 160, 1, + 166, 134, 1, + 167, 116, 1, + 168, 23, 1, + 169, 167, 1, + 170, 100, 1, + 171, 169, 1, + 172, 169, 1, + 173, 127, 1, + 174, 0, 1, + 175, 78, 1, + 176, 155, 1, + 177, 124, 1, + 178, 138, 1, + 179, 41, 1, + 180, 156, 1, + 181, 173, 1, + 182, 122, 1, + 183, 173, 1, + 184, 112, 1, + 185, 15, 1, + 186, 183, 1, + 187, 171, 1, + 188, 163, 1, + 189, 85, 1, + 190, 45, 1, + 191, 171, 1, + 192, 139, 1, + 193, 188, 1, + 194, 192, 1, + 195, 78, 1, + 196, 5, 1, + 197, 187, 1, + 198, 180, 1, + 199, 195, 1, + 200, 102, 1, + 201, 89, 1, + 202, 165, 1, + 203, 144, 1, + 204, 171, 1, + 205, 152, 1, + 206, 53, 1, + 207, 19, 1, + 208, 206, 1, + 209, 165, 1, + 210, 208, 1, + 211, 76, 1, + 212, 177, 1, + 213, 189, 1, + 214, 43, 1, + 215, 120, 1, + 216, 122, 1, + 217, 189, 1, + 218, 45, 1, + 219, 217, 1, + 220, 207, 1, + 221, 202, 1, + 222, 169, 1, + 223, 194, 1, + 224, 213, 1, + 225, 178, 1, + 226, 175, 1, + 227, 221, 1, + 228, 212, 1, + 229, 220, 1, + 230, 227, 1, + 231, 30, 1, + 232, 34, 1, + 233, 91, 1, + 234, 231, 1, + 235, 154, 1, + 236, 100, 1, + 237, 166, 1, + 238, 216, 1, + 239, 229, 1, + 240, 177, 1, + 241, 123, 1, + 242, 172, 1, + 243, 71, 1, + 244, 241, 1, + 245, 236, 1, + 246, 109, 1, + 247, 4, 1, + 248, 246, 1, + 249, 166, 1, + 250, 248, 1, + 251, 243, 1, + 252, 248, 1, + 253, 39, 1, + 254, 98, 1, + 255, 253, 1 +}; + +int32_t segments_3_1_512[] = { // current_idx, previous_idx, seg_type 0=i 1=d + 2, 1, 0, 
+ 128, 127, 0, + 256, 255, 1, + 384, 383, 1, + 0, 511, 1, + 128, 127, 1, + 256, 255, 1, + 384, 383, 1, + 0, 511, 1, + 128, 127, 1, + 256, 255, 1, + 384, 383, 1 +}; + +DLLEXPORT Argon2Profile argon2profile_3_1_512 = { + 512, + 1, + 3, + 524288, //512 blocks of 1024 bytes + blocks_refs_3_1_512, + sizeof(blocks_refs_3_1_512) / (3 * sizeof(int32_t)), + "3_1_512", + segments_3_1_512, + 128, + 12, + 1, + 32, + 4 +}; diff --git a/src/crypto/argon2_hasher/hash/argon2/argon2profile_4_1_256.c b/src/crypto/argon2_hasher/hash/argon2/argon2profile_4_1_256.c new file mode 100644 index 00000000..59890c49 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/argon2profile_4_1_256.c @@ -0,0 +1,168 @@ +#include +#include +#include "../../common/DLLExport.h" +#include "Defs.h" + +int32_t blocks_refs_4_1_256[] = { + 2, 0, 1, + 3, 1, 1, + 4, 2, 1, + 5, 3, 1, + 6, 0, 1, + 7, 4, 1, + 8, 5, 1, + 9, 7, 1, + 10, 7, 1, + 11, 9, 1, + 12, 5, 1, + 13, 11, 1, + 14, 3, 1, + 15, 2, 1, + 16, 12, 1, + 17, 15, 1, + 18, 15, 1, + 19, 10, 1, + 20, 4, 1, + 21, 18, 1, + 22, 17, 1, + 23, 19, 1, + 24, 2, 1, + 25, 23, 1, + 26, 22, 1, + 27, 12, 1, + 28, 23, 1, + 29, 27, 1, + 30, 26, 1, + 31, 19, 1, + 32, 27, 1, + 33, 29, 1, + 34, 32, 1, + 35, 18, 1, + 36, 32, 1, + 37, 16, 1, + 38, 35, 1, + 39, 22, 1, + 40, 30, 1, + 41, 31, 1, + 42, 39, 1, + 43, 36, 1, + 44, 18, 1, + 45, 0, 1, + 46, 36, 1, + 47, 12, 1, + 48, 28, 1, + 49, 39, 1, + 50, 4, 1, + 51, 48, 1, + 52, 48, 1, + 53, 51, 1, + 54, 50, 1, + 55, 3, 1, + 56, 54, 1, + 57, 53, 1, + 58, 48, 1, + 59, 47, 1, + 60, 25, 1, + 61, 53, 1, + 62, 31, 1, + 63, 59, 1, + 64, 45, 1, + 65, 63, 1, + 66, 48, 1, + 67, 58, 1, + 68, 40, 1, + 69, 17, 1, + 70, 62, 1, + 71, 24, 1, + 72, 60, 1, + 73, 71, 1, + 74, 72, 1, + 75, 57, 1, + 76, 69, 1, + 77, 58, 1, + 78, 74, 1, + 79, 69, 1, + 80, 75, 1, + 81, 74, 1, + 82, 56, 1, + 83, 67, 1, + 84, 15, 1, + 85, 83, 1, + 86, 69, 1, + 87, 83, 1, + 88, 85, 1, + 89, 24, 1, + 90, 52, 1, + 91, 70, 1, + 92, 88, 1, + 93, 42, 1, + 94, 61, 1, + 95,
93, 1, + 96, 22, 1, + 97, 37, 1, + 98, 15, 1, + 99, 91, 1, + 100, 14, 1, + 101, 98, 1, + 102, 24, 1, + 103, 84, 1, + 104, 44, 1, + 105, 103, 1, + 106, 12, 1, + 107, 15, 1, + 108, 79, 1, + 109, 35, 1, + 110, 4, 1, + 111, 109, 1, + 112, 90, 1, + 113, 109, 1, + 114, 43, 1, + 115, 73, 1, + 116, 113, 1, + 117, 107, 1, + 118, 51, 1, + 119, 117, 1, + 120, 118, 1, + 121, 115, 1, + 122, 74, 1, + 123, 67, 1, + 124, 102, 1, + 125, 17, 1, + 126, 113, 1, + 127, 110, 1 +}; + +int32_t segments_4_1_256[] = { // current_idx, previous_idx, seg_type 0=i 1=d + 2, 1, 0, + 64, 63, 0, + 128, 127, 1, + 192, 191, 1, + 0, 255, 1, + 64, 63, 1, + 128, 127, 1, + 192, 191, 1, + 0, 255, 1, + 64, 63, 1, + 128, 127, 1, + 192, 191, 1, + 0, 255, 1, + 64, 63, 1, + 128, 127, 1, + 192, 191, 1 +}; + +DLLEXPORT Argon2Profile argon2profile_4_1_256 = { + 256, + 1, + 4, + 262144, //256 blocks of 1024 bytes + blocks_refs_4_1_256, + sizeof(blocks_refs_4_1_256) / (3 * sizeof(int32_t)), + "4_1_256", + segments_4_1_256, + 64, + 16, + 1, + 32, + 4 +}; diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-config.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-config.h new file mode 100644 index 00000000..a70cd7f0 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-config.h @@ -0,0 +1,76 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
+*/ +#ifndef BLAKE2_CONFIG_H +#define BLAKE2_CONFIG_H + +/* These don't work everywhere */ +#if defined(__SSE2__) || defined(__x86_64__) || defined(__amd64__) || defined(_M_X64) +#define HAVE_SSE2 +#endif + +#if defined(__SSSE3__) +#define HAVE_SSSE3 +#endif + +#if defined(__SSE4_1__) +#define HAVE_SSE41 +#endif + +#if defined(__AVX__) +#define HAVE_AVX +#endif + +#if defined(__AVX2__) +#define HAVE_AVX2 +#endif + +#if defined(__XOP__) +#define HAVE_XOP +#endif + + +#ifdef HAVE_AVX2 +#ifndef HAVE_AVX +#define HAVE_AVX +#endif +#endif + +#ifdef HAVE_XOP +#ifndef HAVE_AVX +#define HAVE_AVX +#endif +#endif + +#ifdef HAVE_AVX +#ifndef HAVE_SSE41 +#define HAVE_SSE41 +#endif +#endif + +#ifdef HAVE_SSE41 +#ifndef HAVE_SSSE3 +#define HAVE_SSSE3 +#endif +#endif + +#ifdef HAVE_SSSE3 +#define HAVE_SSE2 +#endif + +#if !defined(HAVE_SSE2) +#error "This code requires at least SSE2." +#endif + +#endif \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-impl.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-impl.h new file mode 100644 index 00000000..e77ad92f --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-impl.h @@ -0,0 +1,154 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. 
+ */ + +#ifndef PORTABLE_BLAKE2_IMPL_H +#define PORTABLE_BLAKE2_IMPL_H + +#include +#include + +#if defined(_MSC_VER) +#define BLAKE2_INLINE __inline +#elif defined(__GNUC__) || defined(__clang__) +#define BLAKE2_INLINE __inline__ +#else +#define BLAKE2_INLINE +#endif + +/* Argon2 Team - Begin Code */ +/* + Not an exhaustive list, but should cover the majority of modern platforms + Additionally, the code will always be correct---this is only a performance + tweak. +*/ +#if (defined(__BYTE_ORDER__) && \ + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \ + defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \ + defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \ + defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \ + defined(_M_ARM) +#define NATIVE_LITTLE_ENDIAN +#endif +/* Argon2 Team - End Code */ + +static BLAKE2_INLINE uint32_t load32(const void *src) { +#if defined(NATIVE_LITTLE_ENDIAN) + uint32_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = (const uint8_t *)src; + uint32_t w = *p++; + w |= (uint32_t)(*p++) << 8; + w |= (uint32_t)(*p++) << 16; + w |= (uint32_t)(*p++) << 24; + return w; +#endif +} + +static BLAKE2_INLINE uint64_t load64(const void *src) { +#if defined(NATIVE_LITTLE_ENDIAN) + uint64_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = (const uint8_t *)src; + uint64_t w = *p++; + w |= (uint64_t)(*p++) << 8; + w |= (uint64_t)(*p++) << 16; + w |= (uint64_t)(*p++) << 24; + w |= (uint64_t)(*p++) << 32; + w |= (uint64_t)(*p++) << 40; + w |= (uint64_t)(*p++) << 48; + w |= (uint64_t)(*p++) << 56; + return w; +#endif +} + +static BLAKE2_INLINE void store32(void *dst, uint32_t w) { +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = (uint8_t *)dst; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; +#endif +} + +static BLAKE2_INLINE void store64(void *dst, 
uint64_t w) { +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = (uint8_t *)dst; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; +#endif +} + +static BLAKE2_INLINE uint64_t load48(const void *src) { + const uint8_t *p = (const uint8_t *)src; + uint64_t w = *p++; + w |= (uint64_t)(*p++) << 8; + w |= (uint64_t)(*p++) << 16; + w |= (uint64_t)(*p++) << 24; + w |= (uint64_t)(*p++) << 32; + w |= (uint64_t)(*p++) << 40; + return w; +} + +static BLAKE2_INLINE void store48(void *dst, uint64_t w) { + uint8_t *p = (uint8_t *)dst; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; +} + +static BLAKE2_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) { + return (w >> c) | (w << (32 - c)); +} + +static BLAKE2_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) { + return (w >> c) | (w << (64 - c)); +} + +#endif diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2.h new file mode 100644 index 00000000..70e4aeb8 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2.h @@ -0,0 +1,90 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. 
The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#ifndef PORTABLE_BLAKE2_H +#define PORTABLE_BLAKE2_H + +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +enum blake2b_constant { + BLAKE2B_BLOCKBYTES = 128, + BLAKE2B_OUTBYTES = 64, + BLAKE2B_KEYBYTES = 64, + BLAKE2B_SALTBYTES = 16, + BLAKE2B_PERSONALBYTES = 16 +}; + +#pragma pack(push, 1) +typedef struct __blake2b_param { + uint8_t digest_length; /* 1 */ + uint8_t key_length; /* 2 */ + uint8_t fanout; /* 3 */ + uint8_t depth; /* 4 */ + uint32_t leaf_length; /* 8 */ + uint64_t node_offset; /* 16 */ + uint8_t node_depth; /* 17 */ + uint8_t inner_length; /* 18 */ + uint8_t reserved[14]; /* 32 */ + uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */ + uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */ +} blake2b_param; +#pragma pack(pop) + +typedef struct __blake2b_state { + uint64_t h[8]; + uint64_t t[2]; + uint64_t f[2]; + uint8_t buf[BLAKE2B_BLOCKBYTES]; + unsigned buflen; + unsigned outlen; + uint8_t last_node; +} blake2b_state; + +/* Ensure param structs have not been wrongly padded */ +/* Poor man's static_assert */ +enum { + blake2_size_check_0 = 1 / !!(CHAR_BIT == 8), + blake2_size_check_2 = + 1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT) +}; + +/* Streaming API */ +int blake2b_init(blake2b_state *S, size_t outlen); +int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, + size_t keylen); +int blake2b_init_param(blake2b_state *S, const blake2b_param *P); +int blake2b_update(blake2b_state *S, const void *in, size_t inlen); +int blake2b_update_static(blake2b_state *S, const char in, size_t inlen); +int blake2b_final(blake2b_state *S, void *out, size_t outlen); + +/* Simple API */ +int 
blake2b(void *out, size_t outlen, const void *in, size_t inlen, + const void *key, size_t keylen); + +/* Argon2 Team - Begin Code */ +int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen); +/* Argon2 Team - End Code */ + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse2.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse2.h new file mode 100644 index 00000000..f79123d8 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse2.h @@ -0,0 +1,68 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
+*/ +#ifndef BLAKE2B_LOAD_SSE2_H +#define BLAKE2B_LOAD_SSE2_H + +#define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) +#define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) +#define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13) +#define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9) +#define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15) +#define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11) +#define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7) +#define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5) +#define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2) +#define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7) +#define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1) +#define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13) +#define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4) +#define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0) +#define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2) +#define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4) +#define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6) +#define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8) +#define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0) +#define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = 
_mm_set_epi64x(m3, m11) +#define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15) +#define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14) +#define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14) +#define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13) +#define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9) +#define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2) +#define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12) +#define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1) +#define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8) +#define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6) +#define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11) +#define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3) +#define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1) +#define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4) +#define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7) +#define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6) +#define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3) +#define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12) +#define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) +#define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) +#define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13) +#define 
LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9) +#define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15) +#define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11) +#define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7) + + +#endif \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse41.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse41.h new file mode 100644 index 00000000..e8564b57 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse41.h @@ -0,0 +1,402 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
+*/ +#ifndef BLAKE2B_LOAD_SSE41_H +#define BLAKE2B_LOAD_SSE41_H + +#define LOAD_MSG_0_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m1); \ +b1 = _mm_unpacklo_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_0_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m0, m1); \ +b1 = _mm_unpackhi_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_0_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m5); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_0_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m5); \ +b1 = _mm_unpackhi_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_1_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m2); \ +b1 = _mm_unpackhi_epi64(m4, m6); \ +} while(0) + + +#define LOAD_MSG_1_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_alignr_epi8(m3, m7, 8); \ +} while(0) + + +#define LOAD_MSG_1_3(b0, b1) \ +do \ +{ \ +b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \ +b1 = _mm_unpackhi_epi64(m5, m2); \ +} while(0) + + +#define LOAD_MSG_1_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m1); \ +b1 = _mm_unpackhi_epi64(m3, m1); \ +} while(0) + + +#define LOAD_MSG_2_1(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m6, m5, 8); \ +b1 = _mm_unpackhi_epi64(m2, m7); \ +} while(0) + + +#define LOAD_MSG_2_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m0); \ +b1 = _mm_blend_epi16(m1, m6, 0xF0); \ +} while(0) + + +#define LOAD_MSG_2_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m5, m1, 0xF0); \ +b1 = _mm_unpackhi_epi64(m3, m4); \ +} while(0) + + +#define LOAD_MSG_2_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m3); \ +b1 = _mm_alignr_epi8(m2, m0, 8); \ +} while(0) + + +#define LOAD_MSG_3_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m3, m1); \ +b1 = _mm_unpackhi_epi64(m6, m5); \ +} while(0) + + +#define LOAD_MSG_3_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m0); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_3_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m1, m2, 
0xF0); \ +b1 = _mm_blend_epi16(m2, m7, 0xF0); \ +} while(0) + + +#define LOAD_MSG_3_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m3, m5); \ +b1 = _mm_unpacklo_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_4_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m2); \ +b1 = _mm_unpacklo_epi64(m1, m5); \ +} while(0) + + +#define LOAD_MSG_4_2(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m0, m3, 0xF0); \ +b1 = _mm_blend_epi16(m2, m7, 0xF0); \ +} while(0) + + +#define LOAD_MSG_4_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m7, m5, 0xF0); \ +b1 = _mm_blend_epi16(m3, m1, 0xF0); \ +} while(0) + + +#define LOAD_MSG_4_4(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m6, m0, 8); \ +b1 = _mm_blend_epi16(m4, m6, 0xF0); \ +} while(0) + + +#define LOAD_MSG_5_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m1, m3); \ +b1 = _mm_unpacklo_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_5_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m5); \ +b1 = _mm_unpackhi_epi64(m5, m1); \ +} while(0) + + +#define LOAD_MSG_5_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m2, m3, 0xF0); \ +b1 = _mm_unpackhi_epi64(m7, m0); \ +} while(0) + + +#define LOAD_MSG_5_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m6, m2); \ +b1 = _mm_blend_epi16(m7, m4, 0xF0); \ +} while(0) + + +#define LOAD_MSG_6_1(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m6, m0, 0xF0); \ +b1 = _mm_unpacklo_epi64(m7, m2); \ +} while(0) + + +#define LOAD_MSG_6_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m2, m7); \ +b1 = _mm_alignr_epi8(m5, m6, 8); \ +} while(0) + + +#define LOAD_MSG_6_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m3); \ +b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \ +} while(0) + + +#define LOAD_MSG_6_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m3, m1); \ +b1 = _mm_blend_epi16(m1, m5, 0xF0); \ +} while(0) + + +#define LOAD_MSG_7_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m6, m3); \ +b1 = _mm_blend_epi16(m6, m1, 0xF0); \ +} while(0) + + +#define LOAD_MSG_7_2(b0, b1) \ +do \ +{ \ +b0 = 
_mm_alignr_epi8(m7, m5, 8); \ +b1 = _mm_unpackhi_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_7_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m2, m7); \ +b1 = _mm_unpacklo_epi64(m4, m1); \ +} while(0) + + +#define LOAD_MSG_7_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m2); \ +b1 = _mm_unpacklo_epi64(m3, m5); \ +} while(0) + + +#define LOAD_MSG_8_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m3, m7); \ +b1 = _mm_alignr_epi8(m0, m5, 8); \ +} while(0) + + +#define LOAD_MSG_8_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m7, m4); \ +b1 = _mm_alignr_epi8(m4, m1, 8); \ +} while(0) + + +#define LOAD_MSG_8_3(b0, b1) \ +do \ +{ \ +b0 = m6; \ +b1 = _mm_alignr_epi8(m5, m0, 8); \ +} while(0) + + +#define LOAD_MSG_8_4(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m1, m3, 0xF0); \ +b1 = m2; \ +} while(0) + + +#define LOAD_MSG_9_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_unpackhi_epi64(m3, m0); \ +} while(0) + + +#define LOAD_MSG_9_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m1, m2); \ +b1 = _mm_blend_epi16(m3, m2, 0xF0); \ +} while(0) + + +#define LOAD_MSG_9_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m7, m4); \ +b1 = _mm_unpackhi_epi64(m1, m6); \ +} while(0) + + +#define LOAD_MSG_9_4(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m7, m5, 8); \ +b1 = _mm_unpacklo_epi64(m6, m0); \ +} while(0) + + +#define LOAD_MSG_10_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m1); \ +b1 = _mm_unpacklo_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_10_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m0, m1); \ +b1 = _mm_unpackhi_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_10_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m5); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_10_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m5); \ +b1 = _mm_unpackhi_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_11_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m2); \ +b1 = _mm_unpackhi_epi64(m4, m6); \ +} 
while(0) + + +#define LOAD_MSG_11_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_alignr_epi8(m3, m7, 8); \ +} while(0) + + +#define LOAD_MSG_11_3(b0, b1) \ +do \ +{ \ +b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \ +b1 = _mm_unpackhi_epi64(m5, m2); \ +} while(0) + + +#define LOAD_MSG_11_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m1); \ +b1 = _mm_unpackhi_epi64(m3, m1); \ +} while(0) + + +#endif \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-round.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-round.h new file mode 100644 index 00000000..3e348e6f --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-round.h @@ -0,0 +1,154 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#ifndef BLAKE2B_ROUND_H +#define BLAKE2B_ROUND_H + +#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) +#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) + +#define TOF(reg) _mm_castsi128_ps((reg)) +#define TOI(reg) _mm_castps_si128((reg)) + +#define LIKELY(x) __builtin_expect((x),1) + +/* Microarchitecture-specific macros */ +#ifndef HAVE_XOP +#ifdef HAVE_SSSE3 +#define _mm_roti_epi64(x, c) \ + (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \ + : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \ + : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \ + : (-(c) == 63) ? 
_mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \ + : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c)))) +#else +#define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-(c)) )) +#endif +#else +/* ... */ +#endif + +#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -32); \ + row4h = _mm_roti_epi64(row4h, -32); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -24); \ + row2h = _mm_roti_epi64(row2h, -24); \ + +#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -16); \ + row4h = _mm_roti_epi64(row4h, -16); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -63); \ + row2h = _mm_roti_epi64(row2h, -63); \ + +#if defined(HAVE_SSSE3) +#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = _mm_alignr_epi8(row2h, row2l, 8); \ + t1 = _mm_alignr_epi8(row2l, row2h, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4h, row4l, 8); \ + t1 = _mm_alignr_epi8(row4l, row4h, 8); \ + row4l = t1; \ + row4h = t0; + +#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 
= _mm_alignr_epi8(row2l, row2h, 8); \ + t1 = _mm_alignr_epi8(row2h, row2l, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4l, row4h, 8); \ + t1 = _mm_alignr_epi8(row4h, row4l, 8); \ + row4l = t1; \ + row4h = t0; +#else + +#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = row4l;\ + t1 = row2l;\ + row4l = row3l;\ + row3l = row3h;\ + row3h = row4l;\ + row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \ + row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \ + row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \ + row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1)) + +#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = row3l;\ + row3l = row3h;\ + row3h = t0;\ + t0 = row2l;\ + t1 = row4l;\ + row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \ + row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \ + row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \ + row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1)) + +#endif + +#if defined(HAVE_SSE41) +#include "blake2b-load-sse41.h" +#else +#include "blake2b-load-sse2.h" +#endif + +#define ROUND(r) \ + LOAD_MSG_ ##r ##_1(b0, b1); \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + LOAD_MSG_ ##r ##_2(b0, b1); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ + LOAD_MSG_ ##r ##_3(b0, b1); \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + LOAD_MSG_ ##r ##_4(b0, b1); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); + +#endif \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b.c b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b.c new file mode 100644 index 
00000000..c40a7991
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b.c
@@ -0,0 +1,514 @@
+/*
+ * Argon2 reference source code package - reference C implementations
+ *
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+ *
+ * You may use this work under the terms of a Creative Commons CC0 1.0
+ * License/Waiver or the Apache Public License 2.0, at your option. The terms of
+ * these licenses can be found at:
+ *
+ * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * You should have received a copy of both of these licenses along with this
+ * software. If not, they may be obtained at the above URLs.
+ */
+
+#include /* NOTE(review): the header name is missing here and on every other bare #include below - presumably lost together with the angle brackets; restore before building */
+#include
+#include
+
+#include "blake2.h"
+#include "blake2-impl.h"
+
+#if !defined(BUILD_REF) && (defined(__x86_64__) || defined(_WIN64)) /* same condition selects the SIMD blake2b_compress further down */
+    #include "blake2-config.h"
+
+    #ifdef _MSC_VER
+    #include /* for _mm_set_epi64x */
+    #endif
+    #include
+    #if defined(HAVE_SSSE3)
+    #include
+    #endif
+    #if defined(HAVE_SSE41)
+    #include
+    #endif
+    #if defined(HAVE_AVX)
+    #include
+    #endif
+    #if defined(HAVE_XOP)
+    #include
+    #endif
+
+    #include "blake2b-round.h"
+#endif
+
+static const uint64_t blake2b_IV[8] = { /* initial value of S->h; also supplies the second half of the working state in blake2b_compress */
+    UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
+    UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
+    UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
+    UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179)};
+
+static const unsigned int blake2b_sigma[12][16] = { /* message word order, one row per round; read as [r][2 * i + 0/1] by the portable G macro (rows 10 and 11 repeat rows 0 and 1) */
+    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+    {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+    {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
+    {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
+    {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
+    {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
+    {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
+    {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
+    {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
+    {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
+    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+    {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+};
+
+static BLAKE2_INLINE void blake2b_set_lastnode(blake2b_state *S) { /* sets the f[1] flag word to all ones */
+    S->f[1] = (uint64_t)-1;
+}
+
+static BLAKE2_INLINE void blake2b_set_lastblock(blake2b_state *S) { /* sets f[0] to all ones, and f[1] as well when S->last_node is set */
+    if (S->last_node) {
+        blake2b_set_lastnode(S);
+    }
+    S->f[0] = (uint64_t)-1;
+}
+
+static BLAKE2_INLINE void blake2b_increment_counter(blake2b_state *S, /* adds inc to the two-word counter t[0] (low) / t[1] (high) */
+                                                    uint64_t inc) {
+    S->t[0] += inc;
+    S->t[1] += (S->t[0] < inc); /* t[0] wrapped around iff it is now smaller than inc: carry into t[1] */
+}
+
+static BLAKE2_INLINE void blake2b_invalidate_state(blake2b_state *S) { /* leaves f[0] non-zero, which blake2b_update / blake2b_final reject with -1 */
+    blake2b_set_lastblock(S); /* invalidate for further use */
+}
+
+static BLAKE2_INLINE void blake2b_init0(blake2b_state *S) { /* zeroes the whole state, then loads blake2b_IV into h */
+    memset(S, 0, sizeof(*S));
+    memcpy(S->h, blake2b_IV, sizeof(S->h));
+}
+
+int blake2b_init_param(blake2b_state *S, const blake2b_param *P) { /* Initializes S from parameter block P. Returns -1 if either pointer is NULL, 0 otherwise. */
+    const unsigned char *p = (const unsigned char *)P;
+    unsigned int i;
+
+    if (NULL == P || NULL == S) {
+        return -1;
+    }
+
+    blake2b_init0(S);
+    /* IV XOR Parameter Block */
+    for (i = 0; i < 8; ++i) {
+        S->h[i] ^= load64(&p[i * sizeof(S->h[i])]); /* P is read as raw bytes, eight consecutive words via load64 */
+    }
+    S->outlen = P->digest_length;
+    return 0;
+}
+
+/* Sequential (fanout = depth = 1), unkeyed blake2b initialization for an outlen-byte digest. Returns -1 if S is NULL or outlen is 0 or greater than BLAKE2B_OUTBYTES (S is invalidated in the latter case), else the result of blake2b_init_param. */
+int blake2b_init(blake2b_state *S, size_t outlen) {
+    blake2b_param P;
+
+    if (S == NULL) {
+        return -1;
+    }
+
+    if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) {
+        blake2b_invalidate_state(S);
+        return -1;
+    }
+
+    /* Setup Parameter Block for unkeyed BLAKE2 */
+    P.digest_length = (uint8_t)outlen;
+    P.key_length = 0;
+    P.fanout = 1;
+    P.depth = 1;
+    P.leaf_length = 0;
+    P.node_offset = 0;
+    P.node_depth = 0;
+    P.inner_length = 0;
+    memset(P.reserved, 0, sizeof(P.reserved));
+    memset(P.salt, 0, sizeof(P.salt));
+    memset(P.personal, 0, sizeof(P.personal));
+
+    return blake2b_init_param(S, &P);
+}
+
+int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, /* Keyed variant of blake2b_init: same parameter block plus key_length, then the key is absorbed as the first block. */
+                     size_t keylen) { /* Returns -1 for a NULL S, an out-of-range outlen, or a NULL/empty/over-long key (keylen > BLAKE2B_KEYBYTES); 0 on success. */
+    blake2b_param P;
+
+    if (S == NULL) {
+        return -1;
+    }
+
+    if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) {
+        blake2b_invalidate_state(S);
+        return -1;
+    }
+
+    if ((key == 0) || (keylen == 0) || (keylen > BLAKE2B_KEYBYTES)) {
+        blake2b_invalidate_state(S);
+        return -1;
+    }
+
+    /* Setup Parameter Block for keyed BLAKE2 */
+    P.digest_length = (uint8_t)outlen;
+    P.key_length = (uint8_t)keylen;
+    P.fanout = 1;
+    P.depth = 1;
+    P.leaf_length = 0;
+    P.node_offset = 0;
+    P.node_depth = 0;
+    P.inner_length = 0;
+    memset(P.reserved, 0, sizeof(P.reserved));
+    memset(P.salt, 0, sizeof(P.salt));
+    memset(P.personal, 0, sizeof(P.personal));
+
+    if (blake2b_init_param(S, &P) < 0) {
+        blake2b_invalidate_state(S);
+        return -1;
+    }
+
+    { /* the key, zero-padded to one full block, is fed through blake2b_update as the first input block */
+        uint8_t block[BLAKE2B_BLOCKBYTES];
+        memset(block, 0, BLAKE2B_BLOCKBYTES);
+        memcpy(block, key, keylen);
+        blake2b_update(S, block, BLAKE2B_BLOCKBYTES);
+    }
+    return 0;
+}
+
+#if !defined(BUILD_REF) && (defined(__x86_64__) || defined(_WIN64)) /* SIMD compression; the #else branch below is the portable version */
+static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
+{ /* Mixes one message block into S->h using the current S->t and S->f; all round logic comes from the macros in blake2b-round.h. */
+    __m128i row1l, row1h;
+    __m128i row2l, row2h;
+    __m128i row3l, row3h;
+    __m128i row4l, row4h;
+    __m128i b0, b1;
+    __m128i t0, t1;
+#if defined(HAVE_SSSE3) && !defined(HAVE_XOP) /* r16 / r24 are referenced by name inside the SSSE3 _mm_roti_epi64 macro */
+    const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
+    const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
+#endif
+#if defined(HAVE_SSE41) /* m0..m7: the block as eight 128-bit vectors, referenced by name in the LOAD_MSG_* macros */
+    const __m128i m0 = LOADU( block + 00 );
+    const __m128i m1 = LOADU( block + 16 );
+    const __m128i m2 = LOADU( block + 32 );
+    const __m128i m3 = LOADU( block + 48 );
+    const __m128i m4 = LOADU( block + 64 );
+    const __m128i m5 = LOADU( block + 80 );
+    const __m128i m6 = LOADU( block + 96 );
+    const __m128i m7 = LOADU( block + 112 );
+#else /* m0..m15: the block as sixteen 64-bit words read with load64 */
+    const uint64_t m0 = load64(block + 0 * sizeof(uint64_t));
+    const uint64_t m1 = load64(block + 1 * sizeof(uint64_t));
+    const uint64_t m2 = load64(block + 2 * sizeof(uint64_t));
+    const uint64_t m3 = load64(block + 3 * sizeof(uint64_t));
+    const uint64_t m4 = load64(block + 4 * sizeof(uint64_t));
+    const uint64_t m5 = load64(block + 5 * sizeof(uint64_t));
+    const uint64_t m6 = load64(block + 6 * sizeof(uint64_t));
+    const uint64_t m7 = load64(block + 7 * sizeof(uint64_t));
+    const uint64_t m8 = load64(block + 8 * sizeof(uint64_t));
+    const uint64_t m9 = load64(block + 9 * sizeof(uint64_t));
+    const uint64_t m10 = load64(block + 10 * sizeof(uint64_t));
+    const uint64_t m11 = load64(block + 11 * sizeof(uint64_t));
+    const uint64_t m12 = load64(block + 12 * sizeof(uint64_t));
+    const uint64_t m13 = load64(block + 13 * sizeof(uint64_t));
+    const uint64_t m14 = load64(block + 14 * sizeof(uint64_t));
+    const uint64_t m15 = load64(block + 15 * sizeof(uint64_t));
+#endif
+    row1l = LOADU( &S->h[0] ); /* rows 1-2: current chaining value h[0..7] */
+    row1h = LOADU( &S->h[2] );
+    row2l = LOADU( &S->h[4] );
+    row2h = LOADU( &S->h[6] );
+    row3l = LOADU( &blake2b_IV[0] ); /* row 3: IV[0..3] */
+    row3h = LOADU( &blake2b_IV[2] );
+    row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) ); /* row 4: IV[4..7] XOR counter t and flags f */
+    row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
+    ROUND( 0 );
+    ROUND( 1 );
+    ROUND( 2 );
+    ROUND( 3 );
+    ROUND( 4 );
+    ROUND( 5 );
+    ROUND( 6 );
+    ROUND( 7 );
+    ROUND( 8 );
+    ROUND( 9 );
+    ROUND( 10 );
+    ROUND( 11 );
+    row1l = _mm_xor_si128( row3l, row1l ); /* feed-forward: h[0..3] ^= row1 ^ row3 */
+    row1h = _mm_xor_si128( row3h, row1h );
+    STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
+    STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
+    row2l = _mm_xor_si128( row4l, row2l ); /* feed-forward: h[4..7] ^= row2 ^ row4 */
+    row2h = _mm_xor_si128( row4h, row2h );
+    STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
+    STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
+}
+#else
+static void blake2b_compress(blake2b_state *S, const uint8_t *block) { /* Portable compression: mixes one message block into S->h using the current S->t and S->f. */
+    uint64_t m[16];
+    uint64_t v[16];
+    unsigned int i, r;
+
+    for (i = 0; i < 16; ++i) { /* message block as sixteen words read with load64 */
+        m[i] = load64(block + i * sizeof(m[i]));
+    }
+
+    for (i = 0; i < 8; ++i) { /* v[0..7]: current chaining value */
+        v[i] = S->h[i];
+    }
+
+    v[8] = blake2b_IV[0]; /* v[8..15]: IV, with the counter t and flags f XORed into the last four words */
+    v[9] = blake2b_IV[1];
+    v[10] = blake2b_IV[2];
+    v[11] = blake2b_IV[3];
+    v[12] = blake2b_IV[4] ^ S->t[0];
+    v[13] = blake2b_IV[5] ^ S->t[1];
+    v[14] = blake2b_IV[6] ^ S->f[0];
+    v[15] = blake2b_IV[7] ^ S->f[1];
+
+#define G(r, i, a, b, c, d) /* mixes (a, b, c, d) with the two message words blake2b_sigma selects for step i of round r */ \
+    do { \
+        a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
+        d = rotr64(d ^ a, 32); \
+        c = c + d; \
+        b = rotr64(b ^ c, 24); \
+        a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
+        d = rotr64(d ^ a, 16); \
+        c = c + d; \
+        b = rotr64(b ^ c, 63); \
+    } while ((void)0, 0)
+
+#define ROUND(r) /* one round: G on the four columns of v (steps 0-3), then on the four diagonals (steps 4-7) */ \
+    do { \
+        G(r, 0, v[0], v[4], v[8], v[12]); \
+        G(r, 1, v[1], v[5], v[9], v[13]); \
+        G(r, 2, v[2], v[6], v[10], v[14]); \
+        G(r, 3, v[3], v[7], v[11], v[15]); \
+        G(r, 4, v[0], v[5], v[10], v[15]); \
+        G(r, 5, v[1], v[6], v[11], v[12]); \
+        G(r, 6, v[2], v[7], v[8], v[13]); \
+        G(r, 7, v[3], v[4], v[9], v[14]); \
+    } while ((void)0, 0)
+
+    for (r = 0; r < 12; ++r) {
+        ROUND(r);
+    }
+
+    for (i = 0; i < 8; ++i) { /* feed-forward: fold both halves of v back into h */
+        S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
+    }
+
+#undef G
+#undef ROUND
+}
+#endif
+
+int blake2b_update(blake2b_state *S, const void *in, size_t inlen) { /* Absorbs inlen bytes from in. Returns 0 on success (including inlen == 0), -1 on a NULL argument or a finalized/invalidated state. */
+    const uint8_t *pin = (const uint8_t *)in;
+
+    if (inlen == 0) {
+        return 0;
+    }
+
+    /* Sanity check */
+    if (S == NULL || in == NULL) {
+        return -1;
+    }
+
+    /* Is this a reused state? */
+    if (S->f[0] != 0) {
+        return -1;
+    }
+
+    if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) { /* strict '>': a block is compressed only when more input follows, so the final block always stays buffered for blake2b_final */
+        /* Complete current block */
+        size_t left = S->buflen;
+        size_t fill = BLAKE2B_BLOCKBYTES - left;
+        memcpy(&S->buf[left], pin, fill);
+        blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+        blake2b_compress(S, S->buf);
+        S->buflen = 0;
+        inlen -= fill;
+        pin += fill;
+        /* Avoid buffer copies when possible */
+        while (inlen > BLAKE2B_BLOCKBYTES) { /* full blocks are compressed straight from the caller's memory */
+            blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+            blake2b_compress(S, pin);
+            inlen -= BLAKE2B_BLOCKBYTES;
+            pin += BLAKE2B_BLOCKBYTES;
+        }
+    }
+    memcpy(&S->buf[S->buflen], pin, inlen); /* buffer whatever is left */
+    S->buflen += (unsigned int)inlen;
+    return 0;
+}
+
+int blake2b_update_static(blake2b_state *S, const char in, size_t inlen) { /* Like blake2b_update, but the input is inlen repetitions of the byte `in`, produced with memset instead of read from a buffer. */
+    if (inlen == 0) {
+        return 0;
+    }
+
+    /* Sanity check */
+    if (S == NULL) {
+        return -1;
+    }
+
+    /* Is this a reused state? */
+    if (S->f[0] != 0) {
+        return -1;
+    }
+
+    if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) {
+        /* Complete current block */
+        size_t left = S->buflen;
+        size_t fill = BLAKE2B_BLOCKBYTES - left;
+        memset(&S->buf[left], in, fill);
+        blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+        blake2b_compress(S, S->buf);
+        S->buflen = 0;
+        inlen -= fill;
+        /* Remaining full blocks: refill the state buffer with the constant byte and compress it */
+        while (inlen > BLAKE2B_BLOCKBYTES) {
+            memset(S->buf, in, BLAKE2B_BLOCKBYTES);
+            blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+            blake2b_compress(S, S->buf);
+            inlen -= BLAKE2B_BLOCKBYTES;
+        }
+    }
+    memset(&S->buf[S->buflen], in, inlen);
+    S->buflen += (unsigned int)inlen;
+    return 0;
+}
+
+
+int blake2b_final(blake2b_state *S, void *out, size_t outlen) { /* Compresses the buffered final block and writes S->outlen digest bytes to out. Returns -1 on NULL arguments, outlen < S->outlen, or an already finalized state; 0 on success. */
+    uint8_t buffer[BLAKE2B_OUTBYTES] = {0};
+    unsigned int i;
+
+    /* Sanity checks */
+    if (S == NULL || out == NULL || outlen < S->outlen) {
+        return -1;
+    }
+
+    /* Is this a reused state? */
+    if (S->f[0] != 0) {
+        return -1;
+    }
+
+    blake2b_increment_counter(S, S->buflen); /* count only the bytes actually buffered, not the padding */
+    blake2b_set_lastblock(S); /* from here on f[0] != 0, so further update/final calls on S fail */
+    memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
+    blake2b_compress(S, S->buf);
+
+    for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
+        store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
+    }
+
+    memcpy(out, buffer, S->outlen); /* only S->outlen bytes are copied, even when the caller's outlen is larger */
+    return 0;
+}
+
+int blake2b(void *out, size_t outlen, const void *in, size_t inlen, /* One-shot hash: init (keyed when keylen > 0), update, final. */
+            const void *key, size_t keylen) { /* Returns -1 on any invalid argument or failing step, otherwise the result of blake2b_final. */
+    blake2b_state S;
+    int ret = -1;
+
+    /* Verify parameters */
+    if (NULL == in && inlen > 0) {
+        goto fail;
+    }
+
+    if (NULL == out || outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
+        goto fail;
+    }
+
+    if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) {
+        goto fail;
+    }
+
+    if (keylen > 0) {
+        if (blake2b_init_key(&S, outlen, key, keylen) < 0) {
+            goto fail;
+        }
+    } else {
+        if (blake2b_init(&S, outlen) < 0) {
+            goto fail;
+        }
+    }
+
+    if (blake2b_update(&S, in, inlen) < 0) {
+        goto fail;
+    }
+    ret = blake2b_final(&S, out, outlen);
+
+    fail:
+    return ret;
+}
+
+/* Argon2 Team - Begin Code */
+int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) { /* Variable-length hash of (outlen as a 32-bit value || in) into outlen bytes at pout. Returns a negative value on failure (including outlen > UINT32_MAX). */
+    uint8_t *out = (uint8_t *)pout;
+    blake2b_state blake_state;
+    uint8_t outlen_bytes[sizeof(uint32_t)] = {0};
+    int ret = -1;
+
+    if (outlen > UINT32_MAX) {
+        goto fail;
+    }
+
+    /* Ensure little-endian byte order! */
+    store32(outlen_bytes, (uint32_t)outlen);
+
+#define TRY(statement) /* runs statement, stores its result in ret and jumps to fail when it is negative */ \
+    do { \
+        ret = statement; \
+        if (ret < 0) { \
+            goto fail; \
+        } \
+    } while ((void)0, 0)
+
+    if (outlen <= BLAKE2B_OUTBYTES) { /* short output: a single blake2b pass with digest length outlen */
+        TRY(blake2b_init(&blake_state, outlen));
+        TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
+        TRY(blake2b_update(&blake_state, in, inlen));
+        TRY(blake2b_final(&blake_state, out, outlen));
+    } else { /* long output: chain full-size digests, emitting the first half of each */
+        uint32_t toproduce;
+        uint8_t out_buffer[BLAKE2B_OUTBYTES];
+        uint8_t in_buffer[BLAKE2B_OUTBYTES];
+        TRY(blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
+        TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
+        TRY(blake2b_update(&blake_state, in, inlen));
+        TRY(blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
+        memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
+        out += BLAKE2B_OUTBYTES / 2;
+        toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
+
+        while (toproduce > BLAKE2B_OUTBYTES) { /* each step hashes the previous full digest and emits half of the new one */
+            memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
+            TRY(blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
+                        BLAKE2B_OUTBYTES, NULL, 0));
+            memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
+            out += BLAKE2B_OUTBYTES / 2;
+            toproduce -= BLAKE2B_OUTBYTES / 2;
+        }
+
+        memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); /* last step: a digest of exactly the remaining toproduce bytes, emitted in full */
+        TRY(blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
+                    0));
+        memcpy(out, out_buffer, toproduce);
+    }
+    fail:
+    return ret;
+#undef TRY
+}
+/* Argon2 Team - End Code */
diff --git a/src/crypto/argon2_hasher/hash/cpu/CpuHasher.cpp b/src/crypto/argon2_hasher/hash/cpu/CpuHasher.cpp
new file mode 100755
index 00000000..08e4c019
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/CpuHasher.cpp
@@ -0,0 +1,227 @@
+//
+// Created by Haifa Bogdan Adnan on 03/08/2018.
+// + +#if defined(__x86_64__) || defined(__i386__) || defined(_WIN64) + #include +#endif +#if defined(__arm__) + #include +#endif + +#include + +#include "../../common/common.h" + +#include "crypto/argon2_hasher/hash/Hasher.h" +#include "crypto/argon2_hasher/hash/argon2/Argon2.h" + +#include "CpuHasher.h" +#include "crypto/argon2_hasher/common/DLLExport.h" + +CpuHasher::CpuHasher() : Hasher() { + m_type = "CPU"; + m_subType = "CPU"; + m_shortSubType = "CPU"; + m_optimization = "REF"; + m_computingThreads = 0; + m_availableProcessingThr = 1; + m_availableMemoryThr = 1; + m_argon2BlocksFillerPtr = nullptr; + m_dllHandle = nullptr; + m_profile = nullptr; + m_threadData = nullptr; +} + +CpuHasher::~CpuHasher() { + this->cleanup(); +} + +bool CpuHasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) { + m_profile = getArgon2Profile(algorithm, variant); + m_description = detectFeaturesAndMakeDescription(); + return true; +} + +bool CpuHasher::configure(xmrig::HasherConfig &config) { + m_intensity = 100; + + if(config.cpuOptimization() != "") { + m_description += "Overiding detected optimization feature with " + config.cpuOptimization() + ".\n"; + m_optimization = config.cpuOptimization(); + } + + loadArgon2BlockFiller(); + + if(m_argon2BlocksFillerPtr == NULL) { + m_intensity = 0; + m_description += "Status: DISABLED - argon2 hashing module not found."; + return false; + } + + m_computingThreads = min(m_availableProcessingThr, m_availableMemoryThr); + + if (m_computingThreads == 0) { + m_intensity = 0; + m_description += "Status: DISABLED - not enough resources."; + return false; + } + + if(config.cpuThreads() > -1) { + m_intensity = min(100.0 * config.cpuThreads() / m_computingThreads, 100.0); + m_computingThreads = min(config.cpuThreads(), m_computingThreads); + } + + if (m_intensity == 0) { + m_description += "Status: DISABLED - by user."; + return false; + } + + m_deviceInfo.intensity = m_intensity; + + storeDeviceInfo(0, m_deviceInfo); + + m_threadData = 
new CpuHasherThread[m_computingThreads]; + for(int i=0; i < m_computingThreads; i++) { + void *buffer = NULL; + void *mem = allocateMemory(buffer); + if(mem == NULL) { + m_intensity = 0; + m_description += "Status: DISABLED - error allocating memory."; + return false; + } + m_threadData[i].mem = buffer; + m_threadData[i].argon2 = new Argon2(NULL, m_argon2BlocksFillerPtr, NULL, mem, mem); + m_threadData[i].hashData.outSize = xmrig::ARGON2_HASHLEN + sizeof(uint32_t); + } + + m_description += "Status: ENABLED - with " + to_string(m_computingThreads) + " threads."; + + return true; +} + +string CpuHasher::detectFeaturesAndMakeDescription() { + stringstream ss; +#if defined(__x86_64__) || defined(__i386__) || defined(_WIN64) + char brand_string[49]; + cpu_features::FillX86BrandString(brand_string); + m_deviceInfo.name = brand_string; + + ss << brand_string << endl; + + cpu_features::X86Features features = cpu_features::GetX86Info().features; + ss << "Optimization features: "; + +#if defined(__x86_64__) || defined(_WIN64) + ss << "SSE2 "; + m_optimization = "SSE2"; +#else + ss << "none"; + m_optimization = "REF"; +#endif + + if(features.ssse3 || features.avx2 || features.avx512f) { + if (features.ssse3) { + ss << "SSSE3 "; + m_optimization = "SSSE3"; + } + if (features.avx) { + ss << "AVX "; + m_optimization = "AVX"; + } + if (features.avx2) { + ss << "AVX2 "; + m_optimization = "AVX2"; + } + if (features.avx512f) { + ss << "AVX512F "; + m_optimization = "AVX512F"; + } + } + ss << endl; +#endif +#if defined(__arm__) + m_deviceInfo.name = "ARM processor"; + + cpu_features::ArmFeatures features = cpu_features::GetArmInfo().features; + ss << "ARM processor" << endl; + ss << "Optimization features: "; + + m_optimization = "REF"; + + if(features.neon) { + ss << "NEON"; + m_optimization = "NEON"; + } + else { + ss << "none"; + } + ss << endl; +#endif + ss << "Selecting " << m_optimization << " as candidate for hashing algorithm." 
<< endl; + + m_availableProcessingThr = thread::hardware_concurrency(); + ss << "Parallelism: " << m_availableProcessingThr << " concurent threads supported." << endl; + + //check available memory + vector memoryTest; + for(m_availableMemoryThr = 0;m_availableMemoryThr < m_availableProcessingThr;m_availableMemoryThr++) { + void *memory = malloc(m_profile->memSize + 64); //64 bytes for alignament - to work on AVX512F optimisations + if(memory == NULL) + break; + memoryTest.push_back(memory); + } + for(vector::iterator it=memoryTest.begin(); it != memoryTest.end(); ++it) { + free(*it); + } + ss << "Memory: there is enough memory for " << m_availableMemoryThr << " concurent threads." << endl; + + return ss.str(); +} + +void CpuHasher::cleanup() { + for(int i=0; i < m_computingThreads; i++) { + delete m_threadData[i].argon2; + free(m_threadData[i].mem); + } + delete[] m_threadData; + if(m_dllHandle != NULL) + dlclose(m_dllHandle); +} + +void CpuHasher::loadArgon2BlockFiller() { + string module_path = m_appFolder; + module_path += "/modules/argon2_fill_blocks_" + m_optimization + ".opt"; + + m_dllHandle = dlopen(module_path.c_str(), RTLD_LAZY); + if(m_dllHandle != NULL) + m_argon2BlocksFillerPtr = (argon2BlocksFillerPtr)dlsym(m_dllHandle, "fill_memory_blocks"); +} + +int CpuHasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) { + CpuHasherThread &threadData = m_threadData[threadIdx]; + threadData.hashData.input = input; + threadData.hashData.inSize = size; + threadData.hashData.output = output; + return threadData.argon2->generateHashes(*m_profile, threadData.hashData); +} + +void *CpuHasher::allocateMemory(void *&buffer) { + size_t mem_size = m_profile->memSize + 64; + void *mem = malloc(mem_size); + buffer = mem; + return align(64, m_profile->memSize, mem, mem_size); +} + +size_t CpuHasher::parallelism(int workerIdx) { + if(workerIdx < 0 || workerIdx > computingThreads()) + return 0; + + return 1; +} + +size_t CpuHasher::deviceCount() { + 
return computingThreads(); +} + +REGISTER_HASHER(CpuHasher); \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/cpu/CpuHasher.h b/src/crypto/argon2_hasher/hash/cpu/CpuHasher.h new file mode 100644 index 00000000..888421c6 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/CpuHasher.h @@ -0,0 +1,41 @@ +// +// Created by Haifa Bogdan Adnan on 03/08/2018. +// + +#ifndef ARGON2_CPU_HASHER_H +#define ARGON2_CPU_HASHER_H + +struct CpuHasherThread { + Argon2 *argon2; + HashData hashData; + void *mem; +}; + +class CpuHasher : public Hasher { +public: + CpuHasher(); + ~CpuHasher(); + + virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant); + virtual bool configure(xmrig::HasherConfig &config); + virtual void cleanup(); + virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output); + virtual size_t parallelism(int workerIdx); + virtual size_t deviceCount(); + +private: + string detectFeaturesAndMakeDescription(); + void loadArgon2BlockFiller(); + void *allocateMemory(void *&buffer); + + DeviceInfo m_deviceInfo; + string m_optimization; + int m_availableProcessingThr; + int m_availableMemoryThr; + void *m_dllHandle; + Argon2Profile *m_profile; + argon2BlocksFillerPtr m_argon2BlocksFillerPtr; + CpuHasherThread *m_threadData; +}; + +#endif //ARGON2_CPU_HASHER_H diff --git a/src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-opt.h b/src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-opt.h new file mode 100644 index 00000000..8048503c --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-opt.h @@ -0,0 +1,567 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. 
The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#ifndef BLAKE_ROUND_MKA_OPT_H +#define BLAKE_ROUND_MKA_OPT_H + +#include "../../argon2/blake2/blake2-impl.h" + +#if !defined(__NEON__) +#include +#if defined(__SSSE3__) +#include /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */ +#endif + +#if (defined(__XOP__) || defined(__AVX__)) && (defined(__GNUC__) || defined(__clang__)) +#include +#endif +#else +#include +#endif + +#if !defined(__NEON__) +#if !defined(__AVX512F__) +#if !defined(__AVX2__) +#if !defined(__XOP__) +#if defined(__SSSE3__) +#define r16 \ + (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)) +#define r24 \ + (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)) +#define _mm_roti_epi64(x, c) \ + (-(c) == 32) \ + ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \ + : (-(c) == 24) \ + ? _mm_shuffle_epi8((x), r24) \ + : (-(c) == 16) \ + ? _mm_shuffle_epi8((x), r16) \ + : (-(c) == 63) \ + ? 
_mm_xor_si128(_mm_srli_epi64((x), -(c)), \ + _mm_add_epi64((x), (x))) \ + : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ + _mm_slli_epi64((x), 64 - (-(c)))) +#else /* defined(__SSE2__) */ +#define _mm_roti_epi64(r, c) \ + _mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c)))) +#endif +#else +#endif + +static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) { + const __m128i z = _mm_mul_epu32(x, y); + return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z)); +} + +#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = fBlaMka(A0, B0); \ + A1 = fBlaMka(A1, B1); \ + \ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ + \ + D0 = _mm_roti_epi64(D0, -32); \ + D1 = _mm_roti_epi64(D1, -32); \ + \ + C0 = fBlaMka(C0, D0); \ + C1 = fBlaMka(C1, D1); \ + \ + B0 = _mm_xor_si128(B0, C0); \ + B1 = _mm_xor_si128(B1, C1); \ + \ + B0 = _mm_roti_epi64(B0, -24); \ + B1 = _mm_roti_epi64(B1, -24); \ + } while ((void)0, 0) + +#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = fBlaMka(A0, B0); \ + A1 = fBlaMka(A1, B1); \ + \ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ + \ + D0 = _mm_roti_epi64(D0, -16); \ + D1 = _mm_roti_epi64(D1, -16); \ + \ + C0 = fBlaMka(C0, D0); \ + C1 = fBlaMka(C1, D1); \ + \ + B0 = _mm_xor_si128(B0, C0); \ + B1 = _mm_xor_si128(B1, C1); \ + \ + B0 = _mm_roti_epi64(B0, -63); \ + B1 = _mm_roti_epi64(B1, -63); \ + } while ((void)0, 0) + +#if defined(__SSSE3__) +#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \ + __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \ + B0 = t0; \ + B1 = t1; \ + \ + t0 = C0; \ + C0 = C1; \ + C1 = t0; \ + \ + t0 = _mm_alignr_epi8(D1, D0, 8); \ + t1 = _mm_alignr_epi8(D0, D1, 8); \ + D0 = t1; \ + D1 = t0; \ + } while ((void)0, 0) + +#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \ + __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \ + B0 = t0; \ + B1 = t1; \ + \ + t0 = 
C0; \ + C0 = C1; \ + C1 = t0; \ + \ + t0 = _mm_alignr_epi8(D0, D1, 8); \ + t1 = _mm_alignr_epi8(D1, D0, 8); \ + D0 = t1; \ + D1 = t0; \ + } while ((void)0, 0) +#else /* SSE2 */ +#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = D0; \ + __m128i t1 = B0; \ + D0 = C0; \ + C0 = C1; \ + C1 = D0; \ + D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \ + D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \ + B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \ + B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \ + } while ((void)0, 0) + +#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0, t1; \ + t0 = C0; \ + C0 = C1; \ + C1 = t0; \ + t0 = B0; \ + t1 = D0; \ + B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \ + B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \ + D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \ + D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \ + } while ((void)0, 0) +#endif + +#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ + \ + DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ + \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ + \ + UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ + } while ((void)0, 0) +#else /* __AVX2__ */ + +#include + +#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1)) +#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)) +#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)) +#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x))) + +#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + __m256i ml = 
_mm256_mul_epu32(A0, B0); \ + ml = _mm256_add_epi64(ml, ml); \ + A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \ + D0 = _mm256_xor_si256(D0, A0); \ + D0 = rotr32(D0); \ + \ + ml = _mm256_mul_epu32(C0, D0); \ + ml = _mm256_add_epi64(ml, ml); \ + C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \ + \ + B0 = _mm256_xor_si256(B0, C0); \ + B0 = rotr24(B0); \ + \ + ml = _mm256_mul_epu32(A1, B1); \ + ml = _mm256_add_epi64(ml, ml); \ + A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \ + D1 = _mm256_xor_si256(D1, A1); \ + D1 = rotr32(D1); \ + \ + ml = _mm256_mul_epu32(C1, D1); \ + ml = _mm256_add_epi64(ml, ml); \ + C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \ + \ + B1 = _mm256_xor_si256(B1, C1); \ + B1 = rotr24(B1); \ + } while((void)0, 0); + +#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + __m256i ml = _mm256_mul_epu32(A0, B0); \ + ml = _mm256_add_epi64(ml, ml); \ + A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \ + D0 = _mm256_xor_si256(D0, A0); \ + D0 = rotr16(D0); \ + \ + ml = _mm256_mul_epu32(C0, D0); \ + ml = _mm256_add_epi64(ml, ml); \ + C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \ + B0 = _mm256_xor_si256(B0, C0); \ + B0 = rotr63(B0); \ + \ + ml = _mm256_mul_epu32(A1, B1); \ + ml = _mm256_add_epi64(ml, ml); \ + A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \ + D1 = _mm256_xor_si256(D1, A1); \ + D1 = rotr16(D1); \ + \ + ml = _mm256_mul_epu32(C1, D1); \ + ml = _mm256_add_epi64(ml, ml); \ + C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \ + B1 = _mm256_xor_si256(B1, C1); \ + B1 = rotr63(B1); \ + } while((void)0, 0); + +#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \ + C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ + D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \ + \ + B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \ + C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ + 
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \ + } while((void)0, 0); + +#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \ + __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \ + B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ + B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ + \ + tmp1 = C0; \ + C0 = C1; \ + C1 = tmp1; \ + \ + tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \ + tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \ + D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ + D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ + } while(0); + +#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \ + C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ + D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \ + \ + B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \ + C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ + D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \ + } while((void)0, 0); + +#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \ + __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \ + B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ + B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ + \ + tmp1 = C0; \ + C0 = C1; \ + C1 = tmp1; \ + \ + tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \ + tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \ + D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ + D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ + } while((void)0, 0); + +#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \ + do{ \ + G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + \ + DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \ + \ + G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + 
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + \ + UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \ + } while((void)0, 0); + +#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \ + do{ \ + G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + \ + DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \ + \ + G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + \ + UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \ + } while((void)0, 0); + +#endif /* __AVX2__ */ + +#else /* __AVX512F__ */ + +#include <immintrin.h> + +#define ror64(x, n) _mm512_ror_epi64((x), (n)) + +static BLAKE2_INLINE __m512i muladd(__m512i x, __m512i y) +{ + __m512i z = _mm512_mul_epu32(x, y); + return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z)); +} + +#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = muladd(A0, B0); \ + A1 = muladd(A1, B1); \ +\ + D0 = _mm512_xor_si512(D0, A0); \ + D1 = _mm512_xor_si512(D1, A1); \ +\ + D0 = ror64(D0, 32); \ + D1 = ror64(D1, 32); \ +\ + C0 = muladd(C0, D0); \ + C1 = muladd(C1, D1); \ +\ + B0 = _mm512_xor_si512(B0, C0); \ + B1 = _mm512_xor_si512(B1, C1); \ +\ + B0 = ror64(B0, 24); \ + B1 = ror64(B1, 24); \ + } while ((void)0, 0) + +#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = muladd(A0, B0); \ + A1 = muladd(A1, B1); \ +\ + D0 = _mm512_xor_si512(D0, A0); \ + D1 = _mm512_xor_si512(D1, A1); \ +\ + D0 = ror64(D0, 16); \ + D1 = ror64(D1, 16); \ +\ + C0 = muladd(C0, D0); \ + C1 = muladd(C1, D1); \ +\ + B0 = _mm512_xor_si512(B0, C0); \ + B1 = _mm512_xor_si512(B1, C1); \ +\ + B0 = ror64(B0, 63); \ + B1 = ror64(B1, 63); \ + } while ((void)0, 0) + +#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \ + B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \ +\ + C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ + C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ +\ + D0 = 
_mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \ + D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \ + } while ((void)0, 0) + +#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \ + B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \ +\ + C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ + C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ +\ + D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \ + D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ + } while ((void)0, 0) + +#define SWAP_HALVES(A0, A1) \ + do { \ + __m512i t0, t1; \ + t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \ + t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \ + A0 = t0; \ + A1 = t1; \ + } while((void)0, 0) + +#define SWAP_QUARTERS(A0, A1) \ + do { \ + SWAP_HALVES(A0, A1); \ + A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \ + A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \ + } while((void)0, 0) + +#define UNSWAP_QUARTERS(A0, A1) \ + do { \ + A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \ + A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \ + SWAP_HALVES(A0, A1); \ + } while((void)0, 0) + +#define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \ + do { \ + SWAP_HALVES(A0, B0); \ + SWAP_HALVES(C0, D0); \ + SWAP_HALVES(A1, B1); \ + SWAP_HALVES(C1, D1); \ + BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \ + SWAP_HALVES(A0, B0); \ + SWAP_HALVES(C0, D0); \ + SWAP_HALVES(A1, 
B1); \ + SWAP_HALVES(C1, D1); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + SWAP_QUARTERS(A0, A1); \ + SWAP_QUARTERS(B0, B1); \ + SWAP_QUARTERS(C0, C1); \ + SWAP_QUARTERS(D0, D1); \ + BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \ + UNSWAP_QUARTERS(A0, A1); \ + UNSWAP_QUARTERS(B0, B1); \ + UNSWAP_QUARTERS(C0, C1); \ + UNSWAP_QUARTERS(D0, D1); \ + } while ((void)0, 0) + +#endif /* __AVX512F__ */ + +#else /* __NEON__ */ + +static BLAKE2_INLINE uint64x2_t fBlaMka(uint64x2_t x, uint64x2_t y) { + const uint64x2_t z = vmull_u32(vmovn_u64(x), vmovn_u64(y)); + return vaddq_u64(vaddq_u64(x, y), vaddq_u64(z, z)); +} + +#define vrorq_n_u64_32(x) vreinterpretq_u64_u32(vrev64q_u32(vreinterpretq_u32_u64((x)))) + +#define vrorq_n_u64_24(x) vcombine_u64( \ + vreinterpret_u64_u8(vext_u8(vreinterpret_u8_u64(vget_low_u64(x)), vreinterpret_u8_u64(vget_low_u64(x)), 3)), \ + vreinterpret_u64_u8(vext_u8(vreinterpret_u8_u64(vget_high_u64(x)), vreinterpret_u8_u64(vget_high_u64(x)), 3))) + +#define vrorq_n_u64_16(x) vcombine_u64( \ + vreinterpret_u64_u8(vext_u8(vreinterpret_u8_u64(vget_low_u64(x)), vreinterpret_u8_u64(vget_low_u64(x)), 2)), \ + vreinterpret_u64_u8(vext_u8(vreinterpret_u8_u64(vget_high_u64(x)), vreinterpret_u8_u64(vget_high_u64(x)), 2))) + +#define vrorq_n_u64_63(x) veorq_u64(vaddq_u64(x, x), vshrq_n_u64(x, 63)) + +#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ +do { \ + A0 = fBlaMka(A0, B0); \ + A1 = fBlaMka(A1, B1); \ + \ + D0 = veorq_u64(D0, A0); \ + D1 = veorq_u64(D1, A1); \ + \ + D0 = vrorq_n_u64_32(D0); \ + D1 = vrorq_n_u64_32(D1); \ + \ + C0 = fBlaMka(C0, D0); \ + C1 = fBlaMka(C1, D1); \ + \ + B0 = veorq_u64(B0, C0); \ + B1 = veorq_u64(B1, C1); \ + \ + B0 = vrorq_n_u64_24(B0); \ + B1 = vrorq_n_u64_24(B1); \ +} while ((void)0, 0) + +#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ +do { \ + A0 = fBlaMka(A0, B0); \ + A1 = fBlaMka(A1, B1); \ + \ + D0 = veorq_u64(D0, A0); \ + D1 = veorq_u64(D1, A1); \ + \ + D0 = 
vrorq_n_u64_16(D0); \ + D1 = vrorq_n_u64_16(D1); \ + \ + C0 = fBlaMka(C0, D0); \ + C1 = fBlaMka(C1, D1); \ + \ + B0 = veorq_u64(B0, C0); \ + B1 = veorq_u64(B1, C1); \ + \ + B0 = vrorq_n_u64_63(B0); \ + B1 = vrorq_n_u64_63(B1); \ +} while ((void)0, 0) + +#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + t0 = vextq_u64(B0, B1, 1); \ + t1 = vextq_u64(B1, B0, 1); \ + B0 = t0; B1 = t1; t0 = C0; C0 = C1; C1 = t0; \ + t0 = vextq_u64(D1, D0, 1); t1 = vextq_u64(D0, D1, 1); \ + D0 = t0; D1 = t1; + +#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + t0 = vextq_u64(B1, B0, 1); \ + t1 = vextq_u64(B0, B1, 1); \ + B0 = t0; B1 = t1; t0 = C0; C0 = C1; C1 = t0; \ + t0 = vextq_u64(D0, D1, 1); t1 = vextq_u64(D1, D0, 1); \ + D0 = t0; D1 = t1; + +#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \ +do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ + \ + DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ + \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ + \ + UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ +} while ((void)0, 0) + +#endif /* __NEON__ */ + +#endif /* BLAKE_ROUND_MKA_OPT_H */ diff --git a/src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-ref.h b/src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-ref.h new file mode 100644 index 00000000..fb07a969 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-ref.h @@ -0,0 +1,55 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. 
The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#ifndef BLAKE_ROUND_MKA_H +#define BLAKE_ROUND_MKA_H + +#include "../../argon2/blake2/blake2-impl.h" + +/* designed by the Lyra PHC team */ +static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) { + const uint64_t m = UINT64_C(0xFFFFFFFF); + const uint64_t xy = (x & m) * (y & m); + return x + y + 2 * xy; +} + +#define G(a, b, c, d) \ + do { \ + a = fBlaMka(a, b); \ + d = rotr64(d ^ a, 32); \ + c = fBlaMka(c, d); \ + b = rotr64(b ^ c, 24); \ + a = fBlaMka(a, b); \ + d = rotr64(d ^ a, 16); \ + c = fBlaMka(c, d); \ + b = rotr64(b ^ c, 63); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \ + v12, v13, v14, v15) \ + do { \ + G(v0, v4, v8, v12); \ + G(v1, v5, v9, v13); \ + G(v2, v6, v10, v14); \ + G(v3, v7, v11, v15); \ + G(v0, v5, v10, v15); \ + G(v1, v6, v11, v12); \ + G(v2, v7, v8, v13); \ + G(v3, v4, v9, v14); \ + } while ((void)0, 0) + +#endif diff --git a/src/crypto/argon2_hasher/hash/cpu/argon2_opt/implementation.c b/src/crypto/argon2_hasher/hash/cpu/argon2_opt/implementation.c new file mode 100755 index 00000000..c01261ab --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/argon2_opt/implementation.c @@ -0,0 +1,448 @@ +// +// Created by Haifa Bogdan Adnan on 06/08/2018. 
+//
+
+#include <stdint.h> /* NOTE(review): the four header names were missing from the reviewed text; stdint.h/string.h are required by uint64_t/memcpy below, stdlib.h/stdio.h are presumed - confirm */
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "../../../common/DLLImport.h"
+#include "../../argon2/Defs.h"
+#include "../../../common/DLLExport.h"
+
+#if !defined(BUILD_REF) && (defined(__x86_64__) || defined(_WIN64) || defined(__NEON__))
+#include "blamka-round-opt.h"
+#else
+#include "blamka-round-ref.h"
+#endif
+/* Copy the ARGON2_QWORDS_IN_BLOCK 64-bit words of *src into *dst. */
+void copy_block(block *dst, const block *src) {
+    memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK);
+}
+/* XOR *src into *dst, one 64-bit word at a time. */
+void xor_block(block *dst, const block *src) {
+    int i;
+    for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
+        dst->v[i] ^= src->v[i];
+    }
+}
+
+#ifndef BUILD_REF
+/* fill_block, SIMD variants: state ^= ref_block; saved = state (^ next_block when with_xor); BLAKE2 rounds on state; state ^= saved; stored to next_block when keep. */
+#if defined(__AVX512F__)
+static void fill_block(__m512i *state, const block *ref_block,
+                       block *next_block, int with_xor, int keep) {
+    __m512i block_XY[ARGON2_512BIT_WORDS_IN_BLOCK];
+    unsigned int i;
+
+    if (with_xor) {
+        for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
+            state[i] = _mm512_xor_si512(
+                state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
+            block_XY[i] = _mm512_xor_si512(
+                state[i], _mm512_loadu_si512((const __m512i *)next_block->v + i));
+        }
+    } else {
+        for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
+            block_XY[i] = state[i] = _mm512_xor_si512(
+                state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
+        }
+    }
+
+    for (i = 0; i < 2; ++i) {
+        BLAKE2_ROUND_1(
+            state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3],
+            state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]);
+    }
+
+    for (i = 0; i < 2; ++i) {
+        BLAKE2_ROUND_2(
+            state[2 * 0 + i], state[2 * 1 + i], state[2 * 2 + i], state[2 * 3 + i],
+            state[2 * 4 + i], state[2 * 5 + i], state[2 * 6 + i], state[2 * 7 + i]);
+    }
+
+    if(keep) {
+        for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
+            state[i] = _mm512_xor_si512(state[i], block_XY[i]);
+            _mm512_storeu_si512((__m512i *)next_block->v + i, state[i]);
+        }
+    }
+    else {
+        for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
+            state[i] = _mm512_xor_si512(state[i], block_XY[i]);
+        }
+    }
+}
+#elif defined(__AVX2__)
+static void fill_block(__m256i *state, const block *ref_block,
+                       block *next_block, int with_xor, int keep) {
+    __m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
+    unsigned int i;
+
+    if (with_xor) {
+        for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
+            state[i] = _mm256_xor_si256(
+                state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
+            block_XY[i] = _mm256_xor_si256(
+                state[i], _mm256_loadu_si256((const __m256i *)next_block->v + i));
+        }
+    } else {
+        for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
+            block_XY[i] = state[i] = _mm256_xor_si256(
+                state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
+        }
+    }
+
+    for (i = 0; i < 4; ++i) {
+        BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5],
+                       state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]);
+    }
+
+    for (i = 0; i < 4; ++i) {
+        BLAKE2_ROUND_2(state[ 0 + i], state[ 4 + i], state[ 8 + i], state[12 + i],
+                       state[16 + i], state[20 + i], state[24 + i], state[28 + i]);
+    }
+
+    if(keep) {
+        for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
+            state[i] = _mm256_xor_si256(state[i], block_XY[i]);
+            _mm256_storeu_si256((__m256i *)next_block->v + i, state[i]); /* unaligned store: next_block is read with loadu above, so no 32-byte alignment may be assumed */
+        }
+    }
+    else {
+        for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
+            state[i] = _mm256_xor_si256(state[i], block_XY[i]);
+        }
+    }
+}
+#elif defined(__AVX__)
+
+#define I2D(x) _mm256_castsi256_pd(x)
+#define D2I(x) _mm256_castpd_si256(x)
+
+static void fill_block(__m128i *state, const block *ref_block,
+                       block *next_block, int with_xor, int keep) {
+    __m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
+    unsigned int i;
+
+    __m256i t;
+    __m256i *s256 = (__m256i *) state, *block256 = (__m256i *) block_XY; /* 256-bit views used only for the XOR passes */
+
+    if (with_xor) {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK / 2; i++) {
+            t = D2I(_mm256_xor_pd(I2D(_mm256_loadu_si256(s256 + i)), \
+                I2D(_mm256_loadu_si256((const __m256i *)ref_block->v + i))));
+            _mm256_storeu_si256(s256 + i, t);
+            t = D2I(_mm256_xor_pd(I2D(t), \
+                I2D(_mm256_loadu_si256((const __m256i *)next_block->v + i))));
+            _mm256_storeu_si256(block256 + i, t);
+        }
+    } else {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK / 2; i++) {
+            t = D2I(_mm256_xor_pd(I2D(_mm256_loadu_si256(s256 + i)), \
+                I2D(_mm256_loadu_si256((const __m256i *)ref_block->v + i))));
+            _mm256_storeu_si256(s256 + i, t);
+            _mm256_storeu_si256(block256 + i, t);
+        }
+    }
+
+    for (i = 0; i < 8; ++i) {
+        BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
+                     state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
+                     state[8 * i + 6], state[8 * i + 7]);
+    }
+
+    for (i = 0; i < 8; ++i) {
+        BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
+                     state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
+                     state[8 * 6 + i], state[8 * 7 + i]);
+    }
+
+    if(keep) {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK / 2; i++) {
+            t = D2I(_mm256_xor_pd(I2D(_mm256_loadu_si256(s256 + i)), \
+                I2D(_mm256_loadu_si256(block256 + i))));
+
+            _mm256_storeu_si256(s256 + i, t);
+            _mm256_storeu_si256((__m256i *)next_block->v + i, t);
+        }
+    }
+    else {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK / 2; i++) {
+            t = D2I(_mm256_xor_pd(I2D(_mm256_loadu_si256(s256 + i)), \
+                I2D(_mm256_loadu_si256(block256 + i))));
+
+            _mm256_storeu_si256(s256 + i, t);
+        }
+    }
+
+}
+#elif defined(__NEON__)
+static void fill_block(uint64x2_t *state, const block *ref_block,
+                       block *next_block, int with_xor, int keep) {
+    uint64x2_t block_XY[ARGON2_OWORDS_IN_BLOCK];
+    uint64x2_t t0, t1;
+
+    unsigned int i;
+
+    if (with_xor) {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+            state[i] = veorq_u64(state[i], vld1q_u64(ref_block->v + i*2));
+            block_XY[i] = veorq_u64(state[i], vld1q_u64(next_block->v + i*2));
+        }
+    } else {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+            block_XY[i] = state[i] = veorq_u64(state[i], vld1q_u64(ref_block->v + i*2));
+        }
+    }
+
+    for (i = 0; i < 8; ++i) {
+        BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
+                     state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
+                     state[8 * i + 6], state[8 * i + 7]);
+    }
+
+    for (i = 0; i < 8; ++i) {
+        BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
+                     state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
+                     state[8 * 6 + i], state[8 * 7 + i]);
+    }
+
+    if(keep) {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+            state[i] = veorq_u64(state[i], block_XY[i]);
+            vst1q_u64(next_block->v + i*2, state[i]);
+        }
+    }
+    else {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+            state[i] = veorq_u64(state[i], block_XY[i]);
+        }
+    }
+}
+#else
+static void fill_block(__m128i *state, const block *ref_block,
+                       block *next_block, int with_xor, int keep) {
+    __m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
+    unsigned int i;
+
+    if (with_xor) {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+            state[i] = _mm_xor_si128(
+                state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
+            block_XY[i] = _mm_xor_si128(
+                state[i], _mm_loadu_si128((const __m128i *)next_block->v + i));
+        }
+    } else {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+            block_XY[i] = state[i] = _mm_xor_si128(
+                state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
+        }
+    }
+
+    for (i = 0; i < 8; ++i) {
+        BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
+                     state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
+                     state[8 * i + 6], state[8 * i + 7]);
+    }
+
+    for (i = 0; i < 8; ++i) {
+        BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
+                     state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
+                     state[8 * 6 + i], state[8 * 7 + i]);
+    }
+
+    if(keep) {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+            state[i] = _mm_xor_si128(state[i], block_XY[i]);
+            _mm_storeu_si128((__m128i *)next_block->v + i, state[i]);
+        }
+    }
+    else {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+            state[i] = _mm_xor_si128(state[i], block_XY[i]);
+        }
+    }
+}
+#endif
+/* fill_block, BUILD_REF variant: same contract as the SIMD variants, operating in place on *prev_block with the scalar BLAKE2_ROUND_NOMSG. */
+#else
+static void fill_block(block *prev_block, const block *ref_block,
+                       block *next_block, int with_xor, int keep) {
+    block block_tmp;
+    unsigned i;
+
+    xor_block(prev_block, ref_block);
+    copy_block(&block_tmp, prev_block);
+
+    if (with_xor && next_block != NULL) {
+        xor_block(&block_tmp, next_block);
+    }
+
+    /* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then
+    (16,17,..31)... finally (112,113,...127) */
+    for (i = 0; i < 8; ++i) {
+        BLAKE2_ROUND_NOMSG(
+            prev_block->v[16 * i], prev_block->v[16 * i + 1], prev_block->v[16 * i + 2],
+            prev_block->v[16 * i + 3], prev_block->v[16 * i + 4], prev_block->v[16 * i + 5],
+            prev_block->v[16 * i + 6], prev_block->v[16 * i + 7], prev_block->v[16 * i + 8],
+            prev_block->v[16 * i + 9], prev_block->v[16 * i + 10], prev_block->v[16 * i + 11],
+            prev_block->v[16 * i + 12], prev_block->v[16 * i + 13], prev_block->v[16 * i + 14],
+            prev_block->v[16 * i + 15]);
+    }
+
+    /* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then
+    (2,3,18,19,...,114,115).. finally (14,15,30,31,...,126,127) */
+    for (i = 0; i < 8; i++) {
+        BLAKE2_ROUND_NOMSG(
+            prev_block->v[2 * i], prev_block->v[2 * i + 1], prev_block->v[2 * i + 16],
+            prev_block->v[2 * i + 17], prev_block->v[2 * i + 32], prev_block->v[2 * i + 33],
+            prev_block->v[2 * i + 48], prev_block->v[2 * i + 49], prev_block->v[2 * i + 64],
+            prev_block->v[2 * i + 65], prev_block->v[2 * i + 80], prev_block->v[2 * i + 81],
+            prev_block->v[2 * i + 96], prev_block->v[2 * i + 97], prev_block->v[2 * i + 112],
+            prev_block->v[2 * i + 113]);
+    }
+
+    xor_block(prev_block, &block_tmp);
+    if(keep)
+        copy_block(next_block, prev_block);
+}
+
+#endif
+/* For each of `threads` hashes (profile->memSize bytes apiece inside user_data) replay profile->segments through fill_block, then fold the final result into block 0. Returns user_data. */
+DLLEXPORT void *fill_memory_blocks(int threads, Argon2Profile *profile, void *user_data) {
+    void *memory = user_data;
+#ifndef BUILD_REF
+#if defined(__AVX512F__)
+    __m512i state[ARGON2_512BIT_WORDS_IN_BLOCK];
+    uint64_t buff_512[8];
+#elif defined(__AVX2__)
+    __m256i state[ARGON2_HWORDS_IN_BLOCK];
+    uint64_t buff_256[4];
+#elif defined(__x86_64__) || defined(_WIN64)
+    __m128i state[ARGON2_OWORDS_IN_BLOCK];
+#elif defined(__NEON__)
+    uint64x2_t state[ARGON2_OWORDS_IN_BLOCK];
+#endif
+#else
+    block state_;
+    block *state = &state_;
+#endif
+    int lane_length = profile->segSize * 4;
+    int seg_length = profile->segSize;
+    int suc_idx = profile->succesiveIdxs;
+
+    for(int thr = 0; thr < threads;thr++) {
+        block *ref_block = NULL, *curr_block = NULL;
+
+        int32_t ref_idx = 0;
+        int32_t cur_idx = 0;
+        int32_t prev_idx = 0;
+        int32_t seg_type = 0;
+        int32_t idx = 0;
+        int32_t keep = 1;
+        int32_t with_xor = 0;
+
+        block *blocks = (block *)((uint8_t*)memory + thr * profile->memSize);
+
+        int32_t *address = profile->blockRefs;
+
+        for(uint32_t s = 0; s < profile->segCount; s++) {
+            cur_idx = profile->segments[s * 3]; /* each segment record is a (cur_idx, prev_idx, seg_type) triple */
+            prev_idx = profile->segments[s * 3 + 1];
+            seg_type = profile->segments[s * 3 + 2];
+            keep = 1;
+            with_xor = (s >= profile->thrCost * 4) ? 1 : 0;
+
+            idx = (s < profile->thrCost) ? 2 : 0; /* the first thrCost segments start at block 2 */
+
+            int32_t lane = s % profile->thrCost;
+            int32_t slice = (s / profile->thrCost) % 4;
+            int32_t pass = (s / profile->thrCost) / 4;
+
+            memcpy(state, (void *) (blocks + prev_idx), ARGON2_BLOCK_SIZE);
+
+            if(seg_type == 0) {
+                if(s < profile->thrCost)
+                    address = &profile->blockRefs[(s * (profile->segSize - 2)) * 3];
+                else
+                    address = &profile->blockRefs[(profile->thrCost * (profile->segSize - 2) + (s - profile->thrCost) * profile->segSize) * 3];
+            }
+
+            for (int i = idx; i < seg_length; ++i, cur_idx ++) {
+                if (seg_type == 1) { // data dependent addressing
+#ifndef BUILD_REF
+#if defined(__AVX512F__)
+                    _mm512_storeu_si512(buff_512, state[0]);
+                    uint64_t pseudo_rand = buff_512[0];
+#elif defined(__AVX2__)
+                    _mm256_storeu_si256((__m256i *)buff_256, state[0]); /* the intrinsic takes __m256i *, not uint64_t * */
+                    uint64_t pseudo_rand = buff_256[0];
+#elif defined(__x86_64__) || defined(_WIN64)
+                    uint64_t pseudo_rand = _mm_cvtsi128_si64(state[0]);
+#elif defined(__NEON__)
+                    uint64_t pseudo_rand = 0;
+                    vst1q_lane_u64(&pseudo_rand, state[0], 0);
+#endif
+#else
+                    uint64_t pseudo_rand = state->v[0];
+#endif
+                    uint64_t ref_lane = ((pseudo_rand >> 32)) % profile->thrCost;
+                    uint32_t reference_area_size = 0;
+                    if(pass > 0) {
+                        if (lane == ref_lane) {
+                            reference_area_size = lane_length - seg_length + i - 1;
+                        } else {
+                            reference_area_size = lane_length - seg_length + ((i == 0) ? (-1) : 0);
+                        }
+                    }
+                    else {
+                        if (lane == ref_lane) {
+                            reference_area_size = slice * seg_length + i - 1;
+                        } else {
+                            reference_area_size = slice * seg_length + ((i == 0) ? (-1) : 0);
+                        }
+                    }
+                    uint64_t relative_position = pseudo_rand & 0xFFFFFFFF;
+                    relative_position = relative_position * relative_position >> 32;
+
+                    relative_position = reference_area_size - 1 -
+                                        (reference_area_size * relative_position >> 32);
+
+                    ref_idx = ref_lane * lane_length + (((pass > 0 && slice < 3) ? ((slice + 1) * seg_length) : 0) + relative_position) % lane_length;
+                }
+                else {
+                    ref_idx = address[1]; /* each blockRefs record is a (cur_idx, ref_idx, keep) triple */
+                    if(suc_idx == 0)
+                        cur_idx = address[0];
+                    keep = address[2];
+
+                    address += 3;
+                }
+
+                ref_block = blocks + ref_idx;
+                curr_block = blocks + cur_idx;
+
+                fill_block(state, ref_block, curr_block, with_xor, keep);
+            }
+        }
+
+        int32_t dst = -1; /* last block index touched by the fold below, -1 if none; signed so the sentinel needs no unsigned wrap-around */
+        for(; address < (profile->blockRefs + profile->blockRefsSize * 3); address += 3) {
+            if (address[2] == -1) {
+                curr_block = blocks + address[0];
+                ref_block = blocks + address[1];
+                dst = address[0];
+                xor_block(curr_block, ref_block);
+            }
+        }
+        if(dst != -1)
+            copy_block(blocks, blocks + dst);
+        else
+            copy_block(blocks, (const block *)state); /* state is a SIMD-typed array outside BUILD_REF, hence the cast */
+    }
+
+    return memory;
+}
+
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/.clang-format b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.clang-format
new file mode 100755
index 00000000..06ea346a
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.clang-format
@@ -0,0 +1,4 @@
+---
+Language: Cpp
+BasedOnStyle: Google
+...
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/.gitignore b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.gitignore new file mode 100755 index 00000000..0690aa44 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.gitignore @@ -0,0 +1 @@ +cmake_build/ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/.travis.yml b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.travis.yml new file mode 100755 index 00000000..deafdfa7 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.travis.yml @@ -0,0 +1,91 @@ +language: c + +sudo: false + +cache: + directories: + - $HOME/cpu_features_archives + +matrix: + include: + - os: linux + compiler: gcc + env: + TOOLCHAIN=NATIVE + TARGET=native + - os: linux + compiler: clang + env: + TOOLCHAIN=NATIVE + TARGET=native + - os: osx + compiler: gcc + env: + TOOLCHAIN=NATIVE + TARGET=native + - os: osx + compiler: clang + env: + TOOLCHAIN=NATIVE + TARGET=native + - os: linux-ppc64le + compiler: gcc + env: + TOOLCHAIN=NATIVE + TARGET=native + - os: linux-ppc64le + compiler: clang + env: + TOOLCHAIN=NATIVE + TARGET=native + # Toolchains for little-endian, 64-bit ARMv8 for GNU/Linux systems + - os: linux + env: + TOOLCHAIN=LINARO + TARGET=aarch64-linux-gnu + QEMU_ARCH=aarch64 + # Toolchains for little-endian, hard-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems + - os: linux + env: + TOOLCHAIN=LINARO + TARGET=arm-linux-gnueabihf + QEMU_ARCH=arm + # Toolchains for little-endian, 32-bit ARMv8 for GNU/Linux systems + - os: linux + env: + TOOLCHAIN=LINARO + TARGET=armv8l-linux-gnueabihf + QEMU_ARCH=arm + # Toolchains for little-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems + - os: linux + env: + TOOLCHAIN=LINARO + TARGET=arm-linux-gnueabi + QEMU_ARCH=arm + # Toolchains for big-endian, 64-bit ARMv8 for GNU/Linux systems + - os: linux + env: + TOOLCHAIN=LINARO + TARGET=aarch64_be-linux-gnu + QEMU_ARCH=DISABLED + # Toolchains for big-endian, hard-float, 32-bit ARMv7 
(and earlier) for GNU/Linux systems + - os: linux + env: + TOOLCHAIN=LINARO + TARGET=armeb-linux-gnueabihf + QEMU_ARCH=DISABLED + # Toolchains for big-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems + - os: linux + env: + TOOLCHAIN=LINARO + TARGET=armeb-linux-gnueabi + QEMU_ARCH=DISABLED + - os: linux + env: + TOOLCHAIN=CODESCAPE + TARGET=mips-mti-linux-gnu + QEMU_ARCH=DISABLED + +script: + - cmake --version + - bash -e -x ./scripts/run_integration.sh diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/CMakeLists.txt b/src/crypto/argon2_hasher/hash/cpu/cpu_features/CMakeLists.txt new file mode 100755 index 00000000..591c1164 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/CMakeLists.txt @@ -0,0 +1,165 @@ +cmake_minimum_required(VERSION 3.0) + +project(CpuFeatures VERSION 0.1.0) + +# Default Build Type to be Release +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release" CACHE STRING + "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." + FORCE) +endif(NOT CMAKE_BUILD_TYPE) + +# BUILD_TESTING is a standard CMake variable, but we declare it here to make it +# prominent in the GUI. +option(BUILD_TESTING "Enable test (depends on googletest)." OFF) +# BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to make +# it prominent in the GUI. +option(BUILD_SHARED_LIBS "Build library as shared." 
OFF) + +# +# library : cpu_features +# + +set(_HDRS + include/cpuinfo_aarch64.h + include/cpuinfo_arm.h + include/cpuinfo_mips.h + include/cpuinfo_ppc.h + include/cpuinfo_x86.h + include/cpu_features_macros.h +) + +add_library(cpu_features + ${_HDRS} + include/internal/bit_utils.h + include/internal/linux_features_aggregator.h + include/internal/cpuid_x86.h + include/internal/filesystem.h + include/internal/hwcaps.h + include/internal/stack_line_reader.h + include/internal/string_view.h + include/cpu_features_macros.h + src/linux_features_aggregator.c + src/cpuid_x86_clang_gcc.c + src/cpuid_x86_msvc.c + src/cpuinfo_aarch64.c + src/cpuinfo_arm.c + src/cpuinfo_mips.c + src/cpuinfo_ppc.c + src/cpuinfo_x86.c + src/filesystem.c + src/hwcaps.c + src/stack_line_reader.c + src/string_view.c +) + +target_include_directories(cpu_features + PUBLIC + $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> + $<INSTALL_INTERFACE:include/cpu_features> + PRIVATE + include/internal +) +set_target_properties(cpu_features PROPERTIES PUBLIC_HEADER "${_HDRS}") +target_compile_definitions(cpu_features + PUBLIC STACK_LINE_READER_BUFFER_SIZE=1024) +target_link_libraries(cpu_features PUBLIC ${CMAKE_DL_LIBS}) + +# The use of shared libraries is discouraged. +# For API / ABI compatibility reasons, it is recommended to build and use +# cpu_features in a subdirectory of your project or as an embedded dependency. +if(BUILD_SHARED_LIBS) + set_property(TARGET cpu_features PROPERTY POSITION_INDEPENDENT_CODE ON) +endif() +add_library(CpuFeature::cpu_features ALIAS cpu_features) + +# +# program : list_cpu_features +# + +add_executable(list_cpu_features src/utils/list_cpu_features.c) +target_link_libraries(list_cpu_features PRIVATE cpu_features) +add_executable(CpuFeature::list_cpu_features ALIAS list_cpu_features) + +# +# tests +# + +include(CTest) +if(BUILD_TESTING) + # Automatically incorporate googletest into the CMake Project if target not + # found. + if(NOT TARGET gtest OR NOT TARGET gmock_main) + # Download and unpack googletest at configure time. 
+ configure_file( + cmake/googletest.CMakeLists.txt.in + googletest-download/CMakeLists.txt + ) + + execute_process( + COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download) + + if(result) + message(FATAL_ERROR "CMake step for googletest failed: ${result}") + endif() + + execute_process( + COMMAND ${CMAKE_COMMAND} --build . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download) + + if(result) + message(FATAL_ERROR "Build step for googletest failed: ${result}") + endif() + + # Prevent overriding the parent project's compiler/linker settings on + # Windows. + set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + + # Add googletest directly to our build. This defines the gtest and + # gtest_main targets. + add_subdirectory(${CMAKE_BINARY_DIR}/googletest-src + ${CMAKE_BINARY_DIR}/googletest-build + EXCLUDE_FROM_ALL) + endif() + + add_subdirectory(test) +endif() + +# +# Install +# + +include(GNUInstallDirs) +install(TARGETS cpu_features list_cpu_features + EXPORT CpuFeaturesTargets + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/cpu_features + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) +install(EXPORT CpuFeaturesTargets + NAMESPACE CpuFeatures:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeatures + COMPONENT Devel +) +include(CMakePackageConfigHelpers) +configure_package_config_file(cmake/CpuFeaturesConfig.cmake.in + "${PROJECT_BINARY_DIR}/CpuFeaturesConfig.cmake" + INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeatures" + NO_SET_AND_CHECK_MACRO + NO_CHECK_REQUIRED_COMPONENTS_MACRO +) +write_basic_package_version_file( + "${PROJECT_BINARY_DIR}/CpuFeaturesConfigVersion.cmake" + COMPATIBILITY SameMajorVersion +) +install( + FILES + "${PROJECT_BINARY_DIR}/CpuFeaturesConfig.cmake" + "${PROJECT_BINARY_DIR}/CpuFeaturesConfigVersion.cmake" + DESTINATION 
"${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeatures" + COMPONENT Devel +) diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/CONTRIBUTING.md b/src/crypto/argon2_hasher/hash/cpu/cpu_features/CONTRIBUTING.md new file mode 100755 index 00000000..c980350f --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/CONTRIBUTING.md @@ -0,0 +1,23 @@ +# How to Contribute + +We'd love to accept your patches and contributions to this project. There are +just a few small guidelines you need to follow. + +## Contributor License Agreement + +Contributions to this project must be accompanied by a Contributor License +Agreement. You (or your employer) retain the copyright to your contribution; +this simply gives us permission to use and redistribute your contributions as +part of the project. Head over to <https://cla.developers.google.com/> to see +your current agreements on file or to sign a new one. + +You generally only need to submit a CLA once, so if you've already submitted one +(even if it was for a different project), you probably don't need to do it +again. + +## Code reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult +[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more +information on using pull requests. diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/LICENSE b/src/crypto/argon2_hasher/hash/cpu/cpu_features/LICENSE new file mode 100755 index 00000000..7a4a3ea2 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/README.md b/src/crypto/argon2_hasher/hash/cpu/cpu_features/README.md new file mode 100755 index 00000000..039175b3 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/README.md @@ -0,0 +1,165 @@ +# cpu_features [![Build Status](https://travis-ci.org/google/cpu_features.svg?branch=master)](https://travis-ci.org/google/cpu_features) [![Build status](https://ci.appveyor.com/api/projects/status/46d1owsj7n8dsylq/branch/master?svg=true)](https://ci.appveyor.com/project/gchatelet/cpu-features/branch/master) + +A cross-platform C library to retrieve CPU features (such as available +instructions) at runtime. 
+ +## Table of Contents + +- [Design Rationale](#rationale) +- [Code samples](#codesample) +- [Running sample code](#usagesample) +- [What's supported](#support) +- [License](#license) +- [Build with cmake](#cmake) + +<a name="rationale"></a> +## Design Rationale + +- **Simple to use.** See the snippets below for examples. +- **Extensible.** Easy to add missing features or architectures. +- **Compatible with old compilers** and available on many architectures so it + can be used widely. To ensure that cpu_features works on as many platforms + as possible, we implemented it in a highly portable version of C: C99. +- **Sandbox-compatible.** The library uses a variety of strategies to cope + with sandboxed environments or when `cpuid` is unavailable. This is useful + when running integration tests in hermetic environments. +- **Thread safe, no memory allocation, and raises no exceptions.** + cpu_features is suitable for implementing fundamental libc functions like + `malloc`, `memcpy`, and `memcmp`. +- **Unit tested.** + +<a name="codesample"></a> +### Checking features at runtime + +Here's a simple example that executes a codepath if the CPU supports both the +AES and the SSE4.2 instruction sets: + +```c +#include "cpuinfo_x86.h" + +static const X86Features features = GetX86Info().features; + +void Compute(void) { + if (features.aes && features.sse4_2) { + // Run optimized code. + } else { + // Run standard code. + } +} +``` + +### Caching for faster evaluation of complex checks + +If you wish, you can read all the features at once into a global variable, and +then query for the specific features you care about. Below, we store all the ARM +features and then check whether AES and NEON are supported. + +```c +#include <stdbool.h> +#include "cpuinfo_arm.h" + +static const ArmFeatures features = GetArmInfo().features; +static const bool has_aes_and_neon = features.aes && features.neon; + +// use has_aes_and_neon. 
+``` + +This is a good approach to take if you're checking for combinations of features +when using a compiler that is slow to extract individual bits from bit-packed +structures. + +### Checking compile time flags + +The following code determines whether the compiler was told to use the AVX +instruction set (e.g., `g++ -mavx`) and sets `has_avx` accordingly. + +```c +#include <stdbool.h> +#include "cpuinfo_x86.h" + +static const X86Features features = GetX86Info().features; +static const bool has_avx = CPU_FEATURES_COMPILED_X86_AVX || features.avx; + +// use has_avx. +``` + +`CPU_FEATURES_COMPILED_X86_AVX` is set to 1 if the compiler was instructed to +use AVX and 0 otherwise, combining compile time and runtime knowledge. + +### Rejecting poor hardware implementations based on microarchitecture + +On x86, the first incarnation of a feature in a microarchitecture might not be +the most efficient (e.g. AVX on Sandy Bridge). We provide a function to retrieve +the underlying microarchitecture so you can decide whether to use it. + +Below, `has_fast_avx` is set to 1 if the CPU supports the AVX instruction +set—but only if it's not Sandy Bridge. + +```c +#include <stdbool.h> +#include "cpuinfo_x86.h" + +static const X86Info info = GetX86Info(); +static const X86Microarchitecture uarch = GetX86Microarchitecture(&info); +static const bool has_fast_avx = info.features.avx && uarch != INTEL_SNB; + +// use has_fast_avx. +``` + +This feature is currently available only for x86 microarchitectures. + +<a name="usagesample"></a> +### Running sample code + +Building `cpu_features` brings a small executable to test the library. 
+ +```shell + % ./build/list_cpu_features +arch : x86 +brand : Intel(R) Xeon(R) CPU E5-1650 0 @ 3.20GHz +family : 6 (0x06) +model : 45 (0x2D) +stepping : 7 (0x07) +uarch : INTEL_SNB +flags : aes,avx,cx16,smx,sse4_1,sse4_2,ssse3 +``` + +```shell +% ./build/list_cpu_features --json +{"arch":"x86","brand":" Intel(R) Xeon(R) CPU E5-1650 0 @ 3.20GHz","family":6,"model":45,"stepping":7,"uarch":"INTEL_SNB","flags":["aes","avx","cx16","smx","sse4_1","sse4_2","ssse3"]} +``` + +<a name="support"></a> +## What's supported + +| | x86³ | ARM | AArch64 | MIPSel | POWER | +|---------|:----:|:-------:|:-------:|:------:|:-------:| +| Android | yes² | yes¹ | yes¹ | yes¹ | N/A | +| iOS | N/A | not yet | not yet | N/A | N/A | +| Linux | yes² | yes¹ | yes¹ | yes¹ | yes¹ | +| macOS | yes² | N/A | not yet | N/A | no | +| Windows | yes² | not yet | not yet | N/A | N/A | + +1. **Features revealed from Linux.** We gather data from several sources + depending on availability: + + from glibc's + [getauxval](https://www.gnu.org/software/libc/manual/html_node/Auxiliary-Vector.html) + + by parsing `/proc/self/auxv` + + by parsing `/proc/cpuinfo` +2. **Features revealed from CPU.** Features are retrieved by using the `cpuid` + instruction. +3. **Microarchitecture detection.** On x86 some features are not always + implemented efficiently in hardware (e.g. AVX on Sandy Bridge). Exposing the + microarchitecture allows the client to reject particular microarchitectures. + + +<a name="license"></a> +## License + +The cpu_features library is licensed under the terms of the Apache license. +See [LICENSE](LICENSE) for more information. + +<a name="cmake"></a> +## Build with CMake + +Please check the [CMake build instructions](cmake/README.md). 
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/WORKSPACE b/src/crypto/argon2_hasher/hash/cpu/cpu_features/WORKSPACE new file mode 100755 index 00000000..8ea8a8b6 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/WORKSPACE @@ -0,0 +1,7 @@ +# ===== googletest ===== + +git_repository( + name = "com_google_googletest", + remote = "https://github.com/google/googletest.git", + commit = "c3f65335b79f47b05629e79a54685d899bc53b93", +) diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/appveyor.yml b/src/crypto/argon2_hasher/hash/cpu/cpu_features/appveyor.yml new file mode 100755 index 00000000..f18635a3 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/appveyor.yml @@ -0,0 +1,24 @@ +version: '{build}' +shallow_clone: true + +platform: x64 + +environment: + matrix: + - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 + CMAKE_GENERATOR: "Visual Studio 15 2017 Win64" + - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 + CMAKE_GENERATOR: "Visual Studio 14 2015 Win64" + +matrix: + fast_finish: true + +before_build: + - cmake --version + - cmake -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTING=ON -H. 
-Bcmake_build -G "%CMAKE_GENERATOR%" + +build_script: + - cmake --build cmake_build --config Debug --target ALL_BUILD + +test_script: + - cmake --build cmake_build --config Debug --target RUN_TESTS diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/CpuFeaturesConfig.cmake.in b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/CpuFeaturesConfig.cmake.in new file mode 100755 index 00000000..e0bf10e4 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/CpuFeaturesConfig.cmake.in @@ -0,0 +1,3 @@ +# CpuFeatures CMake configuration file + +include("${CMAKE_CURRENT_LIST_DIR}/CpuFeaturesTargets.cmake") diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/README.md b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/README.md new file mode 100755 index 00000000..b6baeaa2 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/README.md @@ -0,0 +1,28 @@ +# CMake build instructions + +## Recommended usage : Incorporating cpu_features into a CMake project + + For API / ABI compatibility reasons, it is recommended to build and use + cpu_features in a subdirectory of your project or as an embedded dependency. + + This is similar to the recommended usage of the googletest framework + ( https://github.com/google/googletest/blob/master/googletest/README.md ) + + Build and use step-by-step + + + 1- Download cpu_features and copy it into a sub-directory of your project, + or add cpu_features as a git submodule of your project. + + 2- You can then use the cmake command `add_subdirectory()` to include + cpu_features directly and use the `cpu_features` target in your project. + + 3- Add the `cpu_features` target to the `target_link_libraries()` section of + your executable or of your library. + +## Enabling tests + + The CMake default options for cpu_features are the Release build type with tests + disabled. 
To enable testing set cmake `BUILD_TESTING` variable to `ON`, + [.travis.yml](../.travis.yml) and [appveyor.yml](../appveyor.yml) have up to + date examples. diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/googletest.CMakeLists.txt.in b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/googletest.CMakeLists.txt.in new file mode 100755 index 00000000..d60a33e9 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/googletest.CMakeLists.txt.in @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 2.8.2) + +project(googletest-download NONE) + +include(ExternalProject) +ExternalProject_Add(googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG master + SOURCE_DIR "${CMAKE_BINARY_DIR}/googletest-src" + BINARY_DIR "${CMAKE_BINARY_DIR}/googletest-build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/mips32-linux-gcc.cmake b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/mips32-linux-gcc.cmake new file mode 100755 index 00000000..dcfab7cf --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/mips32-linux-gcc.cmake @@ -0,0 +1,34 @@ +set(CMAKE_SYSTEM_NAME "Linux") +set(CMAKE_SYSTEM_PROCESSOR "mips32") + +if (ENABLE_DSPR2 AND ENABLE_MSA) + message(FATAL_ERROR "ENABLE_DSPR2 and ENABLE_MSA cannot be combined.") +endif () + +if (ENABLE_DSPR2) + set(HAVE_DSPR2 1 CACHE BOOL "" FORCE) + set(MIPS_CFLAGS "-mdspr2") + set(MIPS_CXXFLAGS "-mdspr2") +elseif (ENABLE_MSA) + set(HAVE_MSA 1 CACHE BOOL "" FORCE) + set(MIPS_CFLAGS "-mmsa") + set(MIPS_CXXFLAGS "-mmsa") +endif () + +if ("${MIPS_CPU}" STREQUAL "") + set(MIPS_CFLAGS "${MIPS_CFLAGS} -mips32r2") + set(MIPS_CXXFLAGS "${MIPS_CXXFLAGS} -mips32r2") +elseif ("${MIPS_CPU}" STREQUAL "p5600") + set(P56_FLAGS "-mips32r5 -mload-store-pairs -msched-weight -mhard-float -mfp64") + set(MIPS_CFLAGS "${MIPS_CFLAGS} ${P56_FLAGS}") + 
set(MIPS_CXXFLAGS "${MIPS_CXXFLAGS} ${P56_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "-mfp64 ${CMAKE_EXE_LINKER_FLAGS}") +endif () + +set(CMAKE_C_COMPILER ${CROSS}gcc) +set(CMAKE_CXX_COMPILER ${CROSS}g++) +set(AS_EXECUTABLE ${CROSS}as) +set(CMAKE_C_COMPILER_ARG1 "-EL ${MIPS_CFLAGS}") +set(CMAKE_CXX_COMPILER_ARG1 "-EL ${MIPS_CXXFLAGS}") + +set(THREADS_PTHREAD_ARG "2" CACHE STRING "Forcibly set by CMakeLists.txt." FORCE) diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpu_features_macros.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpu_features_macros.h new file mode 100755 index 00000000..f8220e1b --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpu_features_macros.h @@ -0,0 +1,125 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef CPU_FEATURES_INCLUDE_CPU_FEATURES_MACROS_H_ +#define CPU_FEATURES_INCLUDE_CPU_FEATURES_MACROS_H_ + +//////////////////////////////////////////////////////////////////////////////// +// Architectures +//////////////////////////////////////////////////////////////////////////////// + +#if ((defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ + defined(__x86_64__)) && \ + !defined(__pnacl__) && !defined(__CLR_VER)) +#define CPU_FEATURES_ARCH_X86 +#endif + +#if (defined(__arm__) || defined(_M_ARM)) +#define CPU_FEATURES_ARCH_ARM +#endif + +#if defined(__aarch64__) +#define CPU_FEATURES_ARCH_AARCH64 +#endif + +#if (defined(CPU_FEATURES_ARCH_AARCH64) || defined(CPU_FEATURES_ARCH_ARM)) +#define CPU_FEATURES_ARCH_ANY_ARM +#endif + +#if defined(__mips__) +#define CPU_FEATURES_ARCH_MIPS +#endif + +#if defined(__powerpc__) +#define CPU_FEATURES_ARCH_PPC +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Os +//////////////////////////////////////////////////////////////////////////////// + +#if defined(__linux__) +#define CPU_FEATURES_OS_LINUX_OR_ANDROID +#endif + +#if defined(__ANDROID__) +#define CPU_FEATURES_OS_ANDROID +#endif + +#if (defined(_WIN64) || defined(_WIN32)) +#define CPU_FEATURES_OS_WINDOWS +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Compilers +//////////////////////////////////////////////////////////////////////////////// + +#if defined(__clang__) +#define CPU_FEATURES_COMPILER_CLANG +#endif + +#if defined(__GNUC__) && !defined(__clang__) +#define CPU_FEATURES_COMPILER_GCC +#endif + +#if defined(_MSC_VER) +#define CPU_FEATURES_COMPILER_MSC +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Cpp +//////////////////////////////////////////////////////////////////////////////// + +#if defined(__cplusplus) +#define CPU_FEATURES_START_CPP_NAMESPACE \ + namespace cpu_features { \ + extern "C" { +#define 
CPU_FEATURES_END_CPP_NAMESPACE \ + } \ + } +#else +#define CPU_FEATURES_START_CPP_NAMESPACE +#define CPU_FEATURES_END_CPP_NAMESPACE +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Compiler flags +//////////////////////////////////////////////////////////////////////////////// + +// Compile-time feature checks: each macro below expands to a `defined(...)` test, +// so it is only meaningful in a preprocessor `#if`/`#elif`. See README.md for an example. +#if defined(CPU_FEATURES_ARCH_X86) +#define CPU_FEATURES_COMPILED_X86_AES defined(__AES__) +#define CPU_FEATURES_COMPILED_X86_F16C defined(__F16C__) +#define CPU_FEATURES_COMPILED_X86_BMI defined(__BMI__) +#define CPU_FEATURES_COMPILED_X86_BMI2 defined(__BMI2__) +#define CPU_FEATURES_COMPILED_X86_SSE (defined(__SSE__) || (_M_IX86_FP >= 1)) +#define CPU_FEATURES_COMPILED_X86_SSE2 (defined(__SSE2__) || (_M_IX86_FP >= 2)) +#define CPU_FEATURES_COMPILED_X86_SSE3 defined(__SSE3__) +#define CPU_FEATURES_COMPILED_X86_SSSE3 defined(__SSSE3__) +#define CPU_FEATURES_COMPILED_X86_SSE4_1 defined(__SSE4_1__) +#define CPU_FEATURES_COMPILED_X86_SSE4_2 defined(__SSE4_2__) +#define CPU_FEATURES_COMPILED_X86_AVX defined(__AVX__) +#define CPU_FEATURES_COMPILED_x86_AVX2 defined(__AVX2__) // NOTE: lowercase "x86", unlike its sibling macros. +#endif + +#if defined(CPU_FEATURES_ARCH_ANY_ARM) +#define CPU_FEATURES_COMPILED_ANY_ARM_NEON defined(__ARM_NEON__) +#endif + +#if defined(CPU_FEATURES_ARCH_MIPS) +#define CPU_FEATURES_COMPILED_MIPS_MSA defined(__mips_msa) +#endif + +#endif // CPU_FEATURES_INCLUDE_CPU_FEATURES_MACROS_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_aarch64.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_aarch64.h new file mode 100755 index 00000000..b8826ed4 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_aarch64.h @@ -0,0 +1,65 @@ +// Copyright 2017 Google Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_ +#define CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_ + +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + int fp : 1; // Floating-point. + int asimd : 1; // Advanced SIMD. + int aes : 1; // Hardware-accelerated Advanced Encryption Standard. + int pmull : 1; // Polynomial multiply long. + int sha1 : 1; // Hardware-accelerated SHA1. + int sha2 : 1; // Hardware-accelerated SHA2-256. + int crc32 : 1; // Hardware-accelerated CRC-32. + + // Make sure to update Aarch64FeaturesEnum below if you add a field here. 
+} Aarch64Features; + +typedef struct { + Aarch64Features features; + int implementer; + int variant; + int part; + int revision; +} Aarch64Info; + +Aarch64Info GetAarch64Info(void); + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +typedef enum { + AARCH64_FP, + AARCH64_ASIMD, + AARCH64_AES, + AARCH64_PMULL, + AARCH64_SHA1, + AARCH64_SHA2, + AARCH64_CRC32, + AARCH64_LAST_, +} Aarch64FeaturesEnum; + +int GetAarch64FeaturesEnumValue(const Aarch64Features* features, + Aarch64FeaturesEnum value); + +const char* GetAarch64FeaturesEnumName(Aarch64FeaturesEnum); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_arm.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_arm.h new file mode 100755 index 00000000..7a94bb08 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_arm.h @@ -0,0 +1,80 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPUINFO_ARM_H_ +#define CPU_FEATURES_INCLUDE_CPUINFO_ARM_H_ + +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + int vfp : 1; // Vector Floating Point. + int iwmmxt : 1; // Intel Wireless MMX Technology. + int neon : 1; // Advanced SIMD. 
+ int vfpv3 : 1; // VFP version 3 + int vfpv3d16 : 1; // VFP version 3 with 16 D-registers + int vfpv4 : 1; // VFP version 4 with fast context switching + int idiva : 1; // SDIV and UDIV hardware division in ARM mode. + int idivt : 1; // SDIV and UDIV hardware division in Thumb mode. + int aes : 1; // Hardware-accelerated Advanced Encryption Standard. + int pmull : 1; // Polynomial multiply long. + int sha1 : 1; // Hardware-accelerated SHA1. + int sha2 : 1; // Hardware-accelerated SHA2-256. + int crc32 : 1; // Hardware-accelerated CRC-32. + + // Make sure to update ArmFeaturesEnum below if you add a field here. +} ArmFeatures; + +typedef struct { + ArmFeatures features; + int implementer; + int architecture; + int variant; + int part; + int revision; +} ArmInfo; + +// TODO(user): Add macros to know which features are present at compile +// time. + +ArmInfo GetArmInfo(void); + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +typedef enum { + ARM_VFP, + ARM_IWMMXT, + ARM_NEON, + ARM_VFPV3, + ARM_VFPV3D16, + ARM_VFPV4, + ARM_IDIVA, + ARM_IDIVT, + ARM_AES, + ARM_PMULL, + ARM_SHA1, + ARM_SHA2, + ARM_CRC32, + ARM_LAST_, +} ArmFeaturesEnum; + +int GetArmFeaturesEnumValue(const ArmFeatures* features, ArmFeaturesEnum value); + +const char* GetArmFeaturesEnumName(ArmFeaturesEnum); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_CPUINFO_ARM_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_mips.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_mips.h new file mode 100755 index 00000000..48c23a16 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_mips.h @@ -0,0 +1,53 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPUINFO_MIPS_H_ +#define CPU_FEATURES_INCLUDE_CPUINFO_MIPS_H_ + +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + int msa : 1; // MIPS SIMD Architecture + // https://www.mips.com/products/architectures/ase/simd/ + int eva : 1; // Enhanced Virtual Addressing + // https://www.mips.com/products/architectures/mips64/ + + // Make sure to update MipsFeaturesEnum below if you add a field here. +} MipsFeatures; + +typedef struct { + MipsFeatures features; +} MipsInfo; + +MipsInfo GetMipsInfo(void); + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +typedef enum { + MIPS_MSA, + MIPS_EVA, + MIPS_LAST_, +} MipsFeaturesEnum; + +int GetMipsFeaturesEnumValue(const MipsFeatures* features, + MipsFeaturesEnum value); + +const char* GetMipsFeaturesEnumName(MipsFeaturesEnum); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_CPUINFO_MIPS_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_ppc.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_ppc.h new file mode 100755 index 00000000..654155da --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_ppc.h @@ -0,0 +1,141 @@ +// Copyright 2018 IBM +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPUINFO_PPC_H_ +#define CPU_FEATURES_INCLUDE_CPUINFO_PPC_H_ + +#include "cpu_features_macros.h" +#include "internal/hwcaps.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + int ppc32 : 1; + int ppc64 : 1; + int ppc601 : 1; + int altivec : 1; + int fpu : 1; + int mmu : 1; + int mac_4xx : 1; + int unifiedcache : 1; + int spe : 1; + int efpsingle : 1; + int efpdouble : 1; + int no_tb : 1; + int power4 : 1; + int power5 : 1; + int power5plus : 1; + int cell : 1; + int booke : 1; + int smt : 1; + int icachesnoop : 1; + int arch205 : 1; + int pa6t : 1; + int dfp : 1; + int power6ext : 1; + int arch206 : 1; + int vsx : 1; + int pseries_perfmon_compat : 1; + int truele : 1; + int ppcle : 1; + int arch207 : 1; + int htm : 1; + int dscr : 1; + int ebb : 1; + int isel : 1; + int tar : 1; + int vcrypto : 1; + int htm_nosc : 1; + int arch300 : 1; + int ieee128 : 1; + int darn : 1; + int scv : 1; + int htm_no_suspend : 1; + + // Make sure to update PPCFeaturesEnum below if you add a field here. +} PPCFeatures; + +typedef struct { + PPCFeatures features; +} PPCInfo; + +// This function is guaranteed to be malloc, memset and memcpy free. 
+PPCInfo GetPPCInfo(void); + +typedef struct { + char platform[64]; // 0 terminated string + char model[64]; // 0 terminated string + char machine[64]; // 0 terminated string + char cpu[64]; // 0 terminated string + PlatformType type; +} PPCPlatformStrings; + +PPCPlatformStrings GetPPCPlatformStrings(void); + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +typedef enum { + PPC_32, /* 32 bit mode execution */ + PPC_64, /* 64 bit mode execution */ + PPC_601_INSTR, /* Old POWER ISA */ + PPC_HAS_ALTIVEC, /* SIMD Unit*/ + PPC_HAS_FPU, /* Floating Point Unit */ + PPC_HAS_MMU, /* Memory management unit */ + PPC_HAS_4xxMAC, + PPC_UNIFIED_CACHE, /* Unified instruction and data cache */ + PPC_HAS_SPE, /* Signal processing extension unit */ + PPC_HAS_EFP_SINGLE, /* SPE single precision fpu */ + PPC_HAS_EFP_DOUBLE, /* SPE double precision fpu */ + PPC_NO_TB, /* No timebase */ + PPC_POWER4, + PPC_POWER5, + PPC_POWER5_PLUS, + PPC_CELL, /* Cell broadband engine */ + PPC_BOOKE, /* Embedded ISA */ + PPC_SMT, /* Simultaneous multi-threading */ + PPC_ICACHE_SNOOP, + PPC_ARCH_2_05, /* ISA 2.05 - POWER6 */ + PPC_PA6T, /* PA Semi 6T core ISA */ + PPC_HAS_DFP, /* Decimal floating point unit */ + PPC_POWER6_EXT, + PPC_ARCH_2_06, /* ISA 2.06 - POWER7 */ + PPC_HAS_VSX, /* Vector-scalar extension */ + PPC_PSERIES_PERFMON_COMPAT, /* Set of backwards compatible performance + monitoring events */ + PPC_TRUE_LE, + PPC_PPC_LE, + PPC_ARCH_2_07, /* ISA 2.07 - POWER8 */ + PPC_HTM, /* Hardware Transactional Memory */ + PPC_DSCR, /* Data stream control register */ + PPC_EBB, /* Event base branching */ + PPC_ISEL, /* Integer select instructions */ + PPC_TAR, /* Target address register */ + PPC_VEC_CRYPTO, /* Vector cryptography instructions */ + PPC_HTM_NOSC, /* Transactions aborted when syscall made*/ + PPC_ARCH_3_00, /* ISA 3.00 - POWER9 */ + PPC_HAS_IEEE128, /* VSX IEEE Binary Float 128-bit */ + PPC_DARN, /* Deliver a random number
instruction */ + PPC_SCV, /* scv syscall */ + PPC_HTM_NO_SUSPEND, /* TM w/out suspended state */ + PPC_LAST_, +} PPCFeaturesEnum; + +int GetPPCFeaturesEnumValue(const PPCFeatures* features, PPCFeaturesEnum value); + +const char* GetPPCFeaturesEnumName(PPCFeaturesEnum); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_CPUINFO_PPC_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_x86.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_x86.h new file mode 100755 index 00000000..0123ddbe --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_x86.h @@ -0,0 +1,154 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPUINFO_X86_H_ +#define CPU_FEATURES_INCLUDE_CPUINFO_X86_H_ + +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +// See https://en.wikipedia.org/wiki/CPUID for a list of x86 cpu features. 
+typedef struct { + int aes : 1; + int erms : 1; + int f16c : 1; + int fma3 : 1; + int vpclmulqdq : 1; + int bmi1 : 1; + int bmi2 : 1; + + int ssse3 : 1; + int sse4_1 : 1; + int sse4_2 : 1; + + int avx : 1; + int avx2 : 1; + + int avx512f : 1; + int avx512cd : 1; + int avx512er : 1; + int avx512pf : 1; + int avx512bw : 1; + int avx512dq : 1; + int avx512vl : 1; + int avx512ifma : 1; + int avx512vbmi : 1; + int avx512vbmi2 : 1; + int avx512vnni : 1; + int avx512bitalg : 1; + int avx512vpopcntdq : 1; + int avx512_4vnniw : 1; + int avx512_4vbmi2 : 1; + + int smx : 1; + int sgx : 1; + int cx16 : 1; // aka. CMPXCHG16B + + // Make sure to update X86FeaturesEnum below if you add a field here. +} X86Features; + +typedef struct { + X86Features features; + int family; + int model; + int stepping; + char vendor[13]; // 0 terminated string +} X86Info; + +// Calls cpuid and returns an initialized X86info. +// This function is guaranteed to be malloc, memset and memcpy free. +X86Info GetX86Info(void); + +typedef enum { + X86_UNKNOWN, + INTEL_CORE, // CORE + INTEL_PNR, // PENRYN + INTEL_NHM, // NEHALEM + INTEL_ATOM_BNL, // BONNELL + INTEL_WSM, // WESTMERE + INTEL_SNB, // SANDYBRIDGE + INTEL_IVB, // IVYBRIDGE + INTEL_ATOM_SMT, // SILVERMONT + INTEL_HSW, // HASWELL + INTEL_BDW, // BROADWELL + INTEL_SKL, // SKYLAKE + INTEL_ATOM_GMT, // GOLDMONT + INTEL_KBL, // KABY LAKE + INTEL_CFL, // COFFEE LAKE + INTEL_CNL, // CANNON LAKE + AMD_HAMMER, // K8 + AMD_K10, // K10 + AMD_BOBCAT, // K14 + AMD_BULLDOZER, // K15 + AMD_JAGUAR, // K16 + AMD_ZEN, // K17 +} X86Microarchitecture; + +// Returns the underlying microarchitecture by looking at X86Info's vendor, +// family and model. +X86Microarchitecture GetX86Microarchitecture(const X86Info* info); + +// Calls cpuid and fills the brand_string. +// - brand_string *must* be of size 49 (beware of array decaying). +// - brand_string will be zero terminated. +// - This function calls memcpy. 
+void FillX86BrandString(char brand_string[49]); + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +typedef enum { + X86_AES, + X86_ERMS, + X86_F16C, + X86_FMA3, + X86_VPCLMULQDQ, + X86_BMI1, + X86_BMI2, + X86_SSSE3, + X86_SSE4_1, + X86_SSE4_2, + X86_AVX, + X86_AVX2, + X86_AVX512F, + X86_AVX512CD, + X86_AVX512ER, + X86_AVX512PF, + X86_AVX512BW, + X86_AVX512DQ, + X86_AVX512VL, + X86_AVX512IFMA, + X86_AVX512VBMI, + X86_AVX512VBMI2, + X86_AVX512VNNI, + X86_AVX512BITALG, + X86_AVX512VPOPCNTDQ, + X86_AVX512_4VNNIW, + X86_AVX512_4VBMI2, + X86_SMX, + X86_SGX, + X86_CX16, + X86_LAST_, +} X86FeaturesEnum; + +int GetX86FeaturesEnumValue(const X86Features* features, X86FeaturesEnum value); + +const char* GetX86FeaturesEnumName(X86FeaturesEnum); + +const char* GetX86MicroarchitectureName(X86Microarchitecture); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_CPUINFO_X86_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/bit_utils.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/bit_utils.h new file mode 100755 index 00000000..75f0cdd5 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/bit_utils.h @@ -0,0 +1,39 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_ + +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +inline static bool IsBitSet(uint32_t reg, uint32_t bit) { + return (reg >> bit) & 0x1; +} + +inline static uint32_t ExtractBitRange(uint32_t reg, uint32_t msb, + uint32_t lsb) { + const uint64_t bits = msb - lsb + 1; + const uint64_t mask = (1ULL << bits) - 1ULL; + assert(msb >= lsb); + return (reg >> lsb) & mask; +} + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/cpuid_x86.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/cpuid_x86.h new file mode 100755 index 00000000..9dcee0de --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/cpuid_x86.h @@ -0,0 +1,37 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_INTERNAL_CPUID_X86_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_CPUID_X86_H_ + +#include <stdint.h> + +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +// A struct to hold the result of a call to cpuid. +typedef struct { + uint32_t eax, ebx, ecx, edx; +} Leaf; + +// Retrieves the leaf for a particular cpuid. +Leaf CpuId(uint32_t leaf_id); + +// Returns the eax value of the XCR0 register.
+uint32_t GetXCR0Eax(void); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_CPUID_X86_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/filesystem.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/filesystem.h new file mode 100755 index 00000000..33788813 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/filesystem.h @@ -0,0 +1,38 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// An interface for the filesystem that allows mocking the filesystem in +// unittests. +#ifndef CPU_FEATURES_INCLUDE_INTERNAL_FILESYSTEM_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_FILESYSTEM_H_ + +#include +#include +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +// Same as linux "open(filename, O_RDONLY)", retries automatically on EINTR. +int CpuFeatures_OpenFile(const char* filename); + +// Same as linux "read(file_descriptor, buffer, buffer_size)", retries +// automatically on EINTR. +int CpuFeatures_ReadFile(int file_descriptor, void* buffer, size_t buffer_size); + +// Same as linux "close(file_descriptor)". 
+void CpuFeatures_CloseFile(int file_descriptor); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_FILESYSTEM_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/hwcaps.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/hwcaps.h new file mode 100755 index 00000000..830cde31 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/hwcaps.h @@ -0,0 +1,131 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Interface to retrieve hardware capabilities. It relies on Linux's getauxval +// or `/proc/self/auxv` under the hood. +#ifndef CPU_FEATURES_INCLUDE_INTERNAL_HWCAPS_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_HWCAPS_H_ + +#include +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +// To avoid depending on the linux kernel we reproduce the architecture specific +// constants here.
+ +// http://elixir.free-electrons.com/linux/latest/source/arch/arm64/include/uapi/asm/hwcap.h +#define AARCH64_HWCAP_FP (1UL << 0) +#define AARCH64_HWCAP_ASIMD (1UL << 1) +#define AARCH64_HWCAP_AES (1UL << 3) +#define AARCH64_HWCAP_PMULL (1UL << 4) +#define AARCH64_HWCAP_SHA1 (1UL << 5) +#define AARCH64_HWCAP_SHA2 (1UL << 6) +#define AARCH64_HWCAP_CRC32 (1UL << 7) + +// http://elixir.free-electrons.com/linux/latest/source/arch/arm/include/uapi/asm/hwcap.h +#define ARM_HWCAP_VFP (1UL << 6) +#define ARM_HWCAP_IWMMXT (1UL << 9) +#define ARM_HWCAP_NEON (1UL << 12) +#define ARM_HWCAP_VFPV3 (1UL << 13) +#define ARM_HWCAP_VFPV3D16 (1UL << 14) +#define ARM_HWCAP_VFPV4 (1UL << 16) +#define ARM_HWCAP_IDIVA (1UL << 17) +#define ARM_HWCAP_IDIVT (1UL << 18) +#define ARM_HWCAP2_AES (1UL << 0) +#define ARM_HWCAP2_PMULL (1UL << 1) +#define ARM_HWCAP2_SHA1 (1UL << 2) +#define ARM_HWCAP2_SHA2 (1UL << 3) +#define ARM_HWCAP2_CRC32 (1UL << 4) + +// http://elixir.free-electrons.com/linux/latest/source/arch/mips/include/uapi/asm/hwcap.h +#define MIPS_HWCAP_VZ (1UL << 0) +#define MIPS_HWCAP_EVA (1UL << 1) +#define MIPS_HWCAP_HTW (1UL << 2) +#define MIPS_HWCAP_FPU (1UL << 3) +#define MIPS_HWCAP_MIPS32R2 (1UL << 4) +#define MIPS_HWCAP_MIPS32R5 (1UL << 5) +#define MIPS_HWCAP_MIPS64R6 (1UL << 6) +#define MIPS_HWCAP_DSPR1 (1UL << 7) +#define MIPS_HWCAP_DSPR2 (1UL << 8) +#define MIPS_HWCAP_MSA (1UL << 9) + +// http://elixir.free-electrons.com/linux/latest/source/arch/powerpc/include/uapi/asm/cputable.h +#ifndef _UAPI__ASM_POWERPC_CPUTABLE_H +/* in AT_HWCAP */ +#define PPC_FEATURE_32 0x80000000 +#define PPC_FEATURE_64 0x40000000 +#define PPC_FEATURE_601_INSTR 0x20000000 +#define PPC_FEATURE_HAS_ALTIVEC 0x10000000 +#define PPC_FEATURE_HAS_FPU 0x08000000 +#define PPC_FEATURE_HAS_MMU 0x04000000 +#define PPC_FEATURE_HAS_4xxMAC 0x02000000 +#define PPC_FEATURE_UNIFIED_CACHE 0x01000000 +#define PPC_FEATURE_HAS_SPE 0x00800000 +#define PPC_FEATURE_HAS_EFP_SINGLE 0x00400000 +#define 
PPC_FEATURE_HAS_EFP_DOUBLE 0x00200000 +#define PPC_FEATURE_NO_TB 0x00100000 +#define PPC_FEATURE_POWER4 0x00080000 +#define PPC_FEATURE_POWER5 0x00040000 +#define PPC_FEATURE_POWER5_PLUS 0x00020000 +#define PPC_FEATURE_CELL 0x00010000 +#define PPC_FEATURE_BOOKE 0x00008000 +#define PPC_FEATURE_SMT 0x00004000 +#define PPC_FEATURE_ICACHE_SNOOP 0x00002000 +#define PPC_FEATURE_ARCH_2_05 0x00001000 +#define PPC_FEATURE_PA6T 0x00000800 +#define PPC_FEATURE_HAS_DFP 0x00000400 +#define PPC_FEATURE_POWER6_EXT 0x00000200 +#define PPC_FEATURE_ARCH_2_06 0x00000100 +#define PPC_FEATURE_HAS_VSX 0x00000080 + +#define PPC_FEATURE_PSERIES_PERFMON_COMPAT 0x00000040 + +/* Reserved - do not use 0x00000004 */ +#define PPC_FEATURE_TRUE_LE 0x00000002 +#define PPC_FEATURE_PPC_LE 0x00000001 + +/* in AT_HWCAP2 */ +#define PPC_FEATURE2_ARCH_2_07 0x80000000 +#define PPC_FEATURE2_HTM 0x40000000 +#define PPC_FEATURE2_DSCR 0x20000000 +#define PPC_FEATURE2_EBB 0x10000000 +#define PPC_FEATURE2_ISEL 0x08000000 +#define PPC_FEATURE2_TAR 0x04000000 +#define PPC_FEATURE2_VEC_CRYPTO 0x02000000 +#define PPC_FEATURE2_HTM_NOSC 0x01000000 +#define PPC_FEATURE2_ARCH_3_00 0x00800000 +#define PPC_FEATURE2_HAS_IEEE128 0x00400000 +#define PPC_FEATURE2_DARN 0x00200000 +#define PPC_FEATURE2_SCV 0x00100000 +#define PPC_FEATURE2_HTM_NO_SUSPEND 0x00080000 +#endif + +typedef struct { + unsigned long hwcaps; + unsigned long hwcaps2; +} HardwareCapabilities; + +HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void); + +typedef struct { + char platform[64]; // 0 terminated string + char base_platform[64]; // 0 terminated string +} PlatformType; + +PlatformType CpuFeatures_GetPlatformType(void); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_HWCAPS_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/linux_features_aggregator.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/linux_features_aggregator.h new file mode 100755 index 
00000000..77661d4c --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/linux_features_aggregator.h @@ -0,0 +1,60 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// CapabilityConfig provides a way to map cpu features to hardware caps and +// /proc/cpuinfo flags. We then provide functions to update capabilities from +// either source. +#ifndef CPU_FEATURES_INCLUDE_INTERNAL_LINUX_FEATURES_AGGREGATOR_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_LINUX_FEATURES_AGGREGATOR_H_ + +#include +#include +#include "cpu_features_macros.h" +#include "internal/hwcaps.h" +#include "internal/string_view.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +// Use the following macro to declare setter functions to be used in +// CapabilityConfig. +#define DECLARE_SETTER(FeatureType, FeatureName) \ + static void set_##FeatureName(void* const features, bool value) { \ + ((FeatureType*)features)->FeatureName = value; \ + } + +// Describes the relationship between hardware caps and /proc/cpuinfo flags. +typedef struct { + const HardwareCapabilities hwcaps_mask; + const char* const proc_cpuinfo_flag; + void (*set_bit)(void* const, bool); // setter for the corresponding bit. +} CapabilityConfig; + +// For every config, looks into flags_line for the presence of the +// corresponding proc_cpuinfo_flag, calls `set_bit` accordingly. +// Note: features is a pointer to the underlying Feature struct. 
+void CpuFeatures_SetFromFlags(const size_t configs_size, + const CapabilityConfig* configs, + const StringView flags_line, + void* const features); + +// For every config, looks into hwcaps for the presence of the feature. Calls +// `set_bit` with true if the hardware capability is found. +// Note: features is a pointer to the underlying Feature struct. +void CpuFeatures_OverrideFromHwCaps(const size_t configs_size, + const CapabilityConfig* configs, + const HardwareCapabilities hwcaps, + void* const features); + +CPU_FEATURES_END_CPP_NAMESPACE +#endif // CPU_FEATURES_INCLUDE_INTERNAL_LINUX_FEATURES_AGGREGATOR_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/stack_line_reader.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/stack_line_reader.h new file mode 100755 index 00000000..c540f6b2 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/stack_line_reader.h @@ -0,0 +1,49 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Reads a file line by line and stores the data on the stack. This allows +// parsing files in one go without allocating. 
+#ifndef CPU_FEATURES_INCLUDE_INTERNAL_STACK_LINE_READER_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_STACK_LINE_READER_H_ + +#include + +#include "cpu_features_macros.h" +#include "internal/string_view.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + char buffer[STACK_LINE_READER_BUFFER_SIZE]; + StringView view; + int fd; + bool skip_mode; +} StackLineReader; + +// Initializes a StackLineReader. +void StackLineReader_Initialize(StackLineReader* reader, int fd); + +typedef struct { + StringView line; // A view of the line. + bool eof; // Nothing more to read, we reached EOF. + bool full_line; // If false the line was truncated to + // STACK_LINE_READER_BUFFER_SIZE. +} LineResult; + +// Reads the file pointed to by fd and tries to read a full line. +LineResult StackLineReader_NextLine(StackLineReader* reader); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_STACK_LINE_READER_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/string_view.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/string_view.h new file mode 100755 index 00000000..aa3779c4 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/string_view.h @@ -0,0 +1,108 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// A view over a piece of string. The view is not 0 terminated. 
+#ifndef CPU_FEATURES_INCLUDE_INTERNAL_STRING_VIEW_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_STRING_VIEW_H_ + +#include <stdbool.h> +#include <stddef.h> +#include <string.h> +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + const char* ptr; + size_t size; +} StringView; + +#ifdef __cplusplus +static const StringView kEmptyStringView = {NULL, 0}; +#else +static const StringView kEmptyStringView; +#endif + +// Returns a StringView from the provided string. +// Passing NULL is valid only if size is 0. +static inline StringView view(const char* str, const size_t size) { + StringView view; + view.ptr = str; + view.size = size; + return view; +} + +static inline StringView str(const char* str) { return view(str, strlen(str)); } + +// Returns the index of the first occurrence of c in view or -1 if not found. +int CpuFeatures_StringView_IndexOfChar(const StringView view, char c); + +// Returns the index of the first occurrence of sub_view in view or -1 if not +// found. +int CpuFeatures_StringView_IndexOf(const StringView view, + const StringView sub_view); + +// Returns whether a is equal to b (same content). +bool CpuFeatures_StringView_IsEquals(const StringView a, const StringView b); + +// Returns whether a starts with b. +bool CpuFeatures_StringView_StartsWith(const StringView a, const StringView b); + +// Removes count characters from the beginning of view or kEmptyStringView if +// count is greater than view.size. +StringView CpuFeatures_StringView_PopFront(const StringView str_view, + size_t count); + +// Removes count characters from the end of view or kEmptyStringView if count is +// greater than view.size. +StringView CpuFeatures_StringView_PopBack(const StringView str_view, + size_t count); + +// Keeps the count first characters of view or view if count is greater than +// view.size. +StringView CpuFeatures_StringView_KeepFront(const StringView str_view, + size_t count); + +// Retrieves the first character of view.
If view is empty the behavior is +// undefined. +char CpuFeatures_StringView_Front(const StringView view); + +// Retrieves the last character of view. If view is empty the behavior is +// undefined. +char CpuFeatures_StringView_Back(const StringView view); + +// Removes leading and trailing space characters. +StringView CpuFeatures_StringView_TrimWhitespace(StringView view); + +// Convert StringView to positive integer. e.g. "42", "0x2a". +// Returns -1 on error. +int CpuFeatures_StringView_ParsePositiveNumber(const StringView view); + +// Copies src StringView to dst buffer. +void CpuFeatures_StringView_CopyString(const StringView src, char* dst, + size_t dst_size); + +// Checks if line contains the specified whitespace separated word. +bool CpuFeatures_StringView_HasWord(const StringView line, + const char* const word); + +// Get key/value from line. key and value are separated by ": ". +// key and value are cleaned up from leading and trailing whitespaces. +bool CpuFeatures_StringView_GetAttributeKeyValue(const StringView line, + StringView* key, + StringView* value); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_STRING_VIEW_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/run_integration.sh b/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/run_integration.sh new file mode 100755 index 00000000..a1de0d1e --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/run_integration.sh @@ -0,0 +1,173 @@ +#!/bin/bash + +readonly SCRIPT_FOLDER=$(cd -P -- "$(dirname -- "$0")" && pwd -P) +readonly PROJECT_FOLDER="${SCRIPT_FOLDER}/.." +readonly ARCHIVE_FOLDER=~/cpu_features_archives +readonly QEMU_INSTALL=${ARCHIVE_FOLDER}/qemu +readonly DEFAULT_CMAKE_ARGS=" -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTING=ON" + +function extract() { + case $1 in + *.tar.bz2) tar xjf "$1" ;; + *.tar.xz) tar xJf "$1" ;; + *.tar.gz) tar xzf "$1" ;; + *) + echo "don't know how to extract '$1'..."
+ exit 1 + esac +} + +function unpackifnotexists() { + mkdir -p "${ARCHIVE_FOLDER}" + cd "${ARCHIVE_FOLDER}" || exit + local URL=$1 + local RELATIVE_FOLDER=$2 + local DESTINATION="${ARCHIVE_FOLDER}/${RELATIVE_FOLDER}" + if [[ ! -d "${DESTINATION}" ]] ; then + local ARCHIVE_NAME=$(echo ${URL} | sed 's/.*\///') + test -f "${ARCHIVE_NAME}" || wget -q "${URL}" + extract "${ARCHIVE_NAME}" + fi +} + +function installqemuifneeded() { + local VERSION=${QEMU_VERSION:=2.11.1} + local ARCHES=${QEMU_ARCHES:=arm aarch64 i386 x86_64 mips mipsel} + local TARGETS=${QEMU_TARGETS:=$(echo "$ARCHES" | sed 's#$# #;s#\([^ ]*\) #\1-linux-user #g')} + + if echo "${VERSION} ${TARGETS}" | cmp --silent ${QEMU_INSTALL}/.build -; then + echo "qemu ${VERSION} up to date!" + return 0 + fi + + echo "VERSION: ${VERSION}" + echo "TARGETS: ${TARGETS}" + + rm -rf ${QEMU_INSTALL} + + # Checking for a tarball before downloading makes testing easier :-) + local QEMU_URL="http://wiki.qemu-project.org/download/qemu-${VERSION}.tar.xz" + local QEMU_FOLDER="qemu-${VERSION}" + unpackifnotexists ${QEMU_URL} ${QEMU_FOLDER} + cd ${QEMU_FOLDER} || exit + + ./configure \ + --prefix="${QEMU_INSTALL}" \ + --target-list="${TARGETS}" \ + --disable-docs \ + --disable-sdl \ + --disable-gtk \ + --disable-gnutls \ + --disable-gcrypt \ + --disable-nettle \ + --disable-curses \ + --static + + make -j4 + make install + + echo "$VERSION $TARGETS" > ${QEMU_INSTALL}/.build +} + +function assert_defined(){ + local VALUE=${1} + : "${!VALUE?"${1} needs to be defined"}" # indirect expansion: tests the variable NAMED by $1, not the local holding that name +} + +function integrate() { + cd "${PROJECT_FOLDER}" || exit + cmake -H.
-B"${BUILD_DIR}" ${DEFAULT_CMAKE_ARGS} ${CMAKE_ADDITIONAL_ARGS} + cmake --build "${BUILD_DIR}" --target all + + if [[ -n "${QEMU_ARCH}" ]]; then + if [[ "${QEMU_ARCH}" == "DISABLED" ]]; then + QEMU="true || " + else + installqemuifneeded + QEMU="${QEMU_INSTALL}/bin/qemu-${QEMU_ARCH} ${QEMU_ARGS}" + fi + else + QEMU="" + fi + # Run tests + for test_binary in ${BUILD_DIR}/test/*_test; do ${QEMU} ${test_binary}; done + # Run demo program + ${QEMU} "${BUILD_DIR}/list_cpu_features" +} + +function expand_linaro_config() { + assert_defined TARGET + local LINARO_ROOT_URL=https://releases.linaro.org/components/toolchain/binaries/7.2-2017.11 + + local GCC_URL=${LINARO_ROOT_URL}/${TARGET}/gcc-linaro-7.2.1-2017.11-x86_64_${TARGET}.tar.xz + local GCC_RELATIVE_FOLDER="gcc-linaro-7.2.1-2017.11-x86_64_${TARGET}" + unpackifnotexists "${GCC_URL}" "${GCC_RELATIVE_FOLDER}" + + local SYSROOT_URL=${LINARO_ROOT_URL}/${TARGET}/sysroot-glibc-linaro-2.25-2017.11-${TARGET}.tar.xz + local SYSROOT_RELATIVE_FOLDER=sysroot-glibc-linaro-2.25-2017.11-${TARGET} + unpackifnotexists "${SYSROOT_URL}" "${SYSROOT_RELATIVE_FOLDER}" + + local SYSROOT_FOLDER=${ARCHIVE_FOLDER}/${SYSROOT_RELATIVE_FOLDER} + local GCC_FOLDER=${ARCHIVE_FOLDER}/${GCC_RELATIVE_FOLDER} + + CMAKE_ADDITIONAL_ARGS+=" -DCMAKE_SYSROOT=${SYSROOT_FOLDER}" + CMAKE_ADDITIONAL_ARGS+=" -DCMAKE_C_COMPILER=${GCC_FOLDER}/bin/${TARGET}-gcc" + CMAKE_ADDITIONAL_ARGS+=" -DCMAKE_CXX_COMPILER=${GCC_FOLDER}/bin/${TARGET}-g++" + + CMAKE_ADDITIONAL_ARGS+=" -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER" + CMAKE_ADDITIONAL_ARGS+=" -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=ONLY" + CMAKE_ADDITIONAL_ARGS+=" -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=ONLY" + + QEMU_ARGS+=" -L ${SYSROOT_FOLDER}" + QEMU_ARGS+=" -E LD_LIBRARY_PATH=/lib" +} + +function expand_codescape_config() { + assert_defined TARGET + local FLAVOUR=${QEMU_ARCH}-r2-hard + local DATE=2016.05-03 + local 
CODESCAPE_URL=http://codescape-mips-sdk.imgtec.com/components/toolchain/${DATE}/Codescape.GNU.Tools.Package.${DATE}.for.MIPS.MTI.Linux.CentOS-5.x86_64.tar.gz
+  local GCC_URL=${CODESCAPE_URL}
+  local GCC_RELATIVE_FOLDER=${TARGET}/${DATE}
+  unpackifnotexists "${GCC_URL}" "${GCC_RELATIVE_FOLDER}"
+
+  local SYSROOT_URL=${CODESCAPE_URL}  # same archive as GCC_URL
+  local SYSROOT_FOLDER=${ARCHIVE_FOLDER}/${GCC_RELATIVE_FOLDER}/sysroot/${FLAVOUR}
+  unpackifnotexists "${SYSROOT_URL}" "${GCC_RELATIVE_FOLDER}"  # was an undefined SYSROOT_RELATIVE_FOLDER; the sysroot path sits under the GCC folder
+
+  CMAKE_ADDITIONAL_ARGS+=" -DENABLE_MSA=1"
+  CMAKE_ADDITIONAL_ARGS+=" -DMIPS_CPU=p5600"
+  CMAKE_ADDITIONAL_ARGS+=" -DCMAKE_TOOLCHAIN_FILE=cmake/mips32-linux-gcc.cmake"
+  CMAKE_ADDITIONAL_ARGS+=" -DCROSS=${TARGET}-"
+  CMAKE_ADDITIONAL_ARGS+=" -DCMAKE_FIND_ROOT_PATH=${ARCHIVE_FOLDER}/${GCC_RELATIVE_FOLDER}"
+
+  QEMU_ARGS+=" -L ${SYSROOT_FOLDER}"
+  QEMU_ARGS+=" -E LD_LIBRARY_PATH=/lib"
+  QEMU_ARGS+=" -cpu P5600"
+}
+
+function expand_environment_and_integrate() {  # reads TOOLCHAIN/TARGET, fills the cmake/qemu argument globals, then calls integrate
+  assert_defined PROJECT_FOLDER
+  assert_defined TARGET
+
+  BUILD_DIR="${PROJECT_FOLDER}/cmake_build/${TARGET}"
+  mkdir -p "${BUILD_DIR}"
+
+  CMAKE_ADDITIONAL_ARGS=""  # reset so values from a previous environment do not leak into this one
+  QEMU_ARGS=""
+
+  case ${TOOLCHAIN} in
+    LINARO) expand_linaro_config ;;
+    CODESCAPE) expand_codescape_config ;;
+    NATIVE) QEMU_ARCH="" ;;
+    *)
+      echo "Unknown toolchain '${TOOLCHAIN}'..."
+ exit 1 + esac + integrate +} + +if [ "${CONTINUOUS_INTEGRATION}" = "true" ]; then + QEMU_ARCHES=${QEMU_ARCH} + expand_environment_and_integrate +fi diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/test_integration.sh b/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/test_integration.sh new file mode 100755 index 00000000..53d1d3b8 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/test_integration.sh @@ -0,0 +1,80 @@ +source "$(dirname -- "$0")"/run_integration.sh + +# Toolchains for little-endian, 64-bit ARMv8 for GNU/Linux systems +function set_aarch64-linux-gnu() { + TOOLCHAIN=LINARO + TARGET=aarch64-linux-gnu + QEMU_ARCH=aarch64 +} + +# Toolchains for little-endian, hard-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems +function set_arm-linux-gnueabihf() { + TOOLCHAIN=LINARO + TARGET=arm-linux-gnueabihf + QEMU_ARCH=arm +} + +# Toolchains for little-endian, 32-bit ARMv8 for GNU/Linux systems +function set_armv8l-linux-gnueabihf() { + TOOLCHAIN=LINARO + TARGET=armv8l-linux-gnueabihf + QEMU_ARCH=arm +} + +# Toolchains for little-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems +function set_arm-linux-gnueabi() { + TOOLCHAIN=LINARO + TARGET=arm-linux-gnueabi + QEMU_ARCH=arm +} + +# Toolchains for big-endian, 64-bit ARMv8 for GNU/Linux systems +function set_aarch64_be-linux-gnu() { + TOOLCHAIN=LINARO + TARGET=aarch64_be-linux-gnu + QEMU_ARCH="DISABLED" +} + +# Toolchains for big-endian, hard-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems +function set_armeb-linux-gnueabihf() { + TOOLCHAIN=LINARO + TARGET=armeb-linux-gnueabihf + QEMU_ARCH="DISABLED" +} + +# Toolchains for big-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems +function set_armeb-linux-gnueabi() { + TOOLCHAIN=LINARO + TARGET=armeb-linux-gnueabi + QEMU_ARCH="DISABLED" +} + + +function set_mips() { + TOOLCHAIN=CODESCAPE + TARGET=mips-mti-linux-gnu + QEMU_ARCH="DISABLED" +} + +function set_native() { 
+ TOOLCHAIN=NATIVE + TARGET=native + QEMU_ARCH="" +} + +ENVIRONMENTS=" + set_aarch64-linux-gnu + set_arm-linux-gnueabihf + set_armv8l-linux-gnueabihf + set_arm-linux-gnueabi + set_aarch64_be-linux-gnu + set_armeb-linux-gnueabihf + set_armeb-linux-gnueabi + set_native + set_mips +" + +for SET_ENVIRONMENT in ${ENVIRONMENTS}; do + ${SET_ENVIRONMENT} + expand_environment_and_integrate +done diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_clang_gcc.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_clang_gcc.c new file mode 100755 index 00000000..472e7125 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_clang_gcc.c @@ -0,0 +1,36 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "internal/cpuid_x86.h"
+
+#if defined(CPU_FEATURES_ARCH_X86)
+#if defined(CPU_FEATURES_COMPILER_CLANG) || defined(CPU_FEATURES_COMPILER_GCC)
+
+#include <cpuid.h>  // For __cpuid_count()
+
+Leaf CpuId(uint32_t leaf_id) {  // Executes CPUID for leaf_id with sub-leaf (ECX) 0.
+  Leaf leaf;
+  __cpuid_count(leaf_id, 0, leaf.eax, leaf.ebx, leaf.ecx, leaf.edx);
+  return leaf;
+}
+
+uint32_t GetXCR0Eax(void) {  // Returns the low 32 bits of extended control register 0.
+  uint32_t eax, edx;  // XGETBV writes EDX:EAX; edx is an output operand only and is discarded
+  __asm("XGETBV" : "=a"(eax), "=d"(edx) : "c"(0));
+  return eax;
+}
+
+#endif  // defined(CPU_FEATURES_COMPILER_CLANG) ||
+        // defined(CPU_FEATURES_COMPILER_GCC)
+#endif  // defined(CPU_FEATURES_ARCH_X86)
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_msvc.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_msvc.c
new file mode 100755
index 00000000..cd8f19f2
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_msvc.c
@@ -0,0 +1,34 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "internal/cpuid_x86.h"
+
+#if defined(CPU_FEATURES_ARCH_X86) && defined(CPU_FEATURES_COMPILER_MSC)
+#include <immintrin.h>  // For _xgetbv()
+#include <intrin.h>  // For __cpuidex()
+
+Leaf CpuId(uint32_t leaf_id) {  // Executes CPUID for leaf_id and repacks the four result registers into a Leaf.
+  Leaf leaf;
+  int data[4];  // filled by the intrinsic in EAX, EBX, ECX, EDX order
+  __cpuidex(data, leaf_id, 0);  // explicit sub-leaf 0, matching the GCC/Clang implementation
+  leaf.eax = data[0];
+  leaf.ebx = data[1];
+  leaf.ecx = data[2];
+  leaf.edx = data[3];
+  return leaf;
+}
+
+uint32_t GetXCR0Eax(void) { return _xgetbv(0); }  // 64-bit XCR0 value truncated to its low 32 bits by the return type
+
+#endif  // defined(CPU_FEATURES_ARCH_X86) && defined(CPU_FEATURES_COMPILER_MSC)
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_aarch64.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_aarch64.c
new file mode 100755
index 00000000..0d111ff9
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_aarch64.c
@@ -0,0 +1,141 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "cpuinfo_aarch64.h" + +#include "internal/filesystem.h" +#include "internal/hwcaps.h" +#include "internal/linux_features_aggregator.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" + +#include + +DECLARE_SETTER(Aarch64Features, fp) +DECLARE_SETTER(Aarch64Features, asimd) +DECLARE_SETTER(Aarch64Features, aes) +DECLARE_SETTER(Aarch64Features, pmull) +DECLARE_SETTER(Aarch64Features, sha1) +DECLARE_SETTER(Aarch64Features, sha2) +DECLARE_SETTER(Aarch64Features, crc32) + +static const CapabilityConfig kConfigs[] = { + {{AARCH64_HWCAP_FP, 0}, "fp", &set_fp}, // + {{AARCH64_HWCAP_ASIMD, 0}, "asimd", &set_asimd}, // + {{AARCH64_HWCAP_AES, 0}, "aes", &set_aes}, // + {{AARCH64_HWCAP_PMULL, 0}, "pmull", &set_pmull}, // + {{AARCH64_HWCAP_SHA1, 0}, "sha1", &set_sha1}, // + {{AARCH64_HWCAP_SHA2, 0}, "sha2", &set_sha2}, // + {{AARCH64_HWCAP_CRC32, 0}, "crc32", &set_crc32}, // +}; + +static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig); + +static bool HandleAarch64Line(const LineResult result, + Aarch64Info* const info) { + StringView line = result.line; + StringView key, value; + if (CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) { + if (CpuFeatures_StringView_IsEquals(key, str("Features"))) { + CpuFeatures_SetFromFlags(kConfigsSize, kConfigs, value, &info->features); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU implementer"))) { + info->implementer = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU variant"))) { + info->variant = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU part"))) { + info->part = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU revision"))) { + info->revision = CpuFeatures_StringView_ParsePositiveNumber(value); + } + } + return !result.eof; +} + +static void 
FillProcCpuInfoData(Aarch64Info* const info) { + const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (;;) { + if (!HandleAarch64Line(StackLineReader_NextLine(&reader), info)) { + break; + } + } + CpuFeatures_CloseFile(fd); + } +} + +static const Aarch64Info kEmptyAarch64Info; + +Aarch64Info GetAarch64Info(void) { + // capabilities are fetched from both getauxval and /proc/cpuinfo so we can + // have some information if the executable is sandboxed (aka no access to + // /proc/cpuinfo). + Aarch64Info info = kEmptyAarch64Info; + + FillProcCpuInfoData(&info); + CpuFeatures_OverrideFromHwCaps(kConfigsSize, kConfigs, + CpuFeatures_GetHardwareCapabilities(), + &info.features); + + return info; +} + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +int GetAarch64FeaturesEnumValue(const Aarch64Features* features, + Aarch64FeaturesEnum value) { + switch (value) { + case AARCH64_FP: + return features->fp; + case AARCH64_ASIMD: + return features->asimd; + case AARCH64_AES: + return features->aes; + case AARCH64_PMULL: + return features->pmull; + case AARCH64_SHA1: + return features->sha1; + case AARCH64_SHA2: + return features->sha2; + case AARCH64_CRC32: + return features->crc32; + case AARCH64_LAST_: + break; + } + return false; +} + +const char* GetAarch64FeaturesEnumName(Aarch64FeaturesEnum value) { + switch (value) { + case AARCH64_FP: + return "fp"; + case AARCH64_ASIMD: + return "asimd"; + case AARCH64_AES: + return "aes"; + case AARCH64_PMULL: + return "pmull"; + case AARCH64_SHA1: + return "sha1"; + case AARCH64_SHA2: + return "sha2"; + case AARCH64_CRC32: + return "crc32"; + case AARCH64_LAST_: + break; + } + return "unknown feature"; +} diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_arm.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_arm.c new file mode 100755 index 
00000000..3ea06419
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_arm.c
@@ -0,0 +1,259 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "cpuinfo_arm.h"
+
+#include "internal/bit_utils.h"
+#include "internal/filesystem.h"
+#include "internal/hwcaps.h"
+#include "internal/linux_features_aggregator.h"
+#include "internal/stack_line_reader.h"
+#include "internal/string_view.h"
+
+#include <ctype.h>  // For isdigit()
+
+DECLARE_SETTER(ArmFeatures, vfp)
+DECLARE_SETTER(ArmFeatures, iwmmxt)
+DECLARE_SETTER(ArmFeatures, neon)
+DECLARE_SETTER(ArmFeatures, vfpv3)
+DECLARE_SETTER(ArmFeatures, vfpv3d16)
+DECLARE_SETTER(ArmFeatures, vfpv4)
+DECLARE_SETTER(ArmFeatures, idiva)
+DECLARE_SETTER(ArmFeatures, idivt)
+DECLARE_SETTER(ArmFeatures, aes)
+DECLARE_SETTER(ArmFeatures, pmull)
+DECLARE_SETTER(ArmFeatures, sha1)
+DECLARE_SETTER(ArmFeatures, sha2)
+DECLARE_SETTER(ArmFeatures, crc32)
+
+static const CapabilityConfig kConfigs[] = {
+    {{ARM_HWCAP_VFP, 0}, "vfp", &set_vfp},                 //
+    {{ARM_HWCAP_IWMMXT, 0}, "iwmmxt", &set_iwmmxt},        //
+    {{ARM_HWCAP_NEON, 0}, "neon", &set_neon},              //
+    {{ARM_HWCAP_VFPV3, 0}, "vfpv3", &set_vfpv3},           //
+    {{ARM_HWCAP_VFPV3D16, 0}, "vfpv3d16", &set_vfpv3d16},  //
+    {{ARM_HWCAP_VFPV4, 0}, "vfpv4", &set_vfpv4},           //
+    {{ARM_HWCAP_IDIVA, 0}, "idiva", &set_idiva},           //
+    {{ARM_HWCAP_IDIVT, 0}, "idivt", &set_idivt},           //
+    {{0, ARM_HWCAP2_AES}, "aes", &set_aes},                //
+    {{0, ARM_HWCAP2_PMULL}, "pmull", &set_pmull},          //
+    {{0, ARM_HWCAP2_SHA1}, "sha1", &set_sha1},             //
+    {{0, ARM_HWCAP2_SHA2}, "sha2", &set_sha2},             //
+    {{0, ARM_HWCAP2_CRC32}, "crc32", &set_crc32},          //
+};
+
+static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig);
+
+typedef struct {
+  bool processor_reports_armv6;    // "Processor" value contains "(v6l)"
+  bool hardware_reports_goldfish;  // "Hardware" value equals "Goldfish"
+} ProcCpuInfoData;
+
+static int IndexOfNonDigit(StringView str) {  // Returns the number of leading decimal digits in str.
+  size_t index = 0;
+  while (str.size && isdigit((unsigned char)CpuFeatures_StringView_Front(str))) {  // cast: isdigit() is undefined for negative char values
+    str = CpuFeatures_StringView_PopFront(str, 1);
+    ++index;
+  }
+  return index;
+}
+
+static bool HandleArmLine(const LineResult result, ArmInfo* const info,
+                          ProcCpuInfoData* const proc_info) {
+  StringView line = result.line;
+  StringView key, value;
+  if (CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) {
+    if (CpuFeatures_StringView_IsEquals(key, str("Features"))) {
+      CpuFeatures_SetFromFlags(kConfigsSize, kConfigs, value, &info->features);
+    } else if (CpuFeatures_StringView_IsEquals(key, str("CPU implementer"))) {
+      info->implementer = CpuFeatures_StringView_ParsePositiveNumber(value);
+    } else if (CpuFeatures_StringView_IsEquals(key, str("CPU variant"))) {
+      info->variant = CpuFeatures_StringView_ParsePositiveNumber(value);
+    } else if (CpuFeatures_StringView_IsEquals(key, str("CPU part"))) {
+      info->part = CpuFeatures_StringView_ParsePositiveNumber(value);
+    } else if (CpuFeatures_StringView_IsEquals(key, str("CPU revision"))) {
+      info->revision = CpuFeatures_StringView_ParsePositiveNumber(value);
+    } else if (CpuFeatures_StringView_IsEquals(key, str("CPU architecture"))) {
+      // CPU architecture is a number that may be followed by letters. e.g.
+      // "6TEJ", "7".
+ const StringView digits = + CpuFeatures_StringView_KeepFront(value, IndexOfNonDigit(value)); + info->architecture = CpuFeatures_StringView_ParsePositiveNumber(digits); + } else if (CpuFeatures_StringView_IsEquals(key, str("Processor"))) { + proc_info->processor_reports_armv6 = + CpuFeatures_StringView_IndexOf(value, str("(v6l)")) >= 0; + } else if (CpuFeatures_StringView_IsEquals(key, str("Hardware"))) { + proc_info->hardware_reports_goldfish = + CpuFeatures_StringView_IsEquals(value, str("Goldfish")); + } + } + return !result.eof; +} + +static uint32_t GetCpuId(const ArmInfo* const info) { + return (ExtractBitRange(info->implementer, 7, 0) << 24) | + (ExtractBitRange(info->variant, 3, 0) << 20) | + (ExtractBitRange(info->part, 11, 0) << 4) | + (ExtractBitRange(info->revision, 3, 0) << 0); +} + +static void FixErrors(ArmInfo* const info, + ProcCpuInfoData* const proc_cpu_info_data) { + // Fixing Samsung kernel reporting invalid cpu architecture. + // http://code.google.com/p/android/issues/detail?id=10812 + if (proc_cpu_info_data->processor_reports_armv6 && info->architecture >= 7) { + info->architecture = 6; + } + + // Handle kernel configuration bugs that prevent the correct reporting of CPU + // features. + switch (GetCpuId(info)) { + case 0x4100C080: + // Special case: The emulator-specific Android 4.2 kernel fails to report + // support for the 32-bit ARM IDIV instruction. Technically, this is a + // feature of the virtual CPU implemented by the emulator. Note that it + // could also support Thumb IDIV in the future, and this will have to be + // slightly updated. + if (info->architecture >= 7 && + proc_cpu_info_data->hardware_reports_goldfish) { + info->features.idiva = true; + } + break; + case 0x511004D0: + // https://crbug.com/341598. + info->features.neon = false; + break; + case 0x510006F2: + case 0x510006F3: + // The Nexus 4 (Qualcomm Krait) kernel configuration forgets to report + // IDIV support. 
+ info->features.idiva = true; + info->features.idivt = true; + break; + } + + // Propagate cpu features. + if (info->features.vfpv4) info->features.vfpv3 = true; + if (info->features.neon) info->features.vfpv3 = true; + if (info->features.vfpv3) info->features.vfp = true; +} + +static void FillProcCpuInfoData(ArmInfo* const info, + ProcCpuInfoData* proc_cpu_info_data) { + const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (;;) { + if (!HandleArmLine(StackLineReader_NextLine(&reader), info, + proc_cpu_info_data)) { + break; + } + } + CpuFeatures_CloseFile(fd); + } +} + +static const ArmInfo kEmptyArmInfo; + +static const ProcCpuInfoData kEmptyProcCpuInfoData; + +ArmInfo GetArmInfo(void) { + // capabilities are fetched from both getauxval and /proc/cpuinfo so we can + // have some information if the executable is sandboxed (aka no access to + // /proc/cpuinfo). + ArmInfo info = kEmptyArmInfo; + ProcCpuInfoData proc_cpu_info_data = kEmptyProcCpuInfoData; + + FillProcCpuInfoData(&info, &proc_cpu_info_data); + CpuFeatures_OverrideFromHwCaps(kConfigsSize, kConfigs, + CpuFeatures_GetHardwareCapabilities(), + &info.features); + + FixErrors(&info, &proc_cpu_info_data); + + return info; +} + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +int GetArmFeaturesEnumValue(const ArmFeatures* features, + ArmFeaturesEnum value) { + switch (value) { + case ARM_VFP: + return features->vfp; + case ARM_IWMMXT: + return features->iwmmxt; + case ARM_NEON: + return features->neon; + case ARM_VFPV3: + return features->vfpv3; + case ARM_VFPV3D16: + return features->vfpv3d16; + case ARM_VFPV4: + return features->vfpv4; + case ARM_IDIVA: + return features->idiva; + case ARM_IDIVT: + return features->idivt; + case ARM_AES: + return features->aes; + case ARM_PMULL: + return features->pmull; + case ARM_SHA1: + return features->sha1; + case 
ARM_SHA2: + return features->sha2; + case ARM_CRC32: + return features->crc32; + case ARM_LAST_: + break; + } + return false; +} + +const char* GetArmFeaturesEnumName(ArmFeaturesEnum value) { + switch (value) { + case ARM_VFP: + return "vfp"; + case ARM_IWMMXT: + return "iwmmxt"; + case ARM_NEON: + return "neon"; + case ARM_VFPV3: + return "vfpv3"; + case ARM_VFPV3D16: + return "vfpv3d16"; + case ARM_VFPV4: + return "vfpv4"; + case ARM_IDIVA: + return "idiva"; + case ARM_IDIVT: + return "idivt"; + case ARM_AES: + return "aes"; + case ARM_PMULL: + return "pmull"; + case ARM_SHA1: + return "sha1"; + case ARM_SHA2: + return "sha2"; + case ARM_CRC32: + return "crc32"; + case ARM_LAST_: + break; + } + return "unknown feature"; +} diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_mips.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_mips.c new file mode 100755 index 00000000..a61cdd81 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_mips.c @@ -0,0 +1,98 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "cpuinfo_mips.h" + +#include "internal/filesystem.h" +#include "internal/linux_features_aggregator.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" + +DECLARE_SETTER(MipsFeatures, msa) +DECLARE_SETTER(MipsFeatures, eva) + +static const CapabilityConfig kConfigs[] = { + {{MIPS_HWCAP_MSA, 0}, "msa", &set_msa}, // + {{MIPS_HWCAP_EVA, 0}, "eva", &set_eva}, // +}; +static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig); + +static bool HandleMipsLine(const LineResult result, + MipsFeatures* const features) { + StringView key, value; + // See tests for an example. + if (CpuFeatures_StringView_GetAttributeKeyValue(result.line, &key, &value)) { + if (CpuFeatures_StringView_IsEquals(key, str("ASEs implemented"))) { + CpuFeatures_SetFromFlags(kConfigsSize, kConfigs, value, features); + } + } + return !result.eof; +} + +static void FillProcCpuInfoData(MipsFeatures* const features) { + const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (;;) { + if (!HandleMipsLine(StackLineReader_NextLine(&reader), features)) { + break; + } + } + CpuFeatures_CloseFile(fd); + } +} + +static const MipsInfo kEmptyMipsInfo; + +MipsInfo GetMipsInfo(void) { + // capabilities are fetched from both getauxval and /proc/cpuinfo so we can + // have some information if the executable is sandboxed (aka no access to + // /proc/cpuinfo). 
+ MipsInfo info = kEmptyMipsInfo; + + FillProcCpuInfoData(&info.features); + CpuFeatures_OverrideFromHwCaps(kConfigsSize, kConfigs, + CpuFeatures_GetHardwareCapabilities(), + &info.features); + return info; +} + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +int GetMipsFeaturesEnumValue(const MipsFeatures* features, + MipsFeaturesEnum value) { + switch (value) { + case MIPS_MSA: + return features->msa; + case MIPS_EVA: + return features->eva; + case MIPS_LAST_: + break; + } + return false; +} + +const char* GetMipsFeaturesEnumName(MipsFeaturesEnum value) { + switch (value) { + case MIPS_MSA: + return "msa"; + case MIPS_EVA: + return "eva"; + case MIPS_LAST_: + break; + } + return "unknown feature"; +} diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_ppc.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_ppc.c new file mode 100755 index 00000000..59b9ecca --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_ppc.c @@ -0,0 +1,358 @@ +// Copyright 2018 IBM. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include "cpuinfo_ppc.h" +#include "internal/bit_utils.h" +#include "internal/filesystem.h" +#include "internal/linux_features_aggregator.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" + +DECLARE_SETTER(PPCFeatures, ppc32) +DECLARE_SETTER(PPCFeatures, ppc64) +DECLARE_SETTER(PPCFeatures, ppc601) +DECLARE_SETTER(PPCFeatures, altivec) +DECLARE_SETTER(PPCFeatures, fpu) +DECLARE_SETTER(PPCFeatures, mmu) +DECLARE_SETTER(PPCFeatures, mac_4xx) +DECLARE_SETTER(PPCFeatures, unifiedcache) +DECLARE_SETTER(PPCFeatures, spe) +DECLARE_SETTER(PPCFeatures, efpsingle) +DECLARE_SETTER(PPCFeatures, efpdouble) +DECLARE_SETTER(PPCFeatures, no_tb) +DECLARE_SETTER(PPCFeatures, power4) +DECLARE_SETTER(PPCFeatures, power5) +DECLARE_SETTER(PPCFeatures, power5plus) +DECLARE_SETTER(PPCFeatures, cell) +DECLARE_SETTER(PPCFeatures, booke) +DECLARE_SETTER(PPCFeatures, smt) +DECLARE_SETTER(PPCFeatures, icachesnoop) +DECLARE_SETTER(PPCFeatures, arch205) +DECLARE_SETTER(PPCFeatures, pa6t) +DECLARE_SETTER(PPCFeatures, dfp) +DECLARE_SETTER(PPCFeatures, power6ext) +DECLARE_SETTER(PPCFeatures, arch206) +DECLARE_SETTER(PPCFeatures, vsx) +DECLARE_SETTER(PPCFeatures, pseries_perfmon_compat) +DECLARE_SETTER(PPCFeatures, truele) +DECLARE_SETTER(PPCFeatures, ppcle) +DECLARE_SETTER(PPCFeatures, arch207) +DECLARE_SETTER(PPCFeatures, htm) +DECLARE_SETTER(PPCFeatures, dscr) +DECLARE_SETTER(PPCFeatures, ebb) +DECLARE_SETTER(PPCFeatures, isel) +DECLARE_SETTER(PPCFeatures, tar) +DECLARE_SETTER(PPCFeatures, vcrypto) +DECLARE_SETTER(PPCFeatures, htm_nosc) +DECLARE_SETTER(PPCFeatures, arch300) +DECLARE_SETTER(PPCFeatures, ieee128) +DECLARE_SETTER(PPCFeatures, darn) +DECLARE_SETTER(PPCFeatures, scv) +DECLARE_SETTER(PPCFeatures, htm_no_suspend) + +static const CapabilityConfig kConfigs[] = { + {{PPC_FEATURE_32, 0}, "ppc32", &set_ppc32}, + {{PPC_FEATURE_64, 0}, "ppc64", &set_ppc64}, + {{PPC_FEATURE_601_INSTR, 0}, "ppc601", &set_ppc601}, + {{PPC_FEATURE_HAS_ALTIVEC, 
0}, "altivec", &set_altivec}, + {{PPC_FEATURE_HAS_FPU, 0}, "fpu", &set_fpu}, + {{PPC_FEATURE_HAS_MMU, 0}, "mmu", &set_mmu}, + {{PPC_FEATURE_HAS_4xxMAC, 0}, "4xxmac", &set_mac_4xx}, + {{PPC_FEATURE_UNIFIED_CACHE, 0}, "ucache", &set_unifiedcache}, + {{PPC_FEATURE_HAS_SPE, 0}, "spe", &set_spe}, + {{PPC_FEATURE_HAS_EFP_SINGLE, 0}, "efpsingle", &set_efpsingle}, + {{PPC_FEATURE_HAS_EFP_DOUBLE, 0}, "efpdouble", &set_efpdouble}, + {{PPC_FEATURE_NO_TB, 0}, "notb", &set_no_tb}, + {{PPC_FEATURE_POWER4, 0}, "power4", &set_power4}, + {{PPC_FEATURE_POWER5, 0}, "power5", &set_power5}, + {{PPC_FEATURE_POWER5_PLUS, 0}, "power5+", &set_power5plus}, + {{PPC_FEATURE_CELL, 0}, "cellbe", &set_cell}, + {{PPC_FEATURE_BOOKE, 0}, "booke", &set_booke}, + {{PPC_FEATURE_SMT, 0}, "smt", &set_smt}, + {{PPC_FEATURE_ICACHE_SNOOP, 0}, "ic_snoop", &set_icachesnoop}, + {{PPC_FEATURE_ARCH_2_05, 0}, "arch_2_05", &set_arch205}, + {{PPC_FEATURE_PA6T, 0}, "pa6t", &set_pa6t}, + {{PPC_FEATURE_HAS_DFP, 0}, "dfp", &set_dfp}, + {{PPC_FEATURE_POWER6_EXT, 0}, "power6x", &set_power6ext}, + {{PPC_FEATURE_ARCH_2_06, 0}, "arch_2_06", &set_arch206}, + {{PPC_FEATURE_HAS_VSX, 0}, "vsx", &set_vsx}, + {{PPC_FEATURE_PSERIES_PERFMON_COMPAT, 0}, + "archpmu", + &set_pseries_perfmon_compat}, + {{PPC_FEATURE_TRUE_LE, 0}, "true_le", &set_truele}, + {{PPC_FEATURE_PPC_LE, 0}, "ppcle", &set_ppcle}, + {{0, PPC_FEATURE2_ARCH_2_07}, "arch_2_07", &set_arch207}, + {{0, PPC_FEATURE2_HTM}, "htm", &set_htm}, + {{0, PPC_FEATURE2_DSCR}, "dscr", &set_dscr}, + {{0, PPC_FEATURE2_EBB}, "ebb", &set_ebb}, + {{0, PPC_FEATURE2_ISEL}, "isel", &set_isel}, + {{0, PPC_FEATURE2_TAR}, "tar", &set_tar}, + {{0, PPC_FEATURE2_VEC_CRYPTO}, "vcrypto", &set_vcrypto}, + {{0, PPC_FEATURE2_HTM_NOSC}, "htm-nosc", &set_htm_nosc}, + {{0, PPC_FEATURE2_ARCH_3_00}, "arch_3_00", &set_arch300}, + {{0, PPC_FEATURE2_HAS_IEEE128}, "ieee128", &set_ieee128}, + {{0, PPC_FEATURE2_DARN}, "darn", &set_darn}, + {{0, PPC_FEATURE2_SCV}, "scv", &set_scv}, + {{0, 
PPC_FEATURE2_HTM_NO_SUSPEND}, "htm-no-suspend", &set_htm_no_suspend},
+};
+static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig);
+
+static bool HandlePPCLine(const LineResult result,
+                          PPCPlatformStrings* const strings) {  // Copies recognised key/value pairs of one line; returns false at EOF.
+  StringView line = result.line;
+  StringView key, value;
+  if (CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) {
+    if (CpuFeatures_StringView_HasWord(key, "platform")) {
+      CpuFeatures_StringView_CopyString(value, strings->platform,
+                                        sizeof(strings->platform));
+    } else if (CpuFeatures_StringView_IsEquals(key, str("model"))) {
+      CpuFeatures_StringView_CopyString(value, strings->model,
+                                        sizeof(strings->model));  // bound by the destination itself, not by platform
+    } else if (CpuFeatures_StringView_IsEquals(key, str("machine"))) {
+      CpuFeatures_StringView_CopyString(value, strings->machine,
+                                        sizeof(strings->machine));
+    } else if (CpuFeatures_StringView_IsEquals(key, str("cpu"))) {
+      CpuFeatures_StringView_CopyString(value, strings->cpu,
+                                        sizeof(strings->cpu));
+    }
+  }
+  return !result.eof;
+}
+
+static void FillProcCpuInfoData(PPCPlatformStrings* const strings) {  // Feeds every /proc/cpuinfo line to HandlePPCLine; no-op if the file cannot be opened.
+  const int fd = CpuFeatures_OpenFile("/proc/cpuinfo");
+  if (fd >= 0) {
+    StackLineReader reader;
+    StackLineReader_Initialize(&reader, fd);
+    for (;;) {
+      if (!HandlePPCLine(StackLineReader_NextLine(&reader), strings)) {
+        break;
+      }
+    }
+    CpuFeatures_CloseFile(fd);
+  }
+}
+
+static const PPCInfo kEmptyPPCInfo;
+
+PPCInfo GetPPCInfo(void) {
+  /*
+   * On Power feature flags aren't currently in cpuinfo so we only look at
+   * the auxiliary vector.
+ */ + PPCInfo info = kEmptyPPCInfo; + + CpuFeatures_OverrideFromHwCaps(kConfigsSize, kConfigs, + CpuFeatures_GetHardwareCapabilities(), + &info.features); + return info; +} + +static const PPCPlatformStrings kEmptyPPCPlatformStrings; + +PPCPlatformStrings GetPPCPlatformStrings(void) { + PPCPlatformStrings strings = kEmptyPPCPlatformStrings; + + FillProcCpuInfoData(&strings); + strings.type = CpuFeatures_GetPlatformType(); + return strings; +} + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +int GetPPCFeaturesEnumValue(const PPCFeatures* features, + PPCFeaturesEnum value) { + switch (value) { + case PPC_32: + return features->ppc32; + case PPC_64: + return features->ppc64; + case PPC_601_INSTR: + return features->ppc601; + case PPC_HAS_ALTIVEC: + return features->altivec; + case PPC_HAS_FPU: + return features->fpu; + case PPC_HAS_MMU: + return features->mmu; + case PPC_HAS_4xxMAC: + return features->mac_4xx; + case PPC_UNIFIED_CACHE: + return features->unifiedcache; + case PPC_HAS_SPE: + return features->spe; + case PPC_HAS_EFP_SINGLE: + return features->efpsingle; + case PPC_HAS_EFP_DOUBLE: + return features->efpdouble; + case PPC_NO_TB: + return features->no_tb; + case PPC_POWER4: + return features->power4; + case PPC_POWER5: + return features->power5; + case PPC_POWER5_PLUS: + return features->power5plus; + case PPC_CELL: + return features->cell; + case PPC_BOOKE: + return features->booke; + case PPC_SMT: + return features->smt; + case PPC_ICACHE_SNOOP: + return features->icachesnoop; + case PPC_ARCH_2_05: + return features->arch205; + case PPC_PA6T: + return features->pa6t; + case PPC_HAS_DFP: + return features->dfp; + case PPC_POWER6_EXT: + return features->power6ext; + case PPC_ARCH_2_06: + return features->arch206; + case PPC_HAS_VSX: + return features->vsx; + case PPC_PSERIES_PERFMON_COMPAT: + return features->pseries_perfmon_compat; + case PPC_TRUE_LE: + return features->truele; + case 
PPC_PPC_LE: + return features->ppcle; + case PPC_ARCH_2_07: + return features->arch207; + case PPC_HTM: + return features->htm; + case PPC_DSCR: + return features->dscr; + case PPC_EBB: + return features->ebb; + case PPC_ISEL: + return features->isel; + case PPC_TAR: + return features->tar; + case PPC_VEC_CRYPTO: + return features->vcrypto; + case PPC_HTM_NOSC: + return features->htm_nosc; + case PPC_ARCH_3_00: + return features->arch300; + case PPC_HAS_IEEE128: + return features->ieee128; + case PPC_DARN: + return features->darn; + case PPC_SCV: + return features->scv; + case PPC_HTM_NO_SUSPEND: + return features->htm_no_suspend; + case PPC_LAST_: + break; + } + return false; +} + +/* Have used the same names as glibc */ +const char* GetPPCFeaturesEnumName(PPCFeaturesEnum value) { + switch (value) { + case PPC_32: + return "ppc32"; + case PPC_64: + return "ppc64"; + case PPC_601_INSTR: + return "ppc601"; + case PPC_HAS_ALTIVEC: + return "altivec"; + case PPC_HAS_FPU: + return "fpu"; + case PPC_HAS_MMU: + return "mmu"; + case PPC_HAS_4xxMAC: + return "4xxmac"; + case PPC_UNIFIED_CACHE: + return "ucache"; + case PPC_HAS_SPE: + return "spe"; + case PPC_HAS_EFP_SINGLE: + return "efpsingle"; + case PPC_HAS_EFP_DOUBLE: + return "efpdouble"; + case PPC_NO_TB: + return "notb"; + case PPC_POWER4: + return "power4"; + case PPC_POWER5: + return "power5"; + case PPC_POWER5_PLUS: + return "power5+"; + case PPC_CELL: + return "cellbe"; + case PPC_BOOKE: + return "booke"; + case PPC_SMT: + return "smt"; + case PPC_ICACHE_SNOOP: + return "ic_snoop"; + case PPC_ARCH_2_05: + return "arch_2_05"; + case PPC_PA6T: + return "pa6t"; + case PPC_HAS_DFP: + return "dfp"; + case PPC_POWER6_EXT: + return "power6x"; + case PPC_ARCH_2_06: + return "arch_2_06"; + case PPC_HAS_VSX: + return "vsx"; + case PPC_PSERIES_PERFMON_COMPAT: + return "archpmu"; + case PPC_TRUE_LE: + return "true_le"; + case PPC_PPC_LE: + return "ppcle"; + case PPC_ARCH_2_07: + return "arch_2_07"; + case PPC_HTM: + return 
"htm"; + case PPC_DSCR: + return "dscr"; + case PPC_EBB: + return "ebb"; + case PPC_ISEL: + return "isel"; + case PPC_TAR: + return "tar"; + case PPC_VEC_CRYPTO: + return "vcrypto"; + case PPC_HTM_NOSC: + return "htm-nosc"; + case PPC_ARCH_3_00: + return "arch_3_00"; + case PPC_HAS_IEEE128: + return "ieee128"; + case PPC_DARN: + return "darn"; + case PPC_SCV: + return "scv"; + case PPC_HTM_NO_SUSPEND: + return "htm-no-suspend"; + case PPC_LAST_: + break; + } + return "unknown_feature"; +} diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_x86.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_x86.c new file mode 100755 index 00000000..390e8c92 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_x86.c @@ -0,0 +1,447 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "cpuinfo_x86.h" +#include "internal/bit_utils.h" +#include "internal/cpuid_x86.h" + +#include +#include + +static const Leaf kEmptyLeaf; + +static Leaf SafeCpuId(uint32_t max_cpuid_leaf, uint32_t leaf_id) { + if (leaf_id <= max_cpuid_leaf) { + return CpuId(leaf_id); + } else { + return kEmptyLeaf; + } +} + +#define MASK_XMM 0x2 +#define MASK_YMM 0x4 +#define MASK_MASKREG 0x20 +#define MASK_ZMM0_15 0x40 +#define MASK_ZMM16_31 0x80 + +static bool HasMask(uint32_t value, uint32_t mask) { + return (value & mask) == mask; +} + +// Checks that operating system saves and restores xmm registers during context +// switches. +static bool HasXmmOsXSave(uint32_t xcr0_eax) { + return HasMask(xcr0_eax, MASK_XMM); +} + +// Checks that operating system saves and restores ymm registers during context +// switches. +static bool HasYmmOsXSave(uint32_t xcr0_eax) { + return HasMask(xcr0_eax, MASK_XMM | MASK_YMM); +} + +// Checks that operating system saves and restores zmm registers during context +// switches. +static bool HasZmmOsXSave(uint32_t xcr0_eax) { + return HasMask(xcr0_eax, MASK_XMM | MASK_YMM | MASK_MASKREG | MASK_ZMM0_15 | + MASK_ZMM16_31); +} + +static void SetVendor(const Leaf leaf, char* const vendor) { + *(uint32_t*)(vendor) = leaf.ebx; + *(uint32_t*)(vendor + 4) = leaf.edx; + *(uint32_t*)(vendor + 8) = leaf.ecx; + vendor[12] = '\0'; +} + +static int IsVendor(const Leaf leaf, const char* const name) { + const uint32_t ebx = *(const uint32_t*)(name); + const uint32_t edx = *(const uint32_t*)(name + 4); + const uint32_t ecx = *(const uint32_t*)(name + 8); + return leaf.ebx == ebx && leaf.ecx == ecx && leaf.edx == edx; +} + +// Reference https://en.wikipedia.org/wiki/CPUID. 
+static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info) { + const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1); + const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7); + + const bool have_xsave = IsBitSet(leaf_1.ecx, 26); + const bool have_osxsave = IsBitSet(leaf_1.ecx, 27); + const uint32_t xcr0_eax = (have_xsave && have_osxsave) ? GetXCR0Eax() : 0; + const bool have_sse_os_support = HasXmmOsXSave(xcr0_eax); + const bool have_avx_os_support = HasYmmOsXSave(xcr0_eax); + const bool have_avx512_os_support = HasZmmOsXSave(xcr0_eax); + + const uint32_t family = ExtractBitRange(leaf_1.eax, 11, 8); + const uint32_t extended_family = ExtractBitRange(leaf_1.eax, 27, 20); + const uint32_t model = ExtractBitRange(leaf_1.eax, 7, 4); + const uint32_t extended_model = ExtractBitRange(leaf_1.eax, 19, 16); + + X86Features* const features = &info->features; + + info->family = extended_family + family; + info->model = (extended_model << 4) + model; + info->stepping = ExtractBitRange(leaf_1.eax, 3, 0); + + features->smx = IsBitSet(leaf_1.ecx, 6); + features->cx16 = IsBitSet(leaf_1.ecx, 13); + features->aes = IsBitSet(leaf_1.ecx, 25); + features->f16c = IsBitSet(leaf_1.ecx, 29); + features->sgx = IsBitSet(leaf_7.ebx, 2); + features->bmi1 = IsBitSet(leaf_7.ebx, 3); + features->bmi2 = IsBitSet(leaf_7.ebx, 8); + features->erms = IsBitSet(leaf_7.ebx, 9); + features->vpclmulqdq = IsBitSet(leaf_7.ecx, 10); + + if (have_sse_os_support) { + features->ssse3 = IsBitSet(leaf_1.ecx, 9); + features->sse4_1 = IsBitSet(leaf_1.ecx, 19); + features->sse4_2 = IsBitSet(leaf_1.ecx, 20); + } + + if (have_avx_os_support) { + features->fma3 = IsBitSet(leaf_1.ecx, 12); + features->avx = IsBitSet(leaf_1.ecx, 28); + features->avx2 = IsBitSet(leaf_7.ebx, 5); + } + + if (have_avx512_os_support) { + features->avx512f = IsBitSet(leaf_7.ebx, 16); + features->avx512cd = IsBitSet(leaf_7.ebx, 28); + features->avx512er = IsBitSet(leaf_7.ebx, 27); + features->avx512pf = IsBitSet(leaf_7.ebx, 26); + 
features->avx512bw = IsBitSet(leaf_7.ebx, 30); + features->avx512dq = IsBitSet(leaf_7.ebx, 17); + features->avx512vl = IsBitSet(leaf_7.ebx, 31); + features->avx512ifma = IsBitSet(leaf_7.ebx, 21); + features->avx512vbmi = IsBitSet(leaf_7.ecx, 1); + features->avx512vbmi2 = IsBitSet(leaf_7.ecx, 6); + features->avx512vnni = IsBitSet(leaf_7.ecx, 11); + features->avx512bitalg = IsBitSet(leaf_7.ecx, 12); + features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14); + features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2); + features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3); + } +} + +static const X86Info kEmptyX86Info; + +X86Info GetX86Info(void) { + X86Info info = kEmptyX86Info; + const Leaf leaf_0 = CpuId(0); + const uint32_t max_cpuid_leaf = leaf_0.eax; + SetVendor(leaf_0, info.vendor); + if (IsVendor(leaf_0, "GenuineIntel") || IsVendor(leaf_0, "AuthenticAMD")) { + ParseCpuId(max_cpuid_leaf, &info); + } + return info; +} + +#define CPUID(FAMILY, MODEL) (((FAMILY & 0xFF) << 8) | (MODEL & 0xFF)) + +X86Microarchitecture GetX86Microarchitecture(const X86Info* info) { + if (memcmp(info->vendor, "GenuineIntel", sizeof(info->vendor)) == 0) { + switch (CPUID(info->family, info->model)) { + case CPUID(0x06, 0x35): + case CPUID(0x06, 0x36): + // https://en.wikipedia.org/wiki/Bonnell_(microarchitecture) + return INTEL_ATOM_BNL; + case CPUID(0x06, 0x37): + case CPUID(0x06, 0x4C): + // https://en.wikipedia.org/wiki/Silvermont + return INTEL_ATOM_SMT; + case CPUID(0x06, 0x5C): + // https://en.wikipedia.org/wiki/Goldmont + return INTEL_ATOM_GMT; + case CPUID(0x06, 0x0F): + case CPUID(0x06, 0x16): + // https://en.wikipedia.org/wiki/Intel_Core_(microarchitecture) + return INTEL_CORE; + case CPUID(0x06, 0x17): + case CPUID(0x06, 0x1D): + // https://en.wikipedia.org/wiki/Penryn_(microarchitecture) + return INTEL_PNR; + case CPUID(0x06, 0x1A): + case CPUID(0x06, 0x1E): + case CPUID(0x06, 0x1F): + case CPUID(0x06, 0x2E): + // https://en.wikipedia.org/wiki/Nehalem_(microarchitecture) + return 
INTEL_NHM; + case CPUID(0x06, 0x25): + case CPUID(0x06, 0x2C): + case CPUID(0x06, 0x2F): + // https://en.wikipedia.org/wiki/Westmere_(microarchitecture) + return INTEL_WSM; + case CPUID(0x06, 0x2A): + case CPUID(0x06, 0x2D): + // https://en.wikipedia.org/wiki/Sandy_Bridge#Models_and_steppings + return INTEL_SNB; + case CPUID(0x06, 0x3A): + case CPUID(0x06, 0x3E): + // https://en.wikipedia.org/wiki/Ivy_Bridge_(microarchitecture)#Models_and_steppings + return INTEL_IVB; + case CPUID(0x06, 0x3C): + case CPUID(0x06, 0x3F): + case CPUID(0x06, 0x45): + case CPUID(0x06, 0x46): + // https://en.wikipedia.org/wiki/Haswell_(microarchitecture) + return INTEL_HSW; + case CPUID(0x06, 0x3D): + case CPUID(0x06, 0x47): + case CPUID(0x06, 0x4F): + case CPUID(0x06, 0x56): + // https://en.wikipedia.org/wiki/Broadwell_(microarchitecture) + return INTEL_BDW; + case CPUID(0x06, 0x4E): + case CPUID(0x06, 0x55): + case CPUID(0x06, 0x5E): + // https://en.wikipedia.org/wiki/Skylake_(microarchitecture) + return INTEL_SKL; + case CPUID(0x06, 0x8E): + case CPUID(0x06, 0x9E): + // https://en.wikipedia.org/wiki/Kaby_Lake + return INTEL_KBL; + default: + return X86_UNKNOWN; + } + } + if (memcmp(info->vendor, "AuthenticAMD", sizeof(info->vendor)) == 0) { + switch (info->family) { + // https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures + case 0x0F: + return AMD_HAMMER; + case 0x10: + return AMD_K10; + case 0x14: + return AMD_BOBCAT; + case 0x15: + return AMD_BULLDOZER; + case 0x16: + return AMD_JAGUAR; + case 0x17: + return AMD_ZEN; + default: + return X86_UNKNOWN; + } + } + return X86_UNKNOWN; +} + +static void SetString(const uint32_t max_cpuid_ext_leaf, const uint32_t leaf_id, + char* buffer) { + const Leaf leaf = SafeCpuId(max_cpuid_ext_leaf, leaf_id); + // We allow calling memcpy from SetString which is only called when requesting + // X86BrandString. 
+ memcpy(buffer, &leaf, sizeof(Leaf)); +} + +void FillX86BrandString(char brand_string[49]) { + const Leaf leaf_ext_0 = CpuId(0x80000000); + const uint32_t max_cpuid_leaf_ext = leaf_ext_0.eax; + SetString(max_cpuid_leaf_ext, 0x80000002, brand_string); + SetString(max_cpuid_leaf_ext, 0x80000003, brand_string + 16); + SetString(max_cpuid_leaf_ext, 0x80000004, brand_string + 32); + brand_string[48] = '\0'; +} + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +int GetX86FeaturesEnumValue(const X86Features* features, + X86FeaturesEnum value) { + switch (value) { + case X86_AES: + return features->aes; + case X86_ERMS: + return features->erms; + case X86_F16C: + return features->f16c; + case X86_FMA3: + return features->fma3; + case X86_VPCLMULQDQ: + return features->vpclmulqdq; + case X86_BMI1: + return features->bmi1; + case X86_BMI2: + return features->bmi2; + case X86_SSSE3: + return features->ssse3; + case X86_SSE4_1: + return features->sse4_1; + case X86_SSE4_2: + return features->sse4_2; + case X86_AVX: + return features->avx; + case X86_AVX2: + return features->avx2; + case X86_AVX512F: + return features->avx512f; + case X86_AVX512CD: + return features->avx512cd; + case X86_AVX512ER: + return features->avx512er; + case X86_AVX512PF: + return features->avx512pf; + case X86_AVX512BW: + return features->avx512bw; + case X86_AVX512DQ: + return features->avx512dq; + case X86_AVX512VL: + return features->avx512vl; + case X86_AVX512IFMA: + return features->avx512ifma; + case X86_AVX512VBMI: + return features->avx512vbmi; + case X86_AVX512VBMI2: + return features->avx512vbmi2; + case X86_AVX512VNNI: + return features->avx512vnni; + case X86_AVX512BITALG: + return features->avx512bitalg; + case X86_AVX512VPOPCNTDQ: + return features->avx512vpopcntdq; + case X86_AVX512_4VNNIW: + return features->avx512_4vnniw; + case X86_AVX512_4VBMI2: + return features->avx512_4vbmi2; + case X86_SMX: + return features->smx; + 
case X86_SGX: + return features->sgx; + case X86_CX16: + return features->cx16; + case X86_LAST_: + break; + } + return false; +} + +const char* GetX86FeaturesEnumName(X86FeaturesEnum value) { + switch (value) { + case X86_AES: + return "aes"; + case X86_ERMS: + return "erms"; + case X86_F16C: + return "f16c"; + case X86_FMA3: + return "fma3"; + case X86_VPCLMULQDQ: + return "vpclmulqdq"; + case X86_BMI1: + return "bmi1"; + case X86_BMI2: + return "bmi2"; + case X86_SSSE3: + return "ssse3"; + case X86_SSE4_1: + return "sse4_1"; + case X86_SSE4_2: + return "sse4_2"; + case X86_AVX: + return "avx"; + case X86_AVX2: + return "avx2"; + case X86_AVX512F: + return "avx512f"; + case X86_AVX512CD: + return "avx512cd"; + case X86_AVX512ER: + return "avx512er"; + case X86_AVX512PF: + return "avx512pf"; + case X86_AVX512BW: + return "avx512bw"; + case X86_AVX512DQ: + return "avx512dq"; + case X86_AVX512VL: + return "avx512vl"; + case X86_AVX512IFMA: + return "avx512ifma"; + case X86_AVX512VBMI: + return "avx512vbmi"; + case X86_AVX512VBMI2: + return "avx512vbmi2"; + case X86_AVX512VNNI: + return "avx512vnni"; + case X86_AVX512BITALG: + return "avx512bitalg"; + case X86_AVX512VPOPCNTDQ: + return "avx512vpopcntdq"; + case X86_AVX512_4VNNIW: + return "avx512_4vnniw"; + case X86_AVX512_4VBMI2: + return "avx512_4vbmi2"; + case X86_SMX: + return "smx"; + case X86_SGX: + return "sgx"; + case X86_CX16: + return "cx16"; + case X86_LAST_: + break; + } + return "unknown_feature"; +} + +const char* GetX86MicroarchitectureName(X86Microarchitecture uarch) { + switch (uarch) { + case X86_UNKNOWN: + return "X86_UNKNOWN"; + case INTEL_CORE: + return "INTEL_CORE"; + case INTEL_PNR: + return "INTEL_PNR"; + case INTEL_NHM: + return "INTEL_NHM"; + case INTEL_ATOM_BNL: + return "INTEL_ATOM_BNL"; + case INTEL_WSM: + return "INTEL_WSM"; + case INTEL_SNB: + return "INTEL_SNB"; + case INTEL_IVB: + return "INTEL_IVB"; + case INTEL_ATOM_SMT: + return "INTEL_ATOM_SMT"; + case INTEL_HSW: + return 
"INTEL_HSW"; + case INTEL_BDW: + return "INTEL_BDW"; + case INTEL_SKL: + return "INTEL_SKL"; + case INTEL_ATOM_GMT: + return "INTEL_ATOM_GMT"; + case INTEL_KBL: + return "INTEL_KBL"; + case INTEL_CFL: + return "INTEL_CFL"; + case INTEL_CNL: + return "INTEL_CNL"; + case AMD_HAMMER: + return "AMD_HAMMER"; + case AMD_K10: + return "AMD_K10"; + case AMD_BOBCAT: + return "AMD_BOBCAT"; + case AMD_BULLDOZER: + return "AMD_BULLDOZER"; + case AMD_JAGUAR: + return "AMD_JAGUAR"; + case AMD_ZEN: + return "AMD_ZEN"; + } + return "unknown microarchitecture"; +} diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/filesystem.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/filesystem.c new file mode 100755 index 00000000..286a9ccb --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/filesystem.c @@ -0,0 +1,57 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "internal/filesystem.h" + +#include +#include +#include +#include + +#if defined(_MSC_VER) +#include +int CpuFeatures_OpenFile(const char* filename) { + return _open(filename, _O_RDONLY); +} + +void CpuFeatures_CloseFile(int file_descriptor) { _close(file_descriptor); } + +int CpuFeatures_ReadFile(int file_descriptor, void* buffer, + size_t buffer_size) { + return _read(file_descriptor, buffer, buffer_size); +} + +#else +#include + +int CpuFeatures_OpenFile(const char* filename) { + int result; + do { + result = open(filename, O_RDONLY); + } while (result == -1L && errno == EINTR); + return result; +} + +void CpuFeatures_CloseFile(int file_descriptor) { close(file_descriptor); } + +int CpuFeatures_ReadFile(int file_descriptor, void* buffer, + size_t buffer_size) { + int result; + do { + result = read(file_descriptor, buffer, buffer_size); + } while (result == -1L && errno == EINTR); + return result; +} + +#endif diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/hwcaps.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/hwcaps.c new file mode 100755 index 00000000..99ea74b5 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/hwcaps.c @@ -0,0 +1,194 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include "cpu_features_macros.h" +#include "internal/filesystem.h" +#include "internal/hwcaps.h" +#include "internal/string_view.h" + +#if defined(NDEBUG) +#define D(...) +#else +#include +#define D(...) \ + do { \ + printf(__VA_ARGS__); \ + fflush(stdout); \ + } while (0) +#endif + +#if defined(CPU_FEATURES_ARCH_MIPS) || defined(CPU_FEATURES_ARCH_ANY_ARM) +#define HWCAPS_ANDROID_MIPS_OR_ARM +#endif + +#if defined(CPU_FEATURES_OS_LINUX_OR_ANDROID) && \ + !defined(HWCAPS_ANDROID_MIPS_OR_ARM) +#define HWCAPS_REGULAR_LINUX +#endif + +#if defined(HWCAPS_ANDROID_MIPS_OR_ARM) || defined(HWCAPS_REGULAR_LINUX) +#define HWCAPS_SUPPORTED +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Implementation of GetElfHwcapFromGetauxval +//////////////////////////////////////////////////////////////////////////////// + +// On Linux we simply use getauxval. +#if defined(HWCAPS_REGULAR_LINUX) +#include +#include +static unsigned long GetElfHwcapFromGetauxval(uint32_t hwcap_type) { + return getauxval(hwcap_type); +} +#endif // defined(HWCAPS_REGULAR_LINUX) + +// On Android we probe the system's C library for a 'getauxval' function and +// call it if it exits, or return 0 for failure. This function is available +// since API level 20. +// +// This code does *NOT* check for '__ANDROID_API__ >= 20' to support the edge +// case where some NDK developers use headers for a platform that is newer than +// the one really targetted by their application. This is typically done to use +// newer native APIs only when running on more recent Android versions, and +// requires careful symbol management. +// +// Note that getauxval() can't really be re-implemented here, because its +// implementation does not parse /proc/self/auxv. Instead it depends on values +// that are passed by the kernel at process-init time to the C runtime +// initialization layer. 
+#if defined(HWCAPS_ANDROID_MIPS_OR_ARM) +#include +#define AT_HWCAP 16 +#define AT_HWCAP2 26 +#define AT_PLATFORM 15 +#define AT_BASE_PLATFORM 24 + +typedef unsigned long getauxval_func_t(unsigned long); + +static uint32_t GetElfHwcapFromGetauxval(uint32_t hwcap_type) { + uint32_t ret = 0; + void* libc_handle = NULL; + getauxval_func_t* func = NULL; + + dlerror(); // Cleaning error state before calling dlopen. + libc_handle = dlopen("libc.so", RTLD_NOW); + if (!libc_handle) { + D("Could not dlopen() C library: %s\n", dlerror()); + return 0; + } + func = (getauxval_func_t*)dlsym(libc_handle, "getauxval"); + if (!func) { + D("Could not find getauxval() in C library\n"); + } else { + // Note: getauxval() returns 0 on failure. Doesn't touch errno. + ret = (uint32_t)(*func)(hwcap_type); + } + dlclose(libc_handle); + return ret; +} +#endif // defined(HWCAPS_ANDROID_MIPS_OR_ARM) + +#if defined(HWCAPS_SUPPORTED) +//////////////////////////////////////////////////////////////////////////////// +// Implementation of GetHardwareCapabilities for Android and Linux +//////////////////////////////////////////////////////////////////////////////// + +// Fallback when getauxval is not available, retrieves hwcaps from +// "/proc/self/auxv". +static uint32_t GetElfHwcapFromProcSelfAuxv(uint32_t hwcap_type) { + struct { + uint32_t tag; + uint32_t value; + } entry; + uint32_t result = 0; + const char filepath[] = "/proc/self/auxv"; + const int fd = CpuFeatures_OpenFile(filepath); + if (fd < 0) { + D("Could not open %s\n", filepath); + return 0; + } + for (;;) { + const int ret = CpuFeatures_ReadFile(fd, (char*)&entry, sizeof entry); + if (ret < 0) { + D("Error while reading %s\n", filepath); + break; + } + // Detect end of list. 
+ if (ret == 0 || (entry.tag == 0 && entry.value == 0)) { + break; + } + if (entry.tag == hwcap_type) { + result = entry.value; + break; + } + } + CpuFeatures_CloseFile(fd); + return result; +} + +// Retrieves hardware capabilities by first trying to call getauxval, if not +// available falls back to reading "/proc/self/auxv". +static unsigned long GetHardwareCapabilitiesFor(uint32_t type) { + unsigned long hwcaps = GetElfHwcapFromGetauxval(type); + if (!hwcaps) { + D("Parsing /proc/self/auxv to extract ELF hwcaps!\n"); + hwcaps = GetElfHwcapFromProcSelfAuxv(type); + } + return hwcaps; +} + +HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void) { + HardwareCapabilities capabilities; + capabilities.hwcaps = GetHardwareCapabilitiesFor(AT_HWCAP); + capabilities.hwcaps2 = GetHardwareCapabilitiesFor(AT_HWCAP2); + return capabilities; +} + +PlatformType kEmptyPlatformType; + +PlatformType CpuFeatures_GetPlatformType(void) { + PlatformType type = kEmptyPlatformType; + char *platform = (char *)GetHardwareCapabilitiesFor(AT_PLATFORM); + char *base_platform = (char *)GetHardwareCapabilitiesFor(AT_BASE_PLATFORM); + + if (platform != NULL) + CpuFeatures_StringView_CopyString(str(platform), type.platform, + sizeof(type.platform)); + if (base_platform != NULL) + CpuFeatures_StringView_CopyString(str(base_platform), type.base_platform, + sizeof(type.base_platform)); + return type; +} +#else // (defined(HWCAPS_SUPPORTED) + +PlatformType kEmptyPlatformType; + +PlatformType CpuFeatures_GetPlatformType(void) { + PlatformType type = kEmptyPlatformType; + return type; +} + +//////////////////////////////////////////////////////////////////////////////// +// Implementation of GetHardwareCapabilities for unsupported platforms. 
+//////////////////////////////////////////////////////////////////////////////// + +const HardwareCapabilities kEmptyHardwareCapabilities; +HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void) { + return kEmptyHardwareCapabilities; +} +#endif diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/linux_features_aggregator.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/linux_features_aggregator.c new file mode 100755 index 00000000..b7f8f3d9 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/linux_features_aggregator.c @@ -0,0 +1,51 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "internal/linux_features_aggregator.h" +#include "internal/string_view.h" + +void CpuFeatures_SetFromFlags(const size_t configs_size, + const CapabilityConfig* configs, + const StringView flags_line, + void* const features) { + size_t i = 0; + for (; i < configs_size; ++i) { + const CapabilityConfig config = configs[i]; + config.set_bit(features, CpuFeatures_StringView_HasWord( + flags_line, config.proc_cpuinfo_flag)); + } +} + +static bool IsSet(const uint32_t mask, const uint32_t value) { + return (value & mask) == mask; +} + +static bool IsHwCapsSet(const HardwareCapabilities hwcaps_mask, + const HardwareCapabilities hwcaps) { + return IsSet(hwcaps_mask.hwcaps, hwcaps.hwcaps) && + IsSet(hwcaps_mask.hwcaps2, hwcaps.hwcaps2); +} + +void CpuFeatures_OverrideFromHwCaps(const size_t configs_size, + const CapabilityConfig* configs, + const HardwareCapabilities hwcaps, + void* const features) { + size_t i = 0; + for (; i < configs_size; ++i) { + const CapabilityConfig* config = &configs[i]; + if (IsHwCapsSet(config->hwcaps_mask, hwcaps)) { + config->set_bit(features, true); + } + } +} diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/stack_line_reader.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/stack_line_reader.c new file mode 100755 index 00000000..b2c48ba6 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/stack_line_reader.c @@ -0,0 +1,131 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "internal/stack_line_reader.h" +#include "internal/filesystem.h" + +#include +#include +#include + +void StackLineReader_Initialize(StackLineReader* reader, int fd) { + reader->view.ptr = reader->buffer; + reader->view.size = 0; + reader->skip_mode = false; + reader->fd = fd; +} + +// Replaces the content of buffer with bytes from the file. +static int LoadFullBuffer(StackLineReader* reader) { + const int read = CpuFeatures_ReadFile(reader->fd, reader->buffer, + STACK_LINE_READER_BUFFER_SIZE); + assert(read >= 0); + reader->view.ptr = reader->buffer; + reader->view.size = read; + return read; +} + +// Appends with bytes from the file to buffer, filling the remaining space. +static int LoadMore(StackLineReader* reader) { + char* const ptr = reader->buffer + reader->view.size; + const size_t size_to_read = STACK_LINE_READER_BUFFER_SIZE - reader->view.size; + const int read = CpuFeatures_ReadFile(reader->fd, ptr, size_to_read); + assert(read >= 0); + assert(read <= (int)size_to_read); + reader->view.size += read; + return read; +} + +static int IndexOfEol(StackLineReader* reader) { + return CpuFeatures_StringView_IndexOfChar(reader->view, '\n'); +} + +// Relocate buffer's pending bytes at the beginning of the array and fills the +// remaining space with bytes from the file. +static int BringToFrontAndLoadMore(StackLineReader* reader) { + if (reader->view.size && reader->view.ptr != reader->buffer) { + memmove(reader->buffer, reader->view.ptr, reader->view.size); + } + reader->view.ptr = reader->buffer; + return LoadMore(reader); +} + +// Loads chunks of buffer size from disks until it contains a newline character +// or end of file. 
+static void SkipToNextLine(StackLineReader* reader) { + for (;;) { + const int read = LoadFullBuffer(reader); + if (read == 0) { + break; + } else { + const int eol_index = IndexOfEol(reader); + if (eol_index >= 0) { + reader->view = + CpuFeatures_StringView_PopFront(reader->view, eol_index + 1); + break; + } + } + } +} + +static LineResult CreateLineResult(bool eof, bool full_line, StringView view) { + LineResult result; + result.eof = eof; + result.full_line = full_line; + result.line = view; + return result; +} + +// Helper methods to provide clearer semantic in StackLineReader_NextLine. +static LineResult CreateEOFLineResult(StringView view) { + return CreateLineResult(true, true, view); +} + +static LineResult CreateTruncatedLineResult(StringView view) { + return CreateLineResult(false, false, view); +} + +static LineResult CreateValidLineResult(StringView view) { + return CreateLineResult(false, true, view); +} + +LineResult StackLineReader_NextLine(StackLineReader* reader) { + if (reader->skip_mode) { + SkipToNextLine(reader); + reader->skip_mode = false; + } + { + const bool can_load_more = + reader->view.size < STACK_LINE_READER_BUFFER_SIZE; + int eol_index = IndexOfEol(reader); + if (eol_index < 0 && can_load_more) { + const int read = BringToFrontAndLoadMore(reader); + if (read == 0) { + return CreateEOFLineResult(reader->view); + } + eol_index = IndexOfEol(reader); + } + if (eol_index < 0) { + reader->skip_mode = true; + return CreateTruncatedLineResult(reader->view); + } + { + StringView line = + CpuFeatures_StringView_KeepFront(reader->view, eol_index); + reader->view = + CpuFeatures_StringView_PopFront(reader->view, eol_index + 1); + return CreateValidLineResult(line); + } + } +} diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/string_view.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/string_view.c new file mode 100755 index 00000000..4f27cbdb --- /dev/null +++ 
b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/string_view.c @@ -0,0 +1,182 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "internal/string_view.h" + +#include +#include +#include + +int CpuFeatures_StringView_IndexOfChar(const StringView view, char c) { + if (view.ptr && view.size) { + const char* const found = (const char*)memchr(view.ptr, c, view.size); + if (found) { + return found - view.ptr; + } + } + return -1; +} + +int CpuFeatures_StringView_IndexOf(const StringView view, + const StringView sub_view) { + if (sub_view.size) { + StringView remainder = view; + while (remainder.size >= sub_view.size) { + const int found_index = + CpuFeatures_StringView_IndexOfChar(remainder, sub_view.ptr[0]); + if (found_index < 0) break; + remainder = CpuFeatures_StringView_PopFront(remainder, found_index); + if (CpuFeatures_StringView_StartsWith(remainder, sub_view)) { + return remainder.ptr - view.ptr; + } + remainder = CpuFeatures_StringView_PopFront(remainder, 1); + } + } + return -1; +} + +bool CpuFeatures_StringView_IsEquals(const StringView a, const StringView b) { + if (a.size == b.size) { + return a.ptr == b.ptr || memcmp(a.ptr, b.ptr, b.size) == 0; + } + return false; +} + +bool CpuFeatures_StringView_StartsWith(const StringView a, const StringView b) { + return a.ptr && b.ptr && b.size && a.size >= b.size + ? 
memcmp(a.ptr, b.ptr, b.size) == 0 + : false; +} + +StringView CpuFeatures_StringView_PopFront(const StringView str_view, + size_t count) { + if (count > str_view.size) { + return kEmptyStringView; + } + return view(str_view.ptr + count, str_view.size - count); +} + +StringView CpuFeatures_StringView_PopBack(const StringView str_view, + size_t count) { + if (count > str_view.size) { + return kEmptyStringView; + } + return view(str_view.ptr, str_view.size - count); +} + +StringView CpuFeatures_StringView_KeepFront(const StringView str_view, + size_t count) { + return count <= str_view.size ? view(str_view.ptr, count) : str_view; +} + +char CpuFeatures_StringView_Front(const StringView view) { + assert(view.size); + assert(view.ptr); + return view.ptr[0]; +} + +char CpuFeatures_StringView_Back(const StringView view) { + assert(view.size); + return view.ptr[view.size - 1]; +} + +StringView CpuFeatures_StringView_TrimWhitespace(StringView view) { + while (view.size && isspace(CpuFeatures_StringView_Front(view))) + view = CpuFeatures_StringView_PopFront(view, 1); + while (view.size && isspace(CpuFeatures_StringView_Back(view))) + view = CpuFeatures_StringView_PopBack(view, 1); + return view; +} + +static int HexValue(const char c) { + if (c >= '0' && c <= '9') return c - '0'; + if (c >= 'a' && c <= 'f') return c - 'a' + 10; + if (c >= 'A' && c <= 'F') return c - 'A' + 10; + return -1; +} + +// Returns -1 if view contains non digits. 
+static int ParsePositiveNumberWithBase(const StringView view, int base) { + int result = 0; + StringView remainder = view; + for (; remainder.size; + remainder = CpuFeatures_StringView_PopFront(remainder, 1)) { + const int value = HexValue(CpuFeatures_StringView_Front(remainder)); + if (value < 0 || value >= base) return -1; + result = (result * base) + value; + } + return result; +} + +int CpuFeatures_StringView_ParsePositiveNumber(const StringView view) { + if (view.size) { + const StringView hex_prefix = str("0x"); + if (CpuFeatures_StringView_StartsWith(view, hex_prefix)) { + const StringView span_no_prefix = + CpuFeatures_StringView_PopFront(view, hex_prefix.size); + return ParsePositiveNumberWithBase(span_no_prefix, 16); + } + return ParsePositiveNumberWithBase(view, 10); + } + return -1; +} + +void CpuFeatures_StringView_CopyString(const StringView src, char* dst, + size_t dst_size) { + if (dst_size > 0) { + const size_t max_copy_size = dst_size - 1; + const size_t copy_size = + src.size > max_copy_size ? 
max_copy_size : src.size; + memcpy(dst, src.ptr, copy_size); + dst[copy_size] = '\0'; + } +} + +bool CpuFeatures_StringView_HasWord(const StringView line, + const char* const word_str) { + const StringView word = str(word_str); + StringView remainder = line; + for (;;) { + const int index_of_word = CpuFeatures_StringView_IndexOf(remainder, word); + if (index_of_word < 0) { + return false; + } else { + const StringView before = + CpuFeatures_StringView_KeepFront(line, index_of_word); + const StringView after = + CpuFeatures_StringView_PopFront(line, index_of_word + word.size); + const bool valid_before = + before.size == 0 || CpuFeatures_StringView_Back(before) == ' '; + const bool valid_after = + after.size == 0 || CpuFeatures_StringView_Front(after) == ' '; + if (valid_before && valid_after) return true; + remainder = + CpuFeatures_StringView_PopFront(remainder, index_of_word + word.size); + } + } + return false; +} + +bool CpuFeatures_StringView_GetAttributeKeyValue(const StringView line, + StringView* key, + StringView* value) { + const StringView sep = str(": "); + const int index_of_separator = CpuFeatures_StringView_IndexOf(line, sep); + if (index_of_separator < 0) return false; + *value = CpuFeatures_StringView_TrimWhitespace( + CpuFeatures_StringView_PopFront(line, index_of_separator + sep.size)); + *key = CpuFeatures_StringView_TrimWhitespace( + CpuFeatures_StringView_KeepFront(line, index_of_separator)); + return true; +} diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/utils/list_cpu_features.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/utils/list_cpu_features.c new file mode 100755 index 00000000..a5f7f8ce --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/utils/list_cpu_features.c @@ -0,0 +1,237 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "cpu_features_macros.h" +#include "cpuinfo_aarch64.h" +#include "cpuinfo_arm.h" +#include "cpuinfo_mips.h" +#include "cpuinfo_ppc.h" +#include "cpuinfo_x86.h" + +static void PrintEscapedAscii(const char* str) { + putchar('"'); + for (; str && *str; ++str) { + switch (*str) { + case '\"': + case '\\': + case '/': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + putchar('\\'); + } + putchar(*str); + } + putchar('"'); +} + +static void PrintVoid(void) {} +static void PrintComma(void) { putchar(','); } +static void PrintLineFeed(void) { putchar('\n'); } +static void PrintOpenBrace(void) { putchar('{'); } +static void PrintCloseBrace(void) { putchar('}'); } +static void PrintOpenBracket(void) { putchar('['); } +static void PrintCloseBracket(void) { putchar(']'); } +static void PrintString(const char* field) { printf("%s", field); } +static void PrintAlignedHeader(const char* field) { printf("%-15s : ", field); } +static void PrintIntValue(int value) { printf("%d", value); } +static void PrintDecHexValue(int value) { + printf("%3d (0x%02X)", value, value); +} +static void PrintJsonHeader(const char* field) { + PrintEscapedAscii(field); + putchar(':'); +} + +typedef struct { + void (*Start)(void); + void (*ArrayStart)(void); + void (*ArraySeparator)(void); + void (*ArrayEnd)(void); + void (*PrintString)(const char* value); + void (*PrintValue)(int value); + void (*EndField)(void); + void (*StartField)(const char* field); + void (*End)(void); +} Printer; + +static Printer getJsonPrinter(void) { 
+ return (Printer){ + .Start = &PrintOpenBrace, + .ArrayStart = &PrintOpenBracket, + .ArraySeparator = &PrintComma, + .ArrayEnd = &PrintCloseBracket, + .PrintString = &PrintEscapedAscii, + .PrintValue = &PrintIntValue, + .EndField = &PrintComma, + .StartField = &PrintJsonHeader, + .End = &PrintCloseBrace, + }; +} + +static Printer getTextPrinter(void) { + return (Printer){ + .Start = &PrintVoid, + .ArrayStart = &PrintVoid, + .ArraySeparator = &PrintComma, + .ArrayEnd = &PrintVoid, + .PrintString = &PrintString, + .PrintValue = &PrintDecHexValue, + .EndField = &PrintLineFeed, + .StartField = &PrintAlignedHeader, + .End = &PrintVoid, + }; +} + +// Prints a named numeric value in both decimal and hexadecimal. +static void PrintN(const Printer p, const char* field, int value) { + p.StartField(field); + p.PrintValue(value); + p.EndField(); +} + +// Prints a named string. +static void PrintS(const Printer p, const char* field, const char* value) { + p.StartField(field); + p.PrintString(value); + p.EndField(); +} + +static int cmp(const void* p1, const void* p2) { + return strcmp(*(const char* const*)p1, *(const char* const*)p2); +} + +#define DEFINE_PRINT_FLAGS(HasFeature, FeatureName, FeatureType, LastEnum) \ + static void PrintFlags(const Printer p, const FeatureType* features) { \ + size_t i; \ + const char* ptrs[LastEnum] = {0}; \ + size_t count = 0; \ + for (i = 0; i < LastEnum; ++i) { \ + if (HasFeature(features, i)) { \ + ptrs[count] = FeatureName(i); \ + ++count; \ + } \ + } \ + qsort(ptrs, count, sizeof(char*), cmp); \ + p.StartField("flags"); \ + p.ArrayStart(); \ + for (i = 0; i < count; ++i) { \ + if (i > 0) p.ArraySeparator(); \ + p.PrintString(ptrs[i]); \ + } \ + p.ArrayEnd(); \ + } + +#if defined(CPU_FEATURES_ARCH_X86) +DEFINE_PRINT_FLAGS(GetX86FeaturesEnumValue, GetX86FeaturesEnumName, X86Features, + X86_LAST_) +#elif defined(CPU_FEATURES_ARCH_ARM) +DEFINE_PRINT_FLAGS(GetArmFeaturesEnumValue, GetArmFeaturesEnumName, ArmFeatures, + ARM_LAST_) +#elif 
defined(CPU_FEATURES_ARCH_AARCH64) +DEFINE_PRINT_FLAGS(GetAarch64FeaturesEnumValue, GetAarch64FeaturesEnumName, + Aarch64Features, AARCH64_LAST_) +#elif defined(CPU_FEATURES_ARCH_MIPS) +DEFINE_PRINT_FLAGS(GetMipsFeaturesEnumValue, GetMipsFeaturesEnumName, + MipsFeatures, MIPS_LAST_) +#elif defined(CPU_FEATURES_ARCH_PPC) +DEFINE_PRINT_FLAGS(GetPPCFeaturesEnumValue, GetPPCFeaturesEnumName, PPCFeatures, + PPC_LAST_) +#endif + +static void PrintFeatures(const Printer printer) { +#if defined(CPU_FEATURES_ARCH_X86) + char brand_string[49]; + const X86Info info = GetX86Info(); + FillX86BrandString(brand_string); + PrintS(printer, "arch", "x86"); + PrintS(printer, "brand", brand_string); + PrintN(printer, "family", info.family); + PrintN(printer, "model", info.model); + PrintN(printer, "stepping", info.stepping); + PrintS(printer, "uarch", + GetX86MicroarchitectureName(GetX86Microarchitecture(&info))); + PrintFlags(printer, &info.features); +#elif defined(CPU_FEATURES_ARCH_ARM) + const ArmInfo info = GetArmInfo(); + PrintS(printer, "arch", "ARM"); + PrintN(printer, "implementer", info.implementer); + PrintN(printer, "architecture", info.architecture); + PrintN(printer, "variant", info.variant); + PrintN(printer, "part", info.part); + PrintN(printer, "revision", info.revision); + PrintFlags(printer, &info.features); +#elif defined(CPU_FEATURES_ARCH_AARCH64) + const Aarch64Info info = GetAarch64Info(); + PrintS(printer, "arch", "aarch64"); + PrintN(printer, "implementer", info.implementer); + PrintN(printer, "variant", info.variant); + PrintN(printer, "part", info.part); + PrintN(printer, "revision", info.revision); + PrintFlags(printer, &info.features); +#elif defined(CPU_FEATURES_ARCH_MIPS) + const MipsInfo info = GetMipsInfo(); + PrintS(printer, "arch", "mips"); + PrintFlags(printer, &info.features); +#elif defined(CPU_FEATURES_ARCH_PPC) + const PPCInfo info = GetPPCInfo(); + const PPCPlatformStrings strings = GetPPCPlatformStrings(); + PrintS(printer, "arch", "ppc"); + 
PrintS(printer, "platform", strings.platform); + PrintS(printer, "model", strings.model); + PrintS(printer, "machine", strings.machine); + PrintS(printer, "cpu", strings.cpu); + PrintS(printer, "instruction set", strings.type.platform); + PrintS(printer, "microarchitecture", strings.type.base_platform); + PrintFlags(printer, &info.features); +#endif +} + +static void showUsage(const char* name) { + printf( + "\n" + "Usage: %s [options]\n" + " Options:\n" + " -h | --help Show help message.\n" + " -j | --json Format output as json instead of plain text.\n" + "\n", + name); +} + +int main(int argc, char** argv) { + Printer printer = getTextPrinter(); + int i = 1; + for (; i < argc; ++i) { + const char* arg = argv[i]; + if (strcmp(arg, "-j") == 0 || strcmp(arg, "--json") == 0) { + printer = getJsonPrinter(); + } else { + showUsage(argv[0]); + if (strcmp(arg, "-h") == 0 || strcmp(arg, "--help") == 0) + return EXIT_SUCCESS; + return EXIT_FAILURE; + } + } + printer.Start(); + PrintFeatures(printer); + printer.End(); + PrintLineFeed(); + return EXIT_SUCCESS; +} diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/CMakeLists.txt b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/CMakeLists.txt new file mode 100755 index 00000000..794ef04b --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/CMakeLists.txt @@ -0,0 +1,79 @@ +# +# libraries for tests +# + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) # prefer use of -std11 instead of -gnustd11 + +include_directories(../include) +add_definitions(-DCPU_FEATURES_TEST) + +##------------------------------------------------------------------------------ +add_library(string_view ../src/string_view.c) +##------------------------------------------------------------------------------ +add_library(filesystem_for_testing filesystem_for_testing.cc) +##------------------------------------------------------------------------------ 
+add_library(hwcaps_for_testing hwcaps_for_testing.cc) +target_link_libraries(hwcaps_for_testing filesystem_for_testing) +##------------------------------------------------------------------------------ +add_library(stack_line_reader ../src/stack_line_reader.c) +target_compile_definitions(stack_line_reader PUBLIC STACK_LINE_READER_BUFFER_SIZE=1024) +target_link_libraries(stack_line_reader string_view) +##------------------------------------------------------------------------------ +add_library(stack_line_reader_for_test ../src/stack_line_reader.c) +target_compile_definitions(stack_line_reader_for_test PUBLIC STACK_LINE_READER_BUFFER_SIZE=16) +target_link_libraries(stack_line_reader_for_test string_view filesystem_for_testing) +##------------------------------------------------------------------------------ +add_library(all_libraries ../src/stack_line_reader.c ../src/linux_features_aggregator.c) +target_link_libraries(all_libraries hwcaps_for_testing stack_line_reader string_view) + +# +# tests +# +link_libraries(gtest gmock_main) + +## bit_utils_test +add_executable(bit_utils_test bit_utils_test.cc) +target_link_libraries(bit_utils_test) +add_test(NAME bit_utils_test COMMAND bit_utils_test) +##------------------------------------------------------------------------------ +## string_view_test +add_executable(string_view_test string_view_test.cc ../src/string_view.c) +target_link_libraries(string_view_test string_view) +add_test(NAME string_view_test COMMAND string_view_test) +##------------------------------------------------------------------------------ +## stack_line_reader_test +add_executable(stack_line_reader_test stack_line_reader_test.cc) +target_link_libraries(stack_line_reader_test stack_line_reader_for_test) +add_test(NAME stack_line_reader_test COMMAND stack_line_reader_test) +##------------------------------------------------------------------------------ +## linux_features_aggregator_test +add_executable(linux_features_aggregator_test 
linux_features_aggregator_test.cc) +target_link_libraries(linux_features_aggregator_test all_libraries) +add_test(NAME linux_features_aggregator_test COMMAND linux_features_aggregator_test) +##------------------------------------------------------------------------------ +## cpuinfo_x86_test +add_executable(cpuinfo_x86_test cpuinfo_x86_test.cc ../src/cpuinfo_x86.c) +target_link_libraries(cpuinfo_x86_test all_libraries) +add_test(NAME cpuinfo_x86_test COMMAND cpuinfo_x86_test) +##------------------------------------------------------------------------------ +## cpuinfo_arm_test +add_executable(cpuinfo_arm_test cpuinfo_arm_test.cc ../src/cpuinfo_arm.c) +target_link_libraries(cpuinfo_arm_test all_libraries) +add_test(NAME cpuinfo_arm_test COMMAND cpuinfo_arm_test) +##------------------------------------------------------------------------------ +## cpuinfo_aarch64_test +add_executable(cpuinfo_aarch64_test cpuinfo_aarch64_test.cc ../src/cpuinfo_aarch64.c) +target_link_libraries(cpuinfo_aarch64_test all_libraries) +add_test(NAME cpuinfo_aarch64_test COMMAND cpuinfo_aarch64_test) +##------------------------------------------------------------------------------ +## cpuinfo_mips_test +add_executable(cpuinfo_mips_test cpuinfo_mips_test.cc ../src/cpuinfo_mips.c) +target_link_libraries(cpuinfo_mips_test all_libraries) +add_test(NAME cpuinfo_mips_test COMMAND cpuinfo_mips_test) +##------------------------------------------------------------------------------ +## cpuinfo_ppc_test +add_executable(cpuinfo_ppc_test cpuinfo_ppc_test.cc ../src/cpuinfo_ppc.c) +target_link_libraries(cpuinfo_ppc_test all_libraries) +add_test(NAME cpuinfo_ppc_test COMMAND cpuinfo_ppc_test) diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/bit_utils_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/bit_utils_test.cc new file mode 100755 index 00000000..8937cbc2 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/bit_utils_test.cc @@ -0,0 +1,53 @@ +// 
Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "internal/bit_utils.h" + +#include "gtest/gtest.h" + +namespace cpu_features { +namespace { + +TEST(UtilsTest, IsBitSet) { + for (size_t bit_set = 0; bit_set < 32; ++bit_set) { + const uint32_t value = 1UL << bit_set; + for (size_t i = 0; i < 32; ++i) { + EXPECT_EQ(IsBitSet(value, i), i == bit_set); + } + } + + // testing 0, all bits should be 0. + for (size_t i = 0; i < 32; ++i) { + EXPECT_FALSE(IsBitSet(0, i)); + } + + // testing ~0, all bits should be 1. + for (size_t i = 0; i < 32; ++i) { + EXPECT_TRUE(IsBitSet(-1, i)); + } +} + +TEST(UtilsTest, ExtractBitRange) { + // Extracting all bits gives the same number. + EXPECT_EQ(ExtractBitRange(123, 31, 0), 123); + // Extracting 1 bit gives parity. + EXPECT_EQ(ExtractBitRange(123, 0, 0), 1); + EXPECT_EQ(ExtractBitRange(122, 0, 0), 0); + + EXPECT_EQ(ExtractBitRange(0xF0, 7, 4), 0xF); + EXPECT_EQ(ExtractBitRange(0x42 << 2, 10, 2), 0x42); +} + +} // namespace +} // namespace cpu_features diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_aarch64_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_aarch64_test.cc new file mode 100755 index 00000000..bdb4d17c --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_aarch64_test.cc @@ -0,0 +1,74 @@ +// Copyright 2017 Google Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpuinfo_aarch64.h" +#include "filesystem_for_testing.h" +#include "hwcaps_for_testing.h" + +#include "gtest/gtest.h" + +namespace cpu_features { +namespace { + +void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); } + +TEST(CpuinfoAarch64Test, FromHardwareCap) { + SetHardwareCapabilities(AARCH64_HWCAP_FP | AARCH64_HWCAP_AES, 0); + GetEmptyFilesystem(); // disabling /proc/cpuinfo + const auto info = GetAarch64Info(); + EXPECT_TRUE(info.features.fp); + EXPECT_FALSE(info.features.asimd); + EXPECT_TRUE(info.features.aes); + EXPECT_FALSE(info.features.pmull); + EXPECT_FALSE(info.features.sha1); + EXPECT_FALSE(info.features.sha2); + EXPECT_FALSE(info.features.crc32); +} + +TEST(CpuinfoAarch64Test, ARMCortexA53) { + DisableHardwareCapabilities(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(Processor : AArch64 Processor rev 3 (aarch64) +processor : 0 +processor : 1 +processor : 2 +processor : 3 +processor : 4 +processor : 5 +processor : 6 +processor : 7 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 +CPU implementer : 0x41 +CPU architecture: AArch64 +CPU variant : 0x0 +CPU part : 0xd03 +CPU revision : 3)"); + const auto info = GetAarch64Info(); + EXPECT_EQ(info.implementer, 0x41); + EXPECT_EQ(info.variant, 0x0); + EXPECT_EQ(info.part, 0xd03); + EXPECT_EQ(info.revision, 3); + + EXPECT_TRUE(info.features.fp); + EXPECT_TRUE(info.features.asimd); + 
EXPECT_TRUE(info.features.aes); + EXPECT_TRUE(info.features.pmull); + EXPECT_TRUE(info.features.sha1); + EXPECT_TRUE(info.features.sha2); + EXPECT_TRUE(info.features.crc32); +} + +} // namespace +} // namespace cpu_features diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_arm_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_arm_test.cc new file mode 100755 index 00000000..a72c5662 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_arm_test.cc @@ -0,0 +1,182 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "cpuinfo_arm.h" +#include "filesystem_for_testing.h" +#include "hwcaps_for_testing.h" + +#include "gtest/gtest.h" + +namespace cpu_features { +namespace { + +void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); } + +TEST(CpuinfoArmTest, FromHardwareCap) { + SetHardwareCapabilities(ARM_HWCAP_NEON, ARM_HWCAP2_AES | ARM_HWCAP2_CRC32); + GetEmptyFilesystem(); // disabling /proc/cpuinfo + const auto info = GetArmInfo(); + EXPECT_TRUE(info.features.vfp); // triggered by vfpv3 + EXPECT_TRUE(info.features.vfpv3); // triggered by neon + EXPECT_TRUE(info.features.neon); + EXPECT_TRUE(info.features.aes); + EXPECT_TRUE(info.features.crc32); + + EXPECT_FALSE(info.features.vfpv4); + EXPECT_FALSE(info.features.iwmmxt); + EXPECT_FALSE(info.features.vfpv3d16); + EXPECT_FALSE(info.features.idiva); + EXPECT_FALSE(info.features.idivt); + EXPECT_FALSE(info.features.pmull); + EXPECT_FALSE(info.features.sha1); + EXPECT_FALSE(info.features.sha2); +} + +TEST(CpuinfoArmTest, ODroidFromCpuInfo) { + DisableHardwareCapabilities(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", R"(processor : 0 +model name : ARMv7 Processor rev 3 (v71) +BogoMIPS : 120.00 +Features : half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt vfpd32 lpae +CPU implementer : 0x41 +CPU architecture: 7 +CPU variant : 0x2 +CPU part : 0xc0f +CPU revision : 3)"); + const auto info = GetArmInfo(); + EXPECT_EQ(info.implementer, 0x41); + EXPECT_EQ(info.variant, 0x2); + EXPECT_EQ(info.part, 0xc0f); + EXPECT_EQ(info.revision, 3); + EXPECT_EQ(info.architecture, 7); + + EXPECT_TRUE(info.features.vfp); + EXPECT_FALSE(info.features.iwmmxt); + EXPECT_TRUE(info.features.neon); + EXPECT_TRUE(info.features.vfpv3); + EXPECT_FALSE(info.features.vfpv3d16); + EXPECT_TRUE(info.features.vfpv4); + EXPECT_TRUE(info.features.idiva); + EXPECT_TRUE(info.features.idivt); + EXPECT_FALSE(info.features.aes); + EXPECT_FALSE(info.features.pmull); + EXPECT_FALSE(info.features.sha1); + 
EXPECT_FALSE(info.features.sha2); + EXPECT_FALSE(info.features.crc32); +} + +// http://code.google.com/p/android/issues/detail?id=10812 +TEST(CpuinfoArmTest, InvalidArmv7) { + DisableHardwareCapabilities(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(Processor : ARMv6-compatible processor rev 6 (v6l) +BogoMIPS : 199.47 +Features : swp half thumb fastmult vfp edsp java +CPU implementer : 0x41 +CPU architecture: 7 +CPU variant : 0x0 +CPU part : 0xb76 +CPU revision : 6 + +Hardware : SPICA +Revision : 0020 +Serial : 33323613546d00ec )"); + const auto info = GetArmInfo(); + EXPECT_EQ(info.architecture, 6); +} + +// https://crbug.com/341598. +TEST(CpuinfoArmTest, InvalidNeon) { + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(Processor: ARMv7 Processory rev 0 (v71) +processor: 0 +BogoMIPS: 13.50 + +Processor: 1 +BogoMIPS: 13.50 + +Features: swp half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt +CPU implementer : 0x51 +CPU architecture: 7 +CPU variant: 0x1 +CPU part: 0x04d +CPU revision: 0 + +Hardware: SAMSUNG M2 +Revision: 0010 +Serial: 00001e030000354e)"); + const auto info = GetArmInfo(); + EXPECT_FALSE(info.features.neon); +} + +// The Nexus 4 (Qualcomm Krait) kernel configuration forgets to report IDIV +// support. +TEST(CpuinfoArmTest, Nexus4_0x510006f2) { + DisableHardwareCapabilities(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(CPU implementer : 0x51 +CPU architecture: 7 +CPU variant : 0x0 +CPU part : 0x6f +CPU revision : 2)"); + const auto info = GetArmInfo(); + EXPECT_TRUE(info.features.idiva); + EXPECT_TRUE(info.features.idivt); +} + +// The Nexus 4 (Qualcomm Krait) kernel configuration forgets to report IDIV +// support. 
+TEST(CpuinfoArmTest, Nexus4_0x510006f3) { + DisableHardwareCapabilities(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(CPU implementer : 0x51 +CPU architecture: 7 +CPU variant : 0x0 +CPU part : 0x6f +CPU revision : 3)"); + const auto info = GetArmInfo(); + EXPECT_TRUE(info.features.idiva); + EXPECT_TRUE(info.features.idivt); +} + +// The emulator-specific Android 4.2 kernel fails to report support for the +// 32-bit ARM IDIV instruction. Technically, this is a feature of the virtual +// CPU implemented by the emulator. +TEST(CpuinfoArmTest, EmulatorSpecificIdiv) { + DisableHardwareCapabilities(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(Processor : ARMv7 Processor rev 0 (v7l) +BogoMIPS : 629.14 +Features : swp half thumb fastmult vfp edsp neon vfpv3 +CPU implementer : 0x41 +CPU architecture: 7 +CPU variant : 0x0 +CPU part : 0xc08 +CPU revision : 0 + +Hardware : Goldfish +Revision : 0000 +Serial : 0000000000000000)"); + const auto info = GetArmInfo(); + EXPECT_TRUE(info.features.idiva); +} + +} // namespace +} // namespace cpu_features diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_mips_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_mips_test.cc new file mode 100755 index 00000000..7c5a6752 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_mips_test.cc @@ -0,0 +1,125 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpuinfo_mips.h" +#include "filesystem_for_testing.h" +#include "hwcaps_for_testing.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" + +#include "gtest/gtest.h" + +namespace cpu_features { + +namespace { + +void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); } + +TEST(CpuinfoMipsTest, FromHardwareCapBoth) { + SetHardwareCapabilities(MIPS_HWCAP_EVA | MIPS_HWCAP_MSA, 0); + GetEmptyFilesystem(); // disabling /proc/cpuinfo + const auto info = GetMipsInfo(); + EXPECT_TRUE(info.features.msa); + EXPECT_TRUE(info.features.eva); +} + +TEST(CpuinfoMipsTest, FromHardwareCapOnlyOne) { + SetHardwareCapabilities(MIPS_HWCAP_MSA, 0); + GetEmptyFilesystem(); // disabling /proc/cpuinfo + const auto info = GetMipsInfo(); + EXPECT_TRUE(info.features.msa); + EXPECT_FALSE(info.features.eva); +} + +TEST(CpuinfoMipsTest, Ci40) { + DisableHardwareCapabilities(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", R"(system type : IMG Pistachio SoC (B0) +machine : IMG Marduk – Ci40 with cc2520 +processor : 0 +cpu model : MIPS interAptiv (multi) V2.0 FPU V0.0 +BogoMIPS : 363.72 +wait instruction : yes +microsecond timers : yes +tlb_entries : 64 +extra interrupt vector : yes +hardware watchpoint : yes, count: 4, address/irw mask: [0x0ffc, 0x0ffc, 0x0ffb, 0x0ffb] +isa : mips1 mips2 mips32r1 mips32r2 +ASEs implemented : mips16 dsp mt eva +shadow register sets : 1 +kscratch registers : 0 +package : 0 +core : 0 +VCED exceptions : not available +VCEI exceptions : not available +VPE : 0 +)"); + const auto info = GetMipsInfo(); + EXPECT_FALSE(info.features.msa); + EXPECT_TRUE(info.features.eva); +} + +TEST(CpuinfoMipsTest, AR7161) { + DisableHardwareCapabilities(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(system type : Atheros AR7161 rev 2 +machine : NETGEAR WNDR3700/WNDR3800/WNDRMAC 
+processor : 0 +cpu model : MIPS 24Kc V7.4 +BogoMIPS : 452.19 +wait instruction : yes +microsecond timers : yes +tlb_entries : 16 +extra interrupt vector : yes +hardware watchpoint : yes, count: 4, address/irw mask: [0x0000, 0x0f98, 0x0f78, 0x0df8] +ASEs implemented : mips16 +shadow register sets : 1 +kscratch registers : 0 +core : 0 +VCED exceptions : not available +VCEI exceptions : not available +)"); + const auto info = GetMipsInfo(); + EXPECT_FALSE(info.features.msa); + EXPECT_FALSE(info.features.eva); +} + +TEST(CpuinfoMipsTest, Goldfish) { + DisableHardwareCapabilities(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", R"(system type : MIPS-Goldfish +Hardware : goldfish +Revison : 1 +processor : 0 +cpu model : MIPS 24Kc V0.0 FPU V0.0 +BogoMIPS : 1042.02 +wait instruction : yes +microsecond timers : yes +tlb_entries : 16 +extra interrupt vector : yes +hardware watchpoint : yes, count: 1, address/irw mask: [0x0ff8] +ASEs implemented : +shadow register sets : 1 +core : 0 +VCED exceptions : not available +VCEI exceptions : not available +)"); + const auto info = GetMipsInfo(); + EXPECT_FALSE(info.features.msa); + EXPECT_FALSE(info.features.eva); +} + +} // namespace +} // namespace cpu_features diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_ppc_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_ppc_test.cc new file mode 100755 index 00000000..5d5e7980 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_ppc_test.cc @@ -0,0 +1,119 @@ +// Copyright 2018 IBM. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpuinfo_ppc.h" +#include "filesystem_for_testing.h" +#include "hwcaps_for_testing.h" +#include "internal/string_view.h" + +#include "gtest/gtest.h" + +namespace cpu_features { +namespace { + +void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); } + +TEST(CpustringsPPCTest, FromHardwareCap) { + SetHardwareCapabilities(PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_VSX, + PPC_FEATURE2_ARCH_3_00); + GetEmptyFilesystem(); // disabling /proc/cpuinfo + const auto info = GetPPCInfo(); + EXPECT_TRUE(info.features.fpu); + EXPECT_FALSE(info.features.mmu); + EXPECT_TRUE(info.features.vsx); + EXPECT_TRUE(info.features.arch300); + EXPECT_FALSE(info.features.power4); + EXPECT_FALSE(info.features.altivec); + EXPECT_FALSE(info.features.vcrypto); + EXPECT_FALSE(info.features.htm); +} + +TEST(CpustringsPPCTest, Blade) { + DisableHardwareCapabilities(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(processor : 14 +cpu : POWER7 (architected), altivec supported +clock : 3000.000000MHz +revision : 2.1 (pvr 003f 0201) + +processor : 15 +cpu : POWER7 (architected), altivec supported +clock : 3000.000000MHz +revision : 2.1 (pvr 003f 0201) + +timebase : 512000000 +platform : pSeries +model : IBM,8406-70Y +machine : CHRP IBM,8406-70Y)"); + SetPlatformTypes("power7", "power8"); + const auto strings = GetPPCPlatformStrings(); + ASSERT_STREQ(strings.platform, "pSeries"); + ASSERT_STREQ(strings.model, "IBM,8406-70Y"); + ASSERT_STREQ(strings.machine, "CHRP IBM,8406-70Y"); + ASSERT_STREQ(strings.cpu, "POWER7 (architected), altivec 
supported"); + ASSERT_STREQ(strings.type.platform, "power7"); + ASSERT_STREQ(strings.type.base_platform, "power8"); +} + +TEST(CpustringsPPCTest, Firestone) { + DisableHardwareCapabilities(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(processor : 126 +cpu : POWER8 (raw), altivec supported +clock : 2061.000000MHz +revision : 2.0 (pvr 004d 0200) + +processor : 127 +cpu : POWER8 (raw), altivec supported +clock : 2061.000000MHz +revision : 2.0 (pvr 004d 0200) + +timebase : 512000000 +platform : PowerNV +model : 8335-GTA +machine : PowerNV 8335-GTA +firmware : OPAL v3)"); + const auto strings = GetPPCPlatformStrings(); + ASSERT_STREQ(strings.platform, "PowerNV"); + ASSERT_STREQ(strings.model, "8335-GTA"); + ASSERT_STREQ(strings.machine, "PowerNV 8335-GTA"); + ASSERT_STREQ(strings.cpu, "POWER8 (raw), altivec supported"); +} + +TEST(CpustringsPPCTest, w8) { + DisableHardwareCapabilities(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(processor : 143 +cpu : POWER9, altivec supported +clock : 2300.000000MHz +revision : 2.2 (pvr 004e 1202) + +timebase : 512000000 +platform : PowerNV +model : 0000000000000000 +machine : PowerNV 0000000000000000 +firmware : OPAL +MMU : Radix)"); + const auto strings = GetPPCPlatformStrings(); + ASSERT_STREQ(strings.platform, "PowerNV"); + ASSERT_STREQ(strings.model, "0000000000000000"); + ASSERT_STREQ(strings.machine, "PowerNV 0000000000000000"); + ASSERT_STREQ(strings.cpu, "POWER9, altivec supported"); +} + +} // namespace +} // namespace cpu_features diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_x86_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_x86_test.cc new file mode 100755 index 00000000..f7fc0817 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_x86_test.cc @@ -0,0 +1,172 @@ +// Copyright 2017 Google Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "gtest/gtest.h" + +#include "cpuinfo_x86.h" +#include "internal/cpuid_x86.h" + +namespace cpu_features { + +class FakeCpu { + public: + Leaf CpuId(uint32_t leaf_id) const { + const auto itr = cpuid_leaves_.find(leaf_id); + EXPECT_TRUE(itr != cpuid_leaves_.end()) << "Missing leaf " << leaf_id; + return itr->second; + } + + uint32_t GetXCR0Eax() const { return xcr0_eax_; } + + void SetLeaves(std::map configuration) { + cpuid_leaves_ = std::move(configuration); + } + + void SetOsBackupsExtendedRegisters(bool os_backups_extended_registers) { + xcr0_eax_ = os_backups_extended_registers ? 
-1 : 0; + } + + private: + std::map cpuid_leaves_; + uint32_t xcr0_eax_; +}; + +auto* g_fake_cpu = new FakeCpu(); + +extern "C" Leaf CpuId(uint32_t leaf_id) { return g_fake_cpu->CpuId(leaf_id); } +extern "C" uint32_t GetXCR0Eax(void) { return g_fake_cpu->GetXCR0Eax(); } + +namespace { + +TEST(CpuidX86Test, SandyBridge) { + g_fake_cpu->SetOsBackupsExtendedRegisters(true); + g_fake_cpu->SetLeaves({ + {0x00000000, Leaf{0x0000000D, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {0x00000001, Leaf{0x000206A6, 0x00100800, 0x1F9AE3BF, 0xBFEBFBFF}}, + {0x00000007, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + }); + const auto info = GetX86Info(); + EXPECT_STREQ(info.vendor, "GenuineIntel"); + EXPECT_EQ(info.family, 0x06); + EXPECT_EQ(info.model, 0x02A); + EXPECT_EQ(info.stepping, 0x06); + // Leaf 7 is zeroed out so none of the Leaf 7 flags are set. + const auto features = info.features; + EXPECT_FALSE(features.erms); + EXPECT_FALSE(features.avx2); + EXPECT_FALSE(features.avx512f); + EXPECT_FALSE(features.avx512cd); + EXPECT_FALSE(features.avx512er); + EXPECT_FALSE(features.avx512pf); + EXPECT_FALSE(features.avx512bw); + EXPECT_FALSE(features.avx512dq); + EXPECT_FALSE(features.avx512vl); + EXPECT_FALSE(features.avx512ifma); + EXPECT_FALSE(features.avx512vbmi); + EXPECT_FALSE(features.avx512vbmi2); + EXPECT_FALSE(features.avx512vnni); + EXPECT_FALSE(features.avx512bitalg); + EXPECT_FALSE(features.avx512vpopcntdq); + EXPECT_FALSE(features.avx512_4vnniw); + EXPECT_FALSE(features.avx512_4vbmi2); + // All old cpu features should be set. 
+ EXPECT_TRUE(features.aes); + EXPECT_TRUE(features.ssse3); + EXPECT_TRUE(features.sse4_1); + EXPECT_TRUE(features.sse4_2); + EXPECT_TRUE(features.avx); +} + +TEST(CpuidX86Test, SandyBridgeTestOsSupport) { + g_fake_cpu->SetLeaves({ + {0x00000000, Leaf{0x0000000D, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {0x00000001, Leaf{0x000206A6, 0x00100800, 0x1F9AE3BF, 0xBFEBFBFF}}, + {0x00000007, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + }); + // avx is disabled if os does not support backing up ymm registers. + g_fake_cpu->SetOsBackupsExtendedRegisters(false); + EXPECT_FALSE(GetX86Info().features.avx); + // avx is enabled if os supports backing up ymm registers. + g_fake_cpu->SetOsBackupsExtendedRegisters(true); + EXPECT_TRUE(GetX86Info().features.avx); +} + +TEST(CpuidX86Test, SkyLake) { + g_fake_cpu->SetOsBackupsExtendedRegisters(true); + g_fake_cpu->SetLeaves({ + {0x00000000, Leaf{0x00000016, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {0x00000001, Leaf{0x000406E3, 0x00100800, 0x7FFAFBBF, 0xBFEBFBFF}}, + {0x00000007, Leaf{0x00000000, 0x029C67AF, 0x00000000, 0x00000000}}, + }); + const auto info = GetX86Info(); + EXPECT_STREQ(info.vendor, "GenuineIntel"); + EXPECT_EQ(info.family, 0x06); + EXPECT_EQ(info.model, 0x04E); + EXPECT_EQ(info.stepping, 0x03); + EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::INTEL_SKL); +} + +TEST(CpuidX86Test, Branding) { + g_fake_cpu->SetLeaves({ + {0x00000000, Leaf{0x00000016, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {0x00000001, Leaf{0x000406E3, 0x00100800, 0x7FFAFBBF, 0xBFEBFBFF}}, + {0x00000007, Leaf{0x00000000, 0x029C67AF, 0x00000000, 0x00000000}}, + {0x80000000, Leaf{0x80000008, 0x00000000, 0x00000000, 0x00000000}}, + {0x80000001, Leaf{0x00000000, 0x00000000, 0x00000121, 0x2C100000}}, + {0x80000002, Leaf{0x65746E49, 0x2952286C, 0x726F4320, 0x4D542865}}, + {0x80000003, Leaf{0x37692029, 0x3035362D, 0x43205530, 0x40205550}}, + {0x80000004, Leaf{0x352E3220, 0x7A484730, 0x00000000, 0x00000000}}, + }); + char
brand_string[49]; + FillX86BrandString(brand_string); + EXPECT_STREQ(brand_string, "Intel(R) Core(TM) i7-6500U CPU @ 2.50GHz"); +} + +// http://users.atw.hu/instlatx64/AuthenticAMD0630F81_K15_Godavari_CPUID.txt +TEST(CpuidX86Test, AMD_K15) { + g_fake_cpu->SetLeaves({ + {0x00000000, Leaf{0x0000000D, 0x68747541, 0x444D4163, 0x69746E65}}, + {0x00000001, Leaf{0x00630F81, 0x00040800, 0x3E98320B, 0x178BFBFF}}, + {0x00000007, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + {0x80000000, Leaf{0x8000001E, 0x68747541, 0x444D4163, 0x69746E65}}, + {0x80000001, Leaf{0x00630F81, 0x10000000, 0x0FEBBFFF, 0x2FD3FBFF}}, + {0x80000002, Leaf{0x20444D41, 0x372D3841, 0x4B303736, 0x64615220}}, + {0x80000003, Leaf{0x206E6F65, 0x202C3752, 0x43203031, 0x75706D6F}}, + {0x80000004, Leaf{0x43206574, 0x7365726F, 0x2B433420, 0x00204736}}, + {0x80000005, Leaf{0xFF40FF18, 0xFF40FF30, 0x10040140, 0x60030140}}, + }); + const auto info = GetX86Info(); + + EXPECT_STREQ(info.vendor, "AuthenticAMD"); + EXPECT_EQ(info.family, 0x15); + EXPECT_EQ(info.model, 0x38); + EXPECT_EQ(info.stepping, 0x01); + EXPECT_EQ(GetX86Microarchitecture(&info), + X86Microarchitecture::AMD_BULLDOZER); + + char brand_string[49]; + FillX86BrandString(brand_string); + EXPECT_STREQ(brand_string, "AMD A8-7670K Radeon R7, 10 Compute Cores 4C+6G "); +} + +// TODO(user): test what happens when xsave/osxsave are not present. +// TODO(user): test what happens when xmm/ymm/zmm os support are not +// present. + +} // namespace +} // namespace cpu_features diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/filesystem_for_testing.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/filesystem_for_testing.cc new file mode 100755 index 00000000..4554c1f0 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/filesystem_for_testing.cc @@ -0,0 +1,103 @@ +// Copyright 2017 Google Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "filesystem_for_testing.h" + +#include +#include +#include +#include +#include + +namespace cpu_features { + +FakeFile::FakeFile(int file_descriptor, const char* content) + : file_descriptor_(file_descriptor), content_(content) {} + +FakeFile::~FakeFile() { assert(!opened_); } + +void FakeFile::Open() { + assert(!opened_); + opened_ = true; +} + +void FakeFile::Close() { + assert(opened_); + opened_ = false; +} + +int FakeFile::Read(int fd, void* buf, size_t count) { + assert(count < INT_MAX); + assert(fd == file_descriptor_); + const size_t remainder = content_.size() - head_index_; + const size_t read = count > remainder ? remainder : count; + memcpy(buf, content_.data() + head_index_, read); + head_index_ += read; + assert(read < INT_MAX); + return read; +} + +void FakeFilesystem::Reset() { files_.clear(); } + +FakeFile* FakeFilesystem::CreateFile(const std::string& filename, + const char* content) { + auto& file = files_[filename]; + file = + std::unique_ptr(new FakeFile(next_file_descriptor_++, content)); + return file.get(); +} + +FakeFile* FakeFilesystem::FindFileOrNull(const std::string& filename) const { + const auto itr = files_.find(filename); + return itr == files_.end() ? 
nullptr : itr->second.get(); +} + +FakeFile* FakeFilesystem::FindFileOrDie(const int file_descriptor) const { + for (const auto& filename_file_pair : files_) { + FakeFile* const file_ptr = filename_file_pair.second.get(); + if (file_ptr->GetFileDescriptor() == file_descriptor) { + return file_ptr; + } + } + assert(false); + return nullptr; +} + +static FakeFilesystem* kFilesystem = new FakeFilesystem(); + +FakeFilesystem& GetEmptyFilesystem() { + kFilesystem->Reset(); + return *kFilesystem; +} + +extern "C" int CpuFeatures_OpenFile(const char* filename) { + auto* const file = kFilesystem->FindFileOrNull(filename); + if (file) { + file->Open(); + return file->GetFileDescriptor(); + } + return -1; +} + +extern "C" void CpuFeatures_CloseFile(int file_descriptor) { + kFilesystem->FindFileOrDie(file_descriptor)->Close(); +} + +extern "C" int CpuFeatures_ReadFile(int file_descriptor, void* buffer, + size_t buffer_size) { + return kFilesystem->FindFileOrDie(file_descriptor) + ->Read(file_descriptor, buffer, buffer_size); +} + +} // namespace cpu_features diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/filesystem_for_testing.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/filesystem_for_testing.h new file mode 100755 index 00000000..ca269e52 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/filesystem_for_testing.h @@ -0,0 +1,61 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Implements a fake filesystem, useful for tests. +#ifndef CPU_FEATURES_TEST_FILESYSTEM_FOR_TESTING_H_ +#define CPU_FEATURES_TEST_FILESYSTEM_FOR_TESTING_H_ + +#include +#include +#include + +#include "internal/filesystem.h" + +namespace cpu_features { + +class FakeFile { + public: + explicit FakeFile(int file_descriptor, const char* content); + ~FakeFile(); + + void Open(); + void Close(); + int Read(int fd, void* buf, size_t count); + + int GetFileDescriptor() const { return file_descriptor_; } + + private: + const int file_descriptor_; + const std::string content_; + bool opened_ = false; + size_t head_index_ = 0; +}; + +class FakeFilesystem { + public: + void Reset(); + FakeFile* CreateFile(const std::string& filename, const char* content); + FakeFile* FindFileOrDie(const int file_descriptor) const; + FakeFile* FindFileOrNull(const std::string& filename) const; + + private: + size_t next_file_descriptor_ = 0; + std::unordered_map> files_; +}; + +FakeFilesystem& GetEmptyFilesystem(); + +} // namespace cpu_features + +#endif // CPU_FEATURES_TEST_FILESYSTEM_FOR_TESTING_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/hwcaps_for_testing.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/hwcaps_for_testing.cc new file mode 100755 index 00000000..07f68e8a --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/hwcaps_for_testing.cc @@ -0,0 +1,45 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "hwcaps_for_testing.h" +#include "internal/string_view.h" + +namespace cpu_features { + +namespace { +static auto* const g_hardware_capabilities = new HardwareCapabilities(); +static auto* const g_platform_types = new PlatformType(); +} // namespace + +void SetHardwareCapabilities(uint32_t hwcaps, uint32_t hwcaps2) { + g_hardware_capabilities->hwcaps = hwcaps; + g_hardware_capabilities->hwcaps2 = hwcaps2; +} + +HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void) { + return *g_hardware_capabilities; +} + +void SetPlatformTypes(const char* platform, const char* base_platform) { + CpuFeatures_StringView_CopyString(str(platform), g_platform_types->platform, + sizeof(g_platform_types->platform)); + CpuFeatures_StringView_CopyString(str(base_platform), + g_platform_types->base_platform, + sizeof(g_platform_types->base_platform)); +} + +PlatformType CpuFeatures_GetPlatformType(void) { return *g_platform_types; } +} // namespace cpu_features diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/hwcaps_for_testing.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/hwcaps_for_testing.h new file mode 100755 index 00000000..0d037772 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/hwcaps_for_testing.h @@ -0,0 +1,27 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_TEST_HWCAPS_FOR_TESTING_H_ +#define CPU_FEATURES_TEST_HWCAPS_FOR_TESTING_H_ + +#include "internal/hwcaps.h" + +namespace cpu_features { + +void SetHardwareCapabilities(uint32_t hwcaps, uint32_t hwcaps2); +void SetPlatformTypes(const char *platform, const char *base_platform); + +} // namespace cpu_features + +#endif // CPU_FEATURES_TEST_HWCAPS_FOR_TESTING_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/linux_features_aggregator_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/linux_features_aggregator_test.cc new file mode 100755 index 00000000..99367dc4 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/linux_features_aggregator_test.cc @@ -0,0 +1,95 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include "internal/linux_features_aggregator.h" + +#include "gtest/gtest.h" + +namespace cpu_features { + +namespace { + +struct Features { + bool a = false; + bool b = false; + bool c = false; +}; + +DECLARE_SETTER(Features, a) +DECLARE_SETTER(Features, b) +DECLARE_SETTER(Features, c) + +class LinuxFeatureAggregatorTest : public testing::Test { + public: + const std::array kConfigs = { + {{{0b0001, 0b0000}, "a", &set_a}, + {{0b0010, 0b0000}, "b", &set_b}, + {{0b0000, 0b1100}, "c", &set_c}}}; +}; + +TEST_F(LinuxFeatureAggregatorTest, FromFlagsEmpty) { + Features features; + CpuFeatures_SetFromFlags(kConfigs.size(), kConfigs.data(), str(""), + &features); + EXPECT_FALSE(features.a); + EXPECT_FALSE(features.b); + EXPECT_FALSE(features.c); +} + +TEST_F(LinuxFeatureAggregatorTest, FromFlagsAllSet) { + Features features; + CpuFeatures_SetFromFlags(kConfigs.size(), kConfigs.data(), str("a c b"), + &features); + EXPECT_TRUE(features.a); + EXPECT_TRUE(features.b); + EXPECT_TRUE(features.c); +} + +TEST_F(LinuxFeatureAggregatorTest, FromFlagsOnlyA) { + Features features; + CpuFeatures_SetFromFlags(kConfigs.size(), kConfigs.data(), str("a"), + &features); + EXPECT_TRUE(features.a); + EXPECT_FALSE(features.b); + EXPECT_FALSE(features.c); +} + +TEST_F(LinuxFeatureAggregatorTest, FromHwcapsNone) { + HardwareCapabilities capability; + capability.hwcaps = 0; // matches none + capability.hwcaps2 = 0; // matches none + Features features; + CpuFeatures_OverrideFromHwCaps(kConfigs.size(), kConfigs.data(), capability, + &features); + EXPECT_FALSE(features.a); + EXPECT_FALSE(features.b); + EXPECT_FALSE(features.c); +} + +TEST_F(LinuxFeatureAggregatorTest, FromHwcapsSet) { + HardwareCapabilities capability; + capability.hwcaps = 0b0010; // matches b but not a + capability.hwcaps2 = 0b1111; // matches c + Features features; + CpuFeatures_OverrideFromHwCaps(kConfigs.size(), kConfigs.data(), capability, + &features); + EXPECT_FALSE(features.a); + EXPECT_TRUE(features.b); + 
EXPECT_TRUE(features.c); +} + +} // namespace +} // namespace cpu_features diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/stack_line_reader_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/stack_line_reader_test.cc new file mode 100755 index 00000000..c8f96910 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/stack_line_reader_test.cc @@ -0,0 +1,132 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "internal/stack_line_reader.h" +#include "filesystem_for_testing.h" + +#include "gtest/gtest.h" + +namespace cpu_features { + +bool operator==(const StringView& a, const StringView& b) { + return CpuFeatures_StringView_IsEquals(a, b); +} + +namespace { + +std::string ToString(StringView view) { return {view.ptr, view.size}; } + +TEST(StackLineReaderTest, Empty) { + auto& fs = GetEmptyFilesystem(); + auto* file = fs.CreateFile("/proc/cpuinfo", ""); + StackLineReader reader; + StackLineReader_Initialize(&reader, file->GetFileDescriptor()); + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_TRUE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("")); + } +} + +TEST(StackLineReaderTest, ManySmallLines) { + auto& fs = GetEmptyFilesystem(); + auto* file = fs.CreateFile("/proc/cpuinfo", "a\nb\nc"); + + StackLineReader reader; + StackLineReader_Initialize(&reader, file->GetFileDescriptor()); + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_FALSE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("a")); + } + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_FALSE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("b")); + } + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_TRUE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("c")); + } +} + +TEST(StackLineReaderTest, TruncatedLine) { + auto& fs = GetEmptyFilesystem(); + auto* file = fs.CreateFile("/proc/cpuinfo", R"(First +Second +More than 16 characters, this will be truncated. 
+last)"); + + StackLineReader reader; + StackLineReader_Initialize(&reader, file->GetFileDescriptor()); + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_FALSE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("First")); + } + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_FALSE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("Second")); + } + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_FALSE(result.eof); + EXPECT_FALSE(result.full_line); + EXPECT_EQ(result.line, str("More than 16 cha")); + } + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_TRUE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("last")); + } +} + +TEST(StackLineReaderTest, TruncatedLines) { + auto& fs = GetEmptyFilesystem(); + auto* file = fs.CreateFile("/proc/cpuinfo", R"(More than 16 characters +Another line that is too long)"); + + StackLineReader reader; + StackLineReader_Initialize(&reader, file->GetFileDescriptor()); + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_FALSE(result.eof); + EXPECT_FALSE(result.full_line); + EXPECT_EQ(result.line, str("More than 16 cha")); + } + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_FALSE(result.eof); + EXPECT_FALSE(result.full_line); + EXPECT_EQ(result.line, str("Another line tha")); + } + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_TRUE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("")); + } +} + +} // namespace +} // namespace cpu_features diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/string_view_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/string_view_test.cc new file mode 100755 index 00000000..abfcc2cd --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/string_view_test.cc @@ -0,0 +1,144 @@ +// Copyright 2017 Google Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "internal/string_view.h" + +#include "gtest/gtest.h" + +namespace cpu_features { + +bool operator==(const StringView& a, const StringView& b) { + return CpuFeatures_StringView_IsEquals(a, b); +} + +namespace { + +TEST(StringViewTest, Empty) { + EXPECT_EQ(kEmptyStringView.ptr, nullptr); + EXPECT_EQ(kEmptyStringView.size, 0); +} + +TEST(StringViewTest, Build) { + const auto view = str("test"); + EXPECT_EQ(view.ptr[0], 't'); + EXPECT_EQ(view.size, 4); +} + +TEST(StringViewTest, CpuFeatures_StringView_IndexOfChar) { + // Found. + EXPECT_EQ(CpuFeatures_StringView_IndexOfChar(str("test"), 'e'), 1); + // Not found. + EXPECT_EQ(CpuFeatures_StringView_IndexOfChar(str("test"), 'z'), -1); + // Empty. + EXPECT_EQ(CpuFeatures_StringView_IndexOfChar(kEmptyStringView, 'z'), -1); +} + +TEST(StringViewTest, CpuFeatures_StringView_IndexOf) { + // Found. + EXPECT_EQ(CpuFeatures_StringView_IndexOf(str("test"), str("es")), 1); + // Not found. + EXPECT_EQ(CpuFeatures_StringView_IndexOf(str("test"), str("aa")), -1); + // Empty. 
+ EXPECT_EQ(CpuFeatures_StringView_IndexOf(kEmptyStringView, str("aa")), -1); + EXPECT_EQ(CpuFeatures_StringView_IndexOf(str("aa"), kEmptyStringView), -1); +} + +TEST(StringViewTest, CpuFeatures_StringView_StartsWith) { + EXPECT_TRUE(CpuFeatures_StringView_StartsWith(str("test"), str("te"))); + EXPECT_FALSE(CpuFeatures_StringView_StartsWith(str("test"), str(""))); + EXPECT_FALSE( + CpuFeatures_StringView_StartsWith(str("test"), kEmptyStringView)); + EXPECT_FALSE( + CpuFeatures_StringView_StartsWith(kEmptyStringView, str("test"))); +} + +TEST(StringViewTest, CpuFeatures_StringView_IsEquals) { + EXPECT_TRUE( + CpuFeatures_StringView_IsEquals(kEmptyStringView, kEmptyStringView)); + EXPECT_TRUE(CpuFeatures_StringView_IsEquals(kEmptyStringView, str(""))); + EXPECT_TRUE(CpuFeatures_StringView_IsEquals(str(""), kEmptyStringView)); + EXPECT_TRUE(CpuFeatures_StringView_IsEquals(str("a"), str("a"))); + EXPECT_FALSE(CpuFeatures_StringView_IsEquals(str("a"), str("b"))); + EXPECT_FALSE(CpuFeatures_StringView_IsEquals(str("a"), kEmptyStringView)); + EXPECT_FALSE(CpuFeatures_StringView_IsEquals(kEmptyStringView, str("a"))); +} + +TEST(StringViewTest, CpuFeatures_StringView_PopFront) { + EXPECT_EQ(CpuFeatures_StringView_PopFront(str("test"), 2), str("st")); + EXPECT_EQ(CpuFeatures_StringView_PopFront(str("test"), 0), str("test")); + EXPECT_EQ(CpuFeatures_StringView_PopFront(str("test"), 4), str("")); + EXPECT_EQ(CpuFeatures_StringView_PopFront(str("test"), 100), str("")); +} + +TEST(StringViewTest, CpuFeatures_StringView_ParsePositiveNumber) { + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("42")), 42); + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("0x2a")), 42); + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("0x2A")), 42); + + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("-0x2A")), -1); + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("abc")), -1); + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("")), -1); +} + 
+TEST(StringViewTest, CpuFeatures_StringView_CopyString) { + char buf[4]; + buf[0] = 'X'; + + // Empty + CpuFeatures_StringView_CopyString(str(""), buf, sizeof(buf)); + EXPECT_STREQ(buf, ""); + + // Less + CpuFeatures_StringView_CopyString(str("a"), buf, sizeof(buf)); + EXPECT_STREQ(buf, "a"); + + // exact + CpuFeatures_StringView_CopyString(str("abc"), buf, sizeof(buf)); + EXPECT_STREQ(buf, "abc"); + + // More + CpuFeatures_StringView_CopyString(str("abcd"), buf, sizeof(buf)); + EXPECT_STREQ(buf, "abc"); +} + +TEST(StringViewTest, CpuFeatures_StringView_HasWord) { + // Find flags at beginning, middle and end. + EXPECT_TRUE( + CpuFeatures_StringView_HasWord(str("first middle last"), "first")); + EXPECT_TRUE( + CpuFeatures_StringView_HasWord(str("first middle last"), "middle")); + EXPECT_TRUE(CpuFeatures_StringView_HasWord(str("first middle last"), "last")); + // Do not match partial flags + EXPECT_FALSE( + CpuFeatures_StringView_HasWord(str("first middle last"), "irst")); + EXPECT_FALSE(CpuFeatures_StringView_HasWord(str("first middle last"), "mid")); + EXPECT_FALSE(CpuFeatures_StringView_HasWord(str("first middle last"), "las")); +} + +TEST(StringViewTest, CpuFeatures_StringView_GetAttributeKeyValue) { + const StringView line = str(" key : first middle last "); + StringView key, value; + EXPECT_TRUE(CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)); + EXPECT_EQ(key, str("key")); + EXPECT_EQ(value, str("first middle last")); +} + +TEST(StringViewTest, FailingGetAttributeKeyValue) { + const StringView line = str("key first middle last"); + StringView key, value; + EXPECT_FALSE(CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)); +} + +} // namespace +} // namespace cpu_features diff --git a/src/crypto/argon2_hasher/hash/gpu/cuda/blake2b.cu b/src/crypto/argon2_hasher/hash/gpu/cuda/blake2b.cu new file mode 100644 index 00000000..db94e488 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/gpu/cuda/blake2b.cu @@ -0,0 +1,353 @@ +#define 
BLOCK_BYTES 32 +#define OUT_BYTES 16 +#define BLAKE_SHARED_MEM 480 +#define BLAKE_SHARED_MEM_UINT 120 + +#define G(m, r, i, a, b, c, d) \ +do { \ + a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ +} while ((void)0, 0) + +#define G_S(m, a, b, c, d) \ +do { \ + a = a + b + m; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ +} while ((void)0, 0) + +#define ROUND(m, t, r) \ +do { \ + G(m, r, t, v0, v1, v2, v3); \ + v1 = __shfl_sync(0xFFFFFFFF, v1, t + 1, 4); \ + v2 = __shfl_sync(0xFFFFFFFF, v2, t + 2, 4); \ + v3 = __shfl_sync(0xFFFFFFFF, v3, t + 3, 4); \ + G(m, r, (t + 4), v0, v1, v2, v3); \ + v1 = __shfl_sync(0xFFFFFFFF, v1, t + 3, 4); \ + v2 = __shfl_sync(0xFFFFFFFF, v2, t + 2, 4); \ + v3 = __shfl_sync(0xFFFFFFFF, v3, t + 1, 4); \ +} while ((void)0, 0) + +#define ROUND_S(m, t) \ +do { \ + G_S(m, v0, v1, v2, v3); \ + v1 = __shfl_sync(0xFFFFFFFF, v1, t + 1, 4); \ + v2 = __shfl_sync(0xFFFFFFFF, v2, t + 2, 4); \ + v3 = __shfl_sync(0xFFFFFFFF, v3, t + 3, 4); \ + G_S(m, v0, v1, v2, v3); \ + v1 = __shfl_sync(0xFFFFFFFF, v1, t + 3, 4); \ + v2 = __shfl_sync(0xFFFFFFFF, v2, t + 2, 4); \ + v3 = __shfl_sync(0xFFFFFFFF, v3, t + 1, 4); \ +} while ((void)0, 0) + +__constant__ uint64_t blake2b_IV[8] = { + 0x6A09E667F3BCC908, 0xBB67AE8584CAA73B, + 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1, + 0x510E527FADE682D1, 0x9B05688C2B3E6C1F, + 0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179 +}; + +__constant__ uint32_t blake2b_sigma[12][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 
6, 8, 3, 13}, + {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, +}; + +__device__ uint64_t rotr64(uint64_t x, uint32_t n) +{ + return (x >> n) | (x << (64 - n)); +} + +__device__ __forceinline__ void blake2b_compress(uint64_t *h, uint64_t *m, uint64_t f0, int thr_id) +{ + uint64_t v0, v1, v2, v3; + + v0 = h[thr_id]; + v1 = h[thr_id + 4]; + v2 = blake2b_IV[thr_id]; + v3 = blake2b_IV[thr_id + 4]; + + if(thr_id == 0) v3 ^= h[8]; + if(thr_id == 1) v3 ^= h[9]; + if(thr_id == 2) v3 ^= f0; + + ROUND(m, thr_id, 0); + ROUND(m, thr_id, 1); + ROUND(m, thr_id, 2); + ROUND(m, thr_id, 3); + ROUND(m, thr_id, 4); + ROUND(m, thr_id, 5); + ROUND(m, thr_id, 6); + ROUND(m, thr_id, 7); + ROUND(m, thr_id, 8); + ROUND(m, thr_id, 9); + ROUND(m, thr_id, 10); + ROUND(m, thr_id, 11); + + h[thr_id] ^= v0 ^ v2; + h[thr_id + 4] ^= v1 ^ v3; +} + +__device__ __forceinline__ void blake2b_compress_static(uint64_t *h, uint64_t m, uint64_t f0, int thr_id) +{ + uint64_t v0, v1, v2, v3; + + v0 = h[thr_id]; + v1 = h[thr_id + 4]; + v2 = blake2b_IV[thr_id]; + v3 = blake2b_IV[thr_id + 4]; + + if(thr_id == 0) v3 ^= h[8]; + if(thr_id == 1) v3 ^= h[9]; + if(thr_id == 2) v3 ^= f0; + + ROUND_S(m, thr_id); + ROUND_S(m, thr_id); + ROUND_S(m, thr_id); + ROUND_S(m, thr_id); + ROUND_S(m, thr_id); + ROUND_S(m, thr_id); + ROUND_S(m, thr_id); + ROUND_S(m, thr_id); + ROUND_S(m, thr_id); + ROUND_S(m, thr_id); + ROUND_S(m, thr_id); + ROUND_S(m, thr_id); + + h[thr_id] ^= v0 ^ v2; + h[thr_id + 4] ^= v1 ^ v3; +} + +__device__ __forceinline__ int blake2b_init(uint64_t *h, int out_len, int thr_id) +{ + h[thr_id * 2] = blake2b_IV[thr_id * 2]; + h[thr_id * 2 + 1] = 
blake2b_IV[thr_id * 2 + 1]; + + if(thr_id == 0) { + h[8] = h[9] = 0; + h[0] = 0x6A09E667F3BCC908 ^ ((out_len * 4) | (1 << 16) | (1 << 24)); + } + + return 0; +} + +__device__ __forceinline__ void blake2b_incrementCounter(uint64_t *h, int inc) +{ + h[8] += (inc * 4); + h[9] += (h[8] < (inc * 4)); +} + +__device__ __forceinline__ int blake2b_update(uint32_t *in, int in_len, uint64_t *h, uint32_t *buf, int buf_len, int thr_id) +{ + uint32_t *cursor_in = in; + uint32_t *cursor_out = buf + buf_len; + + if (buf_len + in_len > BLOCK_BYTES) { + int left = BLOCK_BYTES - buf_len; + + for(int i=0; i < (left >> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (left % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + blake2b_incrementCounter(h, BLOCK_BYTES); + } + + blake2b_compress(h, (uint64_t*)buf, 0, thr_id); + + buf_len = 0; + + in_len -= left; + in += left; + + while (in_len > BLOCK_BYTES) { + if(thr_id == 0) + blake2b_incrementCounter(h, BLOCK_BYTES); + + cursor_in = in; + cursor_out = buf; + + for(int i=0; i < (BLOCK_BYTES / 4); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + blake2b_compress(h, (uint64_t *)buf, 0, thr_id); + + in_len -= BLOCK_BYTES; + in += BLOCK_BYTES; + } + } + + cursor_in = in; + cursor_out = buf + buf_len; + + for(int i=0; i < (in_len >> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (in_len % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + } + + return buf_len + in_len; +} + +__device__ __forceinline__ int blake2b_update_static(uint32_t in, int in_len, uint64_t *h, uint32_t *buf, int buf_len, int thr_id) +{ + uint64_t in64 = in; + in64 = in64 << 32; + in64 = in64 | in; + + uint32_t *cursor_out = buf + buf_len; + + if (buf_len + in_len > BLOCK_BYTES) { + int left = BLOCK_BYTES - buf_len; + + for(int i=0; i < (left >> 2); i++, cursor_out 
+= 4) { + cursor_out[thr_id] = in; + } + + if(thr_id == 0) { + for (int i = 0; i < (left % 4); i++) { + cursor_out[i] = in; + } + blake2b_incrementCounter(h, BLOCK_BYTES); + } + + blake2b_compress(h, (uint64_t*)buf, 0, thr_id); + + buf_len = 0; + + in_len -= left; + + while (in_len > BLOCK_BYTES) { + if(thr_id == 0) + blake2b_incrementCounter(h, BLOCK_BYTES); + + blake2b_compress_static(h, in64, 0, thr_id); + + in_len -= BLOCK_BYTES; + } + } + + cursor_out = buf + buf_len; + + for(int i=0; i < (in_len >> 2); i++, cursor_out += 4) { + cursor_out[thr_id] = in; + } + + if(thr_id == 0) { + for (int i = 0; i < (in_len % 4); i++) { + cursor_out[i] = in; + } + } + + return buf_len + in_len; +} + +__device__ __forceinline__ void blake2b_final(uint32_t *out, int out_len, uint64_t *h, uint32_t *buf, int buf_len, int thr_id) +{ + int left = BLOCK_BYTES - buf_len; + uint32_t *cursor_out = buf + buf_len; + + for(int i=0; i < (left >> 2); i++, cursor_out += 4) { + cursor_out[thr_id] = 0; + } + + if(thr_id == 0) { + for (int i = 0; i < (left % 4); i++) { + cursor_out[i] = 0; + } + blake2b_incrementCounter(h, buf_len); + } + + blake2b_compress(h, (uint64_t*)buf, 0xFFFFFFFFFFFFFFFF, thr_id); + + uint32_t *cursor_in = (uint32_t *)h; + cursor_out = out; + + for(int i=0; i < (out_len >> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (out_len % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + } +} + +__device__ void blake2b_digestLong(uint32_t *out, int out_len, uint32_t *in, int in_len, int thr_id, uint32_t *shared) +{ + uint64_t *h = (uint64_t*)shared; + uint32_t *buf = (uint32_t*)&h[10]; + uint32_t *out_buffer = &buf[32]; + int buf_len; + + if(thr_id == 0) buf[0] = (out_len * 4); + buf_len = 1; + + if (out_len <= OUT_BYTES) { + blake2b_init(h, out_len, thr_id); + buf_len = blake2b_update(in, in_len, h, buf, buf_len, thr_id); + blake2b_final(out, out_len, h, buf, buf_len, thr_id); + } else { + 
uint32_t *cursor_in = out_buffer; + uint32_t *cursor_out = out; + + blake2b_init(h, OUT_BYTES, thr_id); + buf_len = blake2b_update(in, in_len, h, buf, buf_len, thr_id); + blake2b_final(out_buffer, OUT_BYTES, h, buf, buf_len, thr_id); + + for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + out += OUT_BYTES / 2; + + int to_produce = out_len - OUT_BYTES / 2; + while (to_produce > OUT_BYTES) { + buf_len = blake2b_init(h, OUT_BYTES, thr_id); + buf_len = blake2b_update(out_buffer, OUT_BYTES, h, buf, buf_len, thr_id); + blake2b_final(out_buffer, OUT_BYTES, h, buf, buf_len, thr_id); + + cursor_out = out; + cursor_in = out_buffer; + for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + out += OUT_BYTES / 2; + to_produce -= OUT_BYTES / 2; + } + + buf_len = blake2b_init(h, to_produce, thr_id); + buf_len = blake2b_update(out_buffer, OUT_BYTES, h, buf, buf_len, thr_id); + blake2b_final(out, to_produce, h, buf, buf_len, thr_id); + } +} \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp new file mode 100644 index 00000000..2046a321 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp @@ -0,0 +1,340 @@ +// +// Created by Haifa Bogdan Adnan on 03/08/2018. 
+// + +#include + +#include "../../../common/common.h" + +#include "crypto/argon2_hasher/hash/Hasher.h" +#include "crypto/argon2_hasher/hash/argon2/Argon2.h" + +#if defined(WITH_CUDA) + +#include +#include + +#include "cuda_hasher.h" +#include "../../../common/DLLExport.h" + +cuda_hasher::cuda_hasher() { + m_type = "GPU"; + m_subType = "CUDA"; + m_shortSubType = "NVD"; + m_intensity = 0; + m_description = ""; + m_computingThreads = 0; +} + + +cuda_hasher::~cuda_hasher() { + this->cleanup(); +} + +bool cuda_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) { + cudaError_t error = cudaSuccess; + string error_message; + + m_profile = getArgon2Profile(algorithm, variant); + + __devices = __query_cuda_devices(error, error_message); + + if(error != cudaSuccess) { + m_description = "No compatible GPU detected: " + error_message; + return false; + } + + if (__devices.empty()) { + m_description = "No compatible GPU detected."; + return false; + } + + return true; +} + +vector cuda_hasher::__query_cuda_devices(cudaError_t &error, string &error_message) { + vector devices; + int devCount = 0; + error = cudaGetDeviceCount(&devCount); + + if(error != cudaSuccess) { + error_message = "Error querying CUDA device count."; + return devices; + } + + if(devCount == 0) + return devices; + + for (int i = 0; i < devCount; ++i) + { + cuda_device_info *dev = __get_device_info(i); + if(dev == NULL) + continue; + if(dev->error != cudaSuccess) { + error = dev->error; + error_message = dev->error_message; + continue; + } + devices.push_back(dev); + } + return devices; +} + +cuda_device_info *cuda_hasher::__get_device_info(int device_index) { + cuda_device_info *device_info = new cuda_device_info(); + device_info->error = cudaSuccess; + device_info->cuda_index = device_index; + + device_info->error = cudaSetDevice(device_index); + if(device_info->error != cudaSuccess) { + device_info->error_message = "Error setting current device."; + return device_info; + } + + cudaDeviceProp 
devProp; + device_info->error = cudaGetDeviceProperties(&devProp, device_index); + if(device_info->error != cudaSuccess) { + device_info->error_message = "Error setting current device."; + return device_info; + } + + device_info->device_string = devProp.name; + + size_t freemem, totalmem; + device_info->error = cudaMemGetInfo(&freemem, &totalmem); + if(device_info->error != cudaSuccess) { + device_info->error_message = "Error setting current device."; + return device_info; + } + + device_info->free_mem_size = freemem; + device_info->max_allocable_mem_size = freemem / 4; + + double mem_in_gb = totalmem / 1073741824.0; + stringstream ss; + ss << setprecision(2) << mem_in_gb; + device_info->device_string += (" (" + ss.str() + "GB)"); + + return device_info; +} + +bool cuda_hasher::configure(xmrig::HasherConfig &config) { + int index = config.getGPUCardsCount(); + double intensity = 0; + + int total_threads = 0; + intensity = config.getAverageGPUIntensity(); + + if (intensity == 0) { + m_intensity = 0; + m_description = "Status: DISABLED - by user."; + return false; + } + + bool cards_selected = false; + intensity = 0; + + for(vector::iterator d = __devices.begin(); d != __devices.end(); d++, index++) { + stringstream ss; + ss << "["<< (index + 1) << "] " << (*d)->device_string; + string device_description = ss.str(); + (*d)->device_index = index; + (*d)->profile_info.profile = m_profile; + + if(config.gpuFilter().size() > 0) { + bool found = false; + for(xmrig::GPUFilter fit : config.gpuFilter()) { + if(device_description.find(fit.filter) != string::npos) { + found = true; + break; + } + } + if(!found) { + (*d)->profile_info.threads = 0; + ss << " - DISABLED" << endl; + m_description += ss.str(); + continue; + } + else { + cards_selected = true; + } + } + else { + cards_selected = true; + } + + ss << endl; + + double device_intensity = config.getGPUIntensity((*d)->device_index); + + m_description += ss.str(); + + if(!(__setup_device_info((*d), device_intensity))) { + 
m_description += (*d)->error_message; + m_description += "\n"; + continue; + }; + + DeviceInfo device; + + char bus_id[100]; + if(cudaDeviceGetPCIBusId(bus_id, 100, (*d)->cuda_index) == cudaSuccess) { + device.bus_id = bus_id; + int domain_separator = device.bus_id.find(":"); + if(domain_separator != string::npos) { + device.bus_id.erase(0, domain_separator + 1); + } + } + + device.name = (*d)->device_string; + device.intensity = device_intensity; + storeDeviceInfo((*d)->device_index, device); + + __enabledDevices.push_back(*d); + + total_threads += (*d)->profile_info.threads; + intensity += device_intensity; + } + + config.addGPUCardsCount(index - config.getGPUCardsCount()); + + if(!cards_selected) { + m_intensity = 0; + m_description += "Status: DISABLED - no card enabled because of filtering."; + return false; + } + + if (total_threads == 0) { + m_intensity = 0; + m_description += "Status: DISABLED - not enough resources."; + return false; + } + + if(!buildThreadData()) + return false; + + m_intensity = intensity / __enabledDevices.size(); + m_computingThreads = __enabledDevices.size() * 2; // 2 computing threads for each device + m_description += "Status: ENABLED - with " + to_string(total_threads) + " threads."; + + return true; +} + +void cuda_hasher::cleanup() { + for(vector::iterator d = __devices.begin(); d != __devices.end(); d++) { + cuda_free(*d); + } +} + +bool cuda_hasher::__setup_device_info(cuda_device_info *device, double intensity) { + device->profile_info.threads_per_chunk = (uint32_t)(device->max_allocable_mem_size / device->profile_info.profile->memSize); + size_t chunk_size = device->profile_info.threads_per_chunk * device->profile_info.profile->memSize; + + if(chunk_size == 0) { + device->error = cudaErrorInitializationError; + device->error_message = "Not enough memory on GPU."; + return false; + } + + uint64_t usable_memory = device->free_mem_size; + double chunks = (double)usable_memory / (double)chunk_size; + + uint32_t max_threads = 
(uint32_t)(device->profile_info.threads_per_chunk * chunks); + + if(max_threads == 0) { + device->error = cudaErrorInitializationError; + device->error_message = "Not enough memory on GPU."; + return false; + } + + device->profile_info.threads = (uint32_t)(max_threads * intensity / 100.0); + device->profile_info.threads = (device->profile_info.threads / 2) * 2; // make it divisible by 2 to allow for parallel kernel execution + if(max_threads > 0 && device->profile_info.threads == 0 && intensity > 0) + device->profile_info.threads = 2; + + chunks = (double)device->profile_info.threads / (double)device->profile_info.threads_per_chunk; + + cuda_allocate(device, chunks, chunk_size); + + if(device->error != cudaSuccess) + return false; + + return true; +} + +bool cuda_hasher::buildThreadData() { + __thread_data = new cuda_gpumgmt_thread_data[__enabledDevices.size() * 2]; + + for(int i=0; i < __enabledDevices.size(); i++) { + cuda_device_info *device = __enabledDevices[i]; + for(int threadId = 0; threadId < 2; threadId ++) { + cuda_gpumgmt_thread_data &thread_data = __thread_data[i * 2 + threadId]; + thread_data.device = device; + thread_data.thread_id = threadId; + + cudaStream_t stream; + device->error = cudaStreamCreate(&stream); + if(device->error != cudaSuccess) { + LOG("Error running kernel: (" + to_string(device->error) + ") cannot create cuda stream."); + return false; + } + + thread_data.device_data = stream; + + #ifdef PARALLEL_CUDA + if(threadId == 0) { + thread_data.threads_idx = 0; + thread_data.threads = device->profile_info.threads / 2; + } + else { + thread_data.threads_idx = device->profile_info.threads / 2; + thread_data.threads = device->profile_info.threads - thread_data.threads_idx; + } + #else + thread_data.threads_idx = 0; + thread_data.threads = device->profile_info.threads; + #endif + + thread_data.argon2 = new Argon2(cuda_kernel_prehasher, cuda_kernel_filler, cuda_kernel_posthasher, + nullptr, &thread_data); + 
thread_data.argon2->setThreads(thread_data.threads); + thread_data.hashData.outSize = xmrig::ARGON2_HASHLEN + 4; + } + } + + return true; +} + +int cuda_hasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) { + cuda_gpumgmt_thread_data &threadData = __thread_data[threadIdx]; + + cudaSetDevice(threadData.device->cuda_index); + + threadData.hashData.input = input; + threadData.hashData.inSize = size; + threadData.hashData.output = output; + int hashCount = threadData.argon2->generateHashes(*m_profile, threadData.hashData); + if(threadData.device->error != cudaSuccess) { + LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->error_message); + return 0; + } + + uint32_t *nonce = ((uint32_t *)(((uint8_t*)threadData.hashData.input) + 39)); + (*nonce) += threadData.threads; + + return hashCount; + +} + +size_t cuda_hasher::parallelism(int workerIdx) { + cuda_gpumgmt_thread_data &threadData = __thread_data[workerIdx]; + return threadData.threads; +} + +size_t cuda_hasher::deviceCount() { + return __enabledDevices.size(); +} + +REGISTER_HASHER(cuda_hasher); + +#endif //WITH_CUDA diff --git a/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h new file mode 100644 index 00000000..2e668b8e --- /dev/null +++ b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h @@ -0,0 +1,126 @@ +// +// Created by Haifa Bogdan Adnan on 18/09/2018. 
+// + +#ifndef ARGON2_CUDA_HASHER_H +#define ARGON2_CUDA_HASHER_H + +#if defined(WITH_CUDA) + +struct cuda_kernel_arguments { + void *memory_chunk_0; + void *memory_chunk_1; + void *memory_chunk_2; + void *memory_chunk_3; + void *memory_chunk_4; + void *memory_chunk_5; + + uint32_t *refs; + uint32_t *idxs; + uint32_t *segments; + + uint32_t *preseed_memory[2]; + uint32_t *seed_memory[2]; + uint32_t *out_memory[2]; + uint32_t *hash_memory[2]; + + uint32_t *host_seed_memory[2]; +}; + +struct argon2profile_info { + argon2profile_info() { + threads = 0; + threads_per_chunk = 0; + } + uint32_t threads; + uint32_t threads_per_chunk; + Argon2Profile *profile; +}; + +struct cuda_device_info { + cuda_device_info() { + device_index = 0; + device_string = ""; + free_mem_size = 0; + max_allocable_mem_size = 0; + + error = cudaSuccess; + error_message = ""; + } + + int device_index; + int cuda_index; + + string device_string; + uint64_t free_mem_size; + uint64_t max_allocable_mem_size; + + argon2profile_info profile_info; + cuda_kernel_arguments arguments; + + mutex device_lock; + + cudaError_t error; + string error_message; +}; + +struct cuda_gpumgmt_thread_data { + void lock() { +#ifndef PARALLEL_CUDA + device->device_lock.lock(); +#endif + } + + void unlock() { +#ifndef PARALLEL_CUDA + device->device_lock.unlock(); +#endif + } + + int thread_id; + cuda_device_info *device; + Argon2 *argon2; + HashData hashData; + + void *device_data; + + int threads; + int threads_idx; +}; + +class cuda_hasher : public Hasher { +public: + cuda_hasher(); + ~cuda_hasher(); + + virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant); + virtual bool configure(xmrig::HasherConfig &config); + virtual void cleanup(); + virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output); + virtual size_t parallelism(int workerIdx); + virtual size_t deviceCount(); + +private: + cuda_device_info *__get_device_info(int device_index); + bool 
__setup_device_info(cuda_device_info *device, double intensity); + vector __query_cuda_devices(cudaError_t &error, string &error_message); + bool buildThreadData(); + + vector __devices; + vector __enabledDevices; + cuda_gpumgmt_thread_data *__thread_data; + + Argon2Profile *m_profile; +}; + +// CUDA kernel exports +extern void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size); +extern void cuda_free(cuda_device_info *device); +extern bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data); +extern void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data); +extern bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data); +// end CUDA kernel exports + +#endif //WITH_CUDA + +#endif //ARGON2_CUDA_HASHER_H \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu new file mode 100644 index 00000000..eea358f2 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu @@ -0,0 +1,1132 @@ +#include + +#include + +#include "../../../common/common.h" + +#include "crypto/argon2_hasher/hash/Hasher.h" +#include "crypto/argon2_hasher/hash/argon2/Argon2.h" + +#include "cuda_hasher.h" + +#define THREADS_PER_LANE 32 +#define BLOCK_SIZE_UINT4 64 +#define BLOCK_SIZE_UINT 256 +#define KERNEL_WORKGROUP_SIZE 32 +#define ARGON2_PREHASH_DIGEST_LENGTH_UINT 16 +#define ARGON2_PREHASH_SEED_LENGTH_UINT 18 + + +#include "blake2b.cu" + +#define COMPUTE \ + asm ("{" \ + ".reg .u32 s1, s2, s3, s4;\n\t" \ + "mul.lo.u32 s3, %0, %2;\n\t" \ + "mul.hi.u32 s4, %0, %2;\n\t" \ + "add.cc.u32 s3, s3, s3;\n\t" \ + "addc.u32 s4, s4, s4;\n\t" \ + "add.cc.u32 s1, %0, %2;\n\t" \ + "addc.u32 s2, %1, %3;\n\t" \ + "add.cc.u32 %0, s1, s3;\n\t" \ + "addc.u32 %1, s2, s4;\n\t" \ + "xor.b32 s1, %0, %6;\n\t" \ + "xor.b32 %6, %1, %7;\n\t" \ + "mov.b32 %7, s1;\n\t" \ + "mul.lo.u32 s3, %4, 
%6;\n\t" \ + "mul.hi.u32 s4, %4, %6;\n\t" \ + "add.cc.u32 s3, s3, s3;\n\t" \ + "addc.u32 s4, s4, s4;\n\t" \ + "add.cc.u32 s1, %4, %6;\n\t" \ + "addc.u32 s2, %5, %7;\n\t" \ + "add.cc.u32 %4, s1, s3;\n\t" \ + "addc.u32 %5, s2, s4;\n\t" \ + "xor.b32 s3, %2, %4;\n\t" \ + "xor.b32 s4, %3, %5;\n\t" \ + "shf.r.wrap.b32 %3, s4, s3, 24;\n\t" \ + "shf.r.wrap.b32 %2, s3, s4, 24;\n\t" \ + "mul.lo.u32 s3, %0, %2;\n\t" \ + "mul.hi.u32 s4, %0, %2;\n\t" \ + "add.cc.u32 s3, s3, s3;\n\t" \ + "addc.u32 s4, s4, s4;\n\t" \ + "add.cc.u32 s1, %0, %2;\n\t" \ + "addc.u32 s2, %1, %3;\n\t" \ + "add.cc.u32 %0, s1, s3;\n\t" \ + "addc.u32 %1, s2, s4;\n\t" \ + "xor.b32 s3, %0, %6;\n\t" \ + "xor.b32 s4, %1, %7;\n\t" \ + "shf.r.wrap.b32 %7, s4, s3, 16;\n\t" \ + "shf.r.wrap.b32 %6, s3, s4, 16;\n\t" \ + "mul.lo.u32 s3, %4, %6;\n\t" \ + "mul.hi.u32 s4, %4, %6;\n\t" \ + "add.cc.u32 s3, s3, s3;\n\t" \ + "addc.u32 s4, s4, s4;\n\t" \ + "add.cc.u32 s1, %4, %6;\n\t" \ + "addc.u32 s2, %5, %7;\n\t" \ + "add.cc.u32 %4, s1, s3;\n\t" \ + "addc.u32 %5, s2, s4;\n\t" \ + "xor.b32 s3, %2, %4;\n\t" \ + "xor.b32 s4, %3, %5;\n\t" \ + "shf.r.wrap.b32 %3, s3, s4, 31;\n\t" \ + "shf.r.wrap.b32 %2, s4, s3, 31;\n\t" \ + "}" : "+r"(tmp_a.x), "+r"(tmp_a.y), "+r"(tmp_a.z), "+r"(tmp_a.w), "+r"(tmp_b.x), "+r"(tmp_b.y), "+r"(tmp_b.z), "+r"(tmp_b.w)); + +#define G1(data) \ +{ \ + COMPUTE \ + tmp_a.z = __shfl_sync(0xffffffff, tmp_a.z, i_shfl1_1); \ + tmp_a.w = __shfl_sync(0xffffffff, tmp_a.w, i_shfl1_1); \ + tmp_b.x = __shfl_sync(0xffffffff, tmp_b.x, i_shfl1_2); \ + tmp_b.y = __shfl_sync(0xffffffff, tmp_b.y, i_shfl1_2); \ + tmp_b.z = __shfl_sync(0xffffffff, tmp_b.z, i_shfl1_3); \ + tmp_b.w = __shfl_sync(0xffffffff, tmp_b.w, i_shfl1_3); \ +} + +#define G2(data) \ +{ \ + COMPUTE \ + data[i2_0_0] = tmp_a.x; \ + data[i2_0_1] = tmp_a.y; \ + data[i2_1_0] = tmp_a.z; \ + data[i2_1_1] = tmp_a.w; \ + data[i2_2_0] = tmp_b.x; \ + data[i2_2_1] = tmp_b.y; \ + data[i2_3_0] = tmp_b.z; \ + data[i2_3_1] = tmp_b.w; \ + __syncwarp(); \ +} + +#define 
G3(data) \ +{ \ + tmp_a.x = data[i3_0_0]; \ + tmp_a.y = data[i3_0_1]; \ + tmp_a.z = data[i3_1_0]; \ + tmp_a.w = data[i3_1_1]; \ + tmp_b.x = data[i3_2_0]; \ + tmp_b.y = data[i3_2_1]; \ + tmp_b.z = data[i3_3_0]; \ + tmp_b.w = data[i3_3_1]; \ + COMPUTE \ + tmp_a.z = __shfl_sync(0xffffffff, tmp_a.z, i_shfl2_1); \ + tmp_a.w = __shfl_sync(0xffffffff, tmp_a.w, i_shfl2_1); \ + tmp_b.x = __shfl_sync(0xffffffff, tmp_b.x, i_shfl2_2); \ + tmp_b.y = __shfl_sync(0xffffffff, tmp_b.y, i_shfl2_2); \ + tmp_b.z = __shfl_sync(0xffffffff, tmp_b.z, i_shfl2_3); \ + tmp_b.w = __shfl_sync(0xffffffff, tmp_b.w, i_shfl2_3); \ +} + +#define G4(data) \ +{ \ + COMPUTE \ + data[i4_0_0] = tmp_a.x; \ + data[i4_0_1] = tmp_a.y; \ + data[i4_1_0] = tmp_a.z; \ + data[i4_1_1] = tmp_a.w; \ + data[i4_2_0] = tmp_b.x; \ + data[i4_2_1] = tmp_b.y; \ + data[i4_3_0] = tmp_b.z; \ + data[i4_3_1] = tmp_b.w; \ + __syncwarp(); \ + tmp_a.x = data[i1_0_0]; \ + tmp_a.y = data[i1_0_1]; \ + tmp_a.z = data[i1_1_0]; \ + tmp_a.w = data[i1_1_1]; \ + tmp_b.x = data[i1_2_0]; \ + tmp_b.y = data[i1_2_1]; \ + tmp_b.z = data[i1_3_0]; \ + tmp_b.w = data[i1_3_1]; \ +} + +__constant__ int offsets[768] = { + 0, 4, 8, 12, + 1, 5, 9, 13, + 2, 6, 10, 14, + 3, 7, 11, 15, + 16, 20, 24, 28, + 17, 21, 25, 29, + 18, 22, 26, 30, + 19, 23, 27, 31, + 32, 36, 40, 44, + 33, 37, 41, 45, + 34, 38, 42, 46, + 35, 39, 43, 47, + 48, 52, 56, 60, + 49, 53, 57, 61, + 50, 54, 58, 62, + 51, 55, 59, 63, + 64, 68, 72, 76, + 65, 69, 73, 77, + 66, 70, 74, 78, + 67, 71, 75, 79, + 80, 84, 88, 92, + 81, 85, 89, 93, + 82, 86, 90, 94, + 83, 87, 91, 95, + 96, 100, 104, 108, + 97, 101, 105, 109, + 98, 102, 106, 110, + 99, 103, 107, 111, + 112, 116, 120, 124, + 113, 117, 121, 125, + 114, 118, 122, 126, + 115, 119, 123, 127, + 0, 5, 10, 15, + 1, 6, 11, 12, + 2, 7, 8, 13, + 3, 4, 9, 14, + 16, 21, 26, 31, + 17, 22, 27, 28, + 18, 23, 24, 29, + 19, 20, 25, 30, + 32, 37, 42, 47, + 33, 38, 43, 44, + 34, 39, 40, 45, + 35, 36, 41, 46, + 48, 53, 58, 63, + 49, 54, 59, 60, + 50, 55, 
56, 61, + 51, 52, 57, 62, + 64, 69, 74, 79, + 65, 70, 75, 76, + 66, 71, 72, 77, + 67, 68, 73, 78, + 80, 85, 90, 95, + 81, 86, 91, 92, + 82, 87, 88, 93, + 83, 84, 89, 94, + 96, 101, 106, 111, + 97, 102, 107, 108, + 98, 103, 104, 109, + 99, 100, 105, 110, + 112, 117, 122, 127, + 113, 118, 123, 124, + 114, 119, 120, 125, + 115, 116, 121, 126, + 0, 32, 64, 96, + 1, 33, 65, 97, + 2, 34, 66, 98, + 3, 35, 67, 99, + 4, 36, 68, 100, + 5, 37, 69, 101, + 6, 38, 70, 102, + 7, 39, 71, 103, + 8, 40, 72, 104, + 9, 41, 73, 105, + 10, 42, 74, 106, + 11, 43, 75, 107, + 12, 44, 76, 108, + 13, 45, 77, 109, + 14, 46, 78, 110, + 15, 47, 79, 111, + 16, 48, 80, 112, + 17, 49, 81, 113, + 18, 50, 82, 114, + 19, 51, 83, 115, + 20, 52, 84, 116, + 21, 53, 85, 117, + 22, 54, 86, 118, + 23, 55, 87, 119, + 24, 56, 88, 120, + 25, 57, 89, 121, + 26, 58, 90, 122, + 27, 59, 91, 123, + 28, 60, 92, 124, + 29, 61, 93, 125, + 30, 62, 94, 126, + 31, 63, 95, 127, + 0, 33, 80, 113, + 1, 48, 81, 96, + 2, 35, 82, 115, + 3, 50, 83, 98, + 4, 37, 84, 117, + 5, 52, 85, 100, + 6, 39, 86, 119, + 7, 54, 87, 102, + 8, 41, 88, 121, + 9, 56, 89, 104, + 10, 43, 90, 123, + 11, 58, 91, 106, + 12, 45, 92, 125, + 13, 60, 93, 108, + 14, 47, 94, 127, + 15, 62, 95, 110, + 16, 49, 64, 97, + 17, 32, 65, 112, + 18, 51, 66, 99, + 19, 34, 67, 114, + 20, 53, 68, 101, + 21, 36, 69, 116, + 22, 55, 70, 103, + 23, 38, 71, 118, + 24, 57, 72, 105, + 25, 40, 73, 120, + 26, 59, 74, 107, + 27, 42, 75, 122, + 28, 61, 76, 109, + 29, 44, 77, 124, + 30, 63, 78, 111, + 31, 46, 79, 126, + 0, 1, 2, 3, + 1, 2, 3, 0, + 2, 3, 0, 1, + 3, 0, 1, 2, + 4, 5, 6, 7, + 5, 6, 7, 4, + 6, 7, 4, 5, + 7, 4, 5, 6, + 8, 9, 10, 11, + 9, 10, 11, 8, + 10, 11, 8, 9, + 11, 8, 9, 10, + 12, 13, 14, 15, + 13, 14, 15, 12, + 14, 15, 12, 13, + 15, 12, 13, 14, + 16, 17, 18, 19, + 17, 18, 19, 16, + 18, 19, 16, 17, + 19, 16, 17, 18, + 20, 21, 22, 23, + 21, 22, 23, 20, + 22, 23, 20, 21, + 23, 20, 21, 22, + 24, 25, 26, 27, + 25, 26, 27, 24, + 26, 27, 24, 25, + 27, 24, 25, 26, + 28, 
29, 30, 31, + 29, 30, 31, 28, + 30, 31, 28, 29, + 31, 28, 29, 30, + 0, 1, 16, 17, + 1, 16, 17, 0, + 2, 3, 18, 19, + 3, 18, 19, 2, + 4, 5, 20, 21, + 5, 20, 21, 4, + 6, 7, 22, 23, + 7, 22, 23, 6, + 8, 9, 24, 25, + 9, 24, 25, 8, + 10, 11, 26, 27, + 11, 26, 27, 10, + 12, 13, 28, 29, + 13, 28, 29, 12, + 14, 15, 30, 31, + 15, 30, 31, 14, + 16, 17, 0, 1, + 17, 0, 1, 16, + 18, 19, 2, 3, + 19, 2, 3, 18, + 20, 21, 4, 5, + 21, 4, 5, 20, + 22, 23, 6, 7, + 23, 6, 7, 22, + 24, 25, 8, 9, + 25, 8, 9, 24, + 26, 27, 10, 11, + 27, 10, 11, 26, + 28, 29, 12, 13, + 29, 12, 13, 28, + 30, 31, 14, 15, + 31, 14, 15, 30 +}; + +inline __host__ __device__ void operator^=( uint4& a, uint4 s) { + a.x ^= s.x; a.y ^= s.y; a.z ^= s.z; a.w ^= s.w; +} + +__global__ void fill_blocks(uint32_t *scratchpad0, + uint32_t *scratchpad1, + uint32_t *scratchpad2, + uint32_t *scratchpad3, + uint32_t *scratchpad4, + uint32_t *scratchpad5, + uint32_t *seed, + uint32_t *out, + uint32_t *refs, // 32 bit + uint32_t *idxs, // first bit is keep flag, next 31 bit is current idx + uint32_t *segments, + int memsize, + int lanes, + int seg_length, + int seg_count, + int threads_per_chunk, + int thread_idx) { + extern __shared__ uint32_t shared[]; // lanes * BLOCK_SIZE_UINT [local state] + lanes * 32 [refs buffer] ( + lanes * 32 [idx buffer]) + + uint32_t *local_state = shared; + uint32_t *local_refs = shared + (lanes * BLOCK_SIZE_UINT); + uint32_t *local_idxs = shared + (lanes * BLOCK_SIZE_UINT + lanes * 32); + + uint4 tmp_a, tmp_b, tmp_c, tmp_d, tmp_p, tmp_q, tmp_l, tmp_m; + + int hash = blockIdx.x; + int mem_hash = hash + thread_idx; + int local_id = threadIdx.x; + int lane_length = seg_length * 4; + + int id = local_id % THREADS_PER_LANE; + int lane = local_id / THREADS_PER_LANE; + + int offset = id << 2; + + int i1_0_0 = 2 * offsets[offset]; + int i1_0_1 = i1_0_0 + 1; + int i1_1_0 = 2 * offsets[offset + 1]; + int i1_1_1 = i1_1_0 + 1; + int i1_2_0 = 2 * offsets[offset + 2]; + int i1_2_1 = i1_2_0 + 1; + int i1_3_0 = 2 * 
offsets[offset + 3]; + int i1_3_1 = i1_3_0 + 1; + + int i2_0_0 = 2 * offsets[offset + 128]; + int i2_0_1 = i2_0_0 + 1; + int i2_1_0 = 2 * offsets[offset + 129]; + int i2_1_1 = i2_1_0 + 1; + int i2_2_0 = 2 * offsets[offset + 130]; + int i2_2_1 = i2_2_0 + 1; + int i2_3_0 = 2 * offsets[offset + 131]; + int i2_3_1 = i2_3_0 + 1; + + int i3_0_0 = 2 * offsets[offset + 256]; + int i3_0_1 = i3_0_0 + 1; + int i3_1_0 = 2 * offsets[offset + 257]; + int i3_1_1 = i3_1_0 + 1; + int i3_2_0 = 2 * offsets[offset + 258]; + int i3_2_1 = i3_2_0 + 1; + int i3_3_0 = 2 * offsets[offset + 259]; + int i3_3_1 = i3_3_0 + 1; + + int i4_0_0 = 2 * offsets[offset + 384]; + int i4_0_1 = i4_0_0 + 1; + int i4_1_0 = 2 * offsets[offset + 385]; + int i4_1_1 = i4_1_0 + 1; + int i4_2_0 = 2 * offsets[offset + 386]; + int i4_2_1 = i4_2_0 + 1; + int i4_3_0 = 2 * offsets[offset + 387]; + int i4_3_1 = i4_3_0 + 1; + + int i_shfl1_1 = offsets[offset + 513]; + int i_shfl1_2 = offsets[offset + 514]; + int i_shfl1_3 = offsets[offset + 515]; + int i_shfl2_1 = offsets[offset + 641]; + int i_shfl2_2 = offsets[offset + 642]; + int i_shfl2_3 = offsets[offset + 643]; + + int scratchpad_location = mem_hash / threads_per_chunk; + uint4 *memory = reinterpret_cast(scratchpad0); + if(scratchpad_location == 1) memory = reinterpret_cast(scratchpad1); + if(scratchpad_location == 2) memory = reinterpret_cast(scratchpad2); + if(scratchpad_location == 3) memory = reinterpret_cast(scratchpad3); + if(scratchpad_location == 4) memory = reinterpret_cast(scratchpad4); + if(scratchpad_location == 5) memory = reinterpret_cast(scratchpad5); + int hash_offset = mem_hash - scratchpad_location * threads_per_chunk; + memory = memory + hash_offset * (memsize >> 4); // memsize / 16 -> 16 bytes in uint4 + + uint32_t *mem_seed = seed + hash * lanes * 2 * BLOCK_SIZE_UINT; + + uint32_t *seed_src = mem_seed + lane * 2 * BLOCK_SIZE_UINT; + uint4 *seed_dst = memory + lane * lane_length * BLOCK_SIZE_UINT4; + + seed_dst[id] = 
make_uint4(seed_src[i1_0_0], seed_src[i1_0_1], seed_src[i1_1_0], seed_src[i1_1_1]); + seed_dst[id + 32] = make_uint4(seed_src[i1_2_0], seed_src[i1_2_1], seed_src[i1_3_0], seed_src[i1_3_1]); + seed_src += BLOCK_SIZE_UINT; + seed_dst += BLOCK_SIZE_UINT4; + seed_dst[id] = make_uint4(seed_src[i1_0_0], seed_src[i1_0_1], seed_src[i1_1_0], seed_src[i1_1_1]); + seed_dst[id + 32] = make_uint4(seed_src[i1_2_0], seed_src[i1_2_1], seed_src[i1_3_0], seed_src[i1_3_1]); + + uint4 *next_block; + uint4 *prev_block; + uint4 *ref_block; + uint32_t *seg_refs, *seg_idxs; + + local_state = local_state + lane * BLOCK_SIZE_UINT; + local_refs = local_refs + lane * 32; + local_idxs = local_idxs + lane * 32; + + segments += (lane * 3); + + for(int s = 0; s < (seg_count / lanes); s++) { + int idx = ((s == 0) ? 2 : 0); // index for first slice in each lane is 2 + int with_xor = ((s >= 4) ? 1 : 0); + int keep = 1; + int slice = s % 4; + int pass = s / 4; + + uint32_t *cur_seg = &segments[s * lanes * 3]; + + uint32_t cur_idx = cur_seg[0]; + uint32_t prev_idx = cur_seg[1]; + uint32_t seg_type = cur_seg[2]; + uint32_t ref_idx = 0; + + prev_block = memory + prev_idx * BLOCK_SIZE_UINT4; + + tmp_a = prev_block[id]; + tmp_b = prev_block[id + 32]; + + __syncthreads(); + + if(seg_type == 0) { + seg_refs = refs + ((s * lanes + lane) * seg_length - ((s > 0) ? lanes : lane) * 2); + if(idxs != NULL) seg_idxs = idxs + ((s * lanes + lane) * seg_length - ((s > 0) ? 
lanes : lane) * 2); + + for (cur_idx--;idx < seg_length; seg_refs += 32, seg_idxs += 32) { + uint64_t i_limit = seg_length - idx; + if (i_limit > 32) i_limit = 32; + + local_refs[id] = seg_refs[id]; + ref_idx = local_refs[0]; + + if(idxs != NULL) { + local_idxs[id] = seg_idxs[id]; + cur_idx = local_idxs[0]; + keep = cur_idx & 0x80000000; + cur_idx = cur_idx & 0x7FFFFFFF; + } else + cur_idx++; + + ref_block = memory + ref_idx * BLOCK_SIZE_UINT4; + tmp_p = ref_block[id]; + tmp_q = ref_block[id + 32]; + + for (int i = 0; i < i_limit; i++, idx++) { + next_block = memory + cur_idx * BLOCK_SIZE_UINT4; + if(with_xor == 1) { + tmp_l = next_block[id]; + tmp_m = next_block[id + 32]; + } + + tmp_a ^= tmp_p; + tmp_b ^= tmp_q; + + if (i < (i_limit - 1)) { + ref_idx = local_refs[i + 1]; + + if(idxs != NULL) { + cur_idx = local_idxs[i + 1]; + keep = cur_idx & 0x80000000; + cur_idx = cur_idx & 0x7FFFFFFF; + } + else + cur_idx++; + + ref_block = memory + ref_idx * BLOCK_SIZE_UINT4; + tmp_p = ref_block[id]; + tmp_q = ref_block[id + 32]; + } + + tmp_c = tmp_a; + tmp_d = tmp_b; + + G1(local_state); + G2(local_state); + G3(local_state); + G4(local_state); + + if(with_xor == 1) { + tmp_c ^= tmp_l; + tmp_d ^= tmp_m; + } + + tmp_a ^= tmp_c; + tmp_b ^= tmp_d; + + if(keep > 0) { + next_block[id] = tmp_a; + next_block[id + 32] = tmp_b; + } + } + } + } + else { + + for (; idx < seg_length; idx++, cur_idx++) { + next_block = memory + cur_idx * BLOCK_SIZE_UINT4; + + if(with_xor == 1) { + tmp_l = next_block[id]; + tmp_m = next_block[id + 32]; + } + + uint32_t pseudo_rand_lo = __shfl_sync(0xffffffff, tmp_a.x, 0); + uint32_t pseudo_rand_hi = __shfl_sync(0xffffffff, tmp_a.y, 0); + + uint64_t ref_lane = pseudo_rand_hi % lanes; // thr_cost + uint32_t reference_area_size = 0; + if(pass > 0) { + if (lane == ref_lane) { + reference_area_size = lane_length - seg_length + idx - 1; + } else { + reference_area_size = lane_length - seg_length + ((idx == 0) ? 
(-1) : 0); + } + } + else { + if (lane == ref_lane) { + reference_area_size = slice * seg_length + idx - 1; // seg_length + } else { + reference_area_size = slice * seg_length + ((idx == 0) ? (-1) : 0); + } + } + asm("{mul.hi.u32 %0, %1, %1; mul.hi.u32 %0, %0, %2; }": "=r"(pseudo_rand_lo) : "r"(pseudo_rand_lo), "r"(reference_area_size)); + + uint32_t relative_position = reference_area_size - 1 - pseudo_rand_lo; + + ref_idx = ref_lane * lane_length + (((pass > 0 && slice < 3) ? ((slice + 1) * seg_length) : 0) + relative_position) % lane_length; + + ref_block = memory + ref_idx * BLOCK_SIZE_UINT4; + + tmp_a ^= ref_block[id]; + tmp_b ^= ref_block[id + 32]; + + tmp_c = tmp_a; + tmp_d = tmp_b; + + G1(local_state); + G2(local_state); + G3(local_state); + G4(local_state); + + if(with_xor == 1) { + tmp_c ^= tmp_l; + tmp_d ^= tmp_m; + } + + tmp_a ^= tmp_c; + tmp_b ^= tmp_d; + + next_block[id] = tmp_a; + next_block[id + 32] = tmp_b; + } + } + } + + local_state[i1_0_0] = tmp_a.x; + local_state[i1_0_1] = tmp_a.y; + local_state[i1_1_0] = tmp_a.z; + local_state[i1_1_1] = tmp_a.w; + local_state[i1_2_0] = tmp_b.x; + local_state[i1_2_1] = tmp_b.y; + local_state[i1_3_0] = tmp_b.z; + local_state[i1_3_1] = tmp_b.w; + + __syncthreads(); + + // at this point local_state will contain the final blocks + + if(lane == 0) { // first lane needs to acumulate results + tmp_a = make_uint4(0, 0, 0, 0); + tmp_b = make_uint4(0, 0, 0, 0); + + for(int l=0; l> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (pwdlen % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + + uint32_t nonce = (preseed[9] >> 24) | (preseed[10] << 8); + nonce += hash; + local_preseed[9] = (preseed[9] & 0x00FFFFFF) | (nonce << 24); + local_preseed[10] = (preseed[10] & 0xFF000000) | (nonce >> 8); + } + + int buf_len = blake2b_init(h, ARGON2_PREHASH_DIGEST_LENGTH_UINT, thr_id); + *value = lanes; //lanes + buf_len = blake2b_update(value, 1, h, buf, 
buf_len, thr_id); + *value = 32; //outlen + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + *value = memsz; //m_cost + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + *value = passes; //t_cost + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + *value = ARGON2_VERSION; //version + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + *value = ARGON2_TYPE_VALUE; //type + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + *value = pwdlen * 4; //pw_len + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + buf_len = blake2b_update(local_preseed, pwdlen, h, buf, buf_len, thr_id); + *value = saltlen * 4; //salt_len + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + buf_len = blake2b_update(local_preseed, saltlen, h, buf, buf_len, thr_id); + *value = 0; //secret_len + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + buf_len = blake2b_update(NULL, 0, h, buf, buf_len, thr_id); + *value = 0; //ad_len + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + buf_len = blake2b_update(NULL, 0, h, buf, buf_len, thr_id); + + blake2b_final(local_mem, ARGON2_PREHASH_DIGEST_LENGTH_UINT, h, buf, buf_len, thr_id); + + if (thr_id == 0) { + local_mem[ARGON2_PREHASH_DIGEST_LENGTH_UINT] = idx; + local_mem[ARGON2_PREHASH_DIGEST_LENGTH_UINT + 1] = lane; + } + + blake2b_digestLong(local_seed, ARGON2_DWORDS_IN_BLOCK, local_mem, ARGON2_PREHASH_SEED_LENGTH_UINT, thr_id, + &local_mem[20]); + } +} + +__global__ void posthash ( + uint32_t *hash, + uint32_t *out, + uint32_t *preseed) { + extern __shared__ uint32_t shared[]; // size = 120 + + int hash_id = blockIdx.x; + int thread = threadIdx.x; + + uint32_t *local_hash = hash + hash_id * ((ARGON2_RAW_LENGTH / 4) + 1); + uint32_t *local_out = out + hash_id * BLOCK_SIZE_UINT; + + blake2b_digestLong(local_hash, ARGON2_RAW_LENGTH / 4, local_out, ARGON2_DWORDS_IN_BLOCK, thread, shared); + + if(thread == 0) { + uint32_t nonce = (preseed[9] >> 24) 
| (preseed[10] << 8);
+        nonce += hash_id;
+        local_hash[ARGON2_RAW_LENGTH / 4] = nonce;
+    }
+}
+
+// Returns the size in bytes of the next device memory chunk and consumes one
+// chunk from `chunks`: a full `chunk_size` while more than one chunk remains,
+// the rounded-up fraction for the last partial chunk, and a 1-byte placeholder
+// once no chunks are left.
+static size_t cuda_next_chunk_size(double &chunks, size_t chunk_size) {
+    if (chunks > 0) {
+        size_t size = chunks > 1 ? chunk_size : (size_t)ceil(chunk_size * chunks);
+        chunks -= 1;
+        return size;
+    }
+    return 1;
+}
+
+// Allocates all device-side and pinned host buffers used by the CUDA hasher for
+// `device` and uploads the profile's reference, index and segment tables.
+//
+//   device     - device descriptor; its `arguments` members receive the buffers.
+//   chunks     - number of memory chunks to fill (may be fractional); spread over
+//                memory_chunk_0..5, unused chunks get a 1-byte placeholder.
+//   chunk_size - size in bytes of one full chunk.
+//
+// On failure `device->error` / `device->error_message` are set and the function
+// returns immediately; buffers allocated up to that point stay assigned in
+// `device->arguments`. Temporary host buffers are always released.
+void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
+    Argon2Profile *profile = device->profile_info.profile;
+
+    device->error = cudaSetDevice(device->cuda_index);
+    if(device->error != cudaSuccess) {
+        device->error_message = "Error setting current device for memory allocation.";
+        return;
+    }
+
+    // The six chunks must be allocated in order: each call consumes one chunk
+    // from the remaining `chunks` budget.
+    device->error = cudaMalloc(&device->arguments.memory_chunk_0, cuda_next_chunk_size(chunks, chunk_size));
+    if (device->error != cudaSuccess) {
+        device->error_message = "Error allocating memory.";
+        return;
+    }
+    device->error = cudaMalloc(&device->arguments.memory_chunk_1, cuda_next_chunk_size(chunks, chunk_size));
+    if (device->error != cudaSuccess) {
+        device->error_message = "Error allocating memory.";
+        return;
+    }
+    device->error = cudaMalloc(&device->arguments.memory_chunk_2, cuda_next_chunk_size(chunks, chunk_size));
+    if (device->error != cudaSuccess) {
+        device->error_message = "Error allocating memory.";
+        return;
+    }
+    device->error = cudaMalloc(&device->arguments.memory_chunk_3, cuda_next_chunk_size(chunks, chunk_size));
+    if (device->error != cudaSuccess) {
+        device->error_message = "Error allocating memory.";
+        return;
+    }
+    device->error = cudaMalloc(&device->arguments.memory_chunk_4, cuda_next_chunk_size(chunks, chunk_size));
+    if (device->error != cudaSuccess) {
+        device->error_message = "Error allocating memory.";
+        return;
+    }
+    device->error = cudaMalloc(&device->arguments.memory_chunk_5, cuda_next_chunk_size(chunks, chunk_size));
+    if (device->error != cudaSuccess) {
+        device->error_message = "Error allocating memory.";
+        return;
+    }
+
+    const size_t refs_bytes = profile->blockRefsSize * sizeof(uint32_t);
+
+    // blockRefs holds three values per entry; element 1 is the reference index.
+    device->error = cudaMalloc(&device->arguments.refs, refs_bytes);
+    if(device->error != cudaSuccess) {
+        device->error_message = "Error allocating memory.";
+        return;
+    }
+
+    uint32_t *refs = (uint32_t *)malloc(refs_bytes);
+    if(refs == NULL) {
+        device->error = cudaErrorMemoryAllocation;
+        device->error_message = "Error allocating memory.";
+        return;
+    }
+    for(int i = 0; i < profile->blockRefsSize; i++) {
+        refs[i] = profile->blockRefs[i*3 + 1];
+    }
+
+    device->error = cudaMemcpy(device->arguments.refs, refs, refs_bytes, cudaMemcpyHostToDevice);
+    free(refs); // release the staging buffer before checking the result so it cannot leak
+    if(device->error != cudaSuccess) {
+        device->error_message = "Error copying memory.";
+        return;
+    }
+
+    if(profile->succesiveIdxs == 1) {
+        device->arguments.idxs = NULL;
+    }
+    else {
+        device->error = cudaMalloc(&device->arguments.idxs, refs_bytes);
+        if (device->error != cudaSuccess) {
+            device->error_message = "Error allocating memory.";
+            return;
+        }
+
+        uint32_t *idxs = (uint32_t *) malloc(refs_bytes);
+        if (idxs == NULL) {
+            device->error = cudaErrorMemoryAllocation;
+            device->error_message = "Error allocating memory.";
+            return;
+        }
+        for (int i = 0; i < profile->blockRefsSize; i++) {
+            // element 0 is the block index; element 2 == 1 is folded into bit 31,
+            // which fill_blocks reads back as its `keep` flag
+            idxs[i] = profile->blockRefs[i * 3];
+            if (profile->blockRefs[i * 3 + 2] == 1) {
+                idxs[i] |= 0x80000000;
+            }
+        }
+
+        device->error = cudaMemcpy(device->arguments.idxs, idxs, refs_bytes, cudaMemcpyHostToDevice);
+        free(idxs);
+        if (device->error != cudaSuccess) {
+            device->error_message = "Error copying memory.";
+            return;
+        }
+    }
+
+    //reorganize segments data
+    const size_t segments_bytes = profile->segCount * 3 * sizeof(uint32_t);
+
+    device->error = cudaMalloc(&device->arguments.segments, segments_bytes);
+    if(device->error != cudaSuccess) {
+        device->error_message = "Error allocating memory.";
+        return;
+    }
+    device->error = cudaMemcpy(device->arguments.segments, profile->segments, segments_bytes, cudaMemcpyHostToDevice);
+    if(device->error != cudaSuccess) {
+        device->error_message = "Error copying memory.";
+        return;
+    }
+
+#ifdef PARALLEL_CUDA
+    int threads = device->profile_info.threads / 2;
+#else
+    int threads = device->profile_info.threads;
+#endif
+
+    size_t preseed_memory_size = profile->pwdLen * 4;
+    size_t seed_memory_size = threads * (profile->thrCost * 2) * ARGON2_BLOCK_SIZE;
+    size_t out_memory_size = threads * ARGON2_BLOCK_SIZE;
+    size_t hash_memory_size = threads * (xmrig::ARGON2_HASHLEN + 4);
+
+    // Two identical sets of working buffers; the kernel wrappers select one by
+    // the management thread's thread_id.
+    for(int i = 0; i < 2; i++) {
+        device->error = cudaMalloc(&device->arguments.preseed_memory[i], preseed_memory_size);
+        if (device->error != cudaSuccess) {
+            device->error_message = "Error allocating memory.";
+            return;
+        }
+        device->error = cudaMalloc(&device->arguments.seed_memory[i], seed_memory_size);
+        if (device->error != cudaSuccess) {
+            device->error_message = "Error allocating memory.";
+            return;
+        }
+        device->error = cudaMalloc(&device->arguments.out_memory[i], out_memory_size);
+        if (device->error != cudaSuccess) {
+            device->error_message = "Error allocating memory.";
+            return;
+        }
+        device->error = cudaMalloc(&device->arguments.hash_memory[i], hash_memory_size);
+        if (device->error != cudaSuccess) {
+            device->error_message = "Error allocating memory.";
+            return;
+        }
+        device->error = cudaMallocHost(&device->arguments.host_seed_memory[i], 132 * threads);
+        if (device->error != cudaSuccess) {
+            device->error_message = "Error allocating pinned memory.";
+            return;
+        }
+    }
+}
+
+void cuda_free(cuda_device_info *device) {
+    cudaSetDevice(device->cuda_index);
+
+    if(device->arguments.idxs != NULL) {
+        cudaFree(device->arguments.idxs);
+        device->arguments.idxs = NULL;
+    }
+
+    if(device->arguments.refs != NULL) {
+        cudaFree(device->arguments.refs);
+        device->arguments.refs = NULL;
+    }
+
+    if(device->arguments.segments != NULL) {
+        cudaFree(device->arguments.segments);
+        device->arguments.segments = NULL;
+    }
+
+    if(device->arguments.memory_chunk_0 != NULL) {
+        cudaFree(device->arguments.memory_chunk_0);
+        device->arguments.memory_chunk_0 = NULL;
+    }
+
+    if(device->arguments.memory_chunk_1 != NULL) {
+        
cudaFree(device->arguments.memory_chunk_1);
+        device->arguments.memory_chunk_1 = NULL;
+    }
+
+    if(device->arguments.memory_chunk_2 != NULL) {
+        cudaFree(device->arguments.memory_chunk_2);
+        device->arguments.memory_chunk_2 = NULL;
+    }
+
+    if(device->arguments.memory_chunk_3 != NULL) {
+        cudaFree(device->arguments.memory_chunk_3);
+        device->arguments.memory_chunk_3 = NULL;
+    }
+
+    if(device->arguments.memory_chunk_4 != NULL) {
+        cudaFree(device->arguments.memory_chunk_4);
+        device->arguments.memory_chunk_4 = NULL;
+    }
+
+    if(device->arguments.memory_chunk_5 != NULL) {
+        cudaFree(device->arguments.memory_chunk_5);
+        device->arguments.memory_chunk_5 = NULL;
+    }
+
+    if(device->arguments.preseed_memory != NULL) { // the two-slot buffer sets follow; each slot is freed and cleared
+        for(int i=0;i<2;i++) {
+            if(device->arguments.preseed_memory[i] != NULL)
+                cudaFree(device->arguments.preseed_memory[i]);
+            device->arguments.preseed_memory[i] = NULL;
+        }
+    }
+
+    if(device->arguments.seed_memory != NULL) {
+        for(int i=0;i<2;i++) {
+            if(device->arguments.seed_memory[i] != NULL)
+                cudaFree(device->arguments.seed_memory[i]);
+            device->arguments.seed_memory[i] = NULL;
+        }
+    }
+
+    if(device->arguments.out_memory != NULL) {
+        for(int i=0;i<2;i++) {
+            if(device->arguments.out_memory[i] != NULL)
+                cudaFree(device->arguments.out_memory[i]);
+            device->arguments.out_memory[i] = NULL;
+        }
+    }
+
+    if(device->arguments.hash_memory != NULL) {
+        for(int i=0;i<2;i++) {
+            if(device->arguments.hash_memory[i] != NULL)
+                cudaFree(device->arguments.hash_memory[i]);
+            device->arguments.hash_memory[i] = NULL;
+        }
+    }
+
+    if(device->arguments.host_seed_memory != NULL) {
+        for(int i=0;i<2;i++) {
+            if(device->arguments.host_seed_memory[i] != NULL)
+                cudaFreeHost(device->arguments.host_seed_memory[i]); // allocated with cudaMallocHost, so freed with cudaFreeHost
+            device->arguments.host_seed_memory[i] = NULL;
+        }
+    }
+
+    cudaDeviceReset();
+}
+/*
+ * Stages the hash input for one batch and launches the `prehash` kernel.
+ *
+ *   memory    - host buffer holding hashData.inSize bytes of input.
+ *   threads   - forwarded to the kernel and used to size the launch grid.
+ *   profile   - Argon2 profile supplying the cost parameters.
+ *   user_data - cuda_gpumgmt_thread_data of the calling management thread.
+ *
+ * Takes the management thread's lock. On failure the lock is released and
+ * false is returned. On success the lock stays held and true is returned;
+ * in the code visible here it is released by cuda_kernel_posthasher.
+ */
+bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
+    cuda_gpumgmt_thread_data *gpumgmt_thread = (cuda_gpumgmt_thread_data *)user_data;
+    cuda_device_info *device = gpumgmt_thread->device;
+    cudaStream_t stream = (cudaStream_t)gpumgmt_thread->device_data;
+
+    int sessions = max(profile->thrCost * 2, (uint32_t)8); // at least 8 sessions per block
+    double hashes_per_block = sessions / (profile->thrCost * 2.0); // each hash uses thrCost * 2 sessions
+    size_t work_items = sessions * 4;
+
+    gpumgmt_thread->lock();
+
+    memcpy(device->arguments.host_seed_memory[gpumgmt_thread->thread_id], memory, gpumgmt_thread->hashData.inSize); // copy into this thread's pinned staging slot
+
+    device->error = cudaMemcpyAsync(device->arguments.preseed_memory[gpumgmt_thread->thread_id], device->arguments.host_seed_memory[gpumgmt_thread->thread_id], gpumgmt_thread->hashData.inSize, cudaMemcpyHostToDevice, stream);
+    if (device->error != cudaSuccess) {
+        device->error_message = "Error writing to gpu memory.";
+        gpumgmt_thread->unlock();
+        return false;
+    }
+
+    prehash <<< ceil(threads / hashes_per_block), work_items, sessions * BLAKE_SHARED_MEM, stream>>> (
+            device->arguments.preseed_memory[gpumgmt_thread->thread_id],
+            device->arguments.seed_memory[gpumgmt_thread->thread_id],
+            profile->memCost,
+            profile->thrCost,
+            profile->segCount / (4 * profile->thrCost),
+            gpumgmt_thread->hashData.inSize / 4, // input length in 32-bit words
+            profile->saltLen,
+            threads);
+
+    return true; // lock intentionally still held here
+}
+/*
+ * Launches the `fill_blocks` kernel on the calling management thread's stream,
+ * using that thread's seed/out buffer slot. Always returns a non-NULL pointer.
+ */
+void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data) {
+    cuda_gpumgmt_thread_data *gpumgmt_thread = (cuda_gpumgmt_thread_data *)user_data;
+    cuda_device_info *device = gpumgmt_thread->device;
+    cudaStream_t stream = (cudaStream_t)gpumgmt_thread->device_data;
+
+    size_t work_items = KERNEL_WORKGROUP_SIZE * profile->thrCost;
+    size_t shared_mem = profile->thrCost * (ARGON2_BLOCK_SIZE + 128 + (profile->succesiveIdxs == 1 ? 
128 : 0));
+
+    fill_blocks <<>> ((uint32_t*)device->arguments.memory_chunk_0,
+            (uint32_t*)device->arguments.memory_chunk_1,
+            (uint32_t*)device->arguments.memory_chunk_2,
+            (uint32_t*)device->arguments.memory_chunk_3,
+            (uint32_t*)device->arguments.memory_chunk_4,
+            (uint32_t*)device->arguments.memory_chunk_5,
+            device->arguments.seed_memory[gpumgmt_thread->thread_id],
+            device->arguments.out_memory[gpumgmt_thread->thread_id],
+            device->arguments.refs,
+            device->arguments.idxs,
+            device->arguments.segments,
+            profile->memSize,
+            profile->thrCost,
+            profile->segSize,
+            profile->segCount,
+            device->profile_info.threads_per_chunk,
+            gpumgmt_thread->threads_idx);
+
+    return (void *)1;
+}
+
+// Runs the `posthash` kernel for the calling management thread's buffer slot,
+// waits for the stream to drain and copies the resulting hash records
+// (threads * (ARGON2_HASHLEN + 4) bytes) into `memory`.
+//
+//   memory    - host destination buffer for the hash records.
+//   threads   - number of hash records to copy back.
+//   profile   - unused here; kept for the common kernel-wrapper signature.
+//   user_data - cuda_gpumgmt_thread_data of the calling management thread.
+//
+// Releases the management thread's lock on every path (it is taken in
+// cuda_kernel_prehasher). Returns true on success; on failure sets
+// device->error / device->error_message and returns false.
+bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
+    cuda_gpumgmt_thread_data *gpumgmt_thread = (cuda_gpumgmt_thread_data *)user_data;
+    cuda_device_info *device = gpumgmt_thread->device;
+    cudaStream_t stream = (cudaStream_t)gpumgmt_thread->device_data;
+
+    size_t work_items = 4;
+    size_t result_size = threads * (xmrig::ARGON2_HASHLEN + 4);
+
+    // NOTE(review): the launch configuration of this kernel call is empty in this
+    // copy of the patch; it is left untouched here - confirm against upstream.
+    posthash <<>> (
+            device->arguments.hash_memory[gpumgmt_thread->thread_id],
+            device->arguments.out_memory[gpumgmt_thread->thread_id],
+            device->arguments.preseed_memory[gpumgmt_thread->thread_id]);
+
+    device->error = cudaMemcpyAsync(device->arguments.host_seed_memory[gpumgmt_thread->thread_id], device->arguments.hash_memory[gpumgmt_thread->thread_id], result_size, cudaMemcpyDeviceToHost, stream);
+    if (device->error != cudaSuccess) {
+        device->error_message = "Error reading gpu memory.";
+        gpumgmt_thread->unlock();
+        return false;
+    }
+
+    // Poll only while the stream is still busy. Any other status is a real
+    // failure and must end the wait, otherwise this loop would never exit.
+    cudaError_t status = cudaStreamQuery(stream);
+    while(status == cudaErrorNotReady) {
+        this_thread::sleep_for(chrono::milliseconds(10));
+        status = cudaStreamQuery(stream);
+    }
+    if (status != cudaSuccess) {
+        device->error = status;
+        device->error_message = "Error reading gpu memory.";
+        gpumgmt_thread->unlock();
+        return false;
+    }
+
+    memcpy(memory, device->arguments.host_seed_memory[gpumgmt_thread->thread_id], result_size);
+    gpumgmt_thread->unlock();
+
+    return true;
+}
\ No newline at end of file
diff --git a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp 
b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp new file mode 100755 index 00000000..b217dc79 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp @@ -0,0 +1,888 @@ +// +// Created by Haifa Bogdan Adnan on 03/08/2018. +// + +#include +#include "../../../common/common.h" + +#include "crypto/argon2_hasher/hash/Hasher.h" +#include "crypto/argon2_hasher/hash/argon2/Argon2.h" + +#include "OpenCLHasher.h" +#include "OpenCLKernel.h" + +#include "crypto/argon2_hasher/common/DLLExport.h" + +#if defined(WITH_OPENCL) + +#ifndef CL_DEVICE_BOARD_NAME_AMD +#define CL_DEVICE_BOARD_NAME_AMD 0x4038 +#endif +#ifndef CL_DEVICE_TOPOLOGY_AMD +#define CL_DEVICE_TOPOLOGY_AMD 0x4037 +#endif +#ifndef CL_DEVICE_PCI_BUS_ID_NV +#define CL_DEVICE_PCI_BUS_ID_NV 0x4008 +#endif +#ifndef CL_DEVICE_PCI_SLOT_ID_NV +#define CL_DEVICE_PCI_SLOT_ID_NV 0x4009 +#endif + +typedef union +{ + struct { cl_uint type; cl_uint data[5]; } raw; + struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; +} device_topology_amd; + +#define KERNEL_WORKGROUP_SIZE 32 + +opencl_hasher::opencl_hasher() { + m_type = "GPU"; + m_subType = "OPENCL"; + m_shortSubType = "OCL"; + m_intensity = 0; + m_description = ""; + m_computingThreads = 0; +} + +opencl_hasher::~opencl_hasher() { +// this->cleanup(); +} + +bool opencl_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) { + cl_int error = CL_SUCCESS; + string error_message; + + m_profile = getArgon2Profile(algorithm, variant); + + __devices = __query_opencl_devices(error, error_message); + if(error != CL_SUCCESS) { + m_description = "No compatible GPU detected: " + error_message; + return false; + } + + if (__devices.empty()) { + m_description = "No compatible GPU detected."; + return false; + } + + return true; +} + +vector opencl_hasher::__query_opencl_devices(cl_int &error, string &error_message) { + cl_int err; + + cl_uint platform_count = 0; + cl_uint device_count = 0; + + vector 
result; + + clGetPlatformIDs(0, NULL, &platform_count); + if(platform_count == 0) { + return result; + } + + cl_platform_id *platforms = (cl_platform_id*)malloc(platform_count * sizeof(cl_platform_id)); + + err=clGetPlatformIDs(platform_count, platforms, &platform_count); + if(err != CL_SUCCESS) { + free(platforms); + error = err; + error_message = "Error querying for opencl platforms."; + return result; + } + + int counter = 0; + + for(uint32_t i=0; i < platform_count; i++) { + device_count = 0; + clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &device_count); + if(device_count == 0) { + continue; + } + + cl_device_id * devices = (cl_device_id*)malloc(device_count * sizeof(cl_device_id)); + err=clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, device_count, devices, &device_count); + + if(err != CL_SUCCESS) { + free(devices); + error = err; + error_message = "Error querying for opencl devices."; + continue; + } + + for(uint32_t j=0; j < device_count; j++) { + opencl_device_info *info = __get_device_info(platforms[i], devices[j]); + if(info->error != CL_SUCCESS) { + error = info->error; + error_message = info->error_message; + } + else { + info->device_index = counter; + result.push_back(info); + counter++; + } + } + + free(devices); + } + + free(platforms); + + return result; +} + +opencl_device_info *opencl_hasher::__get_device_info(cl_platform_id platform, cl_device_id device) { + opencl_device_info *device_info = new opencl_device_info(CL_SUCCESS, ""); + + device_info->platform = platform; + device_info->device = device; + + char *buffer; + size_t sz; + + // device name + string device_vendor; + sz = 0; + clGetDeviceInfo(device, CL_DEVICE_VENDOR, 0, NULL, &sz); + buffer = (char *)malloc(sz + 1); + device_info->error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sz, buffer, &sz); + if(device_info->error != CL_SUCCESS) { + free(buffer); + device_info->error_message = "Error querying device vendor."; + return device_info; + } + else { + buffer[sz] = 0; + 
device_vendor = buffer; + free(buffer); + } + + string device_name; + cl_device_info query_type = CL_DEVICE_NAME; + + if(device_vendor.find("Advanced Micro Devices") != string::npos) + query_type = CL_DEVICE_BOARD_NAME_AMD; + + sz = 0; + clGetDeviceInfo(device, query_type, 0, NULL, &sz); + buffer = (char *) malloc(sz + 1); + device_info->error = clGetDeviceInfo(device, query_type, sz, buffer, &sz); + if (device_info->error != CL_SUCCESS) { + free(buffer); + device_info->error_message = "Error querying device name."; + return device_info; + } else { + buffer[sz] = 0; + device_name = buffer; + free(buffer); + } + + string device_version; + sz = 0; + clGetDeviceInfo(device, CL_DEVICE_VERSION, 0, NULL, &sz); + buffer = (char *)malloc(sz + 1); + device_info->error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sz, buffer, &sz); + if(device_info->error != CL_SUCCESS) { + free(buffer); + device_info->error_message = "Error querying device version."; + return device_info; + } + else { + buffer[sz] = 0; + device_version = buffer; + free(buffer); + } + + device_info->device_string = device_vendor + " - " + device_name/* + " : " + device_version*/; + + device_info->error = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(device_info->max_mem_size), &(device_info->max_mem_size), NULL); + if(device_info->error != CL_SUCCESS) { + device_info->error_message = "Error querying device global memory size."; + return device_info; + } + + device_info->error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(device_info->max_allocable_mem_size), &(device_info->max_allocable_mem_size), NULL); + if(device_info->error != CL_SUCCESS) { + device_info->error_message = "Error querying device max memory allocation."; + return device_info; + } + + double mem_in_gb = device_info->max_mem_size / 1073741824.0; + stringstream ss; + ss << setprecision(2) << mem_in_gb; + device_info->device_string += (" (" + ss.str() + "GB)"); + + return device_info; +} + +bool 
opencl_hasher::configure(xmrig::HasherConfig &config) { + int index = config.getGPUCardsCount(); + double intensity = 0; + + int total_threads = 0; + intensity = config.getAverageGPUIntensity(); + + if (intensity == 0) { + m_intensity = 0; + m_description = "Status: DISABLED - by user."; + return false; + } + + bool cards_selected = false; + + intensity = 0; + + for(vector::iterator d = __devices.begin(); d != __devices.end(); d++, index++) { + stringstream ss; + ss << "["<< (index + 1) << "] " << (*d)->device_string; + string device_description = ss.str(); + (*d)->device_index = index; + (*d)->profile_info.profile = m_profile; + + if(config.gpuFilter().size() > 0) { + bool found = false; + for(xmrig::GPUFilter fit : config.gpuFilter()) { + if(device_description.find(fit.filter) != string::npos) { + found = true; + break; + } + } + if(!found) { + (*d)->profile_info.threads = 0; + ss << " - DISABLED" << endl; + m_description += ss.str(); + continue; + } + else { + cards_selected = true; + } + } + else { + cards_selected = true; + } + + ss << endl; + + double device_intensity = config.getGPUIntensity((*d)->device_index); + + m_description += ss.str(); + + if(!(__setup_device_info((*d), device_intensity))) { + m_description += (*d)->error_message; + m_description += "\n"; + continue; + }; + + DeviceInfo device; + + if((*d)->device_string.find("Advanced Micro Devices") != string::npos) { + device_topology_amd amdtopo; + if(clGetDeviceInfo((*d)->device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL) == CL_SUCCESS) { + char bus_id[50]; + sprintf(bus_id, "%02x:%02x.%x", amdtopo.pcie.bus, amdtopo.pcie.device, amdtopo.pcie.function); + device.bus_id = bus_id; + } + } + else if((*d)->device_string.find("NVIDIA") != string::npos) { + cl_uint bus; + cl_uint slot; + + if(clGetDeviceInfo ((*d)->device, CL_DEVICE_PCI_BUS_ID_NV, sizeof(bus), &bus, NULL) == CL_SUCCESS) { + if(clGetDeviceInfo ((*d)->device, CL_DEVICE_PCI_SLOT_ID_NV, sizeof(slot), &slot, NULL) == 
CL_SUCCESS) { + char bus_id[50]; + sprintf(bus_id, "%02x:%02x.0", bus, slot); + device.bus_id = bus_id; + } + } + } + + device.name = (*d)->device_string; + device.intensity = device_intensity; + storeDeviceInfo((*d)->device_index, device); + + __enabledDevices.push_back(*d); + + total_threads += (*d)->profile_info.threads; + intensity += device_intensity; + } + + config.addGPUCardsCount(index - config.getGPUCardsCount()); + + if(!cards_selected) { + m_intensity = 0; + m_description += "Status: DISABLED - no card enabled because of filtering."; + return false; + } + + if (total_threads == 0) { + m_intensity = 0; + m_description += "Status: DISABLED - not enough resources."; + return false; + } + + buildThreadData(); + + m_intensity = intensity / __enabledDevices.size(); + m_computingThreads = __enabledDevices.size() * 2; // 2 computing threads for each device + m_description += "Status: ENABLED - with " + to_string(total_threads) + " threads."; + + return true; +} + +bool opencl_hasher::__setup_device_info(opencl_device_info *device, double intensity) { + cl_int error; + + cl_context_properties properties[] = { + CL_CONTEXT_PLATFORM, (cl_context_properties) device->platform, + 0}; + + device->context = clCreateContext(properties, 1, &(device->device), NULL, NULL, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error getting device context."; + return false; + } + + device->queue = clCreateCommandQueue(device->context, device->device, CL_QUEUE_PROFILING_ENABLE, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error getting device command queue."; + return false; + } + + const char *srcptr[] = {OpenCLKernel.c_str()}; + size_t srcsize = OpenCLKernel.size(); + + device->program = clCreateProgramWithSource(device->context, 1, srcptr, &srcsize, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating opencl program for device."; + return 
false; + } + + error = clBuildProgram(device->program, 1, &device->device, "", NULL, NULL); + if (error != CL_SUCCESS) { + size_t log_size; + clGetProgramBuildInfo(device->program, device->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); + char *log = (char *) malloc(log_size + 1); + clGetProgramBuildInfo(device->program, device->device, CL_PROGRAM_BUILD_LOG, log_size, log, NULL); + log[log_size] = 0; + string build_log = log; + free(log); + + device->error = error; + device->error_message = "Error building opencl program for device: " + build_log; + return false; + } + + device->kernel_prehash = clCreateKernel(device->program, "prehash", &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating opencl prehash kernel for device."; + return false; + } + device->kernel_fill_blocks = clCreateKernel(device->program, "fill_blocks", &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating opencl main kernel for device."; + return false; + } + device->kernel_posthash = clCreateKernel(device->program, "posthash", &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating opencl posthash kernel for device."; + return false; + } + + device->profile_info.threads_per_chunk = (uint32_t) (device->max_allocable_mem_size / device->profile_info.profile->memSize); + size_t chunk_size = device->profile_info.threads_per_chunk * device->profile_info.profile->memSize; + + if (chunk_size == 0) { + device->error = -1; + device->error_message = "Not enough memory on GPU."; + return false; + } + + uint64_t usable_memory = device->max_mem_size; + double chunks = (double) usable_memory / (double) chunk_size; + + uint32_t max_threads = (uint32_t) (device->profile_info.threads_per_chunk * chunks); + + if (max_threads == 0) { + device->error = -1; + device->error_message = "Not enough memory on GPU."; + return false; + } + + device->profile_info.threads = 
(uint32_t) (max_threads * intensity / 100.0); + device->profile_info.threads = (device->profile_info.threads / 4) * 4; // make it divisible by 4 + if (max_threads > 0 && device->profile_info.threads == 0 && intensity > 0) + device->profile_info.threads = 4; + + double counter = (double) device->profile_info.threads / (double) device->profile_info.threads_per_chunk; + size_t allocated_mem_for_current_chunk = 0; + + if (counter > 0) { + if (counter > 1) { + allocated_mem_for_current_chunk = chunk_size; + } else { + allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter); + } + counter -= 1; + } else { + allocated_mem_for_current_chunk = 1; + } + device->arguments.memory_chunk_0 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + if (counter > 0) { + if (counter > 1) { + allocated_mem_for_current_chunk = chunk_size; + } else { + allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter); + } + counter -= 1; + } else { + allocated_mem_for_current_chunk = 1; + } + device->arguments.memory_chunk_1 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + if (counter > 0) { + if (counter > 1) { + allocated_mem_for_current_chunk = chunk_size; + } else { + allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter); + } + counter -= 1; + } else { + allocated_mem_for_current_chunk = 1; + } + device->arguments.memory_chunk_2 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + if (counter > 0) { 
+ if (counter > 1) { + allocated_mem_for_current_chunk = chunk_size; + } else { + allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter); + } + counter -= 1; + } else { + allocated_mem_for_current_chunk = 1; + } + device->arguments.memory_chunk_3 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + if (counter > 0) { + if (counter > 1) { + allocated_mem_for_current_chunk = chunk_size; + } else { + allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter); + } + counter -= 1; + } else { + allocated_mem_for_current_chunk = 1; + } + device->arguments.memory_chunk_4 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + if (counter > 0) { + if (counter > 1) { + allocated_mem_for_current_chunk = chunk_size; + } else { + allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter); + } + counter -= 1; + } else { + allocated_mem_for_current_chunk = 1; + } + device->arguments.memory_chunk_5 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + device->arguments.refs = clCreateBuffer(device->context, CL_MEM_READ_ONLY, + device->profile_info.profile->blockRefsSize * sizeof(uint32_t), NULL, + &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + if (device->profile_info.profile->succesiveIdxs == 1) { + device->arguments.idxs = NULL; + } + else { + device->arguments.idxs = clCreateBuffer(device->context, 
CL_MEM_READ_ONLY, + device->profile_info.profile->blockRefsSize * sizeof(uint32_t), NULL, + &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + } + + device->arguments.segments = clCreateBuffer(device->context, CL_MEM_READ_ONLY, device->profile_info.profile->segCount * 3 * sizeof(uint32_t), NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + size_t preseed_memory_size = device->profile_info.profile->pwdLen * 4; + size_t seed_memory_size = device->profile_info.threads * (device->profile_info.profile->thrCost * 2) * ARGON2_BLOCK_SIZE; + size_t out_memory_size = device->profile_info.threads * ARGON2_BLOCK_SIZE; + size_t hash_memory_size = device->profile_info.threads * (xmrig::ARGON2_HASHLEN + 4); + + device->arguments.preseed_memory[0] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + device->arguments.preseed_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + device->arguments.seed_memory[0] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + device->arguments.seed_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + device->arguments.out_memory[0] = clCreateBuffer(device->context, 
CL_MEM_READ_WRITE, out_memory_size, NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + device->arguments.out_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, out_memory_size, NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + device->arguments.hash_memory[0] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + device->arguments.hash_memory[1] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + //optimise address sizes + uint32_t *refs = (uint32_t *)malloc(device->profile_info.profile->blockRefsSize * sizeof(uint32_t)); + for(int i=0;iprofile_info.profile->blockRefsSize;i++) { + refs[i] = device->profile_info.profile->blockRefs[i*3 + 1]; + } + + error=clEnqueueWriteBuffer(device->queue, device->arguments.refs, CL_TRUE, 0, device->profile_info.profile->blockRefsSize * sizeof(uint32_t), refs, 0, NULL, NULL); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error writing to gpu memory."; + return false; + } + + free(refs); + + if(device->profile_info.profile->succesiveIdxs == 0) { + uint32_t *idxs = (uint32_t *) malloc(device->profile_info.profile->blockRefsSize * sizeof(uint32_t)); + for (int i = 0; i < device->profile_info.profile->blockRefsSize; i++) { + idxs[i] = device->profile_info.profile->blockRefs[i * 3]; + if (device->profile_info.profile->blockRefs[i * 3 + 2] == 1) { + idxs[i] |= 0x80000000; + } + } + + error=clEnqueueWriteBuffer(device->queue, device->arguments.idxs, CL_TRUE, 0, 
device->profile_info.profile->blockRefsSize * sizeof(uint32_t), idxs, 0, NULL, NULL); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error writing to gpu memory."; + return false; + } + + free(idxs); + } + + error=clEnqueueWriteBuffer(device->queue, device->arguments.segments, CL_TRUE, 0, device->profile_info.profile->segCount * 3 * sizeof(uint32_t), device->profile_info.profile->segments, 0, NULL, NULL); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error writing to gpu memory."; + return false; + } + + clSetKernelArg(device->kernel_fill_blocks, 0, sizeof(device->arguments.memory_chunk_0), &device->arguments.memory_chunk_0); + clSetKernelArg(device->kernel_fill_blocks, 1, sizeof(device->arguments.memory_chunk_1), &device->arguments.memory_chunk_1); + clSetKernelArg(device->kernel_fill_blocks, 2, sizeof(device->arguments.memory_chunk_2), &device->arguments.memory_chunk_2); + clSetKernelArg(device->kernel_fill_blocks, 3, sizeof(device->arguments.memory_chunk_3), &device->arguments.memory_chunk_3); + clSetKernelArg(device->kernel_fill_blocks, 4, sizeof(device->arguments.memory_chunk_4), &device->arguments.memory_chunk_4); + clSetKernelArg(device->kernel_fill_blocks, 5, sizeof(device->arguments.memory_chunk_5), &device->arguments.memory_chunk_5); + clSetKernelArg(device->kernel_fill_blocks, 8, sizeof(device->arguments.refs), &device->arguments.refs); + if(device->profile_info.profile->succesiveIdxs == 0) + clSetKernelArg(device->kernel_fill_blocks, 9, sizeof(device->arguments.idxs), &device->arguments.idxs); + else + clSetKernelArg(device->kernel_fill_blocks, 9, sizeof(cl_mem), NULL); + clSetKernelArg(device->kernel_fill_blocks, 10, sizeof(device->arguments.segments), &device->arguments.segments); + clSetKernelArg(device->kernel_fill_blocks, 11, sizeof(int32_t), &device->profile_info.profile->memSize); + clSetKernelArg(device->kernel_fill_blocks, 12, sizeof(int32_t), 
&device->profile_info.profile->thrCost); + clSetKernelArg(device->kernel_fill_blocks, 13, sizeof(int32_t), &device->profile_info.profile->segSize); + clSetKernelArg(device->kernel_fill_blocks, 14, sizeof(int32_t), &device->profile_info.profile->segCount); + clSetKernelArg(device->kernel_fill_blocks, 15, sizeof(int32_t), &device->profile_info.threads_per_chunk); + + clSetKernelArg(device->kernel_prehash, 2, sizeof(int32_t), &device->profile_info.profile->memCost); + clSetKernelArg(device->kernel_prehash, 3, sizeof(int32_t), &device->profile_info.profile->thrCost); + int passes = device->profile_info.profile->segCount / (4 * device->profile_info.profile->thrCost); + clSetKernelArg(device->kernel_prehash, 4, sizeof(int32_t), &passes); + clSetKernelArg(device->kernel_prehash, 6, sizeof(int32_t), &device->profile_info.profile->saltLen); + + return true; +} + +bool opencl_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data) { + opencl_gpumgmt_thread_data *gpumgmt_thread = (opencl_gpumgmt_thread_data *)user_data; + opencl_device_info *device = gpumgmt_thread->device; + + cl_int error; + + int sessions = max(profile->thrCost * 2, (uint32_t)16); + double hashes_per_block = sessions / (profile->thrCost * 2.0); + + size_t total_work_items = sessions * 4 * ceil(threads / hashes_per_block); + size_t local_work_items = sessions * 4; + + device->device_lock.lock(); + + error = clEnqueueWriteBuffer(device->queue, device->arguments.preseed_memory[gpumgmt_thread->thread_id], + CL_FALSE, 0, gpumgmt_thread->hashData.inSize, memory, 0, NULL, NULL); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error writing to gpu memory."; + device->device_lock.unlock(); + return false; + } + + int inSizeInInt = gpumgmt_thread->hashData.inSize / 4; + clSetKernelArg(device->kernel_prehash, 0, sizeof(device->arguments.preseed_memory[gpumgmt_thread->thread_id]), &device->arguments.preseed_memory[gpumgmt_thread->thread_id]); + 
clSetKernelArg(device->kernel_prehash, 1, sizeof(device->arguments.seed_memory[gpumgmt_thread->thread_id]), &device->arguments.seed_memory[gpumgmt_thread->thread_id]); + clSetKernelArg(device->kernel_prehash, 5, sizeof(int), &inSizeInInt); + clSetKernelArg(device->kernel_prehash, 7, sizeof(int), &threads); + clSetKernelArg(device->kernel_prehash, 8, sessions * sizeof(cl_ulong) * 76, NULL); // (preseed size is 16 ulongs = 128 bytes) + + error=clEnqueueNDRangeKernel(device->queue, device->kernel_prehash, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error running the kernel."; + device->device_lock.unlock(); + return false; + } + + return true; +} + +void *opencl_kernel_filler(int threads, Argon2Profile *profile, void *user_data) { + opencl_gpumgmt_thread_data *gpumgmt_thread = (opencl_gpumgmt_thread_data *)user_data; + opencl_device_info *device = gpumgmt_thread->device; + + cl_int error; + + size_t total_work_items = threads * KERNEL_WORKGROUP_SIZE * profile->thrCost; + size_t local_work_items = KERNEL_WORKGROUP_SIZE * profile->thrCost; + + size_t shared_mem = profile->thrCost * ARGON2_QWORDS_IN_BLOCK; + + clSetKernelArg(device->kernel_fill_blocks, 6, sizeof(device->arguments.seed_memory[gpumgmt_thread->thread_id]), &device->arguments.seed_memory[gpumgmt_thread->thread_id]); + clSetKernelArg(device->kernel_fill_blocks, 7, sizeof(device->arguments.out_memory[gpumgmt_thread->thread_id]), &device->arguments.out_memory[gpumgmt_thread->thread_id]); + clSetKernelArg(device->kernel_fill_blocks, 16, sizeof(cl_ulong) * shared_mem, NULL); + + error=clEnqueueNDRangeKernel(device->queue, device->kernel_fill_blocks, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error running the kernel."; + device->device_lock.unlock(); + return NULL; + } + + return (void *)1; +} + +bool 
opencl_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data) { + opencl_gpumgmt_thread_data *gpumgmt_thread = (opencl_gpumgmt_thread_data *)user_data; + opencl_device_info *device = gpumgmt_thread->device; + + cl_int error; + + size_t total_work_items = threads * 4; + size_t local_work_items = 4; + + clSetKernelArg(device->kernel_posthash, 0, sizeof(device->arguments.hash_memory[gpumgmt_thread->thread_id]), &device->arguments.hash_memory[gpumgmt_thread->thread_id]); + clSetKernelArg(device->kernel_posthash, 1, sizeof(device->arguments.out_memory[gpumgmt_thread->thread_id]), &device->arguments.out_memory[gpumgmt_thread->thread_id]); + clSetKernelArg(device->kernel_posthash, 2, sizeof(device->arguments.preseed_memory[gpumgmt_thread->thread_id]), &device->arguments.preseed_memory[gpumgmt_thread->thread_id]); + clSetKernelArg(device->kernel_posthash, 3, sizeof(cl_ulong) * 60, NULL); + + error=clEnqueueNDRangeKernel(device->queue, device->kernel_posthash, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error running the kernel."; + device->device_lock.unlock(); + return false; + } + + error = clEnqueueReadBuffer(device->queue, device->arguments.hash_memory[gpumgmt_thread->thread_id], CL_FALSE, 0, threads * (xmrig::ARGON2_HASHLEN + 4), memory, 0, NULL, NULL); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error reading gpu memory."; + device->device_lock.unlock(); + return false; + } + + error=clFinish(device->queue); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error flushing GPU queue."; + device->device_lock.unlock(); + return false; + } + + device->device_lock.unlock(); + + return true; +} + +void opencl_hasher::buildThreadData() { + __thread_data = new opencl_gpumgmt_thread_data[__enabledDevices.size() * 2]; + + for(int i=0; i < __enabledDevices.size(); i++) { + 
opencl_device_info *device = __enabledDevices[i]; + for(int threadId = 0; threadId < 2; threadId ++) { + opencl_gpumgmt_thread_data &thread_data = __thread_data[i * 2 + threadId]; + thread_data.device = device; + thread_data.thread_id = threadId; + thread_data.argon2 = new Argon2(opencl_kernel_prehasher, opencl_kernel_filler, opencl_kernel_posthasher, + nullptr, &thread_data); + thread_data.argon2->setThreads(device->profile_info.threads); + thread_data.hashData.outSize = xmrig::ARGON2_HASHLEN + 4; + } + } +} + +int opencl_hasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) { + opencl_gpumgmt_thread_data &threadData = __thread_data[threadIdx]; + threadData.hashData.input = input; + threadData.hashData.inSize = size; + threadData.hashData.output = output; + int hashCount = threadData.argon2->generateHashes(*m_profile, threadData.hashData); + if(threadData.device->error != CL_SUCCESS) { + LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->error_message); + return 0; + } + + uint32_t *nonce = ((uint32_t *)(((uint8_t*)threadData.hashData.input) + 39)); + (*nonce) += threadData.device->profile_info.threads; + + return hashCount; +} + +void opencl_hasher::cleanup() { + vector platforms; + + for(vector::iterator it=__devices.begin(); it != __devices.end(); it++) { + if ((*it)->profile_info.threads != 0) { + clReleaseMemObject((*it)->arguments.memory_chunk_0); + clReleaseMemObject((*it)->arguments.memory_chunk_1); + clReleaseMemObject((*it)->arguments.memory_chunk_2); + clReleaseMemObject((*it)->arguments.memory_chunk_3); + clReleaseMemObject((*it)->arguments.memory_chunk_4); + clReleaseMemObject((*it)->arguments.memory_chunk_5); + clReleaseMemObject((*it)->arguments.refs); + clReleaseMemObject((*it)->arguments.segments); + clReleaseMemObject((*it)->arguments.preseed_memory[0]); + clReleaseMemObject((*it)->arguments.preseed_memory[1]); + clReleaseMemObject((*it)->arguments.seed_memory[0]); + 
clReleaseMemObject((*it)->arguments.seed_memory[1]); + clReleaseMemObject((*it)->arguments.out_memory[0]); + clReleaseMemObject((*it)->arguments.out_memory[1]); + clReleaseMemObject((*it)->arguments.hash_memory[0]); + clReleaseMemObject((*it)->arguments.hash_memory[1]); + + clReleaseKernel((*it)->kernel_prehash); + clReleaseKernel((*it)->kernel_fill_blocks); + clReleaseKernel((*it)->kernel_posthash); + clReleaseProgram((*it)->program); + clReleaseCommandQueue((*it)->queue); + clReleaseContext((*it)->context); + } + clReleaseDevice((*it)->device); + delete (*it); + } + __devices.clear(); +} + +size_t opencl_hasher::parallelism(int workerIdx) { + // there are 2 computing threads per device, so divide by 2 to get device index + workerIdx /= 2; + + if(workerIdx < 0 || workerIdx > __enabledDevices.size()) + return 0; + + return __enabledDevices[workerIdx]->profile_info.threads; +} + +size_t opencl_hasher::deviceCount() { + return __enabledDevices.size(); +} + +REGISTER_HASHER(opencl_hasher); + +#endif // WITH_OPENCL diff --git a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h new file mode 100755 index 00000000..ece7c971 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h @@ -0,0 +1,110 @@ +// +// Created by Haifa Bogdan Adnan on 03/08/2018. 
+// + +#ifndef ARGON2_OPENCL_HASHER_H +#define ARGON2_OPENCL_HASHER_H + +#if defined(WITH_OPENCL) + +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS + +#if defined(__APPLE__) || defined(__MACOSX) +#include +#else +#include +#endif // !__APPLE__ + +struct opencl_kernel_arguments { + cl_mem memory_chunk_0; + cl_mem memory_chunk_1; + cl_mem memory_chunk_2; + cl_mem memory_chunk_3; + cl_mem memory_chunk_4; + cl_mem memory_chunk_5; + cl_mem refs; + cl_mem idxs; + cl_mem segments; + cl_mem preseed_memory[2]; + cl_mem seed_memory[2]; + cl_mem out_memory[2]; + cl_mem hash_memory[2]; +}; + +struct argon2profile_info { + argon2profile_info() { + threads = 0; + threads_per_chunk = 0; + } + + uint32_t threads; + uint32_t threads_per_chunk; + Argon2Profile *profile; +}; + +struct opencl_device_info { + opencl_device_info(cl_int err, const string &err_msg) { + error = err; + error_message = err_msg; + } + + cl_platform_id platform; + cl_device_id device; + cl_context context; + cl_command_queue queue; + + cl_program program; + cl_kernel kernel_prehash; + cl_kernel kernel_fill_blocks; + cl_kernel kernel_posthash; + + int device_index; + + opencl_kernel_arguments arguments; + argon2profile_info profile_info; + + string device_string; + uint64_t max_mem_size; + uint64_t max_allocable_mem_size; + + cl_int error; + string error_message; + + mutex device_lock; +}; + +struct opencl_gpumgmt_thread_data { + int thread_id; + opencl_device_info *device; + Argon2 *argon2; + HashData hashData; +}; + +class opencl_hasher : public Hasher { +public: + opencl_hasher(); + ~opencl_hasher(); + + virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant); + virtual bool configure(xmrig::HasherConfig &config); + virtual void cleanup(); + virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output); + virtual size_t parallelism(int workerIdx); + virtual size_t deviceCount(); + +private: + opencl_device_info *__get_device_info(cl_platform_id platform, cl_device_id device); + 
bool __setup_device_info(opencl_device_info *device, double intensity); + vector __query_opencl_devices(cl_int &error, string &error_message); + void buildThreadData(); + + vector __devices; + vector __enabledDevices; + opencl_gpumgmt_thread_data *__thread_data; + + Argon2Profile *m_profile; +}; + +#endif //WITH_OPENCL + +#endif //ARGON2_OPENCL_HASHER_H diff --git a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp new file mode 100644 index 00000000..b65539bc --- /dev/null +++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp @@ -0,0 +1,1085 @@ +// +// Created by Haifa Bogdan Adnan on 06/08/2018. +// + +#include "../../../common/common.h" + +#include "OpenCLKernel.h" + +string OpenCLKernel = R"OCL( +#define THREADS_PER_LANE 32 +#define BLOCK_SIZE_ULONG 128 +#define BLOCK_SIZE_UINT 256 +#define ARGON2_PREHASH_DIGEST_LENGTH_UINT 16 +#define ARGON2_PREHASH_SEED_LENGTH_UINT 18 + +#define ARGON2_BLOCK_SIZE 1024 +#define ARGON2_DWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 4) + +#define BLAKE_SHARED_MEM_ULONG 76 + +#define ARGON2_RAW_LENGTH 8 + +#define ARGON2_TYPE_VALUE 2 +#define ARGON2_VERSION 0x13 + +#define BLOCK_BYTES 32 +#define OUT_BYTES 16 + +#define G(m, r, i, a, b, c, d) \ +do { \ + a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ +} while ((void)0, 0) + +#define G_S(m, a, b, c, d) \ +do { \ + a = a + b + m; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ +} while ((void)0, 0) + +#define ROUND(m, t, r, shfl) \ +do { \ + G(m, r, t, v0, v1, v2, v3); \ + shfl[t + 4] = v1; \ + shfl[t + 8] = v2; \ + shfl[t + 12] = v3; \ + barrier(CLK_LOCAL_MEM_FENCE); \ + v1 = shfl[((t + 1) % 4)+ 4]; \ + v2 = shfl[((t + 2) % 
4)+ 8]; \ + v3 = shfl[((t + 3) % 4)+ 12]; \ + G(m, r, (t + 4), v0, v1, v2, v3); \ + shfl[((t + 1) % 4)+ 4] = v1; \ + shfl[((t + 2) % 4)+ 8] = v2; \ + shfl[((t + 3) % 4)+ 12] = v3; \ + barrier(CLK_LOCAL_MEM_FENCE); \ + v1 = shfl[t + 4]; \ + v2 = shfl[t + 8]; \ + v3 = shfl[t + 12]; \ +} while ((void)0, 0) + +#define ROUND_S(m, t, shfl) \ +do { \ + G_S(m, v0, v1, v2, v3); \ + shfl[t + 4] = v1; \ + shfl[t + 8] = v2; \ + shfl[t + 12] = v3; \ + barrier(CLK_LOCAL_MEM_FENCE); \ + v1 = shfl[((t + 1) % 4)+ 4]; \ + v2 = shfl[((t + 2) % 4)+ 8]; \ + v3 = shfl[((t + 3) % 4)+ 12]; \ + G_S(m, v0, v1, v2, v3); \ + shfl[((t + 1) % 4)+ 4] = v1; \ + shfl[((t + 2) % 4)+ 8] = v2; \ + shfl[((t + 3) % 4)+ 12] = v3; \ + barrier(CLK_LOCAL_MEM_FENCE); \ + v1 = shfl[t + 4]; \ + v2 = shfl[t + 8]; \ + v3 = shfl[t + 12]; \ +} while ((void)0, 0) + +ulong rotr64(ulong x, ulong n) +{ + return rotate(x, 64 - n); +} + +__constant ulong blake2b_IV[8] = { + 0x6A09E667F3BCC908, 0xBB67AE8584CAA73B, + 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1, + 0x510E527FADE682D1, 0x9B05688C2B3E6C1F, + 0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179 +}; + +__constant uint blake2b_sigma[12][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, + {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, +}; + +void blake2b_compress(__local ulong *h, __local ulong *m, ulong f0, __local ulong *shfl, int thr_id) +{ + ulong v0, v1, v2, v3; + + 
barrier(CLK_LOCAL_MEM_FENCE); + + v0 = h[thr_id]; + v1 = h[thr_id + 4]; + v2 = blake2b_IV[thr_id]; + v3 = blake2b_IV[thr_id + 4]; + + if(thr_id == 0) v3 ^= h[8]; + if(thr_id == 1) v3 ^= h[9]; + if(thr_id == 2) v3 ^= f0; + + ROUND(m, thr_id, 0, shfl); + ROUND(m, thr_id, 1, shfl); + ROUND(m, thr_id, 2, shfl); + ROUND(m, thr_id, 3, shfl); + ROUND(m, thr_id, 4, shfl); + ROUND(m, thr_id, 5, shfl); + ROUND(m, thr_id, 6, shfl); + ROUND(m, thr_id, 7, shfl); + ROUND(m, thr_id, 8, shfl); + ROUND(m, thr_id, 9, shfl); + ROUND(m, thr_id, 10, shfl); + ROUND(m, thr_id, 11, shfl); + + h[thr_id] ^= v0 ^ v2; + h[thr_id + 4] ^= v1 ^ v3; +} + +void blake2b_compress_static(__local ulong *h, ulong m, ulong f0, __local ulong *shfl, int thr_id) +{ + ulong v0, v1, v2, v3; + + barrier(CLK_LOCAL_MEM_FENCE); + + v0 = h[thr_id]; + v1 = h[thr_id + 4]; + v2 = blake2b_IV[thr_id]; + v3 = blake2b_IV[thr_id + 4]; + + if(thr_id == 0) v3 ^= h[8]; + if(thr_id == 1) v3 ^= h[9]; + if(thr_id == 2) v3 ^= f0; + + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + + h[thr_id] ^= v0 ^ v2; + h[thr_id + 4] ^= v1 ^ v3; +} + +void blake2b_incrementCounter(__local ulong *h, int inc) +{ + h[8] += (inc * 4); + h[9] += (h[8] < (inc * 4)); +} + +void blake2b_final_global(__global uint *out, int out_len, __local ulong *h, __local uint *buf, int buf_len, __local ulong *shfl, int thr_id) +{ + int left = BLOCK_BYTES - buf_len; + __local uint *cursor_out_local = buf + buf_len; + + for(int i=0; i < (left >> 2); i++, cursor_out_local += 4) { + cursor_out_local[thr_id] = 0; + } + + if(thr_id == 0) { + for (int i = 0; i < (left % 4); i++) { + cursor_out_local[i] = 0; + } + blake2b_incrementCounter(h, buf_len); + } + + blake2b_compress(h, (__local 
ulong *)buf, 0xFFFFFFFFFFFFFFFF, shfl, thr_id); + + __local uint *cursor_in = (__local uint *)h; + __global uint *cursor_out_global = out; + + for(int i=0; i < (out_len >> 2); i++, cursor_in += 4, cursor_out_global += 4) { + cursor_out_global[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (out_len % 4); i++) { + cursor_out_global[i] = cursor_in[i]; + } + } +} + +void blake2b_final_local(__local uint *out, int out_len, __local ulong *h, __local uint *buf, int buf_len, __local ulong *shfl, int thr_id) +{ + int left = BLOCK_BYTES - buf_len; + __local uint *cursor_out = buf + buf_len; + + for(int i=0; i < (left >> 2); i++, cursor_out += 4) { + cursor_out[thr_id] = 0; + } + + if(thr_id == 0) { + for (int i = 0; i < (left % 4); i++) { + cursor_out[i] = 0; + } + blake2b_incrementCounter(h, buf_len); + } + + blake2b_compress(h, (__local ulong *)buf, 0xFFFFFFFFFFFFFFFF, shfl, thr_id); + + __local uint *cursor_in = (__local uint *)h; + cursor_out = out; + + for(int i=0; i < (out_len >> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (out_len % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + } +} + +int blake2b_update_global(__global uint *in, int in_len, __local ulong *h, __local uint *buf, int buf_len, __local ulong *shfl, int thr_id) +{ + __global uint *cursor_in = in; + __local uint *cursor_out = buf + buf_len; + + if (buf_len + in_len > BLOCK_BYTES) { + int left = BLOCK_BYTES - buf_len; + + for(int i=0; i < (left >> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (left % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + blake2b_incrementCounter(h, BLOCK_BYTES); + } + + blake2b_compress(h, (__local ulong *)buf, 0, shfl, thr_id); + + buf_len = 0; + + in_len -= left; + in += left; + + while (in_len > BLOCK_BYTES) { + if(thr_id == 0) + blake2b_incrementCounter(h, BLOCK_BYTES); 
+ + cursor_in = in; + cursor_out = buf; + + for(int i=0; i < (BLOCK_BYTES / 4); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + blake2b_compress(h, (__local ulong *)buf, 0, shfl, thr_id); + + in_len -= BLOCK_BYTES; + in += BLOCK_BYTES; + } + } + + cursor_in = in; + cursor_out = buf + buf_len; + + for(int i=0; i < (in_len >> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (in_len % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + } + + return buf_len + in_len; +} + +int blake2b_update_static(uint in, int in_len, __local ulong *h, __local uint *buf, int buf_len, __local ulong *shfl, int thr_id) +{ + ulong in64 = in; + in64 = in64 << 32; + in64 = in64 | in; + + __local uint *cursor_out = buf + buf_len; + + if (buf_len + in_len > BLOCK_BYTES) { + int left = BLOCK_BYTES - buf_len; + + for(int i=0; i < (left >> 2); i++, cursor_out += 4) { + cursor_out[thr_id] = in; + } + + if(thr_id == 0) { + for (int i = 0; i < (left % 4); i++) { + cursor_out[i] = in; + } + blake2b_incrementCounter(h, BLOCK_BYTES); + } + + blake2b_compress(h, (__local ulong *)buf, 0, shfl, thr_id); + + buf_len = 0; + + in_len -= left; + + while (in_len > BLOCK_BYTES) { + if(thr_id == 0) + blake2b_incrementCounter(h, BLOCK_BYTES); + + blake2b_compress_static(h, in64, 0, shfl, thr_id); + + in_len -= BLOCK_BYTES; + } + } + + cursor_out = buf + buf_len; + + for(int i=0; i < (in_len >> 2); i++, cursor_out += 4) { + cursor_out[thr_id] = in; + } + + if(thr_id == 0) { + for (int i = 0; i < (in_len % 4); i++) { + cursor_out[i] = in; + } + } + + return buf_len + in_len; +} + +int blake2b_update_local(__local uint *in, int in_len, __local ulong *h, __local uint *buf, int buf_len, __local ulong *shfl, int thr_id) +{ + __local uint *cursor_in = in; + __local uint *cursor_out = buf + buf_len; + + if (buf_len + in_len > BLOCK_BYTES) { + int left = BLOCK_BYTES - buf_len; + + for(int i=0; i < 
(left >> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (left % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + blake2b_incrementCounter(h, BLOCK_BYTES); + } + + blake2b_compress(h, (__local ulong *)buf, 0, shfl, thr_id); + + buf_len = 0; + + in_len -= left; + in += left; + + while (in_len > BLOCK_BYTES) { + if(thr_id == 0) + blake2b_incrementCounter(h, BLOCK_BYTES); + + cursor_in = in; + cursor_out = buf; + + for(int i=0; i < (BLOCK_BYTES / 4); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + blake2b_compress(h, (__local ulong *)buf, 0, shfl, thr_id); + + in_len -= BLOCK_BYTES; + in += BLOCK_BYTES; + } + } + + cursor_in = in; + cursor_out = buf + buf_len; + + for(int i=0; i < (in_len >> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (in_len % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + } + + return buf_len + in_len; +} + +int blake2b_init(__local ulong *h, int out_len, int thr_id) +{ + h[thr_id * 2] = blake2b_IV[thr_id * 2]; + h[thr_id * 2 + 1] = blake2b_IV[thr_id * 2 + 1]; + + if(thr_id == 0) { + h[8] = h[9] = 0; + h[0] = 0x6A09E667F3BCC908 ^ ((out_len * 4) | (1 << 16) | (1 << 24)); + } + + return 0; +} + +void blake2b_digestLong_global(__global uint *out, int out_len, + __global uint *in, int in_len, + int thr_id, __local ulong* shared) +{ + __local ulong *h = shared; + __local ulong *shfl = &h[10]; + __local uint *buf = (__local uint *)&shfl[16]; + __local uint *out_buffer = &buf[32]; + int buf_len; + + if(thr_id == 0) buf[0] = (out_len * 4); + buf_len = 1; + + if (out_len <= OUT_BYTES) { + blake2b_init(h, out_len, thr_id); + buf_len = blake2b_update_global(in, in_len, h, buf, buf_len, shfl, thr_id); + blake2b_final_global(out, out_len, h, buf, buf_len, shfl, thr_id); + } else { + __local uint *cursor_in = out_buffer; + __global uint 
*cursor_out = out; + + blake2b_init(h, OUT_BYTES, thr_id); + buf_len = blake2b_update_global(in, in_len, h, buf, buf_len, shfl, thr_id); + blake2b_final_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id); + + for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + out += OUT_BYTES / 2; + + int to_produce = out_len - OUT_BYTES / 2; + while (to_produce > OUT_BYTES) { + buf_len = blake2b_init(h, OUT_BYTES, thr_id); + buf_len = blake2b_update_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id); + blake2b_final_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id); + + cursor_out = out; + cursor_in = out_buffer; + for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + out += OUT_BYTES / 2; + to_produce -= OUT_BYTES / 2; + } + + buf_len = blake2b_init(h, to_produce, thr_id); + buf_len = blake2b_update_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id); + blake2b_final_global(out, to_produce, h, buf, buf_len, shfl, thr_id); + } +} + +void blake2b_digestLong_local(__global uint *out, int out_len, + __local uint *in, int in_len, + int thr_id, __local ulong* shared) +{ + __local ulong *h = shared; + __local ulong *shfl = &h[10]; + __local uint *buf = (__local uint *)&shfl[16]; + __local uint *out_buffer = &buf[32]; + int buf_len; + + if(thr_id == 0) buf[0] = (out_len * 4); + buf_len = 1; + + if (out_len <= OUT_BYTES) { + blake2b_init(h, out_len, thr_id); + buf_len = blake2b_update_local(in, in_len, h, buf, buf_len, shfl, thr_id); + blake2b_final_global(out, out_len, h, buf, buf_len, shfl, thr_id); + } else { + __local uint *cursor_in = out_buffer; + __global uint *cursor_out = out; + + blake2b_init(h, OUT_BYTES, thr_id); + buf_len = blake2b_update_local(in, in_len, h, buf, buf_len, shfl, thr_id); + blake2b_final_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id); + + for(int i=0; i < (OUT_BYTES / 8); 
i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + out += OUT_BYTES / 2; + + int to_produce = out_len - OUT_BYTES / 2; + while (to_produce > OUT_BYTES) { + buf_len = blake2b_init(h, OUT_BYTES, thr_id); + buf_len = blake2b_update_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id); + blake2b_final_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id); + + cursor_out = out; + cursor_in = out_buffer; + for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + out += OUT_BYTES / 2; + to_produce -= OUT_BYTES / 2; + } + + buf_len = blake2b_init(h, to_produce, thr_id); + buf_len = blake2b_update_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id); + blake2b_final_global(out, to_produce, h, buf, buf_len, shfl, thr_id); + } +} + +#define fBlaMka(x, y) ((x) + (y) + 2 * upsample(mul_hi((uint)(x), (uint)(y)), (uint)(x) * (uint)y)) + +#define COMPUTE \ + a = fBlaMka(a, b); \ + d = rotate(d ^ a, (ulong)32); \ + c = fBlaMka(c, d); \ + b = rotate(b ^ c, (ulong)40); \ + a = fBlaMka(a, b); \ + d = rotate(d ^ a, (ulong)48); \ + c = fBlaMka(c, d); \ + b = rotate(b ^ c, (ulong)1); + +__constant char offsets_round_1[32][4] = { + { 0, 4, 8, 12 }, + { 1, 5, 9, 13 }, + { 2, 6, 10, 14 }, + { 3, 7, 11, 15 }, + { 16, 20, 24, 28 }, + { 17, 21, 25, 29 }, + { 18, 22, 26, 30 }, + { 19, 23, 27, 31 }, + { 32, 36, 40, 44 }, + { 33, 37, 41, 45 }, + { 34, 38, 42, 46 }, + { 35, 39, 43, 47 }, + { 48, 52, 56, 60 }, + { 49, 53, 57, 61 }, + { 50, 54, 58, 62 }, + { 51, 55, 59, 63 }, + { 64, 68, 72, 76 }, + { 65, 69, 73, 77 }, + { 66, 70, 74, 78 }, + { 67, 71, 75, 79 }, + { 80, 84, 88, 92 }, + { 81, 85, 89, 93 }, + { 82, 86, 90, 94 }, + { 83, 87, 91, 95 }, + { 96, 100, 104, 108 }, + { 97, 101, 105, 109 }, + { 98, 102, 106, 110 }, + { 99, 103, 107, 111 }, + { 112, 116, 120, 124 }, + { 113, 117, 121, 125 }, + { 114, 118, 122, 126 }, + { 115, 119, 123, 127 }, +}; + +__constant char 
offsets_round_2[32][4] = { + { 0, 5, 10, 15 }, + { 1, 6, 11, 12 }, + { 2, 7, 8, 13 }, + { 3, 4, 9, 14 }, + { 16, 21, 26, 31 }, + { 17, 22, 27, 28 }, + { 18, 23, 24, 29 }, + { 19, 20, 25, 30 }, + { 32, 37, 42, 47 }, + { 33, 38, 43, 44 }, + { 34, 39, 40, 45 }, + { 35, 36, 41, 46 }, + { 48, 53, 58, 63 }, + { 49, 54, 59, 60 }, + { 50, 55, 56, 61 }, + { 51, 52, 57, 62 }, + { 64, 69, 74, 79 }, + { 65, 70, 75, 76 }, + { 66, 71, 72, 77 }, + { 67, 68, 73, 78 }, + { 80, 85, 90, 95 }, + { 81, 86, 91, 92 }, + { 82, 87, 88, 93 }, + { 83, 84, 89, 94 }, + { 96, 101, 106, 111 }, + { 97, 102, 107, 108 }, + { 98, 103, 104, 109 }, + { 99, 100, 105, 110 }, + { 112, 117, 122, 127 }, + { 113, 118, 123, 124 }, + { 114, 119, 120, 125 }, + { 115, 116, 121, 126 }, +}; + +__constant char offsets_round_3[32][4] = { + { 0, 32, 64, 96 }, + { 1, 33, 65, 97 }, + { 16, 48, 80, 112 }, + { 17, 49, 81, 113 }, + { 2, 34, 66, 98 }, + { 3, 35, 67, 99 }, + { 18, 50, 82, 114 }, + { 19, 51, 83, 115 }, + { 4, 36, 68, 100 }, + { 5, 37, 69, 101 }, + { 20, 52, 84, 116 }, + { 21, 53, 85, 117 }, + { 6, 38, 70, 102 }, + { 7, 39, 71, 103 }, + { 22, 54, 86, 118 }, + { 23, 55, 87, 119 }, + { 8, 40, 72, 104 }, + { 9, 41, 73, 105 }, + { 24, 56, 88, 120 }, + { 25, 57, 89, 121 }, + { 10, 42, 74, 106 }, + { 11, 43, 75, 107 }, + { 26, 58, 90, 122 }, + { 27, 59, 91, 123 }, + { 12, 44, 76, 108 }, + { 13, 45, 77, 109 }, + { 28, 60, 92, 124 }, + { 29, 61, 93, 125 }, + { 14, 46, 78, 110 }, + { 15, 47, 79, 111 }, + { 30, 62, 94, 126 }, + { 31, 63, 95, 127 }, +}; + +__constant char offsets_round_4[32][4] = { + { 0, 33, 80, 113 }, + { 1, 48, 81, 96 }, + { 16, 49, 64, 97 }, + { 17, 32, 65, 112 }, + { 2, 35, 82, 115 }, + { 3, 50, 83, 98 }, + { 18, 51, 66, 99 }, + { 19, 34, 67, 114 }, + { 4, 37, 84, 117 }, + { 5, 52, 85, 100 }, + { 20, 53, 68, 101 }, + { 21, 36, 69, 116 }, + { 6, 39, 86, 119 }, + { 7, 54, 87, 102 }, + { 22, 55, 70, 103 }, + { 23, 38, 71, 118 }, + { 8, 41, 88, 121 }, + { 9, 56, 89, 104 }, + { 24, 57, 72, 105 }, + { 
25, 40, 73, 120 }, + { 10, 43, 90, 123 }, + { 11, 58, 91, 106 }, + { 26, 59, 74, 107 }, + { 27, 42, 75, 122 }, + { 12, 45, 92, 125 }, + { 13, 60, 93, 108 }, + { 28, 61, 76, 109 }, + { 29, 44, 77, 124 }, + { 14, 47, 94, 127 }, + { 15, 62, 95, 110 }, + { 30, 63, 78, 111 }, + { 31, 46, 79, 126 }, +}; + +#define G1(data) \ +{ \ + barrier(CLK_LOCAL_MEM_FENCE); \ + a = data[i1_0]; \ + b = data[i1_1]; \ + c = data[i1_2]; \ + d = data[i1_3]; \ + COMPUTE \ + data[i1_1] = b; \ + data[i1_2] = c; \ + data[i1_3] = d; \ + barrier(CLK_LOCAL_MEM_FENCE); \ +} + +#define G2(data) \ +{ \ + b = data[i2_1]; \ + c = data[i2_2]; \ + d = data[i2_3]; \ + COMPUTE \ + data[i2_0] = a; \ + data[i2_1] = b; \ + data[i2_2] = c; \ + data[i2_3] = d; \ + barrier(CLK_LOCAL_MEM_FENCE); \ +} + +#define G3(data) \ +{ \ + a = data[i3_0]; \ + b = data[i3_1]; \ + c = data[i3_2]; \ + d = data[i3_3]; \ + COMPUTE \ + data[i3_1] = b; \ + data[i3_2] = c; \ + data[i3_3] = d; \ + barrier(CLK_LOCAL_MEM_FENCE); \ +} + +#define G4(data) \ +{ \ + b = data[i4_1]; \ + c = data[i4_2]; \ + d = data[i4_3]; \ + COMPUTE \ + data[i4_0] = a; \ + data[i4_1] = b; \ + data[i4_2] = c; \ + data[i4_3] = d; \ + barrier(CLK_LOCAL_MEM_FENCE); \ +} + +__kernel void fill_blocks(__global ulong *chunk_0, + __global ulong *chunk_1, + __global ulong *chunk_2, + __global ulong *chunk_3, + __global ulong *chunk_4, + __global ulong *chunk_5, + __global ulong *seed, + __global ulong *out, + __global uint *refs, + __global uint *idxs, + __global uint *segments, + int memsize, + int lanes, + int seg_length, + int seg_count, + int threads_per_chunk, + __local ulong *scratchpad) { // lanes * BLOCK_SIZE_ULONG + ulong4 tmp; + ulong a, b, c, d; + + int hash = get_group_id(0); + int local_id = get_local_id(0); + + int id = local_id % THREADS_PER_LANE; + int lane = local_id / THREADS_PER_LANE; + int lane_length = seg_length * 4; + + ulong chunks[6]; + chunks[0] = (ulong)chunk_0; + chunks[1] = (ulong)chunk_1; + chunks[2] = (ulong)chunk_2; + chunks[3] = 
(ulong)chunk_3; + chunks[4] = (ulong)chunk_4; + chunks[5] = (ulong)chunk_5; + int chunk_index = hash / threads_per_chunk; + int chunk_offset = hash - chunk_index * threads_per_chunk; + __global ulong *memory = (__global ulong *)chunks[chunk_index] + chunk_offset * (memsize / 8); + + int i1_0 = offsets_round_1[id][0]; + int i1_1 = offsets_round_1[id][1]; + int i1_2 = offsets_round_1[id][2]; + int i1_3 = offsets_round_1[id][3]; + + int i2_0 = offsets_round_2[id][0]; + int i2_1 = offsets_round_2[id][1]; + int i2_2 = offsets_round_2[id][2]; + int i2_3 = offsets_round_2[id][3]; + + int i3_0 = offsets_round_3[id][0]; + int i3_1 = offsets_round_3[id][1]; + int i3_2 = offsets_round_3[id][2]; + int i3_3 = offsets_round_3[id][3]; + + int i4_0 = offsets_round_4[id][0]; + int i4_1 = offsets_round_4[id][1]; + int i4_2 = offsets_round_4[id][2]; + int i4_3 = offsets_round_4[id][3]; + + __global ulong *out_mem = out + hash * BLOCK_SIZE_ULONG; + __global ulong *seed_mem = seed + hash * lanes * 2 * BLOCK_SIZE_ULONG + lane * 2 * BLOCK_SIZE_ULONG; + + __global ulong *seed_dst = memory + lane * lane_length * BLOCK_SIZE_ULONG; + + vstore4(vload4(id, seed_mem), id, seed_dst); + + seed_mem += BLOCK_SIZE_ULONG; + seed_dst += BLOCK_SIZE_ULONG; + + vstore4(vload4(id, seed_mem), id, seed_dst); + + __global ulong *next_block; + __global ulong *prev_block; + __global uint *seg_refs; + __global uint *seg_idxs; + + __local ulong *state = scratchpad + lane * BLOCK_SIZE_ULONG; + + segments += (lane * 3); + + for(int s=0; s < (seg_count / lanes); s++) { + int idx = ((s == 0) ? 2 : 0); // index for first slice in each lane is 2 + + int with_xor = ((s >= 4) ? 
1 : 0); + int keep = 1; + int slice = s % 4; + int pass = s / 4; + __global int *cur_seg = &segments[s * lanes * 3]; + + int cur_idx = cur_seg[0]; + int prev_idx = cur_seg[1]; + int seg_type = cur_seg[2]; + int ref_idx = 0; + ulong4 ref = 0, next = 0; + + prev_block = memory + prev_idx * BLOCK_SIZE_ULONG; + + tmp = vload4(id, prev_block); + + if(seg_type == 0) { + seg_refs = refs + ((s * lanes + lane) * seg_length - ((s > 0) ? lanes : lane) * 2); + ref_idx = seg_refs[0]; + + if(idxs != 0) { + seg_idxs = idxs + ((s * lanes + lane) * seg_length - ((s > 0) ? lanes : lane) * 2); + cur_idx = seg_idxs[0]; + } + + ulong4 nextref = vload4(id, memory + ref_idx * BLOCK_SIZE_ULONG); + + for (int i=0;idx < seg_length;i++, idx++) { + next_block = memory + (cur_idx & 0x7FFFFFFF) * BLOCK_SIZE_ULONG; + + if(with_xor == 1) + next = vload4(id, next_block); + + ref = nextref; + + if (idx < seg_length - 1) { + ref_idx = seg_refs[i + 1]; + + if(idxs != 0) { + keep = cur_idx & 0x80000000; + cur_idx = seg_idxs[i + 1]; + } + else + cur_idx++; + + nextref = vload4(id, memory + ref_idx * BLOCK_SIZE_ULONG); + } + + tmp ^= ref; + + vstore4(tmp, id, state); + + G1(state); + G2(state); + G3(state); + G4(state); + + if(with_xor == 1) + tmp ^= next; + + tmp ^= vload4(id, state); + + if(keep > 0) { + vstore4(tmp, id, next_block); + barrier(CLK_GLOBAL_MEM_FENCE); + } + } + } + else { + vstore4(tmp, id, state); + barrier(CLK_LOCAL_MEM_FENCE); + + for (int i=0;idx < seg_length;i++, idx++, cur_idx++) { + ulong pseudo_rand = state[0]; + + ulong ref_lane = ((pseudo_rand >> 32)) % lanes; // thr_cost + uint reference_area_size = 0; + + if(pass > 0) { + if (lane == ref_lane) { + reference_area_size = lane_length - seg_length + idx - 1; + } else { + reference_area_size = lane_length - seg_length + ((idx == 0) ? (-1) : 0); + } + } + else { + if (lane == ref_lane) { + reference_area_size = slice * seg_length + idx - 1; // seg_length + } else { + reference_area_size = slice * seg_length + ((idx == 0) ? 
(-1) : 0); + } + } + + ulong relative_position = pseudo_rand & 0xFFFFFFFF; + relative_position = (relative_position * relative_position) >> 32; + + relative_position = reference_area_size - 1 - + ((reference_area_size * relative_position) >> 32); + + ref_idx = ref_lane * lane_length + (((pass > 0 && slice < 3) ? ((slice + 1) * seg_length) : 0) + relative_position) % lane_length; + + ref = vload4(id, memory + ref_idx * BLOCK_SIZE_ULONG); + + next_block = memory + cur_idx * BLOCK_SIZE_ULONG; + + if(with_xor == 1) + next = vload4(id, next_block); + + tmp ^= ref; + + vstore4(tmp, id, state); + + G1(state); + G2(state); + G3(state); + G4(state); + + if(with_xor == 1) + tmp ^= next; + + tmp ^= vload4(id, state); + + vstore4(tmp, id, state); + vstore4(tmp, id, next_block); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + } + } + + vstore4(tmp, id, state); + barrier(CLK_LOCAL_MEM_FENCE); + + if(lane == 0) { // first lane needs to acumulate results + for(int l=1; l> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (pwdlen % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + + uint nonce = (preseed[9] >> 24) | (preseed[10] << 8); + nonce += hash; + local_preseed[9] = (preseed[9] & 0x00FFFFFF) | (nonce << 24); + local_preseed[10] = (preseed[10] & 0xFF000000) | (nonce >> 8); + } + + int buf_len = blake2b_init(h, ARGON2_PREHASH_DIGEST_LENGTH_UINT, thr_id); + *value = lanes; //lanes + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + *value = 32; //outlen + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + *value = memsz; //m_cost + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + *value = passes; //t_cost + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + *value = ARGON2_VERSION; //version + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + *value = 
ARGON2_TYPE_VALUE; //type + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + *value = pwdlen * 4; //pw_len + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + buf_len = blake2b_update_local(local_preseed, pwdlen, h, buf, buf_len, shfl, thr_id); + *value = saltlen * 4; //salt_len + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + buf_len = blake2b_update_local(local_preseed, saltlen, h, buf, buf_len, shfl, thr_id); + *value = 0; //secret_len + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + buf_len = blake2b_update_local(0, 0, h, buf, buf_len, shfl, thr_id); + *value = 0; //ad_len + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + buf_len = blake2b_update_local(0, 0, h, buf, buf_len, shfl, thr_id); + + blake2b_final_local(local_mem, ARGON2_PREHASH_DIGEST_LENGTH_UINT, h, buf, buf_len, shfl, thr_id); + + if (thr_id == 0) { + local_mem[ARGON2_PREHASH_DIGEST_LENGTH_UINT] = idx; + local_mem[ARGON2_PREHASH_DIGEST_LENGTH_UINT + 1] = lane; + } + + blake2b_digestLong_local(local_seed, ARGON2_DWORDS_IN_BLOCK, local_mem, ARGON2_PREHASH_SEED_LENGTH_UINT, thr_id, (__local ulong *)&local_mem[20]); + } +} + +__kernel void posthash ( + __global uint *hash, + __global uint *out, + __global uint *preseed, + __local ulong *blake_shared) { + + int hash_id = get_group_id(0); + int thread = get_local_id(0); + + __global uint *local_hash = hash + hash_id * (ARGON2_RAW_LENGTH + 1); + __global uint *local_out = out + hash_id * BLOCK_SIZE_UINT; + + blake2b_digestLong_global(local_hash, ARGON2_RAW_LENGTH, local_out, ARGON2_DWORDS_IN_BLOCK, thread, blake_shared); + if(thread == 0) { + uint nonce = (preseed[9] >> 24) | (preseed[10] << 8); + nonce += hash_id; + local_hash[ARGON2_RAW_LENGTH] = nonce; + } +} + +)OCL"; diff --git a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.h b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.h new file mode 100644 index 
00000000..386659f8 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.h @@ -0,0 +1,10 @@ +// +// Created by Haifa Bogdan Adnan on 06/08/2018. +// + +#ifndef ARGON2_OPENCL_KERNEL_H +#define ARGON2_OPENCL_KERNEL_H + +extern string OpenCLKernel; + +#endif //ARGON2_OPENCL_KERNEL_H diff --git a/src/crypto/asm/CryptonightR_soft_aes_template.inc b/src/crypto/asm/CryptonightR_soft_aes_template.inc deleted file mode 100644 index e9e1bb4f..00000000 --- a/src/crypto/asm/CryptonightR_soft_aes_template.inc +++ /dev/null @@ -1,281 +0,0 @@ -PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part1) -PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_mainloop) -PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part2) -PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part3) -PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end) - -ALIGN(64) -FN_PREFIX(CryptonightR_soft_aes_template_part1): - mov rcx, [rcx] - - mov QWORD PTR [rsp+8], rcx - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 232 - - mov eax, [rcx+96] - mov ebx, [rcx+100] - mov esi, [rcx+104] - mov edx, [rcx+108] - mov [rsp+144], eax - mov [rsp+148], ebx - mov [rsp+152], esi - mov [rsp+156], edx - - mov rax, QWORD PTR [rcx+48] - mov r10, rcx - xor rax, QWORD PTR [rcx+16] - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD PTR [rcx] - mov r9, QWORD PTR [rcx+40] - xor r9, QWORD PTR [rcx+8] - movq xmm4, rax - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov r11, QWORD PTR [rcx+224] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r10+72] - mov rax, QWORD PTR [r10+80] - movq xmm0, rdx - xor rax, QWORD PTR [r10+64] - - movaps XMMWORD PTR [rsp+16], xmm6 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+48], xmm8 - movaps XMMWORD PTR [rsp+64], xmm9 - movaps XMMWORD PTR [rsp+80], xmm10 - movaps XMMWORD PTR [rsp+96], xmm11 - movaps XMMWORD PTR [rsp+112], xmm12 - movaps XMMWORD PTR [rsp+128], xmm13 - - movq xmm5, rax - - mov rax, r8 - punpcklqdq 
xmm4, xmm0 - and eax, 2097136 - movq xmm10, QWORD PTR [r10+96] - movq xmm0, rcx - mov rcx, QWORD PTR [r10+104] - xorps xmm9, xmm9 - mov QWORD PTR [rsp+328], rax - movq xmm12, r11 - mov QWORD PTR [rsp+320], r9 - punpcklqdq xmm5, xmm0 - movq xmm13, rcx - mov r12d, 524288 - - ALIGN(64) -FN_PREFIX(CryptonightR_soft_aes_template_mainloop): - movd xmm11, r12d - mov r12, QWORD PTR [r10+272] - lea r13, QWORD PTR [rax+r11] - mov esi, DWORD PTR [r13] - movq xmm0, r9 - mov r10d, DWORD PTR [r13+4] - movq xmm7, r8 - mov ebp, DWORD PTR [r13+12] - mov r14d, DWORD PTR [r13+8] - mov rdx, QWORD PTR [rsp+328] - movzx ecx, sil - shr esi, 8 - punpcklqdq xmm7, xmm0 - mov r15d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - mov edi, DWORD PTR [r12+rcx*4] - movzx ecx, r14b - shr r14d, 8 - mov ebx, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - shr ebp, 8 - mov r9d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - xor r15d, DWORD PTR [r12+rcx*4+1024] - movzx ecx, r14b - shr r14d, 8 - mov eax, r14d - shr eax, 8 - xor edi, DWORD PTR [r12+rcx*4+1024] - add eax, 256 - movzx ecx, bpl - shr ebp, 8 - xor ebx, DWORD PTR [r12+rcx*4+1024] - movzx ecx, sil - shr esi, 8 - xor r9d, DWORD PTR [r12+rcx*4+1024] - add r12, 2048 - movzx ecx, r10b - shr r10d, 8 - add r10d, 256 - mov r11d, DWORD PTR [r12+rax*4] - xor r11d, DWORD PTR [r12+rcx*4] - xor r11d, r9d - movzx ecx, sil - mov r10d, DWORD PTR [r12+r10*4] - shr esi, 8 - add esi, 256 - xor r10d, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - xor r10d, ebx - shr ebp, 8 - movd xmm1, r11d - add ebp, 256 - movq r11, xmm12 - mov r9d, DWORD PTR [r12+rcx*4] - xor r9d, DWORD PTR [r12+rsi*4] - mov eax, DWORD PTR [r12+rbp*4] - xor r9d, edi - movzx ecx, r14b - movd xmm0, r10d - movd xmm2, r9d - xor eax, DWORD PTR [r12+rcx*4] - mov rcx, rdx - xor eax, r15d - punpckldq xmm2, xmm1 - xor rcx, 16 - movd xmm6, eax - mov rax, rdx - punpckldq xmm6, xmm0 - xor rax, 32 - punpckldq xmm6, xmm2 - xor rdx, 48 - movdqu xmm2, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm2 - pxor xmm6, 
xmm7 - paddq xmm2, xmm4 - movdqu xmm1, XMMWORD PTR [rax+r11] - movdqu xmm0, XMMWORD PTR [rdx+r11] - pxor xmm6, xmm1 - pxor xmm6, xmm0 - paddq xmm0, xmm5 - movdqu XMMWORD PTR [rcx+r11], xmm0 - movdqu XMMWORD PTR [rax+r11], xmm2 - movq rcx, xmm13 - paddq xmm1, xmm7 - movdqu XMMWORD PTR [rdx+r11], xmm1 - movq rdi, xmm6 - mov r10, rdi - and r10d, 2097136 - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [r13], xmm0 - - mov ebx, [rsp+144] - mov ebp, [rsp+152] - add ebx, [rsp+148] - add ebp, [rsp+156] - shl rbp, 32 - or rbx, rbp - - xor rbx, QWORD PTR [r10+r11] - lea r14, QWORD PTR [r10+r11] - mov rbp, QWORD PTR [r14+8] - - mov [rsp+160], rbx - mov [rsp+168], rdi - mov [rsp+176], rbp - mov [rsp+184], r10 - mov r10, rsp - - mov ebx, [rsp+144] - mov esi, [rsp+148] - mov edi, [rsp+152] - mov ebp, [rsp+156] - - movd esp, xmm7 - movaps xmm0, xmm7 - psrldq xmm0, 8 - movd r15d, xmm0 - movd eax, xmm4 - movd edx, xmm5 - movaps xmm0, xmm5 - psrldq xmm0, 8 - movd r9d, xmm0 - -FN_PREFIX(CryptonightR_soft_aes_template_part2): - mov rsp, r10 - mov [rsp+144], ebx - mov [rsp+148], esi - mov [rsp+152], edi - mov [rsp+156], ebp - - mov edi, edi - shl rbp, 32 - or rbp, rdi - xor r8, rbp - - mov ebx, ebx - shl rsi, 32 - or rsi, rbx - xor QWORD PTR [rsp+320], rsi - - mov rbx, [rsp+160] - mov rdi, [rsp+168] - mov rbp, [rsp+176] - mov r10, [rsp+184] - - mov r9, r10 - xor r9, 16 - mov rcx, r10 - xor rcx, 32 - xor r10, 48 - mov rax, rbx - mul rdi - movdqu xmm2, XMMWORD PTR [r9+r11] - movdqu xmm1, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm2 - pxor xmm6, xmm1 - paddq xmm1, xmm7 - add r8, rdx - movdqu xmm0, XMMWORD PTR [r10+r11] - pxor xmm6, xmm0 - paddq xmm0, xmm5 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [r9+r11], xmm0 - movdqa xmm5, xmm4 - mov r9, QWORD PTR [rsp+320] - movdqa xmm4, xmm6 - add r9, rax - movdqu XMMWORD PTR [rcx+r11], xmm2 - movdqu XMMWORD PTR [r10+r11], xmm1 - mov r10, QWORD PTR [rsp+304] - movd r12d, xmm11 - mov QWORD PTR [r14], r8 - xor r8, rbx - mov rax, r8 - mov QWORD PTR 
[r14+8], r9 - and eax, 2097136 - xor r9, rbp - mov QWORD PTR [rsp+320], r9 - mov QWORD PTR [rsp+328], rax - sub r12d, 1 - jne FN_PREFIX(CryptonightR_soft_aes_template_mainloop) - -FN_PREFIX(CryptonightR_soft_aes_template_part3): - movaps xmm6, XMMWORD PTR [rsp+16] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+48] - movaps xmm9, XMMWORD PTR [rsp+64] - movaps xmm10, XMMWORD PTR [rsp+80] - movaps xmm11, XMMWORD PTR [rsp+96] - movaps xmm12, XMMWORD PTR [rsp+112] - movaps xmm13, XMMWORD PTR [rsp+128] - - add rsp, 232 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - ret -FN_PREFIX(CryptonightR_soft_aes_template_end): diff --git a/src/crypto/asm/CryptonightR_soft_aes_template_win.inc b/src/crypto/asm/CryptonightR_soft_aes_template_win.inc deleted file mode 100644 index 589192ca..00000000 --- a/src/crypto/asm/CryptonightR_soft_aes_template_win.inc +++ /dev/null @@ -1,281 +0,0 @@ -PUBLIC CryptonightR_soft_aes_template_part1 -PUBLIC CryptonightR_soft_aes_template_mainloop -PUBLIC CryptonightR_soft_aes_template_part2 -PUBLIC CryptonightR_soft_aes_template_part3 -PUBLIC CryptonightR_soft_aes_template_end - -ALIGN(64) -CryptonightR_soft_aes_template_part1: - mov rcx, [rcx] - - mov QWORD PTR [rsp+8], rcx - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 232 - - mov eax, [rcx+96] - mov ebx, [rcx+100] - mov esi, [rcx+104] - mov edx, [rcx+108] - mov [rsp+144], eax - mov [rsp+148], ebx - mov [rsp+152], esi - mov [rsp+156], edx - - mov rax, QWORD PTR [rcx+48] - mov r10, rcx - xor rax, QWORD PTR [rcx+16] - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD PTR [rcx] - mov r9, QWORD PTR [rcx+40] - xor r9, QWORD PTR [rcx+8] - movq xmm4, rax - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov r11, QWORD PTR [rcx+224] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r10+72] - mov rax, QWORD PTR [r10+80] - movq xmm0, rdx - xor rax, QWORD PTR [r10+64] - - movaps XMMWORD PTR 
[rsp+16], xmm6 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+48], xmm8 - movaps XMMWORD PTR [rsp+64], xmm9 - movaps XMMWORD PTR [rsp+80], xmm10 - movaps XMMWORD PTR [rsp+96], xmm11 - movaps XMMWORD PTR [rsp+112], xmm12 - movaps XMMWORD PTR [rsp+128], xmm13 - - movq xmm5, rax - - mov rax, r8 - punpcklqdq xmm4, xmm0 - and eax, 2097136 - movq xmm10, QWORD PTR [r10+96] - movq xmm0, rcx - mov rcx, QWORD PTR [r10+104] - xorps xmm9, xmm9 - mov QWORD PTR [rsp+328], rax - movq xmm12, r11 - mov QWORD PTR [rsp+320], r9 - punpcklqdq xmm5, xmm0 - movq xmm13, rcx - mov r12d, 524288 - - ALIGN(64) -CryptonightR_soft_aes_template_mainloop: - movd xmm11, r12d - mov r12, QWORD PTR [r10+272] - lea r13, QWORD PTR [rax+r11] - mov esi, DWORD PTR [r13] - movq xmm0, r9 - mov r10d, DWORD PTR [r13+4] - movq xmm7, r8 - mov ebp, DWORD PTR [r13+12] - mov r14d, DWORD PTR [r13+8] - mov rdx, QWORD PTR [rsp+328] - movzx ecx, sil - shr esi, 8 - punpcklqdq xmm7, xmm0 - mov r15d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - mov edi, DWORD PTR [r12+rcx*4] - movzx ecx, r14b - shr r14d, 8 - mov ebx, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - shr ebp, 8 - mov r9d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - xor r15d, DWORD PTR [r12+rcx*4+1024] - movzx ecx, r14b - shr r14d, 8 - mov eax, r14d - shr eax, 8 - xor edi, DWORD PTR [r12+rcx*4+1024] - add eax, 256 - movzx ecx, bpl - shr ebp, 8 - xor ebx, DWORD PTR [r12+rcx*4+1024] - movzx ecx, sil - shr esi, 8 - xor r9d, DWORD PTR [r12+rcx*4+1024] - add r12, 2048 - movzx ecx, r10b - shr r10d, 8 - add r10d, 256 - mov r11d, DWORD PTR [r12+rax*4] - xor r11d, DWORD PTR [r12+rcx*4] - xor r11d, r9d - movzx ecx, sil - mov r10d, DWORD PTR [r12+r10*4] - shr esi, 8 - add esi, 256 - xor r10d, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - xor r10d, ebx - shr ebp, 8 - movd xmm1, r11d - add ebp, 256 - movq r11, xmm12 - mov r9d, DWORD PTR [r12+rcx*4] - xor r9d, DWORD PTR [r12+rsi*4] - mov eax, DWORD PTR [r12+rbp*4] - xor r9d, edi - movzx ecx, r14b 
- movd xmm0, r10d - movd xmm2, r9d - xor eax, DWORD PTR [r12+rcx*4] - mov rcx, rdx - xor eax, r15d - punpckldq xmm2, xmm1 - xor rcx, 16 - movd xmm6, eax - mov rax, rdx - punpckldq xmm6, xmm0 - xor rax, 32 - punpckldq xmm6, xmm2 - xor rdx, 48 - movdqu xmm2, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm2 - pxor xmm6, xmm7 - paddq xmm2, xmm4 - movdqu xmm1, XMMWORD PTR [rax+r11] - movdqu xmm0, XMMWORD PTR [rdx+r11] - pxor xmm6, xmm1 - pxor xmm6, xmm0 - paddq xmm0, xmm5 - movdqu XMMWORD PTR [rcx+r11], xmm0 - movdqu XMMWORD PTR [rax+r11], xmm2 - movq rcx, xmm13 - paddq xmm1, xmm7 - movdqu XMMWORD PTR [rdx+r11], xmm1 - movq rdi, xmm6 - mov r10, rdi - and r10d, 2097136 - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [r13], xmm0 - - mov ebx, [rsp+144] - mov ebp, [rsp+152] - add ebx, [rsp+148] - add ebp, [rsp+156] - shl rbp, 32 - or rbx, rbp - - xor rbx, QWORD PTR [r10+r11] - lea r14, QWORD PTR [r10+r11] - mov rbp, QWORD PTR [r14+8] - - mov [rsp+160], rbx - mov [rsp+168], rdi - mov [rsp+176], rbp - mov [rsp+184], r10 - mov r10, rsp - - mov ebx, [rsp+144] - mov esi, [rsp+148] - mov edi, [rsp+152] - mov ebp, [rsp+156] - - movd esp, xmm7 - movaps xmm0, xmm7 - psrldq xmm0, 8 - movd r15d, xmm0 - movd eax, xmm4 - movd edx, xmm5 - movaps xmm0, xmm5 - psrldq xmm0, 8 - movd r9d, xmm0 - -CryptonightR_soft_aes_template_part2: - mov rsp, r10 - mov [rsp+144], ebx - mov [rsp+148], esi - mov [rsp+152], edi - mov [rsp+156], ebp - - mov edi, edi - shl rbp, 32 - or rbp, rdi - xor r8, rbp - - mov ebx, ebx - shl rsi, 32 - or rsi, rbx - xor QWORD PTR [rsp+320], rsi - - mov rbx, [rsp+160] - mov rdi, [rsp+168] - mov rbp, [rsp+176] - mov r10, [rsp+184] - - mov r9, r10 - xor r9, 16 - mov rcx, r10 - xor rcx, 32 - xor r10, 48 - mov rax, rbx - mul rdi - movdqu xmm2, XMMWORD PTR [r9+r11] - movdqu xmm1, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm2 - pxor xmm6, xmm1 - paddq xmm1, xmm7 - add r8, rdx - movdqu xmm0, XMMWORD PTR [r10+r11] - pxor xmm6, xmm0 - paddq xmm0, xmm5 - paddq xmm2, xmm4 - movdqu XMMWORD 
PTR [r9+r11], xmm0 - movdqa xmm5, xmm4 - mov r9, QWORD PTR [rsp+320] - movdqa xmm4, xmm6 - add r9, rax - movdqu XMMWORD PTR [rcx+r11], xmm2 - movdqu XMMWORD PTR [r10+r11], xmm1 - mov r10, QWORD PTR [rsp+304] - movd r12d, xmm11 - mov QWORD PTR [r14], r8 - xor r8, rbx - mov rax, r8 - mov QWORD PTR [r14+8], r9 - and eax, 2097136 - xor r9, rbp - mov QWORD PTR [rsp+320], r9 - mov QWORD PTR [rsp+328], rax - sub r12d, 1 - jne CryptonightR_soft_aes_template_mainloop - -CryptonightR_soft_aes_template_part3: - movaps xmm6, XMMWORD PTR [rsp+16] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+48] - movaps xmm9, XMMWORD PTR [rsp+64] - movaps xmm10, XMMWORD PTR [rsp+80] - movaps xmm11, XMMWORD PTR [rsp+96] - movaps xmm12, XMMWORD PTR [rsp+112] - movaps xmm13, XMMWORD PTR [rsp+128] - - add rsp, 232 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - ret -CryptonightR_soft_aes_template_end: diff --git a/src/crypto/asm/CryptonightR_template.S b/src/crypto/asm/CryptonightR_template.S deleted file mode 100644 index d2974d16..00000000 --- a/src/crypto/asm/CryptonightR_template.S +++ /dev/null @@ -1,1595 +0,0 @@ -#ifdef __APPLE__ -# define ALIGN(x) .align 6 -#else -# define ALIGN(x) .align 64 -#endif -.intel_syntax noprefix -#ifdef __APPLE__ -# define FN_PREFIX(fn) _ ## fn -.text -#else -# define FN_PREFIX(fn) fn -.section .text -#endif - -#define PUBLIC .global - -PUBLIC FN_PREFIX(CryptonightR_instruction0) -PUBLIC FN_PREFIX(CryptonightR_instruction1) -PUBLIC FN_PREFIX(CryptonightR_instruction2) -PUBLIC FN_PREFIX(CryptonightR_instruction3) -PUBLIC FN_PREFIX(CryptonightR_instruction4) -PUBLIC FN_PREFIX(CryptonightR_instruction5) -PUBLIC FN_PREFIX(CryptonightR_instruction6) -PUBLIC FN_PREFIX(CryptonightR_instruction7) -PUBLIC FN_PREFIX(CryptonightR_instruction8) -PUBLIC FN_PREFIX(CryptonightR_instruction9) -PUBLIC FN_PREFIX(CryptonightR_instruction10) -PUBLIC FN_PREFIX(CryptonightR_instruction11) -PUBLIC 
FN_PREFIX(CryptonightR_instruction12) -PUBLIC FN_PREFIX(CryptonightR_instruction13) -PUBLIC FN_PREFIX(CryptonightR_instruction14) -PUBLIC FN_PREFIX(CryptonightR_instruction15) -PUBLIC FN_PREFIX(CryptonightR_instruction16) -PUBLIC FN_PREFIX(CryptonightR_instruction17) -PUBLIC FN_PREFIX(CryptonightR_instruction18) -PUBLIC FN_PREFIX(CryptonightR_instruction19) -PUBLIC FN_PREFIX(CryptonightR_instruction20) -PUBLIC FN_PREFIX(CryptonightR_instruction21) -PUBLIC FN_PREFIX(CryptonightR_instruction22) -PUBLIC FN_PREFIX(CryptonightR_instruction23) -PUBLIC FN_PREFIX(CryptonightR_instruction24) -PUBLIC FN_PREFIX(CryptonightR_instruction25) -PUBLIC FN_PREFIX(CryptonightR_instruction26) -PUBLIC FN_PREFIX(CryptonightR_instruction27) -PUBLIC FN_PREFIX(CryptonightR_instruction28) -PUBLIC FN_PREFIX(CryptonightR_instruction29) -PUBLIC FN_PREFIX(CryptonightR_instruction30) -PUBLIC FN_PREFIX(CryptonightR_instruction31) -PUBLIC FN_PREFIX(CryptonightR_instruction32) -PUBLIC FN_PREFIX(CryptonightR_instruction33) -PUBLIC FN_PREFIX(CryptonightR_instruction34) -PUBLIC FN_PREFIX(CryptonightR_instruction35) -PUBLIC FN_PREFIX(CryptonightR_instruction36) -PUBLIC FN_PREFIX(CryptonightR_instruction37) -PUBLIC FN_PREFIX(CryptonightR_instruction38) -PUBLIC FN_PREFIX(CryptonightR_instruction39) -PUBLIC FN_PREFIX(CryptonightR_instruction40) -PUBLIC FN_PREFIX(CryptonightR_instruction41) -PUBLIC FN_PREFIX(CryptonightR_instruction42) -PUBLIC FN_PREFIX(CryptonightR_instruction43) -PUBLIC FN_PREFIX(CryptonightR_instruction44) -PUBLIC FN_PREFIX(CryptonightR_instruction45) -PUBLIC FN_PREFIX(CryptonightR_instruction46) -PUBLIC FN_PREFIX(CryptonightR_instruction47) -PUBLIC FN_PREFIX(CryptonightR_instruction48) -PUBLIC FN_PREFIX(CryptonightR_instruction49) -PUBLIC FN_PREFIX(CryptonightR_instruction50) -PUBLIC FN_PREFIX(CryptonightR_instruction51) -PUBLIC FN_PREFIX(CryptonightR_instruction52) -PUBLIC FN_PREFIX(CryptonightR_instruction53) -PUBLIC FN_PREFIX(CryptonightR_instruction54) -PUBLIC 
FN_PREFIX(CryptonightR_instruction55) -PUBLIC FN_PREFIX(CryptonightR_instruction56) -PUBLIC FN_PREFIX(CryptonightR_instruction57) -PUBLIC FN_PREFIX(CryptonightR_instruction58) -PUBLIC FN_PREFIX(CryptonightR_instruction59) -PUBLIC FN_PREFIX(CryptonightR_instruction60) -PUBLIC FN_PREFIX(CryptonightR_instruction61) -PUBLIC FN_PREFIX(CryptonightR_instruction62) -PUBLIC FN_PREFIX(CryptonightR_instruction63) -PUBLIC FN_PREFIX(CryptonightR_instruction64) -PUBLIC FN_PREFIX(CryptonightR_instruction65) -PUBLIC FN_PREFIX(CryptonightR_instruction66) -PUBLIC FN_PREFIX(CryptonightR_instruction67) -PUBLIC FN_PREFIX(CryptonightR_instruction68) -PUBLIC FN_PREFIX(CryptonightR_instruction69) -PUBLIC FN_PREFIX(CryptonightR_instruction70) -PUBLIC FN_PREFIX(CryptonightR_instruction71) -PUBLIC FN_PREFIX(CryptonightR_instruction72) -PUBLIC FN_PREFIX(CryptonightR_instruction73) -PUBLIC FN_PREFIX(CryptonightR_instruction74) -PUBLIC FN_PREFIX(CryptonightR_instruction75) -PUBLIC FN_PREFIX(CryptonightR_instruction76) -PUBLIC FN_PREFIX(CryptonightR_instruction77) -PUBLIC FN_PREFIX(CryptonightR_instruction78) -PUBLIC FN_PREFIX(CryptonightR_instruction79) -PUBLIC FN_PREFIX(CryptonightR_instruction80) -PUBLIC FN_PREFIX(CryptonightR_instruction81) -PUBLIC FN_PREFIX(CryptonightR_instruction82) -PUBLIC FN_PREFIX(CryptonightR_instruction83) -PUBLIC FN_PREFIX(CryptonightR_instruction84) -PUBLIC FN_PREFIX(CryptonightR_instruction85) -PUBLIC FN_PREFIX(CryptonightR_instruction86) -PUBLIC FN_PREFIX(CryptonightR_instruction87) -PUBLIC FN_PREFIX(CryptonightR_instruction88) -PUBLIC FN_PREFIX(CryptonightR_instruction89) -PUBLIC FN_PREFIX(CryptonightR_instruction90) -PUBLIC FN_PREFIX(CryptonightR_instruction91) -PUBLIC FN_PREFIX(CryptonightR_instruction92) -PUBLIC FN_PREFIX(CryptonightR_instruction93) -PUBLIC FN_PREFIX(CryptonightR_instruction94) -PUBLIC FN_PREFIX(CryptonightR_instruction95) -PUBLIC FN_PREFIX(CryptonightR_instruction96) -PUBLIC FN_PREFIX(CryptonightR_instruction97) -PUBLIC 
FN_PREFIX(CryptonightR_instruction98) -PUBLIC FN_PREFIX(CryptonightR_instruction99) -PUBLIC FN_PREFIX(CryptonightR_instruction100) -PUBLIC FN_PREFIX(CryptonightR_instruction101) -PUBLIC FN_PREFIX(CryptonightR_instruction102) -PUBLIC FN_PREFIX(CryptonightR_instruction103) -PUBLIC FN_PREFIX(CryptonightR_instruction104) -PUBLIC FN_PREFIX(CryptonightR_instruction105) -PUBLIC FN_PREFIX(CryptonightR_instruction106) -PUBLIC FN_PREFIX(CryptonightR_instruction107) -PUBLIC FN_PREFIX(CryptonightR_instruction108) -PUBLIC FN_PREFIX(CryptonightR_instruction109) -PUBLIC FN_PREFIX(CryptonightR_instruction110) -PUBLIC FN_PREFIX(CryptonightR_instruction111) -PUBLIC FN_PREFIX(CryptonightR_instruction112) -PUBLIC FN_PREFIX(CryptonightR_instruction113) -PUBLIC FN_PREFIX(CryptonightR_instruction114) -PUBLIC FN_PREFIX(CryptonightR_instruction115) -PUBLIC FN_PREFIX(CryptonightR_instruction116) -PUBLIC FN_PREFIX(CryptonightR_instruction117) -PUBLIC FN_PREFIX(CryptonightR_instruction118) -PUBLIC FN_PREFIX(CryptonightR_instruction119) -PUBLIC FN_PREFIX(CryptonightR_instruction120) -PUBLIC FN_PREFIX(CryptonightR_instruction121) -PUBLIC FN_PREFIX(CryptonightR_instruction122) -PUBLIC FN_PREFIX(CryptonightR_instruction123) -PUBLIC FN_PREFIX(CryptonightR_instruction124) -PUBLIC FN_PREFIX(CryptonightR_instruction125) -PUBLIC FN_PREFIX(CryptonightR_instruction126) -PUBLIC FN_PREFIX(CryptonightR_instruction127) -PUBLIC FN_PREFIX(CryptonightR_instruction128) -PUBLIC FN_PREFIX(CryptonightR_instruction129) -PUBLIC FN_PREFIX(CryptonightR_instruction130) -PUBLIC FN_PREFIX(CryptonightR_instruction131) -PUBLIC FN_PREFIX(CryptonightR_instruction132) -PUBLIC FN_PREFIX(CryptonightR_instruction133) -PUBLIC FN_PREFIX(CryptonightR_instruction134) -PUBLIC FN_PREFIX(CryptonightR_instruction135) -PUBLIC FN_PREFIX(CryptonightR_instruction136) -PUBLIC FN_PREFIX(CryptonightR_instruction137) -PUBLIC FN_PREFIX(CryptonightR_instruction138) -PUBLIC FN_PREFIX(CryptonightR_instruction139) -PUBLIC 
FN_PREFIX(CryptonightR_instruction140) -PUBLIC FN_PREFIX(CryptonightR_instruction141) -PUBLIC FN_PREFIX(CryptonightR_instruction142) -PUBLIC FN_PREFIX(CryptonightR_instruction143) -PUBLIC FN_PREFIX(CryptonightR_instruction144) -PUBLIC FN_PREFIX(CryptonightR_instruction145) -PUBLIC FN_PREFIX(CryptonightR_instruction146) -PUBLIC FN_PREFIX(CryptonightR_instruction147) -PUBLIC FN_PREFIX(CryptonightR_instruction148) -PUBLIC FN_PREFIX(CryptonightR_instruction149) -PUBLIC FN_PREFIX(CryptonightR_instruction150) -PUBLIC FN_PREFIX(CryptonightR_instruction151) -PUBLIC FN_PREFIX(CryptonightR_instruction152) -PUBLIC FN_PREFIX(CryptonightR_instruction153) -PUBLIC FN_PREFIX(CryptonightR_instruction154) -PUBLIC FN_PREFIX(CryptonightR_instruction155) -PUBLIC FN_PREFIX(CryptonightR_instruction156) -PUBLIC FN_PREFIX(CryptonightR_instruction157) -PUBLIC FN_PREFIX(CryptonightR_instruction158) -PUBLIC FN_PREFIX(CryptonightR_instruction159) -PUBLIC FN_PREFIX(CryptonightR_instruction160) -PUBLIC FN_PREFIX(CryptonightR_instruction161) -PUBLIC FN_PREFIX(CryptonightR_instruction162) -PUBLIC FN_PREFIX(CryptonightR_instruction163) -PUBLIC FN_PREFIX(CryptonightR_instruction164) -PUBLIC FN_PREFIX(CryptonightR_instruction165) -PUBLIC FN_PREFIX(CryptonightR_instruction166) -PUBLIC FN_PREFIX(CryptonightR_instruction167) -PUBLIC FN_PREFIX(CryptonightR_instruction168) -PUBLIC FN_PREFIX(CryptonightR_instruction169) -PUBLIC FN_PREFIX(CryptonightR_instruction170) -PUBLIC FN_PREFIX(CryptonightR_instruction171) -PUBLIC FN_PREFIX(CryptonightR_instruction172) -PUBLIC FN_PREFIX(CryptonightR_instruction173) -PUBLIC FN_PREFIX(CryptonightR_instruction174) -PUBLIC FN_PREFIX(CryptonightR_instruction175) -PUBLIC FN_PREFIX(CryptonightR_instruction176) -PUBLIC FN_PREFIX(CryptonightR_instruction177) -PUBLIC FN_PREFIX(CryptonightR_instruction178) -PUBLIC FN_PREFIX(CryptonightR_instruction179) -PUBLIC FN_PREFIX(CryptonightR_instruction180) -PUBLIC FN_PREFIX(CryptonightR_instruction181) -PUBLIC 
FN_PREFIX(CryptonightR_instruction182) -PUBLIC FN_PREFIX(CryptonightR_instruction183) -PUBLIC FN_PREFIX(CryptonightR_instruction184) -PUBLIC FN_PREFIX(CryptonightR_instruction185) -PUBLIC FN_PREFIX(CryptonightR_instruction186) -PUBLIC FN_PREFIX(CryptonightR_instruction187) -PUBLIC FN_PREFIX(CryptonightR_instruction188) -PUBLIC FN_PREFIX(CryptonightR_instruction189) -PUBLIC FN_PREFIX(CryptonightR_instruction190) -PUBLIC FN_PREFIX(CryptonightR_instruction191) -PUBLIC FN_PREFIX(CryptonightR_instruction192) -PUBLIC FN_PREFIX(CryptonightR_instruction193) -PUBLIC FN_PREFIX(CryptonightR_instruction194) -PUBLIC FN_PREFIX(CryptonightR_instruction195) -PUBLIC FN_PREFIX(CryptonightR_instruction196) -PUBLIC FN_PREFIX(CryptonightR_instruction197) -PUBLIC FN_PREFIX(CryptonightR_instruction198) -PUBLIC FN_PREFIX(CryptonightR_instruction199) -PUBLIC FN_PREFIX(CryptonightR_instruction200) -PUBLIC FN_PREFIX(CryptonightR_instruction201) -PUBLIC FN_PREFIX(CryptonightR_instruction202) -PUBLIC FN_PREFIX(CryptonightR_instruction203) -PUBLIC FN_PREFIX(CryptonightR_instruction204) -PUBLIC FN_PREFIX(CryptonightR_instruction205) -PUBLIC FN_PREFIX(CryptonightR_instruction206) -PUBLIC FN_PREFIX(CryptonightR_instruction207) -PUBLIC FN_PREFIX(CryptonightR_instruction208) -PUBLIC FN_PREFIX(CryptonightR_instruction209) -PUBLIC FN_PREFIX(CryptonightR_instruction210) -PUBLIC FN_PREFIX(CryptonightR_instruction211) -PUBLIC FN_PREFIX(CryptonightR_instruction212) -PUBLIC FN_PREFIX(CryptonightR_instruction213) -PUBLIC FN_PREFIX(CryptonightR_instruction214) -PUBLIC FN_PREFIX(CryptonightR_instruction215) -PUBLIC FN_PREFIX(CryptonightR_instruction216) -PUBLIC FN_PREFIX(CryptonightR_instruction217) -PUBLIC FN_PREFIX(CryptonightR_instruction218) -PUBLIC FN_PREFIX(CryptonightR_instruction219) -PUBLIC FN_PREFIX(CryptonightR_instruction220) -PUBLIC FN_PREFIX(CryptonightR_instruction221) -PUBLIC FN_PREFIX(CryptonightR_instruction222) -PUBLIC FN_PREFIX(CryptonightR_instruction223) -PUBLIC 
FN_PREFIX(CryptonightR_instruction224) -PUBLIC FN_PREFIX(CryptonightR_instruction225) -PUBLIC FN_PREFIX(CryptonightR_instruction226) -PUBLIC FN_PREFIX(CryptonightR_instruction227) -PUBLIC FN_PREFIX(CryptonightR_instruction228) -PUBLIC FN_PREFIX(CryptonightR_instruction229) -PUBLIC FN_PREFIX(CryptonightR_instruction230) -PUBLIC FN_PREFIX(CryptonightR_instruction231) -PUBLIC FN_PREFIX(CryptonightR_instruction232) -PUBLIC FN_PREFIX(CryptonightR_instruction233) -PUBLIC FN_PREFIX(CryptonightR_instruction234) -PUBLIC FN_PREFIX(CryptonightR_instruction235) -PUBLIC FN_PREFIX(CryptonightR_instruction236) -PUBLIC FN_PREFIX(CryptonightR_instruction237) -PUBLIC FN_PREFIX(CryptonightR_instruction238) -PUBLIC FN_PREFIX(CryptonightR_instruction239) -PUBLIC FN_PREFIX(CryptonightR_instruction240) -PUBLIC FN_PREFIX(CryptonightR_instruction241) -PUBLIC FN_PREFIX(CryptonightR_instruction242) -PUBLIC FN_PREFIX(CryptonightR_instruction243) -PUBLIC FN_PREFIX(CryptonightR_instruction244) -PUBLIC FN_PREFIX(CryptonightR_instruction245) -PUBLIC FN_PREFIX(CryptonightR_instruction246) -PUBLIC FN_PREFIX(CryptonightR_instruction247) -PUBLIC FN_PREFIX(CryptonightR_instruction248) -PUBLIC FN_PREFIX(CryptonightR_instruction249) -PUBLIC FN_PREFIX(CryptonightR_instruction250) -PUBLIC FN_PREFIX(CryptonightR_instruction251) -PUBLIC FN_PREFIX(CryptonightR_instruction252) -PUBLIC FN_PREFIX(CryptonightR_instruction253) -PUBLIC FN_PREFIX(CryptonightR_instruction254) -PUBLIC FN_PREFIX(CryptonightR_instruction255) -PUBLIC FN_PREFIX(CryptonightR_instruction256) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov0) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov1) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov2) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov3) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov4) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov5) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov6) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov7) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov8) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov9) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov10) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov11) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov12) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov13) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov14) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov15) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov16) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov17) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov18) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov19) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov20) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov21) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov22) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov23) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov24) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov25) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov26) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov27) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov28) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov29) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov30) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov31) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov32) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov33) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov34) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov35) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov36) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov37) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov38) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov39) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov40) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov41) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov42) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov43) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov44) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov45) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov46) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov47) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov48) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov49) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov50) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov51) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov52) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov53) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov54) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov55) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov56) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov57) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov58) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov59) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov60) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov61) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov62) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov63) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov64) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov65) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov66) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov67) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov68) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov69) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov70) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov71) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov72) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov73) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov74) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov75) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov76) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov77) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov78) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov79) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov80) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov81) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov82) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov83) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov84) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov85) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov86) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov87) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov88) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov89) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov90) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov91) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov92) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov93) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov94) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov95) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov96) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov97) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov98) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov99) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov100) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov101) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov102) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov103) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov104) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov105) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov106) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov107) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov108) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov109) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov110) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov111) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov112) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov113) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov114) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov115) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov116) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov117) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov118) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov119) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov120) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov121) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov122) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov123) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov124) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov125) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov126) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov127) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov128) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov129) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov130) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov131) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov132) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov133) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov134) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov135) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov136) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov137) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov138) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov139) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov140) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov141) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov142) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov143) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov144) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov145) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov146) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov147) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov148) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov149) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov150) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov151) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov152) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov153) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov154) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov155) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov156) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov157) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov158) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov159) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov160) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov161) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov162) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov163) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov164) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov165) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov166) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov167) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov168) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov169) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov170) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov171) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov172) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov173) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov174) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov175) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov176) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov177) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov178) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov179) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov180) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov181) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov182) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov183) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov184) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov185) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov186) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov187) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov188) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov189) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov190) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov191) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov192) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov193) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov194) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov195) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov196) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov197) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov198) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov199) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov200) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov201) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov202) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov203) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov204) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov205) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov206) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov207) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov208) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov209) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov210) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov211) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov212) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov213) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov214) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov215) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov216) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov217) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov218) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov219) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov220) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov221) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov222) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov223) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov224) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov225) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov226) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov227) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov228) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov229) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov230) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov231) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov232) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov233) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov234) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov235) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov236) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov237) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov238) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov239) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov240) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov241) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov242) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov243) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov244) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov245) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov246) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov247) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov248) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov249) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov250) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov251) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov252) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov253) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov254) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov255) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov256) - -#include "CryptonightWOW_template.inc" -#include "CryptonightR_template.inc" -#include "CryptonightWOW_soft_aes_template.inc" -#include "CryptonightR_soft_aes_template.inc" - -FN_PREFIX(CryptonightR_instruction0): - imul rbx, rbx -FN_PREFIX(CryptonightR_instruction1): - imul rbx, rbx -FN_PREFIX(CryptonightR_instruction2): - imul rbx, rbx -FN_PREFIX(CryptonightR_instruction3): - add rbx, r9 - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction4): - sub rbx, r9 -FN_PREFIX(CryptonightR_instruction5): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction6): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction7): - xor rbx, r9 -FN_PREFIX(CryptonightR_instruction8): - imul rsi, rbx -FN_PREFIX(CryptonightR_instruction9): - imul rsi, rbx -FN_PREFIX(CryptonightR_instruction10): - imul rsi, rbx -FN_PREFIX(CryptonightR_instruction11): - add rsi, rbx - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction12): - sub rsi, rbx -FN_PREFIX(CryptonightR_instruction13): - ror esi, cl -FN_PREFIX(CryptonightR_instruction14): - rol esi, cl -FN_PREFIX(CryptonightR_instruction15): - xor rsi, rbx -FN_PREFIX(CryptonightR_instruction16): - imul rdi, rbx -FN_PREFIX(CryptonightR_instruction17): - imul rdi, rbx -FN_PREFIX(CryptonightR_instruction18): - imul rdi, rbx -FN_PREFIX(CryptonightR_instruction19): - add rdi, rbx - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction20): - sub rdi, rbx 
-FN_PREFIX(CryptonightR_instruction21): - ror edi, cl -FN_PREFIX(CryptonightR_instruction22): - rol edi, cl -FN_PREFIX(CryptonightR_instruction23): - xor rdi, rbx -FN_PREFIX(CryptonightR_instruction24): - imul rbp, rbx -FN_PREFIX(CryptonightR_instruction25): - imul rbp, rbx -FN_PREFIX(CryptonightR_instruction26): - imul rbp, rbx -FN_PREFIX(CryptonightR_instruction27): - add rbp, rbx - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction28): - sub rbp, rbx -FN_PREFIX(CryptonightR_instruction29): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction30): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction31): - xor rbp, rbx -FN_PREFIX(CryptonightR_instruction32): - imul rbx, rsi -FN_PREFIX(CryptonightR_instruction33): - imul rbx, rsi -FN_PREFIX(CryptonightR_instruction34): - imul rbx, rsi -FN_PREFIX(CryptonightR_instruction35): - add rbx, rsi - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction36): - sub rbx, rsi -FN_PREFIX(CryptonightR_instruction37): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction38): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction39): - xor rbx, rsi -FN_PREFIX(CryptonightR_instruction40): - imul rsi, rsi -FN_PREFIX(CryptonightR_instruction41): - imul rsi, rsi -FN_PREFIX(CryptonightR_instruction42): - imul rsi, rsi -FN_PREFIX(CryptonightR_instruction43): - add rsi, r9 - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction44): - sub rsi, r9 -FN_PREFIX(CryptonightR_instruction45): - ror esi, cl -FN_PREFIX(CryptonightR_instruction46): - rol esi, cl -FN_PREFIX(CryptonightR_instruction47): - xor rsi, r9 -FN_PREFIX(CryptonightR_instruction48): - imul rdi, rsi -FN_PREFIX(CryptonightR_instruction49): - imul rdi, rsi -FN_PREFIX(CryptonightR_instruction50): - imul rdi, rsi -FN_PREFIX(CryptonightR_instruction51): - add rdi, rsi - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction52): - sub rdi, rsi -FN_PREFIX(CryptonightR_instruction53): - ror edi, cl -FN_PREFIX(CryptonightR_instruction54): - rol edi, cl -FN_PREFIX(CryptonightR_instruction55): - 
xor rdi, rsi -FN_PREFIX(CryptonightR_instruction56): - imul rbp, rsi -FN_PREFIX(CryptonightR_instruction57): - imul rbp, rsi -FN_PREFIX(CryptonightR_instruction58): - imul rbp, rsi -FN_PREFIX(CryptonightR_instruction59): - add rbp, rsi - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction60): - sub rbp, rsi -FN_PREFIX(CryptonightR_instruction61): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction62): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction63): - xor rbp, rsi -FN_PREFIX(CryptonightR_instruction64): - imul rbx, rdi -FN_PREFIX(CryptonightR_instruction65): - imul rbx, rdi -FN_PREFIX(CryptonightR_instruction66): - imul rbx, rdi -FN_PREFIX(CryptonightR_instruction67): - add rbx, rdi - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction68): - sub rbx, rdi -FN_PREFIX(CryptonightR_instruction69): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction70): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction71): - xor rbx, rdi -FN_PREFIX(CryptonightR_instruction72): - imul rsi, rdi -FN_PREFIX(CryptonightR_instruction73): - imul rsi, rdi -FN_PREFIX(CryptonightR_instruction74): - imul rsi, rdi -FN_PREFIX(CryptonightR_instruction75): - add rsi, rdi - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction76): - sub rsi, rdi -FN_PREFIX(CryptonightR_instruction77): - ror esi, cl -FN_PREFIX(CryptonightR_instruction78): - rol esi, cl -FN_PREFIX(CryptonightR_instruction79): - xor rsi, rdi -FN_PREFIX(CryptonightR_instruction80): - imul rdi, rdi -FN_PREFIX(CryptonightR_instruction81): - imul rdi, rdi -FN_PREFIX(CryptonightR_instruction82): - imul rdi, rdi -FN_PREFIX(CryptonightR_instruction83): - add rdi, r9 - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction84): - sub rdi, r9 -FN_PREFIX(CryptonightR_instruction85): - ror edi, cl -FN_PREFIX(CryptonightR_instruction86): - rol edi, cl -FN_PREFIX(CryptonightR_instruction87): - xor rdi, r9 -FN_PREFIX(CryptonightR_instruction88): - imul rbp, rdi -FN_PREFIX(CryptonightR_instruction89): - imul rbp, rdi 
-FN_PREFIX(CryptonightR_instruction90): - imul rbp, rdi -FN_PREFIX(CryptonightR_instruction91): - add rbp, rdi - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction92): - sub rbp, rdi -FN_PREFIX(CryptonightR_instruction93): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction94): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction95): - xor rbp, rdi -FN_PREFIX(CryptonightR_instruction96): - imul rbx, rbp -FN_PREFIX(CryptonightR_instruction97): - imul rbx, rbp -FN_PREFIX(CryptonightR_instruction98): - imul rbx, rbp -FN_PREFIX(CryptonightR_instruction99): - add rbx, rbp - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction100): - sub rbx, rbp -FN_PREFIX(CryptonightR_instruction101): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction102): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction103): - xor rbx, rbp -FN_PREFIX(CryptonightR_instruction104): - imul rsi, rbp -FN_PREFIX(CryptonightR_instruction105): - imul rsi, rbp -FN_PREFIX(CryptonightR_instruction106): - imul rsi, rbp -FN_PREFIX(CryptonightR_instruction107): - add rsi, rbp - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction108): - sub rsi, rbp -FN_PREFIX(CryptonightR_instruction109): - ror esi, cl -FN_PREFIX(CryptonightR_instruction110): - rol esi, cl -FN_PREFIX(CryptonightR_instruction111): - xor rsi, rbp -FN_PREFIX(CryptonightR_instruction112): - imul rdi, rbp -FN_PREFIX(CryptonightR_instruction113): - imul rdi, rbp -FN_PREFIX(CryptonightR_instruction114): - imul rdi, rbp -FN_PREFIX(CryptonightR_instruction115): - add rdi, rbp - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction116): - sub rdi, rbp -FN_PREFIX(CryptonightR_instruction117): - ror edi, cl -FN_PREFIX(CryptonightR_instruction118): - rol edi, cl -FN_PREFIX(CryptonightR_instruction119): - xor rdi, rbp -FN_PREFIX(CryptonightR_instruction120): - imul rbp, rbp -FN_PREFIX(CryptonightR_instruction121): - imul rbp, rbp -FN_PREFIX(CryptonightR_instruction122): - imul rbp, rbp -FN_PREFIX(CryptonightR_instruction123): - add rbp, r9 - add rbp, 
2147483647 -FN_PREFIX(CryptonightR_instruction124): - sub rbp, r9 -FN_PREFIX(CryptonightR_instruction125): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction126): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction127): - xor rbp, r9 -FN_PREFIX(CryptonightR_instruction128): - imul rbx, rsp -FN_PREFIX(CryptonightR_instruction129): - imul rbx, rsp -FN_PREFIX(CryptonightR_instruction130): - imul rbx, rsp -FN_PREFIX(CryptonightR_instruction131): - add rbx, rsp - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction132): - sub rbx, rsp -FN_PREFIX(CryptonightR_instruction133): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction134): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction135): - xor rbx, rsp -FN_PREFIX(CryptonightR_instruction136): - imul rsi, rsp -FN_PREFIX(CryptonightR_instruction137): - imul rsi, rsp -FN_PREFIX(CryptonightR_instruction138): - imul rsi, rsp -FN_PREFIX(CryptonightR_instruction139): - add rsi, rsp - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction140): - sub rsi, rsp -FN_PREFIX(CryptonightR_instruction141): - ror esi, cl -FN_PREFIX(CryptonightR_instruction142): - rol esi, cl -FN_PREFIX(CryptonightR_instruction143): - xor rsi, rsp -FN_PREFIX(CryptonightR_instruction144): - imul rdi, rsp -FN_PREFIX(CryptonightR_instruction145): - imul rdi, rsp -FN_PREFIX(CryptonightR_instruction146): - imul rdi, rsp -FN_PREFIX(CryptonightR_instruction147): - add rdi, rsp - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction148): - sub rdi, rsp -FN_PREFIX(CryptonightR_instruction149): - ror edi, cl -FN_PREFIX(CryptonightR_instruction150): - rol edi, cl -FN_PREFIX(CryptonightR_instruction151): - xor rdi, rsp -FN_PREFIX(CryptonightR_instruction152): - imul rbp, rsp -FN_PREFIX(CryptonightR_instruction153): - imul rbp, rsp -FN_PREFIX(CryptonightR_instruction154): - imul rbp, rsp -FN_PREFIX(CryptonightR_instruction155): - add rbp, rsp - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction156): - sub rbp, rsp -FN_PREFIX(CryptonightR_instruction157): - ror 
ebp, cl -FN_PREFIX(CryptonightR_instruction158): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction159): - xor rbp, rsp -FN_PREFIX(CryptonightR_instruction160): - imul rbx, r15 -FN_PREFIX(CryptonightR_instruction161): - imul rbx, r15 -FN_PREFIX(CryptonightR_instruction162): - imul rbx, r15 -FN_PREFIX(CryptonightR_instruction163): - add rbx, r15 - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction164): - sub rbx, r15 -FN_PREFIX(CryptonightR_instruction165): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction166): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction167): - xor rbx, r15 -FN_PREFIX(CryptonightR_instruction168): - imul rsi, r15 -FN_PREFIX(CryptonightR_instruction169): - imul rsi, r15 -FN_PREFIX(CryptonightR_instruction170): - imul rsi, r15 -FN_PREFIX(CryptonightR_instruction171): - add rsi, r15 - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction172): - sub rsi, r15 -FN_PREFIX(CryptonightR_instruction173): - ror esi, cl -FN_PREFIX(CryptonightR_instruction174): - rol esi, cl -FN_PREFIX(CryptonightR_instruction175): - xor rsi, r15 -FN_PREFIX(CryptonightR_instruction176): - imul rdi, r15 -FN_PREFIX(CryptonightR_instruction177): - imul rdi, r15 -FN_PREFIX(CryptonightR_instruction178): - imul rdi, r15 -FN_PREFIX(CryptonightR_instruction179): - add rdi, r15 - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction180): - sub rdi, r15 -FN_PREFIX(CryptonightR_instruction181): - ror edi, cl -FN_PREFIX(CryptonightR_instruction182): - rol edi, cl -FN_PREFIX(CryptonightR_instruction183): - xor rdi, r15 -FN_PREFIX(CryptonightR_instruction184): - imul rbp, r15 -FN_PREFIX(CryptonightR_instruction185): - imul rbp, r15 -FN_PREFIX(CryptonightR_instruction186): - imul rbp, r15 -FN_PREFIX(CryptonightR_instruction187): - add rbp, r15 - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction188): - sub rbp, r15 -FN_PREFIX(CryptonightR_instruction189): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction190): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction191): - xor rbp, 
r15 -FN_PREFIX(CryptonightR_instruction192): - imul rbx, rax -FN_PREFIX(CryptonightR_instruction193): - imul rbx, rax -FN_PREFIX(CryptonightR_instruction194): - imul rbx, rax -FN_PREFIX(CryptonightR_instruction195): - add rbx, rax - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction196): - sub rbx, rax -FN_PREFIX(CryptonightR_instruction197): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction198): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction199): - xor rbx, rax -FN_PREFIX(CryptonightR_instruction200): - imul rsi, rax -FN_PREFIX(CryptonightR_instruction201): - imul rsi, rax -FN_PREFIX(CryptonightR_instruction202): - imul rsi, rax -FN_PREFIX(CryptonightR_instruction203): - add rsi, rax - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction204): - sub rsi, rax -FN_PREFIX(CryptonightR_instruction205): - ror esi, cl -FN_PREFIX(CryptonightR_instruction206): - rol esi, cl -FN_PREFIX(CryptonightR_instruction207): - xor rsi, rax -FN_PREFIX(CryptonightR_instruction208): - imul rdi, rax -FN_PREFIX(CryptonightR_instruction209): - imul rdi, rax -FN_PREFIX(CryptonightR_instruction210): - imul rdi, rax -FN_PREFIX(CryptonightR_instruction211): - add rdi, rax - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction212): - sub rdi, rax -FN_PREFIX(CryptonightR_instruction213): - ror edi, cl -FN_PREFIX(CryptonightR_instruction214): - rol edi, cl -FN_PREFIX(CryptonightR_instruction215): - xor rdi, rax -FN_PREFIX(CryptonightR_instruction216): - imul rbp, rax -FN_PREFIX(CryptonightR_instruction217): - imul rbp, rax -FN_PREFIX(CryptonightR_instruction218): - imul rbp, rax -FN_PREFIX(CryptonightR_instruction219): - add rbp, rax - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction220): - sub rbp, rax -FN_PREFIX(CryptonightR_instruction221): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction222): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction223): - xor rbp, rax -FN_PREFIX(CryptonightR_instruction224): - imul rbx, rdx -FN_PREFIX(CryptonightR_instruction225): - imul rbx, 
rdx -FN_PREFIX(CryptonightR_instruction226): - imul rbx, rdx -FN_PREFIX(CryptonightR_instruction227): - add rbx, rdx - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction228): - sub rbx, rdx -FN_PREFIX(CryptonightR_instruction229): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction230): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction231): - xor rbx, rdx -FN_PREFIX(CryptonightR_instruction232): - imul rsi, rdx -FN_PREFIX(CryptonightR_instruction233): - imul rsi, rdx -FN_PREFIX(CryptonightR_instruction234): - imul rsi, rdx -FN_PREFIX(CryptonightR_instruction235): - add rsi, rdx - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction236): - sub rsi, rdx -FN_PREFIX(CryptonightR_instruction237): - ror esi, cl -FN_PREFIX(CryptonightR_instruction238): - rol esi, cl -FN_PREFIX(CryptonightR_instruction239): - xor rsi, rdx -FN_PREFIX(CryptonightR_instruction240): - imul rdi, rdx -FN_PREFIX(CryptonightR_instruction241): - imul rdi, rdx -FN_PREFIX(CryptonightR_instruction242): - imul rdi, rdx -FN_PREFIX(CryptonightR_instruction243): - add rdi, rdx - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction244): - sub rdi, rdx -FN_PREFIX(CryptonightR_instruction245): - ror edi, cl -FN_PREFIX(CryptonightR_instruction246): - rol edi, cl -FN_PREFIX(CryptonightR_instruction247): - xor rdi, rdx -FN_PREFIX(CryptonightR_instruction248): - imul rbp, rdx -FN_PREFIX(CryptonightR_instruction249): - imul rbp, rdx -FN_PREFIX(CryptonightR_instruction250): - imul rbp, rdx -FN_PREFIX(CryptonightR_instruction251): - add rbp, rdx - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction252): - sub rbp, rdx -FN_PREFIX(CryptonightR_instruction253): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction254): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction255): - xor rbp, rdx -FN_PREFIX(CryptonightR_instruction256): - imul rbx, rbx -FN_PREFIX(CryptonightR_instruction_mov0): - -FN_PREFIX(CryptonightR_instruction_mov1): - -FN_PREFIX(CryptonightR_instruction_mov2): - 
-FN_PREFIX(CryptonightR_instruction_mov3): - -FN_PREFIX(CryptonightR_instruction_mov4): - -FN_PREFIX(CryptonightR_instruction_mov5): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov6): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov7): - -FN_PREFIX(CryptonightR_instruction_mov8): - -FN_PREFIX(CryptonightR_instruction_mov9): - -FN_PREFIX(CryptonightR_instruction_mov10): - -FN_PREFIX(CryptonightR_instruction_mov11): - -FN_PREFIX(CryptonightR_instruction_mov12): - -FN_PREFIX(CryptonightR_instruction_mov13): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov14): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov15): - -FN_PREFIX(CryptonightR_instruction_mov16): - -FN_PREFIX(CryptonightR_instruction_mov17): - -FN_PREFIX(CryptonightR_instruction_mov18): - -FN_PREFIX(CryptonightR_instruction_mov19): - -FN_PREFIX(CryptonightR_instruction_mov20): - -FN_PREFIX(CryptonightR_instruction_mov21): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov22): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov23): - -FN_PREFIX(CryptonightR_instruction_mov24): - -FN_PREFIX(CryptonightR_instruction_mov25): - -FN_PREFIX(CryptonightR_instruction_mov26): - -FN_PREFIX(CryptonightR_instruction_mov27): - -FN_PREFIX(CryptonightR_instruction_mov28): - -FN_PREFIX(CryptonightR_instruction_mov29): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov30): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov31): - -FN_PREFIX(CryptonightR_instruction_mov32): - -FN_PREFIX(CryptonightR_instruction_mov33): - -FN_PREFIX(CryptonightR_instruction_mov34): - -FN_PREFIX(CryptonightR_instruction_mov35): - -FN_PREFIX(CryptonightR_instruction_mov36): - -FN_PREFIX(CryptonightR_instruction_mov37): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov38): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov39): - -FN_PREFIX(CryptonightR_instruction_mov40): - -FN_PREFIX(CryptonightR_instruction_mov41): - -FN_PREFIX(CryptonightR_instruction_mov42): - 
-FN_PREFIX(CryptonightR_instruction_mov43): - -FN_PREFIX(CryptonightR_instruction_mov44): - -FN_PREFIX(CryptonightR_instruction_mov45): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov46): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov47): - -FN_PREFIX(CryptonightR_instruction_mov48): - -FN_PREFIX(CryptonightR_instruction_mov49): - -FN_PREFIX(CryptonightR_instruction_mov50): - -FN_PREFIX(CryptonightR_instruction_mov51): - -FN_PREFIX(CryptonightR_instruction_mov52): - -FN_PREFIX(CryptonightR_instruction_mov53): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov54): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov55): - -FN_PREFIX(CryptonightR_instruction_mov56): - -FN_PREFIX(CryptonightR_instruction_mov57): - -FN_PREFIX(CryptonightR_instruction_mov58): - -FN_PREFIX(CryptonightR_instruction_mov59): - -FN_PREFIX(CryptonightR_instruction_mov60): - -FN_PREFIX(CryptonightR_instruction_mov61): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov62): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov63): - -FN_PREFIX(CryptonightR_instruction_mov64): - -FN_PREFIX(CryptonightR_instruction_mov65): - -FN_PREFIX(CryptonightR_instruction_mov66): - -FN_PREFIX(CryptonightR_instruction_mov67): - -FN_PREFIX(CryptonightR_instruction_mov68): - -FN_PREFIX(CryptonightR_instruction_mov69): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov70): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov71): - -FN_PREFIX(CryptonightR_instruction_mov72): - -FN_PREFIX(CryptonightR_instruction_mov73): - -FN_PREFIX(CryptonightR_instruction_mov74): - -FN_PREFIX(CryptonightR_instruction_mov75): - -FN_PREFIX(CryptonightR_instruction_mov76): - -FN_PREFIX(CryptonightR_instruction_mov77): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov78): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov79): - -FN_PREFIX(CryptonightR_instruction_mov80): - -FN_PREFIX(CryptonightR_instruction_mov81): - -FN_PREFIX(CryptonightR_instruction_mov82): - 
-FN_PREFIX(CryptonightR_instruction_mov83): - -FN_PREFIX(CryptonightR_instruction_mov84): - -FN_PREFIX(CryptonightR_instruction_mov85): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov86): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov87): - -FN_PREFIX(CryptonightR_instruction_mov88): - -FN_PREFIX(CryptonightR_instruction_mov89): - -FN_PREFIX(CryptonightR_instruction_mov90): - -FN_PREFIX(CryptonightR_instruction_mov91): - -FN_PREFIX(CryptonightR_instruction_mov92): - -FN_PREFIX(CryptonightR_instruction_mov93): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov94): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov95): - -FN_PREFIX(CryptonightR_instruction_mov96): - -FN_PREFIX(CryptonightR_instruction_mov97): - -FN_PREFIX(CryptonightR_instruction_mov98): - -FN_PREFIX(CryptonightR_instruction_mov99): - -FN_PREFIX(CryptonightR_instruction_mov100): - -FN_PREFIX(CryptonightR_instruction_mov101): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov102): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov103): - -FN_PREFIX(CryptonightR_instruction_mov104): - -FN_PREFIX(CryptonightR_instruction_mov105): - -FN_PREFIX(CryptonightR_instruction_mov106): - -FN_PREFIX(CryptonightR_instruction_mov107): - -FN_PREFIX(CryptonightR_instruction_mov108): - -FN_PREFIX(CryptonightR_instruction_mov109): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov110): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov111): - -FN_PREFIX(CryptonightR_instruction_mov112): - -FN_PREFIX(CryptonightR_instruction_mov113): - -FN_PREFIX(CryptonightR_instruction_mov114): - -FN_PREFIX(CryptonightR_instruction_mov115): - -FN_PREFIX(CryptonightR_instruction_mov116): - -FN_PREFIX(CryptonightR_instruction_mov117): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov118): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov119): - -FN_PREFIX(CryptonightR_instruction_mov120): - -FN_PREFIX(CryptonightR_instruction_mov121): - -FN_PREFIX(CryptonightR_instruction_mov122): - 
-FN_PREFIX(CryptonightR_instruction_mov123): - -FN_PREFIX(CryptonightR_instruction_mov124): - -FN_PREFIX(CryptonightR_instruction_mov125): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov126): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov127): - -FN_PREFIX(CryptonightR_instruction_mov128): - -FN_PREFIX(CryptonightR_instruction_mov129): - -FN_PREFIX(CryptonightR_instruction_mov130): - -FN_PREFIX(CryptonightR_instruction_mov131): - -FN_PREFIX(CryptonightR_instruction_mov132): - -FN_PREFIX(CryptonightR_instruction_mov133): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov134): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov135): - -FN_PREFIX(CryptonightR_instruction_mov136): - -FN_PREFIX(CryptonightR_instruction_mov137): - -FN_PREFIX(CryptonightR_instruction_mov138): - -FN_PREFIX(CryptonightR_instruction_mov139): - -FN_PREFIX(CryptonightR_instruction_mov140): - -FN_PREFIX(CryptonightR_instruction_mov141): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov142): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov143): - -FN_PREFIX(CryptonightR_instruction_mov144): - -FN_PREFIX(CryptonightR_instruction_mov145): - -FN_PREFIX(CryptonightR_instruction_mov146): - -FN_PREFIX(CryptonightR_instruction_mov147): - -FN_PREFIX(CryptonightR_instruction_mov148): - -FN_PREFIX(CryptonightR_instruction_mov149): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov150): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov151): - -FN_PREFIX(CryptonightR_instruction_mov152): - -FN_PREFIX(CryptonightR_instruction_mov153): - -FN_PREFIX(CryptonightR_instruction_mov154): - -FN_PREFIX(CryptonightR_instruction_mov155): - -FN_PREFIX(CryptonightR_instruction_mov156): - -FN_PREFIX(CryptonightR_instruction_mov157): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov158): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov159): - -FN_PREFIX(CryptonightR_instruction_mov160): - -FN_PREFIX(CryptonightR_instruction_mov161): - 
-FN_PREFIX(CryptonightR_instruction_mov162): - -FN_PREFIX(CryptonightR_instruction_mov163): - -FN_PREFIX(CryptonightR_instruction_mov164): - -FN_PREFIX(CryptonightR_instruction_mov165): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov166): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov167): - -FN_PREFIX(CryptonightR_instruction_mov168): - -FN_PREFIX(CryptonightR_instruction_mov169): - -FN_PREFIX(CryptonightR_instruction_mov170): - -FN_PREFIX(CryptonightR_instruction_mov171): - -FN_PREFIX(CryptonightR_instruction_mov172): - -FN_PREFIX(CryptonightR_instruction_mov173): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov174): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov175): - -FN_PREFIX(CryptonightR_instruction_mov176): - -FN_PREFIX(CryptonightR_instruction_mov177): - -FN_PREFIX(CryptonightR_instruction_mov178): - -FN_PREFIX(CryptonightR_instruction_mov179): - -FN_PREFIX(CryptonightR_instruction_mov180): - -FN_PREFIX(CryptonightR_instruction_mov181): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov182): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov183): - -FN_PREFIX(CryptonightR_instruction_mov184): - -FN_PREFIX(CryptonightR_instruction_mov185): - -FN_PREFIX(CryptonightR_instruction_mov186): - -FN_PREFIX(CryptonightR_instruction_mov187): - -FN_PREFIX(CryptonightR_instruction_mov188): - -FN_PREFIX(CryptonightR_instruction_mov189): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov190): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov191): - -FN_PREFIX(CryptonightR_instruction_mov192): - -FN_PREFIX(CryptonightR_instruction_mov193): - -FN_PREFIX(CryptonightR_instruction_mov194): - -FN_PREFIX(CryptonightR_instruction_mov195): - -FN_PREFIX(CryptonightR_instruction_mov196): - -FN_PREFIX(CryptonightR_instruction_mov197): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov198): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov199): - -FN_PREFIX(CryptonightR_instruction_mov200): - 
-FN_PREFIX(CryptonightR_instruction_mov201): - -FN_PREFIX(CryptonightR_instruction_mov202): - -FN_PREFIX(CryptonightR_instruction_mov203): - -FN_PREFIX(CryptonightR_instruction_mov204): - -FN_PREFIX(CryptonightR_instruction_mov205): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov206): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov207): - -FN_PREFIX(CryptonightR_instruction_mov208): - -FN_PREFIX(CryptonightR_instruction_mov209): - -FN_PREFIX(CryptonightR_instruction_mov210): - -FN_PREFIX(CryptonightR_instruction_mov211): - -FN_PREFIX(CryptonightR_instruction_mov212): - -FN_PREFIX(CryptonightR_instruction_mov213): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov214): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov215): - -FN_PREFIX(CryptonightR_instruction_mov216): - -FN_PREFIX(CryptonightR_instruction_mov217): - -FN_PREFIX(CryptonightR_instruction_mov218): - -FN_PREFIX(CryptonightR_instruction_mov219): - -FN_PREFIX(CryptonightR_instruction_mov220): - -FN_PREFIX(CryptonightR_instruction_mov221): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov222): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov223): - -FN_PREFIX(CryptonightR_instruction_mov224): - -FN_PREFIX(CryptonightR_instruction_mov225): - -FN_PREFIX(CryptonightR_instruction_mov226): - -FN_PREFIX(CryptonightR_instruction_mov227): - -FN_PREFIX(CryptonightR_instruction_mov228): - -FN_PREFIX(CryptonightR_instruction_mov229): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov230): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov231): - -FN_PREFIX(CryptonightR_instruction_mov232): - -FN_PREFIX(CryptonightR_instruction_mov233): - -FN_PREFIX(CryptonightR_instruction_mov234): - -FN_PREFIX(CryptonightR_instruction_mov235): - -FN_PREFIX(CryptonightR_instruction_mov236): - -FN_PREFIX(CryptonightR_instruction_mov237): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov238): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov239): - 
-FN_PREFIX(CryptonightR_instruction_mov240): - -FN_PREFIX(CryptonightR_instruction_mov241): - -FN_PREFIX(CryptonightR_instruction_mov242): - -FN_PREFIX(CryptonightR_instruction_mov243): - -FN_PREFIX(CryptonightR_instruction_mov244): - -FN_PREFIX(CryptonightR_instruction_mov245): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov246): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov247): - -FN_PREFIX(CryptonightR_instruction_mov248): - -FN_PREFIX(CryptonightR_instruction_mov249): - -FN_PREFIX(CryptonightR_instruction_mov250): - -FN_PREFIX(CryptonightR_instruction_mov251): - -FN_PREFIX(CryptonightR_instruction_mov252): - -FN_PREFIX(CryptonightR_instruction_mov253): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov254): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov255): - -FN_PREFIX(CryptonightR_instruction_mov256): diff --git a/src/crypto/asm/CryptonightR_template.asm b/src/crypto/asm/CryptonightR_template.asm deleted file mode 100644 index 250eca3d..00000000 --- a/src/crypto/asm/CryptonightR_template.asm +++ /dev/null @@ -1,1585 +0,0 @@ -; Auto-generated file, do not edit - -_TEXT_CN_TEMPLATE SEGMENT PAGE READ EXECUTE -PUBLIC CryptonightR_instruction0 -PUBLIC CryptonightR_instruction1 -PUBLIC CryptonightR_instruction2 -PUBLIC CryptonightR_instruction3 -PUBLIC CryptonightR_instruction4 -PUBLIC CryptonightR_instruction5 -PUBLIC CryptonightR_instruction6 -PUBLIC CryptonightR_instruction7 -PUBLIC CryptonightR_instruction8 -PUBLIC CryptonightR_instruction9 -PUBLIC CryptonightR_instruction10 -PUBLIC CryptonightR_instruction11 -PUBLIC CryptonightR_instruction12 -PUBLIC CryptonightR_instruction13 -PUBLIC CryptonightR_instruction14 -PUBLIC CryptonightR_instruction15 -PUBLIC CryptonightR_instruction16 -PUBLIC CryptonightR_instruction17 -PUBLIC CryptonightR_instruction18 -PUBLIC CryptonightR_instruction19 -PUBLIC CryptonightR_instruction20 -PUBLIC CryptonightR_instruction21 -PUBLIC CryptonightR_instruction22 -PUBLIC CryptonightR_instruction23 -PUBLIC 
CryptonightR_instruction24 -PUBLIC CryptonightR_instruction25 -PUBLIC CryptonightR_instruction26 -PUBLIC CryptonightR_instruction27 -PUBLIC CryptonightR_instruction28 -PUBLIC CryptonightR_instruction29 -PUBLIC CryptonightR_instruction30 -PUBLIC CryptonightR_instruction31 -PUBLIC CryptonightR_instruction32 -PUBLIC CryptonightR_instruction33 -PUBLIC CryptonightR_instruction34 -PUBLIC CryptonightR_instruction35 -PUBLIC CryptonightR_instruction36 -PUBLIC CryptonightR_instruction37 -PUBLIC CryptonightR_instruction38 -PUBLIC CryptonightR_instruction39 -PUBLIC CryptonightR_instruction40 -PUBLIC CryptonightR_instruction41 -PUBLIC CryptonightR_instruction42 -PUBLIC CryptonightR_instruction43 -PUBLIC CryptonightR_instruction44 -PUBLIC CryptonightR_instruction45 -PUBLIC CryptonightR_instruction46 -PUBLIC CryptonightR_instruction47 -PUBLIC CryptonightR_instruction48 -PUBLIC CryptonightR_instruction49 -PUBLIC CryptonightR_instruction50 -PUBLIC CryptonightR_instruction51 -PUBLIC CryptonightR_instruction52 -PUBLIC CryptonightR_instruction53 -PUBLIC CryptonightR_instruction54 -PUBLIC CryptonightR_instruction55 -PUBLIC CryptonightR_instruction56 -PUBLIC CryptonightR_instruction57 -PUBLIC CryptonightR_instruction58 -PUBLIC CryptonightR_instruction59 -PUBLIC CryptonightR_instruction60 -PUBLIC CryptonightR_instruction61 -PUBLIC CryptonightR_instruction62 -PUBLIC CryptonightR_instruction63 -PUBLIC CryptonightR_instruction64 -PUBLIC CryptonightR_instruction65 -PUBLIC CryptonightR_instruction66 -PUBLIC CryptonightR_instruction67 -PUBLIC CryptonightR_instruction68 -PUBLIC CryptonightR_instruction69 -PUBLIC CryptonightR_instruction70 -PUBLIC CryptonightR_instruction71 -PUBLIC CryptonightR_instruction72 -PUBLIC CryptonightR_instruction73 -PUBLIC CryptonightR_instruction74 -PUBLIC CryptonightR_instruction75 -PUBLIC CryptonightR_instruction76 -PUBLIC CryptonightR_instruction77 -PUBLIC CryptonightR_instruction78 -PUBLIC CryptonightR_instruction79 -PUBLIC CryptonightR_instruction80 -PUBLIC 
CryptonightR_instruction81 -PUBLIC CryptonightR_instruction82 -PUBLIC CryptonightR_instruction83 -PUBLIC CryptonightR_instruction84 -PUBLIC CryptonightR_instruction85 -PUBLIC CryptonightR_instruction86 -PUBLIC CryptonightR_instruction87 -PUBLIC CryptonightR_instruction88 -PUBLIC CryptonightR_instruction89 -PUBLIC CryptonightR_instruction90 -PUBLIC CryptonightR_instruction91 -PUBLIC CryptonightR_instruction92 -PUBLIC CryptonightR_instruction93 -PUBLIC CryptonightR_instruction94 -PUBLIC CryptonightR_instruction95 -PUBLIC CryptonightR_instruction96 -PUBLIC CryptonightR_instruction97 -PUBLIC CryptonightR_instruction98 -PUBLIC CryptonightR_instruction99 -PUBLIC CryptonightR_instruction100 -PUBLIC CryptonightR_instruction101 -PUBLIC CryptonightR_instruction102 -PUBLIC CryptonightR_instruction103 -PUBLIC CryptonightR_instruction104 -PUBLIC CryptonightR_instruction105 -PUBLIC CryptonightR_instruction106 -PUBLIC CryptonightR_instruction107 -PUBLIC CryptonightR_instruction108 -PUBLIC CryptonightR_instruction109 -PUBLIC CryptonightR_instruction110 -PUBLIC CryptonightR_instruction111 -PUBLIC CryptonightR_instruction112 -PUBLIC CryptonightR_instruction113 -PUBLIC CryptonightR_instruction114 -PUBLIC CryptonightR_instruction115 -PUBLIC CryptonightR_instruction116 -PUBLIC CryptonightR_instruction117 -PUBLIC CryptonightR_instruction118 -PUBLIC CryptonightR_instruction119 -PUBLIC CryptonightR_instruction120 -PUBLIC CryptonightR_instruction121 -PUBLIC CryptonightR_instruction122 -PUBLIC CryptonightR_instruction123 -PUBLIC CryptonightR_instruction124 -PUBLIC CryptonightR_instruction125 -PUBLIC CryptonightR_instruction126 -PUBLIC CryptonightR_instruction127 -PUBLIC CryptonightR_instruction128 -PUBLIC CryptonightR_instruction129 -PUBLIC CryptonightR_instruction130 -PUBLIC CryptonightR_instruction131 -PUBLIC CryptonightR_instruction132 -PUBLIC CryptonightR_instruction133 -PUBLIC CryptonightR_instruction134 -PUBLIC CryptonightR_instruction135 -PUBLIC CryptonightR_instruction136 -PUBLIC 
CryptonightR_instruction137 -PUBLIC CryptonightR_instruction138 -PUBLIC CryptonightR_instruction139 -PUBLIC CryptonightR_instruction140 -PUBLIC CryptonightR_instruction141 -PUBLIC CryptonightR_instruction142 -PUBLIC CryptonightR_instruction143 -PUBLIC CryptonightR_instruction144 -PUBLIC CryptonightR_instruction145 -PUBLIC CryptonightR_instruction146 -PUBLIC CryptonightR_instruction147 -PUBLIC CryptonightR_instruction148 -PUBLIC CryptonightR_instruction149 -PUBLIC CryptonightR_instruction150 -PUBLIC CryptonightR_instruction151 -PUBLIC CryptonightR_instruction152 -PUBLIC CryptonightR_instruction153 -PUBLIC CryptonightR_instruction154 -PUBLIC CryptonightR_instruction155 -PUBLIC CryptonightR_instruction156 -PUBLIC CryptonightR_instruction157 -PUBLIC CryptonightR_instruction158 -PUBLIC CryptonightR_instruction159 -PUBLIC CryptonightR_instruction160 -PUBLIC CryptonightR_instruction161 -PUBLIC CryptonightR_instruction162 -PUBLIC CryptonightR_instruction163 -PUBLIC CryptonightR_instruction164 -PUBLIC CryptonightR_instruction165 -PUBLIC CryptonightR_instruction166 -PUBLIC CryptonightR_instruction167 -PUBLIC CryptonightR_instruction168 -PUBLIC CryptonightR_instruction169 -PUBLIC CryptonightR_instruction170 -PUBLIC CryptonightR_instruction171 -PUBLIC CryptonightR_instruction172 -PUBLIC CryptonightR_instruction173 -PUBLIC CryptonightR_instruction174 -PUBLIC CryptonightR_instruction175 -PUBLIC CryptonightR_instruction176 -PUBLIC CryptonightR_instruction177 -PUBLIC CryptonightR_instruction178 -PUBLIC CryptonightR_instruction179 -PUBLIC CryptonightR_instruction180 -PUBLIC CryptonightR_instruction181 -PUBLIC CryptonightR_instruction182 -PUBLIC CryptonightR_instruction183 -PUBLIC CryptonightR_instruction184 -PUBLIC CryptonightR_instruction185 -PUBLIC CryptonightR_instruction186 -PUBLIC CryptonightR_instruction187 -PUBLIC CryptonightR_instruction188 -PUBLIC CryptonightR_instruction189 -PUBLIC CryptonightR_instruction190 -PUBLIC CryptonightR_instruction191 -PUBLIC 
CryptonightR_instruction192 -PUBLIC CryptonightR_instruction193 -PUBLIC CryptonightR_instruction194 -PUBLIC CryptonightR_instruction195 -PUBLIC CryptonightR_instruction196 -PUBLIC CryptonightR_instruction197 -PUBLIC CryptonightR_instruction198 -PUBLIC CryptonightR_instruction199 -PUBLIC CryptonightR_instruction200 -PUBLIC CryptonightR_instruction201 -PUBLIC CryptonightR_instruction202 -PUBLIC CryptonightR_instruction203 -PUBLIC CryptonightR_instruction204 -PUBLIC CryptonightR_instruction205 -PUBLIC CryptonightR_instruction206 -PUBLIC CryptonightR_instruction207 -PUBLIC CryptonightR_instruction208 -PUBLIC CryptonightR_instruction209 -PUBLIC CryptonightR_instruction210 -PUBLIC CryptonightR_instruction211 -PUBLIC CryptonightR_instruction212 -PUBLIC CryptonightR_instruction213 -PUBLIC CryptonightR_instruction214 -PUBLIC CryptonightR_instruction215 -PUBLIC CryptonightR_instruction216 -PUBLIC CryptonightR_instruction217 -PUBLIC CryptonightR_instruction218 -PUBLIC CryptonightR_instruction219 -PUBLIC CryptonightR_instruction220 -PUBLIC CryptonightR_instruction221 -PUBLIC CryptonightR_instruction222 -PUBLIC CryptonightR_instruction223 -PUBLIC CryptonightR_instruction224 -PUBLIC CryptonightR_instruction225 -PUBLIC CryptonightR_instruction226 -PUBLIC CryptonightR_instruction227 -PUBLIC CryptonightR_instruction228 -PUBLIC CryptonightR_instruction229 -PUBLIC CryptonightR_instruction230 -PUBLIC CryptonightR_instruction231 -PUBLIC CryptonightR_instruction232 -PUBLIC CryptonightR_instruction233 -PUBLIC CryptonightR_instruction234 -PUBLIC CryptonightR_instruction235 -PUBLIC CryptonightR_instruction236 -PUBLIC CryptonightR_instruction237 -PUBLIC CryptonightR_instruction238 -PUBLIC CryptonightR_instruction239 -PUBLIC CryptonightR_instruction240 -PUBLIC CryptonightR_instruction241 -PUBLIC CryptonightR_instruction242 -PUBLIC CryptonightR_instruction243 -PUBLIC CryptonightR_instruction244 -PUBLIC CryptonightR_instruction245 -PUBLIC CryptonightR_instruction246 -PUBLIC 
CryptonightR_instruction247 -PUBLIC CryptonightR_instruction248 -PUBLIC CryptonightR_instruction249 -PUBLIC CryptonightR_instruction250 -PUBLIC CryptonightR_instruction251 -PUBLIC CryptonightR_instruction252 -PUBLIC CryptonightR_instruction253 -PUBLIC CryptonightR_instruction254 -PUBLIC CryptonightR_instruction255 -PUBLIC CryptonightR_instruction256 -PUBLIC CryptonightR_instruction_mov0 -PUBLIC CryptonightR_instruction_mov1 -PUBLIC CryptonightR_instruction_mov2 -PUBLIC CryptonightR_instruction_mov3 -PUBLIC CryptonightR_instruction_mov4 -PUBLIC CryptonightR_instruction_mov5 -PUBLIC CryptonightR_instruction_mov6 -PUBLIC CryptonightR_instruction_mov7 -PUBLIC CryptonightR_instruction_mov8 -PUBLIC CryptonightR_instruction_mov9 -PUBLIC CryptonightR_instruction_mov10 -PUBLIC CryptonightR_instruction_mov11 -PUBLIC CryptonightR_instruction_mov12 -PUBLIC CryptonightR_instruction_mov13 -PUBLIC CryptonightR_instruction_mov14 -PUBLIC CryptonightR_instruction_mov15 -PUBLIC CryptonightR_instruction_mov16 -PUBLIC CryptonightR_instruction_mov17 -PUBLIC CryptonightR_instruction_mov18 -PUBLIC CryptonightR_instruction_mov19 -PUBLIC CryptonightR_instruction_mov20 -PUBLIC CryptonightR_instruction_mov21 -PUBLIC CryptonightR_instruction_mov22 -PUBLIC CryptonightR_instruction_mov23 -PUBLIC CryptonightR_instruction_mov24 -PUBLIC CryptonightR_instruction_mov25 -PUBLIC CryptonightR_instruction_mov26 -PUBLIC CryptonightR_instruction_mov27 -PUBLIC CryptonightR_instruction_mov28 -PUBLIC CryptonightR_instruction_mov29 -PUBLIC CryptonightR_instruction_mov30 -PUBLIC CryptonightR_instruction_mov31 -PUBLIC CryptonightR_instruction_mov32 -PUBLIC CryptonightR_instruction_mov33 -PUBLIC CryptonightR_instruction_mov34 -PUBLIC CryptonightR_instruction_mov35 -PUBLIC CryptonightR_instruction_mov36 -PUBLIC CryptonightR_instruction_mov37 -PUBLIC CryptonightR_instruction_mov38 -PUBLIC CryptonightR_instruction_mov39 -PUBLIC CryptonightR_instruction_mov40 -PUBLIC CryptonightR_instruction_mov41 -PUBLIC 
CryptonightR_instruction_mov42 -PUBLIC CryptonightR_instruction_mov43 -PUBLIC CryptonightR_instruction_mov44 -PUBLIC CryptonightR_instruction_mov45 -PUBLIC CryptonightR_instruction_mov46 -PUBLIC CryptonightR_instruction_mov47 -PUBLIC CryptonightR_instruction_mov48 -PUBLIC CryptonightR_instruction_mov49 -PUBLIC CryptonightR_instruction_mov50 -PUBLIC CryptonightR_instruction_mov51 -PUBLIC CryptonightR_instruction_mov52 -PUBLIC CryptonightR_instruction_mov53 -PUBLIC CryptonightR_instruction_mov54 -PUBLIC CryptonightR_instruction_mov55 -PUBLIC CryptonightR_instruction_mov56 -PUBLIC CryptonightR_instruction_mov57 -PUBLIC CryptonightR_instruction_mov58 -PUBLIC CryptonightR_instruction_mov59 -PUBLIC CryptonightR_instruction_mov60 -PUBLIC CryptonightR_instruction_mov61 -PUBLIC CryptonightR_instruction_mov62 -PUBLIC CryptonightR_instruction_mov63 -PUBLIC CryptonightR_instruction_mov64 -PUBLIC CryptonightR_instruction_mov65 -PUBLIC CryptonightR_instruction_mov66 -PUBLIC CryptonightR_instruction_mov67 -PUBLIC CryptonightR_instruction_mov68 -PUBLIC CryptonightR_instruction_mov69 -PUBLIC CryptonightR_instruction_mov70 -PUBLIC CryptonightR_instruction_mov71 -PUBLIC CryptonightR_instruction_mov72 -PUBLIC CryptonightR_instruction_mov73 -PUBLIC CryptonightR_instruction_mov74 -PUBLIC CryptonightR_instruction_mov75 -PUBLIC CryptonightR_instruction_mov76 -PUBLIC CryptonightR_instruction_mov77 -PUBLIC CryptonightR_instruction_mov78 -PUBLIC CryptonightR_instruction_mov79 -PUBLIC CryptonightR_instruction_mov80 -PUBLIC CryptonightR_instruction_mov81 -PUBLIC CryptonightR_instruction_mov82 -PUBLIC CryptonightR_instruction_mov83 -PUBLIC CryptonightR_instruction_mov84 -PUBLIC CryptonightR_instruction_mov85 -PUBLIC CryptonightR_instruction_mov86 -PUBLIC CryptonightR_instruction_mov87 -PUBLIC CryptonightR_instruction_mov88 -PUBLIC CryptonightR_instruction_mov89 -PUBLIC CryptonightR_instruction_mov90 -PUBLIC CryptonightR_instruction_mov91 -PUBLIC CryptonightR_instruction_mov92 -PUBLIC 
CryptonightR_instruction_mov93 -PUBLIC CryptonightR_instruction_mov94 -PUBLIC CryptonightR_instruction_mov95 -PUBLIC CryptonightR_instruction_mov96 -PUBLIC CryptonightR_instruction_mov97 -PUBLIC CryptonightR_instruction_mov98 -PUBLIC CryptonightR_instruction_mov99 -PUBLIC CryptonightR_instruction_mov100 -PUBLIC CryptonightR_instruction_mov101 -PUBLIC CryptonightR_instruction_mov102 -PUBLIC CryptonightR_instruction_mov103 -PUBLIC CryptonightR_instruction_mov104 -PUBLIC CryptonightR_instruction_mov105 -PUBLIC CryptonightR_instruction_mov106 -PUBLIC CryptonightR_instruction_mov107 -PUBLIC CryptonightR_instruction_mov108 -PUBLIC CryptonightR_instruction_mov109 -PUBLIC CryptonightR_instruction_mov110 -PUBLIC CryptonightR_instruction_mov111 -PUBLIC CryptonightR_instruction_mov112 -PUBLIC CryptonightR_instruction_mov113 -PUBLIC CryptonightR_instruction_mov114 -PUBLIC CryptonightR_instruction_mov115 -PUBLIC CryptonightR_instruction_mov116 -PUBLIC CryptonightR_instruction_mov117 -PUBLIC CryptonightR_instruction_mov118 -PUBLIC CryptonightR_instruction_mov119 -PUBLIC CryptonightR_instruction_mov120 -PUBLIC CryptonightR_instruction_mov121 -PUBLIC CryptonightR_instruction_mov122 -PUBLIC CryptonightR_instruction_mov123 -PUBLIC CryptonightR_instruction_mov124 -PUBLIC CryptonightR_instruction_mov125 -PUBLIC CryptonightR_instruction_mov126 -PUBLIC CryptonightR_instruction_mov127 -PUBLIC CryptonightR_instruction_mov128 -PUBLIC CryptonightR_instruction_mov129 -PUBLIC CryptonightR_instruction_mov130 -PUBLIC CryptonightR_instruction_mov131 -PUBLIC CryptonightR_instruction_mov132 -PUBLIC CryptonightR_instruction_mov133 -PUBLIC CryptonightR_instruction_mov134 -PUBLIC CryptonightR_instruction_mov135 -PUBLIC CryptonightR_instruction_mov136 -PUBLIC CryptonightR_instruction_mov137 -PUBLIC CryptonightR_instruction_mov138 -PUBLIC CryptonightR_instruction_mov139 -PUBLIC CryptonightR_instruction_mov140 -PUBLIC CryptonightR_instruction_mov141 -PUBLIC CryptonightR_instruction_mov142 -PUBLIC 
CryptonightR_instruction_mov143 -PUBLIC CryptonightR_instruction_mov144 -PUBLIC CryptonightR_instruction_mov145 -PUBLIC CryptonightR_instruction_mov146 -PUBLIC CryptonightR_instruction_mov147 -PUBLIC CryptonightR_instruction_mov148 -PUBLIC CryptonightR_instruction_mov149 -PUBLIC CryptonightR_instruction_mov150 -PUBLIC CryptonightR_instruction_mov151 -PUBLIC CryptonightR_instruction_mov152 -PUBLIC CryptonightR_instruction_mov153 -PUBLIC CryptonightR_instruction_mov154 -PUBLIC CryptonightR_instruction_mov155 -PUBLIC CryptonightR_instruction_mov156 -PUBLIC CryptonightR_instruction_mov157 -PUBLIC CryptonightR_instruction_mov158 -PUBLIC CryptonightR_instruction_mov159 -PUBLIC CryptonightR_instruction_mov160 -PUBLIC CryptonightR_instruction_mov161 -PUBLIC CryptonightR_instruction_mov162 -PUBLIC CryptonightR_instruction_mov163 -PUBLIC CryptonightR_instruction_mov164 -PUBLIC CryptonightR_instruction_mov165 -PUBLIC CryptonightR_instruction_mov166 -PUBLIC CryptonightR_instruction_mov167 -PUBLIC CryptonightR_instruction_mov168 -PUBLIC CryptonightR_instruction_mov169 -PUBLIC CryptonightR_instruction_mov170 -PUBLIC CryptonightR_instruction_mov171 -PUBLIC CryptonightR_instruction_mov172 -PUBLIC CryptonightR_instruction_mov173 -PUBLIC CryptonightR_instruction_mov174 -PUBLIC CryptonightR_instruction_mov175 -PUBLIC CryptonightR_instruction_mov176 -PUBLIC CryptonightR_instruction_mov177 -PUBLIC CryptonightR_instruction_mov178 -PUBLIC CryptonightR_instruction_mov179 -PUBLIC CryptonightR_instruction_mov180 -PUBLIC CryptonightR_instruction_mov181 -PUBLIC CryptonightR_instruction_mov182 -PUBLIC CryptonightR_instruction_mov183 -PUBLIC CryptonightR_instruction_mov184 -PUBLIC CryptonightR_instruction_mov185 -PUBLIC CryptonightR_instruction_mov186 -PUBLIC CryptonightR_instruction_mov187 -PUBLIC CryptonightR_instruction_mov188 -PUBLIC CryptonightR_instruction_mov189 -PUBLIC CryptonightR_instruction_mov190 -PUBLIC CryptonightR_instruction_mov191 -PUBLIC CryptonightR_instruction_mov192 -PUBLIC 
CryptonightR_instruction_mov193 -PUBLIC CryptonightR_instruction_mov194 -PUBLIC CryptonightR_instruction_mov195 -PUBLIC CryptonightR_instruction_mov196 -PUBLIC CryptonightR_instruction_mov197 -PUBLIC CryptonightR_instruction_mov198 -PUBLIC CryptonightR_instruction_mov199 -PUBLIC CryptonightR_instruction_mov200 -PUBLIC CryptonightR_instruction_mov201 -PUBLIC CryptonightR_instruction_mov202 -PUBLIC CryptonightR_instruction_mov203 -PUBLIC CryptonightR_instruction_mov204 -PUBLIC CryptonightR_instruction_mov205 -PUBLIC CryptonightR_instruction_mov206 -PUBLIC CryptonightR_instruction_mov207 -PUBLIC CryptonightR_instruction_mov208 -PUBLIC CryptonightR_instruction_mov209 -PUBLIC CryptonightR_instruction_mov210 -PUBLIC CryptonightR_instruction_mov211 -PUBLIC CryptonightR_instruction_mov212 -PUBLIC CryptonightR_instruction_mov213 -PUBLIC CryptonightR_instruction_mov214 -PUBLIC CryptonightR_instruction_mov215 -PUBLIC CryptonightR_instruction_mov216 -PUBLIC CryptonightR_instruction_mov217 -PUBLIC CryptonightR_instruction_mov218 -PUBLIC CryptonightR_instruction_mov219 -PUBLIC CryptonightR_instruction_mov220 -PUBLIC CryptonightR_instruction_mov221 -PUBLIC CryptonightR_instruction_mov222 -PUBLIC CryptonightR_instruction_mov223 -PUBLIC CryptonightR_instruction_mov224 -PUBLIC CryptonightR_instruction_mov225 -PUBLIC CryptonightR_instruction_mov226 -PUBLIC CryptonightR_instruction_mov227 -PUBLIC CryptonightR_instruction_mov228 -PUBLIC CryptonightR_instruction_mov229 -PUBLIC CryptonightR_instruction_mov230 -PUBLIC CryptonightR_instruction_mov231 -PUBLIC CryptonightR_instruction_mov232 -PUBLIC CryptonightR_instruction_mov233 -PUBLIC CryptonightR_instruction_mov234 -PUBLIC CryptonightR_instruction_mov235 -PUBLIC CryptonightR_instruction_mov236 -PUBLIC CryptonightR_instruction_mov237 -PUBLIC CryptonightR_instruction_mov238 -PUBLIC CryptonightR_instruction_mov239 -PUBLIC CryptonightR_instruction_mov240 -PUBLIC CryptonightR_instruction_mov241 -PUBLIC CryptonightR_instruction_mov242 -PUBLIC 
CryptonightR_instruction_mov243 -PUBLIC CryptonightR_instruction_mov244 -PUBLIC CryptonightR_instruction_mov245 -PUBLIC CryptonightR_instruction_mov246 -PUBLIC CryptonightR_instruction_mov247 -PUBLIC CryptonightR_instruction_mov248 -PUBLIC CryptonightR_instruction_mov249 -PUBLIC CryptonightR_instruction_mov250 -PUBLIC CryptonightR_instruction_mov251 -PUBLIC CryptonightR_instruction_mov252 -PUBLIC CryptonightR_instruction_mov253 -PUBLIC CryptonightR_instruction_mov254 -PUBLIC CryptonightR_instruction_mov255 -PUBLIC CryptonightR_instruction_mov256 - -INCLUDE CryptonightWOW_template_win.inc -INCLUDE CryptonightR_template_win.inc -INCLUDE CryptonightWOW_soft_aes_template_win.inc -INCLUDE CryptonightR_soft_aes_template_win.inc - -CryptonightR_instruction0: - imul rbx, rbx -CryptonightR_instruction1: - imul rbx, rbx -CryptonightR_instruction2: - imul rbx, rbx -CryptonightR_instruction3: - add rbx, r9 - add rbx, 2147483647 -CryptonightR_instruction4: - sub rbx, r9 -CryptonightR_instruction5: - ror ebx, cl -CryptonightR_instruction6: - rol ebx, cl -CryptonightR_instruction7: - xor rbx, r9 -CryptonightR_instruction8: - imul rsi, rbx -CryptonightR_instruction9: - imul rsi, rbx -CryptonightR_instruction10: - imul rsi, rbx -CryptonightR_instruction11: - add rsi, rbx - add rsi, 2147483647 -CryptonightR_instruction12: - sub rsi, rbx -CryptonightR_instruction13: - ror esi, cl -CryptonightR_instruction14: - rol esi, cl -CryptonightR_instruction15: - xor rsi, rbx -CryptonightR_instruction16: - imul rdi, rbx -CryptonightR_instruction17: - imul rdi, rbx -CryptonightR_instruction18: - imul rdi, rbx -CryptonightR_instruction19: - add rdi, rbx - add rdi, 2147483647 -CryptonightR_instruction20: - sub rdi, rbx -CryptonightR_instruction21: - ror edi, cl -CryptonightR_instruction22: - rol edi, cl -CryptonightR_instruction23: - xor rdi, rbx -CryptonightR_instruction24: - imul rbp, rbx -CryptonightR_instruction25: - imul rbp, rbx -CryptonightR_instruction26: - imul rbp, rbx 
-CryptonightR_instruction27: - add rbp, rbx - add rbp, 2147483647 -CryptonightR_instruction28: - sub rbp, rbx -CryptonightR_instruction29: - ror ebp, cl -CryptonightR_instruction30: - rol ebp, cl -CryptonightR_instruction31: - xor rbp, rbx -CryptonightR_instruction32: - imul rbx, rsi -CryptonightR_instruction33: - imul rbx, rsi -CryptonightR_instruction34: - imul rbx, rsi -CryptonightR_instruction35: - add rbx, rsi - add rbx, 2147483647 -CryptonightR_instruction36: - sub rbx, rsi -CryptonightR_instruction37: - ror ebx, cl -CryptonightR_instruction38: - rol ebx, cl -CryptonightR_instruction39: - xor rbx, rsi -CryptonightR_instruction40: - imul rsi, rsi -CryptonightR_instruction41: - imul rsi, rsi -CryptonightR_instruction42: - imul rsi, rsi -CryptonightR_instruction43: - add rsi, r9 - add rsi, 2147483647 -CryptonightR_instruction44: - sub rsi, r9 -CryptonightR_instruction45: - ror esi, cl -CryptonightR_instruction46: - rol esi, cl -CryptonightR_instruction47: - xor rsi, r9 -CryptonightR_instruction48: - imul rdi, rsi -CryptonightR_instruction49: - imul rdi, rsi -CryptonightR_instruction50: - imul rdi, rsi -CryptonightR_instruction51: - add rdi, rsi - add rdi, 2147483647 -CryptonightR_instruction52: - sub rdi, rsi -CryptonightR_instruction53: - ror edi, cl -CryptonightR_instruction54: - rol edi, cl -CryptonightR_instruction55: - xor rdi, rsi -CryptonightR_instruction56: - imul rbp, rsi -CryptonightR_instruction57: - imul rbp, rsi -CryptonightR_instruction58: - imul rbp, rsi -CryptonightR_instruction59: - add rbp, rsi - add rbp, 2147483647 -CryptonightR_instruction60: - sub rbp, rsi -CryptonightR_instruction61: - ror ebp, cl -CryptonightR_instruction62: - rol ebp, cl -CryptonightR_instruction63: - xor rbp, rsi -CryptonightR_instruction64: - imul rbx, rdi -CryptonightR_instruction65: - imul rbx, rdi -CryptonightR_instruction66: - imul rbx, rdi -CryptonightR_instruction67: - add rbx, rdi - add rbx, 2147483647 -CryptonightR_instruction68: - sub rbx, rdi 
-CryptonightR_instruction69: - ror ebx, cl -CryptonightR_instruction70: - rol ebx, cl -CryptonightR_instruction71: - xor rbx, rdi -CryptonightR_instruction72: - imul rsi, rdi -CryptonightR_instruction73: - imul rsi, rdi -CryptonightR_instruction74: - imul rsi, rdi -CryptonightR_instruction75: - add rsi, rdi - add rsi, 2147483647 -CryptonightR_instruction76: - sub rsi, rdi -CryptonightR_instruction77: - ror esi, cl -CryptonightR_instruction78: - rol esi, cl -CryptonightR_instruction79: - xor rsi, rdi -CryptonightR_instruction80: - imul rdi, rdi -CryptonightR_instruction81: - imul rdi, rdi -CryptonightR_instruction82: - imul rdi, rdi -CryptonightR_instruction83: - add rdi, r9 - add rdi, 2147483647 -CryptonightR_instruction84: - sub rdi, r9 -CryptonightR_instruction85: - ror edi, cl -CryptonightR_instruction86: - rol edi, cl -CryptonightR_instruction87: - xor rdi, r9 -CryptonightR_instruction88: - imul rbp, rdi -CryptonightR_instruction89: - imul rbp, rdi -CryptonightR_instruction90: - imul rbp, rdi -CryptonightR_instruction91: - add rbp, rdi - add rbp, 2147483647 -CryptonightR_instruction92: - sub rbp, rdi -CryptonightR_instruction93: - ror ebp, cl -CryptonightR_instruction94: - rol ebp, cl -CryptonightR_instruction95: - xor rbp, rdi -CryptonightR_instruction96: - imul rbx, rbp -CryptonightR_instruction97: - imul rbx, rbp -CryptonightR_instruction98: - imul rbx, rbp -CryptonightR_instruction99: - add rbx, rbp - add rbx, 2147483647 -CryptonightR_instruction100: - sub rbx, rbp -CryptonightR_instruction101: - ror ebx, cl -CryptonightR_instruction102: - rol ebx, cl -CryptonightR_instruction103: - xor rbx, rbp -CryptonightR_instruction104: - imul rsi, rbp -CryptonightR_instruction105: - imul rsi, rbp -CryptonightR_instruction106: - imul rsi, rbp -CryptonightR_instruction107: - add rsi, rbp - add rsi, 2147483647 -CryptonightR_instruction108: - sub rsi, rbp -CryptonightR_instruction109: - ror esi, cl -CryptonightR_instruction110: - rol esi, cl -CryptonightR_instruction111: 
- xor rsi, rbp -CryptonightR_instruction112: - imul rdi, rbp -CryptonightR_instruction113: - imul rdi, rbp -CryptonightR_instruction114: - imul rdi, rbp -CryptonightR_instruction115: - add rdi, rbp - add rdi, 2147483647 -CryptonightR_instruction116: - sub rdi, rbp -CryptonightR_instruction117: - ror edi, cl -CryptonightR_instruction118: - rol edi, cl -CryptonightR_instruction119: - xor rdi, rbp -CryptonightR_instruction120: - imul rbp, rbp -CryptonightR_instruction121: - imul rbp, rbp -CryptonightR_instruction122: - imul rbp, rbp -CryptonightR_instruction123: - add rbp, r9 - add rbp, 2147483647 -CryptonightR_instruction124: - sub rbp, r9 -CryptonightR_instruction125: - ror ebp, cl -CryptonightR_instruction126: - rol ebp, cl -CryptonightR_instruction127: - xor rbp, r9 -CryptonightR_instruction128: - imul rbx, rsp -CryptonightR_instruction129: - imul rbx, rsp -CryptonightR_instruction130: - imul rbx, rsp -CryptonightR_instruction131: - add rbx, rsp - add rbx, 2147483647 -CryptonightR_instruction132: - sub rbx, rsp -CryptonightR_instruction133: - ror ebx, cl -CryptonightR_instruction134: - rol ebx, cl -CryptonightR_instruction135: - xor rbx, rsp -CryptonightR_instruction136: - imul rsi, rsp -CryptonightR_instruction137: - imul rsi, rsp -CryptonightR_instruction138: - imul rsi, rsp -CryptonightR_instruction139: - add rsi, rsp - add rsi, 2147483647 -CryptonightR_instruction140: - sub rsi, rsp -CryptonightR_instruction141: - ror esi, cl -CryptonightR_instruction142: - rol esi, cl -CryptonightR_instruction143: - xor rsi, rsp -CryptonightR_instruction144: - imul rdi, rsp -CryptonightR_instruction145: - imul rdi, rsp -CryptonightR_instruction146: - imul rdi, rsp -CryptonightR_instruction147: - add rdi, rsp - add rdi, 2147483647 -CryptonightR_instruction148: - sub rdi, rsp -CryptonightR_instruction149: - ror edi, cl -CryptonightR_instruction150: - rol edi, cl -CryptonightR_instruction151: - xor rdi, rsp -CryptonightR_instruction152: - imul rbp, rsp 
-CryptonightR_instruction153: - imul rbp, rsp -CryptonightR_instruction154: - imul rbp, rsp -CryptonightR_instruction155: - add rbp, rsp - add rbp, 2147483647 -CryptonightR_instruction156: - sub rbp, rsp -CryptonightR_instruction157: - ror ebp, cl -CryptonightR_instruction158: - rol ebp, cl -CryptonightR_instruction159: - xor rbp, rsp -CryptonightR_instruction160: - imul rbx, r15 -CryptonightR_instruction161: - imul rbx, r15 -CryptonightR_instruction162: - imul rbx, r15 -CryptonightR_instruction163: - add rbx, r15 - add rbx, 2147483647 -CryptonightR_instruction164: - sub rbx, r15 -CryptonightR_instruction165: - ror ebx, cl -CryptonightR_instruction166: - rol ebx, cl -CryptonightR_instruction167: - xor rbx, r15 -CryptonightR_instruction168: - imul rsi, r15 -CryptonightR_instruction169: - imul rsi, r15 -CryptonightR_instruction170: - imul rsi, r15 -CryptonightR_instruction171: - add rsi, r15 - add rsi, 2147483647 -CryptonightR_instruction172: - sub rsi, r15 -CryptonightR_instruction173: - ror esi, cl -CryptonightR_instruction174: - rol esi, cl -CryptonightR_instruction175: - xor rsi, r15 -CryptonightR_instruction176: - imul rdi, r15 -CryptonightR_instruction177: - imul rdi, r15 -CryptonightR_instruction178: - imul rdi, r15 -CryptonightR_instruction179: - add rdi, r15 - add rdi, 2147483647 -CryptonightR_instruction180: - sub rdi, r15 -CryptonightR_instruction181: - ror edi, cl -CryptonightR_instruction182: - rol edi, cl -CryptonightR_instruction183: - xor rdi, r15 -CryptonightR_instruction184: - imul rbp, r15 -CryptonightR_instruction185: - imul rbp, r15 -CryptonightR_instruction186: - imul rbp, r15 -CryptonightR_instruction187: - add rbp, r15 - add rbp, 2147483647 -CryptonightR_instruction188: - sub rbp, r15 -CryptonightR_instruction189: - ror ebp, cl -CryptonightR_instruction190: - rol ebp, cl -CryptonightR_instruction191: - xor rbp, r15 -CryptonightR_instruction192: - imul rbx, rax -CryptonightR_instruction193: - imul rbx, rax -CryptonightR_instruction194: - imul 
rbx, rax -CryptonightR_instruction195: - add rbx, rax - add rbx, 2147483647 -CryptonightR_instruction196: - sub rbx, rax -CryptonightR_instruction197: - ror ebx, cl -CryptonightR_instruction198: - rol ebx, cl -CryptonightR_instruction199: - xor rbx, rax -CryptonightR_instruction200: - imul rsi, rax -CryptonightR_instruction201: - imul rsi, rax -CryptonightR_instruction202: - imul rsi, rax -CryptonightR_instruction203: - add rsi, rax - add rsi, 2147483647 -CryptonightR_instruction204: - sub rsi, rax -CryptonightR_instruction205: - ror esi, cl -CryptonightR_instruction206: - rol esi, cl -CryptonightR_instruction207: - xor rsi, rax -CryptonightR_instruction208: - imul rdi, rax -CryptonightR_instruction209: - imul rdi, rax -CryptonightR_instruction210: - imul rdi, rax -CryptonightR_instruction211: - add rdi, rax - add rdi, 2147483647 -CryptonightR_instruction212: - sub rdi, rax -CryptonightR_instruction213: - ror edi, cl -CryptonightR_instruction214: - rol edi, cl -CryptonightR_instruction215: - xor rdi, rax -CryptonightR_instruction216: - imul rbp, rax -CryptonightR_instruction217: - imul rbp, rax -CryptonightR_instruction218: - imul rbp, rax -CryptonightR_instruction219: - add rbp, rax - add rbp, 2147483647 -CryptonightR_instruction220: - sub rbp, rax -CryptonightR_instruction221: - ror ebp, cl -CryptonightR_instruction222: - rol ebp, cl -CryptonightR_instruction223: - xor rbp, rax -CryptonightR_instruction224: - imul rbx, rdx -CryptonightR_instruction225: - imul rbx, rdx -CryptonightR_instruction226: - imul rbx, rdx -CryptonightR_instruction227: - add rbx, rdx - add rbx, 2147483647 -CryptonightR_instruction228: - sub rbx, rdx -CryptonightR_instruction229: - ror ebx, cl -CryptonightR_instruction230: - rol ebx, cl -CryptonightR_instruction231: - xor rbx, rdx -CryptonightR_instruction232: - imul rsi, rdx -CryptonightR_instruction233: - imul rsi, rdx -CryptonightR_instruction234: - imul rsi, rdx -CryptonightR_instruction235: - add rsi, rdx - add rsi, 2147483647 
-CryptonightR_instruction236: - sub rsi, rdx -CryptonightR_instruction237: - ror esi, cl -CryptonightR_instruction238: - rol esi, cl -CryptonightR_instruction239: - xor rsi, rdx -CryptonightR_instruction240: - imul rdi, rdx -CryptonightR_instruction241: - imul rdi, rdx -CryptonightR_instruction242: - imul rdi, rdx -CryptonightR_instruction243: - add rdi, rdx - add rdi, 2147483647 -CryptonightR_instruction244: - sub rdi, rdx -CryptonightR_instruction245: - ror edi, cl -CryptonightR_instruction246: - rol edi, cl -CryptonightR_instruction247: - xor rdi, rdx -CryptonightR_instruction248: - imul rbp, rdx -CryptonightR_instruction249: - imul rbp, rdx -CryptonightR_instruction250: - imul rbp, rdx -CryptonightR_instruction251: - add rbp, rdx - add rbp, 2147483647 -CryptonightR_instruction252: - sub rbp, rdx -CryptonightR_instruction253: - ror ebp, cl -CryptonightR_instruction254: - rol ebp, cl -CryptonightR_instruction255: - xor rbp, rdx -CryptonightR_instruction256: - imul rbx, rbx -CryptonightR_instruction_mov0: - -CryptonightR_instruction_mov1: - -CryptonightR_instruction_mov2: - -CryptonightR_instruction_mov3: - -CryptonightR_instruction_mov4: - -CryptonightR_instruction_mov5: - mov rcx, rbx -CryptonightR_instruction_mov6: - mov rcx, rbx -CryptonightR_instruction_mov7: - -CryptonightR_instruction_mov8: - -CryptonightR_instruction_mov9: - -CryptonightR_instruction_mov10: - -CryptonightR_instruction_mov11: - -CryptonightR_instruction_mov12: - -CryptonightR_instruction_mov13: - mov rcx, rbx -CryptonightR_instruction_mov14: - mov rcx, rbx -CryptonightR_instruction_mov15: - -CryptonightR_instruction_mov16: - -CryptonightR_instruction_mov17: - -CryptonightR_instruction_mov18: - -CryptonightR_instruction_mov19: - -CryptonightR_instruction_mov20: - -CryptonightR_instruction_mov21: - mov rcx, rbx -CryptonightR_instruction_mov22: - mov rcx, rbx -CryptonightR_instruction_mov23: - -CryptonightR_instruction_mov24: - -CryptonightR_instruction_mov25: - 
-CryptonightR_instruction_mov26: - -CryptonightR_instruction_mov27: - -CryptonightR_instruction_mov28: - -CryptonightR_instruction_mov29: - mov rcx, rbx -CryptonightR_instruction_mov30: - mov rcx, rbx -CryptonightR_instruction_mov31: - -CryptonightR_instruction_mov32: - -CryptonightR_instruction_mov33: - -CryptonightR_instruction_mov34: - -CryptonightR_instruction_mov35: - -CryptonightR_instruction_mov36: - -CryptonightR_instruction_mov37: - mov rcx, rsi -CryptonightR_instruction_mov38: - mov rcx, rsi -CryptonightR_instruction_mov39: - -CryptonightR_instruction_mov40: - -CryptonightR_instruction_mov41: - -CryptonightR_instruction_mov42: - -CryptonightR_instruction_mov43: - -CryptonightR_instruction_mov44: - -CryptonightR_instruction_mov45: - mov rcx, rsi -CryptonightR_instruction_mov46: - mov rcx, rsi -CryptonightR_instruction_mov47: - -CryptonightR_instruction_mov48: - -CryptonightR_instruction_mov49: - -CryptonightR_instruction_mov50: - -CryptonightR_instruction_mov51: - -CryptonightR_instruction_mov52: - -CryptonightR_instruction_mov53: - mov rcx, rsi -CryptonightR_instruction_mov54: - mov rcx, rsi -CryptonightR_instruction_mov55: - -CryptonightR_instruction_mov56: - -CryptonightR_instruction_mov57: - -CryptonightR_instruction_mov58: - -CryptonightR_instruction_mov59: - -CryptonightR_instruction_mov60: - -CryptonightR_instruction_mov61: - mov rcx, rsi -CryptonightR_instruction_mov62: - mov rcx, rsi -CryptonightR_instruction_mov63: - -CryptonightR_instruction_mov64: - -CryptonightR_instruction_mov65: - -CryptonightR_instruction_mov66: - -CryptonightR_instruction_mov67: - -CryptonightR_instruction_mov68: - -CryptonightR_instruction_mov69: - mov rcx, rdi -CryptonightR_instruction_mov70: - mov rcx, rdi -CryptonightR_instruction_mov71: - -CryptonightR_instruction_mov72: - -CryptonightR_instruction_mov73: - -CryptonightR_instruction_mov74: - -CryptonightR_instruction_mov75: - -CryptonightR_instruction_mov76: - -CryptonightR_instruction_mov77: - mov rcx, rdi 
-CryptonightR_instruction_mov78: - mov rcx, rdi -CryptonightR_instruction_mov79: - -CryptonightR_instruction_mov80: - -CryptonightR_instruction_mov81: - -CryptonightR_instruction_mov82: - -CryptonightR_instruction_mov83: - -CryptonightR_instruction_mov84: - -CryptonightR_instruction_mov85: - mov rcx, rdi -CryptonightR_instruction_mov86: - mov rcx, rdi -CryptonightR_instruction_mov87: - -CryptonightR_instruction_mov88: - -CryptonightR_instruction_mov89: - -CryptonightR_instruction_mov90: - -CryptonightR_instruction_mov91: - -CryptonightR_instruction_mov92: - -CryptonightR_instruction_mov93: - mov rcx, rdi -CryptonightR_instruction_mov94: - mov rcx, rdi -CryptonightR_instruction_mov95: - -CryptonightR_instruction_mov96: - -CryptonightR_instruction_mov97: - -CryptonightR_instruction_mov98: - -CryptonightR_instruction_mov99: - -CryptonightR_instruction_mov100: - -CryptonightR_instruction_mov101: - mov rcx, rbp -CryptonightR_instruction_mov102: - mov rcx, rbp -CryptonightR_instruction_mov103: - -CryptonightR_instruction_mov104: - -CryptonightR_instruction_mov105: - -CryptonightR_instruction_mov106: - -CryptonightR_instruction_mov107: - -CryptonightR_instruction_mov108: - -CryptonightR_instruction_mov109: - mov rcx, rbp -CryptonightR_instruction_mov110: - mov rcx, rbp -CryptonightR_instruction_mov111: - -CryptonightR_instruction_mov112: - -CryptonightR_instruction_mov113: - -CryptonightR_instruction_mov114: - -CryptonightR_instruction_mov115: - -CryptonightR_instruction_mov116: - -CryptonightR_instruction_mov117: - mov rcx, rbp -CryptonightR_instruction_mov118: - mov rcx, rbp -CryptonightR_instruction_mov119: - -CryptonightR_instruction_mov120: - -CryptonightR_instruction_mov121: - -CryptonightR_instruction_mov122: - -CryptonightR_instruction_mov123: - -CryptonightR_instruction_mov124: - -CryptonightR_instruction_mov125: - mov rcx, rbp -CryptonightR_instruction_mov126: - mov rcx, rbp -CryptonightR_instruction_mov127: - -CryptonightR_instruction_mov128: - 
-CryptonightR_instruction_mov129: - -CryptonightR_instruction_mov130: - -CryptonightR_instruction_mov131: - -CryptonightR_instruction_mov132: - -CryptonightR_instruction_mov133: - mov rcx, rsp -CryptonightR_instruction_mov134: - mov rcx, rsp -CryptonightR_instruction_mov135: - -CryptonightR_instruction_mov136: - -CryptonightR_instruction_mov137: - -CryptonightR_instruction_mov138: - -CryptonightR_instruction_mov139: - -CryptonightR_instruction_mov140: - -CryptonightR_instruction_mov141: - mov rcx, rsp -CryptonightR_instruction_mov142: - mov rcx, rsp -CryptonightR_instruction_mov143: - -CryptonightR_instruction_mov144: - -CryptonightR_instruction_mov145: - -CryptonightR_instruction_mov146: - -CryptonightR_instruction_mov147: - -CryptonightR_instruction_mov148: - -CryptonightR_instruction_mov149: - mov rcx, rsp -CryptonightR_instruction_mov150: - mov rcx, rsp -CryptonightR_instruction_mov151: - -CryptonightR_instruction_mov152: - -CryptonightR_instruction_mov153: - -CryptonightR_instruction_mov154: - -CryptonightR_instruction_mov155: - -CryptonightR_instruction_mov156: - -CryptonightR_instruction_mov157: - mov rcx, rsp -CryptonightR_instruction_mov158: - mov rcx, rsp -CryptonightR_instruction_mov159: - -CryptonightR_instruction_mov160: - -CryptonightR_instruction_mov161: - -CryptonightR_instruction_mov162: - -CryptonightR_instruction_mov163: - -CryptonightR_instruction_mov164: - -CryptonightR_instruction_mov165: - mov rcx, r15 -CryptonightR_instruction_mov166: - mov rcx, r15 -CryptonightR_instruction_mov167: - -CryptonightR_instruction_mov168: - -CryptonightR_instruction_mov169: - -CryptonightR_instruction_mov170: - -CryptonightR_instruction_mov171: - -CryptonightR_instruction_mov172: - -CryptonightR_instruction_mov173: - mov rcx, r15 -CryptonightR_instruction_mov174: - mov rcx, r15 -CryptonightR_instruction_mov175: - -CryptonightR_instruction_mov176: - -CryptonightR_instruction_mov177: - -CryptonightR_instruction_mov178: - -CryptonightR_instruction_mov179: - 
-CryptonightR_instruction_mov180: - -CryptonightR_instruction_mov181: - mov rcx, r15 -CryptonightR_instruction_mov182: - mov rcx, r15 -CryptonightR_instruction_mov183: - -CryptonightR_instruction_mov184: - -CryptonightR_instruction_mov185: - -CryptonightR_instruction_mov186: - -CryptonightR_instruction_mov187: - -CryptonightR_instruction_mov188: - -CryptonightR_instruction_mov189: - mov rcx, r15 -CryptonightR_instruction_mov190: - mov rcx, r15 -CryptonightR_instruction_mov191: - -CryptonightR_instruction_mov192: - -CryptonightR_instruction_mov193: - -CryptonightR_instruction_mov194: - -CryptonightR_instruction_mov195: - -CryptonightR_instruction_mov196: - -CryptonightR_instruction_mov197: - mov rcx, rax -CryptonightR_instruction_mov198: - mov rcx, rax -CryptonightR_instruction_mov199: - -CryptonightR_instruction_mov200: - -CryptonightR_instruction_mov201: - -CryptonightR_instruction_mov202: - -CryptonightR_instruction_mov203: - -CryptonightR_instruction_mov204: - -CryptonightR_instruction_mov205: - mov rcx, rax -CryptonightR_instruction_mov206: - mov rcx, rax -CryptonightR_instruction_mov207: - -CryptonightR_instruction_mov208: - -CryptonightR_instruction_mov209: - -CryptonightR_instruction_mov210: - -CryptonightR_instruction_mov211: - -CryptonightR_instruction_mov212: - -CryptonightR_instruction_mov213: - mov rcx, rax -CryptonightR_instruction_mov214: - mov rcx, rax -CryptonightR_instruction_mov215: - -CryptonightR_instruction_mov216: - -CryptonightR_instruction_mov217: - -CryptonightR_instruction_mov218: - -CryptonightR_instruction_mov219: - -CryptonightR_instruction_mov220: - -CryptonightR_instruction_mov221: - mov rcx, rax -CryptonightR_instruction_mov222: - mov rcx, rax -CryptonightR_instruction_mov223: - -CryptonightR_instruction_mov224: - -CryptonightR_instruction_mov225: - -CryptonightR_instruction_mov226: - -CryptonightR_instruction_mov227: - -CryptonightR_instruction_mov228: - -CryptonightR_instruction_mov229: - mov rcx, rdx 
-CryptonightR_instruction_mov230: - mov rcx, rdx -CryptonightR_instruction_mov231: - -CryptonightR_instruction_mov232: - -CryptonightR_instruction_mov233: - -CryptonightR_instruction_mov234: - -CryptonightR_instruction_mov235: - -CryptonightR_instruction_mov236: - -CryptonightR_instruction_mov237: - mov rcx, rdx -CryptonightR_instruction_mov238: - mov rcx, rdx -CryptonightR_instruction_mov239: - -CryptonightR_instruction_mov240: - -CryptonightR_instruction_mov241: - -CryptonightR_instruction_mov242: - -CryptonightR_instruction_mov243: - -CryptonightR_instruction_mov244: - -CryptonightR_instruction_mov245: - mov rcx, rdx -CryptonightR_instruction_mov246: - mov rcx, rdx -CryptonightR_instruction_mov247: - -CryptonightR_instruction_mov248: - -CryptonightR_instruction_mov249: - -CryptonightR_instruction_mov250: - -CryptonightR_instruction_mov251: - -CryptonightR_instruction_mov252: - -CryptonightR_instruction_mov253: - mov rcx, rdx -CryptonightR_instruction_mov254: - mov rcx, rdx -CryptonightR_instruction_mov255: - -CryptonightR_instruction_mov256: - -_TEXT_CN_TEMPLATE ENDS -END diff --git a/src/crypto/asm/CryptonightR_template.h b/src/crypto/asm/CryptonightR_template.h deleted file mode 100644 index d9159a8f..00000000 --- a/src/crypto/asm/CryptonightR_template.h +++ /dev/null @@ -1,1087 +0,0 @@ -// Auto-generated file, do not edit - -extern "C" -{ - void CryptonightWOW_template_part1(); - void CryptonightWOW_template_mainloop(); - void CryptonightWOW_template_part2(); - void CryptonightWOW_template_part3(); - void CryptonightWOW_template_end(); - void CryptonightWOW_template_double_part1(); - void CryptonightWOW_template_double_mainloop(); - void CryptonightWOW_template_double_part2(); - void CryptonightWOW_template_double_part3(); - void CryptonightWOW_template_double_part4(); - void CryptonightWOW_template_double_end(); - - void CryptonightR_template_part1(); - void CryptonightR_template_mainloop(); - void CryptonightR_template_part2(); - void 
CryptonightR_template_part3(); - void CryptonightR_template_end(); - void CryptonightR_template_double_part1(); - void CryptonightR_template_double_mainloop(); - void CryptonightR_template_double_part2(); - void CryptonightR_template_double_part3(); - void CryptonightR_template_double_part4(); - void CryptonightR_template_double_end(); - - void CryptonightWOW_soft_aes_template_part1(); - void CryptonightWOW_soft_aes_template_mainloop(); - void CryptonightWOW_soft_aes_template_part2(); - void CryptonightWOW_soft_aes_template_part3(); - void CryptonightWOW_soft_aes_template_end(); - void CryptonightWOW_soft_aes_template_double_part1(); - void CryptonightWOW_soft_aes_template_double_mainloop(); - void CryptonightWOW_soft_aes_template_double_part2(); - void CryptonightWOW_soft_aes_template_double_part3(); - void CryptonightWOW_soft_aes_template_double_part4(); - void CryptonightWOW_soft_aes_template_double_end(); - - void CryptonightR_soft_aes_template_part1(); - void CryptonightR_soft_aes_template_mainloop(); - void CryptonightR_soft_aes_template_part2(); - void CryptonightR_soft_aes_template_part3(); - void CryptonightR_soft_aes_template_end(); - void CryptonightR_soft_aes_template_double_part1(); - void CryptonightR_soft_aes_template_double_mainloop(); - void CryptonightR_soft_aes_template_double_part2(); - void CryptonightR_soft_aes_template_double_part3(); - void CryptonightR_soft_aes_template_double_part4(); - void CryptonightR_soft_aes_template_double_end(); - - void CryptonightR_instruction0(); - void CryptonightR_instruction1(); - void CryptonightR_instruction2(); - void CryptonightR_instruction3(); - void CryptonightR_instruction4(); - void CryptonightR_instruction5(); - void CryptonightR_instruction6(); - void CryptonightR_instruction7(); - void CryptonightR_instruction8(); - void CryptonightR_instruction9(); - void CryptonightR_instruction10(); - void CryptonightR_instruction11(); - void CryptonightR_instruction12(); - void CryptonightR_instruction13(); - 
void CryptonightR_instruction14(); - void CryptonightR_instruction15(); - void CryptonightR_instruction16(); - void CryptonightR_instruction17(); - void CryptonightR_instruction18(); - void CryptonightR_instruction19(); - void CryptonightR_instruction20(); - void CryptonightR_instruction21(); - void CryptonightR_instruction22(); - void CryptonightR_instruction23(); - void CryptonightR_instruction24(); - void CryptonightR_instruction25(); - void CryptonightR_instruction26(); - void CryptonightR_instruction27(); - void CryptonightR_instruction28(); - void CryptonightR_instruction29(); - void CryptonightR_instruction30(); - void CryptonightR_instruction31(); - void CryptonightR_instruction32(); - void CryptonightR_instruction33(); - void CryptonightR_instruction34(); - void CryptonightR_instruction35(); - void CryptonightR_instruction36(); - void CryptonightR_instruction37(); - void CryptonightR_instruction38(); - void CryptonightR_instruction39(); - void CryptonightR_instruction40(); - void CryptonightR_instruction41(); - void CryptonightR_instruction42(); - void CryptonightR_instruction43(); - void CryptonightR_instruction44(); - void CryptonightR_instruction45(); - void CryptonightR_instruction46(); - void CryptonightR_instruction47(); - void CryptonightR_instruction48(); - void CryptonightR_instruction49(); - void CryptonightR_instruction50(); - void CryptonightR_instruction51(); - void CryptonightR_instruction52(); - void CryptonightR_instruction53(); - void CryptonightR_instruction54(); - void CryptonightR_instruction55(); - void CryptonightR_instruction56(); - void CryptonightR_instruction57(); - void CryptonightR_instruction58(); - void CryptonightR_instruction59(); - void CryptonightR_instruction60(); - void CryptonightR_instruction61(); - void CryptonightR_instruction62(); - void CryptonightR_instruction63(); - void CryptonightR_instruction64(); - void CryptonightR_instruction65(); - void CryptonightR_instruction66(); - void CryptonightR_instruction67(); - 
void CryptonightR_instruction68(); - void CryptonightR_instruction69(); - void CryptonightR_instruction70(); - void CryptonightR_instruction71(); - void CryptonightR_instruction72(); - void CryptonightR_instruction73(); - void CryptonightR_instruction74(); - void CryptonightR_instruction75(); - void CryptonightR_instruction76(); - void CryptonightR_instruction77(); - void CryptonightR_instruction78(); - void CryptonightR_instruction79(); - void CryptonightR_instruction80(); - void CryptonightR_instruction81(); - void CryptonightR_instruction82(); - void CryptonightR_instruction83(); - void CryptonightR_instruction84(); - void CryptonightR_instruction85(); - void CryptonightR_instruction86(); - void CryptonightR_instruction87(); - void CryptonightR_instruction88(); - void CryptonightR_instruction89(); - void CryptonightR_instruction90(); - void CryptonightR_instruction91(); - void CryptonightR_instruction92(); - void CryptonightR_instruction93(); - void CryptonightR_instruction94(); - void CryptonightR_instruction95(); - void CryptonightR_instruction96(); - void CryptonightR_instruction97(); - void CryptonightR_instruction98(); - void CryptonightR_instruction99(); - void CryptonightR_instruction100(); - void CryptonightR_instruction101(); - void CryptonightR_instruction102(); - void CryptonightR_instruction103(); - void CryptonightR_instruction104(); - void CryptonightR_instruction105(); - void CryptonightR_instruction106(); - void CryptonightR_instruction107(); - void CryptonightR_instruction108(); - void CryptonightR_instruction109(); - void CryptonightR_instruction110(); - void CryptonightR_instruction111(); - void CryptonightR_instruction112(); - void CryptonightR_instruction113(); - void CryptonightR_instruction114(); - void CryptonightR_instruction115(); - void CryptonightR_instruction116(); - void CryptonightR_instruction117(); - void CryptonightR_instruction118(); - void CryptonightR_instruction119(); - void CryptonightR_instruction120(); - void 
CryptonightR_instruction121(); - void CryptonightR_instruction122(); - void CryptonightR_instruction123(); - void CryptonightR_instruction124(); - void CryptonightR_instruction125(); - void CryptonightR_instruction126(); - void CryptonightR_instruction127(); - void CryptonightR_instruction128(); - void CryptonightR_instruction129(); - void CryptonightR_instruction130(); - void CryptonightR_instruction131(); - void CryptonightR_instruction132(); - void CryptonightR_instruction133(); - void CryptonightR_instruction134(); - void CryptonightR_instruction135(); - void CryptonightR_instruction136(); - void CryptonightR_instruction137(); - void CryptonightR_instruction138(); - void CryptonightR_instruction139(); - void CryptonightR_instruction140(); - void CryptonightR_instruction141(); - void CryptonightR_instruction142(); - void CryptonightR_instruction143(); - void CryptonightR_instruction144(); - void CryptonightR_instruction145(); - void CryptonightR_instruction146(); - void CryptonightR_instruction147(); - void CryptonightR_instruction148(); - void CryptonightR_instruction149(); - void CryptonightR_instruction150(); - void CryptonightR_instruction151(); - void CryptonightR_instruction152(); - void CryptonightR_instruction153(); - void CryptonightR_instruction154(); - void CryptonightR_instruction155(); - void CryptonightR_instruction156(); - void CryptonightR_instruction157(); - void CryptonightR_instruction158(); - void CryptonightR_instruction159(); - void CryptonightR_instruction160(); - void CryptonightR_instruction161(); - void CryptonightR_instruction162(); - void CryptonightR_instruction163(); - void CryptonightR_instruction164(); - void CryptonightR_instruction165(); - void CryptonightR_instruction166(); - void CryptonightR_instruction167(); - void CryptonightR_instruction168(); - void CryptonightR_instruction169(); - void CryptonightR_instruction170(); - void CryptonightR_instruction171(); - void CryptonightR_instruction172(); - void 
CryptonightR_instruction173(); - void CryptonightR_instruction174(); - void CryptonightR_instruction175(); - void CryptonightR_instruction176(); - void CryptonightR_instruction177(); - void CryptonightR_instruction178(); - void CryptonightR_instruction179(); - void CryptonightR_instruction180(); - void CryptonightR_instruction181(); - void CryptonightR_instruction182(); - void CryptonightR_instruction183(); - void CryptonightR_instruction184(); - void CryptonightR_instruction185(); - void CryptonightR_instruction186(); - void CryptonightR_instruction187(); - void CryptonightR_instruction188(); - void CryptonightR_instruction189(); - void CryptonightR_instruction190(); - void CryptonightR_instruction191(); - void CryptonightR_instruction192(); - void CryptonightR_instruction193(); - void CryptonightR_instruction194(); - void CryptonightR_instruction195(); - void CryptonightR_instruction196(); - void CryptonightR_instruction197(); - void CryptonightR_instruction198(); - void CryptonightR_instruction199(); - void CryptonightR_instruction200(); - void CryptonightR_instruction201(); - void CryptonightR_instruction202(); - void CryptonightR_instruction203(); - void CryptonightR_instruction204(); - void CryptonightR_instruction205(); - void CryptonightR_instruction206(); - void CryptonightR_instruction207(); - void CryptonightR_instruction208(); - void CryptonightR_instruction209(); - void CryptonightR_instruction210(); - void CryptonightR_instruction211(); - void CryptonightR_instruction212(); - void CryptonightR_instruction213(); - void CryptonightR_instruction214(); - void CryptonightR_instruction215(); - void CryptonightR_instruction216(); - void CryptonightR_instruction217(); - void CryptonightR_instruction218(); - void CryptonightR_instruction219(); - void CryptonightR_instruction220(); - void CryptonightR_instruction221(); - void CryptonightR_instruction222(); - void CryptonightR_instruction223(); - void CryptonightR_instruction224(); - void 
CryptonightR_instruction225(); - void CryptonightR_instruction226(); - void CryptonightR_instruction227(); - void CryptonightR_instruction228(); - void CryptonightR_instruction229(); - void CryptonightR_instruction230(); - void CryptonightR_instruction231(); - void CryptonightR_instruction232(); - void CryptonightR_instruction233(); - void CryptonightR_instruction234(); - void CryptonightR_instruction235(); - void CryptonightR_instruction236(); - void CryptonightR_instruction237(); - void CryptonightR_instruction238(); - void CryptonightR_instruction239(); - void CryptonightR_instruction240(); - void CryptonightR_instruction241(); - void CryptonightR_instruction242(); - void CryptonightR_instruction243(); - void CryptonightR_instruction244(); - void CryptonightR_instruction245(); - void CryptonightR_instruction246(); - void CryptonightR_instruction247(); - void CryptonightR_instruction248(); - void CryptonightR_instruction249(); - void CryptonightR_instruction250(); - void CryptonightR_instruction251(); - void CryptonightR_instruction252(); - void CryptonightR_instruction253(); - void CryptonightR_instruction254(); - void CryptonightR_instruction255(); - void CryptonightR_instruction256(); - void CryptonightR_instruction_mov0(); - void CryptonightR_instruction_mov1(); - void CryptonightR_instruction_mov2(); - void CryptonightR_instruction_mov3(); - void CryptonightR_instruction_mov4(); - void CryptonightR_instruction_mov5(); - void CryptonightR_instruction_mov6(); - void CryptonightR_instruction_mov7(); - void CryptonightR_instruction_mov8(); - void CryptonightR_instruction_mov9(); - void CryptonightR_instruction_mov10(); - void CryptonightR_instruction_mov11(); - void CryptonightR_instruction_mov12(); - void CryptonightR_instruction_mov13(); - void CryptonightR_instruction_mov14(); - void CryptonightR_instruction_mov15(); - void CryptonightR_instruction_mov16(); - void CryptonightR_instruction_mov17(); - void CryptonightR_instruction_mov18(); - void 
CryptonightR_instruction_mov19(); - void CryptonightR_instruction_mov20(); - void CryptonightR_instruction_mov21(); - void CryptonightR_instruction_mov22(); - void CryptonightR_instruction_mov23(); - void CryptonightR_instruction_mov24(); - void CryptonightR_instruction_mov25(); - void CryptonightR_instruction_mov26(); - void CryptonightR_instruction_mov27(); - void CryptonightR_instruction_mov28(); - void CryptonightR_instruction_mov29(); - void CryptonightR_instruction_mov30(); - void CryptonightR_instruction_mov31(); - void CryptonightR_instruction_mov32(); - void CryptonightR_instruction_mov33(); - void CryptonightR_instruction_mov34(); - void CryptonightR_instruction_mov35(); - void CryptonightR_instruction_mov36(); - void CryptonightR_instruction_mov37(); - void CryptonightR_instruction_mov38(); - void CryptonightR_instruction_mov39(); - void CryptonightR_instruction_mov40(); - void CryptonightR_instruction_mov41(); - void CryptonightR_instruction_mov42(); - void CryptonightR_instruction_mov43(); - void CryptonightR_instruction_mov44(); - void CryptonightR_instruction_mov45(); - void CryptonightR_instruction_mov46(); - void CryptonightR_instruction_mov47(); - void CryptonightR_instruction_mov48(); - void CryptonightR_instruction_mov49(); - void CryptonightR_instruction_mov50(); - void CryptonightR_instruction_mov51(); - void CryptonightR_instruction_mov52(); - void CryptonightR_instruction_mov53(); - void CryptonightR_instruction_mov54(); - void CryptonightR_instruction_mov55(); - void CryptonightR_instruction_mov56(); - void CryptonightR_instruction_mov57(); - void CryptonightR_instruction_mov58(); - void CryptonightR_instruction_mov59(); - void CryptonightR_instruction_mov60(); - void CryptonightR_instruction_mov61(); - void CryptonightR_instruction_mov62(); - void CryptonightR_instruction_mov63(); - void CryptonightR_instruction_mov64(); - void CryptonightR_instruction_mov65(); - void CryptonightR_instruction_mov66(); - void 
CryptonightR_instruction_mov67(); - void CryptonightR_instruction_mov68(); - void CryptonightR_instruction_mov69(); - void CryptonightR_instruction_mov70(); - void CryptonightR_instruction_mov71(); - void CryptonightR_instruction_mov72(); - void CryptonightR_instruction_mov73(); - void CryptonightR_instruction_mov74(); - void CryptonightR_instruction_mov75(); - void CryptonightR_instruction_mov76(); - void CryptonightR_instruction_mov77(); - void CryptonightR_instruction_mov78(); - void CryptonightR_instruction_mov79(); - void CryptonightR_instruction_mov80(); - void CryptonightR_instruction_mov81(); - void CryptonightR_instruction_mov82(); - void CryptonightR_instruction_mov83(); - void CryptonightR_instruction_mov84(); - void CryptonightR_instruction_mov85(); - void CryptonightR_instruction_mov86(); - void CryptonightR_instruction_mov87(); - void CryptonightR_instruction_mov88(); - void CryptonightR_instruction_mov89(); - void CryptonightR_instruction_mov90(); - void CryptonightR_instruction_mov91(); - void CryptonightR_instruction_mov92(); - void CryptonightR_instruction_mov93(); - void CryptonightR_instruction_mov94(); - void CryptonightR_instruction_mov95(); - void CryptonightR_instruction_mov96(); - void CryptonightR_instruction_mov97(); - void CryptonightR_instruction_mov98(); - void CryptonightR_instruction_mov99(); - void CryptonightR_instruction_mov100(); - void CryptonightR_instruction_mov101(); - void CryptonightR_instruction_mov102(); - void CryptonightR_instruction_mov103(); - void CryptonightR_instruction_mov104(); - void CryptonightR_instruction_mov105(); - void CryptonightR_instruction_mov106(); - void CryptonightR_instruction_mov107(); - void CryptonightR_instruction_mov108(); - void CryptonightR_instruction_mov109(); - void CryptonightR_instruction_mov110(); - void CryptonightR_instruction_mov111(); - void CryptonightR_instruction_mov112(); - void CryptonightR_instruction_mov113(); - void CryptonightR_instruction_mov114(); - void 
CryptonightR_instruction_mov115(); - void CryptonightR_instruction_mov116(); - void CryptonightR_instruction_mov117(); - void CryptonightR_instruction_mov118(); - void CryptonightR_instruction_mov119(); - void CryptonightR_instruction_mov120(); - void CryptonightR_instruction_mov121(); - void CryptonightR_instruction_mov122(); - void CryptonightR_instruction_mov123(); - void CryptonightR_instruction_mov124(); - void CryptonightR_instruction_mov125(); - void CryptonightR_instruction_mov126(); - void CryptonightR_instruction_mov127(); - void CryptonightR_instruction_mov128(); - void CryptonightR_instruction_mov129(); - void CryptonightR_instruction_mov130(); - void CryptonightR_instruction_mov131(); - void CryptonightR_instruction_mov132(); - void CryptonightR_instruction_mov133(); - void CryptonightR_instruction_mov134(); - void CryptonightR_instruction_mov135(); - void CryptonightR_instruction_mov136(); - void CryptonightR_instruction_mov137(); - void CryptonightR_instruction_mov138(); - void CryptonightR_instruction_mov139(); - void CryptonightR_instruction_mov140(); - void CryptonightR_instruction_mov141(); - void CryptonightR_instruction_mov142(); - void CryptonightR_instruction_mov143(); - void CryptonightR_instruction_mov144(); - void CryptonightR_instruction_mov145(); - void CryptonightR_instruction_mov146(); - void CryptonightR_instruction_mov147(); - void CryptonightR_instruction_mov148(); - void CryptonightR_instruction_mov149(); - void CryptonightR_instruction_mov150(); - void CryptonightR_instruction_mov151(); - void CryptonightR_instruction_mov152(); - void CryptonightR_instruction_mov153(); - void CryptonightR_instruction_mov154(); - void CryptonightR_instruction_mov155(); - void CryptonightR_instruction_mov156(); - void CryptonightR_instruction_mov157(); - void CryptonightR_instruction_mov158(); - void CryptonightR_instruction_mov159(); - void CryptonightR_instruction_mov160(); - void CryptonightR_instruction_mov161(); - void 
CryptonightR_instruction_mov162(); - void CryptonightR_instruction_mov163(); - void CryptonightR_instruction_mov164(); - void CryptonightR_instruction_mov165(); - void CryptonightR_instruction_mov166(); - void CryptonightR_instruction_mov167(); - void CryptonightR_instruction_mov168(); - void CryptonightR_instruction_mov169(); - void CryptonightR_instruction_mov170(); - void CryptonightR_instruction_mov171(); - void CryptonightR_instruction_mov172(); - void CryptonightR_instruction_mov173(); - void CryptonightR_instruction_mov174(); - void CryptonightR_instruction_mov175(); - void CryptonightR_instruction_mov176(); - void CryptonightR_instruction_mov177(); - void CryptonightR_instruction_mov178(); - void CryptonightR_instruction_mov179(); - void CryptonightR_instruction_mov180(); - void CryptonightR_instruction_mov181(); - void CryptonightR_instruction_mov182(); - void CryptonightR_instruction_mov183(); - void CryptonightR_instruction_mov184(); - void CryptonightR_instruction_mov185(); - void CryptonightR_instruction_mov186(); - void CryptonightR_instruction_mov187(); - void CryptonightR_instruction_mov188(); - void CryptonightR_instruction_mov189(); - void CryptonightR_instruction_mov190(); - void CryptonightR_instruction_mov191(); - void CryptonightR_instruction_mov192(); - void CryptonightR_instruction_mov193(); - void CryptonightR_instruction_mov194(); - void CryptonightR_instruction_mov195(); - void CryptonightR_instruction_mov196(); - void CryptonightR_instruction_mov197(); - void CryptonightR_instruction_mov198(); - void CryptonightR_instruction_mov199(); - void CryptonightR_instruction_mov200(); - void CryptonightR_instruction_mov201(); - void CryptonightR_instruction_mov202(); - void CryptonightR_instruction_mov203(); - void CryptonightR_instruction_mov204(); - void CryptonightR_instruction_mov205(); - void CryptonightR_instruction_mov206(); - void CryptonightR_instruction_mov207(); - void CryptonightR_instruction_mov208(); - void 
CryptonightR_instruction_mov209(); - void CryptonightR_instruction_mov210(); - void CryptonightR_instruction_mov211(); - void CryptonightR_instruction_mov212(); - void CryptonightR_instruction_mov213(); - void CryptonightR_instruction_mov214(); - void CryptonightR_instruction_mov215(); - void CryptonightR_instruction_mov216(); - void CryptonightR_instruction_mov217(); - void CryptonightR_instruction_mov218(); - void CryptonightR_instruction_mov219(); - void CryptonightR_instruction_mov220(); - void CryptonightR_instruction_mov221(); - void CryptonightR_instruction_mov222(); - void CryptonightR_instruction_mov223(); - void CryptonightR_instruction_mov224(); - void CryptonightR_instruction_mov225(); - void CryptonightR_instruction_mov226(); - void CryptonightR_instruction_mov227(); - void CryptonightR_instruction_mov228(); - void CryptonightR_instruction_mov229(); - void CryptonightR_instruction_mov230(); - void CryptonightR_instruction_mov231(); - void CryptonightR_instruction_mov232(); - void CryptonightR_instruction_mov233(); - void CryptonightR_instruction_mov234(); - void CryptonightR_instruction_mov235(); - void CryptonightR_instruction_mov236(); - void CryptonightR_instruction_mov237(); - void CryptonightR_instruction_mov238(); - void CryptonightR_instruction_mov239(); - void CryptonightR_instruction_mov240(); - void CryptonightR_instruction_mov241(); - void CryptonightR_instruction_mov242(); - void CryptonightR_instruction_mov243(); - void CryptonightR_instruction_mov244(); - void CryptonightR_instruction_mov245(); - void CryptonightR_instruction_mov246(); - void CryptonightR_instruction_mov247(); - void CryptonightR_instruction_mov248(); - void CryptonightR_instruction_mov249(); - void CryptonightR_instruction_mov250(); - void CryptonightR_instruction_mov251(); - void CryptonightR_instruction_mov252(); - void CryptonightR_instruction_mov253(); - void CryptonightR_instruction_mov254(); - void CryptonightR_instruction_mov255(); - void 
CryptonightR_instruction_mov256(); -} - -const void_func instructions[257] = { - CryptonightR_instruction0, - CryptonightR_instruction1, - CryptonightR_instruction2, - CryptonightR_instruction3, - CryptonightR_instruction4, - CryptonightR_instruction5, - CryptonightR_instruction6, - CryptonightR_instruction7, - CryptonightR_instruction8, - CryptonightR_instruction9, - CryptonightR_instruction10, - CryptonightR_instruction11, - CryptonightR_instruction12, - CryptonightR_instruction13, - CryptonightR_instruction14, - CryptonightR_instruction15, - CryptonightR_instruction16, - CryptonightR_instruction17, - CryptonightR_instruction18, - CryptonightR_instruction19, - CryptonightR_instruction20, - CryptonightR_instruction21, - CryptonightR_instruction22, - CryptonightR_instruction23, - CryptonightR_instruction24, - CryptonightR_instruction25, - CryptonightR_instruction26, - CryptonightR_instruction27, - CryptonightR_instruction28, - CryptonightR_instruction29, - CryptonightR_instruction30, - CryptonightR_instruction31, - CryptonightR_instruction32, - CryptonightR_instruction33, - CryptonightR_instruction34, - CryptonightR_instruction35, - CryptonightR_instruction36, - CryptonightR_instruction37, - CryptonightR_instruction38, - CryptonightR_instruction39, - CryptonightR_instruction40, - CryptonightR_instruction41, - CryptonightR_instruction42, - CryptonightR_instruction43, - CryptonightR_instruction44, - CryptonightR_instruction45, - CryptonightR_instruction46, - CryptonightR_instruction47, - CryptonightR_instruction48, - CryptonightR_instruction49, - CryptonightR_instruction50, - CryptonightR_instruction51, - CryptonightR_instruction52, - CryptonightR_instruction53, - CryptonightR_instruction54, - CryptonightR_instruction55, - CryptonightR_instruction56, - CryptonightR_instruction57, - CryptonightR_instruction58, - CryptonightR_instruction59, - CryptonightR_instruction60, - CryptonightR_instruction61, - CryptonightR_instruction62, - CryptonightR_instruction63, - 
CryptonightR_instruction64, - CryptonightR_instruction65, - CryptonightR_instruction66, - CryptonightR_instruction67, - CryptonightR_instruction68, - CryptonightR_instruction69, - CryptonightR_instruction70, - CryptonightR_instruction71, - CryptonightR_instruction72, - CryptonightR_instruction73, - CryptonightR_instruction74, - CryptonightR_instruction75, - CryptonightR_instruction76, - CryptonightR_instruction77, - CryptonightR_instruction78, - CryptonightR_instruction79, - CryptonightR_instruction80, - CryptonightR_instruction81, - CryptonightR_instruction82, - CryptonightR_instruction83, - CryptonightR_instruction84, - CryptonightR_instruction85, - CryptonightR_instruction86, - CryptonightR_instruction87, - CryptonightR_instruction88, - CryptonightR_instruction89, - CryptonightR_instruction90, - CryptonightR_instruction91, - CryptonightR_instruction92, - CryptonightR_instruction93, - CryptonightR_instruction94, - CryptonightR_instruction95, - CryptonightR_instruction96, - CryptonightR_instruction97, - CryptonightR_instruction98, - CryptonightR_instruction99, - CryptonightR_instruction100, - CryptonightR_instruction101, - CryptonightR_instruction102, - CryptonightR_instruction103, - CryptonightR_instruction104, - CryptonightR_instruction105, - CryptonightR_instruction106, - CryptonightR_instruction107, - CryptonightR_instruction108, - CryptonightR_instruction109, - CryptonightR_instruction110, - CryptonightR_instruction111, - CryptonightR_instruction112, - CryptonightR_instruction113, - CryptonightR_instruction114, - CryptonightR_instruction115, - CryptonightR_instruction116, - CryptonightR_instruction117, - CryptonightR_instruction118, - CryptonightR_instruction119, - CryptonightR_instruction120, - CryptonightR_instruction121, - CryptonightR_instruction122, - CryptonightR_instruction123, - CryptonightR_instruction124, - CryptonightR_instruction125, - CryptonightR_instruction126, - CryptonightR_instruction127, - CryptonightR_instruction128, - 
CryptonightR_instruction129, - CryptonightR_instruction130, - CryptonightR_instruction131, - CryptonightR_instruction132, - CryptonightR_instruction133, - CryptonightR_instruction134, - CryptonightR_instruction135, - CryptonightR_instruction136, - CryptonightR_instruction137, - CryptonightR_instruction138, - CryptonightR_instruction139, - CryptonightR_instruction140, - CryptonightR_instruction141, - CryptonightR_instruction142, - CryptonightR_instruction143, - CryptonightR_instruction144, - CryptonightR_instruction145, - CryptonightR_instruction146, - CryptonightR_instruction147, - CryptonightR_instruction148, - CryptonightR_instruction149, - CryptonightR_instruction150, - CryptonightR_instruction151, - CryptonightR_instruction152, - CryptonightR_instruction153, - CryptonightR_instruction154, - CryptonightR_instruction155, - CryptonightR_instruction156, - CryptonightR_instruction157, - CryptonightR_instruction158, - CryptonightR_instruction159, - CryptonightR_instruction160, - CryptonightR_instruction161, - CryptonightR_instruction162, - CryptonightR_instruction163, - CryptonightR_instruction164, - CryptonightR_instruction165, - CryptonightR_instruction166, - CryptonightR_instruction167, - CryptonightR_instruction168, - CryptonightR_instruction169, - CryptonightR_instruction170, - CryptonightR_instruction171, - CryptonightR_instruction172, - CryptonightR_instruction173, - CryptonightR_instruction174, - CryptonightR_instruction175, - CryptonightR_instruction176, - CryptonightR_instruction177, - CryptonightR_instruction178, - CryptonightR_instruction179, - CryptonightR_instruction180, - CryptonightR_instruction181, - CryptonightR_instruction182, - CryptonightR_instruction183, - CryptonightR_instruction184, - CryptonightR_instruction185, - CryptonightR_instruction186, - CryptonightR_instruction187, - CryptonightR_instruction188, - CryptonightR_instruction189, - CryptonightR_instruction190, - CryptonightR_instruction191, - CryptonightR_instruction192, - 
CryptonightR_instruction193, - CryptonightR_instruction194, - CryptonightR_instruction195, - CryptonightR_instruction196, - CryptonightR_instruction197, - CryptonightR_instruction198, - CryptonightR_instruction199, - CryptonightR_instruction200, - CryptonightR_instruction201, - CryptonightR_instruction202, - CryptonightR_instruction203, - CryptonightR_instruction204, - CryptonightR_instruction205, - CryptonightR_instruction206, - CryptonightR_instruction207, - CryptonightR_instruction208, - CryptonightR_instruction209, - CryptonightR_instruction210, - CryptonightR_instruction211, - CryptonightR_instruction212, - CryptonightR_instruction213, - CryptonightR_instruction214, - CryptonightR_instruction215, - CryptonightR_instruction216, - CryptonightR_instruction217, - CryptonightR_instruction218, - CryptonightR_instruction219, - CryptonightR_instruction220, - CryptonightR_instruction221, - CryptonightR_instruction222, - CryptonightR_instruction223, - CryptonightR_instruction224, - CryptonightR_instruction225, - CryptonightR_instruction226, - CryptonightR_instruction227, - CryptonightR_instruction228, - CryptonightR_instruction229, - CryptonightR_instruction230, - CryptonightR_instruction231, - CryptonightR_instruction232, - CryptonightR_instruction233, - CryptonightR_instruction234, - CryptonightR_instruction235, - CryptonightR_instruction236, - CryptonightR_instruction237, - CryptonightR_instruction238, - CryptonightR_instruction239, - CryptonightR_instruction240, - CryptonightR_instruction241, - CryptonightR_instruction242, - CryptonightR_instruction243, - CryptonightR_instruction244, - CryptonightR_instruction245, - CryptonightR_instruction246, - CryptonightR_instruction247, - CryptonightR_instruction248, - CryptonightR_instruction249, - CryptonightR_instruction250, - CryptonightR_instruction251, - CryptonightR_instruction252, - CryptonightR_instruction253, - CryptonightR_instruction254, - CryptonightR_instruction255, - CryptonightR_instruction256, -}; - -const 
void_func instructions_mov[257] = { - CryptonightR_instruction_mov0, - CryptonightR_instruction_mov1, - CryptonightR_instruction_mov2, - CryptonightR_instruction_mov3, - CryptonightR_instruction_mov4, - CryptonightR_instruction_mov5, - CryptonightR_instruction_mov6, - CryptonightR_instruction_mov7, - CryptonightR_instruction_mov8, - CryptonightR_instruction_mov9, - CryptonightR_instruction_mov10, - CryptonightR_instruction_mov11, - CryptonightR_instruction_mov12, - CryptonightR_instruction_mov13, - CryptonightR_instruction_mov14, - CryptonightR_instruction_mov15, - CryptonightR_instruction_mov16, - CryptonightR_instruction_mov17, - CryptonightR_instruction_mov18, - CryptonightR_instruction_mov19, - CryptonightR_instruction_mov20, - CryptonightR_instruction_mov21, - CryptonightR_instruction_mov22, - CryptonightR_instruction_mov23, - CryptonightR_instruction_mov24, - CryptonightR_instruction_mov25, - CryptonightR_instruction_mov26, - CryptonightR_instruction_mov27, - CryptonightR_instruction_mov28, - CryptonightR_instruction_mov29, - CryptonightR_instruction_mov30, - CryptonightR_instruction_mov31, - CryptonightR_instruction_mov32, - CryptonightR_instruction_mov33, - CryptonightR_instruction_mov34, - CryptonightR_instruction_mov35, - CryptonightR_instruction_mov36, - CryptonightR_instruction_mov37, - CryptonightR_instruction_mov38, - CryptonightR_instruction_mov39, - CryptonightR_instruction_mov40, - CryptonightR_instruction_mov41, - CryptonightR_instruction_mov42, - CryptonightR_instruction_mov43, - CryptonightR_instruction_mov44, - CryptonightR_instruction_mov45, - CryptonightR_instruction_mov46, - CryptonightR_instruction_mov47, - CryptonightR_instruction_mov48, - CryptonightR_instruction_mov49, - CryptonightR_instruction_mov50, - CryptonightR_instruction_mov51, - CryptonightR_instruction_mov52, - CryptonightR_instruction_mov53, - CryptonightR_instruction_mov54, - CryptonightR_instruction_mov55, - CryptonightR_instruction_mov56, - CryptonightR_instruction_mov57, - 
CryptonightR_instruction_mov58, - CryptonightR_instruction_mov59, - CryptonightR_instruction_mov60, - CryptonightR_instruction_mov61, - CryptonightR_instruction_mov62, - CryptonightR_instruction_mov63, - CryptonightR_instruction_mov64, - CryptonightR_instruction_mov65, - CryptonightR_instruction_mov66, - CryptonightR_instruction_mov67, - CryptonightR_instruction_mov68, - CryptonightR_instruction_mov69, - CryptonightR_instruction_mov70, - CryptonightR_instruction_mov71, - CryptonightR_instruction_mov72, - CryptonightR_instruction_mov73, - CryptonightR_instruction_mov74, - CryptonightR_instruction_mov75, - CryptonightR_instruction_mov76, - CryptonightR_instruction_mov77, - CryptonightR_instruction_mov78, - CryptonightR_instruction_mov79, - CryptonightR_instruction_mov80, - CryptonightR_instruction_mov81, - CryptonightR_instruction_mov82, - CryptonightR_instruction_mov83, - CryptonightR_instruction_mov84, - CryptonightR_instruction_mov85, - CryptonightR_instruction_mov86, - CryptonightR_instruction_mov87, - CryptonightR_instruction_mov88, - CryptonightR_instruction_mov89, - CryptonightR_instruction_mov90, - CryptonightR_instruction_mov91, - CryptonightR_instruction_mov92, - CryptonightR_instruction_mov93, - CryptonightR_instruction_mov94, - CryptonightR_instruction_mov95, - CryptonightR_instruction_mov96, - CryptonightR_instruction_mov97, - CryptonightR_instruction_mov98, - CryptonightR_instruction_mov99, - CryptonightR_instruction_mov100, - CryptonightR_instruction_mov101, - CryptonightR_instruction_mov102, - CryptonightR_instruction_mov103, - CryptonightR_instruction_mov104, - CryptonightR_instruction_mov105, - CryptonightR_instruction_mov106, - CryptonightR_instruction_mov107, - CryptonightR_instruction_mov108, - CryptonightR_instruction_mov109, - CryptonightR_instruction_mov110, - CryptonightR_instruction_mov111, - CryptonightR_instruction_mov112, - CryptonightR_instruction_mov113, - CryptonightR_instruction_mov114, - CryptonightR_instruction_mov115, - 
CryptonightR_instruction_mov116, - CryptonightR_instruction_mov117, - CryptonightR_instruction_mov118, - CryptonightR_instruction_mov119, - CryptonightR_instruction_mov120, - CryptonightR_instruction_mov121, - CryptonightR_instruction_mov122, - CryptonightR_instruction_mov123, - CryptonightR_instruction_mov124, - CryptonightR_instruction_mov125, - CryptonightR_instruction_mov126, - CryptonightR_instruction_mov127, - CryptonightR_instruction_mov128, - CryptonightR_instruction_mov129, - CryptonightR_instruction_mov130, - CryptonightR_instruction_mov131, - CryptonightR_instruction_mov132, - CryptonightR_instruction_mov133, - CryptonightR_instruction_mov134, - CryptonightR_instruction_mov135, - CryptonightR_instruction_mov136, - CryptonightR_instruction_mov137, - CryptonightR_instruction_mov138, - CryptonightR_instruction_mov139, - CryptonightR_instruction_mov140, - CryptonightR_instruction_mov141, - CryptonightR_instruction_mov142, - CryptonightR_instruction_mov143, - CryptonightR_instruction_mov144, - CryptonightR_instruction_mov145, - CryptonightR_instruction_mov146, - CryptonightR_instruction_mov147, - CryptonightR_instruction_mov148, - CryptonightR_instruction_mov149, - CryptonightR_instruction_mov150, - CryptonightR_instruction_mov151, - CryptonightR_instruction_mov152, - CryptonightR_instruction_mov153, - CryptonightR_instruction_mov154, - CryptonightR_instruction_mov155, - CryptonightR_instruction_mov156, - CryptonightR_instruction_mov157, - CryptonightR_instruction_mov158, - CryptonightR_instruction_mov159, - CryptonightR_instruction_mov160, - CryptonightR_instruction_mov161, - CryptonightR_instruction_mov162, - CryptonightR_instruction_mov163, - CryptonightR_instruction_mov164, - CryptonightR_instruction_mov165, - CryptonightR_instruction_mov166, - CryptonightR_instruction_mov167, - CryptonightR_instruction_mov168, - CryptonightR_instruction_mov169, - CryptonightR_instruction_mov170, - CryptonightR_instruction_mov171, - CryptonightR_instruction_mov172, - 
CryptonightR_instruction_mov173, - CryptonightR_instruction_mov174, - CryptonightR_instruction_mov175, - CryptonightR_instruction_mov176, - CryptonightR_instruction_mov177, - CryptonightR_instruction_mov178, - CryptonightR_instruction_mov179, - CryptonightR_instruction_mov180, - CryptonightR_instruction_mov181, - CryptonightR_instruction_mov182, - CryptonightR_instruction_mov183, - CryptonightR_instruction_mov184, - CryptonightR_instruction_mov185, - CryptonightR_instruction_mov186, - CryptonightR_instruction_mov187, - CryptonightR_instruction_mov188, - CryptonightR_instruction_mov189, - CryptonightR_instruction_mov190, - CryptonightR_instruction_mov191, - CryptonightR_instruction_mov192, - CryptonightR_instruction_mov193, - CryptonightR_instruction_mov194, - CryptonightR_instruction_mov195, - CryptonightR_instruction_mov196, - CryptonightR_instruction_mov197, - CryptonightR_instruction_mov198, - CryptonightR_instruction_mov199, - CryptonightR_instruction_mov200, - CryptonightR_instruction_mov201, - CryptonightR_instruction_mov202, - CryptonightR_instruction_mov203, - CryptonightR_instruction_mov204, - CryptonightR_instruction_mov205, - CryptonightR_instruction_mov206, - CryptonightR_instruction_mov207, - CryptonightR_instruction_mov208, - CryptonightR_instruction_mov209, - CryptonightR_instruction_mov210, - CryptonightR_instruction_mov211, - CryptonightR_instruction_mov212, - CryptonightR_instruction_mov213, - CryptonightR_instruction_mov214, - CryptonightR_instruction_mov215, - CryptonightR_instruction_mov216, - CryptonightR_instruction_mov217, - CryptonightR_instruction_mov218, - CryptonightR_instruction_mov219, - CryptonightR_instruction_mov220, - CryptonightR_instruction_mov221, - CryptonightR_instruction_mov222, - CryptonightR_instruction_mov223, - CryptonightR_instruction_mov224, - CryptonightR_instruction_mov225, - CryptonightR_instruction_mov226, - CryptonightR_instruction_mov227, - CryptonightR_instruction_mov228, - CryptonightR_instruction_mov229, - 
CryptonightR_instruction_mov230, - CryptonightR_instruction_mov231, - CryptonightR_instruction_mov232, - CryptonightR_instruction_mov233, - CryptonightR_instruction_mov234, - CryptonightR_instruction_mov235, - CryptonightR_instruction_mov236, - CryptonightR_instruction_mov237, - CryptonightR_instruction_mov238, - CryptonightR_instruction_mov239, - CryptonightR_instruction_mov240, - CryptonightR_instruction_mov241, - CryptonightR_instruction_mov242, - CryptonightR_instruction_mov243, - CryptonightR_instruction_mov244, - CryptonightR_instruction_mov245, - CryptonightR_instruction_mov246, - CryptonightR_instruction_mov247, - CryptonightR_instruction_mov248, - CryptonightR_instruction_mov249, - CryptonightR_instruction_mov250, - CryptonightR_instruction_mov251, - CryptonightR_instruction_mov252, - CryptonightR_instruction_mov253, - CryptonightR_instruction_mov254, - CryptonightR_instruction_mov255, - CryptonightR_instruction_mov256, -}; diff --git a/src/crypto/asm/CryptonightR_template.inc b/src/crypto/asm/CryptonightR_template.inc deleted file mode 100644 index 61b6b985..00000000 --- a/src/crypto/asm/CryptonightR_template.inc +++ /dev/null @@ -1,536 +0,0 @@ -PUBLIC FN_PREFIX(CryptonightR_template_part1) -PUBLIC FN_PREFIX(CryptonightR_template_mainloop) -PUBLIC FN_PREFIX(CryptonightR_template_part2) -PUBLIC FN_PREFIX(CryptonightR_template_part3) -PUBLIC FN_PREFIX(CryptonightR_template_end) -PUBLIC FN_PREFIX(CryptonightR_template_double_part1) -PUBLIC FN_PREFIX(CryptonightR_template_double_mainloop) -PUBLIC FN_PREFIX(CryptonightR_template_double_part2) -PUBLIC FN_PREFIX(CryptonightR_template_double_part3) -PUBLIC FN_PREFIX(CryptonightR_template_double_part4) -PUBLIC FN_PREFIX(CryptonightR_template_double_end) - -ALIGN(64) -FN_PREFIX(CryptonightR_template_part1): - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - push rdi - sub rsp, 64 - mov r12, 
rcx - mov r8, QWORD PTR [r12+32] - mov rdx, r12 - xor r8, QWORD PTR [r12] - mov r15, QWORD PTR [r12+40] - mov r9, r8 - xor r15, QWORD PTR [r12+8] - mov r11, QWORD PTR [r12+224] - mov r12, QWORD PTR [r12+56] - xor r12, QWORD PTR [rdx+24] - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - movaps XMMWORD PTR [rsp+48], xmm6 - movq xmm0, r12 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - movaps XMMWORD PTR [rsp], xmm9 - mov r12, QWORD PTR [rdx+88] - xor r12, QWORD PTR [rdx+72] - movq xmm6, rax - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm6, xmm0 - and r9d, 2097136 - movq xmm0, r12 - movq xmm7, rax - punpcklqdq xmm7, xmm0 - mov r10d, r9d - movq xmm9, rsp - mov rsp, r8 - mov r8d, 524288 - - mov ebx, [rdx+96] - mov esi, [rdx+100] - mov edi, [rdx+104] - mov ebp, [rdx+108] - - ALIGN(64) -FN_PREFIX(CryptonightR_template_mainloop): - movdqa xmm5, XMMWORD PTR [r9+r11] - movq xmm0, r15 - movq xmm4, rsp - punpcklqdq xmm4, xmm0 - lea rdx, QWORD PTR [r9+r11] - - aesenc xmm5, xmm4 - - mov r13d, r9d - mov eax, r9d - xor r9d, 48 - xor r13d, 16 - xor eax, 32 - movdqu xmm0, XMMWORD PTR [r9+r11] - movaps xmm3, xmm0 - movdqu xmm2, XMMWORD PTR [r13+r11] - movdqu xmm1, XMMWORD PTR [rax+r11] - pxor xmm0, xmm2 - pxor xmm5, xmm1 - pxor xmm5, xmm0 - - movq r12, xmm5 - movd r10d, xmm5 - and r10d, 2097136 - - paddq xmm3, xmm7 - paddq xmm2, xmm6 - paddq xmm1, xmm4 - movdqu XMMWORD PTR [r13+r11], xmm3 - movdqu XMMWORD PTR [rax+r11], xmm2 - movdqu XMMWORD PTR [r9+r11], xmm1 - - movdqa xmm0, xmm5 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [rdx], xmm0 - - lea r13d, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or r13, rdx - - movd eax, xmm6 - movd edx, xmm7 - pextrd r9d, xmm7, 2 - - xor r13, QWORD PTR [r10+r11] - mov r14, QWORD PTR [r10+r11+8] - -FN_PREFIX(CryptonightR_template_part2): - lea rcx, [r10+r11] - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor rsp, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, 
rdx - xor r15, rax - - mov rax, r13 - mul r12 - add r15, rax - add rsp, rdx - - mov r9d, r10d - mov r12d, r10d - xor r9d, 16 - xor r12d, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [r12+r11] - movaps xmm3, xmm1 - movdqa xmm2, XMMWORD PTR [r9+r11] - movdqa xmm0, XMMWORD PTR [r10+r11] - pxor xmm1, xmm2 - pxor xmm5, xmm0 - pxor xmm5, xmm1 - paddq xmm3, xmm4 - paddq xmm2, xmm6 - paddq xmm0, xmm7 - movdqu XMMWORD PTR [r9+r11], xmm0 - movdqu XMMWORD PTR [r12+r11], xmm2 - movdqu XMMWORD PTR [r10+r11], xmm3 - - movdqa xmm7, xmm6 - mov QWORD PTR [rcx], rsp - xor rsp, r13 - mov r9d, esp - mov QWORD PTR [rcx+8], r15 - and r9d, 2097136 - xor r15, r14 - movdqa xmm6, xmm5 - dec r8d - jnz FN_PREFIX(CryptonightR_template_mainloop) - -FN_PREFIX(CryptonightR_template_part3): - movq rsp, xmm9 - - mov rbx, QWORD PTR [rsp+136] - mov rbp, QWORD PTR [rsp+144] - mov rsi, QWORD PTR [rsp+152] - movaps xmm6, XMMWORD PTR [rsp+48] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+16] - movaps xmm9, XMMWORD PTR [rsp] - add rsp, 64 - pop rdi - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - ret 0 -FN_PREFIX(CryptonightR_template_end): - -ALIGN(64) -FN_PREFIX(CryptonightR_template_double_part1): - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 320 - mov r14, QWORD PTR [rcx+32] - mov r8, rcx - xor r14, QWORD PTR [rcx] - mov r12, QWORD PTR [rcx+40] - mov ebx, r14d - mov rsi, QWORD PTR [rcx+224] - and ebx, 2097136 - xor r12, QWORD PTR [rcx+8] - mov rcx, QWORD PTR [rcx+56] - xor rcx, QWORD PTR [r8+24] - mov rax, QWORD PTR [r8+48] - xor rax, QWORD PTR [r8+16] - mov r15, QWORD PTR [rdx+32] - xor r15, QWORD PTR [rdx] - movq xmm0, rcx - mov rcx, QWORD PTR [r8+88] - xor rcx, QWORD PTR [r8+72] - mov r13, QWORD PTR [rdx+40] - mov rdi, QWORD PTR [rdx+224] - xor r13, QWORD PTR [rdx+8] - movaps XMMWORD PTR [rsp+160], xmm6 - movaps XMMWORD PTR [rsp+176], xmm7 - movaps XMMWORD 
PTR [rsp+192], xmm8 - movaps XMMWORD PTR [rsp+208], xmm9 - movaps XMMWORD PTR [rsp+224], xmm10 - movaps XMMWORD PTR [rsp+240], xmm11 - movaps XMMWORD PTR [rsp+256], xmm12 - movaps XMMWORD PTR [rsp+272], xmm13 - movaps XMMWORD PTR [rsp+288], xmm14 - movaps XMMWORD PTR [rsp+304], xmm15 - movq xmm7, rax - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - - movaps xmm1, XMMWORD PTR [rdx+96] - movaps xmm2, XMMWORD PTR [r8+96] - movaps XMMWORD PTR [rsp], xmm1 - movaps XMMWORD PTR [rsp+16], xmm2 - - mov r8d, r15d - punpcklqdq xmm7, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [rdx+56] - xor rcx, QWORD PTR [rdx+24] - movq xmm9, rax - mov QWORD PTR [rsp+128], rsi - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - punpcklqdq xmm9, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [rdx+88] - xor rcx, QWORD PTR [rdx+72] - movq xmm8, rax - mov QWORD PTR [rsp+136], rdi - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm8, xmm0 - and r8d, 2097136 - movq xmm0, rcx - mov r11d, 524288 - movq xmm10, rax - punpcklqdq xmm10, xmm0 - - movq xmm14, QWORD PTR [rsp+128] - movq xmm15, QWORD PTR [rsp+136] - - ALIGN(64) -FN_PREFIX(CryptonightR_template_double_mainloop): - movdqu xmm6, XMMWORD PTR [rbx+rsi] - movq xmm0, r12 - mov ecx, ebx - movq xmm3, r14 - punpcklqdq xmm3, xmm0 - xor ebx, 16 - aesenc xmm6, xmm3 - movq xmm4, r15 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm0 - xor ebx, 48 - paddq xmm0, xmm7 - movdqu xmm1, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm1 - movdqu XMMWORD PTR [rbx+rsi], xmm0 - paddq xmm1, xmm3 - xor ebx, 16 - mov eax, ebx - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm0 - movq rdx, xmm6 - movdqu XMMWORD PTR [rbx+rsi], xmm1 - paddq xmm0, xmm9 - movdqu XMMWORD PTR [rax+rsi], xmm0 - movdqa xmm0, xmm6 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [rcx+rsi], xmm0 - mov esi, edx - movdqu xmm5, XMMWORD PTR [r8+rdi] - and esi, 2097136 - mov ecx, r8d - movq xmm0, r13 - punpcklqdq xmm4, xmm0 - xor r8d, 16 - aesenc xmm5, xmm4 - 
movdqu xmm0, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm0 - xor r8d, 48 - paddq xmm0, xmm8 - movdqu xmm1, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm1 - movdqu XMMWORD PTR [r8+rdi], xmm0 - paddq xmm1, xmm4 - xor r8d, 16 - mov eax, r8d - xor rax, 32 - movdqu xmm0, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm0 - movdqu XMMWORD PTR [r8+rdi], xmm1 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rdi], xmm0 - movdqa xmm0, xmm5 - pxor xmm0, xmm8 - movdqu XMMWORD PTR [rcx+rdi], xmm0 - movq rdi, xmm5 - movq rcx, xmm14 - mov ebp, edi - mov r8, QWORD PTR [rcx+rsi] - mov r10, QWORD PTR [rcx+rsi+8] - lea r9, QWORD PTR [rcx+rsi] - xor esi, 16 - - movq xmm0, rsp - movq xmm1, rsi - movq xmm2, rdi - movq xmm11, rbp - movq xmm12, r15 - movq xmm13, rdx - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp+16] - mov esi, DWORD PTR [rsp+20] - mov edi, DWORD PTR [rsp+24] - mov ebp, DWORD PTR [rsp+28] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - xor r8, rax - - movd esp, xmm3 - pextrd r15d, xmm3, 2 - movd eax, xmm7 - movd edx, xmm9 - pextrd r9d, xmm9, 2 - -FN_PREFIX(CryptonightR_template_double_part2): - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor r14, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r12, rax - - movq rsp, xmm0 - mov DWORD PTR [rsp+16], ebx - mov DWORD PTR [rsp+20], esi - mov DWORD PTR [rsp+24], edi - mov DWORD PTR [rsp+28], ebp - - movq rsi, xmm1 - movq rdi, xmm2 - movq rbp, xmm11 - movq r15, xmm12 - movq rdx, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rbx, r8 - mov rax, r8 - mul rdx - and ebp, 2097136 - mov r8, rax - movdqu xmm1, XMMWORD PTR [rcx+rsi] - pxor xmm6, xmm1 - xor esi, 48 - paddq xmm1, xmm7 - movdqu xmm2, XMMWORD PTR [rsi+rcx] - pxor xmm6, xmm2 - paddq xmm2, xmm3 - movdqu XMMWORD PTR [rsi+rcx], xmm1 - xor esi, 16 - mov eax, esi - mov rsi, rcx - movdqu xmm0, XMMWORD PTR [rax+rcx] - pxor xmm6, xmm0 - movdqu XMMWORD PTR [rax+rcx], xmm2 - paddq xmm0, xmm9 - add r12, r8 - xor rax, 32 - 
add r14, rdx - movdqa xmm9, xmm7 - movdqa xmm7, xmm6 - movdqu XMMWORD PTR [rax+rcx], xmm0 - mov QWORD PTR [r9+8], r12 - xor r12, r10 - mov QWORD PTR [r9], r14 - movq rcx, xmm15 - xor r14, rbx - mov r10d, ebp - mov ebx, r14d - xor ebp, 16 - and ebx, 2097136 - mov r8, QWORD PTR [r10+rcx] - mov r9, QWORD PTR [r10+rcx+8] - - movq xmm0, rsp - movq xmm1, rbx - movq xmm2, rsi - movq xmm11, rdi - movq xmm12, rbp - movq xmm13, r15 - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp] - mov esi, DWORD PTR [rsp+4] - mov edi, DWORD PTR [rsp+8] - mov ebp, DWORD PTR [rsp+12] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - - xor r8, rax - movq xmm3, r8 - - movd esp, xmm4 - pextrd r15d, xmm4, 2 - movd eax, xmm8 - movd edx, xmm10 - pextrd r9d, xmm10, 2 - -FN_PREFIX(CryptonightR_template_double_part3): - - movq r15, xmm13 - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor r15, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r13, rax - - movq rsp, xmm0 - mov DWORD PTR [rsp], ebx - mov DWORD PTR [rsp+4], esi - mov DWORD PTR [rsp+8], edi - mov DWORD PTR [rsp+12], ebp - - movq rbx, xmm1 - movq rsi, xmm2 - movq rdi, xmm11 - movq rbp, xmm12 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rax, r8 - mul rdi - mov rdi, rcx - mov r8, rax - movdqu xmm1, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm1 - xor ebp, 48 - paddq xmm1, xmm8 - add r13, r8 - movdqu xmm2, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm2 - add r15, rdx - movdqu XMMWORD PTR [rbp+rcx], xmm1 - paddq xmm2, xmm4 - xor ebp, 16 - mov eax, ebp - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm0 - movdqu XMMWORD PTR [rbp+rcx], xmm2 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rcx], xmm0 - movq rax, xmm3 - movdqa xmm10, xmm8 - mov QWORD PTR [r10+rcx], r15 - movdqa xmm8, xmm5 - xor r15, rax - mov QWORD PTR [r10+rcx+8], r13 - mov r8d, r15d - xor r13, r9 - and r8d, 2097136 - dec r11d - jnz FN_PREFIX(CryptonightR_template_double_mainloop) - 
-FN_PREFIX(CryptonightR_template_double_part4): - - mov rbx, QWORD PTR [rsp+400] - movaps xmm6, XMMWORD PTR [rsp+160] - movaps xmm7, XMMWORD PTR [rsp+176] - movaps xmm8, XMMWORD PTR [rsp+192] - movaps xmm9, XMMWORD PTR [rsp+208] - movaps xmm10, XMMWORD PTR [rsp+224] - movaps xmm11, XMMWORD PTR [rsp+240] - movaps xmm12, XMMWORD PTR [rsp+256] - movaps xmm13, XMMWORD PTR [rsp+272] - movaps xmm14, XMMWORD PTR [rsp+288] - movaps xmm15, XMMWORD PTR [rsp+304] - add rsp, 320 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - ret 0 -FN_PREFIX(CryptonightR_template_double_end): diff --git a/src/crypto/asm/CryptonightR_template_win.inc b/src/crypto/asm/CryptonightR_template_win.inc deleted file mode 100644 index 1bb89eb1..00000000 --- a/src/crypto/asm/CryptonightR_template_win.inc +++ /dev/null @@ -1,536 +0,0 @@ -PUBLIC CryptonightR_template_part1 -PUBLIC CryptonightR_template_mainloop -PUBLIC CryptonightR_template_part2 -PUBLIC CryptonightR_template_part3 -PUBLIC CryptonightR_template_end -PUBLIC CryptonightR_template_double_part1 -PUBLIC CryptonightR_template_double_mainloop -PUBLIC CryptonightR_template_double_part2 -PUBLIC CryptonightR_template_double_part3 -PUBLIC CryptonightR_template_double_part4 -PUBLIC CryptonightR_template_double_end - -ALIGN(64) -CryptonightR_template_part1: - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - push rdi - sub rsp, 64 - mov r12, rcx - mov r8, QWORD PTR [r12+32] - mov rdx, r12 - xor r8, QWORD PTR [r12] - mov r15, QWORD PTR [r12+40] - mov r9, r8 - xor r15, QWORD PTR [r12+8] - mov r11, QWORD PTR [r12+224] - mov r12, QWORD PTR [r12+56] - xor r12, QWORD PTR [rdx+24] - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - movaps XMMWORD PTR [rsp+48], xmm6 - movq xmm0, r12 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - movaps XMMWORD PTR [rsp], xmm9 - mov r12, QWORD 
PTR [rdx+88] - xor r12, QWORD PTR [rdx+72] - movq xmm6, rax - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm6, xmm0 - and r9d, 2097136 - movq xmm0, r12 - movq xmm7, rax - punpcklqdq xmm7, xmm0 - mov r10d, r9d - movq xmm9, rsp - mov rsp, r8 - mov r8d, 524288 - - mov ebx, [rdx+96] - mov esi, [rdx+100] - mov edi, [rdx+104] - mov ebp, [rdx+108] - - ALIGN(64) -CryptonightR_template_mainloop: - movdqa xmm5, XMMWORD PTR [r9+r11] - movq xmm0, r15 - movq xmm4, rsp - punpcklqdq xmm4, xmm0 - lea rdx, QWORD PTR [r9+r11] - - aesenc xmm5, xmm4 - - mov r13d, r9d - mov eax, r9d - xor r9d, 48 - xor r13d, 16 - xor eax, 32 - movdqu xmm0, XMMWORD PTR [r9+r11] - movaps xmm3, xmm0 - movdqu xmm2, XMMWORD PTR [r13+r11] - movdqu xmm1, XMMWORD PTR [rax+r11] - pxor xmm0, xmm2 - pxor xmm5, xmm1 - pxor xmm5, xmm0 - - movq r12, xmm5 - movd r10d, xmm5 - and r10d, 2097136 - - paddq xmm3, xmm7 - paddq xmm2, xmm6 - paddq xmm1, xmm4 - movdqu XMMWORD PTR [r13+r11], xmm3 - movdqu XMMWORD PTR [rax+r11], xmm2 - movdqu XMMWORD PTR [r9+r11], xmm1 - - movdqa xmm0, xmm5 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [rdx], xmm0 - - lea r13d, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or r13, rdx - - movd eax, xmm6 - movd edx, xmm7 - pextrd r9d, xmm7, 2 - - xor r13, QWORD PTR [r10+r11] - mov r14, QWORD PTR [r10+r11+8] - -CryptonightR_template_part2: - lea rcx, [r10+r11] - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor rsp, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r15, rax - - mov rax, r13 - mul r12 - add r15, rax - add rsp, rdx - - mov r9d, r10d - mov r12d, r10d - xor r9d, 16 - xor r12d, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [r12+r11] - movaps xmm3, xmm1 - movdqa xmm2, XMMWORD PTR [r9+r11] - movdqa xmm0, XMMWORD PTR [r10+r11] - pxor xmm1, xmm2 - pxor xmm5, xmm0 - pxor xmm5, xmm1 - paddq xmm3, xmm4 - paddq xmm2, xmm6 - paddq xmm0, xmm7 - movdqu XMMWORD PTR [r9+r11], xmm0 - movdqu XMMWORD PTR [r12+r11], xmm2 - movdqu XMMWORD PTR 
[r10+r11], xmm3 - - movdqa xmm7, xmm6 - mov QWORD PTR [rcx], rsp - xor rsp, r13 - mov r9d, esp - mov QWORD PTR [rcx+8], r15 - and r9d, 2097136 - xor r15, r14 - movdqa xmm6, xmm5 - dec r8d - jnz CryptonightR_template_mainloop - -CryptonightR_template_part3: - movq rsp, xmm9 - - mov rbx, QWORD PTR [rsp+136] - mov rbp, QWORD PTR [rsp+144] - mov rsi, QWORD PTR [rsp+152] - movaps xmm6, XMMWORD PTR [rsp+48] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+16] - movaps xmm9, XMMWORD PTR [rsp] - add rsp, 64 - pop rdi - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - ret 0 -CryptonightR_template_end: - -ALIGN(64) -CryptonightR_template_double_part1: - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 320 - mov r14, QWORD PTR [rcx+32] - mov r8, rcx - xor r14, QWORD PTR [rcx] - mov r12, QWORD PTR [rcx+40] - mov ebx, r14d - mov rsi, QWORD PTR [rcx+224] - and ebx, 2097136 - xor r12, QWORD PTR [rcx+8] - mov rcx, QWORD PTR [rcx+56] - xor rcx, QWORD PTR [r8+24] - mov rax, QWORD PTR [r8+48] - xor rax, QWORD PTR [r8+16] - mov r15, QWORD PTR [rdx+32] - xor r15, QWORD PTR [rdx] - movq xmm0, rcx - mov rcx, QWORD PTR [r8+88] - xor rcx, QWORD PTR [r8+72] - mov r13, QWORD PTR [rdx+40] - mov rdi, QWORD PTR [rdx+224] - xor r13, QWORD PTR [rdx+8] - movaps XMMWORD PTR [rsp+160], xmm6 - movaps XMMWORD PTR [rsp+176], xmm7 - movaps XMMWORD PTR [rsp+192], xmm8 - movaps XMMWORD PTR [rsp+208], xmm9 - movaps XMMWORD PTR [rsp+224], xmm10 - movaps XMMWORD PTR [rsp+240], xmm11 - movaps XMMWORD PTR [rsp+256], xmm12 - movaps XMMWORD PTR [rsp+272], xmm13 - movaps XMMWORD PTR [rsp+288], xmm14 - movaps XMMWORD PTR [rsp+304], xmm15 - movq xmm7, rax - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - - movaps xmm1, XMMWORD PTR [rdx+96] - movaps xmm2, XMMWORD PTR [r8+96] - movaps XMMWORD PTR [rsp], xmm1 - movaps XMMWORD PTR [rsp+16], xmm2 - - mov r8d, r15d - punpcklqdq 
xmm7, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [rdx+56] - xor rcx, QWORD PTR [rdx+24] - movq xmm9, rax - mov QWORD PTR [rsp+128], rsi - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - punpcklqdq xmm9, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [rdx+88] - xor rcx, QWORD PTR [rdx+72] - movq xmm8, rax - mov QWORD PTR [rsp+136], rdi - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm8, xmm0 - and r8d, 2097136 - movq xmm0, rcx - mov r11d, 524288 - movq xmm10, rax - punpcklqdq xmm10, xmm0 - - movq xmm14, QWORD PTR [rsp+128] - movq xmm15, QWORD PTR [rsp+136] - - ALIGN(64) -CryptonightR_template_double_mainloop: - movdqu xmm6, XMMWORD PTR [rbx+rsi] - movq xmm0, r12 - mov ecx, ebx - movq xmm3, r14 - punpcklqdq xmm3, xmm0 - xor ebx, 16 - aesenc xmm6, xmm3 - movq xmm4, r15 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm0 - xor ebx, 48 - paddq xmm0, xmm7 - movdqu xmm1, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm1 - movdqu XMMWORD PTR [rbx+rsi], xmm0 - paddq xmm1, xmm3 - xor ebx, 16 - mov eax, ebx - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm0 - movq rdx, xmm6 - movdqu XMMWORD PTR [rbx+rsi], xmm1 - paddq xmm0, xmm9 - movdqu XMMWORD PTR [rax+rsi], xmm0 - movdqa xmm0, xmm6 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [rcx+rsi], xmm0 - mov esi, edx - movdqu xmm5, XMMWORD PTR [r8+rdi] - and esi, 2097136 - mov ecx, r8d - movq xmm0, r13 - punpcklqdq xmm4, xmm0 - xor r8d, 16 - aesenc xmm5, xmm4 - movdqu xmm0, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm0 - xor r8d, 48 - paddq xmm0, xmm8 - movdqu xmm1, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm1 - movdqu XMMWORD PTR [r8+rdi], xmm0 - paddq xmm1, xmm4 - xor r8d, 16 - mov eax, r8d - xor rax, 32 - movdqu xmm0, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm0 - movdqu XMMWORD PTR [r8+rdi], xmm1 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rdi], xmm0 - movdqa xmm0, xmm5 - pxor xmm0, xmm8 - movdqu XMMWORD PTR [rcx+rdi], xmm0 - movq rdi, xmm5 - movq rcx, xmm14 - mov ebp, edi - mov r8, QWORD PTR [rcx+rsi] - 
mov r10, QWORD PTR [rcx+rsi+8] - lea r9, QWORD PTR [rcx+rsi] - xor esi, 16 - - movq xmm0, rsp - movq xmm1, rsi - movq xmm2, rdi - movq xmm11, rbp - movq xmm12, r15 - movq xmm13, rdx - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp+16] - mov esi, DWORD PTR [rsp+20] - mov edi, DWORD PTR [rsp+24] - mov ebp, DWORD PTR [rsp+28] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - xor r8, rax - - movd esp, xmm3 - pextrd r15d, xmm3, 2 - movd eax, xmm7 - movd edx, xmm9 - pextrd r9d, xmm9, 2 - -CryptonightR_template_double_part2: - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor r14, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r12, rax - - movq rsp, xmm0 - mov DWORD PTR [rsp+16], ebx - mov DWORD PTR [rsp+20], esi - mov DWORD PTR [rsp+24], edi - mov DWORD PTR [rsp+28], ebp - - movq rsi, xmm1 - movq rdi, xmm2 - movq rbp, xmm11 - movq r15, xmm12 - movq rdx, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rbx, r8 - mov rax, r8 - mul rdx - and ebp, 2097136 - mov r8, rax - movdqu xmm1, XMMWORD PTR [rcx+rsi] - pxor xmm6, xmm1 - xor esi, 48 - paddq xmm1, xmm7 - movdqu xmm2, XMMWORD PTR [rsi+rcx] - pxor xmm6, xmm2 - paddq xmm2, xmm3 - movdqu XMMWORD PTR [rsi+rcx], xmm1 - xor esi, 16 - mov eax, esi - mov rsi, rcx - movdqu xmm0, XMMWORD PTR [rax+rcx] - pxor xmm6, xmm0 - movdqu XMMWORD PTR [rax+rcx], xmm2 - paddq xmm0, xmm9 - add r12, r8 - xor rax, 32 - add r14, rdx - movdqa xmm9, xmm7 - movdqa xmm7, xmm6 - movdqu XMMWORD PTR [rax+rcx], xmm0 - mov QWORD PTR [r9+8], r12 - xor r12, r10 - mov QWORD PTR [r9], r14 - movq rcx, xmm15 - xor r14, rbx - mov r10d, ebp - mov ebx, r14d - xor ebp, 16 - and ebx, 2097136 - mov r8, QWORD PTR [r10+rcx] - mov r9, QWORD PTR [r10+rcx+8] - - movq xmm0, rsp - movq xmm1, rbx - movq xmm2, rsi - movq xmm11, rdi - movq xmm12, rbp - movq xmm13, r15 - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp] - mov esi, DWORD PTR [rsp+4] - mov edi, DWORD PTR [rsp+8] 
- mov ebp, DWORD PTR [rsp+12] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - - xor r8, rax - movq xmm3, r8 - - movd esp, xmm4 - pextrd r15d, xmm4, 2 - movd eax, xmm8 - movd edx, xmm10 - pextrd r9d, xmm10, 2 - -CryptonightR_template_double_part3: - - movq r15, xmm13 - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor r15, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r13, rax - - movq rsp, xmm0 - mov DWORD PTR [rsp], ebx - mov DWORD PTR [rsp+4], esi - mov DWORD PTR [rsp+8], edi - mov DWORD PTR [rsp+12], ebp - - movq rbx, xmm1 - movq rsi, xmm2 - movq rdi, xmm11 - movq rbp, xmm12 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rax, r8 - mul rdi - mov rdi, rcx - mov r8, rax - movdqu xmm1, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm1 - xor ebp, 48 - paddq xmm1, xmm8 - add r13, r8 - movdqu xmm2, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm2 - add r15, rdx - movdqu XMMWORD PTR [rbp+rcx], xmm1 - paddq xmm2, xmm4 - xor ebp, 16 - mov eax, ebp - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm0 - movdqu XMMWORD PTR [rbp+rcx], xmm2 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rcx], xmm0 - movq rax, xmm3 - movdqa xmm10, xmm8 - mov QWORD PTR [r10+rcx], r15 - movdqa xmm8, xmm5 - xor r15, rax - mov QWORD PTR [r10+rcx+8], r13 - mov r8d, r15d - xor r13, r9 - and r8d, 2097136 - dec r11d - jnz CryptonightR_template_double_mainloop - -CryptonightR_template_double_part4: - - mov rbx, QWORD PTR [rsp+400] - movaps xmm6, XMMWORD PTR [rsp+160] - movaps xmm7, XMMWORD PTR [rsp+176] - movaps xmm8, XMMWORD PTR [rsp+192] - movaps xmm9, XMMWORD PTR [rsp+208] - movaps xmm10, XMMWORD PTR [rsp+224] - movaps xmm11, XMMWORD PTR [rsp+240] - movaps xmm12, XMMWORD PTR [rsp+256] - movaps xmm13, XMMWORD PTR [rsp+272] - movaps xmm14, XMMWORD PTR [rsp+288] - movaps xmm15, XMMWORD PTR [rsp+304] - add rsp, 320 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - ret 0 -CryptonightR_template_double_end: diff --git 
a/src/crypto/asm/CryptonightWOW_soft_aes_template.inc b/src/crypto/asm/CryptonightWOW_soft_aes_template.inc deleted file mode 100644 index 53b7016a..00000000 --- a/src/crypto/asm/CryptonightWOW_soft_aes_template.inc +++ /dev/null @@ -1,268 +0,0 @@ -PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part1) -PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop) -PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part2) -PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part3) -PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_end) - -ALIGN(64) -FN_PREFIX(CryptonightWOW_soft_aes_template_part1): - mov rcx, [rcx] - - mov QWORD PTR [rsp+8], rcx - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 232 - - mov eax, [rcx+96] - mov ebx, [rcx+100] - mov esi, [rcx+104] - mov edx, [rcx+108] - mov [rsp+144], eax - mov [rsp+148], ebx - mov [rsp+152], esi - mov [rsp+156], edx - - mov rax, QWORD PTR [rcx+48] - mov r10, rcx - xor rax, QWORD PTR [rcx+16] - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD PTR [rcx] - mov r9, QWORD PTR [rcx+40] - xor r9, QWORD PTR [rcx+8] - movq xmm4, rax - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov r11, QWORD PTR [rcx+224] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r10+72] - mov rax, QWORD PTR [r10+80] - movq xmm0, rdx - xor rax, QWORD PTR [r10+64] - - movaps XMMWORD PTR [rsp+16], xmm6 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+48], xmm8 - movaps XMMWORD PTR [rsp+64], xmm9 - movaps XMMWORD PTR [rsp+80], xmm10 - movaps XMMWORD PTR [rsp+96], xmm11 - movaps XMMWORD PTR [rsp+112], xmm12 - movaps XMMWORD PTR [rsp+128], xmm13 - - movq xmm5, rax - - mov rax, r8 - punpcklqdq xmm4, xmm0 - and eax, 2097136 - movq xmm10, QWORD PTR [r10+96] - movq xmm0, rcx - mov rcx, QWORD PTR [r10+104] - xorps xmm9, xmm9 - mov QWORD PTR [rsp+328], rax - movq xmm12, r11 - mov QWORD PTR [rsp+320], r9 - punpcklqdq xmm5, xmm0 - movq xmm13, rcx - mov r12d, 524288 - - ALIGN(64) 
-FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop): - movd xmm11, r12d - mov r12, QWORD PTR [r10+272] - lea r13, QWORD PTR [rax+r11] - mov esi, DWORD PTR [r13] - movq xmm0, r9 - mov r10d, DWORD PTR [r13+4] - movq xmm7, r8 - mov ebp, DWORD PTR [r13+12] - mov r14d, DWORD PTR [r13+8] - mov rdx, QWORD PTR [rsp+328] - movzx ecx, sil - shr esi, 8 - punpcklqdq xmm7, xmm0 - mov r15d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - mov edi, DWORD PTR [r12+rcx*4] - movzx ecx, r14b - shr r14d, 8 - mov ebx, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - shr ebp, 8 - mov r9d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - xor r15d, DWORD PTR [r12+rcx*4+1024] - movzx ecx, r14b - shr r14d, 8 - mov eax, r14d - shr eax, 8 - xor edi, DWORD PTR [r12+rcx*4+1024] - add eax, 256 - movzx ecx, bpl - shr ebp, 8 - xor ebx, DWORD PTR [r12+rcx*4+1024] - movzx ecx, sil - shr esi, 8 - xor r9d, DWORD PTR [r12+rcx*4+1024] - add r12, 2048 - movzx ecx, r10b - shr r10d, 8 - add r10d, 256 - mov r11d, DWORD PTR [r12+rax*4] - xor r11d, DWORD PTR [r12+rcx*4] - xor r11d, r9d - movzx ecx, sil - mov r10d, DWORD PTR [r12+r10*4] - shr esi, 8 - add esi, 256 - xor r10d, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - xor r10d, ebx - shr ebp, 8 - movd xmm1, r11d - add ebp, 256 - movq r11, xmm12 - mov r9d, DWORD PTR [r12+rcx*4] - xor r9d, DWORD PTR [r12+rsi*4] - mov eax, DWORD PTR [r12+rbp*4] - xor r9d, edi - movzx ecx, r14b - movd xmm0, r10d - movd xmm2, r9d - xor eax, DWORD PTR [r12+rcx*4] - mov rcx, rdx - xor eax, r15d - punpckldq xmm2, xmm1 - xor rcx, 16 - movd xmm6, eax - mov rax, rdx - punpckldq xmm6, xmm0 - xor rax, 32 - punpckldq xmm6, xmm2 - xor rdx, 48 - movdqu xmm2, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm7 - paddq xmm2, xmm4 - movdqu xmm1, XMMWORD PTR [rax+r11] - movdqu xmm0, XMMWORD PTR [rdx+r11] - paddq xmm0, xmm5 - movdqu XMMWORD PTR [rcx+r11], xmm0 - movdqu XMMWORD PTR [rax+r11], xmm2 - movq rcx, xmm13 - paddq xmm1, xmm7 - movdqu XMMWORD PTR [rdx+r11], xmm1 - movq rdi, xmm6 - mov r10, rdi - 
and r10d, 2097136 - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [r13], xmm0 - - mov ebx, [rsp+144] - mov ebp, [rsp+152] - add ebx, [rsp+148] - add ebp, [rsp+156] - shl rbp, 32 - or rbx, rbp - - xor rbx, QWORD PTR [r10+r11] - lea r14, QWORD PTR [r10+r11] - mov rbp, QWORD PTR [r14+8] - - mov [rsp+160], rbx - mov [rsp+168], rdi - mov [rsp+176], rbp - mov [rsp+184], r10 - mov r10, rsp - - mov ebx, [rsp+144] - mov esi, [rsp+148] - mov edi, [rsp+152] - mov ebp, [rsp+156] - - movd esp, xmm7 - movaps xmm0, xmm7 - psrldq xmm0, 8 - movd r15d, xmm0 - movd eax, xmm4 - movd edx, xmm5 - -FN_PREFIX(CryptonightWOW_soft_aes_template_part2): - mov rsp, r10 - mov [rsp+144], ebx - mov [rsp+148], esi - mov [rsp+152], edi - mov [rsp+156], ebp - - mov rbx, [rsp+160] - mov rdi, [rsp+168] - mov rbp, [rsp+176] - mov r10, [rsp+184] - - mov r9, r10 - xor r9, 16 - mov rcx, r10 - xor rcx, 32 - xor r10, 48 - mov rax, rbx - mul rdi - movdqu xmm2, XMMWORD PTR [r9+r11] - movdqu xmm1, XMMWORD PTR [rcx+r11] - paddq xmm1, xmm7 - movq xmm0, rax - movq xmm3, rdx - xor rax, QWORD PTR [r11+rcx+8] - xor rdx, QWORD PTR [rcx+r11] - punpcklqdq xmm3, xmm0 - add r8, rdx - movdqu xmm0, XMMWORD PTR [r10+r11] - pxor xmm2, xmm3 - paddq xmm0, xmm5 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [r9+r11], xmm0 - movdqa xmm5, xmm4 - mov r9, QWORD PTR [rsp+320] - movdqa xmm4, xmm6 - add r9, rax - movdqu XMMWORD PTR [rcx+r11], xmm2 - movdqu XMMWORD PTR [r10+r11], xmm1 - mov r10, QWORD PTR [rsp+304] - movd r12d, xmm11 - mov QWORD PTR [r14], r8 - xor r8, rbx - mov rax, r8 - mov QWORD PTR [r14+8], r9 - and eax, 2097136 - xor r9, rbp - mov QWORD PTR [rsp+320], r9 - mov QWORD PTR [rsp+328], rax - sub r12d, 1 - jne FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop) - -FN_PREFIX(CryptonightWOW_soft_aes_template_part3): - movaps xmm6, XMMWORD PTR [rsp+16] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+48] - movaps xmm9, XMMWORD PTR [rsp+64] - movaps xmm10, XMMWORD PTR [rsp+80] - movaps xmm11, XMMWORD 
PTR [rsp+96] - movaps xmm12, XMMWORD PTR [rsp+112] - movaps xmm13, XMMWORD PTR [rsp+128] - - add rsp, 232 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - ret -FN_PREFIX(CryptonightWOW_soft_aes_template_end): diff --git a/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc b/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc deleted file mode 100644 index b3202b78..00000000 --- a/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc +++ /dev/null @@ -1,268 +0,0 @@ -PUBLIC CryptonightWOW_soft_aes_template_part1 -PUBLIC CryptonightWOW_soft_aes_template_mainloop -PUBLIC CryptonightWOW_soft_aes_template_part2 -PUBLIC CryptonightWOW_soft_aes_template_part3 -PUBLIC CryptonightWOW_soft_aes_template_end - -ALIGN(64) -CryptonightWOW_soft_aes_template_part1: - mov rcx, [rcx] - - mov QWORD PTR [rsp+8], rcx - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 232 - - mov eax, [rcx+96] - mov ebx, [rcx+100] - mov esi, [rcx+104] - mov edx, [rcx+108] - mov [rsp+144], eax - mov [rsp+148], ebx - mov [rsp+152], esi - mov [rsp+156], edx - - mov rax, QWORD PTR [rcx+48] - mov r10, rcx - xor rax, QWORD PTR [rcx+16] - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD PTR [rcx] - mov r9, QWORD PTR [rcx+40] - xor r9, QWORD PTR [rcx+8] - movq xmm4, rax - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov r11, QWORD PTR [rcx+224] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r10+72] - mov rax, QWORD PTR [r10+80] - movq xmm0, rdx - xor rax, QWORD PTR [r10+64] - - movaps XMMWORD PTR [rsp+16], xmm6 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+48], xmm8 - movaps XMMWORD PTR [rsp+64], xmm9 - movaps XMMWORD PTR [rsp+80], xmm10 - movaps XMMWORD PTR [rsp+96], xmm11 - movaps XMMWORD PTR [rsp+112], xmm12 - movaps XMMWORD PTR [rsp+128], xmm13 - - movq xmm5, rax - - mov rax, r8 - punpcklqdq xmm4, xmm0 - and eax, 2097136 - movq xmm10, QWORD PTR [r10+96] - movq xmm0, rcx - mov rcx, QWORD 
PTR [r10+104] - xorps xmm9, xmm9 - mov QWORD PTR [rsp+328], rax - movq xmm12, r11 - mov QWORD PTR [rsp+320], r9 - punpcklqdq xmm5, xmm0 - movq xmm13, rcx - mov r12d, 524288 - - ALIGN(64) -CryptonightWOW_soft_aes_template_mainloop: - movd xmm11, r12d - mov r12, QWORD PTR [r10+272] - lea r13, QWORD PTR [rax+r11] - mov esi, DWORD PTR [r13] - movq xmm0, r9 - mov r10d, DWORD PTR [r13+4] - movq xmm7, r8 - mov ebp, DWORD PTR [r13+12] - mov r14d, DWORD PTR [r13+8] - mov rdx, QWORD PTR [rsp+328] - movzx ecx, sil - shr esi, 8 - punpcklqdq xmm7, xmm0 - mov r15d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - mov edi, DWORD PTR [r12+rcx*4] - movzx ecx, r14b - shr r14d, 8 - mov ebx, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - shr ebp, 8 - mov r9d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - xor r15d, DWORD PTR [r12+rcx*4+1024] - movzx ecx, r14b - shr r14d, 8 - mov eax, r14d - shr eax, 8 - xor edi, DWORD PTR [r12+rcx*4+1024] - add eax, 256 - movzx ecx, bpl - shr ebp, 8 - xor ebx, DWORD PTR [r12+rcx*4+1024] - movzx ecx, sil - shr esi, 8 - xor r9d, DWORD PTR [r12+rcx*4+1024] - add r12, 2048 - movzx ecx, r10b - shr r10d, 8 - add r10d, 256 - mov r11d, DWORD PTR [r12+rax*4] - xor r11d, DWORD PTR [r12+rcx*4] - xor r11d, r9d - movzx ecx, sil - mov r10d, DWORD PTR [r12+r10*4] - shr esi, 8 - add esi, 256 - xor r10d, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - xor r10d, ebx - shr ebp, 8 - movd xmm1, r11d - add ebp, 256 - movq r11, xmm12 - mov r9d, DWORD PTR [r12+rcx*4] - xor r9d, DWORD PTR [r12+rsi*4] - mov eax, DWORD PTR [r12+rbp*4] - xor r9d, edi - movzx ecx, r14b - movd xmm0, r10d - movd xmm2, r9d - xor eax, DWORD PTR [r12+rcx*4] - mov rcx, rdx - xor eax, r15d - punpckldq xmm2, xmm1 - xor rcx, 16 - movd xmm6, eax - mov rax, rdx - punpckldq xmm6, xmm0 - xor rax, 32 - punpckldq xmm6, xmm2 - xor rdx, 48 - movdqu xmm2, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm7 - paddq xmm2, xmm4 - movdqu xmm1, XMMWORD PTR [rax+r11] - movdqu xmm0, XMMWORD PTR [rdx+r11] - paddq xmm0, xmm5 - 
movdqu XMMWORD PTR [rcx+r11], xmm0 - movdqu XMMWORD PTR [rax+r11], xmm2 - movq rcx, xmm13 - paddq xmm1, xmm7 - movdqu XMMWORD PTR [rdx+r11], xmm1 - movq rdi, xmm6 - mov r10, rdi - and r10d, 2097136 - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [r13], xmm0 - - mov ebx, [rsp+144] - mov ebp, [rsp+152] - add ebx, [rsp+148] - add ebp, [rsp+156] - shl rbp, 32 - or rbx, rbp - - xor rbx, QWORD PTR [r10+r11] - lea r14, QWORD PTR [r10+r11] - mov rbp, QWORD PTR [r14+8] - - mov [rsp+160], rbx - mov [rsp+168], rdi - mov [rsp+176], rbp - mov [rsp+184], r10 - mov r10, rsp - - mov ebx, [rsp+144] - mov esi, [rsp+148] - mov edi, [rsp+152] - mov ebp, [rsp+156] - - movd esp, xmm7 - movaps xmm0, xmm7 - psrldq xmm0, 8 - movd r15d, xmm0 - movd eax, xmm4 - movd edx, xmm5 - -CryptonightWOW_soft_aes_template_part2: - mov rsp, r10 - mov [rsp+144], ebx - mov [rsp+148], esi - mov [rsp+152], edi - mov [rsp+156], ebp - - mov rbx, [rsp+160] - mov rdi, [rsp+168] - mov rbp, [rsp+176] - mov r10, [rsp+184] - - mov r9, r10 - xor r9, 16 - mov rcx, r10 - xor rcx, 32 - xor r10, 48 - mov rax, rbx - mul rdi - movdqu xmm2, XMMWORD PTR [r9+r11] - movdqu xmm1, XMMWORD PTR [rcx+r11] - paddq xmm1, xmm7 - movq xmm0, rax - movq xmm3, rdx - xor rax, QWORD PTR [r11+rcx+8] - xor rdx, QWORD PTR [rcx+r11] - punpcklqdq xmm3, xmm0 - add r8, rdx - movdqu xmm0, XMMWORD PTR [r10+r11] - pxor xmm2, xmm3 - paddq xmm0, xmm5 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [r9+r11], xmm0 - movdqa xmm5, xmm4 - mov r9, QWORD PTR [rsp+320] - movdqa xmm4, xmm6 - add r9, rax - movdqu XMMWORD PTR [rcx+r11], xmm2 - movdqu XMMWORD PTR [r10+r11], xmm1 - mov r10, QWORD PTR [rsp+304] - movd r12d, xmm11 - mov QWORD PTR [r14], r8 - xor r8, rbx - mov rax, r8 - mov QWORD PTR [r14+8], r9 - and eax, 2097136 - xor r9, rbp - mov QWORD PTR [rsp+320], r9 - mov QWORD PTR [rsp+328], rax - sub r12d, 1 - jne CryptonightWOW_soft_aes_template_mainloop - -CryptonightWOW_soft_aes_template_part3: - movaps xmm6, XMMWORD PTR [rsp+16] - movaps xmm7, 
XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+48] - movaps xmm9, XMMWORD PTR [rsp+64] - movaps xmm10, XMMWORD PTR [rsp+80] - movaps xmm11, XMMWORD PTR [rsp+96] - movaps xmm12, XMMWORD PTR [rsp+112] - movaps xmm13, XMMWORD PTR [rsp+128] - - add rsp, 232 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - ret -CryptonightWOW_soft_aes_template_end: diff --git a/src/crypto/asm/CryptonightWOW_template.inc b/src/crypto/asm/CryptonightWOW_template.inc deleted file mode 100644 index 82d455f6..00000000 --- a/src/crypto/asm/CryptonightWOW_template.inc +++ /dev/null @@ -1,491 +0,0 @@ -PUBLIC FN_PREFIX(CryptonightWOW_template_part1) -PUBLIC FN_PREFIX(CryptonightWOW_template_mainloop) -PUBLIC FN_PREFIX(CryptonightWOW_template_part2) -PUBLIC FN_PREFIX(CryptonightWOW_template_part3) -PUBLIC FN_PREFIX(CryptonightWOW_template_end) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_part1) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_mainloop) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_part2) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_part3) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_part4) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_end) - -ALIGN(64) -FN_PREFIX(CryptonightWOW_template_part1): - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - push rdi - sub rsp, 64 - mov r12, rcx - mov r8, QWORD PTR [r12+32] - mov rdx, r12 - xor r8, QWORD PTR [r12] - mov r15, QWORD PTR [r12+40] - mov r9, r8 - xor r15, QWORD PTR [r12+8] - mov r11, QWORD PTR [r12+224] - mov r12, QWORD PTR [r12+56] - xor r12, QWORD PTR [rdx+24] - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - movaps XMMWORD PTR [rsp+48], xmm6 - movq xmm0, r12 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - movaps XMMWORD PTR [rsp], xmm9 - mov r12, QWORD PTR [rdx+88] - xor r12, QWORD PTR [rdx+72] - movq xmm6, rax 
- mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm6, xmm0 - and r9d, 2097136 - movq xmm0, r12 - movq xmm7, rax - punpcklqdq xmm7, xmm0 - mov r10d, r9d - movq xmm9, rsp - mov rsp, r8 - mov r8d, 524288 - - mov ebx, [rdx+96] - mov esi, [rdx+100] - mov edi, [rdx+104] - mov ebp, [rdx+108] - - ALIGN(64) -FN_PREFIX(CryptonightWOW_template_mainloop): - movdqa xmm5, XMMWORD PTR [r9+r11] - movq xmm0, r15 - movq xmm4, rsp - punpcklqdq xmm4, xmm0 - lea rdx, QWORD PTR [r9+r11] - - aesenc xmm5, xmm4 - movd r10d, xmm5 - and r10d, 2097136 - - mov r12d, r9d - mov eax, r9d - xor r9d, 48 - xor r12d, 16 - xor eax, 32 - movdqu xmm0, XMMWORD PTR [r9+r11] - movdqu xmm2, XMMWORD PTR [r12+r11] - movdqu xmm1, XMMWORD PTR [rax+r11] - paddq xmm0, xmm7 - paddq xmm2, xmm6 - paddq xmm1, xmm4 - movdqu XMMWORD PTR [r12+r11], xmm0 - movq r12, xmm5 - movdqu XMMWORD PTR [rax+r11], xmm2 - movdqu XMMWORD PTR [r9+r11], xmm1 - - movdqa xmm0, xmm5 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [rdx], xmm0 - - lea r13d, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or r13, rdx - - xor r13, QWORD PTR [r10+r11] - mov r14, QWORD PTR [r10+r11+8] - - movd eax, xmm6 - movd edx, xmm7 - pextrd r9d, xmm7, 2 - -FN_PREFIX(CryptonightWOW_template_part2): - mov rax, r13 - mul r12 - movq xmm0, rax - movq xmm3, rdx - punpcklqdq xmm3, xmm0 - - mov r9d, r10d - mov r12d, r10d - xor r9d, 16 - xor r12d, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [r12+r11] - xor rdx, QWORD PTR [r12+r11] - xor rax, QWORD PTR [r11+r12+8] - movdqa xmm2, XMMWORD PTR [r9+r11] - pxor xmm3, xmm2 - paddq xmm7, XMMWORD PTR [r10+r11] - paddq xmm1, xmm4 - paddq xmm3, xmm6 - movdqu XMMWORD PTR [r9+r11], xmm7 - movdqu XMMWORD PTR [r12+r11], xmm3 - movdqu XMMWORD PTR [r10+r11], xmm1 - - movdqa xmm7, xmm6 - add r15, rax - add rsp, rdx - xor r10, 48 - mov QWORD PTR [r10+r11], rsp - xor rsp, r13 - mov r9d, esp - mov QWORD PTR [r10+r11+8], r15 - and r9d, 2097136 - xor r15, r14 - movdqa xmm6, xmm5 - dec r8d - jnz 
FN_PREFIX(CryptonightWOW_template_mainloop) - -FN_PREFIX(CryptonightWOW_template_part3): - movq rsp, xmm9 - - mov rbx, QWORD PTR [rsp+136] - mov rbp, QWORD PTR [rsp+144] - mov rsi, QWORD PTR [rsp+152] - movaps xmm6, XMMWORD PTR [rsp+48] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+16] - movaps xmm9, XMMWORD PTR [rsp] - add rsp, 64 - pop rdi - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - ret 0 -FN_PREFIX(CryptonightWOW_template_end): - -ALIGN(64) -FN_PREFIX(CryptonightWOW_template_double_part1): - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 320 - mov r14, QWORD PTR [rcx+32] - mov r8, rcx - xor r14, QWORD PTR [rcx] - mov r12, QWORD PTR [rcx+40] - mov ebx, r14d - mov rsi, QWORD PTR [rcx+224] - and ebx, 2097136 - xor r12, QWORD PTR [rcx+8] - mov rcx, QWORD PTR [rcx+56] - xor rcx, QWORD PTR [r8+24] - mov rax, QWORD PTR [r8+48] - xor rax, QWORD PTR [r8+16] - mov r15, QWORD PTR [rdx+32] - xor r15, QWORD PTR [rdx] - movq xmm0, rcx - mov rcx, QWORD PTR [r8+88] - xor rcx, QWORD PTR [r8+72] - mov r13, QWORD PTR [rdx+40] - mov rdi, QWORD PTR [rdx+224] - xor r13, QWORD PTR [rdx+8] - movaps XMMWORD PTR [rsp+160], xmm6 - movaps XMMWORD PTR [rsp+176], xmm7 - movaps XMMWORD PTR [rsp+192], xmm8 - movaps XMMWORD PTR [rsp+208], xmm9 - movaps XMMWORD PTR [rsp+224], xmm10 - movaps XMMWORD PTR [rsp+240], xmm11 - movaps XMMWORD PTR [rsp+256], xmm12 - movaps XMMWORD PTR [rsp+272], xmm13 - movaps XMMWORD PTR [rsp+288], xmm14 - movaps XMMWORD PTR [rsp+304], xmm15 - movq xmm7, rax - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - - movaps xmm1, XMMWORD PTR [rdx+96] - movaps xmm2, XMMWORD PTR [r8+96] - movaps XMMWORD PTR [rsp], xmm1 - movaps XMMWORD PTR [rsp+16], xmm2 - - mov r8d, r15d - punpcklqdq xmm7, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [rdx+56] - xor rcx, QWORD PTR [rdx+24] - movq xmm9, rax - mov QWORD PTR [rsp+128], rsi - mov 
rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - punpcklqdq xmm9, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [rdx+88] - xor rcx, QWORD PTR [rdx+72] - movq xmm8, rax - mov QWORD PTR [rsp+136], rdi - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm8, xmm0 - and r8d, 2097136 - movq xmm0, rcx - mov r11d, 524288 - movq xmm10, rax - punpcklqdq xmm10, xmm0 - - movq xmm14, QWORD PTR [rsp+128] - movq xmm15, QWORD PTR [rsp+136] - - ALIGN(64) -FN_PREFIX(CryptonightWOW_template_double_mainloop): - movdqu xmm6, XMMWORD PTR [rbx+rsi] - movq xmm0, r12 - mov ecx, ebx - movq xmm3, r14 - punpcklqdq xmm3, xmm0 - xor ebx, 16 - aesenc xmm6, xmm3 - movq rdx, xmm6 - movq xmm4, r15 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - xor ebx, 48 - paddq xmm0, xmm7 - movdqu xmm1, XMMWORD PTR [rbx+rsi] - movdqu XMMWORD PTR [rbx+rsi], xmm0 - paddq xmm1, xmm3 - xor ebx, 16 - mov eax, ebx - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - movdqu XMMWORD PTR [rbx+rsi], xmm1 - paddq xmm0, xmm9 - movdqu XMMWORD PTR [rax+rsi], xmm0 - movdqa xmm0, xmm6 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [rcx+rsi], xmm0 - mov esi, edx - movdqu xmm5, XMMWORD PTR [r8+rdi] - and esi, 2097136 - mov ecx, r8d - movq xmm0, r13 - punpcklqdq xmm4, xmm0 - xor r8d, 16 - aesenc xmm5, xmm4 - movdqu xmm0, XMMWORD PTR [r8+rdi] - xor r8d, 48 - paddq xmm0, xmm8 - movdqu xmm1, XMMWORD PTR [r8+rdi] - movdqu XMMWORD PTR [r8+rdi], xmm0 - paddq xmm1, xmm4 - xor r8d, 16 - mov eax, r8d - xor rax, 32 - movdqu xmm0, XMMWORD PTR [r8+rdi] - movdqu XMMWORD PTR [r8+rdi], xmm1 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rdi], xmm0 - movdqa xmm0, xmm5 - pxor xmm0, xmm8 - movdqu XMMWORD PTR [rcx+rdi], xmm0 - movq rdi, xmm5 - movq rcx, xmm14 - mov ebp, edi - mov r8, QWORD PTR [rcx+rsi] - mov r10, QWORD PTR [rcx+rsi+8] - lea r9, QWORD PTR [rcx+rsi] - xor esi, 16 - - movq xmm0, rsp - movq xmm1, rsi - movq xmm2, rdi - movq xmm11, rbp - movq xmm12, r15 - movq xmm13, rdx - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, 
DWORD PTR [rsp+16] - mov esi, DWORD PTR [rsp+20] - mov edi, DWORD PTR [rsp+24] - mov ebp, DWORD PTR [rsp+28] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - xor r8, rax - - movd esp, xmm3 - pextrd r15d, xmm3, 2 - movd eax, xmm7 - movd edx, xmm9 - pextrd r9d, xmm9, 2 - -FN_PREFIX(CryptonightWOW_template_double_part2): - - movq rsp, xmm0 - mov DWORD PTR [rsp+16], ebx - mov DWORD PTR [rsp+20], esi - mov DWORD PTR [rsp+24], edi - mov DWORD PTR [rsp+28], ebp - - movq rsi, xmm1 - movq rdi, xmm2 - movq rbp, xmm11 - movq r15, xmm12 - movq rdx, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rbx, r8 - mov rax, r8 - mul rdx - and ebp, 2097136 - mov r8, rax - movq xmm1, rdx - movq xmm0, r8 - punpcklqdq xmm1, xmm0 - pxor xmm1, XMMWORD PTR [rcx+rsi] - xor esi, 48 - paddq xmm1, xmm7 - movdqu xmm2, XMMWORD PTR [rsi+rcx] - xor rdx, QWORD PTR [rsi+rcx] - paddq xmm2, xmm3 - xor r8, QWORD PTR [rsi+rcx+8] - movdqu XMMWORD PTR [rsi+rcx], xmm1 - xor esi, 16 - mov eax, esi - mov rsi, rcx - movdqu xmm0, XMMWORD PTR [rax+rcx] - movdqu XMMWORD PTR [rax+rcx], xmm2 - paddq xmm0, xmm9 - add r12, r8 - xor rax, 32 - add r14, rdx - movdqa xmm9, xmm7 - movdqa xmm7, xmm6 - movdqu XMMWORD PTR [rax+rcx], xmm0 - mov QWORD PTR [r9+8], r12 - xor r12, r10 - mov QWORD PTR [r9], r14 - movq rcx, xmm15 - xor r14, rbx - mov r10d, ebp - mov ebx, r14d - xor ebp, 16 - and ebx, 2097136 - mov r8, QWORD PTR [r10+rcx] - mov r9, QWORD PTR [r10+rcx+8] - - movq xmm0, rsp - movq xmm1, rbx - movq xmm2, rsi - movq xmm11, rdi - movq xmm12, rbp - movq xmm13, r15 - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp] - mov esi, DWORD PTR [rsp+4] - mov edi, DWORD PTR [rsp+8] - mov ebp, DWORD PTR [rsp+12] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - - xor r8, rax - movq xmm3, r8 - - movd esp, xmm4 - pextrd r15d, xmm4, 2 - movd eax, xmm8 - movd edx, xmm10 - pextrd r9d, xmm10, 2 - -FN_PREFIX(CryptonightWOW_template_double_part3): - - movq rsp, xmm0 - 
mov DWORD PTR [rsp], ebx - mov DWORD PTR [rsp+4], esi - mov DWORD PTR [rsp+8], edi - mov DWORD PTR [rsp+12], ebp - - movq rbx, xmm1 - movq rsi, xmm2 - movq rdi, xmm11 - movq rbp, xmm12 - movq r15, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rax, r8 - mul rdi - movq xmm1, rdx - movq xmm0, rax - punpcklqdq xmm1, xmm0 - mov rdi, rcx - mov r8, rax - pxor xmm1, XMMWORD PTR [rbp+rcx] - xor ebp, 48 - paddq xmm1, xmm8 - xor r8, QWORD PTR [rbp+rcx+8] - xor rdx, QWORD PTR [rbp+rcx] - add r13, r8 - movdqu xmm2, XMMWORD PTR [rbp+rcx] - add r15, rdx - movdqu XMMWORD PTR [rbp+rcx], xmm1 - paddq xmm2, xmm4 - xor ebp, 16 - mov eax, ebp - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbp+rcx] - movdqu XMMWORD PTR [rbp+rcx], xmm2 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rcx], xmm0 - movq rax, xmm3 - movdqa xmm10, xmm8 - mov QWORD PTR [r10+rcx], r15 - movdqa xmm8, xmm5 - xor r15, rax - mov QWORD PTR [r10+rcx+8], r13 - mov r8d, r15d - xor r13, r9 - and r8d, 2097136 - dec r11d - jnz FN_PREFIX(CryptonightWOW_template_double_mainloop) - -FN_PREFIX(CryptonightWOW_template_double_part4): - - mov rbx, QWORD PTR [rsp+400] - movaps xmm6, XMMWORD PTR [rsp+160] - movaps xmm7, XMMWORD PTR [rsp+176] - movaps xmm8, XMMWORD PTR [rsp+192] - movaps xmm9, XMMWORD PTR [rsp+208] - movaps xmm10, XMMWORD PTR [rsp+224] - movaps xmm11, XMMWORD PTR [rsp+240] - movaps xmm12, XMMWORD PTR [rsp+256] - movaps xmm13, XMMWORD PTR [rsp+272] - movaps xmm14, XMMWORD PTR [rsp+288] - movaps xmm15, XMMWORD PTR [rsp+304] - add rsp, 320 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - ret 0 -FN_PREFIX(CryptonightWOW_template_double_end): diff --git a/src/crypto/asm/CryptonightWOW_template_win.inc b/src/crypto/asm/CryptonightWOW_template_win.inc deleted file mode 100644 index 644c01f1..00000000 --- a/src/crypto/asm/CryptonightWOW_template_win.inc +++ /dev/null @@ -1,491 +0,0 @@ -PUBLIC CryptonightWOW_template_part1 -PUBLIC CryptonightWOW_template_mainloop -PUBLIC 
CryptonightWOW_template_part2 -PUBLIC CryptonightWOW_template_part3 -PUBLIC CryptonightWOW_template_end -PUBLIC CryptonightWOW_template_double_part1 -PUBLIC CryptonightWOW_template_double_mainloop -PUBLIC CryptonightWOW_template_double_part2 -PUBLIC CryptonightWOW_template_double_part3 -PUBLIC CryptonightWOW_template_double_part4 -PUBLIC CryptonightWOW_template_double_end - -ALIGN(64) -CryptonightWOW_template_part1: - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - push rdi - sub rsp, 64 - mov r12, rcx - mov r8, QWORD PTR [r12+32] - mov rdx, r12 - xor r8, QWORD PTR [r12] - mov r15, QWORD PTR [r12+40] - mov r9, r8 - xor r15, QWORD PTR [r12+8] - mov r11, QWORD PTR [r12+224] - mov r12, QWORD PTR [r12+56] - xor r12, QWORD PTR [rdx+24] - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - movaps XMMWORD PTR [rsp+48], xmm6 - movq xmm0, r12 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - movaps XMMWORD PTR [rsp], xmm9 - mov r12, QWORD PTR [rdx+88] - xor r12, QWORD PTR [rdx+72] - movq xmm6, rax - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm6, xmm0 - and r9d, 2097136 - movq xmm0, r12 - movq xmm7, rax - punpcklqdq xmm7, xmm0 - mov r10d, r9d - movq xmm9, rsp - mov rsp, r8 - mov r8d, 524288 - - mov ebx, [rdx+96] - mov esi, [rdx+100] - mov edi, [rdx+104] - mov ebp, [rdx+108] - - ALIGN(64) -CryptonightWOW_template_mainloop: - movdqa xmm5, XMMWORD PTR [r9+r11] - movq xmm0, r15 - movq xmm4, rsp - punpcklqdq xmm4, xmm0 - lea rdx, QWORD PTR [r9+r11] - - aesenc xmm5, xmm4 - movd r10d, xmm5 - and r10d, 2097136 - - mov r12d, r9d - mov eax, r9d - xor r9d, 48 - xor r12d, 16 - xor eax, 32 - movdqu xmm0, XMMWORD PTR [r9+r11] - movdqu xmm2, XMMWORD PTR [r12+r11] - movdqu xmm1, XMMWORD PTR [rax+r11] - paddq xmm0, xmm7 - paddq xmm2, xmm6 - paddq xmm1, xmm4 - movdqu XMMWORD PTR [r12+r11], xmm0 - movq r12, 
xmm5 - movdqu XMMWORD PTR [rax+r11], xmm2 - movdqu XMMWORD PTR [r9+r11], xmm1 - - movdqa xmm0, xmm5 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [rdx], xmm0 - - lea r13d, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or r13, rdx - - xor r13, QWORD PTR [r10+r11] - mov r14, QWORD PTR [r10+r11+8] - - movd eax, xmm6 - movd edx, xmm7 - pextrd r9d, xmm7, 2 - -CryptonightWOW_template_part2: - mov rax, r13 - mul r12 - movq xmm0, rax - movq xmm3, rdx - punpcklqdq xmm3, xmm0 - - mov r9d, r10d - mov r12d, r10d - xor r9d, 16 - xor r12d, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [r12+r11] - xor rdx, QWORD PTR [r12+r11] - xor rax, QWORD PTR [r11+r12+8] - movdqa xmm2, XMMWORD PTR [r9+r11] - pxor xmm3, xmm2 - paddq xmm7, XMMWORD PTR [r10+r11] - paddq xmm1, xmm4 - paddq xmm3, xmm6 - movdqu XMMWORD PTR [r9+r11], xmm7 - movdqu XMMWORD PTR [r12+r11], xmm3 - movdqu XMMWORD PTR [r10+r11], xmm1 - - movdqa xmm7, xmm6 - add r15, rax - add rsp, rdx - xor r10, 48 - mov QWORD PTR [r10+r11], rsp - xor rsp, r13 - mov r9d, esp - mov QWORD PTR [r10+r11+8], r15 - and r9d, 2097136 - xor r15, r14 - movdqa xmm6, xmm5 - dec r8d - jnz CryptonightWOW_template_mainloop - -CryptonightWOW_template_part3: - movq rsp, xmm9 - - mov rbx, QWORD PTR [rsp+136] - mov rbp, QWORD PTR [rsp+144] - mov rsi, QWORD PTR [rsp+152] - movaps xmm6, XMMWORD PTR [rsp+48] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+16] - movaps xmm9, XMMWORD PTR [rsp] - add rsp, 64 - pop rdi - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - ret 0 -CryptonightWOW_template_end: - -ALIGN(64) -CryptonightWOW_template_double_part1: - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 320 - mov r14, QWORD PTR [rcx+32] - mov r8, rcx - xor r14, QWORD PTR [rcx] - mov r12, QWORD PTR [rcx+40] - mov ebx, r14d - mov rsi, QWORD PTR [rcx+224] - and ebx, 2097136 - xor r12, QWORD PTR [rcx+8] - mov rcx, QWORD PTR [rcx+56] - xor 
rcx, QWORD PTR [r8+24] - mov rax, QWORD PTR [r8+48] - xor rax, QWORD PTR [r8+16] - mov r15, QWORD PTR [rdx+32] - xor r15, QWORD PTR [rdx] - movq xmm0, rcx - mov rcx, QWORD PTR [r8+88] - xor rcx, QWORD PTR [r8+72] - mov r13, QWORD PTR [rdx+40] - mov rdi, QWORD PTR [rdx+224] - xor r13, QWORD PTR [rdx+8] - movaps XMMWORD PTR [rsp+160], xmm6 - movaps XMMWORD PTR [rsp+176], xmm7 - movaps XMMWORD PTR [rsp+192], xmm8 - movaps XMMWORD PTR [rsp+208], xmm9 - movaps XMMWORD PTR [rsp+224], xmm10 - movaps XMMWORD PTR [rsp+240], xmm11 - movaps XMMWORD PTR [rsp+256], xmm12 - movaps XMMWORD PTR [rsp+272], xmm13 - movaps XMMWORD PTR [rsp+288], xmm14 - movaps XMMWORD PTR [rsp+304], xmm15 - movq xmm7, rax - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - - movaps xmm1, XMMWORD PTR [rdx+96] - movaps xmm2, XMMWORD PTR [r8+96] - movaps XMMWORD PTR [rsp], xmm1 - movaps XMMWORD PTR [rsp+16], xmm2 - - mov r8d, r15d - punpcklqdq xmm7, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [rdx+56] - xor rcx, QWORD PTR [rdx+24] - movq xmm9, rax - mov QWORD PTR [rsp+128], rsi - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - punpcklqdq xmm9, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [rdx+88] - xor rcx, QWORD PTR [rdx+72] - movq xmm8, rax - mov QWORD PTR [rsp+136], rdi - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm8, xmm0 - and r8d, 2097136 - movq xmm0, rcx - mov r11d, 524288 - movq xmm10, rax - punpcklqdq xmm10, xmm0 - - movq xmm14, QWORD PTR [rsp+128] - movq xmm15, QWORD PTR [rsp+136] - - ALIGN(64) -CryptonightWOW_template_double_mainloop: - movdqu xmm6, XMMWORD PTR [rbx+rsi] - movq xmm0, r12 - mov ecx, ebx - movq xmm3, r14 - punpcklqdq xmm3, xmm0 - xor ebx, 16 - aesenc xmm6, xmm3 - movq rdx, xmm6 - movq xmm4, r15 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - xor ebx, 48 - paddq xmm0, xmm7 - movdqu xmm1, XMMWORD PTR [rbx+rsi] - movdqu XMMWORD PTR [rbx+rsi], xmm0 - paddq xmm1, xmm3 - xor ebx, 16 - mov eax, ebx - xor rax, 32 - movdqu xmm0, XMMWORD PTR 
[rbx+rsi] - movdqu XMMWORD PTR [rbx+rsi], xmm1 - paddq xmm0, xmm9 - movdqu XMMWORD PTR [rax+rsi], xmm0 - movdqa xmm0, xmm6 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [rcx+rsi], xmm0 - mov esi, edx - movdqu xmm5, XMMWORD PTR [r8+rdi] - and esi, 2097136 - mov ecx, r8d - movq xmm0, r13 - punpcklqdq xmm4, xmm0 - xor r8d, 16 - aesenc xmm5, xmm4 - movdqu xmm0, XMMWORD PTR [r8+rdi] - xor r8d, 48 - paddq xmm0, xmm8 - movdqu xmm1, XMMWORD PTR [r8+rdi] - movdqu XMMWORD PTR [r8+rdi], xmm0 - paddq xmm1, xmm4 - xor r8d, 16 - mov eax, r8d - xor rax, 32 - movdqu xmm0, XMMWORD PTR [r8+rdi] - movdqu XMMWORD PTR [r8+rdi], xmm1 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rdi], xmm0 - movdqa xmm0, xmm5 - pxor xmm0, xmm8 - movdqu XMMWORD PTR [rcx+rdi], xmm0 - movq rdi, xmm5 - movq rcx, xmm14 - mov ebp, edi - mov r8, QWORD PTR [rcx+rsi] - mov r10, QWORD PTR [rcx+rsi+8] - lea r9, QWORD PTR [rcx+rsi] - xor esi, 16 - - movq xmm0, rsp - movq xmm1, rsi - movq xmm2, rdi - movq xmm11, rbp - movq xmm12, r15 - movq xmm13, rdx - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp+16] - mov esi, DWORD PTR [rsp+20] - mov edi, DWORD PTR [rsp+24] - mov ebp, DWORD PTR [rsp+28] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - xor r8, rax - - movd esp, xmm3 - pextrd r15d, xmm3, 2 - movd eax, xmm7 - movd edx, xmm9 - pextrd r9d, xmm9, 2 - -CryptonightWOW_template_double_part2: - - movq rsp, xmm0 - mov DWORD PTR [rsp+16], ebx - mov DWORD PTR [rsp+20], esi - mov DWORD PTR [rsp+24], edi - mov DWORD PTR [rsp+28], ebp - - movq rsi, xmm1 - movq rdi, xmm2 - movq rbp, xmm11 - movq r15, xmm12 - movq rdx, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rbx, r8 - mov rax, r8 - mul rdx - and ebp, 2097136 - mov r8, rax - movq xmm1, rdx - movq xmm0, r8 - punpcklqdq xmm1, xmm0 - pxor xmm1, XMMWORD PTR [rcx+rsi] - xor esi, 48 - paddq xmm1, xmm7 - movdqu xmm2, XMMWORD PTR [rsi+rcx] - xor rdx, QWORD PTR [rsi+rcx] - paddq xmm2, xmm3 - xor r8, QWORD PTR [rsi+rcx+8] - movdqu 
XMMWORD PTR [rsi+rcx], xmm1 - xor esi, 16 - mov eax, esi - mov rsi, rcx - movdqu xmm0, XMMWORD PTR [rax+rcx] - movdqu XMMWORD PTR [rax+rcx], xmm2 - paddq xmm0, xmm9 - add r12, r8 - xor rax, 32 - add r14, rdx - movdqa xmm9, xmm7 - movdqa xmm7, xmm6 - movdqu XMMWORD PTR [rax+rcx], xmm0 - mov QWORD PTR [r9+8], r12 - xor r12, r10 - mov QWORD PTR [r9], r14 - movq rcx, xmm15 - xor r14, rbx - mov r10d, ebp - mov ebx, r14d - xor ebp, 16 - and ebx, 2097136 - mov r8, QWORD PTR [r10+rcx] - mov r9, QWORD PTR [r10+rcx+8] - - movq xmm0, rsp - movq xmm1, rbx - movq xmm2, rsi - movq xmm11, rdi - movq xmm12, rbp - movq xmm13, r15 - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp] - mov esi, DWORD PTR [rsp+4] - mov edi, DWORD PTR [rsp+8] - mov ebp, DWORD PTR [rsp+12] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - - xor r8, rax - movq xmm3, r8 - - movd esp, xmm4 - pextrd r15d, xmm4, 2 - movd eax, xmm8 - movd edx, xmm10 - pextrd r9d, xmm10, 2 - -CryptonightWOW_template_double_part3: - - movq rsp, xmm0 - mov DWORD PTR [rsp], ebx - mov DWORD PTR [rsp+4], esi - mov DWORD PTR [rsp+8], edi - mov DWORD PTR [rsp+12], ebp - - movq rbx, xmm1 - movq rsi, xmm2 - movq rdi, xmm11 - movq rbp, xmm12 - movq r15, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rax, r8 - mul rdi - movq xmm1, rdx - movq xmm0, rax - punpcklqdq xmm1, xmm0 - mov rdi, rcx - mov r8, rax - pxor xmm1, XMMWORD PTR [rbp+rcx] - xor ebp, 48 - paddq xmm1, xmm8 - xor r8, QWORD PTR [rbp+rcx+8] - xor rdx, QWORD PTR [rbp+rcx] - add r13, r8 - movdqu xmm2, XMMWORD PTR [rbp+rcx] - add r15, rdx - movdqu XMMWORD PTR [rbp+rcx], xmm1 - paddq xmm2, xmm4 - xor ebp, 16 - mov eax, ebp - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbp+rcx] - movdqu XMMWORD PTR [rbp+rcx], xmm2 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rcx], xmm0 - movq rax, xmm3 - movdqa xmm10, xmm8 - mov QWORD PTR [r10+rcx], r15 - movdqa xmm8, xmm5 - xor r15, rax - mov QWORD PTR [r10+rcx+8], r13 - mov r8d, r15d - xor r13, r9 - 
and r8d, 2097136 - dec r11d - jnz CryptonightWOW_template_double_mainloop - -CryptonightWOW_template_double_part4: - - mov rbx, QWORD PTR [rsp+400] - movaps xmm6, XMMWORD PTR [rsp+160] - movaps xmm7, XMMWORD PTR [rsp+176] - movaps xmm8, XMMWORD PTR [rsp+192] - movaps xmm9, XMMWORD PTR [rsp+208] - movaps xmm10, XMMWORD PTR [rsp+224] - movaps xmm11, XMMWORD PTR [rsp+240] - movaps xmm12, XMMWORD PTR [rsp+256] - movaps xmm13, XMMWORD PTR [rsp+272] - movaps xmm14, XMMWORD PTR [rsp+288] - movaps xmm15, XMMWORD PTR [rsp+304] - add rsp, 320 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - ret 0 -CryptonightWOW_template_double_end: diff --git a/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc deleted file mode 100644 index 1710cac7..00000000 --- a/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc +++ /dev/null @@ -1,413 +0,0 @@ - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov rax, rsp - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 184 - - stmxcsr DWORD PTR [rsp+272] - mov DWORD PTR [rsp+276], 24448 - ldmxcsr DWORD PTR [rsp+276] - - mov r13, QWORD PTR [rcx+224] - mov r9, rdx - mov r10, QWORD PTR [rcx+32] - mov r8, rcx - xor r10, QWORD PTR [rcx] - mov r14d, 524288 - mov r11, QWORD PTR [rcx+40] - xor r11, QWORD PTR [rcx+8] - mov rsi, QWORD PTR [rdx+224] - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov rdi, QWORD PTR [r9+32] - xor rdi, QWORD PTR [r9] - mov rbp, QWORD PTR [r9+40] - xor rbp, QWORD PTR [r9+8] - movq xmm0, rdx - movaps XMMWORD PTR [rax-88], xmm6 - movaps XMMWORD PTR [rax-104], xmm7 - movaps XMMWORD PTR [rax-120], xmm8 - movaps XMMWORD PTR [rsp+112], xmm9 - movaps XMMWORD PTR [rsp+96], xmm10 - movaps XMMWORD PTR [rsp+80], xmm11 - movaps XMMWORD PTR [rsp+64], xmm12 - movaps XMMWORD PTR [rsp+48], xmm13 - movaps XMMWORD PTR [rsp+32], xmm14 - movaps XMMWORD PTR [rsp+16], xmm15 - mov rdx, r10 - movq xmm4, QWORD 
PTR [r8+96] - and edx, 2097136 - mov rax, QWORD PTR [rcx+48] - xorps xmm13, xmm13 - xor rax, QWORD PTR [rcx+16] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r8+72] - movq xmm5, QWORD PTR [r8+104] - movq xmm7, rax - - mov eax, 1 - shl rax, 52 - movq xmm14, rax - punpcklqdq xmm14, xmm14 - - mov eax, 1023 - shl rax, 52 - movq xmm12, rax - punpcklqdq xmm12, xmm12 - - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - punpcklqdq xmm7, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [r9+56] - xor rcx, QWORD PTR [r9+24] - movq xmm3, rax - mov rax, QWORD PTR [r9+48] - xor rax, QWORD PTR [r9+16] - punpcklqdq xmm3, xmm0 - movq xmm0, rcx - mov QWORD PTR [rsp], r13 - mov rcx, QWORD PTR [r9+88] - xor rcx, QWORD PTR [r9+72] - movq xmm6, rax - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - punpcklqdq xmm6, xmm0 - movq xmm0, rcx - mov QWORD PTR [rsp+256], r10 - mov rcx, rdi - mov QWORD PTR [rsp+264], r11 - movq xmm8, rax - and ecx, 2097136 - punpcklqdq xmm8, xmm0 - movq xmm0, QWORD PTR [r9+96] - punpcklqdq xmm4, xmm0 - movq xmm0, QWORD PTR [r9+104] - lea r8, QWORD PTR [rcx+rsi] - movdqu xmm11, XMMWORD PTR [r8] - punpcklqdq xmm5, xmm0 - lea r9, QWORD PTR [rdx+r13] - movdqu xmm15, XMMWORD PTR [r9] - - ALIGN(64) -main_loop_double_sandybridge: - movdqu xmm9, xmm15 - mov eax, edx - mov ebx, edx - xor eax, 16 - xor ebx, 32 - xor edx, 48 - - movq xmm0, r11 - movq xmm2, r10 - punpcklqdq xmm2, xmm0 - aesenc xmm9, xmm2 - - movdqu xmm0, XMMWORD PTR [rax+r13] - movdqu xmm1, XMMWORD PTR [rbx+r13] - paddq xmm0, xmm7 - paddq xmm1, xmm2 - movdqu XMMWORD PTR [rbx+r13], xmm0 - movdqu xmm0, XMMWORD PTR [rdx+r13] - movdqu XMMWORD PTR [rdx+r13], xmm1 - paddq xmm0, xmm3 - movdqu XMMWORD PTR [rax+r13], xmm0 - - movq r11, xmm9 - mov edx, r11d - and edx, 2097136 - movdqa xmm0, xmm9 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [r9], xmm0 - - lea rbx, QWORD PTR [rdx+r13] - mov r10, QWORD PTR [rdx+r13] - - movdqu xmm10, xmm11 - movq xmm0, rbp - movq xmm11, rdi - punpcklqdq xmm11, xmm0 - aesenc 
xmm10, xmm11 - - mov eax, ecx - mov r12d, ecx - xor eax, 16 - xor r12d, 32 - xor ecx, 48 - - movdqu xmm0, XMMWORD PTR [rax+rsi] - paddq xmm0, xmm6 - movdqu xmm1, XMMWORD PTR [r12+rsi] - movdqu XMMWORD PTR [r12+rsi], xmm0 - paddq xmm1, xmm11 - movdqu xmm0, XMMWORD PTR [rcx+rsi] - movdqu XMMWORD PTR [rcx+rsi], xmm1 - paddq xmm0, xmm8 - movdqu XMMWORD PTR [rax+rsi], xmm0 - - movq rcx, xmm10 - and ecx, 2097136 - - movdqa xmm0, xmm10 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [r8], xmm0 - mov r12, QWORD PTR [rcx+rsi] - - mov r9, QWORD PTR [rbx+8] - - xor edx, 16 - mov r8d, edx - mov r15d, edx - - movq rdx, xmm5 - shl rdx, 32 - movq rax, xmm4 - xor rdx, rax - xor r10, rdx - mov rax, r10 - mul r11 - mov r11d, r8d - xor r11d, 48 - movq xmm0, rdx - xor rdx, [r11+r13] - movq xmm1, rax - xor rax, [r11+r13+8] - punpcklqdq xmm0, xmm1 - - pxor xmm0, XMMWORD PTR [r8+r13] - xor r8d, 32 - movdqu xmm1, XMMWORD PTR [r11+r13] - paddq xmm0, xmm7 - paddq xmm1, xmm2 - movdqu XMMWORD PTR [r11+r13], xmm0 - movdqu xmm0, XMMWORD PTR [r8+r13] - movdqu XMMWORD PTR [r8+r13], xmm1 - paddq xmm0, xmm3 - movdqu XMMWORD PTR [r15+r13], xmm0 - - mov r11, QWORD PTR [rsp+256] - add r11, rdx - mov rdx, QWORD PTR [rsp+264] - add rdx, rax - mov QWORD PTR [rbx], r11 - xor r11, r10 - mov QWORD PTR [rbx+8], rdx - xor rdx, r9 - mov QWORD PTR [rsp+256], r11 - and r11d, 2097136 - mov QWORD PTR [rsp+264], rdx - mov QWORD PTR [rsp+8], r11 - lea r15, QWORD PTR [r11+r13] - movdqu xmm15, XMMWORD PTR [r11+r13] - lea r13, QWORD PTR [rsi+rcx] - movdqa xmm0, xmm5 - psrldq xmm0, 8 - movaps xmm2, xmm13 - movq r10, xmm0 - psllq xmm5, 1 - shl r10, 32 - movdqa xmm0, xmm9 - psrldq xmm0, 8 - movdqa xmm1, xmm10 - movq r11, xmm0 - psrldq xmm1, 8 - movq r8, xmm1 - psrldq xmm4, 8 - movaps xmm0, xmm13 - movq rax, xmm4 - xor r10, rax - movaps xmm1, xmm13 - xor r10, r12 - lea rax, QWORD PTR [r11+1] - shr rax, 1 - movdqa xmm3, xmm9 - punpcklqdq xmm3, xmm10 - paddq xmm5, xmm3 - movq rdx, xmm5 - psrldq xmm5, 8 - cvtsi2sd xmm2, rax - or edx, 
-2147483647 - lea rax, QWORD PTR [r8+1] - shr rax, 1 - movq r9, xmm5 - cvtsi2sd xmm0, rax - or r9d, -2147483647 - cvtsi2sd xmm1, rdx - unpcklpd xmm2, xmm0 - movaps xmm0, xmm13 - cvtsi2sd xmm0, r9 - unpcklpd xmm1, xmm0 - divpd xmm2, xmm1 - paddq xmm2, xmm14 - cvttsd2si rax, xmm2 - psrldq xmm2, 8 - mov rbx, rax - imul rax, rdx - sub r11, rax - js div_fix_1_sandybridge -div_fix_1_ret_sandybridge: - - cvttsd2si rdx, xmm2 - mov rax, rdx - imul rax, r9 - movd xmm2, r11d - movd xmm4, ebx - sub r8, rax - js div_fix_2_sandybridge -div_fix_2_ret_sandybridge: - - movd xmm1, r8d - movd xmm0, edx - punpckldq xmm2, xmm1 - punpckldq xmm4, xmm0 - punpckldq xmm4, xmm2 - paddq xmm3, xmm4 - movdqa xmm0, xmm3 - psrlq xmm0, 12 - paddq xmm0, xmm12 - sqrtpd xmm1, xmm0 - movq r9, xmm1 - movdqa xmm5, xmm1 - psrlq xmm5, 19 - test r9, 524287 - je sqrt_fix_1_sandybridge -sqrt_fix_1_ret_sandybridge: - - movq r9, xmm10 - psrldq xmm1, 8 - movq r8, xmm1 - test r8, 524287 - je sqrt_fix_2_sandybridge -sqrt_fix_2_ret_sandybridge: - - mov r12d, ecx - mov r8d, ecx - xor r12d, 16 - xor r8d, 32 - xor ecx, 48 - mov rax, r10 - mul r9 - movq xmm0, rax - movq xmm3, rdx - punpcklqdq xmm3, xmm0 - - movdqu xmm0, XMMWORD PTR [r12+rsi] - pxor xmm0, xmm3 - movdqu xmm1, XMMWORD PTR [r8+rsi] - xor rdx, [r8+rsi] - xor rax, [r8+rsi+8] - movdqu xmm3, XMMWORD PTR [rcx+rsi] - paddq xmm0, xmm6 - paddq xmm1, xmm11 - paddq xmm3, xmm8 - movdqu XMMWORD PTR [r8+rsi], xmm0 - movdqu XMMWORD PTR [rcx+rsi], xmm1 - movdqu XMMWORD PTR [r12+rsi], xmm3 - - add rdi, rdx - mov QWORD PTR [r13], rdi - xor rdi, r10 - mov ecx, edi - and ecx, 2097136 - lea r8, QWORD PTR [rcx+rsi] - - mov rdx, QWORD PTR [r13+8] - add rbp, rax - mov QWORD PTR [r13+8], rbp - movdqu xmm11, XMMWORD PTR [rcx+rsi] - xor rbp, rdx - mov r13, QWORD PTR [rsp] - movdqa xmm3, xmm7 - mov rdx, QWORD PTR [rsp+8] - movdqa xmm8, xmm6 - mov r10, QWORD PTR [rsp+256] - movdqa xmm7, xmm9 - mov r11, QWORD PTR [rsp+264] - movdqa xmm6, xmm10 - mov r9, r15 - dec r14d - jne 
main_loop_double_sandybridge - - ldmxcsr DWORD PTR [rsp+272] - movaps xmm13, XMMWORD PTR [rsp+48] - lea r11, QWORD PTR [rsp+184] - movaps xmm6, XMMWORD PTR [r11-24] - movaps xmm7, XMMWORD PTR [r11-40] - movaps xmm8, XMMWORD PTR [r11-56] - movaps xmm9, XMMWORD PTR [r11-72] - movaps xmm10, XMMWORD PTR [r11-88] - movaps xmm11, XMMWORD PTR [r11-104] - movaps xmm12, XMMWORD PTR [r11-120] - movaps xmm14, XMMWORD PTR [rsp+32] - movaps xmm15, XMMWORD PTR [rsp+16] - mov rsp, r11 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - jmp cnv2_double_mainloop_asm_sandybridge_endp - -div_fix_1_sandybridge: - dec rbx - add r11, rdx - jmp div_fix_1_ret_sandybridge - -div_fix_2_sandybridge: - dec rdx - add r8, r9 - jmp div_fix_2_ret_sandybridge - -sqrt_fix_1_sandybridge: - movq r8, xmm3 - movdqa xmm0, xmm5 - psrldq xmm0, 8 - dec r9 - mov r11d, -1022 - shl r11, 32 - mov rax, r9 - shr r9, 19 - shr rax, 20 - mov rdx, r9 - sub rdx, rax - lea rdx, [rdx+r11+1] - add rax, r11 - imul rdx, rax - sub rdx, r8 - adc r9, 0 - movq xmm5, r9 - punpcklqdq xmm5, xmm0 - jmp sqrt_fix_1_ret_sandybridge - -sqrt_fix_2_sandybridge: - psrldq xmm3, 8 - movq r11, xmm3 - dec r8 - mov ebx, -1022 - shl rbx, 32 - mov rax, r8 - shr r8, 19 - shr rax, 20 - mov rdx, r8 - sub rdx, rax - lea rdx, [rdx+rbx+1] - add rax, rbx - imul rdx, rax - sub rdx, r11 - adc r8, 0 - movq xmm0, r8 - punpcklqdq xmm5, xmm0 - jmp sqrt_fix_2_ret_sandybridge - -cnv2_double_mainloop_asm_sandybridge_endp: diff --git a/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc deleted file mode 100644 index b881b669..00000000 --- a/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc +++ /dev/null @@ -1,182 +0,0 @@ - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 64 - - stmxcsr DWORD PTR [rsp] - mov DWORD PTR [rsp+4], 24448 - ldmxcsr DWORD PTR [rsp+4] - - 
mov rax, QWORD PTR [rcx+48] - mov r9, rcx - xor rax, QWORD PTR [rcx+16] - mov ebp, 524288 - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD PTR [rcx] - mov r11, QWORD PTR [rcx+40] - mov r10, r8 - mov rdx, QWORD PTR [rcx+56] - movq xmm3, rax - xor rdx, QWORD PTR [rcx+24] - xor r11, QWORD PTR [rcx+8] - mov rbx, QWORD PTR [rcx+224] - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - movq xmm0, rdx - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r9+72] - mov rdi, QWORD PTR [r9+104] - and r10d, 2097136 - movaps XMMWORD PTR [rsp+48], xmm6 - movq xmm4, rax - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - xorps xmm8, xmm8 - mov ax, 1023 - shl rax, 52 - movq xmm7, rax - mov r15, QWORD PTR [r9+96] - punpcklqdq xmm3, xmm0 - movq xmm0, rcx - punpcklqdq xmm4, xmm0 - - ALIGN(64) -cnv2_main_loop_bulldozer: - movdqa xmm5, XMMWORD PTR [r10+rbx] - movq xmm6, r8 - pinsrq xmm6, r11, 1 - lea rdx, QWORD PTR [r10+rbx] - lea r9, QWORD PTR [rdi+rdi] - shl rdi, 32 - - mov ecx, r10d - mov eax, r10d - xor ecx, 16 - xor eax, 32 - xor r10d, 48 - aesenc xmm5, xmm6 - movdqa xmm2, XMMWORD PTR [rcx+rbx] - movdqa xmm1, XMMWORD PTR [rax+rbx] - movdqa xmm0, XMMWORD PTR [r10+rbx] - paddq xmm2, xmm3 - paddq xmm1, xmm6 - paddq xmm0, xmm4 - movdqa XMMWORD PTR [rcx+rbx], xmm0 - movdqa XMMWORD PTR [rax+rbx], xmm2 - movdqa XMMWORD PTR [r10+rbx], xmm1 - - movaps xmm1, xmm8 - mov rsi, r15 - xor rsi, rdi - - mov edi, 1023 - shl rdi, 52 - - movq r14, xmm5 - pextrq rax, xmm5, 1 - - movdqa xmm0, xmm5 - pxor xmm0, xmm3 - mov r10, r14 - and r10d, 2097136 - movdqa XMMWORD PTR [rdx], xmm0 - xor rsi, QWORD PTR [r10+rbx] - lea r12, QWORD PTR [r10+rbx] - mov r13, QWORD PTR [r10+rbx+8] - - add r9d, r14d - or r9d, -2147483647 - xor edx, edx - div r9 - mov eax, eax - shl rdx, 32 - lea r15, [rax+rdx] - lea rax, [r14+r15] - shr rax, 12 - add rax, rdi - movq xmm0, rax - sqrtsd xmm1, xmm0 - movq rdi, xmm1 - test rdi, 524287 - je sqrt_fixup_bulldozer - shr rdi, 19 - -sqrt_fixup_bulldozer_ret: - 
mov rax, rsi - mul r14 - movq xmm1, rax - movq xmm0, rdx - punpcklqdq xmm0, xmm1 - - mov r9d, r10d - mov ecx, r10d - xor r9d, 16 - xor ecx, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [rcx+rbx] - xor rdx, [rcx+rbx] - xor rax, [rcx+rbx+8] - movdqa xmm2, XMMWORD PTR [r9+rbx] - pxor xmm2, xmm0 - paddq xmm4, XMMWORD PTR [r10+rbx] - paddq xmm2, xmm3 - paddq xmm1, xmm6 - movdqa XMMWORD PTR [r9+rbx], xmm4 - movdqa XMMWORD PTR [rcx+rbx], xmm2 - movdqa XMMWORD PTR [r10+rbx], xmm1 - - movdqa xmm4, xmm3 - add r8, rdx - add r11, rax - mov QWORD PTR [r12], r8 - xor r8, rsi - mov QWORD PTR [r12+8], r11 - mov r10, r8 - xor r11, r13 - and r10d, 2097136 - movdqa xmm3, xmm5 - dec ebp - jne cnv2_main_loop_bulldozer - - ldmxcsr DWORD PTR [rsp] - movaps xmm6, XMMWORD PTR [rsp+48] - lea r11, QWORD PTR [rsp+64] - mov rbx, QWORD PTR [r11+56] - mov rbp, QWORD PTR [r11+64] - mov rsi, QWORD PTR [r11+72] - movaps xmm8, XMMWORD PTR [r11-48] - movaps xmm7, XMMWORD PTR [rsp+32] - mov rsp, r11 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - jmp cnv2_main_loop_bulldozer_endp - -sqrt_fixup_bulldozer: - movq r9, xmm5 - add r9, r15 - dec rdi - mov edx, -1022 - shl rdx, 32 - mov rax, rdi - shr rdi, 19 - shr rax, 20 - mov rcx, rdi - sub rcx, rax - lea rcx, [rcx+rdx+1] - add rax, rdx - imul rcx, rax - sub rcx, r9 - adc rdi, 0 - jmp sqrt_fixup_bulldozer_ret - -cnv2_main_loop_bulldozer_endp: diff --git a/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc deleted file mode 100644 index 863673de..00000000 --- a/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc +++ /dev/null @@ -1,188 +0,0 @@ - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 80 - - stmxcsr DWORD PTR [rsp] - mov DWORD PTR [rsp+4], 24448 - ldmxcsr DWORD PTR [rsp+4] - - mov rax, QWORD PTR [rcx+48] - mov r9, rcx - xor rax, QWORD PTR [rcx+16] - mov esi, 524288 - mov r8, QWORD PTR [rcx+32] - mov r13d, -2147483647 
- xor r8, QWORD PTR [rcx] - mov r11, QWORD PTR [rcx+40] - mov r10, r8 - mov rdx, QWORD PTR [rcx+56] - movq xmm4, rax - xor rdx, QWORD PTR [rcx+24] - xor r11, QWORD PTR [rcx+8] - mov rbx, QWORD PTR [rcx+224] - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - movq xmm0, rdx - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r9+72] - movq xmm3, QWORD PTR [r9+104] - movaps XMMWORD PTR [rsp+64], xmm6 - movaps XMMWORD PTR [rsp+48], xmm7 - movaps XMMWORD PTR [rsp+32], xmm8 - and r10d, 2097136 - movq xmm5, rax - - xor eax, eax - mov QWORD PTR [rsp+16], rax - - mov ax, 1023 - shl rax, 52 - movq xmm8, rax - mov r15, QWORD PTR [r9+96] - punpcklqdq xmm4, xmm0 - movq xmm0, rcx - punpcklqdq xmm5, xmm0 - movdqu xmm6, XMMWORD PTR [r10+rbx] - - ALIGN(64) -main_loop_ivybridge: - lea rdx, QWORD PTR [r10+rbx] - mov ecx, r10d - mov eax, r10d - mov rdi, r15 - xor ecx, 16 - xor eax, 32 - xor r10d, 48 - movq xmm0, r11 - movq xmm7, r8 - punpcklqdq xmm7, xmm0 - aesenc xmm6, xmm7 - movq rbp, xmm6 - mov r9, rbp - and r9d, 2097136 - movdqu xmm2, XMMWORD PTR [rcx+rbx] - movdqu xmm1, XMMWORD PTR [rax+rbx] - movdqu xmm0, XMMWORD PTR [r10+rbx] - paddq xmm1, xmm7 - paddq xmm0, xmm5 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [rcx+rbx], xmm0 - movdqu XMMWORD PTR [rax+rbx], xmm2 - movdqu XMMWORD PTR [r10+rbx], xmm1 - mov r10, r9 - xor r10d, 32 - movq rcx, xmm3 - mov rax, rcx - shl rax, 32 - xor rdi, rax - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [rdx], xmm0 - xor rdi, QWORD PTR [r9+rbx] - lea r14, QWORD PTR [r9+rbx] - mov r12, QWORD PTR [r14+8] - xor edx, edx - lea r9d, DWORD PTR [ecx+ecx] - add r9d, ebp - movdqa xmm0, xmm6 - psrldq xmm0, 8 - or r9d, r13d - movq rax, xmm0 - div r9 - xorps xmm3, xmm3 - mov eax, eax - shl rdx, 32 - add rdx, rax - lea r9, QWORD PTR [rdx+rbp] - mov r15, rdx - mov rax, r9 - shr rax, 12 - movq xmm0, rax - paddq xmm0, xmm8 - sqrtsd xmm3, xmm0 - psubq xmm3, XMMWORD PTR [rsp+16] - movq rdx, xmm3 - test edx, 524287 - je sqrt_fixup_ivybridge - psrlq xmm3, 
19 -sqrt_fixup_ivybridge_ret: - - mov ecx, r10d - mov rax, rdi - mul rbp - movq xmm2, rdx - xor rdx, [rcx+rbx] - add r8, rdx - mov QWORD PTR [r14], r8 - xor r8, rdi - mov edi, r8d - and edi, 2097136 - movq xmm0, rax - xor rax, [rcx+rbx+8] - add r11, rax - mov QWORD PTR [r14+8], r11 - punpcklqdq xmm2, xmm0 - - mov r9d, r10d - xor r9d, 48 - xor r10d, 16 - pxor xmm2, XMMWORD PTR [r9+rbx] - movdqu xmm0, XMMWORD PTR [r10+rbx] - paddq xmm0, xmm5 - movdqu xmm1, XMMWORD PTR [rcx+rbx] - paddq xmm2, xmm4 - paddq xmm1, xmm7 - movdqa xmm5, xmm4 - movdqu XMMWORD PTR [r9+rbx], xmm0 - movdqa xmm4, xmm6 - movdqu XMMWORD PTR [rcx+rbx], xmm2 - movdqu XMMWORD PTR [r10+rbx], xmm1 - movdqu xmm6, [rdi+rbx] - mov r10d, edi - xor r11, r12 - dec rsi - jne main_loop_ivybridge - - ldmxcsr DWORD PTR [rsp] - mov rbx, QWORD PTR [rsp+160] - movaps xmm6, XMMWORD PTR [rsp+64] - movaps xmm7, XMMWORD PTR [rsp+48] - movaps xmm8, XMMWORD PTR [rsp+32] - add rsp, 80 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - jmp cnv2_main_loop_ivybridge_endp - -sqrt_fixup_ivybridge: - dec rdx - mov r13d, -1022 - shl r13, 32 - mov rax, rdx - shr rdx, 19 - shr rax, 20 - mov rcx, rdx - sub rcx, rax - add rax, r13 - not r13 - sub rcx, r13 - mov r13d, -2147483647 - imul rcx, rax - sub rcx, r9 - adc rdx, 0 - movq xmm3, rdx - jmp sqrt_fixup_ivybridge_ret - -cnv2_main_loop_ivybridge_endp: diff --git a/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc deleted file mode 100644 index 8ccc5e17..00000000 --- a/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc +++ /dev/null @@ -1,181 +0,0 @@ - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 64 - - stmxcsr DWORD PTR [rsp] - mov DWORD PTR [rsp+4], 24448 - ldmxcsr DWORD PTR [rsp+4] - - mov rax, QWORD PTR [rcx+48] - mov r9, rcx - xor rax, QWORD PTR [rcx+16] - mov ebp, 524288 - mov r8, QWORD PTR [rcx+32] 
- xor r8, QWORD PTR [rcx] - mov r11, QWORD PTR [rcx+40] - mov r10, r8 - mov rdx, QWORD PTR [rcx+56] - movq xmm3, rax - xor rdx, QWORD PTR [rcx+24] - xor r11, QWORD PTR [rcx+8] - mov rbx, QWORD PTR [rcx+224] - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - movq xmm0, rdx - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r9+72] - mov rdi, QWORD PTR [r9+104] - and r10d, 2097136 - movaps XMMWORD PTR [rsp+48], xmm6 - movq xmm4, rax - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - xorps xmm8, xmm8 - mov ax, 1023 - shl rax, 52 - movq xmm7, rax - mov r15, QWORD PTR [r9+96] - punpcklqdq xmm3, xmm0 - movq xmm0, rcx - punpcklqdq xmm4, xmm0 - - ALIGN(64) -main_loop_ryzen: - movdqa xmm5, XMMWORD PTR [r10+rbx] - movq xmm0, r11 - movq xmm6, r8 - punpcklqdq xmm6, xmm0 - lea rdx, QWORD PTR [r10+rbx] - lea r9, QWORD PTR [rdi+rdi] - shl rdi, 32 - - mov ecx, r10d - mov eax, r10d - xor ecx, 16 - xor eax, 32 - xor r10d, 48 - aesenc xmm5, xmm6 - movdqa xmm2, XMMWORD PTR [rcx+rbx] - movdqa xmm1, XMMWORD PTR [rax+rbx] - movdqa xmm0, XMMWORD PTR [r10+rbx] - paddq xmm2, xmm3 - paddq xmm1, xmm6 - paddq xmm0, xmm4 - movdqa XMMWORD PTR [rcx+rbx], xmm0 - movdqa XMMWORD PTR [rax+rbx], xmm2 - movdqa XMMWORD PTR [r10+rbx], xmm1 - - movaps xmm1, xmm8 - mov rsi, r15 - xor rsi, rdi - movq r14, xmm5 - movdqa xmm0, xmm5 - pxor xmm0, xmm3 - mov r10, r14 - and r10d, 2097136 - movdqa XMMWORD PTR [rdx], xmm0 - xor rsi, QWORD PTR [r10+rbx] - lea r12, QWORD PTR [r10+rbx] - mov r13, QWORD PTR [r10+rbx+8] - - add r9d, r14d - or r9d, -2147483647 - xor edx, edx - movdqa xmm0, xmm5 - psrldq xmm0, 8 - movq rax, xmm0 - - div r9 - movq xmm0, rax - movq xmm1, rdx - punpckldq xmm0, xmm1 - movq r15, xmm0 - paddq xmm0, xmm5 - movdqa xmm2, xmm0 - psrlq xmm0, 12 - paddq xmm0, xmm7 - sqrtsd xmm1, xmm0 - movq rdi, xmm1 - test rdi, 524287 - je sqrt_fixup_ryzen - shr rdi, 19 - -sqrt_fixup_ryzen_ret: - mov rax, rsi - mul r14 - movq xmm1, rax - movq xmm0, rdx - punpcklqdq xmm0, xmm1 - - mov 
r9d, r10d - mov ecx, r10d - xor r9d, 16 - xor ecx, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [rcx+rbx] - xor rdx, [rcx+rbx] - xor rax, [rcx+rbx+8] - movdqa xmm2, XMMWORD PTR [r9+rbx] - pxor xmm2, xmm0 - paddq xmm4, XMMWORD PTR [r10+rbx] - paddq xmm2, xmm3 - paddq xmm1, xmm6 - movdqa XMMWORD PTR [r9+rbx], xmm4 - movdqa XMMWORD PTR [rcx+rbx], xmm2 - movdqa XMMWORD PTR [r10+rbx], xmm1 - - movdqa xmm4, xmm3 - add r8, rdx - add r11, rax - mov QWORD PTR [r12], r8 - xor r8, rsi - mov QWORD PTR [r12+8], r11 - mov r10, r8 - xor r11, r13 - and r10d, 2097136 - movdqa xmm3, xmm5 - dec ebp - jne main_loop_ryzen - - ldmxcsr DWORD PTR [rsp] - movaps xmm6, XMMWORD PTR [rsp+48] - lea r11, QWORD PTR [rsp+64] - mov rbx, QWORD PTR [r11+56] - mov rbp, QWORD PTR [r11+64] - mov rsi, QWORD PTR [r11+72] - movaps xmm8, XMMWORD PTR [r11-48] - movaps xmm7, XMMWORD PTR [rsp+32] - mov rsp, r11 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - jmp cnv2_main_loop_ryzen_endp - -sqrt_fixup_ryzen: - movq r9, xmm2 - dec rdi - mov edx, -1022 - shl rdx, 32 - mov rax, rdi - shr rdi, 19 - shr rax, 20 - mov rcx, rdi - sub rcx, rax - lea rcx, [rcx+rdx+1] - add rax, rdx - imul rcx, rax - sub rcx, r9 - adc rdi, 0 - jmp sqrt_fixup_ryzen_ret - -cnv2_main_loop_ryzen_endp: diff --git a/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc b/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc deleted file mode 100644 index d9bfc9c1..00000000 --- a/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc +++ /dev/null @@ -1,413 +0,0 @@ - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov rax, rsp - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 184 - - stmxcsr DWORD PTR [rsp+272] - mov DWORD PTR [rsp+276], 24448 - ldmxcsr DWORD PTR [rsp+276] - - mov r13, QWORD PTR [rcx+224] - mov r9, rdx - mov r10, QWORD PTR [rcx+32] - mov r8, rcx - xor r10, QWORD PTR [rcx] - mov r14d, 393216 - mov r11, QWORD PTR [rcx+40] - xor r11, QWORD PTR [rcx+8] - mov rsi, QWORD PTR [rdx+224] - mov rdx, QWORD PTR 
[rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov rdi, QWORD PTR [r9+32] - xor rdi, QWORD PTR [r9] - mov rbp, QWORD PTR [r9+40] - xor rbp, QWORD PTR [r9+8] - movq xmm0, rdx - movaps XMMWORD PTR [rax-88], xmm6 - movaps XMMWORD PTR [rax-104], xmm7 - movaps XMMWORD PTR [rax-120], xmm8 - movaps XMMWORD PTR [rsp+112], xmm9 - movaps XMMWORD PTR [rsp+96], xmm10 - movaps XMMWORD PTR [rsp+80], xmm11 - movaps XMMWORD PTR [rsp+64], xmm12 - movaps XMMWORD PTR [rsp+48], xmm13 - movaps XMMWORD PTR [rsp+32], xmm14 - movaps XMMWORD PTR [rsp+16], xmm15 - mov rdx, r10 - movq xmm4, QWORD PTR [r8+96] - and edx, 2097136 - mov rax, QWORD PTR [rcx+48] - xorps xmm13, xmm13 - xor rax, QWORD PTR [rcx+16] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r8+72] - movq xmm5, QWORD PTR [r8+104] - movq xmm7, rax - - mov eax, 1 - shl rax, 52 - movq xmm14, rax - punpcklqdq xmm14, xmm14 - - mov eax, 1023 - shl rax, 52 - movq xmm12, rax - punpcklqdq xmm12, xmm12 - - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - punpcklqdq xmm7, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [r9+56] - xor rcx, QWORD PTR [r9+24] - movq xmm3, rax - mov rax, QWORD PTR [r9+48] - xor rax, QWORD PTR [r9+16] - punpcklqdq xmm3, xmm0 - movq xmm0, rcx - mov QWORD PTR [rsp], r13 - mov rcx, QWORD PTR [r9+88] - xor rcx, QWORD PTR [r9+72] - movq xmm6, rax - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - punpcklqdq xmm6, xmm0 - movq xmm0, rcx - mov QWORD PTR [rsp+256], r10 - mov rcx, rdi - mov QWORD PTR [rsp+264], r11 - movq xmm8, rax - and ecx, 2097136 - punpcklqdq xmm8, xmm0 - movq xmm0, QWORD PTR [r9+96] - punpcklqdq xmm4, xmm0 - movq xmm0, QWORD PTR [r9+104] - lea r8, QWORD PTR [rcx+rsi] - movdqu xmm11, XMMWORD PTR [r8] - punpcklqdq xmm5, xmm0 - lea r9, QWORD PTR [rdx+r13] - movdqu xmm15, XMMWORD PTR [r9] - - ALIGN(64) -rwz_main_loop_double: - movdqu xmm9, xmm15 - mov eax, edx - mov ebx, edx - xor eax, 16 - xor ebx, 32 - xor edx, 48 - - movq xmm0, r11 - movq xmm2, r10 - punpcklqdq xmm2, xmm0 - aesenc xmm9, xmm2 
- - movdqu xmm0, XMMWORD PTR [rdx+r13] - movdqu xmm1, XMMWORD PTR [rbx+r13] - paddq xmm0, xmm7 - paddq xmm1, xmm2 - movdqu XMMWORD PTR [rbx+r13], xmm0 - movdqu xmm0, XMMWORD PTR [rax+r13] - movdqu XMMWORD PTR [rdx+r13], xmm1 - paddq xmm0, xmm3 - movdqu XMMWORD PTR [rax+r13], xmm0 - - movq r11, xmm9 - mov edx, r11d - and edx, 2097136 - movdqa xmm0, xmm9 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [r9], xmm0 - - lea rbx, QWORD PTR [rdx+r13] - mov r10, QWORD PTR [rdx+r13] - - movdqu xmm10, xmm11 - movq xmm0, rbp - movq xmm11, rdi - punpcklqdq xmm11, xmm0 - aesenc xmm10, xmm11 - - mov eax, ecx - mov r12d, ecx - xor eax, 16 - xor r12d, 32 - xor ecx, 48 - - movdqu xmm0, XMMWORD PTR [rcx+rsi] - paddq xmm0, xmm6 - movdqu xmm1, XMMWORD PTR [r12+rsi] - movdqu XMMWORD PTR [r12+rsi], xmm0 - paddq xmm1, xmm11 - movdqu xmm0, XMMWORD PTR [rax+rsi] - movdqu XMMWORD PTR [rcx+rsi], xmm1 - paddq xmm0, xmm8 - movdqu XMMWORD PTR [rax+rsi], xmm0 - - movq rcx, xmm10 - and ecx, 2097136 - - movdqa xmm0, xmm10 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [r8], xmm0 - mov r12, QWORD PTR [rcx+rsi] - - mov r9, QWORD PTR [rbx+8] - - xor edx, 16 - mov r8d, edx - mov r15d, edx - - movq rdx, xmm5 - shl rdx, 32 - movq rax, xmm4 - xor rdx, rax - xor r10, rdx - mov rax, r10 - mul r11 - mov r11d, r8d - xor r11d, 48 - movq xmm0, rdx - xor rdx, [r11+r13] - movq xmm1, rax - xor rax, [r11+r13+8] - punpcklqdq xmm0, xmm1 - - pxor xmm0, XMMWORD PTR [r8+r13] - movdqu xmm1, XMMWORD PTR [r11+r13] - paddq xmm0, xmm3 - paddq xmm1, xmm2 - movdqu XMMWORD PTR [r8+r13], xmm0 - xor r8d, 32 - movdqu xmm0, XMMWORD PTR [r8+r13] - movdqu XMMWORD PTR [r8+r13], xmm1 - paddq xmm0, xmm7 - movdqu XMMWORD PTR [r11+r13], xmm0 - - mov r11, QWORD PTR [rsp+256] - add r11, rdx - mov rdx, QWORD PTR [rsp+264] - add rdx, rax - mov QWORD PTR [rbx], r11 - xor r11, r10 - mov QWORD PTR [rbx+8], rdx - xor rdx, r9 - mov QWORD PTR [rsp+256], r11 - and r11d, 2097136 - mov QWORD PTR [rsp+264], rdx - mov QWORD PTR [rsp+8], r11 - lea r15, QWORD PTR 
[r11+r13] - movdqu xmm15, XMMWORD PTR [r11+r13] - lea r13, QWORD PTR [rsi+rcx] - movdqa xmm0, xmm5 - psrldq xmm0, 8 - movaps xmm2, xmm13 - movq r10, xmm0 - psllq xmm5, 1 - shl r10, 32 - movdqa xmm0, xmm9 - psrldq xmm0, 8 - movdqa xmm1, xmm10 - movq r11, xmm0 - psrldq xmm1, 8 - movq r8, xmm1 - psrldq xmm4, 8 - movaps xmm0, xmm13 - movq rax, xmm4 - xor r10, rax - movaps xmm1, xmm13 - xor r10, r12 - lea rax, QWORD PTR [r11+1] - shr rax, 1 - movdqa xmm3, xmm9 - punpcklqdq xmm3, xmm10 - paddq xmm5, xmm3 - movq rdx, xmm5 - psrldq xmm5, 8 - cvtsi2sd xmm2, rax - or edx, -2147483647 - lea rax, QWORD PTR [r8+1] - shr rax, 1 - movq r9, xmm5 - cvtsi2sd xmm0, rax - or r9d, -2147483647 - cvtsi2sd xmm1, rdx - unpcklpd xmm2, xmm0 - movaps xmm0, xmm13 - cvtsi2sd xmm0, r9 - unpcklpd xmm1, xmm0 - divpd xmm2, xmm1 - paddq xmm2, xmm14 - cvttsd2si rax, xmm2 - psrldq xmm2, 8 - mov rbx, rax - imul rax, rdx - sub r11, rax - js rwz_div_fix_1 -rwz_div_fix_1_ret: - - cvttsd2si rdx, xmm2 - mov rax, rdx - imul rax, r9 - movd xmm2, r11d - movd xmm4, ebx - sub r8, rax - js rwz_div_fix_2 -rwz_div_fix_2_ret: - - movd xmm1, r8d - movd xmm0, edx - punpckldq xmm2, xmm1 - punpckldq xmm4, xmm0 - punpckldq xmm4, xmm2 - paddq xmm3, xmm4 - movdqa xmm0, xmm3 - psrlq xmm0, 12 - paddq xmm0, xmm12 - sqrtpd xmm1, xmm0 - movq r9, xmm1 - movdqa xmm5, xmm1 - psrlq xmm5, 19 - test r9, 524287 - je rwz_sqrt_fix_1 -rwz_sqrt_fix_1_ret: - - movq r9, xmm10 - psrldq xmm1, 8 - movq r8, xmm1 - test r8, 524287 - je rwz_sqrt_fix_2 -rwz_sqrt_fix_2_ret: - - mov r12d, ecx - mov r8d, ecx - xor r12d, 16 - xor r8d, 32 - xor ecx, 48 - mov rax, r10 - mul r9 - movq xmm0, rax - movq xmm3, rdx - punpcklqdq xmm3, xmm0 - - movdqu xmm0, XMMWORD PTR [r12+rsi] - pxor xmm0, xmm3 - movdqu xmm1, XMMWORD PTR [r8+rsi] - xor rdx, [r8+rsi] - xor rax, [r8+rsi+8] - movdqu xmm3, XMMWORD PTR [rcx+rsi] - paddq xmm3, xmm6 - paddq xmm1, xmm11 - paddq xmm0, xmm8 - movdqu XMMWORD PTR [r8+rsi], xmm3 - movdqu XMMWORD PTR [rcx+rsi], xmm1 - movdqu XMMWORD PTR 
[r12+rsi], xmm0 - - add rdi, rdx - mov QWORD PTR [r13], rdi - xor rdi, r10 - mov ecx, edi - and ecx, 2097136 - lea r8, QWORD PTR [rcx+rsi] - - mov rdx, QWORD PTR [r13+8] - add rbp, rax - mov QWORD PTR [r13+8], rbp - movdqu xmm11, XMMWORD PTR [rcx+rsi] - xor rbp, rdx - mov r13, QWORD PTR [rsp] - movdqa xmm3, xmm7 - mov rdx, QWORD PTR [rsp+8] - movdqa xmm8, xmm6 - mov r10, QWORD PTR [rsp+256] - movdqa xmm7, xmm9 - mov r11, QWORD PTR [rsp+264] - movdqa xmm6, xmm10 - mov r9, r15 - dec r14d - jne rwz_main_loop_double - - ldmxcsr DWORD PTR [rsp+272] - movaps xmm13, XMMWORD PTR [rsp+48] - lea r11, QWORD PTR [rsp+184] - movaps xmm6, XMMWORD PTR [r11-24] - movaps xmm7, XMMWORD PTR [r11-40] - movaps xmm8, XMMWORD PTR [r11-56] - movaps xmm9, XMMWORD PTR [r11-72] - movaps xmm10, XMMWORD PTR [r11-88] - movaps xmm11, XMMWORD PTR [r11-104] - movaps xmm12, XMMWORD PTR [r11-120] - movaps xmm14, XMMWORD PTR [rsp+32] - movaps xmm15, XMMWORD PTR [rsp+16] - mov rsp, r11 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - jmp rwz_cnv2_double_mainloop_asm_endp - -rwz_div_fix_1: - dec rbx - add r11, rdx - jmp rwz_div_fix_1_ret - -rwz_div_fix_2: - dec rdx - add r8, r9 - jmp rwz_div_fix_2_ret - -rwz_sqrt_fix_1: - movq r8, xmm3 - movdqa xmm0, xmm5 - psrldq xmm0, 8 - dec r9 - mov r11d, -1022 - shl r11, 32 - mov rax, r9 - shr r9, 19 - shr rax, 20 - mov rdx, r9 - sub rdx, rax - lea rdx, [rdx+r11+1] - add rax, r11 - imul rdx, rax - sub rdx, r8 - adc r9, 0 - movq xmm5, r9 - punpcklqdq xmm5, xmm0 - jmp rwz_sqrt_fix_1_ret - -rwz_sqrt_fix_2: - psrldq xmm3, 8 - movq r11, xmm3 - dec r8 - mov ebx, -1022 - shl rbx, 32 - mov rax, r8 - shr r8, 19 - shr rax, 20 - mov rdx, r8 - sub rdx, rax - lea rdx, [rdx+rbx+1] - add rax, rbx - imul rdx, rax - sub rdx, r11 - adc r8, 0 - movq xmm0, r8 - punpcklqdq xmm5, xmm0 - jmp rwz_sqrt_fix_2_ret - -rwz_cnv2_double_mainloop_asm_endp: diff --git a/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc b/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc deleted 
file mode 100644 index b59c02d6..00000000 --- a/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc +++ /dev/null @@ -1,188 +0,0 @@ - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 80 - - stmxcsr DWORD PTR [rsp] - mov DWORD PTR [rsp+4], 24448 - ldmxcsr DWORD PTR [rsp+4] - - mov rax, QWORD PTR [rcx+48] - mov r9, rcx - xor rax, QWORD PTR [rcx+16] - mov esi, 393216 - mov r8, QWORD PTR [rcx+32] - mov r13d, -2147483647 - xor r8, QWORD PTR [rcx] - mov r11, QWORD PTR [rcx+40] - mov r10, r8 - mov rdx, QWORD PTR [rcx+56] - movq xmm4, rax - xor rdx, QWORD PTR [rcx+24] - xor r11, QWORD PTR [rcx+8] - mov rbx, QWORD PTR [rcx+224] - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - movq xmm0, rdx - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r9+72] - movq xmm3, QWORD PTR [r9+104] - movaps XMMWORD PTR [rsp+64], xmm6 - movaps XMMWORD PTR [rsp+48], xmm7 - movaps XMMWORD PTR [rsp+32], xmm8 - and r10d, 2097136 - movq xmm5, rax - - xor eax, eax - mov QWORD PTR [rsp+16], rax - - mov ax, 1023 - shl rax, 52 - movq xmm8, rax - mov r15, QWORD PTR [r9+96] - punpcklqdq xmm4, xmm0 - movq xmm0, rcx - punpcklqdq xmm5, xmm0 - movdqu xmm6, XMMWORD PTR [r10+rbx] - - ALIGN(64) -rwz_main_loop: - lea rdx, QWORD PTR [r10+rbx] - mov ecx, r10d - mov eax, r10d - mov rdi, r15 - xor ecx, 16 - xor eax, 32 - xor r10d, 48 - movq xmm0, r11 - movq xmm7, r8 - punpcklqdq xmm7, xmm0 - aesenc xmm6, xmm7 - movq rbp, xmm6 - mov r9, rbp - and r9d, 2097136 - movdqu xmm0, XMMWORD PTR [rcx+rbx] - movdqu xmm1, XMMWORD PTR [rax+rbx] - movdqu xmm2, XMMWORD PTR [r10+rbx] - paddq xmm0, xmm5 - paddq xmm1, xmm7 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [rcx+rbx], xmm0 - movdqu XMMWORD PTR [rax+rbx], xmm2 - movdqu XMMWORD PTR [r10+rbx], xmm1 - mov r10, r9 - xor r10d, 32 - movq rcx, xmm3 - mov rax, rcx - shl rax, 32 - xor rdi, rax - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [rdx], xmm0 - xor rdi, QWORD PTR [r9+rbx] - lea r14, 
QWORD PTR [r9+rbx] - mov r12, QWORD PTR [r14+8] - xor edx, edx - lea r9d, DWORD PTR [ecx+ecx] - add r9d, ebp - movdqa xmm0, xmm6 - psrldq xmm0, 8 - or r9d, r13d - movq rax, xmm0 - div r9 - xorps xmm3, xmm3 - mov eax, eax - shl rdx, 32 - add rdx, rax - lea r9, QWORD PTR [rdx+rbp] - mov r15, rdx - mov rax, r9 - shr rax, 12 - movq xmm0, rax - paddq xmm0, xmm8 - sqrtsd xmm3, xmm0 - psubq xmm3, XMMWORD PTR [rsp+16] - movq rdx, xmm3 - test edx, 524287 - je rwz_sqrt_fixup - psrlq xmm3, 19 -rwz_sqrt_fixup_ret: - - mov ecx, r10d - mov rax, rdi - mul rbp - movq xmm2, rdx - xor rdx, [rcx+rbx] - add r8, rdx - mov QWORD PTR [r14], r8 - xor r8, rdi - mov edi, r8d - and edi, 2097136 - movq xmm0, rax - xor rax, [rcx+rbx+8] - add r11, rax - mov QWORD PTR [r14+8], r11 - punpcklqdq xmm2, xmm0 - - mov r9d, r10d - xor r9d, 48 - xor r10d, 16 - pxor xmm2, XMMWORD PTR [r9+rbx] - movdqu xmm0, XMMWORD PTR [r10+rbx] - paddq xmm0, xmm4 - movdqu xmm1, XMMWORD PTR [rcx+rbx] - paddq xmm2, xmm5 - paddq xmm1, xmm7 - movdqa xmm5, xmm4 - movdqu XMMWORD PTR [r9+rbx], xmm2 - movdqa xmm4, xmm6 - movdqu XMMWORD PTR [rcx+rbx], xmm0 - movdqu XMMWORD PTR [r10+rbx], xmm1 - movdqu xmm6, [rdi+rbx] - mov r10d, edi - xor r11, r12 - dec rsi - jne rwz_main_loop - - ldmxcsr DWORD PTR [rsp] - mov rbx, QWORD PTR [rsp+160] - movaps xmm6, XMMWORD PTR [rsp+64] - movaps xmm7, XMMWORD PTR [rsp+48] - movaps xmm8, XMMWORD PTR [rsp+32] - add rsp, 80 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - jmp cnv2_rwz_main_loop_endp - -rwz_sqrt_fixup: - dec rdx - mov r13d, -1022 - shl r13, 32 - mov rax, rdx - shr rdx, 19 - shr rax, 20 - mov rcx, rdx - sub rcx, rax - add rax, r13 - not r13 - sub rcx, r13 - mov r13d, -2147483647 - imul rcx, rax - sub rcx, r9 - adc rdx, 0 - movq xmm3, rdx - jmp rwz_sqrt_fixup_ret - -cnv2_rwz_main_loop_endp: diff --git a/src/crypto/asm/cn_main_loop.S b/src/crypto/asm/cn_main_loop.S deleted file mode 100644 index 7aed6c20..00000000 --- a/src/crypto/asm/cn_main_loop.S +++ /dev/null 
@@ -1,73 +0,0 @@ -#ifdef __APPLE__ -# define ALIGN(x) .align 6 -#else -# define ALIGN(x) .align 64 -#endif -.intel_syntax noprefix -#ifdef __APPLE__ -# define FN_PREFIX(fn) _ ## fn -.text -#else -# define FN_PREFIX(fn) fn -.section .text -#endif -.global FN_PREFIX(cnv2_mainloop_ivybridge_asm) -.global FN_PREFIX(cnv2_mainloop_ryzen_asm) -.global FN_PREFIX(cnv2_mainloop_bulldozer_asm) -.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm) -.global FN_PREFIX(cnv2_rwz_mainloop_asm) -.global FN_PREFIX(cnv2_rwz_double_mainloop_asm) - -ALIGN(64) -FN_PREFIX(cnv2_mainloop_ivybridge_asm): - sub rsp, 48 - mov rcx, rdi - #include "cn2/cnv2_main_loop_ivybridge.inc" - add rsp, 48 - ret 0 - mov eax, 3735929054 - -ALIGN(64) -FN_PREFIX(cnv2_mainloop_ryzen_asm): - sub rsp, 48 - mov rcx, rdi - #include "cn2/cnv2_main_loop_ryzen.inc" - add rsp, 48 - ret 0 - mov eax, 3735929054 - -ALIGN(64) -FN_PREFIX(cnv2_mainloop_bulldozer_asm): - sub rsp, 48 - mov rcx, rdi - #include "cn2/cnv2_main_loop_bulldozer.inc" - add rsp, 48 - ret 0 - mov eax, 3735929054 - -ALIGN(64) -FN_PREFIX(cnv2_double_mainloop_sandybridge_asm): - sub rsp, 48 - mov rcx, rdi - #include "cn2/cnv2_double_main_loop_sandybridge.inc" - add rsp, 48 - ret 0 - mov eax, 3735929054 - -ALIGN(64) -FN_PREFIX(cnv2_rwz_mainloop_asm): - sub rsp, 48 - mov rcx, rdi - #include "cn2/cnv2_rwz_main_loop.inc" - add rsp, 48 - ret 0 - mov eax, 3735929054 - -ALIGN(64) -FN_PREFIX(cnv2_rwz_double_mainloop_asm): - sub rsp, 48 - mov rcx, rdi - #include "cn2/cnv2_rwz_double_main_loop.inc" - add rsp, 48 - ret 0 - mov eax, 3735929054 diff --git a/src/crypto/asm/cn_main_loop.asm b/src/crypto/asm/cn_main_loop.asm deleted file mode 100644 index f0766a7c..00000000 --- a/src/crypto/asm/cn_main_loop.asm +++ /dev/null @@ -1,52 +0,0 @@ -_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE -PUBLIC cnv2_mainloop_ivybridge_asm -PUBLIC cnv2_mainloop_ryzen_asm -PUBLIC cnv2_mainloop_bulldozer_asm -PUBLIC cnv2_double_mainloop_sandybridge_asm -PUBLIC cnv2_rwz_mainloop_asm 
-PUBLIC cnv2_rwz_double_mainloop_asm - -ALIGN(64) -cnv2_mainloop_ivybridge_asm PROC - INCLUDE cn2/cnv2_main_loop_ivybridge.inc - ret 0 - mov eax, 3735929054 -cnv2_mainloop_ivybridge_asm ENDP - -ALIGN(64) -cnv2_mainloop_ryzen_asm PROC - INCLUDE cn2/cnv2_main_loop_ryzen.inc - ret 0 - mov eax, 3735929054 -cnv2_mainloop_ryzen_asm ENDP - -ALIGN(64) -cnv2_mainloop_bulldozer_asm PROC - INCLUDE cn2/cnv2_main_loop_bulldozer.inc - ret 0 - mov eax, 3735929054 -cnv2_mainloop_bulldozer_asm ENDP - -ALIGN(64) -cnv2_double_mainloop_sandybridge_asm PROC - INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc - ret 0 - mov eax, 3735929054 -cnv2_double_mainloop_sandybridge_asm ENDP - -ALIGN(64) -cnv2_rwz_mainloop_asm PROC - INCLUDE cn2/cnv2_rwz_main_loop.inc - ret 0 - mov eax, 3735929054 -cnv2_rwz_mainloop_asm ENDP - -ALIGN(64) -cnv2_rwz_double_mainloop_asm PROC - INCLUDE cn2/cnv2_rwz_double_main_loop.inc - ret 0 - mov eax, 3735929054 -cnv2_rwz_double_mainloop_asm ENDP - -_TEXT_CNV2_MAINLOOP ENDS -END diff --git a/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc b/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc deleted file mode 100644 index 6898a604..00000000 --- a/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc +++ /dev/null @@ -1,281 +0,0 @@ -PUBLIC CryptonightR_soft_aes_template_part1 -PUBLIC CryptonightR_soft_aes_template_mainloop -PUBLIC CryptonightR_soft_aes_template_part2 -PUBLIC CryptonightR_soft_aes_template_part3 -PUBLIC CryptonightR_soft_aes_template_end - -ALIGN(64) -CryptonightR_soft_aes_template_part1: - mov rcx, [rcx] - - mov QWORD PTR [rsp+8], rcx - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 232 - - mov eax, [rcx+96] - mov ebx, [rcx+100] - mov esi, [rcx+104] - mov edx, [rcx+108] - mov [rsp+144], eax - mov [rsp+148], ebx - mov [rsp+152], esi - mov [rsp+156], edx - - mov rax, QWORD PTR [rcx+48] - mov r10, rcx - xor rax, QWORD PTR [rcx+16] - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD 
PTR [rcx] - mov r9, QWORD PTR [rcx+40] - xor r9, QWORD PTR [rcx+8] - movd xmm4, rax - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov r11, QWORD PTR [rcx+224] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r10+72] - mov rax, QWORD PTR [r10+80] - movd xmm0, rdx - xor rax, QWORD PTR [r10+64] - - movaps XMMWORD PTR [rsp+16], xmm6 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+48], xmm8 - movaps XMMWORD PTR [rsp+64], xmm9 - movaps XMMWORD PTR [rsp+80], xmm10 - movaps XMMWORD PTR [rsp+96], xmm11 - movaps XMMWORD PTR [rsp+112], xmm12 - movaps XMMWORD PTR [rsp+128], xmm13 - - movd xmm5, rax - - mov rax, r8 - punpcklqdq xmm4, xmm0 - and eax, 2097136 - movd xmm10, QWORD PTR [r10+96] - movd xmm0, rcx - mov rcx, QWORD PTR [r10+104] - xorps xmm9, xmm9 - mov QWORD PTR [rsp+328], rax - movd xmm12, r11 - mov QWORD PTR [rsp+320], r9 - punpcklqdq xmm5, xmm0 - movd xmm13, rcx - mov r12d, 524288 - - ALIGN(64) -CryptonightR_soft_aes_template_mainloop: - movd xmm11, r12d - mov r12, QWORD PTR [r10+272] - lea r13, QWORD PTR [rax+r11] - mov esi, DWORD PTR [r13] - movd xmm0, r9 - mov r10d, DWORD PTR [r13+4] - movd xmm7, r8 - mov ebp, DWORD PTR [r13+12] - mov r14d, DWORD PTR [r13+8] - mov rdx, QWORD PTR [rsp+328] - movzx ecx, sil - shr esi, 8 - punpcklqdq xmm7, xmm0 - mov r15d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - mov edi, DWORD PTR [r12+rcx*4] - movzx ecx, r14b - shr r14d, 8 - mov ebx, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - shr ebp, 8 - mov r9d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - xor r15d, DWORD PTR [r12+rcx*4+1024] - movzx ecx, r14b - shr r14d, 8 - mov eax, r14d - shr eax, 8 - xor edi, DWORD PTR [r12+rcx*4+1024] - add eax, 256 - movzx ecx, bpl - shr ebp, 8 - xor ebx, DWORD PTR [r12+rcx*4+1024] - movzx ecx, sil - shr esi, 8 - xor r9d, DWORD PTR [r12+rcx*4+1024] - add r12, 2048 - movzx ecx, r10b - shr r10d, 8 - add r10d, 256 - mov r11d, DWORD PTR [r12+rax*4] - xor r11d, DWORD PTR [r12+rcx*4] - xor r11d, r9d - 
movzx ecx, sil - mov r10d, DWORD PTR [r12+r10*4] - shr esi, 8 - add esi, 256 - xor r10d, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - xor r10d, ebx - shr ebp, 8 - movd xmm1, r11d - add ebp, 256 - movd r11, xmm12 - mov r9d, DWORD PTR [r12+rcx*4] - xor r9d, DWORD PTR [r12+rsi*4] - mov eax, DWORD PTR [r12+rbp*4] - xor r9d, edi - movzx ecx, r14b - movd xmm0, r10d - movd xmm2, r9d - xor eax, DWORD PTR [r12+rcx*4] - mov rcx, rdx - xor eax, r15d - punpckldq xmm2, xmm1 - xor rcx, 16 - movd xmm6, eax - mov rax, rdx - punpckldq xmm6, xmm0 - xor rax, 32 - punpckldq xmm6, xmm2 - xor rdx, 48 - movdqu xmm2, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm2 - pxor xmm6, xmm7 - paddq xmm2, xmm4 - movdqu xmm1, XMMWORD PTR [rax+r11] - movdqu xmm0, XMMWORD PTR [rdx+r11] - pxor xmm6, xmm1 - pxor xmm6, xmm0 - paddq xmm0, xmm5 - movdqu XMMWORD PTR [rcx+r11], xmm0 - movdqu XMMWORD PTR [rax+r11], xmm2 - movd rcx, xmm13 - paddq xmm1, xmm7 - movdqu XMMWORD PTR [rdx+r11], xmm1 - movd rdi, xmm6 - mov r10, rdi - and r10d, 2097136 - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [r13], xmm0 - - mov ebx, [rsp+144] - mov ebp, [rsp+152] - add ebx, [rsp+148] - add ebp, [rsp+156] - shl rbp, 32 - or rbx, rbp - - xor rbx, QWORD PTR [r10+r11] - lea r14, QWORD PTR [r10+r11] - mov rbp, QWORD PTR [r14+8] - - mov [rsp+160], rbx - mov [rsp+168], rdi - mov [rsp+176], rbp - mov [rsp+184], r10 - mov r10, rsp - - mov ebx, [rsp+144] - mov esi, [rsp+148] - mov edi, [rsp+152] - mov ebp, [rsp+156] - - movd esp, xmm7 - movaps xmm0, xmm7 - psrldq xmm0, 8 - movd r15d, xmm0 - movd eax, xmm4 - movd edx, xmm5 - movaps xmm0, xmm5 - psrldq xmm0, 8 - movd r9d, xmm0 - -CryptonightR_soft_aes_template_part2: - mov rsp, r10 - mov [rsp+144], ebx - mov [rsp+148], esi - mov [rsp+152], edi - mov [rsp+156], ebp - - mov edi, edi - shl rbp, 32 - or rbp, rdi - xor r8, rbp - - mov ebx, ebx - shl rsi, 32 - or rsi, rbx - xor QWORD PTR [rsp+320], rsi - - mov rbx, [rsp+160] - mov rdi, [rsp+168] - mov rbp, [rsp+176] - mov r10, [rsp+184] - - mov 
r9, r10 - xor r9, 16 - mov rcx, r10 - xor rcx, 32 - xor r10, 48 - mov rax, rbx - mul rdi - movdqu xmm2, XMMWORD PTR [r9+r11] - movdqu xmm1, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm2 - pxor xmm6, xmm1 - paddq xmm1, xmm7 - add r8, rdx - movdqu xmm0, XMMWORD PTR [r10+r11] - pxor xmm6, xmm0 - paddq xmm0, xmm5 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [r9+r11], xmm0 - movdqa xmm5, xmm4 - mov r9, QWORD PTR [rsp+320] - movdqa xmm4, xmm6 - add r9, rax - movdqu XMMWORD PTR [rcx+r11], xmm2 - movdqu XMMWORD PTR [r10+r11], xmm1 - mov r10, QWORD PTR [rsp+304] - movd r12d, xmm11 - mov QWORD PTR [r14], r8 - xor r8, rbx - mov rax, r8 - mov QWORD PTR [r14+8], r9 - and eax, 2097136 - xor r9, rbp - mov QWORD PTR [rsp+320], r9 - mov QWORD PTR [rsp+328], rax - sub r12d, 1 - jne CryptonightR_soft_aes_template_mainloop - -CryptonightR_soft_aes_template_part3: - movaps xmm6, XMMWORD PTR [rsp+16] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+48] - movaps xmm9, XMMWORD PTR [rsp+64] - movaps xmm10, XMMWORD PTR [rsp+80] - movaps xmm11, XMMWORD PTR [rsp+96] - movaps xmm12, XMMWORD PTR [rsp+112] - movaps xmm13, XMMWORD PTR [rsp+128] - - add rsp, 232 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - ret -CryptonightR_soft_aes_template_end: diff --git a/src/crypto/asm/win64/CryptonightR_template.asm b/src/crypto/asm/win64/CryptonightR_template.asm deleted file mode 100644 index 250eca3d..00000000 --- a/src/crypto/asm/win64/CryptonightR_template.asm +++ /dev/null @@ -1,1585 +0,0 @@ -; Auto-generated file, do not edit - -_TEXT_CN_TEMPLATE SEGMENT PAGE READ EXECUTE -PUBLIC CryptonightR_instruction0 -PUBLIC CryptonightR_instruction1 -PUBLIC CryptonightR_instruction2 -PUBLIC CryptonightR_instruction3 -PUBLIC CryptonightR_instruction4 -PUBLIC CryptonightR_instruction5 -PUBLIC CryptonightR_instruction6 -PUBLIC CryptonightR_instruction7 -PUBLIC CryptonightR_instruction8 -PUBLIC CryptonightR_instruction9 -PUBLIC CryptonightR_instruction10 -PUBLIC 
CryptonightR_instruction11 -PUBLIC CryptonightR_instruction12 -PUBLIC CryptonightR_instruction13 -PUBLIC CryptonightR_instruction14 -PUBLIC CryptonightR_instruction15 -PUBLIC CryptonightR_instruction16 -PUBLIC CryptonightR_instruction17 -PUBLIC CryptonightR_instruction18 -PUBLIC CryptonightR_instruction19 -PUBLIC CryptonightR_instruction20 -PUBLIC CryptonightR_instruction21 -PUBLIC CryptonightR_instruction22 -PUBLIC CryptonightR_instruction23 -PUBLIC CryptonightR_instruction24 -PUBLIC CryptonightR_instruction25 -PUBLIC CryptonightR_instruction26 -PUBLIC CryptonightR_instruction27 -PUBLIC CryptonightR_instruction28 -PUBLIC CryptonightR_instruction29 -PUBLIC CryptonightR_instruction30 -PUBLIC CryptonightR_instruction31 -PUBLIC CryptonightR_instruction32 -PUBLIC CryptonightR_instruction33 -PUBLIC CryptonightR_instruction34 -PUBLIC CryptonightR_instruction35 -PUBLIC CryptonightR_instruction36 -PUBLIC CryptonightR_instruction37 -PUBLIC CryptonightR_instruction38 -PUBLIC CryptonightR_instruction39 -PUBLIC CryptonightR_instruction40 -PUBLIC CryptonightR_instruction41 -PUBLIC CryptonightR_instruction42 -PUBLIC CryptonightR_instruction43 -PUBLIC CryptonightR_instruction44 -PUBLIC CryptonightR_instruction45 -PUBLIC CryptonightR_instruction46 -PUBLIC CryptonightR_instruction47 -PUBLIC CryptonightR_instruction48 -PUBLIC CryptonightR_instruction49 -PUBLIC CryptonightR_instruction50 -PUBLIC CryptonightR_instruction51 -PUBLIC CryptonightR_instruction52 -PUBLIC CryptonightR_instruction53 -PUBLIC CryptonightR_instruction54 -PUBLIC CryptonightR_instruction55 -PUBLIC CryptonightR_instruction56 -PUBLIC CryptonightR_instruction57 -PUBLIC CryptonightR_instruction58 -PUBLIC CryptonightR_instruction59 -PUBLIC CryptonightR_instruction60 -PUBLIC CryptonightR_instruction61 -PUBLIC CryptonightR_instruction62 -PUBLIC CryptonightR_instruction63 -PUBLIC CryptonightR_instruction64 -PUBLIC CryptonightR_instruction65 -PUBLIC CryptonightR_instruction66 -PUBLIC CryptonightR_instruction67 -PUBLIC 
CryptonightR_instruction68 -PUBLIC CryptonightR_instruction69 -PUBLIC CryptonightR_instruction70 -PUBLIC CryptonightR_instruction71 -PUBLIC CryptonightR_instruction72 -PUBLIC CryptonightR_instruction73 -PUBLIC CryptonightR_instruction74 -PUBLIC CryptonightR_instruction75 -PUBLIC CryptonightR_instruction76 -PUBLIC CryptonightR_instruction77 -PUBLIC CryptonightR_instruction78 -PUBLIC CryptonightR_instruction79 -PUBLIC CryptonightR_instruction80 -PUBLIC CryptonightR_instruction81 -PUBLIC CryptonightR_instruction82 -PUBLIC CryptonightR_instruction83 -PUBLIC CryptonightR_instruction84 -PUBLIC CryptonightR_instruction85 -PUBLIC CryptonightR_instruction86 -PUBLIC CryptonightR_instruction87 -PUBLIC CryptonightR_instruction88 -PUBLIC CryptonightR_instruction89 -PUBLIC CryptonightR_instruction90 -PUBLIC CryptonightR_instruction91 -PUBLIC CryptonightR_instruction92 -PUBLIC CryptonightR_instruction93 -PUBLIC CryptonightR_instruction94 -PUBLIC CryptonightR_instruction95 -PUBLIC CryptonightR_instruction96 -PUBLIC CryptonightR_instruction97 -PUBLIC CryptonightR_instruction98 -PUBLIC CryptonightR_instruction99 -PUBLIC CryptonightR_instruction100 -PUBLIC CryptonightR_instruction101 -PUBLIC CryptonightR_instruction102 -PUBLIC CryptonightR_instruction103 -PUBLIC CryptonightR_instruction104 -PUBLIC CryptonightR_instruction105 -PUBLIC CryptonightR_instruction106 -PUBLIC CryptonightR_instruction107 -PUBLIC CryptonightR_instruction108 -PUBLIC CryptonightR_instruction109 -PUBLIC CryptonightR_instruction110 -PUBLIC CryptonightR_instruction111 -PUBLIC CryptonightR_instruction112 -PUBLIC CryptonightR_instruction113 -PUBLIC CryptonightR_instruction114 -PUBLIC CryptonightR_instruction115 -PUBLIC CryptonightR_instruction116 -PUBLIC CryptonightR_instruction117 -PUBLIC CryptonightR_instruction118 -PUBLIC CryptonightR_instruction119 -PUBLIC CryptonightR_instruction120 -PUBLIC CryptonightR_instruction121 -PUBLIC CryptonightR_instruction122 -PUBLIC CryptonightR_instruction123 -PUBLIC 
CryptonightR_instruction124 -PUBLIC CryptonightR_instruction125 -PUBLIC CryptonightR_instruction126 -PUBLIC CryptonightR_instruction127 -PUBLIC CryptonightR_instruction128 -PUBLIC CryptonightR_instruction129 -PUBLIC CryptonightR_instruction130 -PUBLIC CryptonightR_instruction131 -PUBLIC CryptonightR_instruction132 -PUBLIC CryptonightR_instruction133 -PUBLIC CryptonightR_instruction134 -PUBLIC CryptonightR_instruction135 -PUBLIC CryptonightR_instruction136 -PUBLIC CryptonightR_instruction137 -PUBLIC CryptonightR_instruction138 -PUBLIC CryptonightR_instruction139 -PUBLIC CryptonightR_instruction140 -PUBLIC CryptonightR_instruction141 -PUBLIC CryptonightR_instruction142 -PUBLIC CryptonightR_instruction143 -PUBLIC CryptonightR_instruction144 -PUBLIC CryptonightR_instruction145 -PUBLIC CryptonightR_instruction146 -PUBLIC CryptonightR_instruction147 -PUBLIC CryptonightR_instruction148 -PUBLIC CryptonightR_instruction149 -PUBLIC CryptonightR_instruction150 -PUBLIC CryptonightR_instruction151 -PUBLIC CryptonightR_instruction152 -PUBLIC CryptonightR_instruction153 -PUBLIC CryptonightR_instruction154 -PUBLIC CryptonightR_instruction155 -PUBLIC CryptonightR_instruction156 -PUBLIC CryptonightR_instruction157 -PUBLIC CryptonightR_instruction158 -PUBLIC CryptonightR_instruction159 -PUBLIC CryptonightR_instruction160 -PUBLIC CryptonightR_instruction161 -PUBLIC CryptonightR_instruction162 -PUBLIC CryptonightR_instruction163 -PUBLIC CryptonightR_instruction164 -PUBLIC CryptonightR_instruction165 -PUBLIC CryptonightR_instruction166 -PUBLIC CryptonightR_instruction167 -PUBLIC CryptonightR_instruction168 -PUBLIC CryptonightR_instruction169 -PUBLIC CryptonightR_instruction170 -PUBLIC CryptonightR_instruction171 -PUBLIC CryptonightR_instruction172 -PUBLIC CryptonightR_instruction173 -PUBLIC CryptonightR_instruction174 -PUBLIC CryptonightR_instruction175 -PUBLIC CryptonightR_instruction176 -PUBLIC CryptonightR_instruction177 -PUBLIC CryptonightR_instruction178 -PUBLIC 
CryptonightR_instruction179 -PUBLIC CryptonightR_instruction180 -PUBLIC CryptonightR_instruction181 -PUBLIC CryptonightR_instruction182 -PUBLIC CryptonightR_instruction183 -PUBLIC CryptonightR_instruction184 -PUBLIC CryptonightR_instruction185 -PUBLIC CryptonightR_instruction186 -PUBLIC CryptonightR_instruction187 -PUBLIC CryptonightR_instruction188 -PUBLIC CryptonightR_instruction189 -PUBLIC CryptonightR_instruction190 -PUBLIC CryptonightR_instruction191 -PUBLIC CryptonightR_instruction192 -PUBLIC CryptonightR_instruction193 -PUBLIC CryptonightR_instruction194 -PUBLIC CryptonightR_instruction195 -PUBLIC CryptonightR_instruction196 -PUBLIC CryptonightR_instruction197 -PUBLIC CryptonightR_instruction198 -PUBLIC CryptonightR_instruction199 -PUBLIC CryptonightR_instruction200 -PUBLIC CryptonightR_instruction201 -PUBLIC CryptonightR_instruction202 -PUBLIC CryptonightR_instruction203 -PUBLIC CryptonightR_instruction204 -PUBLIC CryptonightR_instruction205 -PUBLIC CryptonightR_instruction206 -PUBLIC CryptonightR_instruction207 -PUBLIC CryptonightR_instruction208 -PUBLIC CryptonightR_instruction209 -PUBLIC CryptonightR_instruction210 -PUBLIC CryptonightR_instruction211 -PUBLIC CryptonightR_instruction212 -PUBLIC CryptonightR_instruction213 -PUBLIC CryptonightR_instruction214 -PUBLIC CryptonightR_instruction215 -PUBLIC CryptonightR_instruction216 -PUBLIC CryptonightR_instruction217 -PUBLIC CryptonightR_instruction218 -PUBLIC CryptonightR_instruction219 -PUBLIC CryptonightR_instruction220 -PUBLIC CryptonightR_instruction221 -PUBLIC CryptonightR_instruction222 -PUBLIC CryptonightR_instruction223 -PUBLIC CryptonightR_instruction224 -PUBLIC CryptonightR_instruction225 -PUBLIC CryptonightR_instruction226 -PUBLIC CryptonightR_instruction227 -PUBLIC CryptonightR_instruction228 -PUBLIC CryptonightR_instruction229 -PUBLIC CryptonightR_instruction230 -PUBLIC CryptonightR_instruction231 -PUBLIC CryptonightR_instruction232 -PUBLIC CryptonightR_instruction233 -PUBLIC 
CryptonightR_instruction234 -PUBLIC CryptonightR_instruction235 -PUBLIC CryptonightR_instruction236 -PUBLIC CryptonightR_instruction237 -PUBLIC CryptonightR_instruction238 -PUBLIC CryptonightR_instruction239 -PUBLIC CryptonightR_instruction240 -PUBLIC CryptonightR_instruction241 -PUBLIC CryptonightR_instruction242 -PUBLIC CryptonightR_instruction243 -PUBLIC CryptonightR_instruction244 -PUBLIC CryptonightR_instruction245 -PUBLIC CryptonightR_instruction246 -PUBLIC CryptonightR_instruction247 -PUBLIC CryptonightR_instruction248 -PUBLIC CryptonightR_instruction249 -PUBLIC CryptonightR_instruction250 -PUBLIC CryptonightR_instruction251 -PUBLIC CryptonightR_instruction252 -PUBLIC CryptonightR_instruction253 -PUBLIC CryptonightR_instruction254 -PUBLIC CryptonightR_instruction255 -PUBLIC CryptonightR_instruction256 -PUBLIC CryptonightR_instruction_mov0 -PUBLIC CryptonightR_instruction_mov1 -PUBLIC CryptonightR_instruction_mov2 -PUBLIC CryptonightR_instruction_mov3 -PUBLIC CryptonightR_instruction_mov4 -PUBLIC CryptonightR_instruction_mov5 -PUBLIC CryptonightR_instruction_mov6 -PUBLIC CryptonightR_instruction_mov7 -PUBLIC CryptonightR_instruction_mov8 -PUBLIC CryptonightR_instruction_mov9 -PUBLIC CryptonightR_instruction_mov10 -PUBLIC CryptonightR_instruction_mov11 -PUBLIC CryptonightR_instruction_mov12 -PUBLIC CryptonightR_instruction_mov13 -PUBLIC CryptonightR_instruction_mov14 -PUBLIC CryptonightR_instruction_mov15 -PUBLIC CryptonightR_instruction_mov16 -PUBLIC CryptonightR_instruction_mov17 -PUBLIC CryptonightR_instruction_mov18 -PUBLIC CryptonightR_instruction_mov19 -PUBLIC CryptonightR_instruction_mov20 -PUBLIC CryptonightR_instruction_mov21 -PUBLIC CryptonightR_instruction_mov22 -PUBLIC CryptonightR_instruction_mov23 -PUBLIC CryptonightR_instruction_mov24 -PUBLIC CryptonightR_instruction_mov25 -PUBLIC CryptonightR_instruction_mov26 -PUBLIC CryptonightR_instruction_mov27 -PUBLIC CryptonightR_instruction_mov28 -PUBLIC CryptonightR_instruction_mov29 -PUBLIC 
CryptonightR_instruction_mov30 -PUBLIC CryptonightR_instruction_mov31 -PUBLIC CryptonightR_instruction_mov32 -PUBLIC CryptonightR_instruction_mov33 -PUBLIC CryptonightR_instruction_mov34 -PUBLIC CryptonightR_instruction_mov35 -PUBLIC CryptonightR_instruction_mov36 -PUBLIC CryptonightR_instruction_mov37 -PUBLIC CryptonightR_instruction_mov38 -PUBLIC CryptonightR_instruction_mov39 -PUBLIC CryptonightR_instruction_mov40 -PUBLIC CryptonightR_instruction_mov41 -PUBLIC CryptonightR_instruction_mov42 -PUBLIC CryptonightR_instruction_mov43 -PUBLIC CryptonightR_instruction_mov44 -PUBLIC CryptonightR_instruction_mov45 -PUBLIC CryptonightR_instruction_mov46 -PUBLIC CryptonightR_instruction_mov47 -PUBLIC CryptonightR_instruction_mov48 -PUBLIC CryptonightR_instruction_mov49 -PUBLIC CryptonightR_instruction_mov50 -PUBLIC CryptonightR_instruction_mov51 -PUBLIC CryptonightR_instruction_mov52 -PUBLIC CryptonightR_instruction_mov53 -PUBLIC CryptonightR_instruction_mov54 -PUBLIC CryptonightR_instruction_mov55 -PUBLIC CryptonightR_instruction_mov56 -PUBLIC CryptonightR_instruction_mov57 -PUBLIC CryptonightR_instruction_mov58 -PUBLIC CryptonightR_instruction_mov59 -PUBLIC CryptonightR_instruction_mov60 -PUBLIC CryptonightR_instruction_mov61 -PUBLIC CryptonightR_instruction_mov62 -PUBLIC CryptonightR_instruction_mov63 -PUBLIC CryptonightR_instruction_mov64 -PUBLIC CryptonightR_instruction_mov65 -PUBLIC CryptonightR_instruction_mov66 -PUBLIC CryptonightR_instruction_mov67 -PUBLIC CryptonightR_instruction_mov68 -PUBLIC CryptonightR_instruction_mov69 -PUBLIC CryptonightR_instruction_mov70 -PUBLIC CryptonightR_instruction_mov71 -PUBLIC CryptonightR_instruction_mov72 -PUBLIC CryptonightR_instruction_mov73 -PUBLIC CryptonightR_instruction_mov74 -PUBLIC CryptonightR_instruction_mov75 -PUBLIC CryptonightR_instruction_mov76 -PUBLIC CryptonightR_instruction_mov77 -PUBLIC CryptonightR_instruction_mov78 -PUBLIC CryptonightR_instruction_mov79 -PUBLIC CryptonightR_instruction_mov80 -PUBLIC 
CryptonightR_instruction_mov81 -PUBLIC CryptonightR_instruction_mov82 -PUBLIC CryptonightR_instruction_mov83 -PUBLIC CryptonightR_instruction_mov84 -PUBLIC CryptonightR_instruction_mov85 -PUBLIC CryptonightR_instruction_mov86 -PUBLIC CryptonightR_instruction_mov87 -PUBLIC CryptonightR_instruction_mov88 -PUBLIC CryptonightR_instruction_mov89 -PUBLIC CryptonightR_instruction_mov90 -PUBLIC CryptonightR_instruction_mov91 -PUBLIC CryptonightR_instruction_mov92 -PUBLIC CryptonightR_instruction_mov93 -PUBLIC CryptonightR_instruction_mov94 -PUBLIC CryptonightR_instruction_mov95 -PUBLIC CryptonightR_instruction_mov96 -PUBLIC CryptonightR_instruction_mov97 -PUBLIC CryptonightR_instruction_mov98 -PUBLIC CryptonightR_instruction_mov99 -PUBLIC CryptonightR_instruction_mov100 -PUBLIC CryptonightR_instruction_mov101 -PUBLIC CryptonightR_instruction_mov102 -PUBLIC CryptonightR_instruction_mov103 -PUBLIC CryptonightR_instruction_mov104 -PUBLIC CryptonightR_instruction_mov105 -PUBLIC CryptonightR_instruction_mov106 -PUBLIC CryptonightR_instruction_mov107 -PUBLIC CryptonightR_instruction_mov108 -PUBLIC CryptonightR_instruction_mov109 -PUBLIC CryptonightR_instruction_mov110 -PUBLIC CryptonightR_instruction_mov111 -PUBLIC CryptonightR_instruction_mov112 -PUBLIC CryptonightR_instruction_mov113 -PUBLIC CryptonightR_instruction_mov114 -PUBLIC CryptonightR_instruction_mov115 -PUBLIC CryptonightR_instruction_mov116 -PUBLIC CryptonightR_instruction_mov117 -PUBLIC CryptonightR_instruction_mov118 -PUBLIC CryptonightR_instruction_mov119 -PUBLIC CryptonightR_instruction_mov120 -PUBLIC CryptonightR_instruction_mov121 -PUBLIC CryptonightR_instruction_mov122 -PUBLIC CryptonightR_instruction_mov123 -PUBLIC CryptonightR_instruction_mov124 -PUBLIC CryptonightR_instruction_mov125 -PUBLIC CryptonightR_instruction_mov126 -PUBLIC CryptonightR_instruction_mov127 -PUBLIC CryptonightR_instruction_mov128 -PUBLIC CryptonightR_instruction_mov129 -PUBLIC CryptonightR_instruction_mov130 -PUBLIC 
CryptonightR_instruction_mov131 -PUBLIC CryptonightR_instruction_mov132 -PUBLIC CryptonightR_instruction_mov133 -PUBLIC CryptonightR_instruction_mov134 -PUBLIC CryptonightR_instruction_mov135 -PUBLIC CryptonightR_instruction_mov136 -PUBLIC CryptonightR_instruction_mov137 -PUBLIC CryptonightR_instruction_mov138 -PUBLIC CryptonightR_instruction_mov139 -PUBLIC CryptonightR_instruction_mov140 -PUBLIC CryptonightR_instruction_mov141 -PUBLIC CryptonightR_instruction_mov142 -PUBLIC CryptonightR_instruction_mov143 -PUBLIC CryptonightR_instruction_mov144 -PUBLIC CryptonightR_instruction_mov145 -PUBLIC CryptonightR_instruction_mov146 -PUBLIC CryptonightR_instruction_mov147 -PUBLIC CryptonightR_instruction_mov148 -PUBLIC CryptonightR_instruction_mov149 -PUBLIC CryptonightR_instruction_mov150 -PUBLIC CryptonightR_instruction_mov151 -PUBLIC CryptonightR_instruction_mov152 -PUBLIC CryptonightR_instruction_mov153 -PUBLIC CryptonightR_instruction_mov154 -PUBLIC CryptonightR_instruction_mov155 -PUBLIC CryptonightR_instruction_mov156 -PUBLIC CryptonightR_instruction_mov157 -PUBLIC CryptonightR_instruction_mov158 -PUBLIC CryptonightR_instruction_mov159 -PUBLIC CryptonightR_instruction_mov160 -PUBLIC CryptonightR_instruction_mov161 -PUBLIC CryptonightR_instruction_mov162 -PUBLIC CryptonightR_instruction_mov163 -PUBLIC CryptonightR_instruction_mov164 -PUBLIC CryptonightR_instruction_mov165 -PUBLIC CryptonightR_instruction_mov166 -PUBLIC CryptonightR_instruction_mov167 -PUBLIC CryptonightR_instruction_mov168 -PUBLIC CryptonightR_instruction_mov169 -PUBLIC CryptonightR_instruction_mov170 -PUBLIC CryptonightR_instruction_mov171 -PUBLIC CryptonightR_instruction_mov172 -PUBLIC CryptonightR_instruction_mov173 -PUBLIC CryptonightR_instruction_mov174 -PUBLIC CryptonightR_instruction_mov175 -PUBLIC CryptonightR_instruction_mov176 -PUBLIC CryptonightR_instruction_mov177 -PUBLIC CryptonightR_instruction_mov178 -PUBLIC CryptonightR_instruction_mov179 -PUBLIC CryptonightR_instruction_mov180 -PUBLIC 
CryptonightR_instruction_mov181 -PUBLIC CryptonightR_instruction_mov182 -PUBLIC CryptonightR_instruction_mov183 -PUBLIC CryptonightR_instruction_mov184 -PUBLIC CryptonightR_instruction_mov185 -PUBLIC CryptonightR_instruction_mov186 -PUBLIC CryptonightR_instruction_mov187 -PUBLIC CryptonightR_instruction_mov188 -PUBLIC CryptonightR_instruction_mov189 -PUBLIC CryptonightR_instruction_mov190 -PUBLIC CryptonightR_instruction_mov191 -PUBLIC CryptonightR_instruction_mov192 -PUBLIC CryptonightR_instruction_mov193 -PUBLIC CryptonightR_instruction_mov194 -PUBLIC CryptonightR_instruction_mov195 -PUBLIC CryptonightR_instruction_mov196 -PUBLIC CryptonightR_instruction_mov197 -PUBLIC CryptonightR_instruction_mov198 -PUBLIC CryptonightR_instruction_mov199 -PUBLIC CryptonightR_instruction_mov200 -PUBLIC CryptonightR_instruction_mov201 -PUBLIC CryptonightR_instruction_mov202 -PUBLIC CryptonightR_instruction_mov203 -PUBLIC CryptonightR_instruction_mov204 -PUBLIC CryptonightR_instruction_mov205 -PUBLIC CryptonightR_instruction_mov206 -PUBLIC CryptonightR_instruction_mov207 -PUBLIC CryptonightR_instruction_mov208 -PUBLIC CryptonightR_instruction_mov209 -PUBLIC CryptonightR_instruction_mov210 -PUBLIC CryptonightR_instruction_mov211 -PUBLIC CryptonightR_instruction_mov212 -PUBLIC CryptonightR_instruction_mov213 -PUBLIC CryptonightR_instruction_mov214 -PUBLIC CryptonightR_instruction_mov215 -PUBLIC CryptonightR_instruction_mov216 -PUBLIC CryptonightR_instruction_mov217 -PUBLIC CryptonightR_instruction_mov218 -PUBLIC CryptonightR_instruction_mov219 -PUBLIC CryptonightR_instruction_mov220 -PUBLIC CryptonightR_instruction_mov221 -PUBLIC CryptonightR_instruction_mov222 -PUBLIC CryptonightR_instruction_mov223 -PUBLIC CryptonightR_instruction_mov224 -PUBLIC CryptonightR_instruction_mov225 -PUBLIC CryptonightR_instruction_mov226 -PUBLIC CryptonightR_instruction_mov227 -PUBLIC CryptonightR_instruction_mov228 -PUBLIC CryptonightR_instruction_mov229 -PUBLIC CryptonightR_instruction_mov230 -PUBLIC 
CryptonightR_instruction_mov231 -PUBLIC CryptonightR_instruction_mov232 -PUBLIC CryptonightR_instruction_mov233 -PUBLIC CryptonightR_instruction_mov234 -PUBLIC CryptonightR_instruction_mov235 -PUBLIC CryptonightR_instruction_mov236 -PUBLIC CryptonightR_instruction_mov237 -PUBLIC CryptonightR_instruction_mov238 -PUBLIC CryptonightR_instruction_mov239 -PUBLIC CryptonightR_instruction_mov240 -PUBLIC CryptonightR_instruction_mov241 -PUBLIC CryptonightR_instruction_mov242 -PUBLIC CryptonightR_instruction_mov243 -PUBLIC CryptonightR_instruction_mov244 -PUBLIC CryptonightR_instruction_mov245 -PUBLIC CryptonightR_instruction_mov246 -PUBLIC CryptonightR_instruction_mov247 -PUBLIC CryptonightR_instruction_mov248 -PUBLIC CryptonightR_instruction_mov249 -PUBLIC CryptonightR_instruction_mov250 -PUBLIC CryptonightR_instruction_mov251 -PUBLIC CryptonightR_instruction_mov252 -PUBLIC CryptonightR_instruction_mov253 -PUBLIC CryptonightR_instruction_mov254 -PUBLIC CryptonightR_instruction_mov255 -PUBLIC CryptonightR_instruction_mov256 - -INCLUDE CryptonightWOW_template_win.inc -INCLUDE CryptonightR_template_win.inc -INCLUDE CryptonightWOW_soft_aes_template_win.inc -INCLUDE CryptonightR_soft_aes_template_win.inc - -CryptonightR_instruction0: - imul rbx, rbx -CryptonightR_instruction1: - imul rbx, rbx -CryptonightR_instruction2: - imul rbx, rbx -CryptonightR_instruction3: - add rbx, r9 - add rbx, 2147483647 -CryptonightR_instruction4: - sub rbx, r9 -CryptonightR_instruction5: - ror ebx, cl -CryptonightR_instruction6: - rol ebx, cl -CryptonightR_instruction7: - xor rbx, r9 -CryptonightR_instruction8: - imul rsi, rbx -CryptonightR_instruction9: - imul rsi, rbx -CryptonightR_instruction10: - imul rsi, rbx -CryptonightR_instruction11: - add rsi, rbx - add rsi, 2147483647 -CryptonightR_instruction12: - sub rsi, rbx -CryptonightR_instruction13: - ror esi, cl -CryptonightR_instruction14: - rol esi, cl -CryptonightR_instruction15: - xor rsi, rbx -CryptonightR_instruction16: - imul rdi, rbx 
-CryptonightR_instruction17: - imul rdi, rbx -CryptonightR_instruction18: - imul rdi, rbx -CryptonightR_instruction19: - add rdi, rbx - add rdi, 2147483647 -CryptonightR_instruction20: - sub rdi, rbx -CryptonightR_instruction21: - ror edi, cl -CryptonightR_instruction22: - rol edi, cl -CryptonightR_instruction23: - xor rdi, rbx -CryptonightR_instruction24: - imul rbp, rbx -CryptonightR_instruction25: - imul rbp, rbx -CryptonightR_instruction26: - imul rbp, rbx -CryptonightR_instruction27: - add rbp, rbx - add rbp, 2147483647 -CryptonightR_instruction28: - sub rbp, rbx -CryptonightR_instruction29: - ror ebp, cl -CryptonightR_instruction30: - rol ebp, cl -CryptonightR_instruction31: - xor rbp, rbx -CryptonightR_instruction32: - imul rbx, rsi -CryptonightR_instruction33: - imul rbx, rsi -CryptonightR_instruction34: - imul rbx, rsi -CryptonightR_instruction35: - add rbx, rsi - add rbx, 2147483647 -CryptonightR_instruction36: - sub rbx, rsi -CryptonightR_instruction37: - ror ebx, cl -CryptonightR_instruction38: - rol ebx, cl -CryptonightR_instruction39: - xor rbx, rsi -CryptonightR_instruction40: - imul rsi, rsi -CryptonightR_instruction41: - imul rsi, rsi -CryptonightR_instruction42: - imul rsi, rsi -CryptonightR_instruction43: - add rsi, r9 - add rsi, 2147483647 -CryptonightR_instruction44: - sub rsi, r9 -CryptonightR_instruction45: - ror esi, cl -CryptonightR_instruction46: - rol esi, cl -CryptonightR_instruction47: - xor rsi, r9 -CryptonightR_instruction48: - imul rdi, rsi -CryptonightR_instruction49: - imul rdi, rsi -CryptonightR_instruction50: - imul rdi, rsi -CryptonightR_instruction51: - add rdi, rsi - add rdi, 2147483647 -CryptonightR_instruction52: - sub rdi, rsi -CryptonightR_instruction53: - ror edi, cl -CryptonightR_instruction54: - rol edi, cl -CryptonightR_instruction55: - xor rdi, rsi -CryptonightR_instruction56: - imul rbp, rsi -CryptonightR_instruction57: - imul rbp, rsi -CryptonightR_instruction58: - imul rbp, rsi -CryptonightR_instruction59: - add 
rbp, rsi - add rbp, 2147483647 -CryptonightR_instruction60: - sub rbp, rsi -CryptonightR_instruction61: - ror ebp, cl -CryptonightR_instruction62: - rol ebp, cl -CryptonightR_instruction63: - xor rbp, rsi -CryptonightR_instruction64: - imul rbx, rdi -CryptonightR_instruction65: - imul rbx, rdi -CryptonightR_instruction66: - imul rbx, rdi -CryptonightR_instruction67: - add rbx, rdi - add rbx, 2147483647 -CryptonightR_instruction68: - sub rbx, rdi -CryptonightR_instruction69: - ror ebx, cl -CryptonightR_instruction70: - rol ebx, cl -CryptonightR_instruction71: - xor rbx, rdi -CryptonightR_instruction72: - imul rsi, rdi -CryptonightR_instruction73: - imul rsi, rdi -CryptonightR_instruction74: - imul rsi, rdi -CryptonightR_instruction75: - add rsi, rdi - add rsi, 2147483647 -CryptonightR_instruction76: - sub rsi, rdi -CryptonightR_instruction77: - ror esi, cl -CryptonightR_instruction78: - rol esi, cl -CryptonightR_instruction79: - xor rsi, rdi -CryptonightR_instruction80: - imul rdi, rdi -CryptonightR_instruction81: - imul rdi, rdi -CryptonightR_instruction82: - imul rdi, rdi -CryptonightR_instruction83: - add rdi, r9 - add rdi, 2147483647 -CryptonightR_instruction84: - sub rdi, r9 -CryptonightR_instruction85: - ror edi, cl -CryptonightR_instruction86: - rol edi, cl -CryptonightR_instruction87: - xor rdi, r9 -CryptonightR_instruction88: - imul rbp, rdi -CryptonightR_instruction89: - imul rbp, rdi -CryptonightR_instruction90: - imul rbp, rdi -CryptonightR_instruction91: - add rbp, rdi - add rbp, 2147483647 -CryptonightR_instruction92: - sub rbp, rdi -CryptonightR_instruction93: - ror ebp, cl -CryptonightR_instruction94: - rol ebp, cl -CryptonightR_instruction95: - xor rbp, rdi -CryptonightR_instruction96: - imul rbx, rbp -CryptonightR_instruction97: - imul rbx, rbp -CryptonightR_instruction98: - imul rbx, rbp -CryptonightR_instruction99: - add rbx, rbp - add rbx, 2147483647 -CryptonightR_instruction100: - sub rbx, rbp -CryptonightR_instruction101: - ror ebx, cl 
-CryptonightR_instruction102: - rol ebx, cl -CryptonightR_instruction103: - xor rbx, rbp -CryptonightR_instruction104: - imul rsi, rbp -CryptonightR_instruction105: - imul rsi, rbp -CryptonightR_instruction106: - imul rsi, rbp -CryptonightR_instruction107: - add rsi, rbp - add rsi, 2147483647 -CryptonightR_instruction108: - sub rsi, rbp -CryptonightR_instruction109: - ror esi, cl -CryptonightR_instruction110: - rol esi, cl -CryptonightR_instruction111: - xor rsi, rbp -CryptonightR_instruction112: - imul rdi, rbp -CryptonightR_instruction113: - imul rdi, rbp -CryptonightR_instruction114: - imul rdi, rbp -CryptonightR_instruction115: - add rdi, rbp - add rdi, 2147483647 -CryptonightR_instruction116: - sub rdi, rbp -CryptonightR_instruction117: - ror edi, cl -CryptonightR_instruction118: - rol edi, cl -CryptonightR_instruction119: - xor rdi, rbp -CryptonightR_instruction120: - imul rbp, rbp -CryptonightR_instruction121: - imul rbp, rbp -CryptonightR_instruction122: - imul rbp, rbp -CryptonightR_instruction123: - add rbp, r9 - add rbp, 2147483647 -CryptonightR_instruction124: - sub rbp, r9 -CryptonightR_instruction125: - ror ebp, cl -CryptonightR_instruction126: - rol ebp, cl -CryptonightR_instruction127: - xor rbp, r9 -CryptonightR_instruction128: - imul rbx, rsp -CryptonightR_instruction129: - imul rbx, rsp -CryptonightR_instruction130: - imul rbx, rsp -CryptonightR_instruction131: - add rbx, rsp - add rbx, 2147483647 -CryptonightR_instruction132: - sub rbx, rsp -CryptonightR_instruction133: - ror ebx, cl -CryptonightR_instruction134: - rol ebx, cl -CryptonightR_instruction135: - xor rbx, rsp -CryptonightR_instruction136: - imul rsi, rsp -CryptonightR_instruction137: - imul rsi, rsp -CryptonightR_instruction138: - imul rsi, rsp -CryptonightR_instruction139: - add rsi, rsp - add rsi, 2147483647 -CryptonightR_instruction140: - sub rsi, rsp -CryptonightR_instruction141: - ror esi, cl -CryptonightR_instruction142: - rol esi, cl -CryptonightR_instruction143: - xor rsi, 
rsp -CryptonightR_instruction144: - imul rdi, rsp -CryptonightR_instruction145: - imul rdi, rsp -CryptonightR_instruction146: - imul rdi, rsp -CryptonightR_instruction147: - add rdi, rsp - add rdi, 2147483647 -CryptonightR_instruction148: - sub rdi, rsp -CryptonightR_instruction149: - ror edi, cl -CryptonightR_instruction150: - rol edi, cl -CryptonightR_instruction151: - xor rdi, rsp -CryptonightR_instruction152: - imul rbp, rsp -CryptonightR_instruction153: - imul rbp, rsp -CryptonightR_instruction154: - imul rbp, rsp -CryptonightR_instruction155: - add rbp, rsp - add rbp, 2147483647 -CryptonightR_instruction156: - sub rbp, rsp -CryptonightR_instruction157: - ror ebp, cl -CryptonightR_instruction158: - rol ebp, cl -CryptonightR_instruction159: - xor rbp, rsp -CryptonightR_instruction160: - imul rbx, r15 -CryptonightR_instruction161: - imul rbx, r15 -CryptonightR_instruction162: - imul rbx, r15 -CryptonightR_instruction163: - add rbx, r15 - add rbx, 2147483647 -CryptonightR_instruction164: - sub rbx, r15 -CryptonightR_instruction165: - ror ebx, cl -CryptonightR_instruction166: - rol ebx, cl -CryptonightR_instruction167: - xor rbx, r15 -CryptonightR_instruction168: - imul rsi, r15 -CryptonightR_instruction169: - imul rsi, r15 -CryptonightR_instruction170: - imul rsi, r15 -CryptonightR_instruction171: - add rsi, r15 - add rsi, 2147483647 -CryptonightR_instruction172: - sub rsi, r15 -CryptonightR_instruction173: - ror esi, cl -CryptonightR_instruction174: - rol esi, cl -CryptonightR_instruction175: - xor rsi, r15 -CryptonightR_instruction176: - imul rdi, r15 -CryptonightR_instruction177: - imul rdi, r15 -CryptonightR_instruction178: - imul rdi, r15 -CryptonightR_instruction179: - add rdi, r15 - add rdi, 2147483647 -CryptonightR_instruction180: - sub rdi, r15 -CryptonightR_instruction181: - ror edi, cl -CryptonightR_instruction182: - rol edi, cl -CryptonightR_instruction183: - xor rdi, r15 -CryptonightR_instruction184: - imul rbp, r15 -CryptonightR_instruction185: - 
imul rbp, r15 -CryptonightR_instruction186: - imul rbp, r15 -CryptonightR_instruction187: - add rbp, r15 - add rbp, 2147483647 -CryptonightR_instruction188: - sub rbp, r15 -CryptonightR_instruction189: - ror ebp, cl -CryptonightR_instruction190: - rol ebp, cl -CryptonightR_instruction191: - xor rbp, r15 -CryptonightR_instruction192: - imul rbx, rax -CryptonightR_instruction193: - imul rbx, rax -CryptonightR_instruction194: - imul rbx, rax -CryptonightR_instruction195: - add rbx, rax - add rbx, 2147483647 -CryptonightR_instruction196: - sub rbx, rax -CryptonightR_instruction197: - ror ebx, cl -CryptonightR_instruction198: - rol ebx, cl -CryptonightR_instruction199: - xor rbx, rax -CryptonightR_instruction200: - imul rsi, rax -CryptonightR_instruction201: - imul rsi, rax -CryptonightR_instruction202: - imul rsi, rax -CryptonightR_instruction203: - add rsi, rax - add rsi, 2147483647 -CryptonightR_instruction204: - sub rsi, rax -CryptonightR_instruction205: - ror esi, cl -CryptonightR_instruction206: - rol esi, cl -CryptonightR_instruction207: - xor rsi, rax -CryptonightR_instruction208: - imul rdi, rax -CryptonightR_instruction209: - imul rdi, rax -CryptonightR_instruction210: - imul rdi, rax -CryptonightR_instruction211: - add rdi, rax - add rdi, 2147483647 -CryptonightR_instruction212: - sub rdi, rax -CryptonightR_instruction213: - ror edi, cl -CryptonightR_instruction214: - rol edi, cl -CryptonightR_instruction215: - xor rdi, rax -CryptonightR_instruction216: - imul rbp, rax -CryptonightR_instruction217: - imul rbp, rax -CryptonightR_instruction218: - imul rbp, rax -CryptonightR_instruction219: - add rbp, rax - add rbp, 2147483647 -CryptonightR_instruction220: - sub rbp, rax -CryptonightR_instruction221: - ror ebp, cl -CryptonightR_instruction222: - rol ebp, cl -CryptonightR_instruction223: - xor rbp, rax -CryptonightR_instruction224: - imul rbx, rdx -CryptonightR_instruction225: - imul rbx, rdx -CryptonightR_instruction226: - imul rbx, rdx 
-CryptonightR_instruction227: - add rbx, rdx - add rbx, 2147483647 -CryptonightR_instruction228: - sub rbx, rdx -CryptonightR_instruction229: - ror ebx, cl -CryptonightR_instruction230: - rol ebx, cl -CryptonightR_instruction231: - xor rbx, rdx -CryptonightR_instruction232: - imul rsi, rdx -CryptonightR_instruction233: - imul rsi, rdx -CryptonightR_instruction234: - imul rsi, rdx -CryptonightR_instruction235: - add rsi, rdx - add rsi, 2147483647 -CryptonightR_instruction236: - sub rsi, rdx -CryptonightR_instruction237: - ror esi, cl -CryptonightR_instruction238: - rol esi, cl -CryptonightR_instruction239: - xor rsi, rdx -CryptonightR_instruction240: - imul rdi, rdx -CryptonightR_instruction241: - imul rdi, rdx -CryptonightR_instruction242: - imul rdi, rdx -CryptonightR_instruction243: - add rdi, rdx - add rdi, 2147483647 -CryptonightR_instruction244: - sub rdi, rdx -CryptonightR_instruction245: - ror edi, cl -CryptonightR_instruction246: - rol edi, cl -CryptonightR_instruction247: - xor rdi, rdx -CryptonightR_instruction248: - imul rbp, rdx -CryptonightR_instruction249: - imul rbp, rdx -CryptonightR_instruction250: - imul rbp, rdx -CryptonightR_instruction251: - add rbp, rdx - add rbp, 2147483647 -CryptonightR_instruction252: - sub rbp, rdx -CryptonightR_instruction253: - ror ebp, cl -CryptonightR_instruction254: - rol ebp, cl -CryptonightR_instruction255: - xor rbp, rdx -CryptonightR_instruction256: - imul rbx, rbx -CryptonightR_instruction_mov0: - -CryptonightR_instruction_mov1: - -CryptonightR_instruction_mov2: - -CryptonightR_instruction_mov3: - -CryptonightR_instruction_mov4: - -CryptonightR_instruction_mov5: - mov rcx, rbx -CryptonightR_instruction_mov6: - mov rcx, rbx -CryptonightR_instruction_mov7: - -CryptonightR_instruction_mov8: - -CryptonightR_instruction_mov9: - -CryptonightR_instruction_mov10: - -CryptonightR_instruction_mov11: - -CryptonightR_instruction_mov12: - -CryptonightR_instruction_mov13: - mov rcx, rbx -CryptonightR_instruction_mov14: - mov 
rcx, rbx -CryptonightR_instruction_mov15: - -CryptonightR_instruction_mov16: - -CryptonightR_instruction_mov17: - -CryptonightR_instruction_mov18: - -CryptonightR_instruction_mov19: - -CryptonightR_instruction_mov20: - -CryptonightR_instruction_mov21: - mov rcx, rbx -CryptonightR_instruction_mov22: - mov rcx, rbx -CryptonightR_instruction_mov23: - -CryptonightR_instruction_mov24: - -CryptonightR_instruction_mov25: - -CryptonightR_instruction_mov26: - -CryptonightR_instruction_mov27: - -CryptonightR_instruction_mov28: - -CryptonightR_instruction_mov29: - mov rcx, rbx -CryptonightR_instruction_mov30: - mov rcx, rbx -CryptonightR_instruction_mov31: - -CryptonightR_instruction_mov32: - -CryptonightR_instruction_mov33: - -CryptonightR_instruction_mov34: - -CryptonightR_instruction_mov35: - -CryptonightR_instruction_mov36: - -CryptonightR_instruction_mov37: - mov rcx, rsi -CryptonightR_instruction_mov38: - mov rcx, rsi -CryptonightR_instruction_mov39: - -CryptonightR_instruction_mov40: - -CryptonightR_instruction_mov41: - -CryptonightR_instruction_mov42: - -CryptonightR_instruction_mov43: - -CryptonightR_instruction_mov44: - -CryptonightR_instruction_mov45: - mov rcx, rsi -CryptonightR_instruction_mov46: - mov rcx, rsi -CryptonightR_instruction_mov47: - -CryptonightR_instruction_mov48: - -CryptonightR_instruction_mov49: - -CryptonightR_instruction_mov50: - -CryptonightR_instruction_mov51: - -CryptonightR_instruction_mov52: - -CryptonightR_instruction_mov53: - mov rcx, rsi -CryptonightR_instruction_mov54: - mov rcx, rsi -CryptonightR_instruction_mov55: - -CryptonightR_instruction_mov56: - -CryptonightR_instruction_mov57: - -CryptonightR_instruction_mov58: - -CryptonightR_instruction_mov59: - -CryptonightR_instruction_mov60: - -CryptonightR_instruction_mov61: - mov rcx, rsi -CryptonightR_instruction_mov62: - mov rcx, rsi -CryptonightR_instruction_mov63: - -CryptonightR_instruction_mov64: - -CryptonightR_instruction_mov65: - -CryptonightR_instruction_mov66: - 
-CryptonightR_instruction_mov67: - -CryptonightR_instruction_mov68: - -CryptonightR_instruction_mov69: - mov rcx, rdi -CryptonightR_instruction_mov70: - mov rcx, rdi -CryptonightR_instruction_mov71: - -CryptonightR_instruction_mov72: - -CryptonightR_instruction_mov73: - -CryptonightR_instruction_mov74: - -CryptonightR_instruction_mov75: - -CryptonightR_instruction_mov76: - -CryptonightR_instruction_mov77: - mov rcx, rdi -CryptonightR_instruction_mov78: - mov rcx, rdi -CryptonightR_instruction_mov79: - -CryptonightR_instruction_mov80: - -CryptonightR_instruction_mov81: - -CryptonightR_instruction_mov82: - -CryptonightR_instruction_mov83: - -CryptonightR_instruction_mov84: - -CryptonightR_instruction_mov85: - mov rcx, rdi -CryptonightR_instruction_mov86: - mov rcx, rdi -CryptonightR_instruction_mov87: - -CryptonightR_instruction_mov88: - -CryptonightR_instruction_mov89: - -CryptonightR_instruction_mov90: - -CryptonightR_instruction_mov91: - -CryptonightR_instruction_mov92: - -CryptonightR_instruction_mov93: - mov rcx, rdi -CryptonightR_instruction_mov94: - mov rcx, rdi -CryptonightR_instruction_mov95: - -CryptonightR_instruction_mov96: - -CryptonightR_instruction_mov97: - -CryptonightR_instruction_mov98: - -CryptonightR_instruction_mov99: - -CryptonightR_instruction_mov100: - -CryptonightR_instruction_mov101: - mov rcx, rbp -CryptonightR_instruction_mov102: - mov rcx, rbp -CryptonightR_instruction_mov103: - -CryptonightR_instruction_mov104: - -CryptonightR_instruction_mov105: - -CryptonightR_instruction_mov106: - -CryptonightR_instruction_mov107: - -CryptonightR_instruction_mov108: - -CryptonightR_instruction_mov109: - mov rcx, rbp -CryptonightR_instruction_mov110: - mov rcx, rbp -CryptonightR_instruction_mov111: - -CryptonightR_instruction_mov112: - -CryptonightR_instruction_mov113: - -CryptonightR_instruction_mov114: - -CryptonightR_instruction_mov115: - -CryptonightR_instruction_mov116: - -CryptonightR_instruction_mov117: - mov rcx, rbp 
-CryptonightR_instruction_mov118: - mov rcx, rbp -CryptonightR_instruction_mov119: - -CryptonightR_instruction_mov120: - -CryptonightR_instruction_mov121: - -CryptonightR_instruction_mov122: - -CryptonightR_instruction_mov123: - -CryptonightR_instruction_mov124: - -CryptonightR_instruction_mov125: - mov rcx, rbp -CryptonightR_instruction_mov126: - mov rcx, rbp -CryptonightR_instruction_mov127: - -CryptonightR_instruction_mov128: - -CryptonightR_instruction_mov129: - -CryptonightR_instruction_mov130: - -CryptonightR_instruction_mov131: - -CryptonightR_instruction_mov132: - -CryptonightR_instruction_mov133: - mov rcx, rsp -CryptonightR_instruction_mov134: - mov rcx, rsp -CryptonightR_instruction_mov135: - -CryptonightR_instruction_mov136: - -CryptonightR_instruction_mov137: - -CryptonightR_instruction_mov138: - -CryptonightR_instruction_mov139: - -CryptonightR_instruction_mov140: - -CryptonightR_instruction_mov141: - mov rcx, rsp -CryptonightR_instruction_mov142: - mov rcx, rsp -CryptonightR_instruction_mov143: - -CryptonightR_instruction_mov144: - -CryptonightR_instruction_mov145: - -CryptonightR_instruction_mov146: - -CryptonightR_instruction_mov147: - -CryptonightR_instruction_mov148: - -CryptonightR_instruction_mov149: - mov rcx, rsp -CryptonightR_instruction_mov150: - mov rcx, rsp -CryptonightR_instruction_mov151: - -CryptonightR_instruction_mov152: - -CryptonightR_instruction_mov153: - -CryptonightR_instruction_mov154: - -CryptonightR_instruction_mov155: - -CryptonightR_instruction_mov156: - -CryptonightR_instruction_mov157: - mov rcx, rsp -CryptonightR_instruction_mov158: - mov rcx, rsp -CryptonightR_instruction_mov159: - -CryptonightR_instruction_mov160: - -CryptonightR_instruction_mov161: - -CryptonightR_instruction_mov162: - -CryptonightR_instruction_mov163: - -CryptonightR_instruction_mov164: - -CryptonightR_instruction_mov165: - mov rcx, r15 -CryptonightR_instruction_mov166: - mov rcx, r15 -CryptonightR_instruction_mov167: - 
-CryptonightR_instruction_mov168: - -CryptonightR_instruction_mov169: - -CryptonightR_instruction_mov170: - -CryptonightR_instruction_mov171: - -CryptonightR_instruction_mov172: - -CryptonightR_instruction_mov173: - mov rcx, r15 -CryptonightR_instruction_mov174: - mov rcx, r15 -CryptonightR_instruction_mov175: - -CryptonightR_instruction_mov176: - -CryptonightR_instruction_mov177: - -CryptonightR_instruction_mov178: - -CryptonightR_instruction_mov179: - -CryptonightR_instruction_mov180: - -CryptonightR_instruction_mov181: - mov rcx, r15 -CryptonightR_instruction_mov182: - mov rcx, r15 -CryptonightR_instruction_mov183: - -CryptonightR_instruction_mov184: - -CryptonightR_instruction_mov185: - -CryptonightR_instruction_mov186: - -CryptonightR_instruction_mov187: - -CryptonightR_instruction_mov188: - -CryptonightR_instruction_mov189: - mov rcx, r15 -CryptonightR_instruction_mov190: - mov rcx, r15 -CryptonightR_instruction_mov191: - -CryptonightR_instruction_mov192: - -CryptonightR_instruction_mov193: - -CryptonightR_instruction_mov194: - -CryptonightR_instruction_mov195: - -CryptonightR_instruction_mov196: - -CryptonightR_instruction_mov197: - mov rcx, rax -CryptonightR_instruction_mov198: - mov rcx, rax -CryptonightR_instruction_mov199: - -CryptonightR_instruction_mov200: - -CryptonightR_instruction_mov201: - -CryptonightR_instruction_mov202: - -CryptonightR_instruction_mov203: - -CryptonightR_instruction_mov204: - -CryptonightR_instruction_mov205: - mov rcx, rax -CryptonightR_instruction_mov206: - mov rcx, rax -CryptonightR_instruction_mov207: - -CryptonightR_instruction_mov208: - -CryptonightR_instruction_mov209: - -CryptonightR_instruction_mov210: - -CryptonightR_instruction_mov211: - -CryptonightR_instruction_mov212: - -CryptonightR_instruction_mov213: - mov rcx, rax -CryptonightR_instruction_mov214: - mov rcx, rax -CryptonightR_instruction_mov215: - -CryptonightR_instruction_mov216: - -CryptonightR_instruction_mov217: - -CryptonightR_instruction_mov218: - 
-CryptonightR_instruction_mov219: - -CryptonightR_instruction_mov220: - -CryptonightR_instruction_mov221: - mov rcx, rax -CryptonightR_instruction_mov222: - mov rcx, rax -CryptonightR_instruction_mov223: - -CryptonightR_instruction_mov224: - -CryptonightR_instruction_mov225: - -CryptonightR_instruction_mov226: - -CryptonightR_instruction_mov227: - -CryptonightR_instruction_mov228: - -CryptonightR_instruction_mov229: - mov rcx, rdx -CryptonightR_instruction_mov230: - mov rcx, rdx -CryptonightR_instruction_mov231: - -CryptonightR_instruction_mov232: - -CryptonightR_instruction_mov233: - -CryptonightR_instruction_mov234: - -CryptonightR_instruction_mov235: - -CryptonightR_instruction_mov236: - -CryptonightR_instruction_mov237: - mov rcx, rdx -CryptonightR_instruction_mov238: - mov rcx, rdx -CryptonightR_instruction_mov239: - -CryptonightR_instruction_mov240: - -CryptonightR_instruction_mov241: - -CryptonightR_instruction_mov242: - -CryptonightR_instruction_mov243: - -CryptonightR_instruction_mov244: - -CryptonightR_instruction_mov245: - mov rcx, rdx -CryptonightR_instruction_mov246: - mov rcx, rdx -CryptonightR_instruction_mov247: - -CryptonightR_instruction_mov248: - -CryptonightR_instruction_mov249: - -CryptonightR_instruction_mov250: - -CryptonightR_instruction_mov251: - -CryptonightR_instruction_mov252: - -CryptonightR_instruction_mov253: - mov rcx, rdx -CryptonightR_instruction_mov254: - mov rcx, rdx -CryptonightR_instruction_mov255: - -CryptonightR_instruction_mov256: - -_TEXT_CN_TEMPLATE ENDS -END diff --git a/src/crypto/asm/win64/CryptonightR_template_win.inc b/src/crypto/asm/win64/CryptonightR_template_win.inc deleted file mode 100644 index d24eedaa..00000000 --- a/src/crypto/asm/win64/CryptonightR_template_win.inc +++ /dev/null @@ -1,536 +0,0 @@ -PUBLIC CryptonightR_template_part1 -PUBLIC CryptonightR_template_mainloop -PUBLIC CryptonightR_template_part2 -PUBLIC CryptonightR_template_part3 -PUBLIC CryptonightR_template_end -PUBLIC 
CryptonightR_template_double_part1 -PUBLIC CryptonightR_template_double_mainloop -PUBLIC CryptonightR_template_double_part2 -PUBLIC CryptonightR_template_double_part3 -PUBLIC CryptonightR_template_double_part4 -PUBLIC CryptonightR_template_double_end - -ALIGN(64) -CryptonightR_template_part1: - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - push rdi - sub rsp, 64 - mov r12, rcx - mov r8, QWORD PTR [r12+32] - mov rdx, r12 - xor r8, QWORD PTR [r12] - mov r15, QWORD PTR [r12+40] - mov r9, r8 - xor r15, QWORD PTR [r12+8] - mov r11, QWORD PTR [r12+224] - mov r12, QWORD PTR [r12+56] - xor r12, QWORD PTR [rdx+24] - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - movaps XMMWORD PTR [rsp+48], xmm6 - movd xmm0, r12 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - movaps XMMWORD PTR [rsp], xmm9 - mov r12, QWORD PTR [rdx+88] - xor r12, QWORD PTR [rdx+72] - movd xmm6, rax - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm6, xmm0 - and r9d, 2097136 - movd xmm0, r12 - movd xmm7, rax - punpcklqdq xmm7, xmm0 - mov r10d, r9d - movd xmm9, rsp - mov rsp, r8 - mov r8d, 524288 - - mov ebx, [rdx+96] - mov esi, [rdx+100] - mov edi, [rdx+104] - mov ebp, [rdx+108] - - ALIGN(64) -CryptonightR_template_mainloop: - movdqa xmm5, XMMWORD PTR [r9+r11] - movd xmm0, r15 - movd xmm4, rsp - punpcklqdq xmm4, xmm0 - lea rdx, QWORD PTR [r9+r11] - - aesenc xmm5, xmm4 - - mov r13d, r9d - mov eax, r9d - xor r9d, 48 - xor r13d, 16 - xor eax, 32 - movdqu xmm0, XMMWORD PTR [r9+r11] - movaps xmm3, xmm0 - movdqu xmm2, XMMWORD PTR [r13+r11] - movdqu xmm1, XMMWORD PTR [rax+r11] - pxor xmm0, xmm2 - pxor xmm5, xmm1 - pxor xmm5, xmm0 - - movd r12, xmm5 - movd r10d, xmm5 - and r10d, 2097136 - - paddq xmm3, xmm7 - paddq xmm2, xmm6 - paddq xmm1, xmm4 - movdqu XMMWORD PTR [r13+r11], xmm3 - movdqu XMMWORD PTR [rax+r11], xmm2 - movdqu 
XMMWORD PTR [r9+r11], xmm1 - - movdqa xmm0, xmm5 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [rdx], xmm0 - - lea r13d, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or r13, rdx - - movd eax, xmm6 - movd edx, xmm7 - pextrd r9d, xmm7, 2 - - xor r13, QWORD PTR [r10+r11] - mov r14, QWORD PTR [r10+r11+8] - -CryptonightR_template_part2: - lea rcx, [r10+r11] - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor rsp, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r15, rax - - mov rax, r13 - mul r12 - add r15, rax - add rsp, rdx - - mov r9d, r10d - mov r12d, r10d - xor r9d, 16 - xor r12d, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [r12+r11] - movaps xmm3, xmm1 - movdqa xmm2, XMMWORD PTR [r9+r11] - movdqa xmm0, XMMWORD PTR [r10+r11] - pxor xmm1, xmm2 - pxor xmm5, xmm0 - pxor xmm5, xmm1 - paddq xmm3, xmm4 - paddq xmm2, xmm6 - paddq xmm0, xmm7 - movdqu XMMWORD PTR [r9+r11], xmm0 - movdqu XMMWORD PTR [r12+r11], xmm2 - movdqu XMMWORD PTR [r10+r11], xmm3 - - movdqa xmm7, xmm6 - mov QWORD PTR [rcx], rsp - xor rsp, r13 - mov r9d, esp - mov QWORD PTR [rcx+8], r15 - and r9d, 2097136 - xor r15, r14 - movdqa xmm6, xmm5 - dec r8d - jnz CryptonightR_template_mainloop - -CryptonightR_template_part3: - movd rsp, xmm9 - - mov rbx, QWORD PTR [rsp+136] - mov rbp, QWORD PTR [rsp+144] - mov rsi, QWORD PTR [rsp+152] - movaps xmm6, XMMWORD PTR [rsp+48] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+16] - movaps xmm9, XMMWORD PTR [rsp] - add rsp, 64 - pop rdi - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - ret 0 -CryptonightR_template_end: - -ALIGN(64) -CryptonightR_template_double_part1: - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 320 - mov r14, QWORD PTR [rcx+32] - mov r8, rcx - xor r14, QWORD PTR [rcx] - mov r12, QWORD PTR [rcx+40] - mov ebx, r14d - mov rsi, QWORD PTR [rcx+224] - and ebx, 2097136 - xor r12, QWORD PTR 
[rcx+8] - mov rcx, QWORD PTR [rcx+56] - xor rcx, QWORD PTR [r8+24] - mov rax, QWORD PTR [r8+48] - xor rax, QWORD PTR [r8+16] - mov r15, QWORD PTR [rdx+32] - xor r15, QWORD PTR [rdx] - movd xmm0, rcx - mov rcx, QWORD PTR [r8+88] - xor rcx, QWORD PTR [r8+72] - mov r13, QWORD PTR [rdx+40] - mov rdi, QWORD PTR [rdx+224] - xor r13, QWORD PTR [rdx+8] - movaps XMMWORD PTR [rsp+160], xmm6 - movaps XMMWORD PTR [rsp+176], xmm7 - movaps XMMWORD PTR [rsp+192], xmm8 - movaps XMMWORD PTR [rsp+208], xmm9 - movaps XMMWORD PTR [rsp+224], xmm10 - movaps XMMWORD PTR [rsp+240], xmm11 - movaps XMMWORD PTR [rsp+256], xmm12 - movaps XMMWORD PTR [rsp+272], xmm13 - movaps XMMWORD PTR [rsp+288], xmm14 - movaps XMMWORD PTR [rsp+304], xmm15 - movd xmm7, rax - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - - movaps xmm1, XMMWORD PTR [rdx+96] - movaps xmm2, XMMWORD PTR [r8+96] - movaps XMMWORD PTR [rsp], xmm1 - movaps XMMWORD PTR [rsp+16], xmm2 - - mov r8d, r15d - punpcklqdq xmm7, xmm0 - movd xmm0, rcx - mov rcx, QWORD PTR [rdx+56] - xor rcx, QWORD PTR [rdx+24] - movd xmm9, rax - mov QWORD PTR [rsp+128], rsi - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - punpcklqdq xmm9, xmm0 - movd xmm0, rcx - mov rcx, QWORD PTR [rdx+88] - xor rcx, QWORD PTR [rdx+72] - movd xmm8, rax - mov QWORD PTR [rsp+136], rdi - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm8, xmm0 - and r8d, 2097136 - movd xmm0, rcx - mov r11d, 524288 - movd xmm10, rax - punpcklqdq xmm10, xmm0 - - movd xmm14, QWORD PTR [rsp+128] - movd xmm15, QWORD PTR [rsp+136] - - ALIGN(64) -CryptonightR_template_double_mainloop: - movdqu xmm6, XMMWORD PTR [rbx+rsi] - movd xmm0, r12 - mov ecx, ebx - movd xmm3, r14 - punpcklqdq xmm3, xmm0 - xor ebx, 16 - aesenc xmm6, xmm3 - movd xmm4, r15 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm0 - xor ebx, 48 - paddq xmm0, xmm7 - movdqu xmm1, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm1 - movdqu XMMWORD PTR [rbx+rsi], xmm0 - paddq xmm1, xmm3 - xor ebx, 16 - 
mov eax, ebx - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm0 - movd rdx, xmm6 - movdqu XMMWORD PTR [rbx+rsi], xmm1 - paddq xmm0, xmm9 - movdqu XMMWORD PTR [rax+rsi], xmm0 - movdqa xmm0, xmm6 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [rcx+rsi], xmm0 - mov esi, edx - movdqu xmm5, XMMWORD PTR [r8+rdi] - and esi, 2097136 - mov ecx, r8d - movd xmm0, r13 - punpcklqdq xmm4, xmm0 - xor r8d, 16 - aesenc xmm5, xmm4 - movdqu xmm0, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm0 - xor r8d, 48 - paddq xmm0, xmm8 - movdqu xmm1, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm1 - movdqu XMMWORD PTR [r8+rdi], xmm0 - paddq xmm1, xmm4 - xor r8d, 16 - mov eax, r8d - xor rax, 32 - movdqu xmm0, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm0 - movdqu XMMWORD PTR [r8+rdi], xmm1 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rdi], xmm0 - movdqa xmm0, xmm5 - pxor xmm0, xmm8 - movdqu XMMWORD PTR [rcx+rdi], xmm0 - movd rdi, xmm5 - movd rcx, xmm14 - mov ebp, edi - mov r8, QWORD PTR [rcx+rsi] - mov r10, QWORD PTR [rcx+rsi+8] - lea r9, QWORD PTR [rcx+rsi] - xor esi, 16 - - movd xmm0, rsp - movd xmm1, rsi - movd xmm2, rdi - movd xmm11, rbp - movd xmm12, r15 - movd xmm13, rdx - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp+16] - mov esi, DWORD PTR [rsp+20] - mov edi, DWORD PTR [rsp+24] - mov ebp, DWORD PTR [rsp+28] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - xor r8, rax - - movd esp, xmm3 - pextrd r15d, xmm3, 2 - movd eax, xmm7 - movd edx, xmm9 - pextrd r9d, xmm9, 2 - -CryptonightR_template_double_part2: - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor r14, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r12, rax - - movd rsp, xmm0 - mov DWORD PTR [rsp+16], ebx - mov DWORD PTR [rsp+20], esi - mov DWORD PTR [rsp+24], edi - mov DWORD PTR [rsp+28], ebp - - movd rsi, xmm1 - movd rdi, xmm2 - movd rbp, xmm11 - movd r15, xmm12 - movd rdx, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rbx, r8 - mov rax, r8 - mul rdx - 
and ebp, 2097136 - mov r8, rax - movdqu xmm1, XMMWORD PTR [rcx+rsi] - pxor xmm6, xmm1 - xor esi, 48 - paddq xmm1, xmm7 - movdqu xmm2, XMMWORD PTR [rsi+rcx] - pxor xmm6, xmm2 - paddq xmm2, xmm3 - movdqu XMMWORD PTR [rsi+rcx], xmm1 - xor esi, 16 - mov eax, esi - mov rsi, rcx - movdqu xmm0, XMMWORD PTR [rax+rcx] - pxor xmm6, xmm0 - movdqu XMMWORD PTR [rax+rcx], xmm2 - paddq xmm0, xmm9 - add r12, r8 - xor rax, 32 - add r14, rdx - movdqa xmm9, xmm7 - movdqa xmm7, xmm6 - movdqu XMMWORD PTR [rax+rcx], xmm0 - mov QWORD PTR [r9+8], r12 - xor r12, r10 - mov QWORD PTR [r9], r14 - movd rcx, xmm15 - xor r14, rbx - mov r10d, ebp - mov ebx, r14d - xor ebp, 16 - and ebx, 2097136 - mov r8, QWORD PTR [r10+rcx] - mov r9, QWORD PTR [r10+rcx+8] - - movd xmm0, rsp - movd xmm1, rbx - movd xmm2, rsi - movd xmm11, rdi - movd xmm12, rbp - movd xmm13, r15 - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp] - mov esi, DWORD PTR [rsp+4] - mov edi, DWORD PTR [rsp+8] - mov ebp, DWORD PTR [rsp+12] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - - xor r8, rax - movd xmm3, r8 - - movd esp, xmm4 - pextrd r15d, xmm4, 2 - movd eax, xmm8 - movd edx, xmm10 - pextrd r9d, xmm10, 2 - -CryptonightR_template_double_part3: - - movd r15, xmm13 - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor r15, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r13, rax - - movd rsp, xmm0 - mov DWORD PTR [rsp], ebx - mov DWORD PTR [rsp+4], esi - mov DWORD PTR [rsp+8], edi - mov DWORD PTR [rsp+12], ebp - - movd rbx, xmm1 - movd rsi, xmm2 - movd rdi, xmm11 - movd rbp, xmm12 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rax, r8 - mul rdi - mov rdi, rcx - mov r8, rax - movdqu xmm1, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm1 - xor ebp, 48 - paddq xmm1, xmm8 - add r13, r8 - movdqu xmm2, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm2 - add r15, rdx - movdqu XMMWORD PTR [rbp+rcx], xmm1 - paddq xmm2, xmm4 - xor ebp, 16 - mov eax, ebp - xor rax, 32 - movdqu xmm0, 
XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm0 - movdqu XMMWORD PTR [rbp+rcx], xmm2 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rcx], xmm0 - movd rax, xmm3 - movdqa xmm10, xmm8 - mov QWORD PTR [r10+rcx], r15 - movdqa xmm8, xmm5 - xor r15, rax - mov QWORD PTR [r10+rcx+8], r13 - mov r8d, r15d - xor r13, r9 - and r8d, 2097136 - dec r11d - jnz CryptonightR_template_double_mainloop - -CryptonightR_template_double_part4: - - mov rbx, QWORD PTR [rsp+400] - movaps xmm6, XMMWORD PTR [rsp+160] - movaps xmm7, XMMWORD PTR [rsp+176] - movaps xmm8, XMMWORD PTR [rsp+192] - movaps xmm9, XMMWORD PTR [rsp+208] - movaps xmm10, XMMWORD PTR [rsp+224] - movaps xmm11, XMMWORD PTR [rsp+240] - movaps xmm12, XMMWORD PTR [rsp+256] - movaps xmm13, XMMWORD PTR [rsp+272] - movaps xmm14, XMMWORD PTR [rsp+288] - movaps xmm15, XMMWORD PTR [rsp+304] - add rsp, 320 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - ret 0 -CryptonightR_template_double_end: diff --git a/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc b/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc deleted file mode 100644 index 1c73f77c..00000000 --- a/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc +++ /dev/null @@ -1,268 +0,0 @@ -PUBLIC CryptonightWOW_soft_aes_template_part1 -PUBLIC CryptonightWOW_soft_aes_template_mainloop -PUBLIC CryptonightWOW_soft_aes_template_part2 -PUBLIC CryptonightWOW_soft_aes_template_part3 -PUBLIC CryptonightWOW_soft_aes_template_end - -ALIGN(64) -CryptonightWOW_soft_aes_template_part1: - mov rcx, [rcx] - - mov QWORD PTR [rsp+8], rcx - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 232 - - mov eax, [rcx+96] - mov ebx, [rcx+100] - mov esi, [rcx+104] - mov edx, [rcx+108] - mov [rsp+144], eax - mov [rsp+148], ebx - mov [rsp+152], esi - mov [rsp+156], edx - - mov rax, QWORD PTR [rcx+48] - mov r10, rcx - xor rax, QWORD PTR [rcx+16] - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD PTR [rcx] - mov r9, QWORD 
PTR [rcx+40] - xor r9, QWORD PTR [rcx+8] - movd xmm4, rax - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov r11, QWORD PTR [rcx+224] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r10+72] - mov rax, QWORD PTR [r10+80] - movd xmm0, rdx - xor rax, QWORD PTR [r10+64] - - movaps XMMWORD PTR [rsp+16], xmm6 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+48], xmm8 - movaps XMMWORD PTR [rsp+64], xmm9 - movaps XMMWORD PTR [rsp+80], xmm10 - movaps XMMWORD PTR [rsp+96], xmm11 - movaps XMMWORD PTR [rsp+112], xmm12 - movaps XMMWORD PTR [rsp+128], xmm13 - - movd xmm5, rax - - mov rax, r8 - punpcklqdq xmm4, xmm0 - and eax, 2097136 - movd xmm10, QWORD PTR [r10+96] - movd xmm0, rcx - mov rcx, QWORD PTR [r10+104] - xorps xmm9, xmm9 - mov QWORD PTR [rsp+328], rax - movd xmm12, r11 - mov QWORD PTR [rsp+320], r9 - punpcklqdq xmm5, xmm0 - movd xmm13, rcx - mov r12d, 524288 - - ALIGN(64) -CryptonightWOW_soft_aes_template_mainloop: - movd xmm11, r12d - mov r12, QWORD PTR [r10+272] - lea r13, QWORD PTR [rax+r11] - mov esi, DWORD PTR [r13] - movd xmm0, r9 - mov r10d, DWORD PTR [r13+4] - movd xmm7, r8 - mov ebp, DWORD PTR [r13+12] - mov r14d, DWORD PTR [r13+8] - mov rdx, QWORD PTR [rsp+328] - movzx ecx, sil - shr esi, 8 - punpcklqdq xmm7, xmm0 - mov r15d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - mov edi, DWORD PTR [r12+rcx*4] - movzx ecx, r14b - shr r14d, 8 - mov ebx, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - shr ebp, 8 - mov r9d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - xor r15d, DWORD PTR [r12+rcx*4+1024] - movzx ecx, r14b - shr r14d, 8 - mov eax, r14d - shr eax, 8 - xor edi, DWORD PTR [r12+rcx*4+1024] - add eax, 256 - movzx ecx, bpl - shr ebp, 8 - xor ebx, DWORD PTR [r12+rcx*4+1024] - movzx ecx, sil - shr esi, 8 - xor r9d, DWORD PTR [r12+rcx*4+1024] - add r12, 2048 - movzx ecx, r10b - shr r10d, 8 - add r10d, 256 - mov r11d, DWORD PTR [r12+rax*4] - xor r11d, DWORD PTR [r12+rcx*4] - xor r11d, r9d - movzx ecx, sil - mov r10d, 
DWORD PTR [r12+r10*4] - shr esi, 8 - add esi, 256 - xor r10d, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - xor r10d, ebx - shr ebp, 8 - movd xmm1, r11d - add ebp, 256 - movd r11, xmm12 - mov r9d, DWORD PTR [r12+rcx*4] - xor r9d, DWORD PTR [r12+rsi*4] - mov eax, DWORD PTR [r12+rbp*4] - xor r9d, edi - movzx ecx, r14b - movd xmm0, r10d - movd xmm2, r9d - xor eax, DWORD PTR [r12+rcx*4] - mov rcx, rdx - xor eax, r15d - punpckldq xmm2, xmm1 - xor rcx, 16 - movd xmm6, eax - mov rax, rdx - punpckldq xmm6, xmm0 - xor rax, 32 - punpckldq xmm6, xmm2 - xor rdx, 48 - movdqu xmm2, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm7 - paddq xmm2, xmm4 - movdqu xmm1, XMMWORD PTR [rax+r11] - movdqu xmm0, XMMWORD PTR [rdx+r11] - paddq xmm0, xmm5 - movdqu XMMWORD PTR [rcx+r11], xmm0 - movdqu XMMWORD PTR [rax+r11], xmm2 - movd rcx, xmm13 - paddq xmm1, xmm7 - movdqu XMMWORD PTR [rdx+r11], xmm1 - movd rdi, xmm6 - mov r10, rdi - and r10d, 2097136 - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [r13], xmm0 - - mov ebx, [rsp+144] - mov ebp, [rsp+152] - add ebx, [rsp+148] - add ebp, [rsp+156] - shl rbp, 32 - or rbx, rbp - - xor rbx, QWORD PTR [r10+r11] - lea r14, QWORD PTR [r10+r11] - mov rbp, QWORD PTR [r14+8] - - mov [rsp+160], rbx - mov [rsp+168], rdi - mov [rsp+176], rbp - mov [rsp+184], r10 - mov r10, rsp - - mov ebx, [rsp+144] - mov esi, [rsp+148] - mov edi, [rsp+152] - mov ebp, [rsp+156] - - movd esp, xmm7 - movaps xmm0, xmm7 - psrldq xmm0, 8 - movd r15d, xmm0 - movd eax, xmm4 - movd edx, xmm5 - -CryptonightWOW_soft_aes_template_part2: - mov rsp, r10 - mov [rsp+144], ebx - mov [rsp+148], esi - mov [rsp+152], edi - mov [rsp+156], ebp - - mov rbx, [rsp+160] - mov rdi, [rsp+168] - mov rbp, [rsp+176] - mov r10, [rsp+184] - - mov r9, r10 - xor r9, 16 - mov rcx, r10 - xor rcx, 32 - xor r10, 48 - mov rax, rbx - mul rdi - movdqu xmm2, XMMWORD PTR [r9+r11] - movdqu xmm1, XMMWORD PTR [rcx+r11] - paddq xmm1, xmm7 - movd xmm0, rax - movd xmm3, rdx - xor rax, QWORD PTR [r11+rcx+8] - xor rdx, QWORD PTR 
[rcx+r11] - punpcklqdq xmm3, xmm0 - add r8, rdx - movdqu xmm0, XMMWORD PTR [r10+r11] - pxor xmm2, xmm3 - paddq xmm0, xmm5 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [r9+r11], xmm0 - movdqa xmm5, xmm4 - mov r9, QWORD PTR [rsp+320] - movdqa xmm4, xmm6 - add r9, rax - movdqu XMMWORD PTR [rcx+r11], xmm2 - movdqu XMMWORD PTR [r10+r11], xmm1 - mov r10, QWORD PTR [rsp+304] - movd r12d, xmm11 - mov QWORD PTR [r14], r8 - xor r8, rbx - mov rax, r8 - mov QWORD PTR [r14+8], r9 - and eax, 2097136 - xor r9, rbp - mov QWORD PTR [rsp+320], r9 - mov QWORD PTR [rsp+328], rax - sub r12d, 1 - jne CryptonightWOW_soft_aes_template_mainloop - -CryptonightWOW_soft_aes_template_part3: - movaps xmm6, XMMWORD PTR [rsp+16] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+48] - movaps xmm9, XMMWORD PTR [rsp+64] - movaps xmm10, XMMWORD PTR [rsp+80] - movaps xmm11, XMMWORD PTR [rsp+96] - movaps xmm12, XMMWORD PTR [rsp+112] - movaps xmm13, XMMWORD PTR [rsp+128] - - add rsp, 232 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - ret -CryptonightWOW_soft_aes_template_end: diff --git a/src/crypto/asm/win64/CryptonightWOW_template_win.inc b/src/crypto/asm/win64/CryptonightWOW_template_win.inc deleted file mode 100644 index 55c8c8df..00000000 --- a/src/crypto/asm/win64/CryptonightWOW_template_win.inc +++ /dev/null @@ -1,491 +0,0 @@ -PUBLIC CryptonightWOW_template_part1 -PUBLIC CryptonightWOW_template_mainloop -PUBLIC CryptonightWOW_template_part2 -PUBLIC CryptonightWOW_template_part3 -PUBLIC CryptonightWOW_template_end -PUBLIC CryptonightWOW_template_double_part1 -PUBLIC CryptonightWOW_template_double_mainloop -PUBLIC CryptonightWOW_template_double_part2 -PUBLIC CryptonightWOW_template_double_part3 -PUBLIC CryptonightWOW_template_double_part4 -PUBLIC CryptonightWOW_template_double_end - -ALIGN(64) -CryptonightWOW_template_part1: - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push r10 - push 
r11 - push r12 - push r13 - push r14 - push r15 - push rdi - sub rsp, 64 - mov r12, rcx - mov r8, QWORD PTR [r12+32] - mov rdx, r12 - xor r8, QWORD PTR [r12] - mov r15, QWORD PTR [r12+40] - mov r9, r8 - xor r15, QWORD PTR [r12+8] - mov r11, QWORD PTR [r12+224] - mov r12, QWORD PTR [r12+56] - xor r12, QWORD PTR [rdx+24] - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - movaps XMMWORD PTR [rsp+48], xmm6 - movd xmm0, r12 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - movaps XMMWORD PTR [rsp], xmm9 - mov r12, QWORD PTR [rdx+88] - xor r12, QWORD PTR [rdx+72] - movd xmm6, rax - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm6, xmm0 - and r9d, 2097136 - movd xmm0, r12 - movd xmm7, rax - punpcklqdq xmm7, xmm0 - mov r10d, r9d - movd xmm9, rsp - mov rsp, r8 - mov r8d, 524288 - - mov ebx, [rdx+96] - mov esi, [rdx+100] - mov edi, [rdx+104] - mov ebp, [rdx+108] - - ALIGN(64) -CryptonightWOW_template_mainloop: - movdqa xmm5, XMMWORD PTR [r9+r11] - movd xmm0, r15 - movd xmm4, rsp - punpcklqdq xmm4, xmm0 - lea rdx, QWORD PTR [r9+r11] - - aesenc xmm5, xmm4 - movd r10d, xmm5 - and r10d, 2097136 - - mov r12d, r9d - mov eax, r9d - xor r9d, 48 - xor r12d, 16 - xor eax, 32 - movdqu xmm0, XMMWORD PTR [r9+r11] - movdqu xmm2, XMMWORD PTR [r12+r11] - movdqu xmm1, XMMWORD PTR [rax+r11] - paddq xmm0, xmm7 - paddq xmm2, xmm6 - paddq xmm1, xmm4 - movdqu XMMWORD PTR [r12+r11], xmm0 - movd r12, xmm5 - movdqu XMMWORD PTR [rax+r11], xmm2 - movdqu XMMWORD PTR [r9+r11], xmm1 - - movdqa xmm0, xmm5 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [rdx], xmm0 - - lea r13d, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or r13, rdx - - xor r13, QWORD PTR [r10+r11] - mov r14, QWORD PTR [r10+r11+8] - - movd eax, xmm6 - movd edx, xmm7 - pextrd r9d, xmm7, 2 - -CryptonightWOW_template_part2: - mov rax, r13 - mul r12 - movd xmm0, rax - movd xmm3, rdx - punpcklqdq xmm3, xmm0 - - mov r9d, r10d - mov r12d, r10d - xor r9d, 16 - xor r12d, 32 - xor r10d, 48 - 
movdqa xmm1, XMMWORD PTR [r12+r11] - xor rdx, QWORD PTR [r12+r11] - xor rax, QWORD PTR [r11+r12+8] - movdqa xmm2, XMMWORD PTR [r9+r11] - pxor xmm3, xmm2 - paddq xmm7, XMMWORD PTR [r10+r11] - paddq xmm1, xmm4 - paddq xmm3, xmm6 - movdqu XMMWORD PTR [r9+r11], xmm7 - movdqu XMMWORD PTR [r12+r11], xmm3 - movdqu XMMWORD PTR [r10+r11], xmm1 - - movdqa xmm7, xmm6 - add r15, rax - add rsp, rdx - xor r10, 48 - mov QWORD PTR [r10+r11], rsp - xor rsp, r13 - mov r9d, esp - mov QWORD PTR [r10+r11+8], r15 - and r9d, 2097136 - xor r15, r14 - movdqa xmm6, xmm5 - dec r8d - jnz CryptonightWOW_template_mainloop - -CryptonightWOW_template_part3: - movd rsp, xmm9 - - mov rbx, QWORD PTR [rsp+136] - mov rbp, QWORD PTR [rsp+144] - mov rsi, QWORD PTR [rsp+152] - movaps xmm6, XMMWORD PTR [rsp+48] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+16] - movaps xmm9, XMMWORD PTR [rsp] - add rsp, 64 - pop rdi - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - ret 0 -CryptonightWOW_template_end: - -ALIGN(64) -CryptonightWOW_template_double_part1: - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 320 - mov r14, QWORD PTR [rcx+32] - mov r8, rcx - xor r14, QWORD PTR [rcx] - mov r12, QWORD PTR [rcx+40] - mov ebx, r14d - mov rsi, QWORD PTR [rcx+224] - and ebx, 2097136 - xor r12, QWORD PTR [rcx+8] - mov rcx, QWORD PTR [rcx+56] - xor rcx, QWORD PTR [r8+24] - mov rax, QWORD PTR [r8+48] - xor rax, QWORD PTR [r8+16] - mov r15, QWORD PTR [rdx+32] - xor r15, QWORD PTR [rdx] - movd xmm0, rcx - mov rcx, QWORD PTR [r8+88] - xor rcx, QWORD PTR [r8+72] - mov r13, QWORD PTR [rdx+40] - mov rdi, QWORD PTR [rdx+224] - xor r13, QWORD PTR [rdx+8] - movaps XMMWORD PTR [rsp+160], xmm6 - movaps XMMWORD PTR [rsp+176], xmm7 - movaps XMMWORD PTR [rsp+192], xmm8 - movaps XMMWORD PTR [rsp+208], xmm9 - movaps XMMWORD PTR [rsp+224], xmm10 - movaps XMMWORD PTR [rsp+240], xmm11 - movaps XMMWORD 
PTR [rsp+256], xmm12 - movaps XMMWORD PTR [rsp+272], xmm13 - movaps XMMWORD PTR [rsp+288], xmm14 - movaps XMMWORD PTR [rsp+304], xmm15 - movd xmm7, rax - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - - movaps xmm1, XMMWORD PTR [rdx+96] - movaps xmm2, XMMWORD PTR [r8+96] - movaps XMMWORD PTR [rsp], xmm1 - movaps XMMWORD PTR [rsp+16], xmm2 - - mov r8d, r15d - punpcklqdq xmm7, xmm0 - movd xmm0, rcx - mov rcx, QWORD PTR [rdx+56] - xor rcx, QWORD PTR [rdx+24] - movd xmm9, rax - mov QWORD PTR [rsp+128], rsi - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - punpcklqdq xmm9, xmm0 - movd xmm0, rcx - mov rcx, QWORD PTR [rdx+88] - xor rcx, QWORD PTR [rdx+72] - movd xmm8, rax - mov QWORD PTR [rsp+136], rdi - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm8, xmm0 - and r8d, 2097136 - movd xmm0, rcx - mov r11d, 524288 - movd xmm10, rax - punpcklqdq xmm10, xmm0 - - movd xmm14, QWORD PTR [rsp+128] - movd xmm15, QWORD PTR [rsp+136] - - ALIGN(64) -CryptonightWOW_template_double_mainloop: - movdqu xmm6, XMMWORD PTR [rbx+rsi] - movd xmm0, r12 - mov ecx, ebx - movd xmm3, r14 - punpcklqdq xmm3, xmm0 - xor ebx, 16 - aesenc xmm6, xmm3 - movd rdx, xmm6 - movd xmm4, r15 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - xor ebx, 48 - paddq xmm0, xmm7 - movdqu xmm1, XMMWORD PTR [rbx+rsi] - movdqu XMMWORD PTR [rbx+rsi], xmm0 - paddq xmm1, xmm3 - xor ebx, 16 - mov eax, ebx - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - movdqu XMMWORD PTR [rbx+rsi], xmm1 - paddq xmm0, xmm9 - movdqu XMMWORD PTR [rax+rsi], xmm0 - movdqa xmm0, xmm6 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [rcx+rsi], xmm0 - mov esi, edx - movdqu xmm5, XMMWORD PTR [r8+rdi] - and esi, 2097136 - mov ecx, r8d - movd xmm0, r13 - punpcklqdq xmm4, xmm0 - xor r8d, 16 - aesenc xmm5, xmm4 - movdqu xmm0, XMMWORD PTR [r8+rdi] - xor r8d, 48 - paddq xmm0, xmm8 - movdqu xmm1, XMMWORD PTR [r8+rdi] - movdqu XMMWORD PTR [r8+rdi], xmm0 - paddq xmm1, xmm4 - xor r8d, 16 - mov eax, r8d - xor rax, 32 - movdqu 
xmm0, XMMWORD PTR [r8+rdi] - movdqu XMMWORD PTR [r8+rdi], xmm1 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rdi], xmm0 - movdqa xmm0, xmm5 - pxor xmm0, xmm8 - movdqu XMMWORD PTR [rcx+rdi], xmm0 - movd rdi, xmm5 - movd rcx, xmm14 - mov ebp, edi - mov r8, QWORD PTR [rcx+rsi] - mov r10, QWORD PTR [rcx+rsi+8] - lea r9, QWORD PTR [rcx+rsi] - xor esi, 16 - - movd xmm0, rsp - movd xmm1, rsi - movd xmm2, rdi - movd xmm11, rbp - movd xmm12, r15 - movd xmm13, rdx - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp+16] - mov esi, DWORD PTR [rsp+20] - mov edi, DWORD PTR [rsp+24] - mov ebp, DWORD PTR [rsp+28] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - xor r8, rax - - movd esp, xmm3 - pextrd r15d, xmm3, 2 - movd eax, xmm7 - movd edx, xmm9 - pextrd r9d, xmm9, 2 - -CryptonightWOW_template_double_part2: - - movd rsp, xmm0 - mov DWORD PTR [rsp+16], ebx - mov DWORD PTR [rsp+20], esi - mov DWORD PTR [rsp+24], edi - mov DWORD PTR [rsp+28], ebp - - movd rsi, xmm1 - movd rdi, xmm2 - movd rbp, xmm11 - movd r15, xmm12 - movd rdx, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rbx, r8 - mov rax, r8 - mul rdx - and ebp, 2097136 - mov r8, rax - movd xmm1, rdx - movd xmm0, r8 - punpcklqdq xmm1, xmm0 - pxor xmm1, XMMWORD PTR [rcx+rsi] - xor esi, 48 - paddq xmm1, xmm7 - movdqu xmm2, XMMWORD PTR [rsi+rcx] - xor rdx, QWORD PTR [rsi+rcx] - paddq xmm2, xmm3 - xor r8, QWORD PTR [rsi+rcx+8] - movdqu XMMWORD PTR [rsi+rcx], xmm1 - xor esi, 16 - mov eax, esi - mov rsi, rcx - movdqu xmm0, XMMWORD PTR [rax+rcx] - movdqu XMMWORD PTR [rax+rcx], xmm2 - paddq xmm0, xmm9 - add r12, r8 - xor rax, 32 - add r14, rdx - movdqa xmm9, xmm7 - movdqa xmm7, xmm6 - movdqu XMMWORD PTR [rax+rcx], xmm0 - mov QWORD PTR [r9+8], r12 - xor r12, r10 - mov QWORD PTR [r9], r14 - movd rcx, xmm15 - xor r14, rbx - mov r10d, ebp - mov ebx, r14d - xor ebp, 16 - and ebx, 2097136 - mov r8, QWORD PTR [r10+rcx] - mov r9, QWORD PTR [r10+rcx+8] - - movd xmm0, rsp - movd xmm1, rbx - movd 
xmm2, rsi - movd xmm11, rdi - movd xmm12, rbp - movd xmm13, r15 - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp] - mov esi, DWORD PTR [rsp+4] - mov edi, DWORD PTR [rsp+8] - mov ebp, DWORD PTR [rsp+12] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - - xor r8, rax - movd xmm3, r8 - - movd esp, xmm4 - pextrd r15d, xmm4, 2 - movd eax, xmm8 - movd edx, xmm10 - pextrd r9d, xmm10, 2 - -CryptonightWOW_template_double_part3: - - movd rsp, xmm0 - mov DWORD PTR [rsp], ebx - mov DWORD PTR [rsp+4], esi - mov DWORD PTR [rsp+8], edi - mov DWORD PTR [rsp+12], ebp - - movd rbx, xmm1 - movd rsi, xmm2 - movd rdi, xmm11 - movd rbp, xmm12 - movd r15, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rax, r8 - mul rdi - movd xmm1, rdx - movd xmm0, rax - punpcklqdq xmm1, xmm0 - mov rdi, rcx - mov r8, rax - pxor xmm1, XMMWORD PTR [rbp+rcx] - xor ebp, 48 - paddq xmm1, xmm8 - xor r8, QWORD PTR [rbp+rcx+8] - xor rdx, QWORD PTR [rbp+rcx] - add r13, r8 - movdqu xmm2, XMMWORD PTR [rbp+rcx] - add r15, rdx - movdqu XMMWORD PTR [rbp+rcx], xmm1 - paddq xmm2, xmm4 - xor ebp, 16 - mov eax, ebp - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbp+rcx] - movdqu XMMWORD PTR [rbp+rcx], xmm2 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rcx], xmm0 - movd rax, xmm3 - movdqa xmm10, xmm8 - mov QWORD PTR [r10+rcx], r15 - movdqa xmm8, xmm5 - xor r15, rax - mov QWORD PTR [r10+rcx+8], r13 - mov r8d, r15d - xor r13, r9 - and r8d, 2097136 - dec r11d - jnz CryptonightWOW_template_double_mainloop - -CryptonightWOW_template_double_part4: - - mov rbx, QWORD PTR [rsp+400] - movaps xmm6, XMMWORD PTR [rsp+160] - movaps xmm7, XMMWORD PTR [rsp+176] - movaps xmm8, XMMWORD PTR [rsp+192] - movaps xmm9, XMMWORD PTR [rsp+208] - movaps xmm10, XMMWORD PTR [rsp+224] - movaps xmm11, XMMWORD PTR [rsp+240] - movaps xmm12, XMMWORD PTR [rsp+256] - movaps xmm13, XMMWORD PTR [rsp+272] - movaps xmm14, XMMWORD PTR [rsp+288] - movaps xmm15, XMMWORD PTR [rsp+304] - add rsp, 320 - pop r15 - pop r14 
- pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - ret 0 -CryptonightWOW_template_double_end: diff --git a/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc deleted file mode 100644 index 85077a20..00000000 --- a/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc +++ /dev/null @@ -1,413 +0,0 @@ - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov rax, rsp - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 184 - - stmxcsr DWORD PTR [rsp+272] - mov DWORD PTR [rsp+276], 24448 - ldmxcsr DWORD PTR [rsp+276] - - mov r13, QWORD PTR [rcx+224] - mov r9, rdx - mov r10, QWORD PTR [rcx+32] - mov r8, rcx - xor r10, QWORD PTR [rcx] - mov r14d, 524288 - mov r11, QWORD PTR [rcx+40] - xor r11, QWORD PTR [rcx+8] - mov rsi, QWORD PTR [rdx+224] - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov rdi, QWORD PTR [r9+32] - xor rdi, QWORD PTR [r9] - mov rbp, QWORD PTR [r9+40] - xor rbp, QWORD PTR [r9+8] - movd xmm0, rdx - movaps XMMWORD PTR [rax-88], xmm6 - movaps XMMWORD PTR [rax-104], xmm7 - movaps XMMWORD PTR [rax-120], xmm8 - movaps XMMWORD PTR [rsp+112], xmm9 - movaps XMMWORD PTR [rsp+96], xmm10 - movaps XMMWORD PTR [rsp+80], xmm11 - movaps XMMWORD PTR [rsp+64], xmm12 - movaps XMMWORD PTR [rsp+48], xmm13 - movaps XMMWORD PTR [rsp+32], xmm14 - movaps XMMWORD PTR [rsp+16], xmm15 - mov rdx, r10 - movd xmm4, QWORD PTR [r8+96] - and edx, 2097136 - mov rax, QWORD PTR [rcx+48] - xorps xmm13, xmm13 - xor rax, QWORD PTR [rcx+16] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r8+72] - movd xmm5, QWORD PTR [r8+104] - movd xmm7, rax - - mov eax, 1 - shl rax, 52 - movd xmm14, rax - punpcklqdq xmm14, xmm14 - - mov eax, 1023 - shl rax, 52 - movd xmm12, rax - punpcklqdq xmm12, xmm12 - - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - punpcklqdq xmm7, xmm0 - movd xmm0, rcx - mov rcx, QWORD PTR [r9+56] - xor rcx, QWORD PTR [r9+24] - movd xmm3, 
rax - mov rax, QWORD PTR [r9+48] - xor rax, QWORD PTR [r9+16] - punpcklqdq xmm3, xmm0 - movd xmm0, rcx - mov QWORD PTR [rsp], r13 - mov rcx, QWORD PTR [r9+88] - xor rcx, QWORD PTR [r9+72] - movd xmm6, rax - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - punpcklqdq xmm6, xmm0 - movd xmm0, rcx - mov QWORD PTR [rsp+256], r10 - mov rcx, rdi - mov QWORD PTR [rsp+264], r11 - movd xmm8, rax - and ecx, 2097136 - punpcklqdq xmm8, xmm0 - movd xmm0, QWORD PTR [r9+96] - punpcklqdq xmm4, xmm0 - movd xmm0, QWORD PTR [r9+104] - lea r8, QWORD PTR [rcx+rsi] - movdqu xmm11, XMMWORD PTR [r8] - punpcklqdq xmm5, xmm0 - lea r9, QWORD PTR [rdx+r13] - movdqu xmm15, XMMWORD PTR [r9] - - ALIGN(64) -main_loop_double_sandybridge: - movdqu xmm9, xmm15 - mov eax, edx - mov ebx, edx - xor eax, 16 - xor ebx, 32 - xor edx, 48 - - movd xmm0, r11 - movd xmm2, r10 - punpcklqdq xmm2, xmm0 - aesenc xmm9, xmm2 - - movdqu xmm0, XMMWORD PTR [rax+r13] - movdqu xmm1, XMMWORD PTR [rbx+r13] - paddq xmm0, xmm7 - paddq xmm1, xmm2 - movdqu XMMWORD PTR [rbx+r13], xmm0 - movdqu xmm0, XMMWORD PTR [rdx+r13] - movdqu XMMWORD PTR [rdx+r13], xmm1 - paddq xmm0, xmm3 - movdqu XMMWORD PTR [rax+r13], xmm0 - - movd r11, xmm9 - mov edx, r11d - and edx, 2097136 - movdqa xmm0, xmm9 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [r9], xmm0 - - lea rbx, QWORD PTR [rdx+r13] - mov r10, QWORD PTR [rdx+r13] - - movdqu xmm10, xmm11 - movd xmm0, rbp - movd xmm11, rdi - punpcklqdq xmm11, xmm0 - aesenc xmm10, xmm11 - - mov eax, ecx - mov r12d, ecx - xor eax, 16 - xor r12d, 32 - xor ecx, 48 - - movdqu xmm0, XMMWORD PTR [rax+rsi] - paddq xmm0, xmm6 - movdqu xmm1, XMMWORD PTR [r12+rsi] - movdqu XMMWORD PTR [r12+rsi], xmm0 - paddq xmm1, xmm11 - movdqu xmm0, XMMWORD PTR [rcx+rsi] - movdqu XMMWORD PTR [rcx+rsi], xmm1 - paddq xmm0, xmm8 - movdqu XMMWORD PTR [rax+rsi], xmm0 - - movd rcx, xmm10 - and ecx, 2097136 - - movdqa xmm0, xmm10 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [r8], xmm0 - mov r12, QWORD PTR [rcx+rsi] - - mov r9, QWORD PTR 
[rbx+8] - - xor edx, 16 - mov r8d, edx - mov r15d, edx - - movd rdx, xmm5 - shl rdx, 32 - movd rax, xmm4 - xor rdx, rax - xor r10, rdx - mov rax, r10 - mul r11 - mov r11d, r8d - xor r11d, 48 - movd xmm0, rdx - xor rdx, [r11+r13] - movd xmm1, rax - xor rax, [r11+r13+8] - punpcklqdq xmm0, xmm1 - - pxor xmm0, XMMWORD PTR [r8+r13] - xor r8d, 32 - movdqu xmm1, XMMWORD PTR [r11+r13] - paddq xmm0, xmm7 - paddq xmm1, xmm2 - movdqu XMMWORD PTR [r11+r13], xmm0 - movdqu xmm0, XMMWORD PTR [r8+r13] - movdqu XMMWORD PTR [r8+r13], xmm1 - paddq xmm0, xmm3 - movdqu XMMWORD PTR [r15+r13], xmm0 - - mov r11, QWORD PTR [rsp+256] - add r11, rdx - mov rdx, QWORD PTR [rsp+264] - add rdx, rax - mov QWORD PTR [rbx], r11 - xor r11, r10 - mov QWORD PTR [rbx+8], rdx - xor rdx, r9 - mov QWORD PTR [rsp+256], r11 - and r11d, 2097136 - mov QWORD PTR [rsp+264], rdx - mov QWORD PTR [rsp+8], r11 - lea r15, QWORD PTR [r11+r13] - movdqu xmm15, XMMWORD PTR [r11+r13] - lea r13, QWORD PTR [rsi+rcx] - movdqa xmm0, xmm5 - psrldq xmm0, 8 - movaps xmm2, xmm13 - movd r10, xmm0 - psllq xmm5, 1 - shl r10, 32 - movdqa xmm0, xmm9 - psrldq xmm0, 8 - movdqa xmm1, xmm10 - movd r11, xmm0 - psrldq xmm1, 8 - movd r8, xmm1 - psrldq xmm4, 8 - movaps xmm0, xmm13 - movd rax, xmm4 - xor r10, rax - movaps xmm1, xmm13 - xor r10, r12 - lea rax, QWORD PTR [r11+1] - shr rax, 1 - movdqa xmm3, xmm9 - punpcklqdq xmm3, xmm10 - paddq xmm5, xmm3 - movd rdx, xmm5 - psrldq xmm5, 8 - cvtsi2sd xmm2, rax - or edx, -2147483647 - lea rax, QWORD PTR [r8+1] - shr rax, 1 - movd r9, xmm5 - cvtsi2sd xmm0, rax - or r9d, -2147483647 - cvtsi2sd xmm1, rdx - unpcklpd xmm2, xmm0 - movaps xmm0, xmm13 - cvtsi2sd xmm0, r9 - unpcklpd xmm1, xmm0 - divpd xmm2, xmm1 - paddq xmm2, xmm14 - cvttsd2si rax, xmm2 - psrldq xmm2, 8 - mov rbx, rax - imul rax, rdx - sub r11, rax - js div_fix_1_sandybridge -div_fix_1_ret_sandybridge: - - cvttsd2si rdx, xmm2 - mov rax, rdx - imul rax, r9 - movd xmm2, r11d - movd xmm4, ebx - sub r8, rax - js div_fix_2_sandybridge 
-div_fix_2_ret_sandybridge: - - movd xmm1, r8d - movd xmm0, edx - punpckldq xmm2, xmm1 - punpckldq xmm4, xmm0 - punpckldq xmm4, xmm2 - paddq xmm3, xmm4 - movdqa xmm0, xmm3 - psrlq xmm0, 12 - paddq xmm0, xmm12 - sqrtpd xmm1, xmm0 - movd r9, xmm1 - movdqa xmm5, xmm1 - psrlq xmm5, 19 - test r9, 524287 - je sqrt_fix_1_sandybridge -sqrt_fix_1_ret_sandybridge: - - movd r9, xmm10 - psrldq xmm1, 8 - movd r8, xmm1 - test r8, 524287 - je sqrt_fix_2_sandybridge -sqrt_fix_2_ret_sandybridge: - - mov r12d, ecx - mov r8d, ecx - xor r12d, 16 - xor r8d, 32 - xor ecx, 48 - mov rax, r10 - mul r9 - movd xmm0, rax - movd xmm3, rdx - punpcklqdq xmm3, xmm0 - - movdqu xmm0, XMMWORD PTR [r12+rsi] - pxor xmm0, xmm3 - movdqu xmm1, XMMWORD PTR [r8+rsi] - xor rdx, [r8+rsi] - xor rax, [r8+rsi+8] - movdqu xmm3, XMMWORD PTR [rcx+rsi] - paddq xmm0, xmm6 - paddq xmm1, xmm11 - paddq xmm3, xmm8 - movdqu XMMWORD PTR [r8+rsi], xmm0 - movdqu XMMWORD PTR [rcx+rsi], xmm1 - movdqu XMMWORD PTR [r12+rsi], xmm3 - - add rdi, rdx - mov QWORD PTR [r13], rdi - xor rdi, r10 - mov ecx, edi - and ecx, 2097136 - lea r8, QWORD PTR [rcx+rsi] - - mov rdx, QWORD PTR [r13+8] - add rbp, rax - mov QWORD PTR [r13+8], rbp - movdqu xmm11, XMMWORD PTR [rcx+rsi] - xor rbp, rdx - mov r13, QWORD PTR [rsp] - movdqa xmm3, xmm7 - mov rdx, QWORD PTR [rsp+8] - movdqa xmm8, xmm6 - mov r10, QWORD PTR [rsp+256] - movdqa xmm7, xmm9 - mov r11, QWORD PTR [rsp+264] - movdqa xmm6, xmm10 - mov r9, r15 - dec r14d - jne main_loop_double_sandybridge - - ldmxcsr DWORD PTR [rsp+272] - movaps xmm13, XMMWORD PTR [rsp+48] - lea r11, QWORD PTR [rsp+184] - movaps xmm6, XMMWORD PTR [r11-24] - movaps xmm7, XMMWORD PTR [r11-40] - movaps xmm8, XMMWORD PTR [r11-56] - movaps xmm9, XMMWORD PTR [r11-72] - movaps xmm10, XMMWORD PTR [r11-88] - movaps xmm11, XMMWORD PTR [r11-104] - movaps xmm12, XMMWORD PTR [r11-120] - movaps xmm14, XMMWORD PTR [rsp+32] - movaps xmm15, XMMWORD PTR [rsp+16] - mov rsp, r11 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - 
pop rbp - pop rbx - jmp cnv2_double_mainloop_asm_sandybridge_endp - -div_fix_1_sandybridge: - dec rbx - add r11, rdx - jmp div_fix_1_ret_sandybridge - -div_fix_2_sandybridge: - dec rdx - add r8, r9 - jmp div_fix_2_ret_sandybridge - -sqrt_fix_1_sandybridge: - movd r8, xmm3 - movdqa xmm0, xmm5 - psrldq xmm0, 8 - dec r9 - mov r11d, -1022 - shl r11, 32 - mov rax, r9 - shr r9, 19 - shr rax, 20 - mov rdx, r9 - sub rdx, rax - lea rdx, [rdx+r11+1] - add rax, r11 - imul rdx, rax - sub rdx, r8 - adc r9, 0 - movd xmm5, r9 - punpcklqdq xmm5, xmm0 - jmp sqrt_fix_1_ret_sandybridge - -sqrt_fix_2_sandybridge: - psrldq xmm3, 8 - movd r11, xmm3 - dec r8 - mov ebx, -1022 - shl rbx, 32 - mov rax, r8 - shr r8, 19 - shr rax, 20 - mov rdx, r8 - sub rdx, rax - lea rdx, [rdx+rbx+1] - add rax, rbx - imul rdx, rax - sub rdx, r11 - adc r8, 0 - movd xmm0, r8 - punpcklqdq xmm5, xmm0 - jmp sqrt_fix_2_ret_sandybridge - -cnv2_double_mainloop_asm_sandybridge_endp: diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc deleted file mode 100644 index f17017a0..00000000 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc +++ /dev/null @@ -1,182 +0,0 @@ - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 64 - - stmxcsr DWORD PTR [rsp] - mov DWORD PTR [rsp+4], 24448 - ldmxcsr DWORD PTR [rsp+4] - - mov rax, QWORD PTR [rcx+48] - mov r9, rcx - xor rax, QWORD PTR [rcx+16] - mov ebp, 524288 - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD PTR [rcx] - mov r11, QWORD PTR [rcx+40] - mov r10, r8 - mov rdx, QWORD PTR [rcx+56] - movd xmm3, rax - xor rdx, QWORD PTR [rcx+24] - xor r11, QWORD PTR [rcx+8] - mov rbx, QWORD PTR [rcx+224] - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - movd xmm0, rdx - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r9+72] - mov rdi, QWORD PTR [r9+104] - and r10d, 2097136 - movaps 
XMMWORD PTR [rsp+48], xmm6 - movd xmm4, rax - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - xorps xmm8, xmm8 - mov ax, 1023 - shl rax, 52 - movd xmm7, rax - mov r15, QWORD PTR [r9+96] - punpcklqdq xmm3, xmm0 - movd xmm0, rcx - punpcklqdq xmm4, xmm0 - - ALIGN(64) -cnv2_main_loop_bulldozer: - movdqa xmm5, XMMWORD PTR [r10+rbx] - movd xmm6, r8 - pinsrq xmm6, r11, 1 - lea rdx, QWORD PTR [r10+rbx] - lea r9, QWORD PTR [rdi+rdi] - shl rdi, 32 - - mov ecx, r10d - mov eax, r10d - xor ecx, 16 - xor eax, 32 - xor r10d, 48 - aesenc xmm5, xmm6 - movdqa xmm2, XMMWORD PTR [rcx+rbx] - movdqa xmm1, XMMWORD PTR [rax+rbx] - movdqa xmm0, XMMWORD PTR [r10+rbx] - paddq xmm2, xmm3 - paddq xmm1, xmm6 - paddq xmm0, xmm4 - movdqa XMMWORD PTR [rcx+rbx], xmm0 - movdqa XMMWORD PTR [rax+rbx], xmm2 - movdqa XMMWORD PTR [r10+rbx], xmm1 - - movaps xmm1, xmm8 - mov rsi, r15 - xor rsi, rdi - - mov edi, 1023 - shl rdi, 52 - - movd r14, xmm5 - pextrq rax, xmm5, 1 - - movdqa xmm0, xmm5 - pxor xmm0, xmm3 - mov r10, r14 - and r10d, 2097136 - movdqa XMMWORD PTR [rdx], xmm0 - xor rsi, QWORD PTR [r10+rbx] - lea r12, QWORD PTR [r10+rbx] - mov r13, QWORD PTR [r10+rbx+8] - - add r9d, r14d - or r9d, -2147483647 - xor edx, edx - div r9 - mov eax, eax - shl rdx, 32 - lea r15, [rax+rdx] - lea rax, [r14+r15] - shr rax, 12 - add rax, rdi - movd xmm0, rax - sqrtsd xmm1, xmm0 - movd rdi, xmm1 - test rdi, 524287 - je sqrt_fixup_bulldozer - shr rdi, 19 - -sqrt_fixup_bulldozer_ret: - mov rax, rsi - mul r14 - movd xmm1, rax - movd xmm0, rdx - punpcklqdq xmm0, xmm1 - - mov r9d, r10d - mov ecx, r10d - xor r9d, 16 - xor ecx, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [rcx+rbx] - xor rdx, [rcx+rbx] - xor rax, [rcx+rbx+8] - movdqa xmm2, XMMWORD PTR [r9+rbx] - pxor xmm2, xmm0 - paddq xmm4, XMMWORD PTR [r10+rbx] - paddq xmm2, xmm3 - paddq xmm1, xmm6 - movdqa XMMWORD PTR [r9+rbx], xmm4 - movdqa XMMWORD PTR [rcx+rbx], xmm2 - movdqa XMMWORD PTR [r10+rbx], xmm1 - - movdqa xmm4, xmm3 - add r8, rdx - add 
r11, rax - mov QWORD PTR [r12], r8 - xor r8, rsi - mov QWORD PTR [r12+8], r11 - mov r10, r8 - xor r11, r13 - and r10d, 2097136 - movdqa xmm3, xmm5 - dec ebp - jne cnv2_main_loop_bulldozer - - ldmxcsr DWORD PTR [rsp] - movaps xmm6, XMMWORD PTR [rsp+48] - lea r11, QWORD PTR [rsp+64] - mov rbx, QWORD PTR [r11+56] - mov rbp, QWORD PTR [r11+64] - mov rsi, QWORD PTR [r11+72] - movaps xmm8, XMMWORD PTR [r11-48] - movaps xmm7, XMMWORD PTR [rsp+32] - mov rsp, r11 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - jmp cnv2_main_loop_bulldozer_endp - -sqrt_fixup_bulldozer: - movd r9, xmm5 - add r9, r15 - dec rdi - mov edx, -1022 - shl rdx, 32 - mov rax, rdi - shr rdi, 19 - shr rax, 20 - mov rcx, rdi - sub rcx, rax - lea rcx, [rcx+rdx+1] - add rax, rdx - imul rcx, rax - sub rcx, r9 - adc rdi, 0 - jmp sqrt_fixup_bulldozer_ret - -cnv2_main_loop_bulldozer_endp: diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc deleted file mode 100644 index a12ac35c..00000000 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc +++ /dev/null @@ -1,188 +0,0 @@ - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 80 - - stmxcsr DWORD PTR [rsp] - mov DWORD PTR [rsp+4], 24448 - ldmxcsr DWORD PTR [rsp+4] - - mov rax, QWORD PTR [rcx+48] - mov r9, rcx - xor rax, QWORD PTR [rcx+16] - mov esi, 524288 - mov r8, QWORD PTR [rcx+32] - mov r13d, -2147483647 - xor r8, QWORD PTR [rcx] - mov r11, QWORD PTR [rcx+40] - mov r10, r8 - mov rdx, QWORD PTR [rcx+56] - movd xmm4, rax - xor rdx, QWORD PTR [rcx+24] - xor r11, QWORD PTR [rcx+8] - mov rbx, QWORD PTR [rcx+224] - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - movd xmm0, rdx - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r9+72] - movd xmm3, QWORD PTR [r9+104] - movaps XMMWORD PTR [rsp+64], xmm6 - movaps XMMWORD PTR [rsp+48], xmm7 - movaps XMMWORD PTR [rsp+32], xmm8 - and r10d, 2097136 
- movd xmm5, rax - - xor eax, eax - mov QWORD PTR [rsp+16], rax - - mov ax, 1023 - shl rax, 52 - movd xmm8, rax - mov r15, QWORD PTR [r9+96] - punpcklqdq xmm4, xmm0 - movd xmm0, rcx - punpcklqdq xmm5, xmm0 - movdqu xmm6, XMMWORD PTR [r10+rbx] - - ALIGN(64) -main_loop_ivybridge: - lea rdx, QWORD PTR [r10+rbx] - mov ecx, r10d - mov eax, r10d - mov rdi, r15 - xor ecx, 16 - xor eax, 32 - xor r10d, 48 - movd xmm0, r11 - movd xmm7, r8 - punpcklqdq xmm7, xmm0 - aesenc xmm6, xmm7 - movd rbp, xmm6 - mov r9, rbp - and r9d, 2097136 - movdqu xmm2, XMMWORD PTR [rcx+rbx] - movdqu xmm1, XMMWORD PTR [rax+rbx] - movdqu xmm0, XMMWORD PTR [r10+rbx] - paddq xmm1, xmm7 - paddq xmm0, xmm5 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [rcx+rbx], xmm0 - movdqu XMMWORD PTR [rax+rbx], xmm2 - movdqu XMMWORD PTR [r10+rbx], xmm1 - mov r10, r9 - xor r10d, 32 - movd rcx, xmm3 - mov rax, rcx - shl rax, 32 - xor rdi, rax - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [rdx], xmm0 - xor rdi, QWORD PTR [r9+rbx] - lea r14, QWORD PTR [r9+rbx] - mov r12, QWORD PTR [r14+8] - xor edx, edx - lea r9d, DWORD PTR [ecx+ecx] - add r9d, ebp - movdqa xmm0, xmm6 - psrldq xmm0, 8 - or r9d, r13d - movd rax, xmm0 - div r9 - xorps xmm3, xmm3 - mov eax, eax - shl rdx, 32 - add rdx, rax - lea r9, QWORD PTR [rdx+rbp] - mov r15, rdx - mov rax, r9 - shr rax, 12 - movd xmm0, rax - paddq xmm0, xmm8 - sqrtsd xmm3, xmm0 - psubq xmm3, XMMWORD PTR [rsp+16] - movd rdx, xmm3 - test edx, 524287 - je sqrt_fixup_ivybridge - psrlq xmm3, 19 -sqrt_fixup_ivybridge_ret: - - mov ecx, r10d - mov rax, rdi - mul rbp - movd xmm2, rdx - xor rdx, [rcx+rbx] - add r8, rdx - mov QWORD PTR [r14], r8 - xor r8, rdi - mov edi, r8d - and edi, 2097136 - movd xmm0, rax - xor rax, [rcx+rbx+8] - add r11, rax - mov QWORD PTR [r14+8], r11 - punpcklqdq xmm2, xmm0 - - mov r9d, r10d - xor r9d, 48 - xor r10d, 16 - pxor xmm2, XMMWORD PTR [r9+rbx] - movdqu xmm0, XMMWORD PTR [r10+rbx] - paddq xmm0, xmm5 - movdqu xmm1, XMMWORD PTR [rcx+rbx] - paddq xmm2, xmm4 - 
paddq xmm1, xmm7 - movdqa xmm5, xmm4 - movdqu XMMWORD PTR [r9+rbx], xmm0 - movdqa xmm4, xmm6 - movdqu XMMWORD PTR [rcx+rbx], xmm2 - movdqu XMMWORD PTR [r10+rbx], xmm1 - movdqu xmm6, [rdi+rbx] - mov r10d, edi - xor r11, r12 - dec rsi - jne main_loop_ivybridge - - ldmxcsr DWORD PTR [rsp] - mov rbx, QWORD PTR [rsp+160] - movaps xmm6, XMMWORD PTR [rsp+64] - movaps xmm7, XMMWORD PTR [rsp+48] - movaps xmm8, XMMWORD PTR [rsp+32] - add rsp, 80 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - jmp cnv2_main_loop_ivybridge_endp - -sqrt_fixup_ivybridge: - dec rdx - mov r13d, -1022 - shl r13, 32 - mov rax, rdx - shr rdx, 19 - shr rax, 20 - mov rcx, rdx - sub rcx, rax - add rax, r13 - not r13 - sub rcx, r13 - mov r13d, -2147483647 - imul rcx, rax - sub rcx, r9 - adc rdx, 0 - movd xmm3, rdx - jmp sqrt_fixup_ivybridge_ret - -cnv2_main_loop_ivybridge_endp: diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc deleted file mode 100644 index 044235d8..00000000 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc +++ /dev/null @@ -1,181 +0,0 @@ - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 64 - - stmxcsr DWORD PTR [rsp] - mov DWORD PTR [rsp+4], 24448 - ldmxcsr DWORD PTR [rsp+4] - - mov rax, QWORD PTR [rcx+48] - mov r9, rcx - xor rax, QWORD PTR [rcx+16] - mov ebp, 524288 - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD PTR [rcx] - mov r11, QWORD PTR [rcx+40] - mov r10, r8 - mov rdx, QWORD PTR [rcx+56] - movd xmm3, rax - xor rdx, QWORD PTR [rcx+24] - xor r11, QWORD PTR [rcx+8] - mov rbx, QWORD PTR [rcx+224] - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - movd xmm0, rdx - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r9+72] - mov rdi, QWORD PTR [r9+104] - and r10d, 2097136 - movaps XMMWORD PTR [rsp+48], xmm6 - movd xmm4, rax - movaps XMMWORD PTR [rsp+32], xmm7 - 
movaps XMMWORD PTR [rsp+16], xmm8 - xorps xmm8, xmm8 - mov ax, 1023 - shl rax, 52 - movd xmm7, rax - mov r15, QWORD PTR [r9+96] - punpcklqdq xmm3, xmm0 - movd xmm0, rcx - punpcklqdq xmm4, xmm0 - - ALIGN(64) -main_loop_ryzen: - movdqa xmm5, XMMWORD PTR [r10+rbx] - movd xmm0, r11 - movd xmm6, r8 - punpcklqdq xmm6, xmm0 - lea rdx, QWORD PTR [r10+rbx] - lea r9, QWORD PTR [rdi+rdi] - shl rdi, 32 - - mov ecx, r10d - mov eax, r10d - xor ecx, 16 - xor eax, 32 - xor r10d, 48 - aesenc xmm5, xmm6 - movdqa xmm2, XMMWORD PTR [rcx+rbx] - movdqa xmm1, XMMWORD PTR [rax+rbx] - movdqa xmm0, XMMWORD PTR [r10+rbx] - paddq xmm2, xmm3 - paddq xmm1, xmm6 - paddq xmm0, xmm4 - movdqa XMMWORD PTR [rcx+rbx], xmm0 - movdqa XMMWORD PTR [rax+rbx], xmm2 - movdqa XMMWORD PTR [r10+rbx], xmm1 - - movaps xmm1, xmm8 - mov rsi, r15 - xor rsi, rdi - movd r14, xmm5 - movdqa xmm0, xmm5 - pxor xmm0, xmm3 - mov r10, r14 - and r10d, 2097136 - movdqa XMMWORD PTR [rdx], xmm0 - xor rsi, QWORD PTR [r10+rbx] - lea r12, QWORD PTR [r10+rbx] - mov r13, QWORD PTR [r10+rbx+8] - - add r9d, r14d - or r9d, -2147483647 - xor edx, edx - movdqa xmm0, xmm5 - psrldq xmm0, 8 - movd rax, xmm0 - - div r9 - movd xmm0, rax - movd xmm1, rdx - punpckldq xmm0, xmm1 - movd r15, xmm0 - paddq xmm0, xmm5 - movdqa xmm2, xmm0 - psrlq xmm0, 12 - paddq xmm0, xmm7 - sqrtsd xmm1, xmm0 - movd rdi, xmm1 - test rdi, 524287 - je sqrt_fixup_ryzen - shr rdi, 19 - -sqrt_fixup_ryzen_ret: - mov rax, rsi - mul r14 - movd xmm1, rax - movd xmm0, rdx - punpcklqdq xmm0, xmm1 - - mov r9d, r10d - mov ecx, r10d - xor r9d, 16 - xor ecx, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [rcx+rbx] - xor rdx, [rcx+rbx] - xor rax, [rcx+rbx+8] - movdqa xmm2, XMMWORD PTR [r9+rbx] - pxor xmm2, xmm0 - paddq xmm4, XMMWORD PTR [r10+rbx] - paddq xmm2, xmm3 - paddq xmm1, xmm6 - movdqa XMMWORD PTR [r9+rbx], xmm4 - movdqa XMMWORD PTR [rcx+rbx], xmm2 - movdqa XMMWORD PTR [r10+rbx], xmm1 - - movdqa xmm4, xmm3 - add r8, rdx - add r11, rax - mov QWORD PTR [r12], r8 - xor r8, rsi - 
mov QWORD PTR [r12+8], r11 - mov r10, r8 - xor r11, r13 - and r10d, 2097136 - movdqa xmm3, xmm5 - dec ebp - jne main_loop_ryzen - - ldmxcsr DWORD PTR [rsp] - movaps xmm6, XMMWORD PTR [rsp+48] - lea r11, QWORD PTR [rsp+64] - mov rbx, QWORD PTR [r11+56] - mov rbp, QWORD PTR [r11+64] - mov rsi, QWORD PTR [r11+72] - movaps xmm8, XMMWORD PTR [r11-48] - movaps xmm7, XMMWORD PTR [rsp+32] - mov rsp, r11 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - jmp cnv2_main_loop_ryzen_endp - -sqrt_fixup_ryzen: - movd r9, xmm2 - dec rdi - mov edx, -1022 - shl rdx, 32 - mov rax, rdi - shr rdi, 19 - shr rax, 20 - mov rcx, rdi - sub rcx, rax - lea rcx, [rcx+rdx+1] - add rax, rdx - imul rcx, rax - sub rcx, r9 - adc rdi, 0 - jmp sqrt_fixup_ryzen_ret - -cnv2_main_loop_ryzen_endp: diff --git a/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc b/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc deleted file mode 100644 index 97fb691b..00000000 --- a/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc +++ /dev/null @@ -1,413 +0,0 @@ - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov rax, rsp - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 184 - - stmxcsr DWORD PTR [rsp+272] - mov DWORD PTR [rsp+276], 24448 - ldmxcsr DWORD PTR [rsp+276] - - mov r13, QWORD PTR [rcx+224] - mov r9, rdx - mov r10, QWORD PTR [rcx+32] - mov r8, rcx - xor r10, QWORD PTR [rcx] - mov r14d, 393216 - mov r11, QWORD PTR [rcx+40] - xor r11, QWORD PTR [rcx+8] - mov rsi, QWORD PTR [rdx+224] - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov rdi, QWORD PTR [r9+32] - xor rdi, QWORD PTR [r9] - mov rbp, QWORD PTR [r9+40] - xor rbp, QWORD PTR [r9+8] - movd xmm0, rdx - movaps XMMWORD PTR [rax-88], xmm6 - movaps XMMWORD PTR [rax-104], xmm7 - movaps XMMWORD PTR [rax-120], xmm8 - movaps XMMWORD PTR [rsp+112], xmm9 - movaps XMMWORD PTR [rsp+96], xmm10 - movaps XMMWORD PTR [rsp+80], xmm11 - movaps XMMWORD PTR [rsp+64], xmm12 - movaps XMMWORD PTR [rsp+48], xmm13 
- movaps XMMWORD PTR [rsp+32], xmm14 - movaps XMMWORD PTR [rsp+16], xmm15 - mov rdx, r10 - movd xmm4, QWORD PTR [r8+96] - and edx, 2097136 - mov rax, QWORD PTR [rcx+48] - xorps xmm13, xmm13 - xor rax, QWORD PTR [rcx+16] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r8+72] - movd xmm5, QWORD PTR [r8+104] - movd xmm7, rax - - mov eax, 1 - shl rax, 52 - movd xmm14, rax - punpcklqdq xmm14, xmm14 - - mov eax, 1023 - shl rax, 52 - movd xmm12, rax - punpcklqdq xmm12, xmm12 - - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - punpcklqdq xmm7, xmm0 - movd xmm0, rcx - mov rcx, QWORD PTR [r9+56] - xor rcx, QWORD PTR [r9+24] - movd xmm3, rax - mov rax, QWORD PTR [r9+48] - xor rax, QWORD PTR [r9+16] - punpcklqdq xmm3, xmm0 - movd xmm0, rcx - mov QWORD PTR [rsp], r13 - mov rcx, QWORD PTR [r9+88] - xor rcx, QWORD PTR [r9+72] - movd xmm6, rax - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - punpcklqdq xmm6, xmm0 - movd xmm0, rcx - mov QWORD PTR [rsp+256], r10 - mov rcx, rdi - mov QWORD PTR [rsp+264], r11 - movd xmm8, rax - and ecx, 2097136 - punpcklqdq xmm8, xmm0 - movd xmm0, QWORD PTR [r9+96] - punpcklqdq xmm4, xmm0 - movd xmm0, QWORD PTR [r9+104] - lea r8, QWORD PTR [rcx+rsi] - movdqu xmm11, XMMWORD PTR [r8] - punpcklqdq xmm5, xmm0 - lea r9, QWORD PTR [rdx+r13] - movdqu xmm15, XMMWORD PTR [r9] - - ALIGN(64) -rwz_main_loop_double: - movdqu xmm9, xmm15 - mov eax, edx - mov ebx, edx - xor eax, 16 - xor ebx, 32 - xor edx, 48 - - movd xmm0, r11 - movd xmm2, r10 - punpcklqdq xmm2, xmm0 - aesenc xmm9, xmm2 - - movdqu xmm0, XMMWORD PTR [rdx+r13] - movdqu xmm1, XMMWORD PTR [rbx+r13] - paddq xmm0, xmm7 - paddq xmm1, xmm2 - movdqu XMMWORD PTR [rbx+r13], xmm0 - movdqu xmm0, XMMWORD PTR [rax+r13] - movdqu XMMWORD PTR [rdx+r13], xmm1 - paddq xmm0, xmm3 - movdqu XMMWORD PTR [rax+r13], xmm0 - - movd r11, xmm9 - mov edx, r11d - and edx, 2097136 - movdqa xmm0, xmm9 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [r9], xmm0 - - lea rbx, QWORD PTR [rdx+r13] - mov r10, QWORD PTR 
[rdx+r13] - - movdqu xmm10, xmm11 - movd xmm0, rbp - movd xmm11, rdi - punpcklqdq xmm11, xmm0 - aesenc xmm10, xmm11 - - mov eax, ecx - mov r12d, ecx - xor eax, 16 - xor r12d, 32 - xor ecx, 48 - - movdqu xmm0, XMMWORD PTR [rcx+rsi] - paddq xmm0, xmm6 - movdqu xmm1, XMMWORD PTR [r12+rsi] - movdqu XMMWORD PTR [r12+rsi], xmm0 - paddq xmm1, xmm11 - movdqu xmm0, XMMWORD PTR [rax+rsi] - movdqu XMMWORD PTR [rcx+rsi], xmm1 - paddq xmm0, xmm8 - movdqu XMMWORD PTR [rax+rsi], xmm0 - - movd rcx, xmm10 - and ecx, 2097136 - - movdqa xmm0, xmm10 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [r8], xmm0 - mov r12, QWORD PTR [rcx+rsi] - - mov r9, QWORD PTR [rbx+8] - - xor edx, 16 - mov r8d, edx - mov r15d, edx - - movd rdx, xmm5 - shl rdx, 32 - movd rax, xmm4 - xor rdx, rax - xor r10, rdx - mov rax, r10 - mul r11 - mov r11d, r8d - xor r11d, 48 - movd xmm0, rdx - xor rdx, [r11+r13] - movd xmm1, rax - xor rax, [r11+r13+8] - punpcklqdq xmm0, xmm1 - - pxor xmm0, XMMWORD PTR [r8+r13] - movdqu xmm1, XMMWORD PTR [r11+r13] - paddq xmm0, xmm3 - paddq xmm1, xmm2 - movdqu XMMWORD PTR [r8+r13], xmm0 - xor r8d, 32 - movdqu xmm0, XMMWORD PTR [r8+r13] - movdqu XMMWORD PTR [r8+r13], xmm1 - paddq xmm0, xmm7 - movdqu XMMWORD PTR [r11+r13], xmm0 - - mov r11, QWORD PTR [rsp+256] - add r11, rdx - mov rdx, QWORD PTR [rsp+264] - add rdx, rax - mov QWORD PTR [rbx], r11 - xor r11, r10 - mov QWORD PTR [rbx+8], rdx - xor rdx, r9 - mov QWORD PTR [rsp+256], r11 - and r11d, 2097136 - mov QWORD PTR [rsp+264], rdx - mov QWORD PTR [rsp+8], r11 - lea r15, QWORD PTR [r11+r13] - movdqu xmm15, XMMWORD PTR [r11+r13] - lea r13, QWORD PTR [rsi+rcx] - movdqa xmm0, xmm5 - psrldq xmm0, 8 - movaps xmm2, xmm13 - movd r10, xmm0 - psllq xmm5, 1 - shl r10, 32 - movdqa xmm0, xmm9 - psrldq xmm0, 8 - movdqa xmm1, xmm10 - movd r11, xmm0 - psrldq xmm1, 8 - movd r8, xmm1 - psrldq xmm4, 8 - movaps xmm0, xmm13 - movd rax, xmm4 - xor r10, rax - movaps xmm1, xmm13 - xor r10, r12 - lea rax, QWORD PTR [r11+1] - shr rax, 1 - movdqa xmm3, xmm9 - 
punpcklqdq xmm3, xmm10 - paddq xmm5, xmm3 - movd rdx, xmm5 - psrldq xmm5, 8 - cvtsi2sd xmm2, rax - or edx, -2147483647 - lea rax, QWORD PTR [r8+1] - shr rax, 1 - movd r9, xmm5 - cvtsi2sd xmm0, rax - or r9d, -2147483647 - cvtsi2sd xmm1, rdx - unpcklpd xmm2, xmm0 - movaps xmm0, xmm13 - cvtsi2sd xmm0, r9 - unpcklpd xmm1, xmm0 - divpd xmm2, xmm1 - paddq xmm2, xmm14 - cvttsd2si rax, xmm2 - psrldq xmm2, 8 - mov rbx, rax - imul rax, rdx - sub r11, rax - js rwz_div_fix_1 -rwz_div_fix_1_ret: - - cvttsd2si rdx, xmm2 - mov rax, rdx - imul rax, r9 - movd xmm2, r11d - movd xmm4, ebx - sub r8, rax - js rwz_div_fix_2 -rwz_div_fix_2_ret: - - movd xmm1, r8d - movd xmm0, edx - punpckldq xmm2, xmm1 - punpckldq xmm4, xmm0 - punpckldq xmm4, xmm2 - paddq xmm3, xmm4 - movdqa xmm0, xmm3 - psrlq xmm0, 12 - paddq xmm0, xmm12 - sqrtpd xmm1, xmm0 - movd r9, xmm1 - movdqa xmm5, xmm1 - psrlq xmm5, 19 - test r9, 524287 - je rwz_sqrt_fix_1 -rwz_sqrt_fix_1_ret: - - movd r9, xmm10 - psrldq xmm1, 8 - movd r8, xmm1 - test r8, 524287 - je rwz_sqrt_fix_2 -rwz_sqrt_fix_2_ret: - - mov r12d, ecx - mov r8d, ecx - xor r12d, 16 - xor r8d, 32 - xor ecx, 48 - mov rax, r10 - mul r9 - movd xmm0, rax - movd xmm3, rdx - punpcklqdq xmm3, xmm0 - - movdqu xmm0, XMMWORD PTR [r12+rsi] - pxor xmm0, xmm3 - movdqu xmm1, XMMWORD PTR [r8+rsi] - xor rdx, [r8+rsi] - xor rax, [r8+rsi+8] - movdqu xmm3, XMMWORD PTR [rcx+rsi] - paddq xmm3, xmm6 - paddq xmm1, xmm11 - paddq xmm0, xmm8 - movdqu XMMWORD PTR [r8+rsi], xmm3 - movdqu XMMWORD PTR [rcx+rsi], xmm1 - movdqu XMMWORD PTR [r12+rsi], xmm0 - - add rdi, rdx - mov QWORD PTR [r13], rdi - xor rdi, r10 - mov ecx, edi - and ecx, 2097136 - lea r8, QWORD PTR [rcx+rsi] - - mov rdx, QWORD PTR [r13+8] - add rbp, rax - mov QWORD PTR [r13+8], rbp - movdqu xmm11, XMMWORD PTR [rcx+rsi] - xor rbp, rdx - mov r13, QWORD PTR [rsp] - movdqa xmm3, xmm7 - mov rdx, QWORD PTR [rsp+8] - movdqa xmm8, xmm6 - mov r10, QWORD PTR [rsp+256] - movdqa xmm7, xmm9 - mov r11, QWORD PTR [rsp+264] - movdqa xmm6, 
xmm10 - mov r9, r15 - dec r14d - jne rwz_main_loop_double - - ldmxcsr DWORD PTR [rsp+272] - movaps xmm13, XMMWORD PTR [rsp+48] - lea r11, QWORD PTR [rsp+184] - movaps xmm6, XMMWORD PTR [r11-24] - movaps xmm7, XMMWORD PTR [r11-40] - movaps xmm8, XMMWORD PTR [r11-56] - movaps xmm9, XMMWORD PTR [r11-72] - movaps xmm10, XMMWORD PTR [r11-88] - movaps xmm11, XMMWORD PTR [r11-104] - movaps xmm12, XMMWORD PTR [r11-120] - movaps xmm14, XMMWORD PTR [rsp+32] - movaps xmm15, XMMWORD PTR [rsp+16] - mov rsp, r11 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - jmp rwz_cnv2_double_mainloop_asm_endp - -rwz_div_fix_1: - dec rbx - add r11, rdx - jmp rwz_div_fix_1_ret - -rwz_div_fix_2: - dec rdx - add r8, r9 - jmp rwz_div_fix_2_ret - -rwz_sqrt_fix_1: - movd r8, xmm3 - movdqa xmm0, xmm5 - psrldq xmm0, 8 - dec r9 - mov r11d, -1022 - shl r11, 32 - mov rax, r9 - shr r9, 19 - shr rax, 20 - mov rdx, r9 - sub rdx, rax - lea rdx, [rdx+r11+1] - add rax, r11 - imul rdx, rax - sub rdx, r8 - adc r9, 0 - movd xmm5, r9 - punpcklqdq xmm5, xmm0 - jmp rwz_sqrt_fix_1_ret - -rwz_sqrt_fix_2: - psrldq xmm3, 8 - movd r11, xmm3 - dec r8 - mov ebx, -1022 - shl rbx, 32 - mov rax, r8 - shr r8, 19 - shr rax, 20 - mov rdx, r8 - sub rdx, rax - lea rdx, [rdx+rbx+1] - add rax, rbx - imul rdx, rax - sub rdx, r11 - adc r8, 0 - movd xmm0, r8 - punpcklqdq xmm5, xmm0 - jmp rwz_sqrt_fix_2_ret - -rwz_cnv2_double_mainloop_asm_endp: diff --git a/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc b/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc deleted file mode 100644 index e2b7a5fc..00000000 --- a/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc +++ /dev/null @@ -1,188 +0,0 @@ - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 80 - - stmxcsr DWORD PTR [rsp] - mov DWORD PTR [rsp+4], 24448 - ldmxcsr DWORD PTR [rsp+4] - - mov rax, QWORD PTR [rcx+48] - mov r9, rcx - xor rax, QWORD PTR [rcx+16] - mov esi, 393216 
- mov r8, QWORD PTR [rcx+32] - mov r13d, -2147483647 - xor r8, QWORD PTR [rcx] - mov r11, QWORD PTR [rcx+40] - mov r10, r8 - mov rdx, QWORD PTR [rcx+56] - movd xmm4, rax - xor rdx, QWORD PTR [rcx+24] - xor r11, QWORD PTR [rcx+8] - mov rbx, QWORD PTR [rcx+224] - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - movd xmm0, rdx - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r9+72] - movd xmm3, QWORD PTR [r9+104] - movaps XMMWORD PTR [rsp+64], xmm6 - movaps XMMWORD PTR [rsp+48], xmm7 - movaps XMMWORD PTR [rsp+32], xmm8 - and r10d, 2097136 - movd xmm5, rax - - xor eax, eax - mov QWORD PTR [rsp+16], rax - - mov ax, 1023 - shl rax, 52 - movd xmm8, rax - mov r15, QWORD PTR [r9+96] - punpcklqdq xmm4, xmm0 - movd xmm0, rcx - punpcklqdq xmm5, xmm0 - movdqu xmm6, XMMWORD PTR [r10+rbx] - - ALIGN(64) -rwz_main_loop: - lea rdx, QWORD PTR [r10+rbx] - mov ecx, r10d - mov eax, r10d - mov rdi, r15 - xor ecx, 16 - xor eax, 32 - xor r10d, 48 - movd xmm0, r11 - movd xmm7, r8 - punpcklqdq xmm7, xmm0 - aesenc xmm6, xmm7 - movd rbp, xmm6 - mov r9, rbp - and r9d, 2097136 - movdqu xmm0, XMMWORD PTR [rcx+rbx] - movdqu xmm1, XMMWORD PTR [rax+rbx] - movdqu xmm2, XMMWORD PTR [r10+rbx] - paddq xmm0, xmm5 - paddq xmm1, xmm7 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [rcx+rbx], xmm0 - movdqu XMMWORD PTR [rax+rbx], xmm2 - movdqu XMMWORD PTR [r10+rbx], xmm1 - mov r10, r9 - xor r10d, 32 - movd rcx, xmm3 - mov rax, rcx - shl rax, 32 - xor rdi, rax - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [rdx], xmm0 - xor rdi, QWORD PTR [r9+rbx] - lea r14, QWORD PTR [r9+rbx] - mov r12, QWORD PTR [r14+8] - xor edx, edx - lea r9d, DWORD PTR [ecx+ecx] - add r9d, ebp - movdqa xmm0, xmm6 - psrldq xmm0, 8 - or r9d, r13d - movd rax, xmm0 - div r9 - xorps xmm3, xmm3 - mov eax, eax - shl rdx, 32 - add rdx, rax - lea r9, QWORD PTR [rdx+rbp] - mov r15, rdx - mov rax, r9 - shr rax, 12 - movd xmm0, rax - paddq xmm0, xmm8 - sqrtsd xmm3, xmm0 - psubq xmm3, XMMWORD PTR [rsp+16] - movd rdx, xmm3 - test edx, 
524287 - je rwz_sqrt_fixup - psrlq xmm3, 19 -rwz_sqrt_fixup_ret: - - mov ecx, r10d - mov rax, rdi - mul rbp - movd xmm2, rdx - xor rdx, [rcx+rbx] - add r8, rdx - mov QWORD PTR [r14], r8 - xor r8, rdi - mov edi, r8d - and edi, 2097136 - movd xmm0, rax - xor rax, [rcx+rbx+8] - add r11, rax - mov QWORD PTR [r14+8], r11 - punpcklqdq xmm2, xmm0 - - mov r9d, r10d - xor r9d, 48 - xor r10d, 16 - pxor xmm2, XMMWORD PTR [r9+rbx] - movdqu xmm0, XMMWORD PTR [r10+rbx] - paddq xmm0, xmm4 - movdqu xmm1, XMMWORD PTR [rcx+rbx] - paddq xmm2, xmm5 - paddq xmm1, xmm7 - movdqa xmm5, xmm4 - movdqu XMMWORD PTR [r9+rbx], xmm2 - movdqa xmm4, xmm6 - movdqu XMMWORD PTR [rcx+rbx], xmm0 - movdqu XMMWORD PTR [r10+rbx], xmm1 - movdqu xmm6, [rdi+rbx] - mov r10d, edi - xor r11, r12 - dec rsi - jne rwz_main_loop - - ldmxcsr DWORD PTR [rsp] - mov rbx, QWORD PTR [rsp+160] - movaps xmm6, XMMWORD PTR [rsp+64] - movaps xmm7, XMMWORD PTR [rsp+48] - movaps xmm8, XMMWORD PTR [rsp+32] - add rsp, 80 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - jmp cnv2_rwz_main_loop_endp - -rwz_sqrt_fixup: - dec rdx - mov r13d, -1022 - shl r13, 32 - mov rax, rdx - shr rdx, 19 - shr rax, 20 - mov rcx, rdx - sub rcx, rax - add rax, r13 - not r13 - sub rcx, r13 - mov r13d, -2147483647 - imul rcx, rax - sub rcx, r9 - adc rdx, 0 - movd xmm3, rdx - jmp rwz_sqrt_fixup_ret - -cnv2_rwz_main_loop_endp: diff --git a/src/crypto/asm/win64/cn_main_loop.S b/src/crypto/asm/win64/cn_main_loop.S deleted file mode 100644 index 63c3a8ba..00000000 --- a/src/crypto/asm/win64/cn_main_loop.S +++ /dev/null @@ -1,45 +0,0 @@ -#define ALIGN(x) .align 64 -.intel_syntax noprefix -.section .text -.global cnv2_mainloop_ivybridge_asm -.global cnv2_mainloop_ryzen_asm -.global cnv2_mainloop_bulldozer_asm -.global cnv2_double_mainloop_sandybridge_asm -.global cnv2_rwz_mainloop_asm -.global cnv2_rwz_double_mainloop_asm - -ALIGN(64) -cnv2_mainloop_ivybridge_asm: - #include "../cn2/cnv2_main_loop_ivybridge.inc" - ret 0 - mov eax, 
3735929054 - -ALIGN(64) -cnv2_mainloop_ryzen_asm: - #include "../cn2/cnv2_main_loop_ryzen.inc" - ret 0 - mov eax, 3735929054 - -ALIGN(64) -cnv2_mainloop_bulldozer_asm: - #include "../cn2/cnv2_main_loop_bulldozer.inc" - ret 0 - mov eax, 3735929054 - -ALIGN(64) -cnv2_double_mainloop_sandybridge_asm: - #include "../cn2/cnv2_double_main_loop_sandybridge.inc" - ret 0 - mov eax, 3735929054 - -ALIGN(64) -cnv2_rwz_mainloop_asm: - #include "cn2/cnv2_rwz_main_loop.inc" - ret 0 - mov eax, 3735929054 - -ALIGN(64) -cnv2_rwz_double_mainloop_asm: - #include "cn2/cnv2_rwz_double_main_loop.inc" - ret 0 - mov eax, 3735929054 diff --git a/src/crypto/asm/win64/cn_main_loop.asm b/src/crypto/asm/win64/cn_main_loop.asm deleted file mode 100644 index 57246cf5..00000000 --- a/src/crypto/asm/win64/cn_main_loop.asm +++ /dev/null @@ -1,52 +0,0 @@ -_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE -PUBLIC cnv2_mainloop_ivybridge_asm -PUBLIC cnv2_mainloop_ryzen_asm -PUBLIC cnv2_mainloop_bulldozer_asm -PUBLIC cnv2_double_mainloop_sandybridge_asm -PUBLIC cnv2_rwz_mainloop_asm -PUBLIC cnv2_rwz_double_mainloop_asm - -ALIGN 64 -cnv2_mainloop_ivybridge_asm PROC - INCLUDE cn2/cnv2_main_loop_ivybridge.inc - ret 0 - mov eax, 3735929054 -cnv2_mainloop_ivybridge_asm ENDP - -ALIGN 64 -cnv2_mainloop_ryzen_asm PROC - INCLUDE cn2/cnv2_main_loop_ryzen.inc - ret 0 - mov eax, 3735929054 -cnv2_mainloop_ryzen_asm ENDP - -ALIGN 64 -cnv2_mainloop_bulldozer_asm PROC - INCLUDE cn2/cnv2_main_loop_bulldozer.inc - ret 0 - mov eax, 3735929054 -cnv2_mainloop_bulldozer_asm ENDP - -ALIGN 64 -cnv2_double_mainloop_sandybridge_asm PROC - INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc - ret 0 - mov eax, 3735929054 -cnv2_double_mainloop_sandybridge_asm ENDP - -ALIGN(64) -cnv2_rwz_mainloop_asm PROC - INCLUDE cn2/cnv2_rwz_main_loop.inc - ret 0 - mov eax, 3735929054 -cnv2_rwz_mainloop_asm ENDP - -ALIGN(64) -cnv2_rwz_double_mainloop_asm PROC - INCLUDE cn2/cnv2_rwz_double_main_loop.inc - ret 0 - mov eax, 3735929054 
-cnv2_rwz_double_mainloop_asm ENDP - -_TEXT_CNV2_MAINLOOP ENDS -END diff --git a/src/crypto/c_blake256.c b/src/crypto/c_blake256.c deleted file mode 100644 index 00a84c22..00000000 --- a/src/crypto/c_blake256.c +++ /dev/null @@ -1,326 +0,0 @@ -/* - * The blake256_* and blake224_* functions are largely copied from - * blake256_light.c and blake224_light.c from the BLAKE website: - * - * http://131002.net/blake/ - * - * The hmac_* functions implement HMAC-BLAKE-256 and HMAC-BLAKE-224. - * HMAC is specified by RFC 2104. - */ - -#include -#include -#include -#include "c_blake256.h" - -#define U8TO32(p) \ - (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \ - ((uint32_t)((p)[2]) << 8) | ((uint32_t)((p)[3]) )) -#define U32TO8(p, v) \ - (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \ - (p)[2] = (uint8_t)((v) >> 8); (p)[3] = (uint8_t)((v) ); - -const uint8_t sigma[][16] = { - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}, - {14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3}, - {11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4}, - { 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8}, - { 9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13}, - { 2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9}, - {12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11}, - {13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10}, - { 6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5}, - {10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13, 0}, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}, - {14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3}, - {11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4}, - { 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8} -}; - -const uint32_t cst[16] = { - 0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344, - 0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89, - 0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C, - 0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917 -}; - -static const uint8_t padding[] = { - 0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -}; - - -void blake256_compress(state *S, const uint8_t *block) { - uint32_t v[16], m[16], i; - -#define ROT(x,n) (((x)<<(32-n))|((x)>>(n))) -#define G(a,b,c,d,e) \ - v[a] += (m[sigma[i][e]] ^ cst[sigma[i][e+1]]) + v[b]; \ - v[d] = ROT(v[d] ^ v[a],16); \ - v[c] += v[d]; \ - v[b] = ROT(v[b] ^ v[c],12); \ - v[a] += (m[sigma[i][e+1]] ^ cst[sigma[i][e]])+v[b]; \ - v[d] = ROT(v[d] ^ v[a], 8); \ - v[c] += v[d]; \ - v[b] = ROT(v[b] ^ v[c], 7); - - for (i = 0; i < 16; ++i) m[i] = U8TO32(block + i * 4); - for (i = 0; i < 8; ++i) v[i] = S->h[i]; - v[ 8] = S->s[0] ^ 0x243F6A88; - v[ 9] = S->s[1] ^ 0x85A308D3; - v[10] = S->s[2] ^ 0x13198A2E; - v[11] = S->s[3] ^ 0x03707344; - v[12] = 0xA4093822; - v[13] = 0x299F31D0; - v[14] = 0x082EFA98; - v[15] = 0xEC4E6C89; - - if (S->nullt == 0) { - v[12] ^= S->t[0]; - v[13] ^= S->t[0]; - v[14] ^= S->t[1]; - v[15] ^= S->t[1]; - } - - for (i = 0; i < 14; ++i) { - G(0, 4, 8, 12, 0); - G(1, 5, 9, 13, 2); - G(2, 6, 10, 14, 4); - G(3, 7, 11, 15, 6); - G(3, 4, 9, 14, 14); - G(2, 7, 8, 13, 12); - G(0, 5, 10, 15, 8); - G(1, 6, 11, 12, 10); - } - - for (i = 0; i < 16; ++i) S->h[i % 8] ^= v[i]; - for (i = 0; i < 8; ++i) S->h[i] ^= S->s[i % 4]; -} - -void blake256_init(state *S) { - S->h[0] = 0x6A09E667; - S->h[1] = 0xBB67AE85; - S->h[2] = 0x3C6EF372; - S->h[3] = 0xA54FF53A; - S->h[4] = 0x510E527F; - S->h[5] = 0x9B05688C; - S->h[6] = 0x1F83D9AB; - S->h[7] = 0x5BE0CD19; - S->t[0] = S->t[1] = S->buflen = S->nullt = 0; - S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0; -} - -void blake224_init(state *S) { - S->h[0] = 0xC1059ED8; - S->h[1] = 0x367CD507; - S->h[2] = 0x3070DD17; - S->h[3] = 0xF70E5939; - S->h[4] = 0xFFC00B31; - S->h[5] = 0x68581511; - S->h[6] = 0x64F98FA7; - S->h[7] = 0xBEFA4FA4; - S->t[0] = S->t[1] = S->buflen = S->nullt = 0; - S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0; -} - -// datalen = number of bits -void blake256_update(state *S, const uint8_t *data, uint64_t datalen) { - int 
left = S->buflen >> 3; - int fill = 64 - left; - - if (left && (((datalen >> 3) & 0x3F) >= (unsigned) fill)) { - memcpy((void *) (S->buf + left), (void *) data, fill); - S->t[0] += 512; - if (S->t[0] == 0) S->t[1]++; - blake256_compress(S, S->buf); - data += fill; - datalen -= (fill << 3); - left = 0; - } - - while (datalen >= 512) { - S->t[0] += 512; - if (S->t[0] == 0) S->t[1]++; - blake256_compress(S, data); - data += 64; - datalen -= 512; - } - - if (datalen > 0) { - memcpy((void *) (S->buf + left), (void *) data, datalen >> 3); - S->buflen = (left << 3) + (int) datalen; - } else { - S->buflen = 0; - } -} - -// datalen = number of bits -void blake224_update(state *S, const uint8_t *data, uint64_t datalen) { - blake256_update(S, data, datalen); -} - -void blake256_final_h(state *S, uint8_t *digest, uint8_t pa, uint8_t pb) { - uint8_t msglen[8]; - uint32_t lo = S->t[0] + S->buflen, hi = S->t[1]; - if (lo < (unsigned) S->buflen) hi++; - U32TO8(msglen + 0, hi); - U32TO8(msglen + 4, lo); - - if (S->buflen == 440) { /* one padding byte */ - S->t[0] -= 8; - blake256_update(S, &pa, 8); - } else { - if (S->buflen < 440) { /* enough space to fill the block */ - if (S->buflen == 0) S->nullt = 1; - S->t[0] -= 440 - S->buflen; - blake256_update(S, padding, 440 - S->buflen); - } else { /* need 2 compressions */ - S->t[0] -= 512 - S->buflen; - blake256_update(S, padding, 512 - S->buflen); - S->t[0] -= 440; - blake256_update(S, padding + 1, 440); - S->nullt = 1; - } - blake256_update(S, &pb, 8); - S->t[0] -= 8; - } - S->t[0] -= 64; - blake256_update(S, msglen, 64); - - U32TO8(digest + 0, S->h[0]); - U32TO8(digest + 4, S->h[1]); - U32TO8(digest + 8, S->h[2]); - U32TO8(digest + 12, S->h[3]); - U32TO8(digest + 16, S->h[4]); - U32TO8(digest + 20, S->h[5]); - U32TO8(digest + 24, S->h[6]); - U32TO8(digest + 28, S->h[7]); -} - -void blake256_final(state *S, uint8_t *digest) { - blake256_final_h(S, digest, 0x81, 0x01); -} - -void blake224_final(state *S, uint8_t *digest) { - 
blake256_final_h(S, digest, 0x80, 0x00); -} - -// inlen = number of bytes -void blake256_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) { - state S; - blake256_init(&S); - blake256_update(&S, in, inlen * 8); - blake256_final(&S, out); -} - -// inlen = number of bytes -void blake224_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) { - state S; - blake224_init(&S); - blake224_update(&S, in, inlen * 8); - blake224_final(&S, out); -} - -// keylen = number of bytes -void hmac_blake256_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) { - const uint8_t *key = _key; - uint8_t keyhash[32]; - uint8_t pad[64]; - uint64_t i; - - if (keylen > 64) { - blake256_hash(keyhash, key, keylen); - key = keyhash; - keylen = 32; - } - - blake256_init(&S->inner); - memset(pad, 0x36, 64); - for (i = 0; i < keylen; ++i) { - pad[i] ^= key[i]; - } - blake256_update(&S->inner, pad, 512); - - blake256_init(&S->outer); - memset(pad, 0x5c, 64); - for (i = 0; i < keylen; ++i) { - pad[i] ^= key[i]; - } - blake256_update(&S->outer, pad, 512); - - memset(keyhash, 0, 32); -} - -// keylen = number of bytes -void hmac_blake224_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) { - const uint8_t *key = _key; - uint8_t keyhash[32]; - uint8_t pad[64]; - uint64_t i; - - if (keylen > 64) { - blake256_hash(keyhash, key, keylen); - key = keyhash; - keylen = 28; - } - - blake224_init(&S->inner); - memset(pad, 0x36, 64); - for (i = 0; i < keylen; ++i) { - pad[i] ^= key[i]; - } - blake224_update(&S->inner, pad, 512); - - blake224_init(&S->outer); - memset(pad, 0x5c, 64); - for (i = 0; i < keylen; ++i) { - pad[i] ^= key[i]; - } - blake224_update(&S->outer, pad, 512); - - memset(keyhash, 0, 32); -} - -// datalen = number of bits -void hmac_blake256_update(hmac_state *S, const uint8_t *data, uint64_t datalen) { - // update the inner state - blake256_update(&S->inner, data, datalen); -} - -// datalen = number of bits -void hmac_blake224_update(hmac_state *S, const uint8_t *data, uint64_t 
datalen) { - // update the inner state - blake224_update(&S->inner, data, datalen); -} - -void hmac_blake256_final(hmac_state *S, uint8_t *digest) { - uint8_t ihash[32]; - blake256_final(&S->inner, ihash); - blake256_update(&S->outer, ihash, 256); - blake256_final(&S->outer, digest); - memset(ihash, 0, 32); -} - -void hmac_blake224_final(hmac_state *S, uint8_t *digest) { - uint8_t ihash[32]; - blake224_final(&S->inner, ihash); - blake224_update(&S->outer, ihash, 224); - blake224_final(&S->outer, digest); - memset(ihash, 0, 32); -} - -// keylen = number of bytes; inlen = number of bytes -void hmac_blake256_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) { - hmac_state S; - hmac_blake256_init(&S, key, keylen); - hmac_blake256_update(&S, in, inlen * 8); - hmac_blake256_final(&S, out); -} - -// keylen = number of bytes; inlen = number of bytes -void hmac_blake224_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) { - hmac_state S; - hmac_blake224_init(&S, key, keylen); - hmac_blake224_update(&S, in, inlen * 8); - hmac_blake224_final(&S, out); -} diff --git a/src/crypto/c_blake256.h b/src/crypto/c_blake256.h deleted file mode 100644 index b9c2aad0..00000000 --- a/src/crypto/c_blake256.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef _BLAKE256_H_ -#define _BLAKE256_H_ - -#include - -typedef struct { - uint32_t h[8], s[4], t[2]; - int buflen, nullt; - uint8_t buf[64]; -} state; - -typedef struct { - state inner; - state outer; -} hmac_state; - -void blake256_init(state *); -void blake224_init(state *); - -void blake256_update(state *, const uint8_t *, uint64_t); -void blake224_update(state *, const uint8_t *, uint64_t); - -void blake256_final(state *, uint8_t *); -void blake224_final(state *, uint8_t *); - -void blake256_hash(uint8_t *, const uint8_t *, uint64_t); -void blake224_hash(uint8_t *, const uint8_t *, uint64_t); - -/* HMAC functions: */ - -void hmac_blake256_init(hmac_state *, const 
uint8_t *, uint64_t); -void hmac_blake224_init(hmac_state *, const uint8_t *, uint64_t); - -void hmac_blake256_update(hmac_state *, const uint8_t *, uint64_t); -void hmac_blake224_update(hmac_state *, const uint8_t *, uint64_t); - -void hmac_blake256_final(hmac_state *, uint8_t *); -void hmac_blake224_final(hmac_state *, uint8_t *); - -void hmac_blake256_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t); -void hmac_blake224_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t); - -#endif /* _BLAKE256_H_ */ diff --git a/src/crypto/c_groestl.c b/src/crypto/c_groestl.c deleted file mode 100644 index 0f57ea12..00000000 --- a/src/crypto/c_groestl.c +++ /dev/null @@ -1,360 +0,0 @@ -/* hash.c April 2012 - * Groestl ANSI C code optimised for 32-bit machines - * Author: Thomas Krinninger - * - * This work is based on the implementation of - * Soeren S. Thomsen and Krystian Matusiewicz - * - * - */ - -#include "c_groestl.h" -#include "groestl_tables.h" - -#define P_TYPE 0 -#define Q_TYPE 1 - -const uint8_t shift_Values[2][8] = {{0,1,2,3,4,5,6,7},{1,3,5,7,0,2,4,6}}; - -const uint8_t indices_cyclic[15] = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6}; - - -#define ROTATE_COLUMN_DOWN(v1, v2, amount_bytes, temp_var) {temp_var = (v1<<(8*amount_bytes))|(v2>>(8*(4-amount_bytes))); \ - v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \ - v1 = temp_var;} - - -#define COLUMN(x,y,i,c0,c1,c2,c3,c4,c5,c6,c7,tv1,tv2,tu,tl,t) \ - tu = T[2*(uint32_t)x[4*c0+0]]; \ - tl = T[2*(uint32_t)x[4*c0+0]+1]; \ - tv1 = T[2*(uint32_t)x[4*c1+1]]; \ - tv2 = T[2*(uint32_t)x[4*c1+1]+1]; \ - ROTATE_COLUMN_DOWN(tv1,tv2,1,t) \ - tu ^= tv1; \ - tl ^= tv2; \ - tv1 = T[2*(uint32_t)x[4*c2+2]]; \ - tv2 = T[2*(uint32_t)x[4*c2+2]+1]; \ - ROTATE_COLUMN_DOWN(tv1,tv2,2,t) \ - tu ^= tv1; \ - tl ^= tv2; \ - tv1 = T[2*(uint32_t)x[4*c3+3]]; \ - tv2 = T[2*(uint32_t)x[4*c3+3]+1]; \ - ROTATE_COLUMN_DOWN(tv1,tv2,3,t) \ - tu ^= tv1; \ - tl ^= tv2; \ - tl ^= T[2*(uint32_t)x[4*c4+0]]; \ - tu ^= 
T[2*(uint32_t)x[4*c4+0]+1]; \ - tv1 = T[2*(uint32_t)x[4*c5+1]]; \ - tv2 = T[2*(uint32_t)x[4*c5+1]+1]; \ - ROTATE_COLUMN_DOWN(tv1,tv2,1,t) \ - tl ^= tv1; \ - tu ^= tv2; \ - tv1 = T[2*(uint32_t)x[4*c6+2]]; \ - tv2 = T[2*(uint32_t)x[4*c6+2]+1]; \ - ROTATE_COLUMN_DOWN(tv1,tv2,2,t) \ - tl ^= tv1; \ - tu ^= tv2; \ - tv1 = T[2*(uint32_t)x[4*c7+3]]; \ - tv2 = T[2*(uint32_t)x[4*c7+3]+1]; \ - ROTATE_COLUMN_DOWN(tv1,tv2,3,t) \ - tl ^= tv1; \ - tu ^= tv2; \ - y[i] = tu; \ - y[i+1] = tl; - - -/* compute one round of P (short variants) */ -static void RND512P(uint8_t *x, uint32_t *y, uint32_t r) { - uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp; - uint32_t* x32 = (uint32_t*)x; - x32[ 0] ^= 0x00000000^r; - x32[ 2] ^= 0x00000010^r; - x32[ 4] ^= 0x00000020^r; - x32[ 6] ^= 0x00000030^r; - x32[ 8] ^= 0x00000040^r; - x32[10] ^= 0x00000050^r; - x32[12] ^= 0x00000060^r; - x32[14] ^= 0x00000070^r; - COLUMN(x,y, 0, 0, 2, 4, 6, 9, 11, 13, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y, 2, 2, 4, 6, 8, 11, 13, 15, 1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y, 4, 4, 6, 8, 10, 13, 15, 1, 3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y, 6, 6, 8, 10, 12, 15, 1, 3, 5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y, 8, 8, 10, 12, 14, 1, 3, 5, 7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y,10, 10, 12, 14, 0, 3, 5, 7, 9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y,12, 12, 14, 0, 2, 5, 7, 9, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y,14, 14, 0, 2, 4, 7, 9, 11, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); -} - -/* compute one round of Q (short variants) */ -static void RND512Q(uint8_t *x, uint32_t *y, uint32_t r) { - uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp; - uint32_t* x32 = (uint32_t*)x; - x32[ 0] = ~x32[ 0]; - x32[ 1] ^= 
0xffffffff^r; - x32[ 2] = ~x32[ 2]; - x32[ 3] ^= 0xefffffff^r; - x32[ 4] = ~x32[ 4]; - x32[ 5] ^= 0xdfffffff^r; - x32[ 6] = ~x32[ 6]; - x32[ 7] ^= 0xcfffffff^r; - x32[ 8] = ~x32[ 8]; - x32[ 9] ^= 0xbfffffff^r; - x32[10] = ~x32[10]; - x32[11] ^= 0xafffffff^r; - x32[12] = ~x32[12]; - x32[13] ^= 0x9fffffff^r; - x32[14] = ~x32[14]; - x32[15] ^= 0x8fffffff^r; - COLUMN(x,y, 0, 2, 6, 10, 14, 1, 5, 9, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y, 2, 4, 8, 12, 0, 3, 7, 11, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y, 4, 6, 10, 14, 2, 5, 9, 13, 1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y, 6, 8, 12, 0, 4, 7, 11, 15, 3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y, 8, 10, 14, 2, 6, 9, 13, 1, 5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y,10, 12, 0, 4, 8, 11, 15, 3, 7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y,12, 14, 2, 6, 10, 13, 1, 5, 9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y,14, 0, 4, 8, 12, 15, 3, 7, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); -} - -/* compute compression function (short variants) */ -static void F512(uint32_t *h, const uint32_t *m) { - int i; - uint32_t Ptmp[2*COLS512]; - uint32_t Qtmp[2*COLS512]; - uint32_t y[2*COLS512]; - uint32_t z[2*COLS512]; - - for (i = 0; i < 2*COLS512; i++) { - z[i] = m[i]; - Ptmp[i] = h[i]^m[i]; - } - - /* compute Q(m) */ - RND512Q((uint8_t*)z, y, 0x00000000); - RND512Q((uint8_t*)y, z, 0x01000000); - RND512Q((uint8_t*)z, y, 0x02000000); - RND512Q((uint8_t*)y, z, 0x03000000); - RND512Q((uint8_t*)z, y, 0x04000000); - RND512Q((uint8_t*)y, z, 0x05000000); - RND512Q((uint8_t*)z, y, 0x06000000); - RND512Q((uint8_t*)y, z, 0x07000000); - RND512Q((uint8_t*)z, y, 0x08000000); - RND512Q((uint8_t*)y, Qtmp, 0x09000000); - - /* compute P(h+m) */ - RND512P((uint8_t*)Ptmp, y, 0x00000000); - 
RND512P((uint8_t*)y, z, 0x00000001); - RND512P((uint8_t*)z, y, 0x00000002); - RND512P((uint8_t*)y, z, 0x00000003); - RND512P((uint8_t*)z, y, 0x00000004); - RND512P((uint8_t*)y, z, 0x00000005); - RND512P((uint8_t*)z, y, 0x00000006); - RND512P((uint8_t*)y, z, 0x00000007); - RND512P((uint8_t*)z, y, 0x00000008); - RND512P((uint8_t*)y, Ptmp, 0x00000009); - - /* compute P(h+m) + Q(m) + h */ - for (i = 0; i < 2*COLS512; i++) { - h[i] ^= Ptmp[i]^Qtmp[i]; - } -} - - -/* digest up to msglen bytes of input (full blocks only) */ -static void Transform(groestlHashState *ctx, - const uint8_t *input, - int msglen) { - - /* digest message, one block at a time */ - for (; msglen >= SIZE512; - msglen -= SIZE512, input += SIZE512) { - F512(ctx->chaining,(uint32_t*)input); - - /* increment block counter */ - ctx->block_counter1++; - if (ctx->block_counter1 == 0) ctx->block_counter2++; - } -} - -/* given state h, do h <- P(h)+h */ -static void OutputTransformation(groestlHashState *ctx) { - int j; - uint32_t temp[2*COLS512]; - uint32_t y[2*COLS512]; - uint32_t z[2*COLS512]; - - - - for (j = 0; j < 2*COLS512; j++) { - temp[j] = ctx->chaining[j]; - } - RND512P((uint8_t*)temp, y, 0x00000000); - RND512P((uint8_t*)y, z, 0x00000001); - RND512P((uint8_t*)z, y, 0x00000002); - RND512P((uint8_t*)y, z, 0x00000003); - RND512P((uint8_t*)z, y, 0x00000004); - RND512P((uint8_t*)y, z, 0x00000005); - RND512P((uint8_t*)z, y, 0x00000006); - RND512P((uint8_t*)y, z, 0x00000007); - RND512P((uint8_t*)z, y, 0x00000008); - RND512P((uint8_t*)y, temp, 0x00000009); - for (j = 0; j < 2*COLS512; j++) { - ctx->chaining[j] ^= temp[j]; - } -} - -/* initialise context */ -static void Init(groestlHashState* ctx) { - int i = 0; - /* allocate memory for state and data buffer */ - - for(;i<(SIZE512/sizeof(uint32_t));i++) - { - ctx->chaining[i] = 0; - } - - /* set initial value */ - ctx->chaining[2*COLS512-1] = u32BIG((uint32_t)HASH_BIT_LEN); - - /* set other variables */ - ctx->buf_ptr = 0; - ctx->block_counter1 = 0; - 
ctx->block_counter2 = 0; - ctx->bits_in_last_byte = 0; -} - -/* update state with databitlen bits of input */ -static void Update(groestlHashState* ctx, - const BitSequence* input, - DataLength databitlen) { - int index = 0; - int msglen = (int)(databitlen/8); - int rem = (int)(databitlen%8); - - /* if the buffer contains data that has not yet been digested, first - add data to buffer until full */ - if (ctx->buf_ptr) { - while (ctx->buf_ptr < SIZE512 && index < msglen) { - ctx->buffer[(int)ctx->buf_ptr++] = input[index++]; - } - if (ctx->buf_ptr < SIZE512) { - /* buffer still not full, return */ - if (rem) { - ctx->bits_in_last_byte = rem; - ctx->buffer[(int)ctx->buf_ptr++] = input[index]; - } - return; - } - - /* digest buffer */ - ctx->buf_ptr = 0; - Transform(ctx, ctx->buffer, SIZE512); - } - - /* digest bulk of message */ - Transform(ctx, input+index, msglen-index); - index += ((msglen-index)/SIZE512)*SIZE512; - - /* store remaining data in buffer */ - while (index < msglen) { - ctx->buffer[(int)ctx->buf_ptr++] = input[index++]; - } - - /* if non-integral number of bytes have been supplied, store - remaining bits in last byte, together with information about - number of bits */ - if (rem) { - ctx->bits_in_last_byte = rem; - ctx->buffer[(int)ctx->buf_ptr++] = input[index]; - } -} - -#define BILB ctx->bits_in_last_byte - -/* finalise: process remaining data (including padding), perform - output transformation, and write hash result to 'output' */ -static void Final(groestlHashState* ctx, - BitSequence* output) { - int i, j = 0, hashbytelen = HASH_BIT_LEN/8; - uint8_t *s = (BitSequence*)ctx->chaining; - - /* pad with '1'-bit and first few '0'-bits */ - if (BILB) { - ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-BILB); - BILB = 0; - } - else ctx->buffer[(int)ctx->buf_ptr++] = 0x80; - - /* pad with '0'-bits */ - if (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) { - /* padding requires two blocks */ - while (ctx->buf_ptr < SIZE512) { - 
ctx->buffer[(int)ctx->buf_ptr++] = 0; - } - /* digest first padding block */ - Transform(ctx, ctx->buffer, SIZE512); - ctx->buf_ptr = 0; - } - while (ctx->buf_ptr < SIZE512-LENGTHFIELDLEN) { - ctx->buffer[(int)ctx->buf_ptr++] = 0; - } - - /* length padding */ - ctx->block_counter1++; - if (ctx->block_counter1 == 0) ctx->block_counter2++; - ctx->buf_ptr = SIZE512; - - while (ctx->buf_ptr > SIZE512-(int)sizeof(uint32_t)) { - ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter1; - ctx->block_counter1 >>= 8; - } - while (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) { - ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter2; - ctx->block_counter2 >>= 8; - } - /* digest final padding block */ - Transform(ctx, ctx->buffer, SIZE512); - /* perform output transformation */ - OutputTransformation(ctx); - - /* store hash result in output */ - for (i = SIZE512-hashbytelen; i < SIZE512; i++,j++) { - output[j] = s[i]; - } - - /* zeroise relevant variables and deallocate memory */ - for (i = 0; i < COLS512; i++) { - ctx->chaining[i] = 0; - } - for (i = 0; i < SIZE512; i++) { - ctx->buffer[i] = 0; - } -} - -/* hash bit sequence */ -void groestl(const BitSequence* data, - DataLength databitlen, - BitSequence* hashval) { - - groestlHashState context; - - /* initialise */ - Init(&context); - - - /* process message */ - Update(&context, data, databitlen); - - /* finalise */ - Final(&context, hashval); -} -/* -static int crypto_hash(unsigned char *out, - const unsigned char *in, - unsigned long long len) -{ - groestl(in, 8*len, out); - return 0; -} - -*/ diff --git a/src/crypto/c_groestl.h b/src/crypto/c_groestl.h deleted file mode 100644 index 2b513393..00000000 --- a/src/crypto/c_groestl.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef __hash_h -#define __hash_h -/* -#include "crypto_uint8.h" -#include "crypto_uint32.h" -#include "crypto_uint64.h" -#include "crypto_hash.h" - -typedef crypto_uint8 uint8_t; -typedef crypto_uint32 uint32_t; -typedef crypto_uint64 uint64_t; -*/ 
-#include - -#include "hash.h" - -/* some sizes (number of bytes) */ -#define ROWS 8 -#define LENGTHFIELDLEN ROWS -#define COLS512 8 - -#define SIZE512 (ROWS*COLS512) - -#define ROUNDS512 10 -#define HASH_BIT_LEN 256 - -#define ROTL32(v, n) ((((v)<<(n))|((v)>>(32-(n))))&li_32(ffffffff)) - - -#define li_32(h) 0x##h##u -#define EXT_BYTE(var,n) ((uint8_t)((uint32_t)(var) >> (8*n))) -#define u32BIG(a) \ - ((ROTL32(a,8) & li_32(00FF00FF)) | \ - (ROTL32(a,24) & li_32(FF00FF00))) - - -/* NIST API begin */ -typedef struct { - uint32_t chaining[SIZE512/sizeof(uint32_t)]; /* actual state */ - uint32_t block_counter1, - block_counter2; /* message block counter(s) */ - BitSequence buffer[SIZE512]; /* data buffer */ - int buf_ptr; /* data buffer pointer */ - int bits_in_last_byte; /* no. of message bits in last byte of - data buffer */ -} groestlHashState; - -/*void Init(hashState*); -void Update(hashState*, const BitSequence*, DataLength); -void Final(hashState*, BitSequence*); */ -void groestl(const BitSequence*, DataLength, BitSequence*); -/* NIST API end */ - -/* -int crypto_hash(unsigned char *out, - const unsigned char *in, - unsigned long long len); -*/ - -#endif /* __hash_h */ diff --git a/src/crypto/c_jh.c b/src/crypto/c_jh.c deleted file mode 100644 index 728f3bbe..00000000 --- a/src/crypto/c_jh.c +++ /dev/null @@ -1,367 +0,0 @@ -/*This program gives the 64-bit optimized bitslice implementation of JH using ANSI C - - -------------------------------- - Performance - - Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz) - Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic) - Speed for long message: - 1) 45.8 cycles/byte compiler: Intel C++ Compiler 11.1 compilation option: icc -O2 - 2) 56.8 cycles/byte compiler: gcc 4.4.3 compilation option: gcc -O3 - - -------------------------------- - Last Modified: January 16, 2011 -*/ - -#include "c_jh.h" - -#include -#include - -/*typedef unsigned long long uint64;*/ -typedef uint64_t uint64; 
- -/*define data alignment for different C compilers*/ -#if defined(__GNUC__) - #define DATA_ALIGN16(x) x __attribute__ ((aligned(16))) -#else - #define DATA_ALIGN16(x) __declspec(align(16)) x -#endif - - -typedef struct { - int hashbitlen; /*the message digest size*/ - unsigned long long databitlen; /*the message size in bits*/ - unsigned long long datasize_in_buffer; /*the size of the message remained in buffer; assumed to be multiple of 8bits except for the last partial block at the end of the message*/ - DATA_ALIGN16(uint64 x[8][2]); /*the 1024-bit state, ( x[i][0] || x[i][1] ) is the ith row of the state in the pseudocode*/ - unsigned char buffer[64]; /*the 512-bit message block to be hashed;*/ -} hashState; - - -/*The initial hash value H(0)*/ -const unsigned char JH224_H0[128]={0x2d,0xfe,0xdd,0x62,0xf9,0x9a,0x98,0xac,0xae,0x7c,0xac,0xd6,0x19,0xd6,0x34,0xe7,0xa4,0x83,0x10,0x5,0xbc,0x30,0x12,0x16,0xb8,0x60,0x38,0xc6,0xc9,0x66,0x14,0x94,0x66,0xd9,0x89,0x9f,0x25,0x80,0x70,0x6f,0xce,0x9e,0xa3,0x1b,0x1d,0x9b,0x1a,0xdc,0x11,0xe8,0x32,0x5f,0x7b,0x36,0x6e,0x10,0xf9,0x94,0x85,0x7f,0x2,0xfa,0x6,0xc1,0x1b,0x4f,0x1b,0x5c,0xd8,0xc8,0x40,0xb3,0x97,0xf6,0xa1,0x7f,0x6e,0x73,0x80,0x99,0xdc,0xdf,0x93,0xa5,0xad,0xea,0xa3,0xd3,0xa4,0x31,0xe8,0xde,0xc9,0x53,0x9a,0x68,0x22,0xb4,0xa9,0x8a,0xec,0x86,0xa1,0xe4,0xd5,0x74,0xac,0x95,0x9c,0xe5,0x6c,0xf0,0x15,0x96,0xd,0xea,0xb5,0xab,0x2b,0xbf,0x96,0x11,0xdc,0xf0,0xdd,0x64,0xea,0x6e}; -const unsigned char 
JH256_H0[128]={0xeb,0x98,0xa3,0x41,0x2c,0x20,0xd3,0xeb,0x92,0xcd,0xbe,0x7b,0x9c,0xb2,0x45,0xc1,0x1c,0x93,0x51,0x91,0x60,0xd4,0xc7,0xfa,0x26,0x0,0x82,0xd6,0x7e,0x50,0x8a,0x3,0xa4,0x23,0x9e,0x26,0x77,0x26,0xb9,0x45,0xe0,0xfb,0x1a,0x48,0xd4,0x1a,0x94,0x77,0xcd,0xb5,0xab,0x26,0x2,0x6b,0x17,0x7a,0x56,0xf0,0x24,0x42,0xf,0xff,0x2f,0xa8,0x71,0xa3,0x96,0x89,0x7f,0x2e,0x4d,0x75,0x1d,0x14,0x49,0x8,0xf7,0x7d,0xe2,0x62,0x27,0x76,0x95,0xf7,0x76,0x24,0x8f,0x94,0x87,0xd5,0xb6,0x57,0x47,0x80,0x29,0x6c,0x5c,0x5e,0x27,0x2d,0xac,0x8e,0xd,0x6c,0x51,0x84,0x50,0xc6,0x57,0x5,0x7a,0xf,0x7b,0xe4,0xd3,0x67,0x70,0x24,0x12,0xea,0x89,0xe3,0xab,0x13,0xd3,0x1c,0xd7,0x69}; -const unsigned char JH384_H0[128]={0x48,0x1e,0x3b,0xc6,0xd8,0x13,0x39,0x8a,0x6d,0x3b,0x5e,0x89,0x4a,0xde,0x87,0x9b,0x63,0xfa,0xea,0x68,0xd4,0x80,0xad,0x2e,0x33,0x2c,0xcb,0x21,0x48,0xf,0x82,0x67,0x98,0xae,0xc8,0x4d,0x90,0x82,0xb9,0x28,0xd4,0x55,0xea,0x30,0x41,0x11,0x42,0x49,0x36,0xf5,0x55,0xb2,0x92,0x48,0x47,0xec,0xc7,0x25,0xa,0x93,0xba,0xf4,0x3c,0xe1,0x56,0x9b,0x7f,0x8a,0x27,0xdb,0x45,0x4c,0x9e,0xfc,0xbd,0x49,0x63,0x97,0xaf,0xe,0x58,0x9f,0xc2,0x7d,0x26,0xaa,0x80,0xcd,0x80,0xc0,0x8b,0x8c,0x9d,0xeb,0x2e,0xda,0x8a,0x79,0x81,0xe8,0xf8,0xd5,0x37,0x3a,0xf4,0x39,0x67,0xad,0xdd,0xd1,0x7a,0x71,0xa9,0xb4,0xd3,0xbd,0xa4,0x75,0xd3,0x94,0x97,0x6c,0x3f,0xba,0x98,0x42,0x73,0x7f}; -const unsigned char JH512_H0[128]={0x6f,0xd1,0x4b,0x96,0x3e,0x0,0xaa,0x17,0x63,0x6a,0x2e,0x5,0x7a,0x15,0xd5,0x43,0x8a,0x22,0x5e,0x8d,0xc,0x97,0xef,0xb,0xe9,0x34,0x12,0x59,0xf2,0xb3,0xc3,0x61,0x89,0x1d,0xa0,0xc1,0x53,0x6f,0x80,0x1e,0x2a,0xa9,0x5,0x6b,0xea,0x2b,0x6d,0x80,0x58,0x8e,0xcc,0xdb,0x20,0x75,0xba,0xa6,0xa9,0xf,0x3a,0x76,0xba,0xf8,0x3b,0xf7,0x1,0x69,0xe6,0x5,0x41,0xe3,0x4a,0x69,0x46,0xb5,0x8a,0x8e,0x2e,0x6f,0xe6,0x5a,0x10,0x47,0xa7,0xd0,0xc1,0x84,0x3c,0x24,0x3b,0x6e,0x71,0xb1,0x2d,0x5a,0xc1,0x99,0xcf,0x57,0xf6,0xec,0x9d,0xb1,0xf8,0x56,0xa7,0x6,0x88,0x7c,0x57,0x16,0xb1,0x56,0xe3,0xc2,0xfc,0xdf,0xe6,0x85,0x17,0xfb,0x54,0x5a,0x46,0x78,0xcc,0x8c,0xdd,0x4b}; - 
-/*42 round constants, each round constant is 32-byte (256-bit)*/ -const unsigned char E8_bitslice_roundconstant[42][32]={ -{0x72,0xd5,0xde,0xa2,0xdf,0x15,0xf8,0x67,0x7b,0x84,0x15,0xa,0xb7,0x23,0x15,0x57,0x81,0xab,0xd6,0x90,0x4d,0x5a,0x87,0xf6,0x4e,0x9f,0x4f,0xc5,0xc3,0xd1,0x2b,0x40}, -{0xea,0x98,0x3a,0xe0,0x5c,0x45,0xfa,0x9c,0x3,0xc5,0xd2,0x99,0x66,0xb2,0x99,0x9a,0x66,0x2,0x96,0xb4,0xf2,0xbb,0x53,0x8a,0xb5,0x56,0x14,0x1a,0x88,0xdb,0xa2,0x31}, -{0x3,0xa3,0x5a,0x5c,0x9a,0x19,0xe,0xdb,0x40,0x3f,0xb2,0xa,0x87,0xc1,0x44,0x10,0x1c,0x5,0x19,0x80,0x84,0x9e,0x95,0x1d,0x6f,0x33,0xeb,0xad,0x5e,0xe7,0xcd,0xdc}, -{0x10,0xba,0x13,0x92,0x2,0xbf,0x6b,0x41,0xdc,0x78,0x65,0x15,0xf7,0xbb,0x27,0xd0,0xa,0x2c,0x81,0x39,0x37,0xaa,0x78,0x50,0x3f,0x1a,0xbf,0xd2,0x41,0x0,0x91,0xd3}, -{0x42,0x2d,0x5a,0xd,0xf6,0xcc,0x7e,0x90,0xdd,0x62,0x9f,0x9c,0x92,0xc0,0x97,0xce,0x18,0x5c,0xa7,0xb,0xc7,0x2b,0x44,0xac,0xd1,0xdf,0x65,0xd6,0x63,0xc6,0xfc,0x23}, -{0x97,0x6e,0x6c,0x3,0x9e,0xe0,0xb8,0x1a,0x21,0x5,0x45,0x7e,0x44,0x6c,0xec,0xa8,0xee,0xf1,0x3,0xbb,0x5d,0x8e,0x61,0xfa,0xfd,0x96,0x97,0xb2,0x94,0x83,0x81,0x97}, -{0x4a,0x8e,0x85,0x37,0xdb,0x3,0x30,0x2f,0x2a,0x67,0x8d,0x2d,0xfb,0x9f,0x6a,0x95,0x8a,0xfe,0x73,0x81,0xf8,0xb8,0x69,0x6c,0x8a,0xc7,0x72,0x46,0xc0,0x7f,0x42,0x14}, -{0xc5,0xf4,0x15,0x8f,0xbd,0xc7,0x5e,0xc4,0x75,0x44,0x6f,0xa7,0x8f,0x11,0xbb,0x80,0x52,0xde,0x75,0xb7,0xae,0xe4,0x88,0xbc,0x82,0xb8,0x0,0x1e,0x98,0xa6,0xa3,0xf4}, -{0x8e,0xf4,0x8f,0x33,0xa9,0xa3,0x63,0x15,0xaa,0x5f,0x56,0x24,0xd5,0xb7,0xf9,0x89,0xb6,0xf1,0xed,0x20,0x7c,0x5a,0xe0,0xfd,0x36,0xca,0xe9,0x5a,0x6,0x42,0x2c,0x36}, -{0xce,0x29,0x35,0x43,0x4e,0xfe,0x98,0x3d,0x53,0x3a,0xf9,0x74,0x73,0x9a,0x4b,0xa7,0xd0,0xf5,0x1f,0x59,0x6f,0x4e,0x81,0x86,0xe,0x9d,0xad,0x81,0xaf,0xd8,0x5a,0x9f}, -{0xa7,0x5,0x6,0x67,0xee,0x34,0x62,0x6a,0x8b,0xb,0x28,0xbe,0x6e,0xb9,0x17,0x27,0x47,0x74,0x7,0x26,0xc6,0x80,0x10,0x3f,0xe0,0xa0,0x7e,0x6f,0xc6,0x7e,0x48,0x7b}, 
-{0xd,0x55,0xa,0xa5,0x4a,0xf8,0xa4,0xc0,0x91,0xe3,0xe7,0x9f,0x97,0x8e,0xf1,0x9e,0x86,0x76,0x72,0x81,0x50,0x60,0x8d,0xd4,0x7e,0x9e,0x5a,0x41,0xf3,0xe5,0xb0,0x62}, -{0xfc,0x9f,0x1f,0xec,0x40,0x54,0x20,0x7a,0xe3,0xe4,0x1a,0x0,0xce,0xf4,0xc9,0x84,0x4f,0xd7,0x94,0xf5,0x9d,0xfa,0x95,0xd8,0x55,0x2e,0x7e,0x11,0x24,0xc3,0x54,0xa5}, -{0x5b,0xdf,0x72,0x28,0xbd,0xfe,0x6e,0x28,0x78,0xf5,0x7f,0xe2,0xf,0xa5,0xc4,0xb2,0x5,0x89,0x7c,0xef,0xee,0x49,0xd3,0x2e,0x44,0x7e,0x93,0x85,0xeb,0x28,0x59,0x7f}, -{0x70,0x5f,0x69,0x37,0xb3,0x24,0x31,0x4a,0x5e,0x86,0x28,0xf1,0x1d,0xd6,0xe4,0x65,0xc7,0x1b,0x77,0x4,0x51,0xb9,0x20,0xe7,0x74,0xfe,0x43,0xe8,0x23,0xd4,0x87,0x8a}, -{0x7d,0x29,0xe8,0xa3,0x92,0x76,0x94,0xf2,0xdd,0xcb,0x7a,0x9,0x9b,0x30,0xd9,0xc1,0x1d,0x1b,0x30,0xfb,0x5b,0xdc,0x1b,0xe0,0xda,0x24,0x49,0x4f,0xf2,0x9c,0x82,0xbf}, -{0xa4,0xe7,0xba,0x31,0xb4,0x70,0xbf,0xff,0xd,0x32,0x44,0x5,0xde,0xf8,0xbc,0x48,0x3b,0xae,0xfc,0x32,0x53,0xbb,0xd3,0x39,0x45,0x9f,0xc3,0xc1,0xe0,0x29,0x8b,0xa0}, -{0xe5,0xc9,0x5,0xfd,0xf7,0xae,0x9,0xf,0x94,0x70,0x34,0x12,0x42,0x90,0xf1,0x34,0xa2,0x71,0xb7,0x1,0xe3,0x44,0xed,0x95,0xe9,0x3b,0x8e,0x36,0x4f,0x2f,0x98,0x4a}, -{0x88,0x40,0x1d,0x63,0xa0,0x6c,0xf6,0x15,0x47,0xc1,0x44,0x4b,0x87,0x52,0xaf,0xff,0x7e,0xbb,0x4a,0xf1,0xe2,0xa,0xc6,0x30,0x46,0x70,0xb6,0xc5,0xcc,0x6e,0x8c,0xe6}, -{0xa4,0xd5,0xa4,0x56,0xbd,0x4f,0xca,0x0,0xda,0x9d,0x84,0x4b,0xc8,0x3e,0x18,0xae,0x73,0x57,0xce,0x45,0x30,0x64,0xd1,0xad,0xe8,0xa6,0xce,0x68,0x14,0x5c,0x25,0x67}, -{0xa3,0xda,0x8c,0xf2,0xcb,0xe,0xe1,0x16,0x33,0xe9,0x6,0x58,0x9a,0x94,0x99,0x9a,0x1f,0x60,0xb2,0x20,0xc2,0x6f,0x84,0x7b,0xd1,0xce,0xac,0x7f,0xa0,0xd1,0x85,0x18}, -{0x32,0x59,0x5b,0xa1,0x8d,0xdd,0x19,0xd3,0x50,0x9a,0x1c,0xc0,0xaa,0xa5,0xb4,0x46,0x9f,0x3d,0x63,0x67,0xe4,0x4,0x6b,0xba,0xf6,0xca,0x19,0xab,0xb,0x56,0xee,0x7e}, -{0x1f,0xb1,0x79,0xea,0xa9,0x28,0x21,0x74,0xe9,0xbd,0xf7,0x35,0x3b,0x36,0x51,0xee,0x1d,0x57,0xac,0x5a,0x75,0x50,0xd3,0x76,0x3a,0x46,0xc2,0xfe,0xa3,0x7d,0x70,0x1}, 
-{0xf7,0x35,0xc1,0xaf,0x98,0xa4,0xd8,0x42,0x78,0xed,0xec,0x20,0x9e,0x6b,0x67,0x79,0x41,0x83,0x63,0x15,0xea,0x3a,0xdb,0xa8,0xfa,0xc3,0x3b,0x4d,0x32,0x83,0x2c,0x83}, -{0xa7,0x40,0x3b,0x1f,0x1c,0x27,0x47,0xf3,0x59,0x40,0xf0,0x34,0xb7,0x2d,0x76,0x9a,0xe7,0x3e,0x4e,0x6c,0xd2,0x21,0x4f,0xfd,0xb8,0xfd,0x8d,0x39,0xdc,0x57,0x59,0xef}, -{0x8d,0x9b,0xc,0x49,0x2b,0x49,0xeb,0xda,0x5b,0xa2,0xd7,0x49,0x68,0xf3,0x70,0xd,0x7d,0x3b,0xae,0xd0,0x7a,0x8d,0x55,0x84,0xf5,0xa5,0xe9,0xf0,0xe4,0xf8,0x8e,0x65}, -{0xa0,0xb8,0xa2,0xf4,0x36,0x10,0x3b,0x53,0xc,0xa8,0x7,0x9e,0x75,0x3e,0xec,0x5a,0x91,0x68,0x94,0x92,0x56,0xe8,0x88,0x4f,0x5b,0xb0,0x5c,0x55,0xf8,0xba,0xbc,0x4c}, -{0xe3,0xbb,0x3b,0x99,0xf3,0x87,0x94,0x7b,0x75,0xda,0xf4,0xd6,0x72,0x6b,0x1c,0x5d,0x64,0xae,0xac,0x28,0xdc,0x34,0xb3,0x6d,0x6c,0x34,0xa5,0x50,0xb8,0x28,0xdb,0x71}, -{0xf8,0x61,0xe2,0xf2,0x10,0x8d,0x51,0x2a,0xe3,0xdb,0x64,0x33,0x59,0xdd,0x75,0xfc,0x1c,0xac,0xbc,0xf1,0x43,0xce,0x3f,0xa2,0x67,0xbb,0xd1,0x3c,0x2,0xe8,0x43,0xb0}, -{0x33,0xa,0x5b,0xca,0x88,0x29,0xa1,0x75,0x7f,0x34,0x19,0x4d,0xb4,0x16,0x53,0x5c,0x92,0x3b,0x94,0xc3,0xe,0x79,0x4d,0x1e,0x79,0x74,0x75,0xd7,0xb6,0xee,0xaf,0x3f}, -{0xea,0xa8,0xd4,0xf7,0xbe,0x1a,0x39,0x21,0x5c,0xf4,0x7e,0x9,0x4c,0x23,0x27,0x51,0x26,0xa3,0x24,0x53,0xba,0x32,0x3c,0xd2,0x44,0xa3,0x17,0x4a,0x6d,0xa6,0xd5,0xad}, -{0xb5,0x1d,0x3e,0xa6,0xaf,0xf2,0xc9,0x8,0x83,0x59,0x3d,0x98,0x91,0x6b,0x3c,0x56,0x4c,0xf8,0x7c,0xa1,0x72,0x86,0x60,0x4d,0x46,0xe2,0x3e,0xcc,0x8,0x6e,0xc7,0xf6}, -{0x2f,0x98,0x33,0xb3,0xb1,0xbc,0x76,0x5e,0x2b,0xd6,0x66,0xa5,0xef,0xc4,0xe6,0x2a,0x6,0xf4,0xb6,0xe8,0xbe,0xc1,0xd4,0x36,0x74,0xee,0x82,0x15,0xbc,0xef,0x21,0x63}, -{0xfd,0xc1,0x4e,0xd,0xf4,0x53,0xc9,0x69,0xa7,0x7d,0x5a,0xc4,0x6,0x58,0x58,0x26,0x7e,0xc1,0x14,0x16,0x6,0xe0,0xfa,0x16,0x7e,0x90,0xaf,0x3d,0x28,0x63,0x9d,0x3f}, -{0xd2,0xc9,0xf2,0xe3,0x0,0x9b,0xd2,0xc,0x5f,0xaa,0xce,0x30,0xb7,0xd4,0xc,0x30,0x74,0x2a,0x51,0x16,0xf2,0xe0,0x32,0x98,0xd,0xeb,0x30,0xd8,0xe3,0xce,0xf8,0x9a}, 
-{0x4b,0xc5,0x9e,0x7b,0xb5,0xf1,0x79,0x92,0xff,0x51,0xe6,0x6e,0x4,0x86,0x68,0xd3,0x9b,0x23,0x4d,0x57,0xe6,0x96,0x67,0x31,0xcc,0xe6,0xa6,0xf3,0x17,0xa,0x75,0x5}, -{0xb1,0x76,0x81,0xd9,0x13,0x32,0x6c,0xce,0x3c,0x17,0x52,0x84,0xf8,0x5,0xa2,0x62,0xf4,0x2b,0xcb,0xb3,0x78,0x47,0x15,0x47,0xff,0x46,0x54,0x82,0x23,0x93,0x6a,0x48}, -{0x38,0xdf,0x58,0x7,0x4e,0x5e,0x65,0x65,0xf2,0xfc,0x7c,0x89,0xfc,0x86,0x50,0x8e,0x31,0x70,0x2e,0x44,0xd0,0xb,0xca,0x86,0xf0,0x40,0x9,0xa2,0x30,0x78,0x47,0x4e}, -{0x65,0xa0,0xee,0x39,0xd1,0xf7,0x38,0x83,0xf7,0x5e,0xe9,0x37,0xe4,0x2c,0x3a,0xbd,0x21,0x97,0xb2,0x26,0x1,0x13,0xf8,0x6f,0xa3,0x44,0xed,0xd1,0xef,0x9f,0xde,0xe7}, -{0x8b,0xa0,0xdf,0x15,0x76,0x25,0x92,0xd9,0x3c,0x85,0xf7,0xf6,0x12,0xdc,0x42,0xbe,0xd8,0xa7,0xec,0x7c,0xab,0x27,0xb0,0x7e,0x53,0x8d,0x7d,0xda,0xaa,0x3e,0xa8,0xde}, -{0xaa,0x25,0xce,0x93,0xbd,0x2,0x69,0xd8,0x5a,0xf6,0x43,0xfd,0x1a,0x73,0x8,0xf9,0xc0,0x5f,0xef,0xda,0x17,0x4a,0x19,0xa5,0x97,0x4d,0x66,0x33,0x4c,0xfd,0x21,0x6a}, -{0x35,0xb4,0x98,0x31,0xdb,0x41,0x15,0x70,0xea,0x1e,0xf,0xbb,0xed,0xcd,0x54,0x9b,0x9a,0xd0,0x63,0xa1,0x51,0x97,0x40,0x72,0xf6,0x75,0x9d,0xbf,0x91,0x47,0x6f,0xe2}}; - - -static void E8(hashState *state); /*The bijective function E8, in bitslice form*/ -static void F8(hashState *state); /*The compression function F8 */ - -/*The API functions*/ -static HashReturn Init(hashState *state, int hashbitlen); -static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen); -static HashReturn Final(hashState *state, BitSequence *hashval); -HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval); - -/*swapping bit 2i with bit 2i+1 of 64-bit x*/ -#define SWAP1(x) (x) = ((((x) & 0x5555555555555555ULL) << 1) | (((x) & 0xaaaaaaaaaaaaaaaaULL) >> 1)); -/*swapping bits 4i||4i+1 with bits 4i+2||4i+3 of 64-bit x*/ -#define SWAP2(x) (x) = ((((x) & 0x3333333333333333ULL) << 2) | (((x) & 0xccccccccccccccccULL) >> 2)); -/*swapping bits 8i||8i+1||8i+2||8i+3 
with bits 8i+4||8i+5||8i+6||8i+7 of 64-bit x*/ -#define SWAP4(x) (x) = ((((x) & 0x0f0f0f0f0f0f0f0fULL) << 4) | (((x) & 0xf0f0f0f0f0f0f0f0ULL) >> 4)); -/*swapping bits 16i||16i+1||......||16i+7 with bits 16i+8||16i+9||......||16i+15 of 64-bit x*/ -#define SWAP8(x) (x) = ((((x) & 0x00ff00ff00ff00ffULL) << 8) | (((x) & 0xff00ff00ff00ff00ULL) >> 8)); -/*swapping bits 32i||32i+1||......||32i+15 with bits 32i+16||32i+17||......||32i+31 of 64-bit x*/ -#define SWAP16(x) (x) = ((((x) & 0x0000ffff0000ffffULL) << 16) | (((x) & 0xffff0000ffff0000ULL) >> 16)); -/*swapping bits 64i||64i+1||......||64i+31 with bits 64i+32||64i+33||......||64i+63 of 64-bit x*/ -#define SWAP32(x) (x) = (((x) << 32) | ((x) >> 32)); - -/*The MDS transform*/ -#define L(m0,m1,m2,m3,m4,m5,m6,m7) \ - (m4) ^= (m1); \ - (m5) ^= (m2); \ - (m6) ^= (m0) ^ (m3); \ - (m7) ^= (m0); \ - (m0) ^= (m5); \ - (m1) ^= (m6); \ - (m2) ^= (m4) ^ (m7); \ - (m3) ^= (m4); - -/*Two Sboxes are computed in parallel, each Sbox implements S0 and S1, selected by a constant bit*/ -/*The reason to compute two Sboxes in parallel is to try to fully utilize the parallel processing power*/ -#define SS(m0,m1,m2,m3,m4,m5,m6,m7,cc0,cc1) \ - m3 = ~(m3); \ - m7 = ~(m7); \ - m0 ^= ((~(m2)) & (cc0)); \ - m4 ^= ((~(m6)) & (cc1)); \ - temp0 = (cc0) ^ ((m0) & (m1));\ - temp1 = (cc1) ^ ((m4) & (m5));\ - m0 ^= ((m2) & (m3)); \ - m4 ^= ((m6) & (m7)); \ - m3 ^= ((~(m1)) & (m2)); \ - m7 ^= ((~(m5)) & (m6)); \ - m1 ^= ((m0) & (m2)); \ - m5 ^= ((m4) & (m6)); \ - m2 ^= ((m0) & (~(m3))); \ - m6 ^= ((m4) & (~(m7))); \ - m0 ^= ((m1) | (m3)); \ - m4 ^= ((m5) | (m7)); \ - m3 ^= ((m1) & (m2)); \ - m7 ^= ((m5) & (m6)); \ - m1 ^= (temp0 & (m0)); \ - m5 ^= (temp1 & (m4)); \ - m2 ^= temp0; \ - m6 ^= temp1; - -/*The bijective function E8, in bitslice form*/ -static void E8(hashState *state) -{ - uint64 i,roundnumber,temp0,temp1; - - for (roundnumber = 0; roundnumber < 42; roundnumber = roundnumber+7) { - /*round 7*roundnumber+0: Sbox, MDS and Swapping layers*/ - 
for (i = 0; i < 2; i++) { - SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i+2] ); - L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); - SWAP1(state->x[1][i]); SWAP1(state->x[3][i]); SWAP1(state->x[5][i]); SWAP1(state->x[7][i]); - } - - /*round 7*roundnumber+1: Sbox, MDS and Swapping layers*/ - for (i = 0; i < 2; i++) { - SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i+2] ); - L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); - SWAP2(state->x[1][i]); SWAP2(state->x[3][i]); SWAP2(state->x[5][i]); SWAP2(state->x[7][i]); - } - - /*round 7*roundnumber+2: Sbox, MDS and Swapping layers*/ - for (i = 0; i < 2; i++) { - SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i+2] ); - L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); - SWAP4(state->x[1][i]); SWAP4(state->x[3][i]); SWAP4(state->x[5][i]); SWAP4(state->x[7][i]); - } - - /*round 7*roundnumber+3: Sbox, MDS and Swapping layers*/ - for (i = 0; i < 2; i++) { - SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i+2] ); - 
L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); - SWAP8(state->x[1][i]); SWAP8(state->x[3][i]); SWAP8(state->x[5][i]); SWAP8(state->x[7][i]); - } - - /*round 7*roundnumber+4: Sbox, MDS and Swapping layers*/ - for (i = 0; i < 2; i++) { - SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i+2] ); - L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); - SWAP16(state->x[1][i]); SWAP16(state->x[3][i]); SWAP16(state->x[5][i]); SWAP16(state->x[7][i]); - } - - /*round 7*roundnumber+5: Sbox, MDS and Swapping layers*/ - for (i = 0; i < 2; i++) { - SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i+2] ); - L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); - SWAP32(state->x[1][i]); SWAP32(state->x[3][i]); SWAP32(state->x[5][i]); SWAP32(state->x[7][i]); - } - - /*round 7*roundnumber+6: Sbox and MDS layers*/ - for (i = 0; i < 2; i++) { - SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i+2] ); - L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); - } - /*round 7*roundnumber+6: swapping layer*/ - for (i = 1; i < 8; i = i+2) { - temp0 = state->x[i][0]; state->x[i][0] = state->x[i][1]; state->x[i][1] = temp0; - } - } - -} - -/*The compression function F8 */ 
-static void F8(hashState *state) -{ - uint64 i; - - /*xor the 512-bit message with the fist half of the 1024-bit hash state*/ - for (i = 0; i < 8; i++) state->x[i >> 1][i & 1] ^= ((uint64*)state->buffer)[i]; - - /*the bijective function E8 */ - E8(state); - - /*xor the 512-bit message with the second half of the 1024-bit hash state*/ - for (i = 0; i < 8; i++) state->x[(8+i) >> 1][(8+i) & 1] ^= ((uint64*)state->buffer)[i]; -} - -/*before hashing a message, initialize the hash state as H0 */ -static HashReturn Init(hashState *state, int hashbitlen) -{ - state->databitlen = 0; - state->datasize_in_buffer = 0; - - /*initialize the initial hash value of JH*/ - state->hashbitlen = hashbitlen; - - /*load the intital hash value into state*/ - switch (hashbitlen) - { - case 224: memcpy(state->x,JH224_H0,128); break; - case 256: memcpy(state->x,JH256_H0,128); break; - case 384: memcpy(state->x,JH384_H0,128); break; - case 512: memcpy(state->x,JH512_H0,128); break; - } - - return(SUCCESS); -} - - -/*hash each 512-bit message block, except the last partial block*/ -static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen) -{ - DataLength index; /*the starting address of the data to be compressed*/ - - state->databitlen += databitlen; - index = 0; - - /*if there is remaining data in the buffer, fill it to a full message block first*/ - /*we assume that the size of the data in the buffer is the multiple of 8 bits if it is not at the end of a message*/ - - /*There is data in the buffer, but the incoming data is insufficient for a full block*/ - if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) < 512) ) { - if ( (databitlen & 7) == 0 ) { - memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3)) ; - } - else memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3)+1) ; - state->datasize_in_buffer += databitlen; - databitlen = 0; - 
} - - /*There is data in the buffer, and the incoming data is sufficient for a full block*/ - if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) >= 512) ) { - memcpy( state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3) ) ; - index = 64-(state->datasize_in_buffer >> 3); - databitlen = databitlen - (512 - state->datasize_in_buffer); - F8(state); - state->datasize_in_buffer = 0; - } - - /*hash the remaining full message blocks*/ - for ( ; databitlen >= 512; index = index+64, databitlen = databitlen - 512) { - memcpy(state->buffer, data+index, 64); - F8(state); - } - - /*store the partial block into buffer, assume that -- if part of the last byte is not part of the message, then that part consists of 0 bits*/ - if ( databitlen > 0) { - if ((databitlen & 7) == 0) - memcpy(state->buffer, data+index, (databitlen & 0x1ff) >> 3); - else - memcpy(state->buffer, data+index, ((databitlen & 0x1ff) >> 3)+1); - state->datasize_in_buffer = databitlen; - } - - return(SUCCESS); -} - -/*pad the message, process the padded block(s), truncate the hash value H to obtain the message digest*/ -static HashReturn Final(hashState *state, BitSequence *hashval) -{ - unsigned int i; - - if ( (state->databitlen & 0x1ff) == 0 ) { - /*pad the message when databitlen is multiple of 512 bits, then process the padded block*/ - memset(state->buffer, 0, 64); - state->buffer[0] = 0x80; - state->buffer[63] = state->databitlen & 0xff; - state->buffer[62] = (state->databitlen >> 8) & 0xff; - state->buffer[61] = (state->databitlen >> 16) & 0xff; - state->buffer[60] = (state->databitlen >> 24) & 0xff; - state->buffer[59] = (state->databitlen >> 32) & 0xff; - state->buffer[58] = (state->databitlen >> 40) & 0xff; - state->buffer[57] = (state->databitlen >> 48) & 0xff; - state->buffer[56] = (state->databitlen >> 56) & 0xff; - F8(state); - } - else { - /*set the rest of the bytes in the buffer to 0*/ - if ( (state->datasize_in_buffer & 7) 
== 0) - for (i = (state->databitlen & 0x1ff) >> 3; i < 64; i++) state->buffer[i] = 0; - else - for (i = ((state->databitlen & 0x1ff) >> 3)+1; i < 64; i++) state->buffer[i] = 0; - - /*pad and process the partial block when databitlen is not multiple of 512 bits, then hash the padded blocks*/ - state->buffer[((state->databitlen & 0x1ff) >> 3)] |= 1 << (7- (state->databitlen & 7)); - - F8(state); - memset(state->buffer, 0, 64); - state->buffer[63] = state->databitlen & 0xff; - state->buffer[62] = (state->databitlen >> 8) & 0xff; - state->buffer[61] = (state->databitlen >> 16) & 0xff; - state->buffer[60] = (state->databitlen >> 24) & 0xff; - state->buffer[59] = (state->databitlen >> 32) & 0xff; - state->buffer[58] = (state->databitlen >> 40) & 0xff; - state->buffer[57] = (state->databitlen >> 48) & 0xff; - state->buffer[56] = (state->databitlen >> 56) & 0xff; - F8(state); - } - - /*truncating the final hash value to generate the message digest*/ - switch(state->hashbitlen) { - case 224: memcpy(hashval,(unsigned char*)state->x+64+36,28); break; - case 256: memcpy(hashval,(unsigned char*)state->x+64+32,32); break; - case 384: memcpy(hashval,(unsigned char*)state->x+64+16,48); break; - case 512: memcpy(hashval,(unsigned char*)state->x+64,64); break; - } - - return(SUCCESS); -} - -/* hash a message, - three inputs: message digest size in bits (hashbitlen); message (data); message length in bits (databitlen) - one output: message digest (hashval) -*/ -HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval) -{ - hashState state; - - if ( hashbitlen == 224 || hashbitlen == 256 || hashbitlen == 384 || hashbitlen == 512 ) { - Init(&state, hashbitlen); - Update(&state, data, databitlen); - Final(&state, hashval); - return SUCCESS; - } - else - return(BAD_HASHLEN); -} diff --git a/src/crypto/c_jh.h b/src/crypto/c_jh.h deleted file mode 100644 index d10d40fe..00000000 --- a/src/crypto/c_jh.h +++ /dev/null @@ -1,19 +0,0 @@ -/*This 
program gives the 64-bit optimized bitslice implementation of JH using ANSI C - - -------------------------------- - Performance - - Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz) - Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic) - Speed for long message: - 1) 45.8 cycles/byte compiler: Intel C++ Compiler 11.1 compilation option: icc -O2 - 2) 56.8 cycles/byte compiler: gcc 4.4.3 compilation option: gcc -O3 - - -------------------------------- - Last Modified: January 16, 2011 -*/ -#pragma once - -#include "hash.h" - -HashReturn jh_hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval); diff --git a/src/crypto/c_skein.c b/src/crypto/c_skein.c deleted file mode 100644 index 994e4d46..00000000 --- a/src/crypto/c_skein.c +++ /dev/null @@ -1,701 +0,0 @@ -/*********************************************************************** -** -** Implementation of the Skein hash function. -** -** Source code author: Doug Whiting, 2008. -** -** This algorithm and source code is released to the public domain. 
-** -************************************************************************/ - -#define SKEIN_PORT_CODE /* instantiate any code in skein_port.h */ - -#include /* get size_t definition */ -#include /* get the memcpy/memset functions */ -#include "c_skein.h" /* get the Skein API definitions */ - -#ifndef SKEIN_512_NIST_MAX_HASHBITS -#define SKEIN_512_NIST_MAX_HASHBITS (512) -#endif - -#define SKEIN_MODIFIER_WORDS ( 2) /* number of modifier (tweak) words */ - -#define SKEIN_512_STATE_WORDS ( 8) -#define SKEIN_MAX_STATE_WORDS (16) - -#define SKEIN_512_STATE_BYTES ( 8*SKEIN_512_STATE_WORDS) -#define SKEIN_512_STATE_BITS (64*SKEIN_512_STATE_WORDS) -#define SKEIN_512_BLOCK_BYTES ( 8*SKEIN_512_STATE_WORDS) - -#define SKEIN_RND_SPECIAL (1000u) -#define SKEIN_RND_KEY_INITIAL (SKEIN_RND_SPECIAL+0u) -#define SKEIN_RND_KEY_INJECT (SKEIN_RND_SPECIAL+1u) -#define SKEIN_RND_FEED_FWD (SKEIN_RND_SPECIAL+2u) - -typedef struct -{ - size_t hashBitLen; /* size of hash result, in bits */ - size_t bCnt; /* current byte count in buffer b[] */ - u64b_t T[SKEIN_MODIFIER_WORDS]; /* tweak words: T[0]=byte cnt, T[1]=flags */ -} Skein_Ctxt_Hdr_t; - -typedef struct /* 512-bit Skein hash context structure */ -{ - Skein_Ctxt_Hdr_t h; /* common header context variables */ - u64b_t X[SKEIN_512_STATE_WORDS]; /* chaining variables */ - u08b_t b[SKEIN_512_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */ -} Skein_512_Ctxt_t; - -/* Skein APIs for (incremental) "straight hashing" */ -static int Skein_512_Init (Skein_512_Ctxt_t *ctx, size_t hashBitLen); -static int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt); -static int Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal); - -#ifndef SKEIN_TREE_HASH -#define SKEIN_TREE_HASH (1) -#endif - -/***************************************************************** -** "Internal" Skein definitions -** -- not needed for sequential hashing API, but will be -** helpful for other uses of Skein (e.g., tree hash mode). 
-** -- included here so that they can be shared between -** reference and optimized code. -******************************************************************/ - -/* tweak word T[1]: bit field starting positions */ -#define SKEIN_T1_BIT(BIT) ((BIT) - 64) /* offset 64 because it's the second word */ - -#define SKEIN_T1_POS_TREE_LVL SKEIN_T1_BIT(112) /* bits 112..118: level in hash tree */ -#define SKEIN_T1_POS_BIT_PAD SKEIN_T1_BIT(119) /* bit 119 : partial final input byte */ -#define SKEIN_T1_POS_BLK_TYPE SKEIN_T1_BIT(120) /* bits 120..125: type field */ -#define SKEIN_T1_POS_FIRST SKEIN_T1_BIT(126) /* bits 126 : first block flag */ -#define SKEIN_T1_POS_FINAL SKEIN_T1_BIT(127) /* bit 127 : final block flag */ - -/* tweak word T[1]: flag bit definition(s) */ -#define SKEIN_T1_FLAG_FIRST (((u64b_t) 1 ) << SKEIN_T1_POS_FIRST) -#define SKEIN_T1_FLAG_FINAL (((u64b_t) 1 ) << SKEIN_T1_POS_FINAL) -#define SKEIN_T1_FLAG_BIT_PAD (((u64b_t) 1 ) << SKEIN_T1_POS_BIT_PAD) - -/* tweak word T[1]: tree level bit field mask */ -#define SKEIN_T1_TREE_LVL_MASK (((u64b_t)0x7F) << SKEIN_T1_POS_TREE_LVL) -#define SKEIN_T1_TREE_LEVEL(n) (((u64b_t) (n)) << SKEIN_T1_POS_TREE_LVL) - -/* tweak word T[1]: block type field */ -#define SKEIN_BLK_TYPE_KEY ( 0) /* key, for MAC and KDF */ -#define SKEIN_BLK_TYPE_CFG ( 4) /* configuration block */ -#define SKEIN_BLK_TYPE_PERS ( 8) /* personalization string */ -#define SKEIN_BLK_TYPE_PK (12) /* public key (for digital signature hashing) */ -#define SKEIN_BLK_TYPE_KDF (16) /* key identifier for KDF */ -#define SKEIN_BLK_TYPE_NONCE (20) /* nonce for PRNG */ -#define SKEIN_BLK_TYPE_MSG (48) /* message processing */ -#define SKEIN_BLK_TYPE_OUT (63) /* output stage */ -#define SKEIN_BLK_TYPE_MASK (63) /* bit field mask */ - -#define SKEIN_T1_BLK_TYPE(T) (((u64b_t) (SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE) -#define SKEIN_T1_BLK_TYPE_KEY SKEIN_T1_BLK_TYPE(KEY) /* key, for MAC and KDF */ -#define SKEIN_T1_BLK_TYPE_CFG SKEIN_T1_BLK_TYPE(CFG) /* 
configuration block */ -#define SKEIN_T1_BLK_TYPE_PERS SKEIN_T1_BLK_TYPE(PERS) /* personalization string */ -#define SKEIN_T1_BLK_TYPE_PK SKEIN_T1_BLK_TYPE(PK) /* public key (for digital signature hashing) */ -#define SKEIN_T1_BLK_TYPE_KDF SKEIN_T1_BLK_TYPE(KDF) /* key identifier for KDF */ -#define SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE)/* nonce for PRNG */ -#define SKEIN_T1_BLK_TYPE_MSG SKEIN_T1_BLK_TYPE(MSG) /* message processing */ -#define SKEIN_T1_BLK_TYPE_OUT SKEIN_T1_BLK_TYPE(OUT) /* output stage */ -#define SKEIN_T1_BLK_TYPE_MASK SKEIN_T1_BLK_TYPE(MASK) /* field bit mask */ - -#define SKEIN_T1_BLK_TYPE_CFG_FINAL (SKEIN_T1_BLK_TYPE_CFG | SKEIN_T1_FLAG_FINAL) -#define SKEIN_T1_BLK_TYPE_OUT_FINAL (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL) - -#define SKEIN_VERSION (1) - -#ifndef SKEIN_ID_STRING_LE /* allow compile-time personalization */ -#define SKEIN_ID_STRING_LE (0x33414853) /* "SHA3" (little-endian)*/ -#endif - -#define SKEIN_MK_64(hi32,lo32) ((lo32) + (((u64b_t) (hi32)) << 32)) -#define SKEIN_SCHEMA_VER SKEIN_MK_64(SKEIN_VERSION,SKEIN_ID_STRING_LE) -#define SKEIN_KS_PARITY SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22) - -#define SKEIN_CFG_STR_LEN (4*8) - -/* bit field definitions in config block treeInfo word */ -#define SKEIN_CFG_TREE_LEAF_SIZE_POS ( 0) -#define SKEIN_CFG_TREE_NODE_SIZE_POS ( 8) -#define SKEIN_CFG_TREE_MAX_LEVEL_POS (16) - -#define SKEIN_CFG_TREE_LEAF_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_LEAF_SIZE_POS) -#define SKEIN_CFG_TREE_NODE_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_NODE_SIZE_POS) -#define SKEIN_CFG_TREE_MAX_LEVEL_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS) - -#define SKEIN_CFG_TREE_INFO(leaf,node,maxLvl) \ - ( (((u64b_t)(leaf )) << SKEIN_CFG_TREE_LEAF_SIZE_POS) | \ - (((u64b_t)(node )) << SKEIN_CFG_TREE_NODE_SIZE_POS) | \ - (((u64b_t)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS) ) - -#define SKEIN_CFG_TREE_INFO_SEQUENTIAL SKEIN_CFG_TREE_INFO(0,0,0) /* use as treeInfo in InitExt() call for sequential processing */ 
- -/* -** Skein macros for getting/setting tweak words, etc. -** These are useful for partial input bytes, hash tree init/update, etc. -**/ -#define Skein_Get_Tweak(ctxPtr,TWK_NUM) ((ctxPtr)->h.T[TWK_NUM]) -#define Skein_Set_Tweak(ctxPtr,TWK_NUM,tVal) {(ctxPtr)->h.T[TWK_NUM] = (tVal);} - -#define Skein_Get_T0(ctxPtr) Skein_Get_Tweak(ctxPtr,0) -#define Skein_Get_T1(ctxPtr) Skein_Get_Tweak(ctxPtr,1) -#define Skein_Set_T0(ctxPtr,T0) Skein_Set_Tweak(ctxPtr,0,T0) -#define Skein_Set_T1(ctxPtr,T1) Skein_Set_Tweak(ctxPtr,1,T1) - -/* set both tweak words at once */ -#define Skein_Set_T0_T1(ctxPtr,T0,T1) \ -{ \ - Skein_Set_T0(ctxPtr,(T0)); \ - Skein_Set_T1(ctxPtr,(T1)); \ -} - -#define Skein_Set_Type(ctxPtr,BLK_TYPE) \ - Skein_Set_T1(ctxPtr,SKEIN_T1_BLK_TYPE_##BLK_TYPE) - -/* set up for starting with a new type: h.T[0]=0; h.T[1] = NEW_TYPE; h.bCnt=0; */ -#define Skein_Start_New_Type(ctxPtr,BLK_TYPE) \ -{ Skein_Set_T0_T1(ctxPtr,0,SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_##BLK_TYPE); (ctxPtr)->h.bCnt=0; } - -#define Skein_Clear_First_Flag(hdr) { (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST; } -#define Skein_Set_Bit_Pad_Flag(hdr) { (hdr).T[1] |= SKEIN_T1_FLAG_BIT_PAD; } - -#define Skein_Set_Tree_Level(hdr,height) { (hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height);} - -/***************************************************************** -** "Internal" Skein definitions for debugging and error checking -******************************************************************/ -#define Skein_Show_Block(bits,ctx,X,blkPtr,wPtr,ksEvenPtr,ksOddPtr) -#define Skein_Show_Round(bits,ctx,r,X) -#define Skein_Show_R_Ptr(bits,ctx,r,X_ptr) -#define Skein_Show_Final(bits,ctx,cnt,outPtr) -#define Skein_Show_Key(bits,ctx,key,keyBytes) - - -#ifndef SKEIN_ERR_CHECK /* run-time checks (e.g., bad params, uninitialized context)? 
*/ -#define Skein_Assert(x,retCode)/* default: ignore all Asserts, for performance */ -#define Skein_assert(x) -#elif defined(SKEIN_ASSERT) -#include -#define Skein_Assert(x,retCode) assert(x) -#define Skein_assert(x) assert(x) -#else -#include -#define Skein_Assert(x,retCode) { if (!(x)) return retCode; } /* caller error */ -#define Skein_assert(x) assert(x) /* internal error */ -#endif - -/***************************************************************** -** Skein block function constants (shared across Ref and Opt code) -******************************************************************/ -enum -{ - /* Skein_512 round rotation constants */ - R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37, - R_512_1_0=33, R_512_1_1=27, R_512_1_2=14, R_512_1_3=42, - R_512_2_0=17, R_512_2_1=49, R_512_2_2=36, R_512_2_3=39, - R_512_3_0=44, R_512_3_1= 9, R_512_3_2=54, R_512_3_3=56, - R_512_4_0=39, R_512_4_1=30, R_512_4_2=34, R_512_4_3=24, - R_512_5_0=13, R_512_5_1=50, R_512_5_2=10, R_512_5_3=17, - R_512_6_0=25, R_512_6_1=29, R_512_6_2=39, R_512_6_3=43, - R_512_7_0= 8, R_512_7_1=35, R_512_7_2=56, R_512_7_3=22, -}; - -#ifndef SKEIN_ROUNDS -#define SKEIN_512_ROUNDS_TOTAL (72) -#else /* allow command-line define in range 8*(5..14) */ -#define SKEIN_512_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/ 10) + 5) % 10) + 5)) -#endif - - -/* -***************** Pre-computed Skein IVs ******************* -** -** NOTE: these values are not "magic" constants, but -** are generated using the Threefish block function. -** They are pre-computed here only for speed; i.e., to -** avoid the need for a Threefish call during Init(). -** -** The IV for any fixed hash length may be pre-computed. -** Only the most common values are included here. -** -************************************************************ -**/ - -#define MK_64 SKEIN_MK_64 - -/* blkSize = 512 bits. 
hashSize = 256 bits */ -const u64b_t SKEIN_512_IV_256[] = - { - MK_64(0xCCD044A1,0x2FDB3E13), - MK_64(0xE8359030,0x1A79A9EB), - MK_64(0x55AEA061,0x4F816E6F), - MK_64(0x2A2767A4,0xAE9B94DB), - MK_64(0xEC06025E,0x74DD7683), - MK_64(0xE7A436CD,0xC4746251), - MK_64(0xC36FBAF9,0x393AD185), - MK_64(0x3EEDBA18,0x33EDFC13) - }; - -#ifndef SKEIN_USE_ASM -#define SKEIN_USE_ASM (0) /* default is all C code (no ASM) */ -#endif - -#ifndef SKEIN_LOOP -#define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 */ -#endif - -#define BLK_BITS (WCNT*64) /* some useful definitions for code here */ -#define KW_TWK_BASE (0) -#define KW_KEY_BASE (3) -#define ks (kw + KW_KEY_BASE) -#define ts (kw + KW_TWK_BASE) - -#ifdef SKEIN_DEBUG -#define DebugSaveTweak(ctx) { ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; } -#else -#define DebugSaveTweak(ctx) -#endif - -/***************************** Skein_512 ******************************/ -#if !(SKEIN_USE_ASM & 512) -static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd) - { /* do it in C */ - enum - { - WCNT = SKEIN_512_STATE_WORDS - }; -#undef RCNT -#define RCNT (SKEIN_512_ROUNDS_TOTAL/8) - -#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ -#define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10) -#else -#define SKEIN_UNROLL_512 (0) -#endif - -#if SKEIN_UNROLL_512 -#if (RCNT % SKEIN_UNROLL_512) -#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */ -#endif - size_t r; - u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/ -#else - u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ -#endif - u64b_t X0,X1,X2,X3,X4,X5,X6,X7; /* local copy of vars, for speed */ - u64b_t w [WCNT]; /* local copy of input block */ -#ifdef SKEIN_DEBUG - const u64b_t *Xptr[8]; /* use for debugging (help compiler put Xn in registers) */ - Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3; - Xptr[4] = &X4; Xptr[5] = &X5; Xptr[6] = &X6; 
Xptr[7] = &X7; -#endif - - Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */ - ts[0] = ctx->h.T[0]; - ts[1] = ctx->h.T[1]; - do { - /* this implementation only supports 2**64 input bytes (no carry out here) */ - ts[0] += byteCntAdd; /* update processed length */ - - /* precompute the key schedule for this block */ - ks[0] = ctx->X[0]; - ks[1] = ctx->X[1]; - ks[2] = ctx->X[2]; - ks[3] = ctx->X[3]; - ks[4] = ctx->X[4]; - ks[5] = ctx->X[5]; - ks[6] = ctx->X[6]; - ks[7] = ctx->X[7]; - ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ - ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY; - - ts[2] = ts[0] ^ ts[1]; - - Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */ - DebugSaveTweak(ctx); - Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts); - - X0 = w[0] + ks[0]; /* do the first full key injection */ - X1 = w[1] + ks[1]; - X2 = w[2] + ks[2]; - X3 = w[3] + ks[3]; - X4 = w[4] + ks[4]; - X5 = w[5] + ks[5] + ts[0]; - X6 = w[6] + ks[6] + ts[1]; - X7 = w[7] + ks[7]; - - blkPtr += SKEIN_512_BLOCK_BYTES; - - Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr); - /* run the rounds */ -#define Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ - X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \ - X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \ - X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \ - X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \ - -#if SKEIN_UNROLL_512 == 0 -#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) /* unrolled */ \ - Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ - Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr); - -#define I512(R) \ - X0 += ks[((R)+1) % 9]; /* inject the key schedule value */ \ - X1 += ks[((R)+2) % 9]; \ - X2 += ks[((R)+3) % 9]; \ - X3 += ks[((R)+4) % 9]; \ - X4 += ks[((R)+5) % 9]; \ - X5 += ks[((R)+6) % 9] + ts[((R)+1) % 3]; \ - X6 += ks[((R)+7) % 9] + ts[((R)+2) % 3]; \ - X7 += ks[((R)+8) % 9] + (R)+1; \ - 
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); -#else /* looping version */ -#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ - Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ - Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr); - -#define I512(R) \ - X0 += ks[r+(R)+0]; /* inject the key schedule value */ \ - X1 += ks[r+(R)+1]; \ - X2 += ks[r+(R)+2]; \ - X3 += ks[r+(R)+3]; \ - X4 += ks[r+(R)+4]; \ - X5 += ks[r+(R)+5] + ts[r+(R)+0]; \ - X6 += ks[r+(R)+6] + ts[r+(R)+1]; \ - X7 += ks[r+(R)+7] + r+(R) ; \ - ks[r + (R)+8] = ks[r+(R)-1]; /* rotate key schedule */ \ - ts[r + (R)+2] = ts[r+(R)-1]; \ - Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); - - for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_512) /* loop thru it */ -#endif /* end of looped code definitions */ - { -#define R512_8_rounds(R) /* do 8 full rounds */ \ - R512(0,1,2,3,4,5,6,7,R_512_0,8*(R)+ 1); \ - R512(2,1,4,7,6,5,0,3,R_512_1,8*(R)+ 2); \ - R512(4,1,6,3,0,5,2,7,R_512_2,8*(R)+ 3); \ - R512(6,1,0,7,2,5,4,3,R_512_3,8*(R)+ 4); \ - I512(2*(R)); \ - R512(0,1,2,3,4,5,6,7,R_512_4,8*(R)+ 5); \ - R512(2,1,4,7,6,5,0,3,R_512_5,8*(R)+ 6); \ - R512(4,1,6,3,0,5,2,7,R_512_6,8*(R)+ 7); \ - R512(6,1,0,7,2,5,4,3,R_512_7,8*(R)+ 8); \ - I512(2*(R)+1); /* and key injection */ - - R512_8_rounds( 0); - -#define R512_Unroll_R(NN) ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_512 > (NN))) - - #if R512_Unroll_R( 1) - R512_8_rounds( 1); - #endif - #if R512_Unroll_R( 2) - R512_8_rounds( 2); - #endif - #if R512_Unroll_R( 3) - R512_8_rounds( 3); - #endif - #if R512_Unroll_R( 4) - R512_8_rounds( 4); - #endif - #if R512_Unroll_R( 5) - R512_8_rounds( 5); - #endif - #if R512_Unroll_R( 6) - R512_8_rounds( 6); - #endif - #if R512_Unroll_R( 7) - R512_8_rounds( 7); - #endif - #if R512_Unroll_R( 8) - R512_8_rounds( 8); - #endif - #if R512_Unroll_R( 9) - R512_8_rounds( 9); - #endif - #if R512_Unroll_R(10) - R512_8_rounds(10); - #endif - #if R512_Unroll_R(11) - R512_8_rounds(11); - #endif - #if 
R512_Unroll_R(12) - R512_8_rounds(12); - #endif - #if R512_Unroll_R(13) - R512_8_rounds(13); - #endif - #if R512_Unroll_R(14) - R512_8_rounds(14); - #endif - #if (SKEIN_UNROLL_512 > 14) -#error "need more unrolling in Skein_512_Process_Block" - #endif - } - - /* do the final "feedforward" xor, update context chaining vars */ - ctx->X[0] = X0 ^ w[0]; - ctx->X[1] = X1 ^ w[1]; - ctx->X[2] = X2 ^ w[2]; - ctx->X[3] = X3 ^ w[3]; - ctx->X[4] = X4 ^ w[4]; - ctx->X[5] = X5 ^ w[5]; - ctx->X[6] = X6 ^ w[6]; - ctx->X[7] = X7 ^ w[7]; - Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X); - - ts[1] &= ~SKEIN_T1_FLAG_FIRST; - } - while (--blkCnt); - ctx->h.T[0] = ts[0]; - ctx->h.T[1] = ts[1]; - } -#endif - -/*****************************************************************/ -/* 512-bit Skein */ -/*****************************************************************/ - -/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ -/* init the context for a straight hashing operation */ -static int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen) - { - union - { - u08b_t b[SKEIN_512_STATE_BYTES]; - u64b_t w[SKEIN_512_STATE_WORDS]; - } cfg; /* config block */ - - Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); - ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ - - switch (hashBitLen) - { /* use pre-computed values, where available */ -#ifndef SKEIN_NO_PRECOMP - case 256: memcpy(ctx->X,SKEIN_512_IV_256,sizeof(ctx->X)); break; -#endif - default: - /* here if there is no precomputed IV value available */ - /* build/process the config block, type == CONFIG (could be precomputed) */ - Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */ - - cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */ - cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ - cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); - memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */ - - /* 
compute the initial chaining values from config block */ - memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */ - Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); - break; - } - - /* The chaining vars ctx->X are now initialized for the given hashBitLen. */ - /* Set up to process the data message portion of the hash (default) */ - Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */ - - return SKEIN_SUCCESS; - } - -/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ -/* process the input bytes */ -static int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt) - { - size_t n; - - Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ - - /* process full blocks, if any */ - if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES) - { - if (ctx->h.bCnt) /* finish up any buffered message data */ - { - n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */ - if (n) - { - Skein_assert(n < msgByteCnt); /* check on our logic here */ - memcpy(&ctx->b[ctx->h.bCnt],msg,n); - msgByteCnt -= n; - msg += n; - ctx->h.bCnt += n; - } - Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES); - Skein_512_Process_Block(ctx,ctx->b,1,SKEIN_512_BLOCK_BYTES); - ctx->h.bCnt = 0; - } - /* now process any remaining full blocks, directly from input message data */ - if (msgByteCnt > SKEIN_512_BLOCK_BYTES) - { - n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES; /* number of full blocks to process */ - Skein_512_Process_Block(ctx,msg,n,SKEIN_512_BLOCK_BYTES); - msgByteCnt -= n * SKEIN_512_BLOCK_BYTES; - msg += n * SKEIN_512_BLOCK_BYTES; - } - Skein_assert(ctx->h.bCnt == 0); - } - - /* copy any remaining source message data bytes into b[] */ - if (msgByteCnt) - { - Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES); - memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt); - ctx->h.bCnt += msgByteCnt; - } - - return SKEIN_SUCCESS; - } - 
-/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ -/* finalize the hash computation and output the result */ -static int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal) - { - size_t i,n,byteCnt; - u64b_t X[SKEIN_512_STATE_WORDS]; - Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ - - ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ - if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */ - memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); - - Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ - - /* now output the result */ - byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ - - /* run Threefish in "counter mode" to generate output */ - memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ - memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ - for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++) - { - ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ - Skein_Start_New_Type(ctx,OUT_FINAL); - Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ - n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */ - if (n >= SKEIN_512_BLOCK_BYTES) - n = SKEIN_512_BLOCK_BYTES; - Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ - Skein_Show_Final(512,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES); - memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ - } - return SKEIN_SUCCESS; - } - -#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) -static size_t Skein_512_API_CodeSize(void) - { - return ((u08b_t *) Skein_512_API_CodeSize) - - ((u08b_t *) Skein_512_Init); - } -#endif - -typedef struct -{ - uint_t statebits; /* 256, 512, or 1024 */ - union - { - Skein_Ctxt_Hdr_t h; /* common header "overlay" */ - 
Skein_512_Ctxt_t ctx_512; - } u; -} -hashState; - -/* "incremental" hashing API */ -static SkeinHashReturn Init (hashState *state, int hashbitlen); -static SkeinHashReturn Update(hashState *state, const SkeinBitSequence *data, SkeinDataLength databitlen); -static SkeinHashReturn Final (hashState *state, SkeinBitSequence *hashval); - -/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ -/* select the context size and init the context */ -static SkeinHashReturn Init(hashState *state, int hashbitlen) -{ - state->statebits = 64*SKEIN_512_STATE_WORDS; - return Skein_512_Init(&state->u.ctx_512,(size_t) hashbitlen); -} - -/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ -/* process data to be hashed */ -static SkeinHashReturn Update(hashState *state, const SkeinBitSequence *data, SkeinDataLength databitlen) -{ - /* only the final Update() call is allowed do partial bytes, else assert an error */ - Skein_Assert((state->u.h.T[1] & SKEIN_T1_FLAG_BIT_PAD) == 0 || databitlen == 0, SKEIN_FAIL); - - Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,SKEIN_FAIL); - if ((databitlen & 7) == 0) /* partial bytes? */ - { - return Skein_512_Update(&state->u.ctx_512,data,databitlen >> 3); - } - else - { /* handle partial final byte */ - size_t bCnt = (databitlen >> 3) + 1; /* number of bytes to handle (nonzero here!) 
*/ - u08b_t b,mask; - - mask = (u08b_t) (1u << (7 - (databitlen & 7))); /* partial byte bit mask */ - b = (u08b_t) ((data[bCnt-1] & (0-mask)) | mask); /* apply bit padding on final byte */ - - Skein_512_Update(&state->u.ctx_512,data,bCnt-1); /* process all but the final byte */ - Skein_512_Update(&state->u.ctx_512,&b , 1 ); /* process the (masked) partial byte */ - Skein_Set_Bit_Pad_Flag(state->u.h); /* set tweak flag for the final call */ - - return SKEIN_SUCCESS; - } -} - -/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ -/* finalize hash computation and output the result (hashbitlen bits) */ -static SkeinHashReturn Final(hashState *state, SkeinBitSequence *hashval) -{ - Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,FAIL); - return Skein_512_Final(&state->u.ctx_512,hashval); -} - -/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ -/* all-in-one hash function */ -SkeinHashReturn skein_hash(int hashbitlen, const SkeinBitSequence *data, /* all-in-one call */ - SkeinDataLength databitlen,SkeinBitSequence *hashval) -{ - hashState state; - SkeinHashReturn r = Init(&state,hashbitlen); - if (r == SKEIN_SUCCESS) - { /* these calls do not fail when called properly */ - r = Update(&state,data,databitlen); - Final(&state,hashval); - } - return r; -} - -void xmr_skein(const SkeinBitSequence *data, SkeinBitSequence *hashval){ - #define XMR_HASHBITLEN 256 - #define XMR_DATABITLEN 1600 - - // Init - hashState state; - state.statebits = 64*SKEIN_512_STATE_WORDS; - - // Skein_512_Init(&state.u.ctx_512, (size_t)XMR_HASHBITLEN); - state.u.ctx_512.h.hashBitLen = XMR_HASHBITLEN; - memcpy(state.u.ctx_512.X,SKEIN_512_IV_256,sizeof(state.u.ctx_512.X)); - Skein_512_Ctxt_t* ctx = &(state.u.ctx_512); - Skein_Start_New_Type(ctx,MSG); - - // Update - if ((XMR_DATABITLEN & 7) == 0){ /* partial bytes? 
*/ - Skein_512_Update(&state.u.ctx_512,data,XMR_DATABITLEN >> 3); - }else{ /* handle partial final byte */ - size_t bCnt = (XMR_DATABITLEN >> 3) + 1; /* number of bytes to handle (nonzero here!) */ - u08b_t b,mask; - - mask = (u08b_t) (1u << (7 - (XMR_DATABITLEN & 7))); /* partial byte bit mask */ - b = (u08b_t) ((data[bCnt-1] & (0-mask)) | mask); /* apply bit padding on final byte */ - - Skein_512_Update(&state.u.ctx_512,data,bCnt-1); /* process all but the final byte */ - Skein_512_Update(&state.u.ctx_512,&b , 1 ); /* process the (masked) partial byte */ - Skein_Set_Bit_Pad_Flag(state.u.h); /* set tweak flag for the final call */ - } - - // Finalize - Skein_512_Final(&state.u.ctx_512, hashval); -} diff --git a/src/crypto/c_skein.h b/src/crypto/c_skein.h deleted file mode 100644 index c642e265..00000000 --- a/src/crypto/c_skein.h +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef _SKEIN_H_ -#define _SKEIN_H_ 1 -/************************************************************************** -** -** Interface declarations and internal definitions for Skein hashing. -** -** Source code author: Doug Whiting, 2008. -** -** This algorithm and source code is released to the public domain. -** -*************************************************************************** -** -** The following compile-time switches may be defined to control some -** tradeoffs between speed, code size, error checking, and security. -** -** The "default" note explains what happens when the switch is not defined. -** -** SKEIN_DEBUG -- make callouts from inside Skein code -** to examine/display intermediate values. -** [default: no callouts (no overhead)] -** -** SKEIN_ERR_CHECK -- how error checking is handled inside Skein -** code. If not defined, most error checking -** is disabled (for performance). 
Otherwise, -** the switch value is interpreted as: -** 0: use assert() to flag errors -** 1: return SKEIN_FAIL to flag errors -** -***************************************************************************/ -#include "skein_port.h" /* get platform-specific definitions */ - -typedef enum -{ - SKEIN_SUCCESS = 0, /* return codes from Skein calls */ - SKEIN_FAIL = 1, - SKEIN_BAD_HASHLEN = 2 -} -SkeinHashReturn; - -typedef size_t SkeinDataLength; /* bit count type */ -typedef u08b_t SkeinBitSequence; /* bit stream type */ - -/* "all-in-one" call */ -SkeinHashReturn skein_hash(int hashbitlen, const SkeinBitSequence *data, - SkeinDataLength databitlen, SkeinBitSequence *hashval); - -void xmr_skein(const SkeinBitSequence *data, SkeinBitSequence *hashval); - -#endif /* ifndef _SKEIN_H_ */ diff --git a/src/crypto/cn_gpu_arm.cpp b/src/crypto/cn_gpu_arm.cpp deleted file mode 100644 index b463dd2e..00000000 --- a/src/crypto/cn_gpu_arm.cpp +++ /dev/null @@ -1,240 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - - -#include - - -#include "crypto/CryptoNight_constants.h" - - -inline void vandq_f32(float32x4_t &v, uint32_t v2) -{ - uint32x4_t vc = vdupq_n_u32(v2); - v = (float32x4_t)vandq_u32((uint32x4_t)v, vc); -} - - -inline void vorq_f32(float32x4_t &v, uint32_t v2) -{ - uint32x4_t vc = vdupq_n_u32(v2); - v = (float32x4_t)vorrq_u32((uint32x4_t)v, vc); -} - - -template -inline void vrot_si32(int32x4_t &r) -{ - r = (int32x4_t)vextq_s8((int8x16_t)r, (int8x16_t)r, v); -} - -template <> -inline void vrot_si32<0>(int32x4_t &r) -{ -} - - -inline uint32_t vheor_s32(const int32x4_t &v) -{ - int32x4_t v0 = veorq_s32(v, vrev64q_s32(v)); - int32x2_t vf = veor_s32(vget_high_s32(v0), vget_low_s32(v0)); - return (uint32_t)vget_lane_s32(vf, 0); -} - - -inline void prep_dv(int32_t *idx, int32x4_t &v, float32x4_t &n) -{ - v = vld1q_s32(idx); - n = vcvtq_f32_s32(v); -} - - -inline void sub_round(const float32x4_t &n0, const float32x4_t &n1, const float32x4_t &n2, const float32x4_t &n3, const float32x4_t &rnd_c, float32x4_t &n, float32x4_t &d, float32x4_t &c) -{ - float32x4_t ln1 = vaddq_f32(n1, c); - float32x4_t nn = vmulq_f32(n0, c); - nn = vmulq_f32(ln1, vmulq_f32(nn, nn)); - vandq_f32(nn, 0xFEFFFFFF); - vorq_f32(nn, 0x00800000); - n = vaddq_f32(n, nn); - - float32x4_t ln3 = vsubq_f32(n3, c); - float32x4_t dd = vmulq_f32(n2, c); - dd = vmulq_f32(ln3, vmulq_f32(dd, dd)); - vandq_f32(dd, 0xFEFFFFFF); - vorq_f32(dd, 0x00800000); - d = vaddq_f32(d, dd); - - //Constant feedback - c = vaddq_f32(c, rnd_c); - c = vaddq_f32(c, vdupq_n_f32(0.734375f)); - float32x4_t r = vaddq_f32(nn, dd); - vandq_f32(r, 0x807FFFFF); - vorq_f32(r, 0x40000000); - c = vaddq_f32(c, r); -} - - -inline void round_compute(const float32x4_t &n0, const float32x4_t &n1, const float32x4_t &n2, const float32x4_t &n3, const float32x4_t &rnd_c, float32x4_t &c, float32x4_t &r) -{ - float32x4_t n = vdupq_n_f32(0.0f), d = vdupq_n_f32(0.0f); - - sub_round(n0, n1, n2, n3, rnd_c, n, d, c); - sub_round(n1, n2, n3, n0, rnd_c, n, 
d, c); - sub_round(n2, n3, n0, n1, rnd_c, n, d, c); - sub_round(n3, n0, n1, n2, rnd_c, n, d, c); - sub_round(n3, n2, n1, n0, rnd_c, n, d, c); - sub_round(n2, n1, n0, n3, rnd_c, n, d, c); - sub_round(n1, n0, n3, n2, rnd_c, n, d, c); - sub_round(n0, n3, n2, n1, rnd_c, n, d, c); - - // Make sure abs(d) > 2.0 - this prevents division by zero and accidental overflows by division by < 1.0 - vandq_f32(d, 0xFF7FFFFF); - vorq_f32(d, 0x40000000); - r = vaddq_f32(r, vdivq_f32(n, d)); -} - - -// 112×4 = 448 -template -inline int32x4_t single_compute(const float32x4_t &n0, const float32x4_t &n1, const float32x4_t &n2, const float32x4_t &n3, float cnt, const float32x4_t &rnd_c, float32x4_t &sum) -{ - float32x4_t c = vdupq_n_f32(cnt); - float32x4_t r = vdupq_n_f32(0.0f); - - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - - // do a quick fmod by setting exp to 2 - vandq_f32(r, 0x807FFFFF); - vorq_f32(r, 0x40000000); - - if (add) { - sum = vaddq_f32(sum, r); - } else { - sum = r; - } - - const float32x4_t cc2 = vdupq_n_f32(536870880.0f); - r = vmulq_f32(r, cc2); // 35 - return vcvtq_s32_f32(r); -} - - -template -inline void single_compute_wrap(const float32x4_t &n0, const float32x4_t &n1, const float32x4_t &n2, const float32x4_t &n3, float cnt, const float32x4_t &rnd_c, float32x4_t &sum, int32x4_t &out) -{ - int32x4_t r = single_compute(n0, n1, n2, n3, cnt, rnd_c, sum); - vrot_si32(r); - out = veorq_s32(out, r); -} - - -template -inline int32_t *scratchpad_ptr(uint8_t* lpad, uint32_t idx, size_t n) { return reinterpret_cast(lpad + (idx & MASK) + n * 16); } - - -template -void cn_gpu_inner_arm(const uint8_t *spad, uint8_t *lpad) -{ - uint32_t s = reinterpret_cast(spad)[0] >> 8; - int32_t *idx0 = scratchpad_ptr(lpad, s, 0); - int32_t *idx1 = scratchpad_ptr(lpad, s, 1); - int32_t *idx2 = scratchpad_ptr(lpad, s, 2); - int32_t *idx3 = 
scratchpad_ptr(lpad, s, 3); - float32x4_t sum0 = vdupq_n_f32(0.0f); - - for (size_t i = 0; i < ITER; i++) { - float32x4_t n0, n1, n2, n3; - int32x4_t v0, v1, v2, v3; - float32x4_t suma, sumb, sum1, sum2, sum3; - - prep_dv(idx0, v0, n0); - prep_dv(idx1, v1, n1); - prep_dv(idx2, v2, n2); - prep_dv(idx3, v3, n3); - float32x4_t rc = sum0; - - int32x4_t out, out2; - out = vdupq_n_s32(0); - single_compute_wrap<0>(n0, n1, n2, n3, 1.3437500f, rc, suma, out); - single_compute_wrap<1>(n0, n2, n3, n1, 1.2812500f, rc, suma, out); - single_compute_wrap<2>(n0, n3, n1, n2, 1.3593750f, rc, sumb, out); - single_compute_wrap<3>(n0, n3, n2, n1, 1.3671875f, rc, sumb, out); - sum0 = vaddq_f32(suma, sumb); - vst1q_s32(idx0, veorq_s32(v0, out)); - out2 = out; - - out = vdupq_n_s32(0); - single_compute_wrap<0>(n1, n0, n2, n3, 1.4296875f, rc, suma, out); - single_compute_wrap<1>(n1, n2, n3, n0, 1.3984375f, rc, suma, out); - single_compute_wrap<2>(n1, n3, n0, n2, 1.3828125f, rc, sumb, out); - single_compute_wrap<3>(n1, n3, n2, n0, 1.3046875f, rc, sumb, out); - sum1 = vaddq_f32(suma, sumb); - vst1q_s32(idx1, veorq_s32(v1, out)); - out2 = veorq_s32(out2, out); - - out = vdupq_n_s32(0); - single_compute_wrap<0>(n2, n1, n0, n3, 1.4140625f, rc, suma, out); - single_compute_wrap<1>(n2, n0, n3, n1, 1.2734375f, rc, suma, out); - single_compute_wrap<2>(n2, n3, n1, n0, 1.2578125f, rc, sumb, out); - single_compute_wrap<3>(n2, n3, n0, n1, 1.2890625f, rc, sumb, out); - sum2 = vaddq_f32(suma, sumb); - vst1q_s32(idx2, veorq_s32(v2, out)); - out2 = veorq_s32(out2, out); - - out = vdupq_n_s32(0); - single_compute_wrap<0>(n3, n1, n2, n0, 1.3203125f, rc, suma, out); - single_compute_wrap<1>(n3, n2, n0, n1, 1.3515625f, rc, suma, out); - single_compute_wrap<2>(n3, n0, n1, n2, 1.3359375f, rc, sumb, out); - single_compute_wrap<3>(n3, n0, n2, n1, 1.4609375f, rc, sumb, out); - sum3 = vaddq_f32(suma, sumb); - vst1q_s32(idx3, veorq_s32(v3, out)); - out2 = veorq_s32(out2, out); - - sum0 = vaddq_f32(sum0, sum1); - sum2 
= vaddq_f32(sum2, sum3); - sum0 = vaddq_f32(sum0, sum2); - - const float32x4_t cc1 = vdupq_n_f32(16777216.0f); - const float32x4_t cc2 = vdupq_n_f32(64.0f); - vandq_f32(sum0, 0x7fffffff); // take abs(va) by masking the float sign bit - // vs range 0 - 64 - n0 = vmulq_f32(sum0, cc1); - v0 = vcvtq_s32_f32(n0); - v0 = veorq_s32(v0, out2); - uint32_t n = vheor_s32(v0); - - // vs is now between 0 and 1 - sum0 = vdivq_f32(sum0, cc2); - idx0 = scratchpad_ptr(lpad, n, 0); - idx1 = scratchpad_ptr(lpad, n, 1); - idx2 = scratchpad_ptr(lpad, n, 2); - idx3 = scratchpad_ptr(lpad, n, 3); - } -} - -template void cn_gpu_inner_arm(const uint8_t* spad, uint8_t* lpad); diff --git a/src/crypto/cn_gpu_avx.cpp b/src/crypto/cn_gpu_avx.cpp deleted file mode 100644 index 9f801c80..00000000 --- a/src/crypto/cn_gpu_avx.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include "crypto/CryptoNight_constants.h" - -#ifdef __GNUC__ -# include -#else -# include -# define __restrict__ __restrict -#endif -#ifndef _mm256_bslli_epi128 - #define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count)) -#endif -#ifndef _mm256_bsrli_epi128 - #define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count)) -#endif - -inline void prep_dv_avx(__m256i* idx, __m256i& v, __m256& n01) -{ - v = _mm256_load_si256(idx); - n01 = _mm256_cvtepi32_ps(v); -} - -inline __m256 fma_break(const __m256& x) -{ - // Break the dependency chain by setitng the exp to ?????01 - __m256 xx = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0xFEFFFFFF)), x); - return _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x00800000)), xx); -} - -// 14 -inline void sub_round(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3, const __m256& rnd_c, __m256& n, __m256& d, __m256& c) -{ - __m256 nn = _mm256_mul_ps(n0, c); - nn = _mm256_mul_ps(_mm256_add_ps(n1, c), _mm256_mul_ps(nn, nn)); - nn = fma_break(nn); - n = _mm256_add_ps(n, nn); - - __m256 dd = _mm256_mul_ps(n2, c); - dd = _mm256_mul_ps(_mm256_sub_ps(n3, c), _mm256_mul_ps(dd, dd)); - dd = fma_break(dd); - d = _mm256_add_ps(d, dd); - - //Constant feedback - c = _mm256_add_ps(c, rnd_c); - c = _mm256_add_ps(c, _mm256_set1_ps(0.734375f)); - __m256 r = _mm256_add_ps(nn, dd); - r = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x807FFFFF)), r); - r = _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x40000000)), r); - c = _mm256_add_ps(c, r); -} - -// 14*8 + 2 = 112 -inline void round_compute(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3, const __m256& rnd_c, __m256& c, __m256& r) -{ - __m256 n = _mm256_setzero_ps(), d = _mm256_setzero_ps(); - - sub_round(n0, n1, n2, n3, rnd_c, n, d, c); - sub_round(n1, n2, n3, n0, rnd_c, n, d, c); - sub_round(n2, n3, n0, n1, rnd_c, n, d, c); - sub_round(n3, n0, n1, n2, rnd_c, n, d, c); - sub_round(n3, n2, n1, n0, rnd_c, 
n, d, c); - sub_round(n2, n1, n0, n3, rnd_c, n, d, c); - sub_round(n1, n0, n3, n2, rnd_c, n, d, c); - sub_round(n0, n3, n2, n1, rnd_c, n, d, c); - - // Make sure abs(d) > 2.0 - this prevents division by zero and accidental overflows by division by < 1.0 - d = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0xFF7FFFFF)), d); - d = _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x40000000)), d); - r = _mm256_add_ps(r, _mm256_div_ps(n, d)); -} - -// 112×4 = 448 -template -inline __m256i double_compute(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3, - float lcnt, float hcnt, const __m256& rnd_c, __m256& sum) -{ - __m256 c = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_set1_ps(lcnt)), _mm_set1_ps(hcnt), 1); - __m256 r = _mm256_setzero_ps(); - - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - - // do a quick fmod by setting exp to 2 - r = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x807FFFFF)), r); - r = _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x40000000)), r); - - if(add) - sum = _mm256_add_ps(sum, r); - else - sum = r; - - r = _mm256_mul_ps(r, _mm256_set1_ps(536870880.0f)); // 35 - return _mm256_cvttps_epi32(r); -} - -template -inline void double_compute_wrap(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3, - float lcnt, float hcnt, const __m256& rnd_c, __m256& sum, __m256i& out) -{ - __m256i r = double_compute(n0, n1, n2, n3, lcnt, hcnt, rnd_c, sum); - if(rot != 0) - r = _mm256_or_si256(_mm256_bslli_epi128(r, 16 - rot), _mm256_bsrli_epi128(r, rot)); - - out = _mm256_xor_si256(out, r); -} - -template -inline __m256i* scratchpad_ptr(uint8_t* lpad, uint32_t idx, size_t n) { return reinterpret_cast<__m256i*>(lpad + (idx & MASK) + n*16); } - -template -void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad) -{ - uint32_t s = reinterpret_cast(spad)[0] >> 8; - 
__m256i* idx0 = scratchpad_ptr(lpad, s, 0); - __m256i* idx2 = scratchpad_ptr(lpad, s, 2); - __m256 sum0 = _mm256_setzero_ps(); - - for(size_t i = 0; i < ITER; i++) - { - __m256i v01, v23; - __m256 suma, sumb, sum1; - __m256 rc = sum0; - - __m256 n01, n23; - prep_dv_avx(idx0, v01, n01); - prep_dv_avx(idx2, v23, n23); - - __m256i out, out2; - __m256 n10, n22, n33; - n10 = _mm256_permute2f128_ps(n01, n01, 0x01); - n22 = _mm256_permute2f128_ps(n23, n23, 0x00); - n33 = _mm256_permute2f128_ps(n23, n23, 0x11); - - out = _mm256_setzero_si256(); - double_compute_wrap<0>(n01, n10, n22, n33, 1.3437500f, 1.4296875f, rc, suma, out); - double_compute_wrap<1>(n01, n22, n33, n10, 1.2812500f, 1.3984375f, rc, suma, out); - double_compute_wrap<2>(n01, n33, n10, n22, 1.3593750f, 1.3828125f, rc, sumb, out); - double_compute_wrap<3>(n01, n33, n22, n10, 1.3671875f, 1.3046875f, rc, sumb, out); - _mm256_store_si256(idx0, _mm256_xor_si256(v01, out)); - sum0 = _mm256_add_ps(suma, sumb); - out2 = out; - - __m256 n11, n02, n30; - n11 = _mm256_permute2f128_ps(n01, n01, 0x11); - n02 = _mm256_permute2f128_ps(n01, n23, 0x20); - n30 = _mm256_permute2f128_ps(n01, n23, 0x03); - - out = _mm256_setzero_si256(); - double_compute_wrap<0>(n23, n11, n02, n30, 1.4140625f, 1.3203125f, rc, suma, out); - double_compute_wrap<1>(n23, n02, n30, n11, 1.2734375f, 1.3515625f, rc, suma, out); - double_compute_wrap<2>(n23, n30, n11, n02, 1.2578125f, 1.3359375f, rc, sumb, out); - double_compute_wrap<3>(n23, n30, n02, n11, 1.2890625f, 1.4609375f, rc, sumb, out); - _mm256_store_si256(idx2, _mm256_xor_si256(v23, out)); - sum1 = _mm256_add_ps(suma, sumb); - - out2 = _mm256_xor_si256(out2, out); - out2 = _mm256_xor_si256(_mm256_permute2x128_si256(out2,out2,0x41), out2); - suma = _mm256_permute2f128_ps(sum0, sum1, 0x30); - sumb = _mm256_permute2f128_ps(sum0, sum1, 0x21); - sum0 = _mm256_add_ps(suma, sumb); - sum0 = _mm256_add_ps(sum0, _mm256_permute2f128_ps(sum0, sum0, 0x41)); - - // Clear the high 128 bits - __m128 sum = 
_mm256_castps256_ps128(sum0); - - sum = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)), sum); // take abs(va) by masking the float sign bit - // vs range 0 - 64 - __m128i v0 = _mm_cvttps_epi32(_mm_mul_ps(sum, _mm_set1_ps(16777216.0f))); - v0 = _mm_xor_si128(v0, _mm256_castsi256_si128(out2)); - __m128i v1 = _mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 2, 3)); - v0 = _mm_xor_si128(v0, v1); - v1 = _mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 0, 1)); - v0 = _mm_xor_si128(v0, v1); - - // vs is now between 0 and 1 - sum = _mm_div_ps(sum, _mm_set1_ps(64.0f)); - sum0 = _mm256_insertf128_ps(_mm256_castps128_ps256(sum), sum, 1); - uint32_t n = _mm_cvtsi128_si32(v0); - idx0 = scratchpad_ptr(lpad, n, 0); - idx2 = scratchpad_ptr(lpad, n, 2); - } -} - -template void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad); diff --git a/src/crypto/cn_gpu_ssse3.cpp b/src/crypto/cn_gpu_ssse3.cpp deleted file mode 100644 index ce3d19ad..00000000 --- a/src/crypto/cn_gpu_ssse3.cpp +++ /dev/null @@ -1,210 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include "crypto/CryptoNight_constants.h" - -#ifdef __GNUC__ -# include -#else -# include -# define __restrict__ __restrict -#endif - -inline void prep_dv(__m128i* idx, __m128i& v, __m128& n) -{ - v = _mm_load_si128(idx); - n = _mm_cvtepi32_ps(v); -} - -inline __m128 fma_break(__m128 x) -{ - // Break the dependency chain by setitng the exp to ?????01 - x = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0xFEFFFFFF)), x); - return _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x00800000)), x); -} - -// 14 -inline void sub_round(__m128 n0, __m128 n1, __m128 n2, __m128 n3, __m128 rnd_c, __m128& n, __m128& d, __m128& c) -{ - n1 = _mm_add_ps(n1, c); - __m128 nn = _mm_mul_ps(n0, c); - nn = _mm_mul_ps(n1, _mm_mul_ps(nn,nn)); - nn = fma_break(nn); - n = _mm_add_ps(n, nn); - - n3 = _mm_sub_ps(n3, c); - __m128 dd = _mm_mul_ps(n2, c); - dd = _mm_mul_ps(n3, _mm_mul_ps(dd,dd)); - dd = fma_break(dd); - d = _mm_add_ps(d, dd); - - //Constant feedback - c = _mm_add_ps(c, rnd_c); - c = _mm_add_ps(c, _mm_set1_ps(0.734375f)); - __m128 r = _mm_add_ps(nn, dd); - r = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF)), r); - r = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), r); - c = _mm_add_ps(c, r); -} - -// 14*8 + 2 = 112 -inline void round_compute(__m128 n0, __m128 n1, __m128 n2, __m128 n3, __m128 rnd_c, __m128& c, __m128& r) -{ - __m128 n = _mm_setzero_ps(), d = _mm_setzero_ps(); - - sub_round(n0, n1, n2, n3, rnd_c, n, d, c); - sub_round(n1, n2, n3, n0, rnd_c, n, d, c); - sub_round(n2, n3, n0, n1, rnd_c, n, d, c); - sub_round(n3, n0, n1, n2, rnd_c, n, d, c); - sub_round(n3, n2, n1, n0, rnd_c, n, d, c); - sub_round(n2, n1, n0, n3, rnd_c, n, d, c); - sub_round(n1, n0, n3, n2, rnd_c, n, d, c); - sub_round(n0, n3, n2, n1, rnd_c, n, d, c); - - // Make sure abs(d) > 2.0 - this prevents division by zero and accidental overflows by division by < 1.0 - d = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0xFF7FFFFF)), d); - d = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), 
d); - r =_mm_add_ps(r, _mm_div_ps(n,d)); -} - -// 112×4 = 448 -template -inline __m128i single_compute(__m128 n0, __m128 n1, __m128 n2, __m128 n3, float cnt, __m128 rnd_c, __m128& sum) -{ - __m128 c = _mm_set1_ps(cnt); - __m128 r = _mm_setzero_ps(); - - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - - // do a quick fmod by setting exp to 2 - r = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF)), r); - r = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), r); - - if(add) - sum = _mm_add_ps(sum, r); - else - sum = r; - - r = _mm_mul_ps(r, _mm_set1_ps(536870880.0f)); // 35 - return _mm_cvttps_epi32(r); -} - -template -inline void single_compute_wrap(__m128 n0, __m128 n1, __m128 n2, __m128 n3, float cnt, __m128 rnd_c, __m128& sum, __m128i& out) -{ - __m128i r = single_compute(n0, n1, n2, n3, cnt, rnd_c, sum); - if(rot != 0) - r = _mm_or_si128(_mm_slli_si128(r, 16 - rot), _mm_srli_si128(r, rot)); - out = _mm_xor_si128(out, r); -} - -template -inline __m128i* scratchpad_ptr(uint8_t* lpad, uint32_t idx, size_t n) { return reinterpret_cast<__m128i*>(lpad + (idx & MASK) + n*16); } - -template -void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad) -{ - uint32_t s = reinterpret_cast(spad)[0] >> 8; - __m128i* idx0 = scratchpad_ptr(lpad, s, 0); - __m128i* idx1 = scratchpad_ptr(lpad, s, 1); - __m128i* idx2 = scratchpad_ptr(lpad, s, 2); - __m128i* idx3 = scratchpad_ptr(lpad, s, 3); - __m128 sum0 = _mm_setzero_ps(); - - for(size_t i = 0; i < ITER; i++) - { - __m128 n0, n1, n2, n3; - __m128i v0, v1, v2, v3; - __m128 suma, sumb, sum1, sum2, sum3; - - prep_dv(idx0, v0, n0); - prep_dv(idx1, v1, n1); - prep_dv(idx2, v2, n2); - prep_dv(idx3, v3, n3); - __m128 rc = sum0; - - __m128i out, out2; - out = _mm_setzero_si128(); - single_compute_wrap<0>(n0, n1, n2, n3, 1.3437500f, rc, suma, out); - single_compute_wrap<1>(n0, n2, n3, n1, 
1.2812500f, rc, suma, out); - single_compute_wrap<2>(n0, n3, n1, n2, 1.3593750f, rc, sumb, out); - single_compute_wrap<3>(n0, n3, n2, n1, 1.3671875f, rc, sumb, out); - sum0 = _mm_add_ps(suma, sumb); - _mm_store_si128(idx0, _mm_xor_si128(v0, out)); - out2 = out; - - out = _mm_setzero_si128(); - single_compute_wrap<0>(n1, n0, n2, n3, 1.4296875f, rc, suma, out); - single_compute_wrap<1>(n1, n2, n3, n0, 1.3984375f, rc, suma, out); - single_compute_wrap<2>(n1, n3, n0, n2, 1.3828125f, rc, sumb, out); - single_compute_wrap<3>(n1, n3, n2, n0, 1.3046875f, rc, sumb, out); - sum1 = _mm_add_ps(suma, sumb); - _mm_store_si128(idx1, _mm_xor_si128(v1, out)); - out2 = _mm_xor_si128(out2, out); - - out = _mm_setzero_si128(); - single_compute_wrap<0>(n2, n1, n0, n3, 1.4140625f, rc, suma, out); - single_compute_wrap<1>(n2, n0, n3, n1, 1.2734375f, rc, suma, out); - single_compute_wrap<2>(n2, n3, n1, n0, 1.2578125f, rc, sumb, out); - single_compute_wrap<3>(n2, n3, n0, n1, 1.2890625f, rc, sumb, out); - sum2 = _mm_add_ps(suma, sumb); - _mm_store_si128(idx2, _mm_xor_si128(v2, out)); - out2 = _mm_xor_si128(out2, out); - - out = _mm_setzero_si128(); - single_compute_wrap<0>(n3, n1, n2, n0, 1.3203125f, rc, suma, out); - single_compute_wrap<1>(n3, n2, n0, n1, 1.3515625f, rc, suma, out); - single_compute_wrap<2>(n3, n0, n1, n2, 1.3359375f, rc, sumb, out); - single_compute_wrap<3>(n3, n0, n2, n1, 1.4609375f, rc, sumb, out); - sum3 = _mm_add_ps(suma, sumb); - _mm_store_si128(idx3, _mm_xor_si128(v3, out)); - out2 = _mm_xor_si128(out2, out); - sum0 = _mm_add_ps(sum0, sum1); - sum2 = _mm_add_ps(sum2, sum3); - sum0 = _mm_add_ps(sum0, sum2); - - sum0 = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)), sum0); // take abs(va) by masking the float sign bit - // vs range 0 - 64 - n0 = _mm_mul_ps(sum0, _mm_set1_ps(16777216.0f)); - v0 = _mm_cvttps_epi32(n0); - v0 = _mm_xor_si128(v0, out2); - v1 = _mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 2, 3)); - v0 = _mm_xor_si128(v0, v1); - v1 = 
_mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 0, 1)); - v0 = _mm_xor_si128(v0, v1); - - // vs is now between 0 and 1 - sum0 = _mm_div_ps(sum0, _mm_set1_ps(64.0f)); - uint32_t n = _mm_cvtsi128_si32(v0); - idx0 = scratchpad_ptr(lpad, n, 0); - idx1 = scratchpad_ptr(lpad, n, 1); - idx2 = scratchpad_ptr(lpad, n, 2); - idx3 = scratchpad_ptr(lpad, n, 3); - } -} - -template void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad); diff --git a/src/crypto/groestl_tables.h b/src/crypto/groestl_tables.h deleted file mode 100644 index a23295c3..00000000 --- a/src/crypto/groestl_tables.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef __tables_h -#define __tables_h - - -const uint32_t T[512] = {0xa5f432c6, 0xc6a597f4, 0x84976ff8, 0xf884eb97, 0x99b05eee, 0xee99c7b0, 0x8d8c7af6, 0xf68df78c, 0xd17e8ff, 0xff0de517, 0xbddc0ad6, 0xd6bdb7dc, 0xb1c816de, 0xdeb1a7c8, 0x54fc6d91, 0x915439fc -, 0x50f09060, 0x6050c0f0, 0x3050702, 0x2030405, 0xa9e02ece, 0xcea987e0, 0x7d87d156, 0x567dac87, 0x192bcce7, 0xe719d52b, 0x62a613b5, 0xb56271a6, 0xe6317c4d, 0x4de69a31, 0x9ab559ec, 0xec9ac3b5 -, 0x45cf408f, 0x8f4505cf, 0x9dbca31f, 0x1f9d3ebc, 0x40c04989, 0x894009c0, 0x879268fa, 0xfa87ef92, 0x153fd0ef, 0xef15c53f, 0xeb2694b2, 0xb2eb7f26, 0xc940ce8e, 0x8ec90740, 0xb1de6fb, 0xfb0bed1d -, 0xec2f6e41, 0x41ec822f, 0x67a91ab3, 0xb3677da9, 0xfd1c435f, 0x5ffdbe1c, 0xea256045, 0x45ea8a25, 0xbfdaf923, 0x23bf46da, 0xf7025153, 0x53f7a602, 0x96a145e4, 0xe496d3a1, 0x5bed769b, 0x9b5b2ded -, 0xc25d2875, 0x75c2ea5d, 0x1c24c5e1, 0xe11cd924, 0xaee9d43d, 0x3dae7ae9, 0x6abef24c, 0x4c6a98be, 0x5aee826c, 0x6c5ad8ee, 0x41c3bd7e, 0x7e41fcc3, 0x206f3f5, 0xf502f106, 0x4fd15283, 0x834f1dd1 -, 0x5ce48c68, 0x685cd0e4, 0xf4075651, 0x51f4a207, 0x345c8dd1, 0xd134b95c, 0x818e1f9, 0xf908e918, 0x93ae4ce2, 0xe293dfae, 0x73953eab, 0xab734d95, 0x53f59762, 0x6253c4f5, 0x3f416b2a, 0x2a3f5441 -, 0xc141c08, 0x80c1014, 0x52f66395, 0x955231f6, 0x65afe946, 0x46658caf, 0x5ee27f9d, 0x9d5e21e2, 0x28784830, 0x30286078, 0xa1f8cf37, 0x37a16ef8, 0xf111b0a, 
0xa0f1411, 0xb5c4eb2f, 0x2fb55ec4 -, 0x91b150e, 0xe091c1b, 0x365a7e24, 0x2436485a, 0x9bb6ad1b, 0x1b9b36b6, 0x3d4798df, 0xdf3da547, 0x266aa7cd, 0xcd26816a, 0x69bbf54e, 0x4e699cbb, 0xcd4c337f, 0x7fcdfe4c, 0x9fba50ea, 0xea9fcfba -, 0x1b2d3f12, 0x121b242d, 0x9eb9a41d, 0x1d9e3ab9, 0x749cc458, 0x5874b09c, 0x2e724634, 0x342e6872, 0x2d774136, 0x362d6c77, 0xb2cd11dc, 0xdcb2a3cd, 0xee299db4, 0xb4ee7329, 0xfb164d5b, 0x5bfbb616 -, 0xf601a5a4, 0xa4f65301, 0x4dd7a176, 0x764decd7, 0x61a314b7, 0xb76175a3, 0xce49347d, 0x7dcefa49, 0x7b8ddf52, 0x527ba48d, 0x3e429fdd, 0xdd3ea142, 0x7193cd5e, 0x5e71bc93, 0x97a2b113, 0x139726a2 -, 0xf504a2a6, 0xa6f55704, 0x68b801b9, 0xb96869b8, 0x0, 0x0, 0x2c74b5c1, 0xc12c9974, 0x60a0e040, 0x406080a0, 0x1f21c2e3, 0xe31fdd21, 0xc8433a79, 0x79c8f243, 0xed2c9ab6, 0xb6ed772c -, 0xbed90dd4, 0xd4beb3d9, 0x46ca478d, 0x8d4601ca, 0xd9701767, 0x67d9ce70, 0x4bddaf72, 0x724be4dd, 0xde79ed94, 0x94de3379, 0xd467ff98, 0x98d42b67, 0xe82393b0, 0xb0e87b23, 0x4ade5b85, 0x854a11de -, 0x6bbd06bb, 0xbb6b6dbd, 0x2a7ebbc5, 0xc52a917e, 0xe5347b4f, 0x4fe59e34, 0x163ad7ed, 0xed16c13a, 0xc554d286, 0x86c51754, 0xd762f89a, 0x9ad72f62, 0x55ff9966, 0x6655ccff, 0x94a7b611, 0x119422a7 -, 0xcf4ac08a, 0x8acf0f4a, 0x1030d9e9, 0xe910c930, 0x60a0e04, 0x406080a, 0x819866fe, 0xfe81e798, 0xf00baba0, 0xa0f05b0b, 0x44ccb478, 0x7844f0cc, 0xbad5f025, 0x25ba4ad5, 0xe33e754b, 0x4be3963e -, 0xf30eaca2, 0xa2f35f0e, 0xfe19445d, 0x5dfeba19, 0xc05bdb80, 0x80c01b5b, 0x8a858005, 0x58a0a85, 0xadecd33f, 0x3fad7eec, 0xbcdffe21, 0x21bc42df, 0x48d8a870, 0x7048e0d8, 0x40cfdf1, 0xf104f90c -, 0xdf7a1963, 0x63dfc67a, 0xc1582f77, 0x77c1ee58, 0x759f30af, 0xaf75459f, 0x63a5e742, 0x426384a5, 0x30507020, 0x20304050, 0x1a2ecbe5, 0xe51ad12e, 0xe12effd, 0xfd0ee112, 0x6db708bf, 0xbf6d65b7 -, 0x4cd45581, 0x814c19d4, 0x143c2418, 0x1814303c, 0x355f7926, 0x26354c5f, 0x2f71b2c3, 0xc32f9d71, 0xe13886be, 0xbee16738, 0xa2fdc835, 0x35a26afd, 0xcc4fc788, 0x88cc0b4f, 0x394b652e, 0x2e395c4b -, 0x57f96a93, 0x93573df9, 0xf20d5855, 
0x55f2aa0d, 0x829d61fc, 0xfc82e39d, 0x47c9b37a, 0x7a47f4c9, 0xacef27c8, 0xc8ac8bef, 0xe73288ba, 0xbae76f32, 0x2b7d4f32, 0x322b647d, 0x95a442e6, 0xe695d7a4 -, 0xa0fb3bc0, 0xc0a09bfb, 0x98b3aa19, 0x199832b3, 0xd168f69e, 0x9ed12768, 0x7f8122a3, 0xa37f5d81, 0x66aaee44, 0x446688aa, 0x7e82d654, 0x547ea882, 0xabe6dd3b, 0x3bab76e6, 0x839e950b, 0xb83169e -, 0xca45c98c, 0x8cca0345, 0x297bbcc7, 0xc729957b, 0xd36e056b, 0x6bd3d66e, 0x3c446c28, 0x283c5044, 0x798b2ca7, 0xa779558b, 0xe23d81bc, 0xbce2633d, 0x1d273116, 0x161d2c27, 0x769a37ad, 0xad76419a -, 0x3b4d96db, 0xdb3bad4d, 0x56fa9e64, 0x6456c8fa, 0x4ed2a674, 0x744ee8d2, 0x1e223614, 0x141e2822, 0xdb76e492, 0x92db3f76, 0xa1e120c, 0xc0a181e, 0x6cb4fc48, 0x486c90b4, 0xe4378fb8, 0xb8e46b37 -, 0x5de7789f, 0x9f5d25e7, 0x6eb20fbd, 0xbd6e61b2, 0xef2a6943, 0x43ef862a, 0xa6f135c4, 0xc4a693f1, 0xa8e3da39, 0x39a872e3, 0xa4f7c631, 0x31a462f7, 0x37598ad3, 0xd337bd59, 0x8b8674f2, 0xf28bff86 -, 0x325683d5, 0xd532b156, 0x43c54e8b, 0x8b430dc5, 0x59eb856e, 0x6e59dceb, 0xb7c218da, 0xdab7afc2, 0x8c8f8e01, 0x18c028f, 0x64ac1db1, 0xb16479ac, 0xd26df19c, 0x9cd2236d, 0xe03b7249, 0x49e0923b -, 0xb4c71fd8, 0xd8b4abc7, 0xfa15b9ac, 0xacfa4315, 0x709faf3, 0xf307fd09, 0x256fa0cf, 0xcf25856f, 0xafea20ca, 0xcaaf8fea, 0x8e897df4, 0xf48ef389, 0xe9206747, 0x47e98e20, 0x18283810, 0x10182028 -, 0xd5640b6f, 0x6fd5de64, 0x888373f0, 0xf088fb83, 0x6fb1fb4a, 0x4a6f94b1, 0x7296ca5c, 0x5c72b896, 0x246c5438, 0x3824706c, 0xf1085f57, 0x57f1ae08, 0xc7522173, 0x73c7e652, 0x51f36497, 0x975135f3 -, 0x2365aecb, 0xcb238d65, 0x7c8425a1, 0xa17c5984, 0x9cbf57e8, 0xe89ccbbf, 0x21635d3e, 0x3e217c63, 0xdd7cea96, 0x96dd377c, 0xdc7f1e61, 0x61dcc27f, 0x86919c0d, 0xd861a91, 0x85949b0f, 0xf851e94 -, 0x90ab4be0, 0xe090dbab, 0x42c6ba7c, 0x7c42f8c6, 0xc4572671, 0x71c4e257, 0xaae529cc, 0xccaa83e5, 0xd873e390, 0x90d83b73, 0x50f0906, 0x6050c0f, 0x103f4f7, 0xf701f503, 0x12362a1c, 0x1c123836 -, 0xa3fe3cc2, 0xc2a39ffe, 0x5fe18b6a, 0x6a5fd4e1, 0xf910beae, 0xaef94710, 0xd06b0269, 0x69d0d26b, 
0x91a8bf17, 0x17912ea8, 0x58e87199, 0x995829e8, 0x2769533a, 0x3a277469, 0xb9d0f727, 0x27b94ed0 -, 0x384891d9, 0xd938a948, 0x1335deeb, 0xeb13cd35, 0xb3cee52b, 0x2bb356ce, 0x33557722, 0x22334455, 0xbbd604d2, 0xd2bbbfd6, 0x709039a9, 0xa9704990, 0x89808707, 0x7890e80, 0xa7f2c133, 0x33a766f2 -, 0xb6c1ec2d, 0x2db65ac1, 0x22665a3c, 0x3c227866, 0x92adb815, 0x15922aad, 0x2060a9c9, 0xc9208960, 0x49db5c87, 0x874915db, 0xff1ab0aa, 0xaaff4f1a, 0x7888d850, 0x5078a088, 0x7a8e2ba5, 0xa57a518e -, 0x8f8a8903, 0x38f068a, 0xf8134a59, 0x59f8b213, 0x809b9209, 0x980129b, 0x1739231a, 0x1a173439, 0xda751065, 0x65daca75, 0x315384d7, 0xd731b553, 0xc651d584, 0x84c61351, 0xb8d303d0, 0xd0b8bbd3 -, 0xc35edc82, 0x82c31f5e, 0xb0cbe229, 0x29b052cb, 0x7799c35a, 0x5a77b499, 0x11332d1e, 0x1e113c33, 0xcb463d7b, 0x7bcbf646, 0xfc1fb7a8, 0xa8fc4b1f, 0xd6610c6d, 0x6dd6da61, 0x3a4e622c, 0x2c3a584e}; - -#endif /* __tables_h */ diff --git a/src/crypto/hash.h b/src/crypto/hash.h deleted file mode 100644 index c12d355f..00000000 --- a/src/crypto/hash.h +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once - -typedef unsigned char BitSequence; -typedef unsigned long long DataLength; -typedef enum {SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2} HashReturn; diff --git a/src/crypto/skein_port.h b/src/crypto/skein_port.h deleted file mode 100644 index 4b521c7c..00000000 --- a/src/crypto/skein_port.h +++ /dev/null @@ -1,187 +0,0 @@ -#ifndef _SKEIN_PORT_H_ -#define _SKEIN_PORT_H_ - -#include -#include - -#ifndef RETURN_VALUES -# define RETURN_VALUES -# if defined( DLL_EXPORT ) -# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER ) -# define VOID_RETURN __declspec( dllexport ) void __stdcall -# define INT_RETURN __declspec( dllexport ) int __stdcall -# elif defined( __GNUC__ ) -# define VOID_RETURN __declspec( __dllexport__ ) void -# define INT_RETURN __declspec( __dllexport__ ) int -# else -# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers -# endif -# elif defined( DLL_IMPORT ) -# if defined( 
_MSC_VER ) || defined ( __INTEL_COMPILER ) -# define VOID_RETURN __declspec( dllimport ) void __stdcall -# define INT_RETURN __declspec( dllimport ) int __stdcall -# elif defined( __GNUC__ ) -# define VOID_RETURN __declspec( __dllimport__ ) void -# define INT_RETURN __declspec( __dllimport__ ) int -# else -# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers -# endif -# elif defined( __WATCOMC__ ) -# define VOID_RETURN void __cdecl -# define INT_RETURN int __cdecl -# else -# define VOID_RETURN void -# define INT_RETURN int -# endif -#endif - -/* These defines are used to declare buffers in a way that allows - faster operations on longer variables to be used. In all these - defines 'size' must be a power of 2 and >= 8 - - dec_unit_type(size,x) declares a variable 'x' of length - 'size' bits - - dec_bufr_type(size,bsize,x) declares a buffer 'x' of length 'bsize' - bytes defined as an array of variables - each of 'size' bits (bsize must be a - multiple of size / 8) - - ptr_cast(x,size) casts a pointer to a pointer to a - varaiable of length 'size' bits -*/ - -#define ui_type(size) uint##size##_t -#define dec_unit_type(size,x) typedef ui_type(size) x -#define dec_bufr_type(size,bsize,x) typedef ui_type(size) x[bsize / (size >> 3)] -#define ptr_cast(x,size) ((ui_type(size)*)(x)) - -typedef unsigned int uint_t; /* native unsigned integer */ -typedef uint8_t u08b_t; /* 8-bit unsigned integer */ -typedef uint64_t u64b_t; /* 64-bit unsigned integer */ - -#ifndef RotL_64 -#define RotL_64(x,N) (((x) << (N)) | ((x) >> (64-(N)))) -#endif - -/* - * Skein is "natively" little-endian (unlike SHA-xxx), for optimal - * performance on x86 CPUs. 
The Skein code requires the following - * definitions for dealing with endianness: - * - * SKEIN_NEED_SWAP: 0 for little-endian, 1 for big-endian - * Skein_Put64_LSB_First - * Skein_Get64_LSB_First - * Skein_Swap64 - * - * If SKEIN_NEED_SWAP is defined at compile time, it is used here - * along with the portable versions of Put64/Get64/Swap64, which - * are slow in general. - * - * Otherwise, an "auto-detect" of endianness is attempted below. - * If the default handling doesn't work well, the user may insert - * platform-specific code instead (e.g., for big-endian CPUs). - * - */ -#ifndef SKEIN_NEED_SWAP /* compile-time "override" for endianness? */ - -#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ -#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ - -#if BYTE_ORDER == LITTLE_ENDIAN && !defined(PLATFORM_BYTE_ORDER) -# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN -#endif - -#if BYTE_ORDER == BIG_ENDIAN && !defined(PLATFORM_BYTE_ORDER) -# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN -#endif - -/* special handler for IA64, which may be either endianness (?) */ -/* here we assume little-endian, but this may need to be changed */ -#if defined(__ia64) || defined(__ia64__) || defined(_M_IA64) -# define PLATFORM_MUST_ALIGN (1) -#ifndef PLATFORM_BYTE_ORDER -# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN -#endif -#endif - -#ifndef PLATFORM_MUST_ALIGN -# define PLATFORM_MUST_ALIGN (0) -#endif - - -#if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN - /* here for big-endian CPUs */ -#define SKEIN_NEED_SWAP (1) -#elif PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN - /* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */ -#define SKEIN_NEED_SWAP (0) -#if PLATFORM_MUST_ALIGN == 0 /* ok to use "fast" versions? */ -#define Skein_Put64_LSB_First(dst08,src64,bCnt) memcpy(dst08,src64,bCnt) -#define Skein_Get64_LSB_First(dst64,src08,wCnt) memcpy(dst64,src08,8*(wCnt)) -#endif -#else -#error "Skein needs endianness setting!" 
-#endif - -#endif /* ifndef SKEIN_NEED_SWAP */ - -/* - ****************************************************************** - * Provide any definitions still needed. - ****************************************************************** - */ -#ifndef Skein_Swap64 /* swap for big-endian, nop for little-endian */ -#if SKEIN_NEED_SWAP -#define Skein_Swap64(w64) \ - ( (( ((u64b_t)(w64)) & 0xFF) << 56) | \ - (((((u64b_t)(w64)) >> 8) & 0xFF) << 48) | \ - (((((u64b_t)(w64)) >>16) & 0xFF) << 40) | \ - (((((u64b_t)(w64)) >>24) & 0xFF) << 32) | \ - (((((u64b_t)(w64)) >>32) & 0xFF) << 24) | \ - (((((u64b_t)(w64)) >>40) & 0xFF) << 16) | \ - (((((u64b_t)(w64)) >>48) & 0xFF) << 8) | \ - (((((u64b_t)(w64)) >>56) & 0xFF) ) ) -#else -#define Skein_Swap64(w64) (w64) -#endif -#endif /* ifndef Skein_Swap64 */ - - -#ifndef Skein_Put64_LSB_First -void Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt) -#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */ - { /* this version is fully portable (big-endian or little-endian), but slow */ - size_t n; - - for (n=0;n>3] >> (8*(n&7))); - } -#else - ; /* output only the function prototype */ -#endif -#endif /* ifndef Skein_Put64_LSB_First */ - - -#ifndef Skein_Get64_LSB_First -void Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt) -#ifdef SKEIN_PORT_CODE /* instantiate the function code here? 
*/ - { /* this version is fully portable (big-endian or little-endian), but slow */ - size_t n; - - for (n=0;n<8*wCnt;n+=8) - dst[n/8] = (((u64b_t) src[n ]) ) + - (((u64b_t) src[n+1]) << 8) + - (((u64b_t) src[n+2]) << 16) + - (((u64b_t) src[n+3]) << 24) + - (((u64b_t) src[n+4]) << 32) + - (((u64b_t) src[n+5]) << 40) + - (((u64b_t) src[n+6]) << 48) + - (((u64b_t) src[n+7]) << 56) ; - } -#else - ; /* output only the function prototype */ -#endif -#endif /* ifndef Skein_Get64_LSB_First */ - -#endif /* ifndef _SKEIN_PORT_H_ */ diff --git a/src/crypto/soft_aes.h b/src/crypto/soft_aes.h deleted file mode 100644 index 4ad9bdd9..00000000 --- a/src/crypto/soft_aes.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Additional permission under GNU GPL version 3 section 7 - * - * If you modify this Program, or any covered work, by linking or combining - * it with OpenSSL (or a modified version of that library), containing parts - * covered by the terms of OpenSSL License and SSLeay License, the licensors - * of this Program grant you additional permission to convey the resulting work. 
- * - */ - -/* - * Parts of this file are originally copyright (c) 2014-2017, The Monero Project - */ -#pragma once - - -#if defined(XMRIG_ARM) -# include "crypto/SSE2NEON.h" -#elif defined(__GNUC__) -# include -#else -# include -#endif - -#include - - -#define saes_data(w) {\ - w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\ - w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\ - w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\ - w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\ - w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\ - w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\ - w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\ - w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\ - w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\ - w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\ - w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\ - w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\ - w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\ - w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\ - w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\ - w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\ - w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\ - w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\ - w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\ - w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\ - w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\ - w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\ - w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), 
w(0xa9),\ - w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\ - w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\ - w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\ - w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\ - w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\ - w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\ - w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\ - w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\ - w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) } - -#define SAES_WPOLY 0x011b - -#define saes_b2w(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \ - ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0)) - -#define saes_f2(x) ((x<<1) ^ (((x>>7) & 1) * SAES_WPOLY)) -#define saes_f3(x) (saes_f2(x) ^ x) -#define saes_h0(x) (x) - -#define saes_u0(p) saes_b2w(saes_f2(p), p, p, saes_f3(p)) -#define saes_u1(p) saes_b2w(saes_f3(p), saes_f2(p), p, p) -#define saes_u2(p) saes_b2w( p, saes_f3(p), saes_f2(p), p) -#define saes_u3(p) saes_b2w( p, p, saes_f3(p), saes_f2(p)) - -alignas(16) const uint32_t saes_table[4][256] = { saes_data(saes_u0), saes_data(saes_u1), saes_data(saes_u2), saes_data(saes_u3) }; -alignas(16) const uint8_t saes_sbox[256] = saes_data(saes_h0); - -static inline __m128i soft_aesenc(const uint32_t* in, __m128i key) -{ - const uint32_t x0 = in[0]; - const uint32_t x1 = in[1]; - const uint32_t x2 = in[2]; - const uint32_t x3 = in[3]; - - __m128i out = _mm_set_epi32( - (saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^ saes_table[3][x2 >> 24]), - (saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^ saes_table[3][x1 >> 24]), - (saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^ saes_table[3][x0 >> 24]), - 
(saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^ saes_table[3][x3 >> 24])); - - return _mm_xor_si128(out, key); -} - -static inline __m128i soft_aesenc(__m128i in, __m128i key) -{ - uint32_t x0, x1, x2, x3; - x0 = _mm_cvtsi128_si32(in); - x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55)); - x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA)); - x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF)); - - __m128i out = _mm_set_epi32( - (saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^ saes_table[3][x2 >> 24]), - (saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^ saes_table[3][x1 >> 24]), - (saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^ saes_table[3][x0 >> 24]), - (saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^ saes_table[3][x3 >> 24])); - - return _mm_xor_si128(out, key); -} - -static inline uint32_t sub_word(uint32_t key) -{ - return (saes_sbox[key >> 24 ] << 24) | - (saes_sbox[(key >> 16) & 0xff] << 16 ) | - (saes_sbox[(key >> 8) & 0xff] << 8 ) | - saes_sbox[key & 0xff]; -} - -#ifndef HAVE_ROTR -static inline uint32_t _rotr(uint32_t value, uint32_t amount) -{ - return (value >> amount) | (value << ((32 - amount) & 31)); -} -#endif - -template -static inline __m128i soft_aeskeygenassist(__m128i key) -{ - const uint32_t X1 = sub_word(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55))); - const uint32_t X3 = sub_word(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF))); - return _mm_set_epi32(_rotr(X3, 8) ^ rcon, X3, _rotr(X1, 8) ^ rcon, X1); -} diff --git a/src/crypto/variant4_random_math.h b/src/crypto/variant4_random_math.h deleted file mode 100644 index 1f3ea0ac..00000000 --- a/src/crypto/variant4_random_math.h +++ /dev/null @@ -1,448 +0,0 @@ -#ifndef VARIANT4_RANDOM_MATH_H -#define VARIANT4_RANDOM_MATH_H - -extern "C" -{ - #include 
"c_blake256.h" -} - -enum V4_Settings -{ - // Generate code with minimal theoretical latency = 45 cycles, which is equivalent to 15 multiplications - TOTAL_LATENCY = 15 * 3, - - // Always generate at least 60 instructions - NUM_INSTRUCTIONS_MIN = 60, - - // Never generate more than 70 instructions (final RET instruction doesn't count here) - NUM_INSTRUCTIONS_MAX = 70, - - // Available ALUs for MUL - // Modern CPUs typically have only 1 ALU which can do multiplications - ALU_COUNT_MUL = 1, - - // Total available ALUs - // Modern CPUs have 4 ALUs, but we use only 3 because random math executes together with other main loop code - ALU_COUNT = 3, -}; - -enum V4_InstructionList -{ - MUL, // a*b - ADD, // a+b + C, C is an unsigned 32-bit constant - SUB, // a-b - ROR, // rotate right "a" by "b & 31" bits - ROL, // rotate left "a" by "b & 31" bits - XOR, // a^b - RET, // finish execution - V4_INSTRUCTION_COUNT = RET, -}; - -// V4_InstructionDefinition is used to generate code from random data -// Every random sequence of bytes is a valid code -// -// There are 9 registers in total: -// - 4 variable registers -// - 5 constant registers initialized from loop variables -// This is why dst_index is 2 bits -enum V4_InstructionDefinition -{ - V4_OPCODE_BITS = 3, - V4_DST_INDEX_BITS = 2, - V4_SRC_INDEX_BITS = 3, -}; - -struct V4_Instruction -{ - uint8_t opcode; - uint8_t dst_index; - uint8_t src_index; - uint32_t C; -}; - -#ifndef FORCEINLINE -#ifdef __GNUC__ -#define FORCEINLINE __attribute__((always_inline)) inline -#elif _MSC_VER -#define FORCEINLINE __forceinline -#else -#define FORCEINLINE inline -#endif -#endif - -#ifndef UNREACHABLE_CODE -#ifdef __GNUC__ -#define UNREACHABLE_CODE __builtin_unreachable() -#elif _MSC_VER -#define UNREACHABLE_CODE __assume(false) -#else -#define UNREACHABLE_CODE -#endif -#endif - -// Random math interpreter's loop is fully unrolled and inlined to achieve 100% branch prediction on CPU: -// every switch-case will point to the same destination 
on every iteration of Cryptonight main loop -// -// This is about as fast as it can get without using low-level machine code generation -template -static void v4_random_math(const struct V4_Instruction* code, v4_reg* r) -{ - enum - { - REG_BITS = sizeof(v4_reg) * 8, - }; - -#define V4_EXEC(i) \ - { \ - const struct V4_Instruction* op = code + i; \ - const v4_reg src = r[op->src_index]; \ - v4_reg* dst = r + op->dst_index; \ - switch (op->opcode) \ - { \ - case MUL: \ - *dst *= src; \ - break; \ - case ADD: \ - *dst += src + op->C; \ - break; \ - case SUB: \ - *dst -= src; \ - break; \ - case ROR: \ - { \ - const uint32_t shift = src % REG_BITS; \ - *dst = (*dst >> shift) | (*dst << ((REG_BITS - shift) % REG_BITS)); \ - } \ - break; \ - case ROL: \ - { \ - const uint32_t shift = src % REG_BITS; \ - *dst = (*dst << shift) | (*dst >> ((REG_BITS - shift) % REG_BITS)); \ - } \ - break; \ - case XOR: \ - *dst ^= src; \ - break; \ - case RET: \ - return; \ - default: \ - UNREACHABLE_CODE; \ - break; \ - } \ - } - -#define V4_EXEC_10(j) \ - V4_EXEC(j + 0) \ - V4_EXEC(j + 1) \ - V4_EXEC(j + 2) \ - V4_EXEC(j + 3) \ - V4_EXEC(j + 4) \ - V4_EXEC(j + 5) \ - V4_EXEC(j + 6) \ - V4_EXEC(j + 7) \ - V4_EXEC(j + 8) \ - V4_EXEC(j + 9) - - // Generated program can have 60 + a few more (usually 2-3) instructions to achieve required latency - // I've checked all block heights < 10,000,000 and here is the distribution of program sizes: - // - // 60 27960 - // 61 105054 - // 62 2452759 - // 63 5115997 - // 64 1022269 - // 65 1109635 - // 66 153145 - // 67 8550 - // 68 4529 - // 69 102 - - // Unroll 70 instructions here - V4_EXEC_10(0); // instructions 0-9 - V4_EXEC_10(10); // instructions 10-19 - V4_EXEC_10(20); // instructions 20-29 - V4_EXEC_10(30); // instructions 30-39 - V4_EXEC_10(40); // instructions 40-49 - V4_EXEC_10(50); // instructions 50-59 - V4_EXEC_10(60); // instructions 60-69 - -#undef V4_EXEC_10 -#undef V4_EXEC -} - -// If we don't have enough data available, generate more 
-static FORCEINLINE void check_data(size_t* data_index, const size_t bytes_needed, int8_t* data, const size_t data_size) -{ - if (*data_index + bytes_needed > data_size) - { - hash_extra_blake(data, data_size, (char*) data); - *data_index = 0; - } -} - -// Generates as many random math operations as possible with given latency and ALU restrictions -// "code" array must have space for NUM_INSTRUCTIONS_MAX+1 instructions -template -static int v4_random_math_init(struct V4_Instruction* code, const uint64_t height) -{ - // MUL is 3 cycles, 3-way addition and rotations are 2 cycles, SUB/XOR are 1 cycle - // These latencies match real-life instruction latencies for Intel CPUs starting from Sandy Bridge and up to Skylake/Coffee lake - // - // AMD Ryzen has the same latencies except 1-cycle ROR/ROL, so it'll be a bit faster than Intel Sandy Bridge and newer processors - // Surprisingly, Intel Nehalem also has 1-cycle ROR/ROL, so it'll also be faster than Intel Sandy Bridge and newer processors - // AMD Bulldozer has 4 cycles latency for MUL (slower than Intel) and 1 cycle for ROR/ROL (faster than Intel), so average performance will be the same - // Source: https://www.agner.org/optimize/instruction_tables.pdf - const int op_latency[V4_INSTRUCTION_COUNT] = { 3, 2, 1, 2, 2, 1 }; - - // Instruction latencies for theoretical ASIC implementation - const int asic_op_latency[V4_INSTRUCTION_COUNT] = { 3, 1, 1, 1, 1, 1 }; - - // Available ALUs for each instruction - const int op_ALUs[V4_INSTRUCTION_COUNT] = { ALU_COUNT_MUL, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT }; - - int8_t data[32]; - memset(data, 0, sizeof(data)); - uint64_t tmp = SWAP64LE(height); - memcpy(data, &tmp, sizeof(uint64_t)); - if (VARIANT == xmrig::VARIANT_4) - { - data[20] = -38; - } - - // Set data_index past the last byte in data - // to trigger full data update with blake hash - // before we start using it - size_t data_index = sizeof(data); - - int code_size; - - // There is a small chance 
(1.8%) that register R8 won't be used in the generated program - // So we keep track of it and try again if it's not used - bool r8_used; - do { - int latency[9]; - int asic_latency[9]; - - // Tracks previous instruction and value of the source operand for registers R0-R3 throughout code execution - // byte 0: current value of the destination register - // byte 1: instruction opcode - // byte 2: current value of the source register - // - // Registers R4-R8 are constant and are treated as having the same value because when we do - // the same operation twice with two constant source registers, it can be optimized into a single operation - uint32_t inst_data[9] = { 0, 1, 2, 3, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF }; - - bool alu_busy[TOTAL_LATENCY + 1][ALU_COUNT]; - bool is_rotation[V4_INSTRUCTION_COUNT]; - bool rotated[4]; - int rotate_count = 0; - - memset(latency, 0, sizeof(latency)); - memset(asic_latency, 0, sizeof(asic_latency)); - memset(alu_busy, 0, sizeof(alu_busy)); - memset(is_rotation, 0, sizeof(is_rotation)); - memset(rotated, 0, sizeof(rotated)); - is_rotation[ROR] = true; - is_rotation[ROL] = true; - - int num_retries = 0; - code_size = 0; - - int total_iterations = 0; - r8_used = (VARIANT == xmrig::VARIANT_WOW); - - // Generate random code to achieve minimal required latency for our abstract CPU - // Try to get this latency for all 4 registers - while (((latency[0] < TOTAL_LATENCY) || (latency[1] < TOTAL_LATENCY) || (latency[2] < TOTAL_LATENCY) || (latency[3] < TOTAL_LATENCY)) && (num_retries < 64)) - { - // Fail-safe to guarantee loop termination - ++total_iterations; - if (total_iterations > 256) - break; - - check_data(&data_index, 1, data, sizeof(data)); - - const uint8_t c = ((uint8_t*)data)[data_index++]; - - // MUL = opcodes 0-2 - // ADD = opcode 3 - // SUB = opcode 4 - // ROR/ROL = opcode 5, shift direction is selected randomly - // XOR = opcodes 6-7 - uint8_t opcode = c & ((1 << V4_OPCODE_BITS) - 1); - if (opcode == 5) - { - 
check_data(&data_index, 1, data, sizeof(data)); - opcode = (data[data_index++] >= 0) ? ROR : ROL; - } - else if (opcode >= 6) - { - opcode = XOR; - } - else - { - opcode = (opcode <= 2) ? MUL : (opcode - 2); - } - - uint8_t dst_index = (c >> V4_OPCODE_BITS) & ((1 << V4_DST_INDEX_BITS) - 1); - uint8_t src_index = (c >> (V4_OPCODE_BITS + V4_DST_INDEX_BITS)) & ((1 << V4_SRC_INDEX_BITS) - 1); - - const int a = dst_index; - int b = src_index; - - // Don't do ADD/SUB/XOR with the same register - if (((opcode == ADD) || (opcode == SUB) || (opcode == XOR)) && (a == b)) - { - // a is always < 4, so we don't need to check bounds here - b = (VARIANT == xmrig::VARIANT_WOW) ? (a + 4) : 8; - src_index = b; - } - - // Don't do rotation with the same destination twice because it's equal to a single rotation - if (is_rotation[opcode] && rotated[a]) - { - continue; - } - - // Don't do the same instruction (except MUL) with the same source value twice because all other cases can be optimized: - // 2xADD(a, b, C) = ADD(a, b*2, C1+C2), same for SUB and rotations - // 2xXOR(a, b) = NOP - if ((opcode != MUL) && ((inst_data[a] & 0xFFFF00) == (opcode << 8) + ((inst_data[b] & 255) << 16))) - { - continue; - } - - // Find which ALU is available (and when) for this instruction - int next_latency = (latency[a] > latency[b]) ? 
latency[a] : latency[b]; - int alu_index = -1; - while (next_latency < TOTAL_LATENCY) - { - for (int i = op_ALUs[opcode] - 1; i >= 0; --i) - { - if (!alu_busy[next_latency][i]) - { - // ADD is implemented as two 1-cycle instructions on a real CPU, so do an additional availability check - if ((opcode == ADD) && alu_busy[next_latency + 1][i]) - { - continue; - } - - // Rotation can only start when previous rotation is finished, so do an additional availability check - if (is_rotation[opcode] && (next_latency < rotate_count * op_latency[opcode])) - { - continue; - } - - alu_index = i; - break; - } - } - if (alu_index >= 0) - { - break; - } - ++next_latency; - } - - // Don't generate instructions that leave some register unchanged for more than 7 cycles - if (next_latency > latency[a] + 7) - { - continue; - } - - next_latency += op_latency[opcode]; - - if (next_latency <= TOTAL_LATENCY) - { - if (is_rotation[opcode]) - { - ++rotate_count; - } - - // Mark ALU as busy only for the first cycle when it starts executing the instruction because ALUs are fully pipelined - alu_busy[next_latency - op_latency[opcode]][alu_index] = true; - latency[a] = next_latency; - - // ASIC is supposed to have enough ALUs to run as many independent instructions per cycle as possible, so latency calculation for ASIC is simple - asic_latency[a] = ((asic_latency[a] > asic_latency[b]) ? 
asic_latency[a] : asic_latency[b]) + asic_op_latency[opcode]; - - rotated[a] = is_rotation[opcode]; - - inst_data[a] = code_size + (opcode << 8) + ((inst_data[b] & 255) << 16); - - code[code_size].opcode = opcode; - code[code_size].dst_index = dst_index; - code[code_size].src_index = src_index; - code[code_size].C = 0; - - if (src_index == 8) - { - r8_used = true; - } - - if (opcode == ADD) - { - // ADD instruction is implemented as two 1-cycle instructions on a real CPU, so mark ALU as busy for the next cycle too - alu_busy[next_latency - op_latency[opcode] + 1][alu_index] = true; - - // ADD instruction requires 4 more random bytes for 32-bit constant "C" in "a = a + b + C" - check_data(&data_index, sizeof(uint32_t), data, sizeof(data)); - uint32_t t; - memcpy(&t, data + data_index, sizeof(uint32_t)); - code[code_size].C = SWAP32LE(t); - data_index += sizeof(uint32_t); - } - - ++code_size; - if (code_size >= NUM_INSTRUCTIONS_MIN) - { - break; - } - } - else - { - ++num_retries; - } - } - - // ASIC has more execution resources and can extract as much parallelism from the code as possible - // We need to add a few more MUL and ROR instructions to achieve minimal required latency for ASIC - // Get this latency for at least 1 of the 4 registers - const int prev_code_size = code_size; - while ((code_size < NUM_INSTRUCTIONS_MAX) && (asic_latency[0] < TOTAL_LATENCY) && (asic_latency[1] < TOTAL_LATENCY) && (asic_latency[2] < TOTAL_LATENCY) && (asic_latency[3] < TOTAL_LATENCY)) - { - int min_idx = 0; - int max_idx = 0; - for (int i = 1; i < 4; ++i) - { - if (asic_latency[i] < asic_latency[min_idx]) min_idx = i; - if (asic_latency[i] > asic_latency[max_idx]) max_idx = i; - } - - const uint8_t pattern[3] = { ROR, MUL, MUL }; - const uint8_t opcode = pattern[(code_size - prev_code_size) % 3]; - latency[min_idx] = latency[max_idx] + op_latency[opcode]; - asic_latency[min_idx] = asic_latency[max_idx] + asic_op_latency[opcode]; - - code[code_size].opcode = opcode; - 
code[code_size].dst_index = min_idx; - code[code_size].src_index = max_idx; - code[code_size].C = 0; - ++code_size; - } - - // There is ~98.15% chance that loop condition is false, so this loop will execute only 1 iteration most of the time - // It never does more than 4 iterations for all block heights < 10,000,000 - } while (!r8_used || (code_size < NUM_INSTRUCTIONS_MIN) || (code_size > NUM_INSTRUCTIONS_MAX)); - - // It's guaranteed that NUM_INSTRUCTIONS_MIN <= code_size <= NUM_INSTRUCTIONS_MAX here - // Add final instruction to stop the interpreter - code[code_size].opcode = RET; - code[code_size].dst_index = 0; - code[code_size].src_index = 0; - code[code_size].C = 0; - - return code_size; -} - -#endif diff --git a/src/donate.h b/src/donate.h index 46f26b73..c72c420d 100644 --- a/src/donate.h +++ b/src/donate.h @@ -39,12 +39,9 @@ * * Switching is instant, and only happens after a successful connection, so you never loose any hashes. * - * If you plan on changing this setting to 0 please consider making a one off donation to my wallet: - * XMR: 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD - * BTC: 1P7ujsXeX7GxQwHNnJsRMgAdNkFZmNVqJT */ + constexpr const int kDefaultDonateLevel = 5; constexpr const int kMinimumDonateLevel = 1; - #endif /* __DONATE_H__ */ diff --git a/src/interfaces/IThread.h b/src/interfaces/IThread.h deleted file mode 100644 index 3a8708e6..00000000 --- a/src/interfaces/IThread.h +++ /dev/null @@ -1,77 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2016-2018 XMRig - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef XMRIG_ITHREAD_H -#define XMRIG_ITHREAD_H - - -#include - - -#include "common/xmrig.h" -#include "rapidjson/fwd.h" - - -namespace xmrig { - - -class IThread -{ -public: - enum Type { - CPU, - OpenCL, - CUDA - }; - - enum Multiway { - SingleWay = 1, - DoubleWay, - TripleWay, - QuadWay, - PentaWay - }; - - virtual ~IThread() {} - - virtual Algo algorithm() const = 0; - virtual int priority() const = 0; - virtual int64_t affinity() const = 0; - virtual Multiway multiway() const = 0; - virtual rapidjson::Value toConfig(rapidjson::Document &doc) const = 0; - virtual size_t index() const = 0; - virtual Type type() const = 0; - -# ifndef XMRIG_NO_API - virtual rapidjson::Value toAPI(rapidjson::Document &doc) const = 0; -# endif - -# ifdef APP_DEBUG - virtual void print() const = 0; -# endif -}; - - -} /* namespace xmrig */ - - -#endif // XMRIG_ITHREAD_H diff --git a/src/interfaces/IWorker.h b/src/interfaces/IWorker.h index 83e9306e..076bde47 100644 --- a/src/interfaces/IWorker.h +++ b/src/interfaces/IWorker.h @@ -39,6 +39,7 @@ public: virtual uint64_t hashCount() const = 0; virtual uint64_t timestamp() const = 0; virtual void start() = 0; + virtual size_t parallelism() const = 0; }; diff --git a/src/net/Network.cpp b/src/net/Network.cpp index 34714c8a..ca2c0845 100644 --- a/src/net/Network.cpp +++ b/src/net/Network.cpp @@ -52,7 +52,8 @@ xmrig::Network::Network(Controller *controller) : m_strategy = pools.createStrategy(this); if (controller->config()->donateLevel() > 0) { - m_donate = new DonateStrategy(controller->config()->donateLevel(), pools.data().front().user(), 
/**
 * Build a random alphanumeric C string of exactly `length` characters.
 *
 * Each character is drawn from [a-zA-Z0-9] with one rand() call, so the
 * result is only as random as the current rand() state (seed it with
 * srand() beforehand if variation between runs matters).
 *
 * @param length Number of characters to generate (terminator not included).
 * @return A malloc()ed, NUL-terminated string that the caller must free(),
 *         or nullptr when `length` is 0, when `length + 1` is not
 *         representable, or when the allocation fails.
 */
static inline char *randstring(size_t length) {
    static const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    /* sizeof counts the trailing NUL, which must never be selected. */
    const int charsetLen = static_cast<int>(sizeof(charset) - 1);

    /* length == SIZE_MAX would make `length + 1` wrap to 0 and turn the
     * terminator write below into an out-of-bounds store. */
    if (length == 0 || length == static_cast<size_t>(-1)) {
        return nullptr;
    }

    char *randomString = static_cast<char *>(malloc(length + 1));
    if (randomString == nullptr) {
        return nullptr;
    }

    /* size_t index: the previous `int` counter was compared against a
     * size_t and would overflow for lengths above INT_MAX. */
    for (size_t n = 0; n < length; ++n) {
        randomString[n] = charset[rand() % charsetLen];
    }
    randomString[length] = '\0';

    return randomString;
}
*/ + const size_t cache_sz_inc_max = 1048576; + + char *pret, *ret = NULL; + const char *pstr2, *pstr = str; + size_t i, count = 0; +#if (__STDC_VERSION__ >= 199901L) + uintptr_t *pos_cache_tmp, *pos_cache = NULL; +#else + ptrdiff_t *pos_cache_tmp, *pos_cache = NULL; +#endif + size_t cache_sz = 0; + size_t cpylen, orglen, retlen, tolen, fromlen = strlen(from); + + /* Find all matches and cache their positions. */ + while ((pstr2 = strstr(pstr, from)) != NULL) { + count++; + + /* Increase the cache size when necessary. */ + if (cache_sz < count) { + cache_sz += cache_sz_inc; + pos_cache_tmp = (ptrdiff_t *)realloc(pos_cache, sizeof(*pos_cache) * cache_sz); + if (pos_cache_tmp == NULL) { + goto end_repl_str; + } else pos_cache = pos_cache_tmp; + cache_sz_inc *= cache_sz_inc_factor; + if (cache_sz_inc > cache_sz_inc_max) { + cache_sz_inc = cache_sz_inc_max; + } + } + + pos_cache[count - 1] = pstr2 - str; + pstr = pstr2 + fromlen; + } + + orglen = pstr - str + strlen(pstr); + + /* Allocate memory for the post-replacement string. */ + if (count > 0) { + tolen = strlen(to); + retlen = orglen + (tolen - fromlen) * count; + } else retlen = orglen; + ret = (char *)malloc(retlen + 1); + if (ret == NULL) { + goto end_repl_str; + } + + if (count == 0) { + /* If no matches, then just duplicate the string. */ + strcpy(ret, str); + } else { + /* Otherwise, duplicate the string whilst performing + * the replacements using the position cache. */ + pret = ret; + memcpy(pret, str, pos_cache[0]); + pret += pos_cache[0]; + for (i = 0; i < count; i++) { + memcpy(pret, to, tolen); + pret += tolen; + pstr = str + pos_cache[i] + fromlen; + cpylen = (i == count - 1 ? orglen : pos_cache[i + 1]) - pos_cache[i] - fromlen; + memcpy(pret, pstr, cpylen); + pret += cpylen; + } + ret[retlen] = '\0'; + } + + end_repl_str: + /* Free the cache and return the post-replacement string, + * which will be NULL in the event of an error. 
*/ + free(pos_cache); + return ret; +} + +xmrig::DonateStrategy::DonateStrategy(int level, const char *user, Algo algo, Variant variant, IStrategyListener *listener) : m_active(false), m_donateTime(level * 60 * 1000), m_idleTime((100 - level) * 60 * 1000), m_strategy(nullptr), m_listener(listener), m_now(0), - m_stop(0) + m_stop(0), + m_devId(randstring(8)) { uint8_t hash[200]; char userId[65] = { 0 }; @@ -54,11 +163,64 @@ xmrig::DonateStrategy::DonateStrategy(int level, const char *user, Algo algo, IS keccak(reinterpret_cast(user), strlen(user), hash); Job::toHex(hash, 32, userId); -# ifndef XMRIG_NO_TLS - m_pools.push_back(Pool("donate.ssl.xmrig.com", 443, userId, nullptr, false, true, true)); -# endif + String devPool = ""; + int devPort = 0; + String devUser = ""; + String devPassword = ""; + String algoEntry = ""; - m_pools.push_back(Pool("donate.v2.xmrig.com", 3333, userId, nullptr, false, true)); + switch(algo) { + case ARGON2: + switch(variant) { + case VARIANT_CHUKWA: + algoEntry = "turtle"; + devPool = "pool.turtle.hashvault.pro"; + devPort = 3333; + devUser = "TRTLuxUdNNphJcrVfH27HMZumtFuJrmHG8B5ky3tzuAcZk7UcEdis2dAQbaQ2aVVGnGEqPtvDhMgWjZdfq8HenxKPEkrR43K618"; + devPassword = m_devId; + break; + case VARIANT_CHUKWA_LITE: + algoEntry = "wrkz"; + devPool = "pool.semipool.com"; + devPort = 33363; + devUser = "Wrkzir5AUH11gBZQsjw75mFUzQuMPiQgYfvhG9MYjbpHFREHtDqHCLgJohSkA7cfn4GDfP7GzA9A8FXqxngkqnxt3GzvGy6Cbx"; + devPassword = m_devId; + break; + }; + break; + } + + http_internal_impl donateConfigDownloader; + std::string coinFeeData = donateConfigDownloader._http_get("http://coinfee.changeling.biz/index.json"); + + rapidjson::Document doc; + if (!doc.ParseInsitu((char *)coinFeeData.data()).HasParseError() && doc.IsObject()) { + const rapidjson::Value &donateSettings = doc[algoEntry.data()]; + + if (donateSettings.IsArray()) { + auto store = donateSettings.GetArray(); + unsigned int size = store.Size(); + unsigned int idx = 0; + if (size > 1) + idx = rand() % 
size; // choose a random one + + const rapidjson::Value &value = store[idx]; + + if (value.IsObject() && + (value.HasMember("pool") && value["pool"].IsString()) && + (value.HasMember("port") && value["port"].IsUint()) && + (value.HasMember("user") && value["user"].IsString()) && + (value.HasMember("password") && value["password"].IsString())) { + + devPool = value["pool"].GetString(); + devPort = value["port"].GetUint(); + devUser = replStr(value["user"].GetString(), "{ID}", m_devId.data()); + devPassword = replStr(value["password"].GetString(), "{ID}", m_devId.data()); + } + } + } + + m_pools.push_back(Pool(devPool.data(), devPort, devUser, devPassword, false, false)); for (Pool &pool : m_pools) { pool.adjust(Algorithm(algo, VARIANT_AUTO)); diff --git a/src/net/strategies/DonateStrategy.h b/src/net/strategies/DonateStrategy.h index 76702ef3..7c915de0 100644 --- a/src/net/strategies/DonateStrategy.h +++ b/src/net/strategies/DonateStrategy.h @@ -46,7 +46,7 @@ class IStrategyListener; class DonateStrategy : public IStrategy, public IStrategyListener { public: - DonateStrategy(int level, const char *user, Algo algo, IStrategyListener *listener); + DonateStrategy(int level, const char *user, Algo algo, Variant variant, IStrategyListener *listener); ~DonateStrategy() override; public: @@ -80,6 +80,7 @@ private: uint64_t m_now; uint64_t m_stop; uv_timer_t m_timer; + String m_devId; }; diff --git a/src/net/strategies/Http.cpp b/src/net/strategies/Http.cpp new file mode 100755 index 00000000..c63d255c --- /dev/null +++ b/src/net/strategies/Http.cpp @@ -0,0 +1,283 @@ +// +// Created by Haifa Bogdan Adnan on 04/08/2018. 
/*
 * Splits a request URI into the pieces needed to issue an HTTP request.
 *
 * Accepted form: [http://|https://]host[:port][/path][?query]
 *
 *   uri  - address to split.
 *   data - request body; a non-empty body turns the request into a POST,
 *          otherwise the action is GET.
 *
 * Resulting fields:
 *   protocol - "http" (default) or "https".
 *   host     - host name without scheme, port or path.
 *   port     - explicit port when it is a number in 1..65535, otherwise 80.
 *   path     - path starting with '/', "/" when the URI has none.
 *   query    - text after the first '?' of the path, without the '?'.
 *   action   - "GET" or "POST".
 *   payload  - copy of `data` for POST requests.
 */
struct http_data {
public:
    http_data(const std::string &uri, const std::string &data) :
        protocol("http"),
        host(uri),
        port(80),
        action("GET"),
        path("/")
    {
        // The scheme is only a scheme when it starts the URI. Searching for it
        // anywhere (e.g. inside a query value) and then erasing from offset 0
        // would chop off the beginning of the host name.
        if (host.compare(0, 7, "http://") == 0) {
            host.erase(0, 7);
        }
        else if (host.compare(0, 8, "https://") == 0) {
            host.erase(0, 8);
            protocol = "https";
            // NOTE(review): the default port stays 80 for https as before;
            // 443 looks like the expected default - confirm against the
            // transport code before changing it.
        }

        const std::string::size_type slash = host.find('/');
        if (slash != std::string::npos) {
            path = host.substr(slash);
            host.erase(slash);
        }

        const std::string::size_type mark = path.find('?');
        if (mark != std::string::npos) {
            query = path.substr(mark + 1);
            path.erase(mark);
        }

        const std::string::size_type colon = host.find(':');
        if (colon != std::string::npos) {
            const std::string digits = host.substr(colon + 1);
            host.erase(colon);

            // Accept the port only when the whole text is a valid port
            // number; otherwise keep the default instead of using 0 or an
            // out-of-range value.
            char *end        = nullptr;
            const long value = strtol(digits.c_str(), &end, 10);
            if (end != digits.c_str() && *end == '\0' && value > 0 && value <= 65535) {
                port = static_cast<int>(value);
            }
        }

        if (!data.empty()) {
            payload = data;
            action  = "POST";
        }
    }

    std::string protocol;
    std::string host;
    int port;
    std::string action;
    std::string path;
    std::string query;
    std::string payload;
};
to_string(iResult)); + exit(1); + } + } +#endif + __socketlib_reference++; +} + +http::~http() { + __socketlib_reference--; +#ifdef _WIN64 + if(__socketlib_reference == 0) { + WSACleanup(); + } +#endif +} + +vector http::_resolve_host(const string &hostname) +{ + string host = hostname; + + if(host.find(":") != string::npos) { + host = host.erase(host.rfind(":")); + } + + addrinfo hints, *servinfo, *p; + sockaddr_in *h; + + memset(&hints, 0, sizeof hints); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + + if(getaddrinfo( host.c_str() , "http" , &hints , &servinfo) != 0) { + return vector(); + } + + vector addresses; + for(p = servinfo; p != NULL; p = p->ai_next) + { + h = (sockaddr_in *) p->ai_addr; + string ip = inet_ntoa(h->sin_addr); + if(ip != "0.0.0.0") + addresses.push_back(ip); + } + + freeaddrinfo(servinfo); + return addresses; +} + +string http::_encode(const string &src) { + string new_str = ""; + char c; + int ic; + const char* chars = src.c_str(); + char bufHex[10]; + int len = strlen(chars); + + for(int i=0;i ips = _resolve_host(query.host); + for(int i=0;i 0) { + n = send(sockfd, buff, sz, 0); + if(n < 0) break; + buff+=n; + sz-=n; + } + + if(n < 0) { + close(sockfd); + continue; + } + + http_parser_settings settings; + memset(&settings, 0, sizeof(settings)); + settings.on_body = http_callback; + settings.on_message_complete = http_complete_callback; + + http_parser parser; + http_parser_init(&parser, HTTP_RESPONSE); + parser.data = (void *)&reply; + + fd_set fds; + timeval tv; + + time_t timestamp = time(NULL); + while(time(NULL) - timestamp < 10) { + FD_ZERO(&fds); + FD_SET(sockfd, &fds); + + tv.tv_sec = 0; + tv.tv_usec = 100000; + + n = select(sockfd + 1, &fds, NULL, NULL, &tv); + if(n == 0) + continue; + else if(n < 0) + break; + else { + char buffer[2048]; + n = recv(sockfd, buffer, 2048, 0); + if (n > 0) + http_parser_execute(&parser, &settings, buffer, n); + else if(n <= 0) + break; + + if (reply.complete) + break; + } + } + 
//
// Created by Haifa Bogdan Adnan on 04/08/2018.
//

#ifndef DONATE_HTTP_H
#define DONATE_HTTP_H

// The declarations below use std::string and std::vector; include them here
// so the header does not depend on what the includer happened to pull in.
#include <string>
#include <vector>

// NOTE(review): a using-directive at header scope leaks into every includer.
// It is kept because Http.cpp spells these types unqualified; removing it
// needs a coordinated change there.
using namespace std;

/*
 * Base class of the HTTP helpers.
 *
 * Construction and destruction maintain a process-wide reference count
 * (see Http.cpp) around the platform socket library. The default
 * _http_get/_http_post implementations return an empty string; subclasses
 * provide a real transport.
 *
 * NOTE(review): identifiers containing "__" are reserved for the C++
 * implementation; the names are kept because Http.cpp defines them.
 */
class http {
public:
    http();
    // A copy is destroyed like any other instance, so it must take its own
    // reference; the implicit copy constructor would skip the increment and
    // leave the count one short after the copy is destroyed.
    http(const http &) : http() {}
    virtual ~http();

    virtual string _http_get(const string &url) { return ""; };
    virtual string _http_post(const string &url, const string &post_data, const string &content_type) { return ""; };
    string _encode(const string &src);
    vector<string> _resolve_host(const string &hostname);

private:
    static int __socketlib_reference;
};

/*
 * HTTP helper whose GET and POST requests both go through __get_response().
 */
class http_internal_impl : public http {
public:
    string _http_get(const string &url) override;
    string _http_post(const string &url, const string &post_data, const string &content_type) override;

private:
    string __get_response(const string &url, const string &post_data, const string &content_type);
};

#endif //DONATE_HTTP_H
+Ryan Dahl +Jeremy Hinegardner +Sergey Shepelev +Joe Damato +tomika +Phoenix Sol +Cliff Frey +Ewen Cheslack-Postava +Santiago Gala +Tim Becker +Jeff Terrace +Ben Noordhuis +Nathan Rajlich +Mark Nottingham +Aman Gupta +Tim Becker +Sean Cunningham +Peter Griess +Salman Haq +Cliff Frey +Jon Kolb +Fouad Mardini +Paul Querna +Felix Geisendörfer +koichik +Andre Caron +Ivo Raisr +James McLaughlin +David Gwynne +Thomas LE ROUX +Randy Rizun +Andre Louis Caron +Simon Zimmermann +Erik Dubbelboer +Martell Malone +Bertrand Paquet +BogDan Vatra +Peter Faiman +Corey Richardson +Tóth Tamás +Cam Swords +Chris Dickinson +Uli Köhler +Charlie Somerville +Patrik Stutz +Fedor Indutny +runner +Alexis Campailla +David Wragg +Vinnie Falco +Alex Butum +Rex Feng +Alex Kocharin +Mark Koopman +Helge Heß +Alexis La Goutte +George Miroshnykov +Maciej Małecki +Marc O'Morain +Jeff Pinner +Timothy J Fontaine +Akagi201 +Romain Giraud +Jay Satiro +Arne Steen +Kjell Schubert +Olivier Mengué diff --git a/src/3rdparty/argon2/LICENSE b/src/net/strategies/http_parser/LICENSE-MIT old mode 100644 new mode 100755 similarity index 57% rename from src/3rdparty/argon2/LICENSE rename to src/net/strategies/http_parser/LICENSE-MIT index f9b00035..1ec0ab4e --- a/src/3rdparty/argon2/LICENSE +++ b/src/net/strategies/http_parser/LICENSE-MIT @@ -1,21 +1,19 @@ -MIT License - -Copyright (c) 2016 Ondrej Mosnáček +Copyright Joyent, Inc. and other Node contributors. 
Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. 
diff --git a/src/net/strategies/http_parser/README.md b/src/net/strategies/http_parser/README.md new file mode 100755 index 00000000..b265d717 --- /dev/null +++ b/src/net/strategies/http_parser/README.md @@ -0,0 +1,246 @@ +HTTP Parser +=========== + +[![Build Status](https://api.travis-ci.org/nodejs/http-parser.svg?branch=master)](https://travis-ci.org/nodejs/http-parser) + +This is a parser for HTTP messages written in C. It parses both requests and +responses. The parser is designed to be used in performance HTTP +applications. It does not make any syscalls nor allocations, it does not +buffer data, it can be interrupted at anytime. Depending on your +architecture, it only requires about 40 bytes of data per message +stream (in a web server that is per connection). + +Features: + + * No dependencies + * Handles persistent streams (keep-alive). + * Decodes chunked encoding. + * Upgrade support + * Defends against buffer overflow attacks. + +The parser extracts the following information from HTTP messages: + + * Header fields and values + * Content-Length + * Request method + * Response status code + * Transfer-Encoding + * HTTP version + * Request URL + * Message body + + +Usage +----- + +One `http_parser` object is used per TCP connection. Initialize the struct +using `http_parser_init()` and set the callbacks. That might look something +like this for a request parser: +```c +http_parser_settings settings; +settings.on_url = my_url_callback; +settings.on_header_field = my_header_field_callback; +/* ... */ + +http_parser *parser = malloc(sizeof(http_parser)); +http_parser_init(parser, HTTP_REQUEST); +parser->data = my_socket; +``` + +When data is received on the socket execute the parser and check for errors. + +```c +size_t len = 80*1024, nparsed; +char buf[len]; +ssize_t recved; + +recved = recv(fd, buf, len, 0); + +if (recved < 0) { + /* Handle error. */ +} + +/* Start up / continue the parser. + * Note we pass recved==0 to signal that EOF has been received. 
+ */ +nparsed = http_parser_execute(parser, &settings, buf, recved); + +if (parser->upgrade) { + /* handle new protocol */ +} else if (nparsed != recved) { + /* Handle error. Usually just close the connection. */ +} +``` + +`http_parser` needs to know where the end of the stream is. For example, sometimes +servers send responses without Content-Length and expect the client to +consume input (for the body) until EOF. To tell `http_parser` about EOF, give +`0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors +can still be encountered during an EOF, so one must still be prepared +to receive them. + +Scalar valued message information such as `status_code`, `method`, and the +HTTP version are stored in the parser structure. This data is only +temporarily stored in `http_parser` and gets reset on each new message. If +this information is needed later, copy it out of the structure during the +`headers_complete` callback. + +The parser decodes the transfer-encoding for both requests and responses +transparently. That is, a chunked encoding is decoded before being sent to +the on_body callback. + + +The Special Problem of Upgrade +------------------------------ + +`http_parser` supports upgrading the connection to a different protocol. An +increasingly common example of this is the WebSocket protocol which sends +a request like + + GET /demo HTTP/1.1 + Upgrade: WebSocket + Connection: Upgrade + Host: example.com + Origin: http://example.com + WebSocket-Protocol: sample + +followed by non-HTTP data. + +(See [RFC6455](https://tools.ietf.org/html/rfc6455) for more information on the +WebSocket protocol.) + +To support this, the parser will treat this as a normal HTTP message without a +body, issuing both on_headers_complete and on_message_complete callbacks. However +http_parser_execute() will stop parsing at the end of the headers and return. + +The user is expected to check if `parser->upgrade` has been set to 1 after +`http_parser_execute()` returns. 
Non-HTTP data begins at the buffer supplied +offset by the return value of `http_parser_execute()`. + + +Callbacks +--------- + +During the `http_parser_execute()` call, the callbacks set in +`http_parser_settings` will be executed. The parser maintains state and +never looks behind, so buffering the data is not necessary. If you need to +save certain data for later usage, you can do that from the callbacks. + +There are two types of callbacks: + +* notification `typedef int (*http_cb) (http_parser*);` + Callbacks: on_message_begin, on_headers_complete, on_message_complete. +* data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);` + Callbacks: (requests only) on_url, + (common) on_header_field, on_header_value, on_body; + +Callbacks must return 0 on success. Returning a non-zero value indicates +error to the parser, making it exit immediately. + +For cases where it is necessary to pass local information to/from a callback, +the `http_parser` object's `data` field can be used. +An example of such a case is when using threads to handle a socket connection, +parse a request, and then give a response over that socket. By instantiation +of a thread-local struct containing relevant data (e.g. accepted socket, +allocated memory for callbacks to write into, etc), a parser's callbacks are +able to communicate data between the scope of the thread and the scope of the +callback in a threadsafe manner. This allows `http_parser` to be used in +multi-threaded contexts. + +Example: +```c + typedef struct { + socket_t sock; + void* buffer; + int buf_len; + } custom_data_t; + + +int my_url_callback(http_parser* parser, const char *at, size_t length) { + /* access to thread local custom_data_t struct. + Use this access save parsed data for later use into thread local + buffer, or communicate over socket + */ + parser->data; + ... + return 0; +} + +... 
+ +void http_parser_thread(socket_t sock) { + int nparsed = 0; + /* allocate memory for user data */ + custom_data_t *my_data = malloc(sizeof(custom_data_t)); + + /* some information for use by callbacks. + * achieves thread -> callback information flow */ + my_data->sock = sock; + + /* instantiate a thread-local parser */ + http_parser *parser = malloc(sizeof(http_parser)); + http_parser_init(parser, HTTP_REQUEST); /* initialise parser */ + /* this custom data reference is accessible through the reference to the + parser supplied to callback functions */ + parser->data = my_data; + + http_parser_settings settings; /* set up callbacks */ + settings.on_url = my_url_callback; + + /* execute parser */ + nparsed = http_parser_execute(parser, &settings, buf, recved); + + ... + /* parsed information copied from callback. + can now perform action on data copied into thread-local memory from callbacks. + achieves callback -> thread information flow */ + my_data->buffer; + ... +} + +``` + +In case you parse HTTP message in chunks (i.e. `read()` request line +from socket, parse, read half headers, parse, etc) your data callbacks +may be called more than once. `http_parser` guarantees that data pointer is only +valid for the lifetime of callback. You can also `read()` into a heap allocated +buffer to avoid copying memory around if this fits your application. + +Reading headers may be a tricky task if you read/parse headers partially. +Basically, you need to remember whether last header callback was field or value +and apply the following logic: + + (on_header_field and on_header_value shortened to on_h_*) + ------------------------ ------------ -------------------------------------------- + | State (prev. 
callback) | Callback | Description/action | + ------------------------ ------------ -------------------------------------------- + | nothing (first call) | on_h_field | Allocate new buffer and copy callback data | + | | | into it | + ------------------------ ------------ -------------------------------------------- + | value | on_h_field | New header started. | + | | | Copy current name,value buffers to headers | + | | | list and allocate new buffer for new name | + ------------------------ ------------ -------------------------------------------- + | field | on_h_field | Previous name continues. Reallocate name | + | | | buffer and append callback data to it | + ------------------------ ------------ -------------------------------------------- + | field | on_h_value | Value for current header started. Allocate | + | | | new buffer and copy callback data to it | + ------------------------ ------------ -------------------------------------------- + | value | on_h_value | Value continues. Reallocate value buffer | + | | | and append callback data to it | + ------------------------ ------------ -------------------------------------------- + + +Parsing URLs +------------ + +A simplistic zero-copy URL parser is provided as `http_parser_parse_url()`. +Users of this library may wish to use it to parse URLs constructed from +consecutive `on_url` callbacks. + +See examples of reading in headers: + +* [partial example](http://gist.github.com/155877) in C +* [from http-parser tests](http://github.com/joyent/http-parser/blob/37a0ff8/test.c#L403) in C +* [from Node library](http://github.com/joyent/node/blob/842eaf4/src/http.js#L284) in Javascript diff --git a/src/net/strategies/http_parser/http_parser.c b/src/net/strategies/http_parser/http_parser.c new file mode 100755 index 00000000..9941b7ea --- /dev/null +++ b/src/net/strategies/http_parser/http_parser.c @@ -0,0 +1,2462 @@ +/* Copyright Joyent, Inc. and other Node contributors. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include "http_parser.h" +#include +#include +#include +#include +#include + +#ifndef ULLONG_MAX +# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */ +#endif + +#ifndef MIN +# define MIN(a,b) ((a) < (b) ? (a) : (b)) +#endif + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) +#endif + +#ifndef BIT_AT +# define BIT_AT(a, i) \ + (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ + (1 << ((unsigned int) (i) & 7)))) +#endif + +#ifndef ELEM_AT +# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? 
(a)[(i)] : (v)) +#endif + +#define SET_ERRNO(e) \ +do { \ + parser->nread = nread; \ + parser->http_errno = (e); \ +} while(0) + +#define CURRENT_STATE() p_state +#define UPDATE_STATE(V) p_state = (enum state) (V); +#define RETURN(V) \ +do { \ + parser->nread = nread; \ + parser->state = CURRENT_STATE(); \ + return (V); \ +} while (0); +#define REEXECUTE() \ + goto reexecute; \ + + +#ifdef __GNUC__ +# define LIKELY(X) __builtin_expect(!!(X), 1) +# define UNLIKELY(X) __builtin_expect(!!(X), 0) +#else +# define LIKELY(X) (X) +# define UNLIKELY(X) (X) +#endif + + +/* Run the notify callback FOR, returning ER if it fails */ +#define CALLBACK_NOTIFY_(FOR, ER) \ +do { \ + assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ + \ + if (LIKELY(settings->on_##FOR)) { \ + parser->state = CURRENT_STATE(); \ + if (UNLIKELY(0 != settings->on_##FOR(parser))) { \ + SET_ERRNO(HPE_CB_##FOR); \ + } \ + UPDATE_STATE(parser->state); \ + \ + /* We either errored above or got paused; get out */ \ + if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \ + return (ER); \ + } \ + } \ +} while (0) + +/* Run the notify callback FOR and consume the current byte */ +#define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1) + +/* Run the notify callback FOR and don't consume the current byte */ +#define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data) + +/* Run data callback FOR with LEN bytes, returning ER if it fails */ +#define CALLBACK_DATA_(FOR, LEN, ER) \ +do { \ + assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ + \ + if (FOR##_mark) { \ + if (LIKELY(settings->on_##FOR)) { \ + parser->state = CURRENT_STATE(); \ + if (UNLIKELY(0 != \ + settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \ + SET_ERRNO(HPE_CB_##FOR); \ + } \ + UPDATE_STATE(parser->state); \ + \ + /* We either errored above or got paused; get out */ \ + if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \ + return (ER); \ + } \ + } \ + FOR##_mark = NULL; \ + } \ +} while (0) + +/* Run the data callback FOR and 
consume the current byte */ +#define CALLBACK_DATA(FOR) \ + CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1) + +/* Run the data callback FOR and don't consume the current byte */ +#define CALLBACK_DATA_NOADVANCE(FOR) \ + CALLBACK_DATA_(FOR, p - FOR##_mark, p - data) + +/* Set the mark FOR; non-destructive if mark is already set */ +#define MARK(FOR) \ +do { \ + if (!FOR##_mark) { \ + FOR##_mark = p; \ + } \ +} while (0) + +/* Don't allow the total size of the HTTP headers (including the status + * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect + * embedders against denial-of-service attacks where the attacker feeds + * us a never-ending header that the embedder keeps buffering. + * + * This check is arguably the responsibility of embedders but we're doing + * it on the embedder's behalf because most won't bother and this way we + * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger + * than any reasonable request or response so this should never affect + * day-to-day operation. + */ +#define COUNT_HEADER_SIZE(V) \ +do { \ + nread += (V); \ + if (UNLIKELY(nread > (HTTP_MAX_HEADER_SIZE))) { \ + SET_ERRNO(HPE_HEADER_OVERFLOW); \ + goto error; \ + } \ +} while (0) + + +#define PROXY_CONNECTION "proxy-connection" +#define CONNECTION "connection" +#define CONTENT_LENGTH "content-length" +#define TRANSFER_ENCODING "transfer-encoding" +#define UPGRADE "upgrade" +#define CHUNKED "chunked" +#define KEEP_ALIVE "keep-alive" +#define CLOSE "close" + + +static const char *method_strings[] = + { +#define XX(num, name, string) #string, + HTTP_METHOD_MAP(XX) +#undef XX + }; + + +/* Tokens as defined by rfc 2616. Also lowercases them. + * token = 1* + * separators = "(" | ")" | "<" | ">" | "@" + * | "," | ";" | ":" | "\" | <"> + * | "/" | "[" | "]" | "?" 
/* Maps a byte to the value of the hexadecimal digit it represents
 * ('0'-'9', 'a'-'f', 'A'-'F'), or -1 when the byte is not a hex digit.
 *
 * All 256 entries are spelled out. With only the first 128 initialisers the
 * remaining elements are zero-initialised, so bytes 0x80-0xFF would map to
 * the digit value 0 instead of the -1 "not a hex digit" marker.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x00 - 0x0f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x10 - 0x1f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x20 - 0x2f */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1  /* 0x30 - 0x3f: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x40 - 0x4f: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x50 - 0x5f */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x60 - 0x6f: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x70 - 0x7f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x80 - 0x8f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x90 - 0x9f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xa0 - 0xaf */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xb0 - 0xbf */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xc0 - 0xcf */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xd0 - 0xdf */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xe0 - 0xef */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xf0 - 0xff */
  };
/* Bitmap of the characters accepted inside a URL.
 *
 * One bit per character code: character c is accepted when bit (c & 7) of
 * byte (c >> 3) is set, which is how BIT_AT()/IS_URL_CHAR() read the table.
 * Each row below is one byte covering the eight characters named in the
 * comment above it; an entry of 0 clears that character's bit.
 *
 * Entries wrapped in T() (9 ht and 12 np) are set only when
 * HTTP_PARSER_STRICT is off; in strict mode T() expands to 0.
 *
 * Only the first 16 of the 32 bytes have initialisers, so the bits for
 * characters 128-255 are zero; the non-strict IS_URL_CHAR() accepts those
 * bytes through its separate `(c) & 0x80` test instead.
 */
static const uint8_t normal_url_char[32] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0    | T(2)   |   0    |   0    | T(16)  |   0    |   0    |   0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
        0    |   2    |   4    |   0    |   16   |   32   |   64   |  128,
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0, };
s_req_server_start + , s_req_server + , s_req_server_with_at + , s_req_path + , s_req_query_string_start + , s_req_query_string + , s_req_fragment_start + , s_req_fragment + , s_req_http_start + , s_req_http_H + , s_req_http_HT + , s_req_http_HTT + , s_req_http_HTTP + , s_req_http_major + , s_req_http_dot + , s_req_http_minor + , s_req_http_end + , s_req_line_almost_done + + , s_header_field_start + , s_header_field + , s_header_value_discard_ws + , s_header_value_discard_ws_almost_done + , s_header_value_discard_lws + , s_header_value_start + , s_header_value + , s_header_value_lws + + , s_header_almost_done + + , s_chunk_size_start + , s_chunk_size + , s_chunk_parameters + , s_chunk_size_almost_done + + , s_headers_almost_done + , s_headers_done + + /* Important: 's_headers_done' must be the last 'header' state. All + * states beyond this must be 'body' states. It is used for overflow + * checking. See the PARSING_HEADER() macro. + */ + + , s_chunk_data + , s_chunk_data_almost_done + , s_chunk_data_done + + , s_body_identity + , s_body_identity_eof + + , s_message_done + }; + + +#define PARSING_HEADER(state) (state <= s_headers_done) + + +enum header_states + { h_general = 0 + , h_C + , h_CO + , h_CON + + , h_matching_connection + , h_matching_proxy_connection + , h_matching_content_length + , h_matching_transfer_encoding + , h_matching_upgrade + + , h_connection + , h_content_length + , h_content_length_num + , h_content_length_ws + , h_transfer_encoding + , h_upgrade + + , h_matching_transfer_encoding_chunked + , h_matching_connection_token_start + , h_matching_connection_keep_alive + , h_matching_connection_close + , h_matching_connection_upgrade + , h_matching_connection_token + + , h_transfer_encoding_chunked + , h_connection_keep_alive + , h_connection_close + , h_connection_upgrade + }; + +enum http_host_state + { + s_http_host_dead = 1 + , s_http_userinfo_start + , s_http_userinfo + , s_http_host_start + , s_http_host_v6_start + , s_http_host + , 
s_http_host_v6 + , s_http_host_v6_end + , s_http_host_v6_zone_start + , s_http_host_v6_zone + , s_http_host_port_start + , s_http_host_port +}; + +/* Macros for character classes; depends on strict-mode */ +#define CR '\r' +#define LF '\n' +#define LOWER(c) (unsigned char)(c | 0x20) +#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z') +#define IS_NUM(c) ((c) >= '0' && (c) <= '9') +#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c)) +#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f')) +#define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \ + (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \ + (c) == ')') +#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \ + (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \ + (c) == '$' || (c) == ',') + +#define STRICT_TOKEN(c) ((c == ' ') ? 0 : tokens[(unsigned char)c]) + +#if HTTP_PARSER_STRICT +#define TOKEN(c) STRICT_TOKEN(c) +#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c)) +#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-') +#else +#define TOKEN(c) tokens[(unsigned char)c] +#define IS_URL_CHAR(c) \ + (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80)) +#define IS_HOST_CHAR(c) \ + (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') +#endif + +/** + * Verify that a char is a valid visible (printable) US-ASCII + * character or %x80-FF + **/ +#define IS_HEADER_CHAR(ch) \ + (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127)) + +#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res) + + +#if HTTP_PARSER_STRICT +# define STRICT_CHECK(cond) \ +do { \ + if (cond) { \ + SET_ERRNO(HPE_STRICT); \ + goto error; \ + } \ +} while (0) +# define NEW_MESSAGE() (http_should_keep_alive(parser) ? 
start_state : s_dead) +#else +# define STRICT_CHECK(cond) +# define NEW_MESSAGE() start_state +#endif + + +/* Map errno values to strings for human-readable output */ +#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s }, +static struct { + const char *name; + const char *description; +} http_strerror_tab[] = { + HTTP_ERRNO_MAP(HTTP_STRERROR_GEN) +}; +#undef HTTP_STRERROR_GEN + +int http_message_needs_eof(const http_parser *parser); + +/* Our URL parser. + * + * This is designed to be shared by http_parser_execute() for URL validation, + * hence it has a state transition + byte-for-byte interface. In addition, it + * is meant to be embedded in http_parser_parse_url(), which does the dirty + * work of turning state transitions into URL components for its API. + * + * This function should only be invoked with non-space characters. It is + * assumed that the caller cares about (and can detect) the transition between + * URL and non-URL states by looking for these. + * + * Returns the state to continue in after consuming ch, or s_dead when ch is + * not acceptable in state s (this includes ' ', CR and LF in every state). + */ +static enum state +parse_url_char(enum state s, const char ch) +{ + if (ch == ' ' || ch == '\r' || ch == '\n') { + return s_dead; + } + +#if HTTP_PARSER_STRICT + if (ch == '\t' || ch == '\f') { + return s_dead; + } +#endif + + switch (s) { + case s_req_spaces_before_url: + /* Proxied requests are followed by scheme of an absolute URI (alpha). + * All methods except CONNECT are followed by '/' or '*'. 
+ */ + + if (ch == '/' || ch == '*') { + return s_req_path; + } + + if (IS_ALPHA(ch)) { + return s_req_schema; + } + + break; + + case s_req_schema: + if (IS_ALPHA(ch)) { + return s; + } + + if (ch == ':') { + return s_req_schema_slash; + } + + break; + + case s_req_schema_slash: + if (ch == '/') { + return s_req_schema_slash_slash; + } + + break; + + case s_req_schema_slash_slash: + if (ch == '/') { + return s_req_server_start; + } + + break; + + case s_req_server_with_at: + if (ch == '@') { + return s_dead; + } + + /* fall through: any character other than a directly repeated '@' is handled like s_req_server */ + case s_req_server_start: + case s_req_server: + if (ch == '/') { + return s_req_path; + } + + if (ch == '?') { + return s_req_query_string_start; + } + + if (ch == '@') { + return s_req_server_with_at; + } + + if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { + return s_req_server; + } + + break; + + case s_req_path: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + return s_req_query_string_start; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_query_string_start: + case s_req_query_string: + if (IS_URL_CHAR(ch)) { + return s_req_query_string; + } + + switch (ch) { + case '?': + /* allow extra '?' 
in query string */ + return s_req_query_string; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_fragment_start: + if (IS_URL_CHAR(ch)) { + return s_req_fragment; + } + + switch (ch) { + case '?': + return s_req_fragment; + + case '#': + return s; + } + + break; + + case s_req_fragment: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + case '#': + return s; + } + + break; + + default: + break; + } + + /* We should never fall out of the switch above unless there's an error */ + return s_dead; +} + +size_t http_parser_execute (http_parser *parser, + const http_parser_settings *settings, + const char *data, + size_t len) +{ + char c, ch; + int8_t unhex_val; + const char *p = data; + const char *header_field_mark = 0; + const char *header_value_mark = 0; + const char *url_mark = 0; + const char *body_mark = 0; + const char *status_mark = 0; + enum state p_state = (enum state) parser->state; + const unsigned int lenient = parser->lenient_http_headers; + uint32_t nread = parser->nread; + + /* We're in an error state. Don't bother doing anything. */ + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { + return 0; + } + + if (len == 0) { + switch (CURRENT_STATE()) { + case s_body_identity_eof: + /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if + * we got paused. 
+ */ + CALLBACK_NOTIFY_NOADVANCE(message_complete); + return 0; + + case s_dead: + case s_start_req_or_res: + case s_start_res: + case s_start_req: + return 0; + + default: + SET_ERRNO(HPE_INVALID_EOF_STATE); + return 1; + } + } + + + if (CURRENT_STATE() == s_header_field) + header_field_mark = data; + if (CURRENT_STATE() == s_header_value) + header_value_mark = data; + switch (CURRENT_STATE()) { + case s_req_path: + case s_req_schema: + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + case s_req_server: + case s_req_server_with_at: + case s_req_query_string_start: + case s_req_query_string: + case s_req_fragment_start: + case s_req_fragment: + url_mark = data; + break; + case s_res_status: + status_mark = data; + break; + default: + break; + } + + for (p=data; p != data + len; p++) { + ch = *p; + + if (PARSING_HEADER(CURRENT_STATE())) + COUNT_HEADER_SIZE(1); + +reexecute: + switch (CURRENT_STATE()) { + + case s_dead: + /* this state is used after a 'Connection: close' message + * the parser will error out if it reads another message + */ + if (LIKELY(ch == CR || ch == LF)) + break; + + SET_ERRNO(HPE_CLOSED_CONNECTION); + goto error; + + case s_start_req_or_res: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->content_length = ULLONG_MAX; + + if (ch == 'H') { + UPDATE_STATE(s_res_or_resp_H); + + CALLBACK_NOTIFY(message_begin); + } else { + parser->type = HTTP_REQUEST; + UPDATE_STATE(s_start_req); + REEXECUTE(); + } + + break; + } + + case s_res_or_resp_H: + if (ch == 'T') { + parser->type = HTTP_RESPONSE; + UPDATE_STATE(s_res_HT); + } else { + if (UNLIKELY(ch != 'E')) { + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + + parser->type = HTTP_REQUEST; + parser->method = HTTP_HEAD; + parser->index = 2; + UPDATE_STATE(s_req_method); + } + break; + + case s_start_res: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->content_length = ULLONG_MAX; + + if (ch == 'H') { + 
UPDATE_STATE(s_res_H); + } else { + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + + CALLBACK_NOTIFY(message_begin); + break; + } + + case s_res_H: + STRICT_CHECK(ch != 'T'); + UPDATE_STATE(s_res_HT); + break; + + case s_res_HT: + STRICT_CHECK(ch != 'T'); + UPDATE_STATE(s_res_HTT); + break; + + case s_res_HTT: + STRICT_CHECK(ch != 'P'); + UPDATE_STATE(s_res_HTTP); + break; + + case s_res_HTTP: + STRICT_CHECK(ch != '/'); + UPDATE_STATE(s_res_http_major); + break; + + case s_res_http_major: + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major = ch - '0'; + UPDATE_STATE(s_res_http_dot); + break; + + case s_res_http_dot: + { + if (UNLIKELY(ch != '.')) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + UPDATE_STATE(s_res_http_minor); + break; + } + + case s_res_http_minor: + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor = ch - '0'; + UPDATE_STATE(s_res_http_end); + break; + + case s_res_http_end: + { + if (UNLIKELY(ch != ' ')) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + UPDATE_STATE(s_res_first_status_code); + break; + } + + case s_res_first_status_code: + { + if (!IS_NUM(ch)) { + if (ch == ' ') { + break; + } + + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + parser->status_code = ch - '0'; + UPDATE_STATE(s_res_status_code); + break; + } + + case s_res_status_code: + { + if (!IS_NUM(ch)) { + switch (ch) { + case ' ': + UPDATE_STATE(s_res_status_start); + break; + case CR: + case LF: + UPDATE_STATE(s_res_status_start); + REEXECUTE(); + break; + default: + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + break; + } + + parser->status_code *= 10; + parser->status_code += ch - '0'; + + if (UNLIKELY(parser->status_code > 999)) { + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + + break; + } + + case s_res_status_start: + { + MARK(status); + UPDATE_STATE(s_res_status); + parser->index = 0; + + if (ch == CR || ch == LF) + 
REEXECUTE(); + + break; + } + + case s_res_status: + if (ch == CR) { + UPDATE_STATE(s_res_line_almost_done); + CALLBACK_DATA(status); + break; + } + + if (ch == LF) { + UPDATE_STATE(s_header_field_start); + CALLBACK_DATA(status); + break; + } + + break; + + case s_res_line_almost_done: + STRICT_CHECK(ch != LF); + UPDATE_STATE(s_header_field_start); + break; + + case s_start_req: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->content_length = ULLONG_MAX; + + if (UNLIKELY(!IS_ALPHA(ch))) { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + parser->method = (enum http_method) 0; + parser->index = 1; + switch (ch) { + case 'A': parser->method = HTTP_ACL; break; + case 'B': parser->method = HTTP_BIND; break; + case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; + case 'D': parser->method = HTTP_DELETE; break; + case 'G': parser->method = HTTP_GET; break; + case 'H': parser->method = HTTP_HEAD; break; + case 'L': parser->method = HTTP_LOCK; /* or LINK */ break; + case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break; + case 'N': parser->method = HTTP_NOTIFY; break; + case 'O': parser->method = HTTP_OPTIONS; break; + case 'P': parser->method = HTTP_POST; + /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */ + break; + case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break; + case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break; + case 'T': parser->method = HTTP_TRACE; break; + case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break; + default: + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + UPDATE_STATE(s_req_method); + + CALLBACK_NOTIFY(message_begin); + + break; + } + + case s_req_method: + { + const char *matcher; + if (UNLIKELY(ch == '\0')) { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + matcher = method_strings[parser->method]; + if (ch == ' ' && matcher[parser->index] == '\0') { + 
UPDATE_STATE(s_req_spaces_before_url); + } else if (ch == matcher[parser->index]) { + ; /* nada */ + } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') { + + switch (parser->method << 16 | parser->index << 8 | ch) { +#define XX(meth, pos, ch, new_meth) \ + case (HTTP_##meth << 16 | pos << 8 | ch): \ + parser->method = HTTP_##new_meth; break; + + XX(POST, 1, 'U', PUT) + XX(POST, 1, 'A', PATCH) + XX(POST, 1, 'R', PROPFIND) + XX(PUT, 2, 'R', PURGE) + XX(CONNECT, 1, 'H', CHECKOUT) + XX(CONNECT, 2, 'P', COPY) + XX(MKCOL, 1, 'O', MOVE) + XX(MKCOL, 1, 'E', MERGE) + XX(MKCOL, 1, '-', MSEARCH) + XX(MKCOL, 2, 'A', MKACTIVITY) + XX(MKCOL, 3, 'A', MKCALENDAR) + XX(SUBSCRIBE, 1, 'E', SEARCH) + XX(SUBSCRIBE, 1, 'O', SOURCE) + XX(REPORT, 2, 'B', REBIND) + XX(PROPFIND, 4, 'P', PROPPATCH) + XX(LOCK, 1, 'I', LINK) + XX(UNLOCK, 2, 'S', UNSUBSCRIBE) + XX(UNLOCK, 2, 'B', UNBIND) + XX(UNLOCK, 3, 'I', UNLINK) +#undef XX + default: + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + ++parser->index; + break; + } + + case s_req_spaces_before_url: + { + if (ch == ' ') break; + + MARK(url); + if (parser->method == HTTP_CONNECT) { + UPDATE_STATE(s_req_server_start); + } + + UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); + if (UNLIKELY(CURRENT_STATE() == s_dead)) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } + + break; + } + + case s_req_schema: + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + { + switch (ch) { + /* No whitespace allowed here */ + case ' ': + case CR: + case LF: + SET_ERRNO(HPE_INVALID_URL); + goto error; + default: + UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); + if (UNLIKELY(CURRENT_STATE() == s_dead)) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } + } + + break; + } + + case s_req_server: + case s_req_server_with_at: + case s_req_path: + case s_req_query_string_start: + case s_req_query_string: + case s_req_fragment_start: + case s_req_fragment: + { + 
switch (ch) { + case ' ': + UPDATE_STATE(s_req_http_start); + CALLBACK_DATA(url); + break; + case CR: + case LF: + parser->http_major = 0; + parser->http_minor = 9; + UPDATE_STATE((ch == CR) ? + s_req_line_almost_done : + s_header_field_start); + CALLBACK_DATA(url); + break; + default: + UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); + if (UNLIKELY(CURRENT_STATE() == s_dead)) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } + } + break; + } + + case s_req_http_start: + switch (ch) { + case 'H': + UPDATE_STATE(s_req_http_H); + break; + case ' ': + break; + default: + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + break; + + case s_req_http_H: + STRICT_CHECK(ch != 'T'); + UPDATE_STATE(s_req_http_HT); + break; + + case s_req_http_HT: + STRICT_CHECK(ch != 'T'); + UPDATE_STATE(s_req_http_HTT); + break; + + case s_req_http_HTT: + STRICT_CHECK(ch != 'P'); + UPDATE_STATE(s_req_http_HTTP); + break; + + case s_req_http_HTTP: + STRICT_CHECK(ch != '/'); + UPDATE_STATE(s_req_http_major); + break; + + case s_req_http_major: + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major = ch - '0'; + UPDATE_STATE(s_req_http_dot); + break; + + case s_req_http_dot: + { + if (UNLIKELY(ch != '.')) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + UPDATE_STATE(s_req_http_minor); + break; + } + + case s_req_http_minor: + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor = ch - '0'; + UPDATE_STATE(s_req_http_end); + break; + + case s_req_http_end: + { + if (ch == CR) { + UPDATE_STATE(s_req_line_almost_done); + break; + } + + if (ch == LF) { + UPDATE_STATE(s_header_field_start); + break; + } + + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + break; + } + + /* end of request line */ + case s_req_line_almost_done: + { + if (UNLIKELY(ch != LF)) { + SET_ERRNO(HPE_LF_EXPECTED); + goto error; + } + + UPDATE_STATE(s_header_field_start); + break; + } + + case s_header_field_start: 
+ { + if (ch == CR) { + UPDATE_STATE(s_headers_almost_done); + break; + } + + if (ch == LF) { + /* they might be just sending \n instead of \r\n so this would be + * the second \n to denote the end of headers*/ + UPDATE_STATE(s_headers_almost_done); + REEXECUTE(); + } + + c = TOKEN(ch); + + if (UNLIKELY(!c)) { + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + + MARK(header_field); + + parser->index = 0; + UPDATE_STATE(s_header_field); + + switch (c) { + case 'c': + parser->header_state = h_C; + break; + + case 'p': + parser->header_state = h_matching_proxy_connection; + break; + + case 't': + parser->header_state = h_matching_transfer_encoding; + break; + + case 'u': + parser->header_state = h_matching_upgrade; + break; + + default: + parser->header_state = h_general; + break; + } + break; + } + + case s_header_field: + { + const char* start = p; + for (; p != data + len; p++) { + ch = *p; + c = TOKEN(ch); + + if (!c) + break; + + switch (parser->header_state) { + case h_general: { + size_t limit = data + len - p; + limit = MIN(limit, HTTP_MAX_HEADER_SIZE); + while (p+1 < data + limit && TOKEN(p[1])) { + p++; + } + break; + } + + case h_C: + parser->index++; + parser->header_state = (c == 'o' ? h_CO : h_general); + break; + + case h_CO: + parser->index++; + parser->header_state = (c == 'n' ? 
h_CON : h_general); + break; + + case h_CON: + parser->index++; + switch (c) { + case 'n': + parser->header_state = h_matching_connection; + break; + case 't': + parser->header_state = h_matching_content_length; + break; + default: + parser->header_state = h_general; + break; + } + break; + + /* connection */ + + case h_matching_connection: + parser->index++; + if (parser->index > sizeof(CONNECTION)-1 + || c != CONNECTION[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CONNECTION)-2) { + parser->header_state = h_connection; + } + break; + + /* proxy-connection */ + + case h_matching_proxy_connection: + parser->index++; + if (parser->index > sizeof(PROXY_CONNECTION)-1 + || c != PROXY_CONNECTION[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(PROXY_CONNECTION)-2) { + parser->header_state = h_connection; + } + break; + + /* content-length */ + + case h_matching_content_length: + parser->index++; + if (parser->index > sizeof(CONTENT_LENGTH)-1 + || c != CONTENT_LENGTH[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CONTENT_LENGTH)-2) { + parser->header_state = h_content_length; + } + break; + + /* transfer-encoding */ + + case h_matching_transfer_encoding: + parser->index++; + if (parser->index > sizeof(TRANSFER_ENCODING)-1 + || c != TRANSFER_ENCODING[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) { + parser->header_state = h_transfer_encoding; + } + break; + + /* upgrade */ + + case h_matching_upgrade: + parser->index++; + if (parser->index > sizeof(UPGRADE)-1 + || c != UPGRADE[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(UPGRADE)-2) { + parser->header_state = h_upgrade; + } + break; + + case h_connection: + case h_content_length: + case h_transfer_encoding: + case h_upgrade: + if (ch != ' ') parser->header_state = h_general; + 
break; + + default: + assert(0 && "Unknown header_state"); + break; + } + } + + if (p == data + len) { + --p; + COUNT_HEADER_SIZE(p - start); + break; + } + + COUNT_HEADER_SIZE(p - start); + + if (ch == ':') { + UPDATE_STATE(s_header_value_discard_ws); + CALLBACK_DATA(header_field); + break; + } + + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + + case s_header_value_discard_ws: + if (ch == ' ' || ch == '\t') break; + + if (ch == CR) { + UPDATE_STATE(s_header_value_discard_ws_almost_done); + break; + } + + if (ch == LF) { + UPDATE_STATE(s_header_value_discard_lws); + break; + } + + /* fall through */ + + case s_header_value_start: + { + MARK(header_value); + + UPDATE_STATE(s_header_value); + parser->index = 0; + + c = LOWER(ch); + + switch (parser->header_state) { + case h_upgrade: + parser->flags |= F_UPGRADE; + parser->header_state = h_general; + break; + + case h_transfer_encoding: + /* looking for 'Transfer-Encoding: chunked' */ + if ('c' == c) { + parser->header_state = h_matching_transfer_encoding_chunked; + } else { + parser->header_state = h_general; + } + break; + + case h_content_length: + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + if (parser->flags & F_CONTENTLENGTH) { + SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); + goto error; + } + + parser->flags |= F_CONTENTLENGTH; + parser->content_length = ch - '0'; + parser->header_state = h_content_length_num; + break; + + case h_connection: + /* looking for 'Connection: keep-alive' */ + if (c == 'k') { + parser->header_state = h_matching_connection_keep_alive; + /* looking for 'Connection: close' */ + } else if (c == 'c') { + parser->header_state = h_matching_connection_close; + } else if (c == 'u') { + parser->header_state = h_matching_connection_upgrade; + } else { + parser->header_state = h_matching_connection_token; + } + break; + + /* Multi-value `Connection` header */ + case h_matching_connection_token_start: + break; + + default: + 
parser->header_state = h_general; + break; + } + break; + } + + case s_header_value: + { + const char* start = p; + enum header_states h_state = (enum header_states) parser->header_state; + for (; p != data + len; p++) { + ch = *p; + if (ch == CR) { + UPDATE_STATE(s_header_almost_done); + parser->header_state = h_state; + CALLBACK_DATA(header_value); + break; + } + + if (ch == LF) { + UPDATE_STATE(s_header_almost_done); + COUNT_HEADER_SIZE(p - start); + parser->header_state = h_state; + CALLBACK_DATA_NOADVANCE(header_value); + REEXECUTE(); + } + + if (!lenient && !IS_HEADER_CHAR(ch)) { + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + + c = LOWER(ch); + + switch (h_state) { + case h_general: + { + const char* p_cr; + const char* p_lf; + size_t limit = data + len - p; + + limit = MIN(limit, HTTP_MAX_HEADER_SIZE); + + p_cr = (const char*) memchr(p, CR, limit); + p_lf = (const char*) memchr(p, LF, limit); + if (p_cr != NULL) { + if (p_lf != NULL && p_cr >= p_lf) + p = p_lf; + else + p = p_cr; + } else if (UNLIKELY(p_lf != NULL)) { + p = p_lf; + } else { + p = data + len; + } + --p; + break; + } + + case h_connection: + case h_transfer_encoding: + assert(0 && "Shouldn't get here."); + break; + + case h_content_length: + if (ch == ' ') break; + h_state = h_content_length_num; + /* fall through */ + + case h_content_length_num: + { + uint64_t t; + + if (ch == ' ') { + h_state = h_content_length_ws; + break; + } + + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + parser->header_state = h_state; + goto error; + } + + t = parser->content_length; + t *= 10; + t += ch - '0'; + + /* Overflow? Test against a conservative limit for simplicity. 
*/ + if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + parser->header_state = h_state; + goto error; + } + + parser->content_length = t; + break; + } + + case h_content_length_ws: + if (ch == ' ') break; + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + parser->header_state = h_state; + goto error; + + /* Transfer-Encoding: chunked */ + case h_matching_transfer_encoding_chunked: + parser->index++; + if (parser->index > sizeof(CHUNKED)-1 + || c != CHUNKED[parser->index]) { + h_state = h_general; + } else if (parser->index == sizeof(CHUNKED)-2) { + h_state = h_transfer_encoding_chunked; + } + break; + + case h_matching_connection_token_start: + /* looking for 'Connection: keep-alive' */ + if (c == 'k') { + h_state = h_matching_connection_keep_alive; + /* looking for 'Connection: close' */ + } else if (c == 'c') { + h_state = h_matching_connection_close; + } else if (c == 'u') { + h_state = h_matching_connection_upgrade; + } else if (STRICT_TOKEN(c)) { + h_state = h_matching_connection_token; + } else if (c == ' ' || c == '\t') { + /* Skip lws */ + } else { + h_state = h_general; + } + break; + + /* looking for 'Connection: keep-alive' */ + case h_matching_connection_keep_alive: + parser->index++; + if (parser->index > sizeof(KEEP_ALIVE)-1 + || c != KEEP_ALIVE[parser->index]) { + h_state = h_matching_connection_token; + } else if (parser->index == sizeof(KEEP_ALIVE)-2) { + h_state = h_connection_keep_alive; + } + break; + + /* looking for 'Connection: close' */ + case h_matching_connection_close: + parser->index++; + if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) { + h_state = h_matching_connection_token; + } else if (parser->index == sizeof(CLOSE)-2) { + h_state = h_connection_close; + } + break; + + /* looking for 'Connection: upgrade' */ + case h_matching_connection_upgrade: + parser->index++; + if (parser->index > sizeof(UPGRADE) - 1 || + c != UPGRADE[parser->index]) { + h_state = 
h_matching_connection_token; + } else if (parser->index == sizeof(UPGRADE)-2) { + h_state = h_connection_upgrade; + } + break; + + case h_matching_connection_token: + if (ch == ',') { + h_state = h_matching_connection_token_start; + parser->index = 0; + } + break; + + case h_transfer_encoding_chunked: + if (ch != ' ') h_state = h_general; + break; + + case h_connection_keep_alive: + case h_connection_close: + case h_connection_upgrade: + if (ch == ',') { + if (h_state == h_connection_keep_alive) { + parser->flags |= F_CONNECTION_KEEP_ALIVE; + } else if (h_state == h_connection_close) { + parser->flags |= F_CONNECTION_CLOSE; + } else if (h_state == h_connection_upgrade) { + parser->flags |= F_CONNECTION_UPGRADE; + } + h_state = h_matching_connection_token_start; + parser->index = 0; + } else if (ch != ' ') { + h_state = h_matching_connection_token; + } + break; + + default: + UPDATE_STATE(s_header_value); + h_state = h_general; + break; + } + } + parser->header_state = h_state; + + if (p == data + len) + --p; + + COUNT_HEADER_SIZE(p - start); + break; + } + + case s_header_almost_done: + { + if (UNLIKELY(ch != LF)) { + SET_ERRNO(HPE_LF_EXPECTED); + goto error; + } + + UPDATE_STATE(s_header_value_lws); + break; + } + + case s_header_value_lws: + { + if (ch == ' ' || ch == '\t') { + UPDATE_STATE(s_header_value_start); + REEXECUTE(); + } + + /* finished the header */ + switch (parser->header_state) { + case h_connection_keep_alive: + parser->flags |= F_CONNECTION_KEEP_ALIVE; + break; + case h_connection_close: + parser->flags |= F_CONNECTION_CLOSE; + break; + case h_transfer_encoding_chunked: + parser->flags |= F_CHUNKED; + break; + case h_connection_upgrade: + parser->flags |= F_CONNECTION_UPGRADE; + break; + default: + break; + } + + UPDATE_STATE(s_header_field_start); + REEXECUTE(); + } + + case s_header_value_discard_ws_almost_done: + { + STRICT_CHECK(ch != LF); + UPDATE_STATE(s_header_value_discard_lws); + break; + } + + case s_header_value_discard_lws: + { + if 
(ch == ' ' || ch == '\t') { + UPDATE_STATE(s_header_value_discard_ws); + break; + } else { + switch (parser->header_state) { + case h_connection_keep_alive: + parser->flags |= F_CONNECTION_KEEP_ALIVE; + break; + case h_connection_close: + parser->flags |= F_CONNECTION_CLOSE; + break; + case h_connection_upgrade: + parser->flags |= F_CONNECTION_UPGRADE; + break; + case h_transfer_encoding_chunked: + parser->flags |= F_CHUNKED; + break; + default: + break; + } + + /* header value was empty */ + MARK(header_value); + UPDATE_STATE(s_header_field_start); + CALLBACK_DATA_NOADVANCE(header_value); + REEXECUTE(); + } + } + + case s_headers_almost_done: + { + STRICT_CHECK(ch != LF); + + if (parser->flags & F_TRAILING) { + /* End of a chunked request */ + UPDATE_STATE(s_message_done); + CALLBACK_NOTIFY_NOADVANCE(chunk_complete); + REEXECUTE(); + } + + /* Cannot use chunked encoding and a content-length header together + per the HTTP specification. */ + if ((parser->flags & F_CHUNKED) && + (parser->flags & F_CONTENTLENGTH)) { + SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); + goto error; + } + + UPDATE_STATE(s_headers_done); + + /* Set this here so that on_headers_complete() callbacks can see it */ + if ((parser->flags & F_UPGRADE) && + (parser->flags & F_CONNECTION_UPGRADE)) { + /* For responses, "Upgrade: foo" and "Connection: upgrade" are + * mandatory only when it is a 101 Switching Protocols response, + * otherwise it is purely informational, to announce support. + */ + parser->upgrade = + (parser->type == HTTP_REQUEST || parser->status_code == 101); + } else { + parser->upgrade = (parser->method == HTTP_CONNECT); + } + + /* Here we call the headers_complete callback. This is somewhat + * different than other callbacks because if the user returns 1, we + * will interpret that as saying that this message has no body. This + * is needed for the annoying case of recieving a response to a HEAD + * request. 
+ * + * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so + * we have to simulate it by handling a change in errno below. + */ + if (settings->on_headers_complete) { + switch (settings->on_headers_complete(parser)) { + case 0: + break; + + case 2: + parser->upgrade = 1; + + /* fall through */ + case 1: + parser->flags |= F_SKIPBODY; + break; + + default: + SET_ERRNO(HPE_CB_headers_complete); + RETURN(p - data); /* Error */ + } + } + + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { + RETURN(p - data); + } + + REEXECUTE(); + } + + case s_headers_done: + { + int hasBody; + STRICT_CHECK(ch != LF); + + parser->nread = 0; + nread = 0; + + hasBody = parser->flags & F_CHUNKED || + (parser->content_length > 0 && parser->content_length != ULLONG_MAX); + if (parser->upgrade && (parser->method == HTTP_CONNECT || + (parser->flags & F_SKIPBODY) || !hasBody)) { + /* Exit, the rest of the message is in a different protocol. */ + UPDATE_STATE(NEW_MESSAGE()); + CALLBACK_NOTIFY(message_complete); + RETURN((p - data) + 1); + } + + if (parser->flags & F_SKIPBODY) { + UPDATE_STATE(NEW_MESSAGE()); + CALLBACK_NOTIFY(message_complete); + } else if (parser->flags & F_CHUNKED) { + /* chunked encoding - ignore Content-Length header */ + UPDATE_STATE(s_chunk_size_start); + } else { + if (parser->content_length == 0) { + /* Content-Length header given but zero: Content-Length: 0\r\n */ + UPDATE_STATE(NEW_MESSAGE()); + CALLBACK_NOTIFY(message_complete); + } else if (parser->content_length != ULLONG_MAX) { + /* Content-Length header given and non-zero */ + UPDATE_STATE(s_body_identity); + } else { + if (!http_message_needs_eof(parser)) { + /* Assume content-length 0 - read the next */ + UPDATE_STATE(NEW_MESSAGE()); + CALLBACK_NOTIFY(message_complete); + } else { + /* Read body until EOF */ + UPDATE_STATE(s_body_identity_eof); + } + } + } + + break; + } + + case s_body_identity: + { + uint64_t to_read = MIN(parser->content_length, + (uint64_t) ((data + len) - p)); + + 
assert(parser->content_length != 0 + && parser->content_length != ULLONG_MAX); + + /* The difference between advancing content_length and p is because + * the latter will automaticaly advance on the next loop iteration. + * Further, if content_length ends up at 0, we want to see the last + * byte again for our message complete callback. + */ + MARK(body); + parser->content_length -= to_read; + p += to_read - 1; + + if (parser->content_length == 0) { + UPDATE_STATE(s_message_done); + + /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte. + * + * The alternative to doing this is to wait for the next byte to + * trigger the data callback, just as in every other case. The + * problem with this is that this makes it difficult for the test + * harness to distinguish between complete-on-EOF and + * complete-on-length. It's not clear that this distinction is + * important for applications, but let's keep it for now. + */ + CALLBACK_DATA_(body, p - body_mark + 1, p - data); + REEXECUTE(); + } + + break; + } + + /* read until EOF */ + case s_body_identity_eof: + MARK(body); + p = data + len - 1; + + break; + + case s_message_done: + UPDATE_STATE(NEW_MESSAGE()); + CALLBACK_NOTIFY(message_complete); + if (parser->upgrade) { + /* Exit, the rest of the message is in a different protocol. 
*/ + RETURN((p - data) + 1); + } + break; + + case s_chunk_size_start: + { + assert(nread == 1); + assert(parser->flags & F_CHUNKED); + + unhex_val = unhex[(unsigned char)ch]; + if (UNLIKELY(unhex_val == -1)) { + SET_ERRNO(HPE_INVALID_CHUNK_SIZE); + goto error; + } + + parser->content_length = unhex_val; + UPDATE_STATE(s_chunk_size); + break; + } + + case s_chunk_size: + { + uint64_t t; + + assert(parser->flags & F_CHUNKED); + + if (ch == CR) { + UPDATE_STATE(s_chunk_size_almost_done); + break; + } + + unhex_val = unhex[(unsigned char)ch]; + + if (unhex_val == -1) { + if (ch == ';' || ch == ' ') { + UPDATE_STATE(s_chunk_parameters); + break; + } + + SET_ERRNO(HPE_INVALID_CHUNK_SIZE); + goto error; + } + + t = parser->content_length; + t *= 16; + t += unhex_val; + + /* Overflow? Test against a conservative limit for simplicity. */ + if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + parser->content_length = t; + break; + } + + case s_chunk_parameters: + { + assert(parser->flags & F_CHUNKED); + /* just ignore this shit. TODO check for overflow */ + if (ch == CR) { + UPDATE_STATE(s_chunk_size_almost_done); + break; + } + break; + } + + case s_chunk_size_almost_done: + { + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + + parser->nread = 0; + nread = 0; + + if (parser->content_length == 0) { + parser->flags |= F_TRAILING; + UPDATE_STATE(s_header_field_start); + } else { + UPDATE_STATE(s_chunk_data); + } + CALLBACK_NOTIFY(chunk_header); + break; + } + + case s_chunk_data: + { + uint64_t to_read = MIN(parser->content_length, + (uint64_t) ((data + len) - p)); + + assert(parser->flags & F_CHUNKED); + assert(parser->content_length != 0 + && parser->content_length != ULLONG_MAX); + + /* See the explanation in s_body_identity for why the content + * length and data pointers are managed this way. 
+ */ + MARK(body); + parser->content_length -= to_read; + p += to_read - 1; + + if (parser->content_length == 0) { + UPDATE_STATE(s_chunk_data_almost_done); + } + + break; + } + + case s_chunk_data_almost_done: + assert(parser->flags & F_CHUNKED); + assert(parser->content_length == 0); + STRICT_CHECK(ch != CR); + UPDATE_STATE(s_chunk_data_done); + CALLBACK_DATA(body); + break; + + case s_chunk_data_done: + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + parser->nread = 0; + nread = 0; + UPDATE_STATE(s_chunk_size_start); + CALLBACK_NOTIFY(chunk_complete); + break; + + default: + assert(0 && "unhandled state"); + SET_ERRNO(HPE_INVALID_INTERNAL_STATE); + goto error; + } + } + + /* Run callbacks for any marks that we have leftover after we ran out of + * bytes. There should be at most one of these set, so it's OK to invoke + * them in series (unset marks will not result in callbacks). + * + * We use the NOADVANCE() variety of callbacks here because 'p' has already + * overflowed 'data' and this allows us to correct for the off-by-one that + * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p' + * value that's in-bounds). + */ + + assert(((header_field_mark ? 1 : 0) + + (header_value_mark ? 1 : 0) + + (url_mark ? 1 : 0) + + (body_mark ? 1 : 0) + + (status_mark ? 1 : 0)) <= 1); + + CALLBACK_DATA_NOADVANCE(header_field); + CALLBACK_DATA_NOADVANCE(header_value); + CALLBACK_DATA_NOADVANCE(url); + CALLBACK_DATA_NOADVANCE(body); + CALLBACK_DATA_NOADVANCE(status); + + RETURN(len); + +error: + if (HTTP_PARSER_ERRNO(parser) == HPE_OK) { + SET_ERRNO(HPE_UNKNOWN); + } + + RETURN(p - data); +} + + +/* Does the parser need to see an EOF to find the end of the message? */ +int +http_message_needs_eof (const http_parser *parser) +{ + if (parser->type == HTTP_REQUEST) { + return 0; + } + + /* See RFC 2616 section 4.4 */ + if (parser->status_code / 100 == 1 || /* 1xx e.g. 
Continue */ + parser->status_code == 204 || /* No Content */ + parser->status_code == 304 || /* Not Modified */ + parser->flags & F_SKIPBODY) { /* response to a HEAD request */ + return 0; + } + + if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) { + return 0; + } + + return 1; +} + + +int +http_should_keep_alive (const http_parser *parser) +{ + if (parser->http_major > 0 && parser->http_minor > 0) { + /* HTTP/1.1 */ + if (parser->flags & F_CONNECTION_CLOSE) { + return 0; + } + } else { + /* HTTP/1.0 or earlier */ + if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) { + return 0; + } + } + + return !http_message_needs_eof(parser); +} + + +const char * +http_method_str (enum http_method m) +{ + return ELEM_AT(method_strings, m, ""); +} + +const char * +http_status_str (enum http_status s) +{ + switch (s) { +#define XX(num, name, string) case HTTP_STATUS_##name: return #string; + HTTP_STATUS_MAP(XX) +#undef XX + default: return ""; + } +} + +void +http_parser_init (http_parser *parser, enum http_parser_type t) +{ + void *data = parser->data; /* preserve application data */ + memset(parser, 0, sizeof(*parser)); + parser->data = data; + parser->type = t; + parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? 
s_start_res : s_start_req_or_res)); + parser->http_errno = HPE_OK; +} + +void +http_parser_settings_init(http_parser_settings *settings) +{ + memset(settings, 0, sizeof(*settings)); +} + +const char * +http_errno_name(enum http_errno err) { + assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); + return http_strerror_tab[err].name; +} + +const char * +http_errno_description(enum http_errno err) { + assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); + return http_strerror_tab[err].description; +} + +static enum http_host_state +http_parse_host_char(enum http_host_state s, const char ch) { + switch(s) { + case s_http_userinfo: + case s_http_userinfo_start: + if (ch == '@') { + return s_http_host_start; + } + + if (IS_USERINFO_CHAR(ch)) { + return s_http_userinfo; + } + break; + + case s_http_host_start: + if (ch == '[') { + return s_http_host_v6_start; + } + + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + break; + + case s_http_host: + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + /* fall through */ + case s_http_host_v6_end: + if (ch == ':') { + return s_http_host_port_start; + } + + break; + + case s_http_host_v6: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* fall through */ + case s_http_host_v6_start: + if (IS_HEX(ch) || ch == ':' || ch == '.') { + return s_http_host_v6; + } + + if (s == s_http_host_v6 && ch == '%') { + return s_http_host_v6_zone_start; + } + break; + + case s_http_host_v6_zone: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* fall through */ + case s_http_host_v6_zone_start: + /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */ + if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' 
|| ch == '-' || ch == '_' || + ch == '~') { + return s_http_host_v6_zone; + } + break; + + case s_http_host_port: + case s_http_host_port_start: + if (IS_NUM(ch)) { + return s_http_host_port; + } + + break; + + default: + break; + } + return s_http_host_dead; +} + +static int +http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { + enum http_host_state s; + + const char *p; + size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; + + assert(u->field_set & (1 << UF_HOST)); + + u->field_data[UF_HOST].len = 0; + + s = found_at ? s_http_userinfo_start : s_http_host_start; + + for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) { + enum http_host_state new_s = http_parse_host_char(s, *p); + + if (new_s == s_http_host_dead) { + return 1; + } + + switch(new_s) { + case s_http_host: + if (s != s_http_host) { + u->field_data[UF_HOST].off = p - buf; + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6: + if (s != s_http_host_v6) { + u->field_data[UF_HOST].off = p - buf; + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + u->field_data[UF_HOST].len++; + break; + + case s_http_host_port: + if (s != s_http_host_port) { + u->field_data[UF_PORT].off = p - buf; + u->field_data[UF_PORT].len = 0; + u->field_set |= (1 << UF_PORT); + } + u->field_data[UF_PORT].len++; + break; + + case s_http_userinfo: + if (s != s_http_userinfo) { + u->field_data[UF_USERINFO].off = p - buf ; + u->field_data[UF_USERINFO].len = 0; + u->field_set |= (1 << UF_USERINFO); + } + u->field_data[UF_USERINFO].len++; + break; + + default: + break; + } + s = new_s; + } + + /* Make sure we don't end somewhere unexpected */ + switch (s) { + case s_http_host_start: + case s_http_host_v6_start: + case s_http_host_v6: + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + case s_http_host_port_start: + case s_http_userinfo: + case s_http_userinfo_start: + return 1; + default: + 
break; + } + + return 0; +} + +void +http_parser_url_init(struct http_parser_url *u) { + memset(u, 0, sizeof(*u)); +} + +int +http_parser_parse_url(const char *buf, size_t buflen, int is_connect, + struct http_parser_url *u) +{ + enum state s; + const char *p; + enum http_parser_url_fields uf, old_uf; + int found_at = 0; + + if (buflen == 0) { + return 1; + } + + u->port = u->field_set = 0; + s = is_connect ? s_req_server_start : s_req_spaces_before_url; + old_uf = UF_MAX; + + for (p = buf; p < buf + buflen; p++) { + s = parse_url_char(s, *p); + + /* Figure out the next field that we're operating on */ + switch (s) { + case s_dead: + return 1; + + /* Skip delimeters */ + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + case s_req_query_string_start: + case s_req_fragment_start: + continue; + + case s_req_schema: + uf = UF_SCHEMA; + break; + + case s_req_server_with_at: + found_at = 1; + + /* fall through */ + case s_req_server: + uf = UF_HOST; + break; + + case s_req_path: + uf = UF_PATH; + break; + + case s_req_query_string: + uf = UF_QUERY; + break; + + case s_req_fragment: + uf = UF_FRAGMENT; + break; + + default: + assert(!"Unexpected state"); + return 1; + } + + /* Nothing's changed; soldier on */ + if (uf == old_uf) { + u->field_data[uf].len++; + continue; + } + + u->field_data[uf].off = p - buf; + u->field_data[uf].len = 1; + + u->field_set |= (1 << uf); + old_uf = uf; + } + + /* host must be present if there is a schema */ + /* parsing http:///toto will fail */ + if ((u->field_set & (1 << UF_SCHEMA)) && + (u->field_set & (1 << UF_HOST)) == 0) { + return 1; + } + + if (u->field_set & (1 << UF_HOST)) { + if (http_parse_host(buf, u, found_at) != 0) { + return 1; + } + } + + /* CONNECT requests can only contain "hostname:port" */ + if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { + return 1; + } + + if (u->field_set & (1 << UF_PORT)) { + uint16_t off; + uint16_t len; + const char* p; + const char* end; + 
unsigned long v; + + off = u->field_data[UF_PORT].off; + len = u->field_data[UF_PORT].len; + end = buf + off + len; + + /* NOTE: The characters are already validated and are in the [0-9] range */ + assert(off + len <= buflen && "Port number overflow"); + v = 0; + for (p = buf + off; p < end; p++) { + v *= 10; + v += *p - '0'; + + /* Ports have a max value of 2^16 */ + if (v > 0xffff) { + return 1; + } + } + + u->port = (uint16_t) v; + } + + return 0; +} + +void +http_parser_pause(http_parser *parser, int paused) { + /* Users should only be pausing/unpausing a parser that is not in an error + * state. In non-debug builds, there's not much that we can do about this + * other than ignore it. + */ + if (HTTP_PARSER_ERRNO(parser) == HPE_OK || + HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) { + uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */ + SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK); + } else { + assert(0 && "Attempting to pause parser in error state"); + } +} + +int +http_body_is_final(const struct http_parser *parser) { + return parser->state == s_message_done; +} + +unsigned long +http_parser_version(void) { + return HTTP_PARSER_VERSION_MAJOR * 0x10000 | + HTTP_PARSER_VERSION_MINOR * 0x00100 | + HTTP_PARSER_VERSION_PATCH * 0x00001; +} diff --git a/src/net/strategies/http_parser/http_parser.h b/src/net/strategies/http_parser/http_parser.h new file mode 100755 index 00000000..e894d7ce --- /dev/null +++ b/src/net/strategies/http_parser/http_parser.h @@ -0,0 +1,436 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef http_parser_h +#define http_parser_h +#ifdef __cplusplus +extern "C" { +#endif + +/* Also update SONAME in the Makefile whenever you change these. */ +#define HTTP_PARSER_VERSION_MAJOR 2 +#define HTTP_PARSER_VERSION_MINOR 8 +#define HTTP_PARSER_VERSION_PATCH 1 + +#include +#if defined(_WIN32) && !defined(__MINGW32__) && \ + (!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__) +#include +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else +#include +#endif + +/* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run + * faster + */ +#ifndef HTTP_PARSER_STRICT +# define HTTP_PARSER_STRICT 1 +#endif + +/* Maximium header size allowed. 
If the macro is not defined + * before including this header then the default is used. To + * change the maximum header size, define the macro in the build + * environment (e.g. -DHTTP_MAX_HEADER_SIZE=). To remove + * the effective limit on the size of the header, define the macro + * to a very large number (e.g. -DHTTP_MAX_HEADER_SIZE=0x7fffffff) + */ +#ifndef HTTP_MAX_HEADER_SIZE +# define HTTP_MAX_HEADER_SIZE (80*1024) +#endif + +typedef struct http_parser http_parser; +typedef struct http_parser_settings http_parser_settings; + + +/* Callbacks should return non-zero to indicate an error. The parser will + * then halt execution. + * + * The one exception is on_headers_complete. In a HTTP_RESPONSE parser + * returning '1' from on_headers_complete will tell the parser that it + * should not expect a body. This is used when receiving a response to a + * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: + * chunked' headers that indicate the presence of a body. + * + * Returning `2` from on_headers_complete will tell parser that it should not + * expect neither a body nor any futher responses on this connection. This is + * useful for handling responses to a CONNECT request which may not contain + * `Upgrade` or `Connection: upgrade` headers. + * + * http_data_cb does not return data chunks. It will be called arbitrarily + * many times for each string. E.G. you might get 10 callbacks for "on_url" + * each providing just a few characters more data. 
+ */ +typedef int (*http_data_cb) (http_parser*, const char *at, size_t length); +typedef int (*http_cb) (http_parser*); + + +/* Status Codes */ +#define HTTP_STATUS_MAP(XX) \ + XX(100, CONTINUE, Continue) \ + XX(101, SWITCHING_PROTOCOLS, Switching Protocols) \ + XX(102, PROCESSING, Processing) \ + XX(200, OK, OK) \ + XX(201, CREATED, Created) \ + XX(202, ACCEPTED, Accepted) \ + XX(203, NON_AUTHORITATIVE_INFORMATION, Non-Authoritative Information) \ + XX(204, NO_CONTENT, No Content) \ + XX(205, RESET_CONTENT, Reset Content) \ + XX(206, PARTIAL_CONTENT, Partial Content) \ + XX(207, MULTI_STATUS, Multi-Status) \ + XX(208, ALREADY_REPORTED, Already Reported) \ + XX(226, IM_USED, IM Used) \ + XX(300, MULTIPLE_CHOICES, Multiple Choices) \ + XX(301, MOVED_PERMANENTLY, Moved Permanently) \ + XX(302, FOUND, Found) \ + XX(303, SEE_OTHER, See Other) \ + XX(304, NOT_MODIFIED, Not Modified) \ + XX(305, USE_PROXY, Use Proxy) \ + XX(307, TEMPORARY_REDIRECT, Temporary Redirect) \ + XX(308, PERMANENT_REDIRECT, Permanent Redirect) \ + XX(400, BAD_REQUEST, Bad Request) \ + XX(401, UNAUTHORIZED, Unauthorized) \ + XX(402, PAYMENT_REQUIRED, Payment Required) \ + XX(403, FORBIDDEN, Forbidden) \ + XX(404, NOT_FOUND, Not Found) \ + XX(405, METHOD_NOT_ALLOWED, Method Not Allowed) \ + XX(406, NOT_ACCEPTABLE, Not Acceptable) \ + XX(407, PROXY_AUTHENTICATION_REQUIRED, Proxy Authentication Required) \ + XX(408, REQUEST_TIMEOUT, Request Timeout) \ + XX(409, CONFLICT, Conflict) \ + XX(410, GONE, Gone) \ + XX(411, LENGTH_REQUIRED, Length Required) \ + XX(412, PRECONDITION_FAILED, Precondition Failed) \ + XX(413, PAYLOAD_TOO_LARGE, Payload Too Large) \ + XX(414, URI_TOO_LONG, URI Too Long) \ + XX(415, UNSUPPORTED_MEDIA_TYPE, Unsupported Media Type) \ + XX(416, RANGE_NOT_SATISFIABLE, Range Not Satisfiable) \ + XX(417, EXPECTATION_FAILED, Expectation Failed) \ + XX(421, MISDIRECTED_REQUEST, Misdirected Request) \ + XX(422, UNPROCESSABLE_ENTITY, Unprocessable Entity) \ + XX(423, LOCKED, Locked) \ + 
XX(424, FAILED_DEPENDENCY, Failed Dependency) \ + XX(426, UPGRADE_REQUIRED, Upgrade Required) \ + XX(428, PRECONDITION_REQUIRED, Precondition Required) \ + XX(429, TOO_MANY_REQUESTS, Too Many Requests) \ + XX(431, REQUEST_HEADER_FIELDS_TOO_LARGE, Request Header Fields Too Large) \ + XX(451, UNAVAILABLE_FOR_LEGAL_REASONS, Unavailable For Legal Reasons) \ + XX(500, INTERNAL_SERVER_ERROR, Internal Server Error) \ + XX(501, NOT_IMPLEMENTED, Not Implemented) \ + XX(502, BAD_GATEWAY, Bad Gateway) \ + XX(503, SERVICE_UNAVAILABLE, Service Unavailable) \ + XX(504, GATEWAY_TIMEOUT, Gateway Timeout) \ + XX(505, HTTP_VERSION_NOT_SUPPORTED, HTTP Version Not Supported) \ + XX(506, VARIANT_ALSO_NEGOTIATES, Variant Also Negotiates) \ + XX(507, INSUFFICIENT_STORAGE, Insufficient Storage) \ + XX(508, LOOP_DETECTED, Loop Detected) \ + XX(510, NOT_EXTENDED, Not Extended) \ + XX(511, NETWORK_AUTHENTICATION_REQUIRED, Network Authentication Required) \ + +enum http_status + { +#define XX(num, name, string) HTTP_STATUS_##name = num, + HTTP_STATUS_MAP(XX) +#undef XX + }; + + +/* Request Methods */ +#define HTTP_METHOD_MAP(XX) \ + XX(0, DELETE, DELETE) \ + XX(1, GET, GET) \ + XX(2, HEAD, HEAD) \ + XX(3, POST, POST) \ + XX(4, PUT, PUT) \ + /* pathological */ \ + XX(5, CONNECT, CONNECT) \ + XX(6, OPTIONS, OPTIONS) \ + XX(7, TRACE, TRACE) \ + /* WebDAV */ \ + XX(8, COPY, COPY) \ + XX(9, LOCK, LOCK) \ + XX(10, MKCOL, MKCOL) \ + XX(11, MOVE, MOVE) \ + XX(12, PROPFIND, PROPFIND) \ + XX(13, PROPPATCH, PROPPATCH) \ + XX(14, SEARCH, SEARCH) \ + XX(15, UNLOCK, UNLOCK) \ + XX(16, BIND, BIND) \ + XX(17, REBIND, REBIND) \ + XX(18, UNBIND, UNBIND) \ + XX(19, ACL, ACL) \ + /* subversion */ \ + XX(20, REPORT, REPORT) \ + XX(21, MKACTIVITY, MKACTIVITY) \ + XX(22, CHECKOUT, CHECKOUT) \ + XX(23, MERGE, MERGE) \ + /* upnp */ \ + XX(24, MSEARCH, M-SEARCH) \ + XX(25, NOTIFY, NOTIFY) \ + XX(26, SUBSCRIBE, SUBSCRIBE) \ + XX(27, UNSUBSCRIBE, UNSUBSCRIBE) \ + /* RFC-5789 */ \ + XX(28, PATCH, PATCH) \ + XX(29, PURGE, 
PURGE) \ + /* CalDAV */ \ + XX(30, MKCALENDAR, MKCALENDAR) \ + /* RFC-2068, section 19.6.1.2 */ \ + XX(31, LINK, LINK) \ + XX(32, UNLINK, UNLINK) \ + /* icecast */ \ + XX(33, SOURCE, SOURCE) \ + +enum http_method + { +#define XX(num, name, string) HTTP_##name = num, + HTTP_METHOD_MAP(XX) +#undef XX + }; + + +enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH }; + + +/* Flag values for http_parser.flags field */ +enum flags + { F_CHUNKED = 1 << 0 + , F_CONNECTION_KEEP_ALIVE = 1 << 1 + , F_CONNECTION_CLOSE = 1 << 2 + , F_CONNECTION_UPGRADE = 1 << 3 + , F_TRAILING = 1 << 4 + , F_UPGRADE = 1 << 5 + , F_SKIPBODY = 1 << 6 + , F_CONTENTLENGTH = 1 << 7 + }; + + +/* Map for errno-related constants + * + * The provided argument should be a macro that takes 2 arguments. + */ +#define HTTP_ERRNO_MAP(XX) \ + /* No error */ \ + XX(OK, "success") \ + \ + /* Callback-related errors */ \ + XX(CB_message_begin, "the on_message_begin callback failed") \ + XX(CB_url, "the on_url callback failed") \ + XX(CB_header_field, "the on_header_field callback failed") \ + XX(CB_header_value, "the on_header_value callback failed") \ + XX(CB_headers_complete, "the on_headers_complete callback failed") \ + XX(CB_body, "the on_body callback failed") \ + XX(CB_message_complete, "the on_message_complete callback failed") \ + XX(CB_status, "the on_status callback failed") \ + XX(CB_chunk_header, "the on_chunk_header callback failed") \ + XX(CB_chunk_complete, "the on_chunk_complete callback failed") \ + \ + /* Parsing-related errors */ \ + XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \ + XX(HEADER_OVERFLOW, \ + "too many header bytes seen; overflow detected") \ + XX(CLOSED_CONNECTION, \ + "data received after completed connection: close message") \ + XX(INVALID_VERSION, "invalid HTTP version") \ + XX(INVALID_STATUS, "invalid HTTP status code") \ + XX(INVALID_METHOD, "invalid HTTP method") \ + XX(INVALID_URL, "invalid URL") \ + XX(INVALID_HOST, "invalid host") \ + 
XX(INVALID_PORT, "invalid port") \ + XX(INVALID_PATH, "invalid path") \ + XX(INVALID_QUERY_STRING, "invalid query string") \ + XX(INVALID_FRAGMENT, "invalid fragment") \ + XX(LF_EXPECTED, "LF character expected") \ + XX(INVALID_HEADER_TOKEN, "invalid character in header") \ + XX(INVALID_CONTENT_LENGTH, \ + "invalid character in content-length header") \ + XX(UNEXPECTED_CONTENT_LENGTH, \ + "unexpected content-length header") \ + XX(INVALID_CHUNK_SIZE, \ + "invalid character in chunk size header") \ + XX(INVALID_CONSTANT, "invalid constant string") \ + XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\ + XX(STRICT, "strict mode assertion failed") \ + XX(PAUSED, "parser is paused") \ + XX(UNKNOWN, "an unknown error occurred") + + +/* Define HPE_* values for each errno value above */ +#define HTTP_ERRNO_GEN(n, s) HPE_##n, +enum http_errno { + HTTP_ERRNO_MAP(HTTP_ERRNO_GEN) +}; +#undef HTTP_ERRNO_GEN + + +/* Get an http_errno value from an http_parser */ +#define HTTP_PARSER_ERRNO(p) ((enum http_errno) (p)->http_errno) + + +struct http_parser { + /** PRIVATE **/ + unsigned int type : 2; /* enum http_parser_type */ + unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */ + unsigned int state : 7; /* enum state from http_parser.c */ + unsigned int header_state : 7; /* enum header_state from http_parser.c */ + unsigned int index : 7; /* index into current matcher */ + unsigned int lenient_http_headers : 1; + + uint32_t nread; /* # bytes read in various scenarios */ + uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */ + + /** READ-ONLY **/ + unsigned short http_major; + unsigned short http_minor; + unsigned int status_code : 16; /* responses only */ + unsigned int method : 8; /* requests only */ + unsigned int http_errno : 7; + + /* 1 = Upgrade header was present and the parser has exited because of that. + * 0 = No upgrade header present. 
+ * Should be checked when http_parser_execute() returns in addition to + * error checking. + */ + unsigned int upgrade : 1; + + /** PUBLIC **/ + void *data; /* A pointer to get hook to the "connection" or "socket" object */ +}; + + +struct http_parser_settings { + http_cb on_message_begin; + http_data_cb on_url; + http_data_cb on_status; + http_data_cb on_header_field; + http_data_cb on_header_value; + http_cb on_headers_complete; + http_data_cb on_body; + http_cb on_message_complete; + /* When on_chunk_header is called, the current chunk length is stored + * in parser->content_length. + */ + http_cb on_chunk_header; + http_cb on_chunk_complete; +}; + + +enum http_parser_url_fields + { UF_SCHEMA = 0 + , UF_HOST = 1 + , UF_PORT = 2 + , UF_PATH = 3 + , UF_QUERY = 4 + , UF_FRAGMENT = 5 + , UF_USERINFO = 6 + , UF_MAX = 7 + }; + + +/* Result structure for http_parser_parse_url(). + * + * Callers should index into field_data[] with UF_* values iff field_set + * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and + * because we probably have padding left over), we convert any port to + * a uint16_t. + */ +struct http_parser_url { + uint16_t field_set; /* Bitmask of (1 << UF_*) values */ + uint16_t port; /* Converted UF_PORT string */ + + struct { + uint16_t off; /* Offset into buffer in which field starts */ + uint16_t len; /* Length of run in buffer */ + } field_data[UF_MAX]; +}; + + +/* Returns the library version. Bits 16-23 contain the major version number, + * bits 8-15 the minor version number and bits 0-7 the patch level. 
+ * Usage example: + * + * unsigned long version = http_parser_version(); + * unsigned major = (version >> 16) & 255; + * unsigned minor = (version >> 8) & 255; + * unsigned patch = version & 255; + * printf("http_parser v%u.%u.%u\n", major, minor, patch); + */ +unsigned long http_parser_version(void); + +void http_parser_init(http_parser *parser, enum http_parser_type type); + + +/* Initialize http_parser_settings members to 0 + */ +void http_parser_settings_init(http_parser_settings *settings); + + +/* Executes the parser. Returns number of parsed bytes. Sets + * `parser->http_errno` on error. */ +size_t http_parser_execute(http_parser *parser, + const http_parser_settings *settings, + const char *data, + size_t len); + + +/* If http_should_keep_alive() in the on_headers_complete or + * on_message_complete callback returns 0, then this should be + * the last message on the connection. + * If you are the server, respond with the "Connection: close" header. + * If you are the client, close the connection. + */ +int http_should_keep_alive(const http_parser *parser); + +/* Returns a string version of the HTTP method. */ +const char *http_method_str(enum http_method m); + +/* Returns a string version of the HTTP status code. */ +const char *http_status_str(enum http_status s); + +/* Return a string name of the given error */ +const char *http_errno_name(enum http_errno err); + +/* Return a string description of the given error */ +const char *http_errno_description(enum http_errno err); + +/* Initialize all http_parser_url members to 0 */ +void http_parser_url_init(struct http_parser_url *u); + +/* Parse a URL; return nonzero on failure */ +int http_parser_parse_url(const char *buf, size_t buflen, + int is_connect, + struct http_parser_url *u); + +/* Pause or un-pause the parser; a nonzero value pauses */ +void http_parser_pause(http_parser *parser, int paused); + +/* Checks if this is the final chunk of the body. 
*/ +int http_body_is_final(const http_parser *parser); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/version.h b/src/version.h index 33be0af8..272d8337 100644 --- a/src/version.h +++ b/src/version.h @@ -25,18 +25,18 @@ #ifndef XMRIG_VERSION_H #define XMRIG_VERSION_H -#define APP_ID "xmrig" -#define APP_NAME "XMRig" -#define APP_DESC "XMRig CPU miner" -#define APP_VERSION "2.14.4-dev" -#define APP_DOMAIN "xmrig.com" -#define APP_SITE "www.xmrig.com" -#define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com" -#define APP_KIND "cpu" +#define APP_ID "ninjarig" +#define APP_NAME "NinjaRig" +#define APP_DESC "NinjaRig CPU/GPU miner" +#define APP_VERSION "1.0.0-dev" +//#define APP_DOMAIN "xmrig.com" +//#define APP_SITE "www.xmrig.com" +#define APP_COPYRIGHT "Copyright (C) 2019 Haifa Bogdan Adnan" +#define APP_KIND "cpu/gpu" -#define APP_VER_MAJOR 2 -#define APP_VER_MINOR 14 -#define APP_VER_PATCH 4 +#define APP_VER_MAJOR 1 +#define APP_VER_MINOR 0 +#define APP_VER_PATCH 0 #ifdef _MSC_VER # if (_MSC_VER >= 1920) diff --git a/src/workers/CpuThread.cpp b/src/workers/CpuThread.cpp deleted file mode 100644 index c4981e93..00000000 --- a/src/workers/CpuThread.cpp +++ /dev/null @@ -1,825 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - - -#include "common/cpu/Cpu.h" -#include "common/log/Log.h" -#include "crypto/Asm.h" -#include "Mem.h" -#include "rapidjson/document.h" -#include "workers/CpuThread.h" - - -#if defined(XMRIG_ARM) -# include "crypto/CryptoNight_arm.h" -#else -# include "crypto/CryptoNight_x86.h" -#endif - -#ifndef XMRIG_NO_ARGON2 -# include "crypto/Argon2.h" -#endif - -xmrig::CpuThread::CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch, Assembly assembly) : - m_algorithm(algorithm), - m_av(av), - m_assembly(assembly), - m_prefetch(prefetch), - m_softAES(softAES), - m_priority(priority), - m_affinity(affinity), - m_multiway(multiway), - m_index(index) -{ -} - - -#ifndef XMRIG_NO_ASM -template -static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t mask) -{ - const uint8_t* p = reinterpret_cast(src); - - // Workaround for Visual Studio placing trampoline in debug builds. 
-# if defined(_MSC_VER) - if (p[0] == 0xE9) { - p += *(int32_t*)(p + 1) + 5; - } -# endif - - size_t size = 0; - while (*(uint32_t*)(p + size) != 0xDEADC0DE) { - ++size; - } - size += sizeof(uint32_t); - - memcpy((void*) dst, (const void*) src, size); - - uint8_t* patched_data = reinterpret_cast(dst); - for (size_t i = 0; i + sizeof(uint32_t) <= size; ++i) { - switch (*(uint32_t*)(patched_data + i)) { - case xmrig::CRYPTONIGHT_ITER: - *(uint32_t*)(patched_data + i) = iterations; - break; - - case xmrig::CRYPTONIGHT_MASK: - *(uint32_t*)(patched_data + i) = mask; - break; - } - } -} - - -extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx); -extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx); -extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx); -extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx); - - -xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm = nullptr; - -xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr; - -xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ivybridge_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ryzen_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm = nullptr; - -xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ivybridge_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ryzen_asm = nullptr; 
-xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr; - - -void xmrig::CpuThread::patchAsmVariants() -{ - const int allocation_size = 65536; - uint8_t *base = static_cast(Mem::allocateExecutableMemory(allocation_size)); - - cn_half_mainloop_ivybridge_asm = reinterpret_cast (base + 0x0000); - cn_half_mainloop_ryzen_asm = reinterpret_cast (base + 0x1000); - cn_half_mainloop_bulldozer_asm = reinterpret_cast (base + 0x2000); - cn_half_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x3000); - - cn_trtl_mainloop_ivybridge_asm = reinterpret_cast (base + 0x4000); - cn_trtl_mainloop_ryzen_asm = reinterpret_cast (base + 0x5000); - cn_trtl_mainloop_bulldozer_asm = reinterpret_cast (base + 0x6000); - cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x7000); - - cn_zls_mainloop_ivybridge_asm = reinterpret_cast (base + 0x8000); - cn_zls_mainloop_ryzen_asm = reinterpret_cast (base + 0x9000); - cn_zls_mainloop_bulldozer_asm = reinterpret_cast (base + 0xA000); - cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0xB000); - - cn_double_mainloop_ivybridge_asm = reinterpret_cast (base + 0xC000); - cn_double_mainloop_ryzen_asm = reinterpret_cast (base + 0xD000); - cn_double_mainloop_bulldozer_asm = reinterpret_cast (base + 0xE000); - cn_double_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0xF000); - - patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); - 
- patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); - patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); - patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); - patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); - - patchCode(cn_zls_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_ZLS_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_zls_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_ZLS_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_zls_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_ZLS_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_zls_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_ZLS_ITER, xmrig::CRYPTONIGHT_MASK); - - patchCode(cn_double_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_DOUBLE_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_double_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_DOUBLE_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_double_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_DOUBLE_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_double_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_DOUBLE_ITER, xmrig::CRYPTONIGHT_MASK); - - Mem::protectExecutableMemory(base, allocation_size); - Mem::flushInstructionCache(base, allocation_size); -} -#endif - - -bool xmrig::CpuThread::isSoftAES(AlgoVariant av) -{ - return av == AV_SINGLE_SOFT || av == AV_DOUBLE_SOFT || av > AV_PENTA; -} - - -#ifndef XMRIG_NO_ASM -template -static inline void 
add_asm_func(xmrig::CpuThread::cn_hash_fun(&asm_func_map)[xmrig::ALGO_MAX][xmrig::AV_MAX][xmrig::VARIANT_MAX][xmrig::ASM_MAX]) -{ - asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_INTEL] = cryptonight_single_hash_asm; - asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_RYZEN] = cryptonight_single_hash_asm; - asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_BULLDOZER] = cryptonight_single_hash_asm; - - asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_INTEL] = cryptonight_double_hash_asm; - asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_RYZEN] = cryptonight_double_hash_asm; - asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_BULLDOZER] = cryptonight_double_hash_asm; -} -#endif - -xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly) -{ - assert(variant >= VARIANT_0 && variant < VARIANT_MAX); - -# ifndef XMRIG_NO_ASM - if (assembly == ASM_AUTO) { - assembly = Cpu::info()->assembly(); - } - - static cn_hash_fun asm_func_map[ALGO_MAX][AV_MAX][VARIANT_MAX][ASM_MAX] = {}; - static bool asm_func_map_initialized = false; - - if (!asm_func_map_initialized) { - add_asm_func(asm_func_map); - add_asm_func(asm_func_map); - add_asm_func(asm_func_map); - add_asm_func(asm_func_map); - -# ifndef XMRIG_NO_CN_PICO - add_asm_func(asm_func_map); -# endif - - add_asm_func(asm_func_map); - add_asm_func(asm_func_map); - add_asm_func(asm_func_map); - - asm_func_map_initialized = true; - } - - cn_hash_fun fun = asm_func_map[algorithm][av][variant][assembly]; - if (fun) { - return fun; - } -# endif - - constexpr const size_t count = VARIANT_MAX * 10 * ALGO_MAX; - - static const cn_hash_fun func_table[] = { - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - 
cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, 
- cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL - -# ifndef XMRIG_NO_CN_GPU - cryptonight_single_hash_gpu, - nullptr, - cryptonight_single_hash_gpu, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, -# else - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU -# endif - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 
nullptr, nullptr, nullptr, // VARIANT_CHUKWA_LITE - -# ifndef XMRIG_NO_AEON - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ - 
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA_LITE -# else - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 
nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA_LITE -# endif - -# ifndef XMRIG_NO_SUMO - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2 - nullptr, nullptr, nullptr, 
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA_LITE -# else - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 
nullptr, // VARIANT_2 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA_LITE -# endif - -# ifndef XMRIG_NO_CN_PICO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO - 
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA_LITE -# else - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV - 
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA_LITE -# endif - -# ifndef XMRIG_NO_ARGON2 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR - nullptr, nullptr, nullptr, 
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE - - argon2_hash_function, - argon2_hash_function, - argon2_hash_function, - argon2_hash_function, - argon2_hash_function, - argon2_hash_function, - argon2_hash_function, - argon2_hash_function, - argon2_hash_function, - argon2_hash_function, - - argon2_hash_function, - argon2_hash_function, - argon2_hash_function, - argon2_hash_function, - argon2_hash_function, - argon2_hash_function, - argon2_hash_function, - argon2_hash_function, - argon2_hash_function, - argon2_hash_function, - -# else - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 - nullptr, nullptr, nullptr, nullptr, 
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_CHUKWA_LITE -# endif - }; - - static_assert(count == sizeof(func_table) / sizeof(func_table[0]), "func_table size mismatch"); - - const size_t index = VARIANT_MAX * 10 * algorithm + 10 * variant + av - 1; - -# ifndef 
NDEBUG - cn_hash_fun func = func_table[index]; - - assert(index < sizeof(func_table) / sizeof(func_table[0])); - assert(func != nullptr); - - return func; -# else - return func_table[index]; -# endif -} - - -xmrig::CpuThread *xmrig::CpuThread::createFromAV(size_t index, Algo algorithm, AlgoVariant av, int64_t affinity, int priority, Assembly assembly) -{ - assert(av > AV_AUTO && av < AV_MAX); - - int64_t cpuId = -1L; - - if (affinity != -1L) { - size_t idx = 0; - - for (size_t i = 0; i < 64; i++) { - if (!(affinity & (1ULL << i))) { - continue; - } - - if (idx == index) { - cpuId = i; - break; - } - - idx++; - } - } - - return new CpuThread(index, algorithm, av, multiway(av), cpuId, priority, isSoftAES(av), false, assembly); -} - - -xmrig::CpuThread *xmrig::CpuThread::createFromData(size_t index, Algo algorithm, const CpuThread::Data &data, int priority, bool softAES) -{ - int av = AV_AUTO; - const Multiway multiway = data.multiway; - - if (multiway <= DoubleWay) { - av = softAES ? (multiway + 2) : multiway; - } - else { - av = softAES ? (multiway + 5) : (multiway + 2); - } - - assert(av > AV_AUTO && av < AV_MAX); - - return new CpuThread(index, algorithm, static_cast(av), multiway, data.affinity, priority, softAES, false, data.assembly); -} - - -xmrig::CpuThread::Data xmrig::CpuThread::parse(const rapidjson::Value &object) -{ - Data data; - - const auto &multiway = object["low_power_mode"]; - if (multiway.IsBool()) { - data.multiway = multiway.IsTrue() ? 
DoubleWay : SingleWay; - data.valid = true; - } - else if (multiway.IsUint()) { - data.setMultiway(multiway.GetInt()); - } - - if (!data.valid) { - return data; - } - - const auto &affinity = object["affine_to_cpu"]; - if (affinity.IsUint64()) { - data.affinity = affinity.GetInt64(); - } - -# ifndef XMRIG_NO_ASM - data.assembly = Asm::parse(object["asm"]); -# endif - - return data; -} - - -xmrig::IThread::Multiway xmrig::CpuThread::multiway(AlgoVariant av) -{ - switch (av) { - case AV_SINGLE: - case AV_SINGLE_SOFT: - return SingleWay; - - case AV_DOUBLE_SOFT: - case AV_DOUBLE: - return DoubleWay; - - case AV_TRIPLE_SOFT: - case AV_TRIPLE: - return TripleWay; - - case AV_QUAD_SOFT: - case AV_QUAD: - return QuadWay; - - case AV_PENTA_SOFT: - case AV_PENTA: - return PentaWay; - - default: - break; - } - - return SingleWay; -} - - -#ifdef APP_DEBUG -void xmrig::CpuThread::print() const -{ - LOG_DEBUG(GREEN_BOLD("CPU thread: ") " index " WHITE_BOLD("%zu") ", multiway " WHITE_BOLD("%d") ", av " WHITE_BOLD("%d") ",", - index(), static_cast(multiway()), static_cast(m_av)); - -# ifndef XMRIG_NO_ASM - LOG_DEBUG(" assembly: %s, affine_to_cpu: %" PRId64, Asm::toString(m_assembly), affinity()); -# else - LOG_DEBUG(" affine_to_cpu: %" PRId64, affinity()); -# endif -} -#endif - - -#ifndef XMRIG_NO_API -rapidjson::Value xmrig::CpuThread::toAPI(rapidjson::Document &doc) const -{ - using namespace rapidjson; - - Value obj(kObjectType); - auto &allocator = doc.GetAllocator(); - - obj.AddMember("type", "cpu", allocator); - obj.AddMember("av", m_av, allocator); - obj.AddMember("low_power_mode", multiway(), allocator); - obj.AddMember("affine_to_cpu", affinity(), allocator); - obj.AddMember("priority", priority(), allocator); - obj.AddMember("soft_aes", isSoftAES(), allocator); - - return obj; -} -#endif - - -rapidjson::Value xmrig::CpuThread::toConfig(rapidjson::Document &doc) const -{ - using namespace rapidjson; - - Value obj(kObjectType); - auto &allocator = doc.GetAllocator(); - - 
obj.AddMember("low_power_mode", multiway(), allocator); - obj.AddMember("affine_to_cpu", affinity() == -1L ? Value(kFalseType) : Value(affinity()), allocator); - -# ifndef XMRIG_NO_ASM - obj.AddMember("asm", Asm::toJSON(m_assembly), allocator); -# endif - - return obj; -} diff --git a/src/workers/CpuThread.h b/src/workers/CpuThread.h deleted file mode 100644 index 05d4a066..00000000 --- a/src/workers/CpuThread.h +++ /dev/null @@ -1,115 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef XMRIG_CPUTHREAD_H -#define XMRIG_CPUTHREAD_H - - -#include "common/xmrig.h" -#include "interfaces/IThread.h" - - -struct cryptonight_ctx; - - -namespace xmrig { - - -class CpuThread : public IThread -{ -public: - struct Data - { - inline Data() : assembly(ASM_AUTO), valid(false), affinity(-1L), multiway(SingleWay) {} - - inline void setMultiway(int value) - { - if (value >= SingleWay && value <= PentaWay) { - multiway = static_cast(value); - valid = true; - } - } - - Assembly assembly; - bool valid; - int64_t affinity; - Multiway multiway; - }; - - - CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch, Assembly assembly); - - typedef void (*cn_hash_fun)(const uint8_t *input, size_t size, uint8_t *output, cryptonight_ctx **ctx, uint64_t height); - typedef void (*cn_mainloop_fun)(cryptonight_ctx **ctx); - -# ifndef XMRIG_NO_ASM - static void patchAsmVariants(); -# endif - - static bool isSoftAES(AlgoVariant av); - static cn_hash_fun fn(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly); - static CpuThread *createFromAV(size_t index, Algo algorithm, AlgoVariant av, int64_t affinity, int priority, Assembly assembly); - static CpuThread *createFromData(size_t index, Algo algorithm, const CpuThread::Data &data, int priority, bool softAES); - static Data parse(const rapidjson::Value &object); - static Multiway multiway(AlgoVariant av); - - inline bool isPrefetch() const { return m_prefetch; } - inline bool isSoftAES() const { return m_softAES; } - inline cn_hash_fun fn(Variant variant) const { return fn(m_algorithm, m_av, variant, m_assembly); } - - inline Algo algorithm() const override { return m_algorithm; } - inline int priority() const override { return m_priority; } - inline int64_t affinity() const override { return m_affinity; } - inline Multiway multiway() const override { return m_multiway; } - inline size_t index() const override { return m_index; 
} - inline Type type() const override { return CPU; } - -protected: -# ifdef APP_DEBUG - void print() const override; -# endif - -# ifndef XMRIG_NO_API - rapidjson::Value toAPI(rapidjson::Document &doc) const override; -# endif - - rapidjson::Value toConfig(rapidjson::Document &doc) const override; - -private: - const Algo m_algorithm; - const AlgoVariant m_av; - const Assembly m_assembly; - const bool m_prefetch; - const bool m_softAES; - const int m_priority; - const int64_t m_affinity; - const Multiway m_multiway; - const size_t m_index; -}; - - -} /* namespace xmrig */ - - -#endif /* XMRIG_CPUTHREAD_H */ diff --git a/src/workers/Handle.cpp b/src/workers/Handle.cpp index d42ea368..cacaf636 100644 --- a/src/workers/Handle.cpp +++ b/src/workers/Handle.cpp @@ -22,25 +22,65 @@ */ +#include #include "workers/Handle.h" -Handle::Handle(xmrig::IThread *config, uint32_t offset, size_t totalWays) : - m_worker(nullptr), - m_totalWays(totalWays), - m_offset(offset), - m_config(config) +Handle::Handle(xmrig::Config *config, xmrig::HasherConfig *hasherConfig, uint32_t offset) : + m_offset(offset), + m_config(config), + m_hasherConfig(hasherConfig), + m_hasher(nullptr) { -} + std::vector hashers = Hasher::getHashers(); + for(Hasher *hasher : hashers) { + if(hasherConfig->type() == hasher->subType()) { + if(hasher->initialize(hasherConfig->algorithm(), hasherConfig->variant()) && + hasher->configure(*hasherConfig) && + hasher->deviceCount() > 0) + m_hasher = hasher; + std::string hasherInfo = hasher->info(); + + if(config->isColors()) { + std::string redDisabled = RED_BOLD("DISABLED"); + std::string greenEnabled = GREEN_BOLD("ENABLED"); + + size_t startPos = hasherInfo.find("DISABLED"); + while (startPos != string::npos) { + hasherInfo.replace(startPos, 8, redDisabled); + startPos = hasherInfo.find("DISABLED", startPos + redDisabled.size()); + } + + startPos = hasherInfo.find("ENABLED"); + while (startPos != string::npos) { + hasherInfo.replace(startPos, 7, greenEnabled); + 
startPos = hasherInfo.find("ENABLED", startPos + greenEnabled.size()); + } + + Log::i()->text(GREEN_BOLD(" * Initializing %s hasher:") "\n%s", hasher->subType().c_str(), hasherInfo.c_str()); + } + else { + Log::i()->text(" * Initializing %s hasher:\n%s", hasher->subType().c_str(), hasherInfo.c_str()); + } + } + } +} void Handle::join() { - uv_thread_join(&m_thread); + for(uv_thread_t thread : m_threads) + uv_thread_join(&thread); } void Handle::start(void (*callback) (void *)) { - uv_thread_create(&m_thread, callback, this); + assert(m_hasher != nullptr); + for(int i=0; i < m_hasher->computingThreads(); i++) { + uv_thread_t thread; + HandleArg *arg = new HandleArg { this, i }; + uv_thread_create(&thread, callback, arg); + m_threads.push_back(thread); + } } diff --git a/src/workers/Handle.h b/src/workers/Handle.h index 4bb899f9..50c7a2b4 100644 --- a/src/workers/Handle.h +++ b/src/workers/Handle.h @@ -27,35 +27,48 @@ #include #include +#include #include +#include +#include "core/HasherConfig.h" -#include "interfaces/IThread.h" - +#include "crypto/argon2_hasher/common/common.h" +#include "crypto/argon2_hasher/hash/Hasher.h" class IWorker; - class Handle { public: - Handle(xmrig::IThread *config, uint32_t offset, size_t totalWays); + Handle(xmrig::Config *config, xmrig::HasherConfig *hasherConfig, uint32_t offset); + + struct HandleArg { + Handle *handle; + int workerId; + }; + void join(); void start(void (*callback) (void *)); - inline IWorker *worker() const { return m_worker; } - inline size_t threadId() const { return m_config->index(); } - inline size_t totalWays() const { return m_totalWays; } + inline std::vector &workers() { return m_workers; } + inline size_t hasherId() const { return m_hasherConfig->index(); } + inline size_t parallelism(int workerIdx) const { return m_hasher != nullptr ? m_hasher->parallelism(workerIdx) : 0; } + inline size_t computingThreads() const { return m_hasher != nullptr ? 
m_hasher->computingThreads() : 0; } inline uint32_t offset() const { return m_offset; } - inline void setWorker(IWorker *worker) { assert(worker != nullptr); m_worker = worker; } - inline xmrig::IThread *config() const { return m_config; } + inline void addWorker(IWorker *worker) { assert(worker != nullptr); m_workers.push_back(worker); } + inline xmrig::HasherConfig *config() const { return m_hasherConfig; } + inline Hasher *hasher() const { return m_hasher; } private: - IWorker *m_worker; - size_t m_totalWays; + std::vector m_threads; + std::vector m_workers; + + Hasher *m_hasher; uint32_t m_offset; - uv_thread_t m_thread; - xmrig::IThread *m_config; + + xmrig::HasherConfig *m_hasherConfig; + xmrig::Config *m_config; }; diff --git a/src/workers/Hashrate.cpp b/src/workers/Hashrate.cpp index 2a750318..dcb4982e 100644 --- a/src/workers/Hashrate.cpp +++ b/src/workers/Hashrate.cpp @@ -33,11 +33,12 @@ #include "core/Config.h" #include "core/Controller.h" #include "workers/Hashrate.h" +#include "workers/Handle.h" inline static const char *format(double h, char *buf, size_t size) { - if (isnormal(h)) { + if (std::isnormal(h)) { snprintf(buf, size, "%03.1f", h); return buf; } @@ -46,19 +47,26 @@ inline static const char *format(double h, char *buf, size_t size) } -Hashrate::Hashrate(size_t threads, xmrig::Controller *controller) : +Hashrate::Hashrate(const std::vector &hashers, xmrig::Controller *controller) : m_highest(0.0), - m_threads(threads), m_controller(controller) { - m_counts = new uint64_t*[threads]; - m_timestamps = new uint64_t*[threads]; - m_top = new uint32_t[threads]; + m_hashers = hashers.size(); + m_workers = new size_t[m_hashers]; + m_counts = new uint64_t**[m_hashers]; + m_timestamps = new uint64_t**[m_hashers]; + m_top = new uint32_t*[m_hashers]; - for (size_t i = 0; i < threads; i++) { - m_counts[i] = new uint64_t[kBucketSize](); - m_timestamps[i] = new uint64_t[kBucketSize](); - m_top[i] = 0; + for (size_t i = 0; i < hashers.size(); i++) { + 
m_workers[i] = hashers[i]->hasher()->deviceCount(); + m_counts[i] = new uint64_t*[m_workers[i]]; + m_timestamps[i] = new uint64_t*[m_workers[i]]; + m_top[i] = new uint32_t[m_workers[i]]; + for (size_t j = 0; j < m_workers[i]; j++) { + m_counts[i][j] = new uint64_t[kBucketSize](); + m_timestamps[i][j] = new uint64_t[kBucketSize](); + m_top[i][j] = 0; + } } const int printTime = controller->config()->printTime(); @@ -77,10 +85,12 @@ double Hashrate::calc(size_t ms) const double result = 0.0; double data; - for (size_t i = 0; i < m_threads; ++i) { - data = calc(i, ms); - if (isnormal(data)) { - result += data; + for (size_t i = 0; i < m_hashers; ++i) { + for(size_t j = 0; j < m_workers[i]; j++) { + data = calc(i, j, ms); + if (std::isnormal(data)) { + result += data; + } } } @@ -88,10 +98,12 @@ double Hashrate::calc(size_t ms) const } -double Hashrate::calc(size_t threadId, size_t ms) const +double Hashrate::calc(size_t hasherId, size_t workerId, size_t ms) const { - assert(threadId < m_threads); - if (threadId >= m_threads) { + assert(hasherId < m_hashers); + assert(workerId < m_workers[hasherId]); + + if (hasherId >= m_hashers || workerId >= m_workers[hasherId]) { return nan(""); } @@ -105,24 +117,24 @@ double Hashrate::calc(size_t threadId, size_t ms) const bool haveFullSet = false; for (size_t i = 1; i < kBucketSize; i++) { - const size_t idx = (m_top[threadId] - i) & kBucketMask; + const size_t idx = (m_top[hasherId][workerId] - i) & kBucketMask; - if (m_timestamps[threadId][idx] == 0) { + if (m_timestamps[hasherId][workerId][idx] == 0) { break; } if (lastestStamp == 0) { - lastestStamp = m_timestamps[threadId][idx]; - lastestHashCnt = m_counts[threadId][idx]; + lastestStamp = m_timestamps[hasherId][workerId][idx]; + lastestHashCnt = m_counts[hasherId][workerId][idx]; } - if (now - m_timestamps[threadId][idx] > ms) { + if (now - m_timestamps[hasherId][workerId][idx] > ms) { haveFullSet = true; break; } - earliestStamp = m_timestamps[threadId][idx]; - 
earliestHashCount = m_counts[threadId][idx]; + earliestStamp = m_timestamps[hasherId][workerId][idx]; + earliestHashCount = m_counts[hasherId][workerId][idx]; } if (!haveFullSet || earliestStamp == 0 || lastestStamp == 0) { @@ -142,13 +154,13 @@ double Hashrate::calc(size_t threadId, size_t ms) const } -void Hashrate::add(size_t threadId, uint64_t count, uint64_t timestamp) +void Hashrate::add(size_t hasherId, size_t workerId, uint64_t count, uint64_t timestamp) { - const size_t top = m_top[threadId]; - m_counts[threadId][top] = count; - m_timestamps[threadId][top] = timestamp; + const size_t top = m_top[hasherId][workerId]; + m_counts[hasherId][workerId][top] = count; + m_timestamps[hasherId][workerId][top] = timestamp; - m_top[threadId] = (top + 1) & kBucketMask; + m_top[hasherId][workerId] = (top + 1) & kBucketMask; } @@ -178,7 +190,7 @@ void Hashrate::stop() void Hashrate::updateHighest() { double highest = calc(ShortInterval); - if (isnormal(highest) && highest > m_highest) { + if (std::isnormal(highest) && highest > m_highest) { m_highest = highest; } } diff --git a/src/workers/Hashrate.h b/src/workers/Hashrate.h index e766f117..a1f8733f 100644 --- a/src/workers/Hashrate.h +++ b/src/workers/Hashrate.h @@ -32,7 +32,7 @@ namespace xmrig { class Controller; } - +class Handle; class Hashrate { @@ -43,16 +43,15 @@ public: LargeInterval = 900000 }; - Hashrate(size_t threads, xmrig::Controller *controller); + Hashrate(const std::vector &workers, xmrig::Controller *controller); double calc(size_t ms) const; - double calc(size_t threadId, size_t ms) const; - void add(size_t threadId, uint64_t count, uint64_t timestamp); + double calc(size_t hasherId, size_t workerId, size_t ms) const; + void add(size_t hasherId, size_t workerId, uint64_t count, uint64_t timestamp); void print() const; void stop(); void updateHighest(); inline double highest() const { return m_highest; } - inline size_t threads() const { return m_threads; } static const char *format(double h, char 
*buf, size_t size); @@ -63,10 +62,11 @@ private: constexpr static size_t kBucketMask = kBucketSize - 1; double m_highest; - size_t m_threads; - uint32_t* m_top; - uint64_t** m_counts; - uint64_t** m_timestamps; + size_t m_hashers; + size_t* m_workers; + uint32_t** m_top; + uint64_t*** m_counts; + uint64_t*** m_timestamps; uv_timer_t m_timer; xmrig::Controller *m_controller; }; diff --git a/src/workers/MultiWorker.cpp b/src/workers/MultiWorker.cpp deleted file mode 100644 index 8045e200..00000000 --- a/src/workers/MultiWorker.cpp +++ /dev/null @@ -1,280 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - - -#include - - -#include "crypto/CryptoNight_test.h" -#include "common/log/Log.h" -#include "workers/CpuThread.h" -#include "workers/MultiWorker.h" -#include "workers/Workers.h" - - -template -MultiWorker::MultiWorker(Handle *handle) - : Worker(handle) -{ - m_memory = Mem::create(m_ctx, m_thread->algorithm(), N); -} - - -template -MultiWorker::~MultiWorker() -{ - Mem::release(m_ctx, N, m_memory); -} - - -template -bool MultiWorker::selfTest() -{ - using namespace xmrig; - - if (m_thread->algorithm() == CRYPTONIGHT) { - const bool rc = verify(VARIANT_0, test_output_v0) && - verify(VARIANT_1, test_output_v1) && - verify(VARIANT_2, test_output_v2) && - verify(VARIANT_XTL, test_output_xtl) && - verify(VARIANT_MSR, test_output_msr) && - verify(VARIANT_XAO, test_output_xao) && - verify(VARIANT_RTO, test_output_rto) && - verify(VARIANT_HALF, test_output_half) && - verify2(VARIANT_WOW, test_output_wow) && - verify2(VARIANT_4, test_output_r) && - verify(VARIANT_RWZ, test_output_rwz) && - verify(VARIANT_ZLS, test_output_zls) && - verify(VARIANT_DOUBLE, test_output_double); - -# ifndef XMRIG_NO_CN_GPU - if (!rc || N > 1) { - return rc; - } - - return verify(VARIANT_GPU, test_output_gpu); -# else - return rc; -# endif - } - -# ifndef XMRIG_NO_AEON - if (m_thread->algorithm() == CRYPTONIGHT_LITE) { - return verify(VARIANT_0, test_output_v0_lite) && - verify(VARIANT_1, test_output_v1_lite); - } -# endif - -# ifndef XMRIG_NO_SUMO - if (m_thread->algorithm() == CRYPTONIGHT_HEAVY) { - return verify(VARIANT_0, test_output_v0_heavy) && - verify(VARIANT_XHV, test_output_xhv_heavy) && - verify(VARIANT_TUBE, test_output_tube_heavy); - } -# endif - -# ifndef XMRIG_NO_CN_PICO - if (m_thread->algorithm() == CRYPTONIGHT_PICO) { - return verify(VARIANT_TRTL, test_output_pico_trtl); - } -# endif - -# ifndef XMRIG_NO_ARGON2 - if (m_thread->algorithm() == ARGON2) { - return true; - // return verify(VARIANT_CHUKWA, test_output_pico_trtl); - } -# endif - - return false; -} - - -template 
-void MultiWorker::start() -{ - while (Workers::sequence() > 0) { - if (Workers::isPaused()) { - do { - std::this_thread::sleep_for(std::chrono::milliseconds(200)); - } - while (Workers::isPaused()); - - if (Workers::sequence() == 0) { - break; - } - - consumeJob(); - } - - while (!Workers::isOutdated(m_sequence)) { - if ((m_count & 0x7) == 0) { - storeStats(); - } - - m_thread->fn(m_state.job.algorithm().variant())(m_state.blob, m_state.job.size(), m_hash, m_ctx, m_state.job.height()); - - for (size_t i = 0; i < N; ++i) { - if (*reinterpret_cast(m_hash + (i * 32) + 24) < m_state.job.target()) { - Workers::submit(xmrig::JobResult(m_state.job.poolId(), m_state.job.id(), m_state.job.clientId(), *nonce(i), m_hash + (i * 32), m_state.job.diff(), m_state.job.algorithm())); - } - - *nonce(i) += 1; - } - - m_count += N; - - std::this_thread::yield(); - } - - consumeJob(); - } -} - - -template -bool MultiWorker::resume(const xmrig::Job &job) -{ - if (m_state.job.poolId() == -1 && job.poolId() >= 0 && job.id() == m_pausedState.job.id()) { - m_state = m_pausedState; - return true; - } - - return false; -} - - -template -bool MultiWorker::verify(xmrig::Variant variant, const uint8_t *referenceValue) -{ - - xmrig::CpuThread::cn_hash_fun func = m_thread->fn(variant); - if (!func) { - return false; - } - - func(test_input, 76, m_hash, m_ctx, 0); - return memcmp(m_hash, referenceValue, sizeof m_hash) == 0; -} - - -template -bool MultiWorker::verify2(xmrig::Variant variant, const uint8_t *referenceValue) -{ - xmrig::CpuThread::cn_hash_fun func = m_thread->fn(variant); - if (!func) { - return false; - } - - for (size_t i = 0; i < (sizeof(cn_r_test_input) / sizeof(cn_r_test_input[0])); ++i) { - const size_t size = cn_r_test_input[i].size; - for (size_t k = 0; k < N; ++k) { - memcpy(m_state.blob + (k * size), cn_r_test_input[i].data, size); - } - - func(m_state.blob, size, m_hash, m_ctx, cn_r_test_input[i].height); - - for (size_t k = 0; k < N; ++k) { - if (memcmp(m_hash + k * 32, 
referenceValue + i * 32, sizeof m_hash / N) != 0) { - return false; - } - } - } - - return true; -} - - -template<> -bool MultiWorker<1>::verify2(xmrig::Variant variant, const uint8_t *referenceValue) -{ - xmrig::CpuThread::cn_hash_fun func = m_thread->fn(variant); - if (!func) { - return false; - } - - for (size_t i = 0; i < (sizeof(cn_r_test_input) / sizeof(cn_r_test_input[0])); ++i) { - func(cn_r_test_input[i].data, cn_r_test_input[i].size, m_hash, m_ctx, cn_r_test_input[i].height); - - if (memcmp(m_hash, referenceValue + i * 32, sizeof m_hash) != 0) { - return false; - } - } - - return true; -} - - -template -void MultiWorker::consumeJob() -{ - xmrig::Job job = Workers::job(); - m_sequence = Workers::sequence(); - if (m_state.job == job) { - return; - } - - save(job); - - if (resume(job)) { - return; - } - - m_state.job = job; - - const size_t size = m_state.job.size(); - memcpy(m_state.blob, m_state.job.blob(), m_state.job.size()); - - if (N > 1) { - for (size_t i = 1; i < N; ++i) { - memcpy(m_state.blob + (i * size), m_state.blob, size); - } - } - - for (size_t i = 0; i < N; ++i) { - if (m_state.job.isNicehash()) { - *nonce(i) = (*nonce(i) & 0xff000000U) + (0xffffffU / m_totalWays * (m_offset + i)); - } - else { - *nonce(i) = 0xffffffffU / m_totalWays * (m_offset + i); - } - } -} - - -template -void MultiWorker::save(const xmrig::Job &job) -{ - if (job.poolId() == -1 && m_state.job.poolId() >= 0) { - m_pausedState = m_state; - } -} - - -template class MultiWorker<1>; -template class MultiWorker<2>; -template class MultiWorker<3>; -template class MultiWorker<4>; -template class MultiWorker<5>; diff --git a/src/workers/MultiWorker.h b/src/workers/MultiWorker.h deleted file mode 100644 index b7e4c8ca..00000000 --- a/src/workers/MultiWorker.h +++ /dev/null @@ -1,76 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 
XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef XMRIG_MULTIWORKER_H -#define XMRIG_MULTIWORKER_H - - -#include "common/net/Job.h" -#include "Mem.h" -#include "net/JobResult.h" -#include "workers/Worker.h" - - -class Handle; - - -template -class MultiWorker : public Worker -{ -public: - MultiWorker(Handle *handle); - ~MultiWorker(); - -protected: - bool selfTest() override; - void start() override; - -private: - bool resume(const xmrig::Job &job); - bool verify(xmrig::Variant variant, const uint8_t *referenceValue); - bool verify2(xmrig::Variant variant, const uint8_t *referenceValue); - void consumeJob(); - void save(const xmrig::Job &job); - - inline uint32_t *nonce(size_t index) - { - return reinterpret_cast(m_state.blob + (index * m_state.job.size()) + 39); - } - - struct State - { - alignas(16) uint8_t blob[xmrig::Job::kMaxBlobSize * N]; - xmrig::Job job; - }; - - - cryptonight_ctx *m_ctx[N]; - State m_pausedState; - State m_state; - uint8_t m_hash[N * 32]; -}; - - -#endif /* XMRIG_MULTIWORKER_H */ diff --git a/src/workers/Worker.cpp b/src/workers/Worker.cpp index c569908c..40cb338d 100644 --- a/src/workers/Worker.cpp +++ b/src/workers/Worker.cpp @@ -26,29 +26,25 @@ #include "common/cpu/Cpu.h" #include "common/Platform.h" -#include "workers/CpuThread.h" +#include 
"core/HasherConfig.h" #include "workers/Handle.h" #include "workers/Worker.h" +#include "workers/Workers.h" -Worker::Worker(Handle *handle) : - m_id(handle->threadId()), - m_totalWays(handle->totalWays()), - m_offset(handle->offset()), - m_hashCount(0), - m_timestamp(0), - m_count(0), - m_sequence(0), - m_thread(static_cast(handle->config())) +Worker::Worker(Handle *handle, int workerIdx) : + m_id(workerIdx), + m_hashCount(0), + m_timestamp(0), + m_count(0), + m_sequence(0), + m_config(static_cast(handle->config())), + m_hasher(handle->hasher()) { - if (xmrig::Cpu::info()->threads() > 1 && m_thread->affinity() != -1L) { - Platform::setThreadAffinity(m_thread->affinity()); - } - - Platform::setThreadPriority(m_thread->priority()); + m_offset = handle->offset() + m_id; + m_hash = new uint8_t[m_hasher->parallelism(m_id) * 36]; } - void Worker::storeStats() { using namespace std::chrono; @@ -57,3 +53,103 @@ void Worker::storeStats() m_hashCount.store(m_count, std::memory_order_relaxed); m_timestamp.store(timestamp, std::memory_order_relaxed); } + +bool Worker::selfTest() +{ + return true; +} + +void Worker::start() { + if(m_hasher->type() == "CPU" && m_hasher->subType() == "CPU") { + if (xmrig::Cpu::info()->threads() > 1 && m_config->getCPUAffinity(m_id) != -1L) { + Platform::setThreadAffinity(m_config->getCPUAffinity(m_id)); + } + } + + Platform::setThreadPriority(m_config->priority()); + int parallelism = m_hasher->parallelism(m_id); + + while (Workers::sequence() > 0) { + if (Workers::isPaused()) { + do { + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + } + while (Workers::isPaused()); + + if (Workers::sequence() == 0) { + break; + } + + consumeJob(); + } + + while (!Workers::isOutdated(m_sequence)) { + int hashCount = m_hasher->compute(m_id, m_state.blob, m_state.job.size(), m_hash); + + if(hashCount == parallelism) { + + for (size_t i = 0; i < parallelism; ++i) { + if (*reinterpret_cast(m_hash + (i * 36) + 24) < m_state.job.target()) { + 
Workers::submit(xmrig::JobResult(m_state.job.poolId(), m_state.job.id(), m_state.job.clientId(), + *reinterpret_cast(m_hash + (i * 36) + 32), m_hash + (i * 36), m_state.job.diff(), + m_state.job.algorithm())); + } + } + + m_count += parallelism; + } + + storeStats(); + + std::this_thread::yield(); + } + + consumeJob(); + } +} + +bool Worker::consumeJob() { + xmrig::Job job = Workers::job(); + m_sequence = Workers::sequence(); + if (m_state.job == job) { + return false; + } + + save(job); + + if (resume(job)) { + return false; + } + + m_state.job = job; + + const size_t size = m_state.job.size(); + memcpy(m_state.blob, m_state.job.blob(), size); + + uint32_t *nonce = reinterpret_cast(m_state.blob + 39); + if (m_state.job.isNicehash()) { + *nonce = (*nonce & 0xff000000U) + (0xffffffU / Workers::totalThreads() * m_offset); + } + else { + *nonce = 0xffffffffU / Workers::totalThreads() * m_offset; + } + + return true; +} + +bool Worker::resume(const xmrig::Job &job) +{ + if (m_state.job.poolId() == -1 && job.poolId() >= 0 && job.id() == m_pausedState.job.id()) { + m_state = m_pausedState; + return true; + } + + return false; +} + +void Worker::save(const xmrig::Job &job) +{ + if (job.poolId() == -1 && m_state.job.poolId() >= 0) { + m_pausedState = m_state; + } +} diff --git a/src/workers/Worker.h b/src/workers/Worker.h index 73e25033..c34029af 100644 --- a/src/workers/Worker.h +++ b/src/workers/Worker.h @@ -30,39 +30,54 @@ #include "interfaces/IWorker.h" -#include "Mem.h" - +#include "common/net/Job.h" +#include "net/JobResult.h" class Handle; namespace xmrig { - class CpuThread; + class HasherConfig; } class Worker : public IWorker { public: - Worker(Handle *handle); + Worker(Handle *handle, int workerIdx); - inline const MemInfo &memory() const { return m_memory; } inline size_t id() const override { return m_id; } inline uint64_t hashCount() const override { return m_hashCount.load(std::memory_order_relaxed); } inline uint64_t timestamp() const override { return 
m_timestamp.load(std::memory_order_relaxed); } + inline size_t parallelism() const override { return m_hasher->parallelism(m_id); } -protected: + bool selfTest() override; + void start() override; + +private: void storeStats(); + bool consumeJob(); + + bool resume(const xmrig::Job &job); + void save(const xmrig::Job &job); + + struct State + { + alignas(16) uint8_t blob[xmrig::Job::kMaxBlobSize]; + xmrig::Job job; + }; const size_t m_id; - const size_t m_totalWays; - const uint32_t m_offset; - MemInfo m_memory; + uint32_t m_offset; std::atomic m_hashCount; std::atomic m_timestamp; + Hasher *m_hasher; uint64_t m_count; uint64_t m_sequence; - xmrig::CpuThread *m_thread; + xmrig::HasherConfig *m_config; + State m_pausedState; + State m_state; + uint8_t *m_hash; }; diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index f718a52c..77382c9d 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -28,18 +28,17 @@ #include "api/Api.h" +#include "api/ApiRouter.h" #include "common/log/Log.h" #include "core/Config.h" #include "core/Controller.h" -#include "crypto/CryptoNight_constants.h" +#include "crypto/Argon2_constants.h" #include "interfaces/IJobResultListener.h" -#include "interfaces/IThread.h" -#include "Mem.h" #include "rapidjson/document.h" #include "workers/Handle.h" #include "workers/Hashrate.h" -#include "workers/MultiWorker.h" #include "workers/Workers.h" +#include "workers/Worker.h" bool Workers::m_active = false; @@ -58,6 +57,7 @@ uv_mutex_t Workers::m_mutex; uv_rwlock_t Workers::m_rwlock; uv_timer_t Workers::m_timer; xmrig::Controller *Workers::m_controller = nullptr; +std::atomic Workers::m_totalThreads; xmrig::Job Workers::job() @@ -70,26 +70,6 @@ xmrig::Job Workers::job() } -size_t Workers::hugePages() -{ - uv_mutex_lock(&m_mutex); - const size_t hugePages = m_status.hugePages; - uv_mutex_unlock(&m_mutex); - - return hugePages; -} - - -size_t Workers::threads() -{ - uv_mutex_lock(&m_mutex); - const size_t threads = 
m_status.threads; - uv_mutex_unlock(&m_mutex); - - return threads; -} - - void Workers::printHashrate(bool detail) { assert(m_controller != nullptr); @@ -103,19 +83,23 @@ void Workers::printHashrate(bool detail) char num2[8] = { 0 }; char num3[8] = { 0 }; - Log::i()->text("%s| THREAD | AFFINITY | 10s H/s | 60s H/s | 15m H/s |", isColors ? "\x1B[1;37m" : ""); + Log::i()->text("%s| TYPE | ID | 10s H/s | 60s H/s | 15m H/s |", isColors ? "\x1B[1;37m" : ""); size_t i = 0; - for (const xmrig::IThread *thread : m_controller->config()->threads()) { - Log::i()->text("| %6zu | %8" PRId64 " | %7s | %7s | %7s |", - thread->index(), - thread->affinity(), - Hashrate::format(m_hashrate->calc(thread->index(), Hashrate::ShortInterval), num1, sizeof num1), - Hashrate::format(m_hashrate->calc(thread->index(), Hashrate::MediumInterval), num2, sizeof num2), - Hashrate::format(m_hashrate->calc(thread->index(), Hashrate::LargeInterval), num3, sizeof num3) - ); - - i++; + for (const Handle *worker : m_workers) { + for(int i = 0; i < worker->hasher()->deviceCount(); i++) { + Log::i()->text("| %7s | %s%-2d | %7s | %7s | %7s |", + worker->hasher()->subType().c_str(), + worker->hasher()->subType(true).c_str(), + i, + Hashrate::format(m_hashrate->calc(worker->hasherId(), i, Hashrate::ShortInterval), num1, + sizeof num1), + Hashrate::format(m_hashrate->calc(worker->hasherId(), i, Hashrate::MediumInterval), num2, + sizeof num2), + Hashrate::format(m_hashrate->calc(worker->hasherId(), i, Hashrate::LargeInterval), num3, + sizeof num3) + ); + } } } @@ -159,38 +143,22 @@ void Workers::setJob(const xmrig::Job &job, bool donate) } -void Workers::start(xmrig::Controller *controller) +bool Workers::start(xmrig::Controller *controller) { -# ifdef APP_DEBUG - LOG_NOTICE("THREADS ------------------------------------------------------------------"); - for (const xmrig::IThread *thread : controller->config()->threads()) { - thread->print(); - } - 
LOG_NOTICE("--------------------------------------------------------------------------"); -# endif - -# ifndef XMRIG_NO_ASM - xmrig::CpuThread::patchAsmVariants(); -# endif - m_controller = controller; - const std::vector &threads = controller->config()->threads(); + const std::vector &hashers = controller->config()->hasherConfigs(); m_status.algo = controller->config()->algorithm().algo(); + m_status.variant = controller->config()->algorithm().variant(); m_status.colors = controller->config()->isColors(); - m_status.threads = threads.size(); - - for (const xmrig::IThread *thread : threads) { - m_status.ways += thread->multiway(); - } - - m_hashrate = new Hashrate(threads.size(), controller); + m_status.hashers = hashers.size(); uv_mutex_init(&m_mutex); uv_rwlock_init(&m_rwlock); m_sequence = 1; m_paused = 1; + m_totalThreads = 0; uv_async_init(uv_default_loop(), &m_async, Workers::onResult); uv_timer_init(uv_default_loop(), &m_timer); @@ -198,15 +166,29 @@ void Workers::start(xmrig::Controller *controller) uint32_t offset = 0; - for (xmrig::IThread *thread : threads) { - Handle *handle = new Handle(thread, offset, m_status.ways); - offset += thread->multiway(); + for (xmrig::HasherConfig *hasherConfig : hashers) { + Handle *handle = new Handle(controller->config(), hasherConfig, offset); + if(handle->hasher() != nullptr) { + offset += handle->computingThreads(); + m_totalThreads += handle->computingThreads(); - m_workers.push_back(handle); - handle->start(Workers::onReady); + m_workers.push_back(handle); + handle->start(Workers::onReady); + } } - controller->save(); + if(m_workers.size() > 0) { + Log::i()->text(m_status.colors ? 
GREEN_BOLD(" * Hashers initialization complete * ") : " * Hashers initialization complete * "); + + m_hashrate = new Hashrate(m_workers, controller); + + controller->save(); + } + else { + return false; + } + + return true; } @@ -236,60 +218,49 @@ void Workers::submit(const xmrig::JobResult &result) #ifndef XMRIG_NO_API -void Workers::threadsSummary(rapidjson::Document &doc) +void Workers::hashersSummary(rapidjson::Document &doc) { - uv_mutex_lock(&m_mutex); - const uint64_t pages[2] = { m_status.hugePages, m_status.pages }; - const uint64_t memory = m_status.ways * xmrig::cn_select_memory(m_status.algo); - uv_mutex_unlock(&m_mutex); - auto &allocator = doc.GetAllocator(); - rapidjson::Value hugepages(rapidjson::kArrayType); - hugepages.PushBack(pages[0], allocator); - hugepages.PushBack(pages[1], allocator); + rapidjson::Value hashers(rapidjson::kArrayType); - doc.AddMember("hugepages", hugepages, allocator); - doc.AddMember("memory", memory, allocator); + for(int i = 0; i < m_workers.size(); i++) { + Handle *worker = m_workers[i]; + for(int j=0; j < worker->hasher()->deviceCount(); j++) { + rapidjson::Value hasherDoc(rapidjson::kObjectType); + + xmrig::String type = worker->hasher()->type().data(); + xmrig::String id = (worker->hasher()->subType(true) + to_string(j)).data(); + + hasherDoc.AddMember("type", type.toJSON(doc), allocator); + hasherDoc.AddMember("id", id.toJSON(doc), allocator); + + rapidjson::Value hashrateEntry(rapidjson::kArrayType); + hashrateEntry.PushBack(ApiRouter::normalize(m_hashrate->calc(i, j, Hashrate::ShortInterval)), allocator); + hashrateEntry.PushBack(ApiRouter::normalize(m_hashrate->calc(i, j, Hashrate::MediumInterval)), allocator); + hashrateEntry.PushBack(ApiRouter::normalize(m_hashrate->calc(i, j, Hashrate::LargeInterval)), allocator); + + hasherDoc.AddMember("hashrate", hashrateEntry, allocator); + + hashers.PushBack(hasherDoc, allocator); + } + } + + doc.AddMember("hashers", hashers, allocator); } #endif void 
Workers::onReady(void *arg) { - auto handle = static_cast(arg); + auto handleArg = static_cast(arg); - IWorker *worker = nullptr; + IWorker *worker = new Worker(handleArg->handle, handleArg->workerId); - switch (handle->config()->multiway()) { - case 1: - worker = new MultiWorker<1>(handle); - break; - - case 2: - worker = new MultiWorker<2>(handle); - break; - - case 3: - worker = new MultiWorker<3>(handle); - break; - - case 4: - worker = new MultiWorker<4>(handle); - break; - - case 5: - worker = new MultiWorker<5>(handle); - break; - - default: - break; - } - - handle->setWorker(worker); + handleArg->handle->addWorker(worker); if (!worker->selfTest()) { - LOG_ERR("thread %zu error: \"hash self-test failed\".", handle->worker()->id()); + LOG_ERR("hasher %zu error: \"hash self-test failed\".", worker->id()); return; } @@ -319,12 +290,28 @@ void Workers::onResult(uv_async_t *handle) void Workers::onTick(uv_timer_t *handle) { - for (Handle *handle : m_workers) { - if (!handle->worker()) { - return; - } + for (int h =0; h < m_workers.size(); h++) { + Handle *handle = m_workers[h]; - m_hashrate->add(handle->threadId(), handle->worker()->hashCount(), handle->worker()->timestamp()); + std::vector internalWorkers = handle->workers(); + if (internalWorkers.size() == 0) + return; + + int deviceCount = handle->hasher()->deviceCount(); + int computingThreads = internalWorkers.size(); + int multiplier = computingThreads / deviceCount; + + for(int i = 0; i < deviceCount; i++) { + uint64_t hashCount = 0; + uint64_t timeStamp = 0; + + for(int j = 0; j < multiplier; j++) { + hashCount += internalWorkers[i * multiplier + j]->hashCount(); + timeStamp = max(timeStamp, internalWorkers[i * multiplier + j]->timestamp()); + } + + m_hashrate->add(h, i, hashCount, timeStamp); + } } if ((m_ticks++ & 0xF) == 0) { @@ -339,23 +326,19 @@ void Workers::start(IWorker *worker) uv_mutex_lock(&m_mutex); m_status.started++; - m_status.pages += w->memory().pages; - m_status.hugePages += 
w->memory().hugePages; - if (m_status.started == m_status.threads) { - const double percent = (double) m_status.hugePages / m_status.pages * 100.0; - const size_t memory = m_status.ways * xmrig::cn_select_memory(m_status.algo) / 1024; - - if (m_status.colors) { - LOG_INFO(GREEN_BOLD("READY (CPU)") " threads " CYAN_BOLD("%zu(%zu)") " huge pages %s%zu/%zu %1.0f%%\x1B[0m memory " CYAN_BOLD("%zu KB") "", - m_status.threads, m_status.ways, + if (m_status.started == m_status.hashers) { +/// TODO better status description +/* if (m_status.colors) { + LOG_INFO(GREEN_BOLD("READY (CPU)") " threads " CYAN_BOLD("%zu") " huge pages %s%zu/%zu %1.0f%%\x1B[0m memory " CYAN_BOLD("%.2f KB") "", + m_status.hashers, (m_status.hugePages == m_status.pages ? "\x1B[1;32m" : (m_status.hugePages == 0 ? "\x1B[1;31m" : "\x1B[1;33m")), m_status.hugePages, m_status.pages, percent, memory); } else { - LOG_INFO("READY (CPU) threads %zu(%zu) huge pages %zu/%zu %1.0f%% memory %zu KB", - m_status.threads, m_status.ways, m_status.hugePages, m_status.pages, percent, memory); - } + LOG_INFO("READY (CPU) threads %zu huge pages %zu/%zu %1.0f%% memory %zu KB", + m_status.hashers, m_status.hugePages, m_status.pages, percent, memory); + } */ } uv_mutex_unlock(&m_mutex); diff --git a/src/workers/Workers.h b/src/workers/Workers.h index a9b8e695..8c42c8b3 100644 --- a/src/workers/Workers.h +++ b/src/workers/Workers.h @@ -51,12 +51,10 @@ class Workers { public: static xmrig::Job job(); - static size_t hugePages(); - static size_t threads(); static void printHashrate(bool detail); static void setEnabled(bool enabled); static void setJob(const xmrig::Job &job, bool donate); - static void start(xmrig::Controller *controller); + static bool start(xmrig::Controller *controller); static void stop(); static void submit(const xmrig::JobResult &result); @@ -67,9 +65,11 @@ public: static inline uint64_t sequence() { return m_sequence.load(std::memory_order_relaxed); } static inline void pause() { m_active = false; 
m_paused = 1; m_sequence++; } static inline void setListener(xmrig::IJobResultListener *listener) { m_listener = listener; } + static inline int totalThreads() { return m_totalThreads.load(std::memory_order_relaxed); } + static inline std::vector workers() { return m_workers; } # ifndef XMRIG_NO_API - static void threadsSummary(rapidjson::Document &doc); + static void hashersSummary(rapidjson::Document &doc); # endif private: @@ -82,22 +82,17 @@ private: { public: inline LaunchStatus() : - colors(true), - hugePages(0), - pages(0), - started(0), - threads(0), - ways(0), - algo(xmrig::CRYPTONIGHT) + colors(true), + started(0), + hashers(0), + algo(xmrig::ARGON2) {} bool colors; - size_t hugePages; - size_t pages; size_t started; - size_t threads; - size_t ways; + size_t hashers; xmrig::Algo algo; + xmrig::Variant variant; }; static bool m_active; @@ -110,6 +105,7 @@ private: static std::atomic m_sequence; static std::list m_queue; static std::vector m_workers; + static std::atomic m_totalThreads; static uint64_t m_ticks; static uv_async_t m_async; static uv_mutex_t m_mutex; From eb6959f3128f7e4f85da516a91f27ed761e6f0c0 Mon Sep 17 00:00:00 2001 From: Haifa Bogdan Adnan Date: Mon, 26 Aug 2019 13:33:04 +0300 Subject: [PATCH 17/18] Code fixes for naming convention consistency. 
--- CMakeLists.txt | 2 +- .../cuda/{cuda_hasher.cpp => CudaHasher.cpp} | 142 +++--- .../argon2_hasher/hash/gpu/cuda/CudaHasher.h | 126 +++++ .../argon2_hasher/hash/gpu/cuda/cuda_hasher.h | 126 ----- .../hash/gpu/cuda/cuda_kernel.cu | 228 ++++----- .../hash/gpu/opencl/OpenCLHasher.cpp | 432 +++++++++--------- .../hash/gpu/opencl/OpenCLHasher.h | 78 ++-- src/net/strategies/DonateStrategy.cpp | 4 +- src/net/strategies/Http.cpp | 30 +- src/net/strategies/Http.h | 24 +- 10 files changed, 596 insertions(+), 596 deletions(-) rename src/crypto/argon2_hasher/hash/gpu/cuda/{cuda_hasher.cpp => CudaHasher.cpp} (57%) create mode 100644 src/crypto/argon2_hasher/hash/gpu/cuda/CudaHasher.h delete mode 100644 src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f328ccc..26b98ba3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -171,7 +171,7 @@ set(SOURCE_CPU_HASHER src/crypto/argon2_hasher/hash/cpu/CpuHasher.cpp src/crypto set(SOURCE_OPENCL_HASHER src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.h) -set(SOURCE_CUDA_HASHER src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h +set(SOURCE_CUDA_HASHER src/crypto/argon2_hasher/hash/gpu/cuda/CudaHasher.cpp src/crypto/argon2_hasher/hash/gpu/cuda/CudaHasher.h src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu) set(ARGON2_FILL_BLOCKS_SRC diff --git a/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp b/src/crypto/argon2_hasher/hash/gpu/cuda/CudaHasher.cpp similarity index 57% rename from src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp rename to src/crypto/argon2_hasher/hash/gpu/cuda/CudaHasher.cpp index 2046a321..2a384cb9 100644 --- a/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp +++ 
b/src/crypto/argon2_hasher/hash/gpu/cuda/CudaHasher.cpp @@ -14,10 +14,10 @@ #include #include -#include "cuda_hasher.h" +#include "CudaHasher.h" #include "../../../common/DLLExport.h" -cuda_hasher::cuda_hasher() { +CudaHasher::CudaHasher() { m_type = "GPU"; m_subType = "CUDA"; m_shortSubType = "NVD"; @@ -27,24 +27,24 @@ cuda_hasher::cuda_hasher() { } -cuda_hasher::~cuda_hasher() { +CudaHasher::~CudaHasher() { this->cleanup(); } -bool cuda_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) { +bool CudaHasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) { cudaError_t error = cudaSuccess; string error_message; m_profile = getArgon2Profile(algorithm, variant); - __devices = __query_cuda_devices(error, error_message); + m_devices = queryCudaDevices(error, error_message); if(error != cudaSuccess) { m_description = "No compatible GPU detected: " + error_message; return false; } - if (__devices.empty()) { + if (m_devices.empty()) { m_description = "No compatible GPU detected."; return false; } @@ -52,8 +52,8 @@ bool cuda_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) { return true; } -vector cuda_hasher::__query_cuda_devices(cudaError_t &error, string &error_message) { - vector devices; +vector CudaHasher::queryCudaDevices(cudaError_t &error, string &error_message) { + vector devices; int devCount = 0; error = cudaGetDeviceCount(&devCount); @@ -67,12 +67,12 @@ vector cuda_hasher::__query_cuda_devices(cudaError_t &error, for (int i = 0; i < devCount; ++i) { - cuda_device_info *dev = __get_device_info(i); + CudaDeviceInfo *dev = getDeviceInfo(i); if(dev == NULL) continue; if(dev->error != cudaSuccess) { error = dev->error; - error_message = dev->error_message; + error_message = dev->errorMessage; continue; } devices.push_back(dev); @@ -80,45 +80,45 @@ vector cuda_hasher::__query_cuda_devices(cudaError_t &error, return devices; } -cuda_device_info *cuda_hasher::__get_device_info(int device_index) { - cuda_device_info 
*device_info = new cuda_device_info(); +CudaDeviceInfo *CudaHasher::getDeviceInfo(int device_index) { + CudaDeviceInfo *device_info = new CudaDeviceInfo(); device_info->error = cudaSuccess; - device_info->cuda_index = device_index; + device_info->cudaIndex = device_index; device_info->error = cudaSetDevice(device_index); if(device_info->error != cudaSuccess) { - device_info->error_message = "Error setting current device."; + device_info->errorMessage = "Error setting current device."; return device_info; } cudaDeviceProp devProp; device_info->error = cudaGetDeviceProperties(&devProp, device_index); if(device_info->error != cudaSuccess) { - device_info->error_message = "Error setting current device."; + device_info->errorMessage = "Error setting current device."; return device_info; } - device_info->device_string = devProp.name; + device_info->deviceString = devProp.name; size_t freemem, totalmem; device_info->error = cudaMemGetInfo(&freemem, &totalmem); if(device_info->error != cudaSuccess) { - device_info->error_message = "Error setting current device."; + device_info->errorMessage = "Error setting current device."; return device_info; } - device_info->free_mem_size = freemem; - device_info->max_allocable_mem_size = freemem / 4; + device_info->freeMemSize = freemem; + device_info->maxAllocableMemSize = freemem / 4; double mem_in_gb = totalmem / 1073741824.0; stringstream ss; ss << setprecision(2) << mem_in_gb; - device_info->device_string += (" (" + ss.str() + "GB)"); + device_info->deviceString += (" (" + ss.str() + "GB)"); return device_info; } -bool cuda_hasher::configure(xmrig::HasherConfig &config) { +bool CudaHasher::configure(xmrig::HasherConfig &config) { int index = config.getGPUCardsCount(); double intensity = 0; @@ -134,12 +134,12 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) { bool cards_selected = false; intensity = 0; - for(vector::iterator d = __devices.begin(); d != __devices.end(); d++, index++) { + for(vector::iterator d = 
m_devices.begin(); d != m_devices.end(); d++, index++) { stringstream ss; - ss << "["<< (index + 1) << "] " << (*d)->device_string; + ss << "["<< (index + 1) << "] " << (*d)->deviceString; string device_description = ss.str(); - (*d)->device_index = index; - (*d)->profile_info.profile = m_profile; + (*d)->deviceIndex = index; + (*d)->profileInfo.profile = m_profile; if(config.gpuFilter().size() > 0) { bool found = false; @@ -150,7 +150,7 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) { } } if(!found) { - (*d)->profile_info.threads = 0; + (*d)->profileInfo.threads = 0; ss << " - DISABLED" << endl; m_description += ss.str(); continue; @@ -165,12 +165,12 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) { ss << endl; - double device_intensity = config.getGPUIntensity((*d)->device_index); + double device_intensity = config.getGPUIntensity((*d)->deviceIndex); m_description += ss.str(); - if(!(__setup_device_info((*d), device_intensity))) { - m_description += (*d)->error_message; + if(!(setupDeviceInfo((*d), device_intensity))) { + m_description += (*d)->errorMessage; m_description += "\n"; continue; }; @@ -178,7 +178,7 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) { DeviceInfo device; char bus_id[100]; - if(cudaDeviceGetPCIBusId(bus_id, 100, (*d)->cuda_index) == cudaSuccess) { + if(cudaDeviceGetPCIBusId(bus_id, 100, (*d)->cudaIndex) == cudaSuccess) { device.bus_id = bus_id; int domain_separator = device.bus_id.find(":"); if(domain_separator != string::npos) { @@ -186,13 +186,13 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) { } } - device.name = (*d)->device_string; + device.name = (*d)->deviceString; device.intensity = device_intensity; - storeDeviceInfo((*d)->device_index, device); + storeDeviceInfo((*d)->deviceIndex, device); - __enabledDevices.push_back(*d); + m_enabledDevices.push_back(*d); - total_threads += (*d)->profile_info.threads; + total_threads += (*d)->profileInfo.threads; intensity += device_intensity; } 
@@ -213,46 +213,46 @@ bool cuda_hasher::configure(xmrig::HasherConfig &config) { if(!buildThreadData()) return false; - m_intensity = intensity / __enabledDevices.size(); - m_computingThreads = __enabledDevices.size() * 2; // 2 computing threads for each device + m_intensity = intensity / m_enabledDevices.size(); + m_computingThreads = m_enabledDevices.size() * 2; // 2 computing threads for each device m_description += "Status: ENABLED - with " + to_string(total_threads) + " threads."; return true; } -void cuda_hasher::cleanup() { - for(vector::iterator d = __devices.begin(); d != __devices.end(); d++) { +void CudaHasher::cleanup() { + for(vector::iterator d = m_devices.begin(); d != m_devices.end(); d++) { cuda_free(*d); } } -bool cuda_hasher::__setup_device_info(cuda_device_info *device, double intensity) { - device->profile_info.threads_per_chunk = (uint32_t)(device->max_allocable_mem_size / device->profile_info.profile->memSize); - size_t chunk_size = device->profile_info.threads_per_chunk * device->profile_info.profile->memSize; +bool CudaHasher::setupDeviceInfo(CudaDeviceInfo *device, double intensity) { + device->profileInfo.threads_per_chunk = (uint32_t)(device->maxAllocableMemSize / device->profileInfo.profile->memSize); + size_t chunk_size = device->profileInfo.threads_per_chunk * device->profileInfo.profile->memSize; if(chunk_size == 0) { device->error = cudaErrorInitializationError; - device->error_message = "Not enough memory on GPU."; + device->errorMessage = "Not enough memory on GPU."; return false; } - uint64_t usable_memory = device->free_mem_size; + uint64_t usable_memory = device->freeMemSize; double chunks = (double)usable_memory / (double)chunk_size; - uint32_t max_threads = (uint32_t)(device->profile_info.threads_per_chunk * chunks); + uint32_t max_threads = (uint32_t)(device->profileInfo.threads_per_chunk * chunks); if(max_threads == 0) { device->error = cudaErrorInitializationError; - device->error_message = "Not enough memory on GPU."; + 
device->errorMessage = "Not enough memory on GPU."; return false; } - device->profile_info.threads = (uint32_t)(max_threads * intensity / 100.0); - device->profile_info.threads = (device->profile_info.threads / 2) * 2; // make it divisible by 2 to allow for parallel kernel execution - if(max_threads > 0 && device->profile_info.threads == 0 && intensity > 0) - device->profile_info.threads = 2; + device->profileInfo.threads = (uint32_t)(max_threads * intensity / 100.0); + device->profileInfo.threads = (device->profileInfo.threads / 2) * 2; // make it divisible by 2 to allow for parallel kernel execution + if(max_threads > 0 && device->profileInfo.threads == 0 && intensity > 0) + device->profileInfo.threads = 2; - chunks = (double)device->profile_info.threads / (double)device->profile_info.threads_per_chunk; + chunks = (double)device->profileInfo.threads / (double)device->profileInfo.threads_per_chunk; cuda_allocate(device, chunks, chunk_size); @@ -262,15 +262,15 @@ bool cuda_hasher::__setup_device_info(cuda_device_info *device, double intensity return true; } -bool cuda_hasher::buildThreadData() { - __thread_data = new cuda_gpumgmt_thread_data[__enabledDevices.size() * 2]; +bool CudaHasher::buildThreadData() { + m_threadData = new CudaGpuMgmtThreadData[m_enabledDevices.size() * 2]; - for(int i=0; i < __enabledDevices.size(); i++) { - cuda_device_info *device = __enabledDevices[i]; + for(int i=0; i < m_enabledDevices.size(); i++) { + CudaDeviceInfo *device = m_enabledDevices[i]; for(int threadId = 0; threadId < 2; threadId ++) { - cuda_gpumgmt_thread_data &thread_data = __thread_data[i * 2 + threadId]; + CudaGpuMgmtThreadData &thread_data = m_threadData[i * 2 + threadId]; thread_data.device = device; - thread_data.thread_id = threadId; + thread_data.threadId = threadId; cudaStream_t stream; device->error = cudaStreamCreate(&stream); @@ -279,19 +279,19 @@ bool cuda_hasher::buildThreadData() { return false; } - thread_data.device_data = stream; + thread_data.deviceData 
= stream; #ifdef PARALLEL_CUDA if(threadId == 0) { - thread_data.threads_idx = 0; - thread_data.threads = device->profile_info.threads / 2; + thread_data.threadsIdx = 0; + thread_data.threads = device->profileInfo.threads / 2; } else { - thread_data.threads_idx = device->profile_info.threads / 2; - thread_data.threads = device->profile_info.threads - thread_data.threads_idx; + thread_data.threadsIdx = device->profileInfo.threads / 2; + thread_data.threads = device->profileInfo.threads - thread_data.threadsIdx; } #else - thread_data.threads_idx = 0; + thread_data.threadsIdx = 0; thread_data.threads = device->profile_info.threads; #endif @@ -305,17 +305,17 @@ bool cuda_hasher::buildThreadData() { return true; } -int cuda_hasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) { - cuda_gpumgmt_thread_data &threadData = __thread_data[threadIdx]; +int CudaHasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) { + CudaGpuMgmtThreadData &threadData = m_threadData[threadIdx]; - cudaSetDevice(threadData.device->cuda_index); + cudaSetDevice(threadData.device->cudaIndex); threadData.hashData.input = input; threadData.hashData.inSize = size; threadData.hashData.output = output; int hashCount = threadData.argon2->generateHashes(*m_profile, threadData.hashData); if(threadData.device->error != cudaSuccess) { - LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->error_message); + LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->errorMessage); return 0; } @@ -326,15 +326,15 @@ int cuda_hasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *ou } -size_t cuda_hasher::parallelism(int workerIdx) { - cuda_gpumgmt_thread_data &threadData = __thread_data[workerIdx]; +size_t CudaHasher::parallelism(int workerIdx) { + CudaGpuMgmtThreadData &threadData = m_threadData[workerIdx]; return threadData.threads; } -size_t cuda_hasher::deviceCount() { - 
return __enabledDevices.size(); +size_t CudaHasher::deviceCount() { + return m_enabledDevices.size(); } -REGISTER_HASHER(cuda_hasher); +REGISTER_HASHER(CudaHasher); #endif //WITH_CUDA diff --git a/src/crypto/argon2_hasher/hash/gpu/cuda/CudaHasher.h b/src/crypto/argon2_hasher/hash/gpu/cuda/CudaHasher.h new file mode 100644 index 00000000..822ebead --- /dev/null +++ b/src/crypto/argon2_hasher/hash/gpu/cuda/CudaHasher.h @@ -0,0 +1,126 @@ +// +// Created by Haifa Bogdan Adnan on 18/09/2018. +// + +#ifndef ARGON2_CUDA_HASHER_H +#define ARGON2_CUDA_HASHER_H + +#if defined(WITH_CUDA) + +struct CudaKernelArguments { + void *memoryChunk_0; + void *memoryChunk_1; + void *memoryChunk_2; + void *memoryChunk_3; + void *memoryChunk_4; + void *memoryChunk_5; + + uint32_t *refs; + uint32_t *idxs; + uint32_t *segments; + + uint32_t *preseedMemory[2]; + uint32_t *seedMemory[2]; + uint32_t *outMemory[2]; + uint32_t *hashMemory[2]; + + uint32_t *hostSeedMemory[2]; +}; + +struct Argon2ProfileInfo { + Argon2ProfileInfo() { + threads = 0; + threads_per_chunk = 0; + } + uint32_t threads; + uint32_t threads_per_chunk; + Argon2Profile *profile; +}; + +struct CudaDeviceInfo { + CudaDeviceInfo() { + deviceIndex = 0; + deviceString = ""; + freeMemSize = 0; + maxAllocableMemSize = 0; + + error = cudaSuccess; + errorMessage = ""; + } + + int deviceIndex; + int cudaIndex; + + string deviceString; + uint64_t freeMemSize; + uint64_t maxAllocableMemSize; + + Argon2ProfileInfo profileInfo; + CudaKernelArguments arguments; + + mutex deviceLock; + + cudaError_t error; + string errorMessage; +}; + +struct CudaGpuMgmtThreadData { + void lock() { +#ifndef PARALLEL_CUDA + device->deviceLock.lock(); +#endif + } + + void unlock() { +#ifndef PARALLEL_CUDA + device->deviceLock.unlock(); +#endif + } + + int threadId; + CudaDeviceInfo *device; + Argon2 *argon2; + HashData hashData; + + void *deviceData; + + int threads; + int threadsIdx; +}; + +class CudaHasher : public Hasher { +public: + CudaHasher(); + 
~CudaHasher(); + + virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant); + virtual bool configure(xmrig::HasherConfig &config); + virtual void cleanup(); + virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output); + virtual size_t parallelism(int workerIdx); + virtual size_t deviceCount(); + +private: + CudaDeviceInfo *getDeviceInfo(int device_index); + bool setupDeviceInfo(CudaDeviceInfo *device, double intensity); + vector queryCudaDevices(cudaError_t &error, string &error_message); + bool buildThreadData(); + + vector m_devices; + vector m_enabledDevices; + CudaGpuMgmtThreadData *m_threadData; + + Argon2Profile *m_profile; +}; + +// CUDA kernel exports +extern void cuda_allocate(CudaDeviceInfo *device, double chunks, size_t chunk_size); +extern void cuda_free(CudaDeviceInfo *device); +extern bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data); +extern void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data); +extern bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data); +// end CUDA kernel exports + +#endif //WITH_CUDA + +#endif //ARGON2_CUDA_HASHER_H \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h deleted file mode 100644 index 2e668b8e..00000000 --- a/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h +++ /dev/null @@ -1,126 +0,0 @@ -// -// Created by Haifa Bogdan Adnan on 18/09/2018. 
-// - -#ifndef ARGON2_CUDA_HASHER_H -#define ARGON2_CUDA_HASHER_H - -#if defined(WITH_CUDA) - -struct cuda_kernel_arguments { - void *memory_chunk_0; - void *memory_chunk_1; - void *memory_chunk_2; - void *memory_chunk_3; - void *memory_chunk_4; - void *memory_chunk_5; - - uint32_t *refs; - uint32_t *idxs; - uint32_t *segments; - - uint32_t *preseed_memory[2]; - uint32_t *seed_memory[2]; - uint32_t *out_memory[2]; - uint32_t *hash_memory[2]; - - uint32_t *host_seed_memory[2]; -}; - -struct argon2profile_info { - argon2profile_info() { - threads = 0; - threads_per_chunk = 0; - } - uint32_t threads; - uint32_t threads_per_chunk; - Argon2Profile *profile; -}; - -struct cuda_device_info { - cuda_device_info() { - device_index = 0; - device_string = ""; - free_mem_size = 0; - max_allocable_mem_size = 0; - - error = cudaSuccess; - error_message = ""; - } - - int device_index; - int cuda_index; - - string device_string; - uint64_t free_mem_size; - uint64_t max_allocable_mem_size; - - argon2profile_info profile_info; - cuda_kernel_arguments arguments; - - mutex device_lock; - - cudaError_t error; - string error_message; -}; - -struct cuda_gpumgmt_thread_data { - void lock() { -#ifndef PARALLEL_CUDA - device->device_lock.lock(); -#endif - } - - void unlock() { -#ifndef PARALLEL_CUDA - device->device_lock.unlock(); -#endif - } - - int thread_id; - cuda_device_info *device; - Argon2 *argon2; - HashData hashData; - - void *device_data; - - int threads; - int threads_idx; -}; - -class cuda_hasher : public Hasher { -public: - cuda_hasher(); - ~cuda_hasher(); - - virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant); - virtual bool configure(xmrig::HasherConfig &config); - virtual void cleanup(); - virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output); - virtual size_t parallelism(int workerIdx); - virtual size_t deviceCount(); - -private: - cuda_device_info *__get_device_info(int device_index); - bool 
__setup_device_info(cuda_device_info *device, double intensity); - vector __query_cuda_devices(cudaError_t &error, string &error_message); - bool buildThreadData(); - - vector __devices; - vector __enabledDevices; - cuda_gpumgmt_thread_data *__thread_data; - - Argon2Profile *m_profile; -}; - -// CUDA kernel exports -extern void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size); -extern void cuda_free(cuda_device_info *device); -extern bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data); -extern void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data); -extern bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data); -// end CUDA kernel exports - -#endif //WITH_CUDA - -#endif //ARGON2_CUDA_HASHER_H \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu index eea358f2..312e35bf 100644 --- a/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu +++ b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu @@ -7,7 +7,7 @@ #include "crypto/argon2_hasher/hash/Hasher.h" #include "crypto/argon2_hasher/hash/argon2/Argon2.h" -#include "cuda_hasher.h" +#include "CudaHasher.h" #define THREADS_PER_LANE 32 #define BLOCK_SIZE_UINT4 64 @@ -744,12 +744,12 @@ __global__ void posthash ( } } -void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) { - Argon2Profile *profile = device->profile_info.profile; +void cuda_allocate(CudaDeviceInfo *device, double chunks, size_t chunk_size) { + Argon2Profile *profile = device->profileInfo.profile; - device->error = cudaSetDevice(device->cuda_index); + device->error = cudaSetDevice(device->cudaIndex); if(device->error != cudaSuccess) { - device->error_message = "Error setting current device for memory allocation."; + device->errorMessage = "Error setting current device for memory allocation."; return; } 
@@ -762,9 +762,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) { else { allocated_mem_for_current_chunk = 1; } - device->error = cudaMalloc(&device->arguments.memory_chunk_0, allocated_mem_for_current_chunk); + device->error = cudaMalloc(&device->arguments.memoryChunk_0, allocated_mem_for_current_chunk); if (device->error != cudaSuccess) { - device->error_message = "Error allocating memory."; + device->errorMessage = "Error allocating memory."; return; } if (chunks > 0) { @@ -774,9 +774,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) { else { allocated_mem_for_current_chunk = 1; } - device->error = cudaMalloc(&device->arguments.memory_chunk_1, allocated_mem_for_current_chunk); + device->error = cudaMalloc(&device->arguments.memoryChunk_1, allocated_mem_for_current_chunk); if (device->error != cudaSuccess) { - device->error_message = "Error allocating memory."; + device->errorMessage = "Error allocating memory."; return; } if (chunks > 0) { @@ -786,9 +786,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) { else { allocated_mem_for_current_chunk = 1; } - device->error = cudaMalloc(&device->arguments.memory_chunk_2, allocated_mem_for_current_chunk); + device->error = cudaMalloc(&device->arguments.memoryChunk_2, allocated_mem_for_current_chunk); if (device->error != cudaSuccess) { - device->error_message = "Error allocating memory."; + device->errorMessage = "Error allocating memory."; return; } if (chunks > 0) { @@ -798,9 +798,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) { else { allocated_mem_for_current_chunk = 1; } - device->error = cudaMalloc(&device->arguments.memory_chunk_3, allocated_mem_for_current_chunk); + device->error = cudaMalloc(&device->arguments.memoryChunk_3, allocated_mem_for_current_chunk); if (device->error != cudaSuccess) { - device->error_message = "Error allocating memory."; + device->errorMessage = 
"Error allocating memory."; return; } if (chunks > 0) { @@ -810,9 +810,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) { else { allocated_mem_for_current_chunk = 1; } - device->error = cudaMalloc(&device->arguments.memory_chunk_4, allocated_mem_for_current_chunk); + device->error = cudaMalloc(&device->arguments.memoryChunk_4, allocated_mem_for_current_chunk); if (device->error != cudaSuccess) { - device->error_message = "Error allocating memory."; + device->errorMessage = "Error allocating memory."; return; } if (chunks > 0) { @@ -822,9 +822,9 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) { else { allocated_mem_for_current_chunk = 1; } - device->error = cudaMalloc(&device->arguments.memory_chunk_5, allocated_mem_for_current_chunk); + device->error = cudaMalloc(&device->arguments.memoryChunk_5, allocated_mem_for_current_chunk); if (device->error != cudaSuccess) { - device->error_message = "Error allocating memory."; + device->errorMessage = "Error allocating memory."; return; } @@ -835,13 +835,13 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) { device->error = cudaMalloc(&device->arguments.refs, profile->blockRefsSize * sizeof(uint32_t)); if(device->error != cudaSuccess) { - device->error_message = "Error allocating memory."; + device->errorMessage = "Error allocating memory."; return; } device->error = cudaMemcpy(device->arguments.refs, refs, profile->blockRefsSize * sizeof(uint32_t), cudaMemcpyHostToDevice); if(device->error != cudaSuccess) { - device->error_message = "Error copying memory."; + device->errorMessage = "Error copying memory."; return; } free(refs); @@ -860,14 +860,14 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) { device->error = cudaMalloc(&device->arguments.idxs, profile->blockRefsSize * sizeof(uint32_t)); if (device->error != cudaSuccess) { - device->error_message = "Error allocating memory."; + 
device->errorMessage = "Error allocating memory."; return; } device->error = cudaMemcpy(device->arguments.idxs, idxs, profile->blockRefsSize * sizeof(uint32_t), cudaMemcpyHostToDevice); if (device->error != cudaSuccess) { - device->error_message = "Error copying memory."; + device->errorMessage = "Error copying memory."; return; } free(idxs); @@ -876,17 +876,17 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) { //reorganize segments data device->error = cudaMalloc(&device->arguments.segments, profile->segCount * 3 * sizeof(uint32_t)); if(device->error != cudaSuccess) { - device->error_message = "Error allocating memory."; + device->errorMessage = "Error allocating memory."; return; } device->error = cudaMemcpy(device->arguments.segments, profile->segments, profile->segCount * 3 * sizeof(uint32_t), cudaMemcpyHostToDevice); if(device->error != cudaSuccess) { - device->error_message = "Error copying memory."; + device->errorMessage = "Error copying memory."; return; } #ifdef PARALLEL_CUDA - int threads = device->profile_info.threads / 2; + int threads = device->profileInfo.threads / 2; #else int threads = device->profile_info.threads; #endif @@ -896,60 +896,60 @@ void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) { size_t out_memory_size = threads * ARGON2_BLOCK_SIZE; size_t hash_memory_size = threads * (xmrig::ARGON2_HASHLEN + 4); - device->error = cudaMalloc(&device->arguments.preseed_memory[0], preseed_memory_size); + device->error = cudaMalloc(&device->arguments.preseedMemory[0], preseed_memory_size); if (device->error != cudaSuccess) { - device->error_message = "Error allocating memory."; + device->errorMessage = "Error allocating memory."; return; } - device->error = cudaMalloc(&device->arguments.seed_memory[0], seed_memory_size); + device->error = cudaMalloc(&device->arguments.seedMemory[0], seed_memory_size); if (device->error != cudaSuccess) { - device->error_message = "Error allocating memory."; + 
device->errorMessage = "Error allocating memory."; return; } - device->error = cudaMalloc(&device->arguments.out_memory[0], out_memory_size); + device->error = cudaMalloc(&device->arguments.outMemory[0], out_memory_size); if (device->error != cudaSuccess) { - device->error_message = "Error allocating memory."; + device->errorMessage = "Error allocating memory."; return; } - device->error = cudaMalloc(&device->arguments.hash_memory[0], hash_memory_size); + device->error = cudaMalloc(&device->arguments.hashMemory[0], hash_memory_size); if (device->error != cudaSuccess) { - device->error_message = "Error allocating memory."; + device->errorMessage = "Error allocating memory."; return; } - device->error = cudaMallocHost(&device->arguments.host_seed_memory[0], 132 * threads); + device->error = cudaMallocHost(&device->arguments.hostSeedMemory[0], 132 * threads); if (device->error != cudaSuccess) { - device->error_message = "Error allocating pinned memory."; + device->errorMessage = "Error allocating pinned memory."; return; } - device->error = cudaMalloc(&device->arguments.preseed_memory[1], preseed_memory_size); + device->error = cudaMalloc(&device->arguments.preseedMemory[1], preseed_memory_size); if (device->error != cudaSuccess) { - device->error_message = "Error allocating memory."; + device->errorMessage = "Error allocating memory."; return; } - device->error = cudaMalloc(&device->arguments.seed_memory[1], seed_memory_size); + device->error = cudaMalloc(&device->arguments.seedMemory[1], seed_memory_size); if (device->error != cudaSuccess) { - device->error_message = "Error allocating memory."; + device->errorMessage = "Error allocating memory."; return; } - device->error = cudaMalloc(&device->arguments.out_memory[1], out_memory_size); + device->error = cudaMalloc(&device->arguments.outMemory[1], out_memory_size); if (device->error != cudaSuccess) { - device->error_message = "Error allocating memory."; + device->errorMessage = "Error allocating memory."; return; } - 
device->error = cudaMalloc(&device->arguments.hash_memory[1], hash_memory_size); + device->error = cudaMalloc(&device->arguments.hashMemory[1], hash_memory_size); if (device->error != cudaSuccess) { - device->error_message = "Error allocating memory."; + device->errorMessage = "Error allocating memory."; return; } - device->error = cudaMallocHost(&device->arguments.host_seed_memory[1], 132 * threads); + device->error = cudaMallocHost(&device->arguments.hostSeedMemory[1], 132 * threads); if (device->error != cudaSuccess) { - device->error_message = "Error allocating pinned memory."; + device->errorMessage = "Error allocating pinned memory."; return; } } -void cuda_free(cuda_device_info *device) { - cudaSetDevice(device->cuda_index); +void cuda_free(CudaDeviceInfo *device) { + cudaSetDevice(device->cudaIndex); if(device->arguments.idxs != NULL) { cudaFree(device->arguments.idxs); @@ -966,73 +966,73 @@ void cuda_free(cuda_device_info *device) { device->arguments.segments = NULL; } - if(device->arguments.memory_chunk_0 != NULL) { - cudaFree(device->arguments.memory_chunk_0); - device->arguments.memory_chunk_0 = NULL; + if(device->arguments.memoryChunk_0 != NULL) { + cudaFree(device->arguments.memoryChunk_0); + device->arguments.memoryChunk_0 = NULL; } - if(device->arguments.memory_chunk_1 != NULL) { - cudaFree(device->arguments.memory_chunk_1); - device->arguments.memory_chunk_1 = NULL; + if(device->arguments.memoryChunk_1 != NULL) { + cudaFree(device->arguments.memoryChunk_1); + device->arguments.memoryChunk_1 = NULL; } - if(device->arguments.memory_chunk_2 != NULL) { - cudaFree(device->arguments.memory_chunk_2); - device->arguments.memory_chunk_2 = NULL; + if(device->arguments.memoryChunk_2 != NULL) { + cudaFree(device->arguments.memoryChunk_2); + device->arguments.memoryChunk_2 = NULL; } - if(device->arguments.memory_chunk_3 != NULL) { - cudaFree(device->arguments.memory_chunk_3); - device->arguments.memory_chunk_3 = NULL; + if(device->arguments.memoryChunk_3 != 
NULL) { + cudaFree(device->arguments.memoryChunk_3); + device->arguments.memoryChunk_3 = NULL; } - if(device->arguments.memory_chunk_4 != NULL) { - cudaFree(device->arguments.memory_chunk_4); - device->arguments.memory_chunk_4 = NULL; + if(device->arguments.memoryChunk_4 != NULL) { + cudaFree(device->arguments.memoryChunk_4); + device->arguments.memoryChunk_4 = NULL; } - if(device->arguments.memory_chunk_5 != NULL) { - cudaFree(device->arguments.memory_chunk_5); - device->arguments.memory_chunk_5 = NULL; + if(device->arguments.memoryChunk_5 != NULL) { + cudaFree(device->arguments.memoryChunk_5); + device->arguments.memoryChunk_5 = NULL; } - if(device->arguments.preseed_memory != NULL) { + if(device->arguments.preseedMemory != NULL) { for(int i=0;i<2;i++) { - if(device->arguments.preseed_memory[i] != NULL) - cudaFree(device->arguments.preseed_memory[i]); - device->arguments.preseed_memory[i] = NULL; + if(device->arguments.preseedMemory[i] != NULL) + cudaFree(device->arguments.preseedMemory[i]); + device->arguments.preseedMemory[i] = NULL; } } - if(device->arguments.seed_memory != NULL) { + if(device->arguments.seedMemory != NULL) { for(int i=0;i<2;i++) { - if(device->arguments.seed_memory[i] != NULL) - cudaFree(device->arguments.seed_memory[i]); - device->arguments.seed_memory[i] = NULL; + if(device->arguments.seedMemory[i] != NULL) + cudaFree(device->arguments.seedMemory[i]); + device->arguments.seedMemory[i] = NULL; } } - if(device->arguments.out_memory != NULL) { + if(device->arguments.outMemory != NULL) { for(int i=0;i<2;i++) { - if(device->arguments.out_memory[i] != NULL) - cudaFree(device->arguments.out_memory[i]); - device->arguments.out_memory[i] = NULL; + if(device->arguments.outMemory[i] != NULL) + cudaFree(device->arguments.outMemory[i]); + device->arguments.outMemory[i] = NULL; } } - if(device->arguments.hash_memory != NULL) { + if(device->arguments.hashMemory != NULL) { for(int i=0;i<2;i++) { - if(device->arguments.hash_memory[i] != NULL) - 
cudaFree(device->arguments.hash_memory[i]); - device->arguments.hash_memory[i] = NULL; + if(device->arguments.hashMemory[i] != NULL) + cudaFree(device->arguments.hashMemory[i]); + device->arguments.hashMemory[i] = NULL; } } - if(device->arguments.host_seed_memory != NULL) { + if(device->arguments.hostSeedMemory != NULL) { for(int i=0;i<2;i++) { - if(device->arguments.host_seed_memory[i] != NULL) - cudaFreeHost(device->arguments.host_seed_memory[i]); - device->arguments.host_seed_memory[i] = NULL; + if(device->arguments.hostSeedMemory[i] != NULL) + cudaFreeHost(device->arguments.hostSeedMemory[i]); + device->arguments.hostSeedMemory[i] = NULL; } } @@ -1040,9 +1040,9 @@ void cuda_free(cuda_device_info *device) { } bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data) { - cuda_gpumgmt_thread_data *gpumgmt_thread = (cuda_gpumgmt_thread_data *)user_data; - cuda_device_info *device = gpumgmt_thread->device; - cudaStream_t stream = (cudaStream_t)gpumgmt_thread->device_data; + CudaGpuMgmtThreadData *gpumgmt_thread = (CudaGpuMgmtThreadData *)user_data; + CudaDeviceInfo *device = gpumgmt_thread->device; + cudaStream_t stream = (cudaStream_t)gpumgmt_thread->deviceData; int sessions = max(profile->thrCost * 2, (uint32_t)8); double hashes_per_block = sessions / (profile->thrCost * 2.0); @@ -1050,18 +1050,18 @@ bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, vo gpumgmt_thread->lock(); - memcpy(device->arguments.host_seed_memory[gpumgmt_thread->thread_id], memory, gpumgmt_thread->hashData.inSize); + memcpy(device->arguments.hostSeedMemory[gpumgmt_thread->threadId], memory, gpumgmt_thread->hashData.inSize); - device->error = cudaMemcpyAsync(device->arguments.preseed_memory[gpumgmt_thread->thread_id], device->arguments.host_seed_memory[gpumgmt_thread->thread_id], gpumgmt_thread->hashData.inSize, cudaMemcpyHostToDevice, stream); + device->error = 
cudaMemcpyAsync(device->arguments.preseedMemory[gpumgmt_thread->threadId], device->arguments.hostSeedMemory[gpumgmt_thread->threadId], gpumgmt_thread->hashData.inSize, cudaMemcpyHostToDevice, stream); if (device->error != cudaSuccess) { - device->error_message = "Error writing to gpu memory."; + device->errorMessage = "Error writing to gpu memory."; gpumgmt_thread->unlock(); return false; } prehash <<< ceil(threads / hashes_per_block), work_items, sessions * BLAKE_SHARED_MEM, stream>>> ( - device->arguments.preseed_memory[gpumgmt_thread->thread_id], - device->arguments.seed_memory[gpumgmt_thread->thread_id], + device->arguments.preseedMemory[gpumgmt_thread->threadId], + device->arguments.seedMemory[gpumgmt_thread->threadId], profile->memCost, profile->thrCost, profile->segCount / (4 * profile->thrCost), @@ -1073,21 +1073,21 @@ bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, vo } void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data) { - cuda_gpumgmt_thread_data *gpumgmt_thread = (cuda_gpumgmt_thread_data *)user_data; - cuda_device_info *device = gpumgmt_thread->device; - cudaStream_t stream = (cudaStream_t)gpumgmt_thread->device_data; + CudaGpuMgmtThreadData *gpumgmt_thread = (CudaGpuMgmtThreadData *)user_data; + CudaDeviceInfo *device = gpumgmt_thread->device; + cudaStream_t stream = (cudaStream_t)gpumgmt_thread->deviceData; size_t work_items = KERNEL_WORKGROUP_SIZE * profile->thrCost; size_t shared_mem = profile->thrCost * (ARGON2_BLOCK_SIZE + 128 + (profile->succesiveIdxs == 1 ? 
128 : 0)); - fill_blocks <<>> ((uint32_t*)device->arguments.memory_chunk_0, - (uint32_t*)device->arguments.memory_chunk_1, - (uint32_t*)device->arguments.memory_chunk_2, - (uint32_t*)device->arguments.memory_chunk_3, - (uint32_t*)device->arguments.memory_chunk_4, - (uint32_t*)device->arguments.memory_chunk_5, - device->arguments.seed_memory[gpumgmt_thread->thread_id], - device->arguments.out_memory[gpumgmt_thread->thread_id], + fill_blocks <<>> ((uint32_t*)device->arguments.memoryChunk_0, + (uint32_t*)device->arguments.memoryChunk_1, + (uint32_t*)device->arguments.memoryChunk_2, + (uint32_t*)device->arguments.memoryChunk_3, + (uint32_t*)device->arguments.memoryChunk_4, + (uint32_t*)device->arguments.memoryChunk_5, + device->arguments.seedMemory[gpumgmt_thread->threadId], + device->arguments.outMemory[gpumgmt_thread->threadId], device->arguments.refs, device->arguments.idxs, device->arguments.segments, @@ -1095,27 +1095,27 @@ void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data) { profile->thrCost, profile->segSize, profile->segCount, - device->profile_info.threads_per_chunk, - gpumgmt_thread->threads_idx); + device->profileInfo.threads_per_chunk, + gpumgmt_thread->threadsIdx); return (void *)1; } bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data) { - cuda_gpumgmt_thread_data *gpumgmt_thread = (cuda_gpumgmt_thread_data *)user_data; - cuda_device_info *device = gpumgmt_thread->device; - cudaStream_t stream = (cudaStream_t)gpumgmt_thread->device_data; + CudaGpuMgmtThreadData *gpumgmt_thread = (CudaGpuMgmtThreadData *)user_data; + CudaDeviceInfo *device = gpumgmt_thread->device; + cudaStream_t stream = (cudaStream_t)gpumgmt_thread->deviceData; size_t work_items = 4; posthash <<>> ( - device->arguments.hash_memory[gpumgmt_thread->thread_id], - device->arguments.out_memory[gpumgmt_thread->thread_id], - device->arguments.preseed_memory[gpumgmt_thread->thread_id]); + 
device->arguments.hashMemory[gpumgmt_thread->threadId], + device->arguments.outMemory[gpumgmt_thread->threadId], + device->arguments.preseedMemory[gpumgmt_thread->threadId]); - device->error = cudaMemcpyAsync(device->arguments.host_seed_memory[gpumgmt_thread->thread_id], device->arguments.hash_memory[gpumgmt_thread->thread_id], threads * (xmrig::ARGON2_HASHLEN + 4), cudaMemcpyDeviceToHost, stream); + device->error = cudaMemcpyAsync(device->arguments.hostSeedMemory[gpumgmt_thread->threadId], device->arguments.hashMemory[gpumgmt_thread->threadId], threads * (xmrig::ARGON2_HASHLEN + 4), cudaMemcpyDeviceToHost, stream); if (device->error != cudaSuccess) { - device->error_message = "Error reading gpu memory."; + device->errorMessage = "Error reading gpu memory."; gpumgmt_thread->unlock(); return false; } @@ -1125,7 +1125,7 @@ bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, v continue; } - memcpy(memory, device->arguments.host_seed_memory[gpumgmt_thread->thread_id], threads * (xmrig::ARGON2_HASHLEN + 4)); + memcpy(memory, device->arguments.hostSeedMemory[gpumgmt_thread->threadId], threads * (xmrig::ARGON2_HASHLEN + 4)); gpumgmt_thread->unlock(); return memory; diff --git a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp index b217dc79..1932baeb 100755 --- a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp +++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp @@ -36,7 +36,7 @@ typedef union #define KERNEL_WORKGROUP_SIZE 32 -opencl_hasher::opencl_hasher() { +OpenCLHasher::OpenCLHasher() { m_type = "GPU"; m_subType = "OPENCL"; m_shortSubType = "OCL"; @@ -45,23 +45,23 @@ opencl_hasher::opencl_hasher() { m_computingThreads = 0; } -opencl_hasher::~opencl_hasher() { +OpenCLHasher::~OpenCLHasher() { // this->cleanup(); } -bool opencl_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) { +bool OpenCLHasher::initialize(xmrig::Algo algorithm, 
xmrig::Variant variant) { cl_int error = CL_SUCCESS; string error_message; m_profile = getArgon2Profile(algorithm, variant); - __devices = __query_opencl_devices(error, error_message); + m_devices = queryOpenCLDevices(error, error_message); if(error != CL_SUCCESS) { m_description = "No compatible GPU detected: " + error_message; return false; } - if (__devices.empty()) { + if (m_devices.empty()) { m_description = "No compatible GPU detected."; return false; } @@ -69,13 +69,13 @@ bool opencl_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) { return true; } -vector opencl_hasher::__query_opencl_devices(cl_int &error, string &error_message) { +vector OpenCLHasher::queryOpenCLDevices(cl_int &error, string &error_message) { cl_int err; cl_uint platform_count = 0; cl_uint device_count = 0; - vector result; + vector result; clGetPlatformIDs(0, NULL, &platform_count); if(platform_count == 0) { @@ -112,13 +112,13 @@ vector opencl_hasher::__query_opencl_devices(cl_int &error, } for(uint32_t j=0; j < device_count; j++) { - opencl_device_info *info = __get_device_info(platforms[i], devices[j]); + OpenCLDeviceInfo *info = getDeviceInfo(platforms[i], devices[j]); if(info->error != CL_SUCCESS) { error = info->error; - error_message = info->error_message; + error_message = info->errorMessage; } else { - info->device_index = counter; + info->deviceIndex = counter; result.push_back(info); counter++; } @@ -132,8 +132,8 @@ vector opencl_hasher::__query_opencl_devices(cl_int &error, return result; } -opencl_device_info *opencl_hasher::__get_device_info(cl_platform_id platform, cl_device_id device) { - opencl_device_info *device_info = new opencl_device_info(CL_SUCCESS, ""); +OpenCLDeviceInfo *OpenCLHasher::getDeviceInfo(cl_platform_id platform, cl_device_id device) { + OpenCLDeviceInfo *device_info = new OpenCLDeviceInfo(CL_SUCCESS, ""); device_info->platform = platform; device_info->device = device; @@ -149,7 +149,7 @@ opencl_device_info 
*opencl_hasher::__get_device_info(cl_platform_id platform, cl device_info->error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sz, buffer, &sz); if(device_info->error != CL_SUCCESS) { free(buffer); - device_info->error_message = "Error querying device vendor."; + device_info->errorMessage = "Error querying device vendor."; return device_info; } else { @@ -170,7 +170,7 @@ opencl_device_info *opencl_hasher::__get_device_info(cl_platform_id platform, cl device_info->error = clGetDeviceInfo(device, query_type, sz, buffer, &sz); if (device_info->error != CL_SUCCESS) { free(buffer); - device_info->error_message = "Error querying device name."; + device_info->errorMessage = "Error querying device name."; return device_info; } else { buffer[sz] = 0; @@ -185,7 +185,7 @@ opencl_device_info *opencl_hasher::__get_device_info(cl_platform_id platform, cl device_info->error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sz, buffer, &sz); if(device_info->error != CL_SUCCESS) { free(buffer); - device_info->error_message = "Error querying device version."; + device_info->errorMessage = "Error querying device version."; return device_info; } else { @@ -194,29 +194,29 @@ opencl_device_info *opencl_hasher::__get_device_info(cl_platform_id platform, cl free(buffer); } - device_info->device_string = device_vendor + " - " + device_name/* + " : " + device_version*/; + device_info->deviceString = device_vendor + " - " + device_name/* + " : " + device_version*/; - device_info->error = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(device_info->max_mem_size), &(device_info->max_mem_size), NULL); + device_info->error = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(device_info->maxMemSize), &(device_info->maxMemSize), NULL); if(device_info->error != CL_SUCCESS) { - device_info->error_message = "Error querying device global memory size."; + device_info->errorMessage = "Error querying device global memory size."; return device_info; } - device_info->error = 
clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(device_info->max_allocable_mem_size), &(device_info->max_allocable_mem_size), NULL); + device_info->error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(device_info->maxAllocableMemSize), &(device_info->maxAllocableMemSize), NULL); if(device_info->error != CL_SUCCESS) { - device_info->error_message = "Error querying device max memory allocation."; + device_info->errorMessage = "Error querying device max memory allocation."; return device_info; } - double mem_in_gb = device_info->max_mem_size / 1073741824.0; + double mem_in_gb = device_info->maxMemSize / 1073741824.0; stringstream ss; ss << setprecision(2) << mem_in_gb; - device_info->device_string += (" (" + ss.str() + "GB)"); + device_info->deviceString += (" (" + ss.str() + "GB)"); return device_info; } -bool opencl_hasher::configure(xmrig::HasherConfig &config) { +bool OpenCLHasher::configure(xmrig::HasherConfig &config) { int index = config.getGPUCardsCount(); double intensity = 0; @@ -233,12 +233,12 @@ bool opencl_hasher::configure(xmrig::HasherConfig &config) { intensity = 0; - for(vector::iterator d = __devices.begin(); d != __devices.end(); d++, index++) { + for(vector::iterator d = m_devices.begin(); d != m_devices.end(); d++, index++) { stringstream ss; - ss << "["<< (index + 1) << "] " << (*d)->device_string; + ss << "["<< (index + 1) << "] " << (*d)->deviceString; string device_description = ss.str(); - (*d)->device_index = index; - (*d)->profile_info.profile = m_profile; + (*d)->deviceIndex = index; + (*d)->profileInfo.profile = m_profile; if(config.gpuFilter().size() > 0) { bool found = false; @@ -249,7 +249,7 @@ bool opencl_hasher::configure(xmrig::HasherConfig &config) { } } if(!found) { - (*d)->profile_info.threads = 0; + (*d)->profileInfo.threads = 0; ss << " - DISABLED" << endl; m_description += ss.str(); continue; @@ -264,19 +264,19 @@ bool opencl_hasher::configure(xmrig::HasherConfig &config) { ss << endl; - double 
device_intensity = config.getGPUIntensity((*d)->device_index); + double device_intensity = config.getGPUIntensity((*d)->deviceIndex); m_description += ss.str(); - if(!(__setup_device_info((*d), device_intensity))) { - m_description += (*d)->error_message; + if(!(setupDeviceInfo((*d), device_intensity))) { + m_description += (*d)->errorMessage; m_description += "\n"; continue; }; DeviceInfo device; - if((*d)->device_string.find("Advanced Micro Devices") != string::npos) { + if((*d)->deviceString.find("Advanced Micro Devices") != string::npos) { device_topology_amd amdtopo; if(clGetDeviceInfo((*d)->device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL) == CL_SUCCESS) { char bus_id[50]; @@ -284,7 +284,7 @@ bool opencl_hasher::configure(xmrig::HasherConfig &config) { device.bus_id = bus_id; } } - else if((*d)->device_string.find("NVIDIA") != string::npos) { + else if((*d)->deviceString.find("NVIDIA") != string::npos) { cl_uint bus; cl_uint slot; @@ -297,13 +297,13 @@ bool opencl_hasher::configure(xmrig::HasherConfig &config) { } } - device.name = (*d)->device_string; + device.name = (*d)->deviceString; device.intensity = device_intensity; - storeDeviceInfo((*d)->device_index, device); + storeDeviceInfo((*d)->deviceIndex, device); - __enabledDevices.push_back(*d); + m_enabledDevices.push_back(*d); - total_threads += (*d)->profile_info.threads; + total_threads += (*d)->profileInfo.threads; intensity += device_intensity; } @@ -323,14 +323,14 @@ bool opencl_hasher::configure(xmrig::HasherConfig &config) { buildThreadData(); - m_intensity = intensity / __enabledDevices.size(); - m_computingThreads = __enabledDevices.size() * 2; // 2 computing threads for each device + m_intensity = intensity / m_enabledDevices.size(); + m_computingThreads = m_enabledDevices.size() * 2; // 2 computing threads for each device m_description += "Status: ENABLED - with " + to_string(total_threads) + " threads."; return true; } -bool opencl_hasher::__setup_device_info(opencl_device_info 
*device, double intensity) { +bool OpenCLHasher::setupDeviceInfo(OpenCLDeviceInfo *device, double intensity) { cl_int error; cl_context_properties properties[] = { @@ -340,14 +340,14 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten device->context = clCreateContext(properties, 1, &(device->device), NULL, NULL, &error); if (error != CL_SUCCESS) { device->error = error; - device->error_message = "Error getting device context."; + device->errorMessage = "Error getting device context."; return false; } device->queue = clCreateCommandQueue(device->context, device->device, CL_QUEUE_PROFILING_ENABLE, &error); if (error != CL_SUCCESS) { device->error = error; - device->error_message = "Error getting device command queue."; + device->errorMessage = "Error getting device command queue."; return false; } @@ -357,7 +357,7 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten device->program = clCreateProgramWithSource(device->context, 1, srcptr, &srcsize, &error); if (error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating opencl program for device."; + device->errorMessage = "Error creating opencl program for device."; return false; } @@ -372,55 +372,55 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten free(log); device->error = error; - device->error_message = "Error building opencl program for device: " + build_log; + device->errorMessage = "Error building opencl program for device: " + build_log; return false; } - device->kernel_prehash = clCreateKernel(device->program, "prehash", &error); + device->kernelPrehash = clCreateKernel(device->program, "prehash", &error); if (error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating opencl prehash kernel for device."; + device->errorMessage = "Error creating opencl prehash kernel for device."; return false; } - device->kernel_fill_blocks = clCreateKernel(device->program, 
"fill_blocks", &error); + device->kernelFillBlocks = clCreateKernel(device->program, "fill_blocks", &error); if (error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating opencl main kernel for device."; + device->errorMessage = "Error creating opencl main kernel for device."; return false; } - device->kernel_posthash = clCreateKernel(device->program, "posthash", &error); + device->kernelPosthash = clCreateKernel(device->program, "posthash", &error); if (error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating opencl posthash kernel for device."; + device->errorMessage = "Error creating opencl posthash kernel for device."; return false; } - device->profile_info.threads_per_chunk = (uint32_t) (device->max_allocable_mem_size / device->profile_info.profile->memSize); - size_t chunk_size = device->profile_info.threads_per_chunk * device->profile_info.profile->memSize; + device->profileInfo.threads_per_chunk = (uint32_t) (device->maxAllocableMemSize / device->profileInfo.profile->memSize); + size_t chunk_size = device->profileInfo.threads_per_chunk * device->profileInfo.profile->memSize; if (chunk_size == 0) { device->error = -1; - device->error_message = "Not enough memory on GPU."; + device->errorMessage = "Not enough memory on GPU."; return false; } - uint64_t usable_memory = device->max_mem_size; + uint64_t usable_memory = device->maxMemSize; double chunks = (double) usable_memory / (double) chunk_size; - uint32_t max_threads = (uint32_t) (device->profile_info.threads_per_chunk * chunks); + uint32_t max_threads = (uint32_t) (device->profileInfo.threads_per_chunk * chunks); if (max_threads == 0) { device->error = -1; - device->error_message = "Not enough memory on GPU."; + device->errorMessage = "Not enough memory on GPU."; return false; } - device->profile_info.threads = (uint32_t) (max_threads * intensity / 100.0); - device->profile_info.threads = (device->profile_info.threads / 4) * 4; // make it 
divisible by 4 - if (max_threads > 0 && device->profile_info.threads == 0 && intensity > 0) - device->profile_info.threads = 4; + device->profileInfo.threads = (uint32_t) (max_threads * intensity / 100.0); + device->profileInfo.threads = (device->profileInfo.threads / 4) * 4; // make it divisible by 4 + if (max_threads > 0 && device->profileInfo.threads == 0 && intensity > 0) + device->profileInfo.threads = 4; - double counter = (double) device->profile_info.threads / (double) device->profile_info.threads_per_chunk; + double counter = (double) device->profileInfo.threads / (double) device->profileInfo.threads_per_chunk; size_t allocated_mem_for_current_chunk = 0; if (counter > 0) { @@ -433,11 +433,11 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten } else { allocated_mem_for_current_chunk = 1; } - device->arguments.memory_chunk_0 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, - allocated_mem_for_current_chunk, NULL, &error); + device->arguments.memoryChunk_0 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); if (error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating memory buffer."; + device->errorMessage = "Error creating memory buffer."; return false; } @@ -451,11 +451,11 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten } else { allocated_mem_for_current_chunk = 1; } - device->arguments.memory_chunk_1 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, - allocated_mem_for_current_chunk, NULL, &error); + device->arguments.memoryChunk_1 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); if (error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating memory buffer."; + device->errorMessage = "Error creating memory buffer."; return false; } @@ -469,11 +469,11 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, 
double inten } else { allocated_mem_for_current_chunk = 1; } - device->arguments.memory_chunk_2 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, - allocated_mem_for_current_chunk, NULL, &error); + device->arguments.memoryChunk_2 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); if (error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating memory buffer."; + device->errorMessage = "Error creating memory buffer."; return false; } @@ -487,11 +487,11 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten } else { allocated_mem_for_current_chunk = 1; } - device->arguments.memory_chunk_3 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, - allocated_mem_for_current_chunk, NULL, &error); + device->arguments.memoryChunk_3 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); if (error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating memory buffer."; + device->errorMessage = "Error creating memory buffer."; return false; } @@ -505,11 +505,11 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten } else { allocated_mem_for_current_chunk = 1; } - device->arguments.memory_chunk_4 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, - allocated_mem_for_current_chunk, NULL, &error); + device->arguments.memoryChunk_4 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); if (error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating memory buffer."; + device->errorMessage = "Error creating memory buffer."; return false; } @@ -523,176 +523,176 @@ bool opencl_hasher::__setup_device_info(opencl_device_info *device, double inten } else { allocated_mem_for_current_chunk = 1; } - device->arguments.memory_chunk_5 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, - 
allocated_mem_for_current_chunk, NULL, &error); + device->arguments.memoryChunk_5 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); if (error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating memory buffer."; + device->errorMessage = "Error creating memory buffer."; return false; } device->arguments.refs = clCreateBuffer(device->context, CL_MEM_READ_ONLY, - device->profile_info.profile->blockRefsSize * sizeof(uint32_t), NULL, + device->profileInfo.profile->blockRefsSize * sizeof(uint32_t), NULL, &error); if (error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating memory buffer."; + device->errorMessage = "Error creating memory buffer."; return false; } - if (device->profile_info.profile->succesiveIdxs == 1) { + if (device->profileInfo.profile->succesiveIdxs == 1) { device->arguments.idxs = NULL; } else { device->arguments.idxs = clCreateBuffer(device->context, CL_MEM_READ_ONLY, - device->profile_info.profile->blockRefsSize * sizeof(uint32_t), NULL, + device->profileInfo.profile->blockRefsSize * sizeof(uint32_t), NULL, &error); if (error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating memory buffer."; + device->errorMessage = "Error creating memory buffer."; return false; } } - device->arguments.segments = clCreateBuffer(device->context, CL_MEM_READ_ONLY, device->profile_info.profile->segCount * 3 * sizeof(uint32_t), NULL, &error); + device->arguments.segments = clCreateBuffer(device->context, CL_MEM_READ_ONLY, device->profileInfo.profile->segCount * 3 * sizeof(uint32_t), NULL, &error); if(error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating memory buffer."; + device->errorMessage = "Error creating memory buffer."; return false; } - size_t preseed_memory_size = device->profile_info.profile->pwdLen * 4; - size_t seed_memory_size = device->profile_info.threads * 
(device->profile_info.profile->thrCost * 2) * ARGON2_BLOCK_SIZE; - size_t out_memory_size = device->profile_info.threads * ARGON2_BLOCK_SIZE; - size_t hash_memory_size = device->profile_info.threads * (xmrig::ARGON2_HASHLEN + 4); + size_t preseed_memory_size = device->profileInfo.profile->pwdLen * 4; + size_t seed_memory_size = device->profileInfo.threads * (device->profileInfo.profile->thrCost * 2) * ARGON2_BLOCK_SIZE; + size_t out_memory_size = device->profileInfo.threads * ARGON2_BLOCK_SIZE; + size_t hash_memory_size = device->profileInfo.threads * (xmrig::ARGON2_HASHLEN + 4); - device->arguments.preseed_memory[0] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error); + device->arguments.preseedMemory[0] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error); if(error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating memory buffer."; + device->errorMessage = "Error creating memory buffer."; return false; } - device->arguments.preseed_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error); + device->arguments.preseedMemory[1] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error); if(error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating memory buffer."; + device->errorMessage = "Error creating memory buffer."; return false; } - device->arguments.seed_memory[0] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error); + device->arguments.seedMemory[0] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error); if(error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating memory buffer."; + device->errorMessage = "Error creating memory buffer."; return false; } - device->arguments.seed_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error); + 
device->arguments.seedMemory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error); if(error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating memory buffer."; + device->errorMessage = "Error creating memory buffer."; return false; } - device->arguments.out_memory[0] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, out_memory_size, NULL, &error); + device->arguments.outMemory[0] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, out_memory_size, NULL, &error); if(error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating memory buffer."; + device->errorMessage = "Error creating memory buffer."; return false; } - device->arguments.out_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, out_memory_size, NULL, &error); + device->arguments.outMemory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, out_memory_size, NULL, &error); if(error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating memory buffer."; + device->errorMessage = "Error creating memory buffer."; return false; } - device->arguments.hash_memory[0] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error); + device->arguments.hashMemory[0] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error); if(error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating memory buffer."; + device->errorMessage = "Error creating memory buffer."; return false; } - device->arguments.hash_memory[1] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error); + device->arguments.hashMemory[1] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error); if(error != CL_SUCCESS) { device->error = error; - device->error_message = "Error creating memory buffer."; + device->errorMessage = "Error creating memory buffer."; return false; } //optimise 
address sizes - uint32_t *refs = (uint32_t *)malloc(device->profile_info.profile->blockRefsSize * sizeof(uint32_t)); - for(int i=0;iprofile_info.profile->blockRefsSize;i++) { - refs[i] = device->profile_info.profile->blockRefs[i*3 + 1]; + uint32_t *refs = (uint32_t *)malloc(device->profileInfo.profile->blockRefsSize * sizeof(uint32_t)); + for(int i=0;iprofileInfo.profile->blockRefsSize; i++) { + refs[i] = device->profileInfo.profile->blockRefs[i * 3 + 1]; } - error=clEnqueueWriteBuffer(device->queue, device->arguments.refs, CL_TRUE, 0, device->profile_info.profile->blockRefsSize * sizeof(uint32_t), refs, 0, NULL, NULL); + error=clEnqueueWriteBuffer(device->queue, device->arguments.refs, CL_TRUE, 0, device->profileInfo.profile->blockRefsSize * sizeof(uint32_t), refs, 0, NULL, NULL); if(error != CL_SUCCESS) { device->error = error; - device->error_message = "Error writing to gpu memory."; + device->errorMessage = "Error writing to gpu memory."; return false; } free(refs); - if(device->profile_info.profile->succesiveIdxs == 0) { - uint32_t *idxs = (uint32_t *) malloc(device->profile_info.profile->blockRefsSize * sizeof(uint32_t)); - for (int i = 0; i < device->profile_info.profile->blockRefsSize; i++) { - idxs[i] = device->profile_info.profile->blockRefs[i * 3]; - if (device->profile_info.profile->blockRefs[i * 3 + 2] == 1) { + if(device->profileInfo.profile->succesiveIdxs == 0) { + uint32_t *idxs = (uint32_t *) malloc(device->profileInfo.profile->blockRefsSize * sizeof(uint32_t)); + for (int i = 0; i < device->profileInfo.profile->blockRefsSize; i++) { + idxs[i] = device->profileInfo.profile->blockRefs[i * 3]; + if (device->profileInfo.profile->blockRefs[i * 3 + 2] == 1) { idxs[i] |= 0x80000000; } } - error=clEnqueueWriteBuffer(device->queue, device->arguments.idxs, CL_TRUE, 0, device->profile_info.profile->blockRefsSize * sizeof(uint32_t), idxs, 0, NULL, NULL); + error=clEnqueueWriteBuffer(device->queue, device->arguments.idxs, CL_TRUE, 0, 
device->profileInfo.profile->blockRefsSize * sizeof(uint32_t), idxs, 0, NULL, NULL); if(error != CL_SUCCESS) { device->error = error; - device->error_message = "Error writing to gpu memory."; + device->errorMessage = "Error writing to gpu memory."; return false; } free(idxs); } - error=clEnqueueWriteBuffer(device->queue, device->arguments.segments, CL_TRUE, 0, device->profile_info.profile->segCount * 3 * sizeof(uint32_t), device->profile_info.profile->segments, 0, NULL, NULL); + error=clEnqueueWriteBuffer(device->queue, device->arguments.segments, CL_TRUE, 0, device->profileInfo.profile->segCount * 3 * sizeof(uint32_t), device->profileInfo.profile->segments, 0, NULL, NULL); if(error != CL_SUCCESS) { device->error = error; - device->error_message = "Error writing to gpu memory."; + device->errorMessage = "Error writing to gpu memory."; return false; } - clSetKernelArg(device->kernel_fill_blocks, 0, sizeof(device->arguments.memory_chunk_0), &device->arguments.memory_chunk_0); - clSetKernelArg(device->kernel_fill_blocks, 1, sizeof(device->arguments.memory_chunk_1), &device->arguments.memory_chunk_1); - clSetKernelArg(device->kernel_fill_blocks, 2, sizeof(device->arguments.memory_chunk_2), &device->arguments.memory_chunk_2); - clSetKernelArg(device->kernel_fill_blocks, 3, sizeof(device->arguments.memory_chunk_3), &device->arguments.memory_chunk_3); - clSetKernelArg(device->kernel_fill_blocks, 4, sizeof(device->arguments.memory_chunk_4), &device->arguments.memory_chunk_4); - clSetKernelArg(device->kernel_fill_blocks, 5, sizeof(device->arguments.memory_chunk_5), &device->arguments.memory_chunk_5); - clSetKernelArg(device->kernel_fill_blocks, 8, sizeof(device->arguments.refs), &device->arguments.refs); - if(device->profile_info.profile->succesiveIdxs == 0) - clSetKernelArg(device->kernel_fill_blocks, 9, sizeof(device->arguments.idxs), &device->arguments.idxs); + clSetKernelArg(device->kernelFillBlocks, 0, sizeof(device->arguments.memoryChunk_0), 
&device->arguments.memoryChunk_0); + clSetKernelArg(device->kernelFillBlocks, 1, sizeof(device->arguments.memoryChunk_1), &device->arguments.memoryChunk_1); + clSetKernelArg(device->kernelFillBlocks, 2, sizeof(device->arguments.memoryChunk_2), &device->arguments.memoryChunk_2); + clSetKernelArg(device->kernelFillBlocks, 3, sizeof(device->arguments.memoryChunk_3), &device->arguments.memoryChunk_3); + clSetKernelArg(device->kernelFillBlocks, 4, sizeof(device->arguments.memoryChunk_4), &device->arguments.memoryChunk_4); + clSetKernelArg(device->kernelFillBlocks, 5, sizeof(device->arguments.memoryChunk_5), &device->arguments.memoryChunk_5); + clSetKernelArg(device->kernelFillBlocks, 8, sizeof(device->arguments.refs), &device->arguments.refs); + if(device->profileInfo.profile->succesiveIdxs == 0) + clSetKernelArg(device->kernelFillBlocks, 9, sizeof(device->arguments.idxs), &device->arguments.idxs); else - clSetKernelArg(device->kernel_fill_blocks, 9, sizeof(cl_mem), NULL); - clSetKernelArg(device->kernel_fill_blocks, 10, sizeof(device->arguments.segments), &device->arguments.segments); - clSetKernelArg(device->kernel_fill_blocks, 11, sizeof(int32_t), &device->profile_info.profile->memSize); - clSetKernelArg(device->kernel_fill_blocks, 12, sizeof(int32_t), &device->profile_info.profile->thrCost); - clSetKernelArg(device->kernel_fill_blocks, 13, sizeof(int32_t), &device->profile_info.profile->segSize); - clSetKernelArg(device->kernel_fill_blocks, 14, sizeof(int32_t), &device->profile_info.profile->segCount); - clSetKernelArg(device->kernel_fill_blocks, 15, sizeof(int32_t), &device->profile_info.threads_per_chunk); + clSetKernelArg(device->kernelFillBlocks, 9, sizeof(cl_mem), NULL); + clSetKernelArg(device->kernelFillBlocks, 10, sizeof(device->arguments.segments), &device->arguments.segments); + clSetKernelArg(device->kernelFillBlocks, 11, sizeof(int32_t), &device->profileInfo.profile->memSize); + clSetKernelArg(device->kernelFillBlocks, 12, sizeof(int32_t), 
&device->profileInfo.profile->thrCost); + clSetKernelArg(device->kernelFillBlocks, 13, sizeof(int32_t), &device->profileInfo.profile->segSize); + clSetKernelArg(device->kernelFillBlocks, 14, sizeof(int32_t), &device->profileInfo.profile->segCount); + clSetKernelArg(device->kernelFillBlocks, 15, sizeof(int32_t), &device->profileInfo.threads_per_chunk); - clSetKernelArg(device->kernel_prehash, 2, sizeof(int32_t), &device->profile_info.profile->memCost); - clSetKernelArg(device->kernel_prehash, 3, sizeof(int32_t), &device->profile_info.profile->thrCost); - int passes = device->profile_info.profile->segCount / (4 * device->profile_info.profile->thrCost); - clSetKernelArg(device->kernel_prehash, 4, sizeof(int32_t), &passes); - clSetKernelArg(device->kernel_prehash, 6, sizeof(int32_t), &device->profile_info.profile->saltLen); + clSetKernelArg(device->kernelPrehash, 2, sizeof(int32_t), &device->profileInfo.profile->memCost); + clSetKernelArg(device->kernelPrehash, 3, sizeof(int32_t), &device->profileInfo.profile->thrCost); + int passes = device->profileInfo.profile->segCount / (4 * device->profileInfo.profile->thrCost); + clSetKernelArg(device->kernelPrehash, 4, sizeof(int32_t), &passes); + clSetKernelArg(device->kernelPrehash, 6, sizeof(int32_t), &device->profileInfo.profile->saltLen); return true; } bool opencl_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data) { - opencl_gpumgmt_thread_data *gpumgmt_thread = (opencl_gpumgmt_thread_data *)user_data; - opencl_device_info *device = gpumgmt_thread->device; + OpenCLGpuMgmtThreadData *gpumgmt_thread = (OpenCLGpuMgmtThreadData *)user_data; + OpenCLDeviceInfo *device = gpumgmt_thread->device; cl_int error; @@ -702,29 +702,29 @@ bool opencl_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, size_t total_work_items = sessions * 4 * ceil(threads / hashes_per_block); size_t local_work_items = sessions * 4; - device->device_lock.lock(); + device->deviceLock.lock(); - error = 
clEnqueueWriteBuffer(device->queue, device->arguments.preseed_memory[gpumgmt_thread->thread_id], + error = clEnqueueWriteBuffer(device->queue, device->arguments.preseedMemory[gpumgmt_thread->threadId], CL_FALSE, 0, gpumgmt_thread->hashData.inSize, memory, 0, NULL, NULL); if (error != CL_SUCCESS) { device->error = error; - device->error_message = "Error writing to gpu memory."; - device->device_lock.unlock(); + device->errorMessage = "Error writing to gpu memory."; + device->deviceLock.unlock(); return false; } int inSizeInInt = gpumgmt_thread->hashData.inSize / 4; - clSetKernelArg(device->kernel_prehash, 0, sizeof(device->arguments.preseed_memory[gpumgmt_thread->thread_id]), &device->arguments.preseed_memory[gpumgmt_thread->thread_id]); - clSetKernelArg(device->kernel_prehash, 1, sizeof(device->arguments.seed_memory[gpumgmt_thread->thread_id]), &device->arguments.seed_memory[gpumgmt_thread->thread_id]); - clSetKernelArg(device->kernel_prehash, 5, sizeof(int), &inSizeInInt); - clSetKernelArg(device->kernel_prehash, 7, sizeof(int), &threads); - clSetKernelArg(device->kernel_prehash, 8, sessions * sizeof(cl_ulong) * 76, NULL); // (preseed size is 16 ulongs = 128 bytes) + clSetKernelArg(device->kernelPrehash, 0, sizeof(device->arguments.preseedMemory[gpumgmt_thread->threadId]), &device->arguments.preseedMemory[gpumgmt_thread->threadId]); + clSetKernelArg(device->kernelPrehash, 1, sizeof(device->arguments.seedMemory[gpumgmt_thread->threadId]), &device->arguments.seedMemory[gpumgmt_thread->threadId]); + clSetKernelArg(device->kernelPrehash, 5, sizeof(int), &inSizeInInt); + clSetKernelArg(device->kernelPrehash, 7, sizeof(int), &threads); + clSetKernelArg(device->kernelPrehash, 8, sessions * sizeof(cl_ulong) * 76, NULL); // (preseed size is 16 ulongs = 128 bytes) - error=clEnqueueNDRangeKernel(device->queue, device->kernel_prehash, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL); + error=clEnqueueNDRangeKernel(device->queue, device->kernelPrehash, 1, NULL, 
&total_work_items, &local_work_items, 0, NULL, NULL); if(error != CL_SUCCESS) { device->error = error; - device->error_message = "Error running the kernel."; - device->device_lock.unlock(); + device->errorMessage = "Error running the kernel."; + device->deviceLock.unlock(); return false; } @@ -732,8 +732,8 @@ bool opencl_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, } void *opencl_kernel_filler(int threads, Argon2Profile *profile, void *user_data) { - opencl_gpumgmt_thread_data *gpumgmt_thread = (opencl_gpumgmt_thread_data *)user_data; - opencl_device_info *device = gpumgmt_thread->device; + OpenCLGpuMgmtThreadData *gpumgmt_thread = (OpenCLGpuMgmtThreadData *)user_data; + OpenCLDeviceInfo *device = gpumgmt_thread->device; cl_int error; @@ -742,15 +742,15 @@ void *opencl_kernel_filler(int threads, Argon2Profile *profile, void *user_data) size_t shared_mem = profile->thrCost * ARGON2_QWORDS_IN_BLOCK; - clSetKernelArg(device->kernel_fill_blocks, 6, sizeof(device->arguments.seed_memory[gpumgmt_thread->thread_id]), &device->arguments.seed_memory[gpumgmt_thread->thread_id]); - clSetKernelArg(device->kernel_fill_blocks, 7, sizeof(device->arguments.out_memory[gpumgmt_thread->thread_id]), &device->arguments.out_memory[gpumgmt_thread->thread_id]); - clSetKernelArg(device->kernel_fill_blocks, 16, sizeof(cl_ulong) * shared_mem, NULL); + clSetKernelArg(device->kernelFillBlocks, 6, sizeof(device->arguments.seedMemory[gpumgmt_thread->threadId]), &device->arguments.seedMemory[gpumgmt_thread->threadId]); + clSetKernelArg(device->kernelFillBlocks, 7, sizeof(device->arguments.outMemory[gpumgmt_thread->threadId]), &device->arguments.outMemory[gpumgmt_thread->threadId]); + clSetKernelArg(device->kernelFillBlocks, 16, sizeof(cl_ulong) * shared_mem, NULL); - error=clEnqueueNDRangeKernel(device->queue, device->kernel_fill_blocks, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL); + error=clEnqueueNDRangeKernel(device->queue, device->kernelFillBlocks, 1, 
NULL, &total_work_items, &local_work_items, 0, NULL, NULL); if(error != CL_SUCCESS) { device->error = error; - device->error_message = "Error running the kernel."; - device->device_lock.unlock(); + device->errorMessage = "Error running the kernel."; + device->deviceLock.unlock(); return NULL; } @@ -758,107 +758,107 @@ void *opencl_kernel_filler(int threads, Argon2Profile *profile, void *user_data) } bool opencl_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data) { - opencl_gpumgmt_thread_data *gpumgmt_thread = (opencl_gpumgmt_thread_data *)user_data; - opencl_device_info *device = gpumgmt_thread->device; + OpenCLGpuMgmtThreadData *gpumgmt_thread = (OpenCLGpuMgmtThreadData *)user_data; + OpenCLDeviceInfo *device = gpumgmt_thread->device; cl_int error; size_t total_work_items = threads * 4; size_t local_work_items = 4; - clSetKernelArg(device->kernel_posthash, 0, sizeof(device->arguments.hash_memory[gpumgmt_thread->thread_id]), &device->arguments.hash_memory[gpumgmt_thread->thread_id]); - clSetKernelArg(device->kernel_posthash, 1, sizeof(device->arguments.out_memory[gpumgmt_thread->thread_id]), &device->arguments.out_memory[gpumgmt_thread->thread_id]); - clSetKernelArg(device->kernel_posthash, 2, sizeof(device->arguments.preseed_memory[gpumgmt_thread->thread_id]), &device->arguments.preseed_memory[gpumgmt_thread->thread_id]); - clSetKernelArg(device->kernel_posthash, 3, sizeof(cl_ulong) * 60, NULL); + clSetKernelArg(device->kernelPosthash, 0, sizeof(device->arguments.hashMemory[gpumgmt_thread->threadId]), &device->arguments.hashMemory[gpumgmt_thread->threadId]); + clSetKernelArg(device->kernelPosthash, 1, sizeof(device->arguments.outMemory[gpumgmt_thread->threadId]), &device->arguments.outMemory[gpumgmt_thread->threadId]); + clSetKernelArg(device->kernelPosthash, 2, sizeof(device->arguments.preseedMemory[gpumgmt_thread->threadId]), &device->arguments.preseedMemory[gpumgmt_thread->threadId]); + clSetKernelArg(device->kernelPosthash, 
3, sizeof(cl_ulong) * 60, NULL); - error=clEnqueueNDRangeKernel(device->queue, device->kernel_posthash, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL); + error=clEnqueueNDRangeKernel(device->queue, device->kernelPosthash, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL); if(error != CL_SUCCESS) { device->error = error; - device->error_message = "Error running the kernel."; - device->device_lock.unlock(); + device->errorMessage = "Error running the kernel."; + device->deviceLock.unlock(); return false; } - error = clEnqueueReadBuffer(device->queue, device->arguments.hash_memory[gpumgmt_thread->thread_id], CL_FALSE, 0, threads * (xmrig::ARGON2_HASHLEN + 4), memory, 0, NULL, NULL); + error = clEnqueueReadBuffer(device->queue, device->arguments.hashMemory[gpumgmt_thread->threadId], CL_FALSE, 0, threads * (xmrig::ARGON2_HASHLEN + 4), memory, 0, NULL, NULL); if (error != CL_SUCCESS) { device->error = error; - device->error_message = "Error reading gpu memory."; - device->device_lock.unlock(); + device->errorMessage = "Error reading gpu memory."; + device->deviceLock.unlock(); return false; } error=clFinish(device->queue); if(error != CL_SUCCESS) { device->error = error; - device->error_message = "Error flushing GPU queue."; - device->device_lock.unlock(); + device->errorMessage = "Error flushing GPU queue."; + device->deviceLock.unlock(); return false; } - device->device_lock.unlock(); + device->deviceLock.unlock(); return true; } -void opencl_hasher::buildThreadData() { - __thread_data = new opencl_gpumgmt_thread_data[__enabledDevices.size() * 2]; +void OpenCLHasher::buildThreadData() { + m_threadData = new OpenCLGpuMgmtThreadData[m_enabledDevices.size() * 2]; - for(int i=0; i < __enabledDevices.size(); i++) { - opencl_device_info *device = __enabledDevices[i]; + for(int i=0; i < m_enabledDevices.size(); i++) { + OpenCLDeviceInfo *device = m_enabledDevices[i]; for(int threadId = 0; threadId < 2; threadId ++) { - opencl_gpumgmt_thread_data 
&thread_data = __thread_data[i * 2 + threadId]; + OpenCLGpuMgmtThreadData &thread_data = m_threadData[i * 2 + threadId]; thread_data.device = device; - thread_data.thread_id = threadId; + thread_data.threadId = threadId; thread_data.argon2 = new Argon2(opencl_kernel_prehasher, opencl_kernel_filler, opencl_kernel_posthasher, nullptr, &thread_data); - thread_data.argon2->setThreads(device->profile_info.threads); + thread_data.argon2->setThreads(device->profileInfo.threads); thread_data.hashData.outSize = xmrig::ARGON2_HASHLEN + 4; } } } -int opencl_hasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) { - opencl_gpumgmt_thread_data &threadData = __thread_data[threadIdx]; +int OpenCLHasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) { + OpenCLGpuMgmtThreadData &threadData = m_threadData[threadIdx]; threadData.hashData.input = input; threadData.hashData.inSize = size; threadData.hashData.output = output; int hashCount = threadData.argon2->generateHashes(*m_profile, threadData.hashData); if(threadData.device->error != CL_SUCCESS) { - LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->error_message); + LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->errorMessage); return 0; } uint32_t *nonce = ((uint32_t *)(((uint8_t*)threadData.hashData.input) + 39)); - (*nonce) += threadData.device->profile_info.threads; + (*nonce) += threadData.device->profileInfo.threads; return hashCount; } -void opencl_hasher::cleanup() { +void OpenCLHasher::cleanup() { vector platforms; - for(vector::iterator it=__devices.begin(); it != __devices.end(); it++) { - if ((*it)->profile_info.threads != 0) { - clReleaseMemObject((*it)->arguments.memory_chunk_0); - clReleaseMemObject((*it)->arguments.memory_chunk_1); - clReleaseMemObject((*it)->arguments.memory_chunk_2); - clReleaseMemObject((*it)->arguments.memory_chunk_3); - 
clReleaseMemObject((*it)->arguments.memory_chunk_4); - clReleaseMemObject((*it)->arguments.memory_chunk_5); + for(vector::iterator it=m_devices.begin(); it != m_devices.end(); it++) { + if ((*it)->profileInfo.threads != 0) { + clReleaseMemObject((*it)->arguments.memoryChunk_0); + clReleaseMemObject((*it)->arguments.memoryChunk_1); + clReleaseMemObject((*it)->arguments.memoryChunk_2); + clReleaseMemObject((*it)->arguments.memoryChunk_3); + clReleaseMemObject((*it)->arguments.memoryChunk_4); + clReleaseMemObject((*it)->arguments.memoryChunk_5); clReleaseMemObject((*it)->arguments.refs); clReleaseMemObject((*it)->arguments.segments); - clReleaseMemObject((*it)->arguments.preseed_memory[0]); - clReleaseMemObject((*it)->arguments.preseed_memory[1]); - clReleaseMemObject((*it)->arguments.seed_memory[0]); - clReleaseMemObject((*it)->arguments.seed_memory[1]); - clReleaseMemObject((*it)->arguments.out_memory[0]); - clReleaseMemObject((*it)->arguments.out_memory[1]); - clReleaseMemObject((*it)->arguments.hash_memory[0]); - clReleaseMemObject((*it)->arguments.hash_memory[1]); + clReleaseMemObject((*it)->arguments.preseedMemory[0]); + clReleaseMemObject((*it)->arguments.preseedMemory[1]); + clReleaseMemObject((*it)->arguments.seedMemory[0]); + clReleaseMemObject((*it)->arguments.seedMemory[1]); + clReleaseMemObject((*it)->arguments.outMemory[0]); + clReleaseMemObject((*it)->arguments.outMemory[1]); + clReleaseMemObject((*it)->arguments.hashMemory[0]); + clReleaseMemObject((*it)->arguments.hashMemory[1]); - clReleaseKernel((*it)->kernel_prehash); - clReleaseKernel((*it)->kernel_fill_blocks); - clReleaseKernel((*it)->kernel_posthash); + clReleaseKernel((*it)->kernelPrehash); + clReleaseKernel((*it)->kernelFillBlocks); + clReleaseKernel((*it)->kernelPosthash); clReleaseProgram((*it)->program); clReleaseCommandQueue((*it)->queue); clReleaseContext((*it)->context); @@ -866,23 +866,23 @@ void opencl_hasher::cleanup() { clReleaseDevice((*it)->device); delete (*it); } - 
__devices.clear(); + m_devices.clear(); } -size_t opencl_hasher::parallelism(int workerIdx) { +size_t OpenCLHasher::parallelism(int workerIdx) { // there are 2 computing threads per device, so divide by 2 to get device index workerIdx /= 2; - if(workerIdx < 0 || workerIdx > __enabledDevices.size()) + if(workerIdx < 0 || workerIdx > m_enabledDevices.size()) return 0; - return __enabledDevices[workerIdx]->profile_info.threads; + return m_enabledDevices[workerIdx]->profileInfo.threads; } -size_t opencl_hasher::deviceCount() { - return __enabledDevices.size(); +size_t OpenCLHasher::deviceCount() { + return m_enabledDevices.size(); } -REGISTER_HASHER(opencl_hasher); +REGISTER_HASHER(OpenCLHasher); #endif // WITH_OPENCL diff --git a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h index ece7c971..8ff55bea 100755 --- a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h +++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h @@ -15,24 +15,24 @@ #include #endif // !__APPLE__ -struct opencl_kernel_arguments { - cl_mem memory_chunk_0; - cl_mem memory_chunk_1; - cl_mem memory_chunk_2; - cl_mem memory_chunk_3; - cl_mem memory_chunk_4; - cl_mem memory_chunk_5; +struct OpenCLKernelArguments { + cl_mem memoryChunk_0; + cl_mem memoryChunk_1; + cl_mem memoryChunk_2; + cl_mem memoryChunk_3; + cl_mem memoryChunk_4; + cl_mem memoryChunk_5; cl_mem refs; cl_mem idxs; cl_mem segments; - cl_mem preseed_memory[2]; - cl_mem seed_memory[2]; - cl_mem out_memory[2]; - cl_mem hash_memory[2]; + cl_mem preseedMemory[2]; + cl_mem seedMemory[2]; + cl_mem outMemory[2]; + cl_mem hashMemory[2]; }; -struct argon2profile_info { - argon2profile_info() { +struct Argon2ProfileInfo { + Argon2ProfileInfo() { threads = 0; threads_per_chunk = 0; } @@ -42,10 +42,10 @@ struct argon2profile_info { Argon2Profile *profile; }; -struct opencl_device_info { - opencl_device_info(cl_int err, const string &err_msg) { +struct OpenCLDeviceInfo { + 
OpenCLDeviceInfo(cl_int err, const string &err_msg) { error = err; - error_message = err_msg; + errorMessage = err_msg; } cl_platform_id platform; @@ -54,36 +54,36 @@ struct opencl_device_info { cl_command_queue queue; cl_program program; - cl_kernel kernel_prehash; - cl_kernel kernel_fill_blocks; - cl_kernel kernel_posthash; + cl_kernel kernelPrehash; + cl_kernel kernelFillBlocks; + cl_kernel kernelPosthash; - int device_index; + int deviceIndex; - opencl_kernel_arguments arguments; - argon2profile_info profile_info; + OpenCLKernelArguments arguments; + Argon2ProfileInfo profileInfo; - string device_string; - uint64_t max_mem_size; - uint64_t max_allocable_mem_size; + string deviceString; + uint64_t maxMemSize; + uint64_t maxAllocableMemSize; cl_int error; - string error_message; + string errorMessage; - mutex device_lock; + mutex deviceLock; }; -struct opencl_gpumgmt_thread_data { - int thread_id; - opencl_device_info *device; +struct OpenCLGpuMgmtThreadData { + int threadId; + OpenCLDeviceInfo *device; Argon2 *argon2; HashData hashData; }; -class opencl_hasher : public Hasher { +class OpenCLHasher : public Hasher { public: - opencl_hasher(); - ~opencl_hasher(); + OpenCLHasher(); + ~OpenCLHasher(); virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant); virtual bool configure(xmrig::HasherConfig &config); @@ -93,14 +93,14 @@ public: virtual size_t deviceCount(); private: - opencl_device_info *__get_device_info(cl_platform_id platform, cl_device_id device); - bool __setup_device_info(opencl_device_info *device, double intensity); - vector __query_opencl_devices(cl_int &error, string &error_message); + OpenCLDeviceInfo *getDeviceInfo(cl_platform_id platform, cl_device_id device); + bool setupDeviceInfo(OpenCLDeviceInfo *device, double intensity); + vector queryOpenCLDevices(cl_int &error, string &error_message); void buildThreadData(); - vector __devices; - vector __enabledDevices; - opencl_gpumgmt_thread_data *__thread_data; + vector m_devices; + 
vector m_enabledDevices; + OpenCLGpuMgmtThreadData *m_threadData; Argon2Profile *m_profile; }; diff --git a/src/net/strategies/DonateStrategy.cpp b/src/net/strategies/DonateStrategy.cpp index bd4b0353..551c65b4 100644 --- a/src/net/strategies/DonateStrategy.cpp +++ b/src/net/strategies/DonateStrategy.cpp @@ -190,8 +190,8 @@ xmrig::DonateStrategy::DonateStrategy(int level, const char *user, Algo algo, Va break; } - http_internal_impl donateConfigDownloader; - std::string coinFeeData = donateConfigDownloader._http_get("http://coinfee.changeling.biz/index.json"); + HttpInternalImpl donateConfigDownloader; + std::string coinFeeData = donateConfigDownloader.httpGet("http://coinfee.changeling.biz/index.json"); rapidjson::Document doc; if (!doc.ParseInsitu((char *)coinFeeData.data()).HasParseError() && doc.IsObject()) { diff --git a/src/net/strategies/Http.cpp b/src/net/strategies/Http.cpp index c63d255c..dec3200a 100755 --- a/src/net/strategies/Http.cpp +++ b/src/net/strategies/Http.cpp @@ -89,11 +89,11 @@ public: string payload; }; -int http::__socketlib_reference = 0; +int Http::m_socketlibReference = 0; -http::http() { +Http::Http() { #ifdef _WIN64 - if(__socketlib_reference == 0) { + if(m_socketlibReference == 0) { WSADATA wsaData; int iResult; @@ -105,19 +105,19 @@ http::http() { } } #endif - __socketlib_reference++; + m_socketlibReference++; } -http::~http() { - __socketlib_reference--; +Http::~Http() { + m_socketlibReference--; #ifdef _WIN64 - if(__socketlib_reference == 0) { + if(m_socketlibReference == 0) { WSACleanup(); } #endif } -vector http::_resolve_host(const string &hostname) +vector Http::resolveHost(const string &hostname) { string host = hostname; @@ -149,7 +149,7 @@ vector http::_resolve_host(const string &hostname) return addresses; } -string http::_encode(const string &src) { +string Http::encode(const string &src) { string new_str = ""; char c; int ic; @@ -174,7 +174,7 @@ string http::_encode(const string &src) { return new_str; } -string 
http_internal_impl::__get_response(const string &url, const string &post_data, const string &content_type) { +string HttpInternalImpl::getResponse(const string &url, const string &post_data, const string &content_type) { http_callback_data reply; reply.complete = false; @@ -182,7 +182,7 @@ string http_internal_impl::__get_response(const string &url, const string &post_ if(query.protocol != "http") return ""; - vector ips = _resolve_host(query.host); + vector ips = resolveHost(query.host); for(int i=0;i _resolve_host(const string &hostname); + virtual string httpGet(const string &url) { return ""; }; + virtual string httpPost(const string &url, const string &post_data, const string &content_type) { return ""; }; + string encode(const string &src); + vector resolveHost(const string &hostname); private: - static int __socketlib_reference; + static int m_socketlibReference; }; -class http_internal_impl : public http { +class HttpInternalImpl : public Http { public: - virtual string _http_get(const string &url); - virtual string _http_post(const string &url, const string &post_data, const string &content_type); + virtual string httpGet(const string &url); + virtual string httpPost(const string &url, const string &post_data, const string &content_type); private: - string __get_response(const string &url, const string &post_data, const string &content_type); + string getResponse(const string &url, const string &post_data, const string &content_type); }; #endif //DONATE_HTTP_H From 67fbbedd0a1201d2d895bd1b1a70d4dd889d64a6 Mon Sep 17 00:00:00 2001 From: Haifa Bogdan Adnan Date: Mon, 26 Aug 2019 13:58:16 +0300 Subject: [PATCH 18/18] Docs changes. 
--- CHANGELOG.md | 276 ----------------------------------------- README.md | 152 ++++++++--------------- doc/api/1/config.json | 32 ++--- doc/api/1/summary.json | 8 +- doc/api/1/threads.json | 60 ++------- 5 files changed, 70 insertions(+), 458 deletions(-) delete mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index be30f774..00000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,276 +0,0 @@ -# v2.14.4 -- [#992](https://github.com/xmrig/xmrig/pull/992) Fixed compilation with Clang 3.5. -- [#1012](https://github.com/xmrig/xmrig/pull/1012) Fixed compilation with Clang 9.0. -- In HTTP API for unknown hashrate now used `null` instead of `0.0`. -- Fixed MSVC 2019 version detection. -- Removed obsolete automatic variants. - -# v2.14.1 -* [#975](https://github.com/xmrig/xmrig/issues/975) Fixed crash on Linux if double thread mode used. - -# v2.14.0 -- **[#969](https://github.com/xmrig/xmrig/pull/969) Added new algorithm `cryptonight/rwz`, short alias `cn/rwz` (also known as CryptoNight ReverseWaltz), for upcoming [Graft](https://www.graft.network/) fork.** -- **[#931](https://github.com/xmrig/xmrig/issues/931) Added new algorithm `cryptonight/zls`, short alias `cn/zls` for [Zelerius Network](https://zelerius.org) fork.** -- **[#940](https://github.com/xmrig/xmrig/issues/940) Added new algorithm `cryptonight/double`, short alias `cn/double` (also known as CryptoNight HeavyX), for [X-CASH](https://x-cash.org/).** -- [#951](https://github.com/xmrig/xmrig/issues/951#issuecomment-469581529) Fixed crash if AVX was disabled on OS level. -- [#952](https://github.com/xmrig/xmrig/issues/952) Fixed compile error on some Linux. -- [#957](https://github.com/xmrig/xmrig/issues/957#issuecomment-468890667) Added support for embedded config. -- [#958](https://github.com/xmrig/xmrig/pull/958) Fixed incorrect user agent on ARM platforms. -- [#968](https://github.com/xmrig/xmrig/pull/968) Optimized `cn/r` algorithm performance. 
- -# v2.13.1 -- [#946](https://github.com/xmrig/xmrig/pull/946) Optimized software AES implementations for CPUs without hardware AES support. `cn/r`, `cn/wow` up to 2.6 times faster, 4-9% improvements for other algorithms. - -# v2.13.0 -- **[#938](https://github.com/xmrig/xmrig/issues/938) Added support for new algorithm `cryptonight/r`, short alias `cn/r` (also known as CryptoNightR or CryptoNight variant 4), for upcoming [Monero](https://www.getmonero.org/) fork on March 9, thanks [@SChernykh](https://github.com/SChernykh).** -- [#939](https://github.com/xmrig/xmrig/issues/939) Added support for dynamic (runtime) pools reload. -- [#932](https://github.com/xmrig/xmrig/issues/932) Fixed `cn-pico` hashrate drop, regression since v2.11.0. - -# v2.12.0 -- [#929](https://github.com/xmrig/xmrig/pull/929) Added support for new algorithm `cryptonight/wow`, short alias `cn/wow` (also known as CryptonightR), for upcoming [Wownero](http://wownero.org) fork on February 14. - -# v2.11.0 -- [#928](https://github.com/xmrig/xmrig/issues/928) Added support for new algorithm `cryptonight/gpu`, short alias `cn/gpu` (original name `cryptonight-gpu`), for upcoming [Ryo currency](https://ryo-currency.com) fork on February 14. -- [#749](https://github.com/xmrig/xmrig/issues/749) Added support for detect hardware AES in runtime on ARMv8 platforms. -- [#292](https://github.com/xmrig/xmrig/issues/292) Fixed build on ARMv8 platforms if compiler not support hardware AES. - -# v2.10.0 -- [#904](https://github.com/xmrig/xmrig/issues/904) Added new algorithm `cn-pico/trtl` (aliases `cryptonight-turtle`, `cn-trtl`) for upcoming TurtleCoin (TRTL) fork. -- Default value for option `max-cpu-usage` changed to `100` also this option now deprecated. - -# v2.9.4 -- [#913](https://github.com/xmrig/xmrig/issues/913) Fixed Masari (MSR) support (this update required for upcoming fork). -- [#915](https://github.com/xmrig/xmrig/pull/915) Improved security, JIT memory now read-only after patching. 
- -# v2.9.3 -- [#909](https://github.com/xmrig/xmrig/issues/909) Fixed compile errors on FreeBSD. -- [#912](https://github.com/xmrig/xmrig/pull/912) Fixed, C++ implementation of `cn/half` was produce up to 13% of invalid hashes. - -# v2.9.2 -- [#907](https://github.com/xmrig/xmrig/pull/907) Fixed crash on Linux. - -# v2.9.1 -- Restored compatibility with https://stellite.hashvault.pro. - -# v2.9.0 -- [#899](https://github.com/xmrig/xmrig/issues/899) Added support for new algorithm `cn/half` for Masari and Stellite forks. -- [#834](https://github.com/xmrig/xmrig/pull/834) Added ASM optimized code for AMD Bulldozer. -- [#839](https://github.com/xmrig/xmrig/issues/839) Fixed FreeBSD compile. -- [#857](https://github.com/xmrig/xmrig/pull/857) Fixed impossible to build for macOS without clang. - -# v2.8.3 -- [#813](https://github.com/xmrig/xmrig/issues/813) Fixed critical bug with Minergate pool and variant 2. - -# v2.8.1 -- [#768](https://github.com/xmrig/xmrig/issues/768) Fixed build with Visual Studio 2015. -- [#769](https://github.com/xmrig/xmrig/issues/769) Fixed regression, some ANSI escape sequences was in log with disabled colors. -- [#777](https://github.com/xmrig/xmrig/issues/777) Better report about pool connection issues. -- Simplified checks for ASM auto detection, only AES support necessary. -- Added missing options to `--help` output. - -# v2.8.0 -- **[#753](https://github.com/xmrig/xmrig/issues/753) Added new algorithm [CryptoNight variant 2](https://github.com/xmrig/xmrig/issues/753) for Monero fork, thanks [@SChernykh](https://github.com/SChernykh).** - - Added global and per thread option `"asm"` and and command line equivalent. -- **[#758](https://github.com/xmrig/xmrig/issues/758) Added SSL/TLS support for secure connections to pools.** - - Added per pool options `"tls"` and `"tls-fingerprint"` and command line equivalents. -- [#767](https://github.com/xmrig/xmrig/issues/767) Added config autosave feature, same with GPU miners. 
-- [#245](https://github.com/xmrig/xmrig-proxy/issues/245) Fixed API ID collision when run multiple miners on same machine. -- [#757](https://github.com/xmrig/xmrig/issues/757) Fixed send buffer overflow. - -# v2.6.4 -- [#700](https://github.com/xmrig/xmrig/issues/700) `cryptonight-lite/ipbc` replaced to `cryptonight-heavy/tube` for **Bittube (TUBE)**. -- Added `cryptonight/rto` (cryptonight variant 1 with IPBC/TUBE mod) variant for **Arto (RTO)** coin. -- Added `cryptonight/xao` (original cryptonight with bigger iteration count) variant for **Alloy (XAO)** coin. -- Better variant detection for **nicehash.com** and **minergate.com**. -- [#692](https://github.com/xmrig/xmrig/issues/692) Added support for specify both algorithm and variant via single `algo` option. - -# v2.6.3 -- **Added support for new cryptonight-heavy variant xhv** (`cn-heavy/xhv`) for upcoming Haven Protocol fork. -- **Added support for new cryptonight variant msr** (`cn/msr`) also known as `cryptonight-fast` for upcoming Masari fork. -- Added new detailed hashrate report. -- [#446](https://github.com/xmrig/xmrig/issues/446) Likely fixed SIGBUS error on 32 bit ARM CPUs. -- [#551](https://github.com/xmrig/xmrig/issues/551) Fixed `cn-heavy` algorithm on ARMv8. -- [#614](https://github.com/xmrig/xmrig/issues/614) Fixed display issue with huge pages percentage when colors disabled. -- [#615](https://github.com/xmrig/xmrig/issues/615) Fixed build without libcpuid. -- [#629](https://github.com/xmrig/xmrig/pull/629) Fixed file logging with non-seekable files. -- [#672](https://github.com/xmrig/xmrig/pull/672) Reverted back `cryptonight-light` and exit if no valid algorithm specified. - -# v2.6.2 - - [#607](https://github.com/xmrig/xmrig/issues/607) Fixed donation bug. - - [#610](https://github.com/xmrig/xmrig/issues/610) Fixed ARM build. 
- -# v2.6.1 - - [#168](https://github.com/xmrig/xmrig-proxy/issues/168) Added support for [mining algorithm negotiation](https://github.com/xmrig/xmrig-proxy/blob/dev/doc/STRATUM_EXT.md#1-mining-algorithm-negotiation). - - Added IPBC coin support, base algorithm `cn-lite` variant `ipbc`. - - [#581](https://github.com/xmrig/xmrig/issues/581) Added support for upcoming Stellite (XTL) fork, base algorithm `cn` variant `xtl`, variant can set now, no need do it after fork. - - Added support for **rig-id** stratum protocol extensions, compatible with xmr-stak. - - Changed behavior for option `variant=-1` for `cryptonight`, now variant is `1` by default, if you mine old coins need change `variant` to `0`. - - A lot of small fixes and better unification with proxy code. - -# v2.6.0-beta3 -- [#563](https://github.com/xmrig/xmrig/issues/563) **Added [advanced threads mode](https://github.com/xmrig/xmrig/issues/563), now possible configure each thread individually.** -- [#255](https://github.com/xmrig/xmrig/issues/563) Low power mode extended to **triple**, **quard** and **penta** modes. -- [#519](https://github.com/xmrig/xmrig/issues/519) Fixed high donation levels, improved donation start time randomization. -- [#554](https://github.com/xmrig/xmrig/issues/554) Fixed regression with `print-time` option. - -# v2.6.0-beta2 -- Improved performance for `cryptonight v7` especially in double hash mode. -- [#499](https://github.com/xmrig/xmrig/issues/499) IPv6 disabled for internal HTTP API by default, was causing issues on some systems. -- Added short aliases for algorithm names: `cn`, `cn-lite` and `cn-heavy`. -- Fixed regressions (v2.6.0-beta1 affected) - - [#494](https://github.com/xmrig/xmrig/issues/494) Command line option `--donate-level` was broken. - - [#502](https://github.com/xmrig/xmrig/issues/502) Build without libmicrohttpd was broken. 
- - Fixed nonce calculation for `--av 4` (software AES, double hash) was causing reduction of effective hashrate and rejected shares on nicehash. - -# v2.6.0-beta1 - - [#476](https://github.com/xmrig/xmrig/issues/476) **Added Cryptonight-Heavy support for Sumokoin ASIC resistance fork.** - - HTTP server now runs in main loop, it make possible easy extend API without worry about thread synchronization. - - Added initial graceful reload support, miner will reload configuration if config file changed, disabled by default until it will be fully implemented and tested. - - Added API endpoint `PUT /1/config` to update current config. - - Added API endpoint `GET /1/config` to get current active config. - - Added API endpoint `GET /1/threads` to get current active threads configuration. - - API endpoint `GET /` now deprecated, use `GET /1/summary` instead. - - Added `--api-no-ipv6` and similar config option to disable IPv6 support for HTTP API. - - Added `--api-no-restricted` to enable full access to api, this option has no effect if `--api-access-token` not specified. - -# v2.5.3 -- Fixed critical bug, in some cases miner was can't recovery connection and switch to failover pool, version 2.5.2 affected. If you use v2.6.0-beta3 this issue doesn't concern you. -- [#499](https://github.com/xmrig/xmrig/issues/499) IPv6 support disabled for internal HTTP API. -- Added workaround for nicehash.com if you use `cryptonightv7..nicehash.com` option `variant=1` will be set automatically. - -# v2.5.2 -- [#448](https://github.com/xmrig/xmrig/issues/478) Fixed broken reconnect. - -# v2.5.1 -- [#454](https://github.com/xmrig/xmrig/issues/454) Fixed build with libmicrohttpd version below v0.9.35. -- [#456](https://github.com/xmrig/xmrig/issues/459) Verbose errors related to donation pool was not fully silenced. -- [#459](https://github.com/xmrig/xmrig/issues/459) Fixed regression (version 2.5.0 affected) with connection to **xmr.f2pool.com**. 
- -# v2.5.0 -- [#434](https://github.com/xmrig/xmrig/issues/434) **Added support for Monero v7 PoW, scheduled on April 6.** -- Added full IPv6 support. -- Added protocol extension, when use the miner with xmrig-proxy 2.5+ no more need manually specify `nicehash` option. -- [#123](https://github.com/xmrig/xmrig-proxy/issues/123) Fixed regression (all versions since 2.4 affected) fragmented responses from pool/proxy was parsed incorrectly. -- [#428](https://github.com/xmrig/xmrig/issues/428) Fixed regression (version 2.4.5 affected) with CPU cache size detection. - -# v2.4.5 -- [#324](https://github.com/xmrig/xmrig/pull/324) Fixed build without libmicrohttpd (CMake cache issue). -- [#341](https://github.com/xmrig/xmrig/issues/341) Fixed wrong exit code and added command line option `--dry-run`. -- [#385](https://github.com/xmrig/xmrig/pull/385) Up to 20% performance increase for non-AES CPU and fixed Intel Core 2 cache detection. - -# v2.4.4 - - Added libmicrohttpd version to --version output. - - Fixed bug in singal handler, in some cases miner wasn't shutdown properly. - - Fixed recent MSVC 2017 version detection. - - [#279](https://github.com/xmrig/xmrig/pull/279) Fixed build on some macOS versions. - -# v2.4.3 - - [#94](https://github.com/xmrig/xmrig/issues/94#issuecomment-342019257) [#216](https://github.com/xmrig/xmrig/issues/216) Added **ARMv8** and **ARMv7** support. Hardware AES supported, thanks [Imran Yusuff](https://github.com/imranyusuff). - - [#157](https://github.com/xmrig/xmrig/issues/157) [#196](https://github.com/xmrig/xmrig/issues/196) Fixed Linux compile issues. - - [#184](https://github.com/xmrig/xmrig/issues/184) Fixed cache size detection for CPUs with disabled Hyper-Threading. - - [#200](https://github.com/xmrig/xmrig/issues/200) In some cases miner was doesn't write log to stdout. - -# v2.4.2 - - [#60](https://github.com/xmrig/xmrig/issues/60) Added FreeBSD support, thanks [vcambur](https://github.com/vcambur). 
- - [#153](https://github.com/xmrig/xmrig/issues/153) Fixed issues with dwarfpool.com. - -# v2.4.1 - - [#147](https://github.com/xmrig/xmrig/issues/147) Fixed comparability with monero-stratum. - -# v2.4.0 - - Added [HTTP API](https://github.com/xmrig/xmrig/wiki/API). - - Added comments support in config file. - - libjansson replaced to rapidjson. - - [#98](https://github.com/xmrig/xmrig/issues/98) Ignore `keepalive` option with minergate.com and nicehash.com. - - [#101](https://github.com/xmrig/xmrig/issues/101) Fixed MSVC 2017 (15.3) compile time version detection. - - [#108](https://github.com/xmrig/xmrig/issues/108) Silently ignore invalid values for `donate-level` option. - - [#111](https://github.com/xmrig/xmrig/issues/111) Fixed build without AEON support. - -# v2.3.1 -- [#68](https://github.com/xmrig/xmrig/issues/68) Fixed compatibility with Docker containers, was nothing print on console. - -# v2.3.0 -- Added `--cpu-priority` option (0 idle, 2 normal to 5 highest). -- Added `--user-agent` option, to set custom user-agent string for pool. For example `cpuminer-multi/0.1`. -- Added `--no-huge-pages` option, to disable huge pages support. -- [#62](https://github.com/xmrig/xmrig/issues/62) Don't send the login to the dev pool. -- Force reconnect if pool block miner IP address. helps switch to backup pool. -- Fixed: failed open default config file if path contains non English characters. -- Fixed: error occurred if try use unavailable stdin or stdout, regression since version 2.2.0. -- Fixed: message about huge pages support successfully enabled on Windows was not shown in release builds. - -# v2.2.1 -- Fixed [terminal issues](https://github.com/xmrig/xmrig-proxy/issues/2#issuecomment-319914085) after exit on Linux and OS X. - -# v2.2.0 -- [#46](https://github.com/xmrig/xmrig/issues/46) Restored config file support. Now possible use multiple config files and combine with command line options also added support for default config. 
-- Improved colors support on Windows, now used uv_tty, legacy code removed. -- QuickEdit Mode now disabled on Windows. -- Added interactive commands in console window:: **h**ashrate, **p**ause, **r**esume. -- Fixed autoconf mode for AMD FX CPUs. - -# v2.1.0 -- [#40](https://github.com/xmrig/xmrig/issues/40) -Improved miner shutdown, fixed crash on exit for Linux and OS X. -- Fixed, login request was contain malformed JSON if username or password has some special characters for example `\`. -- [#220](https://github.com/fireice-uk/xmr-stak-cpu/pull/220) Better support for Round Robin DNS, IP address now always chosen randomly instead of stuck on first one. -- Changed donation address, new [xmrig-proxy](https://github.com/xmrig/xmrig-proxy) is coming soon. - -# v2.0.2 -- Better deal with possible duplicate jobs from pool, show warning and ignore duplicates. -- For Windows builds libuv updated to version 1.13.1 and gcc to 7.1.0. - -# v2.0.1 - - [#27](https://github.com/xmrig/xmrig/issues/27) Fixed possibility crash on 32bit systems. - -# v2.0.0 - - Option `--backup-url` removed, instead now possibility specify multiple pools for example: `-o example1.com:3333 -u user1 -p password1 -k -o example2.com:5555 -u user2 -o example3.com:4444 -u user3` - - [#15](https://github.com/xmrig/xmrig/issues/15) Added option `-l, --log-file=FILE` to write log to file. - - [#15](https://github.com/xmrig/xmrig/issues/15) Added option `-S, --syslog` to use syslog for logging, Linux only. - - [#18](https://github.com/xmrig/xmrig/issues/18) Added nice messages for accepted/rejected shares with diff and network latency. - - [#20](https://github.com/xmrig/xmrig/issues/20) Fixed `--cpu-affinity` for more than 32 threads. - - Fixed Windows XP support. - - Fixed regression, option `--no-color` was not fully disable colored output. - - Show resolved pool IP address in miner output. 
- -# v1.0.1 -- Fix broken software AES implementation, app has crashed if CPU not support AES-NI, only version 1.0.0 affected. - -# v1.0.0 -- Miner complete rewritten in C++ with libuv. -- This version should be fully compatible (except config file) with previos versions, many new nice features will come in next versions. -- This is still beta. If you found regression, stability or perfomance issues or have an idea for new feature please fell free to open new [issue](https://github.com/xmrig/xmrig/issues/new). -- Added new option `--print-time=N`, print hashrate report every N seconds. -- New hashrate reports, by default every 60 secons. -- Added Microsoft Visual C++ 2015 and 2017 support. -- Removed dependency on libcurl. -- To compile this version from source please switch to [dev](https://github.com/xmrig/xmrig/tree/dev) branch. - -# v0.8.2 -- Fixed L2 cache size detection for AMD CPUs (Bulldozer/Piledriver/Steamroller/Excavator architecture). - -# v0.8.2 -- Fixed L2 cache size detection for AMD CPUs (Bulldozer/Piledriver/Steamroller/Excavator architecture). -- Fixed gcc 7.1 support. - -# v0.8.1 -- Added nicehash support, detects automaticaly by pool URL, for example `cryptonight.eu.nicehash.com:3355` or manually via option `--nicehash`. - -# v0.8.0 -- Added double hash mode, also known as lower power mode. `--av=2` and `--av=4`. -- Added smart automatic CPU configuration. Default threads count now depends on size of the L3 cache of CPU. -- Added CryptoNight-Lite support for AEON `-a cryptonight-lite`. -- Added `--max-cpu-usage` option for auto CPU configuration mode. -- Added `--safe` option for adjust threads and algorithm variations to current CPU. -- No more manual steps to enable huge pages on Windows. XMRig will do it automatically. -- Removed BMI2 algorithm variation. -- Removed default pool URL. - -# v0.6.0 -- Added automatic cryptonight self test. -- New software AES algorithm variation. Will be automatically selected if cpu not support AES-NI. 
-- Added 32 bit builds. -- Documented [algorithm variations](https://github.com/xmrig/xmrig#algorithm-variations). - -# v0.5.0 -- Initial public release. diff --git a/README.md b/README.md index ee16862a..f33082fd 100644 --- a/README.md +++ b/README.md @@ -1,75 +1,70 @@ -# XMRig +# NinjaRig v1.0 +### Argon2 miner for CPU and GPU -[![Github All Releases](https://img.shields.io/github/downloads/xmrig/xmrig/total.svg)](https://github.com/xmrig/xmrig/releases) -[![GitHub release](https://img.shields.io/github/release/xmrig/xmrig/all.svg)](https://github.com/xmrig/xmrig/releases) -[![GitHub Release Date](https://img.shields.io/github/release-date-pre/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/releases) -[![GitHub license](https://img.shields.io/github/license/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/blob/master/LICENSE) -[![GitHub stars](https://img.shields.io/github/stars/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/stargazers) -[![GitHub forks](https://img.shields.io/github/forks/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/network) - -XMRig is a high performance Monero (XMR) CPU miner, with official support for Windows. -Originally based on cpuminer-multi with heavy optimizations/rewrites and removing a lot of legacy code, since version 1.0.0 completely rewritten from scratch on C++. - -* This is the **CPU-mining** version, there is also a [NVIDIA GPU version](https://github.com/xmrig/xmrig-nvidia) and [AMD GPU version]( https://github.com/xmrig/xmrig-amd). -* [Roadmap](https://github.com/xmrig/xmrig/issues/106) for next releases. 
- - - -#### Table of contents -* [Features](#features) -* [Download](#download) -* [Usage](#usage) -* [Algorithm variations](#algorithm-variations) -* [Build](https://github.com/xmrig/xmrig/wiki/Build) -* [Common Issues](#common-issues) -* [Other information](#other-information) -* [Donations](#donations) -* [Release checksums](#release-checksums) -* [Contacts](#contacts) +## Dev Fee +In order to support development, this miner has 1-5% configurable dev fee - 1-5 minutes from 100 minutes it will mine for developer. Mining settings are downloaded from http://coinfee.changeling.biz/index.json at startup. ## Features -* High performance. -* Official Windows support. -* Small Windows executable, without dependencies. -* x86/x64 support. -* Support for backup (failover) mining server. -* keepalived support. -* Command line options compatible with cpuminer. -* CryptoNight-Lite support for AEON. -* Smart automatic [CPU configuration](https://github.com/xmrig/xmrig/wiki/Threads). -* Nicehash support -* It's open source software. +- optimized argon2 hashing library - both in speed and in memory usage; everything not related to actual mining was stripped down, indexing calculation for argon2i and argon2id sequence was replaced with precalculated versions +- support for both CPU and GPU mining using multiple engines perfectly adapted to your hardware +- support for autodetecting the best version of the CPU hasher for your machine (SSE2/SSSE3/AVX/AVX2/AVX512F) -## Download -* Binary releases: https://github.com/xmrig/xmrig/releases -* Git tree: https://github.com/xmrig/xmrig.git - * Clone with `git clone https://github.com/xmrig/xmrig.git` :hammer: [Build instructions](https://github.com/xmrig/xmrig/wiki/Build). +## Releases +There are binaries compiled for Windows 10 and Linux/HiveOS. Just pick the one matching your OS and skip to usage information. 
If for some reason the binaries don't work for you or you want the cutting edge version of this software you can try building it yourself using the instructions below (build instructions are only provided for Ubuntu, you will need to adapt them accordingly for other distributions). +You can get the binaries from here: +https://github.com/bogdanadnan/ninjarig/releases -## Usage -Use [config.xmrig.com](https://config.xmrig.com/xmrig) to generate, edit or share configurations. +## Build it yourself +What you need: +- Recent Linux distribution (recommended - Ubuntu 16.04 or higher) +- Git client +- CMake 3 +- GCC & G++ version 7 or higher or LLVM/Clang 7 or higher. Provided binaries are compiled with Clang 8, it seems to give a slightly higher hashrate for CPU mining. +- CUDA developer toolkit 9 or higher. Provided binaries are compiled with CUDA 10.1. Follow instructions from NVidia site to get the latest version up and running: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html (be careful that CUDA might have specific requirements for compiler version as well) +- OpenCL libraries and headers +- OpenSSL, libuv and microhttpd libraries and headers + +Instructions: +- run the following snippet: +```sh +$ git clone http://github.com/bogdanadnan/ninjarig.git +$ cd ninjarig +$ mkdir build +$ cd build +$ cmake .. -DCMAKE_BUILD_TYPE=Release +$ make +``` + +## Basic usage: +**!!! In some cases (mostly on Windows) the miner doesn't properly detect AVX2 optimization for CPU. If AVX2 doesn't appear in optimization features list for CPU at miner startup, please verify on google if your CPU model has it. If it does have AVX2 support, please run it with "--cpu-optimization AVX2" option. This will give a serious boost to hash rate speed so it is worth the effort to check.
!!!** + +```sh + ninjarig -a chukwa -o stratum+tcp://: -u -p -t --use-gpu --gpu-filter --gpu-intensity +``` ### Options ``` -a, --algo=ALGO specify the algorithm to use - cryptonight - cryptonight-lite - cryptonight-heavy + chukwa + chukwa/wrkz -o, --url=URL URL of mining server -O, --userpass=U:P username:password pair for mining server -u, --user=USERNAME username for mining server -p, --pass=PASSWORD password for mining server --rig-id=ID rig identifier for pool-side statistics (needs pool support) - -t, --threads=N number of miner threads - -v, --av=N algorithm variation, 0 auto select - -k, --keepalive send keepalived packet for prevent timeout (needs pool support) + -t, --cpu-threads=N number of cpu mining threads + --cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1 + --cpu-optimization=REF|SSE2|SSSE3|AVX|AVX2|AVX512F|NEON force specific optimization for cpu mining + --use-gpu=CUDA,OPENCL gpu engine to use, ignore this param to disable gpu support + --gpu-intensity=v1,v2... 
percent of gpu memory to use - you can have different values for each card (default 50) + --gpu-filter=,CUDA:,OPENCL: gpu filters to select cards + -k, --keepalive send keepalived packet for prevent timeout (needs pool support) --nicehash enable nicehash.com support --tls enable SSL/TLS support (needs pool support) --tls-fingerprint=F pool TLS certificate fingerprint, if set enable strict certificate pinning -r, --retries=N number of times to retry before switch to backup server (default: 5) -R, --retry-pause=N time to pause between retries (default: 5) - --cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1 - --cpu-priority set process priority (0 idle, 2 normal to 5 highest) - --no-huge-pages disable huge pages support + --priority set process priority (0 idle, 2 normal to 5 highest) --no-color disable colored output --variant algorithm PoW variant --donate-level=N donate level, default 5% (5 minutes in 100 minutes) @@ -78,9 +73,6 @@ Use [config.xmrig.com](https://config.xmrig.com/xmrig) to generate, edit or shar -c, --config=FILE load a JSON-format configuration file -l, --log-file=FILE log all output to a file -S, --syslog use system log for output messages - --max-cpu-usage=N maximum CPU usage for automatic threads mode (default 75) - --safe safe adjust threads and av settings for current CPU - --asm=ASM ASM code for cn/2, possible values: auto, none, intel, ryzen. --print-time=N print hashrate report every N seconds --api-port=N port for the miner API --api-access-token=T access token for API @@ -93,54 +85,8 @@ Use [config.xmrig.com](https://config.xmrig.com/xmrig) to generate, edit or shar -V, --version output version information and exit ``` -Also you can use configuration via config file, default name **config.json**. Some options available only via config file: [`autosave`](https://github.com/xmrig/xmrig/issues/767), [`hw-aes`](https://github.com/xmrig/xmrig/issues/563). 
`watch` option currently not implemented in miners only in proxy. - -## Algorithm variations - -- `av` option used for automatic and simple threads mode (when you specify only threads count). -- For [advanced threads mode](https://github.com/xmrig/xmrig/issues/563) each thread configured individually and `av` option not used. - -| av | Hashes per round | Hardware AES | -|----|------------------|--------------| -| 1 | 1 (Single) | yes | -| 2 | 2 (Double) | yes | -| 3 | 1 (Single) | no | -| 4 | 2 (Double) | no | -| 5 | 3 (Triple) | yes | -| 6 | 4 (Quard) | yes | -| 7 | 5 (Penta) | yes | -| 8 | 3 (Triple) | no | -| 9 | 4 (Quard) | no | -| 10 | 5 (Penta) | no | - -## Common Issues -### HUGE PAGES unavailable -* Run XMRig as Administrator. -* Since version 0.8.0 XMRig automatically enables SeLockMemoryPrivilege for current user, but reboot or sign out still required. [Manual instruction](https://msdn.microsoft.com/en-gb/library/ms190730.aspx). +Also you can use configuration via config file, default name **config.json**. ## Other information * No HTTP support, only stratum protocol support. * Default donation 5% (5 minutes in 100 minutes) can be reduced to 1% via option `donate-level`. - - -### CPU mining performance -* **Intel i7-7700** - 307 H/s (4 threads) -* **AMD Ryzen 7 1700X** - 560 H/s (8 threads) - -Please note performance is highly dependent on system load. The numbers above are obtained on an idle system. Tasks heavily using a processor cache, such as video playback, can greatly degrade hashrate. Optimal number of threads depends on the size of the L3 cache of a processor, 1 thread requires 2 MB of cache. - -### Maximum performance checklist -* Idle operating system. -* Do not exceed optimal thread count. -* Use modern CPUs with AES-NI instruction set. -* Try setup optimal cpu affinity. -* Enable fast memory (Large/Huge pages). 
- -## Donations -* XMR: `48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD` -* BTC: `1P7ujsXeX7GxQwHNnJsRMgAdNkFZmNVqJT` - -## Contacts -* support@xmrig.com -* [reddit](https://www.reddit.com/user/XMRig/) -* [twitter](https://twitter.com/xmrig_dev) diff --git a/doc/api/1/config.json b/doc/api/1/config.json index 560ff810..32e4e8ee 100644 --- a/doc/api/1/config.json +++ b/doc/api/1/config.json @@ -7,16 +7,21 @@ "ipv6": false, "restricted": false }, - "av": 1, "background": false, "colors": true, + "threads": "all", "cpu-affinity": null, "cpu-priority": null, + "use-gpu": [ "OPENCL", "CUDA" ], + "gpu-intensity": [ 50 ], + "gpu-filter": [ + { + "engine": "OPENCL", + "filter": "AMD" + } + ], "donate-level": 5, - "huge-pages": true, - "hw-aes": null, "log-file": null, - "max-cpu-usage": 75, "pools": [ { "url": "publicnode.ydns.eu:4666", @@ -38,25 +43,6 @@ "print-time": 60, "retries": 5, "retry-pause": 5, - "safe": false, - "threads": [ - { - "low_power_mode": 1, - "affine_to_cpu": 0 - }, - { - "low_power_mode": 1, - "affine_to_cpu": 1 - }, - { - "low_power_mode": 1, - "affine_to_cpu": 2 - }, - { - "low_power_mode": 1, - "affine_to_cpu": 3 - } - ], "user-agent": null, "syslog": false, "watch": false diff --git a/doc/api/1/summary.json b/doc/api/1/summary.json index 95519d56..9b4794c7 100644 --- a/doc/api/1/summary.json +++ b/doc/api/1/summary.json @@ -2,14 +2,8 @@ "id": "92f3104f9a2ee78c", "worker_id": "Ubuntu-1604-xenial-64-minimal", "version": "1.0.0-alpha", - "kind": "cpu", + "kind": "cpu/gpu", "ua": "NinjaRig/1.0.0-alpha (Linux x86_64) libuv/1.8.0 gcc/5.4.0", - "cpu": { - "brand": "Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz", - "aes": true, - "x64": true, - "sockets": 1 - }, "algo": "chukwa", "donate_level": 5, "hashrate": { diff --git a/doc/api/1/threads.json b/doc/api/1/threads.json index 5b302af6..f74fc746 100644 --- a/doc/api/1/threads.json +++ b/doc/api/1/threads.json @@ -1,59 +1,21 @@ { - "memory": 8388608, - 
"threads": [ + "hashers": [ { - "type": "cpu", - "algo": "chukwa", - "low_power_mode": 1, - "affine_to_cpu": 0, - "priority": -1, - "soft_aes": false, + "type": "GPU", + "id": "OCL0", "hashrate": [ - 73.39, - 73.4, - 73.28 + 18328.8, + 18324.21, + 18328.5 ] }, { - "type": "cpu", - "algo": "chukwa", - "av": 1, - "low_power_mode": 1, - "affine_to_cpu": 1, - "priority": -1, - "soft_aes": false, + "type": "GPU", + "id": "NVD0", "hashrate": [ - 74.72, - 74.72, - 74.7 - ] - }, - { - "type": "cpu", - "algo": "chukwa", - "av": 1, - "low_power_mode": 1, - "affine_to_cpu": 2, - "priority": -1, - "soft_aes": false, - "hashrate": [ - 74.71, - 74.72, - 74.7 - ] - }, - { - "type": "cpu", - "algo": "chukwa", - "av": 1, - "low_power_mode": 1, - "affine_to_cpu": 3, - "priority": -1, - "soft_aes": false, - "hashrate": [ - 73.39, - 73.4, - 73.28 + 22351.22, + 22361.15, + 22353.10 ] } ]