Merged v4.0.0-beta

This commit is contained in:
MoneroOcean 2019-09-17 15:48:01 -07:00
commit 993733cb1f
231 changed files with 32642 additions and 3380 deletions

View file

@ -27,8 +27,8 @@
#define XMRIG_CN_ALGO_H
#include <stddef.h>
#include <stdint.h>
#include <cstddef>
#include <cstdint>
#include "crypto/common/Algorithm.h"
@ -42,19 +42,13 @@ template<Algorithm::Id ALGO = Algorithm::INVALID>
class CnAlgo
{
public:
constexpr inline CnAlgo()
{
static_assert(ALGO != Algorithm::INVALID && m_memory[ALGO] > 0, "invalid CRYPTONIGHT algorithm");
static_assert(sizeof(m_memory) / sizeof(m_memory)[0] == Algorithm::MAX, "memory table size mismatch");
static_assert(sizeof(m_iterations) / sizeof(m_iterations)[0] == Algorithm::MAX, "iterations table size mismatch");
static_assert(sizeof(m_base) / sizeof(m_base)[0] == Algorithm::MAX, "iterations table size mismatch");
}
constexpr CnAlgo() {};
constexpr inline Algorithm::Id base() const { return m_base[ALGO]; }
constexpr inline Algorithm::Id base() const { static_assert(ALGO > Algorithm::INVALID && ALGO < Algorithm::RX_0, "invalid CRYPTONIGHT algorithm"); return Algorithm::CN_2; }
constexpr inline bool isHeavy() const { return memory() == CN_MEMORY * 2; }
constexpr inline bool isR() const { return ALGO == Algorithm::CN_R || ALGO == Algorithm::CN_WOW; }
constexpr inline size_t memory() const { return m_memory[ALGO]; }
constexpr inline uint32_t iterations() const { return m_iterations[ALGO]; }
constexpr inline bool isR() const { return ALGO == Algorithm::CN_R; }
constexpr inline size_t memory() const { static_assert(ALGO > Algorithm::INVALID && ALGO < Algorithm::RX_0, "invalid CRYPTONIGHT algorithm"); return CN_MEMORY; }
constexpr inline uint32_t iterations() const { static_assert(ALGO > Algorithm::INVALID && ALGO < Algorithm::RX_0, "invalid CRYPTONIGHT algorithm"); return CN_ITER; }
constexpr inline uint32_t mask() const { return ((memory() - 1) / 16) * 16; }
inline static size_t memory(Algorithm::Id algo)
@ -79,6 +73,54 @@ public:
return 0;
}
inline static uint32_t iterations(Algorithm::Id algo)
{
switch (algo) {
case Algorithm::CN_0:
case Algorithm::CN_1:
case Algorithm::CN_2:
case Algorithm::CN_R:
case Algorithm::CN_RTO:
return CN_ITER;
case Algorithm::CN_FAST:
case Algorithm::CN_HALF:
# ifdef XMRIG_ALGO_CN_LITE
case Algorithm::CN_LITE_0:
case Algorithm::CN_LITE_1:
# endif
# ifdef XMRIG_ALGO_CN_HEAVY
case Algorithm::CN_HEAVY_0:
case Algorithm::CN_HEAVY_TUBE:
case Algorithm::CN_HEAVY_XHV:
# endif
return CN_ITER / 2;
case Algorithm::CN_RWZ:
case Algorithm::CN_ZLS:
return 0x60000;
case Algorithm::CN_XAO:
case Algorithm::CN_DOUBLE:
return CN_ITER * 2;
# ifdef XMRIG_ALGO_CN_GPU
case Algorithm::CN_GPU:
return 0xC000;
# endif
# ifdef XMRIG_ALGO_CN_PICO
case Algorithm::CN_PICO_0:
return CN_ITER / 8;
# endif
default:
break;
}
return 0;
}
inline static uint32_t mask(Algorithm::Id algo)
{
# ifdef XMRIG_ALGO_CN_GPU
@ -96,139 +138,97 @@ public:
return ((memory(algo) - 1) / 16) * 16;
}
inline static Algorithm::Id base(Algorithm::Id algo)
{
switch (algo) {
case Algorithm::CN_0:
case Algorithm::CN_XAO:
# ifdef XMRIG_ALGO_CN_LITE
case Algorithm::CN_LITE_0:
# endif
# ifdef XMRIG_ALGO_CN_HEAVY
case Algorithm::CN_HEAVY_0:
case Algorithm::CN_HEAVY_XHV:
# endif
return Algorithm::CN_0;
case Algorithm::CN_1:
case Algorithm::CN_FAST:
case Algorithm::CN_RTO:
# ifdef XMRIG_ALGO_CN_LITE
case Algorithm::CN_LITE_1:
# endif
# ifdef XMRIG_ALGO_CN_HEAVY
case Algorithm::CN_HEAVY_TUBE:
return Algorithm::CN_1;
# endif
case Algorithm::CN_2:
case Algorithm::CN_R:
case Algorithm::CN_HALF:
case Algorithm::CN_RWZ:
case Algorithm::CN_ZLS:
case Algorithm::CN_DOUBLE:
# ifdef XMRIG_ALGO_CN_PICO
case Algorithm::CN_PICO_0:
# endif
return Algorithm::CN_2;
# ifdef XMRIG_ALGO_CN_GPU
case Algorithm::CN_GPU:
return Algorithm::CN_GPU;
# endif
default:
break;
}
return Algorithm::INVALID;
}
private:
constexpr const static size_t CN_MEMORY = 0x200000;
constexpr const static uint32_t CN_ITER = 0x80000;
constexpr const static size_t m_memory[] = {
CN_MEMORY, // CN_0
CN_MEMORY, // CN_1
CN_MEMORY, // CN_2
CN_MEMORY, // CN_R
CN_MEMORY, // CN_WOW
CN_MEMORY, // CN_FAST
CN_MEMORY, // CN_HALF
CN_MEMORY, // CN_XAO
CN_MEMORY, // CN_RTO
CN_MEMORY, // CN_RWZ
CN_MEMORY, // CN_ZLS
CN_MEMORY, // CN_DOUBLE
# ifdef XMRIG_ALGO_CN_GPU
CN_MEMORY, // CN_GPU
# endif
# ifdef XMRIG_ALGO_CN_LITE
CN_MEMORY / 2, // CN_LITE_0
CN_MEMORY / 2, // CN_LITE_1
# endif
# ifdef XMRIG_ALGO_CN_HEAVY
CN_MEMORY * 2, // CN_HEAVY_0
CN_MEMORY * 2, // CN_HEAVY_TUBE
CN_MEMORY * 2, // CN_HEAVY_XHV
# endif
# ifdef XMRIG_ALGO_CN_PICO
CN_MEMORY / 8, // CN_PICO_0
# endif
# ifdef XMRIG_ALGO_RANDOMX
0, // RX_0
0, // RX_WOW
0, // RX_LOKI
0, // DEFYX
# endif
# ifdef XMRIG_ALGO_ARGON2
0, // AR2_CHUKWA
0, // AR2_WRKZ
# endif
};
constexpr const static uint32_t m_iterations[] = {
CN_ITER, // CN_0
CN_ITER, // CN_1
CN_ITER, // CN_2
CN_ITER, // CN_R
CN_ITER, // CN_WOW
CN_ITER / 2, // CN_FAST
CN_ITER / 2, // CN_HALF
CN_ITER * 2, // CN_XAO
CN_ITER, // CN_RTO
0x60000, // CN_RWZ
0x60000, // CN_ZLS
CN_ITER * 2, // CN_DOUBLE
# ifdef XMRIG_ALGO_CN_GPU
0xC000, // CN_GPU
# endif
# ifdef XMRIG_ALGO_CN_LITE
CN_ITER / 2, // CN_LITE_0
CN_ITER / 2, // CN_LITE_1
# endif
# ifdef XMRIG_ALGO_CN_HEAVY
CN_ITER / 2, // CN_HEAVY_0
CN_ITER / 2, // CN_HEAVY_TUBE
CN_ITER / 2, // CN_HEAVY_XHV
# endif
# ifdef XMRIG_ALGO_CN_PICO
CN_ITER / 8, // CN_PICO_0
# endif
# ifdef XMRIG_ALGO_RANDOMX
0, // RX_0
0, // RX_WOW
0, // RX_LOKI
0, // DEFYX
# endif
# ifdef XMRIG_ALGO_ARGON2
0, // AR2_CHUKWA
0, // AR2_WRKZ
# endif
};
constexpr const static Algorithm::Id m_base[] = {
Algorithm::CN_0, // CN_0
Algorithm::CN_1, // CN_1
Algorithm::CN_2, // CN_2
Algorithm::CN_2, // CN_R
Algorithm::CN_2, // CN_WOW
Algorithm::CN_1, // CN_FAST
Algorithm::CN_2, // CN_HALF
Algorithm::CN_0, // CN_XAO
Algorithm::CN_1, // CN_RTO
Algorithm::CN_2, // CN_RWZ
Algorithm::CN_2, // CN_ZLS
Algorithm::CN_2, // CN_DOUBLE
# ifdef XMRIG_ALGO_CN_GPU
Algorithm::CN_GPU, // CN_GPU
# endif
# ifdef XMRIG_ALGO_CN_LITE
Algorithm::CN_0, // CN_LITE_0
Algorithm::CN_1, // CN_LITE_1
# endif
# ifdef XMRIG_ALGO_CN_HEAVY
Algorithm::CN_0, // CN_HEAVY_0
Algorithm::CN_1, // CN_HEAVY_TUBE
Algorithm::CN_0, // CN_HEAVY_XHV
# endif
# ifdef XMRIG_ALGO_CN_PICO
Algorithm::CN_2, // CN_PICO_0,
# endif
# ifdef XMRIG_ALGO_RANDOMX
Algorithm::INVALID, // RX_0
Algorithm::INVALID, // RX_WOW
Algorithm::INVALID, // RX_LOKI
Algorithm::INVALID, // DEFYX
# endif
# ifdef XMRIG_ALGO_ARGON2
Algorithm::INVALID, // AR2_CHUKWA
Algorithm::INVALID, // AR2_WRKZ
# endif
};
};
#ifdef XMRIG_ALGO_CN_GPU
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GPU>::mask() const { return 0x1FFFC0; }
#endif
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_0>::base() const { return Algorithm::CN_0; }
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_XAO>::base() const { return Algorithm::CN_0; }
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_LITE_0>::base() const { return Algorithm::CN_0; }
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_HEAVY_0>::base() const { return Algorithm::CN_0; }
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_HEAVY_XHV>::base() const { return Algorithm::CN_0; }
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_1>::base() const { return Algorithm::CN_1; }
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_FAST>::base() const { return Algorithm::CN_1; }
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_RTO>::base() const { return Algorithm::CN_1; }
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_LITE_1>::base() const { return Algorithm::CN_1; }
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_HEAVY_TUBE>::base() const { return Algorithm::CN_1; }
#ifdef XMRIG_ALGO_CN_PICO
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::mask() const { return 0x1FFF0; }
#endif
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_FAST>::iterations() const { return CN_ITER / 2; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_HALF>::iterations() const { return CN_ITER / 2; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_LITE_0>::iterations() const { return CN_ITER / 2; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_LITE_1>::iterations() const { return CN_ITER / 2; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_HEAVY_0>::iterations() const { return CN_ITER / 2; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_HEAVY_TUBE>::iterations() const { return CN_ITER / 2; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_HEAVY_XHV>::iterations() const { return CN_ITER / 2; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_XAO>::iterations() const { return CN_ITER * 2; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_DOUBLE>::iterations() const { return CN_ITER * 2; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_RWZ>::iterations() const { return 0x60000; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_ZLS>::iterations() const { return 0x60000; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GPU>::iterations() const { return 0xC000; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::iterations() const { return CN_ITER / 8; }
template<> constexpr inline size_t CnAlgo<Algorithm::CN_LITE_0>::memory() const { return CN_MEMORY / 2; }
template<> constexpr inline size_t CnAlgo<Algorithm::CN_LITE_1>::memory() const { return CN_MEMORY / 2; }
template<> constexpr inline size_t CnAlgo<Algorithm::CN_HEAVY_0>::memory() const { return CN_MEMORY * 2; }
template<> constexpr inline size_t CnAlgo<Algorithm::CN_HEAVY_TUBE>::memory() const { return CN_MEMORY * 2; }
template<> constexpr inline size_t CnAlgo<Algorithm::CN_HEAVY_XHV>::memory() const { return CN_MEMORY * 2; }
template<> constexpr inline size_t CnAlgo<Algorithm::CN_PICO_0>::memory() const { return CN_MEMORY / 8; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GPU>::mask() const { return 0x1FFFC0; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::mask() const { return 0x1FFF0; }
} /* namespace xmrig */

View file

@ -23,7 +23,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <cstdio>
#include "backend/cpu/Cpu.h"
@ -66,12 +66,6 @@
m_map[algo][AV_DOUBLE][Assembly::BULLDOZER] = cryptonight_double_hash_asm<algo, Assembly::BULLDOZER>;
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx);
namespace xmrig {
@ -99,7 +93,7 @@ cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr;
template<typename T, typename U>
static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t mask = CnAlgo<Algorithm::CN_HALF>().mask())
{
const uint8_t* p = reinterpret_cast<const uint8_t*>(src);
auto p = reinterpret_cast<const uint8_t*>(src);
// Workaround for Visual Studio placing trampoline in debug builds.
# if defined(_MSC_VER)
@ -117,7 +111,7 @@ static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t ma
memcpy((void*) dst, (const void*) src, size);
uint8_t* patched_data = reinterpret_cast<uint8_t*>(dst);
auto patched_data = reinterpret_cast<uint8_t*>(dst);
for (size_t i = 0; i + sizeof(uint32_t) <= size; ++i) {
switch (*(uint32_t*)(patched_data + i)) {
case CnAlgo<Algorithm::CN_2>().iterations():
@ -135,7 +129,7 @@ static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t ma
static void patchAsmVariants()
{
const int allocation_size = 65536;
uint8_t *base = static_cast<uint8_t *>(VirtualMemory::allocateExecutableMemory(allocation_size));
auto base = static_cast<uint8_t *>(VirtualMemory::allocateExecutableMemory(allocation_size));
cn_half_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x0000);
cn_half_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1000);
@ -216,7 +210,6 @@ xmrig::CnHash::CnHash()
ADD_FN(Algorithm::CN_1);
ADD_FN(Algorithm::CN_2);
ADD_FN(Algorithm::CN_R);
ADD_FN(Algorithm::CN_WOW);
ADD_FN(Algorithm::CN_FAST);
ADD_FN(Algorithm::CN_HALF);
ADD_FN(Algorithm::CN_XAO);
@ -228,7 +221,6 @@ xmrig::CnHash::CnHash()
ADD_FN_ASM(Algorithm::CN_2);
ADD_FN_ASM(Algorithm::CN_HALF);
ADD_FN_ASM(Algorithm::CN_R);
ADD_FN_ASM(Algorithm::CN_WOW);
ADD_FN_ASM(Algorithm::CN_RWZ);
ADD_FN_ASM(Algorithm::CN_ZLS);
ADD_FN_ASM(Algorithm::CN_DOUBLE);

View file

@ -27,8 +27,8 @@
#define XMRIG_CN_HASH_H
#include <stddef.h>
#include <stdint.h>
#include <cstddef>
#include <cstdint>
#include "crypto/cn/CnAlgo.h"
@ -41,8 +41,8 @@ struct cryptonight_ctx;
namespace xmrig
{
typedef void (*cn_hash_fun)(const uint8_t *input, size_t size, uint8_t *output, cryptonight_ctx **ctx, uint64_t height);
typedef void (*cn_mainloop_fun)(cryptonight_ctx **ctx);
using cn_hash_fun = void (*)(const uint8_t *, size_t, uint8_t *, cryptonight_ctx **, uint64_t);
using cn_mainloop_fun = void (*)(cryptonight_ctx **);
class CnHash

View file

@ -84,21 +84,6 @@ const static cn_r_test_input_data cn_r_test_input[] = {
};
// "cn/wow"
const static uint8_t test_output_wow[] = {
0x9d, 0x47, 0xbf, 0x4c, 0x41, 0xb7, 0xe8, 0xe7, 0x27, 0xe6, 0x81, 0x71, 0x5a, 0xcb, 0x47, 0xfa, 0x16, 0x77, 0xcd, 0xba, 0x9c, 0xa7, 0xbc, 0xb0, 0x5a, 0xd8, 0xcc, 0x8a, 0xbd, 0x5d, 0xaa, 0x66,
0x0d, 0x4a, 0x49, 0x5c, 0xb8, 0x44, 0xa3, 0xca, 0x8b, 0xa4, 0xed, 0xb8, 0xe6, 0xbc, 0xf8, 0x29, 0xef, 0x1c, 0x06, 0xd9, 0xcd, 0xea, 0x2b, 0x62, 0xca, 0x46, 0xc2, 0xa2, 0x1b, 0x8b, 0x0a, 0x79,
0xa1, 0xd6, 0xd8, 0x48, 0xb5, 0xc5, 0x91, 0x5f, 0xcc, 0xd2, 0xf6, 0x4c, 0xf2, 0x16, 0xc6, 0xb1, 0xa0, 0x2c, 0xf7, 0xc7, 0x7b, 0xc8, 0x0d, 0x8d, 0x4e, 0x51, 0xb4, 0x19, 0xe8, 0x8f, 0xf0, 0xdd,
0xaf, 0x3a, 0x85, 0x44, 0xa0, 0x22, 0x1a, 0x14, 0x8c, 0x2a, 0xc9, 0x04, 0x84, 0xb1, 0x98, 0x61, 0xe3, 0xaf, 0xca, 0x33, 0xfe, 0x17, 0x02, 0x1e, 0xfb, 0x8a, 0xd6, 0x49, 0x6b, 0x56, 0x79, 0x15,
0x31, 0x33, 0x99, 0xe0, 0x96, 0x3a, 0xe8, 0xa9, 0x9d, 0xab, 0x8a, 0xf6, 0x6d, 0x34, 0x3e, 0x09, 0x7d, 0xae, 0x0c, 0x0f, 0xeb, 0x08, 0xdb, 0xc4, 0x3c, 0xcd, 0xaf, 0xef, 0x55, 0x15, 0xf4, 0x13,
0x60, 0x21, 0xc6, 0xef, 0x90, 0xbf, 0xf9, 0xae, 0x94, 0xa7, 0x50, 0x6d, 0x62, 0x3d, 0x3a, 0x7a, 0x86, 0xc1, 0x75, 0x6d, 0x65, 0x5f, 0x50, 0xdd, 0x55, 0x8f, 0x71, 0x6d, 0x64, 0x62, 0x2a, 0x34,
0x2b, 0x13, 0x00, 0x05, 0x35, 0xf3, 0xdb, 0x5f, 0x9b, 0x9b, 0x84, 0xa6, 0x5c, 0x43, 0x51, 0xf3, 0x86, 0xcd, 0x2c, 0xde, 0xde, 0xbb, 0x8c, 0x3a, 0xd2, 0xea, 0xb0, 0x86, 0xe6, 0xa3, 0xfe, 0xe5,
0xfc, 0x0e, 0x1d, 0xad, 0x8e, 0x89, 0x57, 0x49, 0xdc, 0x90, 0xeb, 0x69, 0x0b, 0xc1, 0xba, 0x05, 0x9a, 0x1c, 0xd7, 0x72, 0xaf, 0xaa, 0xf6, 0x5a, 0x10, 0x6b, 0xf9, 0xe5, 0xe6, 0xb8, 0x05, 0x03,
0xb6, 0x0b, 0x0a, 0xfe, 0x14, 0x4d, 0xef, 0xf7, 0xd9, 0x03, 0xed, 0x2d, 0x55, 0x45, 0xe7, 0x7e, 0xbe, 0x66, 0xa3, 0xc5, 0x1f, 0xee, 0x70, 0x16, 0xee, 0xb8, 0xfe, 0xe9, 0xeb, 0x63, 0x0c, 0x0f,
0x64, 0x77, 0x4b, 0x27, 0xe7, 0xd5, 0xfe, 0xc8, 0x62, 0xfc, 0x4c, 0x0c, 0x13, 0xac, 0x6b, 0xf0, 0x91, 0x23, 0xb6, 0xf0, 0x5b, 0xb0, 0xe4, 0xb7, 0x5c, 0x97, 0xf3, 0x79, 0xa2, 0xb3, 0xa6, 0x79,
};
// "cn/r"
const static uint8_t test_output_r[] = {
0xf7, 0x59, 0x58, 0x8a, 0xd5, 0x7e, 0x75, 0x84, 0x67, 0x29, 0x54, 0x43, 0xa9, 0xbd, 0x71, 0x49, 0x0a, 0xbf, 0xf8, 0xe9, 0xda, 0xd1, 0xb9, 0x5b, 0x6b, 0xf2, 0xf5, 0xd0, 0xd7, 0x83, 0x87, 0xbc,

View file

@ -514,7 +514,6 @@ static inline __m128i int_sqrt_v2(const uint64_t n0)
}
void wow_soft_aes_compile_code(const V4_Instruction *code, int code_size, void *machine_code, xmrig::Assembly ASM);
void v4_soft_aes_compile_code(const V4_Instruction *code, int code_size, void *machine_code, xmrig::Assembly ASM);
@ -576,10 +575,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
V4_Instruction code[256];
const int code_size = v4_random_math_init<ALGO>(code, height);
if (ALGO == Algorithm::CN_WOW) {
wow_soft_aes_compile_code(code, code_size, reinterpret_cast<void*>(ctx[0]->generated_code), Assembly::NONE);
}
else if (ALGO == Algorithm::CN_R) {
if (ALGO == Algorithm::CN_R) {
v4_soft_aes_compile_code(code, code_size, reinterpret_cast<void*>(ctx[0]->generated_code), Assembly::NONE);
}
@ -812,9 +808,7 @@ extern cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm;
} // namespace xmrig
void wow_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
void wow_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
void v4_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
@ -832,20 +826,6 @@ void cn_r_compile_code_double(const V4_Instruction* code, int code_size, void* m
}
template<>
void cn_r_compile_code<xmrig::Algorithm::CN_WOW>(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
{
wow_compile_code(code, code_size, machine_code, ASM);
}
template<>
void cn_r_compile_code_double<xmrig::Algorithm::CN_WOW>(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
{
wow_compile_code_double(code, code_size, machine_code, ASM);
}
namespace xmrig {

View file

@ -529,9 +529,7 @@ PUBLIC FN_PREFIX(CryptonightR_instruction_mov254)
PUBLIC FN_PREFIX(CryptonightR_instruction_mov255)
PUBLIC FN_PREFIX(CryptonightR_instruction_mov256)
#include "CryptonightWOW_template.inc"
#include "CryptonightR_template.inc"
#include "CryptonightWOW_soft_aes_template.inc"
#include "CryptonightR_soft_aes_template.inc"
FN_PREFIX(CryptonightR_instruction0):

View file

@ -516,9 +516,7 @@ PUBLIC CryptonightR_instruction_mov254
PUBLIC CryptonightR_instruction_mov255
PUBLIC CryptonightR_instruction_mov256
INCLUDE CryptonightWOW_template_win.inc
INCLUDE CryptonightR_template_win.inc
INCLUDE CryptonightWOW_soft_aes_template_win.inc
INCLUDE CryptonightR_soft_aes_template_win.inc
CryptonightR_instruction0:

View file

@ -2,18 +2,6 @@
extern "C"
{
void CryptonightWOW_template_part1();
void CryptonightWOW_template_mainloop();
void CryptonightWOW_template_part2();
void CryptonightWOW_template_part3();
void CryptonightWOW_template_end();
void CryptonightWOW_template_double_part1();
void CryptonightWOW_template_double_mainloop();
void CryptonightWOW_template_double_part2();
void CryptonightWOW_template_double_part3();
void CryptonightWOW_template_double_part4();
void CryptonightWOW_template_double_end();
void CryptonightR_template_part1();
void CryptonightR_template_mainloop();
void CryptonightR_template_part2();
@ -26,18 +14,6 @@ extern "C"
void CryptonightR_template_double_part4();
void CryptonightR_template_double_end();
void CryptonightWOW_soft_aes_template_part1();
void CryptonightWOW_soft_aes_template_mainloop();
void CryptonightWOW_soft_aes_template_part2();
void CryptonightWOW_soft_aes_template_part3();
void CryptonightWOW_soft_aes_template_end();
void CryptonightWOW_soft_aes_template_double_part1();
void CryptonightWOW_soft_aes_template_double_mainloop();
void CryptonightWOW_soft_aes_template_double_part2();
void CryptonightWOW_soft_aes_template_double_part3();
void CryptonightWOW_soft_aes_template_double_part4();
void CryptonightWOW_soft_aes_template_double_end();
void CryptonightR_soft_aes_template_part1();
void CryptonightR_soft_aes_template_mainloop();
void CryptonightR_soft_aes_template_part2();

View file

@ -1,268 +0,0 @@
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part1)
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop)
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part2)
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part3)
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_end)
ALIGN(64)
FN_PREFIX(CryptonightWOW_soft_aes_template_part1):
mov rcx, [rcx]
mov QWORD PTR [rsp+8], rcx
push rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 232
mov eax, [rcx+96]
mov ebx, [rcx+100]
mov esi, [rcx+104]
mov edx, [rcx+108]
mov [rsp+144], eax
mov [rsp+148], ebx
mov [rsp+152], esi
mov [rsp+156], edx
mov rax, QWORD PTR [rcx+48]
mov r10, rcx
xor rax, QWORD PTR [rcx+16]
mov r8, QWORD PTR [rcx+32]
xor r8, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+40]
xor r9, QWORD PTR [rcx+8]
movq xmm4, rax
mov rdx, QWORD PTR [rcx+56]
xor rdx, QWORD PTR [rcx+24]
mov r11, QWORD PTR [rcx+224]
mov rcx, QWORD PTR [rcx+88]
xor rcx, QWORD PTR [r10+72]
mov rax, QWORD PTR [r10+80]
movq xmm0, rdx
xor rax, QWORD PTR [r10+64]
movaps XMMWORD PTR [rsp+16], xmm6
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+48], xmm8
movaps XMMWORD PTR [rsp+64], xmm9
movaps XMMWORD PTR [rsp+80], xmm10
movaps XMMWORD PTR [rsp+96], xmm11
movaps XMMWORD PTR [rsp+112], xmm12
movaps XMMWORD PTR [rsp+128], xmm13
movq xmm5, rax
mov rax, r8
punpcklqdq xmm4, xmm0
and eax, 2097136
movq xmm10, QWORD PTR [r10+96]
movq xmm0, rcx
mov rcx, QWORD PTR [r10+104]
xorps xmm9, xmm9
mov QWORD PTR [rsp+328], rax
movq xmm12, r11
mov QWORD PTR [rsp+320], r9
punpcklqdq xmm5, xmm0
movq xmm13, rcx
mov r12d, 524288
ALIGN(64)
FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop):
movd xmm11, r12d
mov r12, QWORD PTR [r10+272]
lea r13, QWORD PTR [rax+r11]
mov esi, DWORD PTR [r13]
movq xmm0, r9
mov r10d, DWORD PTR [r13+4]
movq xmm7, r8
mov ebp, DWORD PTR [r13+12]
mov r14d, DWORD PTR [r13+8]
mov rdx, QWORD PTR [rsp+328]
movzx ecx, sil
shr esi, 8
punpcklqdq xmm7, xmm0
mov r15d, DWORD PTR [r12+rcx*4]
movzx ecx, r10b
shr r10d, 8
mov edi, DWORD PTR [r12+rcx*4]
movzx ecx, r14b
shr r14d, 8
mov ebx, DWORD PTR [r12+rcx*4]
movzx ecx, bpl
shr ebp, 8
mov r9d, DWORD PTR [r12+rcx*4]
movzx ecx, r10b
shr r10d, 8
xor r15d, DWORD PTR [r12+rcx*4+1024]
movzx ecx, r14b
shr r14d, 8
mov eax, r14d
shr eax, 8
xor edi, DWORD PTR [r12+rcx*4+1024]
add eax, 256
movzx ecx, bpl
shr ebp, 8
xor ebx, DWORD PTR [r12+rcx*4+1024]
movzx ecx, sil
shr esi, 8
xor r9d, DWORD PTR [r12+rcx*4+1024]
add r12, 2048
movzx ecx, r10b
shr r10d, 8
add r10d, 256
mov r11d, DWORD PTR [r12+rax*4]
xor r11d, DWORD PTR [r12+rcx*4]
xor r11d, r9d
movzx ecx, sil
mov r10d, DWORD PTR [r12+r10*4]
shr esi, 8
add esi, 256
xor r10d, DWORD PTR [r12+rcx*4]
movzx ecx, bpl
xor r10d, ebx
shr ebp, 8
movd xmm1, r11d
add ebp, 256
movq r11, xmm12
mov r9d, DWORD PTR [r12+rcx*4]
xor r9d, DWORD PTR [r12+rsi*4]
mov eax, DWORD PTR [r12+rbp*4]
xor r9d, edi
movzx ecx, r14b
movd xmm0, r10d
movd xmm2, r9d
xor eax, DWORD PTR [r12+rcx*4]
mov rcx, rdx
xor eax, r15d
punpckldq xmm2, xmm1
xor rcx, 16
movd xmm6, eax
mov rax, rdx
punpckldq xmm6, xmm0
xor rax, 32
punpckldq xmm6, xmm2
xor rdx, 48
movdqu xmm2, XMMWORD PTR [rcx+r11]
pxor xmm6, xmm7
paddq xmm2, xmm4
movdqu xmm1, XMMWORD PTR [rax+r11]
movdqu xmm0, XMMWORD PTR [rdx+r11]
paddq xmm0, xmm5
movdqu XMMWORD PTR [rcx+r11], xmm0
movdqu XMMWORD PTR [rax+r11], xmm2
movq rcx, xmm13
paddq xmm1, xmm7
movdqu XMMWORD PTR [rdx+r11], xmm1
movq rdi, xmm6
mov r10, rdi
and r10d, 2097136
movdqa xmm0, xmm6
pxor xmm0, xmm4
movdqu XMMWORD PTR [r13], xmm0
mov ebx, [rsp+144]
mov ebp, [rsp+152]
add ebx, [rsp+148]
add ebp, [rsp+156]
shl rbp, 32
or rbx, rbp
xor rbx, QWORD PTR [r10+r11]
lea r14, QWORD PTR [r10+r11]
mov rbp, QWORD PTR [r14+8]
mov [rsp+160], rbx
mov [rsp+168], rdi
mov [rsp+176], rbp
mov [rsp+184], r10
mov r10, rsp
mov ebx, [rsp+144]
mov esi, [rsp+148]
mov edi, [rsp+152]
mov ebp, [rsp+156]
movd esp, xmm7
movaps xmm0, xmm7
psrldq xmm0, 8
movd r15d, xmm0
movd eax, xmm4
movd edx, xmm5
FN_PREFIX(CryptonightWOW_soft_aes_template_part2):
mov rsp, r10
mov [rsp+144], ebx
mov [rsp+148], esi
mov [rsp+152], edi
mov [rsp+156], ebp
mov rbx, [rsp+160]
mov rdi, [rsp+168]
mov rbp, [rsp+176]
mov r10, [rsp+184]
mov r9, r10
xor r9, 16
mov rcx, r10
xor rcx, 32
xor r10, 48
mov rax, rbx
mul rdi
movdqu xmm2, XMMWORD PTR [r9+r11]
movdqu xmm1, XMMWORD PTR [rcx+r11]
paddq xmm1, xmm7
movq xmm0, rax
movq xmm3, rdx
xor rax, QWORD PTR [r11+rcx+8]
xor rdx, QWORD PTR [rcx+r11]
punpcklqdq xmm3, xmm0
add r8, rdx
movdqu xmm0, XMMWORD PTR [r10+r11]
pxor xmm2, xmm3
paddq xmm0, xmm5
paddq xmm2, xmm4
movdqu XMMWORD PTR [r9+r11], xmm0
movdqa xmm5, xmm4
mov r9, QWORD PTR [rsp+320]
movdqa xmm4, xmm6
add r9, rax
movdqu XMMWORD PTR [rcx+r11], xmm2
movdqu XMMWORD PTR [r10+r11], xmm1
mov r10, QWORD PTR [rsp+304]
movd r12d, xmm11
mov QWORD PTR [r14], r8
xor r8, rbx
mov rax, r8
mov QWORD PTR [r14+8], r9
and eax, 2097136
xor r9, rbp
mov QWORD PTR [rsp+320], r9
mov QWORD PTR [rsp+328], rax
sub r12d, 1
jne FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop)
FN_PREFIX(CryptonightWOW_soft_aes_template_part3):
movaps xmm6, XMMWORD PTR [rsp+16]
movaps xmm7, XMMWORD PTR [rsp+32]
movaps xmm8, XMMWORD PTR [rsp+48]
movaps xmm9, XMMWORD PTR [rsp+64]
movaps xmm10, XMMWORD PTR [rsp+80]
movaps xmm11, XMMWORD PTR [rsp+96]
movaps xmm12, XMMWORD PTR [rsp+112]
movaps xmm13, XMMWORD PTR [rsp+128]
add rsp, 232
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
pop rbx
ret
FN_PREFIX(CryptonightWOW_soft_aes_template_end):

View file

@ -1,268 +0,0 @@
PUBLIC CryptonightWOW_soft_aes_template_part1
PUBLIC CryptonightWOW_soft_aes_template_mainloop
PUBLIC CryptonightWOW_soft_aes_template_part2
PUBLIC CryptonightWOW_soft_aes_template_part3
PUBLIC CryptonightWOW_soft_aes_template_end
ALIGN(64)
CryptonightWOW_soft_aes_template_part1:
mov rcx, [rcx]
mov QWORD PTR [rsp+8], rcx
push rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 232
mov eax, [rcx+96]
mov ebx, [rcx+100]
mov esi, [rcx+104]
mov edx, [rcx+108]
mov [rsp+144], eax
mov [rsp+148], ebx
mov [rsp+152], esi
mov [rsp+156], edx
mov rax, QWORD PTR [rcx+48]
mov r10, rcx
xor rax, QWORD PTR [rcx+16]
mov r8, QWORD PTR [rcx+32]
xor r8, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+40]
xor r9, QWORD PTR [rcx+8]
movq xmm4, rax
mov rdx, QWORD PTR [rcx+56]
xor rdx, QWORD PTR [rcx+24]
mov r11, QWORD PTR [rcx+224]
mov rcx, QWORD PTR [rcx+88]
xor rcx, QWORD PTR [r10+72]
mov rax, QWORD PTR [r10+80]
movq xmm0, rdx
xor rax, QWORD PTR [r10+64]
movaps XMMWORD PTR [rsp+16], xmm6
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+48], xmm8
movaps XMMWORD PTR [rsp+64], xmm9
movaps XMMWORD PTR [rsp+80], xmm10
movaps XMMWORD PTR [rsp+96], xmm11
movaps XMMWORD PTR [rsp+112], xmm12
movaps XMMWORD PTR [rsp+128], xmm13
movq xmm5, rax
mov rax, r8
punpcklqdq xmm4, xmm0
and eax, 2097136
movq xmm10, QWORD PTR [r10+96]
movq xmm0, rcx
mov rcx, QWORD PTR [r10+104]
xorps xmm9, xmm9
mov QWORD PTR [rsp+328], rax
movq xmm12, r11
mov QWORD PTR [rsp+320], r9
punpcklqdq xmm5, xmm0
movq xmm13, rcx
mov r12d, 524288
ALIGN(64)
CryptonightWOW_soft_aes_template_mainloop:
movd xmm11, r12d
mov r12, QWORD PTR [r10+272]
lea r13, QWORD PTR [rax+r11]
mov esi, DWORD PTR [r13]
movq xmm0, r9
mov r10d, DWORD PTR [r13+4]
movq xmm7, r8
mov ebp, DWORD PTR [r13+12]
mov r14d, DWORD PTR [r13+8]
mov rdx, QWORD PTR [rsp+328]
movzx ecx, sil
shr esi, 8
punpcklqdq xmm7, xmm0
mov r15d, DWORD PTR [r12+rcx*4]
movzx ecx, r10b
shr r10d, 8
mov edi, DWORD PTR [r12+rcx*4]
movzx ecx, r14b
shr r14d, 8
mov ebx, DWORD PTR [r12+rcx*4]
movzx ecx, bpl
shr ebp, 8
mov r9d, DWORD PTR [r12+rcx*4]
movzx ecx, r10b
shr r10d, 8
xor r15d, DWORD PTR [r12+rcx*4+1024]
movzx ecx, r14b
shr r14d, 8
mov eax, r14d
shr eax, 8
xor edi, DWORD PTR [r12+rcx*4+1024]
add eax, 256
movzx ecx, bpl
shr ebp, 8
xor ebx, DWORD PTR [r12+rcx*4+1024]
movzx ecx, sil
shr esi, 8
xor r9d, DWORD PTR [r12+rcx*4+1024]
add r12, 2048
movzx ecx, r10b
shr r10d, 8
add r10d, 256
mov r11d, DWORD PTR [r12+rax*4]
xor r11d, DWORD PTR [r12+rcx*4]
xor r11d, r9d
movzx ecx, sil
mov r10d, DWORD PTR [r12+r10*4]
shr esi, 8
add esi, 256
xor r10d, DWORD PTR [r12+rcx*4]
movzx ecx, bpl
xor r10d, ebx
shr ebp, 8
movd xmm1, r11d
add ebp, 256
movq r11, xmm12
mov r9d, DWORD PTR [r12+rcx*4]
xor r9d, DWORD PTR [r12+rsi*4]
mov eax, DWORD PTR [r12+rbp*4]
xor r9d, edi
movzx ecx, r14b
movd xmm0, r10d
movd xmm2, r9d
xor eax, DWORD PTR [r12+rcx*4]
mov rcx, rdx
xor eax, r15d
punpckldq xmm2, xmm1
xor rcx, 16
movd xmm6, eax
mov rax, rdx
punpckldq xmm6, xmm0
xor rax, 32
punpckldq xmm6, xmm2
xor rdx, 48
movdqu xmm2, XMMWORD PTR [rcx+r11]
pxor xmm6, xmm7
paddq xmm2, xmm4
movdqu xmm1, XMMWORD PTR [rax+r11]
movdqu xmm0, XMMWORD PTR [rdx+r11]
paddq xmm0, xmm5
movdqu XMMWORD PTR [rcx+r11], xmm0
movdqu XMMWORD PTR [rax+r11], xmm2
movq rcx, xmm13
paddq xmm1, xmm7
movdqu XMMWORD PTR [rdx+r11], xmm1
movq rdi, xmm6
mov r10, rdi
and r10d, 2097136
movdqa xmm0, xmm6
pxor xmm0, xmm4
movdqu XMMWORD PTR [r13], xmm0
mov ebx, [rsp+144]
mov ebp, [rsp+152]
add ebx, [rsp+148]
add ebp, [rsp+156]
shl rbp, 32
or rbx, rbp
xor rbx, QWORD PTR [r10+r11]
lea r14, QWORD PTR [r10+r11]
mov rbp, QWORD PTR [r14+8]
mov [rsp+160], rbx
mov [rsp+168], rdi
mov [rsp+176], rbp
mov [rsp+184], r10
mov r10, rsp
mov ebx, [rsp+144]
mov esi, [rsp+148]
mov edi, [rsp+152]
mov ebp, [rsp+156]
movd esp, xmm7
movaps xmm0, xmm7
psrldq xmm0, 8
movd r15d, xmm0
movd eax, xmm4
movd edx, xmm5
CryptonightWOW_soft_aes_template_part2:
mov rsp, r10
mov [rsp+144], ebx
mov [rsp+148], esi
mov [rsp+152], edi
mov [rsp+156], ebp
mov rbx, [rsp+160]
mov rdi, [rsp+168]
mov rbp, [rsp+176]
mov r10, [rsp+184]
mov r9, r10
xor r9, 16
mov rcx, r10
xor rcx, 32
xor r10, 48
mov rax, rbx
mul rdi
movdqu xmm2, XMMWORD PTR [r9+r11]
movdqu xmm1, XMMWORD PTR [rcx+r11]
paddq xmm1, xmm7
movq xmm0, rax
movq xmm3, rdx
xor rax, QWORD PTR [r11+rcx+8]
xor rdx, QWORD PTR [rcx+r11]
punpcklqdq xmm3, xmm0
add r8, rdx
movdqu xmm0, XMMWORD PTR [r10+r11]
pxor xmm2, xmm3
paddq xmm0, xmm5
paddq xmm2, xmm4
movdqu XMMWORD PTR [r9+r11], xmm0
movdqa xmm5, xmm4
mov r9, QWORD PTR [rsp+320]
movdqa xmm4, xmm6
add r9, rax
movdqu XMMWORD PTR [rcx+r11], xmm2
movdqu XMMWORD PTR [r10+r11], xmm1
mov r10, QWORD PTR [rsp+304]
movd r12d, xmm11
mov QWORD PTR [r14], r8
xor r8, rbx
mov rax, r8
mov QWORD PTR [r14+8], r9
and eax, 2097136
xor r9, rbp
mov QWORD PTR [rsp+320], r9
mov QWORD PTR [rsp+328], rax
sub r12d, 1
jne CryptonightWOW_soft_aes_template_mainloop
CryptonightWOW_soft_aes_template_part3:
movaps xmm6, XMMWORD PTR [rsp+16]
movaps xmm7, XMMWORD PTR [rsp+32]
movaps xmm8, XMMWORD PTR [rsp+48]
movaps xmm9, XMMWORD PTR [rsp+64]
movaps xmm10, XMMWORD PTR [rsp+80]
movaps xmm11, XMMWORD PTR [rsp+96]
movaps xmm12, XMMWORD PTR [rsp+112]
movaps xmm13, XMMWORD PTR [rsp+128]
add rsp, 232
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
pop rbx
ret
CryptonightWOW_soft_aes_template_end:

View file

@ -1,491 +0,0 @@
PUBLIC FN_PREFIX(CryptonightWOW_template_part1)
PUBLIC FN_PREFIX(CryptonightWOW_template_mainloop)
PUBLIC FN_PREFIX(CryptonightWOW_template_part2)
PUBLIC FN_PREFIX(CryptonightWOW_template_part3)
PUBLIC FN_PREFIX(CryptonightWOW_template_end)
PUBLIC FN_PREFIX(CryptonightWOW_template_double_part1)
PUBLIC FN_PREFIX(CryptonightWOW_template_double_mainloop)
PUBLIC FN_PREFIX(CryptonightWOW_template_double_part2)
PUBLIC FN_PREFIX(CryptonightWOW_template_double_part3)
PUBLIC FN_PREFIX(CryptonightWOW_template_double_part4)
PUBLIC FN_PREFIX(CryptonightWOW_template_double_end)
ALIGN(64)
FN_PREFIX(CryptonightWOW_template_part1):
mov rcx, [rcx]
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
push r10
push r11
push r12
push r13
push r14
push r15
push rdi
sub rsp, 64
mov r12, rcx
mov r8, QWORD PTR [r12+32]
mov rdx, r12
xor r8, QWORD PTR [r12]
mov r15, QWORD PTR [r12+40]
mov r9, r8
xor r15, QWORD PTR [r12+8]
mov r11, QWORD PTR [r12+224]
mov r12, QWORD PTR [r12+56]
xor r12, QWORD PTR [rdx+24]
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
movaps XMMWORD PTR [rsp+48], xmm6
movq xmm0, r12
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+16], xmm8
movaps XMMWORD PTR [rsp], xmm9
mov r12, QWORD PTR [rdx+88]
xor r12, QWORD PTR [rdx+72]
movq xmm6, rax
mov rax, QWORD PTR [rdx+80]
xor rax, QWORD PTR [rdx+64]
punpcklqdq xmm6, xmm0
and r9d, 2097136
movq xmm0, r12
movq xmm7, rax
punpcklqdq xmm7, xmm0
mov r10d, r9d
movq xmm9, rsp
mov rsp, r8
mov r8d, 524288
mov ebx, [rdx+96]
mov esi, [rdx+100]
mov edi, [rdx+104]
mov ebp, [rdx+108]
ALIGN(64)
FN_PREFIX(CryptonightWOW_template_mainloop):
movdqa xmm5, XMMWORD PTR [r9+r11]
movq xmm0, r15
movq xmm4, rsp
punpcklqdq xmm4, xmm0
lea rdx, QWORD PTR [r9+r11]
aesenc xmm5, xmm4
movd r10d, xmm5
and r10d, 2097136
mov r12d, r9d
mov eax, r9d
xor r9d, 48
xor r12d, 16
xor eax, 32
movdqu xmm0, XMMWORD PTR [r9+r11]
movdqu xmm2, XMMWORD PTR [r12+r11]
movdqu xmm1, XMMWORD PTR [rax+r11]
paddq xmm0, xmm7
paddq xmm2, xmm6
paddq xmm1, xmm4
movdqu XMMWORD PTR [r12+r11], xmm0
movq r12, xmm5
movdqu XMMWORD PTR [rax+r11], xmm2
movdqu XMMWORD PTR [r9+r11], xmm1
movdqa xmm0, xmm5
pxor xmm0, xmm6
movdqu XMMWORD PTR [rdx], xmm0
lea r13d, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or r13, rdx
xor r13, QWORD PTR [r10+r11]
mov r14, QWORD PTR [r10+r11+8]
movd eax, xmm6
movd edx, xmm7
pextrd r9d, xmm7, 2
FN_PREFIX(CryptonightWOW_template_part2):
mov rax, r13
mul r12
movq xmm0, rax
movq xmm3, rdx
punpcklqdq xmm3, xmm0
mov r9d, r10d
mov r12d, r10d
xor r9d, 16
xor r12d, 32
xor r10d, 48
movdqa xmm1, XMMWORD PTR [r12+r11]
xor rdx, QWORD PTR [r12+r11]
xor rax, QWORD PTR [r11+r12+8]
movdqa xmm2, XMMWORD PTR [r9+r11]
pxor xmm3, xmm2
paddq xmm7, XMMWORD PTR [r10+r11]
paddq xmm1, xmm4
paddq xmm3, xmm6
movdqu XMMWORD PTR [r9+r11], xmm7
movdqu XMMWORD PTR [r12+r11], xmm3
movdqu XMMWORD PTR [r10+r11], xmm1
movdqa xmm7, xmm6
add r15, rax
add rsp, rdx
xor r10, 48
mov QWORD PTR [r10+r11], rsp
xor rsp, r13
mov r9d, esp
mov QWORD PTR [r10+r11+8], r15
and r9d, 2097136
xor r15, r14
movdqa xmm6, xmm5
dec r8d
jnz FN_PREFIX(CryptonightWOW_template_mainloop)
FN_PREFIX(CryptonightWOW_template_part3):
movq rsp, xmm9
mov rbx, QWORD PTR [rsp+136]
mov rbp, QWORD PTR [rsp+144]
mov rsi, QWORD PTR [rsp+152]
movaps xmm6, XMMWORD PTR [rsp+48]
movaps xmm7, XMMWORD PTR [rsp+32]
movaps xmm8, XMMWORD PTR [rsp+16]
movaps xmm9, XMMWORD PTR [rsp]
add rsp, 64
pop rdi
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
ret 0
FN_PREFIX(CryptonightWOW_template_end):
ALIGN(64)
FN_PREFIX(CryptonightWOW_template_double_part1):
mov rdx, [rcx+8]
mov rcx, [rcx]
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 320
mov r14, QWORD PTR [rcx+32]
mov r8, rcx
xor r14, QWORD PTR [rcx]
mov r12, QWORD PTR [rcx+40]
mov ebx, r14d
mov rsi, QWORD PTR [rcx+224]
and ebx, 2097136
xor r12, QWORD PTR [rcx+8]
mov rcx, QWORD PTR [rcx+56]
xor rcx, QWORD PTR [r8+24]
mov rax, QWORD PTR [r8+48]
xor rax, QWORD PTR [r8+16]
mov r15, QWORD PTR [rdx+32]
xor r15, QWORD PTR [rdx]
movq xmm0, rcx
mov rcx, QWORD PTR [r8+88]
xor rcx, QWORD PTR [r8+72]
mov r13, QWORD PTR [rdx+40]
mov rdi, QWORD PTR [rdx+224]
xor r13, QWORD PTR [rdx+8]
movaps XMMWORD PTR [rsp+160], xmm6
movaps XMMWORD PTR [rsp+176], xmm7
movaps XMMWORD PTR [rsp+192], xmm8
movaps XMMWORD PTR [rsp+208], xmm9
movaps XMMWORD PTR [rsp+224], xmm10
movaps XMMWORD PTR [rsp+240], xmm11
movaps XMMWORD PTR [rsp+256], xmm12
movaps XMMWORD PTR [rsp+272], xmm13
movaps XMMWORD PTR [rsp+288], xmm14
movaps XMMWORD PTR [rsp+304], xmm15
movq xmm7, rax
mov rax, QWORD PTR [r8+80]
xor rax, QWORD PTR [r8+64]
movaps xmm1, XMMWORD PTR [rdx+96]
movaps xmm2, XMMWORD PTR [r8+96]
movaps XMMWORD PTR [rsp], xmm1
movaps XMMWORD PTR [rsp+16], xmm2
mov r8d, r15d
punpcklqdq xmm7, xmm0
movq xmm0, rcx
mov rcx, QWORD PTR [rdx+56]
xor rcx, QWORD PTR [rdx+24]
movq xmm9, rax
mov QWORD PTR [rsp+128], rsi
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
punpcklqdq xmm9, xmm0
movq xmm0, rcx
mov rcx, QWORD PTR [rdx+88]
xor rcx, QWORD PTR [rdx+72]
movq xmm8, rax
mov QWORD PTR [rsp+136], rdi
mov rax, QWORD PTR [rdx+80]
xor rax, QWORD PTR [rdx+64]
punpcklqdq xmm8, xmm0
and r8d, 2097136
movq xmm0, rcx
mov r11d, 524288
movq xmm10, rax
punpcklqdq xmm10, xmm0
movq xmm14, QWORD PTR [rsp+128]
movq xmm15, QWORD PTR [rsp+136]
ALIGN(64)
FN_PREFIX(CryptonightWOW_template_double_mainloop):
movdqu xmm6, XMMWORD PTR [rbx+rsi]
movq xmm0, r12
mov ecx, ebx
movq xmm3, r14
punpcklqdq xmm3, xmm0
xor ebx, 16
aesenc xmm6, xmm3
movq rdx, xmm6
movq xmm4, r15
movdqu xmm0, XMMWORD PTR [rbx+rsi]
xor ebx, 48
paddq xmm0, xmm7
movdqu xmm1, XMMWORD PTR [rbx+rsi]
movdqu XMMWORD PTR [rbx+rsi], xmm0
paddq xmm1, xmm3
xor ebx, 16
mov eax, ebx
xor rax, 32
movdqu xmm0, XMMWORD PTR [rbx+rsi]
movdqu XMMWORD PTR [rbx+rsi], xmm1
paddq xmm0, xmm9
movdqu XMMWORD PTR [rax+rsi], xmm0
movdqa xmm0, xmm6
pxor xmm0, xmm7
movdqu XMMWORD PTR [rcx+rsi], xmm0
mov esi, edx
movdqu xmm5, XMMWORD PTR [r8+rdi]
and esi, 2097136
mov ecx, r8d
movq xmm0, r13
punpcklqdq xmm4, xmm0
xor r8d, 16
aesenc xmm5, xmm4
movdqu xmm0, XMMWORD PTR [r8+rdi]
xor r8d, 48
paddq xmm0, xmm8
movdqu xmm1, XMMWORD PTR [r8+rdi]
movdqu XMMWORD PTR [r8+rdi], xmm0
paddq xmm1, xmm4
xor r8d, 16
mov eax, r8d
xor rax, 32
movdqu xmm0, XMMWORD PTR [r8+rdi]
movdqu XMMWORD PTR [r8+rdi], xmm1
paddq xmm0, xmm10
movdqu XMMWORD PTR [rax+rdi], xmm0
movdqa xmm0, xmm5
pxor xmm0, xmm8
movdqu XMMWORD PTR [rcx+rdi], xmm0
movq rdi, xmm5
movq rcx, xmm14
mov ebp, edi
mov r8, QWORD PTR [rcx+rsi]
mov r10, QWORD PTR [rcx+rsi+8]
lea r9, QWORD PTR [rcx+rsi]
xor esi, 16
movq xmm0, rsp
movq xmm1, rsi
movq xmm2, rdi
movq xmm11, rbp
movq xmm12, r15
movq xmm13, rdx
mov [rsp+104], rcx
mov [rsp+112], r9
mov ebx, DWORD PTR [rsp+16]
mov esi, DWORD PTR [rsp+20]
mov edi, DWORD PTR [rsp+24]
mov ebp, DWORD PTR [rsp+28]
lea eax, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or rax, rdx
xor r8, rax
movd esp, xmm3
pextrd r15d, xmm3, 2
movd eax, xmm7
movd edx, xmm9
pextrd r9d, xmm9, 2
FN_PREFIX(CryptonightWOW_template_double_part2):
movq rsp, xmm0
mov DWORD PTR [rsp+16], ebx
mov DWORD PTR [rsp+20], esi
mov DWORD PTR [rsp+24], edi
mov DWORD PTR [rsp+28], ebp
movq rsi, xmm1
movq rdi, xmm2
movq rbp, xmm11
movq r15, xmm12
movq rdx, xmm13
mov rcx, [rsp+104]
mov r9, [rsp+112]
mov rbx, r8
mov rax, r8
mul rdx
and ebp, 2097136
mov r8, rax
movq xmm1, rdx
movq xmm0, r8
punpcklqdq xmm1, xmm0
pxor xmm1, XMMWORD PTR [rcx+rsi]
xor esi, 48
paddq xmm1, xmm7
movdqu xmm2, XMMWORD PTR [rsi+rcx]
xor rdx, QWORD PTR [rsi+rcx]
paddq xmm2, xmm3
xor r8, QWORD PTR [rsi+rcx+8]
movdqu XMMWORD PTR [rsi+rcx], xmm1
xor esi, 16
mov eax, esi
mov rsi, rcx
movdqu xmm0, XMMWORD PTR [rax+rcx]
movdqu XMMWORD PTR [rax+rcx], xmm2
paddq xmm0, xmm9
add r12, r8
xor rax, 32
add r14, rdx
movdqa xmm9, xmm7
movdqa xmm7, xmm6
movdqu XMMWORD PTR [rax+rcx], xmm0
mov QWORD PTR [r9+8], r12
xor r12, r10
mov QWORD PTR [r9], r14
movq rcx, xmm15
xor r14, rbx
mov r10d, ebp
mov ebx, r14d
xor ebp, 16
and ebx, 2097136
mov r8, QWORD PTR [r10+rcx]
mov r9, QWORD PTR [r10+rcx+8]
movq xmm0, rsp
movq xmm1, rbx
movq xmm2, rsi
movq xmm11, rdi
movq xmm12, rbp
movq xmm13, r15
mov [rsp+104], rcx
mov [rsp+112], r9
mov ebx, DWORD PTR [rsp]
mov esi, DWORD PTR [rsp+4]
mov edi, DWORD PTR [rsp+8]
mov ebp, DWORD PTR [rsp+12]
lea eax, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or rax, rdx
xor r8, rax
movq xmm3, r8
movd esp, xmm4
pextrd r15d, xmm4, 2
movd eax, xmm8
movd edx, xmm10
pextrd r9d, xmm10, 2
FN_PREFIX(CryptonightWOW_template_double_part3):
movq rsp, xmm0
mov DWORD PTR [rsp], ebx
mov DWORD PTR [rsp+4], esi
mov DWORD PTR [rsp+8], edi
mov DWORD PTR [rsp+12], ebp
movq rbx, xmm1
movq rsi, xmm2
movq rdi, xmm11
movq rbp, xmm12
movq r15, xmm13
mov rcx, [rsp+104]
mov r9, [rsp+112]
mov rax, r8
mul rdi
movq xmm1, rdx
movq xmm0, rax
punpcklqdq xmm1, xmm0
mov rdi, rcx
mov r8, rax
pxor xmm1, XMMWORD PTR [rbp+rcx]
xor ebp, 48
paddq xmm1, xmm8
xor r8, QWORD PTR [rbp+rcx+8]
xor rdx, QWORD PTR [rbp+rcx]
add r13, r8
movdqu xmm2, XMMWORD PTR [rbp+rcx]
add r15, rdx
movdqu XMMWORD PTR [rbp+rcx], xmm1
paddq xmm2, xmm4
xor ebp, 16
mov eax, ebp
xor rax, 32
movdqu xmm0, XMMWORD PTR [rbp+rcx]
movdqu XMMWORD PTR [rbp+rcx], xmm2
paddq xmm0, xmm10
movdqu XMMWORD PTR [rax+rcx], xmm0
movq rax, xmm3
movdqa xmm10, xmm8
mov QWORD PTR [r10+rcx], r15
movdqa xmm8, xmm5
xor r15, rax
mov QWORD PTR [r10+rcx+8], r13
mov r8d, r15d
xor r13, r9
and r8d, 2097136
dec r11d
jnz FN_PREFIX(CryptonightWOW_template_double_mainloop)
FN_PREFIX(CryptonightWOW_template_double_part4):
mov rbx, QWORD PTR [rsp+400]
movaps xmm6, XMMWORD PTR [rsp+160]
movaps xmm7, XMMWORD PTR [rsp+176]
movaps xmm8, XMMWORD PTR [rsp+192]
movaps xmm9, XMMWORD PTR [rsp+208]
movaps xmm10, XMMWORD PTR [rsp+224]
movaps xmm11, XMMWORD PTR [rsp+240]
movaps xmm12, XMMWORD PTR [rsp+256]
movaps xmm13, XMMWORD PTR [rsp+272]
movaps xmm14, XMMWORD PTR [rsp+288]
movaps xmm15, XMMWORD PTR [rsp+304]
add rsp, 320
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
ret 0
FN_PREFIX(CryptonightWOW_template_double_end):

View file

@ -1,491 +0,0 @@
PUBLIC CryptonightWOW_template_part1
PUBLIC CryptonightWOW_template_mainloop
PUBLIC CryptonightWOW_template_part2
PUBLIC CryptonightWOW_template_part3
PUBLIC CryptonightWOW_template_end
PUBLIC CryptonightWOW_template_double_part1
PUBLIC CryptonightWOW_template_double_mainloop
PUBLIC CryptonightWOW_template_double_part2
PUBLIC CryptonightWOW_template_double_part3
PUBLIC CryptonightWOW_template_double_part4
PUBLIC CryptonightWOW_template_double_end
ALIGN(64)
CryptonightWOW_template_part1:
mov rcx, [rcx]
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
push r10
push r11
push r12
push r13
push r14
push r15
push rdi
sub rsp, 64
mov r12, rcx
mov r8, QWORD PTR [r12+32]
mov rdx, r12
xor r8, QWORD PTR [r12]
mov r15, QWORD PTR [r12+40]
mov r9, r8
xor r15, QWORD PTR [r12+8]
mov r11, QWORD PTR [r12+224]
mov r12, QWORD PTR [r12+56]
xor r12, QWORD PTR [rdx+24]
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
movaps XMMWORD PTR [rsp+48], xmm6
movq xmm0, r12
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+16], xmm8
movaps XMMWORD PTR [rsp], xmm9
mov r12, QWORD PTR [rdx+88]
xor r12, QWORD PTR [rdx+72]
movq xmm6, rax
mov rax, QWORD PTR [rdx+80]
xor rax, QWORD PTR [rdx+64]
punpcklqdq xmm6, xmm0
and r9d, 2097136
movq xmm0, r12
movq xmm7, rax
punpcklqdq xmm7, xmm0
mov r10d, r9d
movq xmm9, rsp
mov rsp, r8
mov r8d, 524288
mov ebx, [rdx+96]
mov esi, [rdx+100]
mov edi, [rdx+104]
mov ebp, [rdx+108]
ALIGN(64)
CryptonightWOW_template_mainloop:
movdqa xmm5, XMMWORD PTR [r9+r11]
movq xmm0, r15
movq xmm4, rsp
punpcklqdq xmm4, xmm0
lea rdx, QWORD PTR [r9+r11]
aesenc xmm5, xmm4
movd r10d, xmm5
and r10d, 2097136
mov r12d, r9d
mov eax, r9d
xor r9d, 48
xor r12d, 16
xor eax, 32
movdqu xmm0, XMMWORD PTR [r9+r11]
movdqu xmm2, XMMWORD PTR [r12+r11]
movdqu xmm1, XMMWORD PTR [rax+r11]
paddq xmm0, xmm7
paddq xmm2, xmm6
paddq xmm1, xmm4
movdqu XMMWORD PTR [r12+r11], xmm0
movq r12, xmm5
movdqu XMMWORD PTR [rax+r11], xmm2
movdqu XMMWORD PTR [r9+r11], xmm1
movdqa xmm0, xmm5
pxor xmm0, xmm6
movdqu XMMWORD PTR [rdx], xmm0
lea r13d, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or r13, rdx
xor r13, QWORD PTR [r10+r11]
mov r14, QWORD PTR [r10+r11+8]
movd eax, xmm6
movd edx, xmm7
pextrd r9d, xmm7, 2
CryptonightWOW_template_part2:
mov rax, r13
mul r12
movq xmm0, rax
movq xmm3, rdx
punpcklqdq xmm3, xmm0
mov r9d, r10d
mov r12d, r10d
xor r9d, 16
xor r12d, 32
xor r10d, 48
movdqa xmm1, XMMWORD PTR [r12+r11]
xor rdx, QWORD PTR [r12+r11]
xor rax, QWORD PTR [r11+r12+8]
movdqa xmm2, XMMWORD PTR [r9+r11]
pxor xmm3, xmm2
paddq xmm7, XMMWORD PTR [r10+r11]
paddq xmm1, xmm4
paddq xmm3, xmm6
movdqu XMMWORD PTR [r9+r11], xmm7
movdqu XMMWORD PTR [r12+r11], xmm3
movdqu XMMWORD PTR [r10+r11], xmm1
movdqa xmm7, xmm6
add r15, rax
add rsp, rdx
xor r10, 48
mov QWORD PTR [r10+r11], rsp
xor rsp, r13
mov r9d, esp
mov QWORD PTR [r10+r11+8], r15
and r9d, 2097136
xor r15, r14
movdqa xmm6, xmm5
dec r8d
jnz CryptonightWOW_template_mainloop
CryptonightWOW_template_part3:
movq rsp, xmm9
mov rbx, QWORD PTR [rsp+136]
mov rbp, QWORD PTR [rsp+144]
mov rsi, QWORD PTR [rsp+152]
movaps xmm6, XMMWORD PTR [rsp+48]
movaps xmm7, XMMWORD PTR [rsp+32]
movaps xmm8, XMMWORD PTR [rsp+16]
movaps xmm9, XMMWORD PTR [rsp]
add rsp, 64
pop rdi
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
ret 0
CryptonightWOW_template_end:
ALIGN(64)
CryptonightWOW_template_double_part1:
mov rdx, [rcx+8]
mov rcx, [rcx]
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 320
mov r14, QWORD PTR [rcx+32]
mov r8, rcx
xor r14, QWORD PTR [rcx]
mov r12, QWORD PTR [rcx+40]
mov ebx, r14d
mov rsi, QWORD PTR [rcx+224]
and ebx, 2097136
xor r12, QWORD PTR [rcx+8]
mov rcx, QWORD PTR [rcx+56]
xor rcx, QWORD PTR [r8+24]
mov rax, QWORD PTR [r8+48]
xor rax, QWORD PTR [r8+16]
mov r15, QWORD PTR [rdx+32]
xor r15, QWORD PTR [rdx]
movq xmm0, rcx
mov rcx, QWORD PTR [r8+88]
xor rcx, QWORD PTR [r8+72]
mov r13, QWORD PTR [rdx+40]
mov rdi, QWORD PTR [rdx+224]
xor r13, QWORD PTR [rdx+8]
movaps XMMWORD PTR [rsp+160], xmm6
movaps XMMWORD PTR [rsp+176], xmm7
movaps XMMWORD PTR [rsp+192], xmm8
movaps XMMWORD PTR [rsp+208], xmm9
movaps XMMWORD PTR [rsp+224], xmm10
movaps XMMWORD PTR [rsp+240], xmm11
movaps XMMWORD PTR [rsp+256], xmm12
movaps XMMWORD PTR [rsp+272], xmm13
movaps XMMWORD PTR [rsp+288], xmm14
movaps XMMWORD PTR [rsp+304], xmm15
movq xmm7, rax
mov rax, QWORD PTR [r8+80]
xor rax, QWORD PTR [r8+64]
movaps xmm1, XMMWORD PTR [rdx+96]
movaps xmm2, XMMWORD PTR [r8+96]
movaps XMMWORD PTR [rsp], xmm1
movaps XMMWORD PTR [rsp+16], xmm2
mov r8d, r15d
punpcklqdq xmm7, xmm0
movq xmm0, rcx
mov rcx, QWORD PTR [rdx+56]
xor rcx, QWORD PTR [rdx+24]
movq xmm9, rax
mov QWORD PTR [rsp+128], rsi
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
punpcklqdq xmm9, xmm0
movq xmm0, rcx
mov rcx, QWORD PTR [rdx+88]
xor rcx, QWORD PTR [rdx+72]
movq xmm8, rax
mov QWORD PTR [rsp+136], rdi
mov rax, QWORD PTR [rdx+80]
xor rax, QWORD PTR [rdx+64]
punpcklqdq xmm8, xmm0
and r8d, 2097136
movq xmm0, rcx
mov r11d, 524288
movq xmm10, rax
punpcklqdq xmm10, xmm0
movq xmm14, QWORD PTR [rsp+128]
movq xmm15, QWORD PTR [rsp+136]
ALIGN(64)
CryptonightWOW_template_double_mainloop:
movdqu xmm6, XMMWORD PTR [rbx+rsi]
movq xmm0, r12
mov ecx, ebx
movq xmm3, r14
punpcklqdq xmm3, xmm0
xor ebx, 16
aesenc xmm6, xmm3
movq rdx, xmm6
movq xmm4, r15
movdqu xmm0, XMMWORD PTR [rbx+rsi]
xor ebx, 48
paddq xmm0, xmm7
movdqu xmm1, XMMWORD PTR [rbx+rsi]
movdqu XMMWORD PTR [rbx+rsi], xmm0
paddq xmm1, xmm3
xor ebx, 16
mov eax, ebx
xor rax, 32
movdqu xmm0, XMMWORD PTR [rbx+rsi]
movdqu XMMWORD PTR [rbx+rsi], xmm1
paddq xmm0, xmm9
movdqu XMMWORD PTR [rax+rsi], xmm0
movdqa xmm0, xmm6
pxor xmm0, xmm7
movdqu XMMWORD PTR [rcx+rsi], xmm0
mov esi, edx
movdqu xmm5, XMMWORD PTR [r8+rdi]
and esi, 2097136
mov ecx, r8d
movq xmm0, r13
punpcklqdq xmm4, xmm0
xor r8d, 16
aesenc xmm5, xmm4
movdqu xmm0, XMMWORD PTR [r8+rdi]
xor r8d, 48
paddq xmm0, xmm8
movdqu xmm1, XMMWORD PTR [r8+rdi]
movdqu XMMWORD PTR [r8+rdi], xmm0
paddq xmm1, xmm4
xor r8d, 16
mov eax, r8d
xor rax, 32
movdqu xmm0, XMMWORD PTR [r8+rdi]
movdqu XMMWORD PTR [r8+rdi], xmm1
paddq xmm0, xmm10
movdqu XMMWORD PTR [rax+rdi], xmm0
movdqa xmm0, xmm5
pxor xmm0, xmm8
movdqu XMMWORD PTR [rcx+rdi], xmm0
movq rdi, xmm5
movq rcx, xmm14
mov ebp, edi
mov r8, QWORD PTR [rcx+rsi]
mov r10, QWORD PTR [rcx+rsi+8]
lea r9, QWORD PTR [rcx+rsi]
xor esi, 16
movq xmm0, rsp
movq xmm1, rsi
movq xmm2, rdi
movq xmm11, rbp
movq xmm12, r15
movq xmm13, rdx
mov [rsp+104], rcx
mov [rsp+112], r9
mov ebx, DWORD PTR [rsp+16]
mov esi, DWORD PTR [rsp+20]
mov edi, DWORD PTR [rsp+24]
mov ebp, DWORD PTR [rsp+28]
lea eax, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or rax, rdx
xor r8, rax
movd esp, xmm3
pextrd r15d, xmm3, 2
movd eax, xmm7
movd edx, xmm9
pextrd r9d, xmm9, 2
CryptonightWOW_template_double_part2:
movq rsp, xmm0
mov DWORD PTR [rsp+16], ebx
mov DWORD PTR [rsp+20], esi
mov DWORD PTR [rsp+24], edi
mov DWORD PTR [rsp+28], ebp
movq rsi, xmm1
movq rdi, xmm2
movq rbp, xmm11
movq r15, xmm12
movq rdx, xmm13
mov rcx, [rsp+104]
mov r9, [rsp+112]
mov rbx, r8
mov rax, r8
mul rdx
and ebp, 2097136
mov r8, rax
movq xmm1, rdx
movq xmm0, r8
punpcklqdq xmm1, xmm0
pxor xmm1, XMMWORD PTR [rcx+rsi]
xor esi, 48
paddq xmm1, xmm7
movdqu xmm2, XMMWORD PTR [rsi+rcx]
xor rdx, QWORD PTR [rsi+rcx]
paddq xmm2, xmm3
xor r8, QWORD PTR [rsi+rcx+8]
movdqu XMMWORD PTR [rsi+rcx], xmm1
xor esi, 16
mov eax, esi
mov rsi, rcx
movdqu xmm0, XMMWORD PTR [rax+rcx]
movdqu XMMWORD PTR [rax+rcx], xmm2
paddq xmm0, xmm9
add r12, r8
xor rax, 32
add r14, rdx
movdqa xmm9, xmm7
movdqa xmm7, xmm6
movdqu XMMWORD PTR [rax+rcx], xmm0
mov QWORD PTR [r9+8], r12
xor r12, r10
mov QWORD PTR [r9], r14
movq rcx, xmm15
xor r14, rbx
mov r10d, ebp
mov ebx, r14d
xor ebp, 16
and ebx, 2097136
mov r8, QWORD PTR [r10+rcx]
mov r9, QWORD PTR [r10+rcx+8]
movq xmm0, rsp
movq xmm1, rbx
movq xmm2, rsi
movq xmm11, rdi
movq xmm12, rbp
movq xmm13, r15
mov [rsp+104], rcx
mov [rsp+112], r9
mov ebx, DWORD PTR [rsp]
mov esi, DWORD PTR [rsp+4]
mov edi, DWORD PTR [rsp+8]
mov ebp, DWORD PTR [rsp+12]
lea eax, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or rax, rdx
xor r8, rax
movq xmm3, r8
movd esp, xmm4
pextrd r15d, xmm4, 2
movd eax, xmm8
movd edx, xmm10
pextrd r9d, xmm10, 2
CryptonightWOW_template_double_part3:
movq rsp, xmm0
mov DWORD PTR [rsp], ebx
mov DWORD PTR [rsp+4], esi
mov DWORD PTR [rsp+8], edi
mov DWORD PTR [rsp+12], ebp
movq rbx, xmm1
movq rsi, xmm2
movq rdi, xmm11
movq rbp, xmm12
movq r15, xmm13
mov rcx, [rsp+104]
mov r9, [rsp+112]
mov rax, r8
mul rdi
movq xmm1, rdx
movq xmm0, rax
punpcklqdq xmm1, xmm0
mov rdi, rcx
mov r8, rax
pxor xmm1, XMMWORD PTR [rbp+rcx]
xor ebp, 48
paddq xmm1, xmm8
xor r8, QWORD PTR [rbp+rcx+8]
xor rdx, QWORD PTR [rbp+rcx]
add r13, r8
movdqu xmm2, XMMWORD PTR [rbp+rcx]
add r15, rdx
movdqu XMMWORD PTR [rbp+rcx], xmm1
paddq xmm2, xmm4
xor ebp, 16
mov eax, ebp
xor rax, 32
movdqu xmm0, XMMWORD PTR [rbp+rcx]
movdqu XMMWORD PTR [rbp+rcx], xmm2
paddq xmm0, xmm10
movdqu XMMWORD PTR [rax+rcx], xmm0
movq rax, xmm3
movdqa xmm10, xmm8
mov QWORD PTR [r10+rcx], r15
movdqa xmm8, xmm5
xor r15, rax
mov QWORD PTR [r10+rcx+8], r13
mov r8d, r15d
xor r13, r9
and r8d, 2097136
dec r11d
jnz CryptonightWOW_template_double_mainloop
CryptonightWOW_template_double_part4:
mov rbx, QWORD PTR [rsp+400]
movaps xmm6, XMMWORD PTR [rsp+160]
movaps xmm7, XMMWORD PTR [rsp+176]
movaps xmm8, XMMWORD PTR [rsp+192]
movaps xmm9, XMMWORD PTR [rsp+208]
movaps xmm10, XMMWORD PTR [rsp+224]
movaps xmm11, XMMWORD PTR [rsp+240]
movaps xmm12, XMMWORD PTR [rsp+256]
movaps xmm13, XMMWORD PTR [rsp+272]
movaps xmm14, XMMWORD PTR [rsp+288]
movaps xmm15, XMMWORD PTR [rsp+304]
add rsp, 320
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
ret 0
CryptonightWOW_template_double_end:

View file

@ -516,9 +516,7 @@ PUBLIC CryptonightR_instruction_mov254
PUBLIC CryptonightR_instruction_mov255
PUBLIC CryptonightR_instruction_mov256
INCLUDE CryptonightWOW_template_win.inc
INCLUDE CryptonightR_template_win.inc
INCLUDE CryptonightWOW_soft_aes_template_win.inc
INCLUDE CryptonightR_soft_aes_template_win.inc
CryptonightR_instruction0:

View file

@ -1,268 +0,0 @@
PUBLIC CryptonightWOW_soft_aes_template_part1
PUBLIC CryptonightWOW_soft_aes_template_mainloop
PUBLIC CryptonightWOW_soft_aes_template_part2
PUBLIC CryptonightWOW_soft_aes_template_part3
PUBLIC CryptonightWOW_soft_aes_template_end
ALIGN(64)
CryptonightWOW_soft_aes_template_part1:
mov rcx, [rcx]
mov QWORD PTR [rsp+8], rcx
push rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 232
mov eax, [rcx+96]
mov ebx, [rcx+100]
mov esi, [rcx+104]
mov edx, [rcx+108]
mov [rsp+144], eax
mov [rsp+148], ebx
mov [rsp+152], esi
mov [rsp+156], edx
mov rax, QWORD PTR [rcx+48]
mov r10, rcx
xor rax, QWORD PTR [rcx+16]
mov r8, QWORD PTR [rcx+32]
xor r8, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+40]
xor r9, QWORD PTR [rcx+8]
movd xmm4, rax
mov rdx, QWORD PTR [rcx+56]
xor rdx, QWORD PTR [rcx+24]
mov r11, QWORD PTR [rcx+224]
mov rcx, QWORD PTR [rcx+88]
xor rcx, QWORD PTR [r10+72]
mov rax, QWORD PTR [r10+80]
movd xmm0, rdx
xor rax, QWORD PTR [r10+64]
movaps XMMWORD PTR [rsp+16], xmm6
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+48], xmm8
movaps XMMWORD PTR [rsp+64], xmm9
movaps XMMWORD PTR [rsp+80], xmm10
movaps XMMWORD PTR [rsp+96], xmm11
movaps XMMWORD PTR [rsp+112], xmm12
movaps XMMWORD PTR [rsp+128], xmm13
movd xmm5, rax
mov rax, r8
punpcklqdq xmm4, xmm0
and eax, 2097136
movd xmm10, QWORD PTR [r10+96]
movd xmm0, rcx
mov rcx, QWORD PTR [r10+104]
xorps xmm9, xmm9
mov QWORD PTR [rsp+328], rax
movd xmm12, r11
mov QWORD PTR [rsp+320], r9
punpcklqdq xmm5, xmm0
movd xmm13, rcx
mov r12d, 524288
ALIGN(64)
CryptonightWOW_soft_aes_template_mainloop:
movd xmm11, r12d
mov r12, QWORD PTR [r10+272]
lea r13, QWORD PTR [rax+r11]
mov esi, DWORD PTR [r13]
movd xmm0, r9
mov r10d, DWORD PTR [r13+4]
movd xmm7, r8
mov ebp, DWORD PTR [r13+12]
mov r14d, DWORD PTR [r13+8]
mov rdx, QWORD PTR [rsp+328]
movzx ecx, sil
shr esi, 8
punpcklqdq xmm7, xmm0
mov r15d, DWORD PTR [r12+rcx*4]
movzx ecx, r10b
shr r10d, 8
mov edi, DWORD PTR [r12+rcx*4]
movzx ecx, r14b
shr r14d, 8
mov ebx, DWORD PTR [r12+rcx*4]
movzx ecx, bpl
shr ebp, 8
mov r9d, DWORD PTR [r12+rcx*4]
movzx ecx, r10b
shr r10d, 8
xor r15d, DWORD PTR [r12+rcx*4+1024]
movzx ecx, r14b
shr r14d, 8
mov eax, r14d
shr eax, 8
xor edi, DWORD PTR [r12+rcx*4+1024]
add eax, 256
movzx ecx, bpl
shr ebp, 8
xor ebx, DWORD PTR [r12+rcx*4+1024]
movzx ecx, sil
shr esi, 8
xor r9d, DWORD PTR [r12+rcx*4+1024]
add r12, 2048
movzx ecx, r10b
shr r10d, 8
add r10d, 256
mov r11d, DWORD PTR [r12+rax*4]
xor r11d, DWORD PTR [r12+rcx*4]
xor r11d, r9d
movzx ecx, sil
mov r10d, DWORD PTR [r12+r10*4]
shr esi, 8
add esi, 256
xor r10d, DWORD PTR [r12+rcx*4]
movzx ecx, bpl
xor r10d, ebx
shr ebp, 8
movd xmm1, r11d
add ebp, 256
movd r11, xmm12
mov r9d, DWORD PTR [r12+rcx*4]
xor r9d, DWORD PTR [r12+rsi*4]
mov eax, DWORD PTR [r12+rbp*4]
xor r9d, edi
movzx ecx, r14b
movd xmm0, r10d
movd xmm2, r9d
xor eax, DWORD PTR [r12+rcx*4]
mov rcx, rdx
xor eax, r15d
punpckldq xmm2, xmm1
xor rcx, 16
movd xmm6, eax
mov rax, rdx
punpckldq xmm6, xmm0
xor rax, 32
punpckldq xmm6, xmm2
xor rdx, 48
movdqu xmm2, XMMWORD PTR [rcx+r11]
pxor xmm6, xmm7
paddq xmm2, xmm4
movdqu xmm1, XMMWORD PTR [rax+r11]
movdqu xmm0, XMMWORD PTR [rdx+r11]
paddq xmm0, xmm5
movdqu XMMWORD PTR [rcx+r11], xmm0
movdqu XMMWORD PTR [rax+r11], xmm2
movd rcx, xmm13
paddq xmm1, xmm7
movdqu XMMWORD PTR [rdx+r11], xmm1
movd rdi, xmm6
mov r10, rdi
and r10d, 2097136
movdqa xmm0, xmm6
pxor xmm0, xmm4
movdqu XMMWORD PTR [r13], xmm0
mov ebx, [rsp+144]
mov ebp, [rsp+152]
add ebx, [rsp+148]
add ebp, [rsp+156]
shl rbp, 32
or rbx, rbp
xor rbx, QWORD PTR [r10+r11]
lea r14, QWORD PTR [r10+r11]
mov rbp, QWORD PTR [r14+8]
mov [rsp+160], rbx
mov [rsp+168], rdi
mov [rsp+176], rbp
mov [rsp+184], r10
mov r10, rsp
mov ebx, [rsp+144]
mov esi, [rsp+148]
mov edi, [rsp+152]
mov ebp, [rsp+156]
movd esp, xmm7
movaps xmm0, xmm7
psrldq xmm0, 8
movd r15d, xmm0
movd eax, xmm4
movd edx, xmm5
CryptonightWOW_soft_aes_template_part2:
mov rsp, r10
mov [rsp+144], ebx
mov [rsp+148], esi
mov [rsp+152], edi
mov [rsp+156], ebp
mov rbx, [rsp+160]
mov rdi, [rsp+168]
mov rbp, [rsp+176]
mov r10, [rsp+184]
mov r9, r10
xor r9, 16
mov rcx, r10
xor rcx, 32
xor r10, 48
mov rax, rbx
mul rdi
movdqu xmm2, XMMWORD PTR [r9+r11]
movdqu xmm1, XMMWORD PTR [rcx+r11]
paddq xmm1, xmm7
movd xmm0, rax
movd xmm3, rdx
xor rax, QWORD PTR [r11+rcx+8]
xor rdx, QWORD PTR [rcx+r11]
punpcklqdq xmm3, xmm0
add r8, rdx
movdqu xmm0, XMMWORD PTR [r10+r11]
pxor xmm2, xmm3
paddq xmm0, xmm5
paddq xmm2, xmm4
movdqu XMMWORD PTR [r9+r11], xmm0
movdqa xmm5, xmm4
mov r9, QWORD PTR [rsp+320]
movdqa xmm4, xmm6
add r9, rax
movdqu XMMWORD PTR [rcx+r11], xmm2
movdqu XMMWORD PTR [r10+r11], xmm1
mov r10, QWORD PTR [rsp+304]
movd r12d, xmm11
mov QWORD PTR [r14], r8
xor r8, rbx
mov rax, r8
mov QWORD PTR [r14+8], r9
and eax, 2097136
xor r9, rbp
mov QWORD PTR [rsp+320], r9
mov QWORD PTR [rsp+328], rax
sub r12d, 1
jne CryptonightWOW_soft_aes_template_mainloop
CryptonightWOW_soft_aes_template_part3:
movaps xmm6, XMMWORD PTR [rsp+16]
movaps xmm7, XMMWORD PTR [rsp+32]
movaps xmm8, XMMWORD PTR [rsp+48]
movaps xmm9, XMMWORD PTR [rsp+64]
movaps xmm10, XMMWORD PTR [rsp+80]
movaps xmm11, XMMWORD PTR [rsp+96]
movaps xmm12, XMMWORD PTR [rsp+112]
movaps xmm13, XMMWORD PTR [rsp+128]
add rsp, 232
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
pop rbx
ret
CryptonightWOW_soft_aes_template_end:

View file

@ -1,491 +0,0 @@
PUBLIC CryptonightWOW_template_part1
PUBLIC CryptonightWOW_template_mainloop
PUBLIC CryptonightWOW_template_part2
PUBLIC CryptonightWOW_template_part3
PUBLIC CryptonightWOW_template_end
PUBLIC CryptonightWOW_template_double_part1
PUBLIC CryptonightWOW_template_double_mainloop
PUBLIC CryptonightWOW_template_double_part2
PUBLIC CryptonightWOW_template_double_part3
PUBLIC CryptonightWOW_template_double_part4
PUBLIC CryptonightWOW_template_double_end
ALIGN(64)
CryptonightWOW_template_part1:
mov rcx, [rcx]
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
push r10
push r11
push r12
push r13
push r14
push r15
push rdi
sub rsp, 64
mov r12, rcx
mov r8, QWORD PTR [r12+32]
mov rdx, r12
xor r8, QWORD PTR [r12]
mov r15, QWORD PTR [r12+40]
mov r9, r8
xor r15, QWORD PTR [r12+8]
mov r11, QWORD PTR [r12+224]
mov r12, QWORD PTR [r12+56]
xor r12, QWORD PTR [rdx+24]
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
movaps XMMWORD PTR [rsp+48], xmm6
movd xmm0, r12
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+16], xmm8
movaps XMMWORD PTR [rsp], xmm9
mov r12, QWORD PTR [rdx+88]
xor r12, QWORD PTR [rdx+72]
movd xmm6, rax
mov rax, QWORD PTR [rdx+80]
xor rax, QWORD PTR [rdx+64]
punpcklqdq xmm6, xmm0
and r9d, 2097136
movd xmm0, r12
movd xmm7, rax
punpcklqdq xmm7, xmm0
mov r10d, r9d
movd xmm9, rsp
mov rsp, r8
mov r8d, 524288
mov ebx, [rdx+96]
mov esi, [rdx+100]
mov edi, [rdx+104]
mov ebp, [rdx+108]
ALIGN(64)
CryptonightWOW_template_mainloop:
movdqa xmm5, XMMWORD PTR [r9+r11]
movd xmm0, r15
movd xmm4, rsp
punpcklqdq xmm4, xmm0
lea rdx, QWORD PTR [r9+r11]
aesenc xmm5, xmm4
movd r10d, xmm5
and r10d, 2097136
mov r12d, r9d
mov eax, r9d
xor r9d, 48
xor r12d, 16
xor eax, 32
movdqu xmm0, XMMWORD PTR [r9+r11]
movdqu xmm2, XMMWORD PTR [r12+r11]
movdqu xmm1, XMMWORD PTR [rax+r11]
paddq xmm0, xmm7
paddq xmm2, xmm6
paddq xmm1, xmm4
movdqu XMMWORD PTR [r12+r11], xmm0
movd r12, xmm5
movdqu XMMWORD PTR [rax+r11], xmm2
movdqu XMMWORD PTR [r9+r11], xmm1
movdqa xmm0, xmm5
pxor xmm0, xmm6
movdqu XMMWORD PTR [rdx], xmm0
lea r13d, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or r13, rdx
xor r13, QWORD PTR [r10+r11]
mov r14, QWORD PTR [r10+r11+8]
movd eax, xmm6
movd edx, xmm7
pextrd r9d, xmm7, 2
CryptonightWOW_template_part2:
mov rax, r13
mul r12
movd xmm0, rax
movd xmm3, rdx
punpcklqdq xmm3, xmm0
mov r9d, r10d
mov r12d, r10d
xor r9d, 16
xor r12d, 32
xor r10d, 48
movdqa xmm1, XMMWORD PTR [r12+r11]
xor rdx, QWORD PTR [r12+r11]
xor rax, QWORD PTR [r11+r12+8]
movdqa xmm2, XMMWORD PTR [r9+r11]
pxor xmm3, xmm2
paddq xmm7, XMMWORD PTR [r10+r11]
paddq xmm1, xmm4
paddq xmm3, xmm6
movdqu XMMWORD PTR [r9+r11], xmm7
movdqu XMMWORD PTR [r12+r11], xmm3
movdqu XMMWORD PTR [r10+r11], xmm1
movdqa xmm7, xmm6
add r15, rax
add rsp, rdx
xor r10, 48
mov QWORD PTR [r10+r11], rsp
xor rsp, r13
mov r9d, esp
mov QWORD PTR [r10+r11+8], r15
and r9d, 2097136
xor r15, r14
movdqa xmm6, xmm5
dec r8d
jnz CryptonightWOW_template_mainloop
CryptonightWOW_template_part3:
movd rsp, xmm9
mov rbx, QWORD PTR [rsp+136]
mov rbp, QWORD PTR [rsp+144]
mov rsi, QWORD PTR [rsp+152]
movaps xmm6, XMMWORD PTR [rsp+48]
movaps xmm7, XMMWORD PTR [rsp+32]
movaps xmm8, XMMWORD PTR [rsp+16]
movaps xmm9, XMMWORD PTR [rsp]
add rsp, 64
pop rdi
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
ret 0
CryptonightWOW_template_end:
ALIGN(64)
CryptonightWOW_template_double_part1:
mov rdx, [rcx+8]
mov rcx, [rcx]
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 320
mov r14, QWORD PTR [rcx+32]
mov r8, rcx
xor r14, QWORD PTR [rcx]
mov r12, QWORD PTR [rcx+40]
mov ebx, r14d
mov rsi, QWORD PTR [rcx+224]
and ebx, 2097136
xor r12, QWORD PTR [rcx+8]
mov rcx, QWORD PTR [rcx+56]
xor rcx, QWORD PTR [r8+24]
mov rax, QWORD PTR [r8+48]
xor rax, QWORD PTR [r8+16]
mov r15, QWORD PTR [rdx+32]
xor r15, QWORD PTR [rdx]
movd xmm0, rcx
mov rcx, QWORD PTR [r8+88]
xor rcx, QWORD PTR [r8+72]
mov r13, QWORD PTR [rdx+40]
mov rdi, QWORD PTR [rdx+224]
xor r13, QWORD PTR [rdx+8]
movaps XMMWORD PTR [rsp+160], xmm6
movaps XMMWORD PTR [rsp+176], xmm7
movaps XMMWORD PTR [rsp+192], xmm8
movaps XMMWORD PTR [rsp+208], xmm9
movaps XMMWORD PTR [rsp+224], xmm10
movaps XMMWORD PTR [rsp+240], xmm11
movaps XMMWORD PTR [rsp+256], xmm12
movaps XMMWORD PTR [rsp+272], xmm13
movaps XMMWORD PTR [rsp+288], xmm14
movaps XMMWORD PTR [rsp+304], xmm15
movd xmm7, rax
mov rax, QWORD PTR [r8+80]
xor rax, QWORD PTR [r8+64]
movaps xmm1, XMMWORD PTR [rdx+96]
movaps xmm2, XMMWORD PTR [r8+96]
movaps XMMWORD PTR [rsp], xmm1
movaps XMMWORD PTR [rsp+16], xmm2
mov r8d, r15d
punpcklqdq xmm7, xmm0
movd xmm0, rcx
mov rcx, QWORD PTR [rdx+56]
xor rcx, QWORD PTR [rdx+24]
movd xmm9, rax
mov QWORD PTR [rsp+128], rsi
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
punpcklqdq xmm9, xmm0
movd xmm0, rcx
mov rcx, QWORD PTR [rdx+88]
xor rcx, QWORD PTR [rdx+72]
movd xmm8, rax
mov QWORD PTR [rsp+136], rdi
mov rax, QWORD PTR [rdx+80]
xor rax, QWORD PTR [rdx+64]
punpcklqdq xmm8, xmm0
and r8d, 2097136
movd xmm0, rcx
mov r11d, 524288
movd xmm10, rax
punpcklqdq xmm10, xmm0
movd xmm14, QWORD PTR [rsp+128]
movd xmm15, QWORD PTR [rsp+136]
ALIGN(64)
CryptonightWOW_template_double_mainloop:
movdqu xmm6, XMMWORD PTR [rbx+rsi]
movd xmm0, r12
mov ecx, ebx
movd xmm3, r14
punpcklqdq xmm3, xmm0
xor ebx, 16
aesenc xmm6, xmm3
movd rdx, xmm6
movd xmm4, r15
movdqu xmm0, XMMWORD PTR [rbx+rsi]
xor ebx, 48
paddq xmm0, xmm7
movdqu xmm1, XMMWORD PTR [rbx+rsi]
movdqu XMMWORD PTR [rbx+rsi], xmm0
paddq xmm1, xmm3
xor ebx, 16
mov eax, ebx
xor rax, 32
movdqu xmm0, XMMWORD PTR [rbx+rsi]
movdqu XMMWORD PTR [rbx+rsi], xmm1
paddq xmm0, xmm9
movdqu XMMWORD PTR [rax+rsi], xmm0
movdqa xmm0, xmm6
pxor xmm0, xmm7
movdqu XMMWORD PTR [rcx+rsi], xmm0
mov esi, edx
movdqu xmm5, XMMWORD PTR [r8+rdi]
and esi, 2097136
mov ecx, r8d
movd xmm0, r13
punpcklqdq xmm4, xmm0
xor r8d, 16
aesenc xmm5, xmm4
movdqu xmm0, XMMWORD PTR [r8+rdi]
xor r8d, 48
paddq xmm0, xmm8
movdqu xmm1, XMMWORD PTR [r8+rdi]
movdqu XMMWORD PTR [r8+rdi], xmm0
paddq xmm1, xmm4
xor r8d, 16
mov eax, r8d
xor rax, 32
movdqu xmm0, XMMWORD PTR [r8+rdi]
movdqu XMMWORD PTR [r8+rdi], xmm1
paddq xmm0, xmm10
movdqu XMMWORD PTR [rax+rdi], xmm0
movdqa xmm0, xmm5
pxor xmm0, xmm8
movdqu XMMWORD PTR [rcx+rdi], xmm0
movd rdi, xmm5
movd rcx, xmm14
mov ebp, edi
mov r8, QWORD PTR [rcx+rsi]
mov r10, QWORD PTR [rcx+rsi+8]
lea r9, QWORD PTR [rcx+rsi]
xor esi, 16
movd xmm0, rsp
movd xmm1, rsi
movd xmm2, rdi
movd xmm11, rbp
movd xmm12, r15
movd xmm13, rdx
mov [rsp+104], rcx
mov [rsp+112], r9
mov ebx, DWORD PTR [rsp+16]
mov esi, DWORD PTR [rsp+20]
mov edi, DWORD PTR [rsp+24]
mov ebp, DWORD PTR [rsp+28]
lea eax, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or rax, rdx
xor r8, rax
movd esp, xmm3
pextrd r15d, xmm3, 2
movd eax, xmm7
movd edx, xmm9
pextrd r9d, xmm9, 2
CryptonightWOW_template_double_part2:
movd rsp, xmm0
mov DWORD PTR [rsp+16], ebx
mov DWORD PTR [rsp+20], esi
mov DWORD PTR [rsp+24], edi
mov DWORD PTR [rsp+28], ebp
movd rsi, xmm1
movd rdi, xmm2
movd rbp, xmm11
movd r15, xmm12
movd rdx, xmm13
mov rcx, [rsp+104]
mov r9, [rsp+112]
mov rbx, r8
mov rax, r8
mul rdx
and ebp, 2097136
mov r8, rax
movd xmm1, rdx
movd xmm0, r8
punpcklqdq xmm1, xmm0
pxor xmm1, XMMWORD PTR [rcx+rsi]
xor esi, 48
paddq xmm1, xmm7
movdqu xmm2, XMMWORD PTR [rsi+rcx]
xor rdx, QWORD PTR [rsi+rcx]
paddq xmm2, xmm3
xor r8, QWORD PTR [rsi+rcx+8]
movdqu XMMWORD PTR [rsi+rcx], xmm1
xor esi, 16
mov eax, esi
mov rsi, rcx
movdqu xmm0, XMMWORD PTR [rax+rcx]
movdqu XMMWORD PTR [rax+rcx], xmm2
paddq xmm0, xmm9
add r12, r8
xor rax, 32
add r14, rdx
movdqa xmm9, xmm7
movdqa xmm7, xmm6
movdqu XMMWORD PTR [rax+rcx], xmm0
mov QWORD PTR [r9+8], r12
xor r12, r10
mov QWORD PTR [r9], r14
movd rcx, xmm15
xor r14, rbx
mov r10d, ebp
mov ebx, r14d
xor ebp, 16
and ebx, 2097136
mov r8, QWORD PTR [r10+rcx]
mov r9, QWORD PTR [r10+rcx+8]
movd xmm0, rsp
movd xmm1, rbx
movd xmm2, rsi
movd xmm11, rdi
movd xmm12, rbp
movd xmm13, r15
mov [rsp+104], rcx
mov [rsp+112], r9
mov ebx, DWORD PTR [rsp]
mov esi, DWORD PTR [rsp+4]
mov edi, DWORD PTR [rsp+8]
mov ebp, DWORD PTR [rsp+12]
lea eax, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or rax, rdx
xor r8, rax
movd xmm3, r8
movd esp, xmm4
pextrd r15d, xmm4, 2
movd eax, xmm8
movd edx, xmm10
pextrd r9d, xmm10, 2
CryptonightWOW_template_double_part3:
movd rsp, xmm0
mov DWORD PTR [rsp], ebx
mov DWORD PTR [rsp+4], esi
mov DWORD PTR [rsp+8], edi
mov DWORD PTR [rsp+12], ebp
movd rbx, xmm1
movd rsi, xmm2
movd rdi, xmm11
movd rbp, xmm12
movd r15, xmm13
mov rcx, [rsp+104]
mov r9, [rsp+112]
mov rax, r8
mul rdi
movd xmm1, rdx
movd xmm0, rax
punpcklqdq xmm1, xmm0
mov rdi, rcx
mov r8, rax
pxor xmm1, XMMWORD PTR [rbp+rcx]
xor ebp, 48
paddq xmm1, xmm8
xor r8, QWORD PTR [rbp+rcx+8]
xor rdx, QWORD PTR [rbp+rcx]
add r13, r8
movdqu xmm2, XMMWORD PTR [rbp+rcx]
add r15, rdx
movdqu XMMWORD PTR [rbp+rcx], xmm1
paddq xmm2, xmm4
xor ebp, 16
mov eax, ebp
xor rax, 32
movdqu xmm0, XMMWORD PTR [rbp+rcx]
movdqu XMMWORD PTR [rbp+rcx], xmm2
paddq xmm0, xmm10
movdqu XMMWORD PTR [rax+rcx], xmm0
movd rax, xmm3
movdqa xmm10, xmm8
mov QWORD PTR [r10+rcx], r15
movdqa xmm8, xmm5
xor r15, rax
mov QWORD PTR [r10+rcx+8], r13
mov r8d, r15d
xor r13, r9
and r8d, 2097136
dec r11d
jnz CryptonightWOW_template_double_mainloop
CryptonightWOW_template_double_part4:
mov rbx, QWORD PTR [rsp+400]
movaps xmm6, XMMWORD PTR [rsp+160]
movaps xmm7, XMMWORD PTR [rsp+176]
movaps xmm8, XMMWORD PTR [rsp+192]
movaps xmm9, XMMWORD PTR [rsp+208]
movaps xmm10, XMMWORD PTR [rsp+224]
movaps xmm11, XMMWORD PTR [rsp+240]
movaps xmm12, XMMWORD PTR [rsp+256]
movaps xmm13, XMMWORD PTR [rsp+272]
movaps xmm14, XMMWORD PTR [rsp+288]
movaps xmm15, XMMWORD PTR [rsp+304]
add rsp, 320
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
ret 0
CryptonightWOW_template_double_end:

View file

@ -99,20 +99,6 @@ static inline void add_random_math(uint8_t* &p, const V4_Instruction* code, int
}
}
void wow_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
{
uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
uint8_t* p = p0;
add_code(p, CryptonightWOW_template_part1, CryptonightWOW_template_part2);
add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
add_code(p, CryptonightWOW_template_part2, CryptonightWOW_template_part3);
*(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_template_mainloop) - ((const uint8_t*)CryptonightWOW_template_part1)) - (p - p0));
add_code(p, CryptonightWOW_template_part3, CryptonightWOW_template_end);
xmrig::VirtualMemory::flushInstructionCache(machine_code, p - p0);
}
void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
{
uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
@ -127,22 +113,6 @@ void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_co
xmrig::VirtualMemory::flushInstructionCache(machine_code, p - p0);
}
void wow_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
{
uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
uint8_t* p = p0;
add_code(p, CryptonightWOW_template_double_part1, CryptonightWOW_template_double_part2);
add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
add_code(p, CryptonightWOW_template_double_part2, CryptonightWOW_template_double_part3);
add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
add_code(p, CryptonightWOW_template_double_part3, CryptonightWOW_template_double_part4);
*(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_template_double_mainloop) - ((const uint8_t*)CryptonightWOW_template_double_part1)) - (p - p0));
add_code(p, CryptonightWOW_template_double_part4, CryptonightWOW_template_double_end);
xmrig::VirtualMemory::flushInstructionCache(machine_code, p - p0);
}
void v4_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
{
uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
@ -159,20 +129,6 @@ void v4_compile_code_double(const V4_Instruction* code, int code_size, void* mac
xmrig::VirtualMemory::flushInstructionCache(machine_code, p - p0);
}
void wow_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
{
uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
uint8_t* p = p0;
add_code(p, CryptonightWOW_soft_aes_template_part1, CryptonightWOW_soft_aes_template_part2);
add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
add_code(p, CryptonightWOW_soft_aes_template_part2, CryptonightWOW_soft_aes_template_part3);
*(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightWOW_soft_aes_template_part1)) - (p - p0));
add_code(p, CryptonightWOW_soft_aes_template_part3, CryptonightWOW_soft_aes_template_end);
xmrig::VirtualMemory::flushInstructionCache(machine_code, p - p0);
}
void v4_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
{
uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);

View file

@ -255,7 +255,7 @@ static int v4_random_math_init(struct V4_Instruction* code, const uint64_t heigh
code_size = 0;
int total_iterations = 0;
r8_used = (ALGO == xmrig::Algorithm::CN_WOW);
r8_used = false;
// Generate random code to achieve minimal required latency for our abstract CPU
// Try to get this latency for all 4 registers
@ -299,7 +299,7 @@ static int v4_random_math_init(struct V4_Instruction* code, const uint64_t heigh
// Don't do ADD/SUB/XOR with the same register
if (((opcode == ADD) || (opcode == SUB) || (opcode == XOR)) && (a == b)) {
// a is always < 4, so we don't need to check bounds here
b = (ALGO == xmrig::Algorithm::CN_WOW) ? (a + 4) : 8;
b = 8;
src_index = b;
}

View file

@ -24,27 +24,24 @@
*/
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "crypto/common/Algorithm.h"
#include "crypto/cn/CnAlgo.h"
#include "crypto/common/Algorithm.h"
#include "rapidjson/document.h"
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#ifdef _MSC_VER
# define strcasecmp _stricmp
#endif
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
namespace xmrig {
@ -67,7 +64,6 @@ static AlgoName const algorithm_names[] = {
{ "cryptonight_v8", nullptr, Algorithm::CN_2 },
{ "cryptonight/r", "cn/r", Algorithm::CN_R },
{ "cryptonight_r", nullptr, Algorithm::CN_R },
{ "cryptonight/wow", "cn/wow", Algorithm::CN_WOW },
{ "cryptonight/fast", "cn/fast", Algorithm::CN_FAST },
{ "cryptonight/msr", "cn/msr", Algorithm::CN_FAST },
{ "cryptonight/half", "cn/half", Algorithm::CN_HALF },
@ -239,7 +235,6 @@ xmrig::Algorithm::Family xmrig::Algorithm::family(Id id)
case CN_1:
case CN_2:
case CN_R:
case CN_WOW:
case CN_FAST:
case CN_HALF:
case CN_XAO:
@ -284,9 +279,8 @@ xmrig::Algorithm::Family xmrig::Algorithm::family(Id id)
return ARGON2;
# endif
case INVALID:
case MAX:
return UNKNOWN;
default:
break;
}
return UNKNOWN;
@ -299,9 +293,9 @@ xmrig::Algorithm::Id xmrig::Algorithm::parse(const char *name)
return INVALID;
}
for (size_t i = 0; i < ARRAY_SIZE(algorithm_names); i++) {
if ((strcasecmp(name, algorithm_names[i].name) == 0) || (algorithm_names[i].shortName != nullptr && strcasecmp(name, algorithm_names[i].shortName) == 0)) {
return algorithm_names[i].id;
for (const AlgoName &item : algorithm_names) {
if ((strcasecmp(name, item.name) == 0) || (item.shortName != nullptr && strcasecmp(name, item.shortName) == 0)) {
return item.id;
}
}
@ -311,9 +305,9 @@ xmrig::Algorithm::Id xmrig::Algorithm::parse(const char *name)
const char *xmrig::Algorithm::name(bool shortName) const
{
for (size_t i = 0; i < ARRAY_SIZE(algorithm_names); i++) {
if (algorithm_names[i].id == m_id) {
return (shortName && algorithm_names[i].shortName) ? algorithm_names[i].shortName : algorithm_names[i].name;
for (const AlgoName &item : algorithm_names) {
if (item.id == m_id) {
return (shortName && item.shortName) ? item.shortName : item.name;
}
}

View file

@ -39,13 +39,16 @@ namespace xmrig {
class Algorithm
{
public:
// Changes in following file is required if this enum changed:
//
// src/backend/opencl/cl/cn/algorithm.cl
//
enum Id : int {
INVALID = -1,
CN_0, // "cn/0" CryptoNight (original).
CN_1, // "cn/1" CryptoNight variant 1 also known as Monero7 and CryptoNightV7.
CN_2, // "cn/2" CryptoNight variant 2.
CN_R, // "cn/r" CryptoNightR (Monero's variant 4).
CN_WOW, // "cn/wow" CryptoNightR (Wownero).
CN_FAST, // "cn/fast" CryptoNight variant 1 with half iterations.
CN_HALF, // "cn/half" CryptoNight variant 2 with half iterations (Masari/Torque).
CN_XAO, // "cn/xao" CryptoNight variant 0 (modified, Alloy only).
@ -53,31 +56,19 @@ public:
CN_RWZ, // "cn/rwz" CryptoNight variant 2 with 3/4 iterations and reversed shuffle operation (Graft).
CN_ZLS, // "cn/zls" CryptoNight variant 2 with 3/4 iterations (Zelerius).
CN_DOUBLE, // "cn/double" CryptoNight variant 2 with double iterations (X-CASH).
# ifdef XMRIG_ALGO_CN_GPU
CN_GPU, // "cn/gpu" CryptoNight-GPU (Ryo).
# endif
# ifdef XMRIG_ALGO_CN_LITE
CN_LITE_0, // "cn-lite/0" CryptoNight-Lite variant 0.
CN_LITE_1, // "cn-lite/1" CryptoNight-Lite variant 1.
# endif
# ifdef XMRIG_ALGO_CN_HEAVY
CN_HEAVY_0, // "cn-heavy/0" CryptoNight-Heavy (4 MB).
CN_HEAVY_TUBE, // "cn-heavy/tube" CryptoNight-Heavy (modified, TUBE only).
CN_HEAVY_XHV, // "cn-heavy/xhv" CryptoNight-Heavy (modified, Haven Protocol only).
# endif
# ifdef XMRIG_ALGO_CN_PICO
CN_PICO_0, // "cn-pico" CryptoNight Turtle (TRTL)
# endif
# ifdef XMRIG_ALGO_RANDOMX
RX_0, // "rx/0" RandomX (reference configuration).
RX_WOW, // "rx/wow" RandomWOW (Wownero).
RX_LOKI, // "rx/loki" RandomXL (Loki).
DEFYX, // "defyx" DefyX (Scala).
# endif
# ifdef XMRIG_ALGO_ARGON2
AR2_CHUKWA, // "argon2/chukwa"
AR2_WRKZ, // "argon2/wrkz"
# endif
AR2_CHUKWA, // "argon2/chukwa" Argon2id (Chukwa).
AR2_WRKZ, // "argon2/wrkz" Argon2id (WRKZ)
MAX
};
@ -91,10 +82,11 @@ public:
ARGON2
};
inline Algorithm() {}
inline Algorithm() = default;
inline Algorithm(const char *algo) : m_id(parse(algo)) {}
inline Algorithm(Id id) : m_id(id) {}
inline bool isCN() const { auto f = family(); return f == CN || f == CN_LITE || f == CN_HEAVY || f == CN_PICO; }
inline bool isEqual(const Algorithm &other) const { return m_id == other.m_id; }
inline bool isValid() const { return m_id != INVALID; }
inline const char *name() const { return name(false); }
@ -123,7 +115,7 @@ private:
};
typedef std::vector<Algorithm> Algorithms;
using Algorithms = std::vector<Algorithm>;
} /* namespace xmrig */

View file

@ -47,7 +47,7 @@ namespace randomx {
rx_aligned_free(ptr);
}
template class AlignedAllocator<CacheLineSize>;
template struct AlignedAllocator<CacheLineSize>;
void* LargePageAllocator::allocMemory(size_t count) {
return allocLargePagesMemory(count);

View file

@ -1,5 +1,3 @@
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
lea rcx, [rsi+rax]
push rcx
xor r8, qword ptr [rcx+0]
@ -10,8 +8,6 @@
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
lea rcx, [rsi+rdx]
push rcx
cvtdq2pd xmm0, qword ptr [rcx+0]

View file

@ -1,4 +1,3 @@
xor eax, eax
pop rcx
mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9

View file

@ -12,6 +12,13 @@
mov rcx, rdi
mov rbp, qword ptr [rsi] ;# "mx", "ma"
mov rdi, qword ptr [rsi+8] ;# uint8_t* dataset
;# dataset prefetch for the first iteration of the main loop
mov rax, rbp
shr rax, 32
and eax, RANDOMX_DATASET_BASE_MASK
prefetchnta byte ptr [rdi+rax]
mov rsi, rdx ;# uint8_t* scratchpad
mov rax, rbp

View file

@ -24,6 +24,13 @@
push rcx ;# RegisterFile& registerFile
mov rbp, qword ptr [rdx] ;# "mx", "ma"
mov rdi, qword ptr [rdx+8] ;# uint8_t* dataset
;# dataset prefetch for the first iteration of the main loop
mov rax, rbp
shr rax, 32
and eax, RANDOMX_DATASET_BASE_MASK
prefetchnta byte ptr [rdi+rax]
mov rsi, r8 ;# uint8_t* scratchpad
mov rbx, r9 ;# loop counter

View file

@ -3,8 +3,10 @@
#include <string.h>
#if defined(_MSC_VER)
#define FORCE_INLINE __inline
#elif defined(__GNUC__) || defined(__clang__)
#define FORCE_INLINE __forceinline
#elif defined(__GNUC__)
#define FORCE_INLINE __attribute__((always_inline)) inline
#elif defined(__clang__)
#define FORCE_INLINE __inline__
#else
#define FORCE_INLINE

View file

@ -76,6 +76,8 @@ namespace randomx {
*/
const uint8_t* codePrefetchScratchpad = (uint8_t*)&randomx_prefetch_scratchpad;
const uint8_t* codePrefetchScratchpadEnd = (uint8_t*)&randomx_prefetch_scratchpad_end;
const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue;
const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin;
const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
@ -93,6 +95,7 @@ namespace randomx {
const uint8_t* codeShhEnd = (uint8_t*)&randomx_sshash_end;
const uint8_t* codeShhInit = (uint8_t*)&randomx_sshash_init;
const int32_t prefetchScratchpadSize = codePrefetchScratchpadEnd - codePrefetchScratchpad;
const int32_t prologueSize = codeLoopBegin - codePrologue;
const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
const int32_t readDatasetSize = codeReadDatasetLightSshInit - codeReadDataset;
@ -160,7 +163,7 @@ namespace randomx {
static const uint8_t REX_MAXPD[] = { 0x66, 0x41, 0x0f, 0x5f };
static const uint8_t REX_DIVPD[] = { 0x66, 0x41, 0x0f, 0x5e };
static const uint8_t SQRTPD[] = { 0x66, 0x0f, 0x51 };
static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x50, 0x0F, 0xAE, 0x14, 0x24, 0x58 };
static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x89, 0x44, 0x24, 0xFC, 0x0F, 0xAE, 0x54, 0x24, 0xFC };
static const uint8_t ROL_RAX[] = { 0x48, 0xc1, 0xc0 };
static const uint8_t XOR_ECX_ECX[] = { 0x33, 0xC9 };
static const uint8_t REX_CMP_R32I[] = { 0x41, 0x81 };
@ -214,7 +217,7 @@ namespace randomx {
generateProgramPrologue(prog, pcfg);
memcpy(code + codePos, RandomX_CurrentConfig.codeReadDatasetTweaked, readDatasetSize);
codePos += readDatasetSize;
generateProgramEpilogue(prog);
generateProgramEpilogue(prog, pcfg);
}
void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) {
@ -225,7 +228,7 @@ namespace randomx {
emitByte(CALL, code, codePos);
emit32(superScalarHashOffset - (codePos + 4), code, codePos);
emit(codeReadDatasetLightSshFin, readDatasetLightFinSize, code, codePos);
generateProgramEpilogue(prog);
generateProgramEpilogue(prog, pcfg);
}
template<size_t N>
@ -266,13 +269,16 @@ namespace randomx {
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
memset(registerUsage, -1, sizeof(registerUsage));
codePos = ((uint8_t*)randomx_program_prologue_first_load) - ((uint8_t*)randomx_program_prologue);
code[codePos + 2] = 0xc0 + pcfg.readReg0;
code[codePos + 5] = 0xc0 + pcfg.readReg1;
*(uint32_t*)(code + codePos + 10) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
*(uint32_t*)(code + codePos + 20) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
codePos = prologueSize;
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
emit(REX_XOR_RAX_R64, code, codePos);
emitByte(0xc0 + pcfg.readReg0, code, codePos);
emit(REX_XOR_RAX_R64, code, codePos);
emitByte(0xc0 + pcfg.readReg1, code, codePos);
memcpy(code + codePos, RandomX_CurrentConfig.codeLoopLoadTweaked, loopLoadSize);
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
codePos += loopLoadSize;
for (unsigned i = 0; i < prog.getSize(); ++i) {
Instruction& instr = prog(i);
@ -287,7 +293,12 @@ namespace randomx {
emitByte(0xc0 + pcfg.readReg3, code, codePos);
}
void JitCompilerX86::generateProgramEpilogue(Program& prog) {
void JitCompilerX86::generateProgramEpilogue(Program& prog, ProgramConfiguration& pcfg) {
emit(REX_MOV_RR64, code, codePos);
emitByte(0xc0 + pcfg.readReg0, code, codePos);
emit(REX_XOR_RAX_R64, code, codePos);
emitByte(0xc0 + pcfg.readReg1, code, codePos);
emit(RandomX_CurrentConfig.codePrefetchScratchpadTweaked, prefetchScratchpadSize, code, codePos);
memcpy(code + codePos, codeLoopStore, loopStoreSize);
codePos += loopStoreSize;
emit(SUB_EBX, code, codePos);

View file

@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx {
class Program;
class ProgramConfiguration;
struct ProgramConfiguration;
class SuperscalarProgram;
class JitCompilerX86;
class Instruction;
@ -72,7 +72,7 @@ namespace randomx {
int32_t codePos;
void generateProgramPrologue(Program&, ProgramConfiguration&);
void generateProgramEpilogue(Program&);
void generateProgramEpilogue(Program&, ProgramConfiguration&);
static void genAddressReg(Instruction&, uint8_t* code, int& codePos, bool rax = true);
static void genAddressRegDst(Instruction&, uint8_t* code, int& codePos);
static void genAddressImm(Instruction&, uint8_t* code, int& codePos);

View file

@ -37,7 +37,10 @@
#define WINABI
#endif
.global DECL(randomx_prefetch_scratchpad)
.global DECL(randomx_prefetch_scratchpad_end)
.global DECL(randomx_program_prologue)
.global DECL(randomx_program_prologue_first_load)
.global DECL(randomx_program_loop_begin)
.global DECL(randomx_program_loop_load)
.global DECL(randomx_program_start)
@ -61,6 +64,16 @@
#define db .byte
DECL(randomx_prefetch_scratchpad):
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
prefetcht0 [rsi+rax]
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
prefetcht0 [rsi+rdx]
DECL(randomx_prefetch_scratchpad_end):
.balign 64
DECL(randomx_program_prologue):
#if defined(WINABI)
@ -71,6 +84,14 @@ DECL(randomx_program_prologue):
movapd xmm13, xmmword ptr [mantissaMask+rip]
movapd xmm14, xmmword ptr [exp240+rip]
movapd xmm15, xmmword ptr [scaleMask+rip]
DECL(randomx_program_prologue_first_load):
xor rax, r8
xor rax, r8
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
jmp DECL(randomx_program_loop_begin)
.balign 64

View file

@ -28,7 +28,10 @@ IFDEF RAX
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
PUBLIC randomx_prefetch_scratchpad
PUBLIC randomx_prefetch_scratchpad_end
PUBLIC randomx_program_prologue
PUBLIC randomx_program_prologue_first_load
PUBLIC randomx_program_loop_begin
PUBLIC randomx_program_loop_load
PUBLIC randomx_program_start
@ -50,15 +53,36 @@ RANDOMX_SCRATCHPAD_MASK EQU 2097088
RANDOMX_DATASET_BASE_MASK EQU 2147483584
RANDOMX_CACHE_MASK EQU 4194303
randomx_prefetch_scratchpad PROC
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
prefetcht0 [rsi+rax]
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
prefetcht0 [rsi+rdx]
randomx_prefetch_scratchpad ENDP
randomx_prefetch_scratchpad_end PROC
randomx_prefetch_scratchpad_end ENDP
ALIGN 64
randomx_program_prologue PROC
include asm/program_prologue_win64.inc
movapd xmm13, xmmword ptr [mantissaMask]
movapd xmm14, xmmword ptr [exp240]
movapd xmm15, xmmword ptr [scaleMask]
jmp randomx_program_loop_begin
randomx_program_prologue ENDP
randomx_program_prologue_first_load PROC
xor rax, r8
xor rax, r8
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
jmp randomx_program_loop_begin
randomx_program_prologue_first_load ENDP
ALIGN 64
include asm/program_xmm_constants.inc

View file

@ -29,7 +29,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
extern "C" {
void randomx_prefetch_scratchpad();
void randomx_prefetch_scratchpad_end();
void randomx_program_prologue();
void randomx_program_prologue_first_load();
void randomx_program_loop_begin();
void randomx_program_loop_load();
void randomx_program_start();

View file

@ -149,9 +149,9 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
memcpy(codeReadDatasetLightSshInitTweaked, a, b - a);
}
{
const uint8_t* a = (const uint8_t*)&randomx_program_loop_load;
const uint8_t* b = (const uint8_t*)&randomx_program_start;
memcpy(codeLoopLoadTweaked, a, b - a);
const uint8_t* a = (const uint8_t*)&randomx_prefetch_scratchpad;
const uint8_t* b = (const uint8_t*)&randomx_prefetch_scratchpad_end;
memcpy(codePrefetchScratchpadTweaked, a, b - a);
}
#endif
}
@ -177,8 +177,8 @@ void RandomX_ConfigurationBase::Apply()
ScratchpadL3Mask64_Calculated = ((ScratchpadL3_Size / sizeof(uint64_t)) / 8 - 1) * 64;
#if defined(_M_X64) || defined(__x86_64__)
*(uint32_t*)(codeLoopLoadTweaked + 4) = ScratchpadL3Mask64_Calculated;
*(uint32_t*)(codeLoopLoadTweaked + 50) = ScratchpadL3Mask64_Calculated;
*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated;
*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
#endif
ConditionMask_Calculated = (1 << JumpBits) - 1;

View file

@ -29,8 +29,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef RANDOMX_H
#define RANDOMX_H
#include <stddef.h>
#include <stdint.h>
#include <cstddef>
#include <cstdint>
#include <type_traits>
#include "crypto/randomx/intrin_portable.h"
@ -41,17 +41,20 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RANDOMX_EXPORT
#endif
typedef enum {
enum randomx_flags {
RANDOMX_FLAG_DEFAULT = 0,
RANDOMX_FLAG_LARGE_PAGES = 1,
RANDOMX_FLAG_HARD_AES = 2,
RANDOMX_FLAG_FULL_MEM = 4,
RANDOMX_FLAG_JIT = 8,
} randomx_flags;
};
struct randomx_dataset;
struct randomx_cache;
class randomx_vm;
typedef struct randomx_dataset randomx_dataset;
typedef struct randomx_cache randomx_cache;
typedef struct randomx_vm randomx_vm;
struct RandomX_ConfigurationBase
{
@ -116,7 +119,7 @@ struct RandomX_ConfigurationBase
uint8_t codeShhPrefetchTweaked[20];
uint8_t codeReadDatasetTweaked[64];
uint8_t codeReadDatasetLightSshInitTweaked[68];
uint8_t codeLoopLoadTweaked[140];
uint8_t codePrefetchScratchpadTweaked[32];
uint32_t CacheLineAlignMask_Calculated;
uint32_t DatasetExtraItems_Calculated;

View file

@ -1,5 +1,6 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
Copyright (c) 2019 SChernykh <https://github.com/SChernykh>
All rights reserved.
@ -28,337 +29,91 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/soft_aes.h"
alignas(16) const uint8_t sbox[256] = {
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
};
alignas(64) uint32_t lutEnc0[256];
alignas(64) uint32_t lutEnc1[256];
alignas(64) uint32_t lutEnc2[256];
alignas(64) uint32_t lutEnc3[256];
alignas(16) const uint32_t lutEnc0[256] = {
0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591,
0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56, 0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec,
0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb,
0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b,
0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c, 0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83,
0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a,
0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f,
0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df, 0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea,
0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b,
0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413,
0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1, 0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6,
0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85,
0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511,
0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe, 0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b,
0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1,
0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf,
0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3, 0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e,
0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6,
0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b,
0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428, 0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad,
0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8,
0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2,
0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda, 0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949,
0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810,
0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697,
0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e, 0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f,
0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c,
0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27,
0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122, 0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433,
0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5,
0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0,
0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e, 0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c,
};
alignas(64) uint32_t lutDec0[256];
alignas(64) uint32_t lutDec1[256];
alignas(64) uint32_t lutDec2[256];
alignas(64) uint32_t lutDec3[256];
alignas(16) const uint32_t lutEnc1[256] = {
0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d, 0xf2f2ff0d, 0x6b6bd6bd, 0x6f6fdeb1, 0xc5c59154,
0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d, 0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a,
0xcaca8f45, 0x82821f9d, 0xc9c98940, 0x7d7dfa87, 0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b,
0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea, 0x9c9c23bf, 0xa4a453f7, 0x7272e496, 0xc0c09b5b,
0xb7b775c2, 0xfdfde11c, 0x93933dae, 0x26264c6a, 0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f,
0x3434685c, 0xa5a551f4, 0xe5e5d134, 0xf1f1f908, 0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f,
0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e, 0x18183028, 0x969637a1, 0x05050a0f, 0x9a9a2fb5,
0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d, 0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f,
0x0909121b, 0x83831d9e, 0x2c2c5874, 0x1a1a342e, 0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb,
0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce, 0x2929527b, 0xe3e3dd3e, 0x2f2f5e71, 0x84841397,
0x5353a6f5, 0xd1d1b968, 0x00000000, 0xededc12c, 0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed,
0x6a6ad4be, 0xcbcb8d46, 0xbebe67d9, 0x3939724b, 0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a,
0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16, 0x434386c5, 0x4d4d9ad7, 0x33336655, 0x85851194,
0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81, 0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3,
0x5151a2f3, 0xa3a35dfe, 0x404080c0, 0x8f8f058a, 0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104,
0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263, 0x10102030, 0xffffe51a, 0xf3f3fd0e, 0xd2d2bf6d,
0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f, 0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39,
0xc4c49357, 0xa7a755f2, 0x7e7efc82, 0x3d3d7a47, 0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695,
0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f, 0x22224466, 0x2a2a547e, 0x90903bab, 0x88880b83,
0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c, 0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76,
0xe0e0db3b, 0x32326456, 0x3a3a744e, 0x0a0a141e, 0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4,
0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6, 0x919139a8, 0x959531a4, 0xe4e4d337, 0x7979f28b,
0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7, 0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0,
0x6c6cd8b4, 0x5656acfa, 0xf4f4f307, 0xeaeacf25, 0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018,
0xbaba6fd5, 0x7878f088, 0x25254a6f, 0x2e2e5c72, 0x1c1c3824, 0xa6a657f1, 0xb4b473c7, 0xc6c69751,
0xe8e8cb23, 0xdddda17c, 0x7474e89c, 0x1f1f3e21, 0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85,
0x7070e090, 0x3e3e7c42, 0xb5b571c4, 0x6666ccaa, 0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12,
0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0, 0x86861791, 0xc1c19958, 0x1d1d3a27, 0x9e9e27b9,
0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233, 0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7,
0x9b9b2db6, 0x1e1e3c22, 0x87871592, 0xe9e9c920, 0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a,
0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17, 0xbfbf65da, 0xe6e6d731, 0x424284c6, 0x6868d0b8,
0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11, 0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a,
};
static uint32_t mul_gf2(uint32_t b, uint32_t c)
{
uint32_t s = 0;
for (uint32_t i = b, j = c, k = 1; (k < 0x100) && j; k <<= 1)
{
if (j & k)
{
s ^= i;
j ^= k;
}
alignas(16) const uint32_t lutEnc2[256] = {
0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b, 0xf2ff0df2, 0x6bd6bd6b, 0x6fdeb16f, 0xc59154c5,
0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b, 0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76,
0xca8f45ca, 0x821f9d82, 0xc98940c9, 0x7dfa877d, 0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0,
0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf, 0x9c23bf9c, 0xa453f7a4, 0x72e49672, 0xc09b5bc0,
0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26, 0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc,
0x34685c34, 0xa551f4a5, 0xe5d134e5, 0xf1f908f1, 0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15,
0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3, 0x18302818, 0x9637a196, 0x050a0f05, 0x9a2fb59a,
0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2, 0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75,
0x09121b09, 0x831d9e83, 0x2c58742c, 0x1a342e1a, 0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0,
0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3, 0x29527b29, 0xe3dd3ee3, 0x2f5e712f, 0x84139784,
0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced, 0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b,
0x6ad4be6a, 0xcb8d46cb, 0xbe67d9be, 0x39724b39, 0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf,
0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb, 0x4386c543, 0x4d9ad74d, 0x33665533, 0x85119485,
0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f, 0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8,
0x51a2f351, 0xa35dfea3, 0x4080c040, 0x8f058a8f, 0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5,
0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321, 0x10203010, 0xffe51aff, 0xf3fd0ef3, 0xd2bf6dd2,
0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec, 0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917,
0xc49357c4, 0xa755f2a7, 0x7efc827e, 0x3d7a473d, 0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573,
0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc, 0x22446622, 0x2a547e2a, 0x903bab90, 0x880b8388,
0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14, 0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db,
0xe0db3be0, 0x32645632, 0x3a744e3a, 0x0a141e0a, 0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c,
0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662, 0x9139a891, 0x9531a495, 0xe4d337e4, 0x79f28b79,
0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d, 0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9,
0x6cd8b46c, 0x56acfa56, 0xf4f307f4, 0xeacf25ea, 0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808,
0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e, 0x1c38241c, 0xa657f1a6, 0xb473c7b4, 0xc69751c6,
0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f, 0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a,
0x70e09070, 0x3e7c423e, 0xb571c4b5, 0x66ccaa66, 0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e,
0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9, 0x86179186, 0xc19958c1, 0x1d3a271d, 0x9e27b99e,
0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311, 0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794,
0x9b2db69b, 0x1e3c221e, 0x87159287, 0xe9c920e9, 0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf,
0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d, 0xbf65dabf, 0xe6d731e6, 0x4284c642, 0x68d0b868,
0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f, 0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16,
};
i <<= 1;
if (i & 0x100)
i ^= (1 << 8) | (1 << 4) | (1 << 3) | (1 << 1) | (1 << 0);
}
alignas(16) const uint32_t lutEnc3[256] = {
0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b, 0xff0df2f2, 0xd6bd6b6b, 0xdeb16f6f, 0x9154c5c5,
0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b, 0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676,
0x8f45caca, 0x1f9d8282, 0x8940c9c9, 0xfa877d7d, 0xef15fafa, 0xb2eb5959, 0x8ec94747, 0xfb0bf0f0,
0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf, 0x23bf9c9c, 0x53f7a4a4, 0xe4967272, 0x9b5bc0c0,
0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626, 0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc,
0x685c3434, 0x51f4a5a5, 0xd134e5e5, 0xf908f1f1, 0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515,
0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3, 0x30281818, 0x37a19696, 0x0a0f0505, 0x2fb59a9a,
0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2, 0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575,
0x121b0909, 0x1d9e8383, 0x58742c2c, 0x342e1a1a, 0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0,
0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3, 0x527b2929, 0xdd3ee3e3, 0x5e712f2f, 0x13978484,
0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded, 0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b,
0xd4be6a6a, 0x8d46cbcb, 0x67d9bebe, 0x724b3939, 0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf,
0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb, 0x86c54343, 0x9ad74d4d, 0x66553333, 0x11948585,
0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f, 0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8,
0xa2f35151, 0x5dfea3a3, 0x80c04040, 0x058a8f8f, 0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5,
0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121, 0x20301010, 0xe51affff, 0xfd0ef3f3, 0xbf6dd2d2,
0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec, 0xbee15f5f, 0x35a29797, 0x88cc4444, 0x2e391717,
0x9357c4c4, 0x55f2a7a7, 0xfc827e7e, 0x7a473d3d, 0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373,
0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc, 0x44662222, 0x547e2a2a, 0x3bab9090, 0x0b838888,
0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414, 0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb,
0xdb3be0e0, 0x64563232, 0x744e3a3a, 0x141e0a0a, 0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c,
0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262, 0x39a89191, 0x31a49595, 0xd337e4e4, 0xf28b7979,
0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d, 0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9,
0xd8b46c6c, 0xacfa5656, 0xf307f4f4, 0xcf25eaea, 0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808,
0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e, 0x38241c1c, 0x57f1a6a6, 0x73c7b4b4, 0x9751c6c6,
0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f, 0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a,
0xe0907070, 0x7c423e3e, 0x71c4b5b5, 0xccaa6666, 0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e,
0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9, 0x17918686, 0x9958c1c1, 0x3a271d1d, 0x27b99e9e,
0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111, 0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494,
0x2db69b9b, 0x3c221e1e, 0x15928787, 0xc920e9e9, 0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf,
0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d, 0x65dabfbf, 0xd731e6e6, 0x84c64242, 0xd0b86868,
0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f, 0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616,
};
alignas(16) const uint32_t lutDec0[256] = {
0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b,
0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5, 0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5,
0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b,
0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e,
0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927, 0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d,
0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9,
0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566,
0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3, 0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed,
0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4,
0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd,
0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d, 0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060,
0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879,
0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c,
0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36, 0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624,
0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c,
0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14,
0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3, 0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b,
0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684,
0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177,
0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947, 0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322,
0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f,
0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382,
0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf, 0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb,
0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef,
0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235,
0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733, 0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117,
0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546,
0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d,
0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb, 0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a,
0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478,
0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff,
0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664, 0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0,
};
alignas(16) const uint32_t lutDec1[256] = {
0xa7f45150, 0x65417e53, 0xa4171ac3, 0x5e273a96, 0x6bab3bcb, 0x459d1ff1, 0x58faacab, 0x03e34b93,
0xfa302055, 0x6d76adf6, 0x76cc8891, 0x4c02f525, 0xd7e54ffc, 0xcb2ac5d7, 0x44352680, 0xa362b58f,
0x5ab1de49, 0x1bba2567, 0x0eea4598, 0xc0fe5de1, 0x752fc302, 0xf04c8112, 0x97468da3, 0xf9d36bc6,
0x5f8f03e7, 0x9c921595, 0x7a6dbfeb, 0x595295da, 0x83bed42d, 0x217458d3, 0x69e04929, 0xc8c98e44,
0x89c2756a, 0x798ef478, 0x3e58996b, 0x71b927dd, 0x4fe1beb6, 0xad88f017, 0xac20c966, 0x3ace7db4,
0x4adf6318, 0x311ae582, 0x33519760, 0x7f536245, 0x7764b1e0, 0xae6bbb84, 0xa081fe1c, 0x2b08f994,
0x68487058, 0xfd458f19, 0x6cde9487, 0xf87b52b7, 0xd373ab23, 0x024b72e2, 0x8f1fe357, 0xab55662a,
0x28ebb207, 0xc2b52f03, 0x7bc5869a, 0x0837d3a5, 0x872830f2, 0xa5bf23b2, 0x6a0302ba, 0x8216ed5c,
0x1ccf8a2b, 0xb479a792, 0xf207f3f0, 0xe2694ea1, 0xf4da65cd, 0xbe0506d5, 0x6234d11f, 0xfea6c48a,
0x532e349d, 0x55f3a2a0, 0xe18a0532, 0xebf6a475, 0xec830b39, 0xef6040aa, 0x9f715e06, 0x106ebd51,
0x8a213ef9, 0x06dd963d, 0x053eddae, 0xbde64d46, 0x8d5491b5, 0x5dc47105, 0xd406046f, 0x155060ff,
0xfb981924, 0xe9bdd697, 0x434089cc, 0x9ed96777, 0x42e8b0bd, 0x8b890788, 0x5b19e738, 0xeec879db,
0x0a7ca147, 0x0f427ce9, 0x1e84f8c9, 0x00000000, 0x86800983, 0xed2b3248, 0x70111eac, 0x725a6c4e,
0xff0efdfb, 0x38850f56, 0xd5ae3d1e, 0x392d3627, 0xd90f0a64, 0xa65c6821, 0x545b9bd1, 0x2e36243a,
0x670a0cb1, 0xe757930f, 0x96eeb4d2, 0x919b1b9e, 0xc5c0804f, 0x20dc61a2, 0x4b775a69, 0x1a121c16,
0xba93e20a, 0x2aa0c0e5, 0xe0223c43, 0x171b121d, 0x0d090e0b, 0xc78bf2ad, 0xa8b62db9, 0xa91e14c8,
0x19f15785, 0x0775af4c, 0xdd99eebb, 0x607fa3fd, 0x2601f79f, 0xf5725cbc, 0x3b6644c5, 0x7efb5b34,
0x29438b76, 0xc623cbdc, 0xfcedb668, 0xf1e4b863, 0xdc31d7ca, 0x85634210, 0x22971340, 0x11c68420,
0x244a857d, 0x3dbbd2f8, 0x32f9ae11, 0xa129c76d, 0x2f9e1d4b, 0x30b2dcf3, 0x52860dec, 0xe3c177d0,
0x16b32b6c, 0xb970a999, 0x489411fa, 0x64e94722, 0x8cfca8c4, 0x3ff0a01a, 0x2c7d56d8, 0x903322ef,
0x4e4987c7, 0xd138d9c1, 0xa2ca8cfe, 0x0bd49836, 0x81f5a6cf, 0xde7aa528, 0x8eb7da26, 0xbfad3fa4,
0x9d3a2ce4, 0x9278500d, 0xcc5f6a9b, 0x467e5462, 0x138df6c2, 0xb8d890e8, 0xf7392e5e, 0xafc382f5,
0x805d9fbe, 0x93d0697c, 0x2dd56fa9, 0x1225cfb3, 0x99acc83b, 0x7d1810a7, 0x639ce86e, 0xbb3bdb7b,
0x7826cd09, 0x18596ef4, 0xb79aec01, 0x9a4f83a8, 0x6e95e665, 0xe6ffaa7e, 0xcfbc2108, 0xe815efe6,
0x9be7bad9, 0x366f4ace, 0x099fead4, 0x7cb029d6, 0xb2a431af, 0x233f2a31, 0x94a5c630, 0x66a235c0,
0xbc4e7437, 0xca82fca6, 0xd090e0b0, 0xd8a73315, 0x9804f14a, 0xdaec41f7, 0x50cd7f0e, 0xf691172f,
0xd64d768d, 0xb0ef434d, 0x4daacc54, 0x0496e4df, 0xb5d19ee3, 0x886a4c1b, 0x1f2cc1b8, 0x5165467f,
0xea5e9d04, 0x358c015d, 0x7487fa73, 0x410bfb2e, 0x1d67b35a, 0xd2db9252, 0x5610e933, 0x47d66d13,
0x61d79a8c, 0x0ca1377a, 0x14f8598e, 0x3c13eb89, 0x27a9ceee, 0xc961b735, 0xe51ce1ed, 0xb1477a3c,
0xdfd29c59, 0x73f2553f, 0xce141879, 0x37c773bf, 0xcdf753ea, 0xaafd5f5b, 0x6f3ddf14, 0xdb447886,
0xf3afca81, 0xc468b93e, 0x3424382c, 0x40a3c25f, 0xc31d1672, 0x25e2bc0c, 0x493c288b, 0x950dff41,
0x01a83971, 0xb30c08de, 0xe4b4d89c, 0xc1566490, 0x84cb7b61, 0xb632d570, 0x5c6c4874, 0x57b8d042,
};
alignas(16) const uint32_t lutDec2[256] = {
0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e, 0xab3bcb6b, 0x9d1ff145, 0xfaacab58, 0xe34b9303,
0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c, 0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3,
0xb1de495a, 0xba25671b, 0xea45980e, 0xfe5de1c0, 0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9,
0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59, 0xbed42d83, 0x7458d321, 0xe0492969, 0xc98e44c8,
0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71, 0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a,
0xdf63184a, 0x1ae58231, 0x51976033, 0x5362457f, 0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b,
0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8, 0x73ab23d3, 0x4b72e202, 0x1fe3578f, 0x55662aab,
0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508, 0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82,
0xcf8a2b1c, 0x79a792b4, 0x07f3f0f2, 0x694ea1e2, 0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe,
0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb, 0x830b39ec, 0x6040aaef, 0x715e069f, 0x6ebd5110,
0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd, 0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15,
0x981924fb, 0xbdd697e9, 0x4089cc43, 0xd967779e, 0xe8b0bd42, 0x8907888b, 0x19e7385b, 0xc879dbee,
0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000, 0x80098386, 0x2b3248ed, 0x111eac70, 0x5a6c4e72,
0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739, 0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e,
0x0a0cb167, 0x57930fe7, 0xeeb4d296, 0x9b1b9e91, 0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a,
0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17, 0x090e0b0d, 0x8bf2adc7, 0xb62db9a8, 0x1e14c8a9,
0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60, 0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e,
0x438b7629, 0x23cbdcc6, 0xedb668fc, 0xe4b863f1, 0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011,
0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1, 0x9e1d4b2f, 0xb2dcf330, 0x860dec52, 0xc177d0e3,
0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264, 0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90,
0x4987c74e, 0x38d9c1d1, 0xca8cfea2, 0xd498360b, 0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf,
0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246, 0x8df6c213, 0xd890e8b8, 0x392e5ef7, 0xc382f5af,
0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312, 0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb,
0x26cd0978, 0x596ef418, 0x9aec01b7, 0x4f83a89a, 0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8,
0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c, 0xa431afb2, 0x3f2a3123, 0xa5c63094, 0xa235c066,
0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8, 0x04f14a98, 0xec41f7da, 0xcd7f0e50, 0x91172ff6,
0x4d768dd6, 0xef434db0, 0xaacc544d, 0x96e4df04, 0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 0x65467f51,
0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41, 0x67b35a1d, 0xdb9252d2, 0x10e93356, 0xd66d1347,
0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c, 0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1,
0xd29c59df, 0xf2553f73, 0x141879ce, 0xc773bf37, 0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db,
0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40, 0x1d1672c3, 0xe2bc0c25, 0x3c288b49, 0x0dff4195,
0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1, 0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257,
};
alignas(16) const uint32_t lutDec3[256] = {
0x5150a7f4, 0x7e536541, 0x1ac3a417, 0x3a965e27, 0x3bcb6bab, 0x1ff1459d, 0xacab58fa, 0x4b9303e3,
0x2055fa30, 0xadf66d76, 0x889176cc, 0xf5254c02, 0x4ffcd7e5, 0xc5d7cb2a, 0x26804435, 0xb58fa362,
0xde495ab1, 0x25671bba, 0x45980eea, 0x5de1c0fe, 0xc302752f, 0x8112f04c, 0x8da39746, 0x6bc6f9d3,
0x03e75f8f, 0x15959c92, 0xbfeb7a6d, 0x95da5952, 0xd42d83be, 0x58d32174, 0x492969e0, 0x8e44c8c9,
0x756a89c2, 0xf478798e, 0x996b3e58, 0x27dd71b9, 0xbeb64fe1, 0xf017ad88, 0xc966ac20, 0x7db43ace,
0x63184adf, 0xe582311a, 0x97603351, 0x62457f53, 0xb1e07764, 0xbb84ae6b, 0xfe1ca081, 0xf9942b08,
0x70586848, 0x8f19fd45, 0x94876cde, 0x52b7f87b, 0xab23d373, 0x72e2024b, 0xe3578f1f, 0x662aab55,
0xb20728eb, 0x2f03c2b5, 0x869a7bc5, 0xd3a50837, 0x30f28728, 0x23b2a5bf, 0x02ba6a03, 0xed5c8216,
0x8a2b1ccf, 0xa792b479, 0xf3f0f207, 0x4ea1e269, 0x65cdf4da, 0x06d5be05, 0xd11f6234, 0xc48afea6,
0x349d532e, 0xa2a055f3, 0x0532e18a, 0xa475ebf6, 0x0b39ec83, 0x40aaef60, 0x5e069f71, 0xbd51106e,
0x3ef98a21, 0x963d06dd, 0xddae053e, 0x4d46bde6, 0x91b58d54, 0x71055dc4, 0x046fd406, 0x60ff1550,
0x1924fb98, 0xd697e9bd, 0x89cc4340, 0x67779ed9, 0xb0bd42e8, 0x07888b89, 0xe7385b19, 0x79dbeec8,
0xa1470a7c, 0x7ce90f42, 0xf8c91e84, 0x00000000, 0x09838680, 0x3248ed2b, 0x1eac7011, 0x6c4e725a,
0xfdfbff0e, 0x0f563885, 0x3d1ed5ae, 0x3627392d, 0x0a64d90f, 0x6821a65c, 0x9bd1545b, 0x243a2e36,
0x0cb1670a, 0x930fe757, 0xb4d296ee, 0x1b9e919b, 0x804fc5c0, 0x61a220dc, 0x5a694b77, 0x1c161a12,
0xe20aba93, 0xc0e52aa0, 0x3c43e022, 0x121d171b, 0x0e0b0d09, 0xf2adc78b, 0x2db9a8b6, 0x14c8a91e,
0x578519f1, 0xaf4c0775, 0xeebbdd99, 0xa3fd607f, 0xf79f2601, 0x5cbcf572, 0x44c53b66, 0x5b347efb,
0x8b762943, 0xcbdcc623, 0xb668fced, 0xb863f1e4, 0xd7cadc31, 0x42108563, 0x13402297, 0x842011c6,
0x857d244a, 0xd2f83dbb, 0xae1132f9, 0xc76da129, 0x1d4b2f9e, 0xdcf330b2, 0x0dec5286, 0x77d0e3c1,
0x2b6c16b3, 0xa999b970, 0x11fa4894, 0x472264e9, 0xa8c48cfc, 0xa01a3ff0, 0x56d82c7d, 0x22ef9033,
0x87c74e49, 0xd9c1d138, 0x8cfea2ca, 0x98360bd4, 0xa6cf81f5, 0xa528de7a, 0xda268eb7, 0x3fa4bfad,
0x2ce49d3a, 0x500d9278, 0x6a9bcc5f, 0x5462467e, 0xf6c2138d, 0x90e8b8d8, 0x2e5ef739, 0x82f5afc3,
0x9fbe805d, 0x697c93d0, 0x6fa92dd5, 0xcfb31225, 0xc83b99ac, 0x10a77d18, 0xe86e639c, 0xdb7bbb3b,
0xcd097826, 0x6ef41859, 0xec01b79a, 0x83a89a4f, 0xe6656e95, 0xaa7ee6ff, 0x2108cfbc, 0xefe6e815,
0xbad99be7, 0x4ace366f, 0xead4099f, 0x29d67cb0, 0x31afb2a4, 0x2a31233f, 0xc63094a5, 0x35c066a2,
0x7437bc4e, 0xfca6ca82, 0xe0b0d090, 0x3315d8a7, 0xf14a9804, 0x41f7daec, 0x7f0e50cd, 0x172ff691,
0x768dd64d, 0x434db0ef, 0xcc544daa, 0xe4df0496, 0x9ee3b5d1, 0x4c1b886a, 0xc1b81f2c, 0x467f5165,
0x9d04ea5e, 0x015d358c, 0xfa737487, 0xfb2e410b, 0xb35a1d67, 0x9252d2db, 0xe9335610, 0x6d1347d6,
0x9a8c61d7, 0x377a0ca1, 0x598e14f8, 0xeb893c13, 0xceee27a9, 0xb735c961, 0xe1ede51c, 0x7a3cb147,
0x9c59dfd2, 0x553f73f2, 0x1879ce14, 0x73bf37c7, 0x53eacdf7, 0x5f5baafd, 0xdf146f3d, 0x7886db44,
0xca81f3af, 0xb93ec468, 0x382c3424, 0xc25f40a3, 0x1672c31d, 0xbc0c25e2, 0x288b493c, 0xff41950d,
0x397101a8, 0x08deb30c, 0xd89ce4b4, 0x6490c156, 0x7b6184cb, 0xd570b632, 0x48745c6c, 0xd04257b8,
};
rx_vec_i128 soft_aesenc(rx_vec_i128 in, rx_vec_i128 key) {
uint32_t s0, s1, s2, s3;
s0 = rx_vec_i128_w(in);
s1 = rx_vec_i128_z(in);
s2 = rx_vec_i128_y(in);
s3 = rx_vec_i128_x(in);
rx_vec_i128 out = rx_set_int_vec_i128(
(lutEnc0[s0 & 0xff] ^ lutEnc1[(s3 >> 8) & 0xff] ^ lutEnc2[(s2 >> 16) & 0xff] ^ lutEnc3[s1 >> 24]),
(lutEnc0[s1 & 0xff] ^ lutEnc1[(s0 >> 8) & 0xff] ^ lutEnc2[(s3 >> 16) & 0xff] ^ lutEnc3[s2 >> 24]),
(lutEnc0[s2 & 0xff] ^ lutEnc1[(s1 >> 8) & 0xff] ^ lutEnc2[(s0 >> 16) & 0xff] ^ lutEnc3[s3 >> 24]),
(lutEnc0[s3 & 0xff] ^ lutEnc1[(s2 >> 8) & 0xff] ^ lutEnc2[(s1 >> 16) & 0xff] ^ lutEnc3[s0 >> 24])
);
return rx_xor_vec_i128(out, key);
return s;
}
rx_vec_i128 soft_aesdec(rx_vec_i128 in, rx_vec_i128 key) {
uint32_t s0, s1, s2, s3;
#define ROTL8(x,shift) ((uint8_t) ((x) << (shift)) | ((x) >> (8 - (shift))))
s0 = rx_vec_i128_w(in);
s1 = rx_vec_i128_z(in);
s2 = rx_vec_i128_y(in);
s3 = rx_vec_i128_x(in);
static struct SAESInitializer
{
SAESInitializer()
{
static uint8_t sbox[256];
static uint8_t sbox_reverse[256];
rx_vec_i128 out = rx_set_int_vec_i128(
(lutDec0[s0 & 0xff] ^ lutDec1[(s1 >> 8) & 0xff] ^ lutDec2[(s2 >> 16) & 0xff] ^ lutDec3[s3 >> 24]),
(lutDec0[s1 & 0xff] ^ lutDec1[(s2 >> 8) & 0xff] ^ lutDec2[(s3 >> 16) & 0xff] ^ lutDec3[s0 >> 24]),
(lutDec0[s2 & 0xff] ^ lutDec1[(s3 >> 8) & 0xff] ^ lutDec2[(s0 >> 16) & 0xff] ^ lutDec3[s1 >> 24]),
(lutDec0[s3 & 0xff] ^ lutDec1[(s0 >> 8) & 0xff] ^ lutDec2[(s1 >> 16) & 0xff] ^ lutDec3[s2 >> 24])
);
uint8_t p = 1, q = 1;
return rx_xor_vec_i128(out, key);
}
do {
p = p ^ (p << 1) ^ (p & 0x80 ? 0x1B : 0);
q ^= q << 1;
q ^= q << 2;
q ^= q << 4;
q ^= (q & 0x80) ? 0x09 : 0;
const uint8_t value = q ^ ROTL8(q, 1) ^ ROTL8(q, 2) ^ ROTL8(q, 3) ^ ROTL8(q, 4) ^ 0x63;
sbox[p] = value;
sbox_reverse[value] = p;
} while (p != 1);
sbox[0] = 0x63;
sbox_reverse[0x63] = 0;
for (uint32_t i = 0; i < 0x100; ++i)
{
union
{
uint32_t w;
uint8_t p[4];
};
uint32_t s = sbox[i];
p[0] = mul_gf2(s, 2);
p[1] = s;
p[2] = s;
p[3] = mul_gf2(s, 3);
lutEnc0[i] = w; w = (w << 8) | (w >> 24);
lutEnc1[i] = w; w = (w << 8) | (w >> 24);
lutEnc2[i] = w; w = (w << 8) | (w >> 24);
lutEnc3[i] = w;
s = sbox_reverse[i];
p[0] = mul_gf2(s, 0xe);
p[1] = mul_gf2(s, 0x9);
p[2] = mul_gf2(s, 0xd);
p[3] = mul_gf2(s, 0xb);
lutDec0[i] = w; w = (w << 8) | (w >> 24);
lutDec1[i] = w; w = (w << 8) | (w >> 24);
lutDec2[i] = w; w = (w << 8) | (w >> 24);
lutDec3[i] = w;
}
}
} aes_initializer;

View file

@ -1,5 +1,6 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
Copyright (c) 2019 SChernykh <https://github.com/SChernykh>
All rights reserved.
@ -31,16 +32,80 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdint.h>
#include "crypto/randomx/intrin_portable.h"
rx_vec_i128 soft_aesenc(rx_vec_i128 in, rx_vec_i128 key);
extern uint32_t lutEnc0[256];
extern uint32_t lutEnc1[256];
extern uint32_t lutEnc2[256];
extern uint32_t lutEnc3[256];
extern uint32_t lutDec0[256];
extern uint32_t lutDec1[256];
extern uint32_t lutDec2[256];
extern uint32_t lutDec3[256];
rx_vec_i128 soft_aesdec(rx_vec_i128 in, rx_vec_i128 key);
template<bool soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
template<bool soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
template<bool soft>
inline rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key) {
return soft ? soft_aesenc(in, key) : rx_aesenc_vec_i128(in, key);
template<>
FORCE_INLINE rx_vec_i128 aesenc<true>(rx_vec_i128 in, rx_vec_i128 key) {
volatile uint8_t s[16];
memcpy((void*) s, &in, 16);
uint32_t s0 = lutEnc0[s[ 0]];
uint32_t s1 = lutEnc0[s[ 4]];
uint32_t s2 = lutEnc0[s[ 8]];
uint32_t s3 = lutEnc0[s[12]];
s0 ^= lutEnc1[s[ 5]];
s1 ^= lutEnc1[s[ 9]];
s2 ^= lutEnc1[s[13]];
s3 ^= lutEnc1[s[ 1]];
s0 ^= lutEnc2[s[10]];
s1 ^= lutEnc2[s[14]];
s2 ^= lutEnc2[s[ 2]];
s3 ^= lutEnc2[s[ 6]];
s0 ^= lutEnc3[s[15]];
s1 ^= lutEnc3[s[ 3]];
s2 ^= lutEnc3[s[ 7]];
s3 ^= lutEnc3[s[11]];
return rx_xor_vec_i128(rx_set_int_vec_i128(s3, s2, s1, s0), key);
}
template<bool soft>
inline rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key) {
return soft ? soft_aesdec(in, key) : rx_aesdec_vec_i128(in, key);
template<>
FORCE_INLINE rx_vec_i128 aesdec<true>(rx_vec_i128 in, rx_vec_i128 key) {
volatile uint8_t s[16];
memcpy((void*) s, &in, 16);
uint32_t s0 = lutDec0[s[ 0]];
uint32_t s1 = lutDec0[s[ 4]];
uint32_t s2 = lutDec0[s[ 8]];
uint32_t s3 = lutDec0[s[12]];
s0 ^= lutDec1[s[13]];
s1 ^= lutDec1[s[ 1]];
s2 ^= lutDec1[s[ 5]];
s3 ^= lutDec1[s[ 9]];
s0 ^= lutDec2[s[10]];
s1 ^= lutDec2[s[14]];
s2 ^= lutDec2[s[ 2]];
s3 ^= lutDec2[s[ 6]];
s0 ^= lutDec3[s[ 7]];
s1 ^= lutDec3[s[11]];
s2 ^= lutDec3[s[15]];
s3 ^= lutDec3[s[ 3]];
return rx_xor_vec_i128(rx_set_int_vec_i128(s3, s2, s1, s0), key);
}
template<>
FORCE_INLINE rx_vec_i128 aesenc<false>(rx_vec_i128 in, rx_vec_i128 key) {
return rx_aesenc_vec_i128(in, key);
}
template<>
FORCE_INLINE rx_vec_i128 aesdec<false>(rx_vec_i128 in, rx_vec_i128 key) {
return rx_aesdec_vec_i128(in, key);
}

View file

@ -25,9 +25,20 @@
*/
#include <map>
#include <mutex>
#include <thread>
#include "crypto/rx/Rx.h"
#include "backend/common/interfaces/IRxListener.h"
#include "backend/cpu/Cpu.h"
#include "base/io/log/Log.h"
#include "base/kernel/Platform.h"
#include "base/net/stratum/Job.h"
#include "base/tools/Buffer.h"
#include "base/tools/Chrono.h"
#include "base/tools/Handle.h"
#include "base/tools/Object.h"
#include "crypto/rx/RxAlgo.h"
#include "crypto/rx/RxCache.h"
#include "crypto/rx/RxDataset.h"
#ifdef XMRIG_FEATURE_HWLOC
@ -36,16 +47,10 @@
#endif
#include "backend/cpu/Cpu.h"
#include "base/io/log/Log.h"
#include "base/kernel/Platform.h"
#include "base/net/stratum/Job.h"
#include "base/tools/Buffer.h"
#include "base/tools/Chrono.h"
#include "crypto/rx/Rx.h"
#include "crypto/rx/RxAlgo.h"
#include "crypto/rx/RxCache.h"
#include "crypto/rx/RxDataset.h"
#include <map>
#include <mutex>
#include <thread>
#include <uv.h>
namespace xmrig {
@ -56,6 +61,7 @@ class RxPrivate;
static const char *tag = BLUE_BG(WHITE_BOLD_S " rx ") " ";
static RxPrivate *d_ptr = nullptr;
static std::mutex mutex;
#ifdef XMRIG_FEATURE_HWLOC
@ -88,9 +94,15 @@ inline static void bindToNUMANode(uint32_t) {}
class RxPrivate
{
public:
inline RxPrivate() :
m_seed()
XMRIG_DISABLE_COPY_MOVE(RxPrivate)
inline RxPrivate()
{
m_async = new uv_async_t;
m_async->data = this;
uv_async_init(uv_default_loop(), m_async, RxPrivate::onReady);
# ifdef XMRIG_FEATURE_HWLOC
if (Cpu::info()->nodes() > 1) {
for (uint32_t nodeId : HwlocCpuInfo::nodeIndexes()) {
@ -107,6 +119,8 @@ public:
inline ~RxPrivate()
{
Handle::close(m_async);
for (auto const &item : datasets) {
delete item.second;
}
@ -117,8 +131,9 @@ public:
inline bool isNUMA() const { return m_numa; }
inline const Algorithm &algorithm() const { return m_algorithm; }
inline const uint8_t *seed() const { return m_seed; }
inline const Buffer &seed() const { return m_seed; }
inline size_t count() const { return isNUMA() ? datasets.size() : 1; }
inline void asyncSend() { m_ready++; if (m_ready == count()) { uv_async_send(m_async); } }
static void allocate(uint32_t nodeId)
@ -132,12 +147,12 @@ public:
LOG_INFO("%s" CYAN_BOLD("#%u") MAGENTA_BOLD(" allocate") CYAN_BOLD(" %zu MB") BLACK_BOLD(" (%zu+%zu) for RandomX dataset & cache"),
tag,
nodeId,
(RxDataset::size() + RxCache::size()) / 1024 / 1024,
RxDataset::size() / 1024 / 1024,
RxCache::size() / 1024 / 1024
(RxDataset::maxSize() + RxCache::maxSize()) / 1024 / 1024,
RxDataset::maxSize() / 1024 / 1024,
RxCache::maxSize() / 1024 / 1024
);
RxDataset *dataset = new RxDataset(d_ptr->m_hugePages);
auto dataset = new RxDataset(d_ptr->m_hugePages);
d_ptr->datasets[nodeId] = dataset;
if (dataset->get() != nullptr) {
@ -163,14 +178,14 @@ public:
static void initDataset(uint32_t nodeId, uint32_t threads)
{
std::lock_guard<std::mutex> lock(d_ptr->mutex);
std::lock_guard<std::mutex> lock(mutex);
const uint64_t ts = Chrono::steadyMSecs();
d_ptr->getOrAllocate(nodeId)->init(d_ptr->seed(), threads);
d_ptr->m_ready++;
d_ptr->asyncSend();
LOG_INFO("%s" CYAN_BOLD("#%u") GREEN(" init done") BLACK_BOLD(" (%" PRIu64 " ms)"), tag, nodeId, Chrono::steadyMSecs() - ts);
LOG_INFO("%s" CYAN_BOLD("#%u") GREEN(" init done ") CYAN_BOLD("%zu/%zu") BLACK_BOLD(" (%" PRIu64 " ms)"), tag, nodeId, d_ptr->m_ready, d_ptr->count(), Chrono::steadyMSecs() - ts);
}
@ -196,7 +211,7 @@ public:
}
inline void setState(const Job &job, bool hugePages, bool numa)
inline void setState(const Job &job, bool hugePages, bool numa, IRxListener *listener)
{
if (m_algorithm != job.algorithm()) {
m_algorithm = RxAlgo::apply(job.algorithm());
@ -205,35 +220,85 @@ public:
m_ready = 0;
m_numa = numa && Cpu::info()->nodes() > 1;
m_hugePages = hugePages;
memcpy(m_seed, job.seedHash(), sizeof(m_seed));
m_listener = listener;
m_seed = job.seed();
}
inline bool isReady(const Job &job)
{
return m_ready == count() && m_algorithm == job.algorithm() && memcmp(m_seed, job.seedHash(), sizeof(m_seed)) == 0;
return m_ready == count() && m_algorithm == job.algorithm() && m_seed == job.seed();
}
std::map<uint32_t, RxDataset *> datasets;
std::mutex mutex;
private:
bool m_hugePages = true;
bool m_numa = true;
static void onReady(uv_async_t *)
{
if (d_ptr->m_listener) {
d_ptr->m_listener->onDatasetReady();
}
}
Algorithm m_algorithm;
size_t m_ready = 0;
uint8_t m_seed[32];
bool m_hugePages = true;
bool m_numa = true;
Buffer m_seed;
IRxListener *m_listener = nullptr;
size_t m_ready = 0;
uv_async_t *m_async;
};
} // namespace xmrig
bool xmrig::Rx::init(const Job &job, int initThreads, bool hugePages, bool numa, IRxListener *listener)
{
if (job.algorithm().family() != Algorithm::RANDOM_X) {
return true;
}
std::lock_guard<std::mutex> lock(mutex);
if (d_ptr->isReady(job)) {
return true;
}
d_ptr->setState(job, hugePages, numa, listener);
const uint32_t threads = initThreads < 1 ? static_cast<uint32_t>(Cpu::info()->threads()) : static_cast<uint32_t>(initThreads);
const String buf = Buffer::toHex(job.seed().data(), 8);
LOG_INFO("%s" MAGENTA_BOLD("init dataset%s") " algo " WHITE_BOLD("%s (") CYAN_BOLD("%u") WHITE_BOLD(" threads)") BLACK_BOLD(" seed %s..."),
tag,
d_ptr->count() > 1 ? "s" : "",
job.algorithm().shortName(),
threads,
buf.data()
);
# ifdef XMRIG_FEATURE_HWLOC
if (d_ptr->isNUMA()) {
for (auto const &item : d_ptr->datasets) {
std::thread thread(RxPrivate::initDataset, item.first, threads);
thread.detach();
}
}
else
# endif
{
std::thread thread(RxPrivate::initDataset, 0, threads);
thread.detach();
}
return false;
}
bool xmrig::Rx::isReady(const Job &job)
{
std::lock_guard<std::mutex> lock(d_ptr->mutex);
std::lock_guard<std::mutex> lock(mutex);
return d_ptr->isReady(job);
}
@ -241,7 +306,7 @@ bool xmrig::Rx::isReady(const Job &job)
xmrig::RxDataset *xmrig::Rx::dataset(const Job &job, uint32_t nodeId)
{
std::lock_guard<std::mutex> lock(d_ptr->mutex);
std::lock_guard<std::mutex> lock(mutex);
if (!d_ptr->isReady(job)) {
return nullptr;
}
@ -253,7 +318,7 @@ xmrig::RxDataset *xmrig::Rx::dataset(const Job &job, uint32_t nodeId)
std::pair<unsigned, unsigned> xmrig::Rx::hugePages()
{
std::pair<unsigned, unsigned> pages(0, 0);
std::lock_guard<std::mutex> lock(d_ptr->mutex);
std::lock_guard<std::mutex> lock(mutex);
for (auto const &item : d_ptr->datasets) {
if (!item.second) {
@ -281,43 +346,3 @@ void xmrig::Rx::init()
{
d_ptr = new RxPrivate();
}
void xmrig::Rx::init(const Job &job, int initThreads, bool hugePages, bool numa)
{
if (job.algorithm().family() != Algorithm::RANDOM_X) {
return;
}
std::lock_guard<std::mutex> lock(d_ptr->mutex);
if (d_ptr->isReady(job)) {
return;
}
d_ptr->setState(job, hugePages, numa);
const uint32_t threads = initThreads < 1 ? static_cast<uint32_t>(Cpu::info()->threads()) : static_cast<uint32_t>(initThreads);
const String buf = Buffer::toHex(job.seedHash(), 8);
LOG_INFO("%s" MAGENTA_BOLD("init dataset%s") " algo " WHITE_BOLD("%s (") CYAN_BOLD("%u") WHITE_BOLD(" threads)") BLACK_BOLD(" seed %s..."),
tag,
d_ptr->count() > 1 ? "s" : "",
job.algorithm().shortName(),
threads,
buf.data()
);
# ifdef XMRIG_FEATURE_HWLOC
if (d_ptr->isNUMA()) {
for (auto const &item : d_ptr->datasets) {
std::thread thread(RxPrivate::initDataset, item.first, threads);
thread.detach();
}
}
else
# endif
{
std::thread thread(RxPrivate::initDataset, 0, threads);
thread.detach();
}
}

View file

@ -28,7 +28,7 @@
#define XMRIG_RX_H
#include <stdint.h>
#include <cstdint>
#include <utility>
@ -37,19 +37,20 @@ namespace xmrig
class Algorithm;
class RxDataset;
class IRxListener;
class Job;
class RxDataset;
class Rx
{
public:
static bool init(const Job &job, int initThreads, bool hugePages, bool numa, IRxListener *listener);
static bool isReady(const Job &job);
static RxDataset *dataset(const Job &job, uint32_t nodeId);
static std::pair<unsigned, unsigned> hugePages();
static void destroy();
static void init();
static void init(const Job &job, int initThreads, bool hugePages, bool numa);
};

View file

@ -31,14 +31,21 @@
xmrig::Algorithm::Id xmrig::RxAlgo::apply(Algorithm::Id algorithm)
{
randomx_apply_config(*base(algorithm));
return algorithm;
}
const RandomX_ConfigurationBase *xmrig::RxAlgo::base(Algorithm::Id algorithm)
{
switch (algorithm) {
case Algorithm::RX_WOW:
randomx_apply_config(RandomX_WowneroConfig);
break;
return &RandomX_WowneroConfig;
case Algorithm::RX_LOKI:
randomx_apply_config(RandomX_LokiConfig);
return &RandomX_LokiConfig;
break;
case Algorithm::DEFYX:
@ -46,9 +53,32 @@ xmrig::Algorithm::Id xmrig::RxAlgo::apply(Algorithm::Id algorithm)
break;
default:
randomx_apply_config(RandomX_MoneroConfig);
break;
}
return algorithm;
return &RandomX_MoneroConfig;
}
uint32_t xmrig::RxAlgo::version(Algorithm::Id algorithm)
{
return algorithm == Algorithm::RX_WOW ? 103 : 104;
}
uint32_t xmrig::RxAlgo::programCount(Algorithm::Id algorithm)
{
return base(algorithm)->ProgramCount;
}
uint32_t xmrig::RxAlgo::programIterations(Algorithm::Id algorithm)
{
return base(algorithm)->ProgramIterations;
}
uint32_t xmrig::RxAlgo::programSize(Algorithm::Id algorithm)
{
return base(algorithm)->ProgramSize;
}

View file

@ -28,13 +28,16 @@
#define XMRIG_RX_ALGO_H
#include <stddef.h>
#include <stdint.h>
#include <cstddef>
#include <cstdint>
#include "crypto/common/Algorithm.h"
struct RandomX_ConfigurationBase;
namespace xmrig
{
@ -43,6 +46,11 @@ class RxAlgo
{
public:
static Algorithm::Id apply(Algorithm::Id algorithm);
static const RandomX_ConfigurationBase *base(Algorithm::Id algorithm);
static uint32_t programCount(Algorithm::Id algorithm);
static uint32_t programIterations(Algorithm::Id algorithm);
static uint32_t programSize(Algorithm::Id algorithm);
static uint32_t version(Algorithm::Id algorithm);
};

View file

@ -34,8 +34,7 @@ static_assert(RANDOMX_FLAG_LARGE_PAGES == 1, "RANDOMX_FLAG_LARGE_PAGES flag mism
xmrig::RxCache::RxCache(bool hugePages) :
m_seed()
xmrig::RxCache::RxCache(bool hugePages)
{
if (hugePages) {
m_flags = RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES;
@ -62,22 +61,14 @@ xmrig::RxCache::~RxCache()
}
bool xmrig::RxCache::init(const uint8_t *seed)
bool xmrig::RxCache::init(const Buffer &seed)
{
if (isReady(seed)) {
if (m_seed == seed) {
return false;
}
memcpy(m_seed, seed, sizeof(m_seed));
randomx_init_cache(m_cache, m_seed, sizeof(m_seed));
m_initCount++;
m_seed = seed;
randomx_init_cache(m_cache, m_seed.data(), m_seed.size());
return true;
}
bool xmrig::RxCache::isReady(const uint8_t *seed) const
{
return m_initCount && memcmp(m_seed, seed, sizeof(m_seed)) == 0;
}

View file

@ -28,9 +28,11 @@
#define XMRIG_RX_CACHE_H
#include <stdint.h>
#include <cstdint>
#include "base/tools/Buffer.h"
#include "base/tools/Object.h"
#include "crypto/randomx/configuration.h"
@ -44,26 +46,24 @@ namespace xmrig
class RxCache
{
public:
XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxCache)
RxCache(bool hugePages = true);
~RxCache();
inline bool isHugePages() const { return m_flags & 1; }
inline bool isJIT() const { return m_flags & 8; }
inline const uint8_t *seed() const { return m_seed; }
inline const Buffer &seed() const { return m_seed; }
inline randomx_cache *get() const { return m_cache; }
inline uint64_t initCount() const { return m_initCount; }
bool init(const uint8_t *seed);
bool init(const Buffer &seed);
static inline constexpr size_t size() { return RANDOMX_CACHE_MAX_SIZE; }
static inline constexpr size_t maxSize() { return RANDOMX_CACHE_MAX_SIZE; }
private:
bool isReady(const uint8_t *seed) const;
Buffer m_seed;
int m_flags = 0;
randomx_cache *m_cache = nullptr;
uint64_t m_initCount = 0;
uint8_t m_seed[32];
};

View file

@ -64,7 +64,7 @@ xmrig::RxDataset::~RxDataset()
}
bool xmrig::RxDataset::init(const uint8_t *seed, uint32_t numThreads)
bool xmrig::RxDataset::init(const Buffer &seed, uint32_t numThreads)
{
cache()->init(seed);
@ -72,13 +72,13 @@ bool xmrig::RxDataset::init(const uint8_t *seed, uint32_t numThreads)
return true;
}
const uint32_t datasetItemCount = randomx_dataset_item_count();
const uint64_t datasetItemCount = randomx_dataset_item_count();
if (numThreads > 1) {
std::vector<std::thread> threads;
threads.reserve(numThreads);
for (uint32_t i = 0; i < numThreads; ++i) {
for (uint64_t i = 0; i < numThreads; ++i) {
const uint32_t a = (datasetItemCount * i) / numThreads;
const uint32_t b = (datasetItemCount * (i + 1)) / numThreads;
threads.emplace_back(randomx_init_dataset, m_dataset, m_cache->get(), a, b - a);
@ -99,16 +99,22 @@ bool xmrig::RxDataset::init(const uint8_t *seed, uint32_t numThreads)
std::pair<size_t, size_t> xmrig::RxDataset::hugePages() const
{
constexpr size_t twoMiB = 2u * 1024u * 1024u;
constexpr const size_t total = (VirtualMemory::align(size(), twoMiB) + VirtualMemory::align(RxCache::size(), twoMiB)) / twoMiB;
constexpr const size_t total = (VirtualMemory::align(maxSize(), twoMiB) + VirtualMemory::align(RxCache::maxSize(), twoMiB)) / twoMiB;
size_t count = 0;
if (isHugePages()) {
count += VirtualMemory::align(size(), twoMiB) / twoMiB;
count += VirtualMemory::align(maxSize(), twoMiB) / twoMiB;
}
if (m_cache->isHugePages()) {
count += VirtualMemory::align(RxCache::size(), twoMiB) / twoMiB;
count += VirtualMemory::align(RxCache::maxSize(), twoMiB) / twoMiB;
}
return std::pair<size_t, size_t>(count, total);
return { count, total };
}
void *xmrig::RxDataset::raw() const
{
return m_dataset ? randomx_get_dataset_memory(m_dataset) : nullptr;
}

View file

@ -30,6 +30,7 @@
#include "crypto/common/Algorithm.h"
#include "crypto/randomx/configuration.h"
#include "base/tools/Object.h"
struct randomx_dataset;
@ -39,23 +40,28 @@ namespace xmrig
{
class Buffer;
class RxCache;
class RxDataset
{
public:
XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxDataset)
RxDataset(bool hugePages = true);
~RxDataset();
inline bool isHugePages() const { return m_flags & 1; }
inline randomx_dataset *get() const { return m_dataset; }
inline RxCache *cache() const { return m_cache; }
inline size_t size() const { return maxSize(); }
bool init(const uint8_t *seed, uint32_t numThreads);
bool init(const Buffer &seed, uint32_t numThreads);
std::pair<size_t, size_t> hugePages() const;
void *raw() const;
static inline constexpr size_t size() { return RANDOMX_DATASET_MAX_SIZE; }
static inline constexpr size_t maxSize() { return RANDOMX_DATASET_MAX_SIZE; }
private:
Algorithm m_algorithm;

View file

@ -28,10 +28,13 @@
#define XMRIG_RX_VM_H
#include <stdint.h>
#include "base/tools/Object.h"
struct randomx_vm;
#include <cstdint>
class randomx_vm;
namespace xmrig
@ -44,6 +47,8 @@ class RxDataset;
class RxVm
{
public:
XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxVm);
RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes);
~RxVm();