Merge xmrig v6.16.1 into master
This commit is contained in:
commit
ecdb1929e2
121 changed files with 1944 additions and 1118 deletions
|
@ -9,7 +9,7 @@
|
|||
|
||||
/* Guess the endianness of the target architecture. */
|
||||
|
||||
/*
|
||||
/*
|
||||
* The LITTLE endian machines:
|
||||
*/
|
||||
#if defined(__ultrix) /* Older MIPS */
|
||||
|
@ -27,15 +27,15 @@
|
|||
#elif defined(__INTEL_COMPILER) /* x86 (surely Intel compiler icl.exe) */
|
||||
#define ECRYPT_LITTLE_ENDIAN
|
||||
|
||||
/*
|
||||
* The BIG endian machines:
|
||||
/*
|
||||
* The BIG endian machines:
|
||||
*/
|
||||
#elif defined(sun) /* Newer Sparc's */
|
||||
#define ECRYPT_BIG_ENDIAN
|
||||
#elif defined(__ppc__) /* PowerPC */
|
||||
#define ECRYPT_BIG_ENDIAN
|
||||
|
||||
/*
|
||||
/*
|
||||
* Finally machines with UNKNOWN endianness:
|
||||
*/
|
||||
#elif defined (_AIX) /* RS6000 */
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
* and should be used carefully. They should NOT be used with
|
||||
* parameters which perform some action. E.g., the following two lines
|
||||
* are not equivalent:
|
||||
*
|
||||
* 1) ++x; y = ROTL32(x, n);
|
||||
*
|
||||
* 1) ++x; y = ROTL32(x, n);
|
||||
* 2) y = ROTL32(++x, n);
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
/* ecrypt-sync.h */
|
||||
|
||||
/*
|
||||
/*
|
||||
* Header file for synchronous stream ciphers without authentication
|
||||
* mechanism.
|
||||
*
|
||||
*
|
||||
* *** Please only edit parts marked with "[edit]". ***
|
||||
*/
|
||||
|
||||
|
@ -16,10 +16,10 @@
|
|||
|
||||
/* Cipher parameters */
|
||||
|
||||
/*
|
||||
/*
|
||||
* The name of your cipher.
|
||||
*/
|
||||
#define ECRYPT_NAME "Salsa20" /* [edit] */
|
||||
#define ECRYPT_NAME "Salsa20" /* [edit] */
|
||||
#define ECRYPT_PROFILE "S!_H."
|
||||
|
||||
/*
|
||||
|
@ -47,15 +47,15 @@
|
|||
|
||||
/* Data structures */
|
||||
|
||||
/*
|
||||
/*
|
||||
* ECRYPT_ctx is the structure containing the representation of the
|
||||
* internal state of your cipher.
|
||||
* internal state of your cipher.
|
||||
*/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
u32 input[16]; /* could be compressed */
|
||||
/*
|
||||
/*
|
||||
* [edit]
|
||||
*
|
||||
* Put here all state variable needed during the encryption process.
|
||||
|
@ -79,10 +79,10 @@ void ECRYPT_init();
|
|||
* above.
|
||||
*/
|
||||
void ECRYPT_keysetup(
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* key,
|
||||
u32 keysize, /* Key size in bits. */
|
||||
u32 ivsize); /* IV size in bits. */
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* key,
|
||||
u32 keysize, /* Key size in bits. */
|
||||
u32 ivsize); /* IV size in bits. */
|
||||
|
||||
/*
|
||||
* IV setup. After having called ECRYPT_keysetup(), the user is
|
||||
|
@ -91,7 +91,7 @@ void ECRYPT_keysetup(
|
|||
* IV's.
|
||||
*/
|
||||
void ECRYPT_ivsetup(
|
||||
ECRYPT_ctx* ctx,
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* iv);
|
||||
|
||||
/*
|
||||
|
@ -102,7 +102,7 @@ void ECRYPT_ivsetup(
|
|||
* (declared here) encrypts byte strings of arbitrary length, while
|
||||
* the ECRYPT_encrypt_blocks() function (defined later) only accepts
|
||||
* lengths which are multiples of ECRYPT_BLOCKLENGTH.
|
||||
*
|
||||
*
|
||||
* The user is allowed to make multiple calls to
|
||||
* ECRYPT_encrypt_blocks() to incrementally encrypt a long message,
|
||||
* but he is NOT allowed to make additional encryption calls once he
|
||||
|
@ -122,7 +122,7 @@ void ECRYPT_ivsetup(
|
|||
*
|
||||
* ECRYPT_ivsetup();
|
||||
* ECRYPT_encrypt_bytes();
|
||||
*
|
||||
*
|
||||
* The following sequence is not:
|
||||
*
|
||||
* ECRYPT_keysetup();
|
||||
|
@ -133,22 +133,22 @@ void ECRYPT_ivsetup(
|
|||
*/
|
||||
|
||||
void ECRYPT_encrypt_bytes(
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* plaintext,
|
||||
u8* ciphertext,
|
||||
u32 msglen); /* Message length in bytes. */
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* plaintext,
|
||||
u8* ciphertext,
|
||||
u32 msglen); /* Message length in bytes. */
|
||||
|
||||
void ECRYPT_decrypt_bytes(
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* ciphertext,
|
||||
u8* plaintext,
|
||||
u32 msglen); /* Message length in bytes. */
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* ciphertext,
|
||||
u8* plaintext,
|
||||
u32 msglen); /* Message length in bytes. */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* Optional features */
|
||||
|
||||
/*
|
||||
/*
|
||||
* For testing purposes it can sometimes be useful to have a function
|
||||
* which immediately generates keystream without having to provide it
|
||||
* with a zero plaintext. If your cipher cannot provide this function
|
||||
|
@ -170,7 +170,7 @@ void ECRYPT_keystream_bytes(
|
|||
|
||||
/* Optional optimizations */
|
||||
|
||||
/*
|
||||
/*
|
||||
* By default, the functions in this section are implemented using
|
||||
* calls to functions declared above. However, you might want to
|
||||
* implement them differently for performance reasons.
|
||||
|
@ -186,22 +186,22 @@ void ECRYPT_keystream_bytes(
|
|||
#define ECRYPT_USES_DEFAULT_ALL_IN_ONE /* [edit] */
|
||||
|
||||
void ECRYPT_encrypt_packet(
|
||||
ECRYPT_ctx* ctx,
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* iv,
|
||||
const u8* plaintext,
|
||||
u8* ciphertext,
|
||||
const u8* plaintext,
|
||||
u8* ciphertext,
|
||||
u32 msglen);
|
||||
|
||||
void ECRYPT_decrypt_packet(
|
||||
ECRYPT_ctx* ctx,
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* iv,
|
||||
const u8* ciphertext,
|
||||
u8* plaintext,
|
||||
const u8* ciphertext,
|
||||
u8* plaintext,
|
||||
u32 msglen);
|
||||
|
||||
/*
|
||||
* Encryption/decryption of blocks.
|
||||
*
|
||||
*
|
||||
* By default, these functions are defined as macros. If you want to
|
||||
* provide a different implementation, please undef the
|
||||
* ECRYPT_USES_DEFAULT_BLOCK_MACROS flag and implement the functions
|
||||
|
@ -232,23 +232,23 @@ void ECRYPT_decrypt_packet(
|
|||
#else
|
||||
|
||||
void ECRYPT_encrypt_blocks(
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* plaintext,
|
||||
u8* ciphertext,
|
||||
u32 blocks); /* Message length in blocks. */
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* plaintext,
|
||||
u8* ciphertext,
|
||||
u32 blocks); /* Message length in blocks. */
|
||||
|
||||
void ECRYPT_decrypt_blocks(
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* ciphertext,
|
||||
u8* plaintext,
|
||||
u32 blocks); /* Message length in blocks. */
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* ciphertext,
|
||||
u8* plaintext,
|
||||
u32 blocks); /* Message length in blocks. */
|
||||
|
||||
#ifdef ECRYPT_GENERATES_KEYSTREAM
|
||||
|
||||
void ECRYPT_keystream_blocks(
|
||||
ECRYPT_ctx* ctx,
|
||||
const u8* keystream,
|
||||
u32 blocks); /* Keystream length in blocks. */
|
||||
u32 blocks); /* Keystream length in blocks. */
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -262,7 +262,7 @@ void ECRYPT_keystream_blocks(
|
|||
* significant difference and keep the number of variants
|
||||
* (ECRYPT_MAXVARIANT) as small as possible (definitely not more than
|
||||
* 10). Note also that all variants should have exactly the same
|
||||
* external interface (i.e., the same ECRYPT_BLOCKLENGTH, etc.).
|
||||
* external interface (i.e., the same ECRYPT_BLOCKLENGTH, etc.).
|
||||
*/
|
||||
#define ECRYPT_MAXVARIANT 1 /* [edit] */
|
||||
|
||||
|
|
|
@ -354,7 +354,7 @@ xmrig::cn_hash_fun xmrig::CnHash::fn(const Algorithm &algorithm, AlgoVariant av,
|
|||
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
// cn-heavy optimization for Zen3 CPUs
|
||||
if ((av == AV_SINGLE) && (assembly != Assembly::NONE) && (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3)) {
|
||||
if ((av == AV_SINGLE) && (assembly != Assembly::NONE) && (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) && (Cpu::info()->model() == 0x21)) {
|
||||
switch (algorithm.id()) {
|
||||
case Algorithm::CN_HEAVY_0:
|
||||
return cryptonight_single_hash<Algorithm::CN_HEAVY_0, false, 3>;
|
||||
|
|
|
@ -466,6 +466,29 @@ const static uint8_t astrobwt_dero_test_out[256] = {
|
|||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
// "GhostRider"
|
||||
const static uint8_t test_output_gr[256] = {
|
||||
0x42, 0x17, 0x0C, 0xC1, 0x85, 0xE6, 0x76, 0x3C, 0xC7, 0xCB, 0x27, 0xC4, 0x17, 0x39, 0x2D, 0xE2,
|
||||
0x29, 0x6B, 0x40, 0x66, 0x85, 0xA4, 0xE3, 0xD3, 0x8C, 0xE9, 0xA5, 0x8F, 0x10, 0xFC, 0x81, 0xE4,
|
||||
0x90, 0x56, 0xF2, 0x9E, 0x00, 0xD0, 0xF8, 0xA1, 0x88, 0x82, 0x86, 0xC0, 0x86, 0x04, 0x6B, 0x0E,
|
||||
0x9A, 0xDB, 0xDB, 0xFD, 0x23, 0x16, 0x77, 0x94, 0xFE, 0x58, 0x93, 0x05, 0x10, 0x3F, 0x27, 0x75,
|
||||
0x51, 0x44, 0xF3, 0x5F, 0xE2, 0xF9, 0x61, 0xBE, 0xC0, 0x30, 0xB5, 0x8E, 0xB1, 0x1B, 0xA1, 0xF7,
|
||||
0x06, 0x4E, 0xF1, 0x6A, 0xFD, 0xA5, 0x44, 0x8E, 0x64, 0x47, 0x8C, 0x67, 0x51, 0xE2, 0x5C, 0x55,
|
||||
0x3E, 0x39, 0xA6, 0xA5, 0xF7, 0xB8, 0xD0, 0x5E, 0xE2, 0xBF, 0x92, 0x44, 0xD9, 0xAA, 0x76, 0x22,
|
||||
0xE3, 0x3E, 0x15, 0x96, 0xD8, 0x6A, 0x78, 0x2D, 0xA9, 0x77, 0x24, 0x1A, 0x4B, 0xE7, 0x5A, 0x2E,
|
||||
0x89, 0x77, 0xAE, 0x92, 0xE4, 0xA4, 0x2D, 0xAF, 0x0B, 0x27, 0x09, 0xB2, 0x5F, 0x95, 0x61, 0xA9,
|
||||
0xA8, 0xBE, 0x5D, 0x39, 0xBE, 0x41, 0x5F, 0x9C, 0x67, 0x28, 0x48, 0x4F, 0xAE, 0x2A, 0x50, 0x2B,
|
||||
0xB8, 0xC7, 0x42, 0x73, 0x51, 0x60, 0x59, 0xD8, 0x9C, 0xBA, 0x22, 0x2F, 0x8E, 0x34, 0xDE, 0xC8,
|
||||
0x1B, 0xAE, 0x9E, 0xBD, 0xF7, 0xE8, 0xFD, 0x8A, 0x97, 0xBE, 0xF0, 0x47, 0xAC, 0x27, 0xDD, 0x28,
|
||||
0xC9, 0x28, 0xA8, 0x7B, 0x2A, 0xB8, 0x90, 0x3E, 0xCA, 0xB4, 0x78, 0x44, 0xCE, 0xCD, 0x91, 0xEC,
|
||||
0xC2, 0x5A, 0x17, 0x59, 0x7C, 0x14, 0xF8, 0x95, 0x28, 0x14, 0xC3, 0xAD, 0xC4, 0xE1, 0x13, 0x5A,
|
||||
0xC4, 0xA7, 0xC7, 0x77, 0xAD, 0xF8, 0x09, 0x61, 0x16, 0xBB, 0xAA, 0x7E, 0xAB, 0xC3, 0x00, 0x25,
|
||||
0xBA, 0xA8, 0x97, 0xC7, 0x7D, 0x38, 0x46, 0x0E, 0x59, 0xAC, 0xCB, 0xAE, 0xFE, 0x3C, 0x6F, 0x01
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
|
|
|
@ -43,6 +43,11 @@
|
|||
#include "crypto/cn/soft_aes.h"
|
||||
|
||||
|
||||
#ifdef XMRIG_VAES
|
||||
# include "crypto/cn/CryptoNight_x86_vaes.h"
|
||||
#endif
|
||||
|
||||
|
||||
extern "C"
|
||||
{
|
||||
#include "crypto/cn/c_groestl.h"
|
||||
|
@ -289,6 +294,13 @@ static NOINLINE void cn_explode_scratchpad(cryptonight_ctx *ctx)
|
|||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
|
||||
cn_explode_scratchpad_vaes<ALGO>(ctx);
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
|
||||
|
||||
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
|
||||
|
@ -341,7 +353,7 @@ static NOINLINE void cn_explode_scratchpad(cryptonight_ctx *ctx)
|
|||
constexpr int output_increment = (64 << interleave) / sizeof(__m128i);
|
||||
constexpr int prefetch_dist = 2048 / sizeof(__m128i);
|
||||
|
||||
__m128i* e = output + N - prefetch_dist;
|
||||
__m128i* e = output + (N << interleave) - prefetch_dist;
|
||||
__m128i* prefetch_ptr = output + prefetch_dist;
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
|
@ -396,6 +408,13 @@ static NOINLINE void cn_implode_scratchpad(cryptonight_ctx *ctx)
|
|||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
|
||||
cn_implode_scratchpad_vaes<ALGO>(ctx);
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_GPU
|
||||
constexpr bool IS_HEAVY = props.isHeavy() || ALGO == Algorithm::CN_GPU;
|
||||
# else
|
||||
|
@ -1070,8 +1089,17 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
ctx[0]->first_half = true;
|
||||
ctx[1]->first_half = true;
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && Cpu::info()->hasVAES()) {
|
||||
cn_explode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
}
|
||||
|
||||
if (ALGO == Algorithm::CN_2) {
|
||||
cnv2_double_mainloop_sandybridge_asm(ctx);
|
||||
|
@ -1110,8 +1138,16 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
ctx[0]->generated_code(ctx);
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && Cpu::info()->hasVAES()) {
|
||||
cn_implode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
}
|
||||
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
|
||||
|
@ -1166,8 +1202,17 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
ctx[0]->first_half = true;
|
||||
ctx[1]->first_half = true;
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
|
||||
cn_explode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
}
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
|
@ -1362,8 +1407,16 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
bx10 = cx1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
|
||||
cn_implode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
}
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
@ -1424,10 +1477,19 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
|
|||
ctx[3]->first_half = true;
|
||||
}
|
||||
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
|
||||
cn_explode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
|
||||
cn_explode_scratchpad_vaes_double<ALGO>(ctx[2], ctx[3]);
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
|
||||
}
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
|
@ -1548,10 +1610,19 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
|
|||
if (!SOFT_AES) cx3 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l3[idx3 & MASK]));
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
|
||||
cn_implode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
|
||||
cn_implode_scratchpad_vaes_double<ALGO>(ctx[2], ctx[3]);
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
|
||||
}
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
@ -1788,7 +1859,20 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
|||
if (props.half_mem()) {
|
||||
ctx[i]->first_half = true;
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
|
||||
}
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
|
||||
cn_explode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
|
||||
cn_explode_scratchpad_vaes_double<ALGO>(ctx[2], ctx[3]);
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
|
||||
}
|
||||
|
||||
uint8_t* l0 = ctx[0]->memory;
|
||||
|
@ -1840,8 +1924,21 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
|||
CN_STEP4(3, ax3, bx30, bx31, cx3, l3, mc3, ptr3, idx3);
|
||||
}
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
|
||||
cn_implode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
|
||||
cn_implode_scratchpad_vaes_double<ALGO>(ctx[2], ctx[3]);
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
||||
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
||||
}
|
||||
|
|
530
src/crypto/cn/CryptoNight_x86_vaes.cpp
Normal file
530
src/crypto/cn/CryptoNight_x86_vaes.cpp
Normal file
|
@ -0,0 +1,530 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "CryptoNight_x86_vaes.h"
|
||||
#include "CryptoNight_monero.h"
|
||||
#include "CryptoNight.h"
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
# include <x86intrin.h>
|
||||
#if !defined(__clang__) && !defined(__ICC) && __GNUC__ < 10
|
||||
static inline __m256i
|
||||
__attribute__((__always_inline__))
|
||||
_mm256_loadu2_m128i(const __m128i* const hiaddr, const __m128i* const loaddr)
|
||||
{
|
||||
return _mm256_inserti128_si256(
|
||||
_mm256_castsi128_si256(_mm_loadu_si128(loaddr)), _mm_loadu_si128(hiaddr), 1);
|
||||
}
|
||||
|
||||
static inline void
|
||||
__attribute__((__always_inline__))
|
||||
_mm256_storeu2_m128i(__m128i* const hiaddr, __m128i* const loaddr, const __m256i a)
|
||||
{
|
||||
_mm_storeu_si128(loaddr, _mm256_castsi256_si128(a));
|
||||
_mm_storeu_si128(hiaddr, _mm256_extracti128_si256(a, 1));
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
# include <intrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
|
||||
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
|
||||
static FORCEINLINE __m128i sl_xor(__m128i tmp1)
|
||||
{
|
||||
__m128i tmp4;
|
||||
tmp4 = _mm_slli_si128(tmp1, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
return tmp1;
|
||||
}
|
||||
|
||||
|
||||
template<uint8_t rcon>
|
||||
static FORCEINLINE void aes_genkey_sub(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
__m128i xout1 = _mm_aeskeygenassist_si128(*xout2, rcon);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
|
||||
*xout0 = sl_xor(*xout0);
|
||||
*xout0 = _mm_xor_si128(*xout0, xout1);
|
||||
xout1 = _mm_aeskeygenassist_si128(*xout0, 0x00);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
|
||||
*xout2 = sl_xor(*xout2);
|
||||
*xout2 = _mm_xor_si128(*xout2, xout1);
|
||||
}
|
||||
|
||||
|
||||
static NOINLINE void vaes_genkey(const __m128i* memory, __m256i* k0, __m256i* k1, __m256i* k2, __m256i* k3, __m256i* k4, __m256i* k5, __m256i* k6, __m256i* k7, __m256i* k8, __m256i* k9)
|
||||
{
|
||||
__m128i xout0 = _mm_load_si128(memory);
|
||||
__m128i xout2 = _mm_load_si128(memory + 1);
|
||||
*k0 = _mm256_set_m128i(xout0, xout0);
|
||||
*k1 = _mm256_set_m128i(xout2, xout2);
|
||||
|
||||
aes_genkey_sub<0x01>(&xout0, &xout2);
|
||||
*k2 = _mm256_set_m128i(xout0, xout0);
|
||||
*k3 = _mm256_set_m128i(xout2, xout2);
|
||||
|
||||
aes_genkey_sub<0x02>(&xout0, &xout2);
|
||||
*k4 = _mm256_set_m128i(xout0, xout0);
|
||||
*k5 = _mm256_set_m128i(xout2, xout2);
|
||||
|
||||
aes_genkey_sub<0x04>(&xout0, &xout2);
|
||||
*k6 = _mm256_set_m128i(xout0, xout0);
|
||||
*k7 = _mm256_set_m128i(xout2, xout2);
|
||||
|
||||
aes_genkey_sub<0x08>(&xout0, &xout2);
|
||||
*k8 = _mm256_set_m128i(xout0, xout0);
|
||||
*k9 = _mm256_set_m128i(xout2, xout2);
|
||||
}
|
||||
|
||||
|
||||
static NOINLINE void vaes_genkey_double(const __m128i* memory1, const __m128i* memory2, __m256i* k0, __m256i* k1, __m256i* k2, __m256i* k3, __m256i* k4, __m256i* k5, __m256i* k6, __m256i* k7, __m256i* k8, __m256i* k9)
|
||||
{
|
||||
__m128i xout0 = _mm_load_si128(memory1);
|
||||
__m128i xout1 = _mm_load_si128(memory1 + 1);
|
||||
__m128i xout2 = _mm_load_si128(memory2);
|
||||
__m128i xout3 = _mm_load_si128(memory2 + 1);
|
||||
*k0 = _mm256_set_m128i(xout2, xout0);
|
||||
*k1 = _mm256_set_m128i(xout3, xout1);
|
||||
|
||||
aes_genkey_sub<0x01>(&xout0, &xout1);
|
||||
aes_genkey_sub<0x01>(&xout2, &xout3);
|
||||
*k2 = _mm256_set_m128i(xout2, xout0);
|
||||
*k3 = _mm256_set_m128i(xout3, xout1);
|
||||
|
||||
aes_genkey_sub<0x02>(&xout0, &xout1);
|
||||
aes_genkey_sub<0x02>(&xout2, &xout3);
|
||||
*k4 = _mm256_set_m128i(xout2, xout0);
|
||||
*k5 = _mm256_set_m128i(xout3, xout1);
|
||||
|
||||
aes_genkey_sub<0x04>(&xout0, &xout1);
|
||||
aes_genkey_sub<0x04>(&xout2, &xout3);
|
||||
*k6 = _mm256_set_m128i(xout2, xout0);
|
||||
*k7 = _mm256_set_m128i(xout3, xout1);
|
||||
|
||||
aes_genkey_sub<0x08>(&xout0, &xout1);
|
||||
aes_genkey_sub<0x08>(&xout2, &xout3);
|
||||
*k8 = _mm256_set_m128i(xout2, xout0);
|
||||
*k9 = _mm256_set_m128i(xout3, xout1);
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE void vaes_round(__m256i key, __m256i& x01, __m256i& x23, __m256i& x45, __m256i& x67)
|
||||
{
|
||||
x01 = _mm256_aesenc_epi128(x01, key);
|
||||
x23 = _mm256_aesenc_epi128(x23, key);
|
||||
x45 = _mm256_aesenc_epi128(x45, key);
|
||||
x67 = _mm256_aesenc_epi128(x67, key);
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE void vaes_round(__m256i key, __m256i& x0, __m256i& x1, __m256i& x2, __m256i& x3, __m256i& x4, __m256i& x5, __m256i& x6, __m256i& x7)
|
||||
{
|
||||
x0 = _mm256_aesenc_epi128(x0, key);
|
||||
x1 = _mm256_aesenc_epi128(x1, key);
|
||||
x2 = _mm256_aesenc_epi128(x2, key);
|
||||
x3 = _mm256_aesenc_epi128(x3, key);
|
||||
x4 = _mm256_aesenc_epi128(x4, key);
|
||||
x5 = _mm256_aesenc_epi128(x5, key);
|
||||
x6 = _mm256_aesenc_epi128(x6, key);
|
||||
x7 = _mm256_aesenc_epi128(x7, key);
|
||||
}
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO>
|
||||
NOINLINE void cn_explode_scratchpad_vaes(cryptonight_ctx* ctx)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
||||
constexpr size_t N = (props.memory() / sizeof(__m256i)) / (props.half_mem() ? 2 : 1);
|
||||
|
||||
__m256i xin01, xin23, xin45, xin67;
|
||||
__m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
const __m128i* input = reinterpret_cast<const __m128i*>(ctx->state);
|
||||
__m256i* output = reinterpret_cast<__m256i*>(ctx->memory);
|
||||
|
||||
vaes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
if (props.half_mem() && !ctx->first_half) {
|
||||
const __m256i* p = reinterpret_cast<const __m256i*>(ctx->save_state);
|
||||
xin01 = _mm256_load_si256(p + 0);
|
||||
xin23 = _mm256_load_si256(p + 1);
|
||||
xin45 = _mm256_load_si256(p + 2);
|
||||
xin67 = _mm256_load_si256(p + 3);
|
||||
}
|
||||
else {
|
||||
xin01 = _mm256_load_si256(reinterpret_cast<const __m256i*>(input + 4));
|
||||
xin23 = _mm256_load_si256(reinterpret_cast<const __m256i*>(input + 6));
|
||||
xin45 = _mm256_load_si256(reinterpret_cast<const __m256i*>(input + 8));
|
||||
xin67 = _mm256_load_si256(reinterpret_cast<const __m256i*>(input + 10));
|
||||
}
|
||||
|
||||
constexpr int output_increment = 64 / sizeof(__m256i);
|
||||
constexpr int prefetch_dist = 2048 / sizeof(__m256i);
|
||||
|
||||
__m256i* e = output + N - prefetch_dist;
|
||||
__m256i* prefetch_ptr = output + prefetch_dist;
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
do {
|
||||
_mm_prefetch((const char*)(prefetch_ptr), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(prefetch_ptr + output_increment), _MM_HINT_T0);
|
||||
|
||||
vaes_round(k0, xin01, xin23, xin45, xin67);
|
||||
vaes_round(k1, xin01, xin23, xin45, xin67);
|
||||
vaes_round(k2, xin01, xin23, xin45, xin67);
|
||||
vaes_round(k3, xin01, xin23, xin45, xin67);
|
||||
vaes_round(k4, xin01, xin23, xin45, xin67);
|
||||
vaes_round(k5, xin01, xin23, xin45, xin67);
|
||||
vaes_round(k6, xin01, xin23, xin45, xin67);
|
||||
vaes_round(k7, xin01, xin23, xin45, xin67);
|
||||
vaes_round(k8, xin01, xin23, xin45, xin67);
|
||||
vaes_round(k9, xin01, xin23, xin45, xin67);
|
||||
|
||||
_mm256_store_si256(output + 0, xin01);
|
||||
_mm256_store_si256(output + 1, xin23);
|
||||
|
||||
_mm256_store_si256(output + output_increment + 0, xin45);
|
||||
_mm256_store_si256(output + output_increment + 1, xin67);
|
||||
|
||||
output += output_increment * 2;
|
||||
prefetch_ptr += output_increment * 2;
|
||||
} while (output < e);
|
||||
e += prefetch_dist;
|
||||
prefetch_ptr = output;
|
||||
}
|
||||
|
||||
if (props.half_mem() && ctx->first_half) {
|
||||
__m256i* p = reinterpret_cast<__m256i*>(ctx->save_state);
|
||||
_mm256_store_si256(p + 0, xin01);
|
||||
_mm256_store_si256(p + 1, xin23);
|
||||
_mm256_store_si256(p + 2, xin45);
|
||||
_mm256_store_si256(p + 3, xin67);
|
||||
}
|
||||
|
||||
_mm256_zeroupper();
|
||||
}
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO>
|
||||
NOINLINE void cn_explode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
||||
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
|
||||
|
||||
__m256i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
|
||||
__m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
const __m128i* input1 = reinterpret_cast<const __m128i*>(ctx1->state);
|
||||
const __m128i* input2 = reinterpret_cast<const __m128i*>(ctx2->state);
|
||||
|
||||
__m128i* output1 = reinterpret_cast<__m128i*>(ctx1->memory);
|
||||
__m128i* output2 = reinterpret_cast<__m128i*>(ctx2->memory);
|
||||
|
||||
vaes_genkey_double(input1, input2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
{
|
||||
const bool b = props.half_mem() && !ctx1->first_half && !ctx2->first_half;
|
||||
const __m128i* p1 = b ? reinterpret_cast<const __m128i*>(ctx1->save_state) : (input1 + 4);
|
||||
const __m128i* p2 = b ? reinterpret_cast<const __m128i*>(ctx2->save_state) : (input2 + 4);
|
||||
xin0 = _mm256_loadu2_m128i(p2 + 0, p1 + 0);
|
||||
xin1 = _mm256_loadu2_m128i(p2 + 1, p1 + 1);
|
||||
xin2 = _mm256_loadu2_m128i(p2 + 2, p1 + 2);
|
||||
xin3 = _mm256_loadu2_m128i(p2 + 3, p1 + 3);
|
||||
xin4 = _mm256_loadu2_m128i(p2 + 4, p1 + 4);
|
||||
xin5 = _mm256_loadu2_m128i(p2 + 5, p1 + 5);
|
||||
xin6 = _mm256_loadu2_m128i(p2 + 6, p1 + 6);
|
||||
xin7 = _mm256_loadu2_m128i(p2 + 7, p1 + 7);
|
||||
}
|
||||
|
||||
constexpr int output_increment = 64 / sizeof(__m128i);
|
||||
constexpr int prefetch_dist = 2048 / sizeof(__m128i);
|
||||
|
||||
__m128i* e = output1 + N - prefetch_dist;
|
||||
__m128i* prefetch_ptr1 = output1 + prefetch_dist;
|
||||
__m128i* prefetch_ptr2 = output2 + prefetch_dist;
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
do {
|
||||
_mm_prefetch((const char*)(prefetch_ptr1), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(prefetch_ptr1 + output_increment), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(prefetch_ptr2), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(prefetch_ptr2 + output_increment), _MM_HINT_T0);
|
||||
|
||||
vaes_round(k0, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
|
||||
vaes_round(k1, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
|
||||
vaes_round(k2, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
|
||||
vaes_round(k3, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
|
||||
vaes_round(k4, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
|
||||
vaes_round(k5, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
|
||||
vaes_round(k6, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
|
||||
vaes_round(k7, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
|
||||
vaes_round(k8, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
|
||||
vaes_round(k9, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
|
||||
|
||||
_mm256_storeu2_m128i(output2 + 0, output1 + 0, xin0);
|
||||
_mm256_storeu2_m128i(output2 + 1, output1 + 1, xin1);
|
||||
_mm256_storeu2_m128i(output2 + 2, output1 + 2, xin2);
|
||||
_mm256_storeu2_m128i(output2 + 3, output1 + 3, xin3);
|
||||
|
||||
_mm256_storeu2_m128i(output2 + output_increment + 0, output1 + output_increment + 0, xin4);
|
||||
_mm256_storeu2_m128i(output2 + output_increment + 1, output1 + output_increment + 1, xin5);
|
||||
_mm256_storeu2_m128i(output2 + output_increment + 2, output1 + output_increment + 2, xin6);
|
||||
_mm256_storeu2_m128i(output2 + output_increment + 3, output1 + output_increment + 3, xin7);
|
||||
|
||||
output1 += output_increment * 2;
|
||||
prefetch_ptr1 += output_increment * 2;
|
||||
output2 += output_increment * 2;
|
||||
prefetch_ptr2 += output_increment * 2;
|
||||
} while (output1 < e);
|
||||
e += prefetch_dist;
|
||||
prefetch_ptr1 = output1;
|
||||
prefetch_ptr2 = output2;
|
||||
}
|
||||
|
||||
if (props.half_mem() && ctx1->first_half && ctx2->first_half) {
|
||||
__m128i* p1 = reinterpret_cast<__m128i*>(ctx1->save_state);
|
||||
__m128i* p2 = reinterpret_cast<__m128i*>(ctx2->save_state);
|
||||
_mm256_storeu2_m128i(p2 + 0, p1 + 0, xin0);
|
||||
_mm256_storeu2_m128i(p2 + 1, p1 + 1, xin1);
|
||||
_mm256_storeu2_m128i(p2 + 2, p1 + 2, xin2);
|
||||
_mm256_storeu2_m128i(p2 + 3, p1 + 3, xin3);
|
||||
_mm256_storeu2_m128i(p2 + 4, p1 + 4, xin4);
|
||||
_mm256_storeu2_m128i(p2 + 5, p1 + 5, xin5);
|
||||
_mm256_storeu2_m128i(p2 + 6, p1 + 6, xin6);
|
||||
_mm256_storeu2_m128i(p2 + 7, p1 + 7, xin7);
|
||||
}
|
||||
|
||||
_mm256_zeroupper();
|
||||
}
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO>
|
||||
NOINLINE void cn_implode_scratchpad_vaes(cryptonight_ctx* ctx)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
||||
constexpr size_t N = (props.memory() / sizeof(__m256i)) / (props.half_mem() ? 2 : 1);
|
||||
|
||||
__m256i xout01, xout23, xout45, xout67;
|
||||
__m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
const __m256i* input = reinterpret_cast<const __m256i*>(ctx->memory);
|
||||
__m256i* output = reinterpret_cast<__m256i*>(ctx->state);
|
||||
|
||||
vaes_genkey(reinterpret_cast<__m128i*>(output) + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xout01 = _mm256_load_si256(output + 2);
|
||||
xout23 = _mm256_load_si256(output + 3);
|
||||
xout45 = _mm256_load_si256(output + 4);
|
||||
xout67 = _mm256_load_si256(output + 5);
|
||||
|
||||
const __m256i* input_begin = input;
|
||||
for (size_t part = 0; part < (props.half_mem() ? 2 : 1); ++part) {
|
||||
if (props.half_mem() && (part == 1)) {
|
||||
input = input_begin;
|
||||
ctx->first_half = false;
|
||||
cn_explode_scratchpad_vaes<ALGO>(ctx);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < N;) {
|
||||
xout01 = _mm256_xor_si256(xout01, input[0]);
|
||||
xout23 = _mm256_xor_si256(xout23, input[1]);
|
||||
|
||||
constexpr int input_increment = 64 / sizeof(__m256i);
|
||||
|
||||
xout45 = _mm256_xor_si256(xout45, input[input_increment]);
|
||||
xout67 = _mm256_xor_si256(xout67, input[input_increment + 1]);
|
||||
|
||||
input += input_increment * 2;
|
||||
i += 4;
|
||||
|
||||
if (i < N) {
|
||||
_mm_prefetch((const char*)(input), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(input + input_increment), _MM_HINT_T0);
|
||||
}
|
||||
|
||||
vaes_round(k0, xout01, xout23, xout45, xout67);
|
||||
vaes_round(k1, xout01, xout23, xout45, xout67);
|
||||
vaes_round(k2, xout01, xout23, xout45, xout67);
|
||||
vaes_round(k3, xout01, xout23, xout45, xout67);
|
||||
vaes_round(k4, xout01, xout23, xout45, xout67);
|
||||
vaes_round(k5, xout01, xout23, xout45, xout67);
|
||||
vaes_round(k6, xout01, xout23, xout45, xout67);
|
||||
vaes_round(k7, xout01, xout23, xout45, xout67);
|
||||
vaes_round(k8, xout01, xout23, xout45, xout67);
|
||||
vaes_round(k9, xout01, xout23, xout45, xout67);
|
||||
}
|
||||
}
|
||||
|
||||
_mm256_store_si256(output + 2, xout01);
|
||||
_mm256_store_si256(output + 3, xout23);
|
||||
_mm256_store_si256(output + 4, xout45);
|
||||
_mm256_store_si256(output + 5, xout67);
|
||||
|
||||
_mm256_zeroupper();
|
||||
}
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO>
|
||||
NOINLINE void cn_implode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
||||
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
|
||||
|
||||
__m256i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
|
||||
__m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
const __m128i* input1 = reinterpret_cast<const __m128i*>(ctx1->memory);
|
||||
const __m128i* input2 = reinterpret_cast<const __m128i*>(ctx2->memory);
|
||||
|
||||
__m128i* output1 = reinterpret_cast<__m128i*>(ctx1->state);
|
||||
__m128i* output2 = reinterpret_cast<__m128i*>(ctx2->state);
|
||||
|
||||
vaes_genkey_double(output1 + 2, output2 + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xout0 = _mm256_loadu2_m128i(output2 + 4, output1 + 4);
|
||||
xout1 = _mm256_loadu2_m128i(output2 + 5, output1 + 5);
|
||||
xout2 = _mm256_loadu2_m128i(output2 + 6, output1 + 6);
|
||||
xout3 = _mm256_loadu2_m128i(output2 + 7, output1 + 7);
|
||||
xout4 = _mm256_loadu2_m128i(output2 + 8, output1 + 8);
|
||||
xout5 = _mm256_loadu2_m128i(output2 + 9, output1 + 9);
|
||||
xout6 = _mm256_loadu2_m128i(output2 + 10, output1 + 10);
|
||||
xout7 = _mm256_loadu2_m128i(output2 + 11, output1 + 11);
|
||||
|
||||
const __m128i* input_begin1 = input1;
|
||||
const __m128i* input_begin2 = input2;
|
||||
for (size_t part = 0; part < (props.half_mem() ? 2 : 1); ++part) {
|
||||
if (props.half_mem() && (part == 1)) {
|
||||
input1 = input_begin1;
|
||||
input2 = input_begin2;
|
||||
ctx1->first_half = false;
|
||||
ctx2->first_half = false;
|
||||
cn_explode_scratchpad_vaes_double<ALGO>(ctx1, ctx2);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < N;) {
|
||||
xout0 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + 0, input1 + 0), xout0);
|
||||
xout1 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + 1, input1 + 1), xout1);
|
||||
xout2 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + 2, input1 + 2), xout2);
|
||||
xout3 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + 3, input1 + 3), xout3);
|
||||
|
||||
constexpr int input_increment = 64 / sizeof(__m128i);
|
||||
|
||||
xout4 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + input_increment + 0, input1 + input_increment + 0), xout4);
|
||||
xout5 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + input_increment + 1, input1 + input_increment + 1), xout5);
|
||||
xout6 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + input_increment + 2, input1 + input_increment + 2), xout6);
|
||||
xout7 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + input_increment + 3, input1 + input_increment + 3), xout7);
|
||||
|
||||
input1 += input_increment * 2;
|
||||
input2 += input_increment * 2;
|
||||
i += 8;
|
||||
|
||||
if (i < N) {
|
||||
_mm_prefetch((const char*)(input1), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(input1 + input_increment), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(input2), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(input2 + input_increment), _MM_HINT_T0);
|
||||
}
|
||||
|
||||
vaes_round(k0, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k1, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k2, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k3, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k4, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k5, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k6, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k7, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k8, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k9, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
}
|
||||
}
|
||||
|
||||
_mm256_storeu2_m128i(output2 + 4, output1 + 4, xout0);
|
||||
_mm256_storeu2_m128i(output2 + 5, output1 + 5, xout1);
|
||||
_mm256_storeu2_m128i(output2 + 6, output1 + 6, xout2);
|
||||
_mm256_storeu2_m128i(output2 + 7, output1 + 7, xout3);
|
||||
_mm256_storeu2_m128i(output2 + 8, output1 + 8, xout4);
|
||||
_mm256_storeu2_m128i(output2 + 9, output1 + 9, xout5);
|
||||
_mm256_storeu2_m128i(output2 + 10, output1 + 10, xout6);
|
||||
_mm256_storeu2_m128i(output2 + 11, output1 + 11, xout7);
|
||||
|
||||
_mm256_zeroupper();
|
||||
}
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO>
|
||||
void VAES_Instance()
|
||||
{
|
||||
cn_explode_scratchpad_vaes<ALGO>(nullptr);
|
||||
cn_explode_scratchpad_vaes_double<ALGO>(nullptr, nullptr);
|
||||
cn_implode_scratchpad_vaes<ALGO>(nullptr);
|
||||
cn_implode_scratchpad_vaes_double<ALGO>(nullptr, nullptr);
|
||||
}
|
||||
|
||||
|
||||
void (*vaes_instances[])() = {
|
||||
VAES_Instance<Algorithm::CN_0>,
|
||||
VAES_Instance<Algorithm::CN_1>,
|
||||
VAES_Instance<Algorithm::CN_2>,
|
||||
VAES_Instance<Algorithm::CN_R>,
|
||||
VAES_Instance<Algorithm::CN_FAST>,
|
||||
VAES_Instance<Algorithm::CN_HALF>,
|
||||
VAES_Instance<Algorithm::CN_XAO>,
|
||||
VAES_Instance<Algorithm::CN_RTO>,
|
||||
VAES_Instance<Algorithm::CN_RWZ>,
|
||||
VAES_Instance<Algorithm::CN_ZLS>,
|
||||
VAES_Instance<Algorithm::CN_DOUBLE>,
|
||||
VAES_Instance<Algorithm::CN_CCX>,
|
||||
VAES_Instance<Algorithm::CN_LITE_0>,
|
||||
VAES_Instance<Algorithm::CN_LITE_1>,
|
||||
VAES_Instance<Algorithm::CN_HEAVY_0>,
|
||||
VAES_Instance<Algorithm::CN_HEAVY_TUBE>,
|
||||
VAES_Instance<Algorithm::CN_HEAVY_XHV>,
|
||||
VAES_Instance<Algorithm::CN_PICO_0>,
|
||||
VAES_Instance<Algorithm::CN_PICO_TLO>,
|
||||
VAES_Instance<Algorithm::CN_UPX2>,
|
||||
VAES_Instance<Algorithm::CN_GR_0>,
|
||||
VAES_Instance<Algorithm::CN_GR_1>,
|
||||
VAES_Instance<Algorithm::CN_GR_2>,
|
||||
VAES_Instance<Algorithm::CN_GR_3>,
|
||||
VAES_Instance<Algorithm::CN_GR_4>,
|
||||
VAES_Instance<Algorithm::CN_GR_5>,
|
||||
};
|
||||
|
||||
|
||||
} // xmrig
|
48
src/crypto/cn/CryptoNight_x86_vaes.h
Normal file
48
src/crypto/cn/CryptoNight_x86_vaes.h
Normal file
|
@ -0,0 +1,48 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_CRYPTONIGHT_X86_VAES_H
|
||||
#define XMRIG_CRYPTONIGHT_X86_VAES_H
|
||||
|
||||
|
||||
#include "crypto/cn/CnAlgo.h"
|
||||
|
||||
|
||||
struct cryptonight_ctx;
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO> void cn_explode_scratchpad_vaes(cryptonight_ctx* ctx);
|
||||
template<Algorithm::Id ALGO> void cn_explode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2);
|
||||
template<Algorithm::Id ALGO> void cn_implode_scratchpad_vaes(cryptonight_ctx* ctx);
|
||||
template<Algorithm::Id ALGO> void cn_implode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2);
|
||||
|
||||
|
||||
} // xmrig
|
||||
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_X86_VAES_H */
|
|
@ -4,7 +4,7 @@
|
|||
*
|
||||
* This work is based on the implementation of
|
||||
* Soeren S. Thomsen and Krystian Matusiewicz
|
||||
*
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
|
@ -22,7 +22,7 @@ const uint8_t indices_cyclic[15] = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6};
|
|||
#define ROTATE_COLUMN_DOWN(v1, v2, amount_bytes, temp_var) {temp_var = (v1<<(8*amount_bytes))|(v2>>(8*(4-amount_bytes))); \
|
||||
v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \
|
||||
v1 = temp_var;}
|
||||
|
||||
|
||||
|
||||
#define COLUMN(x,y,i,c0,c1,c2,c3,c4,c5,c6,c7,tv1,tv2,tu,tl,t) \
|
||||
tu = T[2*(uint32_t)x[4*c0+0]]; \
|
||||
|
@ -161,11 +161,11 @@ static void F512(uint32_t *h, const uint32_t *m) {
|
|||
|
||||
/* digest up to msglen bytes of input (full blocks only) */
|
||||
static void Transform(groestlHashState *ctx,
|
||||
const uint8_t *input,
|
||||
const uint8_t *input,
|
||||
int msglen) {
|
||||
|
||||
/* digest message, one block at a time */
|
||||
for (; msglen >= SIZE512;
|
||||
for (; msglen >= SIZE512;
|
||||
msglen -= SIZE512, input += SIZE512) {
|
||||
F512(ctx->chaining,(uint32_t*)input);
|
||||
|
||||
|
@ -199,7 +199,7 @@ static void OutputTransformation(groestlHashState *ctx) {
|
|||
RND512P((uint8_t*)y, temp, 0x00000009);
|
||||
for (j = 0; j < 2*COLS512; j++) {
|
||||
ctx->chaining[j] ^= temp[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* initialise context */
|
||||
|
@ -313,7 +313,7 @@ static void Final(groestlHashState* ctx,
|
|||
ctx->block_counter2 >>= 8;
|
||||
}
|
||||
/* digest final padding block */
|
||||
Transform(ctx, ctx->buffer, SIZE512);
|
||||
Transform(ctx, ctx->buffer, SIZE512);
|
||||
/* perform output transformation */
|
||||
OutputTransformation(ctx);
|
||||
|
||||
|
@ -332,7 +332,7 @@ static void Final(groestlHashState* ctx,
|
|||
}
|
||||
|
||||
/* hash bit sequence */
|
||||
void groestl(const BitSequence* data,
|
||||
void groestl(const BitSequence* data,
|
||||
DataLength databitlen,
|
||||
BitSequence* hashval) {
|
||||
|
||||
|
|
|
@ -4,10 +4,10 @@
|
|||
#include "crypto_uint8.h"
|
||||
#include "crypto_uint32.h"
|
||||
#include "crypto_uint64.h"
|
||||
#include "crypto_hash.h"
|
||||
#include "crypto_hash.h"
|
||||
|
||||
typedef crypto_uint8 uint8_t;
|
||||
typedef crypto_uint32 uint32_t;
|
||||
typedef crypto_uint8 uint8_t;
|
||||
typedef crypto_uint32 uint32_t;
|
||||
typedef crypto_uint64 uint64_t;
|
||||
*/
|
||||
#include <stdint.h>
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
** Source code author: Doug Whiting, 2008.
|
||||
**
|
||||
** This algorithm and source code is released to the public domain.
|
||||
**
|
||||
**
|
||||
************************************************************************/
|
||||
|
||||
#define SKEIN_PORT_CODE /* instantiate any code in skein_port.h */
|
||||
|
@ -57,7 +57,7 @@ static int Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
|
|||
|
||||
/*****************************************************************
|
||||
** "Internal" Skein definitions
|
||||
** -- not needed for sequential hashing API, but will be
|
||||
** -- not needed for sequential hashing API, but will be
|
||||
** helpful for other uses of Skein (e.g., tree hash mode).
|
||||
** -- included here so that they can be shared between
|
||||
** reference and optimized code.
|
||||
|
@ -179,11 +179,11 @@ static int Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
|
|||
#define Skein_Assert(x,retCode)/* default: ignore all Asserts, for performance */
|
||||
#define Skein_assert(x)
|
||||
#elif defined(SKEIN_ASSERT)
|
||||
#include <assert.h>
|
||||
#define Skein_Assert(x,retCode) assert(x)
|
||||
#define Skein_assert(x) assert(x)
|
||||
#include <assert.h>
|
||||
#define Skein_Assert(x,retCode) assert(x)
|
||||
#define Skein_assert(x) assert(x)
|
||||
#else
|
||||
#include <assert.h>
|
||||
#include <assert.h>
|
||||
#define Skein_Assert(x,retCode) { if (!(x)) return retCode; } /* caller error */
|
||||
#define Skein_assert(x) assert(x) /* internal error */
|
||||
#endif
|
||||
|
@ -191,8 +191,8 @@ static int Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
|
|||
/*****************************************************************
|
||||
** Skein block function constants (shared across Ref and Opt code)
|
||||
******************************************************************/
|
||||
enum
|
||||
{
|
||||
enum
|
||||
{
|
||||
/* Skein_512 round rotation constants */
|
||||
R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37,
|
||||
R_512_1_0=33, R_512_1_1=27, R_512_1_2=14, R_512_1_3=42,
|
||||
|
@ -251,7 +251,7 @@ const u64b_t SKEIN_512_IV_256[] =
|
|||
#define BLK_BITS (WCNT*64) /* some useful definitions for code here */
|
||||
#define KW_TWK_BASE (0)
|
||||
#define KW_KEY_BASE (3)
|
||||
#define ks (kw + KW_KEY_BASE)
|
||||
#define ks (kw + KW_KEY_BASE)
|
||||
#define ts (kw + KW_TWK_BASE)
|
||||
|
||||
#ifdef SKEIN_DEBUG
|
||||
|
@ -310,7 +310,7 @@ static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,s
|
|||
ks[5] = ctx->X[5];
|
||||
ks[6] = ctx->X[6];
|
||||
ks[7] = ctx->X[7];
|
||||
ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
|
||||
ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
|
||||
ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;
|
||||
|
||||
ts[2] = ts[0] ^ ts[1];
|
||||
|
@ -338,7 +338,7 @@ static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,s
|
|||
X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \
|
||||
X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \
|
||||
|
||||
#if SKEIN_UNROLL_512 == 0
|
||||
#if SKEIN_UNROLL_512 == 0
|
||||
#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) /* unrolled */ \
|
||||
Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \
|
||||
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);
|
||||
|
@ -469,7 +469,7 @@ static int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen)
|
|||
u08b_t b[SKEIN_512_STATE_BYTES];
|
||||
u64b_t w[SKEIN_512_STATE_WORDS];
|
||||
} cfg; /* config block */
|
||||
|
||||
|
||||
Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
|
||||
ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
|
||||
|
||||
|
@ -548,7 +548,7 @@ static int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msg
|
|||
|
||||
return SKEIN_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
|
||||
/* finalize the hash computation and output the result */
|
||||
static int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
|
||||
|
@ -562,7 +562,7 @@ static int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
|
|||
memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
|
||||
|
||||
Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
|
||||
|
||||
|
||||
/* now output the result */
|
||||
byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
** This algorithm and source code is released to the public domain.
|
||||
**
|
||||
***************************************************************************
|
||||
**
|
||||
**
|
||||
** The following compile-time switches may be defined to control some
|
||||
** tradeoffs between speed, code size, error checking, and security.
|
||||
**
|
||||
|
@ -20,8 +20,8 @@
|
|||
** [default: no callouts (no overhead)]
|
||||
**
|
||||
** SKEIN_ERR_CHECK -- how error checking is handled inside Skein
|
||||
** code. If not defined, most error checking
|
||||
** is disabled (for performance). Otherwise,
|
||||
** code. If not defined, most error checking
|
||||
** is disabled (for performance). Otherwise,
|
||||
** the switch value is interpreted as:
|
||||
** 0: use assert() to flag errors
|
||||
** 1: return SKEIN_FAIL to flag errors
|
||||
|
|
|
@ -124,9 +124,9 @@ static inline __m128i soft_aesenc(__m128i in, __m128i key)
|
|||
|
||||
static inline uint32_t sub_word(uint32_t key)
|
||||
{
|
||||
return (saes_sbox[key >> 24 ] << 24) |
|
||||
(saes_sbox[(key >> 16) & 0xff] << 16 ) |
|
||||
(saes_sbox[(key >> 8) & 0xff] << 8 ) |
|
||||
return (saes_sbox[key >> 24 ] << 24) |
|
||||
(saes_sbox[(key >> 16) & 0xff] << 16 ) |
|
||||
(saes_sbox[(key >> 8) & 0xff] << 8 ) |
|
||||
saes_sbox[key & 0xff];
|
||||
}
|
||||
|
||||
|
|
|
@ -18,19 +18,21 @@ xmrig -a gr -o us.flockpool.com:5555 --tls -u WALLET_ADDRESS
|
|||
|
||||
You can use **rtm_ghostrider_example.cmd** as a template and put pool URL and your wallet address there. The general XMRig documentation is available [here](https://xmrig.com/docs/miner).
|
||||
|
||||
**Using `--threads` or `-t` option is NOT recommended because it turns off advanced built-in config.** If you want to tweak the nubmer of threads used for GhostRider, it's recommended to start using config.json instead of command line. The best suitable command line option for this is `--cpu-max-threads-hint=N` where N can be between 0 and 100.
|
||||
|
||||
## Performance
|
||||
|
||||
While individual algorithm implementations are a bit unoptimized, XMRig achieves higher hashrates by employing better auto-config and more fine-grained thread scheduling: it can calculate a single batch of hashes using 2 threads for parts that don't require much cache. For example, on a typical Intel CPU (2 MB cache per core) it will use 1 thread per core for cn/fast, and 2 threads per core for other Cryptonight variants while calculating the same batch of hashes, always achieving more than 50% CPU load.
|
||||
|
||||
For the same reason, XMRig can sometimes use less than 100% CPU on Ryzen 3000/5000 CPUs if it finds that running 1 thread per core is faster for some Cryptonight variants on your system. Also, this is why it reports using only half the threads at startup - it's actually 2 threads per each reported thread.
|
||||
For the same reason, XMRig can sometimes use less than 100% CPU on Ryzen 3000/5000 CPUs if it finds that running 1 thread per core is faster for some Cryptonight variants on your system.
|
||||
|
||||
**Windows** (detailed results [here](https://imgur.com/a/GCjEWpl))
|
||||
**Windows** (detailed results [here](https://imgur.com/a/uRU1yO2))
|
||||
CPU|cpuminer-gr-avx2 (tuned), h/s|XMRig (MSVC build), h/s|Speedup
|
||||
-|-|-|-
|
||||
AMD Ryzen 7 4700U|632.6|731|+15.5%
|
||||
Intel Core i7-2600|496.4|533.6|+7.5%
|
||||
AMD Ryzen 7 3700X @ 4.1 GHz|2453.0|2469.1|+0.65%
|
||||
AMD Ryzen 5 5600X @ 4.65 GHz|2112.6|2221.2|+5.1%
|
||||
AMD Ryzen 5 5600X @ 4.65 GHz|2112.6|2313.2|+9.5%
|
||||
|
||||
**Linux** (tested by **Delgon**, detailed results [here](https://cdn.discordapp.com/attachments/604375870236524574/913167614749048872/unknown.png))
|
||||
CPU|cpuminer-gr-avx2 (tuned), h/s|XMRig (GCC build), h/s|Speedup
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -26,10 +26,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
|
|
@ -538,7 +538,7 @@ void destroy_helper_thread(HelperThread* t)
|
|||
}
|
||||
|
||||
|
||||
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper)
|
||||
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper, bool verbose)
|
||||
{
|
||||
enum { N = 8 };
|
||||
|
||||
|
@ -554,11 +554,13 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
|
|||
uint32_t cn_indices[6];
|
||||
select_indices(cn_indices, data + 4);
|
||||
|
||||
static uint32_t prev_indices[3];
|
||||
if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
|
||||
memcpy(prev_indices, cn_indices, sizeof(prev_indices));
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
|
||||
if (verbose) {
|
||||
static uint32_t prev_indices[3];
|
||||
if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
|
||||
memcpy(prev_indices, cn_indices, sizeof(prev_indices));
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -765,7 +767,7 @@ HelperThread* create_helper_thread(int64_t, const std::vector<int64_t>&) { retur
|
|||
void destroy_helper_thread(HelperThread*) {}
|
||||
|
||||
|
||||
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread*)
|
||||
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread*, bool verbose)
|
||||
{
|
||||
constexpr uint32_t N = 8;
|
||||
|
||||
|
@ -784,11 +786,13 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
|
|||
uint32_t step[6] = { 4, 4, 1, 2, 4, 4 };
|
||||
#endif
|
||||
|
||||
static uint32_t prev_indices[3];
|
||||
if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
|
||||
memcpy(prev_indices, cn_indices, sizeof(prev_indices));
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
|
||||
if (verbose) {
|
||||
static uint32_t prev_indices[3];
|
||||
if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
|
||||
memcpy(prev_indices, cn_indices, sizeof(prev_indices));
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -41,7 +41,7 @@ struct HelperThread;
|
|||
void benchmark();
|
||||
HelperThread* create_helper_thread(int64_t cpu_index, const std::vector<int64_t>& affinities);
|
||||
void destroy_helper_thread(HelperThread* t);
|
||||
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper);
|
||||
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper, bool verbose = true);
|
||||
|
||||
|
||||
} // namespace ghostrider
|
||||
|
|
|
@ -51,7 +51,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -59,10 +59,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -16,10 +16,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -15,10 +15,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -13,10 +13,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -16,10 +16,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -13,10 +13,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
@ -1029,4 +1029,4 @@ sph_echo512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
|||
}
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -15,10 +15,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
@ -312,7 +312,7 @@ void sph_echo512_close(void *cc, void *dst);
|
|||
*/
|
||||
void sph_echo512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -13,10 +13,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -15,10 +15,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -13,10 +13,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -15,10 +15,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
@ -288,9 +288,9 @@ void sph_luffa512_close(void *cc, void *dst);
|
|||
*/
|
||||
void sph_luffa512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -692,9 +692,9 @@ memcpy( state_out, state_in, 32 );
|
|||
#undef SHA2_IN
|
||||
}
|
||||
|
||||
void sph_sha256_transform_be( uint32_t *state_out, const uint32_t *data,
|
||||
void sph_sha256_transform_be( uint32_t *state_out, const uint32_t *data,
|
||||
const uint32_t *state_in )
|
||||
{
|
||||
{
|
||||
memcpy( state_out, state_in, 32 );
|
||||
#define SHA2_IN(x) sph_dec32be_aligned( data+(x) )
|
||||
SHA2_ROUND_BODY( SHA2_IN, state_out );
|
||||
|
@ -775,7 +775,7 @@ void sph_sha256_full( void *dst, const void *data, size_t len )
|
|||
sph_sha256_init( &cc );
|
||||
sph_sha256( &cc, data, len );
|
||||
sph_sha256_close( &cc, dst );
|
||||
}
|
||||
}
|
||||
|
||||
void sha256d(void* hash, const void* data, int len)
|
||||
{
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -19,10 +19,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -13,10 +13,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
@ -99,7 +99,7 @@ static const sph_u32 IV512[] = {
|
|||
/*
|
||||
* This is the code needed to match the "reference implementation" as
|
||||
* published on Nov 23rd, 2009, instead of the published specification.
|
||||
*
|
||||
*
|
||||
|
||||
#define AES_BIG_ENDIAN 1
|
||||
#include "aes_helper.c"
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -17,10 +17,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
@ -306,9 +306,9 @@ void sph_shavite512_close(void *cc, void *dst);
|
|||
*/
|
||||
void sph_shavite512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -13,10 +13,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -15,10 +15,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -13,10 +13,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -20,10 +20,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
|
@ -26,10 +26,10 @@
|
|||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
|
|
@ -166,9 +166,9 @@ static inline uint32_t popcount_soft(uint64_t x)
|
|||
constexpr uint64_t h01 = 0x0101010101010101ull;
|
||||
|
||||
x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits
|
||||
x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
|
||||
x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits
|
||||
return (x * h01) >> 56; //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
|
||||
x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
|
||||
x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits
|
||||
return (x * h01) >> 56; //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -180,7 +180,7 @@ init_block_loop:
|
|||
prefetchw byte ptr [rsi]
|
||||
mov rbx, rbp
|
||||
.byte 232 ;# 0xE8 = call
|
||||
;# .set CALL_LOC,
|
||||
;# .set CALL_LOC,
|
||||
.int 32768 - (call_offset - DECL(randomx_dataset_init))
|
||||
call_offset:
|
||||
mov qword ptr [rsi+0], r8
|
||||
|
|
|
@ -231,7 +231,7 @@ namespace randomx {
|
|||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMULH_R = SuperscalarInstructionInfo("IMULH_R", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1);
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISMULH_R = SuperscalarInstructionInfo("ISMULH_R", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1);
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_RCP = SuperscalarInstructionInfo("IMUL_RCP", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1);
|
||||
|
||||
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::NOP = SuperscalarInstructionInfo("NOP");
|
||||
|
||||
//these are some of the options how to split a 16-byte window into 3 or 4 x86 instructions.
|
||||
|
@ -494,7 +494,7 @@ namespace randomx {
|
|||
// * value must be ready at the required cycle
|
||||
// * cannot be the same as the source register unless the instruction allows it
|
||||
// - this avoids optimizable instructions such as "xor r, r" or "sub r, r"
|
||||
// * register cannot be multiplied twice in a row unless allowChainedMul is true
|
||||
// * register cannot be multiplied twice in a row unless allowChainedMul is true
|
||||
// - this avoids accumulation of trailing zeroes in registers due to excessive multiplication
|
||||
// - allowChainedMul is set to true if an attempt to find source/destination registers failed (this is quite rare, but prevents a catastrophic failure of the generator)
|
||||
// * either the last instruction applied to the register or its source must be different than this instruction
|
||||
|
@ -619,7 +619,7 @@ namespace randomx {
|
|||
if (commit)
|
||||
if (trace) std::cout << "; (eliminated)" << std::endl;
|
||||
return cycle;
|
||||
}
|
||||
}
|
||||
else if (mop.isSimple()) {
|
||||
//this macro-op has only one uOP
|
||||
return scheduleUop<commit>(mop.getUop1(), portBusy, cycle);
|
||||
|
@ -676,7 +676,7 @@ namespace randomx {
|
|||
if (trace) std::cout << "; ------------- fetch cycle " << cycle << " (" << decodeBuffer->getName() << ")" << std::endl;
|
||||
|
||||
int bufferIndex = 0;
|
||||
|
||||
|
||||
//fill all instruction slots in the current decode buffer
|
||||
while (bufferIndex < decodeBuffer->getSize()) {
|
||||
int topCycle = cycle;
|
||||
|
@ -831,7 +831,7 @@ namespace randomx {
|
|||
prog.decodeCycles = decodeCycle;
|
||||
prog.ipc = ipc;
|
||||
prog.mulCount = mulCount;
|
||||
|
||||
|
||||
|
||||
/*if(INFO) std::cout << "; ALU port utilization:" << std::endl;
|
||||
if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue