Merge xmrig v6.16.1 into master

This commit is contained in:
MoneroOcean 2021-11-29 15:34:55 +00:00
commit ecdb1929e2
121 changed files with 1944 additions and 1118 deletions

View file

@ -9,7 +9,7 @@
/* Guess the endianness of the target architecture. */
/*
/*
* The LITTLE endian machines:
*/
#if defined(__ultrix) /* Older MIPS */
@ -27,15 +27,15 @@
#elif defined(__INTEL_COMPILER) /* x86 (surely Intel compiler icl.exe) */
#define ECRYPT_LITTLE_ENDIAN
/*
* The BIG endian machines:
/*
* The BIG endian machines:
*/
#elif defined(sun) /* Newer Sparc's */
#define ECRYPT_BIG_ENDIAN
#elif defined(__ppc__) /* PowerPC */
#define ECRYPT_BIG_ENDIAN
/*
/*
* Finally machines with UNKNOWN endianness:
*/
#elif defined (_AIX) /* RS6000 */

View file

@ -5,8 +5,8 @@
* and should be used carefully. They should NOT be used with
* parameters which perform some action. E.g., the following two lines
* are not equivalent:
*
* 1) ++x; y = ROTL32(x, n);
*
* 1) ++x; y = ROTL32(x, n);
* 2) y = ROTL32(++x, n);
*/

View file

@ -1,9 +1,9 @@
/* ecrypt-sync.h */
/*
/*
* Header file for synchronous stream ciphers without authentication
* mechanism.
*
*
* *** Please only edit parts marked with "[edit]". ***
*/
@ -16,10 +16,10 @@
/* Cipher parameters */
/*
/*
* The name of your cipher.
*/
#define ECRYPT_NAME "Salsa20" /* [edit] */
#define ECRYPT_NAME "Salsa20" /* [edit] */
#define ECRYPT_PROFILE "S!_H."
/*
@ -47,15 +47,15 @@
/* Data structures */
/*
/*
* ECRYPT_ctx is the structure containing the representation of the
* internal state of your cipher.
* internal state of your cipher.
*/
typedef struct
{
u32 input[16]; /* could be compressed */
/*
/*
* [edit]
*
* Put here all state variable needed during the encryption process.
@ -79,10 +79,10 @@ void ECRYPT_init();
* above.
*/
void ECRYPT_keysetup(
ECRYPT_ctx* ctx,
const u8* key,
u32 keysize, /* Key size in bits. */
u32 ivsize); /* IV size in bits. */
ECRYPT_ctx* ctx,
const u8* key,
u32 keysize, /* Key size in bits. */
u32 ivsize); /* IV size in bits. */
/*
* IV setup. After having called ECRYPT_keysetup(), the user is
@ -91,7 +91,7 @@ void ECRYPT_keysetup(
* IV's.
*/
void ECRYPT_ivsetup(
ECRYPT_ctx* ctx,
ECRYPT_ctx* ctx,
const u8* iv);
/*
@ -102,7 +102,7 @@ void ECRYPT_ivsetup(
* (declared here) encrypts byte strings of arbitrary length, while
* the ECRYPT_encrypt_blocks() function (defined later) only accepts
* lengths which are multiples of ECRYPT_BLOCKLENGTH.
*
*
* The user is allowed to make multiple calls to
* ECRYPT_encrypt_blocks() to incrementally encrypt a long message,
* but he is NOT allowed to make additional encryption calls once he
@ -122,7 +122,7 @@ void ECRYPT_ivsetup(
*
* ECRYPT_ivsetup();
* ECRYPT_encrypt_bytes();
*
*
* The following sequence is not:
*
* ECRYPT_keysetup();
@ -133,22 +133,22 @@ void ECRYPT_ivsetup(
*/
void ECRYPT_encrypt_bytes(
ECRYPT_ctx* ctx,
const u8* plaintext,
u8* ciphertext,
u32 msglen); /* Message length in bytes. */
ECRYPT_ctx* ctx,
const u8* plaintext,
u8* ciphertext,
u32 msglen); /* Message length in bytes. */
void ECRYPT_decrypt_bytes(
ECRYPT_ctx* ctx,
const u8* ciphertext,
u8* plaintext,
u32 msglen); /* Message length in bytes. */
ECRYPT_ctx* ctx,
const u8* ciphertext,
u8* plaintext,
u32 msglen); /* Message length in bytes. */
/* ------------------------------------------------------------------------- */
/* Optional features */
/*
/*
* For testing purposes it can sometimes be useful to have a function
* which immediately generates keystream without having to provide it
* with a zero plaintext. If your cipher cannot provide this function
@ -170,7 +170,7 @@ void ECRYPT_keystream_bytes(
/* Optional optimizations */
/*
/*
* By default, the functions in this section are implemented using
* calls to functions declared above. However, you might want to
* implement them differently for performance reasons.
@ -186,22 +186,22 @@ void ECRYPT_keystream_bytes(
#define ECRYPT_USES_DEFAULT_ALL_IN_ONE /* [edit] */
void ECRYPT_encrypt_packet(
ECRYPT_ctx* ctx,
ECRYPT_ctx* ctx,
const u8* iv,
const u8* plaintext,
u8* ciphertext,
const u8* plaintext,
u8* ciphertext,
u32 msglen);
void ECRYPT_decrypt_packet(
ECRYPT_ctx* ctx,
ECRYPT_ctx* ctx,
const u8* iv,
const u8* ciphertext,
u8* plaintext,
const u8* ciphertext,
u8* plaintext,
u32 msglen);
/*
* Encryption/decryption of blocks.
*
*
* By default, these functions are defined as macros. If you want to
* provide a different implementation, please undef the
* ECRYPT_USES_DEFAULT_BLOCK_MACROS flag and implement the functions
@ -232,23 +232,23 @@ void ECRYPT_decrypt_packet(
#else
void ECRYPT_encrypt_blocks(
ECRYPT_ctx* ctx,
const u8* plaintext,
u8* ciphertext,
u32 blocks); /* Message length in blocks. */
ECRYPT_ctx* ctx,
const u8* plaintext,
u8* ciphertext,
u32 blocks); /* Message length in blocks. */
void ECRYPT_decrypt_blocks(
ECRYPT_ctx* ctx,
const u8* ciphertext,
u8* plaintext,
u32 blocks); /* Message length in blocks. */
ECRYPT_ctx* ctx,
const u8* ciphertext,
u8* plaintext,
u32 blocks); /* Message length in blocks. */
#ifdef ECRYPT_GENERATES_KEYSTREAM
void ECRYPT_keystream_blocks(
ECRYPT_ctx* ctx,
const u8* keystream,
u32 blocks); /* Keystream length in blocks. */
u32 blocks); /* Keystream length in blocks. */
#endif
@ -262,7 +262,7 @@ void ECRYPT_keystream_blocks(
* significant difference and keep the number of variants
* (ECRYPT_MAXVARIANT) as small as possible (definitely not more than
* 10). Note also that all variants should have exactly the same
* external interface (i.e., the same ECRYPT_BLOCKLENGTH, etc.).
* external interface (i.e., the same ECRYPT_BLOCKLENGTH, etc.).
*/
#define ECRYPT_MAXVARIANT 1 /* [edit] */

View file

@ -354,7 +354,7 @@ xmrig::cn_hash_fun xmrig::CnHash::fn(const Algorithm &algorithm, AlgoVariant av,
# ifdef XMRIG_ALGO_CN_HEAVY
// cn-heavy optimization for Zen3 CPUs
if ((av == AV_SINGLE) && (assembly != Assembly::NONE) && (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3)) {
if ((av == AV_SINGLE) && (assembly != Assembly::NONE) && (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) && (Cpu::info()->model() == 0x21)) {
switch (algorithm.id()) {
case Algorithm::CN_HEAVY_0:
return cryptonight_single_hash<Algorithm::CN_HEAVY_0, false, 3>;

View file

@ -466,6 +466,29 @@ const static uint8_t astrobwt_dero_test_out[256] = {
#endif
#ifdef XMRIG_ALGO_GHOSTRIDER
// "GhostRider"
// 256 bytes of reference output for the GhostRider algorithm family.
// NOTE(review): presumably the expected hashes checked by the startup self-test
// (the other *_test_out tables in this header have the same role) - confirm
// against the code that reads this table.
const static uint8_t test_output_gr[256] = {
0x42, 0x17, 0x0C, 0xC1, 0x85, 0xE6, 0x76, 0x3C, 0xC7, 0xCB, 0x27, 0xC4, 0x17, 0x39, 0x2D, 0xE2,
0x29, 0x6B, 0x40, 0x66, 0x85, 0xA4, 0xE3, 0xD3, 0x8C, 0xE9, 0xA5, 0x8F, 0x10, 0xFC, 0x81, 0xE4,
0x90, 0x56, 0xF2, 0x9E, 0x00, 0xD0, 0xF8, 0xA1, 0x88, 0x82, 0x86, 0xC0, 0x86, 0x04, 0x6B, 0x0E,
0x9A, 0xDB, 0xDB, 0xFD, 0x23, 0x16, 0x77, 0x94, 0xFE, 0x58, 0x93, 0x05, 0x10, 0x3F, 0x27, 0x75,
0x51, 0x44, 0xF3, 0x5F, 0xE2, 0xF9, 0x61, 0xBE, 0xC0, 0x30, 0xB5, 0x8E, 0xB1, 0x1B, 0xA1, 0xF7,
0x06, 0x4E, 0xF1, 0x6A, 0xFD, 0xA5, 0x44, 0x8E, 0x64, 0x47, 0x8C, 0x67, 0x51, 0xE2, 0x5C, 0x55,
0x3E, 0x39, 0xA6, 0xA5, 0xF7, 0xB8, 0xD0, 0x5E, 0xE2, 0xBF, 0x92, 0x44, 0xD9, 0xAA, 0x76, 0x22,
0xE3, 0x3E, 0x15, 0x96, 0xD8, 0x6A, 0x78, 0x2D, 0xA9, 0x77, 0x24, 0x1A, 0x4B, 0xE7, 0x5A, 0x2E,
0x89, 0x77, 0xAE, 0x92, 0xE4, 0xA4, 0x2D, 0xAF, 0x0B, 0x27, 0x09, 0xB2, 0x5F, 0x95, 0x61, 0xA9,
0xA8, 0xBE, 0x5D, 0x39, 0xBE, 0x41, 0x5F, 0x9C, 0x67, 0x28, 0x48, 0x4F, 0xAE, 0x2A, 0x50, 0x2B,
0xB8, 0xC7, 0x42, 0x73, 0x51, 0x60, 0x59, 0xD8, 0x9C, 0xBA, 0x22, 0x2F, 0x8E, 0x34, 0xDE, 0xC8,
0x1B, 0xAE, 0x9E, 0xBD, 0xF7, 0xE8, 0xFD, 0x8A, 0x97, 0xBE, 0xF0, 0x47, 0xAC, 0x27, 0xDD, 0x28,
0xC9, 0x28, 0xA8, 0x7B, 0x2A, 0xB8, 0x90, 0x3E, 0xCA, 0xB4, 0x78, 0x44, 0xCE, 0xCD, 0x91, 0xEC,
0xC2, 0x5A, 0x17, 0x59, 0x7C, 0x14, 0xF8, 0x95, 0x28, 0x14, 0xC3, 0xAD, 0xC4, 0xE1, 0x13, 0x5A,
0xC4, 0xA7, 0xC7, 0x77, 0xAD, 0xF8, 0x09, 0x61, 0x16, 0xBB, 0xAA, 0x7E, 0xAB, 0xC3, 0x00, 0x25,
0xBA, 0xA8, 0x97, 0xC7, 0x7D, 0x38, 0x46, 0x0E, 0x59, 0xAC, 0xCB, 0xAE, 0xFE, 0x3C, 0x6F, 0x01
};
#endif
} // namespace xmrig

View file

@ -43,6 +43,11 @@
#include "crypto/cn/soft_aes.h"
#ifdef XMRIG_VAES
# include "crypto/cn/CryptoNight_x86_vaes.h"
#endif
extern "C"
{
#include "crypto/cn/c_groestl.h"
@ -289,6 +294,13 @@ static NOINLINE void cn_explode_scratchpad(cryptonight_ctx *ctx)
{
constexpr CnAlgo<ALGO> props;
# ifdef XMRIG_VAES
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
cn_explode_scratchpad_vaes<ALGO>(ctx);
return;
}
# endif
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
@ -341,7 +353,7 @@ static NOINLINE void cn_explode_scratchpad(cryptonight_ctx *ctx)
constexpr int output_increment = (64 << interleave) / sizeof(__m128i);
constexpr int prefetch_dist = 2048 / sizeof(__m128i);
__m128i* e = output + N - prefetch_dist;
__m128i* e = output + (N << interleave) - prefetch_dist;
__m128i* prefetch_ptr = output + prefetch_dist;
for (int i = 0; i < 2; ++i) {
@ -396,6 +408,13 @@ static NOINLINE void cn_implode_scratchpad(cryptonight_ctx *ctx)
{
constexpr CnAlgo<ALGO> props;
# ifdef XMRIG_VAES
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
cn_implode_scratchpad_vaes<ALGO>(ctx);
return;
}
# endif
# ifdef XMRIG_ALGO_CN_GPU
constexpr bool IS_HEAVY = props.isHeavy() || ALGO == Algorithm::CN_GPU;
# else
@ -1070,8 +1089,17 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
ctx[0]->first_half = true;
ctx[1]->first_half = true;
}
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
# ifdef XMRIG_VAES
if (!props.isHeavy() && Cpu::info()->hasVAES()) {
cn_explode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
}
else
# endif
{
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
}
if (ALGO == Algorithm::CN_2) {
cnv2_double_mainloop_sandybridge_asm(ctx);
@ -1110,8 +1138,16 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
ctx[0]->generated_code(ctx);
}
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
# ifdef XMRIG_VAES
if (!props.isHeavy() && Cpu::info()->hasVAES()) {
cn_implode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
}
else
# endif
{
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
}
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
@ -1166,8 +1202,17 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
ctx[0]->first_half = true;
ctx[1]->first_half = true;
}
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
# ifdef XMRIG_VAES
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
cn_explode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
}
else
# endif
{
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
}
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
@ -1362,8 +1407,16 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
bx10 = cx1;
}
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
# ifdef XMRIG_VAES
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
cn_implode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
}
else
# endif
{
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
}
keccakf(h0, 24);
keccakf(h1, 24);
@ -1424,10 +1477,19 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
ctx[3]->first_half = true;
}
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
# ifdef XMRIG_VAES
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
cn_explode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
cn_explode_scratchpad_vaes_double<ALGO>(ctx[2], ctx[3]);
}
else
# endif
{
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
}
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
@ -1548,10 +1610,19 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
if (!SOFT_AES) cx3 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l3[idx3 & MASK]));
}
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
# ifdef XMRIG_VAES
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
cn_implode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
cn_implode_scratchpad_vaes_double<ALGO>(ctx[2], ctx[3]);
}
else
# endif
{
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
}
keccakf(h0, 24);
keccakf(h1, 24);
@ -1788,7 +1859,20 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
if (props.half_mem()) {
ctx[i]->first_half = true;
}
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
}
# ifdef XMRIG_VAES
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
cn_explode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
cn_explode_scratchpad_vaes_double<ALGO>(ctx[2], ctx[3]);
}
else
# endif
{
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
}
uint8_t* l0 = ctx[0]->memory;
@ -1840,8 +1924,21 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
CN_STEP4(3, ax3, bx30, bx31, cx3, l3, mc3, ptr3, idx3);
}
# ifdef XMRIG_VAES
if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
cn_implode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
cn_implode_scratchpad_vaes_double<ALGO>(ctx[2], ctx[3]);
}
else
# endif
{
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
}
for (size_t i = 0; i < 4; i++) {
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
}

View file

@ -0,0 +1,530 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "CryptoNight_x86_vaes.h"
#include "CryptoNight_monero.h"
#include "CryptoNight.h"
#ifdef __GNUC__
# include <x86intrin.h>
#if !defined(__clang__) && !defined(__ICC) && __GNUC__ < 10
// Fallback definition, compiled only for GCC older than 10 (see the enclosing
// #if). Builds one 256-bit value from two unaligned 128-bit loads:
// bits 0..127 come from loaddr, bits 128..255 from hiaddr.
static inline __m256i
__attribute__((__always_inline__))
_mm256_loadu2_m128i(const __m128i* const hiaddr, const __m128i* const loaddr)
{
    const __m128i lo = _mm_loadu_si128(loaddr);
    const __m128i hi = _mm_loadu_si128(hiaddr);

    return _mm256_inserti128_si256(_mm256_castsi128_si256(lo), hi, 1);
}
// Fallback definition, compiled only for GCC older than 10 (see the enclosing
// #if). Splits a 256-bit value into two unaligned 128-bit stores:
// bits 0..127 go to loaddr, bits 128..255 go to hiaddr (low half is written first).
static inline void
__attribute__((__always_inline__))
_mm256_storeu2_m128i(__m128i* const hiaddr, __m128i* const loaddr, const __m256i a)
{
    const __m128i lo = _mm256_castsi256_si128(a);
    const __m128i hi = _mm256_extracti128_si256(a, 1);

    _mm_storeu_si128(loaddr, lo);
    _mm_storeu_si128(hiaddr, hi);
}
#endif
#else
# include <intrin.h>
#endif
// Running XOR over the four 32-bit elements of tmp1:
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
// Each _mm_slli_si128(x, 4) moves every element one 32-bit slot up and
// zero-fills the lowest slot, so XOR-ing the three shifted copies into the
// input accumulates all lower elements into each position.
static FORCEINLINE __m128i sl_xor(__m128i tmp1)
{
    const __m128i up1 = _mm_slli_si128(tmp1, 0x04);
    const __m128i up2 = _mm_slli_si128(up1, 0x04);
    const __m128i up3 = _mm_slli_si128(up2, 0x04);

    return _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(tmp1, up1), up2), up3);
}
// Advances a pair of 128-bit round keys in place, producing the next pair.
//   *xout0 <- sl_xor(*xout0) ^ broadcast(element 3 of AESKEYGENASSIST(*xout2, rcon))
//   *xout2 <- sl_xor(*xout2) ^ broadcast(element 2 of AESKEYGENASSIST(new *xout0, 0))
// The second step deliberately uses the freshly updated *xout0.
template<uint8_t rcon>
static FORCEINLINE void aes_genkey_sub(__m128i* xout0, __m128i* xout2)
{
    // PSHUFD 0xFF: replicate the 4th 32-bit element into all four slots.
    const __m128i assist_hi = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(*xout2, rcon), 0xFF);
    *xout0 = _mm_xor_si128(sl_xor(*xout0), assist_hi);

    // PSHUFD 0xAA: replicate the 3rd 32-bit element into all four slots.
    const __m128i assist_lo = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(*xout0, 0x00), 0xAA);
    *xout2 = _mm_xor_si128(sl_xor(*xout2), assist_lo);
}
// Expands the 32 bytes at memory[0..1] into ten round keys for one hash context.
// Every 128-bit round key is duplicated into both halves of a 256-bit register,
// so a single 256-bit AES instruction applies the same key to two blocks.
// memory is read with aligned 128-bit loads and must be 16-byte aligned.
static NOINLINE void vaes_genkey(const __m128i* memory, __m256i* k0, __m256i* k1, __m256i* k2, __m256i* k3, __m256i* k4, __m256i* k5, __m256i* k6, __m256i* k7, __m256i* k8, __m256i* k9)
{
    __m128i key_even = _mm_load_si128(memory);
    __m128i key_odd  = _mm_load_si128(memory + 1);

    // Same 128-bit key in the low and the high lane.
    const auto both_lanes = [](__m128i key) { return _mm256_set_m128i(key, key); };

    *k0 = both_lanes(key_even);
    *k1 = both_lanes(key_odd);

    aes_genkey_sub<0x01>(&key_even, &key_odd);
    *k2 = both_lanes(key_even);
    *k3 = both_lanes(key_odd);

    aes_genkey_sub<0x02>(&key_even, &key_odd);
    *k4 = both_lanes(key_even);
    *k5 = both_lanes(key_odd);

    aes_genkey_sub<0x04>(&key_even, &key_odd);
    *k6 = both_lanes(key_even);
    *k7 = both_lanes(key_odd);

    aes_genkey_sub<0x08>(&key_even, &key_odd);
    *k8 = both_lanes(key_even);
    *k9 = both_lanes(key_odd);
}
// Expands two independent 32-byte keys (memory1[0..1] and memory2[0..1]) into
// ten 256-bit round keys. In every output register the low 128-bit lane holds
// the key derived from memory1 and the high lane the key derived from memory2,
// so one 256-bit AES instruction serves two hash contexts at once.
// Both inputs are read with aligned 128-bit loads (16-byte alignment required).
static NOINLINE void vaes_genkey_double(const __m128i* memory1, const __m128i* memory2, __m256i* k0, __m256i* k1, __m256i* k2, __m256i* k3, __m256i* k4, __m256i* k5, __m256i* k6, __m256i* k7, __m256i* k8, __m256i* k9)
{
    __m128i first_even  = _mm_load_si128(memory1);
    __m128i first_odd   = _mm_load_si128(memory1 + 1);
    __m128i second_even = _mm_load_si128(memory2);
    __m128i second_odd  = _mm_load_si128(memory2 + 1);

    // High lane <- second context, low lane <- first context.
    const auto pack = [](__m128i second, __m128i first) { return _mm256_set_m128i(second, first); };

    *k0 = pack(second_even, first_even);
    *k1 = pack(second_odd, first_odd);

    aes_genkey_sub<0x01>(&first_even, &first_odd);
    aes_genkey_sub<0x01>(&second_even, &second_odd);
    *k2 = pack(second_even, first_even);
    *k3 = pack(second_odd, first_odd);

    aes_genkey_sub<0x02>(&first_even, &first_odd);
    aes_genkey_sub<0x02>(&second_even, &second_odd);
    *k4 = pack(second_even, first_even);
    *k5 = pack(second_odd, first_odd);

    aes_genkey_sub<0x04>(&first_even, &first_odd);
    aes_genkey_sub<0x04>(&second_even, &second_odd);
    *k6 = pack(second_even, first_even);
    *k7 = pack(second_odd, first_odd);

    aes_genkey_sub<0x08>(&first_even, &first_odd);
    aes_genkey_sub<0x08>(&second_even, &second_odd);
    *k8 = pack(second_even, first_even);
    *k9 = pack(second_odd, first_odd);
}
// Applies one AES encryption round with the same round-key register to four
// 256-bit registers, i.e. to eight 128-bit blocks. Registers are updated in place.
static FORCEINLINE void vaes_round(__m256i key, __m256i& x01, __m256i& x23, __m256i& x45, __m256i& x67)
{
    const auto encrypt = [key](__m256i& blocks) { blocks = _mm256_aesenc_epi128(blocks, key); };

    encrypt(x01);
    encrypt(x23);
    encrypt(x45);
    encrypt(x67);
}
// Applies one AES encryption round with the same round-key register to eight
// 256-bit registers, i.e. to sixteen 128-bit blocks. Registers are updated in place.
static FORCEINLINE void vaes_round(__m256i key, __m256i& x0, __m256i& x1, __m256i& x2, __m256i& x3, __m256i& x4, __m256i& x5, __m256i& x6, __m256i& x7)
{
    const auto encrypt = [key](__m256i& blocks) { blocks = _mm256_aesenc_epi128(blocks, key); };

    encrypt(x0);
    encrypt(x1);
    encrypt(x2);
    encrypt(x3);
    encrypt(x4);
    encrypt(x5);
    encrypt(x6);
    encrypt(x7);
}
namespace xmrig {
// Fills ctx->memory (the scratchpad) for a single hash context using 256-bit
// VAES instructions.
//
// Round keys come from the first 32 bytes of ctx->state. A 128-byte working
// block (four 256-bit registers) is run through ten AES rounds and written
// out, over and over, until the scratchpad region is full.
//
// ctx->state, ctx->save_state and ctx->memory are accessed with aligned
// 256-bit loads/stores and therefore must be 32-byte aligned.
template<Algorithm::Id ALGO>
NOINLINE void cn_explode_scratchpad_vaes(cryptonight_ctx* ctx)
{
constexpr CnAlgo<ALGO> props;
// Region size in __m256i units; only half the memory is filled per call when half_mem() is set.
constexpr size_t N = (props.memory() / sizeof(__m256i)) / (props.half_mem() ? 2 : 1);
__m256i xin01, xin23, xin45, xin67;
__m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
const __m128i* input = reinterpret_cast<const __m128i*>(ctx->state);
__m256i* output = reinterpret_cast<__m256i*>(ctx->memory);
vaes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
// Second half of a half_mem algorithm: resume from the block saved at the end of the first half.
if (props.half_mem() && !ctx->first_half) {
const __m256i* p = reinterpret_cast<const __m256i*>(ctx->save_state);
xin01 = _mm256_load_si256(p + 0);
xin23 = _mm256_load_si256(p + 1);
xin45 = _mm256_load_si256(p + 2);
xin67 = _mm256_load_si256(p + 3);
}
else {
// Otherwise start from state bytes 64..191 (128-bit slots 4..11).
xin01 = _mm256_load_si256(reinterpret_cast<const __m256i*>(input + 4));
xin23 = _mm256_load_si256(reinterpret_cast<const __m256i*>(input + 6));
xin45 = _mm256_load_si256(reinterpret_cast<const __m256i*>(input + 8));
xin67 = _mm256_load_si256(reinterpret_cast<const __m256i*>(input + 10));
}
constexpr int output_increment = 64 / sizeof(__m256i); // 64 bytes expressed in __m256i units
constexpr int prefetch_dist = 2048 / sizeof(__m256i); // prefetch 2048 bytes ahead of the write position
__m256i* e = output + N - prefetch_dist;
__m256i* prefetch_ptr = output + prefetch_dist;
// Two passes: the first stops prefetch_dist short of the end while prefetching
// ahead; the second finishes the tail with prefetch_ptr reset to the write
// position, so no prefetch address lies beyond the region.
// The do/while body runs at least once per pass, which assumes N > prefetch_dist.
for (int i = 0; i < 2; ++i) {
do {
_mm_prefetch((const char*)(prefetch_ptr), _MM_HINT_T0);
_mm_prefetch((const char*)(prefetch_ptr + output_increment), _MM_HINT_T0);
// Ten AES rounds over the 128-byte working block.
vaes_round(k0, xin01, xin23, xin45, xin67);
vaes_round(k1, xin01, xin23, xin45, xin67);
vaes_round(k2, xin01, xin23, xin45, xin67);
vaes_round(k3, xin01, xin23, xin45, xin67);
vaes_round(k4, xin01, xin23, xin45, xin67);
vaes_round(k5, xin01, xin23, xin45, xin67);
vaes_round(k6, xin01, xin23, xin45, xin67);
vaes_round(k7, xin01, xin23, xin45, xin67);
vaes_round(k8, xin01, xin23, xin45, xin67);
vaes_round(k9, xin01, xin23, xin45, xin67);
// Write the block out as two consecutive 64-byte pieces.
_mm256_store_si256(output + 0, xin01);
_mm256_store_si256(output + 1, xin23);
_mm256_store_si256(output + output_increment + 0, xin45);
_mm256_store_si256(output + output_increment + 1, xin67);
output += output_increment * 2;
prefetch_ptr += output_increment * 2;
} while (output < e);
e += prefetch_dist;
prefetch_ptr = output;
}
// First half of a half_mem algorithm: remember the final block so the second half can continue from it.
if (props.half_mem() && ctx->first_half) {
__m256i* p = reinterpret_cast<__m256i*>(ctx->save_state);
_mm256_store_si256(p + 0, xin01);
_mm256_store_si256(p + 1, xin23);
_mm256_store_si256(p + 2, xin45);
_mm256_store_si256(p + 3, xin67);
}
// Clear the upper YMM halves before returning to code that may use legacy SSE instructions.
_mm256_zeroupper();
}
// Fills the scratchpads of two hash contexts at the same time.
//
// Every 256-bit register carries one 128-bit block of ctx1 in its low lane and
// the matching block of ctx2 in its high lane; the round keys are packed the
// same way by vaes_genkey_double(). Data is moved with unaligned 128-bit
// pair loads/stores, so both scratchpads are laid out exactly as if each had
// been processed on its own.
//
// Both contexts must be in the same first_half state: the "resume" and "save"
// branches are only taken when the flag agrees on ctx1 and ctx2.
template<Algorithm::Id ALGO>
NOINLINE void cn_explode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2)
{
constexpr CnAlgo<ALGO> props;
// Region size per context in __m128i units; halved when half_mem() is set.
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
__m256i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
__m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
const __m128i* input1 = reinterpret_cast<const __m128i*>(ctx1->state);
const __m128i* input2 = reinterpret_cast<const __m128i*>(ctx2->state);
__m128i* output1 = reinterpret_cast<__m128i*>(ctx1->memory);
__m128i* output2 = reinterpret_cast<__m128i*>(ctx2->memory);
vaes_genkey_double(input1, input2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
{
// Resume from save_state for the second half of a half_mem algorithm,
// otherwise start from state bytes 64..191 of each context.
const bool b = props.half_mem() && !ctx1->first_half && !ctx2->first_half;
const __m128i* p1 = b ? reinterpret_cast<const __m128i*>(ctx1->save_state) : (input1 + 4);
const __m128i* p2 = b ? reinterpret_cast<const __m128i*>(ctx2->save_state) : (input2 + 4);
xin0 = _mm256_loadu2_m128i(p2 + 0, p1 + 0);
xin1 = _mm256_loadu2_m128i(p2 + 1, p1 + 1);
xin2 = _mm256_loadu2_m128i(p2 + 2, p1 + 2);
xin3 = _mm256_loadu2_m128i(p2 + 3, p1 + 3);
xin4 = _mm256_loadu2_m128i(p2 + 4, p1 + 4);
xin5 = _mm256_loadu2_m128i(p2 + 5, p1 + 5);
xin6 = _mm256_loadu2_m128i(p2 + 6, p1 + 6);
xin7 = _mm256_loadu2_m128i(p2 + 7, p1 + 7);
}
constexpr int output_increment = 64 / sizeof(__m128i); // 64 bytes expressed in __m128i units
constexpr int prefetch_dist = 2048 / sizeof(__m128i); // prefetch 2048 bytes ahead of the write position
// Only output1 is compared against the end pointer; output2 advances in lockstep.
__m128i* e = output1 + N - prefetch_dist;
__m128i* prefetch_ptr1 = output1 + prefetch_dist;
__m128i* prefetch_ptr2 = output2 + prefetch_dist;
// Two passes: the first stops prefetch_dist short of the end while prefetching
// ahead; the second finishes the tail with the prefetch pointers reset to the
// write positions, so no prefetch address lies beyond either region.
// The do/while body runs at least once per pass, which assumes N > prefetch_dist.
for (int i = 0; i < 2; ++i) {
do {
_mm_prefetch((const char*)(prefetch_ptr1), _MM_HINT_T0);
_mm_prefetch((const char*)(prefetch_ptr1 + output_increment), _MM_HINT_T0);
_mm_prefetch((const char*)(prefetch_ptr2), _MM_HINT_T0);
_mm_prefetch((const char*)(prefetch_ptr2 + output_increment), _MM_HINT_T0);
// Ten AES rounds over both contexts' 128-byte working blocks.
vaes_round(k0, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k1, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k2, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k3, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k4, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k5, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k6, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k7, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k8, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k9, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
// Low lanes go to ctx1's scratchpad, high lanes to ctx2's.
_mm256_storeu2_m128i(output2 + 0, output1 + 0, xin0);
_mm256_storeu2_m128i(output2 + 1, output1 + 1, xin1);
_mm256_storeu2_m128i(output2 + 2, output1 + 2, xin2);
_mm256_storeu2_m128i(output2 + 3, output1 + 3, xin3);
_mm256_storeu2_m128i(output2 + output_increment + 0, output1 + output_increment + 0, xin4);
_mm256_storeu2_m128i(output2 + output_increment + 1, output1 + output_increment + 1, xin5);
_mm256_storeu2_m128i(output2 + output_increment + 2, output1 + output_increment + 2, xin6);
_mm256_storeu2_m128i(output2 + output_increment + 3, output1 + output_increment + 3, xin7);
output1 += output_increment * 2;
prefetch_ptr1 += output_increment * 2;
output2 += output_increment * 2;
prefetch_ptr2 += output_increment * 2;
} while (output1 < e);
e += prefetch_dist;
prefetch_ptr1 = output1;
prefetch_ptr2 = output2;
}
// First half of a half_mem algorithm: save each context's final block for the second half.
if (props.half_mem() && ctx1->first_half && ctx2->first_half) {
__m128i* p1 = reinterpret_cast<__m128i*>(ctx1->save_state);
__m128i* p2 = reinterpret_cast<__m128i*>(ctx2->save_state);
_mm256_storeu2_m128i(p2 + 0, p1 + 0, xin0);
_mm256_storeu2_m128i(p2 + 1, p1 + 1, xin1);
_mm256_storeu2_m128i(p2 + 2, p1 + 2, xin2);
_mm256_storeu2_m128i(p2 + 3, p1 + 3, xin3);
_mm256_storeu2_m128i(p2 + 4, p1 + 4, xin4);
_mm256_storeu2_m128i(p2 + 5, p1 + 5, xin5);
_mm256_storeu2_m128i(p2 + 6, p1 + 6, xin6);
_mm256_storeu2_m128i(p2 + 7, p1 + 7, xin7);
}
// Clear the upper YMM halves before returning to code that may use legacy SSE instructions.
_mm256_zeroupper();
}
// Folds the scratchpad of a single hash context back into ctx->state using
// 256-bit VAES instructions.
//
// Round keys come from state bytes 32..63. State bytes 64..191 are the
// accumulator: for every 128-byte scratchpad chunk it is XOR-ed with the chunk
// and then run through ten AES rounds. The result is written back to state
// bytes 64..191.
//
// For half_mem algorithms the scratchpad holds only half of the data at a
// time, so after the first part the scratchpad is regenerated (second half)
// with cn_explode_scratchpad_vaes() and read again from the start.
//
// ctx->state and ctx->memory are accessed as aligned __m256i and must be
// 32-byte aligned.
template<Algorithm::Id ALGO>
NOINLINE void cn_implode_scratchpad_vaes(cryptonight_ctx* ctx)
{
constexpr CnAlgo<ALGO> props;
// Region size in __m256i units; halved when half_mem() is set.
constexpr size_t N = (props.memory() / sizeof(__m256i)) / (props.half_mem() ? 2 : 1);
__m256i xout01, xout23, xout45, xout67;
__m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
const __m256i* input = reinterpret_cast<const __m256i*>(ctx->memory);
__m256i* output = reinterpret_cast<__m256i*>(ctx->state);
// Key material: 128-bit slots 2 and 3 of the state (bytes 32..63).
vaes_genkey(reinterpret_cast<__m128i*>(output) + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
// Accumulator: state bytes 64..191 (256-bit slots 2..5).
xout01 = _mm256_load_si256(output + 2);
xout23 = _mm256_load_si256(output + 3);
xout45 = _mm256_load_si256(output + 4);
xout67 = _mm256_load_si256(output + 5);
const __m256i* input_begin = input;
for (size_t part = 0; part < (props.half_mem() ? 2 : 1); ++part) {
if (props.half_mem() && (part == 1)) {
// Rebuild the scratchpad for the second half and rewind the read pointer.
input = input_begin;
ctx->first_half = false;
cn_explode_scratchpad_vaes<ALGO>(ctx);
}
// i counts __m256i units; each iteration consumes 4 of them (128 bytes).
for (size_t i = 0; i < N;) {
xout01 = _mm256_xor_si256(xout01, input[0]);
xout23 = _mm256_xor_si256(xout23, input[1]);
constexpr int input_increment = 64 / sizeof(__m256i);
xout45 = _mm256_xor_si256(xout45, input[input_increment]);
xout67 = _mm256_xor_si256(xout67, input[input_increment + 1]);
input += input_increment * 2;
i += 4;
// Prefetch the next chunk unless this was the last one.
if (i < N) {
_mm_prefetch((const char*)(input), _MM_HINT_T0);
_mm_prefetch((const char*)(input + input_increment), _MM_HINT_T0);
}
vaes_round(k0, xout01, xout23, xout45, xout67);
vaes_round(k1, xout01, xout23, xout45, xout67);
vaes_round(k2, xout01, xout23, xout45, xout67);
vaes_round(k3, xout01, xout23, xout45, xout67);
vaes_round(k4, xout01, xout23, xout45, xout67);
vaes_round(k5, xout01, xout23, xout45, xout67);
vaes_round(k6, xout01, xout23, xout45, xout67);
vaes_round(k7, xout01, xout23, xout45, xout67);
vaes_round(k8, xout01, xout23, xout45, xout67);
vaes_round(k9, xout01, xout23, xout45, xout67);
}
}
// Write the accumulator back to state bytes 64..191.
_mm256_store_si256(output + 2, xout01);
_mm256_store_si256(output + 3, xout23);
_mm256_store_si256(output + 4, xout45);
_mm256_store_si256(output + 5, xout67);
// Clear the upper YMM halves before returning to code that may use legacy SSE instructions.
_mm256_zeroupper();
}
// Folds the scratchpads of two hash contexts back into their states at the
// same time.
//
// Every 256-bit register carries a 128-bit block of ctx1 in its low lane and
// the matching block of ctx2 in its high lane; round keys (from state bytes
// 32..63 of each context) are packed the same way. For each 128-byte chunk the
// accumulator (state bytes 64..191) is XOR-ed with the chunk and then run
// through ten AES rounds; the result is stored back to both states.
//
// For half_mem algorithms both scratchpads are regenerated for the second
// half with cn_explode_scratchpad_vaes_double() and read again from the start.
template<Algorithm::Id ALGO>
NOINLINE void cn_implode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2)
{
constexpr CnAlgo<ALGO> props;
// Region size per context in __m128i units; halved when half_mem() is set.
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
__m256i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
__m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
const __m128i* input1 = reinterpret_cast<const __m128i*>(ctx1->memory);
const __m128i* input2 = reinterpret_cast<const __m128i*>(ctx2->memory);
__m128i* output1 = reinterpret_cast<__m128i*>(ctx1->state);
__m128i* output2 = reinterpret_cast<__m128i*>(ctx2->state);
// Key material: 128-bit slots 2 and 3 of each state (bytes 32..63).
vaes_genkey_double(output1 + 2, output2 + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
// Accumulators: 128-bit slots 4..11 of each state (bytes 64..191).
xout0 = _mm256_loadu2_m128i(output2 + 4, output1 + 4);
xout1 = _mm256_loadu2_m128i(output2 + 5, output1 + 5);
xout2 = _mm256_loadu2_m128i(output2 + 6, output1 + 6);
xout3 = _mm256_loadu2_m128i(output2 + 7, output1 + 7);
xout4 = _mm256_loadu2_m128i(output2 + 8, output1 + 8);
xout5 = _mm256_loadu2_m128i(output2 + 9, output1 + 9);
xout6 = _mm256_loadu2_m128i(output2 + 10, output1 + 10);
xout7 = _mm256_loadu2_m128i(output2 + 11, output1 + 11);
const __m128i* input_begin1 = input1;
const __m128i* input_begin2 = input2;
for (size_t part = 0; part < (props.half_mem() ? 2 : 1); ++part) {
if (props.half_mem() && (part == 1)) {
// Rebuild both scratchpads for the second half and rewind the read pointers.
input1 = input_begin1;
input2 = input_begin2;
ctx1->first_half = false;
ctx2->first_half = false;
cn_explode_scratchpad_vaes_double<ALGO>(ctx1, ctx2);
}
// i counts __m128i units per context; each iteration consumes 8 of them (128 bytes).
for (size_t i = 0; i < N;) {
xout0 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + 0, input1 + 0), xout0);
xout1 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + 1, input1 + 1), xout1);
xout2 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + 2, input1 + 2), xout2);
xout3 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + 3, input1 + 3), xout3);
constexpr int input_increment = 64 / sizeof(__m128i);
xout4 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + input_increment + 0, input1 + input_increment + 0), xout4);
xout5 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + input_increment + 1, input1 + input_increment + 1), xout5);
xout6 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + input_increment + 2, input1 + input_increment + 2), xout6);
xout7 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + input_increment + 3, input1 + input_increment + 3), xout7);
input1 += input_increment * 2;
input2 += input_increment * 2;
i += 8;
// Prefetch the next chunk of both scratchpads unless this was the last one.
if (i < N) {
_mm_prefetch((const char*)(input1), _MM_HINT_T0);
_mm_prefetch((const char*)(input1 + input_increment), _MM_HINT_T0);
_mm_prefetch((const char*)(input2), _MM_HINT_T0);
_mm_prefetch((const char*)(input2 + input_increment), _MM_HINT_T0);
}
vaes_round(k0, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
vaes_round(k1, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
vaes_round(k2, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
vaes_round(k3, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
vaes_round(k4, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
vaes_round(k5, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
vaes_round(k6, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
vaes_round(k7, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
vaes_round(k8, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
vaes_round(k9, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
}
}
// Write the accumulators back: low lanes to ctx1's state, high lanes to ctx2's.
_mm256_storeu2_m128i(output2 + 4, output1 + 4, xout0);
_mm256_storeu2_m128i(output2 + 5, output1 + 5, xout1);
_mm256_storeu2_m128i(output2 + 6, output1 + 6, xout2);
_mm256_storeu2_m128i(output2 + 7, output1 + 7, xout3);
_mm256_storeu2_m128i(output2 + 8, output1 + 8, xout4);
_mm256_storeu2_m128i(output2 + 9, output1 + 9, xout5);
_mm256_storeu2_m128i(output2 + 10, output1 + 10, xout6);
_mm256_storeu2_m128i(output2 + 11, output1 + 11, xout7);
// Clear the upper YMM halves before returning to code that may use legacy SSE instructions.
_mm256_zeroupper();
}
// References all four VAES scratchpad routines (explode/implode, single and
// double context) for ALGO, handing a null context to every one of them.
// NOTE(review): presumably this only exists to get the four templates
// instantiated for ALGO and is not meant to be executed - confirm.
template<Algorithm::Id ALGO>
void VAES_Instance()
{
    // Single null constant reused for every context argument.
    constexpr auto no_ctx = nullptr;

    cn_explode_scratchpad_vaes<ALGO>(no_ctx);
    cn_explode_scratchpad_vaes_double<ALGO>(no_ctx, no_ctx);

    cn_implode_scratchpad_vaes<ALGO>(no_ctx);
    cn_implode_scratchpad_vaes_double<ALGO>(no_ctx, no_ctx);
}
// Table with one VAES_Instance specialization per listed algorithm id.
// Taking the address of a specialization makes the compiler instantiate it.
void (*vaes_instances[])() = {
    &VAES_Instance<Algorithm::CN_0>,
    &VAES_Instance<Algorithm::CN_1>,
    &VAES_Instance<Algorithm::CN_2>,
    &VAES_Instance<Algorithm::CN_R>,
    &VAES_Instance<Algorithm::CN_FAST>,
    &VAES_Instance<Algorithm::CN_HALF>,
    &VAES_Instance<Algorithm::CN_XAO>,
    &VAES_Instance<Algorithm::CN_RTO>,
    &VAES_Instance<Algorithm::CN_RWZ>,
    &VAES_Instance<Algorithm::CN_ZLS>,
    &VAES_Instance<Algorithm::CN_DOUBLE>,
    &VAES_Instance<Algorithm::CN_CCX>,
    &VAES_Instance<Algorithm::CN_LITE_0>,
    &VAES_Instance<Algorithm::CN_LITE_1>,
    &VAES_Instance<Algorithm::CN_HEAVY_0>,
    &VAES_Instance<Algorithm::CN_HEAVY_TUBE>,
    &VAES_Instance<Algorithm::CN_HEAVY_XHV>,
    &VAES_Instance<Algorithm::CN_PICO_0>,
    &VAES_Instance<Algorithm::CN_PICO_TLO>,
    &VAES_Instance<Algorithm::CN_UPX2>,
    &VAES_Instance<Algorithm::CN_GR_0>,
    &VAES_Instance<Algorithm::CN_GR_1>,
    &VAES_Instance<Algorithm::CN_GR_2>,
    &VAES_Instance<Algorithm::CN_GR_3>,
    &VAES_Instance<Algorithm::CN_GR_4>,
    &VAES_Instance<Algorithm::CN_GR_5>,
};
} // xmrig

View file

@ -0,0 +1,48 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_CRYPTONIGHT_X86_VAES_H
#define XMRIG_CRYPTONIGHT_X86_VAES_H
#include "crypto/cn/CnAlgo.h"
struct cryptonight_ctx;
namespace xmrig {
// VAES scratchpad routines, each templated on the algorithm id (ALGO).
// This header only declares them; no definitions appear here.

// Scratchpad "explode" step for a single context.
template<Algorithm::Id ALGO> void cn_explode_scratchpad_vaes(cryptonight_ctx* ctx);
// Scratchpad "explode" step taking two contexts in one call.
template<Algorithm::Id ALGO> void cn_explode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2);
// Scratchpad "implode" step for a single context.
template<Algorithm::Id ALGO> void cn_implode_scratchpad_vaes(cryptonight_ctx* ctx);
// Scratchpad "implode" step taking two contexts in one call.
template<Algorithm::Id ALGO> void cn_implode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2);
} // xmrig
#endif /* XMRIG_CRYPTONIGHT_X86_VAES_H */

View file

@ -4,7 +4,7 @@
*
* This work is based on the implementation of
* Soeren S. Thomsen and Krystian Matusiewicz
*
*
*
*/
@ -22,7 +22,7 @@ const uint8_t indices_cyclic[15] = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6};
#define ROTATE_COLUMN_DOWN(v1, v2, amount_bytes, temp_var) {temp_var = (v1<<(8*amount_bytes))|(v2>>(8*(4-amount_bytes))); \
v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \
v1 = temp_var;}
#define COLUMN(x,y,i,c0,c1,c2,c3,c4,c5,c6,c7,tv1,tv2,tu,tl,t) \
tu = T[2*(uint32_t)x[4*c0+0]]; \
@ -161,11 +161,11 @@ static void F512(uint32_t *h, const uint32_t *m) {
/* digest up to msglen bytes of input (full blocks only) */
static void Transform(groestlHashState *ctx,
const uint8_t *input,
const uint8_t *input,
int msglen) {
/* digest message, one block at a time */
for (; msglen >= SIZE512;
for (; msglen >= SIZE512;
msglen -= SIZE512, input += SIZE512) {
F512(ctx->chaining,(uint32_t*)input);
@ -199,7 +199,7 @@ static void OutputTransformation(groestlHashState *ctx) {
RND512P((uint8_t*)y, temp, 0x00000009);
for (j = 0; j < 2*COLS512; j++) {
ctx->chaining[j] ^= temp[j];
}
}
}
/* initialise context */
@ -313,7 +313,7 @@ static void Final(groestlHashState* ctx,
ctx->block_counter2 >>= 8;
}
/* digest final padding block */
Transform(ctx, ctx->buffer, SIZE512);
Transform(ctx, ctx->buffer, SIZE512);
/* perform output transformation */
OutputTransformation(ctx);
@ -332,7 +332,7 @@ static void Final(groestlHashState* ctx,
}
/* hash bit sequence */
void groestl(const BitSequence* data,
void groestl(const BitSequence* data,
DataLength databitlen,
BitSequence* hashval) {

View file

@ -4,10 +4,10 @@
#include "crypto_uint8.h"
#include "crypto_uint32.h"
#include "crypto_uint64.h"
#include "crypto_hash.h"
#include "crypto_hash.h"
typedef crypto_uint8 uint8_t;
typedef crypto_uint32 uint32_t;
typedef crypto_uint8 uint8_t;
typedef crypto_uint32 uint32_t;
typedef crypto_uint64 uint64_t;
*/
#include <stdint.h>

View file

@ -5,7 +5,7 @@
** Source code author: Doug Whiting, 2008.
**
** This algorithm and source code is released to the public domain.
**
**
************************************************************************/
#define SKEIN_PORT_CODE /* instantiate any code in skein_port.h */
@ -57,7 +57,7 @@ static int Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
/*****************************************************************
** "Internal" Skein definitions
** -- not needed for sequential hashing API, but will be
** -- not needed for sequential hashing API, but will be
** helpful for other uses of Skein (e.g., tree hash mode).
** -- included here so that they can be shared between
** reference and optimized code.
@ -179,11 +179,11 @@ static int Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
#define Skein_Assert(x,retCode)/* default: ignore all Asserts, for performance */
#define Skein_assert(x)
#elif defined(SKEIN_ASSERT)
#include <assert.h>
#define Skein_Assert(x,retCode) assert(x)
#define Skein_assert(x) assert(x)
#include <assert.h>
#define Skein_Assert(x,retCode) assert(x)
#define Skein_assert(x) assert(x)
#else
#include <assert.h>
#include <assert.h>
#define Skein_Assert(x,retCode) { if (!(x)) return retCode; } /* caller error */
#define Skein_assert(x) assert(x) /* internal error */
#endif
@ -191,8 +191,8 @@ static int Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
/*****************************************************************
** Skein block function constants (shared across Ref and Opt code)
******************************************************************/
enum
{
enum
{
/* Skein_512 round rotation constants */
R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37,
R_512_1_0=33, R_512_1_1=27, R_512_1_2=14, R_512_1_3=42,
@ -251,7 +251,7 @@ const u64b_t SKEIN_512_IV_256[] =
#define BLK_BITS (WCNT*64) /* some useful definitions for code here */
#define KW_TWK_BASE (0)
#define KW_KEY_BASE (3)
#define ks (kw + KW_KEY_BASE)
#define ks (kw + KW_KEY_BASE)
#define ts (kw + KW_TWK_BASE)
#ifdef SKEIN_DEBUG
@ -310,7 +310,7 @@ static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,s
ks[5] = ctx->X[5];
ks[6] = ctx->X[6];
ks[7] = ctx->X[7];
ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;
ts[2] = ts[0] ^ ts[1];
@ -338,7 +338,7 @@ static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,s
X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \
X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \
#if SKEIN_UNROLL_512 == 0
#if SKEIN_UNROLL_512 == 0
#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) /* unrolled */ \
Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);
@ -469,7 +469,7 @@ static int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen)
u08b_t b[SKEIN_512_STATE_BYTES];
u64b_t w[SKEIN_512_STATE_WORDS];
} cfg; /* config block */
Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
@ -548,7 +548,7 @@ static int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msg
return SKEIN_SUCCESS;
}
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* finalize the hash computation and output the result */
static int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
@ -562,7 +562,7 @@ static int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
/* now output the result */
byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */

View file

@ -9,7 +9,7 @@
** This algorithm and source code is released to the public domain.
**
***************************************************************************
**
**
** The following compile-time switches may be defined to control some
** tradeoffs between speed, code size, error checking, and security.
**
@ -20,8 +20,8 @@
** [default: no callouts (no overhead)]
**
** SKEIN_ERR_CHECK -- how error checking is handled inside Skein
** code. If not defined, most error checking
** is disabled (for performance). Otherwise,
** code. If not defined, most error checking
** is disabled (for performance). Otherwise,
** the switch value is interpreted as:
** 0: use assert() to flag errors
** 1: return SKEIN_FAIL to flag errors

View file

@ -124,9 +124,9 @@ static inline __m128i soft_aesenc(__m128i in, __m128i key)
static inline uint32_t sub_word(uint32_t key)
{
return (saes_sbox[key >> 24 ] << 24) |
(saes_sbox[(key >> 16) & 0xff] << 16 ) |
(saes_sbox[(key >> 8) & 0xff] << 8 ) |
return (saes_sbox[key >> 24 ] << 24) |
(saes_sbox[(key >> 16) & 0xff] << 16 ) |
(saes_sbox[(key >> 8) & 0xff] << 8 ) |
saes_sbox[key & 0xff];
}

View file

@ -18,19 +18,21 @@ xmrig -a gr -o us.flockpool.com:5555 --tls -u WALLET_ADDRESS
You can use **rtm_ghostrider_example.cmd** as a template and put pool URL and your wallet address there. The general XMRig documentation is available [here](https://xmrig.com/docs/miner).
**Using `--threads` or `-t` option is NOT recommended because it turns off advanced built-in config.** If you want to tweak the number of threads used for GhostRider, it's recommended to start using config.json instead of command line. The most suitable command line option for this is `--cpu-max-threads-hint=N` where N can be between 0 and 100.
## Performance
While individual algorithm implementations are a bit unoptimized, XMRig achieves higher hashrates by employing better auto-config and more fine-grained thread scheduling: it can calculate a single batch of hashes using 2 threads for parts that don't require much cache. For example, on a typical Intel CPU (2 MB cache per core) it will use 1 thread per core for cn/fast, and 2 threads per core for other Cryptonight variants while calculating the same batch of hashes, always achieving more than 50% CPU load.
For the same reason, XMRig can sometimes use less than 100% CPU on Ryzen 3000/5000 CPUs if it finds that running 1 thread per core is faster for some Cryptonight variants on your system. Also, this is why it reports using only half the threads at startup - it's actually 2 threads per each reported thread.
For the same reason, XMRig can sometimes use less than 100% CPU on Ryzen 3000/5000 CPUs if it finds that running 1 thread per core is faster for some Cryptonight variants on your system.
**Windows** (detailed results [here](https://imgur.com/a/GCjEWpl))
**Windows** (detailed results [here](https://imgur.com/a/uRU1yO2))
CPU|cpuminer-gr-avx2 (tuned), h/s|XMRig (MSVC build), h/s|Speedup
-|-|-|-
AMD Ryzen 7 4700U|632.6|731|+15.5%
Intel Core i7-2600|496.4|533.6|+7.5%
AMD Ryzen 7 3700X @ 4.1 GHz|2453.0|2469.1|+0.65%
AMD Ryzen 5 5600X @ 4.65 GHz|2112.6|2221.2|+5.1%
AMD Ryzen 5 5600X @ 4.65 GHz|2112.6|2313.2|+9.5%
**Linux** (tested by **Delgon**, detailed results [here](https://cdn.discordapp.com/attachments/604375870236524574/913167614749048872/unknown.png))
CPU|cpuminer-gr-avx2 (tuned), h/s|XMRig (GCC build), h/s|Speedup

View file

@ -18,7 +18,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -26,10 +26,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

View file

@ -538,7 +538,7 @@ void destroy_helper_thread(HelperThread* t)
}
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper)
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper, bool verbose)
{
enum { N = 8 };
@ -554,11 +554,13 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
uint32_t cn_indices[6];
select_indices(cn_indices, data + 4);
static uint32_t prev_indices[3];
if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
memcpy(prev_indices, cn_indices, sizeof(prev_indices));
for (int i = 0; i < 3; ++i) {
LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
if (verbose) {
static uint32_t prev_indices[3];
if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
memcpy(prev_indices, cn_indices, sizeof(prev_indices));
for (int i = 0; i < 3; ++i) {
LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
}
}
}
@ -765,7 +767,7 @@ HelperThread* create_helper_thread(int64_t, const std::vector<int64_t>&) { retur
void destroy_helper_thread(HelperThread*) {}
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread*)
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread*, bool verbose)
{
constexpr uint32_t N = 8;
@ -784,11 +786,13 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
uint32_t step[6] = { 4, 4, 1, 2, 4, 4 };
#endif
static uint32_t prev_indices[3];
if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
memcpy(prev_indices, cn_indices, sizeof(prev_indices));
for (int i = 0; i < 3; ++i) {
LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
if (verbose) {
static uint32_t prev_indices[3];
if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
memcpy(prev_indices, cn_indices, sizeof(prev_indices));
for (int i = 0; i < 3; ++i) {
LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
}
}
}

View file

@ -41,7 +41,7 @@ struct HelperThread;
void benchmark();
HelperThread* create_helper_thread(int64_t cpu_index, const std::vector<int64_t>& affinities);
void destroy_helper_thread(HelperThread* t);
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper);
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper, bool verbose = true);
} // namespace ghostrider

View file

@ -51,7 +51,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -59,10 +59,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

View file

@ -8,7 +8,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -16,10 +16,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

View file

@ -7,7 +7,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -15,10 +15,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

View file

@ -5,7 +5,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -13,10 +13,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

View file

@ -8,7 +8,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -16,10 +16,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

View file

@ -5,7 +5,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -13,10 +13,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@ -1029,4 +1029,4 @@ sph_echo512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
}
#ifdef __cplusplus
}
#endif
#endif

View file

@ -7,7 +7,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -15,10 +15,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@ -312,7 +312,7 @@ void sph_echo512_close(void *cc, void *dst);
*/
void sph_echo512_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#ifdef __cplusplus
}
#endif

View file

@ -5,7 +5,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -13,10 +13,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

View file

@ -7,7 +7,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -15,10 +15,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

View file

@ -5,7 +5,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -13,10 +13,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

View file

@ -7,7 +7,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -15,10 +15,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@ -288,9 +288,9 @@ void sph_luffa512_close(void *cc, void *dst);
*/
void sph_luffa512_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -692,9 +692,9 @@ memcpy( state_out, state_in, 32 );
#undef SHA2_IN
}
void sph_sha256_transform_be( uint32_t *state_out, const uint32_t *data,
void sph_sha256_transform_be( uint32_t *state_out, const uint32_t *data,
const uint32_t *state_in )
{
{
memcpy( state_out, state_in, 32 );
#define SHA2_IN(x) sph_dec32be_aligned( data+(x) )
SHA2_ROUND_BODY( SHA2_IN, state_out );
@ -775,7 +775,7 @@ void sph_sha256_full( void *dst, const void *data, size_t len )
sph_sha256_init( &cc );
sph_sha256( &cc, data, len );
sph_sha256_close( &cc, dst );
}
}
void sha256d(void* hash, const void* data, int len)
{

View file

@ -11,7 +11,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -19,10 +19,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

View file

@ -5,7 +5,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -13,10 +13,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@ -99,7 +99,7 @@ static const sph_u32 IV512[] = {
/*
* This is the code needed to match the "reference implementation" as
* published on Nov 23rd, 2009, instead of the published specification.
*
*
#define AES_BIG_ENDIAN 1
#include "aes_helper.c"

View file

@ -9,7 +9,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -17,10 +17,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@ -306,9 +306,9 @@ void sph_shavite512_close(void *cc, void *dst);
*/
void sph_shavite512_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#ifdef __cplusplus
}
#endif
#endif
#endif

View file

@ -5,7 +5,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -13,10 +13,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

View file

@ -7,7 +7,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -15,10 +15,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

View file

@ -5,7 +5,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -13,10 +13,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

View file

@ -12,7 +12,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -20,10 +20,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

View file

@ -18,7 +18,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@ -26,10 +26,10 @@
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

View file

@ -166,9 +166,9 @@ static inline uint32_t popcount_soft(uint64_t x)
constexpr uint64_t h01 = 0x0101010101010101ull;
x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits
x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits
return (x * h01) >> 56; //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits
return (x * h01) >> 56; //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
}

View file

@ -180,7 +180,7 @@ init_block_loop:
prefetchw byte ptr [rsi]
mov rbx, rbp
.byte 232 ;# 0xE8 = call
;# .set CALL_LOC,
;# .set CALL_LOC,
.int 32768 - (call_offset - DECL(randomx_dataset_init))
call_offset:
mov qword ptr [rsi+0], r8

View file

@ -231,7 +231,7 @@ namespace randomx {
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMULH_R = SuperscalarInstructionInfo("IMULH_R", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1);
const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISMULH_R = SuperscalarInstructionInfo("ISMULH_R", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1);
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_RCP = SuperscalarInstructionInfo("IMUL_RCP", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1);
const SuperscalarInstructionInfo SuperscalarInstructionInfo::NOP = SuperscalarInstructionInfo("NOP");
//these are some of the options how to split a 16-byte window into 3 or 4 x86 instructions.
@ -494,7 +494,7 @@ namespace randomx {
// * value must be ready at the required cycle
// * cannot be the same as the source register unless the instruction allows it
// - this avoids optimizable instructions such as "xor r, r" or "sub r, r"
// * register cannot be multiplied twice in a row unless allowChainedMul is true
// * register cannot be multiplied twice in a row unless allowChainedMul is true
// - this avoids accumulation of trailing zeroes in registers due to excessive multiplication
// - allowChainedMul is set to true if an attempt to find source/destination registers failed (this is quite rare, but prevents a catastrophic failure of the generator)
// * either the last instruction applied to the register or its source must be different than this instruction
@ -619,7 +619,7 @@ namespace randomx {
if (commit)
if (trace) std::cout << "; (eliminated)" << std::endl;
return cycle;
}
}
else if (mop.isSimple()) {
//this macro-op has only one uOP
return scheduleUop<commit>(mop.getUop1(), portBusy, cycle);
@ -676,7 +676,7 @@ namespace randomx {
if (trace) std::cout << "; ------------- fetch cycle " << cycle << " (" << decodeBuffer->getName() << ")" << std::endl;
int bufferIndex = 0;
//fill all instruction slots in the current decode buffer
while (bufferIndex < decodeBuffer->getSize()) {
int topCycle = cycle;
@ -831,7 +831,7 @@ namespace randomx {
prog.decodeCycles = decodeCycle;
prog.ipc = ipc;
prog.mulCount = mulCount;
/*if(INFO) std::cout << "; ALU port utilization:" << std::endl;
if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl;