RandomX: added performance profiler (for developers)

Also optimized Blake2b SSE4.1 code size to avoid code cache pollution.
This commit is contained in:
SChernykh 2020-09-12 23:07:52 +02:00
parent adf833b60a
commit a05393727c
19 changed files with 390 additions and 481 deletions

View file

@ -28,6 +28,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/soft_aes.h"
#include "crypto/randomx/randomx.h"
#include "base/tools/Profiler.h"
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e
@ -215,6 +216,8 @@ template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
template<bool softAes>
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
PROFILE_SCOPE(RandomX_AES);
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;

View file

@ -92,7 +92,7 @@ extern "C" {
int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen);
/* Simple API */
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen);
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen);
/* Argon2 Team - Begin Code */
int rxa2_blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);

View file

@ -1,402 +0,0 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2B_LOAD_SSE41_H
#define BLAKE2B_LOAD_SSE41_H
#define LOAD_MSG_0_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m1); \
b1 = _mm_unpacklo_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_0_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m0, m1); \
b1 = _mm_unpackhi_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_0_3(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m5); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_0_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m5); \
b1 = _mm_unpackhi_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_1_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m2); \
b1 = _mm_unpackhi_epi64(m4, m6); \
} while(0)
#define LOAD_MSG_1_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_alignr_epi8(m3, m7, 8); \
} while(0)
#define LOAD_MSG_1_3(b0, b1) \
do \
{ \
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
b1 = _mm_unpackhi_epi64(m5, m2); \
} while(0)
#define LOAD_MSG_1_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m1); \
b1 = _mm_unpackhi_epi64(m3, m1); \
} while(0)
#define LOAD_MSG_2_1(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m6, m5, 8); \
b1 = _mm_unpackhi_epi64(m2, m7); \
} while(0)
#define LOAD_MSG_2_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m0); \
b1 = _mm_blend_epi16(m1, m6, 0xF0); \
} while(0)
#define LOAD_MSG_2_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m5, m1, 0xF0); \
b1 = _mm_unpackhi_epi64(m3, m4); \
} while(0)
#define LOAD_MSG_2_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m3); \
b1 = _mm_alignr_epi8(m2, m0, 8); \
} while(0)
#define LOAD_MSG_3_1(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m3, m1); \
b1 = _mm_unpackhi_epi64(m6, m5); \
} while(0)
#define LOAD_MSG_3_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m0); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_3_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m1, m2, 0xF0); \
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
} while(0)
#define LOAD_MSG_3_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m3, m5); \
b1 = _mm_unpacklo_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_4_1(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m2); \
b1 = _mm_unpacklo_epi64(m1, m5); \
} while(0)
#define LOAD_MSG_4_2(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m0, m3, 0xF0); \
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
} while(0)
#define LOAD_MSG_4_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m7, m5, 0xF0); \
b1 = _mm_blend_epi16(m3, m1, 0xF0); \
} while(0)
#define LOAD_MSG_4_4(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m6, m0, 8); \
b1 = _mm_blend_epi16(m4, m6, 0xF0); \
} while(0)
#define LOAD_MSG_5_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m1, m3); \
b1 = _mm_unpacklo_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_5_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m5); \
b1 = _mm_unpackhi_epi64(m5, m1); \
} while(0)
#define LOAD_MSG_5_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m2, m3, 0xF0); \
b1 = _mm_unpackhi_epi64(m7, m0); \
} while(0)
#define LOAD_MSG_5_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m6, m2); \
b1 = _mm_blend_epi16(m7, m4, 0xF0); \
} while(0)
#define LOAD_MSG_6_1(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m6, m0, 0xF0); \
b1 = _mm_unpacklo_epi64(m7, m2); \
} while(0)
#define LOAD_MSG_6_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m2, m7); \
b1 = _mm_alignr_epi8(m5, m6, 8); \
} while(0)
#define LOAD_MSG_6_3(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m3); \
b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
} while(0)
#define LOAD_MSG_6_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m3, m1); \
b1 = _mm_blend_epi16(m1, m5, 0xF0); \
} while(0)
#define LOAD_MSG_7_1(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m6, m3); \
b1 = _mm_blend_epi16(m6, m1, 0xF0); \
} while(0)
#define LOAD_MSG_7_2(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m7, m5, 8); \
b1 = _mm_unpackhi_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_7_3(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m2, m7); \
b1 = _mm_unpacklo_epi64(m4, m1); \
} while(0)
#define LOAD_MSG_7_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m2); \
b1 = _mm_unpacklo_epi64(m3, m5); \
} while(0)
#define LOAD_MSG_8_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m3, m7); \
b1 = _mm_alignr_epi8(m0, m5, 8); \
} while(0)
#define LOAD_MSG_8_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m7, m4); \
b1 = _mm_alignr_epi8(m4, m1, 8); \
} while(0)
#define LOAD_MSG_8_3(b0, b1) \
do \
{ \
b0 = m6; \
b1 = _mm_alignr_epi8(m5, m0, 8); \
} while(0)
#define LOAD_MSG_8_4(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m1, m3, 0xF0); \
b1 = m2; \
} while(0)
#define LOAD_MSG_9_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_unpackhi_epi64(m3, m0); \
} while(0)
#define LOAD_MSG_9_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m1, m2); \
b1 = _mm_blend_epi16(m3, m2, 0xF0); \
} while(0)
#define LOAD_MSG_9_3(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m7, m4); \
b1 = _mm_unpackhi_epi64(m1, m6); \
} while(0)
#define LOAD_MSG_9_4(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m7, m5, 8); \
b1 = _mm_unpacklo_epi64(m6, m0); \
} while(0)
#define LOAD_MSG_10_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m1); \
b1 = _mm_unpacklo_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_10_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m0, m1); \
b1 = _mm_unpackhi_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_10_3(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m5); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_10_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m5); \
b1 = _mm_unpackhi_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_11_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m2); \
b1 = _mm_unpackhi_epi64(m4, m6); \
} while(0)
#define LOAD_MSG_11_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_alignr_epi8(m3, m7, 8); \
} while(0)
#define LOAD_MSG_11_3(b0, b1) \
do \
{ \
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
b1 = _mm_unpackhi_epi64(m5, m2); \
} while(0)
#define LOAD_MSG_11_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m1); \
b1 = _mm_unpackhi_epi64(m3, m1); \
} while(0)
#endif

View file

@ -102,17 +102,21 @@
row4l = t1; \
row4h = t0;
#include "blake2b-load-sse41.h"
#define LOAD_MSG(r, i, b0, b1) \
do { \
b0 = _mm_set_epi64x(m[blake2b_sigma_sse41[r][i * 4 + 1]], m[blake2b_sigma_sse41[r][i * 4 + 0]]); \
b1 = _mm_set_epi64x(m[blake2b_sigma_sse41[r][i * 4 + 3]], m[blake2b_sigma_sse41[r][i * 4 + 2]]); \
} while(0)
#define ROUND(r) \
LOAD_MSG_ ##r ##_1(b0, b1); \
LOAD_MSG(r, 0, b0, b1); \
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
LOAD_MSG_ ##r ##_2(b0, b1); \
LOAD_MSG(r, 1, b0, b1); \
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
LOAD_MSG_ ##r ##_3(b0, b1); \
LOAD_MSG(r, 2, b0, b1); \
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
LOAD_MSG_ ##r ##_4(b0, b1); \
LOAD_MSG(r, 3, b0, b1); \
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);

View file

@ -56,6 +56,23 @@ static const uint64_t blake2b_IV[8] = {
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) };
#if defined(_M_X64) || defined(__x86_64__)
static const uint8_t blake2b_sigma_sse41[12][16] = {
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
{11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4},
{7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8},
{9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13},
{2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9},
{12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11},
{13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10},
{6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5},
{10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0},
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
};
#endif
static const uint8_t blake2b_sigma[12][16] = {
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
@ -203,15 +220,6 @@ static void rx_blake2b_compress_sse41(blake2b_state* S, const uint8_t *block)
const __m128i r16 = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
const __m128i r24 = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
const __m128i m0 = LOADU(block + 00);
const __m128i m1 = LOADU(block + 16);
const __m128i m2 = LOADU(block + 32);
const __m128i m3 = LOADU(block + 48);
const __m128i m4 = LOADU(block + 64);
const __m128i m5 = LOADU(block + 80);
const __m128i m6 = LOADU(block + 96);
const __m128i m7 = LOADU(block + 112);
row1l = LOADU(&S->h[0]);
row1h = LOADU(&S->h[2]);
row2l = LOADU(&S->h[4]);
@ -221,18 +229,11 @@ static void rx_blake2b_compress_sse41(blake2b_state* S, const uint8_t *block)
row4l = _mm_xor_si128(LOADU(&blake2b_IV[4]), LOADU(&S->t[0]));
row4h = _mm_xor_si128(LOADU(&blake2b_IV[6]), LOADU(&S->f[0]));
ROUND(0);
ROUND(1);
ROUND(2);
ROUND(3);
ROUND(4);
ROUND(5);
ROUND(6);
ROUND(7);
ROUND(8);
ROUND(9);
ROUND(10);
ROUND(11);
const uint64_t* m = (const uint64_t*)(block);
for (uint32_t r = 0; r < 12; ++r) {
ROUND(r);
}
row1l = _mm_xor_si128(row3l, row1l);
row1h = _mm_xor_si128(row3h, row1h);
@ -388,8 +389,7 @@ int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen) {
return 0;
}
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen,
const void *key, size_t keylen) {
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen) {
blake2b_state S;
int ret = -1;
@ -402,25 +402,14 @@ int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen,
goto fail;
}
if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) {
if (rx_blake2b_init(&S, outlen) < 0) {
goto fail;
}
if (keylen > 0) {
if (rx_blake2b_init_key(&S, outlen, key, keylen) < 0) {
goto fail;
}
}
else {
if (rx_blake2b_init(&S, outlen) < 0) {
goto fail;
}
}
if (rx_blake2b_update(&S, in, inlen) < 0) {
if (rx_blake2b_update(&S, in, inlen) < 0) {
goto fail;
}
ret = rx_blake2b_final(&S, out, outlen);
ret = rx_blake2b_final(&S, out, outlen);
fail:
//clear_internal_memory(&S, sizeof(S));
@ -442,43 +431,42 @@ int rxa2_blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
store32(outlen_bytes, (uint32_t)outlen);
#define TRY(statement) \
do { \
ret = statement; \
if (ret < 0) { \
goto fail; \
} \
} while ((void)0, 0)
do { \
ret = statement; \
if (ret < 0) { \
goto fail; \
} \
} while ((void)0, 0)
if (outlen <= BLAKE2B_OUTBYTES) {
TRY(rx_blake2b_init(&blake_state, outlen));
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
TRY(rx_blake2b_update(&blake_state, in, inlen));
TRY(rx_blake2b_final(&blake_state, out, outlen));
TRY(rx_blake2b_init(&blake_state, outlen));
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
TRY(rx_blake2b_update(&blake_state, in, inlen));
TRY(rx_blake2b_final(&blake_state, out, outlen));
}
else {
uint32_t toproduce;
uint8_t out_buffer[BLAKE2B_OUTBYTES];
uint8_t in_buffer[BLAKE2B_OUTBYTES];
TRY(rx_blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
TRY(rx_blake2b_update(&blake_state, in, inlen));
TRY(rx_blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
TRY(rx_blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
TRY(rx_blake2b_update(&blake_state, in, inlen));
TRY(rx_blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
out += BLAKE2B_OUTBYTES / 2;
toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
while (toproduce > BLAKE2B_OUTBYTES) {
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
TRY(rx_blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
BLAKE2B_OUTBYTES, NULL, 0));
TRY(rx_blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
BLAKE2B_OUTBYTES));
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
out += BLAKE2B_OUTBYTES / 2;
toproduce -= BLAKE2B_OUTBYTES / 2;
}
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
TRY(rx_blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
0));
TRY(rx_blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES));
memcpy(out, out_buffer, toproduce);
}
fail:

View file

@ -55,7 +55,7 @@ namespace randomx {
void Blake2Generator::checkData(const size_t bytesNeeded) {
if (dataIndex + bytesNeeded > sizeof(data)) {
rx_blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
rx_blake2b(data, sizeof(data), data, sizeof(data));
dataIndex = 0;
}
}

View file

@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/program.hpp"
#include "crypto/randomx/reciprocal.h"
#include "crypto/randomx/virtual_memory.hpp"
#include "base/tools/Profiler.h"
#ifdef XMRIG_FIX_RYZEN
# include "crypto/rx/Rx.h"
@ -255,6 +256,8 @@ namespace randomx {
}
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
PROFILE_SCOPE(RandomX_JIT_compile);
vm_flags = flags;
generateProgramPrologue(prog, pcfg);

View file

@ -47,6 +47,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cassert>
#include "base/tools/Profiler.h"
RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
{
ArgonSalt = "RandomWOW\x01";
@ -574,33 +576,35 @@ extern "C" {
assert(inputSize == 0 || input != nullptr);
assert(output != nullptr);
alignas(16) uint64_t tempHash[8];
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), input, inputSize);
machine->initScratchpad(&tempHash);
machine->resetRoundingMode();
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
machine->run(&tempHash);
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile));
}
machine->run(&tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
machine->getFinalResult(output);
}
void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize) {
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), input, inputSize);
machine->initScratchpad(tempHash);
}
void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output) {
PROFILE_SCOPE(RandomX_hash);
machine->resetRoundingMode();
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
machine->run(&tempHash);
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile));
}
machine->run(&tempHash);
// Finish current hash and fill the scratchpad for the next hash at the same time
rx_blake2b(tempHash, sizeof(tempHash), nextInput, nextInputSize, nullptr, 0);
machine->hashAndFill(output, RANDOMX_HASH_SIZE, tempHash);
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), nextInput, nextInputSize);
machine->hashAndFill(output, tempHash);
}
}

View file

@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/blake2/blake2.h"
#include "crypto/randomx/intrin_portable.h"
#include "crypto/randomx/allocator.hpp"
#include "base/tools/Profiler.h"
randomx_vm::~randomx_vm() {
@ -109,15 +110,15 @@ namespace randomx {
}
template<bool softAes>
void VmBase<softAes>::getFinalResult(void* out, size_t outSize) {
void VmBase<softAes>::getFinalResult(void* out) {
hashAes1Rx4<softAes>(scratchpad, ScratchpadSize, &reg.a);
rx_blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0);
rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, &reg, sizeof(RegisterFile));
}
template<bool softAes>
void VmBase<softAes>::hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) {
void VmBase<softAes>::hashAndFill(void* out, uint64_t (&fill_state)[8]) {
hashAndFillAes1Rx4<softAes>(scratchpad, ScratchpadSize, &reg.a, fill_state);
rx_blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0);
rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, &reg, sizeof(RegisterFile));
}
template<bool softAes>
@ -127,6 +128,7 @@ namespace randomx {
template<bool softAes>
void VmBase<softAes>::generateProgram(void* seed) {
PROFILE_SCOPE(RandomX_generate_program);
fillAes4Rx4<softAes>(seed, 128 + RandomX_CurrentConfig.ProgramSize * 8, &program);
}

View file

@ -38,8 +38,8 @@ class randomx_vm
public:
virtual ~randomx_vm() = 0;
virtual void setScratchpad(uint8_t *scratchpad) = 0;
virtual void getFinalResult(void* out, size_t outSize) = 0;
virtual void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) = 0;
virtual void getFinalResult(void* out) = 0;
virtual void hashAndFill(void* out, uint64_t (&fill_state)[8]) = 0;
virtual void setDataset(randomx_dataset* dataset) { }
virtual void setCache(randomx_cache* cache) { }
virtual void initScratchpad(void* seed) = 0;
@ -86,8 +86,8 @@ namespace randomx {
~VmBase() override;
void setScratchpad(uint8_t *scratchpad) override;
void initScratchpad(void* seed) override;
void getFinalResult(void* out, size_t outSize) override;
void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) override;
void getFinalResult(void* out) override;
void hashAndFill(void* out, uint64_t (&fill_state)[8]) override;
protected:
void generateProgram(void* seed);

View file

@ -28,6 +28,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/vm_compiled.hpp"
#include "crypto/randomx/common.hpp"
#include "base/tools/Profiler.h"
namespace randomx {
@ -41,6 +42,8 @@ namespace randomx {
template<bool softAes>
void CompiledVm<softAes>::run(void* seed) {
PROFILE_SCOPE(RandomX_run);
compiler.prepare();
VmBase<softAes>::generateProgram(seed);
randomx_vm::initialize();
@ -51,6 +54,8 @@ namespace randomx {
template<bool softAes>
void CompiledVm<softAes>::execute() {
PROFILE_SCOPE(RandomX_JIT_execute);
#ifdef XMRIG_ARM
memcpy(reg.f, config.eMask, sizeof(config.eMask));
#endif

View file

@ -120,8 +120,8 @@ bool xmrig::RxConfig::read(const rapidjson::Value &value)
}
# endif
const int mode = Json::getInt(value, kScratchpadPrefetchMode, static_cast<int>(m_scratchpadPrefetchMode));
if ((mode >= ScratchpadPrefetchOff) && (mode < ScratchpadPrefetchMax)) {
const uint32_t mode = static_cast<uint32_t>(Json::getInt(value, kScratchpadPrefetchMode, static_cast<int>(m_scratchpadPrefetchMode)));
if (mode < ScratchpadPrefetchMax) {
m_scratchpadPrefetchMode = static_cast<ScratchpadPrefetchMode>(mode);
}