RandomX: added performance profiler (for developers)
Also optimized Blake2b SSE4.1 code size to avoid code cache pollution.
This commit is contained in:
parent
adf833b60a
commit
a05393727c
19 changed files with 390 additions and 481 deletions
|
@ -28,6 +28,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "crypto/randomx/soft_aes.h"
|
||||
#include "crypto/randomx/randomx.h"
|
||||
#include "base/tools/Profiler.h"
|
||||
|
||||
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
|
||||
#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e
|
||||
|
@ -215,6 +216,8 @@ template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
|||
|
||||
template<bool softAes>
|
||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
||||
PROFILE_SCOPE(RandomX_AES);
|
||||
|
||||
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
|
||||
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
|
||||
|
||||
|
|
|
@ -92,7 +92,7 @@ extern "C" {
|
|||
int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
||||
|
||||
/* Simple API */
|
||||
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen);
|
||||
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen);
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
int rxa2_blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
|
||||
|
|
|
@ -1,402 +0,0 @@
|
|||
/*
|
||||
BLAKE2 reference source code package - optimized C implementations
|
||||
|
||||
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
||||
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
||||
your option. The terms of these licenses can be found at:
|
||||
|
||||
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
More information about the BLAKE2 hash function can be found at
|
||||
https://blake2.net.
|
||||
*/
|
||||
#ifndef BLAKE2B_LOAD_SSE41_H
|
||||
#define BLAKE2B_LOAD_SSE41_H
|
||||
|
||||
#define LOAD_MSG_0_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m0, m1); \
|
||||
b1 = _mm_unpacklo_epi64(m2, m3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_0_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m0, m1); \
|
||||
b1 = _mm_unpackhi_epi64(m2, m3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_0_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m4, m5); \
|
||||
b1 = _mm_unpacklo_epi64(m6, m7); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_0_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m4, m5); \
|
||||
b1 = _mm_unpackhi_epi64(m6, m7); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_1_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m7, m2); \
|
||||
b1 = _mm_unpackhi_epi64(m4, m6); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_1_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m5, m4); \
|
||||
b1 = _mm_alignr_epi8(m3, m7, 8); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_1_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
|
||||
b1 = _mm_unpackhi_epi64(m5, m2); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_1_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m6, m1); \
|
||||
b1 = _mm_unpackhi_epi64(m3, m1); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_2_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_alignr_epi8(m6, m5, 8); \
|
||||
b1 = _mm_unpackhi_epi64(m2, m7); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_2_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m4, m0); \
|
||||
b1 = _mm_blend_epi16(m1, m6, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_2_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_blend_epi16(m5, m1, 0xF0); \
|
||||
b1 = _mm_unpackhi_epi64(m3, m4); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_2_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m7, m3); \
|
||||
b1 = _mm_alignr_epi8(m2, m0, 8); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_3_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m3, m1); \
|
||||
b1 = _mm_unpackhi_epi64(m6, m5); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_3_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m4, m0); \
|
||||
b1 = _mm_unpacklo_epi64(m6, m7); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_3_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_blend_epi16(m1, m2, 0xF0); \
|
||||
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_3_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m3, m5); \
|
||||
b1 = _mm_unpacklo_epi64(m0, m4); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_4_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m4, m2); \
|
||||
b1 = _mm_unpacklo_epi64(m1, m5); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_4_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_blend_epi16(m0, m3, 0xF0); \
|
||||
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_4_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_blend_epi16(m7, m5, 0xF0); \
|
||||
b1 = _mm_blend_epi16(m3, m1, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_4_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_alignr_epi8(m6, m0, 8); \
|
||||
b1 = _mm_blend_epi16(m4, m6, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_5_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m1, m3); \
|
||||
b1 = _mm_unpacklo_epi64(m0, m4); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_5_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m6, m5); \
|
||||
b1 = _mm_unpackhi_epi64(m5, m1); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_5_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_blend_epi16(m2, m3, 0xF0); \
|
||||
b1 = _mm_unpackhi_epi64(m7, m0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_5_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m6, m2); \
|
||||
b1 = _mm_blend_epi16(m7, m4, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_6_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_blend_epi16(m6, m0, 0xF0); \
|
||||
b1 = _mm_unpacklo_epi64(m7, m2); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_6_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m2, m7); \
|
||||
b1 = _mm_alignr_epi8(m5, m6, 8); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_6_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m0, m3); \
|
||||
b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_6_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m3, m1); \
|
||||
b1 = _mm_blend_epi16(m1, m5, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_7_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m6, m3); \
|
||||
b1 = _mm_blend_epi16(m6, m1, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_7_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_alignr_epi8(m7, m5, 8); \
|
||||
b1 = _mm_unpackhi_epi64(m0, m4); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_7_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m2, m7); \
|
||||
b1 = _mm_unpacklo_epi64(m4, m1); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_7_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m0, m2); \
|
||||
b1 = _mm_unpacklo_epi64(m3, m5); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_8_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m3, m7); \
|
||||
b1 = _mm_alignr_epi8(m0, m5, 8); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_8_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m7, m4); \
|
||||
b1 = _mm_alignr_epi8(m4, m1, 8); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_8_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = m6; \
|
||||
b1 = _mm_alignr_epi8(m5, m0, 8); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_8_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_blend_epi16(m1, m3, 0xF0); \
|
||||
b1 = m2; \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_9_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m5, m4); \
|
||||
b1 = _mm_unpackhi_epi64(m3, m0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_9_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m1, m2); \
|
||||
b1 = _mm_blend_epi16(m3, m2, 0xF0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_9_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m7, m4); \
|
||||
b1 = _mm_unpackhi_epi64(m1, m6); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_9_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_alignr_epi8(m7, m5, 8); \
|
||||
b1 = _mm_unpacklo_epi64(m6, m0); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_10_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m0, m1); \
|
||||
b1 = _mm_unpacklo_epi64(m2, m3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_10_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m0, m1); \
|
||||
b1 = _mm_unpackhi_epi64(m2, m3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_10_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m4, m5); \
|
||||
b1 = _mm_unpacklo_epi64(m6, m7); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_10_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpackhi_epi64(m4, m5); \
|
||||
b1 = _mm_unpackhi_epi64(m6, m7); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_11_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m7, m2); \
|
||||
b1 = _mm_unpackhi_epi64(m4, m6); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_11_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m5, m4); \
|
||||
b1 = _mm_alignr_epi8(m3, m7, 8); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_11_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
|
||||
b1 = _mm_unpackhi_epi64(m5, m2); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_11_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
b0 = _mm_unpacklo_epi64(m6, m1); \
|
||||
b1 = _mm_unpackhi_epi64(m3, m1); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#endif
|
|
@ -102,17 +102,21 @@
|
|||
row4l = t1; \
|
||||
row4h = t0;
|
||||
|
||||
#include "blake2b-load-sse41.h"
|
||||
#define LOAD_MSG(r, i, b0, b1) \
|
||||
do { \
|
||||
b0 = _mm_set_epi64x(m[blake2b_sigma_sse41[r][i * 4 + 1]], m[blake2b_sigma_sse41[r][i * 4 + 0]]); \
|
||||
b1 = _mm_set_epi64x(m[blake2b_sigma_sse41[r][i * 4 + 3]], m[blake2b_sigma_sse41[r][i * 4 + 2]]); \
|
||||
} while(0)
|
||||
|
||||
#define ROUND(r) \
|
||||
LOAD_MSG_ ##r ##_1(b0, b1); \
|
||||
LOAD_MSG(r, 0, b0, b1); \
|
||||
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||
LOAD_MSG_ ##r ##_2(b0, b1); \
|
||||
LOAD_MSG(r, 1, b0, b1); \
|
||||
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||
DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
|
||||
LOAD_MSG_ ##r ##_3(b0, b1); \
|
||||
LOAD_MSG(r, 2, b0, b1); \
|
||||
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||
LOAD_MSG_ ##r ##_4(b0, b1); \
|
||||
LOAD_MSG(r, 3, b0, b1); \
|
||||
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||
UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
|
||||
|
||||
|
|
|
@ -56,6 +56,23 @@ static const uint64_t blake2b_IV[8] = {
|
|||
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
|
||||
UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) };
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
static const uint8_t blake2b_sigma_sse41[12][16] = {
|
||||
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
|
||||
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
|
||||
{11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4},
|
||||
{7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8},
|
||||
{9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13},
|
||||
{2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9},
|
||||
{12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11},
|
||||
{13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10},
|
||||
{6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5},
|
||||
{10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0},
|
||||
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
|
||||
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
|
||||
};
|
||||
#endif
|
||||
|
||||
static const uint8_t blake2b_sigma[12][16] = {
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
|
@ -203,15 +220,6 @@ static void rx_blake2b_compress_sse41(blake2b_state* S, const uint8_t *block)
|
|||
const __m128i r16 = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
|
||||
const __m128i r24 = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
|
||||
|
||||
const __m128i m0 = LOADU(block + 00);
|
||||
const __m128i m1 = LOADU(block + 16);
|
||||
const __m128i m2 = LOADU(block + 32);
|
||||
const __m128i m3 = LOADU(block + 48);
|
||||
const __m128i m4 = LOADU(block + 64);
|
||||
const __m128i m5 = LOADU(block + 80);
|
||||
const __m128i m6 = LOADU(block + 96);
|
||||
const __m128i m7 = LOADU(block + 112);
|
||||
|
||||
row1l = LOADU(&S->h[0]);
|
||||
row1h = LOADU(&S->h[2]);
|
||||
row2l = LOADU(&S->h[4]);
|
||||
|
@ -221,18 +229,11 @@ static void rx_blake2b_compress_sse41(blake2b_state* S, const uint8_t *block)
|
|||
row4l = _mm_xor_si128(LOADU(&blake2b_IV[4]), LOADU(&S->t[0]));
|
||||
row4h = _mm_xor_si128(LOADU(&blake2b_IV[6]), LOADU(&S->f[0]));
|
||||
|
||||
ROUND(0);
|
||||
ROUND(1);
|
||||
ROUND(2);
|
||||
ROUND(3);
|
||||
ROUND(4);
|
||||
ROUND(5);
|
||||
ROUND(6);
|
||||
ROUND(7);
|
||||
ROUND(8);
|
||||
ROUND(9);
|
||||
ROUND(10);
|
||||
ROUND(11);
|
||||
const uint64_t* m = (const uint64_t*)(block);
|
||||
|
||||
for (uint32_t r = 0; r < 12; ++r) {
|
||||
ROUND(r);
|
||||
}
|
||||
|
||||
row1l = _mm_xor_si128(row3l, row1l);
|
||||
row1h = _mm_xor_si128(row3h, row1h);
|
||||
|
@ -388,8 +389,7 @@ int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
||||
const void *key, size_t keylen) {
|
||||
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen) {
|
||||
blake2b_state S;
|
||||
int ret = -1;
|
||||
|
||||
|
@ -402,25 +402,14 @@ int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
|||
goto fail;
|
||||
}
|
||||
|
||||
if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) {
|
||||
if (rx_blake2b_init(&S, outlen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (keylen > 0) {
|
||||
if (rx_blake2b_init_key(&S, outlen, key, keylen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (rx_blake2b_init(&S, outlen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
if (rx_blake2b_update(&S, in, inlen) < 0) {
|
||||
if (rx_blake2b_update(&S, in, inlen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
ret = rx_blake2b_final(&S, out, outlen);
|
||||
ret = rx_blake2b_final(&S, out, outlen);
|
||||
|
||||
fail:
|
||||
//clear_internal_memory(&S, sizeof(S));
|
||||
|
@ -442,43 +431,42 @@ int rxa2_blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
|
|||
store32(outlen_bytes, (uint32_t)outlen);
|
||||
|
||||
#define TRY(statement) \
|
||||
do { \
|
||||
ret = statement; \
|
||||
if (ret < 0) { \
|
||||
goto fail; \
|
||||
} \
|
||||
} while ((void)0, 0)
|
||||
do { \
|
||||
ret = statement; \
|
||||
if (ret < 0) { \
|
||||
goto fail; \
|
||||
} \
|
||||
} while ((void)0, 0)
|
||||
|
||||
if (outlen <= BLAKE2B_OUTBYTES) {
|
||||
TRY(rx_blake2b_init(&blake_state, outlen));
|
||||
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||
TRY(rx_blake2b_update(&blake_state, in, inlen));
|
||||
TRY(rx_blake2b_final(&blake_state, out, outlen));
|
||||
TRY(rx_blake2b_init(&blake_state, outlen));
|
||||
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||
TRY(rx_blake2b_update(&blake_state, in, inlen));
|
||||
TRY(rx_blake2b_final(&blake_state, out, outlen));
|
||||
}
|
||||
else {
|
||||
uint32_t toproduce;
|
||||
uint8_t out_buffer[BLAKE2B_OUTBYTES];
|
||||
uint8_t in_buffer[BLAKE2B_OUTBYTES];
|
||||
TRY(rx_blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
|
||||
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||
TRY(rx_blake2b_update(&blake_state, in, inlen));
|
||||
TRY(rx_blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
|
||||
TRY(rx_blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
|
||||
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||
TRY(rx_blake2b_update(&blake_state, in, inlen));
|
||||
TRY(rx_blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
|
||||
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
|
||||
out += BLAKE2B_OUTBYTES / 2;
|
||||
toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
|
||||
|
||||
while (toproduce > BLAKE2B_OUTBYTES) {
|
||||
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
|
||||
TRY(rx_blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
|
||||
BLAKE2B_OUTBYTES, NULL, 0));
|
||||
TRY(rx_blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
|
||||
BLAKE2B_OUTBYTES));
|
||||
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
|
||||
out += BLAKE2B_OUTBYTES / 2;
|
||||
toproduce -= BLAKE2B_OUTBYTES / 2;
|
||||
}
|
||||
|
||||
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
|
||||
TRY(rx_blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
|
||||
0));
|
||||
TRY(rx_blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES));
|
||||
memcpy(out, out_buffer, toproduce);
|
||||
}
|
||||
fail:
|
||||
|
|
|
@ -55,7 +55,7 @@ namespace randomx {
|
|||
|
||||
void Blake2Generator::checkData(const size_t bytesNeeded) {
|
||||
if (dataIndex + bytesNeeded > sizeof(data)) {
|
||||
rx_blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
|
||||
rx_blake2b(data, sizeof(data), data, sizeof(data));
|
||||
dataIndex = 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "crypto/randomx/program.hpp"
|
||||
#include "crypto/randomx/reciprocal.h"
|
||||
#include "crypto/randomx/virtual_memory.hpp"
|
||||
#include "base/tools/Profiler.h"
|
||||
|
||||
#ifdef XMRIG_FIX_RYZEN
|
||||
# include "crypto/rx/Rx.h"
|
||||
|
@ -255,6 +256,8 @@ namespace randomx {
|
|||
}
|
||||
|
||||
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
|
||||
PROFILE_SCOPE(RandomX_JIT_compile);
|
||||
|
||||
vm_flags = flags;
|
||||
|
||||
generateProgramPrologue(prog, pcfg);
|
||||
|
|
|
@ -47,6 +47,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include <cassert>
|
||||
|
||||
#include "base/tools/Profiler.h"
|
||||
|
||||
RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
|
||||
{
|
||||
ArgonSalt = "RandomWOW\x01";
|
||||
|
@ -574,33 +576,35 @@ extern "C" {
|
|||
assert(inputSize == 0 || input != nullptr);
|
||||
assert(output != nullptr);
|
||||
alignas(16) uint64_t tempHash[8];
|
||||
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
|
||||
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), input, inputSize);
|
||||
machine->initScratchpad(&tempHash);
|
||||
machine->resetRoundingMode();
|
||||
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
|
||||
machine->run(&tempHash);
|
||||
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
|
||||
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile));
|
||||
}
|
||||
machine->run(&tempHash);
|
||||
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
|
||||
machine->getFinalResult(output);
|
||||
}
|
||||
|
||||
void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize) {
|
||||
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
|
||||
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), input, inputSize);
|
||||
machine->initScratchpad(tempHash);
|
||||
}
|
||||
|
||||
void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output) {
|
||||
PROFILE_SCOPE(RandomX_hash);
|
||||
|
||||
machine->resetRoundingMode();
|
||||
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
|
||||
machine->run(&tempHash);
|
||||
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
|
||||
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile));
|
||||
}
|
||||
machine->run(&tempHash);
|
||||
|
||||
// Finish current hash and fill the scratchpad for the next hash at the same time
|
||||
rx_blake2b(tempHash, sizeof(tempHash), nextInput, nextInputSize, nullptr, 0);
|
||||
machine->hashAndFill(output, RANDOMX_HASH_SIZE, tempHash);
|
||||
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), nextInput, nextInputSize);
|
||||
machine->hashAndFill(output, tempHash);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "crypto/randomx/blake2/blake2.h"
|
||||
#include "crypto/randomx/intrin_portable.h"
|
||||
#include "crypto/randomx/allocator.hpp"
|
||||
#include "base/tools/Profiler.h"
|
||||
|
||||
randomx_vm::~randomx_vm() {
|
||||
|
||||
|
@ -109,15 +110,15 @@ namespace randomx {
|
|||
}
|
||||
|
||||
template<bool softAes>
|
||||
void VmBase<softAes>::getFinalResult(void* out, size_t outSize) {
|
||||
void VmBase<softAes>::getFinalResult(void* out) {
|
||||
hashAes1Rx4<softAes>(scratchpad, ScratchpadSize, ®.a);
|
||||
rx_blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0);
|
||||
rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, ®, sizeof(RegisterFile));
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void VmBase<softAes>::hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) {
|
||||
void VmBase<softAes>::hashAndFill(void* out, uint64_t (&fill_state)[8]) {
|
||||
hashAndFillAes1Rx4<softAes>(scratchpad, ScratchpadSize, ®.a, fill_state);
|
||||
rx_blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0);
|
||||
rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, ®, sizeof(RegisterFile));
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
|
@ -127,6 +128,7 @@ namespace randomx {
|
|||
|
||||
template<bool softAes>
|
||||
void VmBase<softAes>::generateProgram(void* seed) {
|
||||
PROFILE_SCOPE(RandomX_generate_program);
|
||||
fillAes4Rx4<softAes>(seed, 128 + RandomX_CurrentConfig.ProgramSize * 8, &program);
|
||||
}
|
||||
|
||||
|
|
|
@ -38,8 +38,8 @@ class randomx_vm
|
|||
public:
|
||||
virtual ~randomx_vm() = 0;
|
||||
virtual void setScratchpad(uint8_t *scratchpad) = 0;
|
||||
virtual void getFinalResult(void* out, size_t outSize) = 0;
|
||||
virtual void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) = 0;
|
||||
virtual void getFinalResult(void* out) = 0;
|
||||
virtual void hashAndFill(void* out, uint64_t (&fill_state)[8]) = 0;
|
||||
virtual void setDataset(randomx_dataset* dataset) { }
|
||||
virtual void setCache(randomx_cache* cache) { }
|
||||
virtual void initScratchpad(void* seed) = 0;
|
||||
|
@ -86,8 +86,8 @@ namespace randomx {
|
|||
~VmBase() override;
|
||||
void setScratchpad(uint8_t *scratchpad) override;
|
||||
void initScratchpad(void* seed) override;
|
||||
void getFinalResult(void* out, size_t outSize) override;
|
||||
void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) override;
|
||||
void getFinalResult(void* out) override;
|
||||
void hashAndFill(void* out, uint64_t (&fill_state)[8]) override;
|
||||
|
||||
protected:
|
||||
void generateProgram(void* seed);
|
||||
|
|
|
@ -28,6 +28,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "crypto/randomx/vm_compiled.hpp"
|
||||
#include "crypto/randomx/common.hpp"
|
||||
#include "base/tools/Profiler.h"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
|
@ -41,6 +42,8 @@ namespace randomx {
|
|||
|
||||
template<bool softAes>
|
||||
void CompiledVm<softAes>::run(void* seed) {
|
||||
PROFILE_SCOPE(RandomX_run);
|
||||
|
||||
compiler.prepare();
|
||||
VmBase<softAes>::generateProgram(seed);
|
||||
randomx_vm::initialize();
|
||||
|
@ -51,6 +54,8 @@ namespace randomx {
|
|||
|
||||
template<bool softAes>
|
||||
void CompiledVm<softAes>::execute() {
|
||||
PROFILE_SCOPE(RandomX_JIT_execute);
|
||||
|
||||
#ifdef XMRIG_ARM
|
||||
memcpy(reg.f, config.eMask, sizeof(config.eMask));
|
||||
#endif
|
||||
|
|
|
@ -120,8 +120,8 @@ bool xmrig::RxConfig::read(const rapidjson::Value &value)
|
|||
}
|
||||
# endif
|
||||
|
||||
const int mode = Json::getInt(value, kScratchpadPrefetchMode, static_cast<int>(m_scratchpadPrefetchMode));
|
||||
if ((mode >= ScratchpadPrefetchOff) && (mode < ScratchpadPrefetchMax)) {
|
||||
const uint32_t mode = static_cast<uint32_t>(Json::getInt(value, kScratchpadPrefetchMode, static_cast<int>(m_scratchpadPrefetchMode)));
|
||||
if (mode < ScratchpadPrefetchMax) {
|
||||
m_scratchpadPrefetchMode = static_cast<ScratchpadPrefetchMode>(mode);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue