RandomX: optimized program generation

This commit is contained in:
SChernykh 2023-05-21 00:39:32 +02:00
parent 7b51e23aa0
commit 5dcbab7e3a
4 changed files with 113 additions and 73 deletions

View file

@ -34,6 +34,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "base/tools/Chrono.h"
#include "crypto/randomx/randomx.h"
#include "crypto/randomx/soft_aes.h"
#include "crypto/randomx/instruction.hpp"
#include "crypto/randomx/common.hpp"
#include "crypto/rx/Profiler.h"
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
@ -165,6 +167,17 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer);
template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
static const rx_vec_i128 inst_mask = []() {
constexpr randomx::Instruction inst{ 0xFF, randomx::RegistersCount - 1, randomx::RegistersCount - 1, 0xFF, 0xFFFFFFFFU };
union {
randomx::Instruction mask[2];
rx_vec_i128 vec;
} result = { inst, inst };
return result.vec;
}();
template<int softAes>
void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
const uint8_t* outptr = (uint8_t*)buffer;
@ -187,32 +200,41 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
state2 = rx_load_vec_i128((rx_vec_i128*)state + 2);
state3 = rx_load_vec_i128((rx_vec_i128*)state + 3);
while (outptr < outputEnd) {
state0 = aesdec<softAes>(state0, key0);
state1 = aesenc<softAes>(state1, key0);
state2 = aesdec<softAes>(state2, key4);
state3 = aesenc<softAes>(state3, key4);
state0 = aesdec<softAes>(state0, key1);
state1 = aesenc<softAes>(state1, key1);
state2 = aesdec<softAes>(state2, key5);
state3 = aesenc<softAes>(state3, key5);
state0 = aesdec<softAes>(state0, key2);
state1 = aesenc<softAes>(state1, key2);
state2 = aesdec<softAes>(state2, key6);
state3 = aesenc<softAes>(state3, key6);
state0 = aesdec<softAes>(state0, key3);
state1 = aesenc<softAes>(state1, key3);
state2 = aesdec<softAes>(state2, key7);
state3 = aesenc<softAes>(state3, key7);
#define TRANSFORM do { \
state0 = aesdec<softAes>(state0, key0); \
state1 = aesenc<softAes>(state1, key0); \
state2 = aesdec<softAes>(state2, key4); \
state3 = aesenc<softAes>(state3, key4); \
state0 = aesdec<softAes>(state0, key1); \
state1 = aesenc<softAes>(state1, key1); \
state2 = aesdec<softAes>(state2, key5); \
state3 = aesenc<softAes>(state3, key5); \
state0 = aesdec<softAes>(state0, key2); \
state1 = aesenc<softAes>(state1, key2); \
state2 = aesdec<softAes>(state2, key6); \
state3 = aesenc<softAes>(state3, key6); \
state0 = aesdec<softAes>(state0, key3); \
state1 = aesenc<softAes>(state1, key3); \
state2 = aesdec<softAes>(state2, key7); \
state3 = aesenc<softAes>(state3, key7); \
} while (0)
for (int i = 0; i < 2; ++i, outptr += 64) {
TRANSFORM;
rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2);
rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3);
}
const rx_vec_i128 mask = inst_mask;
while (outptr < outputEnd) {
TRANSFORM;
rx_store_vec_i128((rx_vec_i128*)outptr + 0, rx_and_vec_i128(state0, mask));
rx_store_vec_i128((rx_vec_i128*)outptr + 1, rx_and_vec_i128(state1, mask));
rx_store_vec_i128((rx_vec_i128*)outptr + 2, rx_and_vec_i128(state2, mask));
rx_store_vec_i128((rx_vec_i128*)outptr + 3, rx_and_vec_i128(state3, mask));
outptr += 64;
}
}