Add ASM code.

This commit is contained in:
XMRig 2018-10-05 15:02:52 +03:00
parent e0dc51edf9
commit 11748fad78
14 changed files with 1062 additions and 26 deletions

View file

@ -33,6 +33,7 @@
# include "xmrig.h"
#endif
#include "cpu.h"
#include "crypto/c_blake256.h"
#include "crypto/c_groestl.h"
#include "crypto/c_jh.h"
@ -68,6 +69,13 @@ void cryptonight_lite_av4_v1(const uint8_t *input, size_t size, uint8_t *output,
#endif
#ifndef XMRIG_NO_ASM
void cryptonight_single_hash_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_single_hash_asm_ryzen(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_double_hash_asm(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
#endif
static inline bool verify(enum Variant variant, uint8_t *output, struct cryptonight_ctx **ctx, const uint8_t *referenceValue)
{
cn_hash_fun func = cryptonight_hash_fn(opt_algo, opt_av, variant);
@ -116,12 +124,46 @@ static bool self_test() {
}
size_t fn_index(enum Algo algorithm, enum AlgoVariant av, enum Variant variant, enum Assembly assembly)
{
const size_t index = VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1;
# ifndef XMRIG_NO_ASM
if (assembly == ASM_AUTO) {
assembly = cpu_info.assembly;
}
if (assembly == ASM_NONE) {
return index;
}
const size_t offset = VARIANT_MAX * 4 * 2;
if (algorithm == ALGO_CRYPTONIGHT && variant == VARIANT_2) {
if (av == AV_SINGLE) {
return offset + assembly - 2;
}
if (av == AV_DOUBLE) {
return offset + 2;
}
}
# endif
return index;
}
cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant)
{
assert(av > AV_AUTO && av < AV_MAX);
assert(variant > VARIANT_AUTO && variant < VARIANT_MAX);
# ifndef XMRIG_NO_ASM
static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2 + 3] = {
# else
static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2] = {
# endif
cryptonight_av1_v0,
cryptonight_av2_v0,
cryptonight_av3_v0,
@ -147,13 +189,31 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V
NULL,
NULL,
NULL,
NULL
NULL,
# else
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
# endif
# ifndef XMRIG_NO_ASM
cryptonight_single_hash_asm_intel,
cryptonight_single_hash_asm_ryzen,
cryptonight_double_hash_asm
# endif
};
const size_t index = VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1;
# ifndef NDEBUG
const size_t index = fn_index(algorithm, av, variant, opt_assembly);
cn_hash_fun func = func_table[index];
assert(index < sizeof(func_table) / sizeof(func_table[0]));
@ -161,7 +221,7 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V
return func;
# else
return func_table[index];
return func_table[fn_index(algorithm, av, variant, opt_assembly)];
# endif
}

View file

@ -191,3 +191,57 @@ void cryptonight_av1_v2(const uint8_t *restrict input, size_t size, uint8_t *res
keccakf(h0, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
#ifndef XMRIG_NO_ASM
extern void cnv2_mainloop_ivybridge_asm(struct cryptonight_ctx *ctx);
extern void cnv2_mainloop_ryzen_asm(struct cryptonight_ctx *ctx);
extern void cnv2_double_mainloop_sandybridge_asm(struct cryptonight_ctx* ctx0, struct cryptonight_ctx* ctx1);
void cryptonight_single_hash_asm_intel(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
cnv2_mainloop_ivybridge_asm(ctx[0]);
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf((uint64_t*) ctx[0]->state, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
void cryptonight_single_hash_asm_ryzen(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
cnv2_mainloop_ryzen_asm(ctx[0]);
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf((uint64_t*) ctx[0]->state, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
void cryptonight_double_hash_asm(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
keccak(input + size, size, ctx[1]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
cn_explode_scratchpad((__m128i*) ctx[1]->state, (__m128i*) ctx[1]->memory);
cnv2_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
cn_implode_scratchpad((__m128i*) ctx[1]->memory, (__m128i*) ctx[1]->state);
keccakf((uint64_t*) ctx[0]->state, 24);
keccakf((uint64_t*) ctx[1]->state, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}
#endif