Unified ASM function signatures

This commit is contained in:
SChernykh 2019-03-13 22:00:44 +01:00
parent 5d9ebb0b91
commit 1bb8f77b52
30 changed files with 121 additions and 55 deletions

View file

@ -36,8 +36,7 @@
#endif
struct cryptonight_ctx;
typedef void(*cn_mainloop_fun_ms_abi)(cryptonight_ctx*) ABI_ATTRIBUTE;
typedef void(*cn_mainloop_double_fun_ms_abi)(cryptonight_ctx*, cryptonight_ctx*) ABI_ATTRIBUTE;
typedef void(*cn_mainloop_fun_ms_abi)(cryptonight_ctx**) ABI_ATTRIBUTE;
struct cryptonight_r_data {
int variant;
@ -54,7 +53,7 @@ struct cryptonight_ctx {
const uint32_t* saes_table;
cn_mainloop_fun_ms_abi generated_code;
cn_mainloop_double_fun_ms_abi generated_code_double;
cn_mainloop_fun_ms_abi generated_code_double;
cryptonight_r_data generated_code_data;
cryptonight_r_data generated_code_double_data;
};

View file

@ -590,7 +590,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
}
ctx[0]->saes_table = (const uint32_t*)saes_table;
ctx[0]->generated_code(ctx[0]);
ctx[0]->generated_code(ctx);
} else {
#endif
@ -750,32 +750,32 @@ inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_
#ifndef XMRIG_NO_ASM
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx);
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx);
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx);
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1);
extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx *ctx);
extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1);
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx);
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm;
extern xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm;
extern xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ivybridge_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ryzen_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_bulldozer_asm;
extern xmrig::CpuThread::cn_mainloop_double_fun cn_zls_double_mainloop_sandybridge_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ivybridge_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ryzen_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_bulldozer_asm;
extern xmrig::CpuThread::cn_mainloop_double_fun cn_double_double_mainloop_sandybridge_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm;
void wow_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
@ -824,64 +824,64 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
if (VARIANT == xmrig::VARIANT_2) {
if (ASM == xmrig::ASM_INTEL) {
cnv2_mainloop_ivybridge_asm(ctx[0]);
cnv2_mainloop_ivybridge_asm(ctx);
}
else if (ASM == xmrig::ASM_RYZEN) {
cnv2_mainloop_ryzen_asm(ctx[0]);
cnv2_mainloop_ryzen_asm(ctx);
}
else {
cnv2_mainloop_bulldozer_asm(ctx[0]);
cnv2_mainloop_bulldozer_asm(ctx);
}
}
else if (VARIANT == xmrig::VARIANT_HALF) {
if (ASM == xmrig::ASM_INTEL) {
cn_half_mainloop_ivybridge_asm(ctx[0]);
cn_half_mainloop_ivybridge_asm(ctx);
}
else if (ASM == xmrig::ASM_RYZEN) {
cn_half_mainloop_ryzen_asm(ctx[0]);
cn_half_mainloop_ryzen_asm(ctx);
}
else {
cn_half_mainloop_bulldozer_asm(ctx[0]);
cn_half_mainloop_bulldozer_asm(ctx);
}
}
else if (VARIANT == xmrig::VARIANT_TRTL) {
if (ASM == xmrig::ASM_INTEL) {
cn_trtl_mainloop_ivybridge_asm(ctx[0]);
cn_trtl_mainloop_ivybridge_asm(ctx);
}
else if (ASM == xmrig::ASM_RYZEN) {
cn_trtl_mainloop_ryzen_asm(ctx[0]);
cn_trtl_mainloop_ryzen_asm(ctx);
}
else {
cn_trtl_mainloop_bulldozer_asm(ctx[0]);
cn_trtl_mainloop_bulldozer_asm(ctx);
}
}
else if (VARIANT == xmrig::VARIANT_RWZ) {
cnv2_rwz_mainloop_asm(ctx[0]);
cnv2_rwz_mainloop_asm(ctx);
}
else if (VARIANT == xmrig::VARIANT_ZLS) {
if (ASM == xmrig::ASM_INTEL) {
cn_zls_mainloop_ivybridge_asm(ctx[0]);
cn_zls_mainloop_ivybridge_asm(ctx);
}
else if (ASM == xmrig::ASM_RYZEN) {
cn_zls_mainloop_ryzen_asm(ctx[0]);
cn_zls_mainloop_ryzen_asm(ctx);
}
else {
cn_zls_mainloop_bulldozer_asm(ctx[0]);
cn_zls_mainloop_bulldozer_asm(ctx);
}
}
else if (VARIANT == xmrig::VARIANT_DOUBLE) {
if (ASM == xmrig::ASM_INTEL) {
cn_double_mainloop_ivybridge_asm(ctx[0]);
cn_double_mainloop_ivybridge_asm(ctx);
}
else if (ASM == xmrig::ASM_RYZEN) {
cn_double_mainloop_ryzen_asm(ctx[0]);
cn_double_mainloop_ryzen_asm(ctx);
}
else {
cn_double_mainloop_bulldozer_asm(ctx[0]);
cn_double_mainloop_bulldozer_asm(ctx);
}
}
else if (xmrig::cn_is_cryptonight_r<VARIANT>()) {
ctx[0]->generated_code(ctx[0]);
ctx[0]->generated_code(ctx);
}
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
@ -910,25 +910,25 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
cn_explode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory));
if (VARIANT == xmrig::VARIANT_2) {
cnv2_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
cnv2_double_mainloop_sandybridge_asm(ctx);
}
else if (VARIANT == xmrig::VARIANT_HALF) {
cn_half_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
cn_half_double_mainloop_sandybridge_asm(ctx);
}
else if (VARIANT == xmrig::VARIANT_TRTL) {
cn_trtl_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
cn_trtl_double_mainloop_sandybridge_asm(ctx);
}
else if (VARIANT == xmrig::VARIANT_RWZ) {
cnv2_rwz_double_mainloop_asm(ctx[0], ctx[1]);
cnv2_rwz_double_mainloop_asm(ctx);
}
else if (VARIANT == xmrig::VARIANT_ZLS) {
cn_zls_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
cn_zls_double_mainloop_sandybridge_asm(ctx);
}
else if (VARIANT == xmrig::VARIANT_DOUBLE) {
cn_double_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
cn_double_double_mainloop_sandybridge_asm(ctx);
}
else if (xmrig::cn_is_cryptonight_r<VARIANT>()) {
ctx[0]->generated_code_double(ctx[0], ctx[1]);
ctx[0]->generated_code_double(ctx);
}
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));

View file

@ -6,6 +6,8 @@ PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end)
ALIGN(64)
FN_PREFIX(CryptonightR_soft_aes_template_part1):
mov rcx, [rcx]
mov QWORD PTR [rsp+8], rcx
push rbx
push rbp

View file

@ -6,6 +6,8 @@ PUBLIC CryptonightR_soft_aes_template_end
ALIGN(64)
CryptonightR_soft_aes_template_part1:
mov rcx, [rcx]
mov QWORD PTR [rsp+8], rcx
push rbx
push rbp

View file

@ -12,6 +12,8 @@ PUBLIC FN_PREFIX(CryptonightR_template_double_end)
ALIGN(64)
FN_PREFIX(CryptonightR_template_part1):
mov rcx, [rcx]
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
@ -183,6 +185,9 @@ FN_PREFIX(CryptonightR_template_end):
ALIGN(64)
FN_PREFIX(CryptonightR_template_double_part1):
mov rdx, [rcx+8]
mov rcx, [rcx]
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi

View file

@ -12,6 +12,8 @@ PUBLIC CryptonightR_template_double_end
ALIGN(64)
CryptonightR_template_part1:
mov rcx, [rcx]
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
@ -183,6 +185,9 @@ CryptonightR_template_end:
ALIGN(64)
CryptonightR_template_double_part1:
mov rdx, [rcx+8]
mov rcx, [rcx]
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi

View file

@ -6,6 +6,8 @@ PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_end)
ALIGN(64)
FN_PREFIX(CryptonightWOW_soft_aes_template_part1):
mov rcx, [rcx]
mov QWORD PTR [rsp+8], rcx
push rbx
push rbp

View file

@ -6,6 +6,8 @@ PUBLIC CryptonightWOW_soft_aes_template_end
ALIGN(64)
CryptonightWOW_soft_aes_template_part1:
mov rcx, [rcx]
mov QWORD PTR [rsp+8], rcx
push rbx
push rbp

View file

@ -12,6 +12,8 @@ PUBLIC FN_PREFIX(CryptonightWOW_template_double_end)
ALIGN(64)
FN_PREFIX(CryptonightWOW_template_part1):
mov rcx, [rcx]
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
@ -165,6 +167,9 @@ FN_PREFIX(CryptonightWOW_template_end):
ALIGN(64)
FN_PREFIX(CryptonightWOW_template_double_part1):
mov rdx, [rcx+8]
mov rcx, [rcx]
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi

View file

@ -12,6 +12,8 @@ PUBLIC CryptonightWOW_template_double_end
ALIGN(64)
CryptonightWOW_template_part1:
mov rcx, [rcx]
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
@ -165,6 +167,9 @@ CryptonightWOW_template_end:
ALIGN(64)
CryptonightWOW_template_double_part1:
mov rdx, [rcx+8]
mov rcx, [rcx]
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi

View file

@ -1,3 +1,6 @@
mov rdx, [rcx+8]
mov rcx, [rcx]
mov rax, rsp
push rbx
push rbp

View file

@ -1,3 +1,5 @@
mov rcx, [rcx]
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi

View file

@ -1,3 +1,5 @@
mov rcx, [rcx]
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi

View file

@ -1,3 +1,5 @@
mov rcx, [rcx]
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi

View file

@ -1,3 +1,6 @@
mov rdx, [rcx+8]
mov rcx, [rcx]
mov rax, rsp
push rbx
push rbp

View file

@ -1,3 +1,5 @@
mov rcx, [rcx]
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi

View file

@ -49,7 +49,6 @@ ALIGN(64)
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
sub rsp, 48
mov rcx, rdi
mov rdx, rsi
#include "cn2/cnv2_double_main_loop_sandybridge.inc"
add rsp, 48
ret 0
@ -68,7 +67,6 @@ ALIGN(64)
FN_PREFIX(cnv2_rwz_double_mainloop_asm):
sub rsp, 48
mov rcx, rdi
mov rdx, rsi
#include "cn2/cnv2_rwz_double_main_loop.inc"
add rsp, 48
ret 0

View file

@ -6,6 +6,8 @@ PUBLIC CryptonightR_soft_aes_template_end
ALIGN(64)
CryptonightR_soft_aes_template_part1:
mov rcx, [rcx]
mov QWORD PTR [rsp+8], rcx
push rbx
push rbp

View file

@ -12,6 +12,8 @@ PUBLIC CryptonightR_template_double_end
ALIGN(64)
CryptonightR_template_part1:
mov rcx, [rcx]
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
@ -183,6 +185,9 @@ CryptonightR_template_end:
ALIGN(64)
CryptonightR_template_double_part1:
mov rdx, [rcx+8]
mov rcx, [rcx]
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi

View file

@ -6,6 +6,8 @@ PUBLIC CryptonightWOW_soft_aes_template_end
ALIGN(64)
CryptonightWOW_soft_aes_template_part1:
mov rcx, [rcx]
mov QWORD PTR [rsp+8], rcx
push rbx
push rbp

View file

@ -12,6 +12,8 @@ PUBLIC CryptonightWOW_template_double_end
ALIGN(64)
CryptonightWOW_template_part1:
mov rcx, [rcx]
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
@ -165,6 +167,9 @@ CryptonightWOW_template_end:
ALIGN(64)
CryptonightWOW_template_double_part1:
mov rdx, [rcx+8]
mov rcx, [rcx]
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi

View file

@ -1,3 +1,6 @@
mov rdx, [rcx+8]
mov rcx, [rcx]
mov rax, rsp
push rbx
push rbp

View file

@ -1,3 +1,5 @@
mov rcx, [rcx]
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi

View file

@ -1,3 +1,5 @@
mov rcx, [rcx]
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi

View file

@ -1,3 +1,5 @@
mov rcx, [rcx]
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi

View file

@ -1,3 +1,6 @@
mov rdx, [rcx+8]
mov rcx, [rcx]
mov rax, rsp
push rbx
push rbp

View file

@ -1,3 +1,5 @@
mov rcx, [rcx]
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi