Initial ASM wrapper.

This commit is contained in:
XMRig 2018-09-24 09:51:21 +03:00
parent f163aad38c
commit ba65a34a01
7 changed files with 58 additions and 29 deletions

View file

@ -256,4 +256,4 @@ if (WITH_DEBUG_LOG)
endif() endif()
add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES}) add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES})
target_link_libraries(${PROJECT_NAME} ${${XMRIG_ASM_LIBRARY}} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB}) target_link_libraries(${PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})

View file

@ -17,7 +17,6 @@ if (WITH_ASM AND NOT XMRIG_ARM)
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C) set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
else() else()
# set(XMRIG_ASM_SOURCES "")
set(XMRIG_ASM_LIBRARY "") set(XMRIG_ASM_LIBRARY "")
add_definitions(/DXMRIG_NO_ASM) add_definitions(/DXMRIG_NO_ASM)
endif() endif()

View file

@ -22,8 +22,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef __CRYPTONIGHT_H__ #ifndef XMRIG_CRYPTONIGHT_H
#define __CRYPTONIGHT_H__ #define XMRIG_CRYPTONIGHT_H
#include <stddef.h> #include <stddef.h>
@ -31,9 +31,9 @@
struct cryptonight_ctx { struct cryptonight_ctx {
alignas(16) uint8_t state[200]; alignas(16) uint8_t state[224];
alignas(16) uint8_t* memory; alignas(16) uint8_t *memory;
}; };
#endif /* __CRYPTONIGHT_H__ */ #endif /* XMRIG_CRYPTONIGHT_H */

View file

@ -561,6 +561,31 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
} }
// Main-loop routines defined outside this translation unit; declared with C
// linkage so their symbol names are not C++-mangled. Each receives the hashing
// context and is called between scratchpad explode and implode.
// NOTE(review): presumably implemented in the project's assembly sources and
// tuned for Intel Ivy Bridge / AMD Ryzen respectively -- confirm against the
// asm files.
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx);
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx);
template<xmrig::Algo ALGO, xmrig::Variant VARIANT, xmrig::Assembly ASM>
inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
{
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
xmrig::keccak(input, size, ctx[0]->state);
cn_explode_scratchpad<ALGO, MEM, false>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
if (ASM == xmrig::ASM_INTEL) {
cnv2_mainloop_ivybridge_asm(ctx[0]);
}
else {
cnv2_mainloop_ryzen_asm(ctx[0]);
}
cn_implode_scratchpad<ALGO, MEM, false>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
xmrig::keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT> template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx) inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
{ {

View file

@ -48,10 +48,10 @@
punpcklqdq xmm4, xmm0 punpcklqdq xmm4, xmm0
movq xmm0, rcx movq xmm0, rcx
punpcklqdq xmm5, xmm0 punpcklqdq xmm5, xmm0
movdqu xmm6, XMMWORD PTR [r10+rbx]
ALIGN 16 ALIGN 16
main_loop_ivybridge: main_loop_ivybridge:
movdqu xmm6, XMMWORD PTR [r10+rbx]
lea rdx, QWORD PTR [r10+rbx] lea rdx, QWORD PTR [r10+rbx]
mov ecx, r10d mov ecx, r10d
mov eax, r10d mov eax, r10d
@ -63,28 +63,29 @@ main_loop_ivybridge:
movq xmm7, r8 movq xmm7, r8
punpcklqdq xmm7, xmm0 punpcklqdq xmm7, xmm0
aesenc xmm6, xmm7 aesenc xmm6, xmm7
movq rbp, xmm6
mov r9, rbp
and r9d, 2097136
movdqu xmm2, XMMWORD PTR [rcx+rbx]
movdqu xmm1, XMMWORD PTR [rax+rbx] movdqu xmm1, XMMWORD PTR [rax+rbx]
movdqu xmm0, XMMWORD PTR [r10+rbx] movdqu xmm0, XMMWORD PTR [r10+rbx]
paddq xmm1, xmm7 paddq xmm1, xmm7
movdqu xmm2, XMMWORD PTR [rcx+rbx]
paddq xmm0, xmm5 paddq xmm0, xmm5
paddq xmm2, xmm4 paddq xmm2, xmm4
movdqu XMMWORD PTR [rcx+rbx], xmm0 movdqu XMMWORD PTR [rcx+rbx], xmm0
movq rcx, xmm3
movdqu XMMWORD PTR [rax+rbx], xmm2 movdqu XMMWORD PTR [rax+rbx], xmm2
mov rax, rcx
movdqu XMMWORD PTR [r10+rbx], xmm1 movdqu XMMWORD PTR [r10+rbx], xmm1
mov r10, r9
xor r10d, 32
movq rcx, xmm3
mov rax, rcx
shl rax, 32 shl rax, 32
xor rdi, rax xor rdi, rax
movq rbp, xmm6
movdqa xmm0, xmm6 movdqa xmm0, xmm6
pxor xmm0, xmm4 pxor xmm0, xmm4
mov r10, rbp
and r10d, 2097136
movdqu XMMWORD PTR [rdx], xmm0 movdqu XMMWORD PTR [rdx], xmm0
xor rdi, QWORD PTR [r10+rbx] xor rdi, QWORD PTR [r9+rbx]
lea r14, QWORD PTR [r10+rbx] lea r14, QWORD PTR [r9+rbx]
xor r10d, 32
mov r12, QWORD PTR [r14+8] mov r12, QWORD PTR [r14+8]
xor edx, edx xor edx, edx
lea r9d, DWORD PTR [ecx+ecx] lea r9d, DWORD PTR [ecx+ecx]
@ -117,8 +118,15 @@ sqrt_fixup_ivybridge_ret:
mul rbp mul rbp
movq xmm2, rdx movq xmm2, rdx
xor rdx, [rcx+rbx] xor rdx, [rcx+rbx]
add r8, rdx
mov QWORD PTR [r14], r8
xor r8, rdi
mov edi, r8d
and edi, 2097136
movq xmm0, rax movq xmm0, rax
xor rax, [rcx+rbx+8] xor rax, [rcx+rbx+8]
add r11, rax
mov QWORD PTR [r14+8], r11
punpcklqdq xmm2, xmm0 punpcklqdq xmm2, xmm0
mov r9d, r10d mov r9d, r10d
@ -135,13 +143,8 @@ sqrt_fixup_ivybridge_ret:
movdqa xmm4, xmm6 movdqa xmm4, xmm6
movdqu XMMWORD PTR [rcx+rbx], xmm2 movdqu XMMWORD PTR [rcx+rbx], xmm2
movdqu XMMWORD PTR [r10+rbx], xmm1 movdqu XMMWORD PTR [r10+rbx], xmm1
add r8, rdx movdqu xmm6, [rdi+rbx]
mov QWORD PTR [r14], r8 mov r10d, edi
xor r8, rdi
mov r10, r8
add r11, rax
mov QWORD PTR [r14+8], r11
and r10d, 2097136
xor r11, r12 xor r11, r12
dec rsi dec rsi
jne main_loop_ivybridge jne main_loop_ivybridge

View file

@ -65,7 +65,8 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
{ {
assert(variant >= VARIANT_0 && variant < VARIANT_MAX); assert(variant >= VARIANT_0 && variant < VARIANT_MAX);
static const cn_hash_fun func_table[VARIANT_MAX * 10 * 3] = { constexpr const size_t count = VARIANT_MAX * 10 * 3;
static const cn_hash_fun func_table[count + 2] = {
cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_0>, cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_0>,
cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_0>, cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_0>,
cryptonight_single_hash<CRYPTONIGHT, true, VARIANT_0>, cryptonight_single_hash<CRYPTONIGHT, true, VARIANT_0>,
@ -242,6 +243,8 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
# endif # endif
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_INTEL>,
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_RYZEN>
}; };
const size_t index = VARIANT_MAX * 10 * algorithm + 10 * variant + av - 1; const size_t index = VARIANT_MAX * 10 * algorithm + 10 * variant + av - 1;

View file

@ -21,8 +21,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef __WORKER_H__ #ifndef XMRIG_WORKER_H
#define __WORKER_H__ #define XMRIG_WORKER_H
#include <atomic> #include <atomic>
@ -33,7 +33,6 @@
#include "Mem.h" #include "Mem.h"
struct cryptonight_ctx;
class Handle; class Handle;
@ -67,4 +66,4 @@ protected:
}; };
#endif /* __WORKER_H__ */ #endif /* XMRIG_WORKER_H */