diff --git a/CMakeLists.txt b/CMakeLists.txt index 22bf9a78..1b3fe6f2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -256,4 +256,4 @@ if (WITH_DEBUG_LOG) endif() add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES}) -target_link_libraries(${PROJECT_NAME} ${${XMRIG_ASM_LIBRARY}} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB}) +target_link_libraries(${PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB}) diff --git a/cmake/asm.cmake b/cmake/asm.cmake index 02093810..3a0bc894 100644 --- a/cmake/asm.cmake +++ b/cmake/asm.cmake @@ -17,7 +17,6 @@ if (WITH_ASM AND NOT XMRIG_ARM) set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C) else() -# set(XMRIG_ASM_SOURCES "") set(XMRIG_ASM_LIBRARY "") add_definitions(/DXMRIG_NO_ASM) endif() diff --git a/src/crypto/CryptoNight.h b/src/crypto/CryptoNight.h index e8e86dc4..680f1740 100644 --- a/src/crypto/CryptoNight.h +++ b/src/crypto/CryptoNight.h @@ -22,8 +22,8 @@ * along with this program. If not, see . */ -#ifndef __CRYPTONIGHT_H__ -#define __CRYPTONIGHT_H__ +#ifndef XMRIG_CRYPTONIGHT_H +#define XMRIG_CRYPTONIGHT_H #include @@ -31,9 +31,9 @@ struct cryptonight_ctx { - alignas(16) uint8_t state[200]; - alignas(16) uint8_t* memory; + alignas(16) uint8_t state[224]; + alignas(16) uint8_t *memory; }; -#endif /* __CRYPTONIGHT_H__ */ +#endif /* XMRIG_CRYPTONIGHT_H */ diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h index 1cb06687..064dbdc2 100644 --- a/src/crypto/CryptoNight_x86.h +++ b/src/crypto/CryptoNight_x86.h @@ -561,6 +561,31 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si } +extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx); +extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx); + + +template +inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx) +{ + constexpr size_t MEM = xmrig::cn_select_memory(); + + xmrig::keccak(input, size, ctx[0]->state); + cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); + + if (ASM == xmrig::ASM_INTEL) { + cnv2_mainloop_ivybridge_asm(ctx[0]); + } + else { + cnv2_mainloop_ryzen_asm(ctx[0]); + } + + cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); + xmrig::keccakf(reinterpret_cast(ctx[0]->state), 24); + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); +} + + template inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx) { diff --git a/src/crypto/asm/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/cnv2_main_loop_ivybridge.inc index a253a549..8c2c2d3b 100644 --- a/src/crypto/asm/cnv2_main_loop_ivybridge.inc +++ b/src/crypto/asm/cnv2_main_loop_ivybridge.inc @@ -48,10 +48,10 @@ punpcklqdq xmm4, xmm0 movq xmm0, rcx punpcklqdq xmm5, xmm0 + movdqu xmm6, XMMWORD PTR [r10+rbx] ALIGN 16 main_loop_ivybridge: - movdqu xmm6, XMMWORD PTR [r10+rbx] lea rdx, QWORD PTR [r10+rbx] mov ecx, r10d mov eax, r10d @@ -63,28 +63,29 @@ main_loop_ivybridge: movq xmm7, r8 punpcklqdq xmm7, xmm0 aesenc xmm6, xmm7 + movq rbp, xmm6 + mov r9, rbp + and r9d, 2097136 + movdqu xmm2, XMMWORD PTR [rcx+rbx] movdqu xmm1, XMMWORD PTR [rax+rbx] movdqu xmm0, XMMWORD PTR [r10+rbx] paddq xmm1, xmm7 - movdqu xmm2, XMMWORD PTR [rcx+rbx] paddq xmm0, xmm5 paddq xmm2, xmm4 movdqu XMMWORD PTR [rcx+rbx], xmm0 - movq rcx, xmm3 movdqu XMMWORD PTR [rax+rbx], xmm2 - mov rax, rcx movdqu XMMWORD PTR [r10+rbx], xmm1 + mov r10, r9 + xor r10d, 32 + movq rcx, xmm3 + mov rax, rcx shl rax, 32 xor rdi, rax - movq rbp, xmm6 movdqa xmm0, xmm6 pxor xmm0, xmm4 - mov r10, rbp - and r10d, 2097136 movdqu XMMWORD PTR [rdx], xmm0 - xor rdi, QWORD PTR [r10+rbx] - lea r14, QWORD PTR [r10+rbx] - xor r10d, 32 + xor rdi, QWORD PTR [r9+rbx] + lea r14, QWORD PTR [r9+rbx] mov r12, QWORD PTR [r14+8] xor edx, edx lea r9d, DWORD PTR [ecx+ecx] @@ -117,8 +118,15 @@ sqrt_fixup_ivybridge_ret: mul rbp movq xmm2, rdx xor rdx, [rcx+rbx] + add r8, rdx + mov QWORD PTR [r14], r8 + xor r8, rdi + mov edi, r8d + and edi, 2097136 movq xmm0, rax xor rax, [rcx+rbx+8] + add r11, rax + mov QWORD PTR [r14+8], r11 punpcklqdq xmm2, xmm0 mov r9d, r10d @@ -135,13 +143,8 @@ sqrt_fixup_ivybridge_ret: movdqa xmm4, xmm6 movdqu XMMWORD PTR [rcx+rbx], xmm2 movdqu XMMWORD PTR [r10+rbx], xmm1 - add r8, rdx - mov QWORD PTR [r14], r8 - xor r8, rdi - mov r10, r8 - add r11, rax - mov QWORD PTR [r14+8], r11 - and r10d, 2097136 + movdqu xmm6, [rdi+rbx] + mov r10d, edi xor r11, r12 dec rsi jne main_loop_ivybridge diff --git a/src/workers/CpuThread.cpp b/src/workers/CpuThread.cpp index ca7681f0..d9d60f51 100644 --- a/src/workers/CpuThread.cpp +++ b/src/workers/CpuThread.cpp @@ -65,7 +65,8 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a { assert(variant >= VARIANT_0 && variant < VARIANT_MAX); - static const cn_hash_fun func_table[VARIANT_MAX * 10 * 3] = { + constexpr const size_t count = VARIANT_MAX * 10 * 3; + static const cn_hash_fun func_table[count + 2] = { cryptonight_single_hash, cryptonight_double_hash, cryptonight_single_hash, @@ -242,6 +243,8 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, # endif + cryptonight_single_hash_asm, + cryptonight_single_hash_asm }; const size_t index = VARIANT_MAX * 10 * algorithm + 10 * variant + av - 1; diff --git a/src/workers/Worker.h b/src/workers/Worker.h index aad9e3c5..73e25033 100644 --- a/src/workers/Worker.h +++ b/src/workers/Worker.h @@ -21,8 +21,8 @@ * along with this program. If not, see . */ -#ifndef __WORKER_H__ -#define __WORKER_H__ +#ifndef XMRIG_WORKER_H +#define XMRIG_WORKER_H #include @@ -33,7 +33,6 @@ #include "Mem.h" -struct cryptonight_ctx; class Handle; @@ -67,4 +66,4 @@ protected: }; -#endif /* __WORKER_H__ */ +#endif /* XMRIG_WORKER_H */