Initial ASM wrapper.
This commit is contained in:
parent
f163aad38c
commit
ba65a34a01
7 changed files with 58 additions and 29 deletions
|
@ -256,4 +256,4 @@ if (WITH_DEBUG_LOG)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES})
|
add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES})
|
||||||
target_link_libraries(${PROJECT_NAME} ${${XMRIG_ASM_LIBRARY}} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})
|
target_link_libraries(${PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})
|
||||||
|
|
|
@ -17,7 +17,6 @@ if (WITH_ASM AND NOT XMRIG_ARM)
|
||||||
|
|
||||||
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
|
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
|
||||||
else()
|
else()
|
||||||
# set(XMRIG_ASM_SOURCES "")
|
|
||||||
set(XMRIG_ASM_LIBRARY "")
|
set(XMRIG_ASM_LIBRARY "")
|
||||||
add_definitions(/DXMRIG_NO_ASM)
|
add_definitions(/DXMRIG_NO_ASM)
|
||||||
endif()
|
endif()
|
||||||
|
|
|
@ -22,8 +22,8 @@
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef __CRYPTONIGHT_H__
|
#ifndef XMRIG_CRYPTONIGHT_H
|
||||||
#define __CRYPTONIGHT_H__
|
#define XMRIG_CRYPTONIGHT_H
|
||||||
|
|
||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
@ -31,9 +31,9 @@
|
||||||
|
|
||||||
|
|
||||||
struct cryptonight_ctx {
|
struct cryptonight_ctx {
|
||||||
alignas(16) uint8_t state[200];
|
alignas(16) uint8_t state[224];
|
||||||
alignas(16) uint8_t* memory;
|
alignas(16) uint8_t *memory;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#endif /* __CRYPTONIGHT_H__ */
|
#endif /* XMRIG_CRYPTONIGHT_H */
|
||||||
|
|
|
@ -561,6 +561,31 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx);
|
||||||
|
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx);
|
||||||
|
|
||||||
|
|
||||||
|
template<xmrig::Algo ALGO, xmrig::Variant VARIANT, xmrig::Assembly ASM>
|
||||||
|
inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
|
||||||
|
{
|
||||||
|
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
|
||||||
|
|
||||||
|
xmrig::keccak(input, size, ctx[0]->state);
|
||||||
|
cn_explode_scratchpad<ALGO, MEM, false>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||||
|
|
||||||
|
if (ASM == xmrig::ASM_INTEL) {
|
||||||
|
cnv2_mainloop_ivybridge_asm(ctx[0]);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
cnv2_mainloop_ryzen_asm(ctx[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
cn_implode_scratchpad<ALGO, MEM, false>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||||
|
xmrig::keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||||
|
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
|
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
|
||||||
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
|
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
|
||||||
{
|
{
|
||||||
|
|
|
@ -48,10 +48,10 @@
|
||||||
punpcklqdq xmm4, xmm0
|
punpcklqdq xmm4, xmm0
|
||||||
movq xmm0, rcx
|
movq xmm0, rcx
|
||||||
punpcklqdq xmm5, xmm0
|
punpcklqdq xmm5, xmm0
|
||||||
|
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
main_loop_ivybridge:
|
main_loop_ivybridge:
|
||||||
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
|
||||||
lea rdx, QWORD PTR [r10+rbx]
|
lea rdx, QWORD PTR [r10+rbx]
|
||||||
mov ecx, r10d
|
mov ecx, r10d
|
||||||
mov eax, r10d
|
mov eax, r10d
|
||||||
|
@ -63,28 +63,29 @@ main_loop_ivybridge:
|
||||||
movq xmm7, r8
|
movq xmm7, r8
|
||||||
punpcklqdq xmm7, xmm0
|
punpcklqdq xmm7, xmm0
|
||||||
aesenc xmm6, xmm7
|
aesenc xmm6, xmm7
|
||||||
|
movq rbp, xmm6
|
||||||
|
mov r9, rbp
|
||||||
|
and r9d, 2097136
|
||||||
|
movdqu xmm2, XMMWORD PTR [rcx+rbx]
|
||||||
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
||||||
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||||
paddq xmm1, xmm7
|
paddq xmm1, xmm7
|
||||||
movdqu xmm2, XMMWORD PTR [rcx+rbx]
|
|
||||||
paddq xmm0, xmm5
|
paddq xmm0, xmm5
|
||||||
paddq xmm2, xmm4
|
paddq xmm2, xmm4
|
||||||
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||||
movq rcx, xmm3
|
|
||||||
movdqu XMMWORD PTR [rax+rbx], xmm2
|
movdqu XMMWORD PTR [rax+rbx], xmm2
|
||||||
mov rax, rcx
|
|
||||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
mov r10, r9
|
||||||
|
xor r10d, 32
|
||||||
|
movq rcx, xmm3
|
||||||
|
mov rax, rcx
|
||||||
shl rax, 32
|
shl rax, 32
|
||||||
xor rdi, rax
|
xor rdi, rax
|
||||||
movq rbp, xmm6
|
|
||||||
movdqa xmm0, xmm6
|
movdqa xmm0, xmm6
|
||||||
pxor xmm0, xmm4
|
pxor xmm0, xmm4
|
||||||
mov r10, rbp
|
|
||||||
and r10d, 2097136
|
|
||||||
movdqu XMMWORD PTR [rdx], xmm0
|
movdqu XMMWORD PTR [rdx], xmm0
|
||||||
xor rdi, QWORD PTR [r10+rbx]
|
xor rdi, QWORD PTR [r9+rbx]
|
||||||
lea r14, QWORD PTR [r10+rbx]
|
lea r14, QWORD PTR [r9+rbx]
|
||||||
xor r10d, 32
|
|
||||||
mov r12, QWORD PTR [r14+8]
|
mov r12, QWORD PTR [r14+8]
|
||||||
xor edx, edx
|
xor edx, edx
|
||||||
lea r9d, DWORD PTR [ecx+ecx]
|
lea r9d, DWORD PTR [ecx+ecx]
|
||||||
|
@ -117,8 +118,15 @@ sqrt_fixup_ivybridge_ret:
|
||||||
mul rbp
|
mul rbp
|
||||||
movq xmm2, rdx
|
movq xmm2, rdx
|
||||||
xor rdx, [rcx+rbx]
|
xor rdx, [rcx+rbx]
|
||||||
|
add r8, rdx
|
||||||
|
mov QWORD PTR [r14], r8
|
||||||
|
xor r8, rdi
|
||||||
|
mov edi, r8d
|
||||||
|
and edi, 2097136
|
||||||
movq xmm0, rax
|
movq xmm0, rax
|
||||||
xor rax, [rcx+rbx+8]
|
xor rax, [rcx+rbx+8]
|
||||||
|
add r11, rax
|
||||||
|
mov QWORD PTR [r14+8], r11
|
||||||
punpcklqdq xmm2, xmm0
|
punpcklqdq xmm2, xmm0
|
||||||
|
|
||||||
mov r9d, r10d
|
mov r9d, r10d
|
||||||
|
@ -135,13 +143,8 @@ sqrt_fixup_ivybridge_ret:
|
||||||
movdqa xmm4, xmm6
|
movdqa xmm4, xmm6
|
||||||
movdqu XMMWORD PTR [rcx+rbx], xmm2
|
movdqu XMMWORD PTR [rcx+rbx], xmm2
|
||||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||||
add r8, rdx
|
movdqu xmm6, [rdi+rbx]
|
||||||
mov QWORD PTR [r14], r8
|
mov r10d, edi
|
||||||
xor r8, rdi
|
|
||||||
mov r10, r8
|
|
||||||
add r11, rax
|
|
||||||
mov QWORD PTR [r14+8], r11
|
|
||||||
and r10d, 2097136
|
|
||||||
xor r11, r12
|
xor r11, r12
|
||||||
dec rsi
|
dec rsi
|
||||||
jne main_loop_ivybridge
|
jne main_loop_ivybridge
|
||||||
|
|
|
@ -65,7 +65,8 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
||||||
{
|
{
|
||||||
assert(variant >= VARIANT_0 && variant < VARIANT_MAX);
|
assert(variant >= VARIANT_0 && variant < VARIANT_MAX);
|
||||||
|
|
||||||
static const cn_hash_fun func_table[VARIANT_MAX * 10 * 3] = {
|
constexpr const size_t count = VARIANT_MAX * 10 * 3;
|
||||||
|
static const cn_hash_fun func_table[count + 2] = {
|
||||||
cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_0>,
|
cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_0>,
|
||||||
cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_0>,
|
cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_0>,
|
||||||
cryptonight_single_hash<CRYPTONIGHT, true, VARIANT_0>,
|
cryptonight_single_hash<CRYPTONIGHT, true, VARIANT_0>,
|
||||||
|
@ -242,6 +243,8 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||||
# endif
|
# endif
|
||||||
|
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_INTEL>,
|
||||||
|
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_RYZEN>
|
||||||
};
|
};
|
||||||
|
|
||||||
const size_t index = VARIANT_MAX * 10 * algorithm + 10 * variant + av - 1;
|
const size_t index = VARIANT_MAX * 10 * algorithm + 10 * variant + av - 1;
|
||||||
|
|
|
@ -21,8 +21,8 @@
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef __WORKER_H__
|
#ifndef XMRIG_WORKER_H
|
||||||
#define __WORKER_H__
|
#define XMRIG_WORKER_H
|
||||||
|
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
@ -33,7 +33,6 @@
|
||||||
#include "Mem.h"
|
#include "Mem.h"
|
||||||
|
|
||||||
|
|
||||||
struct cryptonight_ctx;
|
|
||||||
class Handle;
|
class Handle;
|
||||||
|
|
||||||
|
|
||||||
|
@ -67,4 +66,4 @@ protected:
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#endif /* __WORKER_H__ */
|
#endif /* XMRIG_WORKER_H */
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue