diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cb5bd8a..09b88155 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # 1.8.0 - Integration of cnv2 aka moneroV8 -- Hashrate improve -> cnv1 and cnv2 by adding ASM code for Intel/AMD and softaes [thx @SChernykh] +- Hashrate improve -> all cnv1, cn-litev1, cnv2, xtl by adding ASM code for Intel/AMD and softaes [thx @SChernykh] - Fixes for OSX builds [thx @djfinch] - Fixed safe mode #173 # 1.7.0 diff --git a/src/crypto/CryptoNight.cpp b/src/crypto/CryptoNight.cpp index a4c7fe1e..e7589738 100644 --- a/src/crypto/CryptoNight.cpp +++ b/src/crypto/CryptoNight.cpp @@ -125,7 +125,15 @@ template static void cryptonight_lite_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) { # if !defined(XMRIG_ARMv7) if (powVersion == PowVariant::POW_V1) { +#if defined(XMRIG_ARM) CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); +#else + if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { + CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization); + } else { + CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); + } +#endif } else if (powVersion == PowVariant::POW_TUBE) { CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad); } else { @@ -137,7 +145,15 @@ static void cryptonight_lite_aesni(AsmOptimization asmOptimization, PowVariant p template static void cryptonight_lite_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) { if (powVersion == PowVariant::POW_V1) { +#if defined(XMRIG_ARM) CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); +#else + if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { + CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization); + } else { + CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); + } +#endif } else if (powVersion == PowVariant::POW_TUBE) { CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad); } else { diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h index 22d35c8b..eca53c83 100644 --- a/src/crypto/CryptoNight_x86.h +++ b/src/crypto/CryptoNight_x86.h @@ -51,10 +51,12 @@ extern "C" #ifndef XMRIG_NO_ASM void cnv1_mainloop_sandybridge_asm(ScratchPad* ctx0); + void cn_litev1_mainloop_sandybridge_asm(ScratchPad* ctx0); void cnv2_mainloop_ivybridge_asm(ScratchPad* ctx0); void cnv2_mainloop_ryzen_asm(ScratchPad* ctx0); void cnv2_double_mainloop_sandybridge_asm(ScratchPad* ctx0, ScratchPad* ctx1); void cnv1_mainloop_soft_aes_sandybridge_asm(ScratchPad* ctx0); + void cn_litev1_mainloop_soft_aes_sandybridge_asm(ScratchPad* ctx0); void cnv2_mainloop_soft_aes_sandybridge_asm(ScratchPad* ctx0); #endif } @@ -1419,9 +1421,18 @@ public: if (SOFT_AES) { scratchPad[0]->t_fn = (const uint32_t*)saes_table; - cnv1_mainloop_soft_aes_sandybridge_asm(scratchPad[0]); + + if (ITERATIONS == 0x80000) { + cnv1_mainloop_soft_aes_sandybridge_asm(scratchPad[0]); + } else if (ITERATIONS == 0x40000){ + cn_litev1_mainloop_soft_aes_sandybridge_asm(scratchPad[0]); + } } else { - cnv1_mainloop_sandybridge_asm(scratchPad[0]); + if (ITERATIONS == 0x80000) { + cnv1_mainloop_sandybridge_asm(scratchPad[0]); + } else if (ITERATIONS == 0x40000){ + cn_litev1_mainloop_sandybridge_asm(scratchPad[0]); + } } #endif diff --git a/src/crypto/asm/cn_litev1_mainloop_sandybridge.inc b/src/crypto/asm/cn_litev1_mainloop_sandybridge.inc new file mode 100644 index 00000000..2842d5fb --- /dev/null +++ b/src/crypto/asm/cn_litev1_mainloop_sandybridge.inc @@ -0,0 +1,74 @@ + mov QWORD PTR [rsp+8], rbx + mov QWORD PTR [rsp+16], rbp + mov QWORD PTR [rsp+24], rsi + mov QWORD PTR [rsp+32], rdi + push r14 + push r15 + mov rax, QWORD PTR [rcx+48] + mov ebp, 262144 + xor rax, QWORD PTR [rcx+16] + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + movq xmm3, rax + mov rax, QWORD PTR [rcx+256] + mov rdi, QWORD PTR [rcx+40] + movq xmm0, rdx + xor rdi, QWORD PTR [rcx+8] + mov rdx, r8 + mov r15, QWORD PTR [rcx+264] + and edx, 1048560 + mov r14, QWORD PTR [rax+35] + xor r14, QWORD PTR [rcx+192] + mov rsi, QWORD PTR [rcx+224] + punpcklqdq xmm3, xmm0 + movdqu xmm2, XMMWORD PTR [rdx+rsi] + + #ifdef __APPLE__ + ALIGN 16 + #else + ALIGN 64 + #endif +cn_litev1_mainloop_sandybridge: + movq xmm0, rdi + movq xmm1, r8 + punpcklqdq xmm1, xmm0 + aesenc xmm2, xmm1 + movq r10, xmm2 + mov r9d, r10d + and r9d, 1048560 + add r9, rsi + movdqa xmm0, xmm2 + pxor xmm0, xmm3 + movdqa xmm3, xmm2 + movdqu XMMWORD PTR [rdx+rsi], xmm0 + psrldq xmm0, 11 + movq rax, xmm0 + movzx eax, al + movzx eax, BYTE PTR [rax+r15] + mov BYTE PTR [rsi+rdx+11], al + mov rbx, QWORD PTR [r9] + mov r11, QWORD PTR [r9+8] + mov rax, rbx + mul r10 + add r8, rdx + mov QWORD PTR [r9], r8 + add rdi, rax + mov rax, r14 + xor rax, rdi + mov QWORD PTR [r9+8], rax + xor r8, rbx + mov rdx, r8 + and edx, 1048560 + movdqu xmm2, XMMWORD PTR [rdx+rsi] + xor rdi, r11 + dec ebp + jne cn_litev1_mainloop_sandybridge + + mov rbx, QWORD PTR [rsp+24] + mov rbp, QWORD PTR [rsp+32] + mov rsi, QWORD PTR [rsp+40] + mov rdi, QWORD PTR [rsp+48] + pop r15 + pop r14 diff --git a/src/crypto/asm/cn_litev1_mainloop_soft_aes_sandybridge.inc b/src/crypto/asm/cn_litev1_mainloop_soft_aes_sandybridge.inc new file mode 100644 index 00000000..e38dcd60 --- /dev/null +++ b/src/crypto/asm/cn_litev1_mainloop_soft_aes_sandybridge.inc @@ -0,0 +1,166 @@ + push rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 72 + + movaps XMMWORD PTR [rsp], xmm6 + movaps XMMWORD PTR [rsp+16], xmm7 + movaps XMMWORD PTR [rsp+32], xmm8 + movaps XMMWORD PTR [rsp+48], xmm9 + + mov rax, QWORD PTR [rcx+48] + xor rax, QWORD PTR [rcx+16] + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + movq xmm4, rax + mov rax, QWORD PTR [rcx+256] + mov r13, QWORD PTR [rcx+40] + movq xmm0, rdx + xor r13, QWORD PTR [rcx+8] + mov rdx, r8 + mov rdi, QWORD PTR [rcx+224] + and edx, 1048560 + mov rax, QWORD PTR [rax+35] + xor rax, QWORD PTR [rcx+192] + movq xmm5, rax + movq xmm8, rdi + punpcklqdq xmm4, xmm0 + mov QWORD PTR [rsp+64], rdx + + movq xmm6, rcx + mov rax, QWORD PTR [rcx+264] + movq xmm7, rax + + mov eax, 262144 + + #ifdef __APPLE__ + ALIGN 16 + #else + ALIGN 64 + #endif +cn_litev1_mainloop_soft_aes_sandybridge: + movq xmm9, rax + mov r12, QWORD PTR [rcx+272] + mov esi, DWORD PTR [rdx+rdi] + mov r10d, DWORD PTR [rdx+rdi+4] + mov ebp, DWORD PTR [rdx+rdi+12] + mov r14d, DWORD PTR [rdx+rdi+8] + mov rdx, QWORD PTR [rsp+64] + movzx ecx, sil + shr esi, 8 + mov r15d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + mov edi, DWORD PTR [r12+rcx*4] + movzx ecx, r14b + shr r14d, 8 + mov ebx, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + shr ebp, 8 + mov r9d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + xor r15d, DWORD PTR [r12+rcx*4+1024] + movzx ecx, r14b + shr r14d, 8 + mov eax, r14d + shr eax, 8 + xor edi, DWORD PTR [r12+rcx*4+1024] + add eax, 256 + movzx ecx, bpl + shr ebp, 8 + xor ebx, DWORD PTR [r12+rcx*4+1024] + movzx ecx, sil + shr esi, 8 + xor r9d, DWORD PTR [r12+rcx*4+1024] + add r12, 2048 + movzx ecx, r10b + shr r10d, 8 + add r10d, 256 + mov r11d, DWORD PTR [r12+rax*4] + xor r11d, DWORD PTR [r12+rcx*4] + xor r11d, r9d + movzx ecx, sil + mov r10d, DWORD PTR [r12+r10*4] + shr esi, 8 + add esi, 256 + xor r10d, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + xor r10d, ebx + shr ebp, 8 + add ebp, 256 + movd xmm1, r11d + mov r9d, DWORD PTR [r12+rcx*4] + xor r9d, DWORD PTR [r12+rsi*4] + mov eax, DWORD PTR [r12+rbp*4] + xor r9d, edi + movq rdi, xmm8 + movzx ecx, r14b + movd xmm0, r10d + movd xmm2, r9d + punpckldq xmm2, xmm1 + movq xmm1, r8 + xor eax, DWORD PTR [r12+rcx*4] + xor eax, r15d + movd xmm3, eax + movq rax, xmm7 + punpckldq xmm3, xmm0 + movq xmm0, r13 + punpcklqdq xmm1, xmm0 + punpckldq xmm3, xmm2 + pxor xmm3, xmm1 + movq r9, xmm3 + mov r10d, r9d + and r10d, 1048560 + movdqa xmm0, xmm3 + pxor xmm0, xmm4 + movdqu XMMWORD PTR [rdx+rdi], xmm0 + psrldq xmm0, 11 + movq rcx, xmm0 + movzx ecx, cl + mov cl, BYTE PTR [rcx+rax] + mov BYTE PTR [rdi+rdx+11], cl + mov rbx, QWORD PTR [r10+rdi] + mov rcx, r9 + lea r9, QWORD PTR [r10+rdi] + mov r11, QWORD PTR [r9+8] + mov rax, rbx + movdqa xmm4, xmm3 + mul rcx + movq rcx, xmm6 + add r8, rdx + add r13, rax + movq rax, xmm5 + xor rax, r13 + mov QWORD PTR [r9], r8 + xor r8, rbx + mov QWORD PTR [r9+8], rax + movq rax, xmm9 + mov rdx, r8 + xor r13, r11 + and edx, 1048560 + mov QWORD PTR [rsp+64], rdx + sub eax, 1 + jne cn_litev1_mainloop_soft_aes_sandybridge + + movaps xmm6, XMMWORD PTR [rsp] + movaps xmm7, XMMWORD PTR [rsp+16] + movaps xmm8, XMMWORD PTR [rsp+32] + movaps xmm9, XMMWORD PTR [rsp+48] + + add rsp, 72 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + pop rbx diff --git a/src/crypto/asm/cn_main_loop.S b/src/crypto/asm/cn_main_loop.S index 54e35b9b..c8087dcc 100644 --- a/src/crypto/asm/cn_main_loop.S +++ b/src/crypto/asm/cn_main_loop.S @@ -8,11 +8,13 @@ .section .text #endif .global FN_PREFIX(cnv1_mainloop_sandybridge_asm) +.global FN_PREFIX(cn_litev1_mainloop_sandybridge_asm) .global FN_PREFIX(cnv2_mainloop_ivybridge_asm) .global FN_PREFIX(cnv2_mainloop_ryzen_asm) .global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm) .global FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm) +.global FN_PREFIX(cn_litev1_mainloop_soft_aes_sandybridge_asm) .global FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm) #ifdef __APPLE__ @@ -26,6 +28,19 @@ FN_PREFIX(cnv1_mainloop_sandybridge_asm): #include "cnv1_mainloop_sandybridge.inc" add rsp, 48 ret 0 + +#ifdef __APPLE__ +ALIGN 16 +#else +ALIGN 64 +#endif +FN_PREFIX(cn_litev1_mainloop_sandybridge_asm): + sub rsp, 48 + mov rcx, rdi + #include "cn_litev1_mainloop_sandybridge.inc" + add rsp, 48 + ret 0 + #ifdef __APPLE__ ALIGN 16 #else @@ -75,6 +90,18 @@ FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm): add rsp, 48 ret 0 +#ifdef __APPLE__ +ALIGN 16 +#else +ALIGN 64 +#endif +FN_PREFIX(cn_litev1_mainloop_soft_aes_sandybridge_asm): + sub rsp, 48 + mov rcx, rdi + #include "cn_litev1_mainloop_soft_aes_sandybridge.inc" + add rsp, 48 + ret 0 + #ifdef __APPLE__ ALIGN 16 #else diff --git a/src/crypto/asm/win/cn_litev1_mainloop_sandybridge.inc b/src/crypto/asm/win/cn_litev1_mainloop_sandybridge.inc new file mode 100644 index 00000000..289c3de8 --- /dev/null +++ b/src/crypto/asm/win/cn_litev1_mainloop_sandybridge.inc @@ -0,0 +1,70 @@ + mov QWORD PTR [rsp+8], rbx + mov QWORD PTR [rsp+16], rbp + mov QWORD PTR [rsp+24], rsi + mov QWORD PTR [rsp+32], rdi + push r14 + push r15 + mov rax, QWORD PTR [rcx+48] + mov ebp, 262144 + xor rax, QWORD PTR [rcx+16] + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + movq xmm3, rax + mov rax, QWORD PTR [rcx+256] + mov rdi, QWORD PTR [rcx+40] + movq xmm0, rdx + xor rdi, QWORD PTR [rcx+8] + mov rdx, r8 + mov r15, QWORD PTR [rcx+264] + and edx, 1048560 + mov r14, QWORD PTR [rax+35] + xor r14, QWORD PTR [rcx+192] + mov rsi, QWORD PTR [rcx+224] + punpcklqdq xmm3, xmm0 + movdqu xmm2, XMMWORD PTR [rdx+rsi] + + ALIGN 64 +cn_litev1_mainloop_sandybridge: + movq xmm0, rdi + movq xmm1, r8 + punpcklqdq xmm1, xmm0 + aesenc xmm2, xmm1 + movq r10, xmm2 + mov r9d, r10d + and r9d, 1048560 + add r9, rsi + movdqa xmm0, xmm2 + pxor xmm0, xmm3 + movdqa xmm3, xmm2 + movdqu XMMWORD PTR [rdx+rsi], xmm0 + psrldq xmm0, 11 + movq rax, xmm0 + movzx eax, al + movzx eax, BYTE PTR [rax+r15] + mov BYTE PTR [rsi+rdx+11], al + mov rbx, QWORD PTR [r9] + mov r11, QWORD PTR [r9+8] + mov rax, rbx + mul r10 + add r8, rdx + mov QWORD PTR [r9], r8 + add rdi, rax + mov rax, r14 + xor rax, rdi + mov QWORD PTR [r9+8], rax + xor r8, rbx + mov rdx, r8 + and edx, 1048560 + movdqu xmm2, XMMWORD PTR [rdx+rsi] + xor rdi, r11 + dec ebp + jne cn_litev1_mainloop_sandybridge + + mov rbx, QWORD PTR [rsp+24] + mov rbp, QWORD PTR [rsp+32] + mov rsi, QWORD PTR [rsp+40] + mov rdi, QWORD PTR [rsp+48] + pop r15 + pop r14 diff --git a/src/crypto/asm/win/cn_litev1_mainloop_soft_aes_sandybridge.inc b/src/crypto/asm/win/cn_litev1_mainloop_soft_aes_sandybridge.inc new file mode 100644 index 00000000..d75b1def --- /dev/null +++ b/src/crypto/asm/win/cn_litev1_mainloop_soft_aes_sandybridge.inc @@ -0,0 +1,162 @@ + push rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 72 + + movaps XMMWORD PTR [rsp], xmm6 + movaps XMMWORD PTR [rsp+16], xmm7 + movaps XMMWORD PTR [rsp+32], xmm8 + movaps XMMWORD PTR [rsp+48], xmm9 + + mov rax, QWORD PTR [rcx+48] + xor rax, QWORD PTR [rcx+16] + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + movq xmm4, rax + mov rax, QWORD PTR [rcx+256] + mov r13, QWORD PTR [rcx+40] + movq xmm0, rdx + xor r13, QWORD PTR [rcx+8] + mov rdx, r8 + mov rdi, QWORD PTR [rcx+224] + and edx, 1048560 + mov rax, QWORD PTR [rax+35] + xor rax, QWORD PTR [rcx+192] + movq xmm5, rax + movq xmm8, rdi + punpcklqdq xmm4, xmm0 + mov QWORD PTR [rsp+64], rdx + + movq xmm6, rcx + mov rax, QWORD PTR [rcx+264] + movq xmm7, rax + + mov eax, 262144 + + ALIGN 64 +cn_litev1_mainloop_soft_aes_sandybridge: + movq xmm9, rax + mov r12, QWORD PTR [rcx+272] + mov esi, DWORD PTR [rdx+rdi] + mov r10d, DWORD PTR [rdx+rdi+4] + mov ebp, DWORD PTR [rdx+rdi+12] + mov r14d, DWORD PTR [rdx+rdi+8] + mov rdx, QWORD PTR [rsp+64] + movzx ecx, sil + shr esi, 8 + mov r15d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + mov edi, DWORD PTR [r12+rcx*4] + movzx ecx, r14b + shr r14d, 8 + mov ebx, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + shr ebp, 8 + mov r9d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + xor r15d, DWORD PTR [r12+rcx*4+1024] + movzx ecx, r14b + shr r14d, 8 + mov eax, r14d + shr eax, 8 + xor edi, DWORD PTR [r12+rcx*4+1024] + add eax, 256 + movzx ecx, bpl + shr ebp, 8 + xor ebx, DWORD PTR [r12+rcx*4+1024] + movzx ecx, sil + shr esi, 8 + xor r9d, DWORD PTR [r12+rcx*4+1024] + add r12, 2048 + movzx ecx, r10b + shr r10d, 8 + add r10d, 256 + mov r11d, DWORD PTR [r12+rax*4] + xor r11d, DWORD PTR [r12+rcx*4] + xor r11d, r9d + movzx ecx, sil + mov r10d, DWORD PTR [r12+r10*4] + shr esi, 8 + add esi, 256 + xor r10d, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + xor r10d, ebx + shr ebp, 8 + add ebp, 256 + movd xmm1, r11d + mov r9d, DWORD PTR [r12+rcx*4] + xor r9d, DWORD PTR [r12+rsi*4] + mov eax, DWORD PTR [r12+rbp*4] + xor r9d, edi + movq rdi, xmm8 + movzx ecx, r14b + movd xmm0, r10d + movd xmm2, r9d + punpckldq xmm2, xmm1 + movq xmm1, r8 + xor eax, DWORD PTR [r12+rcx*4] + xor eax, r15d + movd xmm3, eax + movq rax, xmm7 + punpckldq xmm3, xmm0 + movq xmm0, r13 + punpcklqdq xmm1, xmm0 + punpckldq xmm3, xmm2 + pxor xmm3, xmm1 + movq r9, xmm3 + mov r10d, r9d + and r10d, 1048560 + movdqa xmm0, xmm3 + pxor xmm0, xmm4 + movdqu XMMWORD PTR [rdx+rdi], xmm0 + psrldq xmm0, 11 + movq rcx, xmm0 + movzx ecx, cl + mov cl, BYTE PTR [rcx+rax] + mov BYTE PTR [rdi+rdx+11], cl + mov rbx, QWORD PTR [r10+rdi] + mov rcx, r9 + lea r9, QWORD PTR [r10+rdi] + mov r11, QWORD PTR [r9+8] + mov rax, rbx + movdqa xmm4, xmm3 + mul rcx + movq rcx, xmm6 + add r8, rdx + add r13, rax + movq rax, xmm5 + xor rax, r13 + mov QWORD PTR [r9], r8 + xor r8, rbx + mov QWORD PTR [r9+8], rax + movq rax, xmm9 + mov rdx, r8 + xor r13, r11 + and edx, 1048560 + mov QWORD PTR [rsp+64], rdx + sub eax, 1 + jne cn_litev1_mainloop_soft_aes_sandybridge + + movaps xmm6, XMMWORD PTR [rsp] + movaps xmm7, XMMWORD PTR [rsp+16] + movaps xmm8, XMMWORD PTR [rsp+32] + movaps xmm9, XMMWORD PTR [rsp+48] + + add rsp, 72 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + pop rbx diff --git a/src/crypto/asm/win/cn_main_loop.asm b/src/crypto/asm/win/cn_main_loop.asm index da10db0b..18aaba3a 100644 --- a/src/crypto/asm/win/cn_main_loop.asm +++ b/src/crypto/asm/win/cn_main_loop.asm @@ -1,10 +1,12 @@ _TEXT_CN_MAINLOOP SEGMENT PAGE READ EXECUTE PUBLIC cnv1_mainloop_sandybridge_asm +PUBLIC cn_litev1_mainloop_sandybridge_asm PUBLIC cnv2_mainloop_ivybridge_asm PUBLIC cnv2_mainloop_ryzen_asm PUBLIC cnv2_double_mainloop_sandybridge_asm PUBLIC cnv1_mainloop_soft_aes_sandybridge_asm +PUBLIC cn_litev1_mainloop_soft_aes_sandybridge_asm PUBLIC cnv2_mainloop_soft_aes_sandybridge_asm ALIGN 64 @@ -13,6 +15,12 @@ cnv1_mainloop_sandybridge_asm PROC ret 0 cnv1_mainloop_sandybridge_asm ENDP +ALIGN 64 +cn_litev1_mainloop_sandybridge_asm PROC + INCLUDE cn_litev1_mainloop_sandybridge.inc + ret 0 +cn_litev1_mainloop_sandybridge_asm ENDP + ALIGN 64 cnv2_mainloop_ivybridge_asm PROC INCLUDE cnv2_main_loop_ivybridge.inc @@ -37,6 +45,12 @@ cnv1_mainloop_soft_aes_sandybridge_asm PROC ret 0 cnv1_mainloop_soft_aes_sandybridge_asm ENDP +ALIGN 64 +cn_litev1_mainloop_soft_aes_sandybridge_asm PROC + INCLUDE cn_litev1_mainloop_soft_aes_sandybridge.inc + ret 0 +cn_litev1_mainloop_soft_aes_sandybridge_asm ENDP + ALIGN 64 cnv2_mainloop_soft_aes_sandybridge_asm PROC INCLUDE cnv2_mainloop_soft_aes_sandybridge.inc diff --git a/src/crypto/asm/win/cn_main_loop_win_gcc.S b/src/crypto/asm/win/cn_main_loop_win_gcc.S index 2da15e1f..b9261409 100644 --- a/src/crypto/asm/win/cn_main_loop_win_gcc.S +++ b/src/crypto/asm/win/cn_main_loop_win_gcc.S @@ -4,11 +4,13 @@ .section .text .global FN_PREFIX(cnv1_mainloop_sandybridge_asm) +.global FN_PREFIX(cn_litev1_mainloop_sandybridge_asm) .global FN_PREFIX(cnv2_mainloop_ivybridge_asm) .global FN_PREFIX(cnv2_mainloop_ryzen_asm) .global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm) .global FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm) +.global FN_PREFIX(cn_litev1_mainloop_soft_aes_sandybridge_asm) .global FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm) ALIGN 64 @@ -16,6 +18,11 @@ FN_PREFIX(cnv1_mainloop_sandybridge_asm): #include "../cnv1_mainloop_sandybridge.inc" ret 0 +ALIGN 64 +FN_PREFIX(cn_litev1_mainloop_sandybridge_asm): + #include "../cn_litev1_mainloop_sandybridge.inc" + ret 0 + ALIGN 64 FN_PREFIX(cnv2_mainloop_ivybridge_asm): #include "../cnv2_main_loop_ivybridge.inc" @@ -36,6 +43,11 @@ FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm): #include "../cnv1_mainloop_soft_aes_sandybridge.inc" ret 0 +ALIGN 64 +FN_PREFIX(cn_litev1_mainloop_soft_aes_sandybridge_asm): + #include "../cn_litev1_mainloop_soft_aes_sandybridge.inc" + ret 0 + ALIGN 64 FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm): #include "../cnv2_mainloop_soft_aes_sandybridge.inc" diff --git a/src/version.h b/src/version.h index 0f590a2a..d7b4126d 100644 --- a/src/version.h +++ b/src/version.h @@ -36,7 +36,7 @@ #define APP_DESC "XMRigCC CPU miner" #define APP_COPYRIGHT "Copyright (C) 2017- BenDr0id" #endif -#define APP_VERSION "1.8.0_rc (based on XMRig)" +#define APP_VERSION "1.8.0 (based on XMRig)" #define APP_DOMAIN "" #define APP_SITE "https://github.com/Bendr0id/xmrigCC" #define APP_KIND "cpu"