From ff4058a2a9856a00e8234adeec1bebdb8c7a45f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ben=20Gr=C3=A4f?= Date: Wed, 24 Apr 2019 22:34:30 +0200 Subject: [PATCH] Integrated CN-UPX2/extremelite (#247) * WIP * Added win/asm for upx2 * Added donation servers and fixed windows ASM variant * #1.9.2 preparation --- CHANGELOG.md | 3 + cmake/asm.cmake | 23 +++++ src/Cpu.cpp | 3 + src/Mem.cpp | 3 + src/Options.cpp | 16 +++- src/Options.h | 1 + src/PowVariant.h | 5 ++ src/crypto/CryptoNight.cpp | 43 ++++++++- src/crypto/CryptoNight.h | 1 + src/crypto/CryptoNight_arm.h | 60 ++++++------- src/crypto/CryptoNight_test.h | 9 ++ src/crypto/CryptoNight_x86.h | 87 +++++++++++-------- src/crypto/asm/cn_main_loop.S | 40 +++++++-- ...c => cnv2_double_main_loop_rwz_all.inc.in} | 54 ++++++------ ..._all.inc => cnv2_main_loop_rwz_all.inc.in} | 24 ++--- src/crypto/asm/win/cn_main_loop.asm | 31 +++++-- src/crypto/asm/win/cn_main_loop_win_gcc.S | 25 ++++-- ...c => cnv2_double_main_loop_rwz_all.inc.in} | 54 ++++++------ ..._all.inc => cnv2_main_loop_rwz_all.inc.in} | 24 ++--- src/net/Job.cpp | 4 + src/net/strategies/DonateStrategy.cpp | 8 +- src/version.h | 4 +- 22 files changed, 349 insertions(+), 173 deletions(-) rename src/crypto/asm/{cnv2_double_main_loop_rwz_all.inc => cnv2_double_main_loop_rwz_all.inc.in} (91%) rename src/crypto/asm/{cnv2_main_loop_rwz_all.inc => cnv2_main_loop_rwz_all.inc.in} (91%) rename src/crypto/asm/win/{cnv2_double_main_loop_rwz_all.inc => cnv2_double_main_loop_rwz_all.inc.in} (91%) rename src/crypto/asm/win/{cnv2_main_loop_rwz_all.inc => cnv2_main_loop_rwz_all.inc.in} (91%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 900c1961..ef5b9bdf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +# 1.9.2 +- Integrated cn-extremelite a.k.a upx2 (algo: "cryptonight-extremelite", variant: "upx2") +- Integrated merged templates and replace of @WORKER-ID@ in template assignment # 1.9.1 - Fix coloring of outdated miners on Dashboard - Autodetect for fork of 
CN/R(variant: "auto"), Graft(variant: "rwz"), Zelerius(variant: "zls") diff --git a/cmake/asm.cmake b/cmake/asm.cmake index b5067939..8b11e2ee 100644 --- a/cmake/asm.cmake +++ b/cmake/asm.cmake @@ -131,6 +131,29 @@ configure_file("src/crypto/asm/win/cnv2_main_loop_ryzen.inc.in" "src/crypto/asm/ configure_file("src/crypto/asm/win/cnv2_double_main_loop_sandybridge.inc.in" "src/crypto/asm/win/cnv2_double_main_loop_ultralite_sandybridge.inc") configure_file("src/crypto/asm/win/cnv2_main_loop_soft_aes_sandybridge.inc.in" "src/crypto/asm/win/cnv2_main_loop_ultralite_soft_aes_sandybridge.inc") +# CN V2 RWZ +set(ALGO "original") +set(ITERATIONS "393216") #0x60000 +set(MASK "2097136") #0x1FFFF0 + +configure_file("src/crypto/asm/cnv2_main_loop_rwz_all.inc.in" "src/crypto/asm/cnv2_main_loop_rwz_original_all.inc") +configure_file("src/crypto/asm/cnv2_double_main_loop_rwz_all.inc.in" "src/crypto/asm/cnv2_double_main_loop_rwz_original_all.inc") + +configure_file("src/crypto/asm/win/cnv2_main_loop_rwz_all.inc.in" "src/crypto/asm/win/cnv2_main_loop_rwz_original_all.inc") +configure_file("src/crypto/asm/win/cnv2_double_main_loop_rwz_all.inc.in" "src/crypto/asm/win/cnv2_double_main_loop_rwz_original_all.inc") + + +# CN V2 UPX2 +set(ALGO "upx2") +set(ITERATIONS "16384") #0x4000 +set(MASK "131056") #0x1FFF0 + +configure_file("src/crypto/asm/cnv2_main_loop_rwz_all.inc.in" "src/crypto/asm/cnv2_main_loop_rwz_upx2_all.inc") +configure_file("src/crypto/asm/cnv2_double_main_loop_rwz_all.inc.in" "src/crypto/asm/cnv2_double_main_loop_rwz_upx2_all.inc") + +configure_file("src/crypto/asm/win/cnv2_main_loop_rwz_all.inc.in" "src/crypto/asm/win/cnv2_main_loop_rwz_upx2_all.inc") +configure_file("src/crypto/asm/win/cnv2_double_main_loop_rwz_all.inc.in" "src/crypto/asm/win/cnv2_double_main_loop_rwz_upx2_all.inc") + if (CMAKE_C_COMPILER_ID MATCHES MSVC) enable_language(ASM_MASM) set(XMRIG_ASM_FILE "src/crypto/asm/win/cn_main_loop.asm" diff --git a/src/Cpu.cpp b/src/Cpu.cpp index 1605330c..3e424ef3 
100644 --- a/src/Cpu.cpp +++ b/src/Cpu.cpp @@ -67,6 +67,9 @@ void CpuImpl::optimizeParameters(size_t& threadsCount, size_t& hashFactor, size_t cache = availableCache(); size_t algoBlockSize; switch (algo) { + case Options::ALGO_CRYPTONIGHT_EXTREMELITE: + algoBlockSize = 128; + break; case Options::ALGO_CRYPTONIGHT_ULTRALITE: algoBlockSize = 256; break; diff --git a/src/Mem.cpp b/src/Mem.cpp index cd82339c..09720241 100644 --- a/src/Mem.cpp +++ b/src/Mem.cpp @@ -44,6 +44,9 @@ ScratchPadMem Mem::create(ScratchPad** scratchPads, int threadId) case Options::ALGO_CRYPTONIGHT_ULTRALITE: scratchPadSize = MEMORY_ULTRA_LITE; break; + case Options::ALGO_CRYPTONIGHT_EXTREMELITE: + scratchPadSize = MEMORY_EXTREME_LITE; + break; case Options::ALGO_CRYPTONIGHT_SUPERLITE: scratchPadSize = MEMORY_SUPER_LITE; break; diff --git a/src/Options.cpp b/src/Options.cpp index 3619b293..143c2340 100644 --- a/src/Options.cpp +++ b/src/Options.cpp @@ -308,6 +308,7 @@ static const char *algo_names[] = { "cryptonight-lite", "cryptonight-superlite", "cryptonight-ultralite", + "cryptonight-extremelite", "cryptonight-heavy" }; @@ -316,6 +317,7 @@ static const char *algo_short_names[] = { "cn-lite", "cn-superlite", "cn-ultralite", + "cn-extremelite", "cn-heavy" }; @@ -337,7 +339,8 @@ constexpr static const char *pow_variant_names[] = { "hosp", "wow", "r", - "xcash" + "xcash", + "upx2" }; constexpr static const char *asm_optimization_names[] = { @@ -1112,12 +1115,16 @@ bool Options::setAlgo(const char *algo) break; } - if (i == ARRAY_SIZE(algo_names) - 1 && (!strcmp(algo, "cn-ultra-lite") || !strcmp(algo, "cryptonight-ultra-lite") || !strcmp(algo, "cryptonight-ultralight") || !strcmp(algo, "cryptonight-turtle") || !strcmp(algo, "cn-turtle") || !strcmp(algo, "cryptonight-pico") || !strcmp(algo, "cn-pico"))) { m_algo = ALGO_CRYPTONIGHT_ULTRALITE; break; } + if (i == ARRAY_SIZE(algo_names) - 1 && (!strcmp(algo, "cn-extreme-lite") || !strcmp(algo, "cryptonight-extreme-lite") || !strcmp(algo,
"cryptonight-extremelight") || !strcmp(algo, "cryptonight-upx2") || !strcmp(algo, "cn-upx2") || !strcmp(algo, "cryptonight-femto") || !strcmp(algo, "cn-femto"))) { + m_algo = ALGO_CRYPTONIGHT_EXTREMELITE; + break; + } + if (i == ARRAY_SIZE(algo_names) - 1 && (!strcmp(algo, "cryptonight-lite-ipbc") || !strcmp(algo, "cryptonight-light-ipbc") || !strcmp(algo, "cn-lite-ipbc"))) { showDeprecateWarning("cryptonight-light-ipbc", "cryptonight-light (with variant \"ipbc\")"); m_algo = ALGO_CRYPTONIGHT_LITE; @@ -1215,6 +1222,11 @@ bool Options::parsePowVariant(const char *powVariant) break; } + if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "upx2") || !strcmp(powVariant, "upxv2") || !strcmp(powVariant, "femto"))) { + m_powVariant = POW_UPX2; + break; + } + if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "hosp") || !strcmp(powVariant, "hospital"))) { m_powVariant = POW_HOSP; break; diff --git a/src/Options.h b/src/Options.h index 902eed3d..03d68967 100644 --- a/src/Options.h +++ b/src/Options.h @@ -50,6 +50,7 @@ public: ALGO_CRYPTONIGHT_LITE, /* CryptoNight-Lite (1MB ScratchPad) */ ALGO_CRYPTONIGHT_SUPERLITE, /* CryptoNight-Superlite (512KB ScratchPad) */ ALGO_CRYPTONIGHT_ULTRALITE, /* CryptoNight-Ultralite (256KB ScratchPad) */ + ALGO_CRYPTONIGHT_EXTREMELITE, /* CryptoNight-Extremelite (128KB ScratchPad) */ ALGO_CRYPTONIGHT_HEAVY, /* CryptoNight-Heavy (4MB ScratchPad) */ }; diff --git a/src/PowVariant.h b/src/PowVariant.h index 17ddec11..dee8f945 100644 --- a/src/PowVariant.h +++ b/src/PowVariant.h @@ -44,6 +44,7 @@ enum PowVariant POW_DOUBLE, POW_ZELERIUS, POW_RWZ, + POW_UPX2, LAST_ITEM }; @@ -89,6 +90,8 @@ inline std::string getPowVariantName(PowVariant powVariant) return "zls"; case POW_RWZ: return "rwz"; + case POW_UPX2: + return "upx2"; case POW_AUTODETECT: default: return "-1"; @@ -174,6 +177,8 @@ inline PowVariant parseVariant(const std::string variant) powVariant = PowVariant::POW_ZELERIUS; } else if (variant == "rwz" || variant ==
"graft") { powVariant = PowVariant::POW_RWZ; + } else if (variant == "upx2") { + powVariant = PowVariant::POW_UPX2; } return powVariant; diff --git a/src/crypto/CryptoNight.cpp b/src/crypto/CryptoNight.cpp index cd4b6699..e2ac9675 100644 --- a/src/crypto/CryptoNight.cpp +++ b/src/crypto/CryptoNight.cpp @@ -398,6 +398,26 @@ static void cryptonight_ultra_lite_softaes(AsmOptimization asmOptimization, uint #endif } +template <size_t NUM_HASH_BLOCKS> +static void cryptonight_extreme_lite_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) { +# if !defined(XMRIG_ARMv7) +#if defined(XMRIG_ARM) + CryptoNightMultiHash<0x4000, POW_DEFAULT_INDEX_SHIFT, MEMORY_EXTREME_LITE, 0x1FFF0, false, POW_UPX2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad); +#else + if ((asmOptimization != AsmOptimization::ASM_OFF && NUM_HASH_BLOCKS <= 2)) { + CryptoNightMultiHash<0x4000, POW_DEFAULT_INDEX_SHIFT, MEMORY_EXTREME_LITE, 0x1FFF0, false, POW_UPX2, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization); + } else { + CryptoNightMultiHash<0x4000, POW_DEFAULT_INDEX_SHIFT, MEMORY_EXTREME_LITE, 0x1FFF0, false, POW_UPX2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad); + } +#endif +# endif +} + +template <size_t NUM_HASH_BLOCKS> +static void cryptonight_extreme_lite_softaes(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) { + CryptoNightMultiHash<0x4000, POW_DEFAULT_INDEX_SHIFT, MEMORY_EXTREME_LITE, 0x1FFF0, true, POW_UPX2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad); +} + template <size_t NUM_HASH_BLOCKS> static void cryptonight_heavy_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) { # if !defined(XMRIG_ARMv7) @@ -464,6 +484,14 @@ void setCryptoNightHashMethods(Options::Algo algo, bool aesni) } break; +
case Options::ALGO_CRYPTONIGHT_EXTREMELITE: + if (aesni) { + cryptonight_hash_ctx[HASH_FACTOR - 1] = cryptonight_extreme_lite_aesni<HASH_FACTOR>; + } else { + cryptonight_hash_ctx[HASH_FACTOR - 1] = cryptonight_extreme_lite_softaes<HASH_FACTOR>; + } + break; + case Options::ALGO_CRYPTONIGHT_HEAVY: if (aesni) { cryptonight_hash_ctx[HASH_FACTOR - 1] = cryptonight_heavy_aesni<HASH_FACTOR>; @@ -546,6 +574,7 @@ bool CryptoNight::selfCheck(int algo) bool resultLite = true; bool resultSuperLite = true; bool resultUltraLite = true; + bool resultExtremeLite = true; bool resultHeavy = true; AsmOptimization asmOptimization = Options::i()->asmOptimization(); @@ -678,9 +707,7 @@ bool CryptoNight::selfCheck(int algo) resultLite = resultLite && memcmp(output, test_output_upx, 32) == 0; } else if (algo == Options::ALGO_CRYPTONIGHT_SUPERLITE) { - return false; - } else if (algo == Options::ALGO_CRYPTONIGHT_ULTRALITE) { // cn ultralite (cnv8 + turtle) @@ -691,6 +718,16 @@ bool CryptoNight::selfCheck(int algo) cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_TURTLE, test_input, 76, output, scratchPads); resultUltraLite = resultUltraLite && memcmp(output, test_output_turtle, 64) == 0; #endif + } else if (algo == Options::ALGO_CRYPTONIGHT_EXTREMELITE) { + // cn extremelite (cnv8 + upx2) + + cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_UPX2, test_input, 76, output, scratchPads); + resultExtremeLite = resultExtremeLite && memcmp(output, test_output_upx2, 32) == 0; + + #if MAX_NUM_HASH_BLOCKS > 1 + cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_UPX2, test_input, 76, output, scratchPads); + resultExtremeLite = resultExtremeLite && memcmp(output, test_output_upx2, 64) == 0; + #endif } else { // cn v0 aka orignal cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V0,test_input, 76, output, scratchPads); @@ -858,5 +895,5 @@ bool CryptoNight::selfCheck(int algo) _mm_free(scratchPads[i]); } - return
result && resultLite && resultSuperLite && resultUltraLite && resultExtremeLite && resultHeavy; } \ No newline at end of file diff --git a/src/crypto/CryptoNight.h b/src/crypto/CryptoNight.h index aaf29145..e9e4c5e2 100644 --- a/src/crypto/CryptoNight.h +++ b/src/crypto/CryptoNight.h @@ -37,6 +37,7 @@ #define MEMORY_LITE 1048576 /* 1 MiB */ #define MEMORY_SUPER_LITE 524288 /* 512 KiB */ #define MEMORY_ULTRA_LITE 262144 /* 256 KiB */ +#define MEMORY_EXTREME_LITE 131072 /* 128 KiB */ #define MEMORY_HEAVY 4194304 /* 4 MiB */ #define POW_DEFAULT_INDEX_SHIFT 3 diff --git a/src/crypto/CryptoNight_arm.h b/src/crypto/CryptoNight_arm.h index 881308cf..cf2b55e1 100644 --- a/src/crypto/CryptoNight_arm.h +++ b/src/crypto/CryptoNight_arm.h @@ -915,7 +915,7 @@ public: cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0)); } - SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ) + SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2) _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0)); @@ -929,7 +929,7 @@ public: lo = __umul128(idx0, cl, &hi); - SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al0 += hi; ah0 += lo; @@ -1533,8 +1533,8 @@ public: cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1)); } - SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ) + SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ || VARIANT == POW_UPX2) _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0)); _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1)); @@ -1550,7 +1550,7 @@ public: lo = __umul128(idx0, cl, &hi); - SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, 
ax0, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al0 += hi; ah0 += lo; @@ -1573,7 +1573,7 @@ public: lo = __umul128(idx1, cl, &hi); - SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al1 += hi; ah1 += lo; @@ -2479,9 +2479,9 @@ public: cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2)); } - SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ) + SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ || VARIANT == POW_UPX2) _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0)); _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1)); @@ -2499,7 +2499,7 @@ public: lo = __umul128(idx0, cl, &hi); - SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al0 += hi; ah0 += lo; @@ -2522,7 +2522,7 @@ public: lo = __umul128(idx1, cl, &hi); - SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al1 += hi; ah1 += lo; @@ -2545,7 +2545,7 @@ public: lo = __umul128(idx2, cl, &hi); - SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al2 += hi; ah2 += lo; @@ -3750,10 +3750,10 @@ public: cx3 = 
_mm_aesenc_si128(cx3, ax3); } - SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ) + SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ || VARIANT == POW_UPX2) _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0)); _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1)); @@ -3773,7 +3773,7 @@ public: lo = __umul128(idx0, cl, &hi); - SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al0 += hi; ah0 += lo; @@ -3796,7 +3796,7 @@ public: lo = __umul128(idx1, cl, &hi); - SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al1 += hi; ah1 += lo; @@ -3819,7 +3819,7 @@ public: lo = __umul128(idx2, cl, &hi); - SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al2 += hi; ah2 += lo; @@ -3842,7 +3842,7 @@ public: lo = __umul128(idx3, cl, &hi); - SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al3 += hi; ah3 += lo; @@ -4845,11 +4845,11 @@ public: cx4 = _mm_aesenc_si128(cx4, ax4); } - SHUFFLE_PHASE_1(l0, 
(idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l4, (idx4&MASK), bx04, bx14, ax4, VARIANT == POW_RWZ) + SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l4, (idx4&MASK), bx04, bx14, ax4, VARIANT == POW_RWZ || VARIANT == POW_UPX2) _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0)); _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1)); @@ -4871,7 +4871,7 @@ public: lo = __umul128(idx0, cl, &hi); - SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al0 += hi; ah0 += lo; @@ -4894,7 +4894,7 @@ public: lo = __umul128(idx1, cl, &hi); - SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al1 += hi; ah1 += lo; @@ -4917,7 +4917,7 @@ public: lo = __umul128(idx2, cl, &hi); - SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al2 += hi; ah2 += lo; @@ -4940,7 +4940,7 @@ public: lo = __umul128(idx3, cl, &hi); - SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) 
al3 += hi; ah3 += lo; @@ -4963,7 +4963,7 @@ public: lo = __umul128(idx4, cl, &hi); - SHUFFLE_PHASE_2(l4, (idx4&MASK), bx04, bx14, ax4, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l4, (idx4&MASK), bx04, bx14, ax4, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al4 += hi; ah4 += lo; diff --git a/src/crypto/CryptoNight_test.h b/src/crypto/CryptoNight_test.h index 373c51cc..1d7a1b5d 100644 --- a/src/crypto/CryptoNight_test.h +++ b/src/crypto/CryptoNight_test.h @@ -273,4 +273,13 @@ const static uint8_t test_output_turtle[64] = { 0xE3, 0x54, 0x58, 0x2B, 0xCB, 0x93, 0xF8, 0x69, 0xD4, 0x29, 0x74, 0x4D, 0xE5, 0x72, 0x6A, 0x26 }; + +// CN-Extremelite/UPX2 +const static uint8_t test_output_upx2[64] = { + 0xAA, 0xBB, 0xB8, 0xED, 0x14, 0xA8, 0x35, 0xFA, 0x22, 0xCF, 0xB1, 0xB5, 0xDE, 0xA8, 0x72, 0xB0, + 0xA1, 0xD6, 0xCB, 0xD8, 0x46, 0xF4, 0x39, 0x1C, 0x0F, 0x01, 0xF3, 0x87, 0x5E, 0x3A, 0x37, 0x61, + 0x38, 0x59, 0x15, 0x72, 0xF8, 0x20, 0xD4, 0xDE, 0x25, 0x3C, 0xF5, 0x5A, 0x21, 0x92, 0xB6, 0x22, + 0xB0, 0x28, 0x9E, 0x2E, 0x5C, 0x36, 0x16, 0xE6, 0x1E, 0x78, 0x7A, 0x8F, 0xE4, 0x62, 0xEC, 0x5A +}; + #endif /* __CRYPTONIGHT_TEST_H__ */ diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h index 0833426e..bbff2386 100644 --- a/src/crypto/CryptoNight_x86.h +++ b/src/crypto/CryptoNight_x86.h @@ -95,8 +95,11 @@ extern "C" void cnv2_main_loop_zelerius_bulldozer_asm(ScratchPad* ctx0); void cnv2_double_main_loop_zelerius_sandybridge_asm(ScratchPad* ctx0, ScratchPad* ctx1); - void cnv2_main_loop_rwz_all_asm(ScratchPad* ctx0); - void cnv2_double_main_loop_rwz_all_asm(ScratchPad* ctx0, ScratchPad* ctx1); + void cnv2_main_loop_rwz_original_all_asm(ScratchPad* ctx0); + void cnv2_double_main_loop_rwz_original_all_asm(ScratchPad* ctx0, ScratchPad* ctx1); + + void cnv2_main_loop_rwz_upx2_all_asm(ScratchPad* ctx0); + void cnv2_double_main_loop_rwz_upx2_all_asm(ScratchPad* ctx0, ScratchPad* ctx1); void cnv1_main_loop_soft_aes_sandybridge_asm(ScratchPad* ctx0); void 
cnv1_main_loop_lite_soft_aes_sandybridge_asm(ScratchPad* ctx0); @@ -994,7 +997,7 @@ public: cx = _mm_aesenc_si128(cx, ax); } - SHUFFLE_PHASE_1(l, (idx&MASK), bx0, bx1, ax, VARIANT == POW_RWZ) + SHUFFLE_PHASE_1(l, (idx&MASK), bx0, bx1, ax, VARIANT == POW_RWZ || VARIANT == POW_UPX2) _mm_store_si128((__m128i*) &l[idx & MASK], _mm_xor_si128(bx0, cx)); @@ -1008,7 +1011,7 @@ public: lo = __umul128(idx, cl, &hi); - SHUFFLE_PHASE_2(l, (idx&MASK), bx0, bx1, ax, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l, (idx&MASK), bx0, bx1, ax, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al += hi; // two fence statements are overhead ah += lo; @@ -1082,7 +1085,10 @@ public: cnv2_main_loop_zelerius_ivybridge_asm(scratchPad[0]); break; case POW_RWZ: - cnv2_main_loop_rwz_all_asm(scratchPad[0]); + cnv2_main_loop_rwz_original_all_asm(scratchPad[0]); + break; + case POW_UPX2: + cnv2_main_loop_rwz_upx2_all_asm(scratchPad[0]); break; default: cnv2_main_loop_ivybridge_asm(scratchPad[0]); @@ -1105,7 +1111,10 @@ public: cnv2_main_loop_zelerius_ryzen_asm(scratchPad[0]); break; case POW_RWZ: - cnv2_main_loop_rwz_all_asm(scratchPad[0]); + cnv2_main_loop_rwz_original_all_asm(scratchPad[0]); + break; + case POW_UPX2: + cnv2_main_loop_rwz_upx2_all_asm(scratchPad[0]); break; default: cnv2_main_loop_ryzen_asm(scratchPad[0]); @@ -1127,7 +1136,10 @@ public: cnv2_main_loop_zelerius_bulldozer_asm(scratchPad[0]); break; case POW_RWZ: - cnv2_main_loop_rwz_all_asm(scratchPad[0]); + cnv2_main_loop_rwz_original_all_asm(scratchPad[0]); + break; + case POW_UPX2: + cnv2_main_loop_rwz_upx2_all_asm(scratchPad[0]); break; default: cnv2_main_loop_bulldozer_asm(scratchPad[0]); @@ -1785,8 +1797,8 @@ public: cx1 = _mm_aesenc_si128(cx1, ax1); } - SHUFFLE_PHASE_1(l0, (idx0 & MASK), bx00, bx10, ax0, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l1, (idx1 & MASK), bx01, bx11, ax1, VARIANT == POW_RWZ) + SHUFFLE_PHASE_1(l0, (idx0 & MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l1, (idx1 & 
MASK), bx01, bx11, ax1, VARIANT == POW_RWZ || VARIANT == POW_UPX2) _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0)); _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1)); @@ -1807,7 +1819,7 @@ public: lo = __umul128(idx0, cl, &hi); - SHUFFLE_PHASE_2(l0, (idx0 & MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l0, (idx0 & MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al0 += hi; ah0 += lo; @@ -1876,7 +1888,7 @@ public: lo = __umul128(idx1, cl, &hi); - SHUFFLE_PHASE_2(l1, (idx1 & MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l1, (idx1 & MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al1 += hi; ah1 += lo; @@ -1935,7 +1947,10 @@ public: cnv2_double_main_loop_zelerius_sandybridge_asm(scratchPad[0], scratchPad[1]); break; case POW_RWZ: - cnv2_double_main_loop_rwz_all_asm(scratchPad[0], scratchPad[1]); + cnv2_double_main_loop_rwz_original_all_asm(scratchPad[0], scratchPad[1]); + break; + case POW_UPX2: + cnv2_double_main_loop_rwz_upx2_all_asm(scratchPad[0], scratchPad[1]); break; default: cnv2_double_main_loop_sandybridge_asm(scratchPad[0], scratchPad[1]); @@ -2885,9 +2900,9 @@ public: cx2 = _mm_aesenc_si128(cx2, ax2); } - SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ) + SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ || VARIANT == POW_UPX2) _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0)); _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1)); @@ -2905,7 +2920,7 @@ public: lo = __umul128(idx0, cl, &hi); - 
SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al0 += hi; ah0 += lo; @@ -2928,7 +2943,7 @@ public: lo = __umul128(idx1, cl, &hi); - SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al1 += hi; ah1 += lo; @@ -2950,7 +2965,7 @@ public: lo = __umul128(idx2, cl, &hi); - SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al2 += hi; ah2 += lo; @@ -4167,10 +4182,10 @@ public: cx3 = _mm_aesenc_si128(cx3, ax3); } - SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ) + SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ || VARIANT == POW_UPX2) _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0)); _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1)); @@ -4190,7 +4205,7 @@ public: lo = __umul128(idx0, cl, &hi); - SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al0 += hi; ah0 += lo; @@ -4213,7 +4228,7 @@ public: lo = __umul128(idx1, cl, &hi); - SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, 
hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al1 += hi; ah1 += lo; @@ -4236,7 +4251,7 @@ public: lo = __umul128(idx2, cl, &hi); - SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al2 += hi; ah2 += lo; @@ -4259,7 +4274,7 @@ public: lo = __umul128(idx3, cl, &hi); - SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al3 += hi; ah3 += lo; @@ -5297,11 +5312,11 @@ public: cx4 = _mm_aesenc_si128(cx4, ax4); } - SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ) - SHUFFLE_PHASE_1(l4, (idx4&MASK), bx04, bx14, ax4, VARIANT == POW_RWZ) + SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ || VARIANT == POW_UPX2) + SHUFFLE_PHASE_1(l4, (idx4&MASK), bx04, bx14, ax4, VARIANT == POW_RWZ || VARIANT == POW_UPX2) _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0)); _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1)); @@ -5323,7 +5338,7 @@ public: lo = __umul128(idx0, cl, &hi); - SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al0 += hi; ah0 += lo; @@ 
-5346,7 +5361,7 @@ public: lo = __umul128(idx1, cl, &hi); - SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al1 += hi; ah1 += lo; @@ -5369,7 +5384,7 @@ public: lo = __umul128(idx2, cl, &hi); - SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al2 += hi; ah2 += lo; @@ -5392,7 +5407,7 @@ public: lo = __umul128(idx3, cl, &hi); - SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al3 += hi; ah3 += lo; @@ -5415,7 +5430,7 @@ public: lo = __umul128(idx4, cl, &hi); - SHUFFLE_PHASE_2(l4, (idx4&MASK), bx04, bx14, ax4, lo, hi, VARIANT == POW_RWZ) + SHUFFLE_PHASE_2(l4, (idx4&MASK), bx04, bx14, ax4, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2) al4 += hi; ah4 += lo; diff --git a/src/crypto/asm/cn_main_loop.S b/src/crypto/asm/cn_main_loop.S index 26f353a1..ac1e24c3 100644 --- a/src/crypto/asm/cn_main_loop.S +++ b/src/crypto/asm/cn_main_loop.S @@ -38,8 +38,11 @@ .global FN_PREFIX(cnv2_main_loop_zelerius_bulldozer_asm) .global FN_PREFIX(cnv2_double_main_loop_zelerius_sandybridge_asm) -.global FN_PREFIX(cnv2_main_loop_rwz_all_asm) -.global FN_PREFIX(cnv2_double_main_loop_rwz_all_asm) +.global FN_PREFIX(cnv2_main_loop_rwz_original_all_asm) +.global FN_PREFIX(cnv2_double_main_loop_rwz_original_all_asm) + +.global FN_PREFIX(cnv2_main_loop_rwz_upx2_all_asm) +.global FN_PREFIX(cnv2_double_main_loop_rwz_upx2_all_asm) .global FN_PREFIX(cnv1_main_loop_soft_aes_sandybridge_asm) .global FN_PREFIX(cnv1_main_loop_lite_soft_aes_sandybridge_asm) @@ -363,10 +366,10 @@ ALIGN 16 #else ALIGN 64 #endif -FN_PREFIX(cnv2_main_loop_rwz_all_asm): +FN_PREFIX(cnv2_main_loop_rwz_original_all_asm): sub rsp, 48 mov 
rcx, rdi - #include "cnv2_main_loop_rwz_all.inc" + #include "cnv2_main_loop_rwz_original_all.inc" add rsp, 48 ret 0 @@ -375,11 +378,36 @@ ALIGN 16 #else ALIGN 64 #endif -FN_PREFIX(cnv2_double_main_loop_rwz_all_asm): +FN_PREFIX(cnv2_double_main_loop_rwz_original_all_asm): sub rsp, 48 mov rcx, rdi mov rdx, rsi - #include "cnv2_double_main_loop_rwz_all.inc" + #include "cnv2_double_main_loop_rwz_original_all.inc" + add rsp, 48 + ret 0 + +#ifdef __APPLE__ +ALIGN 16 +#else +ALIGN 64 +#endif +FN_PREFIX(cnv2_main_loop_rwz_upx2_all_asm): + sub rsp, 48 + mov rcx, rdi + #include "cnv2_main_loop_rwz_upx2_all.inc" + add rsp, 48 + ret 0 + +#ifdef __APPLE__ +ALIGN 16 +#else +ALIGN 64 +#endif +FN_PREFIX(cnv2_double_main_loop_rwz_upx2_all_asm): + sub rsp, 48 + mov rcx, rdi + mov rdx, rsi + #include "cnv2_double_main_loop_rwz_upx2_all.inc" add rsp, 48 ret 0 diff --git a/src/crypto/asm/cnv2_double_main_loop_rwz_all.inc b/src/crypto/asm/cnv2_double_main_loop_rwz_all.inc.in similarity index 91% rename from src/crypto/asm/cnv2_double_main_loop_rwz_all.inc rename to src/crypto/asm/cnv2_double_main_loop_rwz_all.inc.in index 29abc7df..969032ff 100644 --- a/src/crypto/asm/cnv2_double_main_loop_rwz_all.inc +++ b/src/crypto/asm/cnv2_double_main_loop_rwz_all.inc.in @@ -18,7 +18,7 @@ mov r10, QWORD PTR [rcx+32] mov r8, rcx xor r10, QWORD PTR [rcx] - mov r14d, 393216 + mov r14d, ${ITERATIONS} mov r11, QWORD PTR [rcx+40] xor r11, QWORD PTR [rcx+8] mov rsi, QWORD PTR [rdx+224] @@ -41,7 +41,7 @@ movaps XMMWORD PTR [rsp+16], xmm15 mov rdx, r10 movq xmm4, QWORD PTR [r8+96] - and edx, 2097136 + and edx, ${MASK} mov rax, QWORD PTR [rcx+48] xorps xmm13, xmm13 xor rax, QWORD PTR [rcx+16] @@ -83,7 +83,7 @@ mov rcx, rdi mov QWORD PTR [rsp+264], r11 movq xmm8, rax - and ecx, 2097136 + and ecx, ${MASK} punpcklqdq xmm8, xmm0 movq xmm0, QWORD PTR [r9+96] punpcklqdq xmm4, xmm0 @@ -99,7 +99,7 @@ #else ALIGN(64) #endif -rwz_main_loop_double: +rwz_main_loop_double_${ALGO}: movdqu xmm9, xmm15 mov eax, edx mov ebx, 
edx @@ -124,7 +124,7 @@ rwz_main_loop_double: movq r11, xmm9 mov edx, r11d - and edx, 2097136 + and edx, ${MASK} movdqa xmm0, xmm9 pxor xmm0, xmm7 movdqu XMMWORD PTR [r9], xmm0 @@ -155,7 +155,7 @@ rwz_main_loop_double: movdqu XMMWORD PTR [rax+rsi], xmm0 movq rcx, xmm10 - and ecx, 2097136 + and ecx, ${MASK} movdqa xmm0, xmm10 pxor xmm0, xmm6 @@ -203,7 +203,7 @@ rwz_main_loop_double: mov QWORD PTR [rbx+8], rdx xor rdx, r9 mov QWORD PTR [rsp+256], r11 - and r11d, 2097136 + and r11d, ${MASK} mov QWORD PTR [rsp+264], rdx mov QWORD PTR [rsp+8], r11 lea r15, QWORD PTR [r11+r13] @@ -253,8 +253,8 @@ rwz_main_loop_double: mov rbx, rax imul rax, rdx sub r11, rax - js rwz_div_fix_1 -rwz_div_fix_1_ret: + js rwz_div_fix_1_${ALGO} +rwz_div_fix_1_${ALGO}_ret: cvttsd2si rdx, xmm2 mov rax, rdx @@ -262,8 +262,8 @@ rwz_div_fix_1_ret: movd xmm2, r11d movd xmm4, ebx sub r8, rax - js rwz_div_fix_2 -rwz_div_fix_2_ret: + js rwz_div_fix_2_${ALGO} +rwz_div_fix_2_${ALGO}_ret: movd xmm1, r8d movd xmm0, edx @@ -279,15 +279,15 @@ rwz_div_fix_2_ret: movdqa xmm5, xmm1 psrlq xmm5, 19 test r9, 524287 - je rwz_sqrt_fix_1 -rwz_sqrt_fix_1_ret: + je rwz_sqrt_fix_1_${ALGO} +rwz_sqrt_fix_1_${ALGO}_ret: movq r9, xmm10 psrldq xmm1, 8 movq r8, xmm1 test r8, 524287 - je rwz_sqrt_fix_2 -rwz_sqrt_fix_2_ret: + je rwz_sqrt_fix_2_${ALGO} +rwz_sqrt_fix_2_${ALGO}_ret: mov r12d, ecx mov r8d, ecx @@ -317,7 +317,7 @@ rwz_sqrt_fix_2_ret: mov QWORD PTR [r13], rdi xor rdi, r10 mov ecx, edi - and ecx, 2097136 + and ecx, ${MASK} lea r8, QWORD PTR [rcx+rsi] mov rdx, QWORD PTR [r13+8] @@ -335,7 +335,7 @@ rwz_sqrt_fix_2_ret: movdqa xmm6, xmm10 mov r9, r15 dec r14d - jne rwz_main_loop_double + jne rwz_main_loop_double_${ALGO} ldmxcsr DWORD PTR [rsp+272] movaps xmm13, XMMWORD PTR [rsp+48] @@ -358,19 +358,19 @@ rwz_sqrt_fix_2_ret: pop rsi pop rbp pop rbx - jmp rwz_cnv2_double_mainloop_asm_endp + jmp rwz_cnv2_double_mainloop_${ALGO}_asm_endp -rwz_div_fix_1: +rwz_div_fix_1_${ALGO}: dec rbx add r11, rdx - jmp rwz_div_fix_1_ret + jmp 
rwz_div_fix_1_${ALGO}_ret -rwz_div_fix_2: +rwz_div_fix_2_${ALGO}: dec rdx add r8, r9 - jmp rwz_div_fix_2_ret + jmp rwz_div_fix_2_${ALGO}_ret -rwz_sqrt_fix_1: +rwz_sqrt_fix_1_${ALGO}: movq r8, xmm3 movdqa xmm0, xmm5 psrldq xmm0, 8 @@ -389,9 +389,9 @@ rwz_sqrt_fix_1: adc r9, 0 movq xmm5, r9 punpcklqdq xmm5, xmm0 - jmp rwz_sqrt_fix_1_ret + jmp rwz_sqrt_fix_1_${ALGO}_ret -rwz_sqrt_fix_2: +rwz_sqrt_fix_2_${ALGO}: psrldq xmm3, 8 movq r11, xmm3 dec r8 @@ -409,6 +409,6 @@ rwz_sqrt_fix_2: adc r8, 0 movq xmm0, r8 punpcklqdq xmm5, xmm0 - jmp rwz_sqrt_fix_2_ret + jmp rwz_sqrt_fix_2_${ALGO}_ret -rwz_cnv2_double_mainloop_asm_endp: +rwz_cnv2_double_mainloop_${ALGO}_asm_endp: diff --git a/src/crypto/asm/cnv2_main_loop_rwz_all.inc b/src/crypto/asm/cnv2_main_loop_rwz_all.inc.in similarity index 91% rename from src/crypto/asm/cnv2_main_loop_rwz_all.inc rename to src/crypto/asm/cnv2_main_loop_rwz_all.inc.in index aad81097..042af620 100644 --- a/src/crypto/asm/cnv2_main_loop_rwz_all.inc +++ b/src/crypto/asm/cnv2_main_loop_rwz_all.inc.in @@ -15,7 +15,7 @@ mov rax, QWORD PTR [rcx+48] mov r9, rcx xor rax, QWORD PTR [rcx+16] - mov esi, 393216 + mov esi, ${ITERATIONS} mov r8, QWORD PTR [rcx+32] mov r13d, -2147483647 xor r8, QWORD PTR [rcx] @@ -35,7 +35,7 @@ movaps XMMWORD PTR [rsp+64], xmm6 movaps XMMWORD PTR [rsp+48], xmm7 movaps XMMWORD PTR [rsp+32], xmm8 - and r10d, 2097136 + and r10d, ${MASK} movq xmm5, rax xor eax, eax @@ -55,7 +55,7 @@ #else ALIGN(64) #endif -rwz_main_loop: +rwz_main_loop_${ALGO}: lea rdx, QWORD PTR [r10+rbx] mov ecx, r10d mov eax, r10d @@ -69,7 +69,7 @@ rwz_main_loop: aesenc xmm6, xmm7 movq rbp, xmm6 mov r9, rbp - and r9d, 2097136 + and r9d, ${MASK} movdqu xmm0, XMMWORD PTR [rcx+rbx] movdqu xmm1, XMMWORD PTR [rax+rbx] movdqu xmm2, XMMWORD PTR [r10+rbx] @@ -113,9 +113,9 @@ rwz_main_loop: psubq xmm3, XMMWORD PTR [rsp+16] movq rdx, xmm3 test edx, 524287 - je rwz_sqrt_fixup + je rwz_sqrt_fixup_${ALGO} psrlq xmm3, 19 -rwz_sqrt_fixup_ret: +rwz_sqrt_fixup_${ALGO}_ret: mov 
ecx, r10d mov rax, rdi @@ -126,7 +126,7 @@ rwz_sqrt_fixup_ret: mov QWORD PTR [r14], r8 xor r8, rdi mov edi, r8d - and edi, 2097136 + and edi, ${MASK} movq xmm0, rax xor rax, [rcx+rbx+8] add r11, rax @@ -151,7 +151,7 @@ rwz_sqrt_fixup_ret: mov r10d, edi xor r11, r12 dec rsi - jne rwz_main_loop + jne rwz_main_loop_${ALGO} ldmxcsr DWORD PTR [rsp] mov rbx, QWORD PTR [rsp+160] @@ -166,9 +166,9 @@ rwz_sqrt_fixup_ret: pop rdi pop rsi pop rbp - jmp cnv2_rwz_main_loop_endp + jmp cnv2_rwz_main_loop_${ALGO}_endp -rwz_sqrt_fixup: +rwz_sqrt_fixup_${ALGO}: dec rdx mov r13d, -1022 shl r13, 32 @@ -185,6 +185,6 @@ rwz_sqrt_fixup: sub rcx, r9 adc rdx, 0 movq xmm3, rdx - jmp rwz_sqrt_fixup_ret + jmp rwz_sqrt_fixup_${ALGO}_ret -cnv2_rwz_main_loop_endp: +cnv2_rwz_main_loop_${ALGO}_endp: diff --git a/src/crypto/asm/win/cn_main_loop.asm b/src/crypto/asm/win/cn_main_loop.asm index e62d1124..72834397 100644 --- a/src/crypto/asm/win/cn_main_loop.asm +++ b/src/crypto/asm/win/cn_main_loop.asm @@ -31,8 +31,11 @@ PUBLIC cnv2_main_loop_zelerius_ryzen_asm PUBLIC cnv2_main_loop_zelerius_bulldozer_asm PUBLIC cnv2_double_main_loop_zelerius_sandybridge_asm -PUBLIC cnv2_main_loop_rwz_all_asm -PUBLIC cnv2_double_main_loop_rwz_all_asm +PUBLIC cnv2_main_loop_rwz_original_all_asm +PUBLIC cnv2_double_main_loop_rwz_original_all_asm + +PUBLIC cnv2_main_loop_rwz_upx2_all_asm +PUBLIC cnv2_double_main_loop_rwz_upx2_all_asm PUBLIC cnv1_main_loop_soft_aes_sandybridge_asm PUBLIC cnv1_main_loop_lite_soft_aes_sandybridge_asm @@ -197,16 +200,28 @@ cnv2_double_main_loop_zelerius_sandybridge_asm PROC cnv2_double_main_loop_zelerius_sandybridge_asm ENDP ALIGN 64 -cnv2_main_loop_rwz_all_asm PROC - INCLUDE cnv2_main_loop_rwz_all.inc +cnv2_main_loop_rwz_original_all_asm PROC + INCLUDE cnv2_main_loop_rwz_original_all.inc ret 0 -cnv2_main_loop_rwz_all_asm ENDP +cnv2_main_loop_rwz_original_all_asm ENDP ALIGN 64 -cnv2_double_main_loop_rwz_all_asm PROC - INCLUDE cnv2_double_main_loop_rwz_all.inc 
+cnv2_double_main_loop_rwz_original_all_asm PROC + INCLUDE cnv2_double_main_loop_rwz_original_all.inc ret 0 -cnv2_double_main_loop_rwz_all_asm ENDP +cnv2_double_main_loop_rwz_original_all_asm ENDP + +ALIGN 64 +cnv2_main_loop_rwz_upx2_all_asm PROC + INCLUDE cnv2_main_loop_rwz_upx2_all.inc + ret 0 +cnv2_main_loop_rwz_upx2_all_asm ENDP + +ALIGN 64 +cnv2_double_main_loop_rwz_upx2_all_asm PROC + INCLUDE cnv2_double_main_loop_rwz_upx2_all.inc + ret 0 +cnv2_double_main_loop_rwz_upx2_all_asm ENDP ALIGN 64 cnv1_main_loop_soft_aes_sandybridge_asm PROC diff --git a/src/crypto/asm/win/cn_main_loop_win_gcc.S b/src/crypto/asm/win/cn_main_loop_win_gcc.S index a101a8cb..beaab805 100644 --- a/src/crypto/asm/win/cn_main_loop_win_gcc.S +++ b/src/crypto/asm/win/cn_main_loop_win_gcc.S @@ -34,8 +34,11 @@ .global FN_PREFIX(cnv2_main_loop_zelerius_bulldozer_asm) .global FN_PREFIX(cnv2_double_main_loop_zelerius_sandybridge_asm) -.global FN_PREFIX(cnv2_main_loop_rwz_all_asm) -.global FN_PREFIX(cnv2_double_main_loop_rwz_all_asm) +.global FN_PREFIX(cnv2_main_loop_rwz_original_all_asm) +.global FN_PREFIX(cnv2_double_main_loop_rwz_original_all_asm) + +.global FN_PREFIX(cnv2_main_loop_rwz_upx2_all_asm) +.global FN_PREFIX(cnv2_double_main_loop_rwz_upx2_all_asm) .global FN_PREFIX(cnv1_main_loop_soft_aes_sandybridge_asm) .global FN_PREFIX(cnv1_main_loop_lite_soft_aes_sandybridge_asm) @@ -175,13 +178,23 @@ FN_PREFIX(cnv2_double_main_loop_zelerius_sandybridge_asm): ret 0 ALIGN 64 -FN_PREFIX(cnv2_main_loop_rwz_all_asm): - #include "../cnv2_main_loop_rwz_all.inc" +FN_PREFIX(cnv2_main_loop_rwz_original_all_asm): + #include "../cnv2_main_loop_rwz_original_all.inc" ret 0 ALIGN 64 -FN_PREFIX(cnv2_double_main_loop_rwz_all_asm): - #include "../cnv2_double_main_loop_rwz_all.inc" +FN_PREFIX(cnv2_double_main_loop_rwz_original_all_asm): + #include "../cnv2_double_main_loop_rwz_original_all.inc" + ret 0 + +ALIGN 64 +FN_PREFIX(cnv2_main_loop_rwz_upx2_all_asm): + #include "../cnv2_main_loop_rwz_upx2_all.inc" + ret 
0 + +ALIGN 64 +FN_PREFIX(cnv2_double_main_loop_rwz_upx2_all_asm): + #include "../cnv2_double_main_loop_rwz_upx2_all.inc" ret 0 ALIGN 64 diff --git a/src/crypto/asm/win/cnv2_double_main_loop_rwz_all.inc b/src/crypto/asm/win/cnv2_double_main_loop_rwz_all.inc.in similarity index 91% rename from src/crypto/asm/win/cnv2_double_main_loop_rwz_all.inc rename to src/crypto/asm/win/cnv2_double_main_loop_rwz_all.inc.in index 69ca8793..4f08527a 100644 --- a/src/crypto/asm/win/cnv2_double_main_loop_rwz_all.inc +++ b/src/crypto/asm/win/cnv2_double_main_loop_rwz_all.inc.in @@ -18,7 +18,7 @@ mov r10, QWORD PTR [rcx+32] mov r8, rcx xor r10, QWORD PTR [rcx] - mov r14d, 393216 + mov r14d, ${ITERATIONS} mov r11, QWORD PTR [rcx+40] xor r11, QWORD PTR [rcx+8] mov rsi, QWORD PTR [rdx+224] @@ -41,7 +41,7 @@ movaps XMMWORD PTR [rsp+16], xmm15 mov rdx, r10 movd xmm4, QWORD PTR [r8+96] - and edx, 2097136 + and edx, ${MASK} mov rax, QWORD PTR [rcx+48] xorps xmm13, xmm13 xor rax, QWORD PTR [rcx+16] @@ -83,7 +83,7 @@ mov rcx, rdi mov QWORD PTR [rsp+264], r11 movd xmm8, rax - and ecx, 2097136 + and ecx, ${MASK} punpcklqdq xmm8, xmm0 movd xmm0, QWORD PTR [r9+96] punpcklqdq xmm4, xmm0 @@ -95,7 +95,7 @@ movdqu xmm15, XMMWORD PTR [r9] ALIGN(64) -rwz_main_loop_double: +rwz_main_loop_double_${ALGO}: movdqu xmm9, xmm15 mov eax, edx mov ebx, edx @@ -120,7 +120,7 @@ rwz_main_loop_double: movd r11, xmm9 mov edx, r11d - and edx, 2097136 + and edx, ${MASK} movdqa xmm0, xmm9 pxor xmm0, xmm7 movdqu XMMWORD PTR [r9], xmm0 @@ -151,7 +151,7 @@ rwz_main_loop_double: movdqu XMMWORD PTR [rax+rsi], xmm0 movd rcx, xmm10 - and ecx, 2097136 + and ecx, ${MASK} movdqa xmm0, xmm10 pxor xmm0, xmm6 @@ -199,7 +199,7 @@ rwz_main_loop_double: mov QWORD PTR [rbx+8], rdx xor rdx, r9 mov QWORD PTR [rsp+256], r11 - and r11d, 2097136 + and r11d, ${MASK} mov QWORD PTR [rsp+264], rdx mov QWORD PTR [rsp+8], r11 lea r15, QWORD PTR [r11+r13] @@ -249,8 +249,8 @@ rwz_main_loop_double: mov rbx, rax imul rax, rdx sub r11, rax - js 
rwz_div_fix_1 -rwz_div_fix_1_ret: + js rwz_div_fix_1_${ALGO} +rwz_div_fix_1_${ALGO}_ret: cvttsd2si rdx, xmm2 mov rax, rdx @@ -258,8 +258,8 @@ rwz_div_fix_1_ret: movd xmm2, r11d movd xmm4, ebx sub r8, rax - js rwz_div_fix_2 -rwz_div_fix_2_ret: + js rwz_div_fix_2_${ALGO} +rwz_div_fix_2_${ALGO}_ret: movd xmm1, r8d movd xmm0, edx @@ -275,15 +275,15 @@ rwz_div_fix_2_ret: movdqa xmm5, xmm1 psrlq xmm5, 19 test r9, 524287 - je rwz_sqrt_fix_1 -rwz_sqrt_fix_1_ret: + je rwz_sqrt_fix_1_${ALGO} +rwz_sqrt_fix_1_${ALGO}_ret: movd r9, xmm10 psrldq xmm1, 8 movd r8, xmm1 test r8, 524287 - je rwz_sqrt_fix_2 -rwz_sqrt_fix_2_ret: + je rwz_sqrt_fix_2_${ALGO} +rwz_sqrt_fix_2_${ALGO}_ret: mov r12d, ecx mov r8d, ecx @@ -313,7 +313,7 @@ rwz_sqrt_fix_2_ret: mov QWORD PTR [r13], rdi xor rdi, r10 mov ecx, edi - and ecx, 2097136 + and ecx, ${MASK} lea r8, QWORD PTR [rcx+rsi] mov rdx, QWORD PTR [r13+8] @@ -331,7 +331,7 @@ rwz_sqrt_fix_2_ret: movdqa xmm6, xmm10 mov r9, r15 dec r14d - jne rwz_main_loop_double + jne rwz_main_loop_double_${ALGO} ldmxcsr DWORD PTR [rsp+272] movaps xmm13, XMMWORD PTR [rsp+48] @@ -354,19 +354,19 @@ rwz_sqrt_fix_2_ret: pop rsi pop rbp pop rbx - jmp rwz_cnv2_double_mainloop_asm_endp + jmp rwz_cnv2_double_mainloop_${ALGO}_asm_endp -rwz_div_fix_1: +rwz_div_fix_1_${ALGO}: dec rbx add r11, rdx - jmp rwz_div_fix_1_ret + jmp rwz_div_fix_1_${ALGO}_ret -rwz_div_fix_2: +rwz_div_fix_2_${ALGO}: dec rdx add r8, r9 - jmp rwz_div_fix_2_ret + jmp rwz_div_fix_2_${ALGO}_ret -rwz_sqrt_fix_1: +rwz_sqrt_fix_1_${ALGO}: movd r8, xmm3 movdqa xmm0, xmm5 psrldq xmm0, 8 @@ -385,9 +385,9 @@ rwz_sqrt_fix_1: adc r9, 0 movd xmm5, r9 punpcklqdq xmm5, xmm0 - jmp rwz_sqrt_fix_1_ret + jmp rwz_sqrt_fix_1_${ALGO}_ret -rwz_sqrt_fix_2: +rwz_sqrt_fix_2_${ALGO}: psrldq xmm3, 8 movd r11, xmm3 dec r8 @@ -405,6 +405,6 @@ rwz_sqrt_fix_2: adc r8, 0 movd xmm0, r8 punpcklqdq xmm5, xmm0 - jmp rwz_sqrt_fix_2_ret + jmp rwz_sqrt_fix_2_${ALGO}_ret -rwz_cnv2_double_mainloop_asm_endp: 
+rwz_cnv2_double_mainloop_${ALGO}_asm_endp: diff --git a/src/crypto/asm/win/cnv2_main_loop_rwz_all.inc b/src/crypto/asm/win/cnv2_main_loop_rwz_all.inc.in similarity index 91% rename from src/crypto/asm/win/cnv2_main_loop_rwz_all.inc rename to src/crypto/asm/win/cnv2_main_loop_rwz_all.inc.in index 99317730..b3d7a6c7 100644 --- a/src/crypto/asm/win/cnv2_main_loop_rwz_all.inc +++ b/src/crypto/asm/win/cnv2_main_loop_rwz_all.inc.in @@ -15,7 +15,7 @@ mov rax, QWORD PTR [rcx+48] mov r9, rcx xor rax, QWORD PTR [rcx+16] - mov esi, 393216 + mov esi, ${ITERATIONS} mov r8, QWORD PTR [rcx+32] mov r13d, -2147483647 xor r8, QWORD PTR [rcx] @@ -35,7 +35,7 @@ movaps XMMWORD PTR [rsp+64], xmm6 movaps XMMWORD PTR [rsp+48], xmm7 movaps XMMWORD PTR [rsp+32], xmm8 - and r10d, 2097136 + and r10d, ${MASK} movd xmm5, rax xor eax, eax @@ -51,7 +51,7 @@ movdqu xmm6, XMMWORD PTR [r10+rbx] ALIGN(64) -rwz_main_loop: +rwz_main_loop_${ALGO}: lea rdx, QWORD PTR [r10+rbx] mov ecx, r10d mov eax, r10d @@ -65,7 +65,7 @@ rwz_main_loop: aesenc xmm6, xmm7 movd rbp, xmm6 mov r9, rbp - and r9d, 2097136 + and r9d, ${MASK} movdqu xmm0, XMMWORD PTR [rcx+rbx] movdqu xmm1, XMMWORD PTR [rax+rbx] movdqu xmm2, XMMWORD PTR [r10+rbx] @@ -109,9 +109,9 @@ rwz_main_loop: psubq xmm3, XMMWORD PTR [rsp+16] movd rdx, xmm3 test edx, 524287 - je rwz_sqrt_fixup + je rwz_sqrt_fixup_${ALGO} psrlq xmm3, 19 -rwz_sqrt_fixup_ret: +rwz_sqrt_fixup_${ALGO}_ret: mov ecx, r10d mov rax, rdi @@ -122,7 +122,7 @@ rwz_sqrt_fixup_ret: mov QWORD PTR [r14], r8 xor r8, rdi mov edi, r8d - and edi, 2097136 + and edi, ${MASK} movd xmm0, rax xor rax, [rcx+rbx+8] add r11, rax @@ -147,7 +147,7 @@ rwz_sqrt_fixup_ret: mov r10d, edi xor r11, r12 dec rsi - jne rwz_main_loop + jne rwz_main_loop_${ALGO} ldmxcsr DWORD PTR [rsp] mov rbx, QWORD PTR [rsp+160] @@ -162,9 +162,9 @@ rwz_sqrt_fixup_ret: pop rdi pop rsi pop rbp - jmp cnv2_rwz_main_loop_endp + jmp cnv2_rwz_main_loop_${ALGO}_endp -rwz_sqrt_fixup: +rwz_sqrt_fixup_${ALGO}: dec rdx mov r13d, -1022 shl 
r13, 32 @@ -181,6 +181,6 @@ rwz_sqrt_fixup: sub rcx, r9 adc rdx, 0 movd xmm3, rdx - jmp rwz_sqrt_fixup_ret + jmp rwz_sqrt_fixup_${ALGO}_ret -cnv2_rwz_main_loop_endp: +cnv2_rwz_main_loop_${ALGO}_endp: diff --git a/src/net/Job.cpp b/src/net/Job.cpp index b0759113..eb2362e7 100644 --- a/src/net/Job.cpp +++ b/src/net/Job.cpp @@ -143,6 +143,10 @@ PowVariant Job::powVariant() const return PowVariant::POW_TURTLE; } + if (Options::i()->algo() == Options::ALGO_CRYPTONIGHT_EXTREMELITE) { + return PowVariant::POW_UPX2; + } + if (m_powVariant == PowVariant::POW_AUTODETECT) { if (m_blob[0] >= 10) { return PowVariant::POW_V4; diff --git a/src/net/strategies/DonateStrategy.cpp b/src/net/strategies/DonateStrategy.cpp index b1319a1d..cfb677c2 100644 --- a/src/net/strategies/DonateStrategy.cpp +++ b/src/net/strategies/DonateStrategy.cpp @@ -62,16 +62,20 @@ DonateStrategy::DonateStrategy(const char *agent, IStrategyListener *listener) : url = new Url("donate2.graef.in", 1080, userId, nullptr, true, false, true); } else if (Options::i()->algo() == Options::ALGO_CRYPTONIGHT_ULTRALITE) { url = new Url("donate2.graef.in", 8090, userId, nullptr, true, false, true); + } else if (Options::i()->algo() == Options::ALGO_CRYPTONIGHT_EXTREMELITE) { + url = new Url("donate2.graef.in", 9091, userId, nullptr, true, false, true); } else { url = new Url("donate2.graef.in", 443, userId, nullptr, true, false, true); } #else if (Options::i()->algo() == Options::ALGO_CRYPTONIGHT_HEAVY) { - url = new Url("donate.graef.in", 8443, userId, nullptr, false, false, true); + url = new Url("donate2.graef.in", 9000, userId, nullptr, false, false, true); } else if (Options::i()->algo() == Options::ALGO_CRYPTONIGHT_LITE) { - url = new Url("donate.graef.in", 1080, userId, nullptr, false, false, true); + url = new Url("donate2.graef.in", 7000, userId, nullptr, false, false, true); } else if (Options::i()->algo() == Options::ALGO_CRYPTONIGHT_ULTRALITE) { url = new Url("donate2.graef.in", 8088, userId, nullptr, false, 
false, true); + } else if (Options::i()->algo() == Options::ALGO_CRYPTONIGHT_EXTREMELITE) { + url = new Url("donate2.graef.in", 8088, userId, nullptr, false, false, true); } else { url = new Url("donate2.graef.in", 80, userId, nullptr, false, false, true); } diff --git a/src/version.h b/src/version.h index 7ed72904..d17d04a8 100644 --- a/src/version.h +++ b/src/version.h @@ -36,14 +36,14 @@ #define APP_DESC "XMRigCC CPU miner" #define APP_COPYRIGHT "Copyright (C) 2017- BenDr0id" #endif -#define APP_VERSION "1.9.1 (based on XMRig)" +#define APP_VERSION "1.9.2 (based on XMRig)" #define APP_DOMAIN "" #define APP_SITE "https://github.com/Bendr0id/xmrigCC" #define APP_KIND "cpu" #define APP_VER_MAJOR 1 #define APP_VER_MINOR 9 -#define APP_VER_BUILD 1 +#define APP_VER_BUILD 2 #define APP_VER_REV 0 #ifndef NDEBUG