diff --git a/src/Options.cpp b/src/Options.cpp index 76fc679c..f552aba1 100644 --- a/src/Options.cpp +++ b/src/Options.cpp @@ -330,7 +330,8 @@ constexpr static const char *pow_variant_names[] = { "xfh", "fast2", "upx", - "turtle" + "turtle", + "hosp" }; constexpr static const char *asm_optimization_names[] = { @@ -1115,6 +1116,12 @@ bool Options::setAlgo(const char *algo) break; } + if (i == ARRAY_SIZE(algo_names) - 1 && (!strcmp(algo, "cryptonight-hospital") || !strcmp(algo, "cryptonight-hosp"))) { + m_algo = ALGO_CRYPTONIGHT; + m_powVariant = POW_HOSP; + break; + } + if (i == ARRAY_SIZE(algo_names) - 1) { showUsage(1); return false; @@ -1194,6 +1201,11 @@ bool Options::parsePowVariant(const char *powVariant) break; } + if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "hosp") || !strcmp(powVariant, "hospital"))) { + m_powVariant = POW_HOSP; + break; + } + if (i == ARRAY_SIZE(pow_variant_names) - 1) { showUsage(1); return false; diff --git a/src/PowVariant.h b/src/PowVariant.h index 4213cece..a03fbd22 100644 --- a/src/PowVariant.h +++ b/src/PowVariant.h @@ -38,6 +38,7 @@ enum PowVariant POW_FAST_2, POW_UPX, POW_TURTLE, + POW_HOSP, LAST_ITEM }; @@ -71,6 +72,8 @@ inline std::string getPowVariantName(PowVariant powVariant) return "upx"; case POW_TURTLE: return "turtle"; + case POW_HOSP: + return "hosp"; case POW_AUTODETECT: default: return "-1"; @@ -144,6 +147,8 @@ inline PowVariant parseVariant(const std::string variant) powVariant = PowVariant::POW_UPX; } else if (variant == "turtle" || variant == "trtl" || variant == "pico" || variant == "turtlev2") { powVariant = PowVariant::POW_TURTLE; + } else if (variant == "hosp" || variant == "hospital") { + powVariant = PowVariant::POW_HOSP; } return powVariant; diff --git a/src/crypto/CryptoNight.cpp b/src/crypto/CryptoNight.cpp index e306ff2c..ae24c582 100644 --- a/src/crypto/CryptoNight.cpp +++ b/src/crypto/CryptoNight.cpp @@ -41,7 +41,7 @@ static void cryptonight_aesni(AsmOptimization asmOptimization, 
PowVariant powVer CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); #else if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { - CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization); + CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion); } else { CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); } @@ -53,7 +53,7 @@ static void cryptonight_aesni(AsmOptimization asmOptimization, PowVariant powVer if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) || (asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) || (asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) { - CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization); + CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion); } else { CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad); } @@ -65,7 +65,7 @@ static void cryptonight_aesni(AsmOptimization asmOptimization, PowVariant powVer CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); #else if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { - CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, 
NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization); + CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion); } else { CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); } @@ -77,7 +77,7 @@ static void cryptonight_aesni(AsmOptimization asmOptimization, PowVariant powVer if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) || (asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) || (asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) { - CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization); + CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion); } else { CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad); } @@ -87,13 +87,17 @@ static void cryptonight_aesni(AsmOptimization asmOptimization, PowVariant powVer CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); #else if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { - CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization); + CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion); } else { CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, 
NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); } #endif -} else if (powVersion == PowVariant::POW_RTO) { - CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad); +} else if (powVersion == PowVariant::POW_RTO || powVersion == PowVariant::POW_HOSP) { + if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { + CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion); + } else { + CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad); + } } else if (powVersion == PowVariant::POW_XFH) { CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad); } else { @@ -109,7 +113,7 @@ static void cryptonight_softaes(AsmOptimization asmOptimization, PowVariant powV CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); #else if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { - CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization); + CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion); } else { CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); } @@ -119,7 +123,7 @@ static void cryptonight_softaes(AsmOptimization asmOptimization, PowVariant powV CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, 
NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad); #else if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { - CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization); + CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion); } else { CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad); } @@ -129,7 +133,7 @@ static void cryptonight_softaes(AsmOptimization asmOptimization, PowVariant powV CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad); #else if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { - CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization); + CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion); } else { CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad); } @@ -141,7 +145,7 @@ static void cryptonight_softaes(AsmOptimization asmOptimization, PowVariant powV CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); #else if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { - CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization); + CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, 
MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion); } else { CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); } @@ -151,13 +155,17 @@ static void cryptonight_softaes(AsmOptimization asmOptimization, PowVariant powV CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); #else if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { - CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization); + CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion); } else { CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); } #endif - } else if (powVersion == PowVariant::POW_RTO) { - CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad); + } else if (powVersion == PowVariant::POW_RTO || powVersion == PowVariant::POW_HOSP) { + if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { + CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion); + } else { + CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad); + } } else if (powVersion == PowVariant::POW_XFH) { CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad); } else { 
@@ -173,7 +181,7 @@ static void cryptonight_lite_aesni(AsmOptimization asmOptimization, PowVariant p CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); #else if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { - CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization); + CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion); } else { CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); } @@ -185,7 +193,7 @@ static void cryptonight_lite_aesni(AsmOptimization asmOptimization, PowVariant p CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); #else if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { - CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization); + CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion); } else { CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); } @@ -203,7 +211,7 @@ static void cryptonight_lite_softaes(AsmOptimization asmOptimization, PowVariant CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); #else if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 
1) { - CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization); + CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion); } else { CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); } @@ -215,7 +223,7 @@ static void cryptonight_lite_softaes(AsmOptimization asmOptimization, PowVariant CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); #else if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { - CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization); + CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion); } else { CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); } @@ -244,7 +252,7 @@ static void cryptonight_ultra_lite_aesni(AsmOptimization asmOptimization, PowVar if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) || (asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) || (asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) { - CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization); + CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, 
scratchPad, asmOptimization, powVersion); } else { CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad); } @@ -258,7 +266,7 @@ static void cryptonight_ultra_lite_softaes(AsmOptimization asmOptimization, PowV CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad); #else if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { - CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization); + CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion); } else { CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad); } @@ -617,6 +625,11 @@ bool CryptoNight::selfTest(int algo) cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_ALLOY,test_input, 76, output, scratchPads); result = result && memcmp(output, test_output_alloy, 32) == 0; + // cnv7 + hosp/rto + + cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_HOSP,test_input, 76, output, scratchPads); + result = result && memcmp(output, test_output_hosp, 32) == 0; + // cnv8 aka cnv2 cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads); @@ -647,11 +660,6 @@ bool CryptoNight::selfTest(int algo) cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_XFH, test_input, 76, output, scratchPads); result = result && memcmp(output, test_output_xfh, 32) == 0; - #if MAX_NUM_HASH_BLOCKS > 1 - cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_XFH, test_input, 76, output, scratchPads); - result = result && memcmp(output, test_output_xfh, 64) 
== 0; - #endif - // cnv8 + xtl aka cn-fast2 cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_FAST_2, test_input, 76, output, scratchPads); diff --git a/src/crypto/CryptoNight_test.h b/src/crypto/CryptoNight_test.h index 5114da8b..836f2822 100644 --- a/src/crypto/CryptoNight_test.h +++ b/src/crypto/CryptoNight_test.h @@ -116,6 +116,12 @@ const static uint8_t test_output_alloy[32] = { 0x5D, 0x77, 0x16, 0x21, 0x42, 0x97, 0x5C, 0xB8, 0x50, 0xC0, 0xA5, 0x1F, 0x64, 0x07, 0xBD, 0x33 }; +// CN RTO/HOSP +const static uint8_t test_output_hosp[32] = { + 0x82, 0x66, 0x1E, 0x1C, 0x6E, 0x64, 0x36, 0x66, 0x84, 0x06, 0x32, 0x7A, 0x9B, 0xB1, 0x13, 0x19, + 0xA5, 0x56, 0x16, 0x15, 0xDF, 0xEC, 0x1C, 0x9E, 0xE3, 0x88, 0x4A, 0x6C, 0x1C, 0xEB, 0x76, 0xA5 +}; + // CN XFH const static uint8_t test_output_xfh[64] = { 0x40, 0x86, 0x5A, 0xA8, 0x87, 0x41, 0xEC, 0x1D, 0xCC, 0xBD, 0x2B, 0xC6, 0xFF, 0x36, 0xB9, 0x4D, diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h index 8c5ed80d..a9237471 100644 --- a/src/crypto/CryptoNight_x86.h +++ b/src/crypto/CryptoNight_x86.h @@ -54,6 +54,7 @@ extern "C" void cnv1_main_loop_lite_sandybridge_asm(ScratchPad* ctx0); void cnv1_main_loop_fast_sandybridge_asm(ScratchPad* ctx0); void cnv1_main_loop_upx_sandybridge_asm(ScratchPad* ctx0); + void cnv1_main_loop_rto_sandybridge_asm(ScratchPad* ctx0); void cnv2_main_loop_ivybridge_asm(ScratchPad* ctx0); void cnv2_main_loop_ryzen_asm(ScratchPad* ctx0); @@ -74,6 +75,7 @@ extern "C" void cnv1_main_loop_lite_soft_aes_sandybridge_asm(ScratchPad* ctx0); void cnv1_main_loop_fast_soft_aes_sandybridge_asm(ScratchPad* ctx0); void cnv1_main_loop_upx_soft_aes_sandybridge_asm(ScratchPad* ctx0); + void cnv1_main_loop_rto_soft_aes_sandybridge_asm(ScratchPad* ctx0); void cnv2_main_loop_soft_aes_sandybridge_asm(ScratchPad* ctx0); void cnv2_main_loop_fastv2_soft_aes_sandybridge_asm(ScratchPad* ctx0); @@ -769,7 +771,8 @@ public: size_t size, uint8_t* __restrict__ output, ScratchPad** __restrict__ 
scratchPad, - AsmOptimization asmOptimization) + AsmOptimization asmOptimization, + PowVariant powVariant) { // not supported } @@ -887,7 +890,8 @@ public: size_t size, uint8_t* __restrict__ output, ScratchPad** __restrict__ scratchPad, - AsmOptimization asmOptimization) + AsmOptimization asmOptimization, + PowVariant powVariant) { // not supported } @@ -1421,7 +1425,8 @@ public: size_t size, uint8_t* __restrict__ output, ScratchPad** __restrict__ scratchPad, - AsmOptimization asmOptimization) + AsmOptimization asmOptimization, + PowVariant powVariant) { keccak(static_cast<const uint8_t*>(input), (int) size, scratchPad[0]->state, 200); @@ -1443,7 +1448,7 @@ public: scratchPad[0]->t_fn = (const uint32_t*)saes_table; if (ITERATIONS == 0x40000) { - if (MASK == 0x1FFFF0) { + if (powVariant == PowVariant::POW_MSR) { cnv1_main_loop_fast_soft_aes_sandybridge_asm(scratchPad[0]); } else { cnv1_main_loop_lite_soft_aes_sandybridge_asm(scratchPad[0]); @@ -1451,11 +1456,15 @@ public: } else if (ITERATIONS == 0x20000) { cnv1_main_loop_upx_soft_aes_sandybridge_asm(scratchPad[0]); } else { - cnv1_main_loop_soft_aes_sandybridge_asm(scratchPad[0]); + if (powVariant == PowVariant::POW_HOSP || powVariant == PowVariant::POW_RTO) { + cnv1_main_loop_rto_soft_aes_sandybridge_asm(scratchPad[0]); + } else { + cnv1_main_loop_soft_aes_sandybridge_asm(scratchPad[0]); + } } } else { if (ITERATIONS == 0x40000) { - if (MASK == 0x1FFFF0) { + if (powVariant == PowVariant::POW_MSR) { cnv1_main_loop_fast_sandybridge_asm(scratchPad[0]); } else { cnv1_main_loop_lite_sandybridge_asm(scratchPad[0]); @@ -1463,7 +1472,11 @@ public: } else if (ITERATIONS == 0x20000) { cnv1_main_loop_upx_sandybridge_asm(scratchPad[0]); } else { - cnv1_main_loop_sandybridge_asm(scratchPad[0]); + if (powVariant == PowVariant::POW_HOSP || powVariant == PowVariant::POW_RTO) { + cnv1_main_loop_rto_sandybridge_asm(scratchPad[0]); + } else { + cnv1_main_loop_sandybridge_asm(scratchPad[0]); + } } } #endif @@ -1551,7 +1564,8 @@ public: size_t size, 
uint8_t* __restrict__ output, ScratchPad** __restrict__ scratchPad, - AsmOptimization asmOptimization) + AsmOptimization asmOptimization, + PowVariant powVariant) { const uint8_t* l = scratchPad[0]->memory; uint64_t* h = reinterpret_cast<uint64_t*>(scratchPad[0]->state); @@ -2109,7 +2123,8 @@ public: size_t size, uint8_t* __restrict__ output, ScratchPad** __restrict__ scratchPad, - AsmOptimization asmOptimization) + AsmOptimization asmOptimization, + PowVariant powVariant) { // not supported } @@ -2297,7 +2312,8 @@ public: size_t size, uint8_t* __restrict__ output, ScratchPad** __restrict__ scratchPad, - AsmOptimization asmOptimization) + AsmOptimization asmOptimization, + PowVariant powVariant) { keccak((const uint8_t*) input, (int) size, scratchPad[0]->state, 200); keccak((const uint8_t*) input + size, (int) size, scratchPad[1]->state, 200); @@ -3104,7 +3120,8 @@ public: size_t size, uint8_t* __restrict__ output, ScratchPad** __restrict__ scratchPad, - AsmOptimization asmOptimization) + AsmOptimization asmOptimization, + PowVariant powVariant) { // not supported } @@ -3281,7 +3298,8 @@ public: size_t size, uint8_t* __restrict__ output, ScratchPad** __restrict__ scratchPad, - AsmOptimization asmOptimization) + AsmOptimization asmOptimization, + PowVariant powVariant) { // not supported } @@ -4315,7 +4333,8 @@ public: size_t size, uint8_t* __restrict__ output, ScratchPad** __restrict__ scratchPad, - AsmOptimization asmOptimization) + AsmOptimization asmOptimization, + PowVariant powVariant) { // not supported } @@ -4539,7 +4558,8 @@ public: size_t size, uint8_t* __restrict__ output, ScratchPad** __restrict__ scratchPad, - AsmOptimization asmOptimization) + AsmOptimization asmOptimization, + PowVariant powVariant) { // not supported } @@ -5196,7 +5216,8 @@ public: size_t size, uint8_t* __restrict__ output, ScratchPad** __restrict__ scratchPad, - AsmOptimization asmOptimization) + AsmOptimization asmOptimization, + PowVariant powVariant) { // not supported } @@ -5464,7 +5485,8 
@@ public: size_t size, uint8_t* __restrict__ output, ScratchPad** __restrict__ scratchPad, - AsmOptimization asmOptimization) + AsmOptimization asmOptimization, + PowVariant powVariant) { // not supported } diff --git a/src/crypto/asm/cn_main_loop.S b/src/crypto/asm/cn_main_loop.S index b2f6bc67..5dc80bea 100644 --- a/src/crypto/asm/cn_main_loop.S +++ b/src/crypto/asm/cn_main_loop.S @@ -11,6 +11,7 @@ .global FN_PREFIX(cnv1_main_loop_lite_sandybridge_asm) .global FN_PREFIX(cnv1_main_loop_fast_sandybridge_asm) .global FN_PREFIX(cnv1_main_loop_upx_sandybridge_asm) +.global FN_PREFIX(cnv1_main_loop_rto_sandybridge_asm) .global FN_PREFIX(cnv2_main_loop_ivybridge_asm) .global FN_PREFIX(cnv2_main_loop_ryzen_asm) @@ -31,6 +32,7 @@ .global FN_PREFIX(cnv1_main_loop_lite_soft_aes_sandybridge_asm) .global FN_PREFIX(cnv1_main_loop_fast_soft_aes_sandybridge_asm) .global FN_PREFIX(cnv1_main_loop_upx_soft_aes_sandybridge_asm) +.global FN_PREFIX(cnv1_main_loop_rto_soft_aes_sandybridge_asm) .global FN_PREFIX(cnv2_main_loop_soft_aes_sandybridge_asm) .global FN_PREFIX(cnv2_main_loop_fastv2_soft_aes_sandybridge_asm) @@ -84,6 +86,18 @@ FN_PREFIX(cnv1_main_loop_upx_sandybridge_asm): add rsp, 48 ret 0 +#ifdef __APPLE__ +ALIGN 16 +#else +ALIGN 64 +#endif +FN_PREFIX(cnv1_main_loop_rto_sandybridge_asm): + sub rsp, 48 + mov rcx, rdi + #include "cnv1_main_loop_rto_sandybridge.inc" + add rsp, 48 + ret 0 + #ifdef __APPLE__ ALIGN 16 #else @@ -279,6 +293,18 @@ FN_PREFIX(cnv1_main_loop_upx_soft_aes_sandybridge_asm): add rsp, 48 ret 0 +#ifdef __APPLE__ +ALIGN 16 +#else +ALIGN 64 +#endif +FN_PREFIX(cnv1_main_loop_rto_soft_aes_sandybridge_asm): + sub rsp, 48 + mov rcx, rdi + #include "cnv1_main_loop_rto_soft_aes_sandybridge.inc" + add rsp, 48 + ret 0 + #ifdef __APPLE__ ALIGN 16 #else diff --git a/src/crypto/asm/cnv1_main_loop_rto_sandybridge.inc b/src/crypto/asm/cnv1_main_loop_rto_sandybridge.inc new file mode 100644 index 00000000..c4a3df06 --- /dev/null +++ 
b/src/crypto/asm/cnv1_main_loop_rto_sandybridge.inc @@ -0,0 +1,75 @@ + mov QWORD PTR [rsp+8], rbx + mov QWORD PTR [rsp+16], rbp + mov QWORD PTR [rsp+24], rsi + mov QWORD PTR [rsp+32], rdi + push r14 + push r15 + mov rax, QWORD PTR [rcx+48] + mov ebp, 524288 + xor rax, QWORD PTR [rcx+16] + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + movq xmm3, rax + mov rax, QWORD PTR [rcx+256] + mov rdi, QWORD PTR [rcx+40] + movq xmm0, rdx + xor rdi, QWORD PTR [rcx+8] + mov rdx, r8 + mov r15, QWORD PTR [rcx+264] + and edx, 2097136 + mov r14, QWORD PTR [rax+35] + xor r14, QWORD PTR [rcx+192] + mov rsi, QWORD PTR [rcx+224] + punpcklqdq xmm3, xmm0 + movdqu xmm2, XMMWORD PTR [rdx+rsi] + + #ifdef __APPLE__ + ALIGN 16 + #else + ALIGN 64 + #endif +cnv1_main_loop_rto_sandybridge: + movq xmm0, rdi + movq xmm1, r8 + punpcklqdq xmm1, xmm0 + aesenc xmm2, xmm1 + movq r10, xmm2 + mov r9d, r10d + and r9d, 2097136 + add r9, rsi + movdqa xmm0, xmm2 + pxor xmm0, xmm3 + movdqa xmm3, xmm2 + movdqu XMMWORD PTR [rdx+rsi], xmm0 + psrldq xmm0, 11 + movq rax, xmm0 + movzx eax, al + movzx eax, BYTE PTR [rax+r15] + mov BYTE PTR [rsi+rdx+11], al + mov rbx, QWORD PTR [r9] + mov r11, QWORD PTR [r9+8] + mov rax, rbx + mul r10 + add r8, rdx + mov QWORD PTR [r9], r8 + add rdi, rax + mov rax, r14 + xor rax, rdi + xor rax, r8 + mov QWORD PTR [r9+8], rax + xor r8, rbx + mov rdx, r8 + and edx, 2097136 + movdqu xmm2, XMMWORD PTR [rdx+rsi] + xor rdi, r11 + dec ebp + jne cnv1_main_loop_rto_sandybridge + + mov rbx, QWORD PTR [rsp+24] + mov rbp, QWORD PTR [rsp+32] + mov rsi, QWORD PTR [rsp+40] + mov rdi, QWORD PTR [rsp+48] + pop r15 + pop r14 diff --git a/src/crypto/asm/cnv1_main_loop_rto_soft_aes_sandybridge.inc b/src/crypto/asm/cnv1_main_loop_rto_soft_aes_sandybridge.inc new file mode 100644 index 00000000..43b3488f --- /dev/null +++ b/src/crypto/asm/cnv1_main_loop_rto_soft_aes_sandybridge.inc @@ -0,0 +1,167 @@ + push rbx + push rbp + push rsi + push 
rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 72 + + movaps XMMWORD PTR [rsp], xmm6 + movaps XMMWORD PTR [rsp+16], xmm7 + movaps XMMWORD PTR [rsp+32], xmm8 + movaps XMMWORD PTR [rsp+48], xmm9 + + mov rax, QWORD PTR [rcx+48] + xor rax, QWORD PTR [rcx+16] + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + movq xmm4, rax + mov rax, QWORD PTR [rcx+256] + mov r13, QWORD PTR [rcx+40] + movq xmm0, rdx + xor r13, QWORD PTR [rcx+8] + mov rdx, r8 + mov rdi, QWORD PTR [rcx+224] + and edx, 2097136 + mov rax, QWORD PTR [rax+35] + xor rax, QWORD PTR [rcx+192] + movq xmm5, rax + movq xmm8, rdi + punpcklqdq xmm4, xmm0 + mov QWORD PTR [rsp+64], rdx + + movq xmm6, rcx + mov rax, QWORD PTR [rcx+264] + movq xmm7, rax + + mov eax, 524288 + + #ifdef __APPLE__ + ALIGN 16 + #else + ALIGN 64 + #endif +cnv1_main_loop_rto_soft_aes_sandybridge: + movq xmm9, rax + mov r12, QWORD PTR [rcx+272] + mov esi, DWORD PTR [rdx+rdi] + mov r10d, DWORD PTR [rdx+rdi+4] + mov ebp, DWORD PTR [rdx+rdi+12] + mov r14d, DWORD PTR [rdx+rdi+8] + mov rdx, QWORD PTR [rsp+64] + movzx ecx, sil + shr esi, 8 + mov r15d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + mov edi, DWORD PTR [r12+rcx*4] + movzx ecx, r14b + shr r14d, 8 + mov ebx, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + shr ebp, 8 + mov r9d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + xor r15d, DWORD PTR [r12+rcx*4+1024] + movzx ecx, r14b + shr r14d, 8 + mov eax, r14d + shr eax, 8 + xor edi, DWORD PTR [r12+rcx*4+1024] + add eax, 256 + movzx ecx, bpl + shr ebp, 8 + xor ebx, DWORD PTR [r12+rcx*4+1024] + movzx ecx, sil + shr esi, 8 + xor r9d, DWORD PTR [r12+rcx*4+1024] + add r12, 2048 + movzx ecx, r10b + shr r10d, 8 + add r10d, 256 + mov r11d, DWORD PTR [r12+rax*4] + xor r11d, DWORD PTR [r12+rcx*4] + xor r11d, r9d + movzx ecx, sil + mov r10d, DWORD PTR [r12+r10*4] + shr esi, 8 + add esi, 256 + xor r10d, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + xor r10d, ebx + shr 
ebp, 8 + add ebp, 256 + movd xmm1, r11d + mov r9d, DWORD PTR [r12+rcx*4] + xor r9d, DWORD PTR [r12+rsi*4] + mov eax, DWORD PTR [r12+rbp*4] + xor r9d, edi + movq rdi, xmm8 + movzx ecx, r14b + movd xmm0, r10d + movd xmm2, r9d + punpckldq xmm2, xmm1 + movq xmm1, r8 + xor eax, DWORD PTR [r12+rcx*4] + xor eax, r15d + movd xmm3, eax + movq rax, xmm7 + punpckldq xmm3, xmm0 + movq xmm0, r13 + punpcklqdq xmm1, xmm0 + punpckldq xmm3, xmm2 + pxor xmm3, xmm1 + movq r9, xmm3 + mov r10d, r9d + and r10d, 2097136 + movdqa xmm0, xmm3 + pxor xmm0, xmm4 + movdqu XMMWORD PTR [rdx+rdi], xmm0 + psrldq xmm0, 11 + movq rcx, xmm0 + movzx ecx, cl + mov cl, BYTE PTR [rcx+rax] + mov BYTE PTR [rdi+rdx+11], cl + mov rbx, QWORD PTR [r10+rdi] + mov rcx, r9 + lea r9, QWORD PTR [r10+rdi] + mov r11, QWORD PTR [r9+8] + mov rax, rbx + movdqa xmm4, xmm3 + mul rcx + movq rcx, xmm6 + add r8, rdx + add r13, rax + movq rax, xmm5 + xor rax, r13 + mov QWORD PTR [r9], r8 + xor rax, r8 + xor r8, rbx + mov QWORD PTR [r9+8], rax + movq rax, xmm9 + mov rdx, r8 + xor r13, r11 + and edx, 2097136 + mov QWORD PTR [rsp+64], rdx + sub eax, 1 + jne cnv1_main_loop_rto_soft_aes_sandybridge + + movaps xmm6, XMMWORD PTR [rsp] + movaps xmm7, XMMWORD PTR [rsp+16] + movaps xmm8, XMMWORD PTR [rsp+32] + movaps xmm9, XMMWORD PTR [rsp+48] + + add rsp, 72 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + pop rbx diff --git a/src/crypto/asm/win/cn_main_loop.asm b/src/crypto/asm/win/cn_main_loop.asm index e01e3134..a23addd9 100644 --- a/src/crypto/asm/win/cn_main_loop.asm +++ b/src/crypto/asm/win/cn_main_loop.asm @@ -4,6 +4,7 @@ PUBLIC cnv1_main_loop_sandybridge_asm PUBLIC cnv1_main_loop_lite_sandybridge_asm PUBLIC cnv1_main_loop_fast_sandybridge_asm PUBLIC cnv1_main_loop_upx_sandybridge_asm +PUBLIC cnv1_main_loop_rto_sandybridge_asm PUBLIC cnv2_main_loop_ivybridge_asm PUBLIC cnv2_main_loop_ryzen_asm @@ -24,6 +25,7 @@ PUBLIC cnv1_main_loop_soft_aes_sandybridge_asm PUBLIC 
cnv1_main_loop_lite_soft_aes_sandybridge_asm PUBLIC cnv1_main_loop_fast_soft_aes_sandybridge_asm PUBLIC cnv1_main_loop_upx_soft_aes_sandybridge_asm +PUBLIC cnv1_main_loop_rto_soft_aes_sandybridge_asm PUBLIC cnv2_main_loop_soft_aes_sandybridge_asm PUBLIC cnv2_main_loop_fastv2_soft_aes_sandybridge_asm @@ -53,6 +55,12 @@ cnv1_main_loop_upx_sandybridge_asm PROC ret 0 cnv1_main_loop_upx_sandybridge_asm ENDP +ALIGN 64 +cnv1_main_loop_rto_sandybridge_asm PROC + INCLUDE cnv1_main_loop_rto_sandybridge.inc + ret 0 +cnv1_main_loop_rto_sandybridge_asm ENDP + ALIGN 64 cnv2_main_loop_ivybridge_asm PROC INCLUDE cnv2_main_loop_ivybridge.inc @@ -149,6 +157,12 @@ cnv1_main_loop_upx_soft_aes_sandybridge_asm PROC ret 0 cnv1_main_loop_upx_soft_aes_sandybridge_asm ENDP +ALIGN 64 +cnv1_main_loop_rto_soft_aes_sandybridge_asm PROC + INCLUDE cnv1_main_loop_rto_soft_aes_sandybridge.inc + ret 0 +cnv1_main_loop_rto_soft_aes_sandybridge_asm ENDP + ALIGN 64 cnv2_main_loop_soft_aes_sandybridge_asm PROC INCLUDE cnv2_main_loop_soft_aes_sandybridge.inc diff --git a/src/crypto/asm/win/cn_main_loop_win_gcc.S b/src/crypto/asm/win/cn_main_loop_win_gcc.S index ef48a077..7bf3c668 100644 --- a/src/crypto/asm/win/cn_main_loop_win_gcc.S +++ b/src/crypto/asm/win/cn_main_loop_win_gcc.S @@ -7,6 +7,7 @@ .global FN_PREFIX(cnv1_main_loop_lite_sandybridge_asm) .global FN_PREFIX(cnv1_main_loop_fast_sandybridge_asm) .global FN_PREFIX(cnv1_main_loop_upx_sandybridge_asm) +.global FN_PREFIX(cnv1_main_loop_rto_sandybridge_asm) .global FN_PREFIX(cnv2_main_loop_ivybridge_asm) .global FN_PREFIX(cnv2_main_loop_ryzen_asm) @@ -27,6 +28,7 @@ .global FN_PREFIX(cnv1_main_loop_lite_soft_aes_sandybridge_asm) .global FN_PREFIX(cnv1_main_loop_fast_soft_aes_sandybridge_asm) .global FN_PREFIX(cnv1_main_loop_upx_soft_aes_sandybridge_asm) +.global FN_PREFIX(cnv1_main_loop_rto_soft_aes_sandybridge_asm) .global FN_PREFIX(cnv2_main_loop_soft_aes_sandybridge_asm) .global FN_PREFIX(cnv2_main_loop_fastv2_soft_aes_sandybridge_asm) @@ -52,6 
+54,11 @@ FN_PREFIX(cnv1_main_loop_upx_sandybridge_asm): #include "../cnv1_main_loop_upx_sandybridge.inc" ret 0 +ALIGN 64 +FN_PREFIX(cnv1_main_loop_rto_sandybridge_asm): + #include "../cnv1_main_loop_rto_sandybridge.inc" + ret 0 + ALIGN 64 FN_PREFIX(cnv2_main_loop_ivybridge_asm): #include "../cnv2_main_loop_ivybridge.inc" @@ -132,6 +139,11 @@ FN_PREFIX(cnv1_main_loop_upx_soft_aes_sandybridge_asm): #include "../cnv1_main_loop_upx_soft_aes_sandybridge.inc" ret 0 +ALIGN 64 +FN_PREFIX(cnv1_main_loop_rto_soft_aes_sandybridge_asm): + #include "../cnv1_main_loop_rto_soft_aes_sandybridge.inc" + ret 0 + ALIGN 64 FN_PREFIX(cnv2_main_loop_soft_aes_sandybridge_asm): #include "../cnv2_main_loop_soft_aes_sandybridge.inc" diff --git a/src/crypto/asm/win/cnv1_main_loop_rto_sandybridge.inc b/src/crypto/asm/win/cnv1_main_loop_rto_sandybridge.inc new file mode 100644 index 00000000..e3d74af7 --- /dev/null +++ b/src/crypto/asm/win/cnv1_main_loop_rto_sandybridge.inc @@ -0,0 +1,71 @@ + mov QWORD PTR [rsp+8], rbx + mov QWORD PTR [rsp+16], rbp + mov QWORD PTR [rsp+24], rsi + mov QWORD PTR [rsp+32], rdi + push r14 + push r15 + mov rax, QWORD PTR [rcx+48] + mov ebp, 524288 + xor rax, QWORD PTR [rcx+16] + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + movq xmm3, rax + mov rax, QWORD PTR [rcx+256] + mov rdi, QWORD PTR [rcx+40] + movq xmm0, rdx + xor rdi, QWORD PTR [rcx+8] + mov rdx, r8 + mov r15, QWORD PTR [rcx+264] + and edx, 2097136 + mov r14, QWORD PTR [rax+35] + xor r14, QWORD PTR [rcx+192] + mov rsi, QWORD PTR [rcx+224] + punpcklqdq xmm3, xmm0 + movdqu xmm2, XMMWORD PTR [rdx+rsi] + + ALIGN 64 +cnv1_main_loop_rto_sandybridge: + movq xmm0, rdi + movq xmm1, r8 + punpcklqdq xmm1, xmm0 + aesenc xmm2, xmm1 + movq r10, xmm2 + mov r9d, r10d + and r9d, 2097136 + add r9, rsi + movdqa xmm0, xmm2 + pxor xmm0, xmm3 + movdqa xmm3, xmm2 + movdqu XMMWORD PTR [rdx+rsi], xmm0 + psrldq xmm0, 11 + movq rax, xmm0 + movzx eax, al + movzx eax, 
BYTE PTR [rax+r15] + mov BYTE PTR [rsi+rdx+11], al + mov rbx, QWORD PTR [r9] + mov r11, QWORD PTR [r9+8] + mov rax, rbx + mul r10 + add r8, rdx + mov QWORD PTR [r9], r8 + add rdi, rax + mov rax, r14 + xor rax, rdi + xor rax, r8 + mov QWORD PTR [r9+8], rax + xor r8, rbx + mov rdx, r8 + and edx, 2097136 + movdqu xmm2, XMMWORD PTR [rdx+rsi] + xor rdi, r11 + dec ebp + jne cnv1_main_loop_rto_sandybridge + + mov rbx, QWORD PTR [rsp+24] + mov rbp, QWORD PTR [rsp+32] + mov rsi, QWORD PTR [rsp+40] + mov rdi, QWORD PTR [rsp+48] + pop r15 + pop r14 diff --git a/src/crypto/asm/win/cnv1_main_loop_rto_soft_aes_sandybridge.inc b/src/crypto/asm/win/cnv1_main_loop_rto_soft_aes_sandybridge.inc new file mode 100644 index 00000000..874d51b6 --- /dev/null +++ b/src/crypto/asm/win/cnv1_main_loop_rto_soft_aes_sandybridge.inc @@ -0,0 +1,163 @@ + push rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 72 + + movaps XMMWORD PTR [rsp], xmm6 + movaps XMMWORD PTR [rsp+16], xmm7 + movaps XMMWORD PTR [rsp+32], xmm8 + movaps XMMWORD PTR [rsp+48], xmm9 + + mov rax, QWORD PTR [rcx+48] + xor rax, QWORD PTR [rcx+16] + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + movq xmm4, rax + mov rax, QWORD PTR [rcx+256] + mov r13, QWORD PTR [rcx+40] + movq xmm0, rdx + xor r13, QWORD PTR [rcx+8] + mov rdx, r8 + mov rdi, QWORD PTR [rcx+224] + and edx, 2097136 + mov rax, QWORD PTR [rax+35] + xor rax, QWORD PTR [rcx+192] + movq xmm5, rax + movq xmm8, rdi + punpcklqdq xmm4, xmm0 + mov QWORD PTR [rsp+64], rdx + + movq xmm6, rcx + mov rax, QWORD PTR [rcx+264] + movq xmm7, rax + + mov eax, 524288 + + ALIGN 64 +cnv1_main_loop_rto_soft_aes_sandybridge: + movq xmm9, rax + mov r12, QWORD PTR [rcx+272] + mov esi, DWORD PTR [rdx+rdi] + mov r10d, DWORD PTR [rdx+rdi+4] + mov ebp, DWORD PTR [rdx+rdi+12] + mov r14d, DWORD PTR [rdx+rdi+8] + mov rdx, QWORD PTR [rsp+64] + movzx ecx, sil + shr esi, 8 + mov r15d, DWORD PTR 
[r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + mov edi, DWORD PTR [r12+rcx*4] + movzx ecx, r14b + shr r14d, 8 + mov ebx, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + shr ebp, 8 + mov r9d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + xor r15d, DWORD PTR [r12+rcx*4+1024] + movzx ecx, r14b + shr r14d, 8 + mov eax, r14d + shr eax, 8 + xor edi, DWORD PTR [r12+rcx*4+1024] + add eax, 256 + movzx ecx, bpl + shr ebp, 8 + xor ebx, DWORD PTR [r12+rcx*4+1024] + movzx ecx, sil + shr esi, 8 + xor r9d, DWORD PTR [r12+rcx*4+1024] + add r12, 2048 + movzx ecx, r10b + shr r10d, 8 + add r10d, 256 + mov r11d, DWORD PTR [r12+rax*4] + xor r11d, DWORD PTR [r12+rcx*4] + xor r11d, r9d + movzx ecx, sil + mov r10d, DWORD PTR [r12+r10*4] + shr esi, 8 + add esi, 256 + xor r10d, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + xor r10d, ebx + shr ebp, 8 + add ebp, 256 + movd xmm1, r11d + mov r9d, DWORD PTR [r12+rcx*4] + xor r9d, DWORD PTR [r12+rsi*4] + mov eax, DWORD PTR [r12+rbp*4] + xor r9d, edi + movq rdi, xmm8 + movzx ecx, r14b + movd xmm0, r10d + movd xmm2, r9d + punpckldq xmm2, xmm1 + movq xmm1, r8 + xor eax, DWORD PTR [r12+rcx*4] + xor eax, r15d + movd xmm3, eax + movq rax, xmm7 + punpckldq xmm3, xmm0 + movq xmm0, r13 + punpcklqdq xmm1, xmm0 + punpckldq xmm3, xmm2 + pxor xmm3, xmm1 + movq r9, xmm3 + mov r10d, r9d + and r10d, 2097136 + movdqa xmm0, xmm3 + pxor xmm0, xmm4 + movdqu XMMWORD PTR [rdx+rdi], xmm0 + psrldq xmm0, 11 + movq rcx, xmm0 + movzx ecx, cl + mov cl, BYTE PTR [rcx+rax] + mov BYTE PTR [rdi+rdx+11], cl + mov rbx, QWORD PTR [r10+rdi] + mov rcx, r9 + lea r9, QWORD PTR [r10+rdi] + mov r11, QWORD PTR [r9+8] + mov rax, rbx + movdqa xmm4, xmm3 + mul rcx + movq rcx, xmm6 + add r8, rdx + add r13, rax + movq rax, xmm5 + xor rax, r13 + mov QWORD PTR [r9], r8 + xor rax, r8 + xor r8, rbx + mov QWORD PTR [r9+8], rax + movq rax, xmm9 + mov rdx, r8 + xor r13, r11 + and edx, 2097136 + mov QWORD PTR [rsp+64], rdx + sub eax, 1 + jne cnv1_main_loop_rto_soft_aes_sandybridge + + movaps xmm6, 
XMMWORD PTR [rsp] + movaps xmm7, XMMWORD PTR [rsp+16] + movaps xmm8, XMMWORD PTR [rsp+32] + movaps xmm9, XMMWORD PTR [rsp+48] + + add rsp, 72 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + pop rbx diff --git a/src/version.h b/src/version.h index 22594110..4be54f8f 100644 --- a/src/version.h +++ b/src/version.h @@ -36,14 +36,14 @@ #define APP_DESC "XMRigCC CPU miner" #define APP_COPYRIGHT "Copyright (C) 2017- BenDr0id" #endif -#define APP_VERSION "1.8.12b (based on XMRig)" +#define APP_VERSION "1.8.13 (based on XMRig)" #define APP_DOMAIN "" #define APP_SITE "https://github.com/Bendr0id/xmrigCC" #define APP_KIND "cpu" #define APP_VER_MAJOR 1 #define APP_VER_MINOR 8 -#define APP_VER_BUILD 12 +#define APP_VER_BUILD 13 #define APP_VER_REV 0 #ifndef NDEBUG