Integrated CN-UPX2/extremelite (#247)
* WIP * Added win/asm for upx2 * Added donation servers and fixed windows ASM variant * #1.9.2 preparation
This commit is contained in:
parent
1b0557d3b4
commit
ff4058a2a9
22 changed files with 349 additions and 173 deletions
|
@ -1,3 +1,6 @@
|
|||
# 1.9.2
|
||||
- Integrated cn-extremelite a.k.a upx2 (algo: "cryptonight-extremelite", variant: "upx2")
|
||||
- Integrated merged templates and replacement of @WORKER-ID@ in template assignment
|
||||
# 1.9.1
|
||||
- Fix coloring of outdated miners on Dashboard
|
||||
- Autodetect for fork of CN/R(variant: "auto"), Graft(variant: "rwz"), Zelerius(variant: "zls")
|
||||
|
|
|
@ -131,6 +131,29 @@ configure_file("src/crypto/asm/win/cnv2_main_loop_ryzen.inc.in" "src/crypto/asm/
|
|||
configure_file("src/crypto/asm/win/cnv2_double_main_loop_sandybridge.inc.in" "src/crypto/asm/win/cnv2_double_main_loop_ultralite_sandybridge.inc")
|
||||
configure_file("src/crypto/asm/win/cnv2_main_loop_soft_aes_sandybridge.inc.in" "src/crypto/asm/win/cnv2_main_loop_ultralite_soft_aes_sandybridge.inc")
|
||||
|
||||
# CN V2 RWZ
# Generates the "original" RWZ asm include files (single and double main loop,
# for both the default and the win/ asm directories) from the shared
# *_rwz_all.inc.in templates.
# ALGO / ITERATIONS / MASK are set right before the configure_file() calls;
# presumably the .inc.in templates reference them as placeholders (the
# templates are not visible here - verify).
|
||||
set(ALGO "original")
|
||||
set(ITERATIONS "393216") #0x60000
|
||||
set(MASK "2097136") #0x1FFFF0
|
||||
|
||||
configure_file("src/crypto/asm/cnv2_main_loop_rwz_all.inc.in" "src/crypto/asm/cnv2_main_loop_rwz_original_all.inc")
|
||||
configure_file("src/crypto/asm/cnv2_double_main_loop_rwz_all.inc.in" "src/crypto/asm/cnv2_double_main_loop_rwz_original_all.inc")
|
||||
|
||||
configure_file("src/crypto/asm/win/cnv2_main_loop_rwz_all.inc.in" "src/crypto/asm/win/cnv2_main_loop_rwz_original_all.inc")
|
||||
configure_file("src/crypto/asm/win/cnv2_double_main_loop_rwz_all.inc.in" "src/crypto/asm/win/cnv2_double_main_loop_rwz_original_all.inc")
|
||||
|
||||
|
||||
# CN V2 UPX2
# Generates the UPX2 (cn-extremelite) asm include files from the same
# *_rwz_all.inc.in templates used by the RWZ section, with a smaller
# iteration count and mask. Output files carry the "_upx2_" infix.
# ALGO / ITERATIONS / MASK are presumably consumed as placeholders by the
# .inc.in templates (not visible here - verify).
|
||||
set(ALGO "upx2")
|
||||
set(ITERATIONS "16384") #0x4000
|
||||
set(MASK "131056") #0x1FFF0
|
||||
|
||||
configure_file("src/crypto/asm/cnv2_main_loop_rwz_all.inc.in" "src/crypto/asm/cnv2_main_loop_rwz_upx2_all.inc")
|
||||
configure_file("src/crypto/asm/cnv2_double_main_loop_rwz_all.inc.in" "src/crypto/asm/cnv2_double_main_loop_rwz_upx2_all.inc")
|
||||
|
||||
configure_file("src/crypto/asm/win/cnv2_main_loop_rwz_all.inc.in" "src/crypto/asm/win/cnv2_main_loop_rwz_upx2_all.inc")
|
||||
configure_file("src/crypto/asm/win/cnv2_double_main_loop_rwz_all.inc.in" "src/crypto/asm/win/cnv2_double_main_loop_rwz_upx2_all.inc")
|
||||
|
||||
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
|
||||
enable_language(ASM_MASM)
|
||||
set(XMRIG_ASM_FILE "src/crypto/asm/win/cn_main_loop.asm"
|
||||
|
|
|
@ -67,6 +67,9 @@ void CpuImpl::optimizeParameters(size_t& threadsCount, size_t& hashFactor,
|
|||
size_t cache = availableCache();
|
||||
size_t algoBlockSize;
|
||||
switch (algo) {
|
||||
case Options::ALGO_CRYPTONIGHT_EXTREMELITE:
|
||||
algoBlockSize = 128;
|
||||
break;
|
||||
case Options::ALGO_CRYPTONIGHT_ULTRALITE:
|
||||
algoBlockSize = 256;
|
||||
break;
|
||||
|
|
|
@ -44,6 +44,9 @@ ScratchPadMem Mem::create(ScratchPad** scratchPads, int threadId)
|
|||
case Options::ALGO_CRYPTONIGHT_ULTRALITE:
|
||||
scratchPadSize = MEMORY_ULTRA_LITE;
|
||||
break;
|
||||
case Options::ALGO_CRYPTONIGHT_EXTREMELITE:
|
||||
scratchPadSize = MEMORY_EXTREME_LITE;
|
||||
break;
|
||||
case Options::ALGO_CRYPTONIGHT_SUPERLITE:
|
||||
scratchPadSize = MEMORY_SUPER_LITE;
|
||||
break;
|
||||
|
|
|
@ -308,6 +308,7 @@ static const char *algo_names[] = {
|
|||
"cryptonight-lite",
|
||||
"cryptonight-superlite",
|
||||
"cryptonight-ultralite",
|
||||
"cryptonight-extremelite",
|
||||
"cryptonight-heavy"
|
||||
};
|
||||
|
||||
|
@ -316,6 +317,7 @@ static const char *algo_short_names[] = {
|
|||
"cn-lite",
|
||||
"cn-superlite",
|
||||
"cn-ultralite",
|
||||
"cn-extremelite",
|
||||
"cn-heavy"
|
||||
};
|
||||
|
||||
|
@ -337,7 +339,8 @@ constexpr static const char *pow_variant_names[] = {
|
|||
"hosp",
|
||||
"wow",
|
||||
"r",
|
||||
"xcash"
|
||||
"xcash",
|
||||
"upx2"
|
||||
};
|
||||
|
||||
constexpr static const char *asm_optimization_names[] = {
|
||||
|
@ -1112,12 +1115,16 @@ bool Options::setAlgo(const char *algo)
|
|||
break;
|
||||
}
|
||||
|
||||
|
||||
if (i == ARRAY_SIZE(algo_names) - 1 && (!strcmp(algo, "cn-ultra-lite") || !strcmp(algo, "cryptonight-ultra-lite") || !strcmp(algo, "cryptonight-ultralight") || !strcmp(algo, "cryptonight-turtle") || !strcmp(algo, "cn-turtle") || !strcmp(algo, "cryptonight-pico") || !strcmp(algo, "cn-pico"))) {
|
||||
m_algo = ALGO_CRYPTONIGHT_ULTRALITE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == ARRAY_SIZE(algo_names) - 1 && (!strcmp(algo, "cn-extreme-lite") || !strcmp(algo, "cryptonight-extreme-lite") || !strcmp(algo, "cryptonight-extremelight") || !strcmp(algo, "cryptonight-upx2") || !strcmp(algo, "cn-upx2") || !strcmp(algo, "cryptonight-femto") || !strcmp(algo, "cn-femto"))) {
|
||||
m_algo = ALGO_CRYPTONIGHT_EXTREMELITE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == ARRAY_SIZE(algo_names) - 1 && (!strcmp(algo, "cryptonight-lite-ipbc") || !strcmp(algo, "cryptonight-light-ipbc") || !strcmp(algo, "cn-lite-ipbc"))) {
|
||||
showDeprecateWarning("cryptonight-light-ipbc", "cryptonight-light (with variant \"ipbc\")");
|
||||
m_algo = ALGO_CRYPTONIGHT_LITE;
|
||||
|
@ -1215,6 +1222,11 @@ bool Options::parsePowVariant(const char *powVariant)
|
|||
break;
|
||||
}
|
||||
|
||||
if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "upx2") || !strcmp(powVariant, "upxv2") || !strcmp(powVariant, "femto"))) {
|
||||
m_powVariant = POW_UPX2;
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "hosp") || !strcmp(powVariant, "hospital"))) {
|
||||
m_powVariant = POW_HOSP;
|
||||
break;
|
||||
|
|
|
@ -50,6 +50,7 @@ public:
|
|||
ALGO_CRYPTONIGHT_LITE, /* CryptoNight-Lite (1MB ScratchPad) */
|
||||
ALGO_CRYPTONIGHT_SUPERLITE, /* CryptoNight-Superlite (512KB ScratchPad) */
|
||||
ALGO_CRYPTONIGHT_ULTRALITE, /* CryptoNight-Ultralite (256KB ScratchPad) */
|
||||
ALGO_CRYPTONIGHT_EXTREMELITE, /* CryptoNight-Extremelite (128KB ScratchPad) */
|
||||
ALGO_CRYPTONIGHT_HEAVY, /* CryptoNight-Heavy (4MB ScratchPad) */
|
||||
};
|
||||
|
||||
|
|
|
@ -44,6 +44,7 @@ enum PowVariant
|
|||
POW_DOUBLE,
|
||||
POW_ZELERIUS,
|
||||
POW_RWZ,
|
||||
POW_UPX2,
|
||||
LAST_ITEM
|
||||
};
|
||||
|
||||
|
@ -89,6 +90,8 @@ inline std::string getPowVariantName(PowVariant powVariant)
|
|||
return "zls";
|
||||
case POW_RWZ:
|
||||
return "rwz";
|
||||
case POW_UPX2:
|
||||
return "upx2";
|
||||
case POW_AUTODETECT:
|
||||
default:
|
||||
return "-1";
|
||||
|
@ -174,6 +177,8 @@ inline PowVariant parseVariant(const std::string variant)
|
|||
powVariant = PowVariant::POW_ZELERIUS;
|
||||
} else if (variant == "rwz" || variant == "graft") {
|
||||
powVariant = PowVariant::POW_RWZ;
|
||||
} else if (variant == "upx2") {
|
||||
powVariant = PowVariant::POW_UPX2;
|
||||
}
|
||||
|
||||
return powVariant;
|
||||
|
|
|
@ -398,6 +398,26 @@ static void cryptonight_ultra_lite_softaes(AsmOptimization asmOptimization, uint
|
|||
#endif
|
||||
}
|
||||
|
||||
// Hash entry point for cn-extremelite (UPX2) without software AES emulation
// (fifth template argument is `false`; the softaes sibling passes `true`).
//
// NUM_HASH_BLOCKS is forwarded as the last template argument of
// CryptoNightMultiHash. `input`/`size`/`output`/`scratchPad` are forwarded
// unchanged to the selected hashPowV3 / hashPowV3_asm call.
// `height` and `variant` are not used by this function: the variant is fixed
// to POW_UPX2 through the template argument.
// 0x4000 and 0x1FFF0 are presumably the iteration count and the scratchpad
// address mask - TODO confirm against CryptoNightMultiHash.
//
// NOTE(review): when XMRIG_ARMv7 is defined the whole body is compiled out,
// so the function returns without touching `output`.
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_extreme_lite_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
# if !defined(XMRIG_ARMv7)
|
||||
// Other ARM builds: always use hashPowV3, asmOptimization is ignored.
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x4000, POW_DEFAULT_INDEX_SHIFT, MEMORY_EXTREME_LITE, 0x1FFF0, false, POW_UPX2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
#else
|
||||
// The asm variant is only selected for 1 or 2 hash blocks and when asm
// optimization is not switched off; everything else uses hashPowV3.
if ((asmOptimization != AsmOptimization::ASM_OFF && NUM_HASH_BLOCKS <= 2)) {
|
||||
CryptoNightMultiHash<0x4000, POW_DEFAULT_INDEX_SHIFT, MEMORY_EXTREME_LITE, 0x1FFF0, false, POW_UPX2, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x4000, POW_DEFAULT_INDEX_SHIFT, MEMORY_EXTREME_LITE, 0x1FFF0, false, POW_UPX2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
# endif
|
||||
}
|
||||
|
||||
// Hash entry point for cn-extremelite (UPX2) with the fifth template argument
// of CryptoNightMultiHash set to `true` (presumably the software-AES switch -
// confirm against CryptoNightMultiHash).
//
// Always calls hashPowV3; there is no asm path here, so `asmOptimization` is
// unused, as are `height` and `variant` (the variant is fixed to POW_UPX2 via
// the template argument). `input`/`size`/`output`/`scratchPad` are forwarded
// unchanged.
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_extreme_lite_softaes(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
CryptoNightMultiHash<0x4000, POW_DEFAULT_INDEX_SHIFT, MEMORY_EXTREME_LITE, 0x1FFF0, true, POW_UPX2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
}
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_heavy_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
# if !defined(XMRIG_ARMv7)
|
||||
|
@ -464,6 +484,14 @@ void setCryptoNightHashMethods(Options::Algo algo, bool aesni)
|
|||
}
|
||||
break;
|
||||
|
||||
case Options::ALGO_CRYPTONIGHT_EXTREMELITE:
|
||||
if (aesni) {
|
||||
cryptonight_hash_ctx[HASH_FACTOR - 1] = cryptonight_extreme_lite_aesni<HASH_FACTOR>;
|
||||
} else {
|
||||
cryptonight_hash_ctx[HASH_FACTOR - 1] = cryptonight_extreme_lite_softaes<HASH_FACTOR>;
|
||||
}
|
||||
break;
|
||||
|
||||
case Options::ALGO_CRYPTONIGHT_HEAVY:
|
||||
if (aesni) {
|
||||
cryptonight_hash_ctx[HASH_FACTOR - 1] = cryptonight_heavy_aesni<HASH_FACTOR>;
|
||||
|
@ -546,6 +574,7 @@ bool CryptoNight::selfCheck(int algo)
|
|||
bool resultLite = true;
|
||||
bool resultSuperLite = true;
|
||||
bool resultUltraLite = true;
|
||||
bool resultExtremeLite = true;
|
||||
bool resultHeavy = true;
|
||||
|
||||
AsmOptimization asmOptimization = Options::i()->asmOptimization();
|
||||
|
@ -678,9 +707,7 @@ bool CryptoNight::selfCheck(int algo)
|
|||
resultLite = resultLite && memcmp(output, test_output_upx, 32) == 0;
|
||||
|
||||
} else if (algo == Options::ALGO_CRYPTONIGHT_SUPERLITE) {
|
||||
|
||||
return false;
|
||||
|
||||
} else if (algo == Options::ALGO_CRYPTONIGHT_ULTRALITE) {
|
||||
// cn ultralite (cnv8 + turtle)
|
||||
|
||||
|
@ -691,6 +718,16 @@ bool CryptoNight::selfCheck(int algo)
|
|||
cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_TURTLE, test_input, 76, output, scratchPads);
|
||||
resultUltraLite = resultUltraLite && memcmp(output, test_output_turtle, 64) == 0;
|
||||
#endif
|
||||
} else if (algo == Options::ALGO_CRYPTONIGHT_EXTREMELITE) {
|
||||
// cn extremelite (cnv8 + upx2)
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_UPX2, test_input, 76, output, scratchPads);
|
||||
resultExtremeLite = resultExtremeLite && memcmp(output, test_output_upx2, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_UPX2, test_input, 76, output, scratchPads);
|
||||
resultExtremeLite = resultExtremeLite && memcmp(output, test_output_upx2, 64) == 0;
|
||||
#endif
|
||||
} else {
|
||||
// cn v0 aka original
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V0,test_input, 76, output, scratchPads);
|
||||
|
@ -858,5 +895,5 @@ bool CryptoNight::selfCheck(int algo)
|
|||
_mm_free(scratchPads[i]);
|
||||
}
|
||||
|
||||
return result && resultLite && resultSuperLite && resultUltraLite && resultHeavy;
|
||||
return result && resultLite && resultSuperLite && resultUltraLite && resultExtremeLite && resultHeavy;
|
||||
}
|
|
@ -37,6 +37,7 @@
|
|||
#define MEMORY_LITE 1048576 /* 1 MiB */
|
||||
#define MEMORY_SUPER_LITE 524288 /* 512 KiB */
|
||||
#define MEMORY_ULTRA_LITE 262144 /* 256 KiB */
|
||||
#define MEMORY_EXTREME_LITE 131072 /* 128 KiB */
|
||||
#define MEMORY_HEAVY 4194304 /* 4 MiB */
|
||||
|
||||
#define POW_DEFAULT_INDEX_SHIFT 3
|
||||
|
|
|
@ -915,7 +915,7 @@ public:
|
|||
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
|
||||
}
|
||||
|
||||
SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
|
||||
|
||||
|
@ -929,7 +929,7 @@ public:
|
|||
|
||||
lo = __umul128(idx0, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
@ -1533,8 +1533,8 @@ public:
|
|||
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
|
||||
}
|
||||
|
||||
SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
|
||||
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
|
||||
|
@ -1550,7 +1550,7 @@ public:
|
|||
|
||||
lo = __umul128(idx0, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
@ -1573,7 +1573,7 @@ public:
|
|||
|
||||
lo = __umul128(idx1, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
@ -2479,9 +2479,9 @@ public:
|
|||
cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
|
||||
}
|
||||
|
||||
SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
|
||||
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
|
||||
|
@ -2499,7 +2499,7 @@ public:
|
|||
|
||||
lo = __umul128(idx0, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
@ -2522,7 +2522,7 @@ public:
|
|||
|
||||
lo = __umul128(idx1, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
@ -2545,7 +2545,7 @@ public:
|
|||
|
||||
lo = __umul128(idx2, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al2 += hi;
|
||||
ah2 += lo;
|
||||
|
@ -3750,10 +3750,10 @@ public:
|
|||
cx3 = _mm_aesenc_si128(cx3, ax3);
|
||||
}
|
||||
|
||||
SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
|
||||
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
|
||||
|
@ -3773,7 +3773,7 @@ public:
|
|||
|
||||
lo = __umul128(idx0, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
@ -3796,7 +3796,7 @@ public:
|
|||
|
||||
lo = __umul128(idx1, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
@ -3819,7 +3819,7 @@ public:
|
|||
|
||||
lo = __umul128(idx2, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al2 += hi;
|
||||
ah2 += lo;
|
||||
|
@ -3842,7 +3842,7 @@ public:
|
|||
|
||||
lo = __umul128(idx3, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al3 += hi;
|
||||
ah3 += lo;
|
||||
|
@ -4845,11 +4845,11 @@ public:
|
|||
cx4 = _mm_aesenc_si128(cx4, ax4);
|
||||
}
|
||||
|
||||
SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l4, (idx4&MASK), bx04, bx14, ax4, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l4, (idx4&MASK), bx04, bx14, ax4, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
|
||||
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
|
||||
|
@ -4871,7 +4871,7 @@ public:
|
|||
|
||||
lo = __umul128(idx0, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
@ -4894,7 +4894,7 @@ public:
|
|||
|
||||
lo = __umul128(idx1, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
@ -4917,7 +4917,7 @@ public:
|
|||
|
||||
lo = __umul128(idx2, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al2 += hi;
|
||||
ah2 += lo;
|
||||
|
@ -4940,7 +4940,7 @@ public:
|
|||
|
||||
lo = __umul128(idx3, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al3 += hi;
|
||||
ah3 += lo;
|
||||
|
@ -4963,7 +4963,7 @@ public:
|
|||
|
||||
lo = __umul128(idx4, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l4, (idx4&MASK), bx04, bx14, ax4, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l4, (idx4&MASK), bx04, bx14, ax4, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al4 += hi;
|
||||
ah4 += lo;
|
||||
|
|
|
@ -273,4 +273,13 @@ const static uint8_t test_output_turtle[64] = {
|
|||
0xE3, 0x54, 0x58, 0x2B, 0xCB, 0x93, 0xF8, 0x69, 0xD4, 0x29, 0x74, 0x4D, 0xE5, 0x72, 0x6A, 0x26
|
||||
};
|
||||
|
||||
|
||||
// CN-Extremelite/UPX2
|
||||
// Expected self-test output for cn-extremelite / UPX2 (64 bytes).
// CryptoNight::selfCheck compares the first 32 bytes against the result of
// the single-hash function and all 64 bytes against the result of the
// double-hash function, both run on test_input with a length of 76.
const static uint8_t test_output_upx2[64] = {
|
||||
0xAA, 0xBB, 0xB8, 0xED, 0x14, 0xA8, 0x35, 0xFA, 0x22, 0xCF, 0xB1, 0xB5, 0xDE, 0xA8, 0x72, 0xB0,
|
||||
0xA1, 0xD6, 0xCB, 0xD8, 0x46, 0xF4, 0x39, 0x1C, 0x0F, 0x01, 0xF3, 0x87, 0x5E, 0x3A, 0x37, 0x61,
|
||||
0x38, 0x59, 0x15, 0x72, 0xF8, 0x20, 0xD4, 0xDE, 0x25, 0x3C, 0xF5, 0x5A, 0x21, 0x92, 0xB6, 0x22,
|
||||
0xB0, 0x28, 0x9E, 0x2E, 0x5C, 0x36, 0x16, 0xE6, 0x1E, 0x78, 0x7A, 0x8F, 0xE4, 0x62, 0xEC, 0x5A
|
||||
};
|
||||
|
||||
#endif /* __CRYPTONIGHT_TEST_H__ */
|
||||
|
|
|
@ -95,8 +95,11 @@ extern "C"
|
|||
void cnv2_main_loop_zelerius_bulldozer_asm(ScratchPad* ctx0);
|
||||
void cnv2_double_main_loop_zelerius_sandybridge_asm(ScratchPad* ctx0, ScratchPad* ctx1);
|
||||
|
||||
void cnv2_main_loop_rwz_all_asm(ScratchPad* ctx0);
|
||||
void cnv2_double_main_loop_rwz_all_asm(ScratchPad* ctx0, ScratchPad* ctx1);
|
||||
void cnv2_main_loop_rwz_original_all_asm(ScratchPad* ctx0);
|
||||
void cnv2_double_main_loop_rwz_original_all_asm(ScratchPad* ctx0, ScratchPad* ctx1);
|
||||
|
||||
void cnv2_main_loop_rwz_upx2_all_asm(ScratchPad* ctx0);
|
||||
void cnv2_double_main_loop_rwz_upx2_all_asm(ScratchPad* ctx0, ScratchPad* ctx1);
|
||||
|
||||
void cnv1_main_loop_soft_aes_sandybridge_asm(ScratchPad* ctx0);
|
||||
void cnv1_main_loop_lite_soft_aes_sandybridge_asm(ScratchPad* ctx0);
|
||||
|
@ -994,7 +997,7 @@ public:
|
|||
cx = _mm_aesenc_si128(cx, ax);
|
||||
}
|
||||
|
||||
SHUFFLE_PHASE_1(l, (idx&MASK), bx0, bx1, ax, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l, (idx&MASK), bx0, bx1, ax, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
_mm_store_si128((__m128i*) &l[idx & MASK], _mm_xor_si128(bx0, cx));
|
||||
|
||||
|
@ -1008,7 +1011,7 @@ public:
|
|||
|
||||
lo = __umul128(idx, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l, (idx&MASK), bx0, bx1, ax, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l, (idx&MASK), bx0, bx1, ax, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al += hi; // two fence statements are overhead
|
||||
ah += lo;
|
||||
|
@ -1082,7 +1085,10 @@ public:
|
|||
cnv2_main_loop_zelerius_ivybridge_asm(scratchPad[0]);
|
||||
break;
|
||||
case POW_RWZ:
|
||||
cnv2_main_loop_rwz_all_asm(scratchPad[0]);
|
||||
cnv2_main_loop_rwz_original_all_asm(scratchPad[0]);
|
||||
break;
|
||||
case POW_UPX2:
|
||||
cnv2_main_loop_rwz_upx2_all_asm(scratchPad[0]);
|
||||
break;
|
||||
default:
|
||||
cnv2_main_loop_ivybridge_asm(scratchPad[0]);
|
||||
|
@ -1105,7 +1111,10 @@ public:
|
|||
cnv2_main_loop_zelerius_ryzen_asm(scratchPad[0]);
|
||||
break;
|
||||
case POW_RWZ:
|
||||
cnv2_main_loop_rwz_all_asm(scratchPad[0]);
|
||||
cnv2_main_loop_rwz_original_all_asm(scratchPad[0]);
|
||||
break;
|
||||
case POW_UPX2:
|
||||
cnv2_main_loop_rwz_upx2_all_asm(scratchPad[0]);
|
||||
break;
|
||||
default:
|
||||
cnv2_main_loop_ryzen_asm(scratchPad[0]);
|
||||
|
@ -1127,7 +1136,10 @@ public:
|
|||
cnv2_main_loop_zelerius_bulldozer_asm(scratchPad[0]);
|
||||
break;
|
||||
case POW_RWZ:
|
||||
cnv2_main_loop_rwz_all_asm(scratchPad[0]);
|
||||
cnv2_main_loop_rwz_original_all_asm(scratchPad[0]);
|
||||
break;
|
||||
case POW_UPX2:
|
||||
cnv2_main_loop_rwz_upx2_all_asm(scratchPad[0]);
|
||||
break;
|
||||
default:
|
||||
cnv2_main_loop_bulldozer_asm(scratchPad[0]);
|
||||
|
@ -1785,8 +1797,8 @@ public:
|
|||
cx1 = _mm_aesenc_si128(cx1, ax1);
|
||||
}
|
||||
|
||||
SHUFFLE_PHASE_1(l0, (idx0 & MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l1, (idx1 & MASK), bx01, bx11, ax1, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l0, (idx0 & MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l1, (idx1 & MASK), bx01, bx11, ax1, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
|
||||
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
|
||||
|
@ -1807,7 +1819,7 @@ public:
|
|||
|
||||
lo = __umul128(idx0, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l0, (idx0 & MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l0, (idx0 & MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
@ -1876,7 +1888,7 @@ public:
|
|||
|
||||
lo = __umul128(idx1, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l1, (idx1 & MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l1, (idx1 & MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
@ -1935,7 +1947,10 @@ public:
|
|||
cnv2_double_main_loop_zelerius_sandybridge_asm(scratchPad[0], scratchPad[1]);
|
||||
break;
|
||||
case POW_RWZ:
|
||||
cnv2_double_main_loop_rwz_all_asm(scratchPad[0], scratchPad[1]);
|
||||
cnv2_double_main_loop_rwz_original_all_asm(scratchPad[0], scratchPad[1]);
|
||||
break;
|
||||
case POW_UPX2:
|
||||
cnv2_double_main_loop_rwz_upx2_all_asm(scratchPad[0], scratchPad[1]);
|
||||
break;
|
||||
default:
|
||||
cnv2_double_main_loop_sandybridge_asm(scratchPad[0], scratchPad[1]);
|
||||
|
@ -2885,9 +2900,9 @@ public:
|
|||
cx2 = _mm_aesenc_si128(cx2, ax2);
|
||||
}
|
||||
|
||||
SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
|
||||
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
|
||||
|
@ -2905,7 +2920,7 @@ public:
|
|||
|
||||
lo = __umul128(idx0, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
@ -2928,7 +2943,7 @@ public:
|
|||
|
||||
lo = __umul128(idx1, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
@ -2950,7 +2965,7 @@ public:
|
|||
|
||||
lo = __umul128(idx2, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al2 += hi;
|
||||
ah2 += lo;
|
||||
|
@ -4167,10 +4182,10 @@ public:
|
|||
cx3 = _mm_aesenc_si128(cx3, ax3);
|
||||
}
|
||||
|
||||
SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
|
||||
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
|
||||
|
@ -4190,7 +4205,7 @@ public:
|
|||
|
||||
lo = __umul128(idx0, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
@ -4213,7 +4228,7 @@ public:
|
|||
|
||||
lo = __umul128(idx1, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
@ -4236,7 +4251,7 @@ public:
|
|||
|
||||
lo = __umul128(idx2, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al2 += hi;
|
||||
ah2 += lo;
|
||||
|
@ -4259,7 +4274,7 @@ public:
|
|||
|
||||
lo = __umul128(idx3, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al3 += hi;
|
||||
ah3 += lo;
|
||||
|
@ -5297,11 +5312,11 @@ public:
|
|||
cx4 = _mm_aesenc_si128(cx4, ax4);
|
||||
}
|
||||
|
||||
SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l4, (idx4&MASK), bx04, bx14, ax4, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
SHUFFLE_PHASE_1(l4, (idx4&MASK), bx04, bx14, ax4, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
|
||||
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
|
||||
|
@ -5323,7 +5338,7 @@ public:
|
|||
|
||||
lo = __umul128(idx0, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
@ -5346,7 +5361,7 @@ public:
|
|||
|
||||
lo = __umul128(idx1, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
@ -5369,7 +5384,7 @@ public:
|
|||
|
||||
lo = __umul128(idx2, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al2 += hi;
|
||||
ah2 += lo;
|
||||
|
@ -5392,7 +5407,7 @@ public:
|
|||
|
||||
lo = __umul128(idx3, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al3 += hi;
|
||||
ah3 += lo;
|
||||
|
@ -5415,7 +5430,7 @@ public:
|
|||
|
||||
lo = __umul128(idx4, cl, &hi);
|
||||
|
||||
SHUFFLE_PHASE_2(l4, (idx4&MASK), bx04, bx14, ax4, lo, hi, VARIANT == POW_RWZ)
|
||||
SHUFFLE_PHASE_2(l4, (idx4&MASK), bx04, bx14, ax4, lo, hi, VARIANT == POW_RWZ || VARIANT == POW_UPX2)
|
||||
|
||||
al4 += hi;
|
||||
ah4 += lo;
|
||||
|
|
|
@ -38,8 +38,11 @@
|
|||
.global FN_PREFIX(cnv2_main_loop_zelerius_bulldozer_asm)
|
||||
.global FN_PREFIX(cnv2_double_main_loop_zelerius_sandybridge_asm)
|
||||
|
||||
.global FN_PREFIX(cnv2_main_loop_rwz_all_asm)
|
||||
.global FN_PREFIX(cnv2_double_main_loop_rwz_all_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_rwz_original_all_asm)
|
||||
.global FN_PREFIX(cnv2_double_main_loop_rwz_original_all_asm)
|
||||
|
||||
.global FN_PREFIX(cnv2_main_loop_rwz_upx2_all_asm)
|
||||
.global FN_PREFIX(cnv2_double_main_loop_rwz_upx2_all_asm)
|
||||
|
||||
.global FN_PREFIX(cnv1_main_loop_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv1_main_loop_lite_soft_aes_sandybridge_asm)
|
||||
|
@ -363,10 +366,10 @@ ALIGN 16
|
|||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_main_loop_rwz_all_asm):
|
||||
FN_PREFIX(cnv2_main_loop_rwz_original_all_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cnv2_main_loop_rwz_all.inc"
|
||||
#include "cnv2_main_loop_rwz_original_all.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
|
@ -375,11 +378,36 @@ ALIGN 16
|
|||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_double_main_loop_rwz_all_asm):
|
||||
FN_PREFIX(cnv2_double_main_loop_rwz_original_all_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
mov rdx, rsi
|
||||
#include "cnv2_double_main_loop_rwz_all.inc"
|
||||
#include "cnv2_double_main_loop_rwz_original_all.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_main_loop_rwz_upx2_all_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cnv2_main_loop_rwz_upx2_all.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_double_main_loop_rwz_upx2_all_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
mov rdx, rsi
|
||||
#include "cnv2_double_main_loop_rwz_upx2_all.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
mov r10, QWORD PTR [rcx+32]
|
||||
mov r8, rcx
|
||||
xor r10, QWORD PTR [rcx]
|
||||
mov r14d, 393216
|
||||
mov r14d, ${ITERATIONS}
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rsi, QWORD PTR [rdx+224]
|
||||
|
@ -41,7 +41,7 @@
|
|||
movaps XMMWORD PTR [rsp+16], xmm15
|
||||
mov rdx, r10
|
||||
movq xmm4, QWORD PTR [r8+96]
|
||||
and edx, 2097136
|
||||
and edx, ${MASK}
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
xorps xmm13, xmm13
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
|
@ -83,7 +83,7 @@
|
|||
mov rcx, rdi
|
||||
mov QWORD PTR [rsp+264], r11
|
||||
movq xmm8, rax
|
||||
and ecx, 2097136
|
||||
and ecx, ${MASK}
|
||||
punpcklqdq xmm8, xmm0
|
||||
movq xmm0, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm4, xmm0
|
||||
|
@ -99,7 +99,7 @@
|
|||
#else
|
||||
ALIGN(64)
|
||||
#endif
|
||||
rwz_main_loop_double:
|
||||
rwz_main_loop_double_${ALGO}:
|
||||
movdqu xmm9, xmm15
|
||||
mov eax, edx
|
||||
mov ebx, edx
|
||||
|
@ -124,7 +124,7 @@ rwz_main_loop_double:
|
|||
|
||||
movq r11, xmm9
|
||||
mov edx, r11d
|
||||
and edx, 2097136
|
||||
and edx, ${MASK}
|
||||
movdqa xmm0, xmm9
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [r9], xmm0
|
||||
|
@ -155,7 +155,7 @@ rwz_main_loop_double:
|
|||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||
|
||||
movq rcx, xmm10
|
||||
and ecx, 2097136
|
||||
and ecx, ${MASK}
|
||||
|
||||
movdqa xmm0, xmm10
|
||||
pxor xmm0, xmm6
|
||||
|
@ -203,7 +203,7 @@ rwz_main_loop_double:
|
|||
mov QWORD PTR [rbx+8], rdx
|
||||
xor rdx, r9
|
||||
mov QWORD PTR [rsp+256], r11
|
||||
and r11d, 2097136
|
||||
and r11d, ${MASK}
|
||||
mov QWORD PTR [rsp+264], rdx
|
||||
mov QWORD PTR [rsp+8], r11
|
||||
lea r15, QWORD PTR [r11+r13]
|
||||
|
@ -253,8 +253,8 @@ rwz_main_loop_double:
|
|||
mov rbx, rax
|
||||
imul rax, rdx
|
||||
sub r11, rax
|
||||
js rwz_div_fix_1
|
||||
rwz_div_fix_1_ret:
|
||||
js rwz_div_fix_1_${ALGO}
|
||||
rwz_div_fix_1_${ALGO}_ret:
|
||||
|
||||
cvttsd2si rdx, xmm2
|
||||
mov rax, rdx
|
||||
|
@ -262,8 +262,8 @@ rwz_div_fix_1_ret:
|
|||
movd xmm2, r11d
|
||||
movd xmm4, ebx
|
||||
sub r8, rax
|
||||
js rwz_div_fix_2
|
||||
rwz_div_fix_2_ret:
|
||||
js rwz_div_fix_2_${ALGO}
|
||||
rwz_div_fix_2_${ALGO}_ret:
|
||||
|
||||
movd xmm1, r8d
|
||||
movd xmm0, edx
|
||||
|
@ -279,15 +279,15 @@ rwz_div_fix_2_ret:
|
|||
movdqa xmm5, xmm1
|
||||
psrlq xmm5, 19
|
||||
test r9, 524287
|
||||
je rwz_sqrt_fix_1
|
||||
rwz_sqrt_fix_1_ret:
|
||||
je rwz_sqrt_fix_1_${ALGO}
|
||||
rwz_sqrt_fix_1_${ALGO}_ret:
|
||||
|
||||
movq r9, xmm10
|
||||
psrldq xmm1, 8
|
||||
movq r8, xmm1
|
||||
test r8, 524287
|
||||
je rwz_sqrt_fix_2
|
||||
rwz_sqrt_fix_2_ret:
|
||||
je rwz_sqrt_fix_2_${ALGO}
|
||||
rwz_sqrt_fix_2_${ALGO}_ret:
|
||||
|
||||
mov r12d, ecx
|
||||
mov r8d, ecx
|
||||
|
@ -317,7 +317,7 @@ rwz_sqrt_fix_2_ret:
|
|||
mov QWORD PTR [r13], rdi
|
||||
xor rdi, r10
|
||||
mov ecx, edi
|
||||
and ecx, 2097136
|
||||
and ecx, ${MASK}
|
||||
lea r8, QWORD PTR [rcx+rsi]
|
||||
|
||||
mov rdx, QWORD PTR [r13+8]
|
||||
|
@ -335,7 +335,7 @@ rwz_sqrt_fix_2_ret:
|
|||
movdqa xmm6, xmm10
|
||||
mov r9, r15
|
||||
dec r14d
|
||||
jne rwz_main_loop_double
|
||||
jne rwz_main_loop_double_${ALGO}
|
||||
|
||||
ldmxcsr DWORD PTR [rsp+272]
|
||||
movaps xmm13, XMMWORD PTR [rsp+48]
|
||||
|
@ -358,19 +358,19 @@ rwz_sqrt_fix_2_ret:
|
|||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
jmp rwz_cnv2_double_mainloop_asm_endp
|
||||
jmp rwz_cnv2_double_mainloop_${ALGO}_asm_endp
|
||||
|
||||
rwz_div_fix_1:
|
||||
rwz_div_fix_1_${ALGO}:
|
||||
dec rbx
|
||||
add r11, rdx
|
||||
jmp rwz_div_fix_1_ret
|
||||
jmp rwz_div_fix_1_${ALGO}_ret
|
||||
|
||||
rwz_div_fix_2:
|
||||
rwz_div_fix_2_${ALGO}:
|
||||
dec rdx
|
||||
add r8, r9
|
||||
jmp rwz_div_fix_2_ret
|
||||
jmp rwz_div_fix_2_${ALGO}_ret
|
||||
|
||||
rwz_sqrt_fix_1:
|
||||
rwz_sqrt_fix_1_${ALGO}:
|
||||
movq r8, xmm3
|
||||
movdqa xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
|
@ -389,9 +389,9 @@ rwz_sqrt_fix_1:
|
|||
adc r9, 0
|
||||
movq xmm5, r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
jmp rwz_sqrt_fix_1_ret
|
||||
jmp rwz_sqrt_fix_1_${ALGO}_ret
|
||||
|
||||
rwz_sqrt_fix_2:
|
||||
rwz_sqrt_fix_2_${ALGO}:
|
||||
psrldq xmm3, 8
|
||||
movq r11, xmm3
|
||||
dec r8
|
||||
|
@ -409,6 +409,6 @@ rwz_sqrt_fix_2:
|
|||
adc r8, 0
|
||||
movq xmm0, r8
|
||||
punpcklqdq xmm5, xmm0
|
||||
jmp rwz_sqrt_fix_2_ret
|
||||
jmp rwz_sqrt_fix_2_${ALGO}_ret
|
||||
|
||||
rwz_cnv2_double_mainloop_asm_endp:
|
||||
rwz_cnv2_double_mainloop_${ALGO}_asm_endp:
|
|
@ -15,7 +15,7 @@
|
|||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r9, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov esi, 393216
|
||||
mov esi, ${ITERATIONS}
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
mov r13d, -2147483647
|
||||
xor r8, QWORD PTR [rcx]
|
||||
|
@ -35,7 +35,7 @@
|
|||
movaps XMMWORD PTR [rsp+64], xmm6
|
||||
movaps XMMWORD PTR [rsp+48], xmm7
|
||||
movaps XMMWORD PTR [rsp+32], xmm8
|
||||
and r10d, 2097136
|
||||
and r10d, ${MASK}
|
||||
movq xmm5, rax
|
||||
|
||||
xor eax, eax
|
||||
|
@ -55,7 +55,7 @@
|
|||
#else
|
||||
ALIGN(64)
|
||||
#endif
|
||||
rwz_main_loop:
|
||||
rwz_main_loop_${ALGO}:
|
||||
lea rdx, QWORD PTR [r10+rbx]
|
||||
mov ecx, r10d
|
||||
mov eax, r10d
|
||||
|
@ -69,7 +69,7 @@ rwz_main_loop:
|
|||
aesenc xmm6, xmm7
|
||||
movq rbp, xmm6
|
||||
mov r9, rbp
|
||||
and r9d, 2097136
|
||||
and r9d, ${MASK}
|
||||
movdqu xmm0, XMMWORD PTR [rcx+rbx]
|
||||
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
||||
movdqu xmm2, XMMWORD PTR [r10+rbx]
|
||||
|
@ -113,9 +113,9 @@ rwz_main_loop:
|
|||
psubq xmm3, XMMWORD PTR [rsp+16]
|
||||
movq rdx, xmm3
|
||||
test edx, 524287
|
||||
je rwz_sqrt_fixup
|
||||
je rwz_sqrt_fixup_${ALGO}
|
||||
psrlq xmm3, 19
|
||||
rwz_sqrt_fixup_ret:
|
||||
rwz_sqrt_fixup_${ALGO}_ret:
|
||||
|
||||
mov ecx, r10d
|
||||
mov rax, rdi
|
||||
|
@ -126,7 +126,7 @@ rwz_sqrt_fixup_ret:
|
|||
mov QWORD PTR [r14], r8
|
||||
xor r8, rdi
|
||||
mov edi, r8d
|
||||
and edi, 2097136
|
||||
and edi, ${MASK}
|
||||
movq xmm0, rax
|
||||
xor rax, [rcx+rbx+8]
|
||||
add r11, rax
|
||||
|
@ -151,7 +151,7 @@ rwz_sqrt_fixup_ret:
|
|||
mov r10d, edi
|
||||
xor r11, r12
|
||||
dec rsi
|
||||
jne rwz_main_loop
|
||||
jne rwz_main_loop_${ALGO}
|
||||
|
||||
ldmxcsr DWORD PTR [rsp]
|
||||
mov rbx, QWORD PTR [rsp+160]
|
||||
|
@ -166,9 +166,9 @@ rwz_sqrt_fixup_ret:
|
|||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
jmp cnv2_rwz_main_loop_endp
|
||||
jmp cnv2_rwz_main_loop_${ALGO}_endp
|
||||
|
||||
rwz_sqrt_fixup:
|
||||
rwz_sqrt_fixup_${ALGO}:
|
||||
dec rdx
|
||||
mov r13d, -1022
|
||||
shl r13, 32
|
||||
|
@ -185,6 +185,6 @@ rwz_sqrt_fixup:
|
|||
sub rcx, r9
|
||||
adc rdx, 0
|
||||
movq xmm3, rdx
|
||||
jmp rwz_sqrt_fixup_ret
|
||||
jmp rwz_sqrt_fixup_${ALGO}_ret
|
||||
|
||||
cnv2_rwz_main_loop_endp:
|
||||
cnv2_rwz_main_loop_${ALGO}_endp:
|
|
@ -31,8 +31,11 @@ PUBLIC cnv2_main_loop_zelerius_ryzen_asm
|
|||
PUBLIC cnv2_main_loop_zelerius_bulldozer_asm
|
||||
PUBLIC cnv2_double_main_loop_zelerius_sandybridge_asm
|
||||
|
||||
PUBLIC cnv2_main_loop_rwz_all_asm
|
||||
PUBLIC cnv2_double_main_loop_rwz_all_asm
|
||||
PUBLIC cnv2_main_loop_rwz_original_all_asm
|
||||
PUBLIC cnv2_double_main_loop_rwz_original_all_asm
|
||||
|
||||
PUBLIC cnv2_main_loop_rwz_upx2_all_asm
|
||||
PUBLIC cnv2_double_main_loop_rwz_upx2_all_asm
|
||||
|
||||
PUBLIC cnv1_main_loop_soft_aes_sandybridge_asm
|
||||
PUBLIC cnv1_main_loop_lite_soft_aes_sandybridge_asm
|
||||
|
@ -197,16 +200,28 @@ cnv2_double_main_loop_zelerius_sandybridge_asm PROC
|
|||
cnv2_double_main_loop_zelerius_sandybridge_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_main_loop_rwz_all_asm PROC
|
||||
INCLUDE cnv2_main_loop_rwz_all.inc
|
||||
cnv2_main_loop_rwz_original_all_asm PROC
|
||||
INCLUDE cnv2_main_loop_rwz_original_all.inc
|
||||
ret 0
|
||||
cnv2_main_loop_rwz_all_asm ENDP
|
||||
cnv2_main_loop_rwz_original_all_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_double_main_loop_rwz_all_asm PROC
|
||||
INCLUDE cnv2_double_main_loop_rwz_all.inc
|
||||
cnv2_double_main_loop_rwz_original_all_asm PROC
|
||||
INCLUDE cnv2_double_main_loop_rwz_original_all.inc
|
||||
ret 0
|
||||
cnv2_double_main_loop_rwz_all_asm ENDP
|
||||
cnv2_double_main_loop_rwz_original_all_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_main_loop_rwz_upx2_all_asm PROC
|
||||
INCLUDE cnv2_main_loop_rwz_upx2_all.inc
|
||||
ret 0
|
||||
cnv2_main_loop_rwz_upx2_all_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_double_main_loop_rwz_upx2_all_asm PROC
|
||||
INCLUDE cnv2_double_main_loop_rwz_upx2_all.inc
|
||||
ret 0
|
||||
cnv2_double_main_loop_rwz_upx2_all_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv1_main_loop_soft_aes_sandybridge_asm PROC
|
||||
|
|
|
@ -34,8 +34,11 @@
|
|||
.global FN_PREFIX(cnv2_main_loop_zelerius_bulldozer_asm)
|
||||
.global FN_PREFIX(cnv2_double_main_loop_zelerius_sandybridge_asm)
|
||||
|
||||
.global FN_PREFIX(cnv2_main_loop_rwz_all_asm)
|
||||
.global FN_PREFIX(cnv2_double_main_loop_rwz_all_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_rwz_original_all_asm)
|
||||
.global FN_PREFIX(cnv2_double_main_loop_rwz_original_all_asm)
|
||||
|
||||
.global FN_PREFIX(cnv2_main_loop_rwz_upx2_all_asm)
|
||||
.global FN_PREFIX(cnv2_double_main_loop_rwz_upx2_all_asm)
|
||||
|
||||
.global FN_PREFIX(cnv1_main_loop_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv1_main_loop_lite_soft_aes_sandybridge_asm)
|
||||
|
@ -175,13 +178,23 @@ FN_PREFIX(cnv2_double_main_loop_zelerius_sandybridge_asm):
|
|||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_main_loop_rwz_all_asm):
|
||||
#include "../cnv2_main_loop_rwz_all.inc"
|
||||
FN_PREFIX(cnv2_main_loop_rwz_original_all_asm):
|
||||
#include "../cnv2_main_loop_rwz_original_all.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_double_main_loop_rwz_all_asm):
|
||||
#include "../cnv2_double_main_loop_rwz_all.inc"
|
||||
FN_PREFIX(cnv2_double_main_loop_rwz_original_all_asm):
|
||||
#include "../cnv2_double_main_loop_rwz_original_all.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_main_loop_rwz_upx2_all_asm):
|
||||
#include "../cnv2_main_loop_rwz_upx2_all.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_double_main_loop_rwz_upx2_all_asm):
|
||||
#include "../cnv2_double_main_loop_rwz_upx2_all.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
mov r10, QWORD PTR [rcx+32]
|
||||
mov r8, rcx
|
||||
xor r10, QWORD PTR [rcx]
|
||||
mov r14d, 393216
|
||||
mov r14d, ${ITERATIONS}
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rsi, QWORD PTR [rdx+224]
|
||||
|
@ -41,7 +41,7 @@
|
|||
movaps XMMWORD PTR [rsp+16], xmm15
|
||||
mov rdx, r10
|
||||
movd xmm4, QWORD PTR [r8+96]
|
||||
and edx, 2097136
|
||||
and edx, ${MASK}
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
xorps xmm13, xmm13
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
|
@ -83,7 +83,7 @@
|
|||
mov rcx, rdi
|
||||
mov QWORD PTR [rsp+264], r11
|
||||
movd xmm8, rax
|
||||
and ecx, 2097136
|
||||
and ecx, ${MASK}
|
||||
punpcklqdq xmm8, xmm0
|
||||
movd xmm0, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm4, xmm0
|
||||
|
@ -95,7 +95,7 @@
|
|||
movdqu xmm15, XMMWORD PTR [r9]
|
||||
|
||||
ALIGN(64)
|
||||
rwz_main_loop_double:
|
||||
rwz_main_loop_double_${ALGO}:
|
||||
movdqu xmm9, xmm15
|
||||
mov eax, edx
|
||||
mov ebx, edx
|
||||
|
@ -120,7 +120,7 @@ rwz_main_loop_double:
|
|||
|
||||
movd r11, xmm9
|
||||
mov edx, r11d
|
||||
and edx, 2097136
|
||||
and edx, ${MASK}
|
||||
movdqa xmm0, xmm9
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [r9], xmm0
|
||||
|
@ -151,7 +151,7 @@ rwz_main_loop_double:
|
|||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||
|
||||
movd rcx, xmm10
|
||||
and ecx, 2097136
|
||||
and ecx, ${MASK}
|
||||
|
||||
movdqa xmm0, xmm10
|
||||
pxor xmm0, xmm6
|
||||
|
@ -199,7 +199,7 @@ rwz_main_loop_double:
|
|||
mov QWORD PTR [rbx+8], rdx
|
||||
xor rdx, r9
|
||||
mov QWORD PTR [rsp+256], r11
|
||||
and r11d, 2097136
|
||||
and r11d, ${MASK}
|
||||
mov QWORD PTR [rsp+264], rdx
|
||||
mov QWORD PTR [rsp+8], r11
|
||||
lea r15, QWORD PTR [r11+r13]
|
||||
|
@ -249,8 +249,8 @@ rwz_main_loop_double:
|
|||
mov rbx, rax
|
||||
imul rax, rdx
|
||||
sub r11, rax
|
||||
js rwz_div_fix_1
|
||||
rwz_div_fix_1_ret:
|
||||
js rwz_div_fix_1_${ALGO}
|
||||
rwz_div_fix_1_${ALGO}_ret:
|
||||
|
||||
cvttsd2si rdx, xmm2
|
||||
mov rax, rdx
|
||||
|
@ -258,8 +258,8 @@ rwz_div_fix_1_ret:
|
|||
movd xmm2, r11d
|
||||
movd xmm4, ebx
|
||||
sub r8, rax
|
||||
js rwz_div_fix_2
|
||||
rwz_div_fix_2_ret:
|
||||
js rwz_div_fix_2_${ALGO}
|
||||
rwz_div_fix_2_${ALGO}_ret:
|
||||
|
||||
movd xmm1, r8d
|
||||
movd xmm0, edx
|
||||
|
@ -275,15 +275,15 @@ rwz_div_fix_2_ret:
|
|||
movdqa xmm5, xmm1
|
||||
psrlq xmm5, 19
|
||||
test r9, 524287
|
||||
je rwz_sqrt_fix_1
|
||||
rwz_sqrt_fix_1_ret:
|
||||
je rwz_sqrt_fix_1_${ALGO}
|
||||
rwz_sqrt_fix_1_${ALGO}_ret:
|
||||
|
||||
movd r9, xmm10
|
||||
psrldq xmm1, 8
|
||||
movd r8, xmm1
|
||||
test r8, 524287
|
||||
je rwz_sqrt_fix_2
|
||||
rwz_sqrt_fix_2_ret:
|
||||
je rwz_sqrt_fix_2_${ALGO}
|
||||
rwz_sqrt_fix_2_${ALGO}_ret:
|
||||
|
||||
mov r12d, ecx
|
||||
mov r8d, ecx
|
||||
|
@ -313,7 +313,7 @@ rwz_sqrt_fix_2_ret:
|
|||
mov QWORD PTR [r13], rdi
|
||||
xor rdi, r10
|
||||
mov ecx, edi
|
||||
and ecx, 2097136
|
||||
and ecx, ${MASK}
|
||||
lea r8, QWORD PTR [rcx+rsi]
|
||||
|
||||
mov rdx, QWORD PTR [r13+8]
|
||||
|
@ -331,7 +331,7 @@ rwz_sqrt_fix_2_ret:
|
|||
movdqa xmm6, xmm10
|
||||
mov r9, r15
|
||||
dec r14d
|
||||
jne rwz_main_loop_double
|
||||
jne rwz_main_loop_double_${ALGO}
|
||||
|
||||
ldmxcsr DWORD PTR [rsp+272]
|
||||
movaps xmm13, XMMWORD PTR [rsp+48]
|
||||
|
@ -354,19 +354,19 @@ rwz_sqrt_fix_2_ret:
|
|||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
jmp rwz_cnv2_double_mainloop_asm_endp
|
||||
jmp rwz_cnv2_double_mainloop_${ALGO}_asm_endp
|
||||
|
||||
rwz_div_fix_1:
|
||||
rwz_div_fix_1_${ALGO}:
|
||||
dec rbx
|
||||
add r11, rdx
|
||||
jmp rwz_div_fix_1_ret
|
||||
jmp rwz_div_fix_1_${ALGO}_ret
|
||||
|
||||
rwz_div_fix_2:
|
||||
rwz_div_fix_2_${ALGO}:
|
||||
dec rdx
|
||||
add r8, r9
|
||||
jmp rwz_div_fix_2_ret
|
||||
jmp rwz_div_fix_2_${ALGO}_ret
|
||||
|
||||
rwz_sqrt_fix_1:
|
||||
rwz_sqrt_fix_1_${ALGO}:
|
||||
movd r8, xmm3
|
||||
movdqa xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
|
@ -385,9 +385,9 @@ rwz_sqrt_fix_1:
|
|||
adc r9, 0
|
||||
movd xmm5, r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
jmp rwz_sqrt_fix_1_ret
|
||||
jmp rwz_sqrt_fix_1_${ALGO}_ret
|
||||
|
||||
rwz_sqrt_fix_2:
|
||||
rwz_sqrt_fix_2_${ALGO}:
|
||||
psrldq xmm3, 8
|
||||
movd r11, xmm3
|
||||
dec r8
|
||||
|
@ -405,6 +405,6 @@ rwz_sqrt_fix_2:
|
|||
adc r8, 0
|
||||
movd xmm0, r8
|
||||
punpcklqdq xmm5, xmm0
|
||||
jmp rwz_sqrt_fix_2_ret
|
||||
jmp rwz_sqrt_fix_2_${ALGO}_ret
|
||||
|
||||
rwz_cnv2_double_mainloop_asm_endp:
|
||||
rwz_cnv2_double_mainloop_${ALGO}_asm_endp:
|
|
@ -15,7 +15,7 @@
|
|||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r9, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov esi, 393216
|
||||
mov esi, ${ITERATIONS}
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
mov r13d, -2147483647
|
||||
xor r8, QWORD PTR [rcx]
|
||||
|
@ -35,7 +35,7 @@
|
|||
movaps XMMWORD PTR [rsp+64], xmm6
|
||||
movaps XMMWORD PTR [rsp+48], xmm7
|
||||
movaps XMMWORD PTR [rsp+32], xmm8
|
||||
and r10d, 2097136
|
||||
and r10d, ${MASK}
|
||||
movd xmm5, rax
|
||||
|
||||
xor eax, eax
|
||||
|
@ -51,7 +51,7 @@
|
|||
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||
|
||||
ALIGN(64)
|
||||
rwz_main_loop:
|
||||
rwz_main_loop_${ALGO}:
|
||||
lea rdx, QWORD PTR [r10+rbx]
|
||||
mov ecx, r10d
|
||||
mov eax, r10d
|
||||
|
@ -65,7 +65,7 @@ rwz_main_loop:
|
|||
aesenc xmm6, xmm7
|
||||
movd rbp, xmm6
|
||||
mov r9, rbp
|
||||
and r9d, 2097136
|
||||
and r9d, ${MASK}
|
||||
movdqu xmm0, XMMWORD PTR [rcx+rbx]
|
||||
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
||||
movdqu xmm2, XMMWORD PTR [r10+rbx]
|
||||
|
@ -109,9 +109,9 @@ rwz_main_loop:
|
|||
psubq xmm3, XMMWORD PTR [rsp+16]
|
||||
movd rdx, xmm3
|
||||
test edx, 524287
|
||||
je rwz_sqrt_fixup
|
||||
je rwz_sqrt_fixup_${ALGO}
|
||||
psrlq xmm3, 19
|
||||
rwz_sqrt_fixup_ret:
|
||||
rwz_sqrt_fixup_${ALGO}_ret:
|
||||
|
||||
mov ecx, r10d
|
||||
mov rax, rdi
|
||||
|
@ -122,7 +122,7 @@ rwz_sqrt_fixup_ret:
|
|||
mov QWORD PTR [r14], r8
|
||||
xor r8, rdi
|
||||
mov edi, r8d
|
||||
and edi, 2097136
|
||||
and edi, ${MASK}
|
||||
movd xmm0, rax
|
||||
xor rax, [rcx+rbx+8]
|
||||
add r11, rax
|
||||
|
@ -147,7 +147,7 @@ rwz_sqrt_fixup_ret:
|
|||
mov r10d, edi
|
||||
xor r11, r12
|
||||
dec rsi
|
||||
jne rwz_main_loop
|
||||
jne rwz_main_loop_${ALGO}
|
||||
|
||||
ldmxcsr DWORD PTR [rsp]
|
||||
mov rbx, QWORD PTR [rsp+160]
|
||||
|
@ -162,9 +162,9 @@ rwz_sqrt_fixup_ret:
|
|||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
jmp cnv2_rwz_main_loop_endp
|
||||
jmp cnv2_rwz_main_loop_${ALGO}_endp
|
||||
|
||||
rwz_sqrt_fixup:
|
||||
rwz_sqrt_fixup_${ALGO}:
|
||||
dec rdx
|
||||
mov r13d, -1022
|
||||
shl r13, 32
|
||||
|
@ -181,6 +181,6 @@ rwz_sqrt_fixup:
|
|||
sub rcx, r9
|
||||
adc rdx, 0
|
||||
movd xmm3, rdx
|
||||
jmp rwz_sqrt_fixup_ret
|
||||
jmp rwz_sqrt_fixup_${ALGO}_ret
|
||||
|
||||
cnv2_rwz_main_loop_endp:
|
||||
cnv2_rwz_main_loop_${ALGO}_endp:
|
|
@ -143,6 +143,10 @@ PowVariant Job::powVariant() const
|
|||
return PowVariant::POW_TURTLE;
|
||||
}
|
||||
|
||||
if (Options::i()->algo() == Options::ALGO_CRYPTONIGHT_EXTREMELITE) {
|
||||
return PowVariant::POW_UPX2;
|
||||
}
|
||||
|
||||
if (m_powVariant == PowVariant::POW_AUTODETECT) {
|
||||
if (m_blob[0] >= 10) {
|
||||
return PowVariant::POW_V4;
|
||||
|
|
|
@ -62,16 +62,20 @@ DonateStrategy::DonateStrategy(const char *agent, IStrategyListener *listener) :
|
|||
url = new Url("donate2.graef.in", 1080, userId, nullptr, true, false, true);
|
||||
} else if (Options::i()->algo() == Options::ALGO_CRYPTONIGHT_ULTRALITE) {
|
||||
url = new Url("donate2.graef.in", 8090, userId, nullptr, true, false, true);
|
||||
} else if (Options::i()->algo() == Options::ALGO_CRYPTONIGHT_EXTREMELITE) {
|
||||
url = new Url("donate2.graef.in", 9091, userId, nullptr, true, false, true);
|
||||
} else {
|
||||
url = new Url("donate2.graef.in", 443, userId, nullptr, true, false, true);
|
||||
}
|
||||
#else
|
||||
if (Options::i()->algo() == Options::ALGO_CRYPTONIGHT_HEAVY) {
|
||||
url = new Url("donate.graef.in", 8443, userId, nullptr, false, false, true);
|
||||
url = new Url("donate2.graef.in", 9000, userId, nullptr, false, false, true);
|
||||
} else if (Options::i()->algo() == Options::ALGO_CRYPTONIGHT_LITE) {
|
||||
url = new Url("donate.graef.in", 1080, userId, nullptr, false, false, true);
|
||||
url = new Url("donate2.graef.in", 7000, userId, nullptr, false, false, true);
|
||||
} else if (Options::i()->algo() == Options::ALGO_CRYPTONIGHT_ULTRALITE) {
|
||||
url = new Url("donate2.graef.in", 8088, userId, nullptr, false, false, true);
|
||||
} else if (Options::i()->algo() == Options::ALGO_CRYPTONIGHT_EXTREMELITE) {
|
||||
url = new Url("donate2.graef.in", 8088, userId, nullptr, false, false, true);
|
||||
} else {
|
||||
url = new Url("donate2.graef.in", 80, userId, nullptr, false, false, true);
|
||||
}
|
||||
|
|
|
@ -36,14 +36,14 @@
|
|||
#define APP_DESC "XMRigCC CPU miner"
|
||||
#define APP_COPYRIGHT "Copyright (C) 2017- BenDr0id"
|
||||
#endif
|
||||
#define APP_VERSION "1.9.1 (based on XMRig)"
|
||||
#define APP_VERSION "1.9.2 (based on XMRig)"
|
||||
#define APP_DOMAIN ""
|
||||
#define APP_SITE "https://github.com/Bendr0id/xmrigCC"
|
||||
#define APP_KIND "cpu"
|
||||
|
||||
#define APP_VER_MAJOR 1
|
||||
#define APP_VER_MINOR 9
|
||||
#define APP_VER_BUILD 1
|
||||
#define APP_VER_BUILD 2
|
||||
#define APP_VER_REV 0
|
||||
|
||||
#ifndef NDEBUG
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue