diff --git a/src/Options.cpp b/src/Options.cpp
index 2cbe5e05..e65ed1e1 100644
--- a/src/Options.cpp
+++ b/src/Options.cpp
@@ -282,7 +282,8 @@ constexpr static const char *pow_variant_names[] = {
     "ipbc",
     "alloy",
     "xtl",
-    "msr"
+    "msr",
+    "xhv"
 };
 
 Options *Options::parse(int argc, char **argv)
@@ -1028,6 +1029,11 @@ bool Options::parsePowVariant(const char *powVariant)
             break;
         }
 
+        if (i == ARRAY_SIZE(pow_variant_names) - 1 && !strcmp(powVariant, "haven")) {
+            m_powVariant = POW_XHV;
+            break;
+        }
+
         if (i == ARRAY_SIZE(pow_variant_names) - 1) {
             showUsage(1);
             return false;
diff --git a/src/PowVariant.h b/src/PowVariant.h
index 18dd2f96..ecb0da9e 100644
--- a/src/PowVariant.h
+++ b/src/PowVariant.h
@@ -31,6 +31,7 @@ enum PowVariant
     POW_ALLOY,
     POW_XTL,
     POW_MSR,
+    POW_XHV,
     LAST_ITEM
 };
 
@@ -50,6 +51,8 @@ inline std::string getPowVariantName(PowVariant powVariant)
         return "xtl";
     case POW_MSR:
         return "msr";
+    case POW_XHV:
+        return "xhv";
     case POW_AUTODETECT:
     default:
         return "-1";
@@ -106,6 +109,8 @@ inline PowVariant parseVariant(const std::string variant)
         powVariant = PowVariant::POW_XTL;
     } else if (variant == "msr" || variant == "masari") {
        powVariant = PowVariant::POW_MSR;
+    } else if (variant == "xhv" || variant == "haven") {
+        powVariant = PowVariant::POW_XHV;
     }
 
     return powVariant;
diff --git a/src/crypto/CryptoNight.cpp b/src/crypto/CryptoNight.cpp
index 11f103a1..65b18b2e 100644
--- a/src/crypto/CryptoNight.cpp
+++ b/src/crypto/CryptoNight.cpp
@@ -92,13 +92,23 @@ static void cryptonight_lite_softaes(PowVariant powVersion, const uint8_t* input
 template <size_t NUM_HASH_BLOCKS>
 static void cryptonight_heavy_aesni(PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx *ctx) {
 #   if !defined(XMRIG_ARMv7)
-    CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, ctx);
+    if (powVersion == PowVariant::POW_XHV) {
+        CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, ctx);
+    }
+    else {
+        CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, ctx);
+    }
 #   endif
 }
 
 template <size_t NUM_HASH_BLOCKS>
 static void cryptonight_heavy_softaes(PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx *ctx) {
-    CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, ctx);
+    if (powVersion == PowVariant::POW_XHV) {
+        CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, ctx);
+    }
+    else {
+        CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, ctx);
+    }
 }
 
 void (*cryptonight_hash_ctx[MAX_NUM_HASH_BLOCKS])(PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx *ctx);
@@ -195,6 +205,21 @@ bool CryptoNight::selfTest(int algo)
         cryptonight_hash_ctx[2](PowVariant::POW_V0, test_input, 76, output, ctx);
         resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 96) == 0;
 #endif
+
+        // cn-heavy haven
+
+        cryptonight_hash_ctx[0](PowVariant::POW_XHV, test_input, 76, output, ctx);
+        resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 32) == 0;
+
+        #if MAX_NUM_HASH_BLOCKS > 1
+        cryptonight_hash_ctx[1](PowVariant::POW_XHV, test_input, 76, output, ctx);
+        resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 64) == 0;
+        #endif
+
+        #if MAX_NUM_HASH_BLOCKS > 2
+        cryptonight_hash_ctx[2](PowVariant::POW_XHV, test_input, 76, output, ctx);
+        resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 96) == 0;
+        #endif
     } else if (algo == Options::ALGO_CRYPTONIGHT_LITE) {
         // cn-lite v0
diff --git a/src/crypto/CryptoNight_test.h b/src/crypto/CryptoNight_test.h
index e51f4424..7a6f9cc3 100644
--- a/src/crypto/CryptoNight_test.h
+++ b/src/crypto/CryptoNight_test.h
@@ -157,4 +157,14 @@ const static uint8_t test_output_heavy[160] = {
     0xAD, 0xB1, 0xFD, 0x89, 0xFB, 0x5C, 0xB4, 0x25, 0x6A, 0xDD, 0xB0, 0x09, 0xC5, 0x72, 0x87, 0xEB
 };
 
+// CN-Heavy Haven
+const static uint8_t test_output_heavy_haven[160] = {
+    0x5A, 0xC3, 0xF7, 0x85, 0xC4, 0x90, 0xC5, 0x85, 0x50, 0xEC, 0x95, 0xD2, 0x72, 0x65, 0x63, 0x57,
+    0x7E, 0x7C, 0x1C, 0x21, 0x2D, 0x0C, 0xDE, 0x59, 0x12, 0x73, 0x20, 0x1E, 0x44, 0xFD, 0xD5, 0xB6,
+    0x1F, 0x4E, 0xB2, 0x0A, 0x36, 0x51, 0x4B, 0xF5, 0x4D, 0xC9, 0xE0, 0x90, 0x2C, 0x16, 0x47, 0x3F,
+    0xDE, 0x18, 0x29, 0x8E, 0xBB, 0x34, 0x2B, 0xEF, 0x7A, 0x04, 0x22, 0xD1, 0xB1, 0xF2, 0x48, 0xDA,
+    0xE3, 0x7F, 0x4B, 0x4C, 0xB4, 0xDF, 0xE8, 0xD3, 0x70, 0xE2, 0xE7, 0x44, 0x25, 0x87, 0x12, 0xF9,
+    0x8F, 0x28, 0x0B, 0xCE, 0x2C, 0xEE, 0xDD, 0x88, 0x94, 0x35, 0x48, 0x51, 0xAE, 0xC8, 0x9C, 0x0B
+};
+
 #endif /* __CRYPTONIGHT_TEST_H__ */
diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h
index 4ebbd1c9..bfb8a122 100644
--- a/src/crypto/CryptoNight_x86.h
+++ b/src/crypto/CryptoNight_x86.h
@@ -785,6 +785,83 @@ public:
                 output + hashBlock * 32);
         }
     }
+
+    inline static void hashHeavyHaven(const uint8_t* __restrict__ input,
+                                      size_t size,
+                                      uint8_t* __restrict__ output,
+                                      cryptonight_ctx* __restrict__ ctx)
+    {
+        const uint8_t* l[NUM_HASH_BLOCKS];
+        uint64_t* h[NUM_HASH_BLOCKS];
+        uint64_t al[NUM_HASH_BLOCKS];
+        uint64_t ah[NUM_HASH_BLOCKS];
+        __m128i bx[NUM_HASH_BLOCKS];
+        uint64_t idx[NUM_HASH_BLOCKS];
+
+        for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
+            keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size, ctx->state[hashBlock], 200);
+        }
+
+        for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
+            l[hashBlock] = ctx->memory + hashBlock * MEM;
+            h[hashBlock] = reinterpret_cast<uint64_t*>(ctx->state[hashBlock]);
+
+            cn_explode_scratchpad_heavy((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
+
+            al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
+            ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
+            bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
+            idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
+        }
+
+        for (size_t i = 0; i < ITERATIONS; i++) {
+            for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
+                __m128i cx;
+
+                if (SOFT_AES) {
+                    cx = soft_aesenc((uint32_t*) &l[hashBlock][idx[hashBlock] & MASK], _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
+                } else {
+                    cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
+                    cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
+                }
+
+                _mm_store_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK],
+                                _mm_xor_si128(bx[hashBlock], cx));
+
+                idx[hashBlock] = EXTRACT64(cx);
+                bx[hashBlock] = cx;
+
+                uint64_t hi, lo, cl, ch;
+                cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
+                ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
+                lo = __umul128(idx[hashBlock], cl, &hi);
+
+                al[hashBlock] += hi;
+                ah[hashBlock] += lo;
+
+                ((uint64_t*)
&l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock]; + ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock]; + + ah[hashBlock] ^= ch; + al[hashBlock] ^= cl; + idx[hashBlock] = al[hashBlock]; + + int64_t n = ((int64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0]; + int32_t d = ((int32_t*)&l[hashBlock][idx[hashBlock] & MASK])[2]; + int64_t q = n / (d | 0x5); + + ((int64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0] = n ^ q; + idx[hashBlock] = (~d) ^ q; + } + } + + for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) { + cn_implode_scratchpad_heavy((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]); + keccakf(h[hashBlock], 24); + extra_hashes[ctx->state[hashBlock][0] & 3](ctx->state[hashBlock], 200, + output + hashBlock * 32); + } + } }; @@ -1052,6 +1129,72 @@ public: keccakf(h, 24); extra_hashes[ctx->state[0][0] & 3](ctx->state[0], 200, output); } + + inline static void hashHeavyHaven(const uint8_t* __restrict__ input, + size_t size, + uint8_t* __restrict__ output, + cryptonight_ctx* __restrict__ ctx) + { + const uint8_t* l; + uint64_t* h; + uint64_t al; + uint64_t ah; + __m128i bx; + uint64_t idx; + + keccak(static_cast(input), (int) size, ctx->state[0], 200); + + l = ctx->memory; + h = reinterpret_cast(ctx->state[0]); + + cn_explode_scratchpad_heavy((__m128i*) h, (__m128i*) l); + + al = h[0] ^ h[4]; + ah = h[1] ^ h[5]; + bx = _mm_set_epi64x(h[3] ^ h[7], h[2] ^ h[6]); + idx = h[0] ^ h[4]; + + for (size_t i = 0; i < ITERATIONS; i++) { + __m128i cx; + + if (SOFT_AES) { + cx = soft_aesenc((uint32_t*)&l[idx & MASK], _mm_set_epi64x(ah, al)); + } else { + cx = _mm_load_si128((__m128i*) &l[idx & MASK]); + cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah, al)); + } + + _mm_store_si128((__m128i*) &l[idx & MASK], _mm_xor_si128(bx, cx)); + idx = EXTRACT64(cx); + bx = cx; + + uint64_t hi, lo, cl, ch; + cl = ((uint64_t*) &l[idx & MASK])[0]; + ch = ((uint64_t*) &l[idx & MASK])[1]; + lo = __umul128(idx, cl, &hi); + + al += hi; + ah += lo; + + ((uint64_t*) &l[idx & MASK])[0] = al; + ((uint64_t*) &l[idx & MASK])[1] = ah; + + ah ^= ch; + al ^= cl; + idx = al; + + int64_t n = ((int64_t*)&l[idx & MASK])[0]; + int32_t d = ((int32_t*)&l[idx & MASK])[2]; + int64_t q = n / (d | 0x5); + + ((int64_t*)&l[idx & MASK])[0] = n ^ q; + idx = (~d) ^ q; + } + + cn_implode_scratchpad_heavy((__m128i*) l, (__m128i*) h); + keccakf(h, 24); + extra_hashes[ctx->state[0][0] & 3](ctx->state[0], 200, output); + } }; template @@ -1474,6 +1617,112 @@ public: extra_hashes[ctx->state[0][0] & 3](ctx->state[0], 200, output); extra_hashes[ctx->state[1][0] & 3](ctx->state[1], 200, output + 32); } + + inline static void hashHeavyHaven(const uint8_t* __restrict__ input, + size_t size, + uint8_t* __restrict__ output, + cryptonight_ctx* __restrict__ ctx) + { + keccak((const uint8_t*) input, (int) size, ctx->state[0], 200); + keccak((const uint8_t*) input + size, (int) size, ctx->state[1], 200); + + const uint8_t* l0 = ctx->memory; + const uint8_t* l1 = ctx->memory + MEM; + uint64_t* h0 = reinterpret_cast(ctx->state[0]); + uint64_t* h1 = reinterpret_cast(ctx->state[1]); + + cn_explode_scratchpad_heavy((__m128i*) h0, (__m128i*) l0); + cn_explode_scratchpad_heavy((__m128i*) h1, (__m128i*) l1); + + uint64_t al0 = h0[0] ^h0[4]; + uint64_t al1 = h1[0] ^h1[4]; + uint64_t ah0 = h0[1] ^h0[5]; + uint64_t ah1 = h1[1] ^h1[5]; + + __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); + __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); + + uint64_t idx0 = h0[0] ^h0[4]; + uint64_t idx1 = h1[0] ^h1[4]; + + for (size_t 
i = 0; i < ITERATIONS; i++) { + __m128i cx0; + __m128i cx1; + + if (SOFT_AES) { + cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0)); + cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1)); + } else { + cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]); + cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]); + + cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0)); + cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1)); + } + + _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0)); + _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1)); + + idx0 = EXTRACT64(cx0); + idx1 = EXTRACT64(cx1); + + bx0 = cx0; + bx1 = cx1; + + uint64_t hi, lo, cl, ch; + cl = ((uint64_t*) &l0[idx0 & MASK])[0]; + ch = ((uint64_t*) &l0[idx0 & MASK])[1]; + lo = __umul128(idx0, cl, &hi); + + al0 += hi; + ah0 += lo; + + ((uint64_t*) &l0[idx0 & MASK])[0] = al0; + ((uint64_t*) &l0[idx0 & MASK])[1] = ah0; + + ah0 ^= ch; + al0 ^= cl; + idx0 = al0; + + int64_t n = ((int64_t*)&l0[idx0 & MASK])[0]; + int32_t d = ((int32_t*)&l0[idx0 & MASK])[2]; + int64_t q = n / (d | 0x5); + + ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; + idx0 = (~d) ^ q; + + + cl = ((uint64_t*) &l1[idx1 & MASK])[0]; + ch = ((uint64_t*) &l1[idx1 & MASK])[1]; + lo = __umul128(idx1, cl, &hi); + + al1 += hi; + ah1 += lo; + + ((uint64_t*) &l1[idx1 & MASK])[0] = al1; + ((uint64_t*) &l1[idx1 & MASK])[1] = ah1; + + ah1 ^= ch; + al1 ^= cl; + idx1 = al1; + + n = ((int64_t*)&l1[idx1 & MASK])[0]; + d = ((int32_t*)&l1[idx1 & MASK])[2]; + q = n / (d | 0x5); + + ((int64_t*)&l1[idx1 & MASK])[0] = n ^ q; + idx1 = (~d) ^ q; + } + + cn_implode_scratchpad_heavy((__m128i*) l0, (__m128i*) h0); + cn_implode_scratchpad_heavy((__m128i*) l1, (__m128i*) h1); + + keccakf(h0, 24); + keccakf(h1, 24); + + extra_hashes[ctx->state[0][0] & 3](ctx->state[0], 200, output); + extra_hashes[ctx->state[1][0] & 3](ctx->state[1], 200, output + 32); + } }; template @@ -2058,6 +2307,153 @@ public: extra_hashes[ctx->state[1][0] & 3](ctx->state[1], 200, output + 32); extra_hashes[ctx->state[2][0] & 3](ctx->state[2], 200, output + 64); } + + inline static void hashHeavyHaven(const uint8_t* __restrict__ input, + size_t size, + uint8_t* __restrict__ output, + cryptonight_ctx* __restrict__ ctx) + { + keccak((const uint8_t*) input, (int) size, ctx->state[0], 200); + keccak((const uint8_t*) input + size, (int) size, ctx->state[1], 200); + keccak((const uint8_t*) input + 2 * size, (int) size, ctx->state[2], 200); + + const uint8_t* l0 = ctx->memory; + const uint8_t* l1 = ctx->memory + MEM; + const uint8_t* l2 = ctx->memory + 2 * MEM; + uint64_t* h0 = reinterpret_cast(ctx->state[0]); + uint64_t* h1 = reinterpret_cast(ctx->state[1]); + uint64_t* h2 = reinterpret_cast(ctx->state[2]); + + cn_explode_scratchpad_heavy((__m128i*) h0, (__m128i*) l0); + cn_explode_scratchpad_heavy((__m128i*) h1, (__m128i*) l1); + cn_explode_scratchpad_heavy((__m128i*) h2, (__m128i*) l2); + + uint64_t al0 = h0[0] ^h0[4]; + uint64_t al1 = h1[0] ^h1[4]; + uint64_t al2 = h2[0] ^h2[4]; + uint64_t ah0 = h0[1] ^h0[5]; + uint64_t ah1 = h1[1] ^h1[5]; + uint64_t ah2 = h2[1] ^h2[5]; + + __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); + __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); + __m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]); + + uint64_t idx0 = h0[0] ^h0[4]; + uint64_t idx1 = h1[0] ^h1[4]; + uint64_t idx2 = h2[0] ^h2[4]; + + for (size_t i = 0; i < ITERATIONS; i++) { + __m128i cx0; + __m128i cx1; + __m128i cx2; + + if (SOFT_AES) { + cx0 
= soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0)); + cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1)); + cx2 = soft_aesenc((uint32_t*)&l2[idx2 & MASK], _mm_set_epi64x(ah2, al2)); + } else { + cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]); + cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]); + cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]); + + cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0)); + cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1)); + cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2)); + } + + _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0)); + _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1)); + _mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx2, cx2)); + + idx0 = EXTRACT64(cx0); + idx1 = EXTRACT64(cx1); + idx2 = EXTRACT64(cx2); + + bx0 = cx0; + bx1 = cx1; + bx2 = cx2; + + + uint64_t hi, lo, cl, ch; + cl = ((uint64_t*) &l0[idx0 & MASK])[0]; + ch = ((uint64_t*) &l0[idx0 & MASK])[1]; + lo = __umul128(idx0, cl, &hi); + + al0 += hi; + ah0 += lo; + + ((uint64_t*) &l0[idx0 & MASK])[0] = al0; + ((uint64_t*) &l0[idx0 & MASK])[1] = ah0; + + ah0 ^= ch; + al0 ^= cl; + idx0 = al0; + + int64_t n = ((int64_t*)&l0[idx0 & MASK])[0]; + int32_t d = ((int32_t*)&l0[idx0 & MASK])[2]; + int64_t q = n / (d | 0x5); + + ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; + idx0 = (~d) ^ q; + + + cl = ((uint64_t*) &l1[idx1 & MASK])[0]; + ch = ((uint64_t*) &l1[idx1 & MASK])[1]; + lo = __umul128(idx1, cl, &hi); + + al1 += hi; + ah1 += lo; + + ((uint64_t*) &l1[idx1 & MASK])[0] = al1; + ((uint64_t*) &l1[idx1 & MASK])[1] = ah1; + + ah1 ^= ch; + al1 ^= cl; + idx1 = al1; + + n = ((int64_t*)&l1[idx1 & MASK])[0]; + d = ((int32_t*)&l1[idx1 & MASK])[2]; + q = n / (d | 0x5); + + ((int64_t*)&l1[idx1 & MASK])[0] = n ^ q; + idx1 = (~d) ^ q; + + + cl = ((uint64_t*) &l2[idx2 & MASK])[0]; + ch = ((uint64_t*) &l2[idx2 & MASK])[1]; + lo = __umul128(idx2, cl, &hi); + + al2 += hi; + ah2 += lo; + + ((uint64_t*) &l2[idx2 & MASK])[0] = al2; + ((uint64_t*) &l2[idx2 & MASK])[1] = ah2; + + ah2 ^= ch; + al2 ^= cl; + idx2 = al2; + + n = ((int64_t*)&l2[idx2 & MASK])[0]; + d = ((int32_t*)&l2[idx2 & MASK])[2]; + q = n / (d | 0x5); + + ((int64_t*)&l2[idx2 & MASK])[0] = n ^ q; + idx2 = (~d) ^ q; + } + + cn_implode_scratchpad_heavy((__m128i*) l0, (__m128i*) h0); + cn_implode_scratchpad_heavy((__m128i*) l1, (__m128i*) h1); + cn_implode_scratchpad_heavy((__m128i*) l2, (__m128i*) h2); + + keccakf(h0, 24); + keccakf(h1, 24); + keccakf(h2, 24); + + extra_hashes[ctx->state[0][0] & 3](ctx->state[0], 200, output); + extra_hashes[ctx->state[1][0] & 3](ctx->state[1], 200, output + 32); + extra_hashes[ctx->state[2][0] & 3](ctx->state[2], 200, output + 64); + } }; template @@ -2616,186 +3012,15 @@ public: uint8_t* __restrict__ output, cryptonight_ctx* __restrict__ ctx) { - keccak((const uint8_t*) input, (int) size, ctx->state[0], 200); - keccak((const uint8_t*) input + size, (int) size, ctx->state[1], 200); - keccak((const uint8_t*) input + 2 * size, (int) size, ctx->state[2], 200); - keccak((const uint8_t*) input + 3 * size, (int) size, ctx->state[3], 200); + // not supported + } - const uint8_t* l0 = ctx->memory; - const uint8_t* l1 = ctx->memory + MEM; - const uint8_t* l2 = ctx->memory + 2 * MEM; - const uint8_t* l3 = ctx->memory + 3 * MEM; - uint64_t* h0 = reinterpret_cast(ctx->state[0]); - uint64_t* h1 = reinterpret_cast(ctx->state[1]); - uint64_t* h2 = reinterpret_cast(ctx->state[2]); - uint64_t* h3 = reinterpret_cast(ctx->state[3]); - - 
cn_explode_scratchpad_heavy((__m128i*) h0, (__m128i*) l0); - cn_explode_scratchpad_heavy((__m128i*) h1, (__m128i*) l1); - cn_explode_scratchpad_heavy((__m128i*) h2, (__m128i*) l2); - cn_explode_scratchpad_heavy((__m128i*) h3, (__m128i*) l3); - - uint64_t al0 = h0[0] ^h0[4]; - uint64_t al1 = h1[0] ^h1[4]; - uint64_t al2 = h2[0] ^h2[4]; - uint64_t al3 = h3[0] ^h3[4]; - uint64_t ah0 = h0[1] ^h0[5]; - uint64_t ah1 = h1[1] ^h1[5]; - uint64_t ah2 = h2[1] ^h2[5]; - uint64_t ah3 = h3[1] ^h3[5]; - - __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); - __m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]); - __m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]); - - uint64_t idx0 = h0[0] ^h0[4]; - uint64_t idx1 = h1[0] ^h1[4]; - uint64_t idx2 = h2[0] ^h2[4]; - uint64_t idx3 = h3[0] ^h3[4]; - - for (size_t i = 0; i < ITERATIONS; i++) { - __m128i cx0; - __m128i cx1; - __m128i cx2; - __m128i cx3; - - if (SOFT_AES) { - cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0)); - cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1)); - cx2 = soft_aesenc((uint32_t*)&l2[idx2 & MASK], _mm_set_epi64x(ah2, al2)); - cx3 = soft_aesenc((uint32_t*)&l3[idx3 & MASK], _mm_set_epi64x(ah3, al3)); - } else { - cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]); - cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]); - cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]); - cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]); - - cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0)); - cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1)); - cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2)); - cx3 = _mm_aesenc_si128(cx3, _mm_set_epi64x(ah3, al3)); - } - - _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0)); - _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1)); - _mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx2, cx2)); - _mm_store_si128((__m128i*) &l3[idx3 & MASK], _mm_xor_si128(bx3, cx3)); - - idx0 = EXTRACT64(cx0); - idx1 = EXTRACT64(cx1); - idx2 = EXTRACT64(cx2); - idx3 = EXTRACT64(cx3); - - bx0 = cx0; - bx1 = cx1; - bx2 = cx2; - bx3 = cx3; - - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*) &l0[idx0 & MASK])[0]; - ch = ((uint64_t*) &l0[idx0 & MASK])[1]; - lo = __umul128(idx0, cl, &hi); - - al0 += hi; - ah0 += lo; - - ((uint64_t*) &l0[idx0 & MASK])[0] = al0; - ((uint64_t*) &l0[idx0 & MASK])[1] = ah0; - - ah0 ^= ch; - al0 ^= cl; - idx0 = al0; - - int64_t n = ((int64_t*)&l0[idx0 & MASK])[0]; - int32_t d = ((int32_t*)&l0[idx0 & MASK])[2]; - int64_t q = n / (d | 0x5); - - ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; - idx0 = d ^ q; - - - cl = ((uint64_t*) &l1[idx1 & MASK])[0]; - ch = ((uint64_t*) &l1[idx1 & MASK])[1]; - lo = __umul128(idx1, cl, &hi); - - al1 += hi; - ah1 += lo; - - ((uint64_t*) &l1[idx1 & MASK])[0] = al1; - ((uint64_t*) &l1[idx1 & MASK])[1] = ah1; - - ah1 ^= ch; - al1 ^= cl; - idx1 = al1; - - n = ((int64_t*)&l1[idx1 & MASK])[0]; - d = ((int32_t*)&l1[idx1 & MASK])[2]; - q = n / (d | 0x5); - - ((int64_t*)&l1[idx1 & MASK])[0] = n ^ q; - idx1 = d ^ q; - - - cl = ((uint64_t*) &l2[idx2 & MASK])[0]; - ch = ((uint64_t*) &l2[idx2 & MASK])[1]; - lo = __umul128(idx2, cl, &hi); - - al2 += hi; - ah2 += lo; - - ((uint64_t*) &l2[idx2 & MASK])[0] = al2; - ((uint64_t*) &l2[idx2 & MASK])[1] = ah2; - - ah2 ^= ch; - al2 ^= cl; - idx2 = al2; - - n = ((int64_t*)&l2[idx2 & MASK])[0]; - d = ((int32_t*)&l2[idx2 & MASK])[2]; - q = n / (d | 0x5); - - ((int64_t*)&l2[idx2 & 
MASK])[0] = n ^ q; - idx2 = d ^ q; - - - cl = ((uint64_t*) &l3[idx3 & MASK])[0]; - ch = ((uint64_t*) &l3[idx3 & MASK])[1]; - lo = __umul128(idx3, cl, &hi); - - al3 += hi; - ah3 += lo; - - ((uint64_t*) &l3[idx3 & MASK])[0] = al3; - ((uint64_t*) &l3[idx3 & MASK])[1] = ah3; - - ah3 ^= ch; - al3 ^= cl; - idx3 = al3; - - n = ((int64_t*)&l3[idx3 & MASK])[0]; - d = ((int32_t*)&l3[idx3 & MASK])[2]; - q = n / (d | 0x5); - - ((int64_t*)&l3[idx3 & MASK])[0] = n ^ q; - idx3 = d ^ q; - } - - cn_implode_scratchpad_heavy((__m128i*) l0, (__m128i*) h0); - cn_implode_scratchpad_heavy((__m128i*) l1, (__m128i*) h1); - cn_implode_scratchpad_heavy((__m128i*) l2, (__m128i*) h2); - cn_implode_scratchpad_heavy((__m128i*) l3, (__m128i*) h3); - - keccakf(h0, 24); - keccakf(h1, 24); - keccakf(h2, 24); - keccakf(h3, 24); - - extra_hashes[ctx->state[0][0] & 3](ctx->state[0], 200, output); - extra_hashes[ctx->state[1][0] & 3](ctx->state[1], 200, output + 32); - extra_hashes[ctx->state[2][0] & 3](ctx->state[2], 200, output + 64); - extra_hashes[ctx->state[3][0] & 3](ctx->state[3], 200, output + 96); + inline static void hashHeavyHaven(const uint8_t* __restrict__ input, + size_t size, + uint8_t* __restrict__ output, + cryptonight_ctx* __restrict__ ctx) + { + // not supported } }; @@ -3469,226 +3694,15 @@ public: uint8_t* __restrict__ output, cryptonight_ctx* __restrict__ ctx) { - keccak((const uint8_t*) input, (int) size, ctx->state[0], 200); - keccak((const uint8_t*) input + size, (int) size, ctx->state[1], 200); - keccak((const uint8_t*) input + 2 * size, (int) size, ctx->state[2], 200); - keccak((const uint8_t*) input + 3 * size, (int) size, ctx->state[3], 200); - keccak((const uint8_t*) input + 4 * size, (int) size, ctx->state[4], 200); - - const uint8_t* l0 = ctx->memory; - const uint8_t* l1 = ctx->memory + MEM; - const uint8_t* l2 = ctx->memory + 2 * MEM; - const uint8_t* l3 = ctx->memory + 3 * MEM; - const uint8_t* l4 = ctx->memory + 4 * MEM; - uint64_t* h0 = reinterpret_cast(ctx->state[0]); - uint64_t* h1 = reinterpret_cast(ctx->state[1]); - uint64_t* h2 = reinterpret_cast(ctx->state[2]); - uint64_t* h3 = reinterpret_cast(ctx->state[3]); - uint64_t* h4 = reinterpret_cast(ctx->state[4]); - - cn_explode_scratchpad_heavy((__m128i*) h0, (__m128i*) l0); - cn_explode_scratchpad_heavy((__m128i*) h1, (__m128i*) l1); - cn_explode_scratchpad_heavy((__m128i*) h2, (__m128i*) l2); - cn_explode_scratchpad_heavy((__m128i*) h3, (__m128i*) l3); - cn_explode_scratchpad_heavy((__m128i*) h4, (__m128i*) l4); - - uint64_t al0 = h0[0] ^h0[4]; - uint64_t al1 = h1[0] ^h1[4]; - uint64_t al2 = h2[0] ^h2[4]; - uint64_t al3 = h3[0] ^h3[4]; - uint64_t al4 = h4[0] ^h4[4]; - uint64_t ah0 = h0[1] ^h0[5]; - uint64_t ah1 = h1[1] ^h1[5]; - uint64_t ah2 = h2[1] ^h2[5]; - uint64_t ah3 = h3[1] ^h3[5]; - uint64_t ah4 = h4[1] ^h4[5]; - - __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); - __m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]); - __m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]); - __m128i bx4 = _mm_set_epi64x(h4[3] ^ h4[7], h4[2] ^ h4[6]); - - uint64_t idx0 = h0[0] ^h0[4]; - uint64_t idx1 = h1[0] ^h1[4]; - uint64_t idx2 = h2[0] ^h2[4]; - uint64_t idx3 = h3[0] ^h3[4]; - uint64_t idx4 = h4[0] ^h4[4]; - - for (size_t i = 0; i < ITERATIONS; i++) { - __m128i cx0; - __m128i cx1; - __m128i cx2; - __m128i cx3; - __m128i cx4; - - if (SOFT_AES) { - cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0)); - cx1 = soft_aesenc((uint32_t*)&l1[idx1 & 
MASK], _mm_set_epi64x(ah1, al1)); - cx2 = soft_aesenc((uint32_t*)&l2[idx2 & MASK], _mm_set_epi64x(ah2, al2)); - cx3 = soft_aesenc((uint32_t*)&l3[idx3 & MASK], _mm_set_epi64x(ah3, al3)); - cx4 = soft_aesenc((uint32_t*)&l4[idx4 & MASK], _mm_set_epi64x(ah4, al4)); - } else { - cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]); - cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]); - cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]); - cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]); - cx4 = _mm_load_si128((__m128i*) &l4[idx4 & MASK]); - - cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0)); - cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1)); - cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2)); - cx3 = _mm_aesenc_si128(cx3, _mm_set_epi64x(ah3, al3)); - cx4 = _mm_aesenc_si128(cx4, _mm_set_epi64x(ah4, al4)); - } - - _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0)); - _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1)); - _mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx2, cx2)); - _mm_store_si128((__m128i*) &l3[idx3 & MASK], _mm_xor_si128(bx3, cx3)); - _mm_store_si128((__m128i*) &l4[idx4 & MASK], _mm_xor_si128(bx4, cx4)); - - idx0 = EXTRACT64(cx0); - idx1 = EXTRACT64(cx1); - idx2 = EXTRACT64(cx2); - idx3 = EXTRACT64(cx3); - idx4 = EXTRACT64(cx4); - - bx0 = cx0; - bx1 = cx1; - bx2 = cx2; - bx3 = cx3; - bx4 = cx4; - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*) &l0[idx0 & MASK])[0]; - ch = ((uint64_t*) &l0[idx0 & MASK])[1]; - lo = __umul128(idx0, cl, &hi); - - al0 += hi; - ah0 += lo; - - ((uint64_t*) &l0[idx0 & MASK])[0] = al0; - ((uint64_t*) &l0[idx0 & MASK])[1] = ah0; - - ah0 ^= ch; - al0 ^= cl; - idx0 = al0; - - int64_t n = ((int64_t*)&l0[idx0 & MASK])[0]; - int32_t d = ((int32_t*)&l0[idx0 & MASK])[2]; - int64_t q = n / (d | 0x5); - - ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; - idx0 = d ^ q; - - - cl = ((uint64_t*) &l1[idx1 & MASK])[0]; - ch = ((uint64_t*) &l1[idx1 & MASK])[1]; - lo = __umul128(idx1, cl, &hi); - - al1 += hi; - ah1 += lo; - - ((uint64_t*) &l1[idx1 & MASK])[0] = al1; - ((uint64_t*) &l1[idx1 & MASK])[1] = ah1; - - ah1 ^= ch; - al1 ^= cl; - idx1 = al1; - - n = ((int64_t*)&l1[idx1 & MASK])[0]; - d = ((int32_t*)&l1[idx1 & MASK])[2]; - q = n / (d | 0x5); - - ((int64_t*)&l1[idx1 & MASK])[0] = n ^ q; - idx1 = d ^ q; - - - cl = ((uint64_t*) &l2[idx2 & MASK])[0]; - ch = ((uint64_t*) &l2[idx2 & MASK])[1]; - lo = __umul128(idx2, cl, &hi); - - al2 += hi; - ah2 += lo; - - ((uint64_t*) &l2[idx2 & MASK])[0] = al2; - ((uint64_t*) &l2[idx2 & MASK])[1] = ah2; - - ah2 ^= ch; - al2 ^= cl; - idx2 = al2; - - n = ((int64_t*)&l2[idx2 & MASK])[0]; - d = ((int32_t*)&l2[idx2 & MASK])[2]; - q = n / (d | 0x5); - - ((int64_t*)&l2[idx2 & MASK])[0] = n ^ q; - idx2 = d ^ q; - - - cl = ((uint64_t*) &l3[idx3 & MASK])[0]; - ch = ((uint64_t*) &l3[idx3 & MASK])[1]; - lo = __umul128(idx3, cl, &hi); - - al3 += hi; - ah3 += lo; - - ((uint64_t*) &l3[idx3 & MASK])[0] = al3; - ((uint64_t*) &l3[idx3 & MASK])[1] = ah3; - - ah3 ^= ch; - al3 ^= cl; - idx3 = al3; - - n = ((int64_t*)&l3[idx3 & MASK])[0]; - d = ((int32_t*)&l3[idx3 & MASK])[2]; - q = n / (d | 0x5); - - ((int64_t*)&l3[idx3 & MASK])[0] = n ^ q; - idx3 = d ^ q; - - - cl = ((uint64_t*) &l4[idx4 & MASK])[0]; - ch = ((uint64_t*) &l4[idx4 & MASK])[1]; - lo = __umul128(idx4, cl, &hi); - - al4 += hi; - ah4 += lo; - - ((uint64_t*) &l4[idx4 & MASK])[0] = al4; - ((uint64_t*) &l4[idx4 & MASK])[1] = ah4; - - ah4 ^= ch; - al4 ^= cl; - idx4 = al4; - - n = ((int64_t*)&l4[idx4 & MASK])[0]; - d = ((int32_t*)&l4[idx4 & 
MASK])[2]; - q = n / (d | 0x5); - - ((int64_t*)&l4[idx4 & MASK])[0] = n ^ q; - idx4 = d ^ q; - } - - cn_implode_scratchpad_heavy((__m128i*) l0, (__m128i*) h0); - cn_implode_scratchpad_heavy((__m128i*) l1, (__m128i*) h1); - cn_implode_scratchpad_heavy((__m128i*) l2, (__m128i*) h2); - cn_implode_scratchpad_heavy((__m128i*) l3, (__m128i*) h3); - cn_implode_scratchpad_heavy((__m128i*) l4, (__m128i*) h4); - - keccakf(h0, 24); - keccakf(h1, 24); - keccakf(h2, 24); - keccakf(h3, 24); - keccakf(h4, 24); - - extra_hashes[ctx->state[0][0] & 3](ctx->state[0], 200, output); - extra_hashes[ctx->state[1][0] & 3](ctx->state[1], 200, output + 32); - extra_hashes[ctx->state[2][0] & 3](ctx->state[2], 200, output + 64); - extra_hashes[ctx->state[3][0] & 3](ctx->state[3], 200, output + 96); - extra_hashes[ctx->state[4][0] & 3](ctx->state[4], 200, output + 128); + // not supported } + inline static void hashHeavyHaven(const uint8_t* __restrict__ input, + size_t size, + uint8_t* __restrict__ output, + cryptonight_ctx* __restrict__ ctx) + { + // not supported + } }; #endif /* __CRYPTONIGHT_X86_H__ */ diff --git a/src/net/Job.cpp b/src/net/Job.cpp index 07bf9bc6..8de5034d 100644 --- a/src/net/Job.cpp +++ b/src/net/Job.cpp @@ -150,6 +150,10 @@ PowVariant Job::powVariant() const { return POW_V1; } + else if (m_powVariant == PowVariant::POW_XHV && m_blob[0] < 3) + { + return POW_V0; + } else { return m_powVariant; diff --git a/src/version.h b/src/version.h index b24d4ace..4e2fd792 100644 --- a/src/version.h +++ b/src/version.h @@ -36,7 +36,7 @@ #define APP_DESC "XMRigCC CPU miner" #define APP_COPYRIGHT "Copyright (C) 2017- BenDr0id" #endif -#define APP_VERSION "1.6.4_masari_v7_support (based on XMRig)" +#define APP_VERSION "1.6.4_haven_v3_support (based on XMRig)" #define APP_DOMAIN "" #define APP_SITE "https://github.com/Bendr0id/xmrigCC" #define APP_KIND "cpu"
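
Reviewer note: apart from the dispatch, self-test, and job plumbing above, the only algorithmic difference between the new hashHeavyHaven and the existing hashHeavy appears to be the bit-inverted divisor word in the per-iteration division tweak (idx = (~d) ^ q instead of idx = d ^ q); the explode/implode, AES round, and 64x64 multiply steps are unchanged. A minimal self-contained sketch of just that step follows; the helper names are illustrative only (the miner inlines this logic directly in each unrolled hash loop):

    #include <cstdint>

    // cn-heavy index update: q = n / (d | 5), write n ^ q back, next index from d ^ q
    static inline uint64_t heavy_index_update(int64_t& n, int32_t d)
    {
        const int64_t q = n / (d | 0x5);   // (d | 0x5) forces an odd, non-zero divisor
        n ^= q;                            // the miner stores n ^ q back into the scratchpad word
        return static_cast<uint64_t>(d ^ q);
    }

    // Haven (xhv) index update: identical except the divisor word is bit-inverted for the new index
    static inline uint64_t haven_index_update(int64_t& n, int32_t d)
    {
        const int64_t q = n / (d | 0x5);
        n ^= q;
        return static_cast<uint64_t>((~d) ^ q);
    }

On the job side, Job::powVariant() now returns POW_V0 when the configured variant is POW_XHV and the blob major version is below 3, which keeps pre-fork Haven blocks on the original CryptoNight variant.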