Added support for CryptoNight v8 Waltz (aka VARIANT_WALTZ) and CryptoNight v8 ReverseWaltz (aka VARIANT_RWZ)

This commit is contained in:
EDDragonWolf 2019-03-01 06:55:56 -08:00
parent eec5ca5535
commit a89e09e427
10 changed files with 152 additions and 64 deletions

View file

@@ -436,7 +436,7 @@ static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m1
     uint64_t* mem_out = (uint64_t*)&l[idx];
 
     if (BASE == xmrig::VARIANT_2) {
-        VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx);
+        VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
         _mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
     } else {
         __m128i tmp = _mm_xor_si128(bx0, cx);
@@ -530,9 +530,9 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
         if (BASE == xmrig::VARIANT_2) {
             if (VARIANT == xmrig::VARIANT_4) {
-                VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx);
+                VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0);
             } else {
-                VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo);
+                VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
             }
         }
@@ -709,9 +709,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
         if (BASE == xmrig::VARIANT_2) {
             if (VARIANT == xmrig::VARIANT_4) {
-                VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0);
+                VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0);
             } else {
-                VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo);
+                VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
             }
         }
@@ -767,9 +767,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
         if (BASE == xmrig::VARIANT_2) {
             if (VARIANT == xmrig::VARIANT_4) {
-                VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1);
+                VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0);
             } else {
-                VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo);
+                VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
             }
         }
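Note on the new argument: VARIANT is a template parameter in all of these functions, so the added "(VARIANT == xmrig::VARIANT_RWZ ? 1 : 0)" selector folds to a compile-time constant and the reverse branches inside the shuffle macros are eliminated at instantiation; the VARIANT_4 call sites pass a literal 0 because CryptoNight/R builds on the v2 base but keeps the original shuffle order. A minimal standalone sketch of the pattern (the enum and function below are illustrative, not from the codebase):

    #include <cstdint>

    enum Variant { VARIANT_2, VARIANT_RWZ };

    template<Variant VARIANT>
    uint64_t pick_offset(uint64_t offset)
    {
        // Mirrors the macros' "(offset) ^ (reverse ? 0x30 : 0x10)" addressing.
        const int reverse = (VARIANT == VARIANT_RWZ ? 1 : 0);
        return offset ^ (reverse ? 0x30 : 0x10);
    }

    // pick_offset<VARIANT_2>(x)   reduces to x ^ 0x10;
    // pick_offset<VARIANT_RWZ>(x) reduces to x ^ 0x30.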

View file

@@ -42,6 +42,7 @@ constexpr const uint32_t CRYPTONIGHT_MASK = 0x1FFFF0;
 constexpr const uint32_t CRYPTONIGHT_ITER = 0x80000;
 constexpr const uint32_t CRYPTONIGHT_HALF_ITER = 0x40000;
 constexpr const uint32_t CRYPTONIGHT_XAO_ITER = 0x100000;
+constexpr const uint32_t CRYPTONIGHT_WALTZ_ITER = 0x60000;
 
 constexpr const uint32_t CRYPTONIGHT_GPU_ITER = 0xC000;
 constexpr const uint32_t CRYPTONIGHT_GPU_MASK = 0x1FFFC0;
@@ -134,6 +135,8 @@ template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_MSR>()
 template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_XAO>() { return CRYPTONIGHT_XAO_ITER; }
 template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_RTO>() { return CRYPTONIGHT_ITER; }
 template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_GPU>() { return CRYPTONIGHT_GPU_ITER; }
+template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_WALTZ>() { return CRYPTONIGHT_WALTZ_ITER; }
+template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_RWZ>() { return CRYPTONIGHT_WALTZ_ITER; }
 template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_LITE, VARIANT_0>() { return CRYPTONIGHT_LITE_ITER; }
 template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_LITE, VARIANT_1>() { return CRYPTONIGHT_LITE_ITER; }
 template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_HEAVY, VARIANT_0>() { return CRYPTONIGHT_HEAVY_ITER; }
@@ -158,6 +161,10 @@ inline uint32_t cn_select_iter(Algo algorithm, Variant variant)
     case VARIANT_TRTL:
         return CRYPTONIGHT_TRTL_ITER;
+    case VARIANT_WALTZ:
+    case VARIANT_RWZ:
+        return CRYPTONIGHT_WALTZ_ITER;
     default:
         break;
     }
@@ -199,6 +206,8 @@ template<> inline constexpr Variant cn_base_variant<VARIANT_TRTL>() { return VA
 template<> inline constexpr Variant cn_base_variant<VARIANT_GPU>() { return VARIANT_GPU; }
 template<> inline constexpr Variant cn_base_variant<VARIANT_WOW>() { return VARIANT_2; }
 template<> inline constexpr Variant cn_base_variant<VARIANT_4>() { return VARIANT_2; }
+template<> inline constexpr Variant cn_base_variant<VARIANT_WALTZ>() { return VARIANT_2; }
+template<> inline constexpr Variant cn_base_variant<VARIANT_RWZ>() { return VARIANT_2; }
 
 template<Variant variant> inline constexpr bool cn_is_cryptonight_r() { return false; }
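Both new variants map onto the VARIANT_2 base and share one iteration constant: 0x60000 is exactly three quarters of the v2 count 0x80000, so Waltz and ReverseWaltz run 3/4 of the CryptoNight v8 main loop. A standalone check of that arithmetic (constants copied from the hunks above):

    #include <cstdint>

    constexpr uint32_t CRYPTONIGHT_ITER       = 0x80000; // 524288 main-loop iterations (v2)
    constexpr uint32_t CRYPTONIGHT_WALTZ_ITER = 0x60000; // 393216 main-loop iterations

    static_assert(CRYPTONIGHT_WALTZ_ITER == CRYPTONIGHT_ITER / 4 * 3,
                  "Waltz/ReverseWaltz run 3/4 of the v2 iteration count");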

View file

@@ -83,11 +83,11 @@
         sqrt_result_xmm_##part = int_sqrt_v2(cx_0 + division_result); \
     } while (0)
 
-#   define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c) \
+#   define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c, reverse) \
     do { \
-        const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))); \
+        const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ (reverse ? 0x30 : 0x10)))); \
         const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
-        const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
+        const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ (reverse ? 0x10 : 0x30)))); \
         _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
         _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
         _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
@@ -96,15 +96,20 @@
         } \
     } while (0)
 
-#   define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \
+#   define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo, reverse) \
     do { \
         const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))), _mm_set_epi64x(lo, hi)); \
         const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
         hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \
         lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \
         const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
-        _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
-        _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
+        if (reverse) { \
+            _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk1, _b1)); \
+            _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk3, _b)); \
+        } else { \
+            _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
+            _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
+        } \
         _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
     } while (0)
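To make the effect of the flag concrete, here is a scalar model of VARIANT2_SHUFFLE with reverse. This is a sketch for illustration only: the real code uses 128-bit SSE (and NEON, below) loads and stores, and the u128 helpers are invented for the example.

    #include <cstdint>
    #include <cstring>

    struct u128 { uint64_t lo, hi; };

    // Two independent 64-bit lane additions, like _mm_add_epi64 / vaddq_u64.
    static u128 add128(u128 x, u128 y) { return { x.lo + y.lo, x.hi + y.hi }; }

    static void shuffle_model(uint8_t *l, uint64_t idx,
                              u128 a, u128 b, u128 b1, bool reverse)
    {
        auto ld = [&](uint64_t off) { u128 v; std::memcpy(&v, l + (idx ^ off), 16); return v; };
        auto st = [&](uint64_t off, u128 v) { std::memcpy(l + (idx ^ off), &v, 16); };

        // ReverseWaltz swaps the *sources* of chunk1 and chunk3;
        // the three store destinations stay the same.
        const u128 chunk1 = ld(reverse ? 0x30 : 0x10);
        const u128 chunk2 = ld(0x20);
        const u128 chunk3 = ld(reverse ? 0x10 : 0x30);

        st(0x10, add128(chunk3, b1)); // reverse: rewritten from its own old value plus _b1
        st(0x20, add128(chunk1, b));  // reverse: receives the old ^0x30 chunk instead of ^0x10
        st(0x30, add128(chunk2, a));  // identical in both modes
    }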
@@ -128,11 +133,11 @@
         sqrt_result_##part += ((r2 + b > sqrt_input) ? -1 : 0) + ((r2 + (1ULL << 32) < sqrt_input - s) ? 1 : 0); \
     } while (0)
 
-#   define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c) \
+#   define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c, reverse) \
     do { \
-        const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))); \
+        const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ (reverse ? 0x30 : 0x10)))); \
         const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \
-        const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \
+        const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ (reverse ? 0x10 : 0x30)))); \
         vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
         vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
         vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
@@ -141,15 +146,20 @@
         } \
     } while (0)
 
-#   define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \
+#   define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo, reverse) \
     do { \
         const uint64x2_t chunk1 = veorq_u64(vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))), vcombine_u64(vcreate_u64(hi), vcreate_u64(lo))); \
         const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \
         hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \
         lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \
         const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \
-        vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
-        vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
+        if (reverse) { \
+            vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b1))); \
+            vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b))); \
+        } else { \
+            vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
+            vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
+        } \
         vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
     } while (0)
 #endif
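VARIANT2_SHUFFLE2 handles reverse differently, as both the SSE and NEON hunks above show: the three loads keep their offsets, and the two affected stores swap their chunk operands instead. A matching scalar model, reusing the invented u128/add128 helpers from the previous sketch:

    static void shuffle2_model(uint8_t *l, uint64_t idx, u128 a, u128 b, u128 b1,
                               uint64_t &hi, uint64_t &lo, bool reverse)
    {
        auto ld = [&](uint64_t off) { u128 v; std::memcpy(&v, l + (idx ^ off), 16); return v; };
        auto st = [&](uint64_t off, u128 v) { std::memcpy(l + (idx ^ off), &v, 16); };

        u128 chunk1 = ld(0x10);
        chunk1.lo ^= hi;   // _mm_set_epi64x(lo, hi) puts hi in the low lane, so the
        chunk1.hi ^= lo;   // low lane is XORed with hi and the high lane with lo
        const u128 chunk2 = ld(0x20);
        hi ^= chunk2.lo;   // the ^0x20 chunk feeds back into the multiply result
        lo ^= chunk2.hi;
        const u128 chunk3 = ld(0x30);

        if (reverse) {     // ReverseWaltz: the two stores swap chunk1 and chunk3
            st(0x10, add128(chunk1, b1));
            st(0x20, add128(chunk3, b));
        } else {
            st(0x10, add128(chunk3, b1));
            st(0x20, add128(chunk1, b));
        }
        st(0x30, add128(chunk2, a)); // identical in both modes
    }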

View file

@@ -199,6 +199,32 @@ const static uint8_t test_output_rto[160] = {
     0xE7, 0x81, 0x4E, 0x2A, 0xBD, 0x62, 0xC1, 0x1B, 0x7C, 0xB9, 0x33, 0x7B, 0xEE, 0x95, 0x80, 0xB3
 };
 
+const static uint8_t test_output_waltz[160] = {
+    0x51, 0x6e, 0x33, 0xc6, 0xe4, 0x46, 0xab, 0xbc, 0xcd, 0xad, 0x18, 0xc0, 0x4c, 0xd9, 0xa2, 0x5e,
+    0x64, 0x10, 0x28, 0x53, 0xb2, 0x0a, 0x42, 0xdf, 0xde, 0xaa, 0x8b, 0x59, 0x9e, 0xcf, 0x40, 0xe2,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+const static uint8_t test_output_rwz[160] = {
+    0x5f, 0x56, 0xc6, 0xb0, 0x99, 0x6b, 0xa2, 0x3e, 0x0b, 0xba, 0x07, 0x29, 0xc9, 0x90, 0x74, 0x85,
+    0x5a, 0x10, 0xe3, 0x08, 0x7f, 0xdb, 0xfe, 0x94, 0x75, 0x33, 0x54, 0x73, 0x76, 0xf0, 0x75, 0xb8,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
 #ifndef XMRIG_NO_AEON
 // "cn-lite/0"

View file

@@ -460,7 +460,7 @@ template<xmrig::Variant VARIANT, xmrig::Variant BASE>
 static inline void cryptonight_monero_tweak(uint64_t* mem_out, const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i& cx)
 {
     if (BASE == xmrig::VARIANT_2) {
-        VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx);
+        VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
         _mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
     } else {
         __m128i tmp = _mm_xor_si128(bx0, cx);
@@ -558,9 +558,9 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
         if (BASE == xmrig::VARIANT_2) {
             if (VARIANT == xmrig::VARIANT_4) {
-                VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx);
+                VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0);
             } else {
-                VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo);
+                VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
             }
         }
@@ -896,9 +896,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
         if (BASE == xmrig::VARIANT_2) {
             if (VARIANT == xmrig::VARIANT_4) {
-                VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0);
+                VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0);
             } else {
-                VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo);
+                VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
             }
         }
@@ -952,9 +952,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
         if (BASE == xmrig::VARIANT_2) {
             if (VARIANT == xmrig::VARIANT_4) {
-                VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1);
+                VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0);
             } else {
-                VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo);
+                VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
             }
         }
@@ -1056,9 +1056,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
     lo = __umul128(idx, cl##part, &hi); \
     if (BASE == xmrig::VARIANT_2) { \
         if (VARIANT == xmrig::VARIANT_4) { \
-            VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1, c); \
+            VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1, c, 0); \
         } else { \
-            VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo); \
+            VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0)); \
         } \
     } \
     if (VARIANT == xmrig::VARIANT_4) { \