Merge xmrig v6.8.2 into master

This commit is contained in:
MoneroOcean 2021-02-12 16:38:54 +00:00
commit 5088fbdfad
15 changed files with 225 additions and 113 deletions

View file

@ -49,15 +49,15 @@
#define ADD_FN(algo) \
m_map[algo][AV_SINGLE][Assembly::NONE] = cryptonight_single_hash<algo, false>; \
m_map[algo][AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash<algo, true>; \
m_map[algo][AV_DOUBLE][Assembly::NONE] = cryptonight_double_hash<algo, false>; \
m_map[algo][AV_DOUBLE_SOFT][Assembly::NONE] = cryptonight_double_hash<algo, true>; \
m_map[algo][AV_TRIPLE][Assembly::NONE] = cryptonight_triple_hash<algo, false>; \
m_map[algo][AV_TRIPLE_SOFT][Assembly::NONE] = cryptonight_triple_hash<algo, true>; \
m_map[algo][AV_QUAD][Assembly::NONE] = cryptonight_quad_hash<algo, false>; \
m_map[algo][AV_QUAD_SOFT][Assembly::NONE] = cryptonight_quad_hash<algo, true>; \
m_map[algo][AV_PENTA][Assembly::NONE] = cryptonight_penta_hash<algo, false>; \
m_map[algo][AV_SINGLE][Assembly::NONE] = cryptonight_single_hash<algo, false, 0>; \
m_map[algo][AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash<algo, true, 0>; \
m_map[algo][AV_DOUBLE][Assembly::NONE] = cryptonight_double_hash<algo, false>; \
m_map[algo][AV_DOUBLE_SOFT][Assembly::NONE] = cryptonight_double_hash<algo, true>; \
m_map[algo][AV_TRIPLE][Assembly::NONE] = cryptonight_triple_hash<algo, false>; \
m_map[algo][AV_TRIPLE_SOFT][Assembly::NONE] = cryptonight_triple_hash<algo, true>; \
m_map[algo][AV_QUAD][Assembly::NONE] = cryptonight_quad_hash<algo, false>; \
m_map[algo][AV_QUAD_SOFT][Assembly::NONE] = cryptonight_quad_hash<algo, true>; \
m_map[algo][AV_PENTA][Assembly::NONE] = cryptonight_penta_hash<algo, false>; \
m_map[algo][AV_PENTA_SOFT][Assembly::NONE] = cryptonight_penta_hash<algo, true>;
@ -303,6 +303,22 @@ xmrig::cn_hash_fun xmrig::CnHash::fn(const Algorithm &algorithm, AlgoVariant av,
return nullptr;
}
# ifdef XMRIG_ALGO_CN_HEAVY
// cn-heavy optimization for Zen3 CPUs
if ((av == AV_SINGLE) && (xmrig::Cpu::info()->arch() == xmrig::ICpuInfo::ARCH_ZEN3)) {
switch (algorithm.id()) {
case xmrig::Algorithm::CN_HEAVY_0:
return cryptonight_single_hash<xmrig::Algorithm::CN_HEAVY_0, false, 3>;
case xmrig::Algorithm::CN_HEAVY_TUBE:
return cryptonight_single_hash<xmrig::Algorithm::CN_HEAVY_TUBE, false, 3>;
case xmrig::Algorithm::CN_HEAVY_XHV:
return cryptonight_single_hash<xmrig::Algorithm::CN_HEAVY_XHV, false, 3>;
default:
break;
}
}
# endif
# ifdef XMRIG_FEATURE_ASM
cn_hash_fun fun = cnHash.m_map[algorithm][av][Cpu::assembly(assembly)];
if (fun) {

View file

@ -435,7 +435,7 @@ static inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
}
template<Algorithm::Id ALGO, bool SOFT_AES>
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr CnAlgo<ALGO> props;

View file

@ -306,7 +306,21 @@ inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3
namespace xmrig {
template<Algorithm::Id ALGO, bool SOFT_AES>
template<int interleave>
static inline constexpr uint64_t interleaved_index(uint64_t k)
{
return ((k & ~63ULL) << interleave) | (k & 63);
}
template<>
inline constexpr uint64_t interleaved_index<0>(uint64_t k)
{
return k;
}
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
{
constexpr CnAlgo<ALGO> props;
@ -343,6 +357,11 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
}
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
if (interleave > 0) {
_mm_prefetch((const char*)(output), _MM_HINT_T0);
_mm_prefetch((const char*)(output + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
}
aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
@ -354,19 +373,21 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
_mm_store_si128(output + i + 0, xin0);
_mm_store_si128(output + i + 1, xin1);
_mm_store_si128(output + i + 2, xin2);
_mm_store_si128(output + i + 3, xin3);
_mm_store_si128(output + i + 4, xin4);
_mm_store_si128(output + i + 5, xin5);
_mm_store_si128(output + i + 6, xin6);
_mm_store_si128(output + i + 7, xin7);
_mm_store_si128(output + 0, xin0);
_mm_store_si128(output + 1, xin1);
_mm_store_si128(output + 2, xin2);
_mm_store_si128(output + 3, xin3);
output += (64 << interleave) / sizeof(__m128i);
_mm_store_si128(output + 0, xin4);
_mm_store_si128(output + 1, xin5);
_mm_store_si128(output + 2, xin6);
_mm_store_si128(output + 3, xin7);
output += (64 << interleave) / sizeof(__m128i);
}
}
template<Algorithm::Id ALGO, bool SOFT_AES>
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
{
constexpr CnAlgo<ALGO> props;
@ -391,15 +412,25 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
xout6 = _mm_load_si128(output + 10);
xout7 = _mm_load_si128(output + 11);
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
const __m128i* input_begin = input;
for (size_t i = 0; i < props.memory() / sizeof(__m128i);) {
xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
input += (64 << interleave) / sizeof(__m128i);
xout4 = _mm_xor_si128(_mm_load_si128(input + 0), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + 1), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + 2), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + 3), xout7);
input += (64 << interleave) / sizeof(__m128i);
i += 8;
if ((interleave > 0) && (i < props.memory() / sizeof(__m128i))) {
_mm_prefetch((const char*)(input), _MM_HINT_T0);
_mm_prefetch((const char*)(input + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
}
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
@ -418,15 +449,25 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
}
if (IS_HEAVY) {
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
input = input_begin;
for (size_t i = 0; i < props.memory() / sizeof(__m128i);) {
xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
input += (64 << interleave) / sizeof(__m128i);
xout4 = _mm_xor_si128(_mm_load_si128(input + 0), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + 1), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + 2), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + 3), xout7);
input += (64 << interleave) / sizeof(__m128i);
i += 8;
if ((interleave > 0) && (i < props.memory() / sizeof(__m128i))) {
_mm_prefetch((const char*)(input), _MM_HINT_T0);
_mm_prefetch((const char*)(input + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
}
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
@ -562,7 +603,7 @@ static inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
cx = _mm_xor_si128(cx, _mm_cvttps_epi32(nc));
}
template<Algorithm::Id ALGO, bool SOFT_AES>
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr CnAlgo<ALGO> props;
@ -581,7 +622,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
}
keccak(input, size, ctx[0]->state);
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(ctx[0]->state), reinterpret_cast<__m128i *>(ctx[0]->memory));
cn_explode_scratchpad<ALGO, SOFT_AES, interleave>(reinterpret_cast<const __m128i *>(ctx[0]->state), reinterpret_cast<__m128i *>(ctx[0]->memory));
uint64_t *h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
uint8_t *l0 = ctx[0]->memory;
@ -624,7 +665,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
for (size_t i = 0; i < props.iterations(); i++) {
__m128i cx;
if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK]));
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)]));
if (ALGO == Algorithm::CN_CCX) {
cryptonight_conceal_tweak(cx, conc_var);
}
@ -636,12 +677,12 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
}
else if (SOFT_AES) {
if (ALGO == Algorithm::CN_CCX) {
cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[interleaved_index<interleave>(idx0 & MASK)]));
cryptonight_conceal_tweak(cx, conc_var);
cx = soft_aesenc(&cx, ax0, reinterpret_cast<const uint32_t*>(saes_table));
}
else {
cx = soft_aesenc(&l0[idx0 & MASK], ax0, reinterpret_cast<const uint32_t*>(saes_table));
cx = soft_aesenc(&l0[interleaved_index<interleave>(idx0 & MASK)], ax0, reinterpret_cast<const uint32_t*>(saes_table));
}
}
else {
@ -649,16 +690,16 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
}
if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) {
cryptonight_monero_tweak<ALGO>(reinterpret_cast<uint64_t*>(&l0[idx0 & MASK]), l0, idx0 & MASK, ax0, bx0, bx1, cx);
cryptonight_monero_tweak<ALGO>(reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]), l0, idx0 & MASK, ax0, bx0, bx1, cx);
} else {
_mm_store_si128(reinterpret_cast<__m128i *>(&l0[idx0 & MASK]), _mm_xor_si128(bx0, cx));
_mm_store_si128(reinterpret_cast<__m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)]), _mm_xor_si128(bx0, cx));
}
idx0 = static_cast<uint64_t>(_mm_cvtsi128_si64(cx));
uint64_t hi, lo, cl, ch;
cl = (reinterpret_cast<uint64_t*>(&l0[idx0 & MASK]))[0];
ch = (reinterpret_cast<uint64_t*>(&l0[idx0 & MASK]))[1];
cl = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[0];
ch = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[1];
if (BASE == Algorithm::CN_2) {
if (props.isR()) {
@ -685,14 +726,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
al0 += hi;
ah0 += lo;
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[0] = al0;
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = al0;
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) {
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0 ^ al0;
} else if (BASE == Algorithm::CN_1) {
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0;
} else {
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[1] = ah0;
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0;
}
al0 ^= cl;
@ -701,11 +742,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
# ifdef XMRIG_ALGO_CN_HEAVY
if (props.isHeavy()) {
int64_t n = ((int64_t*)&l0[idx0 & MASK])[0];
int32_t d = ((int32_t*)&l0[idx0 & MASK])[2];
int64_t n = ((int64_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[0];
int32_t d = ((int32_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[2];
int64_t q = n / (d | 0x5);
((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
((int64_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = n ^ q;
if (ALGO == Algorithm::CN_HEAVY_XHV) {
d = ~d;
@ -726,7 +767,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
}
# endif
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
cn_implode_scratchpad<ALGO, SOFT_AES, interleave>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
keccakf(h0, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
@ -792,7 +833,7 @@ inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_
cn_gpu_inner_ssse3<props.iterations(), props.mask()>(ctx[0]->state, ctx[0]->memory);
}
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
memcpy(output, ctx[0]->state, 32);
}
@ -881,7 +922,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
}
keccak(input, size, ctx[0]->state);
cn_explode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
if (ALGO == Algorithm::CN_2) {
if (ASM == Assembly::INTEL) {
@ -958,7 +999,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
ctx[0]->generated_code(ctx);
}
cn_implode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
@ -980,8 +1021,8 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
keccak(input, size, ctx[0]->state);
keccak(input + size, size, ctx[1]->state);
cn_explode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
cn_explode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory));
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory));
if (ALGO == Algorithm::CN_2) {
cnv2_double_mainloop_sandybridge_asm(ctx);
@ -1010,8 +1051,8 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
ctx[0]->generated_code(ctx);
}
cn_implode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
cn_implode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
@ -1062,8 +1103,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
VARIANT4_RANDOM_MATH_INIT(0);
VARIANT4_RANDOM_MATH_INIT(1);
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(h0), reinterpret_cast<__m128i *>(l0));
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(h1), reinterpret_cast<__m128i *>(l1));
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(h0), reinterpret_cast<__m128i *>(l0));
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(h1), reinterpret_cast<__m128i *>(l1));
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
@ -1258,8 +1299,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
bx10 = cx1;
}
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(l0), reinterpret_cast<__m128i *>(h0));
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(l1), reinterpret_cast<__m128i *>(h1));
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(l0), reinterpret_cast<__m128i *>(h0));
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(l1), reinterpret_cast<__m128i *>(h1));
keccakf(h0, 24);
keccakf(h1, 24);
@ -1404,7 +1445,7 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
for (size_t i = 0; i < 3; i++) {
keccak(input + size * i, size, ctx[i]->state);
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
}
uint8_t* l0 = ctx[0]->memory;
@ -1449,7 +1490,7 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
}
for (size_t i = 0; i < 3; i++) {
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
}
@ -1478,7 +1519,7 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
for (size_t i = 0; i < 4; i++) {
keccak(input + size * i, size, ctx[i]->state);
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
}
uint8_t* l0 = ctx[0]->memory;
@ -1531,7 +1572,7 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
}
for (size_t i = 0; i < 4; i++) {
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
}
@ -1560,7 +1601,7 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
for (size_t i = 0; i < 5; i++) {
keccak(input + size * i, size, ctx[i]->state);
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
}
uint8_t* l0 = ctx[0]->memory;
@ -1621,7 +1662,7 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
}
for (size_t i = 0; i < 5; i++) {
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
}

View file

@ -65,18 +65,13 @@
#endif
namespace xmrig {
#ifdef XMRIG_OS_LINUX
#if defined(XMRIG_OS_LINUX) || (!defined(XMRIG_OS_APPLE) && !defined(__FreeBSD__))
static inline int hugePagesFlag(size_t size)
{
return (static_cast<int>(log2(size)) & MAP_HUGE_MASK) << MAP_HUGE_SHIFT;
}
#endif
} // namespace xmrig
bool xmrig::VirtualMemory::isHugepagesAvailable()
{
@ -165,7 +160,7 @@ void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size, bool hugePages
void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size)
{
# if defined(__APPLE__)
# if defined(XMRIG_OS_APPLE)
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
# elif defined(__FreeBSD__)
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0);

View file

@ -88,7 +88,12 @@ void xmrig::Rx::init(IRxListener *listener)
template<typename T>
bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu)
{
if (seed.algorithm().family() != Algorithm::RANDOM_X) {
const Algorithm::Family f = seed.algorithm().family();
if ((f != Algorithm::RANDOM_X)
# ifdef XMRIG_ALGO_CN_HEAVY
&& (f != Algorithm::CN_HEAVY)
# endif
) {
# ifdef XMRIG_FEATURE_MSR
RxMsr::destroy();
# endif
@ -96,16 +101,22 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu
return true;
}
randomx_set_scratchpad_prefetch_mode(config.scratchpadPrefetchMode());
randomx_set_huge_pages_jit(cpu.isHugePagesJit());
randomx_set_optimized_dataset_init(config.initDatasetAVX2());
# ifdef XMRIG_FEATURE_MSR
if (!RxMsr::isInitialized()) {
RxMsr::init(config, cpu.threads().get(seed.algorithm()).data());
}
# endif
# ifdef XMRIG_ALGO_CN_HEAVY
if (f == Algorithm::CN_HEAVY) {
return true;
}
# endif
randomx_set_scratchpad_prefetch_mode(config.scratchpadPrefetchMode());
randomx_set_huge_pages_jit(cpu.isHugePagesJit());
randomx_set_optimized_dataset_init(config.initDatasetAVX2());
if (!osInitialized) {
# ifdef XMRIG_FIX_RYZEN
RxFix::setupMainLoopExceptionFrame();