Optimized cn-heavy for Zen3

- Uses scratchpad interleaving to access only the closest L3 slice from each CPU core. - Also activates MSR mod for cn-heavy because CPU prefetchers get confused with interleaving - 7-8% speedup on Zen3
2021-02-07 22:05:11 +01:00 · 2021-02-07 22:05:11 +01:00 · 8af8df25aa
commit 8af8df25aa
parent b1e14dc1d3
8 changed files with 187 additions and 81 deletions
--- a/src/crypto/cn/CryptoNight_arm.h
+++ b/src/crypto/cn/CryptoNight_arm.h
@ -431,7 +431,7 @@ static inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
 }


-template<Algorithm::Id ALGO, bool SOFT_AES>
+template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
 inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
 {
    constexpr CnAlgo<ALGO> props;