Collection of CN-GPU fixes and updates

This commit is contained in:
Tony Butler 2021-03-26 16:03:13 -06:00
parent 174663bb50
commit 57d9d3ea7c
8 changed files with 153 additions and 147 deletions

View file

@ -287,8 +287,8 @@ xmrig::CnHash::CnHash()
# endif
# ifdef XMRIG_ALGO_CN_GPU
m_map[Algorithm::CN_GPU][AV_SINGLE][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, false>;
m_map[Algorithm::CN_GPU][AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, true>;
m_map[Algorithm::CN_GPU][AV_SINGLE][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, false, 0>;
m_map[Algorithm::CN_GPU][AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, true, 0>;
# endif
# ifdef XMRIG_FEATURE_ASM

View file

@ -30,7 +30,7 @@
#include <stddef.h>
#include <stdint.h>
#if defined _MSC_VER || defined XMRIG_ARM
#if defined _MSC_VER || defined XMRIG_ARM || defined __INTEL_COMPILER
# define ABI_ATTRIBUTE
#else
# define ABI_ATTRIBUTE __attribute__((ms_abi))

View file

@ -545,8 +545,10 @@ static inline __m128i int_sqrt_v2(const uint64_t n0)
r >>= 19;
uint64_t x2 = (s - (1022ULL << 32)) * (r - s - (1022ULL << 32) + 1);
# if (defined(_MSC_VER) || __GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ > 1)) && (defined(__x86_64__) || defined(_M_AMD64))
# if (defined(_MSC_VER) || __GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ > 1)) && (defined(__x86_64__) || defined(_M_AMD64)) && !defined(__INTEL_COMPILER)
_addcarry_u64(_subborrow_u64(0, x2, n0, (unsigned long long int*)&x2), r, 0, (unsigned long long int*)&r);
# elif defined(__INTEL_COMPILER)
_addcarry_u64(_subborrow_u64(0, x2, n0, (unsigned long int*)&x2), r, 0, (unsigned long int*)&r);
# else
if (x2 < n0) ++r;
# endif
@ -813,7 +815,7 @@ void cn_explode_scratchpad_gpu(const uint8_t *input, uint8_t *output)
}
template<Algorithm::Id ALGO, bool SOFT_AES>
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t)
{
constexpr CnAlgo<ALGO> props;
@ -833,7 +835,7 @@ inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_
cn_gpu_inner_ssse3<props.iterations(), props.mask()>(ctx[0]->state, ctx[0]->memory);
}
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
cn_implode_scratchpad<ALGO, SOFT_AES, interleave>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
memcpy(output, ctx[0]->state, 32);
}