Update CN-GPU integration

This commit is contained in:
Tony Butler 2021-08-20 03:09:03 -06:00
parent 0e5b54b80a
commit be26d56f66
19 changed files with 144 additions and 142 deletions

View file

@ -83,16 +83,16 @@ public:
return CN_ITER / 8;
# endif
# ifdef XMRIG_ALGO_CN_GPU
case Algorithm::CN_GPU:
return 0xC000;
# endif
# ifdef XMRIG_ALGO_CN_FEMTO
case Algorithm::CN_UPX2:
return CN_ITER / 32;
# endif
# ifdef XMRIG_ALGO_CN_GPU
case Algorithm::CN_GPU:
return 0xC000;
# endif
default:
break;
}
@ -108,18 +108,18 @@ public:
}
# endif
# ifdef XMRIG_ALGO_CN_GPU
if (algo == Algorithm::CN_GPU) {
return 0x1FFFC0;
}
# endif
# ifdef XMRIG_ALGO_CN_FEMTO
if (algo == Algorithm::CN_UPX2) {
return 0x1FFF0;
}
# endif
# ifdef XMRIG_ALGO_CN_GPU
if (algo == Algorithm::CN_GPU) {
return 0x1FFFC0;
}
# endif
# ifdef XMRIG_ALGO_GHOSTRIDER
if (algo == Algorithm::CN_GR_1) {
return 0x3FFF0;
@ -152,13 +152,13 @@ template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_ZLS>::iterations() con
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::iterations() const { return CN_ITER / 8; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_TLO>::iterations() const { return CN_ITER / 8; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_CCX>::iterations() const { return CN_ITER / 2; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GPU>::iterations() const { return 0xC000; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_UPX2>::iterations() const { return CN_ITER / 32; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GPU>::iterations() const { return 0xC000; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::mask() const { return 0x1FFF0; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GPU>::mask() const { return 0x1FFFC0; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_UPX2>::mask() const { return 0x1FFF0; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GPU>::mask() const { return 0x1FFFC0; }
#ifdef XMRIG_ALGO_GHOSTRIDER
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_0>::iterations() const { return CN_ITER / 4; }

View file

@ -290,6 +290,12 @@ xmrig::CnHash::CnHash()
ADD_FN_ASM(Algorithm::CN_UPX2);
# endif
# ifdef XMRIG_ALGO_CN_GPU
m_map[Algorithm::CN_GPU] = new cn_hash_fun_array{};
m_map[Algorithm::CN_GPU]->data[AV_SINGLE][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, false>;
m_map[Algorithm::CN_GPU]->data[AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, true>;
# endif
# ifdef XMRIG_ALGO_ARGON2
m_map[Algorithm::AR2_CHUKWA] = new cn_hash_fun_array{};
m_map[Algorithm::AR2_CHUKWA]->data[AV_SINGLE][Assembly::NONE] = argon2::single_hash<Algorithm::AR2_CHUKWA>;
@ -310,12 +316,6 @@ xmrig::CnHash::CnHash()
m_map[Algorithm::ASTROBWT_DERO]->data[AV_SINGLE_SOFT][Assembly::NONE] = astrobwt::single_hash<Algorithm::ASTROBWT_DERO>;
# endif
# ifdef XMRIG_ALGO_CN_GPU
m_map[Algorithm::CN_GPU] = new cn_hash_fun_array{};
m_map[Algorithm::CN_GPU]->data[AV_SINGLE][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, false>;
m_map[Algorithm::CN_GPU]->data[AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, true>;
# endif
# ifdef XMRIG_ALGO_GHOSTRIDER
ADD_FN(Algorithm::CN_GR_0);
ADD_FN(Algorithm::CN_GR_1);

View file

@ -370,22 +370,6 @@ const static uint8_t test_output_pico_tlo[256] = {
#endif
#ifdef XMRIG_ALGO_CN_GPU
// "cn/gpu"
const static uint8_t test_output_gpu[160] = {
0xE5, 0x5C, 0xB2, 0x3E, 0x51, 0x64, 0x9A, 0x59, 0xB1, 0x27, 0xB9, 0x6B, 0x51, 0x5F, 0x2B, 0xF7,
0xBF, 0xEA, 0x19, 0x97, 0x41, 0xA0, 0x21, 0x6C, 0xF8, 0x38, 0xDE, 0xD0, 0x6E, 0xFF, 0x82, 0xDF,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
#endif
#ifdef XMRIG_ALGO_CN_FEMTO
// "cn/upx2"
const static uint8_t test_output_femto_upx2[256] = {
@ -403,6 +387,23 @@ const static uint8_t test_output_femto_upx2[256] = {
#endif
#ifdef XMRIG_ALGO_CN_GPU
// "cn/gpu"
const static uint8_t test_output_gpu[160] = {
0xE5, 0x5C, 0xB2, 0x3E, 0x51, 0x64, 0x9A, 0x59, 0xB1, 0x27, 0xB9, 0x6B, 0x51, 0x5F, 0x2B, 0xF7,
0xBF, 0xEA, 0x19, 0x97, 0x41, 0xA0, 0x21, 0x6C, 0xF8, 0x38, 0xDE, 0xD0, 0x6E, 0xFF, 0x82, 0xDF,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
#endif
#ifdef XMRIG_ALGO_ARGON2
// "argon2/chukwa"
const static uint8_t argon2_chukwa_test_out[256] = {

View file

@ -420,7 +420,6 @@ static NOINLINE void cn_implode_scratchpad(cryptonight_ctx *ctx)
# else
constexpr bool IS_HEAVY = props.isHeavy();
# endif
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
@ -841,8 +840,11 @@ namespace xmrig {
template<size_t MEM>
void cn_explode_scratchpad_gpu(const uint8_t *input, uint8_t *output)
static NOINLINE void cn_explode_scratchpad_gpu(cryptonight_ctx *ctx)
{
const uint8_t* input = reinterpret_cast<const uint8_t*>(ctx->state);
uint8_t* output = reinterpret_cast<uint8_t*>(ctx->memory);
constexpr size_t hash_size = 200; // 25x8 bytes
alignas(16) uint64_t hash[25];
@ -866,12 +868,12 @@ void cn_explode_scratchpad_gpu(const uint8_t *input, uint8_t *output)
template<Algorithm::Id ALGO, bool SOFT_AES>
inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t)
inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr CnAlgo<ALGO> props;
keccak(input, size, ctx[0]->state);
cn_explode_scratchpad_gpu<props.memory()>(ctx[0]->state, ctx[0]->memory);
cn_explode_scratchpad_gpu<props.memory()>(ctx[0]);
# ifdef _MSC_VER
_control87(RC_NEAR, MCW_RC);

View file

@ -518,13 +518,13 @@ void (*vaes_instances[])() = {
VAES_Instance<Algorithm::CN_PICO_0>,
VAES_Instance<Algorithm::CN_PICO_TLO>,
VAES_Instance<Algorithm::CN_UPX2>,
VAES_Instance<Algorithm::CN_GPU>,
VAES_Instance<Algorithm::CN_GR_0>,
VAES_Instance<Algorithm::CN_GR_1>,
VAES_Instance<Algorithm::CN_GR_2>,
VAES_Instance<Algorithm::CN_GR_3>,
VAES_Instance<Algorithm::CN_GR_4>,
VAES_Instance<Algorithm::CN_GR_5>,
VAES_Instance<Algorithm::CN_GPU>,
};