Update CN-GPU integration
This commit is contained in:
parent
0e5b54b80a
commit
be26d56f66
19 changed files with 144 additions and 142 deletions
|
@ -83,16 +83,16 @@ public:
|
|||
return CN_ITER / 8;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_GPU
|
||||
case Algorithm::CN_GPU:
|
||||
return 0xC000;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
case Algorithm::CN_UPX2:
|
||||
return CN_ITER / 32;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_GPU
|
||||
case Algorithm::CN_GPU:
|
||||
return 0xC000;
|
||||
# endif
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -108,18 +108,18 @@ public:
|
|||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_GPU
|
||||
if (algo == Algorithm::CN_GPU) {
|
||||
return 0x1FFFC0;
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
if (algo == Algorithm::CN_UPX2) {
|
||||
return 0x1FFF0;
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_GPU
|
||||
if (algo == Algorithm::CN_GPU) {
|
||||
return 0x1FFFC0;
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (algo == Algorithm::CN_GR_1) {
|
||||
return 0x3FFF0;
|
||||
|
@ -152,13 +152,13 @@ template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_ZLS>::iterations() con
|
|||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::iterations() const { return CN_ITER / 8; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_TLO>::iterations() const { return CN_ITER / 8; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_CCX>::iterations() const { return CN_ITER / 2; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GPU>::iterations() const { return 0xC000; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_UPX2>::iterations() const { return CN_ITER / 32; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GPU>::iterations() const { return 0xC000; }
|
||||
|
||||
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::mask() const { return 0x1FFF0; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GPU>::mask() const { return 0x1FFFC0; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_UPX2>::mask() const { return 0x1FFF0; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GPU>::mask() const { return 0x1FFFC0; }
|
||||
|
||||
#ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_0>::iterations() const { return CN_ITER / 4; }
|
||||
|
|
|
@ -290,6 +290,12 @@ xmrig::CnHash::CnHash()
|
|||
ADD_FN_ASM(Algorithm::CN_UPX2);
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_GPU
|
||||
m_map[Algorithm::CN_GPU] = new cn_hash_fun_array{};
|
||||
m_map[Algorithm::CN_GPU]->data[AV_SINGLE][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, false>;
|
||||
m_map[Algorithm::CN_GPU]->data[AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, true>;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_ARGON2
|
||||
m_map[Algorithm::AR2_CHUKWA] = new cn_hash_fun_array{};
|
||||
m_map[Algorithm::AR2_CHUKWA]->data[AV_SINGLE][Assembly::NONE] = argon2::single_hash<Algorithm::AR2_CHUKWA>;
|
||||
|
@ -310,12 +316,6 @@ xmrig::CnHash::CnHash()
|
|||
m_map[Algorithm::ASTROBWT_DERO]->data[AV_SINGLE_SOFT][Assembly::NONE] = astrobwt::single_hash<Algorithm::ASTROBWT_DERO>;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_GPU
|
||||
m_map[Algorithm::CN_GPU] = new cn_hash_fun_array{};
|
||||
m_map[Algorithm::CN_GPU]->data[AV_SINGLE][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, false>;
|
||||
m_map[Algorithm::CN_GPU]->data[AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, true>;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
ADD_FN(Algorithm::CN_GR_0);
|
||||
ADD_FN(Algorithm::CN_GR_1);
|
||||
|
|
|
@ -370,22 +370,6 @@ const static uint8_t test_output_pico_tlo[256] = {
|
|||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_CN_GPU
|
||||
// "cn/gpu"
|
||||
const static uint8_t test_output_gpu[160] = {
|
||||
0xE5, 0x5C, 0xB2, 0x3E, 0x51, 0x64, 0x9A, 0x59, 0xB1, 0x27, 0xB9, 0x6B, 0x51, 0x5F, 0x2B, 0xF7,
|
||||
0xBF, 0xEA, 0x19, 0x97, 0x41, 0xA0, 0x21, 0x6C, 0xF8, 0x38, 0xDE, 0xD0, 0x6E, 0xFF, 0x82, 0xDF,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifdef XMRIG_ALGO_CN_FEMTO
|
||||
// "cn/upx2"
|
||||
const static uint8_t test_output_femto_upx2[256] = {
|
||||
|
@ -403,6 +387,23 @@ const static uint8_t test_output_femto_upx2[256] = {
|
|||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_CN_GPU
|
||||
// "cn/gpu"
|
||||
const static uint8_t test_output_gpu[160] = {
|
||||
0xE5, 0x5C, 0xB2, 0x3E, 0x51, 0x64, 0x9A, 0x59, 0xB1, 0x27, 0xB9, 0x6B, 0x51, 0x5F, 0x2B, 0xF7,
|
||||
0xBF, 0xEA, 0x19, 0x97, 0x41, 0xA0, 0x21, 0x6C, 0xF8, 0x38, 0xDE, 0xD0, 0x6E, 0xFF, 0x82, 0xDF,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_ARGON2
|
||||
// "argon2/chukwa"
|
||||
const static uint8_t argon2_chukwa_test_out[256] = {
|
||||
|
|
|
@ -420,7 +420,6 @@ static NOINLINE void cn_implode_scratchpad(cryptonight_ctx *ctx)
|
|||
# else
|
||||
constexpr bool IS_HEAVY = props.isHeavy();
|
||||
# endif
|
||||
|
||||
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
|
||||
|
||||
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
|
||||
|
@ -841,8 +840,11 @@ namespace xmrig {
|
|||
|
||||
|
||||
template<size_t MEM>
|
||||
void cn_explode_scratchpad_gpu(const uint8_t *input, uint8_t *output)
|
||||
static NOINLINE void cn_explode_scratchpad_gpu(cryptonight_ctx *ctx)
|
||||
{
|
||||
const uint8_t* input = reinterpret_cast<const uint8_t*>(ctx->state);
|
||||
uint8_t* output = reinterpret_cast<uint8_t*>(ctx->memory);
|
||||
|
||||
constexpr size_t hash_size = 200; // 25x8 bytes
|
||||
alignas(16) uint64_t hash[25];
|
||||
|
||||
|
@ -866,12 +868,12 @@ void cn_explode_scratchpad_gpu(const uint8_t *input, uint8_t *output)
|
|||
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||
inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t)
|
||||
inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
||||
keccak(input, size, ctx[0]->state);
|
||||
cn_explode_scratchpad_gpu<props.memory()>(ctx[0]->state, ctx[0]->memory);
|
||||
cn_explode_scratchpad_gpu<props.memory()>(ctx[0]);
|
||||
|
||||
# ifdef _MSC_VER
|
||||
_control87(RC_NEAR, MCW_RC);
|
||||
|
|
|
@ -518,13 +518,13 @@ void (*vaes_instances[])() = {
|
|||
VAES_Instance<Algorithm::CN_PICO_0>,
|
||||
VAES_Instance<Algorithm::CN_PICO_TLO>,
|
||||
VAES_Instance<Algorithm::CN_UPX2>,
|
||||
VAES_Instance<Algorithm::CN_GPU>,
|
||||
VAES_Instance<Algorithm::CN_GR_0>,
|
||||
VAES_Instance<Algorithm::CN_GR_1>,
|
||||
VAES_Instance<Algorithm::CN_GR_2>,
|
||||
VAES_Instance<Algorithm::CN_GR_3>,
|
||||
VAES_Instance<Algorithm::CN_GR_4>,
|
||||
VAES_Instance<Algorithm::CN_GR_5>,
|
||||
VAES_Instance<Algorithm::CN_GPU>,
|
||||
};
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue