Rework 'cn-pico/tlo' support

This commit is contained in:
Corey Moyer 2019-12-23 04:28:53 -05:00 committed by Mika Lindqvist
parent b80ac8cdd1
commit 9ae7fdb5ac
5 changed files with 24 additions and 41 deletions

View file

@ -99,8 +99,12 @@ size_t inline generate<Algorithm::CN_HEAVY>(Threads<CpuThreads> &threads, uint32
template<>
size_t inline generate<Algorithm::CN_PICO>(Threads<CpuThreads> &threads, uint32_t limit)
{
return generate("cn-pico", threads, Algorithm::CN_PICO_0, limit) &&
generate("cn-pico/tlo", threads, Algorithm::CN_PICO_TLO, limit);
size_t count = 0;
count += generate("cn-pico", threads, Algorithm::CN_PICO_0, limit);
count += generate("cn-pico/tlo", threads, Algorithm::CN_PICO_TLO, limit);
return count;
}
#endif

View file

@ -101,8 +101,12 @@ size_t inline generate<Algorithm::CN_HEAVY>(Threads<CudaThreads> &threads, const
template<>
size_t inline generate<Algorithm::CN_PICO>(Threads<CudaThreads> &threads, const std::vector<CudaDevice> &devices)
{
return generate("cn-pico", threads, Algorithm::CN_PICO_0, devices) &&
generate("cn-pico/tlo", threads, Algorithm::CN_PICO_TLO, devices);
size_t count = 0;
count += generate("cn-pico", threads, Algorithm::CN_PICO_0, devices);
count += generate("cn-pico/tlo", threads, Algorithm::CN_PICO_TLO, devices);
return count;
}
#endif

View file

@ -100,8 +100,12 @@ size_t inline generate<Algorithm::CN_HEAVY>(Threads<OclThreads> &threads, const
template<>
size_t inline generate<Algorithm::CN_PICO>(Threads<OclThreads> &threads, const std::vector<OclDevice> &devices)
{
return generate("cn-pico", threads, Algorithm::CN_PICO_0, devices) &&
generate("cn-pico/tlo", threads, Algorithm::CN_PICO_TLO, devices);
size_t count = 0;
count += generate("cn-pico", threads, Algorithm::CN_PICO_0, devices);
count += generate("cn-pico/tlo", threads, Algorithm::CN_PICO_TLO, devices);
return count;
}
#endif

View file

@ -79,11 +79,6 @@ cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr;
cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_tlo_mainloop_ivybridge_asm = nullptr;
cn_mainloop_fun cn_tlo_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_tlo_mainloop_bulldozer_asm = nullptr;
cn_mainloop_fun cn_tlo_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_zls_mainloop_ivybridge_asm = nullptr;
cn_mainloop_fun cn_zls_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_zls_mainloop_bulldozer_asm = nullptr;
@ -146,11 +141,6 @@ static void patchAsmVariants()
cn_trtl_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x5000);
cn_trtl_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x6000);
cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x7000);
cn_tlo_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x4000);
cn_tlo_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x5000);
cn_tlo_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x6000);
cn_tlo_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x7000);
# endif
cn_zls_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x8000);
@ -187,10 +177,10 @@ static void patchAsmVariants()
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_PICO_TLO>().iterations();
constexpr uint32_t MASK = CnAlgo<Algorithm::CN_PICO_TLO>().mask();
patchCode(cn_tlo_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER, MASK);
patchCode(cn_tlo_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER, MASK);
patchCode(cn_tlo_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER, MASK);
patchCode(cn_tlo_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER, MASK);
patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER, MASK);
patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER, MASK);
patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER, MASK);
patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER, MASK);
}
# endif

View file

@ -794,11 +794,6 @@ extern cn_mainloop_fun cn_trtl_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm;
extern cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_tlo_mainloop_ivybridge_asm;
extern cn_mainloop_fun cn_tlo_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_tlo_mainloop_bulldozer_asm;
extern cn_mainloop_fun cn_tlo_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_zls_mainloop_ivybridge_asm;
extern cn_mainloop_fun cn_zls_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_zls_mainloop_bulldozer_asm;
@ -873,7 +868,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
}
}
# ifdef XMRIG_ALGO_CN_PICO
else if (ALGO == Algorithm::CN_PICO_0) {
else if (ALGO == Algorithm::CN_PICO_0 || ALGO == Algorithm::CN_PICO_TLO) {
if (ASM == Assembly::INTEL) {
cn_trtl_mainloop_ivybridge_asm(ctx);
}
@ -884,17 +879,6 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
cn_trtl_mainloop_bulldozer_asm(ctx);
}
}
else if (ALGO == Algorithm::CN_PICO_TLO) {
if (ASM == Assembly::INTEL) {
cn_tlo_mainloop_ivybridge_asm(ctx);
}
else if (ASM == Assembly::RYZEN) {
cn_tlo_mainloop_ryzen_asm(ctx);
}
else {
cn_tlo_mainloop_bulldozer_asm(ctx);
}
}
# endif
else if (ALGO == Algorithm::CN_RWZ) {
cnv2_rwz_mainloop_asm(ctx);
@ -957,12 +941,9 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
cn_half_double_mainloop_sandybridge_asm(ctx);
}
# ifdef XMRIG_ALGO_CN_PICO
else if (ALGO == Algorithm::CN_PICO_0) {
else if (ALGO == Algorithm::CN_PICO_0 || ALGO == Algorithm::CN_PICO_TLO) {
cn_trtl_double_mainloop_sandybridge_asm(ctx);
}
else if (ALGO == Algorithm::CN_PICO_TLO) {
cn_tlo_double_mainloop_sandybridge_asm(ctx);
}
# endif
else if (ALGO == Algorithm::CN_RWZ) {
cnv2_rwz_double_mainloop_asm(ctx);