Optimized dataset read for Ryzen CPUs

Removed a register dependency in the dataset read, giving a +0.8% speedup on average.
This commit is contained in:
SChernykh 2019-12-08 16:14:02 +01:00
parent 4dec063472
commit d0df824599
17 changed files with 81 additions and 20 deletions

View file

@ -157,8 +157,15 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
}
{
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset;
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_ryzen;
memcpy(codeReadDatasetTweaked, a, b - a);
codeReadDatasetTweakedSize = b - a;
}
{
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset_ryzen;
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
memcpy(codeReadDatasetRyzenTweaked, a, b - a);
codeReadDatasetRyzenTweakedSize = b - a;
}
{
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
@ -191,10 +198,11 @@ void RandomX_ConfigurationBase::Apply()
#if defined(_M_X64) || defined(__x86_64__)
*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE;
*(uint32_t*)(codeReadDatasetTweaked + 7) = DatasetBaseMask;
*(uint32_t*)(codeReadDatasetTweaked + 23) = DatasetBaseMask;
*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
// Not needed right now because all variants use default dataset base size
//const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE;
//*(uint32_t*)(codeReadDatasetTweaked + 9) = DatasetBaseMask;
//*(uint32_t*)(codeReadDatasetTweaked + 24) = DatasetBaseMask;
//*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated;
*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
@ -435,6 +443,7 @@ extern "C" {
}
vm->setScratchpad(scratchpad);
vm->setFlags(flags);
}
catch (std::exception &ex) {
delete vm;