Optimized dataset read for Ryzen CPUs
Removed a register dependency in the dataset read; +0.8% speedup on average.
This commit is contained in:
parent
4dec063472
commit
d0df824599
17 changed files with 81 additions and 20 deletions
|
@@ -157,8 +157,15 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
|
|||
}
|
||||
{
|
||||
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset;
|
||||
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
|
||||
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_ryzen;
|
||||
memcpy(codeReadDatasetTweaked, a, b - a);
|
||||
codeReadDatasetTweakedSize = b - a;
|
||||
}
|
||||
{
|
||||
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset_ryzen;
|
||||
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
|
||||
memcpy(codeReadDatasetRyzenTweaked, a, b - a);
|
||||
codeReadDatasetRyzenTweakedSize = b - a;
|
||||
}
|
||||
{
|
||||
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
|
||||
|
@@ -191,10 +198,11 @@ void RandomX_ConfigurationBase::Apply()
|
|||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
|
||||
const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE;
|
||||
*(uint32_t*)(codeReadDatasetTweaked + 7) = DatasetBaseMask;
|
||||
*(uint32_t*)(codeReadDatasetTweaked + 23) = DatasetBaseMask;
|
||||
*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
|
||||
// Not needed right now because all variants use default dataset base size
|
||||
//const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE;
|
||||
//*(uint32_t*)(codeReadDatasetTweaked + 9) = DatasetBaseMask;
|
||||
//*(uint32_t*)(codeReadDatasetTweaked + 24) = DatasetBaseMask;
|
||||
//*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
|
||||
|
||||
*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated;
|
||||
*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
|
||||
|
@@ -435,6 +443,7 @@ extern "C" {
|
|||
}
|
||||
|
||||
vm->setScratchpad(scratchpad);
|
||||
vm->setFlags(flags);
|
||||
}
|
||||
catch (std::exception &ex) {
|
||||
delete vm;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue