Optimized cn-heavy for Zen3

- Uses scratchpad interleaving to access only the closest L3 slice from each CPU core.
- Also activates the MSR mod for cn-heavy, because the interleaved access pattern confuses the CPU prefetchers.
- 7-8% speedup on Zen3.
This commit is contained in:
SChernykh 2021-02-07 22:05:11 +01:00
parent b1e14dc1d3
commit 8af8df25aa
8 changed files with 187 additions and 81 deletions

View file

@ -81,6 +81,7 @@ public:
inline void start(const std::vector<CpuLaunchData> &threads, size_t memory)
{
m_workersMemory.clear();
m_hugePages.reset();
m_memory = memory;
m_started = 0;
@ -95,8 +96,10 @@ public:
if (ready) {
m_started++;
m_hugePages += worker->memory()->hugePages();
m_ways += worker->intensity();
if (m_workersMemory.insert(worker->memory()).second) {
m_hugePages += worker->memory()->hugePages();
}
m_ways += worker->intensity();
}
else {
m_errors++;
@ -126,6 +129,7 @@ public:
}
private:
std::set<const VirtualMemory*> m_workersMemory;
HugePagesInfo m_hugePages;
size_t m_errors = 0;
size_t m_memory = 0;