This commit is contained in:
MoneroOcean 2019-10-29 14:19:29 -07:00
commit 245838ffae
57 changed files with 1167 additions and 443 deletions

View file

@ -41,8 +41,7 @@ constexpr size_t pageSize = 2 * 1024 * 1024;
} // namespace xmrig
xmrig::MemoryPool::MemoryPool(size_t size, bool hugePages, uint32_t node) :
m_size(size)
xmrig::MemoryPool::MemoryPool(size_t size, bool hugePages, uint32_t node)
{
if (!size) {
return;

View file

@ -52,7 +52,6 @@ protected:
void release(uint32_t node) override;
private:
size_t m_size = 0;
size_t m_refs = 0;
size_t m_offset = 0;
VirtualMemory *m_memory = nullptr;

View file

@ -34,15 +34,6 @@
#include <algorithm>
namespace xmrig {
constexpr size_t pageSize = 2 * 1024 * 1024;
} // namespace xmrig
xmrig::NUMAMemoryPool::NUMAMemoryPool(size_t size, bool hugePages) :
m_hugePages(hugePages),
m_nodeSize(std::max<size_t>(size / Cpu::info()->nodes(), 1)),

View file

@ -112,7 +112,7 @@ void xmrig::VirtualMemory::destroy()
void xmrig::VirtualMemory::init(size_t poolSize, bool hugePages)
{
if (!pool) {
osInit();
osInit(hugePages);
}
# ifdef XMRIG_FEATURE_HWLOC

View file

@ -78,7 +78,7 @@ private:
FLAG_MAX
};
static void osInit();
static void osInit(bool hugePages);
bool allocateLargePagesMemory();
void freeLargePagesMemory();

View file

@ -96,7 +96,7 @@ void xmrig::VirtualMemory::unprotectExecutableMemory(void *p, size_t size)
}
void xmrig::VirtualMemory::osInit()
void xmrig::VirtualMemory::osInit(bool)
{
}

View file

@ -201,9 +201,11 @@ void xmrig::VirtualMemory::unprotectExecutableMemory(void *p, size_t size)
}
void xmrig::VirtualMemory::osInit()
void xmrig::VirtualMemory::osInit(bool hugePages)
{
hugepagesAvailable = TrySetLockPagesPrivilege();
if (hugePages) {
hugepagesAvailable = TrySetLockPagesPrivilege();
}
}

View file

@ -73,7 +73,7 @@ static size_t CalcDatasetItemSize()
{
return
// Prologue
((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch - (uint8_t*)randomx_calc_dataset_item_aarch64) +
((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch - (uint8_t*)randomx_calc_dataset_item_aarch64) +
// Main loop
RandomX_CurrentConfig.CacheAccesses * (
// Main loop prologue
@ -82,7 +82,7 @@ static size_t CalcDatasetItemSize()
((RandomX_CurrentConfig.SuperscalarLatency * 3) + 2) * 16 +
// Main loop epilogue
((uint8_t*)randomx_calc_dataset_item_aarch64_store_result - (uint8_t*)randomx_calc_dataset_item_aarch64_mix) + 4
) +
) +
// Epilogue
((uint8_t*)randomx_calc_dataset_item_aarch64_end - (uint8_t*)randomx_calc_dataset_item_aarch64_store_result);
}
@ -103,6 +103,21 @@ JitCompilerA64::~JitCompilerA64()
freePagedMemory(code, CodeSize + CalcDatasetItemSize());
}
#if defined(ios_HOST_OS) || defined (darwin_HOST_OS)
// sys_icache_invalidate() is a C function supplied by the platform. It has to
// be declared with C linkage here: without extern "C" this C++ translation
// unit would reference a mangled C++ symbol that the system library does not
// export, and the build would fail at link time.
extern "C" void sys_icache_invalidate(void *start, size_t len);
#endif

/**
 * Makes freshly written machine code in the byte range [p1, p2) visible to
 * instruction fetch. The code generators in this file call it after they have
 * finished emitting instructions into the code buffer.
 *
 * @param p1 pointer to the first byte of the modified range.
 * @param p2 pointer one past the last byte of the modified range.
 *
 * The implementation is selected at compile time:
 *  - sys_icache_invalidate() when ios_HOST_OS or darwin_HOST_OS is defined;
 *  - the compiler builtin __builtin___clear_cache() when
 *    HAVE_BUILTIN_CLEAR_CACHE or __GNUC__ is defined;
 *  - otherwise compilation stops with an error, because silently skipping the
 *    flush could leave stale instructions in the cache.
 */
static void clear_code_cache(char* p1, char* p2)
{
#   if defined(ios_HOST_OS) || defined (darwin_HOST_OS)
    // The platform call takes (start, length) rather than (begin, end).
    sys_icache_invalidate(p1, static_cast<size_t>(p2 - p1));
#   elif defined (HAVE_BUILTIN_CLEAR_CACHE) || defined (__GNUC__)
    __builtin___clear_cache(p1, p2);
#   else
#   error "No clear code cache function found"
#   endif
}
void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config)
{
uint32_t codePos = MainLoopBegin + 4;
@ -149,9 +164,7 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
codePos = ((uint8_t*)randomx_program_aarch64_update_spMix1) - ((uint8_t*)randomx_program_aarch64);
emit32(ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16), code, codePos);
#ifdef __GNUC__
__builtin___clear_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
#endif
clear_code_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
}
void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration& config, uint32_t datasetOffset)
@ -206,9 +219,7 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
emit32(ARMV8A::ADD_IMM_LO | 2 | (2 << 5) | (imm_lo << 10), code, codePos);
emit32(ARMV8A::ADD_IMM_HI | 2 | (2 << 5) | (imm_hi << 10), code, codePos);
#ifdef __GNUC__
__builtin___clear_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
#endif
clear_code_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
}
template<size_t N>
@ -324,9 +335,7 @@ void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N], s
memcpy(code + codePos, p1, p2 - p1);
codePos += p2 - p1;
#ifdef __GNUC__
__builtin___clear_cache(reinterpret_cast<char*>(code + CodeSize), reinterpret_cast<char*>(code + codePos));
#endif
clear_code_cache(reinterpret_cast<char*>(code + CodeSize), reinterpret_cast<char*>(code + codePos));
}
template void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_MAX_ACCESSES], std::vector<uint64_t> &reciprocalCache);
@ -858,7 +867,7 @@ void JitCompilerA64::h_FADD_M(Instruction& instr, uint32_t& codePos)
const uint32_t dst = (instr.dst % 4) + 16;
constexpr uint32_t tmp_reg_fp = 28;
emitMemLoadFP<tmp_reg_fp>(src, instr, code, k);
emitMemLoadFP<tmp_reg_fp>(src, instr, code, k);
emit32(ARMV8A::FADD | dst | (dst << 5) | (tmp_reg_fp << 16), code, k);
@ -881,7 +890,7 @@ void JitCompilerA64::h_FSUB_M(Instruction& instr, uint32_t& codePos)
const uint32_t dst = (instr.dst % 4) + 16;
constexpr uint32_t tmp_reg_fp = 28;
emitMemLoadFP<tmp_reg_fp>(src, instr, code, k);
emitMemLoadFP<tmp_reg_fp>(src, instr, code, k);
emit32(ARMV8A::FSUB | dst | (dst << 5) | (tmp_reg_fp << 16), code, k);
@ -911,7 +920,7 @@ void JitCompilerA64::h_FDIV_M(Instruction& instr, uint32_t& codePos)
const uint32_t dst = (instr.dst % 4) + 20;
constexpr uint32_t tmp_reg_fp = 28;
emitMemLoadFP<tmp_reg_fp>(src, instr, code, k);
emitMemLoadFP<tmp_reg_fp>(src, instr, code, k);
// and tmp_reg_fp, tmp_reg_fp, and_mask_reg
emit32(0x4E201C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (29 << 16), code, k);

View file

@ -98,6 +98,7 @@
# v30 -> E 'or' mask = 0x3*00000000******3*00000000******
# v31 -> scale mask = 0x81f000000000000081f0000000000000
.balign 4
randomx_program_aarch64:
# Save callee-saved registers
sub sp, sp, 192

View file

@ -631,7 +631,7 @@ namespace randomx {
int cycle1 = scheduleUop<false>(mop.getUop1(), portBusy, cycle);
int cycle2 = scheduleUop<false>(mop.getUop2(), portBusy, cycle);
if (cycle1 == cycle2) {
if (cycle1 >= 0 && cycle1 == cycle2) {
if (commit) {
scheduleUop<true>(mop.getUop1(), portBusy, cycle1);
scheduleUop<true>(mop.getUop2(), portBusy, cycle2);
@ -755,6 +755,12 @@ namespace randomx {
//recalculate when the instruction can be scheduled for execution based on operand availability
scheduleCycle = scheduleMop<true>(mop, portBusy, scheduleCycle, scheduleCycle);
if (scheduleCycle < 0) {
if (trace) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << scheduleCycle << ")" << std::endl;
portsSaturated = true;
break;
}
//calculate when the result will be ready
depCycle = scheduleCycle + mop.getLatency();

View file

@ -46,7 +46,7 @@ xmrig::RxQueue::RxQueue(IRxListener *listener) :
uv_async_init(uv_default_loop(), m_async, [](uv_async_t *handle) { static_cast<RxQueue *>(handle->data)->onReady(); });
m_thread = std::move(std::thread(&RxQueue::backgroundInit, this));
m_thread = std::thread(&RxQueue::backgroundInit, this);
}
@ -158,7 +158,7 @@ void xmrig::RxQueue::backgroundInit()
m_storage->init(item.seed, item.threads, item.hugePages);
lock = std::move(std::unique_lock<std::mutex>(m_mutex));
lock = std::unique_lock<std::mutex>(m_mutex);
if (m_state == STATE_SHUTDOWN || !m_queue.empty()) {
continue;