diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 97f22f3c..e9533eed 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -54,6 +54,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU) #set(CMAKE_C_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -gdwarf-2") + add_definitions(/DHAVE_BUILTIN_CLEAR_CACHE) + elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC) set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Ox /Ot /Oi /MT /GL") diff --git a/src/crypto/randomx/jit_compiler_a64.cpp b/src/crypto/randomx/jit_compiler_a64.cpp index 5587ca24..bf790c2b 100644 --- a/src/crypto/randomx/jit_compiler_a64.cpp +++ b/src/crypto/randomx/jit_compiler_a64.cpp @@ -73,7 +73,7 @@ static size_t CalcDatasetItemSize() { return // Prologue - ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch - (uint8_t*)randomx_calc_dataset_item_aarch64) + + ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch - (uint8_t*)randomx_calc_dataset_item_aarch64) + // Main loop RandomX_CurrentConfig.CacheAccesses * ( // Main loop prologue @@ -82,7 +82,7 @@ static size_t CalcDatasetItemSize() ((RandomX_CurrentConfig.SuperscalarLatency * 3) + 2) * 16 + // Main loop epilogue ((uint8_t*)randomx_calc_dataset_item_aarch64_store_result - (uint8_t*)randomx_calc_dataset_item_aarch64_mix) + 4 - ) + + ) + // Epilogue ((uint8_t*)randomx_calc_dataset_item_aarch64_end - (uint8_t*)randomx_calc_dataset_item_aarch64_store_result); } @@ -103,6 +103,21 @@ JitCompilerA64::~JitCompilerA64() freePagedMemory(code, CodeSize + CalcDatasetItemSize()); } +#if defined(ios_HOST_OS) || defined (darwin_HOST_OS) +void sys_icache_invalidate(void *start, size_t len); +#endif + +static void clear_code_cache(char* p1, char* p2) +{ +# if defined(ios_HOST_OS) || defined (darwin_HOST_OS) + sys_icache_invalidate(p1, static_cast(p2 - p1)); +# elif defined (HAVE_BUILTIN_CLEAR_CACHE) || defined (__GNUC__) + __builtin___clear_cache(p1, p2); +# else +# error "No clear code cache function found" +# endif +} + void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config) { uint32_t codePos = MainLoopBegin + 4; @@ -149,9 +164,7 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con codePos = ((uint8_t*)randomx_program_aarch64_update_spMix1) - ((uint8_t*)randomx_program_aarch64); emit32(ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16), code, codePos); -# ifdef HAVE_BUILTIN_CLEAR_CACHE - __builtin___clear_cache(reinterpret_cast(code + MainLoopBegin), reinterpret_cast(code + codePos)); -# endif + clear_code_cache(reinterpret_cast(code + MainLoopBegin), reinterpret_cast(code + codePos)); } void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration& config, uint32_t datasetOffset) @@ -206,9 +219,7 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration emit32(ARMV8A::ADD_IMM_LO | 2 | (2 << 5) | (imm_lo << 10), code, codePos); emit32(ARMV8A::ADD_IMM_HI | 2 | (2 << 5) | (imm_hi << 10), code, codePos); -# ifdef HAVE_BUILTIN_CLEAR_CACHE - __builtin___clear_cache(reinterpret_cast(code + MainLoopBegin), reinterpret_cast(code + codePos)); -# endif + clear_code_cache(reinterpret_cast(code + MainLoopBegin), reinterpret_cast(code + codePos)); } template @@ -324,9 +335,7 @@ void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N], s memcpy(code + codePos, p1, p2 - p1); codePos += p2 - p1; -# ifdef HAVE_BUILTIN_CLEAR_CACHE - __builtin___clear_cache(reinterpret_cast(code + CodeSize), reinterpret_cast(code + codePos)); -# endif + clear_code_cache(reinterpret_cast(code + CodeSize), reinterpret_cast(code + codePos)); } template void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_MAX_ACCESSES], std::vector &reciprocalCache); @@ -858,7 +867,7 @@ void JitCompilerA64::h_FADD_M(Instruction& instr, uint32_t& codePos) const uint32_t dst = (instr.dst % 4) + 16; constexpr uint32_t tmp_reg_fp = 28; - emitMemLoadFP(src, instr, code, k); + emitMemLoadFP(src, instr, code, k); emit32(ARMV8A::FADD | dst | (dst << 5) | (tmp_reg_fp << 16), code, k); @@ -881,7 +890,7 @@ void JitCompilerA64::h_FSUB_M(Instruction& instr, uint32_t& codePos) const uint32_t dst = (instr.dst % 4) + 16; constexpr uint32_t tmp_reg_fp = 28; - emitMemLoadFP(src, instr, code, k); + emitMemLoadFP(src, instr, code, k); emit32(ARMV8A::FSUB | dst | (dst << 5) | (tmp_reg_fp << 16), code, k); @@ -911,7 +920,7 @@ void JitCompilerA64::h_FDIV_M(Instruction& instr, uint32_t& codePos) const uint32_t dst = (instr.dst % 4) + 20; constexpr uint32_t tmp_reg_fp = 28; - emitMemLoadFP(src, instr, code, k); + emitMemLoadFP(src, instr, code, k); // and tmp_reg_fp, tmp_reg_fp, and_mask_reg emit32(0x4E201C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (29 << 16), code, k);