diff --git a/CHANGELOG.md b/CHANGELOG.md index 61aa5ea5..b8b42784 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +# v6.3.2 +- [#1794](https://github.com/xmrig/xmrig/pull/1794) More robust 1 GB pages handling. + - Don't allocate 1 GB per thread if 1 GB is the default huge page size. + - Try to allocate scratchpad from dataset's 1 GB huge pages, if normal huge pages are not available. + - Correctly initialize RandomX cache if 1 GB pages fail to allocate on the first NUMA node. +- [#1806](https://github.com/xmrig/xmrig/pull/1806) Fixed macOS battery detection. +- [#1809](https://github.com/xmrig/xmrig/issues/1809) Improved auto configuration on ARM CPUs. + - Added retrieval of ARM CPU names, based on lscpu code and database. + # v6.3.1 - [#1786](https://github.com/xmrig/xmrig/pull/1786) Added `pause-on-battery` option, supported on Windows and Linux. - Added command line options `--randomx-cache-qos` and `--argon2-impl`. diff --git a/CMakeLists.txt b/CMakeLists.txt index dfe92e85..78270e80 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -143,6 +143,8 @@ elseif (XMRIG_OS_APPLE) src/App_unix.cpp src/crypto/common/VirtualMemory_unix.cpp ) + find_library(IOKIT_LIBRARY IOKit) + set(EXTRA_LIBS ${IOKIT_LIBRARY}) else() list(APPEND SOURCES_OS src/App_unix.cpp diff --git a/scripts/build.uv.sh b/scripts/build.uv.sh index 42590497..6f4c9796 100755 --- a/scripts/build.uv.sh +++ b/scripts/build.uv.sh @@ -1,6 +1,6 @@ #!/bin/bash -e -UV_VERSION="1.38.0" +UV_VERSION="1.38.1" mkdir -p deps mkdir -p deps/include diff --git a/src/backend/cpu/CpuWorker.cpp b/src/backend/cpu/CpuWorker.cpp index 6990de20..bb2ca0f6 100644 --- a/src/backend/cpu/CpuWorker.cpp +++ b/src/backend/cpu/CpuWorker.cpp @@ -36,6 +36,7 @@ #include "crypto/common/Nonce.h" #include "crypto/common/VirtualMemory.h" #include "crypto/rx/Rx.h" +#include "crypto/rx/RxDataset.h" #include "crypto/rx/RxVm.h" #include "net/JobResults.h" @@ -118,7 +119,9 @@ void xmrig::CpuWorker::allocateRandomX_VM() } if (!m_vm) { -
m_vm = RxVm::create(dataset, m_memory->scratchpad(), !m_hwAES, m_assembly, m_node); + // Try to allocate scratchpad from dataset's 1 GB huge pages, if normal huge pages are not available + uint8_t* scratchpad = m_memory->isHugePages() ? m_memory->scratchpad() : dataset->tryAllocateScrathpad(); + m_vm = RxVm::create(dataset, scratchpad ? scratchpad : m_memory->scratchpad(), !m_hwAES, m_assembly, m_node); } } #endif diff --git a/src/backend/cpu/cpu.cmake b/src/backend/cpu/cpu.cmake index 75ec1ad7..b2efdb03 100644 --- a/src/backend/cpu/cpu.cmake +++ b/src/backend/cpu/cpu.cmake @@ -72,6 +72,10 @@ endif() if (XMRIG_ARM) list(APPEND SOURCES_CPUID src/backend/cpu/platform/BasicCpuInfo_arm.cpp) + + if (XMRIG_OS_UNIX) + list(APPEND SOURCES_CPUID src/backend/cpu/platform/lscpu_arm.cpp) + endif() else() list(APPEND SOURCES_CPUID src/backend/cpu/platform/BasicCpuInfo.cpp) endif() diff --git a/src/backend/cpu/platform/BasicCpuInfo_arm.cpp b/src/backend/cpu/platform/BasicCpuInfo_arm.cpp index 653304c8..b35fdf68 100644 --- a/src/backend/cpu/platform/BasicCpuInfo_arm.cpp +++ b/src/backend/cpu/platform/BasicCpuInfo_arm.cpp @@ -1,10 +1,4 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , * Copyright 2018-2020 SChernykh * Copyright 2016-2020 XMRig * @@ -22,6 +16,10 @@ * along with this program. If not, see . 
*/ + +#include "base/tools/String.h" + + #include #include #include @@ -37,6 +35,15 @@ #include "3rdparty/rapidjson/document.h" +#ifdef XMRIG_OS_UNIX +namespace xmrig { + +extern String cpu_name_arm(); + +} // namespace xmrig +#endif + + xmrig::BasicCpuInfo::BasicCpuInfo() : m_threads(std::thread::hardware_concurrency()) { @@ -53,6 +60,13 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : m_flags.set(FLAG_AES, true); # endif # endif + +# ifdef XMRIG_OS_UNIX + auto name = cpu_name_arm(); + if (!name.isNull()) { + strncpy(m_brand, name, sizeof(m_brand) - 1); + } +# endif } diff --git a/src/backend/cpu/platform/HwlocCpuInfo.cpp b/src/backend/cpu/platform/HwlocCpuInfo.cpp index 4a59c786..0d587332 100644 --- a/src/backend/cpu/platform/HwlocCpuInfo.cpp +++ b/src/backend/cpu/platform/HwlocCpuInfo.cpp @@ -218,10 +218,11 @@ xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm, uint3 { # ifdef XMRIG_ALGO_ASTROBWT if (algorithm == Algorithm::ASTROBWT_DERO) { - return BasicCpuInfo::threads(algorithm, limit); + return allThreads(algorithm, limit); } # endif +# ifndef XMRIG_ARM if (L2() == 0 && L3() == 0) { return BasicCpuInfo::threads(algorithm, limit); } @@ -263,11 +264,35 @@ xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm, uint3 } return threads; +# else + return allThreads(algorithm, limit); +# endif } +xmrig::CpuThreads xmrig::HwlocCpuInfo::allThreads(const Algorithm &algorithm, uint32_t limit) const +{ + CpuThreads threads; + threads.reserve(m_threads); + + hwloc_obj_t pu = nullptr; + + while ((pu = hwloc_get_next_obj_by_type(m_topology, HWLOC_OBJ_PU, pu)) != nullptr) { + threads.add(pu->os_index, 0); + } + + if (threads.isEmpty()) { + return BasicCpuInfo::threads(algorithm, limit); + } + + return threads; +} + + + void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorithm &algorithm, CpuThreads &threads, size_t limit) const { +# ifndef XMRIG_ARM constexpr size_t oneMiB = 1024U * 1024U; size_t PUs = 
countByType(cache, HWLOC_OBJ_PU); @@ -366,4 +391,5 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith pu_id++; } +# endif } diff --git a/src/backend/cpu/platform/HwlocCpuInfo.h b/src/backend/cpu/platform/HwlocCpuInfo.h index 3746f151..eed3ae8b 100644 --- a/src/backend/cpu/platform/HwlocCpuInfo.h +++ b/src/backend/cpu/platform/HwlocCpuInfo.h @@ -70,6 +70,7 @@ protected: inline size_t packages() const override { return m_packages; } private: + CpuThreads allThreads(const Algorithm &algorithm, uint32_t limit) const; void processTopLevelCache(hwloc_obj_t obj, const Algorithm &algorithm, CpuThreads &threads, size_t limit) const; diff --git a/src/backend/cpu/platform/lscpu_arm.cpp b/src/backend/cpu/platform/lscpu_arm.cpp new file mode 100644 index 00000000..60349d52 --- /dev/null +++ b/src/backend/cpu/platform/lscpu_arm.cpp @@ -0,0 +1,314 @@ +/* XMRig + * Copyright 2018 Riku Voipio + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + + +#include "base/tools/String.h" + + +#include +#include + + +namespace xmrig { + + +struct lscpu_desc +{ + String vendor; + String model; + + inline bool isReady() const { return !vendor.isNull() && !model.isNull(); } +}; + + +struct id_part { + const int id; + const char *name; +}; + + +struct hw_impl { + const int id; + const id_part *parts; + const char *name; +}; + + +static const id_part arm_part[] = { + { 0x810, "ARM810" }, + { 0x920, "ARM920" }, + { 0x922, "ARM922" }, + { 0x926, "ARM926" }, + { 0x940, "ARM940" }, + { 0x946, "ARM946" }, + { 0x966, "ARM966" }, + { 0xa20, "ARM1020" }, + { 0xa22, "ARM1022" }, + { 0xa26, "ARM1026" }, + { 0xb02, "ARM11 MPCore" }, + { 0xb36, "ARM1136" }, + { 0xb56, "ARM1156" }, + { 0xb76, "ARM1176" }, + { 0xc05, "Cortex-A5" }, + { 0xc07, "Cortex-A7" }, + { 0xc08, "Cortex-A8" }, + { 0xc09, "Cortex-A9" }, + { 0xc0d, "Cortex-A17" }, /* Originally A12 */ + { 0xc0f, "Cortex-A15" }, + { 0xc0e, "Cortex-A17" }, + { 0xc14, "Cortex-R4" }, + { 0xc15, "Cortex-R5" }, + { 0xc17, "Cortex-R7" }, + { 0xc18, "Cortex-R8" }, + { 0xc20, "Cortex-M0" }, + { 0xc21, "Cortex-M1" }, + { 0xc23, "Cortex-M3" }, + { 0xc24, "Cortex-M4" }, + { 0xc27, "Cortex-M7" }, + { 0xc60, "Cortex-M0+" }, + { 0xd01, "Cortex-A32" }, + { 0xd03, "Cortex-A53" }, + { 0xd04, "Cortex-A35" }, + { 0xd05, "Cortex-A55" }, + { 0xd07, "Cortex-A57" }, + { 0xd08, "Cortex-A72" }, + { 0xd09, "Cortex-A73" }, + { 0xd0a, "Cortex-A75" }, + { 0xd0b, "Cortex-A76" }, + { 0xd0c, "Neoverse-N1" }, + { 0xd13, "Cortex-R52" }, + { 0xd20, "Cortex-M23" }, + { 0xd21, "Cortex-M33" }, + { 0xd4a, "Neoverse-E1" }, + { -1, nullptr }, +}; + +static const id_part brcm_part[] = { + { 0x0f, "Brahma B15" }, + { 0x100, "Brahma B53" }, + { 0x516, "ThunderX2" }, + { -1, nullptr }, +}; + +static const id_part dec_part[] = { + { 0xa10, "SA110" }, + { 0xa11, "SA1100" }, + { -1, nullptr }, +}; + +static const id_part cavium_part[] = { + { 0x0a0, "ThunderX" }, + { 0x0a1, "ThunderX 88XX" }, + { 0x0a2, "ThunderX 81XX" }, 
+ { 0x0a3, "ThunderX 83XX" }, + { 0x0af, "ThunderX2 99xx" }, + { -1, nullptr }, +}; + +static const id_part apm_part[] = { + { 0x000, "X-Gene" }, + { -1, nullptr }, +}; + +static const id_part qcom_part[] = { + { 0x00f, "Scorpion" }, + { 0x02d, "Scorpion" }, + { 0x04d, "Krait" }, + { 0x06f, "Krait" }, + { 0x201, "Kryo" }, + { 0x205, "Kryo" }, + { 0x211, "Kryo" }, + { 0x800, "Falkor V1/Kryo" }, + { 0x801, "Kryo V2" }, + { 0xc00, "Falkor" }, + { 0xc01, "Saphira" }, + { -1, nullptr }, +}; + +static const id_part samsung_part[] = { + { 0x001, "exynos-m1" }, + { -1, nullptr }, +}; + +static const id_part nvidia_part[] = { + { 0x000, "Denver" }, + { 0x003, "Denver 2" }, + { -1, nullptr }, +}; + +static const id_part marvell_part[] = { + { 0x131, "Feroceon 88FR131" }, + { 0x581, "PJ4/PJ4b" }, + { 0x584, "PJ4B-MP" }, + { -1, nullptr }, +}; + +static const id_part faraday_part[] = { + { 0x526, "FA526" }, + { 0x626, "FA626" }, + { -1, nullptr }, +}; + +static const id_part intel_part[] = { + { 0x200, "i80200" }, + { 0x210, "PXA250A" }, + { 0x212, "PXA210A" }, + { 0x242, "i80321-400" }, + { 0x243, "i80321-600" }, + { 0x290, "PXA250B/PXA26x" }, + { 0x292, "PXA210B" }, + { 0x2c2, "i80321-400-B0" }, + { 0x2c3, "i80321-600-B0" }, + { 0x2d0, "PXA250C/PXA255/PXA26x" }, + { 0x2d2, "PXA210C" }, + { 0x411, "PXA27x" }, + { 0x41c, "IPX425-533" }, + { 0x41d, "IPX425-400" }, + { 0x41f, "IPX425-266" }, + { 0x682, "PXA32x" }, + { 0x683, "PXA930/PXA935" }, + { 0x688, "PXA30x" }, + { 0x689, "PXA31x" }, + { 0xb11, "SA1110" }, + { 0xc12, "IPX1200" }, + { -1, nullptr }, +}; + +static const id_part hisi_part[] = { + { 0xd01, "Kunpeng-920" }, /* aka tsv110 */ + { -1, nullptr }, +}; + + +static const hw_impl hw_implementer[] = { + { 0x41, arm_part, "ARM" }, + { 0x42, brcm_part, "Broadcom" }, + { 0x43, cavium_part, "Cavium" }, + { 0x44, dec_part, "DEC" }, + { 0x48, hisi_part, "HiSilicon" }, + { 0x4e, nvidia_part, "Nvidia" }, + { 0x50, apm_part, "APM" }, + { 0x51, qcom_part, "Qualcomm" }, + { 0x53, 
samsung_part, "Samsung" }, + { 0x56, marvell_part, "Marvell" }, + { 0x66, faraday_part, "Faraday" }, + { 0x69, intel_part, "Intel" } +}; + +/* Stores the trimmed value of a "pattern : value" line in `value`. Returns false when the line does not match or `value` is already set. Modifies `line` in place. */ +static bool lookup(char *line, const char *pattern, String &value) +{ + if (!*line || !value.isNull()) { + return false; + } + + char *p; + const size_t len = strlen(pattern); + + if (strncmp(line, pattern, len) != 0) { + return false; + } + + for (p = line + len; isspace(static_cast<unsigned char>(*p)); p++); /* cast: a negative char passed to isspace() is undefined behaviour */ + + if (*p != ':') { + return false; + } + + for (++p; isspace(static_cast<unsigned char>(*p)); p++); + + if (!*p) { + return false; + } + + const char *v = p; + + p = line + strlen(line); /* start one past the end so a line without a trailing newline keeps its last character */ + for (; p > v && isspace(static_cast<unsigned char>(*(p - 1))); p--); + *p = '\0'; + + value = v; + + return true; +} + + +static bool read_basicinfo(lscpu_desc *desc) +{ + auto fp = fopen("/proc/cpuinfo", "r"); + if (!fp) { + return false; + } + + char buf[BUFSIZ]; + while (fgets(buf, sizeof(buf), fp) != nullptr) { + if (!lookup(buf, "CPU implementer", desc->vendor)) { + lookup(buf, "CPU part", desc->model); + } + + if (desc->isReady()) { + break; + } + } + + fclose(fp); + + return desc->isReady(); +} + + +static bool arm_cpu_decode(lscpu_desc *desc) +{ + if ((strncmp(desc->vendor, "0x", 2) != 0 || strncmp(desc->model, "0x", 2) != 0)) { + return false; + } + + const int vendor = strtol(desc->vendor, nullptr, 0); + const int model = strtol(desc->model, nullptr, 0); + + for (const auto &impl : hw_implementer) { + if (impl.id != vendor) { + continue; + } + + for (size_t i = 0; impl.parts[i].id != -1; ++i) { + if (impl.parts[i].id == model) { + desc->model = impl.parts[i].name; + + return true; + } + } + } + + return false; +} + + +String cpu_name_arm() +{ + lscpu_desc desc; + if (read_basicinfo(&desc) && arm_cpu_decode(&desc)) { + return desc.model; + } + + return {}; +} + + +} // namespace xmrig diff --git a/src/base/kernel/Platform_mac.cpp b/src/base/kernel/Platform_mac.cpp index 9dd4f15e..8ffae1e1 100644 --- a/src/base/kernel/Platform_mac.cpp +++ b/src/base/kernel/Platform_mac.cpp @@ -22,6 +22,8 @@ */ +#include +#include
#include #include #include @@ -111,15 +113,5 @@ void xmrig::Platform::setThreadPriority(int priority) bool xmrig::Platform::isOnBatteryPower() { - for (int i = 0; i <= 1; ++i) { - char buf[64]; - snprintf(buf, 64, "/sys/class/power_supply/BAT%d/status", i); - std::ifstream f(buf); - if (f.is_open()) { - std::string status; - f >> status; - return (status == "Discharging"); - } - } - return false; + return IOPSGetTimeRemainingEstimate() != kIOPSTimeRemainingUnlimited; } diff --git a/src/base/net/http/HttpContext.cpp b/src/base/net/http/HttpContext.cpp index 37d8ed42..b348d7e4 100644 --- a/src/base/net/http/HttpContext.cpp +++ b/src/base/net/http/HttpContext.cpp @@ -126,6 +126,10 @@ bool xmrig::HttpContext::isRequest() const size_t xmrig::HttpContext::parse(const char *data, size_t size) { + if (size == 0) { + return size; + } + return http_parser_execute(m_parser, &http_settings, data, size); } diff --git a/src/crypto/common/VirtualMemory.cpp b/src/crypto/common/VirtualMemory.cpp index c6becb89..b34e6de0 100644 --- a/src/crypto/common/VirtualMemory.cpp +++ b/src/crypto/common/VirtualMemory.cpp @@ -51,6 +51,7 @@ static std::mutex mutex; xmrig::VirtualMemory::VirtualMemory(size_t size, bool hugePages, bool oneGbPages, bool usePool, uint32_t node, size_t alignSize) : m_size(align(size)), + m_capacity(m_size), m_node(node) { if (usePool) { @@ -69,6 +70,7 @@ xmrig::VirtualMemory::VirtualMemory(size_t size, bool hugePages, bool oneGbPages } if (oneGbPages && allocateOneGbPagesMemory()) { + m_capacity = align(size, 1ULL << 30); return; } diff --git a/src/crypto/common/VirtualMemory.h b/src/crypto/common/VirtualMemory.h index 0ca30775..d7fe783f 100644 --- a/src/crypto/common/VirtualMemory.h +++ b/src/crypto/common/VirtualMemory.h @@ -52,6 +52,7 @@ public: inline bool isHugePages() const { return m_flags.test(FLAG_HUGEPAGES); } inline bool isOneGbPages() const { return m_flags.test(FLAG_1GB_PAGES); } inline size_t size() const { return m_size; } + inline size_t capacity() 
const { return m_capacity; } inline uint8_t *raw() const { return m_scratchpad; } inline uint8_t *scratchpad() const { return m_scratchpad; } @@ -88,6 +89,7 @@ private: void freeLargePagesMemory(); const size_t m_size; + size_t m_capacity; const uint32_t m_node; std::bitset m_flags; uint8_t *m_scratchpad = nullptr; diff --git a/src/crypto/common/VirtualMemory_unix.cpp b/src/crypto/common/VirtualMemory_unix.cpp index 0c77ba09..3363cdaa 100644 --- a/src/crypto/common/VirtualMemory_unix.cpp +++ b/src/crypto/common/VirtualMemory_unix.cpp @@ -82,7 +82,17 @@ void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size) # elif defined(__FreeBSD__) void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0); # else - void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0); + +# if defined(MAP_HUGE_2MB) + constexpr int flag_2mb = MAP_HUGE_2MB; +# elif defined(MAP_HUGE_SHIFT) + constexpr int flag_2mb = (21 << MAP_HUGE_SHIFT); +# else + constexpr int flag_2mb = 0; +# endif + + void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | flag_2mb, 0, 0); + # endif return mem == MAP_FAILED ? nullptr : mem; diff --git a/src/crypto/randomx/bytecode_machine.cpp b/src/crypto/randomx/bytecode_machine.cpp index 55a63935..f0b95c30 100644 --- a/src/crypto/randomx/bytecode_machine.cpp +++ b/src/crypto/randomx/bytecode_machine.cpp @@ -108,7 +108,7 @@ namespace randomx { ibc.imm = signExtend2sCompl(instr.getImm32()); if (src != dst) { ibc.isrc = &nreg->r[src]; - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = AddressMask[instr.getModMem()]; } else { ibc.isrc = &zero; @@ -142,7 +142,7 @@ namespace randomx { ibc.imm = signExtend2sCompl(instr.getImm32()); if (src != dst) { ibc.isrc = &nreg->r[src]; - ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = AddressMask[instr.getModMem()]; } else { ibc.isrc = &zero; @@ -176,7 +176,7 @@ namespace randomx { ibc.imm = signExtend2sCompl(instr.getImm32()); if (src != dst) { ibc.isrc = &nreg->r[src]; - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = AddressMask[instr.getModMem()]; } else { ibc.isrc = &zero; @@ -204,7 +204,7 @@ namespace randomx { ibc.imm = signExtend2sCompl(instr.getImm32()); if (src != dst) { ibc.isrc = &nreg->r[src]; - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = AddressMask[instr.getModMem()]; } else { ibc.isrc = &zero; @@ -232,7 +232,7 @@ namespace randomx { ibc.imm = signExtend2sCompl(instr.getImm32()); if (src != dst) { ibc.isrc = &nreg->r[src]; - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = AddressMask[instr.getModMem()]; } else { ibc.isrc = &zero; @@ -290,7 +290,7 @@ namespace randomx { ibc.imm = signExtend2sCompl(instr.getImm32()); if (src != dst) { ibc.isrc = &nreg->r[src]; - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = AddressMask[instr.getModMem()]; } else { ibc.isrc = &zero; @@ -373,7 +373,7 @@ namespace randomx { ibc.type = InstructionType::FADD_M; ibc.fdst = &nreg->f[dst]; ibc.isrc = &nreg->r[src]; - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = AddressMask[instr.getModMem()]; ibc.imm = signExtend2sCompl(instr.getImm32()); return; } @@ -393,7 +393,7 @@ namespace randomx { ibc.type = InstructionType::FSUB_M; ibc.fdst = &nreg->f[dst]; ibc.isrc = &nreg->r[src]; - ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = AddressMask[instr.getModMem()]; ibc.imm = signExtend2sCompl(instr.getImm32()); return; } @@ -420,7 +420,7 @@ namespace randomx { ibc.type = InstructionType::FDIV_M; ibc.fdst = &nreg->e[dst]; ibc.isrc = &nreg->r[src]; - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = AddressMask[instr.getModMem()]; ibc.imm = signExtend2sCompl(instr.getImm32()); return; } @@ -466,7 +466,7 @@ namespace randomx { ibc.isrc = &nreg->r[src]; ibc.imm = signExtend2sCompl(instr.getImm32()); if (instr.getModCond() < StoreL3Condition) - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = AddressMask[instr.getModMem()]; else ibc.memMask = ScratchpadL3Mask; return; diff --git a/src/crypto/randomx/common.hpp b/src/crypto/randomx/common.hpp index cea5f5be..34c8477c 100644 --- a/src/crypto/randomx/common.hpp +++ b/src/crypto/randomx/common.hpp @@ -126,10 +126,7 @@ namespace randomx { double hi; }; - #define ScratchpadL1Mask RandomX_CurrentConfig.ScratchpadL1Mask_Calculated - #define ScratchpadL1Mask16 RandomX_CurrentConfig.ScratchpadL1Mask16_Calculated - #define ScratchpadL2Mask RandomX_CurrentConfig.ScratchpadL2Mask_Calculated - #define ScratchpadL2Mask16 RandomX_CurrentConfig.ScratchpadL2Mask16_Calculated + #define AddressMask RandomX_CurrentConfig.AddressMask_Calculated #define ScratchpadL3Mask RandomX_CurrentConfig.ScratchpadL3Mask_Calculated #define ScratchpadL3Mask64 RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated constexpr int RegistersCount = 8; diff --git a/src/crypto/randomx/intrin_portable.h b/src/crypto/randomx/intrin_portable.h index 1dcd3ad3..df98a543 100644 --- a/src/crypto/randomx/intrin_portable.h +++ b/src/crypto/randomx/intrin_portable.h @@ -414,7 +414,7 @@ FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 val) { } FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) { - float64x2_t temp; + float64x2_t temp{}; temp = 
vcopyq_laneq_f64(temp, 1, a, 1); a = vcopyq_laneq_f64(a, 1, a, 0); return vcopyq_laneq_f64(a, 0, temp, 1); @@ -505,7 +505,7 @@ FORCE_INLINE void rx_store_vec_i128(rx_vec_i128* mem_addr, rx_vec_i128 val) { FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { double lo = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0)); double hi = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4)); - rx_vec_f128 x; + rx_vec_f128 x{}; x = vsetq_lane_f64(lo, x, 0); x = vsetq_lane_f64(hi, x, 1); return x; diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 178d35a5..7aae54fd 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -508,7 +508,7 @@ namespace randomx { *(uint32_t*)(code + codePos) = 0xe181; codePos += 2; } - emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask, code, codePos); + emit32(AddressMask[instr.getModMem()], code, codePos); } template void JitCompilerX86::genAddressReg(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos); @@ -522,7 +522,7 @@ namespace randomx { emit32(instr.getImm32(), code, codePos); emitByte(0x25, code, codePos); if (instr.getModCond() < StoreL3Condition) { - emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask, code, codePos); + emit32(AddressMask[instr.getModMem()], code, codePos); } else { emit32(ScratchpadL3Mask, code, codePos); diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index 7bb7df7f..5bfd9e78 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -165,18 +165,18 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase() fillAes4Rx4_Key[6] = rx_set_int_vec_i128(0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7); fillAes4Rx4_Key[7] = rx_set_int_vec_i128(0xc0b0762d, 0x0c06d1fd, 0x915839de, 0x7a7cd609); +# if defined(_M_X64) || defined(__x86_64__) // Workaround for Visual Studio placing trampoline in debug builds. 
auto addr = [](void (*func)()) { const uint8_t* p = reinterpret_cast(func); -# if defined(_MSC_VER) +# if defined(_MSC_VER) if (p[0] == 0xE9) { p += *(const int32_t*)(p + 1) + 5; } -# endif +# endif return p; }; -#if defined(_M_X64) || defined(__x86_64__) { const uint8_t* a = addr(randomx_sshash_prefetch); const uint8_t* b = addr(randomx_sshash_end); @@ -204,17 +204,21 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase() const uint8_t* b = addr(randomx_prefetch_scratchpad_end); memcpy(codePrefetchScratchpadTweaked, a, b - a); } -#endif +# endif } static uint32_t Log2(size_t value) { return (value > 1) ? (Log2(value / 2) + 1) : 0; } void RandomX_ConfigurationBase::Apply() { - ScratchpadL1Mask_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) - 1) * 8; - ScratchpadL1Mask16_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) / 2 - 1) * 16; - ScratchpadL2Mask_Calculated = (ScratchpadL2_Size / sizeof(uint64_t) - 1) * 8; - ScratchpadL2Mask16_Calculated = (ScratchpadL2_Size / sizeof(uint64_t) / 2 - 1) * 16; + const uint32_t ScratchpadL1Mask_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) - 1) * 8; + const uint32_t ScratchpadL2Mask_Calculated = (ScratchpadL2_Size / sizeof(uint64_t) - 1) * 8; + + AddressMask_Calculated[0] = ScratchpadL2Mask_Calculated; + AddressMask_Calculated[1] = ScratchpadL1Mask_Calculated; + AddressMask_Calculated[2] = ScratchpadL1Mask_Calculated; + AddressMask_Calculated[3] = ScratchpadL1Mask_Calculated; + ScratchpadL3Mask_Calculated = (((ScratchpadL3_Size / sizeof(uint64_t)) - 1) * 8); ScratchpadL3Mask64_Calculated = ((ScratchpadL3_Size / sizeof(uint64_t)) / 8 - 1) * 64; diff --git a/src/crypto/randomx/randomx.h b/src/crypto/randomx/randomx.h index 96b67cd1..ce3f6ebb 100644 --- a/src/crypto/randomx/randomx.h +++ b/src/crypto/randomx/randomx.h @@ -129,10 +129,7 @@ struct RandomX_ConfigurationBase uint32_t CacheLineAlignMask_Calculated; uint32_t DatasetExtraItems_Calculated; - uint32_t ScratchpadL1Mask_Calculated; - uint32_t 
ScratchpadL1Mask16_Calculated; - uint32_t ScratchpadL2Mask_Calculated; - uint32_t ScratchpadL2Mask16_Calculated; + uint32_t AddressMask_Calculated[4]; uint32_t ScratchpadL3Mask_Calculated; uint32_t ScratchpadL3Mask64_Calculated; diff --git a/src/crypto/rx/RxDataset.cpp b/src/crypto/rx/RxDataset.cpp index ef2371e0..03b0f155 100644 --- a/src/crypto/rx/RxDataset.cpp +++ b/src/crypto/rx/RxDataset.cpp @@ -193,6 +193,12 @@ void xmrig::RxDataset::allocate(bool hugePages, bool oneGbPages) } m_memory = new VirtualMemory(maxSize(), hugePages, oneGbPages, false, m_node); + + if (m_memory->isOneGbPages()) { + m_scratchpadOffset = maxSize() + RANDOMX_CACHE_MAX_SIZE; + m_scratchpadLimit = m_memory->capacity(); + } + m_dataset = randomx_create_dataset(m_memory->raw()); # ifdef XMRIG_OS_LINUX @@ -201,3 +207,19 @@ void xmrig::RxDataset::allocate(bool hugePages, bool oneGbPages) } # endif } + +/* Hands out the next RANDOMX_SCRATCHPAD_L3_MAX_SIZE-byte slice of the dataset allocation, starting at m_scratchpadOffset and bounded by m_scratchpadLimit. Returns nullptr when there is no dataset memory, when allocate() reserved no 1 GB-page region, or when that region is exhausted. */ +uint8_t* xmrig::RxDataset::tryAllocateScrathpad() +{ + uint8_t* p = reinterpret_cast<uint8_t*>(raw()); + if (!p || m_scratchpadLimit == 0) { + return nullptr; + } + + const size_t offset = m_scratchpadOffset.fetch_add(RANDOMX_SCRATCHPAD_L3_MAX_SIZE); + if (offset + RANDOMX_SCRATCHPAD_L3_MAX_SIZE > m_scratchpadLimit) { + return nullptr; + } + + return p + offset; +} diff --git a/src/crypto/rx/RxDataset.h b/src/crypto/rx/RxDataset.h index a6bb7455..798ec996 100644 --- a/src/crypto/rx/RxDataset.h +++ b/src/crypto/rx/RxDataset.h @@ -36,6 +36,8 @@ #include "crypto/randomx/randomx.h" #include "crypto/rx/RxConfig.h" +#include <atomic> + struct randomx_dataset; @@ -69,6 +71,8 @@ public: void *raw() const; void setRaw(const void *raw); + uint8_t *tryAllocateScrathpad(); + static inline constexpr size_t maxSize() { return RANDOMX_DATASET_MAX_SIZE; } private: @@ -79,6 +83,9 @@ private: randomx_dataset *m_dataset = nullptr; RxCache *m_cache = nullptr; VirtualMemory *m_memory = nullptr; + + std::atomic<size_t> m_scratchpadOffset{0}; /* explicit 0: a default-constructed std::atomic is uninitialised before C++20 */ + size_t m_scratchpadLimit = 0; }; diff --git a/src/crypto/rx/RxNUMAStorage.cpp b/src/crypto/rx/RxNUMAStorage.cpp index
01164f64..09a0fd37 100644 --- a/src/crypto/rx/RxNUMAStorage.cpp +++ b/src/crypto/rx/RxNUMAStorage.cpp @@ -176,10 +176,16 @@ public: inline void initDatasets(uint32_t threads, int priority) { - uint64_t ts = Chrono::steadyMSecs(); - auto id = m_nodeset.front(); - auto primary = dataset(id); + uint64_t ts = Chrono::steadyMSecs(); + uint32_t id = 0; + for (const auto &kv : m_datasets) { + if (kv.second->cache()) { + id = kv.first; + } + } + + auto primary = dataset(id); primary->init(m_seed.data(), threads, priority); printDatasetReady(id, ts); diff --git a/src/version.h b/src/version.h index 124017c1..d3dc225b 100644 --- a/src/version.h +++ b/src/version.h @@ -28,7 +28,7 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig miner" -#define APP_VERSION "6.3.1" +#define APP_VERSION "6.3.2-dev" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2020 xmrig.com" @@ -36,7 +36,7 @@ #define APP_VER_MAJOR 6 #define APP_VER_MINOR 3 -#define APP_VER_PATCH 1 +#define APP_VER_PATCH 2 #ifdef _MSC_VER # if (_MSC_VER >= 1920)