diff --git a/CHANGELOG.md b/CHANGELOG.md
index 074e3997..edf6aff0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,15 @@
+# v6.7.0
+- **[#1991](https://github.com/xmrig/xmrig/issues/1991) Added Apple M1 processor support.**
+- **[#1986](https://github.com/xmrig/xmrig/pull/1986) Up to 20-30% faster RandomX dataset initialization with AVX2 on some CPUs.**
+- [#1964](https://github.com/xmrig/xmrig/pull/1964) Cleanup and refactoring.
+- [#1966](https://github.com/xmrig/xmrig/pull/1966) Removed libcpuid support.
+- [#1968](https://github.com/xmrig/xmrig/pull/1968) Added virtual machine detection.
+- [#1969](https://github.com/xmrig/xmrig/pull/1969) [#1970](https://github.com/xmrig/xmrig/pull/1970) Fixed errors found by static analysis.
+- [#1977](https://github.com/xmrig/xmrig/pull/1977) Fixed: secure JIT and huge pages are incompatible on Windows.
+- [#1979](https://github.com/xmrig/xmrig/pull/1979) Term `x64` replaced with `64-bit`.
+- [#1980](https://github.com/xmrig/xmrig/pull/1980) Fixed build on gcc 11.
+- [#1989](https://github.com/xmrig/xmrig/pull/1989) Fixed broken Dero solo mining.
+
 # v6.6.2
 - [#1958](https://github.com/xmrig/xmrig/pull/1958) Added example mining scripts to help new miners.
 - [#1959](https://github.com/xmrig/xmrig/pull/1959) Optimized JIT compiler.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5b095f54..35fba8ea 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,6 @@
-cmake_minimum_required(VERSION 2.8)
+cmake_minimum_required(VERSION 2.8.12)
 project(xmrig)
 
-option(WITH_LIBCPUID "Enable libcpuid support" ON)
 option(WITH_HWLOC "Enable hwloc support" ON)
 option(WITH_CN_LITE "Enable CryptoNight-Lite algorithms family" ON)
 option(WITH_CN_HEAVY "Enable CryptoNight-Heavy algorithms family" ON)
@@ -26,6 +25,7 @@ option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF)
 option(WITH_PROFILING "Enable profiling for developers" OFF)
 option(WITH_SSE4_1 "Enable SSE 4.1 for Blake2" ON)
 option(WITH_BENCHMARK "Enable builtin RandomX benchmark and stress test" ON)
+option(WITH_SECURE_JIT "Enable secure access to JIT memory" OFF)
 
 option(BUILD_STATIC "Build static binary" OFF)
 option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)
@@ -168,7 +168,7 @@ else()
 endif()
 endif()
 
-add_definitions(-DXMRIG_MINER_PROJECT)
+add_definitions(-DXMRIG_MINER_PROJECT -DXMRIG_JSON_SINGLE_LINE_ARRAY)
 add_definitions(-D__STDC_FORMAT_MACROS -DUNICODE)
 
 find_package(UV REQUIRED)
@@ -205,7 +205,7 @@ if (WITH_DEBUG_LOG)
 add_definitions(/DAPP_DEBUG)
 endif()
 
-add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES})
+add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES})
 target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY} ${ETHASH_LIBRARY})
 
 if (WIN32)
@@ -216,6 +216,6 @@ if (WIN32)
 add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/solo_mine_example.cmd" $)
 endif()
 
-if (CMAKE_CXX_COMPILER_ID MATCHES Clang AND CMAKE_BUILD_TYPE STREQUAL Release)
+if (CMAKE_CXX_COMPILER_ID MATCHES Clang AND CMAKE_BUILD_TYPE STREQUAL Release AND NOT CMAKE_GENERATOR STREQUAL Xcode)
 add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_STRIP} ${CMAKE_PROJECT_NAME})
 endif()
diff --git a/cmake/cpu.cmake b/cmake/cpu.cmake index 8843133b..a37f1066 100644 --- a/cmake/cpu.cmake +++ b/cmake/cpu.cmake @@ -18,7 +18,6 @@ endif() if (ARM_TARGET AND ARM_TARGET GREATER 6) set(XMRIG_ARM ON) - set(WITH_LIBCPUID OFF) add_definitions(/DXMRIG_ARM) message(STATUS "Use ARM_TARGET=${ARM_TARGET} (${CMAKE_SYSTEM_PROCESSOR})") diff --git a/cmake/os.cmake b/cmake/os.cmake index 0270cc93..09931103 100644 --- a/cmake/os.cmake +++ b/cmake/os.cmake @@ -32,6 +32,10 @@ elseif(XMRIG_OS_APPLE) else() add_definitions(/DXMRIG_OS_MACOS) endif() + + if (XMRIG_ARM) + set(WITH_SECURE_JIT ON) + endif() elseif(XMRIG_OS_UNIX) add_definitions(/DXMRIG_OS_UNIX) @@ -43,3 +47,7 @@ elseif(XMRIG_OS_UNIX) add_definitions(/DXMRIG_OS_FREEBSD) endif() endif() + +if (WITH_SECURE_JIT) + add_definitions(/DXMRIG_SECURE_JIT) +endif() diff --git a/cmake/randomx.cmake b/cmake/randomx.cmake index 99de2632..5d44e05f 100644 --- a/cmake/randomx.cmake +++ b/cmake/randomx.cmake @@ -61,7 +61,11 @@ if (WITH_RANDOMX) src/crypto/randomx/jit_compiler_a64.cpp ) # cheat because cmake and ccache hate each other - set_property(SOURCE src/crypto/randomx/jit_compiler_a64_static.S PROPERTY LANGUAGE C) + if (CMAKE_GENERATOR STREQUAL Xcode) + set_property(SOURCE src/crypto/randomx/jit_compiler_a64_static.S PROPERTY LANGUAGE ASM) + else() + set_property(SOURCE src/crypto/randomx/jit_compiler_a64_static.S PROPERTY LANGUAGE C) + endif() else() list(APPEND SOURCES_CRYPTO src/crypto/randomx/jit_compiler_fallback.cpp @@ -108,6 +112,13 @@ if (WITH_RANDOMX) remove_definitions(/DXMRIG_FIX_RYZEN) message("-- WITH_MSR=OFF") endif() + + if (WITH_PROFILING) + add_definitions(/DXMRIG_FEATURE_PROFILING) + + list(APPEND HEADERS_CRYPTO src/crypto/rx/Profiler.h) + list(APPEND SOURCES_CRYPTO src/crypto/rx/Profiler.cpp) + endif() else() remove_definitions(/DXMRIG_ALGO_RANDOMX) endif() diff --git a/scripts/benchmark_10M.cmd b/scripts/benchmark_10M.cmd index b67a82ec..dbbcc78c 100644 --- a/scripts/benchmark_10M.cmd +++ b/scripts/benchmark_10M.cmd @@ -1,3 +1,4 @@ @echo off +cd %~dp0 xmrig.exe --bench=10M --submit pause diff --git a/scripts/benchmark_1M.cmd b/scripts/benchmark_1M.cmd index 0a0d95db..5d2166d0 100644 --- a/scripts/benchmark_1M.cmd +++ b/scripts/benchmark_1M.cmd @@ -1,3 +1,4 @@ @echo off +cd %~dp0 xmrig.exe --bench=1M --submit pause diff --git a/scripts/build.hwloc.sh b/scripts/build.hwloc.sh index 1ca34731..1525bd4e 100755 --- a/scripts/build.hwloc.sh +++ b/scripts/build.hwloc.sh @@ -1,6 +1,6 @@ #!/bin/bash -e -HWLOC_VERSION="2.2.0" +HWLOC_VERSION="2.4.0" mkdir -p deps mkdir -p deps/include @@ -8,7 +8,7 @@ mkdir -p deps/lib mkdir -p build && cd build -wget https://download.open-mpi.org/release/hwloc/v2.2/hwloc-${HWLOC_VERSION}.tar.gz -O hwloc-${HWLOC_VERSION}.tar.gz +wget https://download.open-mpi.org/release/hwloc/v2.4/hwloc-${HWLOC_VERSION}.tar.gz -O hwloc-${HWLOC_VERSION}.tar.gz tar -xzf hwloc-${HWLOC_VERSION}.tar.gz cd hwloc-${HWLOC_VERSION} diff --git a/scripts/build.openssl.sh b/scripts/build.openssl.sh index 4ab5d756..faeca1b2 100755 --- a/scripts/build.openssl.sh +++ b/scripts/build.openssl.sh @@ -1,6 +1,6 @@ #!/bin/bash -e -OPENSSL_VERSION="1.1.1h" +OPENSSL_VERSION="1.1.1i" mkdir -p deps mkdir -p deps/include diff --git a/scripts/pool_mine_example.cmd b/scripts/pool_mine_example.cmd index 27749ef6..6e35c913 100644 --- a/scripts/pool_mine_example.cmd +++ b/scripts/pool_mine_example.cmd @@ -15,5 +15,6 @@ :: Choose pools outside of top 5 to help Monero network be more decentralized! 
:: Smaller pools also often have smaller fees/payout limits. +cd %~dp0 xmrig.exe -o pool.hashvault.pro:3333 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD -p x pause diff --git a/scripts/solo_mine_example.cmd b/scripts/solo_mine_example.cmd index 151ecc5d..c925b36d 100644 --- a/scripts/solo_mine_example.cmd +++ b/scripts/solo_mine_example.cmd @@ -11,5 +11,6 @@ :: Mining solo is the best way to help Monero network be more decentralized! :: But you will only get a payout when you find a block which can take more than a year for a single low-end PC. +cd %~dp0 xmrig.exe -o node.xmr.to:18081 -a rx/0 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD --daemon pause diff --git a/src/3rdparty/argon2/CMakeLists.txt b/src/3rdparty/argon2/CMakeLists.txt index 2f1fc0a9..e02197be 100644 --- a/src/3rdparty/argon2/CMakeLists.txt +++ b/src/3rdparty/argon2/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 2.8.12) project(argon2 C) set(CMAKE_C_STANDARD 99) diff --git a/src/3rdparty/fmt/format-inl.h b/src/3rdparty/fmt/format-inl.h index 5ebad700..b8249f33 100644 --- a/src/3rdparty/fmt/format-inl.h +++ b/src/3rdparty/fmt/format-inl.h @@ -1754,7 +1754,7 @@ inline bool divisible_by_power_of_2(uint64_t x, int exp) FMT_NOEXCEPT { #ifdef FMT_BUILTIN_CTZLL return FMT_BUILTIN_CTZLL(x) >= exp; #else - return exp < num_bits()) && x == ((x >> exp) << exp); + return (exp < num_bits()) && x == ((x >> exp) << exp); #endif } diff --git a/src/3rdparty/hwloc/CMakeLists.txt b/src/3rdparty/hwloc/CMakeLists.txt index 0e56b6fc..3f159afd 100644 --- a/src/3rdparty/hwloc/CMakeLists.txt +++ b/src/3rdparty/hwloc/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 2.8) +cmake_minimum_required (VERSION 2.8.12) project (hwloc C) include_directories(include) @@ -13,23 +13,25 @@ set(HEADERS ) set(SOURCES - src/base64.c - src/bind.c - src/bitmap.c - src/components.c - src/diff.c - src/distances.c - src/misc.c - src/pci-common.c - src/shmem.c - src/topology.c - src/topology-noos.c - src/topology-synthetic.c - src/topology-windows.c - src/topology-x86.c - src/topology-xml.c - src/topology-xml-nolibxml.c + src/base64.c + src/bind.c + src/bitmap.c + src/components.c + src/diff.c + src/distances.c + src/misc.c + src/pci-common.c + src/shmem.c + src/topology.c + src/topology-noos.c + src/topology-synthetic.c + src/topology-windows.c + src/topology-x86.c + src/topology-xml.c + src/topology-xml-nolibxml.c src/traversal.c + src/memattrs.c + src/cpukinds.c ) add_library(hwloc STATIC diff --git a/src/3rdparty/hwloc/NEWS b/src/3rdparty/hwloc/NEWS index 0dfe28df..0ec17bb6 100644 --- a/src/3rdparty/hwloc/NEWS +++ b/src/3rdparty/hwloc/NEWS @@ -2,6 +2,7 @@ Copyright © 2009 CNRS Copyright © 2009-2020 Inria. All rights reserved. Copyright © 2009-2013 Université Bordeaux Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. +Copyright © 2020 Hewlett Packard Enterprise. All rights reserved. $COPYRIGHT$ @@ -16,6 +17,76 @@ bug fixes (and other actions) for each version of hwloc since version 0.9. +Version 2.4.0 +------------- +* API + + Add hwloc/cpukinds.h for reporting information about hybrid CPUs. + - Use Linux cpufreq frequencies to rank cores by efficiency. + - Use x86 CPUID hybrid leaf and future Linux kernels sysfs CPU type + files to identify Intel Atom and Core cores. + - Use the Windows native EfficiencyClass to separate kinds. 
+* Backends + + Properly handle Linux kernel 5.10+ exposing ACPI HMAT information + with knowledge of Generic Initiators. +* Tools + + lstopo has new --cpukinds and --no-cpukinds options for showing + CPU kinds or not in textual and graphical modes respectively. + + hwloc-calc has a new --cpukind option for filtering PUs by kind. + + hwloc-annotate has a new cpukind command for modifying CPU kinds. +* Misc + + Fix hwloc_bitmap_nr_ulongs(), thanks to Norbert Eicker. + + Add a documentation section about + "Topology Attributes: Distances, Memory Attributes and CPU Kinds". + + Silence some spurious warnings in the OpenCL backend and when showing + process binding with lstopo --ps. + + +Version 2.3.0 +------------- +* API + + Add hwloc/memattrs.h for exposing latency/bandwidth information + between initiators (CPU sets for now) and target NUMA nodes, + typically on heterogeneous platforms. + - When available, bandwidths and latencies are read from the ACPI HMAT + table exposed by Linux kernel 5.2+. + - Attributes may also be customized to expose user-defined performance + information. + + Add hwloc_get_local_numanode_objs() for listing NUMA nodes that are + local to some locality. + + The new topology flag HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT causes + support arrays to be loaded from XML exported with hwloc 2.3+. + - hwloc_topology_get_support() now returns an additional "misc" + array with feature "imported_support" set when support was imported. + + Add hwloc_topology_refresh() to refresh internal caches after modifying + the topology and before consulting the topology in a multithread context. +* Backends + + Add a ROCm SMI backend and a hwloc/rsmi.h helper file for getting + the locality of AMD GPUs, now exposed as "rsmi" OS devices. + Thanks to Mike Li. + + Remove POWER device-tree-based topology on Linux, + (it was disabled by default since 2.1). +* Tools + + Command-line options for specifying flags now understand comma-separated + lists of flag names (substrings). + + hwloc-info and hwloc-calc have new --local-memory --local-memory-flags + and --best-memattr options for reporting local memory nodes and filtering + by memory attributes. + + hwloc-bind has a new --best-memattr option for filtering by memory attributes + among the memory binding set. + + Tools that have a --restrict option may now receive a nodeset or + some custom flags for restricting the topology. + + lstopo now has a --thickness option for changing line thickness in the + graphical output. + + Fix lstopo drawing when autoresizing on Windows 10. + + Pressing the F5 key in lstopo X11 and Windows graphical/interactive outputs + now refreshes the display according to the current topology and binding. + + Add a tikz lstopo graphical backend to generate picture easily included into + LaTeX documents. Thanks to Clement Foyer. +* Misc + + The default installation path of the Bash completion file has changed to + ${datadir}/bash-completion/completions/hwloc. Thanks to Tomasz Kłoczko. + + Version 2.2.0 ------------- * API diff --git a/src/3rdparty/hwloc/README b/src/3rdparty/hwloc/README index 5567b4d1..932d6d09 100644 --- a/src/3rdparty/hwloc/README +++ b/src/3rdparty/hwloc/README @@ -23,9 +23,9 @@ APIs are documented after these sections. Installation -hwloc (http://www.open-mpi.org/projects/hwloc/) is available under the BSD -license. It is hosted as a sub-project of the overall Open MPI project (http:// -www.open-mpi.org/). 
Note that hwloc does not require any functionality from +hwloc (https://www.open-mpi.org/projects/hwloc/) is available under the BSD +license. It is hosted as a sub-project of the overall Open MPI project (https:/ +/www.open-mpi.org/). Note that hwloc does not require any functionality from Open MPI -- it is a wholly separate (and much smaller!) project and code base. It just happens to be hosted as part of the overall Open MPI project. @@ -75,7 +75,7 @@ Bugs should be reported in the tracker (https://github.com/open-mpi/hwloc/ issues). Opening a new issue automatically displays lots of hints about how to debug and report issues. -Questions may be sent to the users or developers mailing lists (http:// +Questions may be sent to the users or developers mailing lists (https:// www.open-mpi.org/community/lists/hwloc.php). There is also a #hwloc IRC channel on Freenode (irc.freenode.net). diff --git a/src/3rdparty/hwloc/VERSION b/src/3rdparty/hwloc/VERSION index e182793d..979c2cc8 100644 --- a/src/3rdparty/hwloc/VERSION +++ b/src/3rdparty/hwloc/VERSION @@ -8,7 +8,7 @@ # Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too. major=2 -minor=2 +minor=4 release=0 # greek is used for alpha or beta release tags. If it is non-empty, @@ -22,7 +22,7 @@ greek= # The date when this release was created -date="Mar 30, 2020" +date="Nov 26, 2020" # If snapshot=1, then use the value from snapshot_version as the # entire hwloc version (i.e., ignore major, minor, release, and @@ -41,7 +41,7 @@ snapshot_version=${major}.${minor}.${release}${greek}-git # 2. Version numbers are described in the Libtool current:revision:age # format. -libhwloc_so_version=17:0:2 +libhwloc_so_version=19:0:4 libnetloc_so_version=0:0:0 # Please also update the lines in contrib/windows/libhwloc.vcxproj diff --git a/src/3rdparty/hwloc/include/hwloc.h b/src/3rdparty/hwloc/include/hwloc.h index 01b42fdc..9c8c86cc 100644 --- a/src/3rdparty/hwloc/include/hwloc.h +++ b/src/3rdparty/hwloc/include/hwloc.h @@ -2,7 +2,7 @@ * Copyright © 2009 CNRS * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux - * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ @@ -11,7 +11,7 @@ * ------------------------------------------------ * $tarball_directory/doc/doxygen-doc/ * or - * http://www.open-mpi.org/projects/hwloc/doc/ + * https://www.open-mpi.org/projects/hwloc/doc/ *===================================================================== * * FAIR WARNING: Do NOT expect to be able to figure out all the @@ -93,7 +93,7 @@ extern "C" { * Two stable releases of the same series usually have the same ::HWLOC_API_VERSION * even if their HWLOC_VERSION are different. */ -#define HWLOC_API_VERSION 0x00020100 +#define HWLOC_API_VERSION 0x00020400 /** \brief Indicate at runtime which hwloc API version was used at build time. * @@ -102,7 +102,7 @@ extern "C" { HWLOC_DECLSPEC unsigned hwloc_get_api_version(void); /** \brief Current component and plugin ABI version (see hwloc/plugins.h) */ -#define HWLOC_COMPONENT_ABI 6 +#define HWLOC_COMPONENT_ABI 7 /** @} */ @@ -196,7 +196,7 @@ typedef enum { */ HWLOC_OBJ_CORE, /**< \brief Core. * A computation unit (may be shared by several - * logical processors). + * PUs, aka logical processors). */ HWLOC_OBJ_PU, /**< \brief Processing Unit, or (Logical) Processor. 
* An execution unit (may share a core with some @@ -257,22 +257,31 @@ typedef enum { HWLOC_OBJ_BRIDGE, /**< \brief Bridge (filtered out by default). * Any bridge (or PCI switch) that connects the host or an I/O bus, * to another I/O bus. - * They are not added to the topology unless I/O discovery - * is enabled with hwloc_topology_set_flags(). + * + * Bridges are not added to the topology unless their + * filtering is changed (see hwloc_topology_set_type_filter() + * and hwloc_topology_set_io_types_filter()). + * * I/O objects are not listed in the main children list, * but rather in the dedicated io children list. * I/O objects have NULL CPU and node sets. */ HWLOC_OBJ_PCI_DEVICE, /**< \brief PCI device (filtered out by default). - * They are not added to the topology unless I/O discovery - * is enabled with hwloc_topology_set_flags(). + * + * PCI devices are not added to the topology unless their + * filtering is changed (see hwloc_topology_set_type_filter() + * and hwloc_topology_set_io_types_filter()). + * * I/O objects are not listed in the main children list, * but rather in the dedicated io children list. * I/O objects have NULL CPU and node sets. */ HWLOC_OBJ_OS_DEVICE, /**< \brief Operating system device (filtered out by default). - * They are not added to the topology unless I/O discovery - * is enabled with hwloc_topology_set_flags(). + * + * OS devices are not added to the topology unless their + * filtering is changed (see hwloc_topology_set_type_filter() + * and hwloc_topology_set_io_types_filter()). + * * I/O objects are not listed in the main children list, * but rather in the dedicated io children list. * I/O objects have NULL CPU and node sets. @@ -282,6 +291,10 @@ typedef enum { * Objects without particular meaning, that can e.g. be * added by the application for its own use, or by hwloc * for miscellaneous objects such as MemoryModule (DIMMs). + * + * They are not added to the topology unless their filtering + * is changed (see hwloc_topology_set_type_filter()). + * * These objects are not listed in the main children list, * but rather in the dedicated misc children list. * Misc objects may only have Misc objects as children, @@ -304,7 +317,6 @@ typedef enum { HWLOC_OBJ_DIE, /**< \brief Die within a physical package. * A subpart of the physical package, that contains multiple cores. - * \hideinitializer */ HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */ @@ -338,8 +350,7 @@ typedef enum hwloc_obj_osdev_type_e { HWLOC_OBJ_OSDEV_DMA, /**< \brief Operating system dma engine device. * For instance the "dma0chan0" DMA channel on Linux. */ HWLOC_OBJ_OSDEV_COPROC /**< \brief Operating system co-processor device. - * For instance "mic0" for a Xeon Phi (MIC) on Linux, - * "opencl0d0" for a OpenCL device, + * For instance "opencl0d0" for a OpenCL device, * "cuda0" for a CUDA device. */ } hwloc_obj_osdev_type_t; @@ -512,7 +523,7 @@ struct hwloc_obj { * * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. */ - hwloc_cpuset_t complete_cpuset; /**< \brief The complete CPU set of logical processors of this object, + hwloc_cpuset_t complete_cpuset; /**< \brief The complete CPU set of processors of this object, * * This may include not only the same as the cpuset field, but also some CPUs for * which topology information is unknown or incomplete, some offlines CPUs, and @@ -533,6 +544,8 @@ struct hwloc_obj { * between this object and the NUMA node objects). * * In the end, these nodes are those that are close to the current object. 
+ * Function hwloc_get_local_numanode_objs() may be used to list those NUMA + * nodes more precisely. * * If the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED configuration flag is set, * some of these nodes may not be allowed for allocation, @@ -1929,7 +1942,31 @@ enum hwloc_topology_flags_e { * would result in the same behavior. * \hideinitializer */ - HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES = (1UL<<2) + HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES = (1UL<<2), + + /** \brief Import support from the imported topology. + * + * When importing a XML topology from a remote machine, binding is + * disabled by default (see ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM). + * This disabling is also marked by putting zeroes in the corresponding + * supported feature bits reported by hwloc_topology_get_support(). + * + * The flag ::HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT actually imports + * support bits from the remote machine. It also sets the flag + * \p imported_support in the struct hwloc_topology_misc_support array. + * If the imported XML did not contain any support information + * (exporter hwloc is too old), this flag is not set. + * + * Note that these supported features are only relevant for the hwloc + * installation that actually exported the XML topology + * (it may vary with the operating system, or with how hwloc was compiled). + * + * Note that setting this flag however does not enable binding for the + * locally imported hwloc topology, it only reports what the remote + * hwloc and machine support. + * + */ + HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT = (1UL<<3) }; /** \brief Set OR'ed flags to non-yet-loaded topology. @@ -1972,6 +2009,8 @@ struct hwloc_topology_discovery_support { unsigned char disallowed_pu; /** \brief Detecting and identifying NUMA nodes that are not available to the current process is supported. */ unsigned char disallowed_numa; + /** \brief Detecting the efficiency of CPU kinds is supported, see \ref hwlocality_cpukinds. */ + unsigned char cpukind_efficiency; }; /** \brief Flags describing actual PU binding support for this topology. @@ -2042,6 +2081,13 @@ struct hwloc_topology_membind_support { unsigned char get_area_memlocation; }; +/** \brief Flags describing miscellaneous features. + */ +struct hwloc_topology_misc_support { + /** Support was imported when importing another topology, see ::HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT. */ + unsigned char imported_support; +}; + /** \brief Set of flags describing actual support for this topology. * * This is retrieved with hwloc_topology_get_support() and will be valid until @@ -2052,6 +2098,7 @@ struct hwloc_topology_support { struct hwloc_topology_discovery_support *discovery; struct hwloc_topology_cpubind_support *cpubind; struct hwloc_topology_membind_support *membind; + struct hwloc_topology_misc_support *misc; }; /** \brief Retrieve the topology support. @@ -2062,6 +2109,18 @@ struct hwloc_topology_support { * call may still fail in some corner cases. * * These features are also listed by hwloc-info \--support + * + * The reported features are what the current topology supports + * on the current machine. If the topology was exported to XML + * from another machine and later imported here, support still + * describes what is supported for this imported topology after + * import. By default, binding will be reported as unsupported + * in this case (see ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM). 
+ * + * Topology flag ::HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT may be used + * to report the supported features of the original remote machine + * instead. If it was successfully imported, \p imported_support + * will be set in the struct hwloc_topology_misc_support array. */ HWLOC_DECLSPEC const struct hwloc_topology_support *hwloc_topology_get_support(hwloc_topology_t __hwloc_restrict topology); @@ -2108,8 +2167,8 @@ enum hwloc_type_filter_e { * * It is only useful for I/O object types. * For ::HWLOC_OBJ_PCI_DEVICE and ::HWLOC_OBJ_OS_DEVICE, it means that only objects - * of major/common kinds are kept (storage, network, OpenFabrics, Intel MICs, CUDA, - * OpenCL, NVML, and displays). + * of major/common kinds are kept (storage, network, OpenFabrics, CUDA, + * OpenCL, RSMI, NVML, and displays). * Also, only OS devices directly attached on PCI (e.g. no USB) are reported. * For ::HWLOC_OBJ_BRIDGE, it means that bridges are kept only if they have children. * @@ -2371,6 +2430,22 @@ HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_group_object(hwloc_topology_t t */ HWLOC_DECLSPEC int hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src); +/** \brief Refresh internal structures after topology modification. + * + * Modifying the topology (by restricting, adding objects, modifying structures + * such as distances or memory attributes, etc.) may cause some internal caches + * to become invalid. These caches are automatically refreshed when accessed + * but this refreshing is not thread-safe. + * + * This function is not thread-safe either, but it is a good way to end a + * non-thread-safe phase of topology modification. Once this refresh is done, + * multiple threads may concurrently consult the topology, objects, distances, + * attributes, etc. + * + * See also \ref threadsafety + */ +HWLOC_DECLSPEC int hwloc_topology_refresh(hwloc_topology_t topology); + /** @} */ @@ -2386,6 +2461,12 @@ HWLOC_DECLSPEC int hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src /* inline code of some functions above */ #include "hwloc/inlines.h" +/* memory attributes */ +#include "hwloc/memattrs.h" + +/* kinds of CPU cores */ +#include "hwloc/cpukinds.h" + /* exporting to XML or synthetic */ #include "hwloc/export.h" diff --git a/src/3rdparty/hwloc/include/hwloc/autogen/config.h b/src/3rdparty/hwloc/include/hwloc/autogen/config.h index 06963b36..8b69185f 100644 --- a/src/3rdparty/hwloc/include/hwloc/autogen/config.h +++ b/src/3rdparty/hwloc/include/hwloc/autogen/config.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2019 Inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -11,9 +11,9 @@ #ifndef HWLOC_CONFIG_H #define HWLOC_CONFIG_H -#define HWLOC_VERSION "2.2.0" +#define HWLOC_VERSION "2.4.0" #define HWLOC_VERSION_MAJOR 2 -#define HWLOC_VERSION_MINOR 2 +#define HWLOC_VERSION_MINOR 4 #define HWLOC_VERSION_RELEASE 0 #define HWLOC_VERSION_GREEK "" diff --git a/src/3rdparty/hwloc/include/hwloc/bitmap.h b/src/3rdparty/hwloc/include/hwloc/bitmap.h index d5b0ea02..8d9bb9c8 100644 --- a/src/3rdparty/hwloc/include/hwloc/bitmap.h +++ b/src/3rdparty/hwloc/include/hwloc/bitmap.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. 
All rights reserved. * See COPYING in top-level directory. @@ -231,7 +231,7 @@ HWLOC_DECLSPEC int hwloc_bitmap_clr_range(hwloc_bitmap_t bitmap, unsigned begin, /** \brief Keep a single index among those set in bitmap \p bitmap * * May be useful before binding so that the process does not - * have a chance of migrating between multiple logical CPUs + * have a chance of migrating between multiple processors * in the original mask. * Instead of running the task on any PU inside the given CPU set, * the operating system scheduler will be forced to run it on a single diff --git a/src/3rdparty/hwloc/include/hwloc/cpukinds.h b/src/3rdparty/hwloc/include/hwloc/cpukinds.h new file mode 100644 index 00000000..f240baf3 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/cpukinds.h @@ -0,0 +1,188 @@ +/* + * Copyright © 2020 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Kinds of CPU cores. + */ + +#ifndef HWLOC_CPUKINDS_H +#define HWLOC_CPUKINDS_H + +#include "hwloc.h" + +#ifdef __cplusplus +extern "C" { +#elif 0 +} +#endif + +/** \defgroup hwlocality_cpukinds Kinds of CPU cores + * + * Platforms with heterogeneous CPUs may have some cores with + * different features or frequencies. + * This API exposes identical PUs in sets called CPU kinds. + * Each PU of the topology may only be in a single kind. + * + * The number of kinds may be obtained with hwloc_cpukinds_get_nr(). + * If the platform is homogeneous, there may be a single kind + * with all PUs. + * If the platform or operating system does not expose any + * information about CPU cores, there may be no kind at all. + * + * The index of the kind that describes a given CPU set + * (if any, and not partially) + * may be obtained with hwloc_cpukinds_get_by_cpuset(). + * + * From the index of a kind, it is possible to retrieve information + * with hwloc_cpukinds_get_info(): + * an abstracted efficiency value, + * and an array of info attributes + * (for instance the "CoreType" and "FrequencyMaxMHz", + * see \ref topoattrs_cpukinds). + * + * A higher efficiency value means intrinsic greater performance + * (and possibly less performance/power efficiency). + * Kinds with lower efficiency are ranked first: + * Passing 0 as \p kind_index to hwloc_cpukinds_get_info() will + * return information about the less efficient CPU kind. + * + * When available, efficiency values are gathered from the operating + * system (when \p cpukind_efficiency is set in the + * struct hwloc_topology_discovery_support array, only on Windows 10 for now). + * Otherwise hwloc tries to compute efficiencies + * by comparing CPU kinds using frequencies (on ARM), + * or core types and frequencies (on other architectures). + * The environment variable HWLOC_CPUKINDS_RANKING may be used + * to change this heuristics, see \ref envvar. + * + * If hwloc fails to rank any kind, for instance because the operating + * system does not expose efficiencies and core frequencies, + * all kinds will have an unknown efficiency (\c -1), + * and they are not indexed/ordered in any specific way. + * + * @{ + */ + +/** \brief Get the number of different kinds of CPU cores in the topology. + * + * \p flags must be \c 0 for now. + * + * \return The number of CPU kinds (positive integer) on success. + * \return \c 0 if no information about kinds was found. + * \return \c -1 with \p errno set to \c EINVAL if \p flags is invalid. 
+ */ +HWLOC_DECLSPEC int +hwloc_cpukinds_get_nr(hwloc_topology_t topology, + unsigned long flags); + +/** \brief Get the index of the CPU kind that contains CPUs listed in \p cpuset. + * + * \p flags must be \c 0 for now. + * + * \return The index of the CPU kind (positive integer or 0) on success. + * \return \c -1 with \p errno set to \c EXDEV if \p cpuset is + * only partially included in the some kind. + * \return \c -1 with \p errno set to \c ENOENT if \p cpuset is + * not included in any kind, even partially. + * \return \c -1 with \p errno set to \c EINVAL if parameters are invalid. + */ +HWLOC_DECLSPEC int +hwloc_cpukinds_get_by_cpuset(hwloc_topology_t topology, + hwloc_const_bitmap_t cpuset, + unsigned long flags); + +/** \brief Get the CPU set and infos about a CPU kind in the topology. + * + * \p kind_index identifies one kind of CPU between 0 and the number + * of kinds returned by hwloc_cpukinds_get_nr() minus 1. + * + * If not \c NULL, the bitmap \p cpuset will be filled with + * the set of PUs of this kind. + * + * The integer pointed by \p efficiency, if not \c NULL will, be filled + * with the ranking of this kind of CPU in term of efficiency (see above). + * It ranges from \c 0 to the number of kinds + * (as reported by hwloc_cpukinds_get_nr()) minus 1. + * + * Kinds with lower efficiency are reported first. + * + * If there is a single kind in the topology, its efficiency \c 0. + * If the efficiency of some kinds of cores is unknown, + * the efficiency of all kinds is set to \c -1, + * and kinds are reported in no specific order. + * + * The array of info attributes (for instance the "CoreType", + * "FrequencyMaxMHz" or "FrequencyBaseMHz", see \ref topoattrs_cpukinds) + * and its length are returned in \p infos or \p nr_infos. + * The array belongs to the topology, it should not be freed or modified. + * + * If \p nr_infos or \p infos is \c NULL, no info is returned. + * + * \p flags must be \c 0 for now. + * + * \return \c 0 on success. + * \return \c -1 with \p errno set to \c ENOENT if \p kind_index does not match any CPU kind. + * \return \c -1 with \p errno set to \c EINVAL if parameters are invalid. + */ +HWLOC_DECLSPEC int +hwloc_cpukinds_get_info(hwloc_topology_t topology, + unsigned kind_index, + hwloc_bitmap_t cpuset, + int *efficiency, + unsigned *nr_infos, struct hwloc_info_s **infos, + unsigned long flags); + +/** \brief Register a kind of CPU in the topology. + * + * Mark the PUs listed in \p cpuset as being of the same kind + * with respect to the given attributes. + * + * \p forced_efficiency should be \c -1 if unknown. + * Otherwise it is an abstracted efficiency value to enforce + * the ranking of all kinds if all of them have valid (and + * different) efficiencies. + * + * The array \p infos of size \p nr_infos may be used to provide + * info names and values describing this kind of PUs. + * + * \p flags must be \c 0 for now. + * + * Parameters \p cpuset and \p infos will be duplicated internally, + * the caller is responsible for freeing them. + * + * If \p cpuset overlaps with some existing kinds, those might get + * modified or split. For instance if existing kind A contains + * PUs 0 and 1, and one registers another kind for PU 1 and 2, + * there will be 3 resulting kinds: + * existing kind A is restricted to only PU 0; + * new kind B contains only PU 1 and combines information from A + * and from the newly-registered kind; + * new kind C contains only PU 2 and only gets information from + * the newly-registered kind. 
+ * + * \note The efficiency \p forced_efficiency provided to this function + * may be different from the one reported later by hwloc_cpukinds_get_info() + * because hwloc will scale efficiency values down to + * between 0 and the number of kinds minus 1. + * + * \return \c 0 on success. + * \return \c -1 with \p errno set to \c EINVAL if some parameters are invalid, + * for instance if \p cpuset is \c NULL or empty. + */ +HWLOC_DECLSPEC int +hwloc_cpukinds_register(hwloc_topology_t topology, + hwloc_bitmap_t cpuset, + int forced_efficiency, + unsigned nr_infos, struct hwloc_info_s *infos, + unsigned long flags); + +/** @} */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_CPUKINDS_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/cuda.h b/src/3rdparty/hwloc/include/hwloc/cuda.h index 6f0cda4c..582270d1 100644 --- a/src/3rdparty/hwloc/include/hwloc/cuda.h +++ b/src/3rdparty/hwloc/include/hwloc/cuda.h @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2017 Inria. All rights reserved. + * Copyright © 2010-2020 Inria. All rights reserved. * Copyright © 2010-2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -72,7 +72,7 @@ hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused return 0; } -/** \brief Get the CPU set of logical processors that are physically +/** \brief Get the CPU set of processors that are physically * close to device \p cudevice. * * Return the CPU set describing the locality of the CUDA device \p cudevice. diff --git a/src/3rdparty/hwloc/include/hwloc/cudart.h b/src/3rdparty/hwloc/include/hwloc/cudart.h index 688b8421..059727ae 100644 --- a/src/3rdparty/hwloc/include/hwloc/cudart.h +++ b/src/3rdparty/hwloc/include/hwloc/cudart.h @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2017 Inria. All rights reserved. + * Copyright © 2010-2020 Inria. All rights reserved. * Copyright © 2010-2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -69,7 +69,7 @@ hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unus return 0; } -/** \brief Get the CPU set of logical processors that are physically +/** \brief Get the CPU set of processors that are physically * close to device \p idx. * * Return the CPU set describing the locality of the CUDA device diff --git a/src/3rdparty/hwloc/include/hwloc/diff.h b/src/3rdparty/hwloc/include/hwloc/diff.h index 79f2df3d..0ad0486b 100644 --- a/src/3rdparty/hwloc/include/hwloc/diff.h +++ b/src/3rdparty/hwloc/include/hwloc/diff.h @@ -1,5 +1,5 @@ /* - * Copyright © 2013-2018 Inria. All rights reserved. + * Copyright © 2013-2020 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -110,7 +110,7 @@ union hwloc_topology_diff_obj_attr_u { */ typedef enum hwloc_topology_diff_type_e { /** \brief An object attribute was changed. - * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_s. + * The union is a hwloc_topology_diff_u::hwloc_topology_diff_obj_attr_s. */ HWLOC_TOPOLOGY_DIFF_OBJ_ATTR, @@ -119,7 +119,7 @@ typedef enum hwloc_topology_diff_type_e { * this object has not been checked. * hwloc_topology_diff_build() will return 1. * - * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_too_complex_s. + * The union is a hwloc_topology_diff_u::hwloc_topology_diff_too_complex_s. 
*/ HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX } hwloc_topology_diff_type_t; diff --git a/src/3rdparty/hwloc/include/hwloc/distances.h b/src/3rdparty/hwloc/include/hwloc/distances.h index b7baed8a..57e53cd5 100644 --- a/src/3rdparty/hwloc/include/hwloc/distances.h +++ b/src/3rdparty/hwloc/include/hwloc/distances.h @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2019 Inria. All rights reserved. + * Copyright © 2010-2020 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -34,6 +34,7 @@ extern "C" { * It corresponds to the latency for accessing the memory of one node * from a core in another node. * The corresponding kind is ::HWLOC_DISTANCES_KIND_FROM_OS | ::HWLOC_DISTANCES_KIND_FROM_USER. + * The name of this distances structure is "NUMALatency". * * The matrix may also contain bandwidths between random sets of objects, * possibly provided by the user, as specified in the \p kind attribute. @@ -144,6 +145,8 @@ hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type, /** \brief Retrieve a distance matrix with the given name. * * Usually only one distances structure may match a given name. + * + * The name of the most common structure is "NUMALatency". */ HWLOC_DECLSPEC int hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name, diff --git a/src/3rdparty/hwloc/include/hwloc/glibc-sched.h b/src/3rdparty/hwloc/include/hwloc/glibc-sched.h index 99659e03..3c5368be 100644 --- a/src/3rdparty/hwloc/include/hwloc/glibc-sched.h +++ b/src/3rdparty/hwloc/include/hwloc/glibc-sched.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2013 inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -22,7 +22,7 @@ #include -#if !defined _GNU_SOURCE || !defined _SCHED_H || (!defined CPU_SETSIZE && !defined sched_priority) +#if !defined _GNU_SOURCE || (!defined _SCHED_H && !defined _SCHED_H_) || (!defined CPU_SETSIZE && !defined sched_priority) #error Please make sure to include sched.h before including glibc-sched.h, and define _GNU_SOURCE before any inclusion of sched.h #endif diff --git a/src/3rdparty/hwloc/include/hwloc/helper.h b/src/3rdparty/hwloc/include/hwloc/helper.h index 3df64843..8e4d4532 100644 --- a/src/3rdparty/hwloc/include/hwloc/helper.h +++ b/src/3rdparty/hwloc/include/hwloc/helper.h @@ -872,8 +872,8 @@ hwloc_distrib(hwloc_topology_t topology, unsigned chunk, weight; hwloc_obj_t root = roots[flags & HWLOC_DISTRIB_FLAG_REVERSE ? n_roots-1-i : i]; hwloc_cpuset_t cpuset = root->cpuset; - if (root->type == HWLOC_OBJ_NUMANODE) - /* NUMANodes have same cpuset as their parent, but we need normal objects below */ + while (!hwloc_obj_type_is_normal(root->type)) + /* If memory/io/misc, walk up to normal parent */ root = root->parent; weight = (unsigned) hwloc_bitmap_weight(cpuset); if (!weight) @@ -919,7 +919,7 @@ hwloc_distrib(hwloc_topology_t topology, /** \brief Get complete CPU set * - * \return the complete CPU set of logical processors of the system. + * \return the complete CPU set of processors of the system. * * \note The returned cpuset is not newly allocated and should thus not be * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. 
@@ -931,7 +931,7 @@ hwloc_topology_get_complete_cpuset(hwloc_topology_t topology) __hwloc_attribute_ /** \brief Get topology CPU set * - * \return the CPU set of logical processors of the system for which hwloc + * \return the CPU set of processors of the system for which hwloc * provides topology information. This is equivalent to the cpuset of the * system object. * @@ -945,7 +945,7 @@ hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_ /** \brief Get allowed CPU set * - * \return the CPU set of allowed logical processors of the system. + * \return the CPU set of allowed processors of the system. * * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was not set, * this is identical to hwloc_topology_get_topology_cpuset(), which means diff --git a/src/3rdparty/hwloc/include/hwloc/memattrs.h b/src/3rdparty/hwloc/include/hwloc/memattrs.h new file mode 100644 index 00000000..2494abb0 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/memattrs.h @@ -0,0 +1,455 @@ +/* + * Copyright © 2019-2020 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Memory node attributes. + */ + +#ifndef HWLOC_MEMATTR_H +#define HWLOC_MEMATTR_H + +#include "hwloc.h" + +#ifdef __cplusplus +extern "C" { +#elif 0 +} +#endif + +/** \defgroup hwlocality_memattrs Comparing memory node attributes for finding where to allocate on + * + * Platforms with heterogeneous memory require ways to decide whether + * a buffer should be allocated on "fast" memory (such as HBM), + * "normal" memory (DDR) or even "slow" but large-capacity memory + * (non-volatile memory). + * These memory nodes are called "Targets" while the CPU accessing them + * is called the "Initiator". Access performance depends on their + * locality (NUMA platforms) as well as the intrinsic performance + * of the targets (heterogeneous platforms). + * + * The following attributes describe the performance of memory accesses + * from an Initiator to a memory Target, for instance their latency + * or bandwidth. + * Initiators performing these memory accesses are usually some PUs or Cores + * (described as a CPU set). + * Hence a Core may choose where to allocate a memory buffer by comparing + * the attributes of different target memory nodes nearby. + * + * There are also some attributes that are system-wide. + * Their value does not depend on a specific initiator performing + * an access. + * The memory node Capacity is an example of such attribute without + * initiator. + * + * One way to use this API is to start with a cpuset describing the Cores where + * a program is bound. The best target NUMA node for allocating memory in this + * program on these Cores may be obtained by passing this cpuset as an initiator + * to hwloc_memattr_get_best_target() with the relevant memory attribute. + * For instance, if the code is latency limited, use the Latency attribute. + * + * A more flexible approach consists in getting the list of local NUMA nodes + * by passing this cpuset to hwloc_get_local_numanode_objs(). + * Attribute values for these nodes, if any, may then be obtained with + * hwloc_memattr_get_value() and manually compared with the desired criteria. + * + * \note The API also supports specific objects as initiator, + * but it is currently not used internally by hwloc. + * Users may for instance use it to provide custom performance + * values for host memory accesses performed by GPUs. + * + * \note The interface actually also accepts targets that are not NUMA nodes. 
+ * @{ + */ + +/** \brief Memory node attributes. */ +enum hwloc_memattr_id_e { + /** \brief "Capacity". + * The capacity is returned in bytes + * (local_memory attribute in objects). + * + * Best capacity nodes are nodes with higher capacity. + * + * No initiator is involved when looking at this attribute. + * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST. + */ + HWLOC_MEMATTR_ID_CAPACITY = 0, + + /** \brief "Locality". + * The locality is returned as the number of PUs in that locality + * (e.g. the weight of its cpuset). + * + * Best locality nodes are nodes with smaller locality + * (nodes that are local to very few PUs). + * Poor locality nodes are nodes with larger locality + * (nodes that are local to the entire machine). + * + * No initiator is involved when looking at this attribute. + * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST. + */ + HWLOC_MEMATTR_ID_LOCALITY = 1, + + /** \brief "Bandwidth". + * The bandwidth is returned in MiB/s, as seen from the given initiator location. + * Best bandwidth nodes are nodes with higher bandwidth. + * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST + * and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR. + */ + HWLOC_MEMATTR_ID_BANDWIDTH = 2, + + /** \brief "Latency". + * The latency is returned as nanoseconds, as seen from the given initiator location. + * Best latency nodes are nodes with smaller latency. + * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_LOWER_FIRST + * and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR. + */ + HWLOC_MEMATTR_ID_LATENCY = 3 + + /* TODO read vs write, persistence? */ +}; + +/** \brief A memory attribute identifier. + * May be either one of ::hwloc_memattr_id_e or a new id returned by hwloc_memattr_register(). + */ +typedef unsigned hwloc_memattr_id_t; + +/** \brief Return the identifier of the memory attribute with the given name. + */ +HWLOC_DECLSPEC int +hwloc_memattr_get_by_name(hwloc_topology_t topology, + const char *name, + hwloc_memattr_id_t *id); + + +/** \brief Type of location. */ +enum hwloc_location_type_e { + /** \brief Location is given as a cpuset, in the location cpuset union field. \hideinitializer */ + HWLOC_LOCATION_TYPE_CPUSET = 1, + /** \brief Location is given as an object, in the location object union field. \hideinitializer */ + HWLOC_LOCATION_TYPE_OBJECT = 0 +}; + +/** \brief Where to measure attributes from. */ +struct hwloc_location { + /** \brief Type of location. */ + enum hwloc_location_type_e type; + /** \brief Actual location. */ + union hwloc_location_u { + /** \brief Location as a cpuset, when the location type is ::HWLOC_LOCATION_TYPE_CPUSET. */ + hwloc_cpuset_t cpuset; + /** \brief Location as an object, when the location type is ::HWLOC_LOCATION_TYPE_OBJECT. */ + hwloc_obj_t object; + } location; +}; + + +/** \brief Flags for selecting target NUMA nodes. */ +enum hwloc_local_numanode_flag_e { + /** \brief Select NUMA nodes whose locality is larger than the given cpuset. + * For instance, if a single PU (or its cpuset) is given in \p initiator, + * select all nodes close to the package that contains this PU. + * \hideinitializer + */ + HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY = (1UL<<0), + + /** \brief Select NUMA nodes whose locality is smaller than the given cpuset. + * For instance, if a package (or its cpuset) is given in \p initiator, + * also select nodes that are attached to only a half of that package. 
+ * \hideinitializer + */ + HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY = (1UL<<1), + + /** \brief Select all NUMA nodes in the topology. + * The initiator \p initiator is ignored. + * \hideinitializer + */ + HWLOC_LOCAL_NUMANODE_FLAG_ALL = (1UL<<2) +}; + +/** \brief Return an array of local NUMA nodes. + * + * By default only select the NUMA nodes whose locality is exactly + * the given \p location. More nodes may be selected if additional flags + * are given as a OR'ed set of ::hwloc_local_numanode_flag_e. + * + * If \p location is given as an explicit object, its CPU set is used + * to find NUMA nodes with the corresponding locality. + * If the object does not have a CPU set (e.g. I/O object), the CPU + * parent (where the I/O object is attached) is used. + * + * On input, \p nr points to the number of nodes that may be stored + * in the \p nodes array. + * On output, \p nr will be changed to the number of stored nodes, + * or the number of nodes that would have been stored if there were + * enough room. + * + * \note Some of these NUMA nodes may not have any memory attribute + * values and hence not be reported as actual targets in other functions. + * + * \note The number of NUMA nodes in the topology (obtained by + * hwloc_bitmap_weight() on the root object nodeset) may be used + * to allocate the \p nodes array. + * + * \note When an object CPU set is given as locality, for instance a Package, + * and when flags contain both ::HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY + * and ::HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY, + * the returned array corresponds to the nodeset of that object. + */ +HWLOC_DECLSPEC int +hwloc_get_local_numanode_objs(hwloc_topology_t topology, + struct hwloc_location *location, + unsigned *nr, + hwloc_obj_t *nodes, + unsigned long flags); + + + +/** \brief Return an attribute value for a specific target NUMA node. + * + * If the attribute does not relate to a specific initiator + * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), + * location \p initiator is ignored and may be \c NULL. + * + * \p flags must be \c 0 for now. + * + * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET + * when refering to accesses performed by CPU cores. + * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, + * but users may for instance use it to provide custom information about + * host memory accesses performed by GPUs. + */ +HWLOC_DECLSPEC int +hwloc_memattr_get_value(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + hwloc_obj_t target_node, + struct hwloc_location *initiator, + unsigned long flags, + hwloc_uint64_t *value); + +/** \brief Return the best target NUMA node for the given attribute and initiator. + * + * If the attribute does not relate to a specific initiator + * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), + * location \p initiator is ignored and may be \c NULL. + * + * If \p value is non \c NULL, the corresponding value is returned there. + * + * If multiple targets have the same attribute values, only one is + * returned (and there is no way to clarify how that one is chosen). + * Applications that want to detect targets with identical/similar + * values, or that want to look at values for multiple attributes, + * should rather get all values using hwloc_memattr_get_value() + * and manually select the target they consider the best. + * + * \p flags must be \c 0 for now. 
+ * + * If there are no matching targets, \c -1 is returned with \p errno set to \c ENOENT; + * + * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET + * when refering to accesses performed by CPU cores. + * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, + * but users may for instance use it to provide custom information about + * host memory accesses performed by GPUs. + */ +HWLOC_DECLSPEC int +hwloc_memattr_get_best_target(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + struct hwloc_location *initiator, + unsigned long flags, + hwloc_obj_t *best_target, hwloc_uint64_t *value); + +/** \brief Return the best initiator for the given attribute and target NUMA node. + * + * If the attribute does not relate to a specific initiator + * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), + * \c -1 is returned and \p errno is set to \c EINVAL. + * + * If \p value is non \c NULL, the corresponding value is returned there. + * + * If multiple initiators have the same attribute values, only one is + * returned (and there is no way to clarify how that one is chosen). + * Applications that want to detect initiators with identical/similar + * values, or that want to look at values for multiple attributes, + * should rather get all values using hwloc_memattr_get_value() + * and manually select the initiator they consider the best. + * + * The returned initiator should not be modified or freed, + * it belongs to the topology. + * + * \p flags must be \c 0 for now. + * + * If there are no matching initiators, \c -1 is returned with \p errno set to \c ENOENT; + */ +HWLOC_DECLSPEC int +hwloc_memattr_get_best_initiator(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + hwloc_obj_t target, + unsigned long flags, + struct hwloc_location *best_initiator, hwloc_uint64_t *value); + +/** @} */ + + +/** \defgroup hwlocality_memattrs_manage Managing memory attributes + * @{ + */ + +/** \brief Return the name of a memory attribute. + */ +HWLOC_DECLSPEC int +hwloc_memattr_get_name(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + const char **name); + +/** \brief Return the flags of the given attribute. + * + * Flags are a OR'ed set of ::hwloc_memattr_flag_e. + */ +HWLOC_DECLSPEC int +hwloc_memattr_get_flags(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + unsigned long *flags); + +/** \brief Memory attribute flags. + * Given to hwloc_memattr_register() and returned by hwloc_memattr_get_flags(). + */ +enum hwloc_memattr_flag_e { + /** \brief The best nodes for this memory attribute are those with the higher values. + * For instance Bandwidth. + */ + HWLOC_MEMATTR_FLAG_HIGHER_FIRST = (1UL<<0), + /** \brief The best nodes for this memory attribute are those with the lower values. + * For instance Latency. + */ + HWLOC_MEMATTR_FLAG_LOWER_FIRST = (1UL<<1), + /** \brief The value returned for this memory attribute depends on the given initiator. + * For instance Bandwidth and Latency, but not Capacity. + */ + HWLOC_MEMATTR_FLAG_NEED_INITIATOR = (1UL<<2) +}; + +/** \brief Register a new memory attribute. + * + * Add a specific memory attribute that is not defined in ::hwloc_memattr_id_e. + * Flags are a OR'ed set of ::hwloc_memattr_flag_e. It must contain at least + * one of ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST or ::HWLOC_MEMATTR_FLAG_LOWER_FIRST. 
+ */ +HWLOC_DECLSPEC int +hwloc_memattr_register(hwloc_topology_t topology, + const char *name, + unsigned long flags, + hwloc_memattr_id_t *id); + +/** \brief Set an attribute value for a specific target NUMA node. + * + * If the attribute does not relate to a specific initiator + * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), + * location \p initiator is ignored and may be \c NULL. + * + * The initiator will be copied into the topology, + * the caller should free anything allocated to store the initiator, + * for instance the cpuset. + * + * \p flags must be \c 0 for now. + * + * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET + * when refering to accesses performed by CPU cores. + * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, + * but users may for instance use it to provide custom information about + * host memory accesses performed by GPUs. + */ +HWLOC_DECLSPEC int +hwloc_memattr_set_value(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + hwloc_obj_t target_node, + struct hwloc_location *initiator, + unsigned long flags, + hwloc_uint64_t value); + +/** \brief Return the target NUMA nodes that have some values for a given attribute. + * + * Return targets for the given attribute in the \p targets array + * (for the given initiator if any). + * If \p values is not \c NULL, the corresponding attribute values + * are stored in the array it points to. + * + * On input, \p nr points to the number of targets that may be stored + * in the array \p targets (and \p values). + * On output, \p nr points to the number of targets (and values) that + * were actually found, even if some of them couldn't be stored in the array. + * Targets that couldn't be stored are ignored, but the function still + * returns success (\c 0). The caller may find out by comparing the value pointed + * by \p nr before and after the function call. + * + * The returned targets should not be modified or freed, + * they belong to the topology. + * + * Argument \p initiator is ignored if the attribute does not relate to a specific + * initiator (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR). + * Otherwise \p initiator may be non \c NULL to report only targets + * that have a value for that initiator. + * + * \p flags must be \c 0 for now. + * + * \note This function is meant for tools and debugging (listing internal information) + * rather than for application queries. Applications should rather select useful + * NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute + * values. + * + * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET + * when refering to accesses performed by CPU cores. + * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, + * but users may for instance use it to provide custom information about + * host memory accesses performed by GPUs. + */ +HWLOC_DECLSPEC int +hwloc_memattr_get_targets(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + struct hwloc_location *initiator, + unsigned long flags, + unsigned *nrp, hwloc_obj_t *targets, hwloc_uint64_t *values); + +/** \brief Return the initiators that have values for a given attribute for a specific target NUMA node. + * + * Return initiators for the given attribute and target node in the + * \p initiators array. + * If \p values is not \c NULL, the corresponding attribute values + * are stored in the array it points to. 
+ * + * On input, \p nr points to the number of initiators that may be stored + * in the array \p initiators (and \p values). + * On output, \p nr points to the number of initiators (and values) that + * were actually found, even if some of them couldn't be stored in the array. + * Initiators that couldn't be stored are ignored, but the function still + * returns success (\c 0). The caller may find out by comparing the value pointed + * by \p nr before and after the function call. + * + * The returned initiators should not be modified or freed, + * they belong to the topology. + * + * \p flags must be \c 0 for now. + * + * If the attribute does not relate to a specific initiator + * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), + * no initiator is returned. + * + * \note This function is meant for tools and debugging (listing internal information) + * rather than for application queries. Applications should rather select useful + * NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute + * values for some relevant initiators. + */ +HWLOC_DECLSPEC int +hwloc_memattr_get_initiators(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + hwloc_obj_t target_node, + unsigned long flags, + unsigned *nr, struct hwloc_location *initiators, hwloc_uint64_t *values); +/** @} */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_MEMATTR_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/nvml.h b/src/3rdparty/hwloc/include/hwloc/nvml.h index 1bc2599f..9d578903 100644 --- a/src/3rdparty/hwloc/include/hwloc/nvml.h +++ b/src/3rdparty/hwloc/include/hwloc/nvml.h @@ -1,5 +1,5 @@ /* - * Copyright © 2012-2016 Inria. All rights reserved. + * Copyright © 2012-2020 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -36,7 +36,7 @@ extern "C" { * @{ */ -/** \brief Get the CPU set of logical processors that are physically +/** \brief Get the CPU set of processors that are physically * close to NVML device \p device. * * Return the CPU set describing the locality of the NVML device \p device. diff --git a/src/3rdparty/hwloc/include/hwloc/opencl.h b/src/3rdparty/hwloc/include/hwloc/opencl.h index 99dfb0c8..d498d606 100644 --- a/src/3rdparty/hwloc/include/hwloc/opencl.h +++ b/src/3rdparty/hwloc/include/hwloc/opencl.h @@ -1,5 +1,5 @@ /* - * Copyright © 2012-2019 Inria. All rights reserved. + * Copyright © 2012-2020 Inria. All rights reserved. * Copyright © 2013, 2018 Université Bordeaux. All right reserved. * See COPYING in top-level directory. */ @@ -109,7 +109,7 @@ hwloc_opencl_get_device_pci_busid(cl_device_id device, return -1; } -/** \brief Get the CPU set of logical processors that are physically +/** \brief Get the CPU set of processors that are physically * close to OpenCL device \p device. * * Return the CPU set describing the locality of the OpenCL device \p device. diff --git a/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h b/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h index d247a8b1..bbf25d0f 100644 --- a/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h +++ b/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2016 Inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2010 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
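A minimal consumer-side sketch of the memory-attribute query API added above; it is illustrative only and not taken from the patch. It assumes hwloc >= 2.3 headers, standard hwloc calls that do not appear in this diff (hwloc_topology_init/load/destroy, hwloc_bitmap_dup/free, hwloc_topology_get_topology_cpuset), and a platform that actually reports bandwidth values (otherwise the query fails with ENOENT):

#include <stdio.h>
#include <hwloc.h>
#include <hwloc/memattrs.h>

int main(void)
{
    hwloc_topology_t topology;
    struct hwloc_location initiator;
    hwloc_cpuset_t cpuset;
    hwloc_obj_t best = NULL;
    hwloc_uint64_t bw = 0;

    hwloc_topology_init(&topology);
    hwloc_topology_load(topology);

    /* initiator = accesses performed by all CPUs of the machine */
    cpuset = hwloc_bitmap_dup(hwloc_topology_get_topology_cpuset(topology));
    initiator.type = HWLOC_LOCATION_TYPE_CPUSET;
    initiator.location.cpuset = cpuset;

    if (hwloc_memattr_get_best_target(topology, HWLOC_MEMATTR_ID_BANDWIDTH,
                                      &initiator, 0, &best, &bw) == 0)
        printf("highest-bandwidth NUMA node: L#%u (value %llu)\n",
               best->logical_index, (unsigned long long) bw);
    else
        perror("hwloc_memattr_get_best_target");

    hwloc_bitmap_free(cpuset);
    hwloc_topology_destroy(topology);
    return 0;
}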
@@ -41,7 +41,7 @@ extern "C" { * @{ */ -/** \brief Get the CPU set of logical processors that are physically +/** \brief Get the CPU set of processors that are physically * close to device \p ibdev. * * Return the CPU set describing the locality of the OpenFabrics diff --git a/src/3rdparty/hwloc/include/hwloc/plugins.h b/src/3rdparty/hwloc/include/hwloc/plugins.h index 88faf538..06e1c3e9 100644 --- a/src/3rdparty/hwloc/include/hwloc/plugins.h +++ b/src/3rdparty/hwloc/include/hwloc/plugins.h @@ -313,7 +313,13 @@ struct hwloc_component { * @{ */ +/** \brief Check whether insertion errors are hidden */ +HWLOC_DECLSPEC int hwloc_hide_errors(void); + /** \brief Add an object to the topology. + * + * Insert new object \p obj in the topology starting under existing object \p root + * (if \c NULL, the topology root object is used). * * It is sorted along the tree of other objects according to the inclusion of * cpusets, to eventually be added as a child of the smallest object including @@ -327,32 +333,20 @@ struct hwloc_component { * * This shall only be called before levels are built. * - * In case of error, hwloc_report_os_error() is called. - * * The caller should check whether the object type is filtered-out before calling this function. * * The topology cpuset/nodesets will be enlarged to include the object sets. * + * \p reason is a unique string identifying where and why this insertion call was performed + * (it will be displayed in case of internal insertion error). + * * Returns the object on success. * Returns NULL and frees obj on error. * Returns another object and frees obj if it was merged with an identical pre-existing object. */ -HWLOC_DECLSPEC struct hwloc_obj *hwloc_insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj); - -/** \brief Type of error callbacks during object insertion */ -typedef void (*hwloc_report_error_t)(const char * msg, int line); -/** \brief Report an insertion error from a backend */ -HWLOC_DECLSPEC void hwloc_report_os_error(const char * msg, int line); -/** \brief Check whether insertion errors are hidden */ -HWLOC_DECLSPEC int hwloc_hide_errors(void); - -/** \brief Add an object to the topology and specify which error callback to use. - * - * This function is similar to hwloc_insert_object_by_cpuset() but it allows specifying - * where to start insertion from (if \p root is NULL, the topology root object is used), - * and specifying the error callback. - */ -HWLOC_DECLSPEC struct hwloc_obj *hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t root, hwloc_obj_t obj, hwloc_report_error_t report_error); +HWLOC_DECLSPEC hwloc_obj_t +hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t root, + hwloc_obj_t obj, const char *reason); /** \brief Insert an object somewhere in the topology. * diff --git a/src/3rdparty/hwloc/include/hwloc/rename.h b/src/3rdparty/hwloc/include/hwloc/rename.h index 224e2577..c2a30485 100644 --- a/src/3rdparty/hwloc/include/hwloc/rename.h +++ b/src/3rdparty/hwloc/include/hwloc/rename.h @@ -1,6 +1,6 @@ /* * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. - * Copyright © 2010-2019 Inria. All rights reserved. + * Copyright © 2010-2020 Inria. All rights reserved. * See COPYING in top-level directory. 
*/ @@ -119,6 +119,7 @@ extern "C" { #define HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WITH_DISALLOWED) #define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM) #define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES) +#define HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IMPORT_SUPPORT) #define hwloc_topology_set_pid HWLOC_NAME(topology_set_pid) #define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic) @@ -134,6 +135,7 @@ extern "C" { #define hwloc_topology_discovery_support HWLOC_NAME(topology_discovery_support) #define hwloc_topology_cpubind_support HWLOC_NAME(topology_cpubind_support) #define hwloc_topology_membind_support HWLOC_NAME(topology_membind_support) +#define hwloc_topology_misc_support HWLOC_NAME(topology_misc_support) #define hwloc_topology_support HWLOC_NAME(topology_support) #define hwloc_topology_get_support HWLOC_NAME(topology_get_support) @@ -170,6 +172,7 @@ extern "C" { #define hwloc_topology_alloc_group_object HWLOC_NAME(topology_alloc_group_object) #define hwloc_topology_insert_group_object HWLOC_NAME(topology_insert_group_object) #define hwloc_obj_add_other_obj_sets HWLOC_NAME(obj_add_other_obj_sets) +#define hwloc_topology_refresh HWLOC_NAME(topology_refresh) #define hwloc_topology_get_depth HWLOC_NAME(topology_get_depth) #define hwloc_get_type_depth HWLOC_NAME(get_type_depth) @@ -367,6 +370,51 @@ extern "C" { #define hwloc_cpuset_to_nodeset HWLOC_NAME(cpuset_to_nodeset) #define hwloc_cpuset_from_nodeset HWLOC_NAME(cpuset_from_nodeset) +/* memattrs.h */ + +#define hwloc_memattr_id_e HWLOC_NAME(memattr_id_e) +#define HWLOC_MEMATTR_ID_CAPACITY HWLOC_NAME_CAPS(MEMATTR_ID_CAPACITY) +#define HWLOC_MEMATTR_ID_LOCALITY HWLOC_NAME_CAPS(MEMATTR_ID_LOCALITY) +#define HWLOC_MEMATTR_ID_BANDWIDTH HWLOC_NAME_CAPS(MEMATTR_ID_BANDWIDTH) +#define HWLOC_MEMATTR_ID_LATENCY HWLOC_NAME_CAPS(MEMATTR_ID_LATENCY) + +#define hwloc_memattr_id_t HWLOC_NAME(memattr_id_t) +#define hwloc_memattr_get_by_name HWLOC_NAME(memattr_get_by_name) + +#define hwloc_location HWLOC_NAME(location) +#define hwloc_location_type_e HWLOC_NAME(location_type_e) +#define HWLOC_LOCATION_TYPE_OBJECT HWLOC_NAME_CAPS(LOCATION_TYPE_OBJECT) +#define HWLOC_LOCATION_TYPE_CPUSET HWLOC_NAME_CAPS(LOCATION_TYPE_CPUSET) +#define hwloc_location_u HWLOC_NAME(location_u) + +#define hwloc_memattr_get_value HWLOC_NAME(memattr_get_value) +#define hwloc_memattr_get_best_target HWLOC_NAME(memattr_get_best_target) +#define hwloc_memattr_get_best_initiator HWLOC_NAME(memattr_get_best_initiator) + +#define hwloc_local_numanode_flag_e HWLOC_NAME(local_numanode_flag_e) +#define HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_LARGER_LOCALITY) +#define HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY) +#define HWLOC_LOCAL_NUMANODE_FLAG_ALL HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_ALL) +#define hwloc_get_local_numanode_objs HWLOC_NAME(get_local_numanode_objs) + +#define hwloc_memattr_get_name HWLOC_NAME(memattr_get_name) +#define hwloc_memattr_get_flags HWLOC_NAME(memattr_get_flags) +#define hwloc_memattr_flag_e HWLOC_NAME(memattr_flag_e) +#define HWLOC_MEMATTR_FLAG_HIGHER_FIRST HWLOC_NAME_CAPS(MEMATTR_FLAG_HIGHER_FIRST) +#define HWLOC_MEMATTR_FLAG_LOWER_FIRST HWLOC_NAME_CAPS(MEMATTR_FLAG_LOWER_FIRST) +#define HWLOC_MEMATTR_FLAG_NEED_INITIATOR HWLOC_NAME_CAPS(MEMATTR_FLAG_NEED_INITIATOR) +#define hwloc_memattr_register 
HWLOC_NAME(memattr_register) +#define hwloc_memattr_set_value HWLOC_NAME(memattr_set_value) +#define hwloc_memattr_get_targets HWLOC_NAME(memattr_get_targets) +#define hwloc_memattr_get_initiators HWLOC_NAME(memattr_get_initiators) + +/* cpukinds.h */ + +#define hwloc_cpukinds_get_nr HWLOC_NAME(cpukinds_get_nr) +#define hwloc_cpukinds_get_by_cpuset HWLOC_NAME(cpukinds_get_by_cpuset) +#define hwloc_cpukinds_get_info HWLOC_NAME(cpukinds_get_info) +#define hwloc_cpukinds_register HWLOC_NAME(cpukinds_register) + /* export.h */ #define hwloc_topology_export_xml_flags_e HWLOC_NAME(topology_export_xml_flags_e) @@ -510,6 +558,12 @@ extern "C" { #define hwloc_nvml_get_device_osdev HWLOC_NAME(nvml_get_device_osdev) #define hwloc_nvml_get_device_osdev_by_index HWLOC_NAME(nvml_get_device_osdev_by_index) +/* rsmi.h */ + +#define hwloc_rsmi_get_device_cpuset HWLOC_NAME(rsmi_get_device_cpuset) +#define hwloc_rsmi_get_device_osdev HWLOC_NAME(rsmi_get_device_osdev) +#define hwloc_rsmi_get_device_osdev_by_index HWLOC_NAME(rsmi_get_device_osdev_by_index) + /* gl.h */ #define hwloc_gl_get_display_osdev_by_port_device HWLOC_NAME(gl_get_display_osdev_by_port_device) @@ -547,9 +601,6 @@ extern "C" { #define hwloc_plugin_check_namespace HWLOC_NAME(plugin_check_namespace) -#define hwloc_insert_object_by_cpuset HWLOC_NAME(insert_object_by_cpuset) -#define hwloc_report_error_t HWLOC_NAME(report_error_t) -#define hwloc_report_os_error HWLOC_NAME(report_os_error) #define hwloc_hide_errors HWLOC_NAME(hide_errors) #define hwloc__insert_object_by_cpuset HWLOC_NAME(_insert_object_by_cpuset) #define hwloc_insert_object_by_parent HWLOC_NAME(insert_object_by_parent) @@ -683,6 +734,7 @@ extern "C" { #define hwloc_cuda_component HWLOC_NAME(cuda_component) #define hwloc_gl_component HWLOC_NAME(gl_component) #define hwloc_nvml_component HWLOC_NAME(nvml_component) +#define hwloc_rsmi_component HWLOC_NAME(rsmi_component) #define hwloc_opencl_component HWLOC_NAME(opencl_component) #define hwloc_pci_component HWLOC_NAME(pci_component) @@ -691,6 +743,8 @@ extern "C" { /* private/private.h */ +#define hwloc_internal_location_s HWLOC_NAME(internal_location_s) + #define hwloc_special_level_s HWLOC_NAME(special_level_s) #define hwloc_pci_forced_locality_s HWLOC_NAME(pci_forced_locality_s) @@ -713,6 +767,8 @@ extern "C" { #define hwloc__attach_memory_object HWLOC_NAME(insert_memory_object) +#define hwloc_get_obj_by_type_and_gp_index HWLOC_NAME(get_obj_by_type_and_gp_index) + #define hwloc_pci_discovery_init HWLOC_NAME(pci_discovery_init) #define hwloc_pci_discovery_prepare HWLOC_NAME(pci_discovery_prepare) #define hwloc_pci_discovery_exit HWLOC_NAME(pci_discovery_exit) @@ -723,6 +779,7 @@ extern "C" { #define hwloc__add_info_nodup HWLOC_NAME(_add_info_nodup) #define hwloc__move_infos HWLOC_NAME(_move_infos) #define hwloc__free_infos HWLOC_NAME(_free_infos) +#define hwloc__tma_dup_infos HWLOC_NAME(_tma_dup_infos) #define hwloc_binding_hooks HWLOC_NAME(binding_hooks) #define hwloc_set_native_binding_hooks HWLOC_NAME(set_native_binding_hooks) @@ -764,6 +821,24 @@ extern "C" { #define hwloc_internal_distances_add_by_index HWLOC_NAME(internal_distances_add_by_index) #define hwloc_internal_distances_invalidate_cached_objs HWLOC_NAME(hwloc_internal_distances_invalidate_cached_objs) +#define hwloc_internal_memattr_s HWLOC_NAME(internal_memattr_s) +#define hwloc_internal_memattr_target_s HWLOC_NAME(internal_memattr_target_s) +#define hwloc_internal_memattr_initiator_s HWLOC_NAME(internal_memattr_initiator_s) +#define hwloc_internal_memattrs_init 
HWLOC_NAME(internal_memattrs_init) +#define hwloc_internal_memattrs_prepare HWLOC_NAME(internal_memattrs_prepare) +#define hwloc_internal_memattrs_dup HWLOC_NAME(internal_memattrs_dup) +#define hwloc_internal_memattrs_destroy HWLOC_NAME(internal_memattrs_destroy) +#define hwloc_internal_memattrs_need_refresh HWLOC_NAME(internal_memattrs_need_refresh) +#define hwloc_internal_memattrs_refresh HWLOC_NAME(internal_memattrs_refresh) + +#define hwloc_internal_cpukind_s HWLOC_NAME(internal_cpukind_s) +#define hwloc_internal_cpukinds_init HWLOC_NAME(internal_cpukinds_init) +#define hwloc_internal_cpukinds_destroy HWLOC_NAME(internal_cpukinds_destroy) +#define hwloc_internal_cpukinds_dup HWLOC_NAME(internal_cpukinds_dup) +#define hwloc_internal_cpukinds_register HWLOC_NAME(internal_cpukinds_register) +#define hwloc_internal_cpukinds_rank HWLOC_NAME(internal_cpukinds_rank) +#define hwloc_internal_cpukinds_restrict HWLOC_NAME(internal_cpukinds_restrict) + #define hwloc_encode_to_base64 HWLOC_NAME(encode_to_base64) #define hwloc_decode_from_base64 HWLOC_NAME(decode_from_base64) diff --git a/src/3rdparty/hwloc/include/private/autogen/config.h b/src/3rdparty/hwloc/include/private/autogen/config.h index a97bdfea..687e82bc 100644 --- a/src/3rdparty/hwloc/include/private/autogen/config.h +++ b/src/3rdparty/hwloc/include/private/autogen/config.h @@ -1,8 +1,8 @@ /* * Copyright © 2009, 2011, 2012 CNRS. All rights reserved. - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009, 2011, 2012, 2015 Université Bordeaux. All rights reserved. - * Copyright © 2009 Cisco Systems, Inc. All rights reserved. + * Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -575,7 +575,7 @@ #define PACKAGE "hwloc" /* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "http://www.open-mpi.org/projects/hwloc/" +#define PACKAGE_BUGREPORT "https://www.open-mpi.org/projects/hwloc/" /* Define to the full name of this package. */ #define PACKAGE_NAME "hwloc" @@ -668,5 +668,9 @@ /* Define this to the thread ID type */ #define hwloc_thread_t HANDLE +/* Define to 1 if you have the declaration of `GetModuleFileName', and to 0 if + you don't. */ +#define HAVE_DECL_GETMODULEFILENAME 1 + #endif /* HWLOC_CONFIGURE_H */ diff --git a/src/3rdparty/hwloc/include/private/debug.h b/src/3rdparty/hwloc/include/private/debug.h index 637e0141..29dca70b 100644 --- a/src/3rdparty/hwloc/include/private/debug.h +++ b/src/3rdparty/hwloc/include/private/debug.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2017 Inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009, 2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -19,6 +19,10 @@ #include #endif +#ifdef ANDROID +extern void JNIDebug(char *text); +#endif + /* Compile-time assertion */ #define HWLOC_BUILD_ASSERT(condition) ((void)sizeof(char[1 - 2*!(condition)])) @@ -44,9 +48,17 @@ static __hwloc_inline void hwloc_debug(const char *s __hwloc_attribute_unused, . 
{ #ifdef HWLOC_DEBUG if (hwloc_debug_enabled()) { +#ifdef ANDROID + char buffer[256]; +#endif va_list ap; va_start(ap, s); +#ifdef ANDROID + vsprintf(buffer, s, ap); + JNIDebug(buffer); +#else vfprintf(stderr, s, ap); +#endif va_end(ap); } #endif @@ -57,21 +69,21 @@ static __hwloc_inline void hwloc_debug(const char *s __hwloc_attribute_unused, . if (hwloc_debug_enabled()) { \ char *s; \ hwloc_bitmap_asprintf(&s, bitmap); \ - fprintf(stderr, fmt, s); \ + hwloc_debug(fmt, s); \ free(s); \ } } while (0) #define hwloc_debug_1arg_bitmap(fmt, arg1, bitmap) do { \ if (hwloc_debug_enabled()) { \ char *s; \ hwloc_bitmap_asprintf(&s, bitmap); \ - fprintf(stderr, fmt, arg1, s); \ + hwloc_debug(fmt, arg1, s); \ free(s); \ } } while (0) #define hwloc_debug_2args_bitmap(fmt, arg1, arg2, bitmap) do { \ if (hwloc_debug_enabled()) { \ char *s; \ hwloc_bitmap_asprintf(&s, bitmap); \ - fprintf(stderr, fmt, arg1, arg2, s); \ + hwloc_debug(fmt, arg1, arg2, s); \ free(s); \ } } while (0) #else diff --git a/src/3rdparty/hwloc/include/private/internal-components.h b/src/3rdparty/hwloc/include/private/internal-components.h index d3c89783..0b82a45c 100644 --- a/src/3rdparty/hwloc/include/private/internal-components.h +++ b/src/3rdparty/hwloc/include/private/internal-components.h @@ -30,6 +30,7 @@ HWLOC_DECLSPEC extern const struct hwloc_component hwloc_x86_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_cuda_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_gl_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_nvml_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_rsmi_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_opencl_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component; diff --git a/src/3rdparty/hwloc/include/private/private.h b/src/3rdparty/hwloc/include/private/private.h index 84d95bb3..e0782659 100644 --- a/src/3rdparty/hwloc/include/private/private.h +++ b/src/3rdparty/hwloc/include/private/private.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2019 Inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * @@ -40,7 +40,19 @@ #endif #include -#define HWLOC_TOPOLOGY_ABI 0x20100 /* version of the layout of struct topology */ +#define HWLOC_TOPOLOGY_ABI 0x20400 /* version of the layout of struct topology */ + +struct hwloc_internal_location_s { + enum hwloc_location_type_e type; + union { + struct { + hwloc_obj_t obj; /* cached between refreshes */ + uint64_t gp_index; + hwloc_obj_type_t type; + } object; /* if type == HWLOC_LOCATION_TYPE_OBJECT */ + hwloc_cpuset_t cpuset; /* if type == HWLOC_LOCATION_TYPE_CPUSET */ + } location; +}; /***************************************************** * WARNING: @@ -163,6 +175,50 @@ struct hwloc_topology { } *first_dist, *last_dist; unsigned next_dist_id; + /* memory attributes */ + unsigned nr_memattrs; + struct hwloc_internal_memattr_s { + /* memattr info */ + char *name; /* TODO unit is implicit, in the documentation of standard attributes, or in the name? 
*/ + unsigned long flags; +#define HWLOC_IMATTR_FLAG_STATIC_NAME (1U<<0) /* no need to free name */ +#define HWLOC_IMATTR_FLAG_CACHE_VALID (1U<<1) /* target and initiator are valid */ +#define HWLOC_IMATTR_FLAG_CONVENIENCE (1U<<2) /* convenience attribute reporting values from non-memattr attributes (R/O and no actual targets stored) */ + unsigned iflags; + + /* array of values */ + unsigned nr_targets; + struct hwloc_internal_memattr_target_s { + /* target object */ + hwloc_obj_t obj; /* cached between refreshes */ + hwloc_obj_type_t type; + unsigned os_index; /* only used temporarily during discovery when there's no obj/gp_index yet */ + hwloc_uint64_t gp_index; + + /* value if there are no initiator for this attr */ + hwloc_uint64_t noinitiator_value; + /* initiators otherwise */ + unsigned nr_initiators; + struct hwloc_internal_memattr_initiator_s { + struct hwloc_internal_location_s initiator; + hwloc_uint64_t value; + } *initiators; + } *targets; + } *memattrs; + + /* hybridcpus */ + unsigned nr_cpukinds; + unsigned nr_cpukinds_allocated; + struct hwloc_internal_cpukind_s { + hwloc_cpuset_t cpuset; +#define HWLOC_CPUKIND_EFFICIENCY_UNKNOWN -1 + int efficiency; + int forced_efficiency; /* returned by the hardware or OS if any */ + hwloc_uint64_t ranking_value; /* internal value for ranking */ + unsigned nr_infos; + struct hwloc_info_s *infos; + } *cpukinds; + int grouping; int grouping_verbose; unsigned grouping_nbaccuracies; @@ -240,8 +296,9 @@ extern void hwloc_topology_clear(struct hwloc_topology *topology); /* insert memory object as memory child of normal parent */ extern struct hwloc_obj * hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent, - hwloc_obj_t obj, - hwloc_report_error_t report_error); + hwloc_obj_t obj, const char *reason); + +extern hwloc_obj_t hwloc_get_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index); extern void hwloc_pci_discovery_init(struct hwloc_topology *topology); extern void hwloc_pci_discovery_prepare(struct hwloc_topology *topology); @@ -261,6 +318,7 @@ extern hwloc_obj_t hwloc_find_insert_io_parent_by_complete_cpuset(struct hwloc_t extern int hwloc__add_info(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value); extern int hwloc__add_info_nodup(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value, int replace); extern int hwloc__move_infos(struct hwloc_info_s **dst_infosp, unsigned *dst_countp, struct hwloc_info_s **src_infosp, unsigned *src_countp); +extern int hwloc__tma_dup_infos(struct hwloc_tma *tma, struct hwloc_info_s **dst_infosp, unsigned *dst_countp, struct hwloc_info_s *src_infos, unsigned src_count); extern void hwloc__free_infos(struct hwloc_info_s *infos, unsigned count); /* set native OS binding hooks */ @@ -354,6 +412,22 @@ extern int hwloc_internal_distances_add(hwloc_topology_t topology, const char *n extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags); extern void hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology); +extern void hwloc_internal_memattrs_init(hwloc_topology_t topology); +extern void hwloc_internal_memattrs_prepare(hwloc_topology_t topology); +extern void hwloc_internal_memattrs_destroy(hwloc_topology_t topology); +extern void 
hwloc_internal_memattrs_need_refresh(hwloc_topology_t topology); +extern void hwloc_internal_memattrs_refresh(hwloc_topology_t topology); +extern int hwloc_internal_memattrs_dup(hwloc_topology_t new, hwloc_topology_t old); +extern int hwloc_internal_memattr_set_value(hwloc_topology_t topology, hwloc_memattr_id_t id, hwloc_obj_type_t target_type, hwloc_uint64_t target_gp_index, unsigned target_os_index, struct hwloc_internal_location_s *initiator, hwloc_uint64_t value); + +extern void hwloc_internal_cpukinds_init(hwloc_topology_t topology); +extern int hwloc_internal_cpukinds_rank(hwloc_topology_t topology); +extern void hwloc_internal_cpukinds_destroy(hwloc_topology_t topology); +extern int hwloc_internal_cpukinds_dup(hwloc_topology_t new, hwloc_topology_t old); +#define HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY (1<<0) +extern int hwloc_internal_cpukinds_register(hwloc_topology_t topology, hwloc_cpuset_t cpuset, int forced_efficiency, const struct hwloc_info_s *infos, unsigned nr_infos, unsigned long flags); +extern void hwloc_internal_cpukinds_restrict(hwloc_topology_t topology); + /* encode src buffer into target buffer. * targsize must be at least 4*((srclength+2)/3)+1. * target will be 0-terminated. diff --git a/src/3rdparty/hwloc/include/private/xml.h b/src/3rdparty/hwloc/include/private/xml.h index f59fca1f..3af5ba1e 100644 --- a/src/3rdparty/hwloc/include/private/xml.h +++ b/src/3rdparty/hwloc/include/private/xml.h @@ -46,7 +46,7 @@ struct hwloc_xml_backend_data_s { int (*find_child)(struct hwloc__xml_import_state_s * state, struct hwloc__xml_import_state_s * childstate, char **tagp); int (*close_tag)(struct hwloc__xml_import_state_s * state); /* look for an explicit closing tag */ void (*close_child)(struct hwloc__xml_import_state_s * state); - int (*get_content)(struct hwloc__xml_import_state_s * state, char **beginp, size_t expected_length); /* return 0 on empty content (and sets beginp to empty string), 1 on actual content, -1 on error or unexpected content length */ + int (*get_content)(struct hwloc__xml_import_state_s * state, const char **beginp, size_t expected_length); /* return 0 on empty content (and sets beginp to empty string), 1 on actual content, -1 on error or unexpected content length */ void (*close_content)(struct hwloc__xml_import_state_s * state); char * msgprefix; void *data; /* libxml2 doc, or nolibxml buffer */ diff --git a/src/3rdparty/hwloc/src/bind.c b/src/3rdparty/hwloc/src/bind.c index 0bd85e25..2b5d0994 100644 --- a/src/3rdparty/hwloc/src/bind.c +++ b/src/3rdparty/hwloc/src/bind.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2019 Inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2010, 2012 Université Bordeaux * Copyright © 2011-2015 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -921,5 +921,6 @@ hwloc_set_binding_hooks(struct hwloc_topology *topology) DO(mem,get_area_membind); DO(mem,get_area_memlocation); DO(mem,alloc_membind); +#undef DO } } diff --git a/src/3rdparty/hwloc/src/bitmap.c b/src/3rdparty/hwloc/src/bitmap.c index 4791a694..cf071edb 100644 --- a/src/3rdparty/hwloc/src/bitmap.c +++ b/src/3rdparty/hwloc/src/bitmap.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
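The hwloc_internal_cpukinds_* bookkeeping declared above backs the public hwloc_cpukinds_* API added later in this patch (cpukinds.c). A minimal consumer-side sketch, illustrative only: it assumes hwloc >= 2.4 headers plus standard hwloc calls not shown in this diff (hwloc_topology_init/load/destroy, hwloc_bitmap_alloc/asprintf/free), and uses the hwloc_cpukinds_get_nr/get_info signatures defined below:

#include <stdio.h>
#include <stdlib.h>
#include <hwloc.h>
#include <hwloc/cpukinds.h>

int main(void)
{
    hwloc_topology_t topology;
    hwloc_bitmap_t cpuset;
    int nr, id;

    hwloc_topology_init(&topology);
    hwloc_topology_load(topology);
    cpuset = hwloc_bitmap_alloc();

    /* one entry per kind of core (e.g. performance vs efficiency), ranked by efficiency */
    nr = hwloc_cpukinds_get_nr(topology, 0);
    for (id = 0; id < nr; id++) {
        int efficiency;
        unsigned nr_infos, i;
        struct hwloc_info_s *infos; /* owned by the topology, do not free */
        char *s;

        if (hwloc_cpukinds_get_info(topology, (unsigned) id, cpuset,
                                    &efficiency, &nr_infos, &infos, 0) < 0)
            continue;
        hwloc_bitmap_asprintf(&s, cpuset);
        printf("cpukind #%d efficiency %d cpuset %s\n", id, efficiency, s);
        free(s);
        for (i = 0; i < nr_infos; i++)
            printf("  %s = %s\n", infos[i].name, infos[i].value);
    }

    hwloc_bitmap_free(cpuset);
    hwloc_topology_destroy(topology);
    return 0;
}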
@@ -818,7 +818,7 @@ int hwloc_bitmap_nr_ulongs(const struct hwloc_bitmap_s *set) return -1; last = hwloc_bitmap_last(set); - return (last + HWLOC_BITS_PER_LONG-1)/HWLOC_BITS_PER_LONG; + return (last + HWLOC_BITS_PER_LONG)/HWLOC_BITS_PER_LONG; } int hwloc_bitmap_only(struct hwloc_bitmap_s * set, unsigned cpu) diff --git a/src/3rdparty/hwloc/src/cpukinds.c b/src/3rdparty/hwloc/src/cpukinds.c new file mode 100644 index 00000000..5f2dd1aa --- /dev/null +++ b/src/3rdparty/hwloc/src/cpukinds.c @@ -0,0 +1,649 @@ +/* + * Copyright © 2020 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/private.h" +#include "private/debug.h" + + +/***************** + * Basics + */ + +void +hwloc_internal_cpukinds_init(struct hwloc_topology *topology) +{ + topology->cpukinds = NULL; + topology->nr_cpukinds = 0; + topology->nr_cpukinds_allocated = 0; +} + +void +hwloc_internal_cpukinds_destroy(struct hwloc_topology *topology) +{ + unsigned i; + for(i=0; inr_cpukinds; i++) { + struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; + hwloc_bitmap_free(kind->cpuset); + hwloc__free_infos(kind->infos, kind->nr_infos); + } + free(topology->cpukinds); + topology->cpukinds = NULL; + topology->nr_cpukinds = 0; +} + +int +hwloc_internal_cpukinds_dup(hwloc_topology_t new, hwloc_topology_t old) +{ + struct hwloc_tma *tma = new->tma; + struct hwloc_internal_cpukind_s *kinds; + unsigned i; + + kinds = hwloc_tma_malloc(tma, old->nr_cpukinds * sizeof(*kinds)); + if (!kinds) + return -1; + new->cpukinds = kinds; + new->nr_cpukinds = old->nr_cpukinds; + memcpy(kinds, old->cpukinds, old->nr_cpukinds * sizeof(*kinds)); + + for(i=0;inr_cpukinds; i++) { + kinds[i].cpuset = hwloc_bitmap_tma_dup(tma, old->cpukinds[i].cpuset); + if (!kinds[i].cpuset) { + new->nr_cpukinds = i; + goto failed; + } + if (hwloc__tma_dup_infos(tma, + &kinds[i].infos, &kinds[i].nr_infos, + old->cpukinds[i].infos, old->cpukinds[i].nr_infos) < 0) { + assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */ + hwloc_bitmap_free(kinds[i].cpuset); + new->nr_cpukinds = i; + goto failed; + } + } + + return 0; + + failed: + hwloc_internal_cpukinds_destroy(new); + return -1; +} + +void +hwloc_internal_cpukinds_restrict(hwloc_topology_t topology) +{ + unsigned i; + int removed = 0; + for(i=0; inr_cpukinds; i++) { + struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; + hwloc_bitmap_and(kind->cpuset, kind->cpuset, hwloc_get_root_obj(topology)->cpuset); + if (hwloc_bitmap_iszero(kind->cpuset)) { + hwloc_bitmap_free(kind->cpuset); + hwloc__free_infos(kind->infos, kind->nr_infos); + memmove(kind, kind+1, (topology->nr_cpukinds - i - 1)*sizeof(*kind)); + i--; + topology->nr_cpukinds--; + removed = 1; + } + } + if (removed) + hwloc_internal_cpukinds_rank(topology); +} + + +/******************** + * Registering + */ + +static __hwloc_inline int +hwloc__cpukind_check_duplicate_info(struct hwloc_internal_cpukind_s *kind, + const char *name, const char *value) +{ + unsigned i; + for(i=0; inr_infos; i++) + if (!strcmp(kind->infos[i].name, name) + && !strcmp(kind->infos[i].value, value)) + return 1; + return 0; +} + +static __hwloc_inline void +hwloc__cpukind_add_infos(struct hwloc_internal_cpukind_s *kind, + const struct hwloc_info_s *infos, unsigned nr_infos) +{ + unsigned i; + for(i=0; iinfos, &kind->nr_infos, infos[i].name, infos[i].value); + } +} + +int +hwloc_internal_cpukinds_register(hwloc_topology_t topology, hwloc_cpuset_t cpuset, + int 
forced_efficiency, + const struct hwloc_info_s *infos, unsigned nr_infos, + unsigned long flags) +{ + struct hwloc_internal_cpukind_s *kinds; + unsigned i, max, bits, oldnr, newnr; + + if (hwloc_bitmap_iszero(cpuset)) { + hwloc_bitmap_free(cpuset); + errno = EINVAL; + return -1; + } + + if (flags & ~HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY) { + errno = EINVAL; + return -1; + } + + /* TODO: for now, only windows provides a forced efficiency. + * if another backend ever provides a conflicting value, the first backend value will be kept. + * (user-provided values are not an issue, they are meant to overwrite) + */ + + /* If we have N kinds currently, we may need 2N+1 kinds after inserting the new one: + * - each existing kind may get split into which PUs are in the new kind and which aren't. + * - some PUs might not have been in any kind yet. + */ + max = 2 * topology->nr_cpukinds + 1; + /* Allocate the power-of-two above 2N+1. */ + bits = hwloc_flsl(max-1) + 1; + max = 1U<cpukinds; + if (max > topology->nr_cpukinds_allocated) { + kinds = realloc(kinds, max * sizeof(*kinds)); + if (!kinds) { + hwloc_bitmap_free(cpuset); + return -1; + } + memset(&kinds[topology->nr_cpukinds_allocated], 0, (max - topology->nr_cpukinds_allocated) * sizeof(*kinds)); + topology->nr_cpukinds_allocated = max; + topology->cpukinds = kinds; + } + + newnr = oldnr = topology->nr_cpukinds; + for(i=0; inr_cpukinds = newnr; + return 0; +} + +int +hwloc_cpukinds_register(hwloc_topology_t topology, hwloc_cpuset_t _cpuset, + int forced_efficiency, + unsigned nr_infos, struct hwloc_info_s *infos, + unsigned long flags) +{ + hwloc_bitmap_t cpuset; + int err; + + if (flags) { + errno = EINVAL; + return -1; + } + + if (!_cpuset || hwloc_bitmap_iszero(_cpuset)) { + errno = EINVAL; + return -1; + } + + cpuset = hwloc_bitmap_dup(_cpuset); + if (!cpuset) + return -1; + + if (forced_efficiency < 0) + forced_efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN; + + err = hwloc_internal_cpukinds_register(topology, cpuset, forced_efficiency, infos, nr_infos, HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY); + if (err < 0) + return err; + + hwloc_internal_cpukinds_rank(topology); + return 0; +} + + +/********************* + * Ranking + */ + +static int +hwloc__cpukinds_check_duplicate_rankings(struct hwloc_topology *topology) +{ + unsigned i,j; + for(i=0; inr_cpukinds; i++) + for(j=i+1; jnr_cpukinds; j++) + if (topology->cpukinds[i].forced_efficiency == topology->cpukinds[j].forced_efficiency) + /* if any duplicate, fail */ + return -1; + return 0; +} + +static int +hwloc__cpukinds_try_rank_by_forced_efficiency(struct hwloc_topology *topology) +{ + unsigned i; + + hwloc_debug("Trying to rank cpukinds by forced efficiency...\n"); + for(i=0; inr_cpukinds; i++) { + if (topology->cpukinds[i].forced_efficiency == HWLOC_CPUKIND_EFFICIENCY_UNKNOWN) + /* if any unknown, fail */ + return -1; + topology->cpukinds[i].ranking_value = topology->cpukinds[i].forced_efficiency; + } + + return hwloc__cpukinds_check_duplicate_rankings(topology); +} + +struct hwloc_cpukinds_info_summary { + int have_max_freq; + int have_base_freq; + int have_intel_core_type; + struct hwloc_cpukind_info_summary { + unsigned intel_core_type; /* 1 for atom, 2 for core */ + unsigned max_freq, base_freq; /* MHz, hence < 100000 */ + } * summaries; +}; + +static void +hwloc__cpukinds_summarize_info(struct hwloc_topology *topology, + struct hwloc_cpukinds_info_summary *summary) +{ + unsigned i, j; + + summary->have_max_freq = 1; + summary->have_base_freq = 1; + 
summary->have_intel_core_type = 1; + + for(i=0; inr_cpukinds; i++) { + struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; + for(j=0; jnr_infos; j++) { + struct hwloc_info_s *info = &kind->infos[j]; + if (!strcmp(info->name, "FrequencyMaxMHz")) { + summary->summaries[i].max_freq = atoi(info->value); + } else if (!strcmp(info->name, "FrequencyBaseMHz")) { + summary->summaries[i].base_freq = atoi(info->value); + } else if (!strcmp(info->name, "CoreType")) { + if (!strcmp(info->value, "IntelAtom")) + summary->summaries[i].intel_core_type = 1; + else if (!strcmp(info->value, "IntelCore")) + summary->summaries[i].intel_core_type = 2; + } + } + hwloc_debug("cpukind #%u has intel_core_type %u max_freq %u base_freq %u\n", + i, summary->summaries[i].intel_core_type, + summary->summaries[i].max_freq, summary->summaries[i].base_freq); + if (!summary->summaries[i].base_freq) + summary->have_base_freq = 0; + if (!summary->summaries[i].max_freq) + summary->have_max_freq = 0; + if (!summary->summaries[i].intel_core_type) + summary->have_intel_core_type = 0; + } +} + +enum hwloc_cpukinds_ranking { + HWLOC_CPUKINDS_RANKING_DEFAULT, /* forced + frequency on ARM, forced + coretype_frequency otherwise */ + HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY, /* default without forced */ + HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY, + HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY, + HWLOC_CPUKINDS_RANKING_CORETYPE, + HWLOC_CPUKINDS_RANKING_FREQUENCY, + HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX, + HWLOC_CPUKINDS_RANKING_FREQUENCY_BASE, + HWLOC_CPUKINDS_RANKING_NONE +}; + +static int +hwloc__cpukinds_try_rank_by_info(struct hwloc_topology *topology, + enum hwloc_cpukinds_ranking heuristics, + struct hwloc_cpukinds_info_summary *summary) +{ + unsigned i; + + if (HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY == heuristics) { + hwloc_debug("Trying to rank cpukinds by coretype+frequency...\n"); + /* we need intel_core_type + (base or max freq) for all kinds */ + if (!summary->have_intel_core_type + || (!summary->have_max_freq && !summary->have_base_freq)) + return -1; + /* rank first by coretype (Core>>Atom) then by frequency, base if available, max otherwise */ + for(i=0; inr_cpukinds; i++) { + struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; + if (summary->have_base_freq) + kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].base_freq; + else + kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].max_freq; + } + + } else if (HWLOC_CPUKINDS_RANKING_CORETYPE == heuristics) { + hwloc_debug("Trying to rank cpukinds by coretype...\n"); + /* we need intel_core_type */ + if (!summary->have_intel_core_type) + return -1; + /* rank by coretype (Core>>Atom) */ + for(i=0; inr_cpukinds; i++) { + struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; + kind->ranking_value = (summary->summaries[i].intel_core_type << 20); + } + + } else if (HWLOC_CPUKINDS_RANKING_FREQUENCY == heuristics) { + hwloc_debug("Trying to rank cpukinds by frequency...\n"); + /* we need base or max freq for all kinds */ + if (!summary->have_max_freq && !summary->have_base_freq) + return -1; + /* rank first by frequency, base if available, max otherwise */ + for(i=0; inr_cpukinds; i++) { + struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; + if (summary->have_base_freq) + kind->ranking_value = summary->summaries[i].base_freq; + else + kind->ranking_value = summary->summaries[i].max_freq; + } + + } else if (HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX == 
heuristics) { + hwloc_debug("Trying to rank cpukinds by frequency max...\n"); + /* we need max freq for all kinds */ + if (!summary->have_max_freq) + return -1; + /* rank first by frequency, base if available, max otherwise */ + for(i=0; inr_cpukinds; i++) { + struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; + kind->ranking_value = summary->summaries[i].max_freq; + } + + } else if (HWLOC_CPUKINDS_RANKING_FREQUENCY_BASE == heuristics) { + hwloc_debug("Trying to rank cpukinds by frequency base...\n"); + /* we need max freq for all kinds */ + if (!summary->have_base_freq) + return -1; + /* rank first by frequency, base if available, max otherwise */ + for(i=0; inr_cpukinds; i++) { + struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; + kind->ranking_value = summary->summaries[i].base_freq; + } + + } else assert(0); + + return hwloc__cpukinds_check_duplicate_rankings(topology); +} + +static int hwloc__cpukinds_compare_ranking_values(const void *_a, const void *_b) +{ + const struct hwloc_internal_cpukind_s *a = _a; + const struct hwloc_internal_cpukind_s *b = _b; + return a->ranking_value - b->ranking_value; +} + +/* this function requires ranking values to be unique */ +static void +hwloc__cpukinds_finalize_ranking(struct hwloc_topology *topology) +{ + unsigned i; + /* sort */ + qsort(topology->cpukinds, topology->nr_cpukinds, sizeof(*topology->cpukinds), hwloc__cpukinds_compare_ranking_values); + /* define our own efficiency between 0 and N-1 */ + for(i=0; inr_cpukinds; i++) + topology->cpukinds[i].efficiency = i; +} + +int +hwloc_internal_cpukinds_rank(struct hwloc_topology *topology) +{ + enum hwloc_cpukinds_ranking heuristics; + char *env; + unsigned i; + int err; + + if (!topology->nr_cpukinds) + return 0; + + if (topology->nr_cpukinds == 1) { + topology->cpukinds[0].efficiency = 0; + return 0; + } + + heuristics = HWLOC_CPUKINDS_RANKING_DEFAULT; + env = getenv("HWLOC_CPUKINDS_RANKING"); + if (env) { + if (!strcmp(env, "default")) + heuristics = HWLOC_CPUKINDS_RANKING_DEFAULT; + else if (!strcmp(env, "none")) + heuristics = HWLOC_CPUKINDS_RANKING_NONE; + else if (!strcmp(env, "coretype+frequency")) + heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY; + else if (!strcmp(env, "coretype")) + heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE; + else if (!strcmp(env, "frequency")) + heuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY; + else if (!strcmp(env, "frequency_max")) + heuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX; + else if (!strcmp(env, "frequency_base")) + heuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY_BASE; + else if (!strcmp(env, "forced_efficiency")) + heuristics = HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY; + else if (!strcmp(env, "no_forced_efficiency")) + heuristics = HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY; + else if (!hwloc_hide_errors()) + fprintf(stderr, "Failed to recognize HWLOC_CPUKINDS_RANKING value %s\n", env); + } + + if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT + || heuristics == HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY) { + /* default is forced_efficiency first */ + struct hwloc_cpukinds_info_summary summary; + enum hwloc_cpukinds_ranking subheuristics; + const char *arch; + + if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT) + hwloc_debug("Using default ranking strategy...\n"); + else + hwloc_debug("Using custom ranking strategy from HWLOC_CPUKINDS_RANKING=%s\n", env); + + if (heuristics != HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY) { + err = hwloc__cpukinds_try_rank_by_forced_efficiency(topology); + if (!err) + goto ready; + } 
+ + summary.summaries = calloc(topology->nr_cpukinds, sizeof(*summary.summaries)); + if (!summary.summaries) + goto failed; + hwloc__cpukinds_summarize_info(topology, &summary); + + arch = hwloc_obj_get_info_by_name(topology->levels[0][0], "Architecture"); + /* TODO: rather coretype_frequency only on x86/Intel? */ + if (arch && (!strncmp(arch, "arm", 3) || !strncmp(arch, "aarch", 5))) + /* then frequency on ARM */ + subheuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY; + else + /* or coretype+frequency otherwise */ + subheuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY; + + err = hwloc__cpukinds_try_rank_by_info(topology, subheuristics, &summary); + free(summary.summaries); + if (!err) + goto ready; + + } else if (heuristics == HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY) { + hwloc_debug("Using custom ranking strategy from HWLOC_CPUKINDS_RANKING=%s\n", env); + + err = hwloc__cpukinds_try_rank_by_forced_efficiency(topology); + if (!err) + goto ready; + + } else if (heuristics != HWLOC_CPUKINDS_RANKING_NONE) { + /* custom heuristics */ + struct hwloc_cpukinds_info_summary summary; + + hwloc_debug("Using custom ranking strategy from HWLOC_CPUKINDS_RANKING=%s\n", env); + + summary.summaries = calloc(topology->nr_cpukinds, sizeof(*summary.summaries)); + if (!summary.summaries) + goto failed; + hwloc__cpukinds_summarize_info(topology, &summary); + + err = hwloc__cpukinds_try_rank_by_info(topology, heuristics, &summary); + free(summary.summaries); + if (!err) + goto ready; + } + + failed: + /* failed to rank, clear efficiencies */ + for(i=0; inr_cpukinds; i++) + topology->cpukinds[i].efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN; + hwloc_debug("Failed to rank cpukinds.\n\n"); + return 0; + + ready: + for(i=0; inr_cpukinds; i++) + hwloc_debug("cpukind #%u got ranking value %llu\n", i, (unsigned long long) topology->cpukinds[i].ranking_value); + hwloc__cpukinds_finalize_ranking(topology); +#ifdef HWLOC_DEBUG + for(i=0; inr_cpukinds; i++) + assert(topology->cpukinds[i].efficiency == (int) i); +#endif + hwloc_debug("\n"); + return 0; +} + + +/***************** + * Consulting + */ + +int +hwloc_cpukinds_get_nr(hwloc_topology_t topology, unsigned long flags) +{ + if (flags) { + errno = EINVAL; + return -1; + } + + return topology->nr_cpukinds; +} + +int +hwloc_cpukinds_get_info(hwloc_topology_t topology, + unsigned id, + hwloc_bitmap_t cpuset, + int *efficiencyp, + unsigned *nr_infosp, struct hwloc_info_s **infosp, + unsigned long flags) +{ + struct hwloc_internal_cpukind_s *kind; + + if (flags) { + errno = EINVAL; + return -1; + } + + if (id >= topology->nr_cpukinds) { + errno = ENOENT; + return -1; + } + + kind = &topology->cpukinds[id]; + + if (cpuset) + hwloc_bitmap_copy(cpuset, kind->cpuset); + + if (efficiencyp) + *efficiencyp = kind->efficiency; + + if (nr_infosp && infosp) { + *nr_infosp = kind->nr_infos; + *infosp = kind->infos; + } + return 0; +} + +int +hwloc_cpukinds_get_by_cpuset(hwloc_topology_t topology, + hwloc_const_bitmap_t cpuset, + unsigned long flags) +{ + unsigned id; + + if (flags) { + errno = EINVAL; + return -1; + } + + if (!cpuset || hwloc_bitmap_iszero(cpuset)) { + errno = EINVAL; + return -1; + } + + for(id=0; idnr_cpukinds; id++) { + struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[id]; + int res = hwloc_bitmap_compare_inclusion(cpuset, kind->cpuset); + if (res == HWLOC_BITMAP_EQUAL || res == HWLOC_BITMAP_INCLUDED) { + return (int) id; + } else if (res == HWLOC_BITMAP_INTERSECTS || res == HWLOC_BITMAP_CONTAINS) { + errno = EXDEV; + return -1; + } + } + + errno = 
ENOENT; + return -1; +} diff --git a/src/3rdparty/hwloc/src/diff.c b/src/3rdparty/hwloc/src/diff.c index 7794358b..7449a858 100644 --- a/src/3rdparty/hwloc/src/diff.c +++ b/src/3rdparty/hwloc/src/diff.c @@ -1,5 +1,5 @@ /* - * Copyright © 2013-2019 Inria. All rights reserved. + * Copyright © 2013-2020 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -333,10 +333,8 @@ int hwloc_topology_diff_build(hwloc_topology_t topo1, if (!err) { if (SETS_DIFFERENT(allowed_cpuset, topo1, topo2) - || SETS_DIFFERENT(allowed_nodeset, topo1, topo2)) { - hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff); - err = 1; - } + || SETS_DIFFERENT(allowed_nodeset, topo1, topo2)) + goto roottoocomplex; } if (!err) { @@ -346,33 +344,78 @@ int hwloc_topology_diff_build(hwloc_topology_t topo1, dist1 = topo1->first_dist; dist2 = topo2->first_dist; while (dist1 || dist2) { - if (!!dist1 != !!dist2) { - hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff); - err = 1; - break; - } + if (!!dist1 != !!dist2) + goto roottoocomplex; if (dist1->unique_type != dist2->unique_type || dist1->different_types || dist2->different_types /* too lazy to support this case */ || dist1->nbobjs != dist2->nbobjs || dist1->kind != dist2->kind - || memcmp(dist1->values, dist2->values, dist1->nbobjs * dist1->nbobjs * sizeof(*dist1->values))) { - hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff); - err = 1; - break; - } + || memcmp(dist1->values, dist2->values, dist1->nbobjs * dist1->nbobjs * sizeof(*dist1->values))) + goto roottoocomplex; for(i=0; inbobjs; i++) /* gp_index isn't enforced above. so compare logical_index instead, which is enforced. requires distances refresh() above */ - if (dist1->objs[i]->logical_index != dist2->objs[i]->logical_index) { - hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff); - err = 1; - break; - } + if (dist1->objs[i]->logical_index != dist2->objs[i]->logical_index) + goto roottoocomplex; dist1 = dist1->next; dist2 = dist2->next; } } + if (!err) { + /* memattrs */ + hwloc_internal_memattrs_refresh(topo1); + hwloc_internal_memattrs_refresh(topo2); + if (topo1->nr_memattrs != topo2->nr_memattrs) + goto roottoocomplex; + for(i=0; inr_memattrs; i++) { + struct hwloc_internal_memattr_s *imattr1 = &topo1->memattrs[i], *imattr2 = &topo2->memattrs[i]; + unsigned j; + if (strcmp(imattr1->name, imattr2->name) + || imattr1->flags != imattr2->flags + || imattr1->nr_targets != imattr2->nr_targets) + goto roottoocomplex; + if (i == HWLOC_MEMATTR_ID_CAPACITY + || i == HWLOC_MEMATTR_ID_LOCALITY) + /* no need to check virtual attributes, there were refreshed from other topology attributes, checked above */ + continue; + for(j=0; jnr_targets; j++) { + struct hwloc_internal_memattr_target_s *imtg1 = &imattr1->targets[j], *imtg2 = &imattr2->targets[j]; + if (imtg1->type != imtg2->type) + goto roottoocomplex; + if (imtg1->obj->logical_index != imtg2->obj->logical_index) + goto roottoocomplex; + if (imattr1->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) { + unsigned k; + for(k=0; knr_initiators; k++) { + struct hwloc_internal_memattr_initiator_s *imi1 = &imtg1->initiators[k], *imi2 = &imtg2->initiators[k]; + if (imi1->value != imi2->value + || imi1->initiator.type != imi2->initiator.type) + goto roottoocomplex; + if (imi1->initiator.type == HWLOC_LOCATION_TYPE_CPUSET) { + if (!hwloc_bitmap_isequal(imi1->initiator.location.cpuset, imi2->initiator.location.cpuset)) + goto roottoocomplex; + } else if (imi1->initiator.type == 
HWLOC_LOCATION_TYPE_OBJECT) { + if (imi1->initiator.location.object.type != imi2->initiator.location.object.type) + goto roottoocomplex; + if (imi1->initiator.location.object.obj->logical_index != imi2->initiator.location.object.obj->logical_index) + goto roottoocomplex; + } else { + assert(0); + } + } + } else { + if (imtg1->noinitiator_value != imtg2->noinitiator_value) + goto roottoocomplex; + } + } + } + } + return err; + + roottoocomplex: + hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff); + return 1; } /******************** diff --git a/src/3rdparty/hwloc/src/distances.c b/src/3rdparty/hwloc/src/distances.c index 4f2897a0..c4854956 100644 --- a/src/3rdparty/hwloc/src/distances.c +++ b/src/3rdparty/hwloc/src/distances.c @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2019 Inria. All rights reserved. + * Copyright © 2010-2020 Inria. All rights reserved. * Copyright © 2011-2012 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -526,36 +526,6 @@ int hwloc_distances_add(hwloc_topology_t topology, * Refresh objects in distances */ -static hwloc_obj_t hwloc_find_obj_by_depth_and_gp_index(hwloc_topology_t topology, unsigned depth, uint64_t gp_index) -{ - hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, 0); - while (obj) { - if (obj->gp_index == gp_index) - return obj; - obj = obj->next_cousin; - } - return NULL; -} - -static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index) -{ - int depth = hwloc_get_type_depth(topology, type); - if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) - return NULL; - if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) { - int topodepth = hwloc_topology_get_depth(topology); - for(depth=0; depthcpuset); res_obj = hwloc__insert_object_by_cpuset(topology, NULL, group_obj, - (kind & HWLOC_DISTANCES_KIND_FROM_USER) ? hwloc_report_user_distance_error : hwloc_report_os_error); + (kind & HWLOC_DISTANCES_KIND_FROM_USER) ? "distances:fromuser:group" : "distances:group"); /* res_obj may be NULL on failure to insert. */ if (!res_obj) failed++; diff --git a/src/3rdparty/hwloc/src/memattrs.c b/src/3rdparty/hwloc/src/memattrs.c new file mode 100644 index 00000000..16e9896e --- /dev/null +++ b/src/3rdparty/hwloc/src/memattrs.c @@ -0,0 +1,1197 @@ +/* + * Copyright © 2020 Inria. All rights reserved. + * See COPYING in top-level directory. 
+ */ + +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/private.h" + + +/***************************** + * Attributes + */ + +static __hwloc_inline +hwloc_uint64_t hwloc__memattr_get_convenience_value(hwloc_memattr_id_t id, + hwloc_obj_t node) +{ + if (id == HWLOC_MEMATTR_ID_CAPACITY) + return node->attr->numanode.local_memory; + else if (id == HWLOC_MEMATTR_ID_LOCALITY) + return hwloc_bitmap_weight(node->cpuset); + else + assert(0); + return 0; /* shut up the compiler */ +} + +void +hwloc_internal_memattrs_init(struct hwloc_topology *topology) +{ + topology->nr_memattrs = 0; + topology->memattrs = NULL; +} + +static void +hwloc__setup_memattr(struct hwloc_internal_memattr_s *imattr, + char *name, + unsigned long flags, + unsigned long iflags) +{ + imattr->name = name; + imattr->flags = flags; + imattr->iflags = iflags; + + imattr->nr_targets = 0; + imattr->targets = NULL; +} + +void +hwloc_internal_memattrs_prepare(struct hwloc_topology *topology) +{ +#define NR_DEFAULT_MEMATTRS 4 + topology->memattrs = malloc(NR_DEFAULT_MEMATTRS * sizeof(*topology->memattrs)); + if (!topology->memattrs) + return; + + assert(HWLOC_MEMATTR_ID_CAPACITY < NR_DEFAULT_MEMATTRS); + hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_CAPACITY], + (char *) "Capacity", + HWLOC_MEMATTR_FLAG_HIGHER_FIRST, + HWLOC_IMATTR_FLAG_STATIC_NAME|HWLOC_IMATTR_FLAG_CONVENIENCE); + + assert(HWLOC_MEMATTR_ID_LOCALITY < NR_DEFAULT_MEMATTRS); + hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_LOCALITY], + (char *) "Locality", + HWLOC_MEMATTR_FLAG_LOWER_FIRST, + HWLOC_IMATTR_FLAG_STATIC_NAME|HWLOC_IMATTR_FLAG_CONVENIENCE); + + assert(HWLOC_MEMATTR_ID_BANDWIDTH < NR_DEFAULT_MEMATTRS); + hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH], + (char *) "Bandwidth", + HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR, + HWLOC_IMATTR_FLAG_STATIC_NAME); + + assert(HWLOC_MEMATTR_ID_LATENCY < NR_DEFAULT_MEMATTRS); + hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_LATENCY], + (char *) "Latency", + HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR, + HWLOC_IMATTR_FLAG_STATIC_NAME); + + topology->nr_memattrs = NR_DEFAULT_MEMATTRS; +} + +static void +hwloc__imi_destroy(struct hwloc_internal_memattr_initiator_s *imi) +{ + if (imi->initiator.type == HWLOC_LOCATION_TYPE_CPUSET) + hwloc_bitmap_free(imi->initiator.location.cpuset); +} + +static void +hwloc__imtg_destroy(struct hwloc_internal_memattr_s *imattr, + struct hwloc_internal_memattr_target_s *imtg) +{ + if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) { + /* only attributes with initiators may have something to free() in the array */ + unsigned k; + for(k=0; knr_initiators; k++) + hwloc__imi_destroy(&imtg->initiators[k]); + } + free(imtg->initiators); +} + +void +hwloc_internal_memattrs_destroy(struct hwloc_topology *topology) +{ + unsigned id; + for(id=0; idnr_memattrs; id++) { + struct hwloc_internal_memattr_s *imattr = &topology->memattrs[id]; + unsigned j; + for(j=0; jnr_targets; j++) + hwloc__imtg_destroy(imattr, &imattr->targets[j]); + free(imattr->targets); + if (!(imattr->iflags & HWLOC_IMATTR_FLAG_STATIC_NAME)) + free(imattr->name); + } + free(topology->memattrs); + + topology->memattrs = NULL; + topology->nr_memattrs = 0; +} + +int +hwloc_internal_memattrs_dup(struct hwloc_topology *new, struct hwloc_topology *old) +{ + struct hwloc_tma *tma = new->tma; + struct hwloc_internal_memattr_s *imattrs; + hwloc_memattr_id_t id; + + imattrs = hwloc_tma_malloc(tma, old->nr_memattrs * 
sizeof(*imattrs)); + if (!imattrs) + return -1; + new->memattrs = imattrs; + new->nr_memattrs = old->nr_memattrs; + memcpy(imattrs, old->memattrs, old->nr_memattrs * sizeof(*imattrs)); + + for(id=0; idnr_memattrs; id++) { + struct hwloc_internal_memattr_s *oimattr = &old->memattrs[id]; + struct hwloc_internal_memattr_s *nimattr = &imattrs[id]; + unsigned j; + + assert(oimattr->name); + nimattr->name = hwloc_tma_strdup(tma, oimattr->name); + if (!nimattr->name) { + assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */ + new->nr_memattrs = id; + goto failed; + } + nimattr->iflags &= ~HWLOC_IMATTR_FLAG_STATIC_NAME; + nimattr->iflags &= ~HWLOC_IMATTR_FLAG_CACHE_VALID; /* cache will need refresh */ + + if (!oimattr->nr_targets) + continue; + + nimattr->targets = hwloc_tma_malloc(tma, oimattr->nr_targets * sizeof(*nimattr->targets)); + if (!nimattr->targets) { + free(nimattr->name); + new->nr_memattrs = id; + goto failed; + } + memcpy(nimattr->targets, oimattr->targets, oimattr->nr_targets * sizeof(*nimattr->targets)); + + for(j=0; jnr_targets; j++) { + struct hwloc_internal_memattr_target_s *oimtg = &oimattr->targets[j]; + struct hwloc_internal_memattr_target_s *nimtg = &nimattr->targets[j]; + unsigned k; + + nimtg->obj = NULL; /* cache will need refresh */ + + if (!oimtg->nr_initiators) + continue; + + nimtg->initiators = hwloc_tma_malloc(tma, oimtg->nr_initiators * sizeof(*nimtg->initiators)); + if (!nimtg->initiators) { + nimattr->nr_targets = j; + new->nr_memattrs = id+1; + goto failed; + } + memcpy(nimtg->initiators, oimtg->initiators, oimtg->nr_initiators * sizeof(*nimtg->initiators)); + + for(k=0; knr_initiators; k++) { + struct hwloc_internal_memattr_initiator_s *oimi = &oimtg->initiators[k]; + struct hwloc_internal_memattr_initiator_s *nimi = &nimtg->initiators[k]; + if (oimi->initiator.type == HWLOC_LOCATION_TYPE_CPUSET) { + nimi->initiator.location.cpuset = hwloc_bitmap_tma_dup(tma, oimi->initiator.location.cpuset); + if (!nimi->initiator.location.cpuset) { + nimtg->nr_initiators = k; + nimattr->nr_targets = j+1; + new->nr_memattrs = id+1; + goto failed; + } + } else if (oimi->initiator.type == HWLOC_LOCATION_TYPE_OBJECT) { + nimi->initiator.location.object.obj = NULL; /* cache will need refresh */ + } + } + } + } + return 0; + + failed: + hwloc_internal_memattrs_destroy(new); + return -1; +} + +int +hwloc_memattr_get_by_name(hwloc_topology_t topology, + const char *name, + hwloc_memattr_id_t *idp) +{ + unsigned id; + for(id=0; idnr_memattrs; id++) { + if (!strcmp(topology->memattrs[id].name, name)) { + *idp = id; + return 0; + } + } + errno = EINVAL; + return -1; +} + +int +hwloc_memattr_get_name(hwloc_topology_t topology, + hwloc_memattr_id_t id, + const char **namep) +{ + if (id >= topology->nr_memattrs) { + errno = EINVAL; + return -1; + } + *namep = topology->memattrs[id].name; + return 0; +} + +int +hwloc_memattr_get_flags(hwloc_topology_t topology, + hwloc_memattr_id_t id, + unsigned long *flagsp) +{ + if (id >= topology->nr_memattrs) { + errno = EINVAL; + return -1; + } + *flagsp = topology->memattrs[id].flags; + return 0; +} + +int +hwloc_memattr_register(hwloc_topology_t topology, + const char *_name, + unsigned long flags, + hwloc_memattr_id_t *id) +{ + struct hwloc_internal_memattr_s *newattrs; + char *name; + unsigned i; + + /* check flags */ + if (flags & ~(HWLOC_MEMATTR_FLAG_NEED_INITIATOR|HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_HIGHER_FIRST)) { + errno = EINVAL; + return -1; + } + if (!(flags & 
(HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_HIGHER_FIRST))) { + errno = EINVAL; + return -1; + } + if ((flags & (HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_HIGHER_FIRST)) + == (HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_HIGHER_FIRST)) { + errno = EINVAL; + return -1; + } + + if (!_name) { + errno = EINVAL; + return -1; + } + + /* check name isn't already used */ + for(i=0; inr_memattrs; i++) { + if (!strcmp(_name, topology->memattrs[i].name)) { + errno = EBUSY; + return -1; + } + } + + name = strdup(_name); + if (!name) + return -1; + + newattrs = realloc(topology->memattrs, (topology->nr_memattrs + 1) * sizeof(*topology->memattrs)); + if (!newattrs) { + free(name); + return -1; + } + + hwloc__setup_memattr(&newattrs[topology->nr_memattrs], + name, flags, 0); + + /* memattr valid when just created */ + newattrs[topology->nr_memattrs].iflags |= HWLOC_IMATTR_FLAG_CACHE_VALID; + + *id = topology->nr_memattrs; + topology->nr_memattrs++; + topology->memattrs = newattrs; + return 0; +} + + +/*************************** + * Internal Locations + */ + +/* return 1 if cpuset/obj matchs the existing initiator location, + * for instance if the cpuset of query is included in the cpuset of existing + */ +static int +match_internal_location(struct hwloc_internal_location_s *iloc, + struct hwloc_internal_memattr_initiator_s *imi) +{ + if (iloc->type != imi->initiator.type) + return 0; + switch (iloc->type) { + case HWLOC_LOCATION_TYPE_CPUSET: + return hwloc_bitmap_isincluded(iloc->location.cpuset, imi->initiator.location.cpuset); + case HWLOC_LOCATION_TYPE_OBJECT: + return iloc->location.object.type == imi->initiator.location.object.type + && iloc->location.object.gp_index == imi->initiator.location.object.gp_index; + default: + return 0; + } +} + +static int +to_internal_location(struct hwloc_internal_location_s *iloc, + struct hwloc_location *location) +{ + iloc->type = location->type; + + switch (location->type) { + case HWLOC_LOCATION_TYPE_CPUSET: + if (!location->location.cpuset || hwloc_bitmap_iszero(location->location.cpuset)) { + errno = EINVAL; + return -1; + } + iloc->location.cpuset = location->location.cpuset; + return 0; + case HWLOC_LOCATION_TYPE_OBJECT: + if (!location->location.object) { + errno = EINVAL; + return -1; + } + iloc->location.object.gp_index = location->location.object->gp_index; + iloc->location.object.type = location->location.object->type; + return 0; + default: + errno = EINVAL; + return -1; + } +} + +static int +from_internal_location(struct hwloc_internal_location_s *iloc, + struct hwloc_location *location) +{ + location->type = iloc->type; + + switch (iloc->type) { + case HWLOC_LOCATION_TYPE_CPUSET: + location->location.cpuset = iloc->location.cpuset; + return 0; + case HWLOC_LOCATION_TYPE_OBJECT: + /* requires the cache to be refreshed */ + location->location.object = iloc->location.object.obj; + if (!location->location.object) + return -1; + return 0; + default: + errno = EINVAL; + return -1; + } +} + + +/************************ + * Refreshing + */ + +static int +hwloc__imi_refresh(struct hwloc_topology *topology, + struct hwloc_internal_memattr_initiator_s *imi) +{ + switch (imi->initiator.type) { + case HWLOC_LOCATION_TYPE_CPUSET: { + hwloc_bitmap_and(imi->initiator.location.cpuset, imi->initiator.location.cpuset, topology->levels[0][0]->cpuset); + if (hwloc_bitmap_iszero(imi->initiator.location.cpuset)) { + hwloc__imi_destroy(imi); + return -1; + } + return 0; + } + case HWLOC_LOCATION_TYPE_OBJECT: { + hwloc_obj_t obj = 
hwloc_get_obj_by_type_and_gp_index(topology, + imi->initiator.location.object.type, + imi->initiator.location.object.gp_index); + if (!obj) { + hwloc__imi_destroy(imi); + return -1; + } + imi->initiator.location.object.obj = obj; + return 0; + } + default: + assert(0); + } + return -1; +} + +static int +hwloc__imtg_refresh(struct hwloc_topology *topology, + struct hwloc_internal_memattr_s *imattr, + struct hwloc_internal_memattr_target_s *imtg) +{ + hwloc_obj_t node; + + /* no need to refresh convenience memattrs */ + assert(!(imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE)); + + /* check the target object */ + if (imtg->gp_index == (hwloc_uint64_t) -1) { + /* only NUMA and PU may work with os_index, and only NUMA is currently used internally */ + if (imtg->type == HWLOC_OBJ_NUMANODE) + node = hwloc_get_numanode_obj_by_os_index(topology, imtg->os_index); + else if (imtg->type == HWLOC_OBJ_PU) + node = hwloc_get_pu_obj_by_os_index(topology, imtg->os_index); + else + node = NULL; + } else { + node = hwloc_get_obj_by_type_and_gp_index(topology, imtg->type, imtg->gp_index); + } + if (!node) { + hwloc__imtg_destroy(imattr, imtg); + return -1; + } + + /* save the gp_index in case it wasn't initialized yet */ + imtg->gp_index = node->gp_index; + /* cache the object */ + imtg->obj = node; + + if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) { + /* check the initiators */ + unsigned k, l; + for(k=0, l=0; knr_initiators; k++) { + int err = hwloc__imi_refresh(topology, &imtg->initiators[k]); + if (err < 0) + continue; + if (k != l) + memcpy(&imtg->initiators[l], &imtg->initiators[k], sizeof(*imtg->initiators)); + l++; + } + imtg->nr_initiators = l; + if (!imtg->nr_initiators) { + hwloc__imtg_destroy(imattr, imtg); + return -1; + } + } + return 0; +} + +static void +hwloc__imattr_refresh(struct hwloc_topology *topology, + struct hwloc_internal_memattr_s *imattr) +{ + unsigned j, k; + for(j=0, k=0; jnr_targets; j++) { + int ret = hwloc__imtg_refresh(topology, imattr, &imattr->targets[j]); + if (!ret) { + /* target still valid, move it if some former targets were removed */ + if (j != k) + memcpy(&imattr->targets[k], &imattr->targets[j], sizeof(*imattr->targets)); + k++; + } + } + imattr->nr_targets = k; + imattr->iflags |= HWLOC_IMATTR_FLAG_CACHE_VALID; +} + +void +hwloc_internal_memattrs_refresh(struct hwloc_topology *topology) +{ + unsigned id; + for(id=0; idnr_memattrs; id++) { + struct hwloc_internal_memattr_s *imattr = &topology->memattrs[id]; + if (imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID) + /* nothing to refresh */ + continue; + hwloc__imattr_refresh(topology, imattr); + } +} + +void +hwloc_internal_memattrs_need_refresh(struct hwloc_topology *topology) +{ + unsigned id; + for(id=0; idnr_memattrs; id++) { + struct hwloc_internal_memattr_s *imattr = &topology->memattrs[id]; + if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) + /* no need to refresh convenience memattrs */ + continue; + imattr->iflags &= ~HWLOC_IMATTR_FLAG_CACHE_VALID; + } +} + + +/******************************** + * Targets + */ + +static struct hwloc_internal_memattr_target_s * +hwloc__memattr_get_target(struct hwloc_internal_memattr_s *imattr, + hwloc_obj_type_t target_type, + hwloc_uint64_t target_gp_index, + unsigned target_os_index, + int create) +{ + struct hwloc_internal_memattr_target_s *news, *new; + unsigned j; + + for(j=0; jnr_targets; j++) { + if (target_type == imattr->targets[j].type) + if ((target_gp_index != (hwloc_uint64_t)-1 && target_gp_index == imattr->targets[j].gp_index) + || (target_os_index 
!= (unsigned)-1 && target_os_index == imattr->targets[j].os_index)) + return &imattr->targets[j]; + } + if (!create) + return NULL; + + news = realloc(imattr->targets, (imattr->nr_targets+1)*sizeof(*imattr->targets)); + if (!news) + return NULL; + imattr->targets = news; + + /* FIXME sort targets? by logical index at the end of load? */ + + new = &news[imattr->nr_targets]; + new->type = target_type; + new->gp_index = target_gp_index; + new->os_index = target_os_index; + + /* cached object will be refreshed later on actual access */ + new->obj = NULL; + imattr->iflags &= ~HWLOC_IMATTR_FLAG_CACHE_VALID; + /* When setting a value after load(), the caller has the target object + * (and initiator object, if not CPU set). Hence, we could avoid invalidating + * the cache here. + * The overhead of the imattr-wide refresh isn't high enough so far + * to justify making the cache management more complex. + */ + + new->nr_initiators = 0; + new->initiators = NULL; + new->noinitiator_value = 0; + imattr->nr_targets++; + return new; +} + +static struct hwloc_internal_memattr_initiator_s * +hwloc__memattr_get_initiator_from_location(struct hwloc_internal_memattr_s *imattr, + struct hwloc_internal_memattr_target_s *imtg, + struct hwloc_location *location); + +int +hwloc_memattr_get_targets(hwloc_topology_t topology, + hwloc_memattr_id_t id, + struct hwloc_location *initiator, + unsigned long flags, + unsigned *nrp, hwloc_obj_t *targets, hwloc_uint64_t *values) +{ + struct hwloc_internal_memattr_s *imattr; + unsigned i, found = 0, max; + + if (flags) { + errno = EINVAL; + return -1; + } + + if (!nrp || (*nrp && !targets)) { + errno = EINVAL; + return -1; + } + max = *nrp; + + if (id >= topology->nr_memattrs) { + errno = EINVAL; + return -1; + } + imattr = &topology->memattrs[id]; + + if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) { + /* convenience attributes */ + for(i=0; ; i++) { + hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i); + if (!node) + break; + if (foundiflags & HWLOC_IMATTR_FLAG_CACHE_VALID)) + hwloc__imattr_refresh(topology, imattr); + + for(i=0; inr_targets; i++) { + struct hwloc_internal_memattr_target_s *imtg = &imattr->targets[i]; + hwloc_uint64_t value = 0; + + if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) { + if (initiator) { + /* find a matching initiator */ + struct hwloc_internal_memattr_initiator_s *imi = hwloc__memattr_get_initiator_from_location(imattr, imtg, initiator); + if (!imi) + continue; + value = imi->value; + } + } else { + value = imtg->noinitiator_value; + } + + if (foundobj; + if (values) + values[found] = value; + } + found++; + } + + done: + *nrp = found; + return 0; +} + + +/************************ + * Initiators + */ + +static struct hwloc_internal_memattr_initiator_s * +hwloc__memattr_target_get_initiator(struct hwloc_internal_memattr_target_s *imtg, + struct hwloc_internal_location_s *iloc, + int create) +{ + struct hwloc_internal_memattr_initiator_s *news, *new; + unsigned k; + + for(k=0; knr_initiators; k++) { + struct hwloc_internal_memattr_initiator_s *imi = &imtg->initiators[k]; + if (match_internal_location(iloc, imi)) { + return imi; + } + } + + if (!create) + return NULL; + + news = realloc(imtg->initiators, (imtg->nr_initiators+1)*sizeof(*imtg->initiators)); + if (!news) + return NULL; + new = &news[imtg->nr_initiators]; + + new->initiator = *iloc; + if (iloc->type == HWLOC_LOCATION_TYPE_CPUSET) { + new->initiator.location.cpuset = hwloc_bitmap_dup(iloc->location.cpuset); + if (!new->initiator.location.cpuset) + 
goto out_with_realloc; + } + + imtg->nr_initiators++; + imtg->initiators = news; + return new; + + out_with_realloc: + imtg->initiators = news; + return NULL; +} + +static struct hwloc_internal_memattr_initiator_s * +hwloc__memattr_get_initiator_from_location(struct hwloc_internal_memattr_s *imattr, + struct hwloc_internal_memattr_target_s *imtg, + struct hwloc_location *location) +{ + struct hwloc_internal_memattr_initiator_s *imi; + struct hwloc_internal_location_s iloc; + + assert(imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR); + + /* use the initiator value */ + if (!location) { + errno = EINVAL; + return NULL; + } + + if (to_internal_location(&iloc, location) < 0) { + errno = EINVAL; + return NULL; + } + + imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0); + if (!imi) { + errno = EINVAL; + return NULL; + } + + return imi; +} + +int +hwloc_memattr_get_initiators(hwloc_topology_t topology, + hwloc_memattr_id_t id, + hwloc_obj_t target_node, + unsigned long flags, + unsigned *nrp, struct hwloc_location *initiators, hwloc_uint64_t *values) +{ + struct hwloc_internal_memattr_s *imattr; + struct hwloc_internal_memattr_target_s *imtg; + unsigned i, max; + + if (flags) { + errno = EINVAL; + return -1; + } + + if (!nrp || (*nrp && !initiators)) { + errno = EINVAL; + return -1; + } + max = *nrp; + + if (id >= topology->nr_memattrs) { + errno = EINVAL; + return -1; + } + imattr = &topology->memattrs[id]; + if (!(imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR)) { + *nrp = 0; + return 0; + } + + /* all convenience attributes have no initiators */ + assert(!(imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE)); + + if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID)) + hwloc__imattr_refresh(topology, imattr); + + imtg = hwloc__memattr_get_target(imattr, target_node->type, target_node->gp_index, target_node->os_index, 0); + if (!imtg) { + errno = EINVAL; + return -1; + } + + for(i=0; inr_initiators && iinitiators[i]; + int err = from_internal_location(&imi->initiator, &initiators[i]); + assert(!err); + if (values) + /* no need to handle capacity/locality special cases here, those are initiator-less attributes */ + values[i] = imi->value; + } + + *nrp = imtg->nr_initiators; + return 0; +} + + +/************************** + * Values + */ + +int +hwloc_memattr_get_value(hwloc_topology_t topology, + hwloc_memattr_id_t id, + hwloc_obj_t target_node, + struct hwloc_location *initiator, + unsigned long flags, + hwloc_uint64_t *valuep) +{ + struct hwloc_internal_memattr_s *imattr; + struct hwloc_internal_memattr_target_s *imtg; + + if (flags) { + errno = EINVAL; + return -1; + } + + if (id >= topology->nr_memattrs) { + errno = EINVAL; + return -1; + } + imattr = &topology->memattrs[id]; + + if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) { + /* convenience attributes */ + *valuep = hwloc__memattr_get_convenience_value(id, target_node); + return 0; + } + + /* normal attributes */ + + if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID)) + hwloc__imattr_refresh(topology, imattr); + + imtg = hwloc__memattr_get_target(imattr, target_node->type, target_node->gp_index, target_node->os_index, 0); + if (!imtg) { + errno = EINVAL; + return -1; + } + + if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) { + /* find the initiator and set its value */ + struct hwloc_internal_memattr_initiator_s *imi = hwloc__memattr_get_initiator_from_location(imattr, imtg, initiator); + if (!imi) + return -1; + *valuep = imi->value; + } else { + /* get the no-initiator value */ + *valuep = imtg->noinitiator_value; 
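
The functions above complete the public memory-attribute surface added by this vendored hwloc update (register an attribute, set and get values, list targets and initiators). As an orientation aid only, not part of the patch itself, here is a minimal consumer-side sketch of that API; the attribute name "ExampleBandwidth", the variables topo and node, and the value 42 are illustrative assumptions, and error handling is omitted:

#include <hwloc.h>
#include <hwloc/memattrs.h>  /* memory-attribute declarations, hwloc >= 2.3 */
#include <stdio.h>

int main(void)
{
    hwloc_topology_t topo;
    hwloc_memattr_id_t id;
    struct hwloc_location initiator;
    hwloc_obj_t node;
    hwloc_uint64_t value;

    hwloc_topology_init(&topo);
    hwloc_topology_load(topo);

    /* use the first NUMA node as the target and its cpuset as the initiator */
    node = hwloc_get_obj_by_type(topo, HWLOC_OBJ_NUMANODE, 0);
    initiator.type = HWLOC_LOCATION_TYPE_CPUSET;
    initiator.location.cpuset = node->cpuset;

    /* register a custom attribute (higher is better, initiator required),
     * store one value for (node, initiator), then read it back */
    hwloc_memattr_register(topo, "ExampleBandwidth",
                           HWLOC_MEMATTR_FLAG_HIGHER_FIRST | HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
                           &id);
    hwloc_memattr_set_value(topo, id, node, &initiator, 0, 42);
    if (!hwloc_memattr_get_value(topo, id, node, &initiator, 0, &value))
        printf("ExampleBandwidth of NUMA#0: %llu\n", (unsigned long long) value);

    hwloc_topology_destroy(topo);
    return 0;
}
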
+ } + return 0; +} + +static int +hwloc__internal_memattr_set_value(hwloc_topology_t topology, + hwloc_memattr_id_t id, + hwloc_obj_type_t target_type, + hwloc_uint64_t target_gp_index, + unsigned target_os_index, + struct hwloc_internal_location_s *initiator, + hwloc_uint64_t value) +{ + struct hwloc_internal_memattr_s *imattr; + struct hwloc_internal_memattr_target_s *imtg; + + if (id >= topology->nr_memattrs) { + /* something bad happened during init */ + errno = EINVAL; + return -1; + } + imattr = &topology->memattrs[id]; + + if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) { + /* check given initiator */ + if (!initiator) { + errno = EINVAL; + return -1; + } + } + + if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) { + /* convenience attributes are read-only */ + errno = EINVAL; + return -1; + } + + if (topology->is_loaded && !(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID)) + /* don't refresh when adding values during load (some nodes might not be ready yet), + * we'll refresh later + */ + hwloc__imattr_refresh(topology, imattr); + + imtg = hwloc__memattr_get_target(imattr, target_type, target_gp_index, target_os_index, 1); + if (!imtg) + return -1; + + if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) { + /* find/add the initiator and set its value */ + // FIXME what if cpuset is larger than an existing one ? + struct hwloc_internal_memattr_initiator_s *imi = hwloc__memattr_target_get_initiator(imtg, initiator, 1); + if (!imi) + return -1; + imi->value = value; + + } else { + /* set the no-initiator value */ + imtg->noinitiator_value = value; + } + + return 0; + +} + +int +hwloc_internal_memattr_set_value(hwloc_topology_t topology, + hwloc_memattr_id_t id, + hwloc_obj_type_t target_type, + hwloc_uint64_t target_gp_index, + unsigned target_os_index, + struct hwloc_internal_location_s *initiator, + hwloc_uint64_t value) +{ + assert(id != HWLOC_MEMATTR_ID_CAPACITY); + assert(id != HWLOC_MEMATTR_ID_LOCALITY); + + return hwloc__internal_memattr_set_value(topology, id, target_type, target_gp_index, target_os_index, initiator, value); +} + +int +hwloc_memattr_set_value(hwloc_topology_t topology, + hwloc_memattr_id_t id, + hwloc_obj_t target_node, + struct hwloc_location *initiator, + unsigned long flags, + hwloc_uint64_t value) +{ + struct hwloc_internal_location_s iloc, *ilocp; + + if (flags) { + errno = EINVAL; + return -1; + } + + if (initiator) { + if (to_internal_location(&iloc, initiator) < 0) { + errno = EINVAL; + return -1; + } + ilocp = &iloc; + } else { + ilocp = NULL; + } + + return hwloc__internal_memattr_set_value(topology, id, target_node->type, target_node->gp_index, target_node->os_index, ilocp, value); +} + + +/********************** + * Best target + */ + +static void +hwloc__update_best_target(hwloc_obj_t *best_obj, hwloc_uint64_t *best_value, int *found, + hwloc_obj_t new_obj, hwloc_uint64_t new_value, + int keep_highest) +{ + if (*found) { + if (keep_highest) { + if (new_value <= *best_value) + return; + } else { + if (new_value >= *best_value) + return; + } + } + + *best_obj = new_obj; + *best_value = new_value; + *found = 1; +} + +int +hwloc_memattr_get_best_target(hwloc_topology_t topology, + hwloc_memattr_id_t id, + struct hwloc_location *initiator, + unsigned long flags, + hwloc_obj_t *bestp, hwloc_uint64_t *valuep) +{ + struct hwloc_internal_memattr_s *imattr; + hwloc_uint64_t best_value = 0; /* shutup the compiler */ + hwloc_obj_t best = NULL; + int found = 0; + unsigned j; + + if (flags) { + errno = EINVAL; + return -1; + } + + if (id >= 
topology->nr_memattrs) { + errno = EINVAL; + return -1; + } + imattr = &topology->memattrs[id]; + + if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) { + /* convenience attributes */ + for(j=0; ; j++) { + hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, j); + hwloc_uint64_t value; + if (!node) + break; + value = hwloc__memattr_get_convenience_value(id, node); + hwloc__update_best_target(&best, &best_value, &found, + node, value, + imattr->flags & HWLOC_MEMATTR_FLAG_HIGHER_FIRST); + } + goto done; + } + + /* normal attributes */ + + if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID)) + /* not strictly need */ + hwloc__imattr_refresh(topology, imattr); + + for(j=0; jnr_targets; j++) { + struct hwloc_internal_memattr_target_s *imtg = &imattr->targets[j]; + hwloc_uint64_t value; + if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) { + /* find the initiator and set its value */ + struct hwloc_internal_memattr_initiator_s *imi = hwloc__memattr_get_initiator_from_location(imattr, imtg, initiator); + if (!imi) + continue; + value = imi->value; + } else { + /* get the no-initiator value */ + value = imtg->noinitiator_value; + } + hwloc__update_best_target(&best, &best_value, &found, + imtg->obj, value, + imattr->flags & HWLOC_MEMATTR_FLAG_HIGHER_FIRST); + } + + done: + if (found) { + assert(best); + *bestp = best; + if (valuep) + *valuep = best_value; + return 0; + } else { + errno = ENOENT; + return -1; + } +} + +/********************** + * Best initiators + */ + +static void +hwloc__update_best_initiator(struct hwloc_internal_location_s *best_initiator, hwloc_uint64_t *best_value, int *found, + struct hwloc_internal_location_s *new_initiator, hwloc_uint64_t new_value, + int keep_highest) +{ + if (*found) { + if (keep_highest) { + if (new_value <= *best_value) + return; + } else { + if (new_value >= *best_value) + return; + } + } + + *best_initiator = *new_initiator; + *best_value = new_value; + *found = 1; +} + +int +hwloc_memattr_get_best_initiator(hwloc_topology_t topology, + hwloc_memattr_id_t id, + hwloc_obj_t target_node, + unsigned long flags, + struct hwloc_location *bestp, hwloc_uint64_t *valuep) +{ + struct hwloc_internal_memattr_s *imattr; + struct hwloc_internal_memattr_target_s *imtg; + struct hwloc_internal_location_s best_initiator; + hwloc_uint64_t best_value; + int found; + unsigned i; + + if (flags) { + errno = EINVAL; + return -1; + } + + if (id >= topology->nr_memattrs) { + errno = EINVAL; + return -1; + } + imattr = &topology->memattrs[id]; + + if (!(imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR)) { + errno = EINVAL; + return -1; + } + + if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID)) + /* not strictly need */ + hwloc__imattr_refresh(topology, imattr); + + imtg = hwloc__memattr_get_target(imattr, target_node->type, target_node->gp_index, target_node->os_index, 0); + if (!imtg) { + errno = EINVAL; + return -1; + } + + found = 0; + for(i=0; inr_initiators; i++) { + struct hwloc_internal_memattr_initiator_s *imi = &imtg->initiators[i]; + hwloc__update_best_initiator(&best_initiator, &best_value, &found, + &imi->initiator, imi->value, + imattr->flags & HWLOC_MEMATTR_FLAG_HIGHER_FIRST); + } + + if (found) { + if (valuep) + *valuep = best_value; + return from_internal_location(&best_initiator, bestp); + } else { + errno = ENOENT; + return -1; + } +} + +/**************************** + * Listing local nodes + */ + +static __hwloc_inline int +match_local_obj_cpuset(hwloc_obj_t node, hwloc_cpuset_t cpuset, unsigned long flags) +{ + if (flags & 
HWLOC_LOCAL_NUMANODE_FLAG_ALL) + return 1; + if ((flags & HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY) + && hwloc_bitmap_isincluded(cpuset, node->cpuset)) + return 1; + if ((flags & HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY) + && hwloc_bitmap_isincluded(node->cpuset, cpuset)) + return 1; + return hwloc_bitmap_isequal(node->cpuset, cpuset); +} + +int +hwloc_get_local_numanode_objs(hwloc_topology_t topology, + struct hwloc_location *location, + unsigned *nrp, + hwloc_obj_t *nodes, + unsigned long flags) +{ + hwloc_cpuset_t cpuset; + hwloc_obj_t node; + unsigned i; + + if (flags & ~(HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY + |HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY + | HWLOC_LOCAL_NUMANODE_FLAG_ALL)) { + errno = EINVAL; + return -1; + } + + if (!nrp || (*nrp && !nodes)) { + errno = EINVAL; + return -1; + } + + if (!location) { + if (!(flags & HWLOC_LOCAL_NUMANODE_FLAG_ALL)) { + errno = EINVAL; + return -1; + } + cpuset = NULL; /* unused */ + + } else { + if (location->type == HWLOC_LOCATION_TYPE_CPUSET) { + cpuset = location->location.cpuset; + } else if (location->type == HWLOC_LOCATION_TYPE_OBJECT) { + hwloc_obj_t obj = location->location.object; + while (!obj->cpuset) + obj = obj->parent; + cpuset = obj->cpuset; + } else { + errno = EINVAL; + return -1; + } + } + + i = 0; + for(node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0); + node; + node = node->next_cousin) { + if (!match_local_obj_cpuset(node, cpuset, flags)) + continue; + if (i < *nrp) + nodes[i] = node; + i++; + } + + *nrp = i; + return 0; +} diff --git a/src/3rdparty/hwloc/src/misc.c b/src/3rdparty/hwloc/src/misc.c index a7b9a5eb..6f0b4a5a 100644 --- a/src/3rdparty/hwloc/src/misc.c +++ b/src/3rdparty/hwloc/src/misc.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2010 Université Bordeaux * Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -114,7 +114,7 @@ void hwloc_add_uname_info(struct hwloc_topology *topology __hwloc_attribute_unus char * hwloc_progname(struct hwloc_topology *topology __hwloc_attribute_unused) { -#if HAVE_DECL_GETMODULEFILENAME +#if (defined HAVE_DECL_GETMODULEFILENAME) && HAVE_DECL_GETMODULEFILENAME char name[256], *local_basename; unsigned res = GetModuleFileName(NULL, name, sizeof(name)); if (res == sizeof(name) || !res) diff --git a/src/3rdparty/hwloc/src/pci-common.c b/src/3rdparty/hwloc/src/pci-common.c index a817c8da..1149113b 100644 --- a/src/3rdparty/hwloc/src/pci-common.c +++ b/src/3rdparty/hwloc/src/pci-common.c @@ -232,7 +232,8 @@ enum hwloc_pci_busid_comparison_e { HWLOC_PCI_BUSID_LOWER, HWLOC_PCI_BUSID_HIGHER, HWLOC_PCI_BUSID_INCLUDED, - HWLOC_PCI_BUSID_SUPERSET + HWLOC_PCI_BUSID_SUPERSET, + HWLOC_PCI_BUSID_EQUAL }; static enum hwloc_pci_busid_comparison_e @@ -274,11 +275,8 @@ hwloc_pci_compare_busids(struct hwloc_obj *a, struct hwloc_obj *b) if (a->attr->pcidev.func > b->attr->pcidev.func) return HWLOC_PCI_BUSID_HIGHER; - /* Should never reach here. Abort on both debug builds and - non-debug builds */ - assert(0); - fprintf(stderr, "Bad assertion in hwloc %s:%d (aborting)\n", __FILE__, __LINE__); - exit(1); + /* Should never reach here. 
*/ + return HWLOC_PCI_BUSID_EQUAL; } static void @@ -329,6 +327,23 @@ hwloc_pci_add_object(struct hwloc_obj *parent, struct hwloc_obj **parent_io_firs } return; } + case HWLOC_PCI_BUSID_EQUAL: { + static int reported = 0; + if (!reported && !hwloc_hide_errors()) { + fprintf(stderr, "*********************************************************\n"); + fprintf(stderr, "* hwloc %s received invalid PCI information.\n", HWLOC_VERSION); + fprintf(stderr, "*\n"); + fprintf(stderr, "* Trying to insert PCI object %04x:%02x:%02x.%01x at %04x:%02x:%02x.%01x\n", + new->attr->pcidev.domain, new->attr->pcidev.bus, new->attr->pcidev.dev, new->attr->pcidev.func, + (*curp)->attr->pcidev.domain, (*curp)->attr->pcidev.bus, (*curp)->attr->pcidev.dev, (*curp)->attr->pcidev.func); + fprintf(stderr, "*\n"); + fprintf(stderr, "* hwloc will now ignore this object and continue.\n"); + fprintf(stderr, "*********************************************************\n"); + reported = 1; + } + hwloc_free_unlinked_object(new); + return; + } } } /* add to the end of the list if higher than everybody */ @@ -425,39 +440,10 @@ hwloc_pcidisc_add_hostbridges(struct hwloc_topology *topology, static struct hwloc_obj * hwloc_pci_fixup_busid_parent(struct hwloc_topology *topology __hwloc_attribute_unused, - struct hwloc_pcidev_attr_s *busid, - struct hwloc_obj *parent) + struct hwloc_pcidev_attr_s *busid __hwloc_attribute_unused, + struct hwloc_obj *parent __hwloc_attribute_unused) { - /* Xeon E5v3 in cluster-on-die mode only have PCI on the first NUMA node of each package. - * but many dual-processor host report the second PCI hierarchy on 2nd NUMA of first package. - */ - if (parent->depth >= 2 - && parent->type == HWLOC_OBJ_NUMANODE - && parent->sibling_rank == 1 && parent->parent->arity == 2 - && parent->parent->type == HWLOC_OBJ_PACKAGE - && parent->parent->sibling_rank == 0 && parent->parent->parent->arity == 2) { - const char *cpumodel = hwloc_obj_get_info_by_name(parent->parent, "CPUModel"); - if (cpumodel && strstr(cpumodel, "Xeon")) { - if (!hwloc_hide_errors()) { - fprintf(stderr, "****************************************************************************\n"); - fprintf(stderr, "* hwloc %s has encountered an incorrect PCI locality information.\n", HWLOC_VERSION); - fprintf(stderr, "* PCI bus %04x:%02x is supposedly close to 2nd NUMA node of 1st package,\n", - busid->domain, busid->bus); - fprintf(stderr, "* however hwloc believes this is impossible on this architecture.\n"); - fprintf(stderr, "* Therefore the PCI bus will be moved to 1st NUMA node of 2nd package.\n"); - fprintf(stderr, "*\n"); - fprintf(stderr, "* If you feel this fixup is wrong, disable it by setting in your environment\n"); - fprintf(stderr, "* HWLOC_PCI_%04x_%02x_LOCALCPUS= (empty value), and report the problem\n", - busid->domain, busid->bus); - fprintf(stderr, "* to the hwloc's user mailing list together with the XML output of lstopo.\n"); - fprintf(stderr, "*\n"); - fprintf(stderr, "* You may silence this message by setting HWLOC_HIDE_ERRORS=1 in your environment.\n"); - fprintf(stderr, "****************************************************************************\n"); - } - return parent->parent->next_sibling->first_child; - } - } - + /* no quirk for now */ return parent; } diff --git a/src/3rdparty/hwloc/src/shmem.c b/src/3rdparty/hwloc/src/shmem.c index 94d55eef..c73c6d92 100644 --- a/src/3rdparty/hwloc/src/shmem.c +++ b/src/3rdparty/hwloc/src/shmem.c @@ -1,5 +1,5 @@ /* - * Copyright © 2017-2019 Inria. All rights reserved. 
+ * Copyright © 2017-2020 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -97,6 +97,7 @@ hwloc_shmem_topology_write(hwloc_topology_t topology, * without being able to free() them. */ hwloc_internal_distances_refresh(topology); + hwloc_internal_memattrs_refresh(topology); header.header_version = HWLOC_SHMEM_HEADER_VERSION; header.header_length = sizeof(header); @@ -134,8 +135,9 @@ hwloc_shmem_topology_write(hwloc_topology_t topology, assert((char *)mmap_res <= (char *)mmap_address + length); - /* now refresh the new distances so that adopters can use them without refreshing the R/O shmem mapping */ + /* now refresh the new distances/memattrs so that adopters can use them without refreshing the R/O shmem mapping */ hwloc_internal_distances_refresh(new); + hwloc_internal_memattrs_refresh(topology); /* topology is saved, release resources now */ munmap(mmap_address, length); @@ -214,11 +216,13 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp, new->support.discovery = malloc(sizeof(*new->support.discovery)); new->support.cpubind = malloc(sizeof(*new->support.cpubind)); new->support.membind = malloc(sizeof(*new->support.membind)); - if (!new->support.discovery || !new->support.cpubind || !new->support.membind) + new->support.misc = malloc(sizeof(*new->support.misc)); + if (!new->support.discovery || !new->support.cpubind || !new->support.membind || !new->support.misc) goto out_with_support; memcpy(new->support.discovery, old->support.discovery, sizeof(*new->support.discovery)); memcpy(new->support.cpubind, old->support.cpubind, sizeof(*new->support.cpubind)); memcpy(new->support.membind, old->support.membind, sizeof(*new->support.membind)); + memcpy(new->support.misc, old->support.misc, sizeof(*new->support.misc)); hwloc_set_binding_hooks(new); /* clear userdata callbacks pointing to the writer process' functions */ new->userdata_export_cb = NULL; @@ -236,6 +240,7 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp, free(new->support.discovery); free(new->support.cpubind); free(new->support.membind); + free(new->support.misc); free(new); out_with_components: hwloc_components_fini(); @@ -252,6 +257,7 @@ hwloc__topology_disadopt(hwloc_topology_t topology) free(topology->support.discovery); free(topology->support.cpubind); free(topology->support.membind); + free(topology->support.misc); free(topology); } diff --git a/src/3rdparty/hwloc/src/static-components.h b/src/3rdparty/hwloc/src/static-components.h index dac227a6..f2cb254a 100644 --- a/src/3rdparty/hwloc/src/static-components.h +++ b/src/3rdparty/hwloc/src/static-components.h @@ -1,9 +1,4 @@ -HWLOC_DECLSPEC extern const struct hwloc_component hwloc_noos_component; -HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_component; -HWLOC_DECLSPEC extern const struct hwloc_component hwloc_synthetic_component; -HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_nolibxml_component; -HWLOC_DECLSPEC extern const struct hwloc_component hwloc_windows_component; -HWLOC_DECLSPEC extern const struct hwloc_component hwloc_x86_component; +#include static const struct hwloc_component * hwloc_static_components[] = { &hwloc_noos_component, &hwloc_xml_component, diff --git a/src/3rdparty/hwloc/src/topology-synthetic.c b/src/3rdparty/hwloc/src/topology-synthetic.c index 50092e47..5dd4baaa 100644 --- a/src/3rdparty/hwloc/src/topology-synthetic.c +++ b/src/3rdparty/hwloc/src/topology-synthetic.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2019 Inria. All rights reserved. 
+ * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2010 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -471,7 +471,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, /* initialize parent arity to 0 so that the levels are not infinite */ data->level[count-1].arity = 0; - while (*pos == ' ') + while (*pos == ' ' || *pos == '\n') pos++; if (!*pos) @@ -912,7 +912,7 @@ hwloc_synthetic_insert_attached(struct hwloc_topology *topology, hwloc_synthetic_set_attr(&attached->attr, child); - hwloc_insert_object_by_cpuset(topology, child); + hwloc__insert_object_by_cpuset(topology, NULL, child, "synthetic:attached"); hwloc_synthetic_insert_attached(topology, data, attached->next, set); } @@ -964,7 +964,7 @@ hwloc__look_synthetic(struct hwloc_topology *topology, hwloc_synthetic_set_attr(&curlevel->attr, obj); - hwloc_insert_object_by_cpuset(topology, obj); + hwloc__insert_object_by_cpuset(topology, NULL, obj, "synthetic"); } hwloc_synthetic_insert_attached(topology, data, curlevel->attached, set); diff --git a/src/3rdparty/hwloc/src/topology-windows.c b/src/3rdparty/hwloc/src/topology-windows.c index 195e5e22..b6458b6f 100644 --- a/src/3rdparty/hwloc/src/topology-windows.c +++ b/src/3rdparty/hwloc/src/topology-windows.c @@ -93,9 +93,10 @@ typedef struct _GROUP_AFFINITY { #endif #ifndef HAVE_PROCESSOR_RELATIONSHIP -typedef struct _PROCESSOR_RELATIONSHIP { +typedef struct HWLOC_PROCESSOR_RELATIONSHIP { BYTE Flags; - BYTE Reserved[21]; + BYTE EfficiencyClass; /* for RelationProcessorCore, higher means greater performance but less efficiency, only available in Win10+ */ + BYTE Reserved[20]; WORD GroupCount; GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY]; } PROCESSOR_RELATIONSHIP, *PPROCESSOR_RELATIONSHIP; @@ -228,9 +229,12 @@ static PFN_VIRTUALFREEEX VirtualFreeExProc; typedef BOOL (WINAPI *PFN_QUERYWORKINGSETEX)(HANDLE hProcess, PVOID pv, DWORD cb); static PFN_QUERYWORKINGSETEX QueryWorkingSetExProc; +typedef NTSTATUS (WINAPI *PFN_RTLGETVERSION)(OSVERSIONINFOEX*); +PFN_RTLGETVERSION RtlGetVersionProc; + static void hwloc_win_get_function_ptrs(void) { - HMODULE kernel32; + HMODULE kernel32, ntdll; #if HWLOC_HAVE_GCC_W_CAST_FUNCTION_TYPE #pragma GCC diagnostic ignored "-Wcast-function-type" @@ -275,6 +279,9 @@ static void hwloc_win_get_function_ptrs(void) QueryWorkingSetExProc = (PFN_QUERYWORKINGSETEX) GetProcAddress(psapi, "QueryWorkingSetEx"); } + ntdll = GetModuleHandle("ntdll"); + RtlGetVersionProc = (PFN_RTLGETVERSION) GetProcAddress(ntdll, "RtlGetVersion"); + #if HWLOC_HAVE_GCC_W_CAST_FUNCTION_TYPE #pragma GCC diagnostic warning "-Wcast-function-type" #endif @@ -734,6 +741,88 @@ hwloc_win_get_area_memlocation(hwloc_topology_t topology __hwloc_attribute_unuse } + +/************************* + * Efficiency classes + */ + +struct hwloc_win_efficiency_classes { + unsigned nr_classes; + unsigned nr_classes_allocated; + struct hwloc_win_efficiency_class { + unsigned value; + hwloc_bitmap_t cpuset; + } *classes; +}; + +static void +hwloc_win_efficiency_classes_init(struct hwloc_win_efficiency_classes *classes) +{ + classes->classes = NULL; + classes->nr_classes_allocated = 0; + classes->nr_classes = 0; +} + +static int +hwloc_win_efficiency_classes_add(struct hwloc_win_efficiency_classes *classes, + hwloc_const_bitmap_t cpuset, + unsigned value) +{ + unsigned i; + + /* look for existing class with that efficiency value */ + for(i=0; inr_classes; i++) { + if (classes->classes[i].value == 
value) { + hwloc_bitmap_or(classes->classes[i].cpuset, classes->classes[i].cpuset, cpuset); + return 0; + } + } + + /* extend the array if needed */ + if (classes->nr_classes == classes->nr_classes_allocated) { + struct hwloc_win_efficiency_class *tmp; + unsigned new_nr_allocated = 2*classes->nr_classes_allocated; + if (!new_nr_allocated) { +#define HWLOC_WIN_EFFICIENCY_CLASSES_DEFAULT_MAX 4 /* 2 should be enough is most cases */ + new_nr_allocated = HWLOC_WIN_EFFICIENCY_CLASSES_DEFAULT_MAX; + } + tmp = realloc(classes->classes, new_nr_allocated * sizeof(*classes->classes)); + if (!tmp) + return -1; + classes->classes = tmp; + classes->nr_classes_allocated = new_nr_allocated; + } + + /* add new class */ + classes->classes[classes->nr_classes].cpuset = hwloc_bitmap_alloc(); + if (!classes->classes[classes->nr_classes].cpuset) + return -1; + classes->classes[classes->nr_classes].value = value; + hwloc_bitmap_copy(classes->classes[classes->nr_classes].cpuset, cpuset); + classes->nr_classes++; + return 0; +} + +static void +hwloc_win_efficiency_classes_register(hwloc_topology_t topology, + struct hwloc_win_efficiency_classes *classes) +{ + unsigned i; + for(i=0; inr_classes; i++) { + hwloc_internal_cpukinds_register(topology, classes->classes[i].cpuset, classes->classes[i].value, NULL, 0, 0); + classes->classes[i].cpuset = NULL; /* given to cpukinds */ + } +} + +static void +hwloc_win_efficiency_classes_destroy(struct hwloc_win_efficiency_classes *classes) +{ + unsigned i; + for(i=0; inr_classes; i++) + hwloc_bitmap_free(classes->classes[i].cpuset); + free(classes->classes); +} + /************************* * discovery */ @@ -753,6 +842,12 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta DWORD length; int gotnuma = 0; int gotnumamemory = 0; + OSVERSIONINFOEX osvi; + char versionstr[20]; + char hostname[122] = ""; + unsigned hostname_size = sizeof(hostname); + int has_efficiencyclass = 0; + struct hwloc_win_efficiency_classes eclasses; assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); @@ -760,6 +855,25 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta /* somebody discovered things */ return -1; + ZeroMemory(&osvi, sizeof(OSVERSIONINFOEX)); + osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX); + + if (RtlGetVersionProc) { + /* RtlGetVersion() returns the currently-running Windows version */ + RtlGetVersionProc(&osvi); + } else { + /* GetVersionEx() and isWindows10OrGreater() depend on what the manifest says + * (manifest of the program, not of libhwloc.dll), they may return old versions + * if the currently-running Windows is not listed in the manifest. 
+ */ + GetVersionEx((LPOSVERSIONINFO)&osvi); + } + + if (osvi.dwMajorVersion >= 10) { + has_efficiencyclass = 1; + hwloc_win_efficiency_classes_init(&eclasses); + } + hwloc_alloc_root_sets(topology->levels[0][0]); GetSystemInfo(&SystemInfo); @@ -887,7 +1001,7 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta default: break; } - hwloc_insert_object_by_cpuset(topology, obj); + hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformation"); } free(procInfo); @@ -919,6 +1033,7 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta (void*) procInfo < (void*) ((uintptr_t) procInfoTotal + length); procInfo = (void*) ((uintptr_t) procInfo + procInfo->Size)) { unsigned num, i; + unsigned efficiency_class = 0; GROUP_AFFINITY *GroupMask; /* Ignore unknown caches */ @@ -953,6 +1068,11 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta type = HWLOC_OBJ_CORE; num = procInfo->Processor.GroupCount; GroupMask = procInfo->Processor.GroupMask; + if (has_efficiencyclass) + /* the EfficiencyClass field didn't exist before Windows10 and recent MSVC headers, + * so just access it manually instead of trying to detect it. + */ + efficiency_class = * ((&procInfo->Processor.Flags) + 1); break; case RelationGroup: /* So strange an interface... */ @@ -981,7 +1101,7 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, id); obj->cpuset = set; obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP; - hwloc_insert_object_by_cpuset(topology, obj); + hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformation:ProcessorGroup"); } else hwloc_bitmap_free(set); } @@ -1005,6 +1125,11 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta } hwloc_debug_2args_bitmap("%s#%u bitmap %s\n", hwloc_obj_type_string(type), id, obj->cpuset); switch (type) { + case HWLOC_OBJ_CORE: { + if (has_efficiencyclass) + hwloc_win_efficiency_classes_add(&eclasses, obj->cpuset, efficiency_class); + break; + } case HWLOC_OBJ_NUMANODE: { ULONGLONG avail; @@ -1055,7 +1180,7 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta default: break; } - hwloc_insert_object_by_cpuset(topology, obj); + hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformationEx"); } free(procInfoTotal); } @@ -1076,29 +1201,88 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta hwloc_bitmap_only(obj->cpuset, idx); hwloc_debug_1arg_bitmap("cpu %u has cpuset %s\n", idx, obj->cpuset); - hwloc_insert_object_by_cpuset(topology, obj); + hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:ProcessorGroup:pu"); } hwloc_bitmap_foreach_end(); hwloc_bitmap_free(groups_pu_set); } else { /* no processor groups */ - SYSTEM_INFO sysinfo; hwloc_obj_t obj; unsigned idx; - GetSystemInfo(&sysinfo); for(idx=0; idx<32; idx++) - if (sysinfo.dwActiveProcessorMask & (((DWORD_PTR)1)<cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_only(obj->cpuset, idx); hwloc_debug_1arg_bitmap("cpu %u has cpuset %s\n", idx, obj->cpuset); - hwloc_insert_object_by_cpuset(topology, obj); + hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:pu"); } } + if (has_efficiencyclass) { + topology->support.discovery->cpukind_efficiency = 1; + hwloc_win_efficiency_classes_register(topology, &eclasses); + } + out: + if (has_efficiencyclass) + 
hwloc_win_efficiency_classes_destroy(&eclasses); + + /* emulate uname instead of calling hwloc_add_uname_info() */ hwloc_obj_add_info(topology->levels[0][0], "Backend", "Windows"); - hwloc_add_uname_info(topology, NULL); + hwloc_obj_add_info(topology->levels[0][0], "OSName", "Windows"); + +#if defined(__CYGWIN__) + hwloc_obj_add_info(topology->levels[0][0], "WindowsBuildEnvironment", "Cygwin"); +#elif defined(__MINGW32__) + hwloc_obj_add_info(topology->levels[0][0], "WindowsBuildEnvironment", "MinGW"); +#endif + + /* see https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-osversioninfoexa */ + if (osvi.dwMajorVersion == 10) { + if (osvi.dwMinorVersion == 0) + hwloc_obj_add_info(topology->levels[0][0], "OSRelease", "10"); + } else if (osvi.dwMajorVersion == 6) { + if (osvi.dwMinorVersion == 3) + hwloc_obj_add_info(topology->levels[0][0], "OSRelease", "8.1"); /* or "Server 2012 R2" */ + else if (osvi.dwMinorVersion == 2) + hwloc_obj_add_info(topology->levels[0][0], "OSRelease", "8"); /* or "Server 2012" */ + else if (osvi.dwMinorVersion == 1) + hwloc_obj_add_info(topology->levels[0][0], "OSRelease", "7"); /* or "Server 2008 R2" */ + else if (osvi.dwMinorVersion == 0) + hwloc_obj_add_info(topology->levels[0][0], "OSRelease", "Vista"); /* or "Server 2008" */ + } /* earlier versions are ignored */ + + snprintf(versionstr, sizeof(versionstr), "%u.%u.%u", osvi.dwMajorVersion, osvi.dwMinorVersion, osvi.dwBuildNumber); + hwloc_obj_add_info(topology->levels[0][0], "OSVersion", versionstr); + +#if !defined(__CYGWIN__) + GetComputerName(hostname, &hostname_size); +#else + gethostname(hostname, hostname_size); +#endif + if (*hostname) + hwloc_obj_add_info(topology->levels[0][0], "Hostname", hostname); + + /* convert to unix-like architecture strings */ + switch (SystemInfo.wProcessorArchitecture) { + case 0: + hwloc_obj_add_info(topology->levels[0][0], "Architecture", "i686"); + break; + case 9: + hwloc_obj_add_info(topology->levels[0][0], "Architecture", "x86_64"); + break; + case 5: + hwloc_obj_add_info(topology->levels[0][0], "Architecture", "arm"); + break; + case 12: + hwloc_obj_add_info(topology->levels[0][0], "Architecture", "arm64"); + break; + case 6: + hwloc_obj_add_info(topology->levels[0][0], "Architecture", "ia64"); + break; + } + return 0; } diff --git a/src/3rdparty/hwloc/src/topology-x86.c b/src/3rdparty/hwloc/src/topology-x86.c index 1060157d..267384ee 100644 --- a/src/3rdparty/hwloc/src/topology-x86.c +++ b/src/3rdparty/hwloc/src/topology-x86.c @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2019 Inria. All rights reserved. + * Copyright © 2010-2020 Inria. All rights reserved. * Copyright © 2010-2013 Université Bordeaux * Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
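
The Windows hunks above read the per-core EfficiencyClass on Windows 10 or later and register one CPU kind per class, and the x86 hunks below do the same from CPUID leaf 0x1a on hybrid CPUs; the common consumer path is hwloc's public cpukinds API. The following sketch is not part of the patch and assumes the hwloc_cpukinds_get_nr()/hwloc_cpukinds_get_info() entry points shipped with this hwloc generation are available in this vendored copy; it simply lists the kinds registered by either backend:

#include <hwloc.h>
#include <hwloc/cpukinds.h>  /* CPU-kind declarations, hwloc >= 2.4 (assumed present here) */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    hwloc_topology_t topo;
    hwloc_bitmap_t cpuset;
    int i, nr;

    hwloc_topology_init(&topo);
    hwloc_topology_load(topo);
    cpuset = hwloc_bitmap_alloc();

    nr = hwloc_cpukinds_get_nr(topo, 0);
    for (i = 0; i < nr; i++) {
        int efficiency = -1;  /* -1 when hwloc could not rank this kind */
        char *s = NULL;
        if (hwloc_cpukinds_get_info(topo, (unsigned) i, cpuset, &efficiency, NULL, NULL, 0) < 0)
            continue;
        hwloc_bitmap_asprintf(&s, cpuset);
        printf("CPU kind #%d: efficiency %d, PUs %s\n", i, efficiency, s);
        free(s);
    }

    hwloc_bitmap_free(cpuset);
    hwloc_topology_destroy(topo);
    return 0;
}
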
@@ -181,6 +181,7 @@ enum hwloc_x86_disc_flags { #define has_topoext(features) ((features)[6] & (1 << 22)) #define has_x2apic(features) ((features)[4] & (1 << 21)) +#define has_hybrid(features) ((features)[18] & (1 << 15)) struct cacheinfo { hwloc_obj_cache_type_t type; @@ -217,6 +218,9 @@ struct procinfo { unsigned cpustepping; unsigned cpumodelnumber; unsigned cpufamilynumber; + + unsigned hybridcoretype; + unsigned hybridnativemodel; }; enum cpuid_type { @@ -681,6 +685,15 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns } } + if (highest_cpuid >= 0x1a && has_hybrid(features)) { + /* Get hybrid cpu information from cpuid 0x1a */ + eax = 0x1a; + ecx = 0; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + infos->hybridcoretype = eax >> 24; + infos->hybridnativemodel = eax & 0xffffff; + } + /********************************************************************************* * Get the hierarchy of thread, core, die, package, etc. from CPU-specific leaves */ @@ -751,7 +764,13 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns /* default cacheid value */ cache->cacheid = infos->apicid / cache->nbthreads_sharing; - if (cpuid_type == amd) { + if (cpuid_type == intel) { + /* round nbthreads_sharing to nearest power of two to build a mask (for clearing lower bits) */ + unsigned bits = hwloc_flsl(cache->nbthreads_sharing-1); + unsigned mask = ~((1U<cacheid = infos->apicid & mask; + + } else if (cpuid_type == amd) { /* AMD quirks */ if (infos->cpufamilynumber == 0x17 && cache->level == 3 && cache->nbthreads_sharing == 6) { @@ -872,7 +891,7 @@ hwloc_x86_add_groups(hwloc_topology_t topology, obj->attr->group.dont_merge = dont_merge; hwloc_debug_2args_bitmap("os %s %u has cpuset %s\n", subtype, id, obj_cpuset); - hwloc_insert_object_by_cpuset(topology, obj); + hwloc__insert_object_by_cpuset(topology, NULL, obj, "x86:group"); } } @@ -930,7 +949,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns hwloc_debug_1arg_bitmap("os package %u has cpuset %s\n", packageid, package_cpuset); - hwloc_insert_object_by_cpuset(topology, package); + hwloc__insert_object_by_cpuset(topology, NULL, package, "x86:package"); } else { /* Annotate packages previously-existing packages */ @@ -986,7 +1005,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns hwloc_bitmap_set(node->nodeset, nodeid); hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n", nodeid, node_cpuset); - hwloc_insert_object_by_cpuset(topology, node); + hwloc__insert_object_by_cpuset(topology, NULL, node, "x86:numa"); gotnuma++; } } @@ -1033,7 +1052,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns unknown_obj->attr->group.subkind = level; hwloc_debug_2args_bitmap("os unknown%u %u has cpuset %s\n", level, unknownid, unknown_cpuset); - hwloc_insert_object_by_cpuset(topology, unknown_obj); + hwloc__insert_object_by_cpuset(topology, NULL, unknown_obj, "x86:group:unknown"); } } } @@ -1073,7 +1092,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns die->cpuset = die_cpuset; hwloc_debug_1arg_bitmap("os die %u has cpuset %s\n", dieid, die_cpuset); - hwloc_insert_object_by_cpuset(topology, die); + hwloc__insert_object_by_cpuset(topology, NULL, die, "x86:die"); } } } @@ -1111,7 +1130,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns core->cpuset = core_cpuset; hwloc_debug_1arg_bitmap("os core %u has cpuset %s\n", coreid, 
core_cpuset); - hwloc_insert_object_by_cpuset(topology, core); + hwloc__insert_object_by_cpuset(topology, NULL, core, "x86:core"); } } } @@ -1125,7 +1144,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns obj->cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_only(obj->cpuset, i); hwloc_debug_1arg_bitmap("PU %u has cpuset %s\n", i, obj->cpuset); - hwloc_insert_object_by_cpuset(topology, obj); + hwloc__insert_object_by_cpuset(topology, NULL, obj, "x86:pu"); } } @@ -1208,7 +1227,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0"); hwloc_debug_2args_bitmap("os L%u cache %u has cpuset %s\n", level, cacheid, cache_cpuset); - hwloc_insert_object_by_cpuset(topology, cache); + hwloc__insert_object_by_cpuset(topology, NULL, cache, "x86:cache"); } } } @@ -1274,8 +1293,41 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long hwloc_bitmap_free(orig_cpuset); } - if (data->apicid_unique) + if (data->apicid_unique) { summarize(backend, infos, flags); + + if (has_hybrid(features)) { + /* use hybrid info for cpukinds */ + hwloc_bitmap_t atomset = hwloc_bitmap_alloc(); + hwloc_bitmap_t coreset = hwloc_bitmap_alloc(); + for(i=0; iapicid_unique, do nothing and return success, so that the caller does nothing either */ return 0; @@ -1354,7 +1406,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags) unsigned highest_cpuid; unsigned highest_ext_cpuid; /* This stores cpuid features with the same indexing as Linux */ - unsigned features[10] = { 0 }; + unsigned features[19] = { 0 }; struct procinfo *infos = NULL; enum cpuid_type cpuid_type = unknown; hwloc_x86_os_state_t os_state; @@ -1381,6 +1433,9 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags) /* check if binding works */ memset(&hooks, 0, sizeof(hooks)); support.membind = &memsupport; + /* We could just copy the main hooks (except in some corner cases), + * but the current overhead is negligible, so just always reget them. 
+ */ hwloc_set_native_binding_hooks(&hooks, &support); if (hooks.get_thisthread_cpubind && hooks.set_thisthread_cpubind) { get_cpubind = hooks.get_thisthread_cpubind; @@ -1451,6 +1506,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags) ecx = 0; cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); features[9] = ebx; + features[18] = edx; } if (cpuid_type != intel && highest_ext_cpuid >= 0x80000001) { diff --git a/src/3rdparty/hwloc/src/topology-xml-nolibxml.c b/src/3rdparty/hwloc/src/topology-xml-nolibxml.c index 5a0006a0..c0691962 100644 --- a/src/3rdparty/hwloc/src/topology-xml-nolibxml.c +++ b/src/3rdparty/hwloc/src/topology-xml-nolibxml.c @@ -213,7 +213,7 @@ hwloc__nolibxml_import_close_child(hwloc__xml_import_state_t state) static int hwloc__nolibxml_import_get_content(hwloc__xml_import_state_t state, - char **beginp, size_t expected_length) + const char **beginp, size_t expected_length) { hwloc__nolibxml_import_state_data_t nstate = (void*) state->data; char *buffer = nstate->tagbuffer; @@ -224,7 +224,7 @@ hwloc__nolibxml_import_get_content(hwloc__xml_import_state_t state, if (nstate->closed) { if (expected_length) return -1; - *beginp = (char *) ""; + *beginp = ""; return 0; } diff --git a/src/3rdparty/hwloc/src/topology-xml.c b/src/3rdparty/hwloc/src/topology-xml.c index ba242853..fe04dd94 100644 --- a/src/3rdparty/hwloc/src/topology-xml.c +++ b/src/3rdparty/hwloc/src/topology-xml.c @@ -1,7 +1,7 @@ /* * Copyright © 2009 CNRS * Copyright © 2009-2020 Inria. All rights reserved. - * Copyright © 2009-2011 Université Bordeaux + * Copyright © 2009-2011, 2020 Université Bordeaux * Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ @@ -481,11 +481,9 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, } } - static int -hwloc__xml_import_info(struct hwloc_xml_backend_data_s *data, - hwloc_obj_t obj, - hwloc__xml_import_state_t state) +hwloc___xml_import_info(char **infonamep, char **infovaluep, + hwloc__xml_import_state_t state) { char *infoname = NULL; char *infovalue = NULL; @@ -502,6 +500,25 @@ hwloc__xml_import_info(struct hwloc_xml_backend_data_s *data, return -1; } + *infonamep = infoname; + *infovaluep = infovalue; + + return state->global->close_tag(state); +} + +static int +hwloc__xml_import_obj_info(struct hwloc_xml_backend_data_s *data, + hwloc_obj_t obj, + hwloc__xml_import_state_t state) +{ + char *infoname = NULL; + char *infovalue = NULL; + int err; + + err = hwloc___xml_import_info(&infoname, &infovalue, state); + if (err < 0) + return err; + if (infoname) { /* empty strings are ignored by libxml */ if (data->version_major < 2 && @@ -518,7 +535,7 @@ hwloc__xml_import_info(struct hwloc_xml_backend_data_s *data, } } - return state->global->close_tag(state); + return err; } static int @@ -694,14 +711,15 @@ hwloc__xml_import_userdata(hwloc_topology_t topology __hwloc_attribute_unused, h } if (!topology->userdata_import_cb) { - char *buffer; + const char *buffer; size_t reallength = encoded ? BASE64_ENCODED_LENGTH(length) : length; ret = state->global->get_content(state, &buffer, reallength); if (ret < 0) return -1; } else if (topology->userdata_not_decoded) { - char *buffer, *fakename; + const char *buffer; + char *fakename; size_t reallength = encoded ? 
BASE64_ENCODED_LENGTH(length) : length; ret = state->global->get_content(state, &buffer, reallength); if (ret < 0) @@ -714,7 +732,7 @@ hwloc__xml_import_userdata(hwloc_topology_t topology __hwloc_attribute_unused, h free(fakename); } else if (encoded && length) { - char *encoded_buffer; + const char *encoded_buffer; size_t encoded_length = BASE64_ENCODED_LENGTH(length); ret = state->global->get_content(state, &encoded_buffer, encoded_length); if (ret < 0) @@ -734,7 +752,7 @@ hwloc__xml_import_userdata(hwloc_topology_t topology __hwloc_attribute_unused, h } } else { /* always handle length==0 in the non-encoded case */ - char *buffer = (char *) ""; + const char *buffer = ""; if (length) { ret = state->global->get_content(state, &buffer, length); if (ret < 0) @@ -888,7 +906,7 @@ hwloc__xml_import_object(hwloc_topology_t topology, } } else if (!strcmp(tag, "info")) { - ret = hwloc__xml_import_info(data, obj, &childstate); + ret = hwloc__xml_import_obj_info(data, obj, &childstate); } else if (data->version_major < 2 && !strcmp(tag, "distances")) { ret = hwloc__xml_v1import_distances(data, obj, &childstate); } else if (!strcmp(tag, "userdata")) { @@ -1238,6 +1256,80 @@ hwloc__xml_import_object(hwloc_topology_t topology, return -1; } +static int +hwloc__xml_v2import_support(hwloc_topology_t topology, + hwloc__xml_import_state_t state) +{ + char *name = NULL; + int value = 1; /* value is optional */ + while (1) { + char *attrname, *attrvalue; + if (state->global->next_attr(state, &attrname, &attrvalue) < 0) + break; + if (!strcmp(attrname, "name")) + name = attrvalue; + else if (!strcmp(attrname, "value")) + value = atoi(attrvalue); + else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring unknown support attribute %s\n", + state->global->msgprefix, attrname); + } + } + + if (name && topology->flags & HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT) { +#ifdef HWLOC_DEBUG + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_support) == 4*sizeof(void*)); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_discovery_support) == 6); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_cpubind_support) == 11); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 15); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_misc_support) == 1); +#endif + +#define DO(_cat,_name) if (!strcmp(#_cat "." 
#_name, name)) topology->support._cat->_name = value + DO(discovery,pu); + else DO(discovery,numa); + else DO(discovery,numa_memory); + else DO(discovery,disallowed_pu); + else DO(discovery,disallowed_numa); + else DO(discovery,cpukind_efficiency); + else DO(cpubind,set_thisproc_cpubind); + else DO(cpubind,get_thisproc_cpubind); + else DO(cpubind,set_proc_cpubind); + else DO(cpubind,get_proc_cpubind); + else DO(cpubind,set_thisthread_cpubind); + else DO(cpubind,get_thisthread_cpubind); + else DO(cpubind,set_thread_cpubind); + else DO(cpubind,get_thread_cpubind); + else DO(cpubind,get_thisproc_last_cpu_location); + else DO(cpubind,get_proc_last_cpu_location); + else DO(cpubind,get_thisthread_last_cpu_location); + else DO(membind,set_thisproc_membind); + else DO(membind,get_thisproc_membind); + else DO(membind,set_proc_membind); + else DO(membind,get_proc_membind); + else DO(membind,set_thisthread_membind); + else DO(membind,get_thisthread_membind); + else DO(membind,set_area_membind); + else DO(membind,get_area_membind); + else DO(membind,alloc_membind); + else DO(membind,firsttouch_membind); + else DO(membind,bind_membind); + else DO(membind,interleave_membind); + else DO(membind,nexttouch_membind); + else DO(membind,migrate_membind); + else DO(membind,get_area_memlocation); + + else if (!strcmp("custom.exported_support", name)) + /* support was exported in a custom/fake field, mark it as imported here */ + topology->support.misc->imported_support = 1; + +#undef DO + } + + return 0; +} + static int hwloc__xml_v2import_distances(hwloc_topology_t topology, hwloc__xml_import_state_t state, @@ -1317,7 +1409,8 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, nr_u64values = 0; while (1) { struct hwloc__xml_import_state_s childstate; - char *attrname, *attrvalue, *tag, *buffer; + char *attrname, *attrvalue, *tag; + const char *buffer; int length; int is_index = 0; int is_u64values = 0; @@ -1356,7 +1449,7 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, if (is_index) { /* get indexes */ - char *tmp, *tmp2; + const char *tmp, *tmp2; if (nr_indexes >= nbobjs) { if (hwloc__xml_verbose()) fprintf(stderr, "%s: %s with more than %u indexes\n", @@ -1398,7 +1491,7 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, } else if (is_u64values) { /* get uint64_t values */ - char *tmp; + const char *tmp; if (nr_u64values >= nbobjs*nbobjs) { if (hwloc__xml_verbose()) fprintf(stderr, "%s: %s with more than %u u64values\n", @@ -1491,6 +1584,259 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, #undef _TAG_NAME } +static int +hwloc__xml_import_memattr_value(hwloc_topology_t topology, + hwloc_memattr_id_t id, + unsigned long flags, + hwloc__xml_import_state_t state) +{ + char *target_obj_gp_index_s = NULL; + char *target_obj_type_s = NULL; + hwloc_uint64_t target_obj_gp_index; + char *value_s = NULL; + hwloc_uint64_t value; + char *initiator_cpuset_s = NULL; + char *initiator_obj_gp_index_s = NULL; + char *initiator_obj_type_s = NULL; + hwloc_obj_type_t target_obj_type = HWLOC_OBJ_TYPE_NONE; + + while (1) { + char *attrname, *attrvalue; + if (state->global->next_attr(state, &attrname, &attrvalue) < 0) + break; + if (!strcmp(attrname, "target_obj_gp_index")) + target_obj_gp_index_s = attrvalue; + else if (!strcmp(attrname, "target_obj_type")) + target_obj_type_s = attrvalue; + else if (!strcmp(attrname, "value")) + value_s = attrvalue; + else if (!strcmp(attrname, "initiator_cpuset")) + initiator_cpuset_s = attrvalue; + else if (!strcmp(attrname, "initiator_obj_gp_index")) + 
initiator_obj_gp_index_s = attrvalue; + else if (!strcmp(attrname, "initiator_obj_type")) + initiator_obj_type_s = attrvalue; + else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring unknown memattr_value attribute %s\n", + state->global->msgprefix, attrname); + return -1; + } + } + + if (!target_obj_type_s) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring memattr_value without target_obj_type.\n", + state->global->msgprefix); + return -1; + } + if (hwloc_type_sscanf(target_obj_type_s, &target_obj_type, NULL, 0) < 0) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: failed to identify memattr_value target object type %s\n", + state->global->msgprefix, target_obj_type_s); + return -1; + } + + if (!value_s || !target_obj_gp_index_s) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring memattr_value without value and target_obj_gp_index\n", + state->global->msgprefix); + return -1; + } + target_obj_gp_index = strtoull(target_obj_gp_index_s, NULL, 10); + value = strtoull(value_s, NULL, 10); + + if (flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) { + /* add a value with initiator */ + struct hwloc_internal_location_s loc; + if (!initiator_cpuset_s && (!initiator_obj_gp_index_s || !initiator_obj_type_s)) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring memattr_value without initiator attributes\n", + state->global->msgprefix); + return -1; + } + + /* setup the initiator */ + if (initiator_cpuset_s) { + loc.type = HWLOC_LOCATION_TYPE_CPUSET; + loc.location.cpuset = hwloc_bitmap_alloc(); + if (!loc.location.cpuset) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: failed to allocated memattr_value initiator cpuset\n", + state->global->msgprefix); + return -1; + } + hwloc_bitmap_sscanf(loc.location.cpuset, initiator_cpuset_s); + } else { + loc.type = HWLOC_LOCATION_TYPE_OBJECT; + loc.location.object.gp_index = strtoull(initiator_obj_gp_index_s, NULL, 10); + if (hwloc_type_sscanf(initiator_obj_type_s, &loc.location.object.type, NULL, 0) < 0) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: failed to identify memattr_value initiator object type %s\n", + state->global->msgprefix, initiator_obj_type_s); + return -1; + } + } + + hwloc_internal_memattr_set_value(topology, id, target_obj_type, target_obj_gp_index, (unsigned)-1, &loc, value); + + if (loc.type == HWLOC_LOCATION_TYPE_CPUSET) + hwloc_bitmap_free(loc.location.cpuset); + + } else { + /* add a value without initiator */ + hwloc_internal_memattr_set_value(topology, id, target_obj_type, target_obj_gp_index, (unsigned)-1, NULL, value); + } + + return 0; +} + +static int +hwloc__xml_import_memattr(hwloc_topology_t topology, + hwloc__xml_import_state_t state) +{ + char *name = NULL; + unsigned long flags = (unsigned long) -1; + hwloc_memattr_id_t id = (hwloc_memattr_id_t) -1; + int ret; + + while (1) { + char *attrname, *attrvalue; + if (state->global->next_attr(state, &attrname, &attrvalue) < 0) + break; + if (!strcmp(attrname, "name")) + name = attrvalue; + else if (!strcmp(attrname, "flags")) + flags = strtoul(attrvalue, NULL, 10); + else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring unknown memattr attribute %s\n", + state->global->msgprefix, attrname); + return -1; + } + } + + if (name && flags != (unsigned long) -1) { + hwloc_memattr_id_t _id; + + ret = hwloc_memattr_get_by_name(topology, name, &_id); + if (ret < 0) { + /* register a new attribute */ + ret = hwloc_memattr_register(topology, name, flags, &_id); + if (!ret) + id = _id; + } else { + /* check the flags of the 
existing attribute */ + unsigned long mflags; + ret = hwloc_memattr_get_flags(topology, _id, &mflags); + if (!ret && mflags == flags) + id = _id; + } + /* if there's no matching attribute, id is -1 and values will be ignored below */ + } + + while (1) { + struct hwloc__xml_import_state_s childstate; + char *tag; + + ret = state->global->find_child(state, &childstate, &tag); + if (ret <= 0) + break; + + if (!strcmp(tag, "memattr_value")) { + ret = hwloc__xml_import_memattr_value(topology, id, flags, &childstate); + } else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: memattr with unrecognized child %s\n", + state->global->msgprefix, tag); + ret = -1; + } + + if (ret < 0) + goto error; + + state->global->close_child(&childstate); + } + + return state->global->close_tag(state); + + error: + return -1; +} + +static int +hwloc__xml_import_cpukind(hwloc_topology_t topology, + hwloc__xml_import_state_t state) +{ + hwloc_bitmap_t cpuset = NULL; + int forced_efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN; + unsigned nr_infos = 0; + struct hwloc_info_s *infos = NULL; + int ret; + + while (1) { + char *attrname, *attrvalue; + if (state->global->next_attr(state, &attrname, &attrvalue) < 0) + break; + if (!strcmp(attrname, "cpuset")) { + if (!cpuset) + cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_sscanf(cpuset, attrvalue); + } else if (!strcmp(attrname, "forced_efficiency")) { + forced_efficiency = atoi(attrvalue); + } else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring unknown cpukind attribute %s\n", + state->global->msgprefix, attrname); + hwloc_bitmap_free(cpuset); + return -1; + } + } + + while (1) { + struct hwloc__xml_import_state_s childstate; + char *tag; + + ret = state->global->find_child(state, &childstate, &tag); + if (ret <= 0) + break; + + if (!strcmp(tag, "info")) { + char *infoname = NULL; + char *infovalue = NULL; + ret = hwloc___xml_import_info(&infoname, &infovalue, &childstate); + if (!ret && infoname && infovalue) + hwloc__add_info(&infos, &nr_infos, infoname, infovalue); + } else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: cpukind with unrecognized child %s\n", + state->global->msgprefix, tag); + ret = -1; + } + + if (ret < 0) + goto error; + + state->global->close_child(&childstate); + } + + if (!cpuset) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring cpukind without cpuset\n", + state->global->msgprefix); + goto error; + } + + hwloc_internal_cpukinds_register(topology, cpuset, forced_efficiency, infos, nr_infos, HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY); + + return state->global->close_tag(state); + + error: + hwloc__free_infos(infos, nr_infos); + hwloc_bitmap_free(cpuset); + return -1; +} + static int hwloc__xml_import_diff_one(hwloc__xml_import_state_t state, hwloc_topology_diff_t *firstdiffp, @@ -1759,6 +2105,18 @@ hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) ret = hwloc__xml_v2import_distances(topology, &childstate, 1); if (ret < 0) goto failed; + } else if (!strcmp(tag, "support")) { + ret = hwloc__xml_v2import_support(topology, &childstate); + if (ret < 0) + goto failed; + } else if (!strcmp(tag, "memattr")) { + ret = hwloc__xml_import_memattr(topology, &childstate); + if (ret < 0) + goto failed; + } else if (!strcmp(tag, "cpukind")) { + ret = hwloc__xml_import_cpukind(topology, &childstate); + if (ret < 0) + goto failed; } else { if (hwloc__xml_verbose()) fprintf(stderr, "%s: ignoring unknown tag `%s' after root object.\n", @@ -1864,12 +2222,14 @@ done: /* keep the "Backend" 
information intact */ /* we could add "BackendSource=XML" to notify that XML was used between the actual backend and here */ - topology->support.discovery->pu = 1; - topology->support.discovery->disallowed_pu = 1; - if (data->nbnumanodes) { - topology->support.discovery->numa = 1; - topology->support.discovery->numa_memory = 1; // FIXME - topology->support.discovery->disallowed_numa = 1; + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT)) { + topology->support.discovery->pu = 1; + topology->support.discovery->disallowed_pu = 1; + if (data->nbnumanodes) { + topology->support.discovery->numa = 1; + topology->support.discovery->numa_memory = 1; // FIXME + topology->support.discovery->disallowed_numa = 1; + } } if (data->look_done) @@ -2620,9 +2980,199 @@ hwloc__xml_v2export_distances(hwloc__xml_export_state_t parentstate, hwloc_topol hwloc___xml_v2export_distances(parentstate, dist); } +static void +hwloc__xml_v2export_support(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology) +{ + struct hwloc__xml_export_state_s state; + char tmp[11]; + +#ifdef HWLOC_DEBUG + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_support) == 4*sizeof(void*)); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_discovery_support) == 6); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_cpubind_support) == 11); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 15); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_misc_support) == 1); +#endif + +#define DO(_cat,_name) do { \ + if (topology->support._cat->_name) { \ + parentstate->new_child(parentstate, &state, "support"); \ + state.new_prop(&state, "name", #_cat "." #_name); \ + if (topology->support._cat->_name != 1) { \ + sprintf(tmp, "%u", topology->support._cat->_name); \ + state.new_prop(&state, "value", tmp); \ + } \ + state.end_object(&state, "support"); \ + } \ + } while (0) + + DO(discovery,pu); + DO(discovery,numa); + DO(discovery,numa_memory); + DO(discovery,disallowed_pu); + DO(discovery,disallowed_numa); + DO(discovery,cpukind_efficiency); + DO(cpubind,set_thisproc_cpubind); + DO(cpubind,get_thisproc_cpubind); + DO(cpubind,set_proc_cpubind); + DO(cpubind,get_proc_cpubind); + DO(cpubind,set_thisthread_cpubind); + DO(cpubind,get_thisthread_cpubind); + DO(cpubind,set_thread_cpubind); + DO(cpubind,get_thread_cpubind); + DO(cpubind,get_thisproc_last_cpu_location); + DO(cpubind,get_proc_last_cpu_location); + DO(cpubind,get_thisthread_last_cpu_location); + DO(membind,set_thisproc_membind); + DO(membind,get_thisproc_membind); + DO(membind,set_proc_membind); + DO(membind,get_proc_membind); + DO(membind,set_thisthread_membind); + DO(membind,get_thisthread_membind); + DO(membind,set_area_membind); + DO(membind,get_area_membind); + DO(membind,alloc_membind); + DO(membind,firsttouch_membind); + DO(membind,bind_membind); + DO(membind,interleave_membind); + DO(membind,nexttouch_membind); + DO(membind,migrate_membind); + DO(membind,get_area_memlocation); + + /* misc.imported_support would be meaningless in the remote importer, + * but the importer needs to know whether we exported support or not + * (in case there are no support bit set at all), + * use a custom/fake field to do so. 
+ */ + parentstate->new_child(parentstate, &state, "support"); + state.new_prop(&state, "name", "custom.exported_support"); + state.end_object(&state, "support"); + +#undef DO +} + +static void +hwloc__xml_export_memattr_target(hwloc__xml_export_state_t state, + struct hwloc_internal_memattr_s *imattr, + struct hwloc_internal_memattr_target_s *imtg) +{ + struct hwloc__xml_export_state_s vstate; + char tmp[255]; + + if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) { + /* export all initiators */ + unsigned k; + for(k=0; k<imtg->nr_initiators; k++) { + struct hwloc_internal_memattr_initiator_s *imi = &imtg->initiators[k]; + state->new_child(state, &vstate, "memattr_value"); + vstate.new_prop(&vstate, "target_obj_type", hwloc_obj_type_string(imtg->type)); + snprintf(tmp, sizeof(tmp), "%llu", (unsigned long long) imtg->gp_index); + vstate.new_prop(&vstate, "target_obj_gp_index", tmp); + snprintf(tmp, sizeof(tmp), "%llu", (unsigned long long) imi->value); + vstate.new_prop(&vstate, "value", tmp); + switch (imi->initiator.type) { + case HWLOC_LOCATION_TYPE_OBJECT: + snprintf(tmp, sizeof(tmp), "%llu", (unsigned long long) imi->initiator.location.object.gp_index); + vstate.new_prop(&vstate, "initiator_obj_gp_index", tmp); + vstate.new_prop(&vstate, "initiator_obj_type", hwloc_obj_type_string(imi->initiator.location.object.type)); + break; + case HWLOC_LOCATION_TYPE_CPUSET: { + char *setstring; + hwloc_bitmap_asprintf(&setstring, imi->initiator.location.cpuset); + if (setstring) + vstate.new_prop(&vstate, "initiator_cpuset", setstring); + free(setstring); + break; + } + default: + assert(0); + } + vstate.end_object(&vstate, "memattr_value"); + } + } else { + /* just export the global value */ + state->new_child(state, &vstate, "memattr_value"); + vstate.new_prop(&vstate, "target_obj_type", hwloc_obj_type_string(imtg->type)); + snprintf(tmp, sizeof(tmp), "%llu", (unsigned long long) imtg->gp_index); + vstate.new_prop(&vstate, "target_obj_gp_index", tmp); + snprintf(tmp, sizeof(tmp), "%llu", (unsigned long long) imtg->noinitiator_value); + vstate.new_prop(&vstate, "value", tmp); + vstate.end_object(&vstate, "memattr_value"); + } +} + +static void +hwloc__xml_export_memattrs(hwloc__xml_export_state_t state, hwloc_topology_t topology) +{ + unsigned id; + for(id=0; id<topology->nr_memattrs; id++) { + struct hwloc_internal_memattr_s *imattr; + struct hwloc__xml_export_state_s mstate; + char tmp[255]; + unsigned j; + + if (id == HWLOC_MEMATTR_ID_CAPACITY || id == HWLOC_MEMATTR_ID_LOCALITY) + /* no need to export virtual memattrs */ + continue; + + imattr = &topology->memattrs[id]; + if ((id == HWLOC_MEMATTR_ID_LATENCY || id == HWLOC_MEMATTR_ID_BANDWIDTH) + && !imattr->nr_targets) + /* no need to export target-less attributes for initial attributes, no release support attributes without those definitions */ + continue; + + state->new_child(state, &mstate, "memattr"); + mstate.new_prop(&mstate, "name", imattr->name); + snprintf(tmp, sizeof(tmp), "%lu", imattr->flags); + mstate.new_prop(&mstate, "flags", tmp); + + for(j=0; j<imattr->nr_targets; j++) + hwloc__xml_export_memattr_target(&mstate, imattr, &imattr->targets[j]); + + mstate.end_object(&mstate, "memattr"); + } +} + +static void +hwloc__xml_export_cpukinds(hwloc__xml_export_state_t state, hwloc_topology_t topology) +{ + unsigned i; + for(i=0; i<topology->nr_cpukinds; i++) { + struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; + struct hwloc__xml_export_state_s cstate; + char *setstring; + unsigned j; + + state->new_child(state, &cstate, "cpukind"); +
hwloc_bitmap_asprintf(&setstring, kind->cpuset); + cstate.new_prop(&cstate, "cpuset", setstring); + free(setstring); + if (kind->forced_efficiency != HWLOC_CPUKIND_EFFICIENCY_UNKNOWN) { + char tmp[11]; + snprintf(tmp, sizeof(tmp), "%d", kind->forced_efficiency); + cstate.new_prop(&cstate, "forced_efficiency", tmp); + } + + for(j=0; j<kind->nr_infos; j++) { + char *name = hwloc__xml_export_safestrdup(kind->infos[j].name); + char *value = hwloc__xml_export_safestrdup(kind->infos[j].value); + struct hwloc__xml_export_state_s istate; + cstate.new_child(&cstate, &istate, "info"); + istate.new_prop(&istate, "name", name); + istate.new_prop(&istate, "value", value); + istate.end_object(&istate, "info"); + free(name); + free(value); + } + + cstate.end_object(&cstate, "cpukind"); + } +} + void hwloc__xml_export_topology(hwloc__xml_export_state_t state, hwloc_topology_t topology, unsigned long flags) { + char *env; hwloc_obj_t root = hwloc_get_root_obj(topology); if (flags & HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1) { @@ -2665,6 +3215,11 @@ hwloc__xml_export_topology(hwloc__xml_export_state_t state, hwloc_topology_t top } else { hwloc__xml_v2export_object (state, topology, root, flags); hwloc__xml_v2export_distances (state, topology); + env = getenv("HWLOC_XML_EXPORT_SUPPORT"); + if (!env || atoi(env)) + hwloc__xml_v2export_support(state, topology); + hwloc__xml_export_memattrs(state, topology); + hwloc__xml_export_cpukinds(state, topology); } } diff --git a/src/3rdparty/hwloc/src/topology.c b/src/3rdparty/hwloc/src/topology.c index 34692517..94387ece 100644 --- a/src/3rdparty/hwloc/src/topology.c +++ b/src/3rdparty/hwloc/src/topology.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2019 Inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ @@ -75,16 +75,49 @@ int hwloc_hide_errors(void) return hide; } -void hwloc_report_os_error(const char *msg, int line) + +/* format the obj info to print in error messages */ +static void +report_insert_error_format_obj(char *buf, size_t buflen, hwloc_obj_t obj) +{ + char typestr[64]; + char *cpusetstr; + char *nodesetstr = NULL; + + hwloc_obj_type_snprintf(typestr, sizeof(typestr), obj, 0); + hwloc_bitmap_asprintf(&cpusetstr, obj->cpuset); + if (obj->nodeset) /* may be missing during insert */ + hwloc_bitmap_asprintf(&nodesetstr, obj->nodeset); + if (obj->os_index != HWLOC_UNKNOWN_INDEX) + snprintf(buf, buflen, "%s (P#%u cpuset %s%s%s)", + typestr, obj->os_index, cpusetstr, + nodesetstr ? " nodeset " : "", + nodesetstr ? nodesetstr : ""); + else + snprintf(buf, buflen, "%s (cpuset %s%s%s)", + typestr, cpusetstr, + nodesetstr ? " nodeset " : "", + nodesetstr ?
nodesetstr : ""); + free(cpusetstr); + free(nodesetstr); +} + +static void report_insert_error(hwloc_obj_t new, hwloc_obj_t old, const char *msg, const char *reason) { static int reported = 0; - if (!reported && !hwloc_hide_errors()) { + if (reason && !reported && !hwloc_hide_errors()) { + char newstr[512]; + char oldstr[512]; + report_insert_error_format_obj(newstr, sizeof(newstr), new); + report_insert_error_format_obj(oldstr, sizeof(oldstr), old); + fprintf(stderr, "****************************************************************************\n"); fprintf(stderr, "* hwloc %s received invalid information from the operating system.\n", HWLOC_VERSION); fprintf(stderr, "*\n"); - fprintf(stderr, "* %s\n", msg); - fprintf(stderr, "* Error occurred in topology.c line %d\n", line); + fprintf(stderr, "* Failed with: %s\n", msg); + fprintf(stderr, "* while inserting %s at %s\n", newstr, oldstr); + fprintf(stderr, "* coming from: %s\n", reason); fprintf(stderr, "*\n"); fprintf(stderr, "* The following FAQ entry in the hwloc documentation may help:\n"); fprintf(stderr, "* What should I do when hwloc reports \"operating system\" warnings?\n"); @@ -264,7 +297,7 @@ hwloc_setup_pu_level(struct hwloc_topology *topology, hwloc_debug_2args_bitmap("cpu %u (os %u) has cpuset %s\n", cpu, oscpu, obj->cpuset); - hwloc_insert_object_by_cpuset(topology, obj); + hwloc__insert_object_by_cpuset(topology, NULL, obj, "core:pulevel"); cpu++; } @@ -347,16 +380,18 @@ hwloc_debug_print_object(int indent __hwloc_attribute_unused, hwloc_obj_t obj) static void hwloc_debug_print_objects(int indent __hwloc_attribute_unused, hwloc_obj_t obj) { - hwloc_obj_t child; - hwloc_debug_print_object(indent, obj); - for_each_child (child, obj) - hwloc_debug_print_objects(indent + 1, child); - for_each_memory_child (child, obj) - hwloc_debug_print_objects(indent + 1, child); - for_each_io_child (child, obj) - hwloc_debug_print_objects(indent + 1, child); - for_each_misc_child (child, obj) - hwloc_debug_print_objects(indent + 1, child); + if (hwloc_debug_enabled() >= 2) { + hwloc_obj_t child; + hwloc_debug_print_object(indent, obj); + for_each_child (child, obj) + hwloc_debug_print_objects(indent + 1, child); + for_each_memory_child (child, obj) + hwloc_debug_print_objects(indent + 1, child); + for_each_io_child (child, obj) + hwloc_debug_print_objects(indent + 1, child); + for_each_misc_child (child, obj) + hwloc_debug_print_objects(indent + 1, child); + } } #else /* !HWLOC_DEBUG */ #define hwloc_debug_print_object(indent, obj) do { /* nothing */ } while (0) @@ -472,29 +507,33 @@ int hwloc_obj_add_info(hwloc_obj_t obj, const char *name, const char *value) } /* This function may be called with topology->tma set, it cannot free() or realloc() */ -static int hwloc__tma_dup_infos(struct hwloc_tma *tma, hwloc_obj_t new, hwloc_obj_t src) +int hwloc__tma_dup_infos(struct hwloc_tma *tma, + struct hwloc_info_s **newip, unsigned *newcp, + struct hwloc_info_s *oldi, unsigned oldc) { + struct hwloc_info_s *newi; unsigned i, j; - new->infos = hwloc_tma_calloc(tma, src->infos_count * sizeof(*src->infos)); - if (!new->infos) + newi = hwloc_tma_calloc(tma, oldc * sizeof(*newi)); + if (!newi) return -1; - for(i=0; i<src->infos_count; i++) { - new->infos[i].name = hwloc_tma_strdup(tma, src->infos[i].name); - new->infos[i].value = hwloc_tma_strdup(tma, src->infos[i].value); - if (!new->infos[i].name || !new->infos[i].value) + for(i=0; i<oldc; i++) { + newi[i].name = hwloc_tma_strdup(tma, oldi[i].name); + newi[i].value = hwloc_tma_strdup(tma, oldi[i].value); + if (!newi[i].name || !newi[i].value) goto failed; } - new->infos_count = src->infos_count; + *newip = newi; + *newcp = oldc; return 0; failed: assert(!tma || !tma->dontfree); /* this tma
cannot fail to allocate */ for(j=0; j<=i; j++) { - free(new->infos[i].name); - free(new->infos[i].value); + free(newi[i].name); + free(newi[i].value); } - free(new->infos); - new->infos = NULL; + free(newi); + *newip = NULL; return -1; } @@ -812,7 +851,7 @@ hwloc__duplicate_object(struct hwloc_topology *newtopology, newobj->nodeset = hwloc_bitmap_tma_dup(tma, src->nodeset); newobj->complete_nodeset = hwloc_bitmap_tma_dup(tma, src->complete_nodeset); - hwloc__tma_dup_infos(tma, newobj, src); + hwloc__tma_dup_infos(tma, &newobj->infos, &newobj->infos_count, src->infos, src->infos_count); /* find our level */ if (src->depth < 0) { @@ -970,6 +1009,7 @@ hwloc__topology_dup(hwloc_topology_t *newp, memcpy(new->support.discovery, old->support.discovery, sizeof(*old->support.discovery)); memcpy(new->support.cpubind, old->support.cpubind, sizeof(*old->support.cpubind)); memcpy(new->support.membind, old->support.membind, sizeof(*old->support.membind)); + memcpy(new->support.misc, old->support.misc, sizeof(*old->support.misc)); new->allowed_cpuset = hwloc_bitmap_tma_dup(tma, old->allowed_cpuset); new->allowed_nodeset = hwloc_bitmap_tma_dup(tma, old->allowed_nodeset); @@ -1008,6 +1048,14 @@ hwloc__topology_dup(hwloc_topology_t *newp, if (err < 0) goto out_with_topology; + err = hwloc_internal_memattrs_dup(new, old); + if (err < 0) + goto out_with_topology; + + err = hwloc_internal_cpukinds_dup(new, old); + if (err < 0) + goto out_with_topology; + /* we connected everything during duplication */ new->modified = 0; @@ -1229,31 +1277,6 @@ hwloc__object_cpusets_compare_first(hwloc_obj_t obj1, hwloc_obj_t obj2) return 0; } -/* format the obj info to print in error messages */ -static void -hwloc__report_error_format_obj(char *buf, size_t buflen, hwloc_obj_t obj) -{ - char typestr[64]; - char *cpusetstr; - char *nodesetstr = NULL; - hwloc_obj_type_snprintf(typestr, sizeof(typestr), obj, 0); - hwloc_bitmap_asprintf(&cpusetstr, obj->cpuset); - if (obj->nodeset) /* may be missing during insert */ - hwloc_bitmap_asprintf(&nodesetstr, obj->nodeset); - if (obj->os_index != HWLOC_UNKNOWN_INDEX) - snprintf(buf, buflen, "%s (P#%u cpuset %s%s%s)", - typestr, obj->os_index, cpusetstr, - nodesetstr ? " nodeset " : "", - nodesetstr ? nodesetstr : ""); - else - snprintf(buf, buflen, "%s (cpuset %s%s%s)", - typestr, cpusetstr, - nodesetstr ? " nodeset " : "", - nodesetstr ? nodesetstr : ""); - free(cpusetstr); - free(nodesetstr); -} - /* * How to insert objects into the topology. * @@ -1390,9 +1413,9 @@ hwloc__insert_try_merge_group(hwloc_obj_t old, hwloc_obj_t new) */ static struct hwloc_obj * hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur, hwloc_obj_t obj, - hwloc_report_error_t report_error) + const char *reason) { - hwloc_obj_t child, next_child = NULL; + hwloc_obj_t child, next_child = NULL, tmp; /* These will always point to the pointer to their next last child. */ hwloc_obj_t *cur_children = &cur->first_child; hwloc_obj_t *obj_children = &obj->first_child; @@ -1430,18 +1453,10 @@ hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur case HWLOC_OBJ_INCLUDED: /* OBJ is strictly contained is some child of CUR, go deeper. 
*/ - return hwloc___insert_object_by_cpuset(topology, child, obj, report_error); + return hwloc___insert_object_by_cpuset(topology, child, obj, reason); case HWLOC_OBJ_INTERSECTS: - if (report_error) { - char childstr[512]; - char objstr[512]; - char msg[1100]; - hwloc__report_error_format_obj(objstr, sizeof(objstr), obj); - hwloc__report_error_format_obj(childstr, sizeof(childstr), child); - snprintf(msg, sizeof(msg), "%s intersects with %s without inclusion!", objstr, childstr); - report_error(msg, __LINE__); - } + report_insert_error(obj, child, "intersection without inclusion", reason); goto putback; case HWLOC_OBJ_DIFFERENT: @@ -1464,6 +1479,8 @@ hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur if (setres == HWLOC_OBJ_EQUAL) { obj->memory_first_child = child->memory_first_child; child->memory_first_child = NULL; + for(tmp=obj->memory_first_child; tmp; tmp = tmp->next_sibling) + tmp->parent = obj; } break; } @@ -1483,7 +1500,9 @@ hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur return obj; putback: - /* Put-back OBJ children in CUR and return an error. */ + /* OBJ cannot be inserted. + * Put-back OBJ children in CUR and return an error. + */ if (putp) cur_children = putp; /* No need to try to insert before where OBJ was supposed to go */ else @@ -1492,12 +1511,12 @@ hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur while ((child = obj->first_child) != NULL) { /* Remove from OBJ */ obj->first_child = child->next_sibling; - obj->parent = cur; - /* Find child position in CUR, and insert. */ + /* Find child position in CUR, and reinsert it. */ while (*cur_children && hwloc__object_cpusets_compare_first(*cur_children, child) < 0) cur_children = &(*cur_children)->next_sibling; child->next_sibling = *cur_children; *cur_children = child; + child->parent = cur; } return NULL; } @@ -1521,7 +1540,7 @@ hwloc__find_obj_covering_memory_cpuset(struct hwloc_topology *topology, hwloc_ob static struct hwloc_obj * hwloc__find_insert_memory_parent(struct hwloc_topology *topology, hwloc_obj_t obj, - hwloc_report_error_t report_error) + const char *reason) { hwloc_obj_t parent, group, result; @@ -1573,7 +1592,7 @@ hwloc__find_insert_memory_parent(struct hwloc_topology *topology, hwloc_obj_t ob return parent; } - result = hwloc__insert_object_by_cpuset(topology, parent, group, report_error); + result = hwloc__insert_object_by_cpuset(topology, parent, group, reason); if (!result) { /* failed to insert, fallback to larger parent */ return parent; @@ -1586,8 +1605,7 @@ hwloc__find_insert_memory_parent(struct hwloc_topology *topology, hwloc_obj_t ob /* only works for MEMCACHE and NUMAnode with a single bit in nodeset */ static hwloc_obj_t hwloc___attach_memory_object_by_nodeset(struct hwloc_topology *topology, hwloc_obj_t parent, - hwloc_obj_t obj, - hwloc_report_error_t report_error) + hwloc_obj_t obj, const char *reason) { hwloc_obj_t *curp = &parent->memory_first_child; unsigned first = hwloc_bitmap_first(obj->nodeset); @@ -1611,20 +1629,12 @@ hwloc___attach_memory_object_by_nodeset(struct hwloc_topology *topology, hwloc_o if (obj->type == HWLOC_OBJ_NUMANODE) { if (cur->type == HWLOC_OBJ_NUMANODE) { /* identical NUMA nodes? 
ignore the new one */ - if (report_error) { - char curstr[512]; - char objstr[512]; - char msg[1100]; - hwloc__report_error_format_obj(curstr, sizeof(curstr), cur); - hwloc__report_error_format_obj(objstr, sizeof(objstr), obj); - snprintf(msg, sizeof(msg), "%s and %s have identical nodesets!", objstr, curstr); - report_error(msg, __LINE__); - } + report_insert_error(obj, cur, "NUMAnodes with identical nodesets", reason); return NULL; } assert(cur->type == HWLOC_OBJ_MEMCACHE); /* insert the new NUMA node below that existing memcache */ - return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, report_error); + return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, reason); } else { assert(obj->type == HWLOC_OBJ_MEMCACHE); @@ -1637,7 +1647,7 @@ hwloc___attach_memory_object_by_nodeset(struct hwloc_topology *topology, hwloc_o * (depth starts from the NUMA node). * insert the new memcache below the existing one */ - return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, report_error); + return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, reason); } /* insert the memcache above the existing memcache or numa node */ obj->next_sibling = cur->next_sibling; @@ -1673,8 +1683,7 @@ hwloc___attach_memory_object_by_nodeset(struct hwloc_topology *topology, hwloc_o */ struct hwloc_obj * hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent, - hwloc_obj_t obj, - hwloc_report_error_t report_error) + hwloc_obj_t obj, const char *reason) { hwloc_obj_t result; @@ -1704,7 +1713,7 @@ hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent, hwloc_bitmap_copy(obj->complete_cpuset, parent->complete_cpuset); #endif - result = hwloc___attach_memory_object_by_nodeset(topology, parent, obj, report_error); + result = hwloc___attach_memory_object_by_nodeset(topology, parent, obj, reason); if (result == obj) { /* Add the bit to the top sets, and to the parent CPU-side object */ if (obj->type == HWLOC_OBJ_NUMANODE) { @@ -1722,8 +1731,7 @@ hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent, /* insertion routine that lets you change the error reporting callback */ struct hwloc_obj * hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t root, - hwloc_obj_t obj, - hwloc_report_error_t report_error) + hwloc_obj_t obj, const char *reason) { struct hwloc_obj *result; @@ -1740,20 +1748,20 @@ hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t root if (hwloc__obj_type_is_memory(obj->type)) { if (!root) { - root = hwloc__find_insert_memory_parent(topology, obj, report_error); + root = hwloc__find_insert_memory_parent(topology, obj, reason); if (!root) { hwloc_free_unlinked_object(obj); return NULL; } } - return hwloc__attach_memory_object(topology, root, obj, report_error); + return hwloc__attach_memory_object(topology, root, obj, reason); } if (!root) /* Start at the top. */ root = topology->levels[0][0]; - result = hwloc___insert_object_by_cpuset(topology, root, obj, report_error); + result = hwloc___insert_object_by_cpuset(topology, root, obj, reason); if (result && result->type == HWLOC_OBJ_PU) { /* Add the bit to the top sets */ if (hwloc_bitmap_isset(result->cpuset, result->os_index)) @@ -1769,12 +1777,6 @@ hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t root /* the default insertion routine warns in case of error. 
* it's used by most backends */ -struct hwloc_obj * -hwloc_insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj) -{ - return hwloc__insert_object_by_cpuset(topology, NULL, obj, hwloc_report_os_error); -} - void hwloc_insert_object_by_parent(struct hwloc_topology *topology, hwloc_obj_t parent, hwloc_obj_t obj) { @@ -1917,6 +1919,7 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t if (hwloc_bitmap_isset(nodeset, numa->os_index)) hwloc_bitmap_or(obj->cpuset, obj->cpuset, numa->cpuset); } + /* FIXME insert by nodeset to group NUMAs even if CPUless? */ cmp = hwloc_obj_cmp_sets(obj, root); if (cmp == HWLOC_OBJ_INCLUDED) { @@ -2047,7 +2050,7 @@ hwloc_find_insert_io_parent_by_complete_cpuset(struct hwloc_topology *topology, hwloc_bitmap_and(cpuset, cpuset, hwloc_topology_get_topology_cpuset(topology)); group_obj->cpuset = hwloc_bitmap_dup(cpuset); group_obj->attr->group.kind = HWLOC_GROUP_KIND_IO; - parent = hwloc__insert_object_by_cpuset(topology, largeparent, group_obj, hwloc_report_os_error); + parent = hwloc__insert_object_by_cpuset(topology, largeparent, group_obj, "topology:io_parent"); if (!parent) /* Failed to insert the Group, maybe a conflicting cpuset */ return largeparent; @@ -3251,7 +3254,7 @@ hwloc_discover(struct hwloc_topology *topology, * produced by hwloc_setup_pu_level() */ - /* To be able to just use hwloc_insert_object_by_cpuset to insert the object + /* To be able to just use hwloc__insert_object_by_cpuset to insert the object * in the topology according to the cpuset, the cpuset field must be * initialized. */ @@ -3356,7 +3359,7 @@ hwloc_discover(struct hwloc_topology *topology, hwloc_bitmap_set(node->nodeset, 0); memcpy(&node->attr->numanode, &topology->machine_memory, sizeof(topology->machine_memory)); memset(&topology->machine_memory, 0, sizeof(topology->machine_memory)); - hwloc_insert_object_by_cpuset(topology, node); + hwloc__insert_object_by_cpuset(topology, NULL, node, "core:defaultnumanode"); } else { /* if we're sure we found all NUMA nodes without their sizes (x86 backend?), * we could split topology->total_memory in all of them. 
@@ -3514,6 +3517,7 @@ hwloc_topology_setup_defaults(struct hwloc_topology *topology) memset(topology->support.discovery, 0, sizeof(*topology->support.discovery)); memset(topology->support.cpubind, 0, sizeof(*topology->support.cpubind)); memset(topology->support.membind, 0, sizeof(*topology->support.membind)); + memset(topology->support.misc, 0, sizeof(*topology->support.misc)); /* Only the System object on top by default */ topology->next_gp_index = 1; /* keep 0 as an invalid value */ @@ -3590,6 +3594,7 @@ hwloc__topology_init (struct hwloc_topology **topologyp, topology->support.discovery = hwloc_tma_malloc(tma, sizeof(*topology->support.discovery)); topology->support.cpubind = hwloc_tma_malloc(tma, sizeof(*topology->support.cpubind)); topology->support.membind = hwloc_tma_malloc(tma, sizeof(*topology->support.membind)); + topology->support.misc = hwloc_tma_malloc(tma, sizeof(*topology->support.misc)); topology->nb_levels_allocated = nblevels; /* enough for default 10 levels = Mach+Pack+Die+NUMA+L3+L2+L1d+L1i+Co+PU */ topology->levels = hwloc_tma_calloc(tma, topology->nb_levels_allocated * sizeof(*topology->levels)); @@ -3598,6 +3603,8 @@ hwloc__topology_init (struct hwloc_topology **topologyp, hwloc__topology_filter_init(topology); hwloc_internal_distances_init(topology); + hwloc_internal_memattrs_init(topology); + hwloc_internal_cpukinds_init(topology); topology->userdata_export_cb = NULL; topology->userdata_import_cb = NULL; @@ -3691,7 +3698,7 @@ hwloc_topology_set_flags (struct hwloc_topology *topology, unsigned long flags) return -1; } - if (flags & ~(HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)) { + if (flags & ~(HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES|HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT)) { errno = EINVAL; return -1; } @@ -3827,7 +3834,9 @@ hwloc_topology_clear (struct hwloc_topology *topology) { /* no need to set to NULL after free() since callers will call setup_defaults() or just destroy the rest of the topology */ unsigned l; + hwloc_internal_cpukinds_destroy(topology); hwloc_internal_distances_destroy(topology); + hwloc_internal_memattrs_destroy(topology); hwloc_free_object_and_children(topology->levels[0][0]); hwloc_bitmap_free(topology->allowed_cpuset); hwloc_bitmap_free(topology->allowed_nodeset); @@ -3858,6 +3867,7 @@ hwloc_topology_destroy (struct hwloc_topology *topology) free(topology->support.discovery); free(topology->support.cpubind); free(topology->support.membind); + free(topology->support.misc); free(topology); } @@ -3873,7 +3883,9 @@ hwloc_topology_load (struct hwloc_topology *topology) return -1; } + /* initialize envvar-related things */ hwloc_internal_distances_prepare(topology); + hwloc_internal_memattrs_prepare(topology); if (getenv("HWLOC_XML_USERDATA_NOT_DECODED")) topology->userdata_not_decoded = 1; @@ -3954,6 +3966,9 @@ hwloc_topology_load (struct hwloc_topology *topology) #endif hwloc_topology_check(topology); + /* Rank cpukinds */ + hwloc_internal_cpukinds_rank(topology); + /* Mark distances objs arrays as invalid since we may have removed objects * from the topology after adding the distances (remove_empty, etc). * It would be hard to actually verify whether it's needed. 
@@ -3964,6 +3979,10 @@ hwloc_topology_load (struct hwloc_topology *topology) */ hwloc_internal_distances_refresh(topology); + /* Same for memattrs */ + hwloc_internal_memattrs_need_refresh(topology); + hwloc_internal_memattrs_refresh(topology); + topology->is_loaded = 1; if (topology->backend_phases & HWLOC_DISC_PHASE_TWEAK) { @@ -4246,10 +4265,12 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_bitmap_t se /* some objects may have disappeared, we need to update distances objs arrays */ hwloc_internal_distances_invalidate_cached_objs(topology); + hwloc_internal_memattrs_need_refresh(topology); hwloc_filter_levels_keep_structure(topology); hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]); propagate_total_memory(topology->levels[0][0]); + hwloc_internal_cpukinds_restrict(topology); #ifndef HWLOC_DEBUG if (getenv("HWLOC_DEBUG_CHECK")) @@ -4334,6 +4355,15 @@ hwloc_topology_allow(struct hwloc_topology *topology, return -1; } +int +hwloc_topology_refresh(struct hwloc_topology *topology) +{ + hwloc_internal_cpukinds_rank(topology); + hwloc_internal_distances_refresh(topology); + hwloc_internal_memattrs_refresh(topology); + return 0; +} + int hwloc_topology_is_thissystem(struct hwloc_topology *topology) { diff --git a/src/3rdparty/hwloc/src/traversal.c b/src/3rdparty/hwloc/src/traversal.c index 0b744d78..f9076ab5 100644 --- a/src/3rdparty/hwloc/src/traversal.c +++ b/src/3rdparty/hwloc/src/traversal.c @@ -1,7 +1,7 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2019 Inria. All rights reserved. - * Copyright © 2009-2010 Université Bordeaux + * Copyright © 2009-2020 Inria. All rights reserved. + * Copyright © 2009-2010, 2020 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ @@ -138,6 +138,37 @@ hwloc_obj_type_is_icache(hwloc_obj_type_t type) return hwloc__obj_type_is_icache(type); } +static hwloc_obj_t hwloc_get_obj_by_depth_and_gp_index(hwloc_topology_t topology, unsigned depth, uint64_t gp_index) +{ + hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, 0); + while (obj) { + if (obj->gp_index == gp_index) + return obj; + obj = obj->next_cousin; + } + return NULL; +} + +hwloc_obj_t hwloc_get_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) + return NULL; + if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) { + for(depth=1 /* no multiple machine levels */; + (unsigned) depth < topology->nb_levels-1 /* no multiple PU levels */; + depth++) { + if (hwloc_get_depth_type(topology, depth) == type) { + hwloc_obj_t obj = hwloc_get_obj_by_depth_and_gp_index(topology, depth, gp_index); + if (obj) + return obj; + } + } + return NULL; + } + return hwloc_get_obj_by_depth_and_gp_index(topology, depth, gp_index); +} + unsigned hwloc_get_closest_objs (struct hwloc_topology *topology, struct hwloc_obj *src, struct hwloc_obj **objs, unsigned max) { struct hwloc_obj *parent, *nextparent, **src_objs; @@ -654,7 +685,11 @@ hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t unsigned i; for(i=0; i<obj->infos_count; i++) { struct hwloc_info_s *info = &obj->infos[i]; - const char *quote = strchr(info->value, ' ') ?
"\"" : ""; + const char *quote; + if (strchr(info->value, ' ')) + quote = "\""; + else + quote = ""; res = hwloc_snprintf(tmp, tmplen, "%s%s=%s%s%s", prefix, info->name, @@ -673,3 +708,31 @@ hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t return ret; } + +int hwloc_bitmap_singlify_per_core(hwloc_topology_t topology, hwloc_bitmap_t cpuset, unsigned which) +{ + hwloc_obj_t core = NULL; + while ((core = hwloc_get_next_obj_covering_cpuset_by_type(topology, cpuset, HWLOC_OBJ_CORE, core)) != NULL) { + /* this core has some PUs in the cpuset, find the index-th one */ + unsigned i = 0; + int pu = -1; + do { + pu = hwloc_bitmap_next(core->cpuset, pu); + if (pu == -1) { + /* no which-th PU in cpuset and core, remove the entire core */ + hwloc_bitmap_andnot(cpuset, cpuset, core->cpuset); + break; + } + if (hwloc_bitmap_isset(cpuset, pu)) { + if (i == which) { + /* remove the entire core except that exact pu */ + hwloc_bitmap_andnot(cpuset, cpuset, core->cpuset); + hwloc_bitmap_set(cpuset, pu); + break; + } + i++; + } + } while (1); + } + return 0; +} diff --git a/src/3rdparty/libcpuid/CMakeLists.txt b/src/3rdparty/libcpuid/CMakeLists.txt deleted file mode 100644 index 51eab1d3..00000000 --- a/src/3rdparty/libcpuid/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ -cmake_minimum_required (VERSION 2.8) -project (cpuid C) - -add_definitions(/DVERSION="0.4.0") - -set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Os") - -set(HEADERS - libcpuid.h - libcpuid_types.h - libcpuid_constants.h - libcpuid_internal.h - amd_code_t.h - intel_code_t.h - recog_amd.h - recog_intel.h - asm-bits.h - libcpuid_util.h - ) - -set(SOURCES - cpuid_main.c - asm-bits.c - recog_amd.c - recog_intel.c - libcpuid_util.c - ) - -if (CMAKE_CL_64) - enable_language(ASM_MASM) - set(SOURCES_ASM masm-x64.asm) -endif() - -add_library(cpuid STATIC - ${HEADERS} - ${SOURCES} - ${SOURCES_ASM} - ) diff --git a/src/3rdparty/libcpuid/amd_code_t.h b/src/3rdparty/libcpuid/amd_code_t.h deleted file mode 100644 index 058e8957..00000000 --- a/src/3rdparty/libcpuid/amd_code_t.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2016 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * This file contains a list of internal codes we use in detection. 
It is - * of no external use and isn't a complete list of AMD products. - */ - CODE2(OPTERON_800, 1000), - CODE(PHENOM), - CODE(PHENOM2), - CODE(FUSION_C), - CODE(FUSION_E), - CODE(FUSION_EA), - CODE(FUSION_Z), - CODE(FUSION_A), - diff --git a/src/3rdparty/libcpuid/asm-bits.c b/src/3rdparty/libcpuid/asm-bits.c deleted file mode 100644 index bfabd404..00000000 --- a/src/3rdparty/libcpuid/asm-bits.c +++ /dev/null @@ -1,836 +0,0 @@ -/* - * Copyright 2008 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "libcpuid.h" -#include "asm-bits.h" - -int cpuid_exists_by_eflags(void) -{ -#if defined(PLATFORM_X64) - return 1; /* CPUID is always present on the x86_64 */ -#elif defined(PLATFORM_X86) -# if defined(COMPILER_GCC) || defined(COMPILER_CLANG) - int result; - __asm __volatile( - " pushfl\n" - " pop %%eax\n" - " mov %%eax, %%ecx\n" - " xor $0x200000, %%eax\n" - " push %%eax\n" - " popfl\n" - " pushfl\n" - " pop %%eax\n" - " xor %%ecx, %%eax\n" - " mov %%eax, %0\n" - " push %%ecx\n" - " popfl\n" - : "=m"(result) - : :"eax", "ecx", "memory"); - return (result != 0); -# elif defined(COMPILER_MICROSOFT) - int result; - __asm { - pushfd - pop eax - mov ecx, eax - xor eax, 0x200000 - push eax - popfd - pushfd - pop eax - xor eax, ecx - mov result, eax - push ecx - popfd - }; - return (result != 0); -# else - return 0; -# endif /* COMPILER_MICROSOFT */ -#elif defined(PLATFORM_ARM) - return 0; -#else - return 0; -#endif /* PLATFORM_X86 */ -} - -#ifdef INLINE_ASM_SUPPORTED -/* - * with MSVC/AMD64, the exec_cpuid() and cpu_rdtsc() functions - * are implemented in separate .asm files. 
Otherwise, use inline assembly - */ -void exec_cpuid(uint32_t *regs) -{ -# if defined(COMPILER_GCC) || defined(COMPILER_CLANG) -# ifdef PLATFORM_X64 - __asm __volatile( - " mov %0, %%rdi\n" - - " push %%rbx\n" - " push %%rcx\n" - " push %%rdx\n" - - " mov (%%rdi), %%eax\n" - " mov 4(%%rdi), %%ebx\n" - " mov 8(%%rdi), %%ecx\n" - " mov 12(%%rdi), %%edx\n" - - " cpuid\n" - - " movl %%eax, (%%rdi)\n" - " movl %%ebx, 4(%%rdi)\n" - " movl %%ecx, 8(%%rdi)\n" - " movl %%edx, 12(%%rdi)\n" - " pop %%rdx\n" - " pop %%rcx\n" - " pop %%rbx\n" - : - :"m"(regs) - :"memory", "eax", "rdi" - ); -# elif defined(PLATFORM_X86) - __asm __volatile( - " mov %0, %%edi\n" - - " push %%ebx\n" - " push %%ecx\n" - " push %%edx\n" - - " mov (%%edi), %%eax\n" - " mov 4(%%edi), %%ebx\n" - " mov 8(%%edi), %%ecx\n" - " mov 12(%%edi), %%edx\n" - - " cpuid\n" - - " mov %%eax, (%%edi)\n" - " mov %%ebx, 4(%%edi)\n" - " mov %%ecx, 8(%%edi)\n" - " mov %%edx, 12(%%edi)\n" - " pop %%edx\n" - " pop %%ecx\n" - " pop %%ebx\n" - : - :"m"(regs) - :"memory", "eax", "edi" - ); -# elif defined(PLATFORM_ARM) -# endif /* COMPILER_GCC */ -#else -# ifdef COMPILER_MICROSOFT - __asm { - push ebx - push ecx - push edx - push edi - mov edi, regs - - mov eax, [edi] - mov ebx, [edi+4] - mov ecx, [edi+8] - mov edx, [edi+12] - - cpuid - - mov [edi], eax - mov [edi+4], ebx - mov [edi+8], ecx - mov [edi+12], edx - - pop edi - pop edx - pop ecx - pop ebx - } -# else -# error "Unsupported compiler" -# endif /* COMPILER_MICROSOFT */ -#endif -} -#endif /* INLINE_ASSEMBLY_SUPPORTED */ - -#ifdef INLINE_ASM_SUPPORTED -void cpu_rdtsc(uint64_t* result) -{ - uint32_t low_part, hi_part; -#if defined(COMPILER_GCC) || defined(COMPILER_CLANG) -#ifdef PLATFORM_ARM - low_part = 0; - hi_part = 0; -#else - __asm __volatile ( - " rdtsc\n" - " mov %%eax, %0\n" - " mov %%edx, %1\n" - :"=m"(low_part), "=m"(hi_part)::"memory", "eax", "edx" - ); -#endif -#else -# ifdef COMPILER_MICROSOFT - __asm { - rdtsc - mov low_part, eax - mov hi_part, edx - }; -# else -# error "Unsupported compiler" -# endif /* COMPILER_MICROSOFT */ -#endif /* COMPILER_GCC */ - *result = (uint64_t)low_part + (((uint64_t) hi_part) << 32); -} -#endif /* INLINE_ASM_SUPPORTED */ - -#ifdef INLINE_ASM_SUPPORTED -void busy_sse_loop(int cycles) -{ -# if defined(COMPILER_GCC) || defined(COMPILER_CLANG) -#ifndef __APPLE__ -# define XALIGN ".balign 16\n" -#else -# define XALIGN ".align 4\n" -#endif -#ifdef PLATFORM_ARM -#else - __asm __volatile ( - " xorps %%xmm0, %%xmm0\n" - " xorps %%xmm1, %%xmm1\n" - " xorps %%xmm2, %%xmm2\n" - " xorps %%xmm3, %%xmm3\n" - " xorps %%xmm4, %%xmm4\n" - " xorps %%xmm5, %%xmm5\n" - " xorps %%xmm6, %%xmm6\n" - " xorps %%xmm7, %%xmm7\n" - XALIGN - /* ".bsLoop:\n" */ - "1:\n" - // 0: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 1: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 2: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 3: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps 
%%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 4: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 5: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 6: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 7: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 8: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 9: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //10: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //11: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //12: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //13: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //14: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //15: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //16: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //17: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //18: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" 
- " addps %%xmm0, %%xmm7\n" - //19: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //20: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //21: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //22: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //23: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //24: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //25: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //26: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //27: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //28: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //29: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //30: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //31: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - - " dec %%eax\n" - /* "jnz .bsLoop\n" */ - " jnz 1b\n" - ::"a"(cycles) - ); -#endif -#else -# ifdef COMPILER_MICROSOFT - __asm { - mov eax, cycles - xorps xmm0, xmm0 - xorps xmm1, xmm1 - xorps xmm2, xmm2 - xorps xmm3, xmm3 - xorps xmm4, xmm4 - xorps xmm5, xmm5 - xorps xmm6, xmm6 - xorps xmm7, xmm7 - //-- - align 16 -bsLoop: - // 0: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 1: - addps xmm0, 
xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 2: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 3: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 4: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 5: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 6: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 7: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 8: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 9: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 10: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 11: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 12: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 13: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 14: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 15: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 16: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 17: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 18: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 19: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 20: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 21: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 22: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 23: - addps xmm0, xmm1 - addps xmm1, 
xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 24: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 25: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 26: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 27: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 28: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 29: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 30: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 31: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - //---------------------- - dec eax - jnz bsLoop - } -# else -# error "Unsupported compiler" -# endif /* COMPILER_MICROSOFT */ -#endif /* COMPILER_GCC */ -} -#endif /* INLINE_ASSEMBLY_SUPPORTED */ \ No newline at end of file diff --git a/src/3rdparty/libcpuid/asm-bits.h b/src/3rdparty/libcpuid/asm-bits.h deleted file mode 100644 index 9049e2fe..00000000 --- a/src/3rdparty/libcpuid/asm-bits.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright 2008 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ -#ifndef __ASM_BITS_H__ -#define __ASM_BITS_H__ -#include "libcpuid.h" - -/* Determine Compiler: */ -#if defined(_MSC_VER) -#if !defined(COMPILER_MICROSOFT) -# define COMPILER_MICROSOFT -#endif -#elif defined(__GNUC__) -#if !defined(COMPILER_GCC) -# define COMPILER_GCC -#endif -#elif defined(__clang__) -#if !defined(COMPILER_CLANG) -# define COMPILER_CLANG -#endif -#endif - -/* Determine Platform */ -#if defined(__x86_64__) || defined(_M_AMD64) -#if !defined(PLATFORM_X64) -# define PLATFORM_X64 -#endif -#elif defined(__i386__) || defined(_M_IX86) -#if !defined(PLATFORM_X86) -# define PLATFORM_X86 -#endif -#elif defined(__ARMEL__) -#if !defined(PLATFORM_ARM) -# define PLATFORM_ARM -#endif -#endif - -/* Under Windows/AMD64 with MSVC, inline assembly isn't supported */ -#if (((defined(COMPILER_GCC) || defined(COMPILER_CLANG))) && \ - (defined(PLATFORM_X64) || defined(PLATFORM_X86) || defined(PLATFORM_ARM))) || \ - (defined(COMPILER_MICROSOFT) && defined(PLATFORM_X86)) -# define INLINE_ASM_SUPPORTED -#endif - -int cpuid_exists_by_eflags(void); -void exec_cpuid(uint32_t *regs); -void busy_sse_loop(int cycles); - -#endif /* __ASM_BITS_H__ */ diff --git a/src/3rdparty/libcpuid/cpuid_main.c b/src/3rdparty/libcpuid/cpuid_main.c deleted file mode 100644 index 61cb638d..00000000 --- a/src/3rdparty/libcpuid/cpuid_main.c +++ /dev/null @@ -1,389 +0,0 @@ -/* - * Copyright 2008 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ -#include "libcpuid.h" -#include "libcpuid_internal.h" -#include "recog_intel.h" -#include "recog_amd.h" -#include "asm-bits.h" -#include "libcpuid_util.h" -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#include -#include -#include - -/* Implementation: */ - -static int _libcpiud_errno = ERR_OK; - -int set_error(cpu_error_t err) -{ - _libcpiud_errno = (int) err; - return (int) err; -} - -static void cpu_id_t_constructor(struct cpu_id_t* id) -{ - memset(id, 0, sizeof(struct cpu_id_t)); - id->l1_data_cache = id->l1_instruction_cache = id->l2_cache = id->l3_cache = id->l4_cache = -1; - id->l1_assoc = id->l2_assoc = id->l3_assoc = id->l4_assoc = -1; - id->l1_cacheline = id->l2_cacheline = id->l3_cacheline = id->l4_cacheline = -1; - id->sse_size = -1; -} - -/* get_total_cpus() system specific code: uses OS routines to determine total number of CPUs */ -#ifdef __APPLE__ -#include -#include -#include -#include -static int get_total_cpus(void) -{ - kern_return_t kr; - host_basic_info_data_t basic_info; - host_info_t info = (host_info_t)&basic_info; - host_flavor_t flavor = HOST_BASIC_INFO; - mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; - kr = host_info(mach_host_self(), flavor, info, &count); - if (kr != KERN_SUCCESS) return 1; - return basic_info.avail_cpus; -} -#define GET_TOTAL_CPUS_DEFINED -#endif - -#ifdef _WIN32 -#include -static int get_total_cpus(void) -{ - SYSTEM_INFO system_info; - GetSystemInfo(&system_info); - return system_info.dwNumberOfProcessors; -} -#define GET_TOTAL_CPUS_DEFINED -#endif - -#if defined linux || defined __linux__ || defined __sun -#include -#include - -static int get_total_cpus(void) -{ - return sysconf(_SC_NPROCESSORS_ONLN); -} -#define GET_TOTAL_CPUS_DEFINED -#endif - -#if defined __FreeBSD__ || defined __OpenBSD__ || defined __NetBSD__ || defined __bsdi__ || defined __QNX__ -#include -#include - -static int get_total_cpus(void) -{ - int mib[2] = { CTL_HW, HW_NCPU }; - int ncpus; - size_t len = sizeof(ncpus); - if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) != 0) return 1; - return ncpus; -} -#define GET_TOTAL_CPUS_DEFINED -#endif - -#ifndef GET_TOTAL_CPUS_DEFINED -static int get_total_cpus(void) -{ - static int warning_printed = 0; - if (!warning_printed) { - warning_printed = 1; - warnf("Your system is not supported by libcpuid -- don't know how to detect the\n"); - warnf("total number of CPUs on your system. 
It will be reported as 1.\n"); - printf("Please use cpu_id_t.logical_cpus field instead.\n"); - } - return 1; -} -#endif /* GET_TOTAL_CPUS_DEFINED */ - - -static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* data) -{ - const struct feature_map_t matchtable_edx1[] = { - { 0, CPU_FEATURE_FPU }, - { 1, CPU_FEATURE_VME }, - { 2, CPU_FEATURE_DE }, - { 3, CPU_FEATURE_PSE }, - { 4, CPU_FEATURE_TSC }, - { 5, CPU_FEATURE_MSR }, - { 6, CPU_FEATURE_PAE }, - { 7, CPU_FEATURE_MCE }, - { 8, CPU_FEATURE_CX8 }, - { 9, CPU_FEATURE_APIC }, - { 11, CPU_FEATURE_SEP }, - { 12, CPU_FEATURE_MTRR }, - { 13, CPU_FEATURE_PGE }, - { 14, CPU_FEATURE_MCA }, - { 15, CPU_FEATURE_CMOV }, - { 16, CPU_FEATURE_PAT }, - { 17, CPU_FEATURE_PSE36 }, - { 19, CPU_FEATURE_CLFLUSH }, - { 23, CPU_FEATURE_MMX }, - { 24, CPU_FEATURE_FXSR }, - { 25, CPU_FEATURE_SSE }, - { 26, CPU_FEATURE_SSE2 }, - { 28, CPU_FEATURE_HT }, - }; - const struct feature_map_t matchtable_ecx1[] = { - { 0, CPU_FEATURE_PNI }, - { 1, CPU_FEATURE_PCLMUL }, - { 3, CPU_FEATURE_MONITOR }, - { 9, CPU_FEATURE_SSSE3 }, - { 12, CPU_FEATURE_FMA3 }, - { 13, CPU_FEATURE_CX16 }, - { 19, CPU_FEATURE_SSE4_1 }, - { 20, CPU_FEATURE_SSE4_2 }, - { 22, CPU_FEATURE_MOVBE }, - { 23, CPU_FEATURE_POPCNT }, - { 25, CPU_FEATURE_AES }, - { 26, CPU_FEATURE_XSAVE }, - { 27, CPU_FEATURE_OSXSAVE }, - { 28, CPU_FEATURE_AVX }, - { 29, CPU_FEATURE_F16C }, - { 30, CPU_FEATURE_RDRAND }, - }; - const struct feature_map_t matchtable_ebx7[] = { - { 3, CPU_FEATURE_BMI1 }, - { 5, CPU_FEATURE_AVX2 }, - { 8, CPU_FEATURE_BMI2 }, - }; - const struct feature_map_t matchtable_edx81[] = { - { 11, CPU_FEATURE_SYSCALL }, - { 27, CPU_FEATURE_RDTSCP }, - { 29, CPU_FEATURE_LM }, - }; - const struct feature_map_t matchtable_ecx81[] = { - { 0, CPU_FEATURE_LAHF_LM }, - }; - const struct feature_map_t matchtable_edx87[] = { - { 8, CPU_FEATURE_CONSTANT_TSC }, - }; - if (raw->basic_cpuid[0][0] >= 1) { - match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data); - match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data); - } - if (raw->basic_cpuid[0][0] >= 7) { - match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data); - } - if (raw->ext_cpuid[0][0] >= 0x80000001) { - match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data); - match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data); - } - if (raw->ext_cpuid[0][0] >= 0x80000007) { - match_features(matchtable_edx87, COUNT_OF(matchtable_edx87), raw->ext_cpuid[7][3], data); - } - if (data->flags[CPU_FEATURE_SSE]) { - /* apply guesswork to check if the SSE unit width is 128 bit */ - switch (data->vendor) { - case VENDOR_AMD: - data->sse_size = (data->ext_family >= 16 && data->ext_family != 17) ? 128 : 64; - break; - case VENDOR_INTEL: - data->sse_size = (data->family == 6 && data->ext_model >= 15) ? 
128 : 64; - break; - default: - break; - } - /* leave the CPU_FEATURE_128BIT_SSE_AUTH 0; the advanced per-vendor detection routines - * will set it accordingly if they detect the needed bit */ - } -} - -static cpu_vendor_t cpuid_vendor_identify(const uint32_t *raw_vendor, char *vendor_str) -{ - int i; - cpu_vendor_t vendor = VENDOR_UNKNOWN; - const struct { cpu_vendor_t vendor; char match[16]; } - matchtable[NUM_CPU_VENDORS] = { - /* source: http://www.sandpile.org/ia32/cpuid.htm */ - { VENDOR_INTEL , "GenuineIntel" }, - { VENDOR_AMD , "AuthenticAMD" }, - { VENDOR_CYRIX , "CyrixInstead" }, - { VENDOR_NEXGEN , "NexGenDriven" }, - { VENDOR_TRANSMETA , "GenuineTMx86" }, - { VENDOR_UMC , "UMC UMC UMC " }, - { VENDOR_CENTAUR , "CentaurHauls" }, - { VENDOR_RISE , "RiseRiseRise" }, - { VENDOR_SIS , "SiS SiS SiS " }, - { VENDOR_NSC , "Geode by NSC" }, - }; - - memcpy(vendor_str + 0, &raw_vendor[1], 4); - memcpy(vendor_str + 4, &raw_vendor[3], 4); - memcpy(vendor_str + 8, &raw_vendor[2], 4); - vendor_str[12] = 0; - - /* Determine vendor: */ - for (i = 0; i < NUM_CPU_VENDORS; i++) - if (!strcmp(vendor_str, matchtable[i].match)) { - vendor = matchtable[i].vendor; - break; - } - return vendor; -} - -static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data) -{ - int i, j, basic, xmodel, xfamily, ext; - char brandstr[64] = {0}; - data->vendor = cpuid_vendor_identify(raw->basic_cpuid[0], data->vendor_str); - - if (data->vendor == VENDOR_UNKNOWN) - return set_error(ERR_CPU_UNKN); - basic = raw->basic_cpuid[0][0]; - if (basic >= 1) { - data->family = (raw->basic_cpuid[1][0] >> 8) & 0xf; - data->model = (raw->basic_cpuid[1][0] >> 4) & 0xf; - data->stepping = raw->basic_cpuid[1][0] & 0xf; - xmodel = (raw->basic_cpuid[1][0] >> 16) & 0xf; - xfamily = (raw->basic_cpuid[1][0] >> 20) & 0xff; - if (data->vendor == VENDOR_AMD && data->family < 0xf) - data->ext_family = data->family; - else - data->ext_family = data->family + xfamily; - data->ext_model = data->model + (xmodel << 4); - } - ext = raw->ext_cpuid[0][0] - 0x8000000; - - /* obtain the brand string, if present: */ - if (ext >= 4) { - for (i = 0; i < 3; i++) - for (j = 0; j < 4; j++) - memcpy(brandstr + i * 16 + j * 4, - &raw->ext_cpuid[2 + i][j], 4); - brandstr[48] = 0; - i = 0; - while (brandstr[i] == ' ') i++; - strncpy(data->brand_str, brandstr + i, sizeof(data->brand_str)); - data->brand_str[48] = 0; - } - load_features_common(raw, data); - data->total_logical_cpus = get_total_cpus(); - return set_error(ERR_OK); -} - -/* Interface: */ - -int cpuid_get_total_cpus(void) -{ - return get_total_cpus(); -} - -int cpuid_present(void) -{ - return cpuid_exists_by_eflags(); -} - -void cpu_exec_cpuid(uint32_t eax, uint32_t* regs) -{ - regs[0] = eax; - regs[1] = regs[2] = regs[3] = 0; - exec_cpuid(regs); -} - -void cpu_exec_cpuid_ext(uint32_t* regs) -{ - exec_cpuid(regs); -} - -int cpuid_get_raw_data(struct cpu_raw_data_t* data) -{ - unsigned i; - if (!cpuid_present()) - return set_error(ERR_NO_CPUID); - for (i = 0; i < 32; i++) - cpu_exec_cpuid(i, data->basic_cpuid[i]); - for (i = 0; i < 32; i++) - cpu_exec_cpuid(0x80000000 + i, data->ext_cpuid[i]); - for (i = 0; i < MAX_INTELFN4_LEVEL; i++) { - memset(data->intel_fn4[i], 0, sizeof(data->intel_fn4[i])); - data->intel_fn4[i][0] = 4; - data->intel_fn4[i][2] = i; - cpu_exec_cpuid_ext(data->intel_fn4[i]); - } - for (i = 0; i < MAX_INTELFN11_LEVEL; i++) { - memset(data->intel_fn11[i], 0, sizeof(data->intel_fn11[i])); - data->intel_fn11[i][0] = 11; - data->intel_fn11[i][2] = i; - 
cpu_exec_cpuid_ext(data->intel_fn11[i]); - } - for (i = 0; i < MAX_INTELFN12H_LEVEL; i++) { - memset(data->intel_fn12h[i], 0, sizeof(data->intel_fn12h[i])); - data->intel_fn12h[i][0] = 0x12; - data->intel_fn12h[i][2] = i; - cpu_exec_cpuid_ext(data->intel_fn12h[i]); - } - for (i = 0; i < MAX_INTELFN14H_LEVEL; i++) { - memset(data->intel_fn14h[i], 0, sizeof(data->intel_fn14h[i])); - data->intel_fn14h[i][0] = 0x14; - data->intel_fn14h[i][2] = i; - cpu_exec_cpuid_ext(data->intel_fn14h[i]); - } - return set_error(ERR_OK); -} - -int cpu_ident_internal(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal) -{ - int r; - struct cpu_raw_data_t myraw; - if (!raw) { - if ((r = cpuid_get_raw_data(&myraw)) < 0) - return set_error(r); - raw = &myraw; - } - cpu_id_t_constructor(data); - if ((r = cpuid_basic_identify(raw, data)) < 0) - return set_error(r); - switch (data->vendor) { - case VENDOR_INTEL: - r = cpuid_identify_intel(raw, data, internal); - break; - case VENDOR_AMD: - r = cpuid_identify_amd(raw, data, internal); - break; - default: - break; - } - return set_error(r); -} - -int cpu_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data) -{ - struct internal_id_info_t throwaway; - return cpu_ident_internal(raw, data, &throwaway); -} - -const char* cpuid_lib_version(void) -{ - return VERSION; -} diff --git a/src/3rdparty/libcpuid/intel_code_t.h b/src/3rdparty/libcpuid/intel_code_t.h deleted file mode 100644 index 718ca337..00000000 --- a/src/3rdparty/libcpuid/intel_code_t.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright 2016 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * This file contains a list of internal codes we use in detection. It is - * of no external use and isn't a complete list of intel products. 
- */ - CODE2(PENTIUM, 2000), - - CODE(IRWIN), - CODE(POTOMAC), - CODE(GAINESTOWN), - CODE(WESTMERE), - - CODE(PENTIUM_M), - CODE(NOT_CELERON), - - CODE(CORE_SOLO), - CODE(MOBILE_CORE_SOLO), - CODE(CORE_DUO), - CODE(MOBILE_CORE_DUO), - - CODE(WOLFDALE), - CODE(MEROM), - CODE(PENRYN), - CODE(QUAD_CORE), - CODE(DUAL_CORE_HT), - CODE(QUAD_CORE_HT), - CODE(MORE_THAN_QUADCORE), - CODE(PENTIUM_D), - - CODE(SILVERTHORNE), - CODE(DIAMONDVILLE), - CODE(PINEVIEW), - CODE(CEDARVIEW), diff --git a/src/3rdparty/libcpuid/libcpuid.h b/src/3rdparty/libcpuid/libcpuid.h deleted file mode 100644 index 847e5a4a..00000000 --- a/src/3rdparty/libcpuid/libcpuid.h +++ /dev/null @@ -1,678 +0,0 @@ -/* - * Copyright 2008 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef __LIBCPUID_H__ -#define __LIBCPUID_H__ -/** - * \file libcpuid.h - * \author Veselin Georgiev - * \date Oct 2008 - * \version 0.4.0 - * - * Version history: - * - * * 0.1.0 (2008-10-15): initial adaptation from wxfractgui sources - * * 0.1.1 (2009-07-06): Added intel_fn11 fields to cpu_raw_data_t to handle - * new processor topology enumeration required on Core i7 - * * 0.1.2 (2009-09-26): Added support for MSR reading through self-extracting - * kernel driver on Win32. - * * 0.1.3 (2010-04-20): Added support for greater more accurate CPU clock - * measurements with cpu_clock_by_ic() - * * 0.2.0 (2011-10-11): Support for AMD Bulldozer CPUs, 128-bit SSE unit size - * checking. A backwards-incompatible change, since the - * sizeof cpu_id_t is now different. - * * 0.2.1 (2012-05-26): Support for Ivy Bridge, and detecting the presence of - * the RdRand instruction. - * * 0.2.2 (2015-11-04): Support for newer processors up to Haswell and Vishera. - * Fix clock detection in cpu_clock_by_ic() for Bulldozer. - * More entries supported in cpu_msrinfo(). - * *BSD and Solaris support (unofficial). - * * 0.3.0 (2016-07-09): Support for Skylake; MSR ops in FreeBSD; INFO_VOLTAGE - * for AMD CPUs. Level 4 cache support for Crystalwell - * (a backwards-incompatible change since the sizeof - * cpu_raw_data_t is now different). - * * 0.4.0 (2016-09-30): Better detection of AMD clock multiplier with msrinfo. 
- *                       Support for Intel SGX detection
- *                       (a backwards-incompatible change since the sizeof
- *                       cpu_raw_data_t and cpu_id_t is now different).
- */
-
-/** @mainpage A simple libcpuid introduction
- *
- * LibCPUID provides CPU identification and access to the CPUID and RDTSC
- * instructions on the x86.
- * <br>
- * To execute CPUID, use \ref cpu_exec_cpuid<br>
- * To execute RDTSC, use \ref cpu_rdtsc<br>
- * To fetch the CPUID info needed for CPU identification, use
- *   \ref cpuid_get_raw_data<br>
- * To make sense of that data (decode, extract features), use \ref cpu_identify<br>
- * To detect the CPU speed, use either \ref cpu_clock, \ref cpu_clock_by_os,
- *   \ref cpu_tsc_mark + \ref cpu_tsc_unmark + \ref cpu_clock_by_mark,
- *   \ref cpu_clock_measure or \ref cpu_clock_by_ic.
- *   Read carefully for pros/cons of each method.<br>
- *
- * To read MSRs, use \ref cpu_msr_driver_open to get a handle, and then
- * \ref cpu_rdmsr for querying abilities. Some MSR decoding is available on recent
- * CPUs, and can be queried through \ref cpu_msrinfo; the various types of queries
- * are described in \ref cpu_msrinfo_request_t.
- * <br>
- */ - -/** @defgroup libcpuid LibCPUID - * @brief LibCPUID provides CPU identification - @{ */ - -/* Include some integer type specifications: */ -#include "libcpuid_types.h" - -/* Some limits and other constants */ -#include "libcpuid_constants.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * @brief CPU vendor, as guessed from the Vendor String. - */ -typedef enum { - VENDOR_INTEL = 0, /*!< Intel CPU */ - VENDOR_AMD, /*!< AMD CPU */ - VENDOR_CYRIX, /*!< Cyrix CPU */ - VENDOR_NEXGEN, /*!< NexGen CPU */ - VENDOR_TRANSMETA, /*!< Transmeta CPU */ - VENDOR_UMC, /*!< x86 CPU by UMC */ - VENDOR_CENTAUR, /*!< x86 CPU by IDT */ - VENDOR_RISE, /*!< x86 CPU by Rise Technology */ - VENDOR_SIS, /*!< x86 CPU by SiS */ - VENDOR_NSC, /*!< x86 CPU by National Semiconductor */ - - NUM_CPU_VENDORS, /*!< Valid CPU vendor ids: 0..NUM_CPU_VENDORS - 1 */ - VENDOR_UNKNOWN = -1, -} cpu_vendor_t; -#define NUM_CPU_VENDORS NUM_CPU_VENDORS - -/** - * @brief Contains just the raw CPUID data. - * - * This contains only the most basic CPU data, required to do identification - * and feature recognition. Every processor should be identifiable using this - * data only. - */ -struct cpu_raw_data_t { - /** contains results of CPUID for eax = 0, 1, ...*/ - uint32_t basic_cpuid[MAX_CPUID_LEVEL][4]; - - /** contains results of CPUID for eax = 0x80000000, 0x80000001, ...*/ - uint32_t ext_cpuid[MAX_EXT_CPUID_LEVEL][4]; - - /** when the CPU is intel and it supports deterministic cache - information: this contains the results of CPUID for eax = 4 - and ecx = 0, 1, ... */ - uint32_t intel_fn4[MAX_INTELFN4_LEVEL][4]; - - /** when the CPU is intel and it supports leaf 0Bh (Extended Topology - enumeration leaf), this stores the result of CPUID with - eax = 11 and ecx = 0, 1, 2... */ - uint32_t intel_fn11[MAX_INTELFN11_LEVEL][4]; - - /** when the CPU is intel and supports leaf 12h (SGX enumeration leaf), - * this stores the result of CPUID with eax = 0x12 and - * ecx = 0, 1, 2... */ - uint32_t intel_fn12h[MAX_INTELFN12H_LEVEL][4]; - - /** when the CPU is intel and supports leaf 14h (Intel Processor Trace - * capabilities leaf). - * this stores the result of CPUID with eax = 0x12 and - * ecx = 0, 1, 2... */ - uint32_t intel_fn14h[MAX_INTELFN14H_LEVEL][4]; -}; - -/** - * @brief This contains information about SGX features of the processor - * Example usage: - * @code - * ... - * struct cpu_raw_data_t raw; - * struct cpu_id_t id; - * - * if (cpuid_get_raw_data(&raw) == 0 && cpu_identify(&raw, &id) == 0 && id.sgx.present) { - * printf("SGX is present.\n"); - * printf("SGX1 instructions: %s.\n", id.sgx.flags[INTEL_SGX1] ? "present" : "absent"); - * printf("SGX2 instructions: %s.\n", id.sgx.flags[INTEL_SGX2] ? "present" : "absent"); - * printf("Max 32-bit enclave size: 2^%d bytes.\n", id.sgx.max_enclave_32bit); - * printf("Max 64-bit enclave size: 2^%d bytes.\n", id.sgx.max_enclave_64bit); - * for (int i = 0; i < id.sgx.num_epc_sections; i++) { - * struct cpu_epc_t epc = cpuid_get_epc(i, NULL); - * printf("EPC section #%d: address = %x, size = %d bytes.\n", epc.address, epc.size); - * } - * } else { - * printf("SGX is not present.\n"); - * } - * @endcode - */ -struct cpu_sgx_t { - /** Whether SGX is present (boolean) */ - uint32_t present; - - /** Max enclave size in 32-bit mode. This is a power-of-two value: - * if it is "31", then the max enclave size is 2^31 bytes (2 GiB). - */ - uint8_t max_enclave_32bit; - - /** Max enclave size in 64-bit mode. 
This is a power-of-two value: - * if it is "36", then the max enclave size is 2^36 bytes (64 GiB). - */ - uint8_t max_enclave_64bit; - - /** - * contains SGX feature flags. See the \ref cpu_sgx_feature_t - * "INTEL_SGX*" macros below. - */ - uint8_t flags[SGX_FLAGS_MAX]; - - /** number of Enclave Page Cache (EPC) sections. Info for each - * section is available through the \ref cpuid_get_epc() function - */ - int num_epc_sections; - - /** bit vector of the supported extended features that can be written - * to the MISC region of the SSA (Save State Area) - */ - uint32_t misc_select; - - /** a bit vector of the attributes that can be set to SECS.ATTRIBUTES - * via ECREATE. Corresponds to bits 0-63 (incl.) of SECS.ATTRIBUTES. - */ - uint64_t secs_attributes; - - /** a bit vector of the bits that can be set in the XSAVE feature - * request mask; Corresponds to bits 64-127 of SECS.ATTRIBUTES. - */ - uint64_t secs_xfrm; -}; - -/** - * @brief This contains the recognized CPU features/info - */ -struct cpu_id_t { - /** contains the CPU vendor string, e.g. "GenuineIntel" */ - char vendor_str[VENDOR_STR_MAX]; - - /** contains the brand string, e.g. "Intel(R) Xeon(TM) CPU 2.40GHz" */ - char brand_str[BRAND_STR_MAX]; - - /** contains the recognized CPU vendor */ - cpu_vendor_t vendor; - - /** - * contain CPU flags. Used to test for features. See - * the \ref cpu_feature_t "CPU_FEATURE_*" macros below. - * @see Features - */ - uint8_t flags[CPU_FLAGS_MAX]; - - /** CPU family */ - int32_t family; - - /** CPU model */ - int32_t model; - - /** CPU stepping */ - int32_t stepping; - - /** CPU extended family */ - int32_t ext_family; - - /** CPU extended model */ - int32_t ext_model; - - /** Number of CPU cores on the current processor */ - int32_t num_cores; - - /** - * Number of logical processors on the current processor. - * Could be more than the number of physical cores, - * e.g. when the processor has HyperThreading. - */ - int32_t num_logical_cpus; - - /** - * The total number of logical processors. - * The same value is availabe through \ref cpuid_get_total_cpus. - * - * This is num_logical_cpus * {total physical processors in the system} - * (but only on a real system, under a VM this number may be lower). - * - * If you're writing a multithreaded program and you want to run it on - * all CPUs, this is the number of threads you need. - * - * @note in a VM, this will exactly match the number of CPUs set in - * the VM's configuration. - * - */ - int32_t total_logical_cpus; - - /** - * L1 data cache size in KB. Could be zero, if the CPU lacks cache. - * If the size cannot be determined, it will be -1. - */ - int32_t l1_data_cache; - - /** - * L1 instruction cache size in KB. Could be zero, if the CPU lacks - * cache. If the size cannot be determined, it will be -1. - * @note On some Intel CPUs, whose instruction cache is in fact - * a trace cache, the size will be expressed in K uOps. - */ - int32_t l1_instruction_cache; - - /** - * L2 cache size in KB. Could be zero, if the CPU lacks L2 cache. - * If the size of the cache could not be determined, it will be -1 - */ - int32_t l2_cache; - - /** L3 cache size in KB. Zero on most systems */ - int32_t l3_cache; - - /** L4 cache size in KB. Zero on most systems */ - int32_t l4_cache; - - /** Cache associativity for the L1 data cache. -1 if undetermined */ - int32_t l1_assoc; - - /** Cache associativity for the L2 cache. -1 if undetermined */ - int32_t l2_assoc; - - /** Cache associativity for the L3 cache. 
-1 if undetermined */ - int32_t l3_assoc; - - /** Cache associativity for the L4 cache. -1 if undetermined */ - int32_t l4_assoc; - - /** Cache-line size for L1 data cache. -1 if undetermined */ - int32_t l1_cacheline; - - /** Cache-line size for L2 cache. -1 if undetermined */ - int32_t l2_cacheline; - - /** Cache-line size for L3 cache. -1 if undetermined */ - int32_t l3_cacheline; - - /** Cache-line size for L4 cache. -1 if undetermined */ - int32_t l4_cacheline; - - /** - * The brief and human-friendly CPU codename, which was recognized.
- * Examples: - * @code - * +--------+--------+-------+-------+-------+---------------------------------------+-----------------------+ - * | Vendor | Family | Model | Step. | Cache | Brand String | cpu_id_t.cpu_codename | - * +--------+--------+-------+-------+-------+---------------------------------------+-----------------------+ - * | AMD | 6 | 8 | 0 | 256 | (not available - will be ignored) | "K6-2" | - * | Intel | 15 | 2 | 5 | 512 | "Intel(R) Xeon(TM) CPU 2.40GHz" | "Xeon (Prestonia)" | - * | Intel | 6 | 15 | 11 | 4096 | "Intel(R) Core(TM)2 Duo CPU E6550..." | "Conroe (Core 2 Duo)" | - * | AMD | 15 | 35 | 2 | 1024 | "Dual Core AMD Opteron(tm) Proces..." | "Opteron (Dual Core)" | - * +--------+--------+-------+-------+-------+---------------------------------------+-----------------------+ - * @endcode - */ - char cpu_codename[64]; - - /** SSE execution unit size (64 or 128; -1 if N/A) */ - int32_t sse_size; - - /** - * contain miscellaneous detection information. Used to test about specifics of - * certain detected features. See \ref cpu_hint_t "CPU_HINT_*" macros below. - * @see Hints - */ - uint8_t detection_hints[CPU_HINTS_MAX]; - - /** contains information about SGX features if the processor, if present */ - struct cpu_sgx_t sgx; -}; - -/** - * @brief CPU feature identifiers - * - * Usage: - * @code - * ... - * struct cpu_raw_data_t raw; - * struct cpu_id_t id; - * if (cpuid_get_raw_data(&raw) == 0 && cpu_identify(&raw, &id) == 0) { - * if (id.flags[CPU_FEATURE_SSE2]) { - * // The CPU has SSE2... - * ... - * } else { - * // no SSE2 - * } - * } else { - * // processor cannot be determined. - * } - * @endcode - */ -typedef enum { - CPU_FEATURE_FPU = 0, /*!< Floating point unit */ - CPU_FEATURE_VME, /*!< Virtual mode extension */ - CPU_FEATURE_DE, /*!< Debugging extension */ - CPU_FEATURE_PSE, /*!< Page size extension */ - CPU_FEATURE_TSC, /*!< Time-stamp counter */ - CPU_FEATURE_MSR, /*!< Model-specific regsisters, RDMSR/WRMSR supported */ - CPU_FEATURE_PAE, /*!< Physical address extension */ - CPU_FEATURE_MCE, /*!< Machine check exception */ - CPU_FEATURE_CX8, /*!< CMPXCHG8B instruction supported */ - CPU_FEATURE_APIC, /*!< APIC support */ - CPU_FEATURE_MTRR, /*!< Memory type range registers */ - CPU_FEATURE_SEP, /*!< SYSENTER / SYSEXIT instructions supported */ - CPU_FEATURE_PGE, /*!< Page global enable */ - CPU_FEATURE_MCA, /*!< Machine check architecture */ - CPU_FEATURE_CMOV, /*!< CMOVxx instructions supported */ - CPU_FEATURE_PAT, /*!< Page attribute table */ - CPU_FEATURE_PSE36, /*!< 36-bit page address extension */ - CPU_FEATURE_PN, /*!< Processor serial # implemented (Intel P3 only) */ - CPU_FEATURE_CLFLUSH, /*!< CLFLUSH instruction supported */ - CPU_FEATURE_DTS, /*!< Debug store supported */ - CPU_FEATURE_ACPI, /*!< ACPI support (power states) */ - CPU_FEATURE_MMX, /*!< MMX instruction set supported */ - CPU_FEATURE_FXSR, /*!< FXSAVE / FXRSTOR supported */ - CPU_FEATURE_SSE, /*!< Streaming-SIMD Extensions (SSE) supported */ - CPU_FEATURE_SSE2, /*!< SSE2 instructions supported */ - CPU_FEATURE_SS, /*!< Self-snoop */ - CPU_FEATURE_HT, /*!< Hyper-threading supported (but might be disabled) */ - CPU_FEATURE_TM, /*!< Thermal monitor */ - CPU_FEATURE_IA64, /*!< IA64 supported (Itanium only) */ - CPU_FEATURE_PBE, /*!< Pending-break enable */ - CPU_FEATURE_PNI, /*!< PNI (SSE3) instructions supported */ - CPU_FEATURE_PCLMUL, /*!< PCLMULQDQ instruction supported */ - CPU_FEATURE_DTS64, /*!< 64-bit Debug store supported */ - CPU_FEATURE_MONITOR, /*!< MONITOR / MWAIT supported */ - 
CPU_FEATURE_DS_CPL, /*!< CPL Qualified Debug Store */ - CPU_FEATURE_VMX, /*!< Virtualization technology supported */ - CPU_FEATURE_SMX, /*!< Safer mode exceptions */ - CPU_FEATURE_EST, /*!< Enhanced SpeedStep */ - CPU_FEATURE_TM2, /*!< Thermal monitor 2 */ - CPU_FEATURE_SSSE3, /*!< SSSE3 instructionss supported (this is different from SSE3!) */ - CPU_FEATURE_CID, /*!< Context ID supported */ - CPU_FEATURE_CX16, /*!< CMPXCHG16B instruction supported */ - CPU_FEATURE_XTPR, /*!< Send Task Priority Messages disable */ - CPU_FEATURE_PDCM, /*!< Performance capabilities MSR supported */ - CPU_FEATURE_DCA, /*!< Direct cache access supported */ - CPU_FEATURE_SSE4_1, /*!< SSE 4.1 instructions supported */ - CPU_FEATURE_SSE4_2, /*!< SSE 4.2 instructions supported */ - CPU_FEATURE_SYSCALL, /*!< SYSCALL / SYSRET instructions supported */ - CPU_FEATURE_XD, /*!< Execute disable bit supported */ - CPU_FEATURE_MOVBE, /*!< MOVBE instruction supported */ - CPU_FEATURE_POPCNT, /*!< POPCNT instruction supported */ - CPU_FEATURE_AES, /*!< AES* instructions supported */ - CPU_FEATURE_XSAVE, /*!< XSAVE/XRSTOR/etc instructions supported */ - CPU_FEATURE_OSXSAVE, /*!< non-privileged copy of OSXSAVE supported */ - CPU_FEATURE_AVX, /*!< Advanced vector extensions supported */ - CPU_FEATURE_MMXEXT, /*!< AMD MMX-extended instructions supported */ - CPU_FEATURE_3DNOW, /*!< AMD 3DNow! instructions supported */ - CPU_FEATURE_3DNOWEXT, /*!< AMD 3DNow! extended instructions supported */ - CPU_FEATURE_NX, /*!< No-execute bit supported */ - CPU_FEATURE_FXSR_OPT, /*!< FFXSR: FXSAVE and FXRSTOR optimizations */ - CPU_FEATURE_RDTSCP, /*!< RDTSCP instruction supported (AMD-only) */ - CPU_FEATURE_LM, /*!< Long mode (x86_64/EM64T) supported */ - CPU_FEATURE_LAHF_LM, /*!< LAHF/SAHF supported in 64-bit mode */ - CPU_FEATURE_CMP_LEGACY, /*!< core multi-processing legacy mode */ - CPU_FEATURE_SVM, /*!< AMD Secure virtual machine */ - CPU_FEATURE_ABM, /*!< LZCNT instruction support */ - CPU_FEATURE_MISALIGNSSE,/*!< Misaligned SSE supported */ - CPU_FEATURE_SSE4A, /*!< SSE 4a from AMD */ - CPU_FEATURE_3DNOWPREFETCH, /*!< PREFETCH/PREFETCHW support */ - CPU_FEATURE_OSVW, /*!< OS Visible Workaround (AMD) */ - CPU_FEATURE_IBS, /*!< Instruction-based sampling */ - CPU_FEATURE_SSE5, /*!< SSE 5 instructions supported (deprecated, will never be 1) */ - CPU_FEATURE_SKINIT, /*!< SKINIT / STGI supported */ - CPU_FEATURE_WDT, /*!< Watchdog timer support */ - CPU_FEATURE_TS, /*!< Temperature sensor */ - CPU_FEATURE_FID, /*!< Frequency ID control */ - CPU_FEATURE_VID, /*!< Voltage ID control */ - CPU_FEATURE_TTP, /*!< THERMTRIP */ - CPU_FEATURE_TM_AMD, /*!< AMD-specified hardware thermal control */ - CPU_FEATURE_STC, /*!< Software thermal control */ - CPU_FEATURE_100MHZSTEPS,/*!< 100 MHz multiplier control */ - CPU_FEATURE_HWPSTATE, /*!< Hardware P-state control */ - CPU_FEATURE_CONSTANT_TSC, /*!< TSC ticks at constant rate */ - CPU_FEATURE_XOP, /*!< The XOP instruction set (same as the old CPU_FEATURE_SSE5) */ - CPU_FEATURE_FMA3, /*!< The FMA3 instruction set */ - CPU_FEATURE_FMA4, /*!< The FMA4 instruction set */ - CPU_FEATURE_TBM, /*!< Trailing bit manipulation instruction support */ - CPU_FEATURE_F16C, /*!< 16-bit FP convert instruction support */ - CPU_FEATURE_RDRAND, /*!< RdRand instruction */ - CPU_FEATURE_X2APIC, /*!< x2APIC, APIC_BASE.EXTD, MSRs 0000_0800h...0000_0BFFh 64-bit ICR (+030h but not +031h), no DFR (+00Eh), SELF_IPI (+040h) also see standard level 0000_000Bh */ - CPU_FEATURE_CPB, /*!< Core performance boost */ - 
CPU_FEATURE_APERFMPERF, /*!< MPERF/APERF MSRs support */ - CPU_FEATURE_PFI, /*!< Processor Feedback Interface support */ - CPU_FEATURE_PA, /*!< Processor accumulator */ - CPU_FEATURE_AVX2, /*!< AVX2 instructions */ - CPU_FEATURE_BMI1, /*!< BMI1 instructions */ - CPU_FEATURE_BMI2, /*!< BMI2 instructions */ - CPU_FEATURE_HLE, /*!< Hardware Lock Elision prefixes */ - CPU_FEATURE_RTM, /*!< Restricted Transactional Memory instructions */ - CPU_FEATURE_AVX512F, /*!< AVX-512 Foundation */ - CPU_FEATURE_AVX512DQ, /*!< AVX-512 Double/Quad granular insns */ - CPU_FEATURE_AVX512PF, /*!< AVX-512 Prefetch */ - CPU_FEATURE_AVX512ER, /*!< AVX-512 Exponential/Reciprocal */ - CPU_FEATURE_AVX512CD, /*!< AVX-512 Conflict detection */ - CPU_FEATURE_SHA_NI, /*!< SHA-1/SHA-256 instructions */ - CPU_FEATURE_AVX512BW, /*!< AVX-512 Byte/Word granular insns */ - CPU_FEATURE_AVX512VL, /*!< AVX-512 128/256 vector length extensions */ - CPU_FEATURE_SGX, /*!< SGX extensions. Non-autoritative, check cpu_id_t::sgx::present to verify presence */ - CPU_FEATURE_RDSEED, /*!< RDSEED instruction */ - CPU_FEATURE_ADX, /*!< ADX extensions (arbitrary precision) */ - /* termination: */ - NUM_CPU_FEATURES, -} cpu_feature_t; - -/** - * @brief CPU detection hints identifiers - * - * Usage: similar to the flags usage - */ -typedef enum { - CPU_HINT_SSE_SIZE_AUTH = 0, /*!< SSE unit size is authoritative (not only a Family/Model guesswork, but based on an actual CPUID bit) */ - /* termination */ - NUM_CPU_HINTS, -} cpu_hint_t; - -/** - * @brief SGX features flags - * \see cpu_sgx_t - * - * Usage: - * @code - * ... - * struct cpu_raw_data_t raw; - * struct cpu_id_t id; - * if (cpuid_get_raw_data(&raw) == 0 && cpu_identify(&raw, &id) == 0 && id.sgx.present) { - * if (id.sgx.flags[INTEL_SGX1]) - * // The CPU has SGX1 instructions support... - * ... - * } else { - * // no SGX - * } - * } else { - * // processor cannot be determined. - * } - * @endcode - */ - -typedef enum { - INTEL_SGX1, /*!< SGX1 instructions support */ - INTEL_SGX2, /*!< SGX2 instructions support */ - - /* termination: */ - NUM_SGX_FEATURES, -} cpu_sgx_feature_t; - -/** - * @brief Describes common library error codes - */ -typedef enum { - ERR_OK = 0, /*!< No error */ - ERR_NO_CPUID = -1, /*!< CPUID instruction is not supported */ - ERR_NO_RDTSC = -2, /*!< RDTSC instruction is not supported */ - ERR_NO_MEM = -3, /*!< Memory allocation failed */ - ERR_OPEN = -4, /*!< File open operation failed */ - ERR_BADFMT = -5, /*!< Bad file format */ - ERR_NOT_IMP = -6, /*!< Not implemented */ - ERR_CPU_UNKN = -7, /*!< Unsupported processor */ - ERR_NO_RDMSR = -8, /*!< RDMSR instruction is not supported */ - ERR_NO_DRIVER= -9, /*!< RDMSR driver error (generic) */ - ERR_NO_PERMS = -10, /*!< No permissions to install RDMSR driver */ - ERR_EXTRACT = -11, /*!< Cannot extract RDMSR driver (read only media?) */ - ERR_HANDLE = -12, /*!< Bad handle */ - ERR_INVMSR = -13, /*!< Invalid MSR */ - ERR_INVCNB = -14, /*!< Invalid core number */ - ERR_HANDLE_R = -15, /*!< Error on handle read */ - ERR_INVRANGE = -16, /*!< Invalid given range */ -} cpu_error_t; - -/** - * @brief Internal structure, used in cpu_tsc_mark, cpu_tsc_unmark and - * cpu_clock_by_mark - */ -struct cpu_mark_t { - uint64_t tsc; /*!< Time-stamp from RDTSC */ - uint64_t sys_clock; /*!< In microsecond resolution */ -}; - -/** - * @brief Returns the total number of logical CPU threads (even if CPUID is not present). 
- * - * Under VM, this number (and total_logical_cpus, since they are fetched with the same code) - * may be nonsensical, i.e. might not equal NumPhysicalCPUs*NumCoresPerCPU*HyperThreading. - * This is because no matter how many logical threads the host machine has, you may limit them - * in the VM to any number you like. **This** is the number returned by cpuid_get_total_cpus(). - * - * @returns Number of logical CPU threads available. Equals the \ref cpu_id_t::total_logical_cpus. - */ -int cpuid_get_total_cpus(void); - -/** - * @brief Checks if the CPUID instruction is supported - * @retval 1 if CPUID is present - * @retval 0 the CPU doesn't have CPUID. - */ -int cpuid_present(void); - -/** - * @brief Executes the CPUID instruction - * @param eax - the value of the EAX register when executing CPUID - * @param regs - the results will be stored here. regs[0] = EAX, regs[1] = EBX, ... - * @note CPUID will be executed with EAX set to the given value and EBX, ECX, - * EDX set to zero. - */ -void cpu_exec_cpuid(uint32_t eax, uint32_t* regs); - -/** - * @brief Executes the CPUID instruction with the given input registers - * @note This is just a bit more generic version of cpu_exec_cpuid - it allows - * you to control all the registers. - * @param regs - Input/output. Prior to executing CPUID, EAX, EBX, ECX and - * EDX will be set to regs[0], regs[1], regs[2] and regs[3]. - * After CPUID, this array will contain the results. - */ -void cpu_exec_cpuid_ext(uint32_t* regs); - -/** - * @brief Obtains the raw CPUID data from the current CPU - * @param data - a pointer to cpu_raw_data_t structure - * @returns zero if successful, and some negative number on error. - * The error message can be obtained by calling \ref cpuid_error. - * @see cpu_error_t - */ -int cpuid_get_raw_data(struct cpu_raw_data_t* data); - -/** - * @brief Identifies the CPU - * @param raw - Input - a pointer to the raw CPUID data, which is obtained - * either by cpuid_get_raw_data or cpuid_deserialize_raw_data. - * Can also be NULL, in which case the functions calls - * cpuid_get_raw_data itself. - * @param data - Output - the decoded CPU features/info is written here. - * @note The function will not fail, even if some of the information - * cannot be obtained. Even when the CPU is new and thus unknown to - * libcpuid, some generic info, such as "AMD K9 family CPU" will be - * written to data.cpu_codename, and most other things, such as the - * CPU flags, cache sizes, etc. should be detected correctly anyway. - * However, the function CAN fail, if the CPU is completely alien to - * libcpuid. - * @note While cpu_identify() and cpuid_get_raw_data() are fast for most - * purposes, running them several thousand times per second can hamper - * performance significantly. Specifically, avoid writing "cpu feature - * checker" wrapping function, which calls cpu_identify and returns the - * value of some flag, if that function is going to be called frequently. - * @returns zero if successful, and some negative number on error. - * The error message can be obtained by calling \ref cpuid_error. - * @see cpu_error_t - */ -int cpu_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data); - -/** - * @brief The return value of cpuid_get_epc(). - * @details - * Describes an EPC (Enclave Page Cache) layout (physical address and size). - * A CPU may have one or more EPC areas, and information about each is - * fetched via \ref cpuid_get_epc. 
- */ -struct cpu_epc_t { - uint64_t start_addr; - uint64_t length; -}; - -/** - * @brief Fetches information about an EPC (Enclave Page Cache) area. - * @param index - zero-based index, valid range [0..cpu_id_t.egx.num_epc_sections) - * @param raw - a pointer to fetched raw CPUID data. Needed only for testing, - * you can safely pass NULL here (if you pass a real structure, - * it will be used for fetching the leaf 12h data if index < 2; - * otherwise the real CPUID instruction will be used). - * @returns the requested data. If the CPU doesn't support SGX, or if - * index >= cpu_id_t.egx.num_epc_sections, both fields of the returned - * structure will be zeros. - */ -struct cpu_epc_t cpuid_get_epc(int index, const struct cpu_raw_data_t* raw); - -/** - * @brief Returns the libcpuid version - * - * @returns the string representation of the libcpuid version, like "0.1.1" - */ -const char* cpuid_lib_version(void); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -/** @} */ - -#endif /* __LIBCPUID_H__ */ diff --git a/src/3rdparty/libcpuid/libcpuid_constants.h b/src/3rdparty/libcpuid/libcpuid_constants.h deleted file mode 100644 index 3ddb6d5e..00000000 --- a/src/3rdparty/libcpuid/libcpuid_constants.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2008 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ -/** - * @File libcpuid_constants.h - * @Author Veselin Georgiev - * @Brief Some limits and constants for libcpuid - */ - -#ifndef __LIBCPUID_CONSTANTS_H__ -#define __LIBCPUID_CONSTANTS_H__ - -#define VENDOR_STR_MAX 16 -#define BRAND_STR_MAX 64 -#define CPU_FLAGS_MAX 128 -#define MAX_CPUID_LEVEL 32 -#define MAX_EXT_CPUID_LEVEL 32 -#define MAX_INTELFN4_LEVEL 8 -#define MAX_INTELFN11_LEVEL 4 -#define MAX_INTELFN12H_LEVEL 4 -#define MAX_INTELFN14H_LEVEL 4 -#define CPU_HINTS_MAX 16 -#define SGX_FLAGS_MAX 14 - -#endif /* __LIBCPUID_CONSTANTS_H__ */ diff --git a/src/3rdparty/libcpuid/libcpuid_internal.h b/src/3rdparty/libcpuid/libcpuid_internal.h deleted file mode 100644 index 64804616..00000000 --- a/src/3rdparty/libcpuid/libcpuid_internal.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright 2016 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef __LIBCPUID_INTERNAL_H__ -#define __LIBCPUID_INTERNAL_H__ -/* - * This file contains internal undocumented declarations and function prototypes - * for the workings of the internal library infrastructure. 
- */ - -enum _common_codes_t { - NA = 0, - NC, /* No code */ -}; - -#define CODE(x) x -#define CODE2(x, y) x = y -enum _amd_code_t { - #include "amd_code_t.h" -}; -typedef enum _amd_code_t amd_code_t; - -enum _intel_code_t { - #include "intel_code_t.h" -}; -typedef enum _intel_code_t intel_code_t; -#undef CODE -#undef CODE2 - -struct internal_id_info_t { - union { - amd_code_t amd; - intel_code_t intel; - } code; - uint64_t bits; - int score; // detection (matchtable) score -}; - -#define LBIT(x) (((long long) 1) << x) - -enum _common_bits_t { - _M_ = LBIT( 0 ), - MOBILE_ = LBIT( 1 ), - _MP_ = LBIT( 2 ), -}; - -// additional detection bits for Intel CPUs: -enum _intel_bits_t { - PENTIUM_ = LBIT( 10 ), - CELERON_ = LBIT( 11 ), - CORE_ = LBIT( 12 ), - _I_ = LBIT( 13 ), - _3 = LBIT( 14 ), - _5 = LBIT( 15 ), - _7 = LBIT( 16 ), - _9 = LBIT( 17 ), - XEON_ = LBIT( 18 ), - ATOM_ = LBIT( 19 ), -}; -typedef enum _intel_bits_t intel_bits_t; - -enum _amd_bits_t { - ATHLON_ = LBIT( 10 ), - _XP_ = LBIT( 11 ), - DURON_ = LBIT( 12 ), - SEMPRON_ = LBIT( 13 ), - OPTERON_ = LBIT( 14 ), - TURION_ = LBIT( 15 ), - _LV_ = LBIT( 16 ), - _64_ = LBIT( 17 ), - _X2 = LBIT( 18 ), - _X3 = LBIT( 19 ), - _X4 = LBIT( 20 ), - _X6 = LBIT( 21 ), - _FX = LBIT( 22 ), - _APU_ = LBIT( 23 ), -}; -typedef enum _amd_bits_t amd_bits_t; - - - -int cpu_ident_internal(struct cpu_raw_data_t* raw, struct cpu_id_t* data, - struct internal_id_info_t* internal); - -#endif /* __LIBCPUID_INTERNAL_H__ */ diff --git a/src/3rdparty/libcpuid/libcpuid_types.h b/src/3rdparty/libcpuid/libcpuid_types.h deleted file mode 100644 index 0e667aa6..00000000 --- a/src/3rdparty/libcpuid/libcpuid_types.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright 2008 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -/** - * @File libcpuid_types.h - * @Author Veselin Georgiev - * @Brief Type specifications for libcpuid. 
- */ - -#ifndef __LIBCPUID_TYPES_H__ -#define __LIBCPUID_TYPES_H__ - -#if !defined(_MSC_VER) || _MSC_VER >= 1600 -# include -#else -/* we have to provide our own: */ -# if !defined(__int32_t_defined) -typedef int int32_t; -# endif - -# if !defined(__uint32_t_defined) -typedef unsigned uint32_t; -# endif - -typedef signed char int8_t; -typedef unsigned char uint8_t; -typedef signed short int16_t; -typedef unsigned short uint16_t; -#if (defined _MSC_VER) && (_MSC_VER <= 1300) - /* MSVC 6.0: no long longs ... */ - typedef signed __int64 int64_t; - typedef unsigned __int64 uint64_t; -#else - /* all other sane compilers: */ - typedef signed long long int64_t; - typedef unsigned long long uint64_t; -#endif - -#endif - -#endif /* __LIBCPUID_TYPES_H__ */ diff --git a/src/3rdparty/libcpuid/libcpuid_util.c b/src/3rdparty/libcpuid/libcpuid_util.c deleted file mode 100644 index 440b3724..00000000 --- a/src/3rdparty/libcpuid/libcpuid_util.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright 2008 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include -#include -#include -#include -#include -#include "libcpuid.h" -#include "libcpuid_util.h" - -void match_features(const struct feature_map_t* matchtable, int count, uint32_t reg, struct cpu_id_t* data) -{ - int i; - for (i = 0; i < count; i++) - if (reg & (1u << matchtable[i].bit)) - data->flags[matchtable[i].feature] = 1; -} - -static int xmatch_entry(char c, const char* p) -{ - int i, j; - if (c == 0) return -1; - if (c == p[0]) return 1; - if (p[0] == '.') return 1; - if (p[0] == '#' && isdigit(c)) return 1; - if (p[0] == '[') { - j = 1; - while (p[j] && p[j] != ']') j++; - if (!p[j]) return -1; - for (i = 1; i < j; i++) - if (p[i] == c) return j + 1; - } - return -1; -} - -int match_pattern(const char* s, const char* p) -{ - int i, j, dj, k, n, m; - n = (int) strlen(s); - m = (int) strlen(p); - for (i = 0; i < n; i++) { - if (xmatch_entry(s[i], p) != -1) { - j = 0; - k = 0; - while (j < m && ((dj = xmatch_entry(s[i + k], p + j)) != -1)) { - k++; - j += dj; - } - if (j == m) return i + 1; - } - } - return 0; -} - -struct cpu_id_t* get_cached_cpuid(void) -{ - static int initialized = 0; - static struct cpu_id_t id; - if (initialized) return &id; - if (cpu_identify(NULL, &id)) - memset(&id, 0, sizeof(id)); - initialized = 1; - return &id; -} - -int match_all(uint64_t bits, uint64_t mask) -{ - return (bits & mask) == mask; -} diff --git a/src/3rdparty/libcpuid/libcpuid_util.h b/src/3rdparty/libcpuid/libcpuid_util.h deleted file mode 100644 index 0c8200e8..00000000 --- a/src/3rdparty/libcpuid/libcpuid_util.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright 2008 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ -#ifndef __LIBCPUID_UTIL_H__ -#define __LIBCPUID_UTIL_H__ - -#define COUNT_OF(array) (sizeof(array) / sizeof(array[0])) - -struct feature_map_t { - unsigned bit; - cpu_feature_t feature; -}; - -void match_features(const struct feature_map_t* matchtable, int count, - uint32_t reg, struct cpu_id_t* data); - -struct match_entry_t { - int family, model, stepping, ext_family, ext_model; - int ncores, l2cache, l3cache, brand_code; - uint64_t model_bits; - int model_code; - char name[32]; -}; - -// returns the match score: -int match_cpu_codename(const struct match_entry_t* matchtable, int count, - struct cpu_id_t* data, int brand_code, uint64_t bits, - int model_code); -/* - * Seek for a pattern in `haystack'. - * Pattern may be an fixed string, or contain the special metacharacters - * '.' - match any single character - * '#' - match any digit - * '[] - match any of the given chars (regex-like ranges are not - * supported) - * Return val: 0 if the pattern is not found. Nonzero if it is found (actually, - * x + 1 where x is the index where the match is found). - */ -int match_pattern(const char* haystack, const char* pattern); - -/* - * Gets an initialized cpu_id_t. It is cached, so that internal libcpuid - * machinery doesn't need to issue cpu_identify more than once. - */ -struct cpu_id_t* get_cached_cpuid(void); - - -/* returns true if all bits of mask are present in `bits'. */ -int match_all(uint64_t bits, uint64_t mask); - -/* - * Sets the current errno - */ -int set_error(cpu_error_t err); - -#endif /* __LIBCPUID_UTIL_H__ */ diff --git a/src/3rdparty/libcpuid/masm-x64.asm b/src/3rdparty/libcpuid/masm-x64.asm deleted file mode 100644 index 51e98651..00000000 --- a/src/3rdparty/libcpuid/masm-x64.asm +++ /dev/null @@ -1,359 +0,0 @@ - -.code -; procedure exec_cpuid -; Signature: void exec_cpiud(uint32_t *regs) -exec_cpuid Proc - push rbx - push rcx - push rdx - push rdi - - mov rdi, rcx - - mov eax, [rdi] - mov ebx, [rdi+4] - mov ecx, [rdi+8] - mov edx, [rdi+12] - - cpuid - - mov [rdi], eax - mov [rdi+4], ebx - mov [rdi+8], ecx - mov [rdi+12], edx - pop rdi - pop rdx - pop rcx - pop rbx - ret -exec_cpuid endp - -; procedure cpu_rdtsc -; Signature: void cpu_rdtsc(uint64_t *result) -cpu_rdtsc Proc - push rdx - rdtsc - mov [rcx], eax - mov [rcx+4], edx - pop rdx - ret -cpu_rdtsc endp - -; procedure busy_sse_loop -; Signature: void busy_sse_loop(int cycles) -busy_sse_loop Proc - ; save xmm6 & xmm7 into the shadow area, as Visual C++ 2008 - ; expects that we don't touch them: - movups [rsp + 8], xmm6 - movups [rsp + 24], xmm7 - - xorps xmm0, xmm0 - xorps xmm1, xmm1 - xorps xmm2, xmm2 - xorps xmm3, xmm3 - xorps xmm4, xmm4 - xorps xmm5, xmm5 - xorps xmm6, xmm6 - xorps xmm7, xmm7 - ; -- - align 16 -bsLoop: - ; 0: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 1: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 2: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 3: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 4: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 5: - 
addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 6: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 7: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 8: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 9: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 10: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 11: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 12: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 13: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 14: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 15: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 16: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 17: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 18: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 19: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 20: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 21: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 22: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 23: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 24: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 25: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 26: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 27: - addps xmm0, xmm1 - addps xmm1, xmm2 - 
addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 28: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 29: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 30: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; 31: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - ; ---------------------- - dec ecx - jnz bsLoop - - ; restore xmm6 & xmm7: - movups xmm6, [rsp + 8] - movups xmm7, [rsp + 24] - ret -busy_sse_loop endp - -END diff --git a/src/3rdparty/libcpuid/recog_amd.c b/src/3rdparty/libcpuid/recog_amd.c deleted file mode 100644 index 4726f633..00000000 --- a/src/3rdparty/libcpuid/recog_amd.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright 2008 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
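// Rough functional equivalent of the exec_cpuid MASM routine above, sketched with the
// GCC/Clang <cpuid.h> intrinsic for readers who do not want to follow the assembly
// (an illustration, not the project's code): regs[0..3] carry EAX, EBX, ECX and EDX
// into and out of the CPUID instruction.
#include <cpuid.h>
#include <cstdint>
#include <cstdio>
#include <cstring>

static void exec_cpuid_sketch(uint32_t *regs)
{
    __cpuid_count(regs[0], regs[2], regs[0], regs[1], regs[2], regs[3]);
}

int main()
{
    uint32_t regs[4] = { 0, 0, 0, 0 };      // leaf 0: highest basic leaf + vendor string
    exec_cpuid_sketch(regs);

    char vendor[13] = { 0 };
    std::memcpy(vendor + 0, &regs[1], 4);   // the vendor string is spread over EBX, EDX, ECX
    std::memcpy(vendor + 4, &regs[3], 4);
    std::memcpy(vendor + 8, &regs[2], 4);
    std::printf("max basic leaf %u, vendor %s\n", regs[0], vendor);
    return 0;
}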
- */ - -#include -#include -#include -#include "libcpuid.h" -#include "libcpuid_util.h" -#include "libcpuid_internal.h" -#include "recog_amd.h" - -const struct amd_code_str { amd_code_t code; char *str; } amd_code_str[] = { - #define CODE(x) { x, #x } - #define CODE2(x, y) CODE(x) - #include "amd_code_t.h" - #undef CODE -}; - -struct amd_code_and_bits_t { - int code; - uint64_t bits; -}; - -enum _amd_model_codes_t { - // Only for Ryzen CPUs: - _1400, - _1500, - _1600, - _1900, - _2400, - _2500, - _2700, -}; - -static void load_amd_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data) -{ - const struct feature_map_t matchtable_edx81[] = { - { 20, CPU_FEATURE_NX }, - { 22, CPU_FEATURE_MMXEXT }, - { 25, CPU_FEATURE_FXSR_OPT }, - { 30, CPU_FEATURE_3DNOWEXT }, - { 31, CPU_FEATURE_3DNOW }, - }; - const struct feature_map_t matchtable_ecx81[] = { - { 1, CPU_FEATURE_CMP_LEGACY }, - { 2, CPU_FEATURE_SVM }, - { 5, CPU_FEATURE_ABM }, - { 6, CPU_FEATURE_SSE4A }, - { 7, CPU_FEATURE_MISALIGNSSE }, - { 8, CPU_FEATURE_3DNOWPREFETCH }, - { 9, CPU_FEATURE_OSVW }, - { 10, CPU_FEATURE_IBS }, - { 11, CPU_FEATURE_XOP }, - { 12, CPU_FEATURE_SKINIT }, - { 13, CPU_FEATURE_WDT }, - { 16, CPU_FEATURE_FMA4 }, - { 21, CPU_FEATURE_TBM }, - }; - const struct feature_map_t matchtable_edx87[] = { - { 0, CPU_FEATURE_TS }, - { 1, CPU_FEATURE_FID }, - { 2, CPU_FEATURE_VID }, - { 3, CPU_FEATURE_TTP }, - { 4, CPU_FEATURE_TM_AMD }, - { 5, CPU_FEATURE_STC }, - { 6, CPU_FEATURE_100MHZSTEPS }, - { 7, CPU_FEATURE_HWPSTATE }, - /* id 8 is handled in common */ - { 9, CPU_FEATURE_CPB }, - { 10, CPU_FEATURE_APERFMPERF }, - { 11, CPU_FEATURE_PFI }, - { 12, CPU_FEATURE_PA }, - }; - if (raw->ext_cpuid[0][0] >= 0x80000001) { - match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data); - match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data); - } - if (raw->ext_cpuid[0][0] >= 0x80000007) - match_features(matchtable_edx87, COUNT_OF(matchtable_edx87), raw->ext_cpuid[7][3], data); - if (raw->ext_cpuid[0][0] >= 0x8000001a) { - /* We have the extended info about SSE unit size */ - data->detection_hints[CPU_HINT_SSE_SIZE_AUTH] = 1; - data->sse_size = (raw->ext_cpuid[0x1a][0] & 1) ? 
128 : 64; - } -} - -static void decode_amd_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data) -{ - int l3_result; - const int assoc_table[16] = { - 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 255 - }; - unsigned n = raw->ext_cpuid[0][0]; - - if (n >= 0x80000005) { - data->l1_data_cache = (raw->ext_cpuid[5][2] >> 24) & 0xff; - data->l1_assoc = (raw->ext_cpuid[5][2] >> 16) & 0xff; - data->l1_cacheline = (raw->ext_cpuid[5][2]) & 0xff; - data->l1_instruction_cache = (raw->ext_cpuid[5][3] >> 24) & 0xff; - } - if (n >= 0x80000006) { - data->l2_cache = (raw->ext_cpuid[6][2] >> 16) & 0xffff; - data->l2_assoc = assoc_table[(raw->ext_cpuid[6][2] >> 12) & 0xf]; - data->l2_cacheline = (raw->ext_cpuid[6][2]) & 0xff; - - l3_result = (raw->ext_cpuid[6][3] >> 18); - if (l3_result > 0) { - l3_result = 512 * l3_result; /* AMD spec says it's a range, - but we take the lower bound */ - data->l3_cache = l3_result; - data->l3_assoc = assoc_table[(raw->ext_cpuid[6][3] >> 12) & 0xf]; - data->l3_cacheline = (raw->ext_cpuid[6][3]) & 0xff; - } else { - data->l3_cache = 0; - } - } -} - -static void decode_amd_number_of_cores(struct cpu_raw_data_t* raw, struct cpu_id_t* data) -{ - int logical_cpus = -1, num_cores = -1; - - if (raw->basic_cpuid[0][0] >= 1) { - logical_cpus = (raw->basic_cpuid[1][1] >> 16) & 0xff; - if (raw->ext_cpuid[0][0] >= 8) { - num_cores = 1 + (raw->ext_cpuid[8][2] & 0xff); - } - } - if (data->flags[CPU_FEATURE_HT]) { - if (num_cores > 1) { - if (data->ext_family >= 23) - num_cores /= 2; // e.g., Ryzen 7 reports 16 "real" cores, but they are really just 8. - data->num_cores = num_cores; - data->num_logical_cpus = logical_cpus; - } else { - data->num_cores = 1; - data->num_logical_cpus = (logical_cpus >= 2 ? logical_cpus : 2); - } - } else { - data->num_cores = data->num_logical_cpus = 1; - } -} - -int cpuid_identify_amd(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal) -{ - load_amd_features(raw, data); - decode_amd_cache_info(raw, data); - decode_amd_number_of_cores(raw, data); - return 0; -} diff --git a/src/3rdparty/libcpuid/recog_amd.h b/src/3rdparty/libcpuid/recog_amd.h deleted file mode 100644 index 19f839ba..00000000 --- a/src/3rdparty/libcpuid/recog_amd.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright 2008 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
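// Worked example of the CPUID leaf 0x80000006 unpacking done by decode_amd_cache_info()
// above (standalone sketch; the ECX value is a made-up sample, not measured data).
#include <cstdint>
#include <cstdio>

int main()
{
    // bits 31..16: L2 size in KB, bits 15..12: associativity code, bits 7..0: line size
    static const int assoc_table[16] = { 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 255 };

    const uint32_t ecx      = 0x02006140;                       // hypothetical register value
    const uint32_t l2_kb    = (ecx >> 16) & 0xffff;             // 0x0200 -> 512 KB
    const int      l2_assoc = assoc_table[(ecx >> 12) & 0xf];   // code 6 -> 8-way
    const uint32_t l2_line  = ecx & 0xff;                       // 0x40 -> 64-byte lines

    std::printf("L2: %u KB, %d-way, %u-byte lines\n", l2_kb, l2_assoc, l2_line);
    return 0;
}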
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef __RECOG_AMD_H__ -#define __RECOG_AMD_H__ - -int cpuid_identify_amd(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal); - -#endif /* __RECOG_AMD_H__ */ diff --git a/src/3rdparty/libcpuid/recog_intel.c b/src/3rdparty/libcpuid/recog_intel.c deleted file mode 100644 index 397d750e..00000000 --- a/src/3rdparty/libcpuid/recog_intel.c +++ /dev/null @@ -1,543 +0,0 @@ -/* - * Copyright 2008 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ -#include -#include -#include "libcpuid.h" -#include "libcpuid_util.h" -#include "libcpuid_internal.h" -#include "recog_intel.h" - -const struct intel_bcode_str { intel_code_t code; char *str; } intel_bcode_str[] = { - #define CODE(x) { x, #x } - #define CODE2(x, y) CODE(x) - #include "intel_code_t.h" - #undef CODE -}; - -typedef struct { - int code; - uint64_t bits; -} intel_code_and_bits_t; - -enum _intel_model_t { - UNKNOWN = -1, - _3000 = 100, - _3100, - _3200, - X3200, - _3300, - X3300, - _5100, - _5200, - _5300, - _5400, - _2xxx, /* Core i[357] 2xxx */ - _3xxx, /* Core i[357] 3xxx */ -}; -typedef enum _intel_model_t intel_model_t; - -static void load_intel_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data) -{ - const struct feature_map_t matchtable_edx1[] = { - { 18, CPU_FEATURE_PN }, - { 21, CPU_FEATURE_DTS }, - { 22, CPU_FEATURE_ACPI }, - { 27, CPU_FEATURE_SS }, - { 29, CPU_FEATURE_TM }, - { 30, CPU_FEATURE_IA64 }, - { 31, CPU_FEATURE_PBE }, - }; - const struct feature_map_t matchtable_ecx1[] = { - { 2, CPU_FEATURE_DTS64 }, - { 4, CPU_FEATURE_DS_CPL }, - { 5, CPU_FEATURE_VMX }, - { 6, CPU_FEATURE_SMX }, - { 7, CPU_FEATURE_EST }, - { 8, CPU_FEATURE_TM2 }, - { 10, CPU_FEATURE_CID }, - { 14, CPU_FEATURE_XTPR }, - { 15, CPU_FEATURE_PDCM }, - { 18, CPU_FEATURE_DCA }, - { 21, CPU_FEATURE_X2APIC }, - }; - const struct feature_map_t matchtable_edx81[] = { - { 20, CPU_FEATURE_XD }, - }; - const struct feature_map_t matchtable_ebx7[] = { - { 2, CPU_FEATURE_SGX }, - { 4, CPU_FEATURE_HLE }, - { 11, CPU_FEATURE_RTM }, - { 16, CPU_FEATURE_AVX512F }, - { 17, CPU_FEATURE_AVX512DQ }, - { 18, CPU_FEATURE_RDSEED }, - { 19, CPU_FEATURE_ADX }, - { 26, CPU_FEATURE_AVX512PF }, - { 27, CPU_FEATURE_AVX512ER }, - { 28, CPU_FEATURE_AVX512CD }, - { 29, CPU_FEATURE_SHA_NI }, - { 30, CPU_FEATURE_AVX512BW }, - { 31, CPU_FEATURE_AVX512VL }, - }; - if (raw->basic_cpuid[0][0] >= 1) { - match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data); - match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data); - } - if (raw->ext_cpuid[0][0] >= 1) { - match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data); - } - // detect TSX/AVX512: - if (raw->basic_cpuid[0][0] >= 7) { - match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data); - } -} - -enum _cache_type_t { - L1I, - L1D, - L2, - L3, - L4 -}; -typedef enum _cache_type_t cache_type_t; - -static void check_case(uint8_t on, cache_type_t cache, int size, int assoc, int linesize, struct cpu_id_t* data) -{ - if (!on) return; - switch (cache) { - case L1I: - data->l1_instruction_cache = size; - break; - case L1D: - data->l1_data_cache = size; - data->l1_assoc = assoc; - data->l1_cacheline = linesize; - break; - case L2: - data->l2_cache = size; - data->l2_assoc = assoc; - data->l2_cacheline = linesize; - break; - case L3: - data->l3_cache = size; - data->l3_assoc = assoc; - data->l3_cacheline = linesize; - break; - case L4: - data->l4_cache = size; - data->l4_assoc = assoc; - data->l4_cacheline = linesize; - break; - default: - break; - } -} - -static void decode_intel_oldstyle_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data) -{ - uint8_t f[256] = {0}; - int reg, off; - uint32_t x; - for (reg = 0; reg < 4; reg++) { - x = raw->basic_cpuid[2][reg]; - if (x & 0x80000000) continue; - for (off = 0; off < 4; off++) { - f[x & 0xff] = 1; - x >>= 8; - } - } - - check_case(f[0x06], L1I, 8, 4, 32, data); - check_case(f[0x08], L1I, 16, 4, 
32, data); - check_case(f[0x0A], L1D, 8, 2, 32, data); - check_case(f[0x0C], L1D, 16, 4, 32, data); - check_case(f[0x22], L3, 512, 4, 64, data); - check_case(f[0x23], L3, 1024, 8, 64, data); - check_case(f[0x25], L3, 2048, 8, 64, data); - check_case(f[0x29], L3, 4096, 8, 64, data); - check_case(f[0x2C], L1D, 32, 8, 64, data); - check_case(f[0x30], L1I, 32, 8, 64, data); - check_case(f[0x39], L2, 128, 4, 64, data); - check_case(f[0x3A], L2, 192, 6, 64, data); - check_case(f[0x3B], L2, 128, 2, 64, data); - check_case(f[0x3C], L2, 256, 4, 64, data); - check_case(f[0x3D], L2, 384, 6, 64, data); - check_case(f[0x3E], L2, 512, 4, 64, data); - check_case(f[0x41], L2, 128, 4, 32, data); - check_case(f[0x42], L2, 256, 4, 32, data); - check_case(f[0x43], L2, 512, 4, 32, data); - check_case(f[0x44], L2, 1024, 4, 32, data); - check_case(f[0x45], L2, 2048, 4, 32, data); - check_case(f[0x46], L3, 4096, 4, 64, data); - check_case(f[0x47], L3, 8192, 8, 64, data); - check_case(f[0x4A], L3, 6144, 12, 64, data); - check_case(f[0x4B], L3, 8192, 16, 64, data); - check_case(f[0x4C], L3, 12288, 12, 64, data); - check_case(f[0x4D], L3, 16384, 16, 64, data); - check_case(f[0x4E], L2, 6144, 24, 64, data); - check_case(f[0x60], L1D, 16, 8, 64, data); - check_case(f[0x66], L1D, 8, 4, 64, data); - check_case(f[0x67], L1D, 16, 4, 64, data); - check_case(f[0x68], L1D, 32, 4, 64, data); - /* The following four entries are trace cache. Intel does not - * specify a cache-line size, so we use -1 instead - */ - check_case(f[0x70], L1I, 12, 8, -1, data); - check_case(f[0x71], L1I, 16, 8, -1, data); - check_case(f[0x72], L1I, 32, 8, -1, data); - check_case(f[0x73], L1I, 64, 8, -1, data); - - check_case(f[0x78], L2, 1024, 4, 64, data); - check_case(f[0x79], L2, 128, 8, 64, data); - check_case(f[0x7A], L2, 256, 8, 64, data); - check_case(f[0x7B], L2, 512, 8, 64, data); - check_case(f[0x7C], L2, 1024, 8, 64, data); - check_case(f[0x7D], L2, 2048, 8, 64, data); - check_case(f[0x7F], L2, 512, 2, 64, data); - check_case(f[0x82], L2, 256, 8, 32, data); - check_case(f[0x83], L2, 512, 8, 32, data); - check_case(f[0x84], L2, 1024, 8, 32, data); - check_case(f[0x85], L2, 2048, 8, 32, data); - check_case(f[0x86], L2, 512, 4, 64, data); - check_case(f[0x87], L2, 1024, 8, 64, data); - - if (f[0x49]) { - /* This flag is overloaded with two meanings. On Xeon MP - * (family 0xf, model 0x6) this means L3 cache. On all other - * CPUs (notably Conroe et al), this is L2 cache. In both cases - * it means 4MB, 16-way associative, 64-byte line size. - */ - if (data->family == 0xf && data->model == 0x6) { - data->l3_cache = 4096; - data->l3_assoc = 16; - data->l3_cacheline = 64; - } else { - data->l2_cache = 4096; - data->l2_assoc = 16; - data->l2_cacheline = 64; - } - } - if (f[0x40]) { - /* Again, a special flag. It means: - * 1) If no L2 is specified, then CPU is w/o L2 (0 KB) - * 2) If L2 is specified by other flags, then, CPU is w/o L3. 
- */ - if (data->l2_cache == -1) { - data->l2_cache = 0; - } else { - data->l3_cache = 0; - } - } -} - -static void decode_intel_deterministic_cache_info(struct cpu_raw_data_t* raw, - struct cpu_id_t* data) -{ - int ecx; - int ways, partitions, linesize, sets, size, level, typenumber; - cache_type_t type; - for (ecx = 0; ecx < MAX_INTELFN4_LEVEL; ecx++) { - typenumber = raw->intel_fn4[ecx][0] & 0x1f; - if (typenumber == 0) break; - level = (raw->intel_fn4[ecx][0] >> 5) & 0x7; - if (level == 1 && typenumber == 1) - type = L1D; - else if (level == 1 && typenumber == 2) - type = L1I; - else if (level == 2 && typenumber == 3) - type = L2; - else if (level == 3 && typenumber == 3) - type = L3; - else if (level == 4 && typenumber == 3) - type = L4; - else { - continue; - } - ways = ((raw->intel_fn4[ecx][1] >> 22) & 0x3ff) + 1; - partitions = ((raw->intel_fn4[ecx][1] >> 12) & 0x3ff) + 1; - linesize = (raw->intel_fn4[ecx][1] & 0xfff) + 1; - sets = raw->intel_fn4[ecx][2] + 1; - size = ways * partitions * linesize * sets / 1024; - check_case(1, type, size, ways, linesize, data); - } -} - -static int decode_intel_extended_topology(struct cpu_raw_data_t* raw, - struct cpu_id_t* data) -{ - int i, level_type, num_smt = -1, num_core = -1; - for (i = 0; i < MAX_INTELFN11_LEVEL; i++) { - level_type = (raw->intel_fn11[i][2] & 0xff00) >> 8; - switch (level_type) { - case 0x01: - num_smt = raw->intel_fn11[i][1] & 0xffff; - break; - case 0x02: - num_core = raw->intel_fn11[i][1] & 0xffff; - break; - default: - break; - } - } - if (num_smt == -1 || num_core == -1) return 0; - data->num_logical_cpus = num_core; - data->num_cores = num_core / num_smt; - // make sure num_cores is at least 1. In VMs, the CPUID instruction - // is rigged and may give nonsensical results, but we should at least - // avoid outputs like data->num_cores == 0. - if (data->num_cores <= 0) data->num_cores = 1; - return 1; -} - -static void decode_intel_number_of_cores(struct cpu_raw_data_t* raw, - struct cpu_id_t* data) -{ - int logical_cpus = -1, num_cores = -1; - - if (raw->basic_cpuid[0][0] >= 11) { - if (decode_intel_extended_topology(raw, data)) return; - } - - if (raw->basic_cpuid[0][0] >= 1) { - logical_cpus = (raw->basic_cpuid[1][1] >> 16) & 0xff; - if (raw->basic_cpuid[0][0] >= 4) { - num_cores = 1 + ((raw->basic_cpuid[4][0] >> 26) & 0x3f); - } - } - if (data->flags[CPU_FEATURE_HT]) { - if (num_cores > 1) { - data->num_cores = num_cores; - data->num_logical_cpus = logical_cpus; - } else { - data->num_cores = 1; - data->num_logical_cpus = (logical_cpus >= 1 ? 
logical_cpus : 1); - if (data->num_logical_cpus == 1) - data->flags[CPU_FEATURE_HT] = 0; - } - } else { - data->num_cores = data->num_logical_cpus = 1; - } -} - -static intel_code_and_bits_t get_brand_code_and_bits(struct cpu_id_t* data) -{ - intel_code_t code = (intel_code_t) NC; - intel_code_and_bits_t result; - uint64_t bits = 0; - int i = 0; - const char* bs = data->brand_str; - const char* s; - const struct { intel_code_t c; const char *search; } matchtable[] = { - { PENTIUM_M, "Pentium(R) M" }, - { CORE_SOLO, "Pentium(R) Dual CPU" }, - { CORE_SOLO, "Pentium(R) Dual-Core" }, - { PENTIUM_D, "Pentium(R) D" }, - { CORE_SOLO, "Genuine Intel(R) CPU" }, - { CORE_SOLO, "Intel(R) Core(TM)" }, - { DIAMONDVILLE, "CPU [N ][23]## " }, - { SILVERTHORNE, "CPU Z" }, - { PINEVIEW, "CPU [ND][45]## " }, - { CEDARVIEW, "CPU [ND]#### " }, - }; - - const struct { uint64_t bit; const char* search; } bit_matchtable[] = { - { XEON_, "Xeon" }, - { _MP_, " MP" }, - { ATOM_, "Atom(TM) CPU" }, - { MOBILE_, "Mobile" }, - { CELERON_, "Celeron" }, - { PENTIUM_, "Pentium" }, - }; - - for (i = 0; i < COUNT_OF(bit_matchtable); i++) { - if (match_pattern(bs, bit_matchtable[i].search)) - bits |= bit_matchtable[i].bit; - } - - if ((i = match_pattern(bs, "Core(TM) [im][3579]")) != 0) { - bits |= CORE_; - i--; - switch (bs[i + 9]) { - case 'i': bits |= _I_; break; - case 'm': bits |= _M_; break; - } - switch (bs[i + 10]) { - case '3': bits |= _3; break; - case '5': bits |= _5; break; - case '7': bits |= _7; break; - case '9': bits |= _9; break; - } - } - for (i = 0; i < COUNT_OF(matchtable); i++) - if (match_pattern(bs, matchtable[i].search)) { - code = matchtable[i].c; - break; - } - if (bits & XEON_) { - if (match_pattern(bs, "W35##") || match_pattern(bs, "[ELXW]75##")) - bits |= _7; - else if (match_pattern(bs, "[ELXW]55##")) - code = GAINESTOWN; - else if (match_pattern(bs, "[ELXW]56##")) - code = WESTMERE; - else if (data->l3_cache > 0 && data->family == 16) - /* restrict by family, since later Xeons also have L3 ... 
*/ - code = IRWIN; - } - if (match_all(bits, XEON_ + _MP_) && data->l3_cache > 0) - code = POTOMAC; - if (code == CORE_SOLO) { - s = strstr(bs, "CPU"); - if (s) { - s += 3; - while (*s == ' ') s++; - if (*s == 'T') - bits |= MOBILE_; - } - } - if (code == CORE_SOLO) { - switch (data->num_cores) { - case 1: break; - case 2: - { - code = CORE_DUO; - if (data->num_logical_cpus > 2) - code = DUAL_CORE_HT; - break; - } - case 4: - { - code = QUAD_CORE; - if (data->num_logical_cpus > 4) - code = QUAD_CORE_HT; - break; - } - default: - code = MORE_THAN_QUADCORE; break; - } - } - - if (code == CORE_DUO && (bits & MOBILE_) && data->model != 14) { - if (data->ext_model < 23) { - code = MEROM; - } else { - code = PENRYN; - } - } - if (data->ext_model == 23 && - (code == CORE_DUO || code == PENTIUM_D || (bits & CELERON_))) { - code = WOLFDALE; - } - - result.code = code; - result.bits = bits; - return result; -} - -static void decode_intel_sgx_features(const struct cpu_raw_data_t* raw, struct cpu_id_t* data) -{ - struct cpu_epc_t epc; - int i; - - if (raw->basic_cpuid[0][0] < 0x12) return; // no 12h leaf - if (raw->basic_cpuid[0x12][0] == 0) return; // no sub-leafs available, probably it's disabled by BIOS - - // decode sub-leaf 0: - if (raw->basic_cpuid[0x12][0] & 1) data->sgx.flags[INTEL_SGX1] = 1; - if (raw->basic_cpuid[0x12][0] & 2) data->sgx.flags[INTEL_SGX2] = 1; - if (data->sgx.flags[INTEL_SGX1] || data->sgx.flags[INTEL_SGX2]) - data->sgx.present = 1; - data->sgx.misc_select = raw->basic_cpuid[0x12][1]; - data->sgx.max_enclave_32bit = (raw->basic_cpuid[0x12][3] ) & 0xff; - data->sgx.max_enclave_64bit = (raw->basic_cpuid[0x12][3] >> 8) & 0xff; - - // decode sub-leaf 1: - data->sgx.secs_attributes = raw->intel_fn12h[1][0] | (((uint64_t) raw->intel_fn12h[1][1]) << 32); - data->sgx.secs_xfrm = raw->intel_fn12h[1][2] | (((uint64_t) raw->intel_fn12h[1][3]) << 32); - - // decode higher-order subleafs, whenever present: - data->sgx.num_epc_sections = -1; - for (i = 0; i < 1000000; i++) { - epc = cpuid_get_epc(i, raw); - if (epc.length == 0) { - data->sgx.num_epc_sections = i; - break; - } - } - if (data->sgx.num_epc_sections == -1) { - data->sgx.num_epc_sections = 1000000; - } -} - -struct cpu_epc_t cpuid_get_epc(int index, const struct cpu_raw_data_t* raw) -{ - uint32_t regs[4]; - struct cpu_epc_t retval = {0, 0}; - if (raw && index < MAX_INTELFN12H_LEVEL - 2) { - // this was queried already, use the data: - memcpy(regs, raw->intel_fn12h[2 + index], sizeof(regs)); - } else { - // query this ourselves: - regs[0] = 0x12; - regs[2] = 2 + index; - regs[1] = regs[3] = 0; - cpu_exec_cpuid_ext(regs); - } - - // decode values: - if ((regs[0] & 0xf) == 0x1) { - retval.start_addr |= (regs[0] & 0xfffff000); // bits [12, 32) -> bits [12, 32) - retval.start_addr |= ((uint64_t) (regs[1] & 0x000fffff)) << 32; // bits [0, 20) -> bits [32, 52) - retval.length |= (regs[2] & 0xfffff000); // bits [12, 32) -> bits [12, 32) - retval.length |= ((uint64_t) (regs[3] & 0x000fffff)) << 32; // bits [0, 20) -> bits [32, 52) - } - return retval; -} - -int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal) -{ - intel_code_and_bits_t brand; - - load_intel_features(raw, data); - if (raw->basic_cpuid[0][0] >= 4) { - /* Deterministic way is preferred, being more generic */ - decode_intel_deterministic_cache_info(raw, data); - } else if (raw->basic_cpuid[0][0] >= 2) { - decode_intel_oldstyle_cache_info(raw, data); - } - decode_intel_number_of_cores(raw, data); - - brand = 
get_brand_code_and_bits(data); - - internal->code.intel = brand.code; - internal->bits = brand.bits; - - if (data->flags[CPU_FEATURE_SGX]) { - // if SGX is indicated by the CPU, verify its presence: - decode_intel_sgx_features(raw, data); - } - - return 0; -} diff --git a/src/3rdparty/libcpuid/recog_intel.h b/src/3rdparty/libcpuid/recog_intel.h deleted file mode 100644 index 96676f3b..00000000 --- a/src/3rdparty/libcpuid/recog_intel.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright 2008 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
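// The deterministic cache detection removed above sizes each cache level as
// ways * partitions * line_size * sets, with the fields taken from CPUID leaf 4.
// A standalone sketch of that arithmetic; the EBX/ECX values are hypothetical and
// chosen to describe a 32 KB, 8-way L1 data cache with 64-byte lines.
#include <cstdint>
#include <cstdio>

int main()
{
    const uint32_t ebx = (7u << 22) | (0u << 12) | 63u;     // ways-1=7, partitions-1=0, line-1=63
    const uint32_t ecx = 63u;                               // sets-1 = 63

    const uint32_t ways       = ((ebx >> 22) & 0x3ff) + 1;  // 8
    const uint32_t partitions = ((ebx >> 12) & 0x3ff) + 1;  // 1
    const uint32_t line       = (ebx & 0xfff) + 1;          // 64
    const uint32_t sets       = ecx + 1;                    // 64

    std::printf("%u KB\n", ways * partitions * line * sets / 1024);   // 8*1*64*64/1024 = 32 KB
    return 0;
}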
- */ -#ifndef __RECOG_INTEL_H__ -#define __RECOG_INTEL_H__ - -int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal); - -#endif /*__RECOG_INTEL_H__*/ diff --git a/src/3rdparty/libethash/CMakeLists.txt b/src/3rdparty/libethash/CMakeLists.txt index 2fd2f437..6a545440 100644 --- a/src/3rdparty/libethash/CMakeLists.txt +++ b/src/3rdparty/libethash/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 2.8) +cmake_minimum_required (VERSION 2.8.12) project (ethash C) set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Os") diff --git a/src/App.cpp b/src/App.cpp index 1d1ec3e9..39ec0d62 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -43,17 +43,13 @@ xmrig::App::App(Process *process) { - m_controller = new Controller(process); + m_controller = std::make_shared(process); } xmrig::App::~App() { Cpu::release(); - - delete m_signals; - delete m_console; - delete m_controller; } @@ -65,7 +61,7 @@ int xmrig::App::exec() return 2; } - m_signals = new Signals(this); + m_signals = std::make_shared(this); int rc = 0; if (background(rc)) { @@ -78,10 +74,10 @@ int xmrig::App::exec() } if (!m_controller->isBackground()) { - m_console = new Console(this); + m_console = std::make_shared(this); } - Summary::print(m_controller); + Summary::print(m_controller.get()); if (m_controller->config()->isDryRun()) { LOG_NOTICE("%s " WHITE_BOLD("OK"), Tags::config()); @@ -115,32 +111,20 @@ void xmrig::App::onSignal(int signum) switch (signum) { case SIGHUP: - LOG_WARN("%s " YELLOW("SIGHUP received, exiting"), Tags::signal()); - break; - case SIGTERM: - LOG_WARN("%s " YELLOW("SIGTERM received, exiting"), Tags::signal()); - break; - case SIGINT: - LOG_WARN("%s " YELLOW("SIGINT received, exiting"), Tags::signal()); - break; + return close(); default: - return; + break; } - - close(); } void xmrig::App::close() { - m_signals->stop(); - - if (m_console) { - m_console->stop(); - } + m_signals.reset(); + m_console.reset(); m_controller->stop(); diff --git a/src/App.h b/src/App.h index 122e8fb6..962baead 100644 --- a/src/App.h +++ b/src/App.h @@ -32,6 +32,9 @@ #include "base/tools/Object.h" +#include + + namespace xmrig { @@ -60,9 +63,9 @@ private: bool background(int &rc); void close(); - Console *m_console = nullptr; - Controller *m_controller = nullptr; - Signals *m_signals = nullptr; + std::shared_ptr m_console; + std::shared_ptr m_controller; + std::shared_ptr m_signals; }; diff --git a/src/App_unix.cpp b/src/App_unix.cpp index ae2905db..b0b80079 100644 --- a/src/App_unix.cpp +++ b/src/App_unix.cpp @@ -5,8 +5,8 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,8 +36,6 @@ bool xmrig::App::background(int &rc) { - signal(SIGPIPE, SIG_IGN); - if (!m_controller->isBackground()) { return false; } diff --git a/src/Summary.cpp b/src/Summary.cpp index a026c938..ebaeb356 100644 --- a/src/Summary.cpp +++ b/src/Summary.cpp @@ -5,8 +5,8 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2019 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig * * This program is free software: you can redistribute it and/or modify * it under the terms of the 
GNU General Public License as published by @@ -47,6 +47,13 @@ namespace xmrig { +#ifdef XMRIG_OS_WIN +static constexpr const char *kHugepagesSupported = GREEN_BOLD("permission granted"); +#else +static constexpr const char *kHugepagesSupported = GREEN_BOLD("supported"); +#endif + + #ifdef XMRIG_FEATURE_ASM static const char *coloredAsmNames[] = { RED_BOLD("none"), @@ -66,17 +73,13 @@ inline static const char *asmName(Assembly::Id assembly) static void print_memory(Config *config) { -# ifdef XMRIG_OS_WIN Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", - "HUGE PAGES", config->cpu().isHugePages() ? (VirtualMemory::isHugepagesAvailable() ? GREEN_BOLD("permission granted") : RED_BOLD("unavailable")) : RED_BOLD("disabled")); -# else - Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "HUGE PAGES", config->cpu().isHugePages() ? GREEN_BOLD("supported") : RED_BOLD("disabled")); -# endif + "HUGE PAGES", config->cpu().isHugePages() ? (VirtualMemory::isHugepagesAvailable() ? kHugepagesSupported : RED_BOLD("unavailable")) : RED_BOLD("disabled")); # ifdef XMRIG_ALGO_RANDOMX # ifdef XMRIG_OS_LINUX Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", - "1GB PAGES", (VirtualMemory::isOneGbPagesAvailable() ? (config->rx().isOneGbPages() ? GREEN_BOLD("supported") : YELLOW_BOLD("disabled")) : YELLOW_BOLD("unavailable"))); + "1GB PAGES", (VirtualMemory::isOneGbPagesAvailable() ? (config->rx().isOneGbPages() ? kHugepagesSupported : YELLOW_BOLD("disabled")) : YELLOW_BOLD("unavailable"))); # else Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "1GB PAGES", YELLOW_BOLD("unavailable")); # endif @@ -88,33 +91,27 @@ static void print_cpu(Config *) { const auto info = Cpu::info(); - Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%zu)") " %sx64 %sAES", + Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%zu)") " %s %sAES%s", "CPU", info->brand(), info->packages(), - info->isX64() ? GREEN_BOLD_S : RED_BOLD_S "-", - info->hasAES() ? GREEN_BOLD_S : RED_BOLD_S "-" + ICpuInfo::is64bit() ? GREEN_BOLD("64-bit") : RED_BOLD("32-bit"), + info->hasAES() ? GREEN_BOLD_S : RED_BOLD_S "-", + info->isVM() ? 
RED_BOLD_S " VM" : "" ); -# if defined(XMRIG_FEATURE_LIBCPUID) || defined (XMRIG_FEATURE_HWLOC) +# if defined(XMRIG_FEATURE_HWLOC) Log::print(WHITE_BOLD(" %-13s") BLACK_BOLD("L2:") WHITE_BOLD("%.1f MB") BLACK_BOLD(" L3:") WHITE_BOLD("%.1f MB") CYAN_BOLD(" %zu") "C" BLACK_BOLD("/") CYAN_BOLD("%zu") "T" -# ifdef XMRIG_FEATURE_HWLOC - BLACK_BOLD(" NUMA:") CYAN_BOLD("%zu") -# endif - , "", + BLACK_BOLD(" NUMA:") CYAN_BOLD("%zu"), + "", info->L2() / 1048576.0, info->L3() / 1048576.0, info->cores(), - info->threads() -# ifdef XMRIG_FEATURE_HWLOC - , info->nodes() -# endif + info->threads(), + info->nodes() ); # else - Log::print(WHITE_BOLD(" %-13s") BLACK_BOLD("threads:") CYAN_BOLD("%zu"), - "", - info->threads() - ); + Log::print(WHITE_BOLD(" %-13s") BLACK_BOLD("threads:") CYAN_BOLD("%zu"), "", info->threads()); # endif } @@ -160,11 +157,11 @@ static void print_threads(Config *config) static void print_commands(Config *) { if (Log::isColors()) { - Log::print(GREEN_BOLD(" * ") WHITE_BOLD("COMMANDS ") MAGENTA_BG(WHITE_BOLD_S "h") WHITE_BOLD("ashrate, ") - MAGENTA_BG(WHITE_BOLD_S "p") WHITE_BOLD("ause, ") - MAGENTA_BG(WHITE_BOLD_S "r") WHITE_BOLD("esume, ") + Log::print(GREEN_BOLD(" * ") WHITE_BOLD("COMMANDS ") MAGENTA_BG_BOLD("h") WHITE_BOLD("ashrate, ") + MAGENTA_BG_BOLD("p") WHITE_BOLD("ause, ") + MAGENTA_BG_BOLD("r") WHITE_BOLD("esume, ") WHITE_BOLD("re") MAGENTA_BG(WHITE_BOLD_S "s") WHITE_BOLD("ults, ") - MAGENTA_BG(WHITE_BOLD_S "c") WHITE_BOLD("onnection") + MAGENTA_BG_BOLD("c") WHITE_BOLD("onnection") ); } else { diff --git a/src/backend/backend.cmake b/src/backend/backend.cmake index 6bf6c3b2..73dd8605 100644 --- a/src/backend/backend.cmake +++ b/src/backend/backend.cmake @@ -1,7 +1,7 @@ -include (src/backend/cpu/cpu.cmake) -include (src/backend/opencl/opencl.cmake) -include (src/backend/cuda/cuda.cmake) -include (src/backend/common/common.cmake) +include(src/backend/cpu/cpu.cmake) +include(src/backend/opencl/opencl.cmake) +include(src/backend/cuda/cuda.cmake) +include(src/backend/common/common.cmake) set(HEADERS_BACKEND diff --git a/src/backend/common/GpuWorker.cpp b/src/backend/common/GpuWorker.cpp new file mode 100644 index 00000000..5a9c3d13 --- /dev/null +++ b/src/backend/common/GpuWorker.cpp @@ -0,0 +1,52 @@ +/* XMRig + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + + +#include "backend/common/GpuWorker.h" +#include "base/tools/Chrono.h" + + +xmrig::GpuWorker::GpuWorker(size_t id, int64_t affinity, int priority, uint32_t deviceIndex) : Worker(id, affinity, priority), + m_deviceIndex(deviceIndex) +{ +} + + +void xmrig::GpuWorker::storeStats() +{ + // Get index which is unused now + const uint32_t index = m_index.load(std::memory_order_relaxed) ^ 1; + + // Fill in the data for that index + m_hashCount[index] = m_count; + m_timestamp[index] = Chrono::steadyMSecs(); + + // Switch to that index + // All data will be in memory by the time it completes thanks to std::memory_order_seq_cst + m_index.fetch_xor(1, std::memory_order_seq_cst); +} + + +void xmrig::GpuWorker::hashrateData(uint64_t &hashCount, uint64_t &timeStamp, uint64_t &rawHashes) const +{ + const uint32_t index = m_index.load(std::memory_order_relaxed); + + rawHashes = m_hashrateData.interpolate(timeStamp); + hashCount = m_hashCount[index]; + timeStamp = m_timestamp[index]; +} diff --git a/src/backend/common/GpuWorker.h b/src/backend/common/GpuWorker.h new file mode 100644 index 00000000..3c828e52 --- /dev/null +++ b/src/backend/common/GpuWorker.h @@ -0,0 +1,58 @@ +/* XMRig + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
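// Minimal standalone sketch of the double-buffered counter scheme used by
// GpuWorker::storeStats()/hashrateData() above: the worker thread fills the slot
// readers are not currently using, then flips an atomic index, so a reader never
// sees a hash count paired with the wrong timestamp. Class and member names here
// are hypothetical stand-ins.
#include <atomic>
#include <cstdint>

class DoubleBufferedStats
{
public:
    void store(uint64_t hashes, uint64_t timestampMs)
    {
        const uint32_t idx = m_index.load(std::memory_order_relaxed) ^ 1;  // slot not visible to readers
        m_hashes[idx]     = hashes;
        m_timestamps[idx] = timestampMs;
        m_index.fetch_xor(1, std::memory_order_seq_cst);                   // publish the filled slot
    }

    void load(uint64_t &hashes, uint64_t &timestampMs) const
    {
        const uint32_t idx = m_index.load(std::memory_order_relaxed);      // currently published slot
        hashes      = m_hashes[idx];
        timestampMs = m_timestamps[idx];
    }

private:
    std::atomic<uint32_t> m_index{0};
    uint64_t m_hashes[2]     = {};
    uint64_t m_timestamps[2] = {};
};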
+ */ + +#ifndef XMRIG_GPUWORKER_H +#define XMRIG_GPUWORKER_H + + +#include + + +#include "backend/common/HashrateInterpolator.h" +#include "backend/common/Worker.h" + + +namespace xmrig { + + +class GpuWorker : public Worker +{ +public: + GpuWorker(size_t id, int64_t affinity, int priority, uint32_t m_deviceIndex); + +protected: + inline const VirtualMemory *memory() const override { return nullptr; } + inline uint32_t deviceIndex() const { return m_deviceIndex; } + + void hashrateData(uint64_t &hashCount, uint64_t &timeStamp, uint64_t &rawHashes) const override; + +protected: + void storeStats(); + + const uint32_t m_deviceIndex; + HashrateInterpolator m_hashrateData; + std::atomic m_index = {}; + uint64_t m_hashCount[2] = {}; + uint64_t m_timestamp[2] = {}; +}; + + +} // namespace xmrig + + +#endif /* XMRIG_GPUWORKER_H */ diff --git a/src/backend/common/Hashrate.cpp b/src/backend/common/Hashrate.cpp index aa4d80c7..4038e34b 100644 --- a/src/backend/common/Hashrate.cpp +++ b/src/backend/common/Hashrate.cpp @@ -1,12 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2017-2018 XMR-Stak , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,7 +19,6 @@ #include -#include #include #include @@ -75,72 +69,6 @@ xmrig::Hashrate::~Hashrate() } -double xmrig::Hashrate::calc(size_t ms) const -{ - const double data = calc(0, ms); - return std::isnormal(data) ? 
data : 0.0; -} - - -double xmrig::Hashrate::calc(size_t threadId, size_t ms) const -{ - assert(threadId < m_threads); - if (threadId >= m_threads) { - return nan(""); - } - - uint64_t earliestHashCount = 0; - uint64_t earliestStamp = 0; - bool haveFullSet = false; - - const uint64_t timeStampLimit = xmrig::Chrono::steadyMSecs() - ms; - uint64_t* timestamps = m_timestamps[threadId]; - uint64_t* counts = m_counts[threadId]; - - const size_t idx_start = (m_top[threadId] - 1) & kBucketMask; - size_t idx = idx_start; - - uint64_t lastestStamp = timestamps[idx]; - uint64_t lastestHashCnt = counts[idx]; - - do { - if (timestamps[idx] < timeStampLimit) { - haveFullSet = (timestamps[idx] != 0); - if (idx != idx_start) { - idx = (idx + 1) & kBucketMask; - earliestStamp = timestamps[idx]; - earliestHashCount = counts[idx]; - } - break; - } - idx = (idx - 1) & kBucketMask; - } while (idx != idx_start); - - if (!haveFullSet || earliestStamp == 0 || lastestStamp == 0) { - return nan(""); - } - - if (lastestStamp - earliestStamp == 0) { - return nan(""); - } - - const auto hashes = static_cast(lastestHashCnt - earliestHashCount); - const auto time = static_cast(lastestStamp - earliestStamp) / 1000.0; - - return hashes / time; -} - - -void xmrig::Hashrate::add(size_t threadId, uint64_t count, uint64_t timestamp) -{ - const size_t top = m_top[threadId]; - m_counts[threadId][top] = count; - m_timestamps[threadId][top] = timestamp; - - m_top[threadId] = (top + 1) & kBucketMask; -} - - const char *xmrig::Hashrate::format(double h, char *buf, size_t size) { return ::format(h, buf, size); @@ -174,10 +102,69 @@ rapidjson::Value xmrig::Hashrate::toJSON(size_t threadId, rapidjson::Document &d auto &allocator = doc.GetAllocator(); Value out(kArrayType); - out.PushBack(normalize(calc(threadId + 1, ShortInterval)), allocator); - out.PushBack(normalize(calc(threadId + 1, MediumInterval)), allocator); - out.PushBack(normalize(calc(threadId + 1, LargeInterval)), allocator); + out.PushBack(normalize(calc(threadId, ShortInterval)), allocator); + out.PushBack(normalize(calc(threadId, MediumInterval)), allocator); + out.PushBack(normalize(calc(threadId, LargeInterval)), allocator); return out; } #endif + + +double xmrig::Hashrate::hashrate(size_t index, size_t ms) const +{ + assert(index < m_threads); + if (index >= m_threads) { + return nan(""); + } + + uint64_t earliestHashCount = 0; + uint64_t earliestStamp = 0; + bool haveFullSet = false; + + const uint64_t timeStampLimit = xmrig::Chrono::steadyMSecs() - ms; + uint64_t* timestamps = m_timestamps[index]; + uint64_t* counts = m_counts[index]; + + const size_t idx_start = (m_top[index] - 1) & kBucketMask; + size_t idx = idx_start; + + uint64_t lastestStamp = timestamps[idx]; + uint64_t lastestHashCnt = counts[idx]; + + do { + if (timestamps[idx] < timeStampLimit) { + haveFullSet = (timestamps[idx] != 0); + if (idx != idx_start) { + idx = (idx + 1) & kBucketMask; + earliestStamp = timestamps[idx]; + earliestHashCount = counts[idx]; + } + break; + } + idx = (idx - 1) & kBucketMask; + } while (idx != idx_start); + + if (!haveFullSet || earliestStamp == 0 || lastestStamp == 0) { + return nan(""); + } + + if (lastestStamp - earliestStamp == 0) { + return nan(""); + } + + const auto hashes = static_cast(lastestHashCnt - earliestHashCount); + const auto time = static_cast(lastestStamp - earliestStamp) / 1000.0; + + return hashes / time; +} + + +void xmrig::Hashrate::addData(size_t index, uint64_t count, uint64_t timestamp) +{ + const size_t top = m_top[index]; + 
m_counts[index][top] = count; + m_timestamps[index][top] = timestamp; + + m_top[index] = (top + 1) & kBucketMask; +} diff --git a/src/backend/common/Hashrate.h b/src/backend/common/Hashrate.h index 59e1afe1..ad278cfa 100644 --- a/src/backend/common/Hashrate.h +++ b/src/backend/common/Hashrate.h @@ -1,12 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2017-2018 XMR-Stak , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,6 +21,7 @@ #define XMRIG_HASHRATE_H +#include #include #include @@ -42,7 +38,7 @@ class Hashrate public: XMRIG_DISABLE_COPY_MOVE_DEFAULT(Hashrate) - enum Intervals { + enum Intervals : size_t { ShortInterval = 10000, MediumInterval = 60000, LargeInterval = 900000 @@ -50,11 +46,12 @@ public: Hashrate(size_t threads); ~Hashrate(); - double calc(size_t ms) const; - double calc(size_t threadId, size_t ms) const; - void add(size_t threadId, uint64_t count, uint64_t timestamp); - inline size_t threads() const { return m_threads; } + inline double calc(size_t ms) const { const double data = hashrate(0U, ms); return std::isnormal(data) ? data : 0.0; } + inline double calc(size_t threadId, size_t ms) const { return hashrate(threadId + 1, ms); } + inline size_t threads() const { return m_threads > 0U ? m_threads - 1U : 0U; } + inline void add(size_t threadId, uint64_t count, uint64_t timestamp) { addData(threadId + 1U, count, timestamp); } + inline void add(uint64_t count, uint64_t timestamp) { addData(0U, count, timestamp); } static const char *format(double h, char *buf, size_t size); static rapidjson::Value normalize(double d); @@ -65,6 +62,9 @@ public: # endif private: + double hashrate(size_t index, size_t ms) const; + void addData(size_t index, uint64_t count, uint64_t timestamp); + constexpr static size_t kBucketSize = 2 << 11; constexpr static size_t kBucketMask = kBucketSize - 1; diff --git a/src/backend/common/HashrateInterpolator.cpp b/src/backend/common/HashrateInterpolator.cpp index 2edda6a6..0ac11514 100644 --- a/src/backend/common/HashrateInterpolator.cpp +++ b/src/backend/common/HashrateInterpolator.cpp @@ -1,10 +1,4 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , * Copyright 2018-2020 SChernykh * Copyright 2016-2020 XMRig , * diff --git a/src/backend/common/HashrateInterpolator.h b/src/backend/common/HashrateInterpolator.h index b4c7b8c7..65e62443 100644 --- a/src/backend/common/HashrateInterpolator.h +++ b/src/backend/common/HashrateInterpolator.h @@ -1,10 +1,4 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , * Copyright 2018-2020 SChernykh * Copyright 2016-2020 XMRig , * @@ -47,7 +41,7 @@ public: private: // Buffer of hashrate counters, used for linear interpolation of past data mutable std::mutex m_lock; - std::deque> m_data; + std::deque > m_data; }; diff --git a/src/backend/common/Thread.h b/src/backend/common/Thread.h index ab88441c..fb6c618b 100644 
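// How the reworked Hashrate bookkeeping above fits together: addData()/hashrate()
// take raw bucket indices, where index 0 holds the whole-backend total and each
// thread's samples live at threadId + 1; the public add()/calc() wrappers apply
// that shift. The reported rate is still "hashes over elapsed seconds" between the
// oldest sample inside the window and the newest one. A tiny standalone illustration
// of that arithmetic (the sample numbers are made up):
#include <cstdint>
#include <cstdio>

int main()
{
    // two samples of one worker, taken 10 seconds apart
    const uint64_t earliestCount = 1200000, earliestStampMs = 5000;
    const uint64_t latestCount   = 1850000, latestStampMs   = 15000;

    const double hashes  = static_cast<double>(latestCount - earliestCount);              // 650000
    const double seconds = static_cast<double>(latestStampMs - earliestStampMs) / 1000.0; // 10 s

    std::printf("%.1f H/s\n", hashes / seconds);   // 65000.0 H/s
    return 0;
}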
--- a/src/backend/common/Thread.h +++ b/src/backend/common/Thread.h @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -27,12 +21,17 @@ #include "backend/common/interfaces/IWorker.h" -#include "base/tools/Object.h" #include +#ifdef XMRIG_OS_APPLE +# include +# include +#endif + + namespace xmrig { @@ -46,21 +45,48 @@ public: XMRIG_DISABLE_COPY_MOVE_DEFAULT(Thread) inline Thread(IBackend *backend, size_t id, const T &config) : m_id(id), m_config(config), m_backend(backend) {} + +# ifdef XMRIG_OS_APPLE + inline ~Thread() { pthread_join(m_thread, nullptr); delete m_worker; } + + inline void start(void *(*callback)(void *)) + { + if (m_config.affinity >= 0) { + pthread_create_suspended_np(&m_thread, nullptr, callback, this); + + mach_port_t mach_thread = pthread_mach_thread_np(m_thread); + thread_affinity_policy_data_t policy = { static_cast(m_config.affinity + 1) }; + + thread_policy_set(mach_thread, THREAD_AFFINITY_POLICY, reinterpret_cast(&policy), THREAD_AFFINITY_POLICY_COUNT); + thread_resume(mach_thread); + } + else { + pthread_create(&m_thread, nullptr, callback, this); + } + } +# else inline ~Thread() { m_thread.join(); delete m_worker; } + inline void start(void *(*callback)(void *)) { m_thread = std::thread(callback, this); } +# endif + inline const T &config() const { return m_config; } inline IBackend *backend() const { return m_backend; } inline IWorker *worker() const { return m_worker; } inline size_t id() const { return m_id; } inline void setWorker(IWorker *worker) { m_worker = worker; } - inline void start(void (*callback) (void *)) { m_thread = std::thread(callback, this); } private: const size_t m_id = 0; const T m_config; IBackend *m_backend; IWorker *m_worker = nullptr; + + #ifdef XMRIG_OS_APPLE + pthread_t m_thread{}; +# else std::thread m_thread; +# endif }; diff --git a/src/backend/common/Worker.cpp b/src/backend/common/Worker.cpp index cf244ab3..b24ac280 100644 --- a/src/backend/common/Worker.cpp +++ b/src/backend/common/Worker.cpp @@ -1,13 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,7 +19,6 @@ #include "backend/common/Worker.h" #include "base/kernel/Platform.h" -#include "base/tools/Chrono.h" #include "crypto/common/VirtualMemory.h" @@ -39,27 +31,3 @@ xmrig::Worker::Worker(size_t id, int64_t affinity, int priority) : Platform::trySetThreadAffinity(affinity); Platform::setThreadPriority(priority); } - - -void xmrig::Worker::storeStats() -{ - // Get index which is unused now - const uint32_t index = m_index.load(std::memory_order_relaxed) ^ 1; - - // Fill in the data for that index - m_hashCount[index] = m_count; - m_timestamp[index] = 
Chrono::steadyMSecs(); - - // Switch to that index - // All data will be in memory by the time it completes thanks to std::memory_order_seq_cst - m_index.fetch_xor(1, std::memory_order_seq_cst); -} - - -void xmrig::Worker::getHashrateData(uint64_t& hashCount, uint64_t& timeStamp) const -{ - const uint32_t index = m_index.load(std::memory_order_relaxed); - - hashCount = m_hashCount[index]; - timeStamp = m_timestamp[index]; -} diff --git a/src/backend/common/Worker.h b/src/backend/common/Worker.h index 23c592b9..f4c12ea5 100644 --- a/src/backend/common/Worker.h +++ b/src/backend/common/Worker.h @@ -1,13 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -27,9 +20,6 @@ #define XMRIG_WORKER_H -#include - - #include "backend/common/interfaces/IWorker.h" @@ -41,23 +31,17 @@ class Worker : public IWorker public: Worker(size_t id, int64_t affinity, int priority); - inline const VirtualMemory *memory() const override { return nullptr; } - inline size_t id() const override { return m_id; } - inline uint64_t rawHashes() const override { return m_count; } - inline void jobEarlyNotification(const Job&) override {} - - void getHashrateData(uint64_t& hashCount, uint64_t& timeStamp) const override; - protected: - void storeStats(); + inline int64_t affinity() const { return m_affinity; } + inline size_t id() const override { return m_id; } + inline uint32_t node() const { return m_node; } + uint64_t m_count = 0; + +private: const int64_t m_affinity; const size_t m_id; - std::atomic m_index = {}; uint32_t m_node = 0; - uint64_t m_count = 0; - uint64_t m_hashCount[2] = {}; - uint64_t m_timestamp[2] = {}; }; diff --git a/src/backend/common/WorkerJob.h b/src/backend/common/WorkerJob.h index a37a05fa..cf664f87 100644 --- a/src/backend/common/WorkerJob.h +++ b/src/backend/common/WorkerJob.h @@ -110,7 +110,7 @@ private: alignas(16) uint8_t m_blobs[2][Job::kMaxBlobSize * N]{}; Job m_jobs[2]; uint32_t m_rounds[2] = { 0, 0 }; - uint64_t m_nonce_mask[2]; + uint64_t m_nonce_mask[2] = { 0, 0 }; uint64_t m_sequence = 0; uint8_t m_index = 0; }; diff --git a/src/backend/common/Workers.cpp b/src/backend/common/Workers.cpp index 16265b0a..a70affe6 100644 --- a/src/backend/common/Workers.cpp +++ b/src/backend/common/Workers.cpp @@ -1,13 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,16 +17,13 @@ */ +#include "backend/common/Workers.h" #include "backend/common/Hashrate.h" #include "backend/common/interfaces/IBackend.h" -#include "backend/common/Workers.h" #include "backend/cpu/CpuWorker.h" #include "base/io/log/Log.h" #include "base/io/log/Tags.h" -#include "base/net/stratum/Pool.h" 
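// Standalone sketch of the Apple-specific start path added to Thread<T> above: the
// pthread is created suspended, tagged with a THREAD_AFFINITY_POLICY hint, and only
// then resumed, so the hint is in place before the thread first runs. macOS treats
// the tag as a grouping hint rather than a hard core binding; the tag value below
// is an arbitrary sample.
#include <pthread.h>
#include <mach/thread_act.h>
#include <mach/thread_policy.h>
#include <cstdio>

static void *workerMain(void *)
{
    std::puts("worker running");
    return nullptr;
}

int main()
{
    pthread_t thread{};
    const int affinityTag = 1;   // hypothetical affinity tag

    pthread_create_suspended_np(&thread, nullptr, workerMain, nullptr);

    mach_port_t machThread = pthread_mach_thread_np(thread);
    thread_affinity_policy_data_t policy = { affinityTag };
    thread_policy_set(machThread, THREAD_AFFINITY_POLICY,
                      reinterpret_cast<thread_policy_t>(&policy), THREAD_AFFINITY_POLICY_COUNT);
    thread_resume(machThread);

    pthread_join(thread, nullptr);
    return 0;
}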
#include "base/tools/Chrono.h" -#include "base/tools/Object.h" -#include "core/Miner.h" #ifdef XMRIG_FEATURE_OPENCL @@ -59,7 +49,6 @@ class WorkersPrivate public: XMRIG_DISABLE_COPY_MOVE(WorkersPrivate) - WorkersPrivate() = default; ~WorkersPrivate() = default; @@ -87,20 +76,6 @@ xmrig::Workers::~Workers() } -template -static void getHashrateData(xmrig::IWorker* worker, uint64_t& hashCount, uint64_t& timeStamp) -{ - worker->getHashrateData(hashCount, timeStamp); -} - - -template<> -void getHashrateData(xmrig::IWorker* worker, uint64_t& hashCount, uint64_t&) -{ - hashCount = worker->rawHashes(); -} - - template bool xmrig::Workers::tick(uint64_t) { @@ -111,33 +86,32 @@ bool xmrig::Workers::tick(uint64_t) uint64_t ts = Chrono::steadyMSecs(); bool totalAvailable = true; uint64_t totalHashCount = 0; + uint64_t hashCount = 0; + uint64_t rawHashes = 0; for (Thread *handle : m_workers) { IWorker *worker = handle->worker(); if (worker) { - uint64_t hashCount; - getHashrateData(worker, hashCount, ts); - d_ptr->hashrate->add(handle->id() + 1, hashCount, ts); + worker->hashrateData(hashCount, ts, rawHashes); + d_ptr->hashrate->add(handle->id(), hashCount, ts); - const uint64_t n = worker->rawHashes(); - if (n == 0) { + if (rawHashes == 0) { totalAvailable = false; } - totalHashCount += n; + + totalHashCount += rawHashes; } } if (totalAvailable) { - d_ptr->hashrate->add(0, totalHashCount, Chrono::steadyMSecs()); + d_ptr->hashrate->add(totalHashCount, Chrono::steadyMSecs()); } # ifdef XMRIG_FEATURE_BENCHMARK - if (d_ptr->benchmark && d_ptr->benchmark->finish(totalHashCount)) { - return false; - } -# endif - + return !d_ptr->benchmark || !d_ptr->benchmark->finish(totalHashCount); +# else return true; +# endif } @@ -158,14 +132,19 @@ void xmrig::Workers::setBackend(IBackend *backend) template void xmrig::Workers::stop() { +# ifdef XMRIG_MINER_PROJECT Nonce::stop(T::backend()); +# endif for (Thread *worker : m_workers) { delete worker; } m_workers.clear(); + +# ifdef XMRIG_MINER_PROJECT Nonce::touch(T::backend()); +# endif d_ptr->hashrate.reset(); } @@ -195,7 +174,7 @@ xmrig::IWorker *xmrig::Workers::create(Thread *) template -void xmrig::Workers::onReady(void *arg) +void *xmrig::Workers::onReady(void *arg) { auto handle = static_cast* >(arg); @@ -208,13 +187,15 @@ void xmrig::Workers::onReady(void *arg) handle->backend()->start(worker, false); delete worker; - return; + return nullptr; } assert(handle->backend() != nullptr); handle->setWorker(worker); handle->backend()->start(worker, true); + + return nullptr; } @@ -226,17 +207,13 @@ void xmrig::Workers::start(const std::vector &data, bool sleep) } d_ptr->hashrate = std::make_shared(m_workers.size()); + +# ifdef XMRIG_MINER_PROJECT Nonce::touch(T::backend()); +# endif for (auto worker : m_workers) { worker->start(Workers::onReady); - - // This sleep is important for optimal caching! 
- // Threads must allocate scratchpads in order so that adjacent cores will use adjacent scratchpads - // Sub-optimal caching can result in up to 0.5% hashrate penalty - if (sleep) { - std::this_thread::sleep_for(std::chrono::milliseconds(20)); - } } } @@ -247,6 +224,7 @@ namespace xmrig { template<> xmrig::IWorker *xmrig::Workers::create(Thread *handle) { +# ifdef XMRIG_MINER_PROJECT switch (handle->config().intensity) { case 1: return new CpuWorker<1>(handle->id(), handle->config()); @@ -265,6 +243,11 @@ xmrig::IWorker *xmrig::Workers::create(Thread *han } return nullptr; +# else + assert(handle->config().intensity == 1); + + return new CpuWorker<1>(handle->id(), handle->config()); +# endif } diff --git a/src/backend/common/Workers.h b/src/backend/common/Workers.h index 5cdc78a2..0ef3b889 100644 --- a/src/backend/common/Workers.h +++ b/src/backend/common/Workers.h @@ -1,13 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -44,10 +37,9 @@ namespace xmrig { +class Benchmark; class Hashrate; class WorkersPrivate; -class Job; -class Benchmark; template @@ -63,7 +55,7 @@ public: bool tick(uint64_t ticks); const Hashrate *hashrate() const; - void jobEarlyNotification(const Job&); + void jobEarlyNotification(const Job &job); void setBackend(IBackend *backend); void stop(); @@ -73,7 +65,7 @@ public: private: static IWorker *create(Thread *handle); - static void onReady(void *arg); + static void *onReady(void *arg); void start(const std::vector &data, bool sleep); @@ -83,7 +75,7 @@ private: template -void xmrig::Workers::jobEarlyNotification(const Job& job) +void xmrig::Workers::jobEarlyNotification(const Job &job) { for (Thread* t : m_workers) { if (t->worker()) { diff --git a/src/backend/common/common.cmake b/src/backend/common/common.cmake index 95c72440..52a5ee6f 100644 --- a/src/backend/common/common.cmake +++ b/src/backend/common/common.cmake @@ -1,6 +1,5 @@ set(HEADERS_BACKEND_COMMON src/backend/common/Hashrate.h - src/backend/common/HashrateInterpolator.h src/backend/common/Tags.h src/backend/common/interfaces/IBackend.h src/backend/common/interfaces/IRxListener.h @@ -16,7 +15,6 @@ set(HEADERS_BACKEND_COMMON set(SOURCES_BACKEND_COMMON src/backend/common/Hashrate.cpp - src/backend/common/HashrateInterpolator.cpp src/backend/common/Threads.cpp src/backend/common/Worker.cpp src/backend/common/Workers.cpp @@ -35,3 +33,16 @@ if (WITH_RANDOMX AND WITH_BENCHMARK) src/backend/common/benchmark/BenchState.cpp ) endif() + + +if (WITH_OPENCL OR WITH_CUDA) + list(APPEND HEADERS_BACKEND_COMMON + src/backend/common/HashrateInterpolator.h + src/backend/common/GpuWorker.h + ) + + list(APPEND SOURCES_BACKEND_COMMON + src/backend/common/HashrateInterpolator.cpp + src/backend/common/GpuWorker.cpp + ) +endif() diff --git a/src/backend/common/interfaces/IBackend.h b/src/backend/common/interfaces/IBackend.h index c07ee49a..53e8e0f1 100644 --- a/src/backend/common/interfaces/IBackend.h +++ b/src/backend/common/interfaces/IBackend.h @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas 
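// Illustrative sketch, not part of the patch: onReady() now has the exact pthread
// start-routine shape, void *(*)(void *), so Thread<T>::start() can hand the same
// callback either to std::thread or, on macOS (earlier in this patch), to
// pthread_create()/pthread_create_suspended_np() after an affinity hint is set.
// Standalone illustration of that callback shape; onReadySketch is a hypothetical name.
#include <pthread.h>

static void *onReadySketch(void *arg)
{
    // a real callback would recover its Thread<T>* handle from arg here
    (void)arg;
    return nullptr;   // pthread start routines must return void*
}

// Usage: pthread_t t; pthread_create(&t, nullptr, onReadySketch, handle);
//        pthread_join(t, nullptr);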
Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,10 +20,11 @@ #define XMRIG_IBACKEND_H -#include - - #include "3rdparty/rapidjson/fwd.h" +#include "base/tools/Object.h" + + +#include namespace xmrig { @@ -47,10 +42,14 @@ class String; class IBackend { public: + XMRIG_DISABLE_COPY_MOVE(IBackend) + + IBackend() = default; virtual ~IBackend() = default; virtual bool isEnabled() const = 0; virtual bool isEnabled(const Algorithm &algorithm) const = 0; + virtual bool tick(uint64_t ticks) = 0; virtual const Hashrate *hashrate() const = 0; virtual const String &profileName() const = 0; virtual const String &type() const = 0; @@ -61,7 +60,6 @@ public: virtual void setJob(const Job &job) = 0; virtual void start(IWorker *worker, bool ready) = 0; virtual void stop() = 0; - virtual bool tick(uint64_t ticks) = 0; # ifdef XMRIG_FEATURE_API virtual rapidjson::Value toJSON(rapidjson::Document &doc) const = 0; diff --git a/src/backend/common/interfaces/IMemoryPool.h b/src/backend/common/interfaces/IMemoryPool.h index 44ff2495..10d27b50 100644 --- a/src/backend/common/interfaces/IMemoryPool.h +++ b/src/backend/common/interfaces/IMemoryPool.h @@ -1,14 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2018-2019 tevador - * Copyright 2016-2019 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -28,6 +20,9 @@ #define XMRIG_IMEMORYPOOL_H +#include "base/tools/Object.h" + + #include #include @@ -38,7 +33,10 @@ namespace xmrig { class IMemoryPool { public: - virtual ~IMemoryPool() = default; + XMRIG_DISABLE_COPY_MOVE(IMemoryPool) + + IMemoryPool() = default; + virtual ~IMemoryPool() = default; virtual bool isHugePages(uint32_t node) const = 0; virtual uint8_t *get(size_t size, uint32_t node) = 0; diff --git a/src/backend/common/interfaces/IRxListener.h b/src/backend/common/interfaces/IRxListener.h index b4dde9e5..11adb0d4 100644 --- a/src/backend/common/interfaces/IRxListener.h +++ b/src/backend/common/interfaces/IRxListener.h @@ -1,10 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2016-2018 XMRig + * Copyright (c) 2018-2019 tevador + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,13 +21,19 @@ #define XMRIG_IRXLISTENER_H +#include "base/tools/Object.h" + + namespace xmrig { class IRxListener { public: - virtual ~IRxListener() = default; + XMRIG_DISABLE_COPY_MOVE(IRxListener) + + IRxListener() = default; + virtual ~IRxListener() = default; # ifdef XMRIG_ALGO_RANDOMX virtual void onDatasetReady() = 0; diff --git a/src/backend/common/interfaces/IRxStorage.h 
b/src/backend/common/interfaces/IRxStorage.h index ff447044..ab287a76 100644 --- a/src/backend/common/interfaces/IRxStorage.h +++ b/src/backend/common/interfaces/IRxStorage.h @@ -1,10 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2016-2018 XMRig + * Copyright (c) 2018-2019 tevador + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,8 +21,9 @@ #define XMRIG_IRXSTORAGE_H -#include "crypto/rx/RxConfig.h" +#include "base/tools/Object.h" #include "crypto/common/HugePagesInfo.h" +#include "crypto/rx/RxConfig.h" #include @@ -43,7 +41,10 @@ class RxSeed; class IRxStorage { public: - virtual ~IRxStorage() = default; + XMRIG_DISABLE_COPY_MOVE(IRxStorage) + + IRxStorage() = default; + virtual ~IRxStorage() = default; virtual bool isAllocated() const = 0; virtual HugePagesInfo hugePages() const = 0; diff --git a/src/backend/common/interfaces/IWorker.h b/src/backend/common/interfaces/IWorker.h index f528469a..7ed6ff00 100644 --- a/src/backend/common/interfaces/IWorker.h +++ b/src/backend/common/interfaces/IWorker.h @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,8 +30,8 @@ namespace xmrig { -class VirtualMemory; class Job; +class VirtualMemory; class IWorker @@ -48,14 +42,13 @@ public: IWorker() = default; virtual ~IWorker() = default; - virtual bool selfTest() = 0; - virtual const VirtualMemory *memory() const = 0; - virtual size_t id() const = 0; - virtual size_t intensity() const = 0; - virtual uint64_t rawHashes() const = 0; - virtual void getHashrateData(uint64_t&, uint64_t&) const = 0; - virtual void start() = 0; - virtual void jobEarlyNotification(const Job&) = 0; + virtual bool selfTest() = 0; + virtual const VirtualMemory *memory() const = 0; + virtual size_t id() const = 0; + virtual size_t intensity() const = 0; + virtual void hashrateData(uint64_t &hashCount, uint64_t &timeStamp, uint64_t &rawHashes) const = 0; + virtual void jobEarlyNotification(const Job &job) = 0; + virtual void start() = 0; }; diff --git a/src/backend/cpu/Cpu.cpp b/src/backend/cpu/Cpu.cpp index a6c62972..a49d6560 100644 --- a/src/backend/cpu/Cpu.cpp +++ b/src/backend/cpu/Cpu.cpp @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -32,8 +26,6 @@ #if defined(XMRIG_FEATURE_HWLOC) # include "backend/cpu/platform/HwlocCpuInfo.h" -#elif defined(XMRIG_FEATURE_LIBCPUID) -# include "backend/cpu/platform/AdvancedCpuInfo.h" #else # 
include "backend/cpu/platform/BasicCpuInfo.h" #endif @@ -47,8 +39,6 @@ xmrig::ICpuInfo *xmrig::Cpu::info() if (cpuInfo == nullptr) { # if defined(XMRIG_FEATURE_HWLOC) cpuInfo = new HwlocCpuInfo(); -# elif defined(XMRIG_FEATURE_LIBCPUID) - cpuInfo = new AdvancedCpuInfo(); # else cpuInfo = new BasicCpuInfo(); # endif diff --git a/src/backend/cpu/Cpu.h b/src/backend/cpu/Cpu.h index a8fef70f..186063d3 100644 --- a/src/backend/cpu/Cpu.h +++ b/src/backend/cpu/Cpu.h @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/backend/cpu/CpuBackend.cpp b/src/backend/cpu/CpuBackend.cpp index fc6b7e74..1e47a6c5 100644 --- a/src/backend/cpu/CpuBackend.cpp +++ b/src/backend/cpu/CpuBackend.cpp @@ -266,6 +266,12 @@ bool xmrig::CpuBackend::isEnabled(const Algorithm &algorithm) const } +bool xmrig::CpuBackend::tick(uint64_t ticks) +{ + return d_ptr->workers.tick(ticks); +} + + const xmrig::Hashrate *xmrig::CpuBackend::hashrate() const { return d_ptr->workers.hashrate(); @@ -316,9 +322,9 @@ void xmrig::CpuBackend::printHashrate(bool details) Log::print("| %8zu | %8" PRId64 " | %7s | %7s | %7s |", i, data.affinity, - Hashrate::format(hashrate()->calc(i + 1, Hashrate::ShortInterval), num, sizeof num / 3), - Hashrate::format(hashrate()->calc(i + 1, Hashrate::MediumInterval), num + 8, sizeof num / 3), - Hashrate::format(hashrate()->calc(i + 1, Hashrate::LargeInterval), num + 8 * 2, sizeof num / 3) + Hashrate::format(hashrate()->calc(i, Hashrate::ShortInterval), num, sizeof num / 3), + Hashrate::format(hashrate()->calc(i, Hashrate::MediumInterval), num + 8, sizeof num / 3), + Hashrate::format(hashrate()->calc(i, Hashrate::LargeInterval), num + 8 * 2, sizeof num / 3) ); i++; @@ -405,12 +411,6 @@ void xmrig::CpuBackend::stop() } -bool xmrig::CpuBackend::tick(uint64_t ticks) -{ - return d_ptr->workers.tick(ticks); -} - - #ifdef XMRIG_FEATURE_API rapidjson::Value xmrig::CpuBackend::toJSON(rapidjson::Document &doc) const { diff --git a/src/backend/cpu/CpuBackend.h b/src/backend/cpu/CpuBackend.h index 5be933c3..2f697b8b 100644 --- a/src/backend/cpu/CpuBackend.h +++ b/src/backend/cpu/CpuBackend.h @@ -54,6 +54,7 @@ protected: bool isEnabled() const override; bool isEnabled(const Algorithm &algorithm) const override; + bool tick(uint64_t ticks) override; const Hashrate *hashrate() const override; const String &profileName() const override; const String &type() const override; @@ -63,7 +64,6 @@ protected: void setJob(const Job &job) override; void start(IWorker *worker, bool ready) override; void stop() override; - bool tick(uint64_t ticks) override; # ifdef XMRIG_FEATURE_API rapidjson::Value toJSON(rapidjson::Document &doc) const override; diff --git a/src/backend/cpu/CpuWorker.cpp b/src/backend/cpu/CpuWorker.cpp index 778f8e46..f55d153c 100644 --- a/src/backend/cpu/CpuWorker.cpp +++ b/src/backend/cpu/CpuWorker.cpp @@ -1,13 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 
2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -80,7 +73,7 @@ xmrig::CpuWorker::CpuWorker(size_t id, const CpuLaunchData &data) : m_threads(data.threads), m_ctx() { - m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, false, true, m_node); + m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, false, true, node()); } @@ -100,7 +93,7 @@ xmrig::CpuWorker::~CpuWorker() template void xmrig::CpuWorker::allocateRandomX_VM() { - RxDataset *dataset = Rx::dataset(m_job.currentJob(), m_node); + RxDataset *dataset = Rx::dataset(m_job.currentJob(), node()); while (dataset == nullptr) { std::this_thread::sleep_for(std::chrono::milliseconds(200)); @@ -109,13 +102,13 @@ void xmrig::CpuWorker::allocateRandomX_VM() return; } - dataset = Rx::dataset(m_job.currentJob(), m_node); + dataset = Rx::dataset(m_job.currentJob(), node()); } if (!m_vm) { // Try to allocate scratchpad from dataset's 1 GB huge pages, if normal huge pages are not available uint8_t* scratchpad = m_memory->isHugePages() ? m_memory->scratchpad() : dataset->tryAllocateScrathpad(); - m_vm = RxVm::create(dataset, scratchpad ? scratchpad : m_memory->scratchpad(), !m_hwAES, m_assembly, m_node); + m_vm = RxVm::create(dataset, scratchpad ? scratchpad : m_memory->scratchpad(), !m_hwAES, m_assembly, node()); } } #endif @@ -189,6 +182,14 @@ bool xmrig::CpuWorker::selfTest() } +template +void xmrig::CpuWorker::hashrateData(uint64_t &hashCount, uint64_t &, uint64_t &rawHashes) const +{ + hashCount = m_count; + rawHashes = m_count; +} + + template void xmrig::CpuWorker::start() { diff --git a/src/backend/cpu/CpuWorker.h b/src/backend/cpu/CpuWorker.h index 06c92fa4..785763af 100644 --- a/src/backend/cpu/CpuWorker.h +++ b/src/backend/cpu/CpuWorker.h @@ -1,13 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -56,10 +49,12 @@ public: protected: bool selfTest() override; + void hashrateData(uint64_t &hashCount, uint64_t &timeStamp, uint64_t &rawHashes) const override; void start() override; - inline const VirtualMemory *memory() const override { return m_memory; } - inline size_t intensity() const override { return N; } + inline const VirtualMemory *memory() const override { return m_memory; } + inline size_t intensity() const override { return N; } + inline void jobEarlyNotification(const Job&) override {} private: inline cn_hash_fun fn(const Algorithm &algorithm) const { return CnHash::fn(algorithm, m_av, m_assembly); } diff --git a/src/backend/cpu/cpu.cmake b/src/backend/cpu/cpu.cmake index 3894b1c6..c485683a 100644 --- a/src/backend/cpu/cpu.cmake +++ b/src/backend/cpu/cpu.cmake @@ -1,13 +1,14 @@ set(HEADERS_BACKEND_CPU src/backend/cpu/Cpu.h src/backend/cpu/CpuBackend.h - src/backend/cpu/CpuConfig.h src/backend/cpu/CpuConfig_gen.h + src/backend/cpu/CpuConfig.h src/backend/cpu/CpuLaunchData.cpp src/backend/cpu/CpuThread.h 
src/backend/cpu/CpuThreads.h src/backend/cpu/CpuWorker.h src/backend/cpu/interfaces/ICpuInfo.h + src/backend/cpu/platform/BasicCpuInfo.h ) set(SOURCES_BACKEND_CPU @@ -20,7 +21,6 @@ set(SOURCES_BACKEND_CPU src/backend/cpu/CpuWorker.cpp ) - if (WITH_HWLOC) if (CMAKE_CXX_COMPILER_ID MATCHES MSVC) add_subdirectory(src/3rdparty/hwloc) @@ -32,51 +32,26 @@ if (WITH_HWLOC) set(CPUID_LIB ${HWLOC_LIBRARY}) endif() - set(WITH_LIBCPUID OFF) - - remove_definitions(/DXMRIG_FEATURE_LIBCPUID) add_definitions(/DXMRIG_FEATURE_HWLOC) if (HWLOC_DEBUG) add_definitions(/DXMRIG_HWLOC_DEBUG) endif() - set(SOURCES_CPUID - src/backend/cpu/platform/BasicCpuInfo.h - src/backend/cpu/platform/HwlocCpuInfo.cpp - src/backend/cpu/platform/HwlocCpuInfo.h - ) -elseif (WITH_LIBCPUID) - message(WARNING, "libcpuid support is deprecated and will be removed in future versions.") - set(WITH_HWLOC OFF) - - add_subdirectory(src/3rdparty/libcpuid) - include_directories(src/3rdparty/libcpuid) - - add_definitions(/DXMRIG_FEATURE_LIBCPUID) - remove_definitions(/DXMRIG_FEATURE_HWLOC) - - set(CPUID_LIB cpuid) - set(SOURCES_CPUID - src/backend/cpu/platform/AdvancedCpuInfo.cpp - src/backend/cpu/platform/AdvancedCpuInfo.h - ) + list(APPEND HEADERS_BACKEND_CPU src/backend/cpu/platform/HwlocCpuInfo.h) + list(APPEND SOURCES_BACKEND_CPU src/backend/cpu/platform/HwlocCpuInfo.cpp) else() - remove_definitions(/DXMRIG_FEATURE_LIBCPUID) remove_definitions(/DXMRIG_FEATURE_HWLOC) set(CPUID_LIB "") - set(SOURCES_CPUID - src/backend/cpu/platform/BasicCpuInfo.h - ) endif() if (XMRIG_ARM) - list(APPEND SOURCES_CPUID src/backend/cpu/platform/BasicCpuInfo_arm.cpp) + list(APPEND SOURCES_BACKEND_CPU src/backend/cpu/platform/BasicCpuInfo_arm.cpp) if (XMRIG_OS_UNIX) - list(APPEND SOURCES_CPUID src/backend/cpu/platform/lscpu_arm.cpp) + list(APPEND SOURCES_BACKEND_CPU src/backend/cpu/platform/lscpu_arm.cpp) endif() else() - list(APPEND SOURCES_CPUID src/backend/cpu/platform/BasicCpuInfo.cpp) + list(APPEND SOURCES_BACKEND_CPU src/backend/cpu/platform/BasicCpuInfo.cpp) endif() diff --git a/src/backend/cpu/interfaces/ICpuInfo.h b/src/backend/cpu/interfaces/ICpuInfo.h index 9270ca09..b772a92c 100644 --- a/src/backend/cpu/interfaces/ICpuInfo.h +++ b/src/backend/cpu/interfaces/ICpuInfo.h @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -46,6 +40,14 @@ public: VENDOR_AMD }; + enum Arch : uint32_t { + ARCH_UNKNOWN, + ARCH_ZEN, + ARCH_ZEN_PLUS, + ARCH_ZEN2, + ARCH_ZEN3 + }; + enum MsrMod : uint32_t { MSR_MOD_NONE, MSR_MOD_RYZEN_17H, @@ -59,6 +61,7 @@ public: enum Flag : uint32_t { FLAG_AES, + FLAG_AVX, FLAG_AVX2, FLAG_AVX512F, FLAG_BMI2, @@ -70,6 +73,7 @@ public: FLAG_XOP, FLAG_POPCNT, FLAG_CAT_L3, + FLAG_VM, FLAG_MAX }; @@ -77,18 +81,21 @@ public: virtual ~ICpuInfo() = default; # if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__) - inline constexpr static bool isX64() { return true; } + inline constexpr static bool is64bit() { return true; } # else - inline constexpr static bool isX64() { return false; } + inline constexpr static bool is64bit() { return false; } # endif virtual 
Assembly::Id assembly() const = 0; virtual bool has(Flag feature) const = 0; virtual bool hasAES() const = 0; + virtual bool hasAVX() const = 0; virtual bool hasAVX2() const = 0; virtual bool hasBMI2() const = 0; virtual bool hasOneGbPages() const = 0; + virtual bool hasXOP() const = 0; virtual bool hasCatL3() const = 0; + virtual bool isVM() const = 0; virtual const char *backend() const = 0; virtual const char *brand() const = 0; virtual CpuThreads threads(const Algorithm &algorithm, uint32_t limit) const = 0; @@ -101,6 +108,7 @@ public: virtual size_t packages() const = 0; virtual size_t threads() const = 0; virtual Vendor vendor() const = 0; + virtual Arch arch() const = 0; virtual bool jccErratum() const = 0; }; diff --git a/src/backend/cpu/platform/AdvancedCpuInfo.cpp b/src/backend/cpu/platform/AdvancedCpuInfo.cpp deleted file mode 100644 index c6773a21..00000000 --- a/src/backend/cpu/platform/AdvancedCpuInfo.cpp +++ /dev/null @@ -1,130 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "backend/cpu/platform/AdvancedCpuInfo.h" -#include "3rdparty/libcpuid/libcpuid.h" - - -#include -#include -#include -#include -#include - - -xmrig::AdvancedCpuInfo::AdvancedCpuInfo() -{ - struct cpu_raw_data_t raw = {}; - struct cpu_id_t data = {}; - - cpuid_get_raw_data(&raw); - cpu_identify(&raw, &data); - - snprintf(m_backend, sizeof m_backend, "libcpuid/%s", cpuid_lib_version()); - - m_threads = static_cast(data.total_logical_cpus); - m_packages = std::max(m_threads / static_cast(data.num_logical_cpus), 1); - m_cores = static_cast(data.num_cores) * m_packages; - m_L3 = data.l3_cache > 0 ? static_cast(data.l3_cache) * m_packages : 0; - - const auto l2 = static_cast(data.l2_cache); - - // Workaround for AMD CPUs https://github.com/anrieff/libcpuid/issues/97 - if (m_vendor == VENDOR_AMD && data.ext_family >= 0x15 && data.ext_family < 0x17) { - m_L2 = l2 * (cores() / 2) * m_packages; - m_L2_exclusive = true; - } - // Workaround for Intel Pentium Dual-Core, Core Duo, Core 2 Duo, Core 2 Quad and their Xeon homologue - // These processors have L2 cache shared by 2 cores. - else if (m_vendor == VENDOR_INTEL && data.ext_family == 0x06 && (data.ext_model == 0x0E || data.ext_model == 0x0F || data.ext_model == 0x17)) { - size_t l2_count_per_socket = cores() > 1 ? cores() / 2 : 1; - m_L2 = data.l2_cache > 0 ? l2 * l2_count_per_socket * m_packages : 0; - } - else{ - m_L2 = data.l2_cache > 0 ? 
l2 * cores() * m_packages : 0; - } - - m_L2 *= 1024; - m_L3 *= 1024; -} - - -xmrig::CpuThreads xmrig::AdvancedCpuInfo::threads(const Algorithm &algorithm, uint32_t limit) const -{ - if (threads() == 1) { - return 1; - } - - size_t cache = 0; - size_t count = 0; - -# ifdef XMRIG_ALGO_ASTROBWT - if (algorithm == Algorithm::ASTROBWT_DERO) { - CpuThreads t; - count = threads(); - for (size_t i = 0; i < count; ++i) { - t.add(i, 0); - } - return t; - } -# endif - - if (m_L3) { - cache = m_L2_exclusive ? (m_L2 + m_L3) : m_L3; - } - else { - cache = m_L2; - } - - if (cache) { - const size_t memory = algorithm.l3(); - assert(memory > 0); - - count = cache / memory; - - if (cache % memory >= memory / 2) { - count++; - } - } - else { - count = threads() / 2; - } - - uint32_t intensity = algorithm.maxIntensity() == 1 ? 0 : 1; - -# ifdef XMRIG_ALGO_CN_PICO - if (algorithm == Algorithm::CN_PICO_0 && (count / cores()) >= 2) { - intensity = 2; - } -# endif - - if (limit > 0 && limit < 100) { - count = std::min(count, static_cast(round(threads() * (limit / 100.0)))); - } - else { - count = std::min(count, threads()); - } - - return CpuThreads(std::max(count, 1), intensity); -} diff --git a/src/backend/cpu/platform/AdvancedCpuInfo.h b/src/backend/cpu/platform/AdvancedCpuInfo.h deleted file mode 100644 index 9be075ef..00000000 --- a/src/backend/cpu/platform/AdvancedCpuInfo.h +++ /dev/null @@ -1,63 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef XMRIG_ADVANCEDCPUINFO_H -#define XMRIG_ADVANCEDCPUINFO_H - - -#include "backend/cpu/platform/BasicCpuInfo.h" - - -namespace xmrig { - - -class AdvancedCpuInfo : public BasicCpuInfo -{ -public: - AdvancedCpuInfo(); - -protected: - CpuThreads threads(const Algorithm &algorithm, uint32_t limit) const override; - - inline const char *backend() const override { return m_backend; } - inline size_t cores() const override { return m_cores; } - inline size_t L2() const override { return m_L2; } - inline size_t L3() const override { return m_L3; } - inline size_t packages() const override { return m_packages; } - inline size_t threads() const override { return m_threads; } - -private: - bool m_L2_exclusive = false; - char m_backend[32]{}; - size_t m_cores = 0; - size_t m_L2 = 0; - size_t m_L3 = 0; - size_t m_packages = 1; -}; - - -} /* namespace xmrig */ - - -#endif /* XMRIG_ADVANCEDCPUINFO_H */ diff --git a/src/backend/cpu/platform/BasicCpuInfo.cpp b/src/backend/cpu/platform/BasicCpuInfo.cpp index 7cf09f8c..2a4dc829 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.cpp +++ b/src/backend/cpu/platform/BasicCpuInfo.cpp @@ -52,8 +52,8 @@ namespace xmrig { -constexpr size_t kCpuFlagsSize = 12; -static const std::array flagNames = { "aes", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "sse4.1", "xop", "popcnt", "cat_l3" }; +constexpr size_t kCpuFlagsSize = 14; +static const std::array flagNames = { "aes", "avx", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "sse4.1", "xop", "popcnt", "cat_l3", "vm" }; static_assert(kCpuFlagsSize == ICpuInfo::FLAG_MAX, "kCpuFlagsSize and FLAG_MAX mismatch"); @@ -134,11 +134,12 @@ static inline uint64_t xgetbv() #endif } -static inline bool has_xcr_avx2() { return (xgetbv() & 0x06) == 0x06; } +static inline bool has_xcr_avx() { return (xgetbv() & 0x06) == 0x06; } static inline bool has_xcr_avx512() { return (xgetbv() & 0xE6) == 0xE6; } static inline bool has_osxsave() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 27); } static inline bool has_aes_ni() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 25); } -static inline bool has_avx2() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 5) && has_osxsave() && has_xcr_avx2(); } +static inline bool has_avx() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 28) && has_osxsave() && has_xcr_avx(); } +static inline bool has_avx2() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 5) && has_osxsave() && has_xcr_avx(); } static inline bool has_avx512f() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 16) && has_osxsave() && has_xcr_avx512(); } static inline bool has_bmi2() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 8); } static inline bool has_pdpe1gb() { return has_feature(PROCESSOR_EXT_INFO, EDX_Reg, 1 << 26); } @@ -148,6 +149,7 @@ static inline bool has_sse41() { return has_feature(PROCESSOR_INFO, static inline bool has_xop() { return has_feature(0x80000001, ECX_Reg, 1 << 11); } static inline bool has_popcnt() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 23); } static inline bool has_cat_l3() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 15) && has_feature(0x10, EBX_Reg, 1 << 1); } +static inline bool is_vm() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 31); } } // namespace xmrig @@ -174,6 +176,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : cpu_brand_string(m_brand); m_flags.set(FLAG_AES, has_aes_ni()); + m_flags.set(FLAG_AVX, has_avx()); m_flags.set(FLAG_AVX2, has_avx2()); m_flags.set(FLAG_AVX512F, has_avx512f()); 
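// Illustrative sketch, not part of the patch: the new has_avx() test above is the
// usual three-part check: CPUID.1:ECX bit 28 (AVX), CPUID.1:ECX bit 27 (OSXSAVE),
// and XGETBV(0) confirming the OS saves both XMM and YMM state (XCR0 bits 1 and 2,
// mask 0x6). GCC/Clang-flavoured x86-64 sketch; the patch uses its own
// cpuid()/xgetbv() helpers, and the function names here are hypothetical.
#include <cpuid.h>
#include <cstdint>

static uint64_t sketchXgetbv()
{
    uint32_t eax = 0, edx = 0;
    __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0u));
    return (static_cast<uint64_t>(edx) << 32) | eax;
}

static bool sketchHasAvx()
{
    unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
        return false;
    }

    const bool avx     = (ecx & (1u << 28)) != 0;   // AVX instructions implemented
    const bool osxsave = (ecx & (1u << 27)) != 0;   // XGETBV usable from user mode

    return avx && osxsave && (sketchXgetbv() & 0x6) == 0x6;   // XMM + YMM state enabled by the OS
}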
m_flags.set(FLAG_BMI2, has_bmi2()); @@ -185,6 +188,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : m_flags.set(FLAG_XOP, has_xop()); m_flags.set(FLAG_POPCNT, has_popcnt()); m_flags.set(FLAG_CAT_L3, has_cat_l3()); + m_flags.set(FLAG_VM, is_vm()); # ifdef XMRIG_FEATURE_ASM if (hasAES()) { @@ -213,9 +217,27 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : switch (m_family) { case 0x17: m_msrMod = MSR_MOD_RYZEN_17H; + switch (m_model) { + case 1: + case 17: + case 32: + m_arch = ARCH_ZEN; + break; + case 8: + case 24: + m_arch = ARCH_ZEN_PLUS; + break; + case 49: + case 96: + case 113: + case 144: + m_arch = ARCH_ZEN2; + break; + } break; case 0x19: + m_arch = ARCH_ZEN3; m_msrMod = MSR_MOD_RYZEN_19H; break; @@ -344,7 +366,8 @@ rapidjson::Value xmrig::BasicCpuInfo::toJSON(rapidjson::Document &doc) const out.AddMember("proc_info", m_procInfo, allocator); out.AddMember("aes", hasAES(), allocator); out.AddMember("avx2", hasAVX2(), allocator); - out.AddMember("x64", isX64(), allocator); + out.AddMember("x64", is64bit(), allocator); // DEPRECATED will be removed in the next major release. + out.AddMember("64_bit", is64bit(), allocator); out.AddMember("l2", static_cast(L2()), allocator); out.AddMember("l3", static_cast(L3()), allocator); out.AddMember("cores", static_cast(cores()), allocator); diff --git a/src/backend/cpu/platform/BasicCpuInfo.h b/src/backend/cpu/platform/BasicCpuInfo.h index 85d2582d..edf119a2 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.h +++ b/src/backend/cpu/platform/BasicCpuInfo.h @@ -48,10 +48,13 @@ protected: inline Assembly::Id assembly() const override { return m_assembly; } inline bool has(Flag flag) const override { return m_flags.test(flag); } inline bool hasAES() const override { return has(FLAG_AES); } + inline bool hasAVX() const override { return has(FLAG_AVX); } inline bool hasAVX2() const override { return has(FLAG_AVX2); } inline bool hasBMI2() const override { return has(FLAG_BMI2); } inline bool hasOneGbPages() const override { return has(FLAG_PDPE1GB); } + inline bool hasXOP() const override { return has(FLAG_XOP); } inline bool hasCatL3() const override { return has(FLAG_CAT_L3); } + inline bool isVM() const override { return has(FLAG_VM); } inline const char *brand() const override { return m_brand; } inline MsrMod msrMod() const override { return m_msrMod; } inline size_t cores() const override { return 0; } @@ -61,19 +64,24 @@ protected: inline size_t packages() const override { return 1; } inline size_t threads() const override { return m_threads; } inline Vendor vendor() const override { return m_vendor; } + inline Arch arch() const override { return m_arch; } inline bool jccErratum() const override { return m_jccErratum; } protected: char m_brand[64 + 6]{}; size_t m_threads; Vendor m_vendor = VENDOR_UNKNOWN; + Arch m_arch = ARCH_UNKNOWN; bool m_jccErratum = false; private: +# ifndef XMRIG_ARM uint32_t m_procInfo = 0; uint32_t m_family = 0; uint32_t m_model = 0; uint32_t m_stepping = 0; +# endif + Assembly m_assembly = Assembly::NONE; MsrMod m_msrMod = MSR_MOD_NONE; std::bitset m_flags; diff --git a/src/backend/cpu/platform/BasicCpuInfo_arm.cpp b/src/backend/cpu/platform/BasicCpuInfo_arm.cpp index a2b336a5..84d2c57f 100644 --- a/src/backend/cpu/platform/BasicCpuInfo_arm.cpp +++ b/src/backend/cpu/platform/BasicCpuInfo_arm.cpp @@ -36,12 +36,14 @@ #include "3rdparty/rapidjson/document.h" -#ifdef XMRIG_OS_UNIX +#if defined(XMRIG_OS_UNIX) namespace xmrig { extern String cpu_name_arm(); } // namespace xmrig +#elif defined(XMRIG_OS_MACOS) +# include #endif @@ -62,13 
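// Restated for readability, not patch code: the switch above maps the CPUID
// family/model pair to the new ICpuInfo::Arch value. Family 0x17 models 1/17/32
// are Zen, 8/24 are Zen+, 49/96/113/144 are Zen 2, and family 0x19 is Zen 3.
// The same mapping as a standalone helper; zenArch is a hypothetical name.
#include <cstdint>

enum class ZenArch : uint32_t { Unknown, Zen, ZenPlus, Zen2, Zen3 };

static ZenArch zenArch(uint32_t family, uint32_t model)
{
    if (family == 0x19) {
        return ZenArch::Zen3;
    }

    if (family == 0x17) {
        switch (model) {
        case 1: case 17: case 32:
            return ZenArch::Zen;
        case 8: case 24:
            return ZenArch::ZenPlus;
        case 49: case 96: case 113: case 144:
            return ZenArch::Zen2;
        default:
            return ZenArch::Unknown;
        }
    }

    return ZenArch::Unknown;
}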
+64,16 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : # endif # endif -# ifdef XMRIG_OS_UNIX +# if defined(XMRIG_OS_UNIX) auto name = cpu_name_arm(); if (!name.isNull()) { strncpy(m_brand, name, sizeof(m_brand) - 1); } m_flags.set(FLAG_PDPE1GB, std::ifstream("/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages").good()); +# elif defined(XMRIG_OS_MACOS) + size_t buflen = sizeof(m_brand); + sysctlbyname("machdep.cpu.brand_string", &m_brand, &buflen, nullptr, 0); # endif } @@ -95,7 +100,8 @@ rapidjson::Value xmrig::BasicCpuInfo::toJSON(rapidjson::Document &doc) const out.AddMember("brand", StringRef(brand()), allocator); out.AddMember("aes", hasAES(), allocator); out.AddMember("avx2", false, allocator); - out.AddMember("x64", isX64(), allocator); + out.AddMember("x64", is64bit(), allocator); // DEPRECATED will be removed in the next major release. + out.AddMember("64_bit", is64bit(), allocator); out.AddMember("l2", static_cast(L2()), allocator); out.AddMember("l3", static_cast(L3()), allocator); out.AddMember("cores", static_cast(cores()), allocator); diff --git a/src/backend/cpu/platform/HwlocCpuInfo.cpp b/src/backend/cpu/platform/HwlocCpuInfo.cpp index 0d587332..6a684124 100644 --- a/src/backend/cpu/platform/HwlocCpuInfo.cpp +++ b/src/backend/cpu/platform/HwlocCpuInfo.cpp @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -91,6 +85,15 @@ static inline void findByType(hwloc_obj_t obj, hwloc_obj_type_t type, func lambd } +static inline size_t countByType(hwloc_topology_t topology, hwloc_obj_type_t type) +{ + const int count = hwloc_get_nbobjs_by_type(topology, type); + + return count > 0 ? static_cast(count) : 0; +} + + +#ifndef XMRIG_ARM static inline std::vector findByType(hwloc_obj_t obj, hwloc_obj_type_t type) { std::vector out; @@ -100,14 +103,6 @@ static inline std::vector findByType(hwloc_obj_t obj, hwloc_obj_typ } -static inline size_t countByType(hwloc_topology_t topology, hwloc_obj_type_t type) -{ - const int count = hwloc_get_nbobjs_by_type(topology, type); - - return count > 0 ? 
static_cast(count) : 0; -} - - static inline size_t countByType(hwloc_obj_t obj, hwloc_obj_type_t type) { size_t count = 0; @@ -122,6 +117,7 @@ static inline bool isCacheExclusive(hwloc_obj_t obj) const char *value = hwloc_obj_get_info_by_name(obj, "Inclusive"); return value == nullptr || value[0] != '1'; } +#endif } // namespace xmrig @@ -191,6 +187,12 @@ xmrig::HwlocCpuInfo::HwlocCpuInfo() m_nodeset.emplace_back(node->os_index); } } + +# if defined(XMRIG_OS_MACOS) && defined(XMRIG_ARM) + if (L2() == 33554432U && m_cores == 8 && m_cores == m_threads) { + m_cache[2] = 16777216U; + } +# endif } diff --git a/src/backend/cpu/platform/HwlocCpuInfo.h b/src/backend/cpu/platform/HwlocCpuInfo.h index eed3ae8b..6a07a037 100644 --- a/src/backend/cpu/platform/HwlocCpuInfo.h +++ b/src/backend/cpu/platform/HwlocCpuInfo.h @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -52,7 +46,7 @@ public: HwlocCpuInfo(); ~HwlocCpuInfo() override; - static inline bool has(Feature feature) { return m_features & feature; } + static inline bool hasFeature(Feature feature) { return m_features & feature; } inline const std::vector &nodeset() const { return m_nodeset; } inline hwloc_topology_t topology() const { return m_topology; } diff --git a/src/backend/cuda/CudaBackend.cpp b/src/backend/cuda/CudaBackend.cpp index ab5b22e5..59eaab1f 100644 --- a/src/backend/cuda/CudaBackend.cpp +++ b/src/backend/cuda/CudaBackend.cpp @@ -409,9 +409,9 @@ void xmrig::CudaBackend::printHashrate(bool details) Log::print("| %8zu | %8" PRId64 " | %8s | %8s | %8s |" CYAN_BOLD(" #%u") YELLOW(" %s") GREEN(" %s"), i, data.thread.affinity(), - Hashrate::format(hashrate()->calc(i + 1, Hashrate::ShortInterval) * scale, num, sizeof num / 3), - Hashrate::format(hashrate()->calc(i + 1, Hashrate::MediumInterval) * scale, num + 16, sizeof num / 3), - Hashrate::format(hashrate()->calc(i + 1, Hashrate::LargeInterval) * scale, num + 16 * 2, sizeof num / 3), + Hashrate::format(hashrate()->calc(i, Hashrate::ShortInterval) * scale, num, sizeof num / 3), + Hashrate::format(hashrate()->calc(i, Hashrate::MediumInterval) * scale, num + 16, sizeof num / 3), + Hashrate::format(hashrate()->calc(i, Hashrate::LargeInterval) * scale, num + 16 * 2, sizeof num / 3), data.device.index(), data.device.topology().toString().data(), data.device.name().data() diff --git a/src/backend/cuda/CudaLaunchData.cpp b/src/backend/cuda/CudaLaunchData.cpp index 8a066c32..2f791d8b 100644 --- a/src/backend/cuda/CudaLaunchData.cpp +++ b/src/backend/cuda/CudaLaunchData.cpp @@ -1,13 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -32,6 +25,7 @@ 
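// Aside on the Apple-silicon special case above, not patch code: the magic numbers
// are plain mebibyte values, 33554432 bytes = 32 MiB and 16777216 bytes = 16 MiB.
// The workaround therefore fires only on an 8-core, 8-thread part whose
// hwloc-reported L2 totals 32 MiB, and it records 16 MiB as the L3 figure used
// for thread and scratchpad planning.
#include <cstddef>

constexpr size_t kMiB = 1024 * 1024;
static_assert(32 * kMiB == 33554432U, "32 MiB threshold checked by the workaround");
static_assert(16 * kMiB == 16777216U, "16 MiB value stored as the L3 size");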
xmrig::CudaLaunchData::CudaLaunchData(const Miner *miner, const Algorithm &algor algorithm(algorithm), device(device), thread(thread), + affinity(thread.affinity()), miner(miner) { } diff --git a/src/backend/cuda/CudaLaunchData.h b/src/backend/cuda/CudaLaunchData.h index bb8b0fb2..26a8310a 100644 --- a/src/backend/cuda/CudaLaunchData.h +++ b/src/backend/cuda/CudaLaunchData.h @@ -1,13 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -56,6 +49,7 @@ public: const Algorithm algorithm; const CudaDevice &device; const CudaThread thread; + const int64_t affinity; const Miner *miner; const uint32_t benchSize = 0; }; diff --git a/src/backend/cuda/CudaWorker.cpp b/src/backend/cuda/CudaWorker.cpp index 7bb8844c..725d59c9 100644 --- a/src/backend/cuda/CudaWorker.cpp +++ b/src/backend/cuda/CudaWorker.cpp @@ -1,13 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -57,11 +50,10 @@ namespace xmrig { -static constexpr uint32_t kReserveCount = 32768; std::atomic CudaWorker::ready; -static inline bool isReady() { return !Nonce::isPaused() && CudaWorker::ready; } +static inline bool isReady() { return !Nonce::isPaused() && CudaWorker::ready; } } // namespace xmrig @@ -69,10 +61,9 @@ static inline bool isReady() { return !Nonce::isPaused() xmrig::CudaWorker::CudaWorker(size_t id, const CudaLaunchData &data) : - Worker(id, data.thread.affinity(), -1), + GpuWorker(id, data.thread.affinity(), -1, data.device.index()), m_algorithm(data.algorithm), - m_miner(data.miner), - m_deviceIndex(data.device.index()) + m_miner(data.miner) { switch (m_algorithm.family()) { case Algorithm::RANDOM_X: @@ -119,13 +110,7 @@ xmrig::CudaWorker::~CudaWorker() } -uint64_t xmrig::CudaWorker::rawHashes() const -{ - return m_hashrateData.interpolate(Chrono::steadyMSecs()); -} - - -void xmrig::CudaWorker::jobEarlyNotification(const Job& job) +void xmrig::CudaWorker::jobEarlyNotification(const Job &job) { if (m_runner) { m_runner->jobEarlyNotification(job); @@ -213,5 +198,5 @@ void xmrig::CudaWorker::storeStats() const uint64_t timeStamp = Chrono::steadyMSecs(); m_hashrateData.addDataPoint(m_count, timeStamp); - Worker::storeStats(); + GpuWorker::storeStats(); } diff --git a/src/backend/cuda/CudaWorker.h b/src/backend/cuda/CudaWorker.h index e82e3425..24cf1e65 100644 --- a/src/backend/cuda/CudaWorker.h +++ b/src/backend/cuda/CudaWorker.h @@ -1,13 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2020 
SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -27,8 +20,7 @@ #define XMRIG_CUDAWORKER_H -#include "backend/common/HashrateInterpolator.h" -#include "backend/common/Worker.h" +#include "backend/common/GpuWorker.h" #include "backend/common/WorkerJob.h" #include "backend/cuda/CudaLaunchData.h" #include "base/tools/Object.h" @@ -41,7 +33,7 @@ namespace xmrig { class ICudaRunner; -class CudaWorker : public Worker +class CudaWorker : public GpuWorker { public: XMRIG_DISABLE_COPY_MOVE_DEFAULT(CudaWorker) @@ -50,8 +42,7 @@ public: ~CudaWorker() override; - uint64_t rawHashes() const override; - void jobEarlyNotification(const Job&) override; + void jobEarlyNotification(const Job &job) override; static std::atomic ready; @@ -68,9 +59,6 @@ private: const Miner *m_miner; ICudaRunner *m_runner = nullptr; WorkerJob<1> m_job; - uint32_t m_deviceIndex; - - HashrateInterpolator m_hashrateData; }; diff --git a/src/backend/opencl/OclBackend.cpp b/src/backend/opencl/OclBackend.cpp index 7b99700b..d0d04f37 100644 --- a/src/backend/opencl/OclBackend.cpp +++ b/src/backend/opencl/OclBackend.cpp @@ -385,9 +385,9 @@ void xmrig::OclBackend::printHashrate(bool details) Log::print("| %8zu | %8" PRId64 " | %8s | %8s | %8s |" CYAN_BOLD(" #%u") YELLOW(" %s") " %s", i, data.affinity, - Hashrate::format(hashrate()->calc(i + 1, Hashrate::ShortInterval) * scale, num, sizeof num / 3), - Hashrate::format(hashrate()->calc(i + 1, Hashrate::MediumInterval) * scale, num + 16, sizeof num / 3), - Hashrate::format(hashrate()->calc(i + 1, Hashrate::LargeInterval) * scale, num + 16 * 2, sizeof num / 3), + Hashrate::format(hashrate()->calc(i, Hashrate::ShortInterval) * scale, num, sizeof num / 3), + Hashrate::format(hashrate()->calc(i, Hashrate::MediumInterval) * scale, num + 16, sizeof num / 3), + Hashrate::format(hashrate()->calc(i, Hashrate::LargeInterval) * scale, num + 16 * 2, sizeof num / 3), data.device.index(), data.device.topology().toString().data(), data.device.printableName().data() diff --git a/src/backend/opencl/OclWorker.cpp b/src/backend/opencl/OclWorker.cpp index ac493ab5..ee5c4a04 100644 --- a/src/backend/opencl/OclWorker.cpp +++ b/src/backend/opencl/OclWorker.cpp @@ -1,13 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -59,7 +52,7 @@ namespace xmrig { std::atomic OclWorker::ready; -static inline bool isReady() { return !Nonce::isPaused() && OclWorker::ready; } +static inline bool isReady() { return !Nonce::isPaused() && OclWorker::ready; } static inline void printError(size_t id, const char *error) @@ -73,11 +66,10 @@ static inline void printError(size_t id, const char *error) xmrig::OclWorker::OclWorker(size_t id, const OclLaunchData &data) : - Worker(id, data.affinity, -1), + GpuWorker(id, data.affinity, -1, data.device.index()), m_algorithm(data.algorithm), m_miner(data.miner), - 
m_sharedData(OclSharedState::get(data.device.index())), - m_deviceIndex(data.device.index()) + m_sharedData(OclSharedState::get(data.device.index())) { switch (m_algorithm.family()) { case Algorithm::RANDOM_X: @@ -137,13 +129,7 @@ xmrig::OclWorker::~OclWorker() } -uint64_t xmrig::OclWorker::rawHashes() const -{ - return m_hashrateData.interpolate(Chrono::steadyMSecs()); -} - - -void xmrig::OclWorker::jobEarlyNotification(const Job& job) +void xmrig::OclWorker::jobEarlyNotification(const Job &job) { if (m_runner) { m_runner->jobEarlyNotification(job); @@ -180,7 +166,7 @@ void xmrig::OclWorker::start() break; } - m_sharedData.resumeDelay(m_id); + m_sharedData.resumeDelay(id()); if (!consumeJob()) { return; @@ -188,7 +174,7 @@ void xmrig::OclWorker::start() } while (!Nonce::isOutdated(Nonce::OPENCL, m_job.sequence())) { - m_sharedData.adjustDelay(m_id); + m_sharedData.adjustDelay(id()); const uint64_t t = Chrono::steadyMSecs(); @@ -254,5 +240,5 @@ void xmrig::OclWorker::storeStats(uint64_t t) m_sharedData.setRunTime(timeStamp - t); - Worker::storeStats(); + GpuWorker::storeStats(); } diff --git a/src/backend/opencl/OclWorker.h b/src/backend/opencl/OclWorker.h index 403a0765..4dfe2a77 100644 --- a/src/backend/opencl/OclWorker.h +++ b/src/backend/opencl/OclWorker.h @@ -1,13 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -27,8 +20,7 @@ #define XMRIG_OCLWORKER_H -#include "backend/common/HashrateInterpolator.h" -#include "backend/common/Worker.h" +#include "backend/common/GpuWorker.h" #include "backend/common/WorkerJob.h" #include "backend/opencl/OclLaunchData.h" #include "base/tools/Object.h" @@ -42,7 +34,7 @@ class IOclRunner; class Job; -class OclWorker : public Worker +class OclWorker : public GpuWorker { public: XMRIG_DISABLE_COPY_MOVE_DEFAULT(OclWorker) @@ -51,8 +43,7 @@ public: ~OclWorker() override; - uint64_t rawHashes() const override; - void jobEarlyNotification(const Job&) override; + void jobEarlyNotification(const Job &job) override; static std::atomic ready; @@ -70,9 +61,6 @@ private: IOclRunner *m_runner = nullptr; OclSharedData &m_sharedData; WorkerJob<1> m_job; - uint32_t m_deviceIndex; - - HashrateInterpolator m_hashrateData; }; diff --git a/src/backend/opencl/runners/OclAstroBWTRunner.cpp b/src/backend/opencl/runners/OclAstroBWTRunner.cpp index 71d3637e..33b95e45 100644 --- a/src/backend/opencl/runners/OclAstroBWTRunner.cpp +++ b/src/backend/opencl/runners/OclAstroBWTRunner.cpp @@ -93,7 +93,7 @@ xmrig::OclAstroBWTRunner::~OclAstroBWTRunner() OclLib::release(m_tmp_indices); OclLib::release(m_filtered_hashes); - delete m_bwt_data_sizes_host; + delete [] m_bwt_data_sizes_host; } diff --git a/src/backend/opencl/runners/OclKawPowRunner.cpp b/src/backend/opencl/runners/OclKawPowRunner.cpp index b9ba17fc..fcc0058b 100644 --- a/src/backend/opencl/runners/OclKawPowRunner.cpp +++ b/src/backend/opencl/runners/OclKawPowRunner.cpp @@ -43,6 +43,9 @@ namespace xmrig { +constexpr size_t BLOB_SIZE = 40; + + OclKawPowRunner::OclKawPowRunner(size_t index, const OclLaunchData &data) : OclBaseRunner(index, data) { switch 
(data.thread.worksize()) @@ -82,7 +85,7 @@ void OclKawPowRunner::run(uint32_t nonce, uint32_t *hashOutput) const size_t global_work_offset = nonce; const size_t global_work_size = m_intensity - (m_intensity % m_workGroupSize); - enqueueWriteBuffer(m_input, CL_FALSE, 0, 40, m_blob); + enqueueWriteBuffer(m_input, CL_FALSE, 0, BLOB_SIZE, m_blob); const uint32_t zero[2] = {}; enqueueWriteBuffer(m_output, CL_FALSE, 0, sizeof(uint32_t), zero); @@ -177,7 +180,7 @@ void OclKawPowRunner::set(const Job &job, uint8_t *blob) OclLib::setKernelArg(m_searchKernel, 5, sizeof(m_stop), &m_stop); m_blob = blob; - enqueueWriteBuffer(m_input, CL_TRUE, 0, sizeof(m_blob), m_blob); + enqueueWriteBuffer(m_input, CL_TRUE, 0, BLOB_SIZE, m_blob); } diff --git a/src/backend/opencl/runners/tools/OclKawPow.cpp b/src/backend/opencl/runners/tools/OclKawPow.cpp index 4b8274f4..15feeb80 100644 --- a/src/backend/opencl/runners/tools/OclKawPow.cpp +++ b/src/backend/opencl/runners/tools/OclKawPow.cpp @@ -306,7 +306,7 @@ private: } - static std::string merge(std::string a, std::string b, uint32_t r) + static std::string merge(const std::string& a, const std::string& b, uint32_t r) { switch (r % 4) { @@ -323,7 +323,7 @@ private: } - static std::string math(std::string d, std::string a, std::string b, uint32_t r) + static std::string math(const std::string& d, const std::string& a, const std::string& b, uint32_t r) { switch (r % 11) { diff --git a/src/backend/opencl/wrappers/OclLib.cpp b/src/backend/opencl/wrappers/OclLib.cpp index 158ef13e..4794f36b 100644 --- a/src/backend/opencl/wrappers/OclLib.cpp +++ b/src/backend/opencl/wrappers/OclLib.cpp @@ -836,7 +836,13 @@ xmrig::String xmrig::OclLib::getProgramBuildLog(cl_program program, cl_device_id return String(); } - char *log = new char[size + 1](); + char* log = nullptr; + try { + log = new char[size + 1](); + } + catch (...) 
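// Aside on the OclKawPowRunner change above, not patch code: m_blob is a uint8_t*
// member, so the old sizeof(m_blob) in set() measured the pointer (8 bytes on a
// 64-bit build) rather than the 40-byte KawPow job blob, and only part of the job
// data reached the device. Restating the patch's constant makes the intended
// length explicit:
#include <cstddef>

constexpr size_t BLOB_SIZE = 40;   // KawPow input blob length in bytes
// before: enqueueWriteBuffer(m_input, CL_TRUE, 0, sizeof(m_blob), m_blob);   // writes 8 bytes
// after:  enqueueWriteBuffer(m_input, CL_TRUE, 0, BLOB_SIZE, m_blob);        // writes all 40 bytes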
{ + return String(); + } if (getProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, size, log, nullptr) != CL_SUCCESS) { delete [] log; diff --git a/src/base/api/Api.cpp b/src/base/api/Api.cpp index 85c9dd60..89e61593 100644 --- a/src/base/api/Api.cpp +++ b/src/base/api/Api.cpp @@ -34,8 +34,8 @@ #include "base/io/Env.h" #include "base/io/json/Json.h" #include "base/kernel/Base.h" -#include "base/tools/Buffer.h" #include "base/tools/Chrono.h" +#include "base/tools/Cvt.h" #include "core/config/Config.h" #include "core/Controller.h" #include "version.h" @@ -169,9 +169,6 @@ void xmrig::Api::exec(IApiRequest &request) # ifdef XMRIG_FEATURE_HTTP features.PushBack("http", allocator); # endif -# ifdef XMRIG_FEATURE_LIBCPUID - features.PushBack("cpuid", allocator); -# endif # ifdef XMRIG_FEATURE_HWLOC features.PushBack("hwloc", allocator); # endif @@ -222,13 +219,13 @@ void xmrig::Api::genId(const String &id) const size_t inSize = (sizeof(APP_KIND) - 1) + addrSize + sizeof(uint16_t); const auto port = static_cast(m_base->config()->http().port()); - auto*input = new uint8_t[inSize](); + auto *input = new uint8_t[inSize](); memcpy(input, &port, sizeof(uint16_t)); memcpy(input + sizeof(uint16_t), interfaces[i].phys_addr, addrSize); memcpy(input + sizeof(uint16_t) + addrSize, APP_KIND, (sizeof(APP_KIND) - 1)); keccak(input, inSize, hash); - Buffer::toHex(hash, 8, m_id); + Cvt::toHex(m_id, sizeof(m_id), hash, 8); delete [] input; break; diff --git a/src/base/base.cmake b/src/base/base.cmake index 04506927..cd6ee88b 100644 --- a/src/base/base.cmake +++ b/src/base/base.cmake @@ -22,6 +22,7 @@ set(HEADERS_BASE src/base/kernel/config/BaseConfig.h src/base/kernel/config/BaseTransform.h src/base/kernel/config/Title.h + src/base/kernel/constants.h src/base/kernel/Entry.h src/base/kernel/interfaces/IAsyncListener.h src/base/kernel/interfaces/IBaseListener.h @@ -66,6 +67,7 @@ set(HEADERS_BASE src/base/tools/Baton.h src/base/tools/Buffer.h src/base/tools/Chrono.h + src/base/tools/Cvt.h src/base/tools/Handle.h src/base/tools/String.h src/base/tools/Timer.h @@ -113,7 +115,7 @@ set(SOURCES_BASE src/base/net/tools/LineReader.cpp src/base/net/tools/NetBuffer.cpp src/base/tools/Arguments.cpp - src/base/tools/Buffer.cpp + src/base/tools/Cvt.cpp src/base/tools/String.cpp src/base/tools/Timer.cpp ) @@ -230,13 +232,6 @@ if (WITH_KAWPOW) ) endif() -if (WITH_PROFILING) - add_definitions(/DXMRIG_FEATURE_PROFILING) - - list(APPEND HEADERS_BASE src/base/tools/Profiler.h) - list(APPEND SOURCES_BASE src/base/tools/Profiler.cpp) -endif() - if (WITH_RANDOMX AND WITH_BENCHMARK) add_definitions(/DXMRIG_FEATURE_BENCHMARK) diff --git a/src/base/crypto/keccak.cpp b/src/base/crypto/keccak.cpp index f7940dfb..6d7f093f 100644 --- a/src/base/crypto/keccak.cpp +++ b/src/base/crypto/keccak.cpp @@ -168,7 +168,7 @@ typedef uint64_t state_t[25]; void xmrig::keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen) { state_t st; - uint8_t temp[144]; + alignas(8) uint8_t temp[144]; int i, rsiz, rsizw; rsiz = sizeof(state_t) == mdlen ? 
HASH_DATA_AREA : 200 - 2 * mdlen; diff --git a/src/base/io/Console.cpp b/src/base/io/Console.cpp index bba73035..5af7e4a4 100644 --- a/src/base/io/Console.cpp +++ b/src/base/io/Console.cpp @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -50,20 +44,9 @@ xmrig::Console::Console(IConsoleListener *listener) xmrig::Console::~Console() { - stop(); -} - - -void xmrig::Console::stop() -{ - if (!m_tty) { - return; - } - uv_tty_reset_mode(); Handle::close(m_tty); - m_tty = nullptr; } diff --git a/src/base/io/Console.h b/src/base/io/Console.h index 0a075348..65523b94 100644 --- a/src/base/io/Console.h +++ b/src/base/io/Console.h @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -29,7 +23,16 @@ #include "base/tools/Object.h" -#include +using uv_buf_t = struct uv_buf_t; +using uv_handle_t = struct uv_handle_s; +using uv_stream_t = struct uv_stream_s; +using uv_tty_t = struct uv_tty_s; + +#ifdef XMRIG_OS_WIN +using ssize_t = intptr_t; +#else +# include +#endif namespace xmrig { @@ -46,8 +49,6 @@ public: Console(IConsoleListener *listener); ~Console(); - void stop(); - private: bool isSupported() const; diff --git a/src/base/io/Signals.cpp b/src/base/io/Signals.cpp index 00ec8c17..dfe4a89b 100644 --- a/src/base/io/Signals.cpp +++ b/src/base/io/Signals.cpp @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -23,23 +17,30 @@ */ -#include - - -#include "base/io/Signals.h" #include "base/kernel/interfaces/ISignalListener.h" +#include "base/io/log/Log.h" +#include "base/io/log/Tags.h" +#include "base/io/Signals.h" #include "base/tools/Handle.h" +#ifdef SIGUSR1 +static const int signums[xmrig::Signals::kSignalsCount] = { SIGHUP, SIGINT, SIGTERM, SIGUSR1 }; +#else static const int signums[xmrig::Signals::kSignalsCount] = { SIGHUP, SIGINT, SIGTERM }; +#endif xmrig::Signals::Signals(ISignalListener *listener) : m_listener(listener) { +# ifndef XMRIG_OS_WIN + signal(SIGPIPE, SIG_IGN); +# endif + for (size_t i = 0; i < kSignalsCount; ++i) { - uv_signal_t *signal = new uv_signal_t; - signal->data = this; + auto signal = new uv_signal_t; + signal->data = this; m_signals[i] = signal; @@ -51,24 +52,37 @@ xmrig::Signals::Signals(ISignalListener *listener) xmrig::Signals::~Signals() { - 
stop(); -} - - -void xmrig::Signals::stop() -{ - if (!m_signals[0]) { - return; - } - - for (size_t i = 0; i < kSignalsCount; ++i) { - Handle::close(m_signals[i]); - m_signals[i] = nullptr; + for (auto signal : m_signals) { + Handle::close(signal); } } void xmrig::Signals::onSignal(uv_signal_t *handle, int signum) { + switch (signum) + { + case SIGHUP: + LOG_WARN("%s " YELLOW("SIGHUP received, exiting"), Tags::signal()); + break; + + case SIGTERM: + LOG_WARN("%s " YELLOW("SIGTERM received, exiting"), Tags::signal()); + break; + + case SIGINT: + LOG_WARN("%s " YELLOW("SIGINT received, exiting"), Tags::signal()); + break; + +# ifdef SIGUSR1 + case SIGUSR1: + LOG_V5("%s " WHITE_BOLD("SIGUSR1 received"), Tags::signal()); + break; +# endif + + default: + break; + } + static_cast(handle->data)->m_listener->onSignal(signum); } diff --git a/src/base/io/Signals.h b/src/base/io/Signals.h index 9b4a870a..56be5889 100644 --- a/src/base/io/Signals.h +++ b/src/base/io/Signals.h @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,10 +20,14 @@ #define XMRIG_SIGNALS_H -#include +#include "base/tools/Object.h" -typedef struct uv_signal_s uv_signal_t; +#include +#include + + +using uv_signal_t = struct uv_signal_s; namespace xmrig { @@ -41,20 +39,24 @@ class ISignalListener; class Signals { public: + XMRIG_DISABLE_COPY_MOVE_DEFAULT(Signals) + +# ifdef SIGUSR1 + constexpr static const size_t kSignalsCount = 4; +# else constexpr static const size_t kSignalsCount = 3; +# endif Signals(ISignalListener *listener); ~Signals(); - void stop(); - private: void close(int signum); static void onSignal(uv_signal_t *handle, int signum); ISignalListener *m_listener; - uv_signal_t *m_signals[kSignalsCount]; + uv_signal_t *m_signals[kSignalsCount]{}; }; diff --git a/src/base/io/json/Json.cpp b/src/base/io/json/Json.cpp index a9be7dc3..99a087e9 100644 --- a/src/base/io/json/Json.cpp +++ b/src/base/io/json/Json.cpp @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -29,6 +23,7 @@ #include #include +#include namespace xmrig { @@ -119,6 +114,21 @@ const rapidjson::Value &xmrig::Json::getValue(const rapidjson::Value &obj, const } +double xmrig::Json::getDouble(const rapidjson::Value &obj, const char *key, double defaultValue) +{ + if (isEmpty(obj)) { + return defaultValue; + } + + auto i = obj.FindMember(key); + if (i != obj.MemberEnd() && (i->value.IsDouble() || i->value.IsLosslessDouble())) { + return i->value.GetDouble(); + } + + return defaultValue; +} + + int xmrig::Json::getInt(const rapidjson::Value &obj, const char *key, int defaultValue) { if (isEmpty(obj)) { @@ -149,6 +159,25 @@ int64_t xmrig::Json::getInt64(const 
rapidjson::Value &obj, const char *key, int6 } +xmrig::String xmrig::Json::getString(const rapidjson::Value &obj, const char *key, size_t maxSize) +{ + if (isEmpty(obj)) { + return {}; + } + + auto i = obj.FindMember(key); + if (i == obj.MemberEnd() || !i->value.IsString()) { + return {}; + } + + if (maxSize == 0 || i->value.GetStringLength() <= maxSize) { + return i->value.GetString(); + } + + return { i->value.GetString(), maxSize }; +} + + uint64_t xmrig::Json::getUint64(const rapidjson::Value &obj, const char *key, uint64_t defaultValue) { if (isEmpty(obj)) { @@ -222,6 +251,11 @@ bool xmrig::Json::convertOffset(std::istream &ifs, size_t offset, size_t &line, } +xmrig::JsonReader::JsonReader() : + m_obj(kNullValue) +{} + + bool xmrig::JsonReader::isEmpty() const { return Json::isEmpty(m_obj); diff --git a/src/base/io/json/Json.h b/src/base/io/json/Json.h index 683eb308..e5d0fcc6 100644 --- a/src/base/io/json/Json.h +++ b/src/base/io/json/Json.h @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,13 +20,10 @@ #define XMRIG_JSON_H -#include "3rdparty/rapidjson/fwd.h" #include "base/kernel/interfaces/IJsonReader.h" #include -#include -#include namespace xmrig { @@ -47,8 +38,10 @@ public: static const rapidjson::Value &getArray(const rapidjson::Value &obj, const char *key); static const rapidjson::Value &getObject(const rapidjson::Value &obj, const char *key); static const rapidjson::Value &getValue(const rapidjson::Value &obj, const char *key); + static double getDouble(const rapidjson::Value &obj, const char *key, double defaultValue = 0); static int getInt(const rapidjson::Value &obj, const char *key, int defaultValue = 0); static int64_t getInt64(const rapidjson::Value &obj, const char *key, int64_t defaultValue = 0); + static String getString(const rapidjson::Value &obj, const char *key, size_t maxSize); static uint64_t getUint64(const rapidjson::Value &obj, const char *key, uint64_t defaultValue = 0); static unsigned getUint(const rapidjson::Value &obj, const char *key, unsigned defaultValue = 0); @@ -66,6 +59,7 @@ private: class JsonReader : public IJsonReader { public: + JsonReader(); inline JsonReader(const rapidjson::Value &obj) : m_obj(obj) {} inline bool getBool(const char *key, bool defaultValue = false) const override { return Json::getBool(m_obj, key, defaultValue); } @@ -73,8 +67,11 @@ public: inline const rapidjson::Value &getArray(const char *key) const override { return Json::getArray(m_obj, key); } inline const rapidjson::Value &getObject(const char *key) const override { return Json::getObject(m_obj, key); } inline const rapidjson::Value &getValue(const char *key) const override { return Json::getValue(m_obj, key); } + inline const rapidjson::Value &object() const override { return m_obj; } + inline double getDouble(const char *key, double defaultValue = 0) const override { return Json::getDouble(m_obj, key, defaultValue); } inline int getInt(const char *key, int defaultValue = 0) const override { return Json::getInt(m_obj, key, defaultValue); } inline int64_t getInt64(const char *key, int64_t defaultValue = 0) const 
override { return Json::getInt64(m_obj, key, defaultValue); } + inline String getString(const char *key, size_t maxSize) const override { return Json::getString(m_obj, key, maxSize); } inline uint64_t getUint64(const char *key, uint64_t defaultValue = 0) const override { return Json::getUint64(m_obj, key, defaultValue); } inline unsigned getUint(const char *key, unsigned defaultValue = 0) const override { return Json::getUint(m_obj, key, defaultValue); } diff --git a/src/base/io/json/JsonChain.cpp b/src/base/io/json/JsonChain.cpp index dff619e8..0a4a1857 100644 --- a/src/base/io/json/JsonChain.cpp +++ b/src/base/io/json/JsonChain.cpp @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -181,6 +175,28 @@ const rapidjson::Value &xmrig::JsonChain::getValue(const char *key) const } + +const rapidjson::Value &xmrig::JsonChain::object() const +{ + assert(false); + + return m_chain.back(); +} + + +double xmrig::JsonChain::getDouble(const char *key, double defaultValue) const +{ + for (auto it = m_chain.rbegin(); it != m_chain.rend(); ++it) { + auto i = it->FindMember(key); + if (i != it->MemberEnd() && (i->value.IsDouble() || i->value.IsLosslessDouble())) { + return i->value.GetDouble(); + } + } + + return defaultValue; +} + + int xmrig::JsonChain::getInt(const char *key, int defaultValue) const { for (auto it = m_chain.rbegin(); it != m_chain.rend(); ++it) { @@ -207,6 +223,24 @@ int64_t xmrig::JsonChain::getInt64(const char *key, int64_t defaultValue) const } + +xmrig::String xmrig::JsonChain::getString(const char *key, size_t maxSize) const +{ + for (auto it = m_chain.rbegin(); it != m_chain.rend(); ++it) { + auto i = it->FindMember(key); + if (i != it->MemberEnd() && i->value.IsString()) { + if (maxSize == 0 || i->value.GetStringLength() <= maxSize) { + return i->value.GetString(); + } + + return { i->value.GetString(), maxSize }; + } + } + + return {}; +} + + uint64_t xmrig::JsonChain::getUint64(const char *key, uint64_t defaultValue) const { for (auto it = m_chain.rbegin(); it != m_chain.rend(); ++it) { diff --git a/src/base/io/json/JsonChain.h b/src/base/io/json/JsonChain.h index 90f30742..d7fc2f05 100644 --- a/src/base/io/json/JsonChain.h +++ b/src/base/io/json/JsonChain.h @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -59,8 +53,11 @@ protected: const rapidjson::Value &getArray(const char *key) const override; const rapidjson::Value &getObject(const char *key) const override; const rapidjson::Value &getValue(const char *key) const override; + const rapidjson::Value &object() const override; + double getDouble(const char *key, double defaultValue = 0) const override; int getInt(const char *key, int 
defaultValue = 0) const override; int64_t getInt64(const char *key, int64_t defaultValue = 0) const override; + String getString(const char *key, size_t maxSize) const override; uint64_t getUint64(const char *key, uint64_t defaultValue = 0) const override; unsigned getUint(const char *key, unsigned defaultValue = 0) const override; diff --git a/src/base/io/json/JsonRequest.cpp b/src/base/io/json/JsonRequest.cpp index cacbdc80..2bfdda00 100644 --- a/src/base/io/json/JsonRequest.cpp +++ b/src/base/io/json/JsonRequest.cpp @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -30,16 +24,33 @@ namespace xmrig { -static const char *k2_0 = "2.0"; -static const char *kId = "id"; -static const char *kJsonRPC = "jsonrpc"; -static const char *kMethod = "method"; -const char *JsonRequest::kParams = "params"; +const char *JsonRequest::k2_0 = "2.0"; +const char *JsonRequest::kId = "id"; +const char *JsonRequest::kJsonRPC = "jsonrpc"; +const char *JsonRequest::kMethod = "method"; +const char *JsonRequest::kOK = "OK"; +const char *JsonRequest::kParams = "params"; +const char *JsonRequest::kResult = "result"; +const char *JsonRequest::kStatus = "status"; + +const char *JsonRequest::kParseError = "parse error"; +const char *JsonRequest::kInvalidRequest = "invalid request"; +const char *JsonRequest::kMethodNotFound = "method not found"; +const char *JsonRequest::kInvalidParams = "invalid params"; +const char *JsonRequest::kInternalError = "internal error"; + +static uint64_t nextId = 0; } // namespace xmrig +rapidjson::Document xmrig::JsonRequest::create(const char *method) +{ + return create(++nextId, method); +} + + rapidjson::Document xmrig::JsonRequest::create(int64_t id, const char *method) { using namespace rapidjson; @@ -54,7 +65,13 @@ rapidjson::Document xmrig::JsonRequest::create(int64_t id, const char *method) } -void xmrig::JsonRequest::create(rapidjson::Document &doc, int64_t id, const char *method, rapidjson::Value ¶ms) +uint64_t xmrig::JsonRequest::create(rapidjson::Document &doc, const char *method, rapidjson::Value ¶ms) +{ + return create(doc, ++nextId, method, params); +} + + +uint64_t xmrig::JsonRequest::create(rapidjson::Document &doc, int64_t id, const char *method, rapidjson::Value ¶ms) { using namespace rapidjson; auto &allocator = doc.GetAllocator(); @@ -63,4 +80,6 @@ void xmrig::JsonRequest::create(rapidjson::Document &doc, int64_t id, const char doc.AddMember(StringRef(kJsonRPC), StringRef(k2_0), allocator); doc.AddMember(StringRef(kMethod), StringRef(method), allocator); doc.AddMember(StringRef(kParams), params, allocator); + + return id; } diff --git a/src/base/io/json/JsonRequest.h b/src/base/io/json/JsonRequest.h index 88dbbad6..21451a43 100644 --- a/src/base/io/json/JsonRequest.h +++ b/src/base/io/json/JsonRequest.h @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * 
Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -35,10 +29,31 @@ namespace xmrig { class JsonRequest { public: + static const char *k2_0; + static const char *kId; + static const char *kJsonRPC; + static const char *kMethod; + static const char *kOK; static const char *kParams; + static const char *kResult; + static const char *kStatus; + static const char *kParseError; + static const char *kInvalidRequest; + static const char *kMethodNotFound; + static const char *kInvalidParams; + static const char *kInternalError; + + constexpr static int kParseErrorCode = -32700; + constexpr static int kInvalidRequestCode = -32600; + constexpr static int kMethodNotFoundCode = -32601; + constexpr static int kInvalidParamsCode = -32602; + constexpr static int kInternalErrorCode = -32603; + + static rapidjson::Document create(const char *method); static rapidjson::Document create(int64_t id, const char *method); - static void create(rapidjson::Document &doc, int64_t id, const char *method, rapidjson::Value ¶ms); + static uint64_t create(rapidjson::Document &doc, const char *method, rapidjson::Value ¶ms); + static uint64_t create(rapidjson::Document &doc, int64_t id, const char *method, rapidjson::Value ¶ms); }; diff --git a/src/base/io/json/Json_unix.cpp b/src/base/io/json/Json_unix.cpp index 328dda34..d2a2f351 100644 --- a/src/base/io/json/Json_unix.cpp +++ b/src/base/io/json/Json_unix.cpp @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -43,7 +37,7 @@ bool xmrig::Json::get(const char *fileName, rapidjson::Document &doc) rapidjson::IStreamWrapper isw(ifs); doc.ParseStream(isw); - return !doc.HasParseError() && doc.IsObject(); + return !doc.HasParseError() && (doc.IsObject() || doc.IsArray()); } @@ -56,7 +50,10 @@ bool xmrig::Json::save(const char *fileName, const rapidjson::Document &doc) rapidjson::OStreamWrapper osw(ofs); rapidjson::PrettyWriter writer(osw); + +# ifdef XMRIG_JSON_SINGLE_LINE_ARRAY writer.SetFormatOptions(rapidjson::kFormatSingleLineArray); +# endif doc.Accept(writer); diff --git a/src/base/io/json/Json_win.cpp b/src/base/io/json/Json_win.cpp index 87c79c24..cd7cf584 100644 --- a/src/base/io/json/Json_win.cpp +++ b/src/base/io/json/Json_win.cpp @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -96,7 +90,7 @@ bool xmrig::Json::get(const char *fileName, rapidjson::Document &doc) IStreamWrapper isw(ifs); doc.ParseStream(isw); - return !doc.HasParseError() && doc.IsObject(); + return !doc.HasParseError() && (doc.IsObject() || doc.IsArray()); } @@ -127,7 +121,10 @@ bool 
xmrig::Json::save(const char *fileName, const rapidjson::Document &doc) OStreamWrapper osw(ofs); PrettyWriter writer(osw); + +# ifdef XMRIG_JSON_SINGLE_LINE_ARRAY writer.SetFormatOptions(kFormatSingleLineArray); +# endif doc.Accept(writer); diff --git a/src/base/io/log/Log.cpp b/src/base/io/log/Log.cpp index 2c2b2d14..e144858a 100644 --- a/src/base/io/log/Log.cpp +++ b/src/base/io/log/Log.cpp @@ -1,13 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2019 Spudz76 - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2019 Spudz76 + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -31,6 +25,7 @@ #include +#include #include #include #include @@ -76,7 +71,7 @@ public: inline ~LogPrivate() { - for (ILogBackend *backend : m_backends) { + for (auto backend : m_backends) { delete backend; } } @@ -96,7 +91,7 @@ public: return; } - timestamp(level, size, offset); + const uint64_t ts = timestamp(level, size, offset); color(level, size); const int rc = vsnprintf(m_buf + size, sizeof (m_buf) - offset - 32, fmt, args); @@ -114,9 +109,9 @@ public: } if (!m_backends.empty()) { - for (ILogBackend *backend : m_backends) { - backend->print(level, m_buf, offset, size, true); - backend->print(level, txt.c_str(), offset ? (offset - 11) : 0, txt.size(), false); + for (auto backend : m_backends) { + backend->print(ts, level, m_buf, offset, size, true); + backend->print(ts, level, txt.c_str(), offset ? (offset - 11) : 0, txt.size(), false); } } else { @@ -127,14 +122,15 @@ public: private: - inline void timestamp(Log::Level level, size_t &size, size_t &offset) + inline uint64_t timestamp(Log::Level level, size_t &size, size_t &offset) { + const uint64_t ms = Chrono::currentMSecsSinceEpoch(); + if (level == Log::NONE) { - return; + return ms; } - const uint64_t ms = Chrono::currentMSecsSinceEpoch(); - time_t now = ms / 1000; + time_t now = ms / 1000; tm stime{}; # ifdef _WIN32 @@ -156,6 +152,8 @@ private: if (rc > 0) { size = offset = static_cast(rc); } + + return ms; } @@ -195,10 +193,10 @@ private: }; -bool Log::m_background = false; -bool Log::m_colors = true; -LogPrivate *Log::d = new LogPrivate(); -uint32_t Log::m_verbose = 0; +bool Log::m_background = false; +bool Log::m_colors = true; +LogPrivate *Log::d = nullptr; +uint32_t Log::m_verbose = 0; } /* namespace xmrig */ @@ -207,6 +205,8 @@ uint32_t Log::m_verbose = 0; void xmrig::Log::add(ILogBackend *backend) { + assert(d != nullptr); + if (d) { d->add(backend); } @@ -220,6 +220,12 @@ void xmrig::Log::destroy() } +void xmrig::Log::init() +{ + d = new LogPrivate(); +} + + void xmrig::Log::print(const char *fmt, ...) 
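Starting with this patch the logger is created lazily: `Log::d` now defaults to `nullptr` and `Log::init()` (called from the `Base` constructor further down in this diff) allocates `LogPrivate`, and every backend receives the millisecond timestamp captured while the line was formatted. Below is a minimal sketch of a backend written against the updated `ILogBackend::print()` signature shown later in this diff; the class name and the output format are illustrative only, not part of the patch.

```cpp
#include <cstdio>

#include "base/kernel/interfaces/ILogBackend.h"


namespace xmrig {


// Illustrative backend: prints plain (uncolored) lines together with the
// epoch timestamp in milliseconds that the logger now forwards.
class TimestampedLog : public ILogBackend
{
public:
    XMRIG_DISABLE_COPY_MOVE(TimestampedLog)

    TimestampedLog() = default;

protected:
    void print(uint64_t timestamp, int level, const char *line, size_t offset, size_t size, bool colors) override
    {
        if (colors) { // only the plain-text variant is handled in this sketch
            return;
        }

        std::printf("%llu [%d] %.*s", static_cast<unsigned long long>(timestamp), level,
                    static_cast<int>(size - offset), line + offset);
    }
};


} // namespace xmrig
```

Backends are still registered with `Log::add()`, which after this change asserts that `Log::init()` has already run.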
{ if (!d) { diff --git a/src/base/io/log/Log.h b/src/base/io/log/Log.h index 73d4e53f..6da2e892 100644 --- a/src/base/io/log/Log.h +++ b/src/base/io/log/Log.h @@ -1,13 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2019 Spudz76 - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2019 Spudz76 + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -57,6 +51,7 @@ public: static void add(ILogBackend *backend); static void destroy(); + static void init(); static void print(const char *fmt, ...); static void print(Level level, const char *fmt, ...); @@ -71,9 +66,8 @@ public: private: static bool m_background; static bool m_colors; - static uint32_t m_verbose; - static LogPrivate *d; + static uint32_t m_verbose; }; diff --git a/src/base/io/log/backends/ConsoleLog.cpp b/src/base/io/log/backends/ConsoleLog.cpp index 29103a54..cc3081fd 100644 --- a/src/base/io/log/backends/ConsoleLog.cpp +++ b/src/base/io/log/backends/ConsoleLog.cpp @@ -1,13 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2019 Spudz76 - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2019 Spudz76 + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,14 +18,13 @@ */ -#include - - #include "base/io/log/backends/ConsoleLog.h" #include "base/io/log/Log.h" #include "base/kernel/config/Title.h" #include "base/tools/Handle.h" -#include "version.h" + + +#include xmrig::ConsoleLog::ConsoleLog(const Title &title) @@ -75,7 +68,7 @@ xmrig::ConsoleLog::~ConsoleLog() } -void xmrig::ConsoleLog::print(int, const char *line, size_t, size_t size, bool colors) +void xmrig::ConsoleLog::print(uint64_t, int, const char *line, size_t, size_t size, bool colors) { if (!m_tty || Log::isColors() != colors) { return; diff --git a/src/base/io/log/backends/ConsoleLog.h b/src/base/io/log/backends/ConsoleLog.h index 7f365a15..01917763 100644 --- a/src/base/io/log/backends/ConsoleLog.h +++ b/src/base/io/log/backends/ConsoleLog.h @@ -1,13 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2019 Spudz76 - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2019 Spudz76 + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -50,7 +44,7 @@ public: ~ConsoleLog() override; protected: - void print(int level, const char *line, size_t offset, size_t size, bool colors) override; + void print(uint64_t timestamp, int level, const char *line, size_t offset, size_t size, bool colors) override; private: bool isSupported() const; diff --git a/src/base/io/log/backends/FileLog.cpp 
b/src/base/io/log/backends/FileLog.cpp index 2d52d812..51e5b1f7 100644 --- a/src/base/io/log/backends/FileLog.cpp +++ b/src/base/io/log/backends/FileLog.cpp @@ -1,13 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2019 Spudz76 - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2019 Spudz76 + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -37,7 +31,7 @@ xmrig::FileLog::FileLog(const char *fileName) : } -void xmrig::FileLog::print(int, const char *line, size_t, size_t size, bool colors) +void xmrig::FileLog::print(uint64_t, int, const char *line, size_t, size_t size, bool colors) { if (!m_writer.isOpen() || colors) { return; diff --git a/src/base/io/log/backends/FileLog.h b/src/base/io/log/backends/FileLog.h index 95e9881a..c52cefd9 100644 --- a/src/base/io/log/backends/FileLog.h +++ b/src/base/io/log/backends/FileLog.h @@ -1,13 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2019 Spudz76 - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2019 Spudz76 + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -40,7 +34,7 @@ public: FileLog(const char *fileName); protected: - void print(int level, const char *line, size_t offset, size_t size, bool colors) override; + void print(uint64_t timestamp, int level, const char *line, size_t offset, size_t size, bool colors) override; private: FileLogWriter m_writer; diff --git a/src/base/io/log/backends/SysLog.cpp b/src/base/io/log/backends/SysLog.cpp index e66f2e35..bd333908 100644 --- a/src/base/io/log/backends/SysLog.cpp +++ b/src/base/io/log/backends/SysLog.cpp @@ -1,13 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2019 Spudz76 - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright (c) 2019 Spudz76 + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -43,7 +37,7 @@ xmrig::SysLog::~SysLog() } -void xmrig::SysLog::print(int level, const char *line, size_t offset, size_t, bool colors) +void xmrig::SysLog::print(uint64_t, int level, const char *line, size_t offset, size_t, bool colors) { if (colors) { return; diff --git a/src/base/io/log/backends/SysLog.h b/src/base/io/log/backends/SysLog.h index d131784b..447ad006 100644 --- a/src/base/io/log/backends/SysLog.h +++ b/src/base/io/log/backends/SysLog.h @@ -1,13 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2019 Spudz76 - * Copyright 
2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright (c) 2019 Spudz76 + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,11 +30,13 @@ namespace xmrig { class SysLog : public ILogBackend { public: + XMRIG_DISABLE_COPY_MOVE(SysLog) + SysLog(); ~SysLog() override; protected: - void print(int level, const char *line, size_t offset, size_t size, bool colors) override; + void print(uint64_t timestamp, int level, const char *line, size_t offset, size_t size, bool colors) override; }; diff --git a/src/base/kernel/Base.cpp b/src/base/kernel/Base.cpp index 1e5019ca..24736193 100644 --- a/src/base/kernel/Base.cpp +++ b/src/base/kernel/Base.cpp @@ -166,6 +166,7 @@ private: xmrig::Base::Base(Process *process) : d_ptr(new BasePrivate(process)) { + Log::init(); } diff --git a/src/base/kernel/Platform_hwloc.cpp b/src/base/kernel/Platform_hwloc.cpp index 8ec06975..8d9d9f59 100644 --- a/src/base/kernel/Platform_hwloc.cpp +++ b/src/base/kernel/Platform_hwloc.cpp @@ -26,6 +26,7 @@ #include +#ifndef XMRIG_OS_APPLE bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id) { auto cpu = static_cast(Cpu::info()); @@ -44,3 +45,4 @@ bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id) std::this_thread::sleep_for(std::chrono::milliseconds(1)); return result; } +#endif diff --git a/src/base/kernel/Platform_mac.cpp b/src/base/kernel/Platform_mac.cpp index 27797ff7..20ad9652 100644 --- a/src/base/kernel/Platform_mac.cpp +++ b/src/base/kernel/Platform_mac.cpp @@ -19,8 +19,6 @@ #include #include -#include -#include #include #include #include @@ -38,7 +36,14 @@ char *xmrig::Platform::createUserAgent() constexpr const size_t max = 256; char *buf = new char[max](); - int length = snprintf(buf, max, "%s/%s (Macintosh; Intel Mac OS X) libuv/%s", APP_NAME, APP_VERSION, uv_version_string()); + int length = snprintf(buf, max, + "%s/%s (Macintosh; macOS" +# ifdef XMRIG_ARM + "; arm64" +# else + "; x86_64" +# endif + ") libuv/%s", APP_NAME, APP_VERSION, uv_version_string()); # ifdef __clang__ length += snprintf(buf + length, max - length, " clang/%d.%d.%d", __clang_major__, __clang_minor__, __clang_patchlevel__); @@ -50,18 +55,10 @@ char *xmrig::Platform::createUserAgent() } -#ifndef XMRIG_FEATURE_HWLOC bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id) { - thread_port_t mach_thread; - thread_affinity_policy_data_t policy = { static_cast(cpu_id) }; - mach_thread = pthread_mach_thread_np(pthread_self()); - - const bool result = (thread_policy_set(mach_thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&policy, 1) == KERN_SUCCESS); - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - return result; + return true; } -#endif void xmrig::Platform::setProcessPriority(int) diff --git a/src/base/kernel/config/Title.cpp b/src/base/kernel/config/Title.cpp index 1e9c3ce4..030aa727 100644 --- a/src/base/kernel/config/Title.cpp +++ b/src/base/kernel/config/Title.cpp @@ -1,6 +1,6 @@ /* XMRig - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/base/kernel/config/Title.h b/src/base/kernel/config/Title.h index 8cf73f28..452869dc 100644 --- a/src/base/kernel/config/Title.h +++ 
b/src/base/kernel/config/Title.h @@ -1,6 +1,6 @@ /* XMRig - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/base/kernel/constants.h b/src/base/kernel/constants.h new file mode 100644 index 00000000..a76cee9a --- /dev/null +++ b/src/base/kernel/constants.h @@ -0,0 +1,31 @@ +/* XMRig + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef XMRIG_CONSTANTS_H +#define XMRIG_CONSTANTS_H + + +#include +#include + + +constexpr size_t XMRIG_NET_BUFFER_CHUNK_SIZE = 64 * 1024; +constexpr size_t XMRIG_NET_BUFFER_INIT_CHUNKS = 4; + + +#endif /* XMRIG_CONSTANTS_H */ diff --git a/src/base/kernel/interfaces/IJsonReader.h b/src/base/kernel/interfaces/IJsonReader.h index b545514d..044a291c 100644 --- a/src/base/kernel/interfaces/IJsonReader.h +++ b/src/base/kernel/interfaces/IJsonReader.h @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -27,6 +21,8 @@ #include "3rdparty/rapidjson/fwd.h" +#include "base/tools/Object.h" +#include "base/tools/String.h" namespace xmrig { @@ -35,7 +31,10 @@ namespace xmrig { class IJsonReader { public: - virtual ~IJsonReader() = default; + XMRIG_DISABLE_COPY_MOVE(IJsonReader) + + IJsonReader() = default; + virtual ~IJsonReader() = default; virtual bool getBool(const char *key, bool defaultValue = false) const = 0; virtual bool isEmpty() const = 0; @@ -43,8 +42,11 @@ public: virtual const rapidjson::Value &getArray(const char *key) const = 0; virtual const rapidjson::Value &getObject(const char *key) const = 0; virtual const rapidjson::Value &getValue(const char *key) const = 0; + virtual const rapidjson::Value &object() const = 0; + virtual double getDouble(const char *key, double defaultValue = 0) const = 0; virtual int getInt(const char *key, int defaultValue = 0) const = 0; virtual int64_t getInt64(const char *key, int64_t defaultValue = 0) const = 0; + virtual String getString(const char *key, size_t maxSize) const = 0; virtual uint64_t getUint64(const char *key, uint64_t defaultValue = 0) const = 0; virtual unsigned getUint(const char *key, unsigned defaultValue = 0) const = 0; }; diff --git a/src/base/kernel/interfaces/ILogBackend.h b/src/base/kernel/interfaces/ILogBackend.h index ef18da88..88137fd1 100644 --- 
a/src/base/kernel/interfaces/ILogBackend.h +++ b/src/base/kernel/interfaces/ILogBackend.h @@ -1,13 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2019 Spudz76 - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -27,8 +20,11 @@ #define XMRIG_ILOGBACKEND_H -#include -#include +#include "base/tools/Object.h" + + +#include +#include namespace xmrig { @@ -37,9 +33,12 @@ namespace xmrig { class ILogBackend { public: - virtual ~ILogBackend() = default; + XMRIG_DISABLE_COPY_MOVE(ILogBackend) - virtual void print(int level, const char *line, size_t offset, size_t size, bool colors) = 0; + ILogBackend() = default; + virtual ~ILogBackend() = default; + + virtual void print(uint64_t timestamp, int level, const char *line, size_t offset, size_t size, bool colors) = 0; }; diff --git a/src/base/net/dns/Dns.cpp b/src/base/net/dns/Dns.cpp index 6246421c..ef50b20d 100644 --- a/src/base/net/dns/Dns.cpp +++ b/src/base/net/dns/Dns.cpp @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/base/net/dns/Dns.h b/src/base/net/dns/Dns.h index 068ab80f..86f90145 100644 --- a/src/base/net/dns/Dns.h +++ b/src/base/net/dns/Dns.h @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/base/net/dns/DnsRecord.cpp b/src/base/net/dns/DnsRecord.cpp index e722ad6a..1667c715 100644 --- a/src/base/net/dns/DnsRecord.cpp +++ b/src/base/net/dns/DnsRecord.cpp @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/base/net/dns/DnsRecord.h b/src/base/net/dns/DnsRecord.h index 0b572df8..cf6c2598 100644 --- a/src/base/net/dns/DnsRecord.h +++ b/src/base/net/dns/DnsRecord.h @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 
XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/base/net/http/Fetch.cpp b/src/base/net/http/Fetch.cpp index 4d18b71d..84ff715c 100644 --- a/src/base/net/http/Fetch.cpp +++ b/src/base/net/http/Fetch.cpp @@ -1,6 +1,6 @@ /* XMRig - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -95,7 +95,7 @@ void xmrig::FetchRequest::setBody(const rapidjson::Value &value) } -void xmrig::fetch(FetchRequest &&req, const std::weak_ptr &listener, int type) +void xmrig::fetch(const char *tag, FetchRequest &&req, const std::weak_ptr &listener, int type, uint64_t rpcId) { # ifdef APP_DEBUG LOG_DEBUG(CYAN("http%s://%s:%u ") MAGENTA_BOLD("\"%s %s\"") BLACK_BOLD(" body: ") CYAN_BOLD("%zu") BLACK_BOLD(" bytes"), @@ -109,14 +109,15 @@ void xmrig::fetch(FetchRequest &&req, const std::weak_ptr &listen HttpClient *client; # ifdef XMRIG_FEATURE_TLS if (req.tls) { - client = new HttpsClient(std::move(req), listener); + client = new HttpsClient(tag, std::move(req), listener); } else # endif { - client = new HttpClient(std::move(req), listener); + client = new HttpClient(tag, std::move(req), listener); } client->userType = type; + client->rpcId = rpcId; client->connect(); } diff --git a/src/base/net/http/Fetch.h b/src/base/net/http/Fetch.h index b6fbf487..bdab46ba 100644 --- a/src/base/net/http/Fetch.h +++ b/src/base/net/http/Fetch.h @@ -1,6 +1,6 @@ /* XMRig - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -58,10 +58,11 @@ public: String host; String path; uint16_t port = 0; + uint64_t timeout = 0; }; -void fetch(FetchRequest &&req, const std::weak_ptr &listener, int type = 0); +void fetch(const char *tag, FetchRequest &&req, const std::weak_ptr &listener, int type = 0, uint64_t rpcId = 0); } // namespace xmrig diff --git a/src/base/net/http/HttpClient.cpp b/src/base/net/http/HttpClient.cpp index 92ca6fac..320f8ee4 100644 --- a/src/base/net/http/HttpClient.cpp +++ b/src/base/net/http/HttpClient.cpp @@ -1,13 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2014-2019 heapwolf - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2014-2019 heapwolf + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -30,6 +24,7 @@ #include "base/kernel/Platform.h" #include "base/net/dns/Dns.h" #include "base/net/tools/NetBuffer.h" +#include "base/tools/Timer.h" #include @@ -44,21 +39,20 @@ static const char *kCRLF = "\r\n"; } // namespace xmrig -xmrig::HttpClient::HttpClient(FetchRequest &&req, const std::weak_ptr &listener) : 
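`fetch()` now takes a caller tag that is printed in DNS/connect/read error messages in place of `host:port`, plus an optional request type and JSON-RPC id, and `FetchRequest` gains a `timeout` field that `HttpClient` turns into a `Timer` which closes the connection with `UV_ETIMEDOUT`. A rough sketch of a caller under the new signature follows; the tag, host, port, path and the 5-second timeout are invented for illustration, and the listener is assumed to be the usual `std::shared_ptr<IHttpListener>` kept by the caller.

```cpp
#include <memory>
#include <utility>

#include "base/net/http/Fetch.h"

// Hypothetical caller: issue a plain-HTTP GET that is abandoned after 5 seconds.
void example_fetch(const std::shared_ptr<xmrig::IHttpListener> &listener)
{
    using namespace xmrig;

    FetchRequest req(HTTP_GET, "127.0.0.1", 18081, "/get_info", false, true);
    req.timeout = 5000; // milliseconds; 0 keeps the old "no timer" behaviour

    // "example" is the tag shown in error log lines; type and rpcId keep their defaults.
    fetch("example", std::move(req), listener);
}
```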
+xmrig::HttpClient::HttpClient(const char *tag, FetchRequest &&req, const std::weak_ptr &listener) : HttpContext(HTTP_RESPONSE, listener), + m_tag(tag), m_req(std::move(req)) { method = m_req.method; url = std::move(m_req.path); body = std::move(m_req.body); headers = std::move(m_req.headers); - m_dns = new Dns(this); -} + m_dns = std::make_shared(this); - -xmrig::HttpClient::~HttpClient() -{ - delete m_dns; + if (m_req.timeout) { + m_timer = std::make_shared(this, m_req.timeout, 0); + } } @@ -74,7 +68,7 @@ void xmrig::HttpClient::onResolved(const Dns &dns, int status) if (status < 0 && dns.isEmpty()) { if (!isQuiet()) { - LOG_ERR("[%s:%d] DNS error: \"%s\"", dns.host().data(), port(), uv_strerror(status)); + LOG_ERR("%s " RED("DNS error: ") RED_BOLD("\"%s\""), tag(), uv_strerror(status)); } return; @@ -91,6 +85,12 @@ void xmrig::HttpClient::onResolved(const Dns &dns, int status) } +void xmrig::HttpClient::onTimer(const Timer *) +{ + close(UV_ETIMEDOUT); +} + + void xmrig::HttpClient::handshake() { headers.insert({ "Host", host() }); @@ -135,8 +135,12 @@ void xmrig::HttpClient::onConnect(uv_connect_t *req, int status) } if (status < 0) { + if (status == UV_ECANCELED) { + status = UV_ETIMEDOUT; + } + if (!client->isQuiet()) { - LOG_ERR("[%s:%d] connect error: \"%s\"", client->m_dns->host().data(), client->port(), uv_strerror(status)); + LOG_ERR("%s " RED("connect error: ") RED_BOLD("\"%s\""), client->tag(), uv_strerror(status)); } return client->close(status); @@ -151,7 +155,7 @@ void xmrig::HttpClient::onConnect(uv_connect_t *req, int status) client->read(buf->base, static_cast(nread)); } else { if (!client->isQuiet() && nread != UV_EOF) { - LOG_ERR("[%s:%d] read error: \"%s\"", client->m_dns->host().data(), client->port(), uv_strerror(static_cast(nread))); + LOG_ERR("%s " RED("read error: ") RED_BOLD("\"%s\""), client->tag(), uv_strerror(static_cast(nread))); } client->close(static_cast(nread)); diff --git a/src/base/net/http/HttpClient.h b/src/base/net/http/HttpClient.h index acfe15e2..2b9a314d 100644 --- a/src/base/net/http/HttpClient.h +++ b/src/base/net/http/HttpClient.h @@ -1,13 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2014-2019 heapwolf - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2014-2019 heapwolf + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -29,6 +23,7 @@ #include "base/kernel/interfaces/IDnsListener.h" +#include "base/kernel/interfaces/ITimerListener.h" #include "base/net/http/Fetch.h" #include "base/net/http/HttpContext.h" #include "base/tools/Object.h" @@ -40,22 +35,24 @@ namespace xmrig { class String; -class HttpClient : public HttpContext, public IDnsListener +class HttpClient : public HttpContext, public IDnsListener, public ITimerListener { public: XMRIG_DISABLE_COPY_MOVE_DEFAULT(HttpClient); - HttpClient(FetchRequest &&req, const std::weak_ptr &listener); - ~HttpClient() override; + HttpClient(const char *tag, FetchRequest &&req, const std::weak_ptr &listener); + ~HttpClient() override = default; inline bool isQuiet() const { return m_req.quiet; } inline const char *host() const override { return m_req.host; } + inline const char *tag() const { return m_tag; } inline uint16_t port() 
const override { return m_req.port; } bool connect(); protected: void onResolved(const Dns &dns, int status) override; + void onTimer(const Timer *timer) override; virtual void handshake(); virtual void read(const char *data, size_t size); @@ -66,8 +63,10 @@ protected: private: static void onConnect(uv_connect_t *req, int status); - Dns *m_dns; + const char *m_tag; FetchRequest m_req; + std::shared_ptr m_dns; + std::shared_ptr m_timer; }; diff --git a/src/base/net/http/HttpContext.cpp b/src/base/net/http/HttpContext.cpp index b348d7e4..52739e4d 100644 --- a/src/base/net/http/HttpContext.cpp +++ b/src/base/net/http/HttpContext.cpp @@ -1,13 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2014-2019 heapwolf - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2014-2019 heapwolf + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -160,6 +154,10 @@ uint64_t xmrig::HttpContext::elapsed() const void xmrig::HttpContext::close(int status) { + if (!get(id())) { + return; + } + auto listener = httpListener(); if (status < 0 && listener) { @@ -262,7 +260,7 @@ void xmrig::HttpContext::attach(http_parser_settings *settings) settings->on_body = [](http_parser *parser, const char *at, size_t len) -> int { - static_cast(parser->data)->body += std::string(at, len); + static_cast(parser->data)->body.append(at, len); return 0; }; diff --git a/src/base/net/http/HttpContext.h b/src/base/net/http/HttpContext.h index a7554679..4202bfaf 100644 --- a/src/base/net/http/HttpContext.h +++ b/src/base/net/http/HttpContext.h @@ -1,13 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2014-2019 heapwolf - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2014-2019 heapwolf + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/base/net/http/HttpListener.cpp b/src/base/net/http/HttpListener.cpp index 077184fc..ba9130ed 100644 --- a/src/base/net/http/HttpListener.cpp +++ b/src/base/net/http/HttpListener.cpp @@ -1,6 +1,6 @@ /* XMRig - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/base/net/http/HttpListener.h b/src/base/net/http/HttpListener.h index 4f982c4d..8f86d2ba 100644 --- a/src/base/net/http/HttpListener.h +++ b/src/base/net/http/HttpListener.h @@ -1,6 +1,6 @@ /* XMRig - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/base/net/https/HttpsClient.cpp b/src/base/net/https/HttpsClient.cpp 
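The remaining networking files migrate from the old `Buffer::toHex`/`Buffer::fromHex` helpers to `Cvt`, whose calls take an explicit destination size (TLS fingerprints in `HttpsClient.cpp` and `stratum/Tls.cpp`, nonces in `Client.cpp` and `DaemonClient.cpp`, blobs in `Job.cpp`). A small round-trip sketch inferred from those call sites; the exact declarations live in the new `base/tools/Cvt.h`, which is not part of this excerpt.

```cpp
#include <cstdint>

#include "base/tools/Cvt.h"

void hex_roundtrip_example()
{
    using namespace xmrig;

    const uint32_t nonce = 0xdeadbeef;

    // Encoding: 8 hex characters plus the terminating NUL, with the
    // destination size passed explicitly.
    char hex[sizeof(uint32_t) * 2 + 1];
    Cvt::toHex(hex, sizeof(hex), reinterpret_cast<const uint8_t *>(&nonce), sizeof(nonce));

    // Decoding into a fixed buffer; the call sites in this diff test the
    // result, so a malformed hex string can be rejected.
    uint8_t raw[sizeof(uint32_t)];
    if (!Cvt::fromHex(raw, sizeof(raw), hex, sizeof(uint32_t) * 2)) {
        return;
    }
}
```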
index 28b5a5ef..8b414917 100644 --- a/src/base/net/https/HttpsClient.cpp +++ b/src/base/net/https/HttpsClient.cpp @@ -1,13 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2014-2019 heapwolf - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2014-2019 heapwolf + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -31,7 +25,7 @@ #include "base/net/https/HttpsClient.h" #include "base/io/log/Log.h" -#include "base/tools/Buffer.h" +#include "base/tools/Cvt.h" #ifdef _MSC_VER @@ -39,8 +33,8 @@ #endif -xmrig::HttpsClient::HttpsClient(FetchRequest &&req, const std::weak_ptr &listener) : - HttpClient(std::move(req), listener) +xmrig::HttpsClient::HttpsClient(const char *tag, FetchRequest &&req, const std::weak_ptr &listener) : + HttpClient(tag, std::move(req), listener) { m_ctx = SSL_CTX_new(SSLv23_method()); assert(m_ctx != nullptr); @@ -182,7 +176,7 @@ bool xmrig::HttpsClient::verifyFingerprint(X509 *cert) return false; } - Buffer::toHex(md, 32, m_fingerprint); + Cvt::toHex(m_fingerprint, sizeof(m_fingerprint), md, 32); return req().fingerprint.isNull() || strncasecmp(m_fingerprint, req().fingerprint.data(), 64) == 0; } diff --git a/src/base/net/https/HttpsClient.h b/src/base/net/https/HttpsClient.h index eeeec747..bd0c65ff 100644 --- a/src/base/net/https/HttpsClient.h +++ b/src/base/net/https/HttpsClient.h @@ -1,13 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2014-2019 heapwolf - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2014-2019 heapwolf + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -46,7 +40,7 @@ class HttpsClient : public HttpClient public: XMRIG_DISABLE_COPY_MOVE_DEFAULT(HttpsClient) - HttpsClient(FetchRequest &&req, const std::weak_ptr &listener); + HttpsClient(const char *tag, FetchRequest &&req, const std::weak_ptr &listener); ~HttpsClient() override; const char *tlsFingerprint() const override; diff --git a/src/base/net/stratum/Client.cpp b/src/base/net/stratum/Client.cpp index 018f7a66..5327867e 100644 --- a/src/base/net/stratum/Client.cpp +++ b/src/base/net/stratum/Client.cpp @@ -50,7 +50,7 @@ #include "base/net/dns/Dns.h" #include "base/net/stratum/Socks5.h" #include "base/net/tools/NetBuffer.h" -#include "base/tools/Buffer.h" +#include "base/tools/Cvt.h" #include "base/tools/Chrono.h" #include "net/JobResult.h" @@ -202,11 +202,8 @@ int64_t xmrig::Client::submit(const JobResult &result) char *nonce = m_sendBuf.data(); char *data = m_sendBuf.data() + 16; - Buffer::toHex(reinterpret_cast(&result.nonce), 4, nonce); - nonce[8] = '\0'; - - Buffer::toHex(result.result(), 32, data); - data[64] = '\0'; + Cvt::toHex(nonce, sizeof(uint32_t) * 2 + 1, reinterpret_cast(&result.nonce), sizeof(uint32_t)); + Cvt::toHex(data, 65, result.result(), 32); # endif Document doc(kObjectType); diff --git a/src/base/net/stratum/DaemonClient.cpp 
b/src/base/net/stratum/DaemonClient.cpp index a8986747..8b4d0fb6 100644 --- a/src/base/net/stratum/DaemonClient.cpp +++ b/src/base/net/stratum/DaemonClient.cpp @@ -36,7 +36,7 @@ #include "base/net/http/HttpData.h" #include "base/net/http/HttpListener.h" #include "base/net/stratum/SubmitResult.h" -#include "base/tools/Buffer.h" +#include "base/tools/Cvt.h" #include "base/tools/Timer.h" #include "net/JobResult.h" @@ -54,7 +54,7 @@ static const char *kHash = "hash"; static const char *kHeight = "height"; static const char *kJsonRPC = "/json_rpc"; -static const size_t BlobReserveSize = 8; +static constexpr size_t kBlobReserveSize = 8; } @@ -104,7 +104,7 @@ int64_t xmrig::DaemonClient::submit(const JobResult &result) # ifdef XMRIG_PROXY_PROJECT memcpy(data + 78, result.nonce, 8); # else - Buffer::toHex(reinterpret_cast(&result.nonce), 4, data + 78); + Cvt::toHex(data + 78, 9, reinterpret_cast(&result.nonce), 4); # endif using namespace rapidjson; @@ -227,7 +227,7 @@ bool xmrig::DaemonClient::parseJob(const rapidjson::Value ¶ms, int *code) m_blockhashingblob = Json::getString(params, "blockhashing_blob"); if (m_apiVersion == API_DERO) { const uint64_t offset = Json::getUint64(params, "reserved_offset"); - Buffer::toHex(Buffer::randomBytes(BlobReserveSize).data(), BlobReserveSize, m_blockhashingblob.data() + offset * 2); + Cvt::toHex(m_blockhashingblob.data() + offset * 2, kBlobReserveSize * 2 + 1, Cvt::randomBytes(kBlobReserveSize).data(), kBlobReserveSize); } if (blocktemplate.isNull() || !job.setBlob(m_blockhashingblob)) { @@ -315,10 +315,10 @@ int64_t xmrig::DaemonClient::getBlockTemplate() Value params(kObjectType); params.AddMember("wallet_address", m_user.toJSON(), allocator); if (m_apiVersion == API_DERO) { - params.AddMember("reserve_size", static_cast(BlobReserveSize), allocator); + params.AddMember("reserve_size", static_cast(kBlobReserveSize), allocator); } else { - params.AddMember("extra_nonce", Buffer::randomBytes(BlobReserveSize).toHex().toJSON(doc), allocator); + params.AddMember("extra_nonce", Cvt::toHex(Cvt::randomBytes(kBlobReserveSize)).toJSON(doc), allocator); } JsonRequest::create(doc, m_sequence, "getblocktemplate", params); @@ -330,7 +330,7 @@ int64_t xmrig::DaemonClient::getBlockTemplate() int64_t xmrig::DaemonClient::rpcSend(const rapidjson::Document &doc) { FetchRequest req(HTTP_POST, m_pool.host(), m_pool.port(), kJsonRPC, doc, m_pool.isTLS(), isQuiet()); - fetch(std::move(req), m_httpListener); + fetch(tag(), std::move(req), m_httpListener); return m_sequence++; } @@ -357,7 +357,7 @@ void xmrig::DaemonClient::retry() void xmrig::DaemonClient::send(const char *path) { FetchRequest req(HTTP_GET, m_pool.host(), m_pool.port(), path, m_pool.isTLS(), isQuiet()); - fetch(std::move(req), m_httpListener); + fetch(tag(), std::move(req), m_httpListener); } diff --git a/src/base/net/stratum/Job.cpp b/src/base/net/stratum/Job.cpp index 20345414..acd5a0d6 100644 --- a/src/base/net/stratum/Job.cpp +++ b/src/base/net/stratum/Job.cpp @@ -31,6 +31,7 @@ #include "base/net/stratum/Job.h" #include "base/tools/Buffer.h" +#include "base/tools/Cvt.h" xmrig::Job::Job(bool nicehash, const Algorithm &algorithm, const String &clientId) : @@ -59,11 +60,13 @@ bool xmrig::Job::setBlob(const char *blob) } m_size /= 2; - if (m_size < 76 || m_size >= sizeof(m_blob)) { + + const size_t minSize = nonceOffset() + nonceSize(); + if (m_size < minSize || m_size >= sizeof(m_blob)) { return false; } - if (!Buffer::fromHex(blob, m_size * 2, m_blob)) { + if (!Cvt::fromHex(m_blob, sizeof(m_blob), blob, m_size * 
2)) { return false; } @@ -90,9 +93,9 @@ bool xmrig::Job::setSeedHash(const char *hash) m_rawSeedHash = hash; # endif - m_seed = Buffer::fromHex(hash, kMaxSeedSize * 2); + m_seed = Cvt::fromHex(hash, kMaxSeedSize * 2); - return !m_seed.isEmpty(); + return !m_seed.empty(); } @@ -102,27 +105,14 @@ bool xmrig::Job::setTarget(const char *target) return false; } - const size_t len = strlen(target); + const auto raw = Cvt::fromHex(target, strlen(target)); + const size_t size = raw.size(); - if (len <= 8) { - uint32_t tmp = 0; - char str[8]; - memcpy(str, target, len); - - if (!Buffer::fromHex(str, 8, reinterpret_cast(&tmp)) || tmp == 0) { - return false; - } - - m_target = 0xFFFFFFFFFFFFFFFFULL / (0xFFFFFFFFULL / static_cast(tmp)); + if (size == 4) { + m_target = 0xFFFFFFFFFFFFFFFFULL / (0xFFFFFFFFULL / uint64_t(*reinterpret_cast(raw.data()))); } - else if (len <= 16) { - m_target = 0; - char str[16]; - memcpy(str, target, len); - - if (!Buffer::fromHex(str, 16, reinterpret_cast(&m_target)) || m_target == 0) { - return false; - } + else if (size == 8) { + m_target = *reinterpret_cast(raw.data()); } else { return false; diff --git a/src/base/net/stratum/SelfSelectClient.cpp b/src/base/net/stratum/SelfSelectClient.cpp index 8ad48b65..fc4cea6e 100644 --- a/src/base/net/stratum/SelfSelectClient.cpp +++ b/src/base/net/stratum/SelfSelectClient.cpp @@ -154,7 +154,7 @@ void xmrig::SelfSelectClient::getBlockTemplate() JsonRequest::create(doc, m_sequence++, "getblocktemplate", params); FetchRequest req(HTTP_POST, pool().daemon().host(), pool().daemon().port(), "/json_rpc", doc, pool().daemon().isTLS(), isQuiet()); - fetch(std::move(req), m_httpListener); + fetch(tag(), std::move(req), m_httpListener); } diff --git a/src/base/net/stratum/Tls.cpp b/src/base/net/stratum/Tls.cpp index 5915fad9..a4ccd853 100644 --- a/src/base/net/stratum/Tls.cpp +++ b/src/base/net/stratum/Tls.cpp @@ -27,7 +27,7 @@ #include "base/net/stratum/Tls.h" #include "base/io/log/Log.h" #include "base/net/stratum/Client.h" -#include "base/tools/Buffer.h" +#include "base/tools/Cvt.h" #ifdef _MSC_VER @@ -183,7 +183,7 @@ bool xmrig::Client::Tls::verifyFingerprint(X509 *cert) return false; } - Buffer::toHex(md, 32, m_fingerprint); + Cvt::toHex(m_fingerprint, sizeof(m_fingerprint), md, 32); const char *fingerprint = m_client->m_pool.fingerprint(); return fingerprint == nullptr || strncasecmp(m_fingerprint, fingerprint, 64) == 0; diff --git a/src/base/net/stratum/benchmark/BenchClient.cpp b/src/base/net/stratum/benchmark/BenchClient.cpp index 003eeca7..beac2f19 100644 --- a/src/base/net/stratum/benchmark/BenchClient.cpp +++ b/src/base/net/stratum/benchmark/BenchClient.cpp @@ -31,6 +31,7 @@ #include "base/net/http/HttpData.h" #include "base/net/http/HttpListener.h" #include "base/net/stratum/benchmark/BenchConfig.h" +#include "base/tools/Cvt.h" #include "version.h" @@ -217,7 +218,7 @@ bool xmrig::BenchClient::setSeed(const char *seed) return false; } - if (!Buffer::fromHex(seed, size * 2, m_job.blob())) { + if (!Cvt::fromHex(m_job.blob(), m_job.size(), seed, size * 2)) { return false; } @@ -321,7 +322,7 @@ void xmrig::BenchClient::send(Request request) case GET_BENCH: { FetchRequest req(HTTP_GET, m_ip, BenchConfig::kApiPort, fmt::format("/1/benchmark/{}", m_job.id()).c_str(), BenchConfig::kApiTLS, true); - fetch(std::move(req), m_httpListener); + fetch(tag(), std::move(req), m_httpListener); } break; @@ -335,7 +336,7 @@ void xmrig::BenchClient::send(Request request) doc.AddMember("cpu", Cpu::toJSON(doc), allocator); FetchRequest req(HTTP_POST, 
m_ip, BenchConfig::kApiPort, "/1/benchmark", doc, BenchConfig::kApiTLS, true); - fetch(std::move(req), m_httpListener); + fetch(tag(), std::move(req), m_httpListener); } break; @@ -374,6 +375,6 @@ void xmrig::BenchClient::update(const rapidjson::Value &body) FetchRequest req(HTTP_PATCH, m_ip, BenchConfig::kApiPort, fmt::format("/1/benchmark/{}", m_job.id()).c_str(), body, BenchConfig::kApiTLS, true); req.headers.insert({ "Authorization", fmt::format("Bearer {}", m_token)}); - fetch(std::move(req), m_httpListener); + fetch(tag(), std::move(req), m_httpListener); } #endif diff --git a/src/base/net/tls/ServerTls.cpp b/src/base/net/tls/ServerTls.cpp index e76d5862..abae315d 100644 --- a/src/base/net/tls/ServerTls.cpp +++ b/src/base/net/tls/ServerTls.cpp @@ -1,6 +1,6 @@ /* XMRig - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -20,6 +20,7 @@ #include "base/net/tls/ServerTls.h" +#include #include #include #include @@ -39,11 +40,23 @@ xmrig::ServerTls::~ServerTls() } +bool xmrig::ServerTls::isHTTP(const char *data, size_t size) +{ + assert(size > 0); + + static const char test[6] = "GET /"; + + return size > 0 && memcmp(data, test, std::min(size, sizeof(test) - 1)) == 0; +} + + bool xmrig::ServerTls::isTLS(const char *data, size_t size) { + assert(size > 0); + static const uint8_t test[3] = { 0x16, 0x03, 0x01 }; - return size >= sizeof(test) && memcmp(data, test, sizeof(test)) == 0; + return size > 0 && memcmp(data, test, std::min(size, sizeof(test))) == 0; } diff --git a/src/base/net/tls/ServerTls.h b/src/base/net/tls/ServerTls.h index 892bed2b..20337eb6 100644 --- a/src/base/net/tls/ServerTls.h +++ b/src/base/net/tls/ServerTls.h @@ -1,6 +1,6 @@ /* XMRig - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -41,6 +41,7 @@ public: ServerTls(SSL_CTX *ctx); virtual ~ServerTls(); + static bool isHTTP(const char *data, size_t size); static bool isTLS(const char *data, size_t size); bool send(const char *data, size_t size); diff --git a/src/base/net/tools/LineReader.cpp b/src/base/net/tools/LineReader.cpp index 3ad5c373..07d0f1dc 100644 --- a/src/base/net/tools/LineReader.cpp +++ b/src/base/net/tools/LineReader.cpp @@ -19,8 +19,9 @@ #include "base/net/tools/LineReader.h" -#include "base/net/tools/NetBuffer.h" +#include "base/kernel/constants.h" #include "base/kernel/interfaces/ILineListener.h" +#include "base/net/tools/NetBuffer.h" #include #include @@ -55,8 +56,8 @@ void xmrig::LineReader::reset() void xmrig::LineReader::add(const char *data, size_t size) { - if (size > NetBuffer::kChunkSize - m_pos) { - // it breakes correctness silently for long lines + if (size + m_pos > XMRIG_NET_BUFFER_CHUNK_SIZE) { + // it breaks correctness silently for long lines return; } diff --git a/src/base/net/tools/MemPool.h b/src/base/net/tools/MemPool.h index 3b8a8ef4..8a0a126b 100644 --- a/src/base/net/tools/MemPool.h +++ b/src/base/net/tools/MemPool.h @@ -1,6 +1,6 @@ /* XMRig - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you 
can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -20,10 +20,11 @@ #define XMRIG_MEMPOOL_H -#include -#include #include #include +#include +#include +#include namespace xmrig { diff --git a/src/base/net/tools/NetBuffer.cpp b/src/base/net/tools/NetBuffer.cpp index 3303d971..8931f06e 100644 --- a/src/base/net/tools/NetBuffer.cpp +++ b/src/base/net/tools/NetBuffer.cpp @@ -1,6 +1,6 @@ /* XMRig - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,8 +17,9 @@ */ -#include "base/net/tools/MemPool.h" #include "base/net/tools/NetBuffer.h" +#include "base/kernel/constants.h" +#include "base/net/tools/MemPool.h" #include @@ -28,14 +29,13 @@ namespace xmrig { -static constexpr size_t kInitSize = 4; -static MemPool *pool = nullptr; +static MemPool *pool = nullptr; -inline MemPool *getPool() +inline MemPool *getPool() { if (!pool) { - pool = new MemPool(); + pool = new MemPool(); } return pool; @@ -67,17 +67,25 @@ void xmrig::NetBuffer::destroy() void xmrig::NetBuffer::onAlloc(uv_handle_t *, size_t, uv_buf_t *buf) { buf->base = getPool()->allocate(); - buf->len = kChunkSize; + buf->len = XMRIG_NET_BUFFER_CHUNK_SIZE; } void xmrig::NetBuffer::release(const char *buf) { + if (buf == nullptr) { + return; + } + getPool()->deallocate(buf); } void xmrig::NetBuffer::release(const uv_buf_t *buf) { + if (buf->base == nullptr) { + return; + } + getPool()->deallocate(buf->base); } diff --git a/src/base/net/tools/NetBuffer.h b/src/base/net/tools/NetBuffer.h index 14255bb0..a7a2ee05 100644 --- a/src/base/net/tools/NetBuffer.h +++ b/src/base/net/tools/NetBuffer.h @@ -1,6 +1,6 @@ /* XMRig - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -33,8 +33,6 @@ namespace xmrig { class NetBuffer { public: - static constexpr size_t kChunkSize = 16 * 1024; - static char *allocate(); static void destroy(); static void onAlloc(uv_handle_t *handle, size_t suggested_size, uv_buf_t *buf); diff --git a/src/base/tools/Buffer.cpp b/src/base/tools/Buffer.cpp deleted file mode 100644 index e260b7a5..00000000 --- a/src/base/tools/Buffer.cpp +++ /dev/null @@ -1,231 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
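Editor's note on the call sites above (DaemonClient, Tls, BenchClient): the new Cvt helpers always take an explicit destination capacity, and the hex output needs bin_len * 2 + 1 bytes so the terminating NUL fits, which is why a 4-byte nonce is written with a capacity of 9. A minimal round-trip sketch, assuming only the Cvt signatures introduced by this patch (local names are illustrative, not part of the diff):

    #include "base/tools/Cvt.h"

    #include <cstdint>
    #include <cstdio>

    static void hex_roundtrip_sketch()
    {
        const uint32_t nonce = 0xdeadbeef;

        char hex[sizeof(nonce) * 2 + 1];   // 8 hex characters + terminating NUL
        xmrig::Cvt::toHex(hex, sizeof(hex), reinterpret_cast<const uint8_t *>(&nonce), sizeof(nonce));

        uint8_t bin[sizeof(nonce)] = {};
        if (xmrig::Cvt::fromHex(bin, sizeof(bin), hex, sizeof(nonce) * 2)) {
            printf("nonce as hex: %s\n", hex);   // "efbeadde" on a little-endian machine
        }
    }
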
- */ - - -#include "base/tools/Buffer.h" - - -#include - - -namespace xmrig { - - -static std::random_device randomDevice; -static std::mt19937 randomEngine(randomDevice()); - - -} // namespace xmrig - - -static inline uint8_t hf_hex2bin(uint8_t c, bool &err) -{ - if (c >= '0' && c <= '9') { - return c - '0'; - } - - if (c >= 'a' && c <= 'f') { - return c - 'a' + 0xA; - } - - if (c >= 'A' && c <= 'F') { - return c - 'A' + 0xA; - } - - err = true; - return 0; -} - - -static inline uint8_t hf_bin2hex(uint8_t c) -{ - if (c <= 0x9) { - return '0' + c; - } - - return 'a' - 0xA + c; -} - - -xmrig::Buffer::Buffer(Buffer &&other) noexcept : - m_data(other.m_data), - m_size(other.m_size) -{ - other.m_data = nullptr; - other.m_size = 0; -} - - -xmrig::Buffer::Buffer(const Buffer &other) -{ - copy(other.data(), other.size()); -} - - -xmrig::Buffer::Buffer(const char *data, size_t size) -{ - copy(data, size); -} - - -xmrig::Buffer::Buffer(size_t size) : - m_size(size) -{ - if (size > 0) { - m_data = new char[size](); - } -} - - -xmrig::Buffer::~Buffer() -{ - delete [] m_data; -} - - -void xmrig::Buffer::from(const char *data, size_t size) -{ - if (m_size > 0) { - if (m_size == size) { - memcpy(m_data, data, m_size); - - return; - } - - delete [] m_data; - } - - copy(data, size); -} - - -xmrig::Buffer xmrig::Buffer::allocUnsafe(size_t size) -{ - if (size == 0) { - return {}; - } - - Buffer buf; - buf.m_size = size; - buf.m_data = new char[size]; - - return buf; -} - - -xmrig::Buffer xmrig::Buffer::randomBytes(const size_t size) -{ - Buffer buf(size); - std::uniform_int_distribution<> dis(0, 255); - - for (size_t i = 0; i < size; ++i) { - buf.m_data[i] = static_cast(dis(randomEngine)); - } - - return buf; -} - - -bool xmrig::Buffer::fromHex(const uint8_t *in, size_t size, uint8_t *out) -{ - bool error = false; - for (size_t i = 0; i < size; i += 2) { - out[i / 2] = static_cast((hf_hex2bin(in[i], error) << 4) | hf_hex2bin(in[i + 1], error)); - - if (error) { - return false; - } - } - - return true; -} - - -xmrig::Buffer xmrig::Buffer::fromHex(const char *data, size_t size) -{ - if (data == nullptr || size % 2 != 0) { - return {}; - } - - Buffer buf(size / 2); - if (!fromHex(data, size, buf.data())) { - return {}; - } - - return buf; -} - - -void xmrig::Buffer::toHex(const uint8_t *in, size_t size, uint8_t *out) -{ - for (size_t i = 0; i < size; i++) { - out[i * 2] = hf_bin2hex((in[i] & 0xF0) >> 4); - out[i * 2 + 1] = hf_bin2hex(in[i] & 0x0F); - } -} - - -xmrig::String xmrig::Buffer::toHex() const -{ - if (m_size == 0) { - return String(); - } - - char *buf = new char[m_size * 2 + 1]; - buf[m_size * 2] = '\0'; - - toHex(m_data, m_size, buf); - - return String(buf); -} - - -void xmrig::Buffer::copy(const char *data, size_t size) -{ - if (size == 0) { - m_data = nullptr; - m_size = 0; - - return; - } - - m_data = new char[size]; - m_size = size; - - memcpy(m_data, data, m_size); -} - - -void xmrig::Buffer::move(Buffer &&other) -{ - if (m_size > 0) { - delete [] m_data; - } - - m_data = other.m_data; - m_size = other.m_size; - - other.m_data = nullptr; - other.m_size = 0; -} diff --git a/src/base/tools/Buffer.h b/src/base/tools/Buffer.h index 33b186c0..91c98e64 100644 --- a/src/base/tools/Buffer.h +++ b/src/base/tools/Buffer.h @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * 
Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,67 +20,14 @@ #define XMRIG_BUFFER_H -#include "base/tools/String.h" +#include +#include namespace xmrig { -class Buffer -{ -public: - Buffer() = default; - Buffer(Buffer &&other) noexcept; - Buffer(const Buffer &other); - Buffer(const char *data, size_t size); - Buffer(size_t size); - ~Buffer(); - - - inline bool isEmpty() const { return size() == 0; } - inline bool isEqual(const Buffer &other) const { return m_size == other.m_size && (m_size == 0 || memcmp(m_data, other.m_data, m_size) == 0); } - inline char *data() { return m_data; } - inline const char *data() const { return m_data; } - inline size_t size() const { return m_size; } - inline void from(const Buffer &other) { from(other.data(), other.size()); } - - - void from(const char *data, size_t size); - - - inline bool operator!=(const Buffer &other) const { return !isEqual(other); } - inline bool operator==(const Buffer &other) const { return isEqual(other); } - inline Buffer &operator=(Buffer &&other) noexcept { move(std::move(other)); return *this; } - inline Buffer &operator=(const Buffer &other) { from(other); return *this; } - - - static Buffer allocUnsafe(size_t size); - static Buffer randomBytes(const size_t size); - - static inline Buffer alloc(size_t size) { return Buffer(size); } - - - inline static bool fromHex(const char *in, size_t size, char *out) { return fromHex(reinterpret_cast(in), size, reinterpret_cast(out)); } - inline static bool fromHex(const char *in, size_t size, uint8_t *out) { return fromHex(reinterpret_cast(in), size, out); } - inline static Buffer fromHex(const char *data) { return fromHex(data, strlen(data)); } - inline static Buffer fromHex(const String &str) { return fromHex(str.data(), str.size()); } - inline static String toHex(const char *in, size_t size) { return Buffer(in, size).toHex(); } - inline static String toHex(const uint8_t *in, size_t size) { return Buffer(reinterpret_cast(in), size).toHex(); } - inline static void toHex(const char *in, size_t size, char *out) { return toHex(reinterpret_cast(in), size, reinterpret_cast(out)); } - inline static void toHex(const uint8_t *in, size_t size, char *out) { return toHex(in, size, reinterpret_cast(out)); } - - static bool fromHex(const uint8_t *in, size_t size, uint8_t *out); - static Buffer fromHex(const char *data, size_t size); - static void toHex(const uint8_t *in, size_t size, uint8_t *out); - String toHex() const; - -private: - void copy(const char *data, size_t size); - void move(Buffer &&other); - - char *m_data = nullptr; - size_t m_size = 0; -}; +using Buffer = std::vector; } /* namespace xmrig */ diff --git a/src/base/tools/Cvt.cpp b/src/base/tools/Cvt.cpp new file mode 100644 index 00000000..c80c022c --- /dev/null +++ b/src/base/tools/Cvt.cpp @@ -0,0 +1,275 @@ +/* XMRig + * Copyright (c) 2013-2020 Frank Denis + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +#include "base/tools/Cvt.h" +#include "3rdparty/rapidjson/document.h" + + +#include +#include + + +#ifdef XMRIG_SODIUM +# include +#endif + + +namespace xmrig { + + +#ifndef XMRIG_SODIUM +static std::random_device randomDevice; +static std::mt19937 randomEngine(randomDevice()); + + +static char *cvt_bin2hex(char *const hex, const size_t hex_maxlen, const unsigned char *const bin, const size_t bin_len) +{ + size_t i = 0U; + unsigned int x; + int b; + int c; + + if (bin_len >= SIZE_MAX / 2 || hex_maxlen <= bin_len * 2U) { + return nullptr; /* LCOV_EXCL_LINE */ + } + + while (i < bin_len) { + c = bin[i] & 0xf; + b = bin[i] >> 4; + x = (unsigned char) (87U + c + (((c - 10U) >> 8) & ~38U)) << 8 | + (unsigned char) (87U + b + (((b - 10U) >> 8) & ~38U)); + hex[i * 2U] = (char) x; + x >>= 8; + hex[i * 2U + 1U] = (char) x; + i++; + } + hex[i * 2U] = 0U; + + return hex; +} + + +static int cvt_hex2bin(unsigned char *const bin, const size_t bin_maxlen, const char *const hex, const size_t hex_len, const char *const ignore, size_t *const bin_len, const char **const hex_end) +{ + size_t bin_pos = 0U; + size_t hex_pos = 0U; + int ret = 0; + unsigned char c; + unsigned char c_acc = 0U; + unsigned char c_alpha0; + unsigned char c_alpha; + unsigned char c_num0; + unsigned char c_num; + unsigned char c_val; + unsigned char state = 0U; + + while (hex_pos < hex_len) { + c = (unsigned char) hex[hex_pos]; + c_num = c ^ 48U; + c_num0 = (c_num - 10U) >> 8; + c_alpha = (c & ~32U) - 55U; + c_alpha0 = ((c_alpha - 10U) ^ (c_alpha - 16U)) >> 8; + + if ((c_num0 | c_alpha0) == 0U) { + if (ignore != nullptr && state == 0U && strchr(ignore, c) != nullptr) { + hex_pos++; + continue; + } + break; + } + + c_val = (c_num0 & c_num) | (c_alpha0 & c_alpha); + + if (bin_pos >= bin_maxlen) { + ret = -1; + errno = ERANGE; + break; + } + + if (state == 0U) { + c_acc = c_val * 16U; + } else { + bin[bin_pos++] = c_acc | c_val; + } + + state = ~state; + hex_pos++; + } + + if (state != 0U) { + hex_pos--; + errno = EINVAL; + ret = -1; + } + + if (ret != 0) { + bin_pos = 0U; + } + + if (hex_end != nullptr) { + *hex_end = &hex[hex_pos]; + } else if (hex_pos != hex_len) { + errno = EINVAL; + ret = -1; + } + + if (bin_len != nullptr) { + *bin_len = bin_pos; + } + + return ret; +} + +#define sodium_bin2hex cvt_bin2hex +#define sodium_hex2bin cvt_hex2bin +#endif + + +template +inline bool fromHexImpl(T &buf, const char *in, size_t size) +{ + assert(in != nullptr && size > 0); + if (in == nullptr || size == 0) { + return false; + } + + buf.resize(size / 2); + + return sodium_hex2bin(reinterpret_cast(&buf.front()), buf.size(), in, size, nullptr, nullptr, nullptr) == 0; +} + + +} // namespace xmrig + + +bool xmrig::Cvt::fromHex(Buffer &buf, const char *in, size_t size) +{ + return fromHexImpl(buf, in, size); +} + + +bool xmrig::Cvt::fromHex(Buffer &buf, const rapidjson::Value &value) +{ + if (!value.IsString()) { + return false; + } + + return fromHexImpl(buf, value.GetString(), value.GetStringLength()); +} + + +bool xmrig::Cvt::fromHex(std::string &buf, const char *in, size_t size) +{ + return fromHexImpl(buf, in, size); +} + + +bool xmrig::Cvt::fromHex(uint8_t *bin, size_t 
bin_maxlen, const char *hex, size_t hex_len) +{ + assert(hex != nullptr && hex_len > 0); + if (hex == nullptr || hex_len == 0) { + return false; + } + + return sodium_hex2bin(bin, bin_maxlen, hex, hex_len, nullptr, nullptr, nullptr) == 0; +} + + +bool xmrig::Cvt::fromHex(uint8_t *bin, size_t max, const rapidjson::Value &value) +{ + if (!value.IsString()) { + return false; + } + + return fromHex(bin, max, value.GetString(), value.GetStringLength()); +} + + +xmrig::Buffer xmrig::Cvt::fromHex(const char *in, size_t size) +{ + Buffer buf; + if (!fromHex(buf, in, size)) { + return {}; + } + + return buf; +} + + +bool xmrig::Cvt::toHex(char *hex, size_t hex_maxlen, const uint8_t *bin, size_t bin_len) +{ + return sodium_bin2hex(hex, hex_maxlen, bin, bin_len) != nullptr; +} + + +xmrig::Buffer xmrig::Cvt::randomBytes(const size_t size) +{ + Buffer buf(size); + +# ifndef XMRIG_SODIUM + std::uniform_int_distribution<> dis(0, 255); + + for (size_t i = 0; i < size; ++i) { + buf[i] = static_cast(dis(randomEngine)); + } +# else + randombytes_buf(buf.data(), size); +# endif + + return buf; +} + + +rapidjson::Value xmrig::Cvt::toHex(const Buffer &data, rapidjson::Document &doc) +{ + return toHex(data.data(), data.size(), doc); +} + + +rapidjson::Value xmrig::Cvt::toHex(const std::string &data, rapidjson::Document &doc) +{ + return toHex(reinterpret_cast(data.data()), data.size(), doc); +} + + +rapidjson::Value xmrig::Cvt::toHex(const uint8_t *in, size_t size, rapidjson::Document &doc) +{ + return toHex(in, size).toJSON(doc); +} + + +xmrig::String xmrig::Cvt::toHex(const uint8_t *in, size_t size) +{ + assert(in != nullptr && size > 0); + if (in == nullptr || size == 0) { + return {}; + } + + const size_t hex_maxlen = size * 2 + 1; + + char *buf = new char[hex_maxlen]; + if (!toHex(buf, hex_maxlen, in, size)) { + delete [] buf; + + return {}; + } + + return buf; +} diff --git a/src/base/tools/Cvt.h b/src/base/tools/Cvt.h new file mode 100644 index 00000000..8b67b146 --- /dev/null +++ b/src/base/tools/Cvt.h @@ -0,0 +1,61 @@ +/* XMRig + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
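Editor's note: for JSON payloads the same header offers overloads that go straight from bytes to a rapidjson value; the getblocktemplate request earlier in this diff chains them as Cvt::toHex(Cvt::randomBytes(...)).toJSON(doc). A sketch of that pattern in isolation (the wrapper function and the 8-byte reserve size are illustrative, mirroring kBlobReserveSize above):

    #include "3rdparty/rapidjson/document.h"
    #include "base/tools/Cvt.h"

    static void add_extra_nonce_sketch(rapidjson::Document &doc, rapidjson::Value &params)
    {
        auto &allocator = doc.GetAllocator();

        constexpr size_t kReserveSize = 8;   // mirrors kBlobReserveSize

        // 8 random bytes -> 16-character hex String -> rapidjson value owned by doc
        params.AddMember("extra_nonce", xmrig::Cvt::toHex(xmrig::Cvt::randomBytes(kReserveSize)).toJSON(doc), allocator);
    }
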
+ */ + +#ifndef XMRIG_CVT_H +#define XMRIG_CVT_H + + +#include "3rdparty/rapidjson/fwd.h" +#include "base/tools/Buffer.h" +#include "base/tools/String.h" + + +#include + + +namespace xmrig { + + +class Cvt +{ +public: + inline static bool fromHex(Buffer &buf, const String &hex) { return fromHex(buf, hex.data(), hex.size()); } + inline static Buffer fromHex(const std::string &hex) { return fromHex(hex.data(), hex.size()); } + inline static Buffer fromHex(const String &hex) { return fromHex(hex.data(), hex.size()); } + inline static String toHex(const Buffer &data) { return toHex(data.data(), data.size()); } + inline static String toHex(const std::string &data) { return toHex(reinterpret_cast(data.data()), data.size()); } + + static bool fromHex(Buffer &buf, const char *in, size_t size); + static bool fromHex(Buffer &buf, const rapidjson::Value &value); + static bool fromHex(std::string &buf, const char *in, size_t size); + static bool fromHex(uint8_t *bin, size_t bin_maxlen, const char *hex, size_t hex_len); + static bool fromHex(uint8_t *bin, size_t bin_maxlen, const rapidjson::Value &value); + static bool toHex(char *hex, size_t hex_maxlen, const uint8_t *bin, size_t bin_len); + static Buffer fromHex(const char *in, size_t size); + static Buffer randomBytes(size_t size); + static rapidjson::Value toHex(const Buffer &data, rapidjson::Document &doc); + static rapidjson::Value toHex(const std::string &data, rapidjson::Document &doc); + static rapidjson::Value toHex(const uint8_t *in, size_t size, rapidjson::Document &doc); + static String toHex(const uint8_t *in, size_t size); +}; + + +} /* namespace xmrig */ + + +#endif /* XMRIG_CVT_H */ diff --git a/src/base/tools/String.cpp b/src/base/tools/String.cpp index cfc8e040..f001ff8f 100644 --- a/src/base/tools/String.cpp +++ b/src/base/tools/String.cpp @@ -125,7 +125,7 @@ std::vector xmrig::String::split(char sep) const for (pos = 0; pos < m_size; ++pos) { if (m_data[pos] == sep) { - if ((pos - start) > 0) { + if (pos > start) { out.emplace_back(m_data + start, pos - start); } @@ -133,7 +133,7 @@ std::vector xmrig::String::split(char sep) const } } - if ((pos - start) > 0) { + if (pos > start) { out.emplace_back(m_data + start, pos - start); } diff --git a/src/config.json b/src/config.json index 68fb439f..aad27326 100644 --- a/src/config.json +++ b/src/config.json @@ -16,6 +16,7 @@ "title": true, "randomx": { "init": -1, + "init-avx2": -1, "mode": "auto", "1gb-pages": false, "rdmsr": true, diff --git a/src/core/Miner.cpp b/src/core/Miner.cpp index 67a93488..9c96d1e3 100644 --- a/src/core/Miner.cpp +++ b/src/core/Miner.cpp @@ -38,12 +38,10 @@ #include "base/kernel/Platform.h" #include "base/net/stratum/Job.h" #include "base/tools/Object.h" -#include "base/tools/Profiler.h" #include "base/tools/Timer.h" #include "core/config/Config.h" #include "core/Controller.h" #include "crypto/common/Nonce.h" -#include "crypto/rx/Rx.h" #include "version.h" @@ -64,6 +62,8 @@ #ifdef XMRIG_ALGO_RANDOMX +# include "crypto/rx/Profiler.h" +# include "crypto/rx/Rx.h" # include "crypto/rx/RxConfig.h" #endif @@ -203,7 +203,7 @@ public: continue; } - for (size_t i = 1; i < hr->threads(); i++) { + for (size_t i = 0; i < hr->threads(); i++) { Value thread(kArrayType); thread.PushBack(Hashrate::normalize(hr->calc(i, Hashrate::ShortInterval)), allocator); thread.PushBack(Hashrate::normalize(hr->calc(i, Hashrate::MediumInterval)), allocator); diff --git a/src/core/config/Config_default.h b/src/core/config/Config_default.h index 6c8106ca..94cb88d6 100644 --- 
a/src/core/config/Config_default.h +++ b/src/core/config/Config_default.h @@ -50,6 +50,7 @@ R"===( "colors": true, "randomx": { "init": -1, + "init-avx2": -1, "mode": "auto", "1gb-pages": false, "rdmsr": true, diff --git a/src/crypto/astrobwt/AstroBWT.cpp b/src/crypto/astrobwt/AstroBWT.cpp index 2dd701d3..2286a5fe 100644 --- a/src/crypto/astrobwt/AstroBWT.cpp +++ b/src/crypto/astrobwt/AstroBWT.cpp @@ -173,7 +173,7 @@ void sort_indices(int N, const uint8_t* v, uint64_t* indices, uint64_t* tmp_indi bool xmrig::astrobwt::astrobwt_dero(const void* input_data, uint32_t input_size, void* scratchpad, uint8_t* output_hash, int stage2_max_size, bool avx2) { - uint8_t key[32]; + alignas(8) uint8_t key[32]; uint8_t* scratchpad_ptr = (uint8_t*)(scratchpad) + 64; uint8_t* stage1_output = scratchpad_ptr; uint8_t* stage2_output = scratchpad_ptr; diff --git a/src/crypto/cn/CnHash.cpp b/src/crypto/cn/CnHash.cpp index d11202a4..9a9e5ea5 100644 --- a/src/crypto/cn/CnHash.cpp +++ b/src/crypto/cn/CnHash.cpp @@ -219,7 +219,7 @@ static void patchAsmVariants() patchCode(cn_double_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER); } - VirtualMemory::protectExecutableMemory(base, allocation_size); + VirtualMemory::protectRX(base, allocation_size); VirtualMemory::flushInstructionCache(base, allocation_size); } } // namespace xmrig diff --git a/src/crypto/cn/r/CryptonightR_gen.cpp b/src/crypto/cn/r/CryptonightR_gen.cpp index 30f72363..e6a61967 100644 --- a/src/crypto/cn/r/CryptonightR_gen.cpp +++ b/src/crypto/cn/r/CryptonightR_gen.cpp @@ -76,7 +76,7 @@ static inline void add_random_math(uint8_t* &p, const V4_Instruction* code, int void_func begin = instructions[c]; - if ((ASM = xmrig::Assembly::BULLDOZER) && (inst.opcode == MUL) && !is_64_bit) { + if ((ASM == xmrig::Assembly::BULLDOZER) && (inst.opcode == MUL) && !is_64_bit) { // AMD Bulldozer has latency 4 for 32-bit IMUL and 6 for 64-bit IMUL // Always use 32-bit IMUL for AMD Bulldozer in 32-bit mode - skip prefix 0x48 and change 0x49 to 0x41 uint8_t* prefix = reinterpret_cast(begin); diff --git a/src/crypto/common/Assembly.cpp b/src/crypto/common/Assembly.cpp index 5b7f8959..eb6b8537 100644 --- a/src/crypto/common/Assembly.cpp +++ b/src/crypto/common/Assembly.cpp @@ -1,13 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 SChernykh - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,8 +17,8 @@ */ -#include -#include +#include +#include #ifdef _MSC_VER diff --git a/src/crypto/common/Assembly.h b/src/crypto/common/Assembly.h index 803ea716..8d7ba43c 100644 --- a/src/crypto/common/Assembly.h +++ b/src/crypto/common/Assembly.h @@ -1,12 +1,6 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -57,6 +51,7 @@ 
public: rapidjson::Value toJSON() const; inline bool isEqual(const Assembly &other) const { return m_id == other.m_id; } + inline Id id() const { return m_id; } inline bool operator!=(Assembly::Id id) const { return m_id != id; } inline bool operator!=(const Assembly &other) const { return !isEqual(other); } diff --git a/src/crypto/common/VirtualMemory.cpp b/src/crypto/common/VirtualMemory.cpp index b34e6de0..7d8d980b 100644 --- a/src/crypto/common/VirtualMemory.cpp +++ b/src/crypto/common/VirtualMemory.cpp @@ -1,14 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 tevador - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 tevador + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/crypto/common/VirtualMemory.h b/src/crypto/common/VirtualMemory.h index 1bfdab80..e0065e3e 100644 --- a/src/crypto/common/VirtualMemory.h +++ b/src/crypto/common/VirtualMemory.h @@ -1,14 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 tevador - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 tevador + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -49,17 +42,22 @@ public: VirtualMemory(size_t size, bool hugePages, bool oneGbPages, bool usePool, uint32_t node = 0, size_t alignSize = 64); ~VirtualMemory(); - inline bool isHugePages() const { return m_flags.test(FLAG_HUGEPAGES); } - inline bool isOneGbPages() const { return m_flags.test(FLAG_1GB_PAGES); } - inline size_t size() const { return m_size; } - inline size_t capacity() const { return m_capacity; } - inline uint8_t *raw() const { return m_scratchpad; } - inline uint8_t *scratchpad() const { return m_scratchpad; } + inline bool isHugePages() const { return m_flags.test(FLAG_HUGEPAGES); } + inline bool isOneGbPages() const { return m_flags.test(FLAG_1GB_PAGES); } + inline size_t size() const { return m_size; } + inline size_t capacity() const { return m_capacity; } + inline uint8_t *raw() const { return m_scratchpad; } + inline uint8_t *scratchpad() const { return m_scratchpad; } + + inline static void flushInstructionCache(void *p1, void *p2) { flushInstructionCache(p1, static_cast(p2) - static_cast(p1)); } HugePagesInfo hugePages() const; static bool isHugepagesAvailable(); static bool isOneGbPagesAvailable(); + static bool protectRW(void *p, size_t size); + static bool protectRWX(void *p, size_t size); + static bool protectRX(void *p, size_t size); static uint32_t bindToNUMANode(int64_t affinity); static void *allocateExecutableMemory(size_t size, bool hugePages); static void *allocateLargePagesMemory(size_t size); @@ -68,8 +66,6 @@ public: static void flushInstructionCache(void *p, size_t size); static void freeLargePagesMemory(void *p, size_t size); static void init(size_t poolSize, bool hugePages); - 
static void protectExecutableMemory(void *p, size_t size); - static void unprotectExecutableMemory(void *p, size_t size); static inline constexpr size_t align(size_t pos, size_t align = 2097152) { return ((pos - 1) / align + 1) * align; } diff --git a/src/crypto/common/VirtualMemory_unix.cpp b/src/crypto/common/VirtualMemory_unix.cpp index 70bb13d5..60d77cca 100644 --- a/src/crypto/common/VirtualMemory_unix.cpp +++ b/src/crypto/common/VirtualMemory_unix.cpp @@ -1,14 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2018-2019 tevador - * Copyright 2016-2019 XMRig , + * Copyright (c) 2018-2020 tevador + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -34,8 +27,18 @@ #include "crypto/common/VirtualMemory.h" -#if defined(__APPLE__) +#ifdef XMRIG_OS_APPLE +# include # include +# include +# include +# ifdef XMRIG_ARM +# define MEXTRA MAP_JIT +# else +# define MEXTRA 0 +# endif +#else +# define MEXTRA 0 #endif @@ -47,9 +50,20 @@ #endif +#ifdef XMRIG_SECURE_JIT +# define SECURE_PROT_EXEC 0 +#else +# define SECURE_PROT_EXEC PROT_EXEC +#endif + + bool xmrig::VirtualMemory::isHugepagesAvailable() { +# if defined(XMRIG_OS_MACOS) && defined(XMRIG_ARM) + return false; +# else return true; +# endif } @@ -63,19 +77,51 @@ bool xmrig::VirtualMemory::isOneGbPagesAvailable() } +bool xmrig::VirtualMemory::protectRW(void *p, size_t size) +{ +# if defined(XMRIG_OS_APPLE) && defined(XMRIG_ARM) + pthread_jit_write_protect_np(false); + return true; +# else + return mprotect(p, size, PROT_READ | PROT_WRITE) == 0; +# endif +} + + +bool xmrig::VirtualMemory::protectRWX(void *p, size_t size) +{ + return mprotect(p, size, PROT_READ | PROT_WRITE | PROT_EXEC) == 0; +} + + +bool xmrig::VirtualMemory::protectRX(void *p, size_t size) +{ +# if defined(XMRIG_OS_APPLE) && defined(XMRIG_ARM) + pthread_jit_write_protect_np(true); + flushInstructionCache(p, size); + return true; +# else + return mprotect(p, size, PROT_READ | PROT_EXEC) == 0; +# endif +} + + void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size, bool hugePages) { -# if defined(__APPLE__) - void *mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); +# if defined(XMRIG_OS_APPLE) + void *mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON | MEXTRA, -1, 0); +# ifdef XMRIG_ARM + pthread_jit_write_protect_np(false); +# endif # elif defined(__FreeBSD__) void *mem = nullptr; if (hugePages) { - mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0); + mem = mmap(0, size, PROT_READ | PROT_WRITE | SECURE_PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0); } if (!mem) { - mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + mem = mmap(0, size, PROT_READ | PROT_WRITE | SECURE_PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); } # else @@ -91,11 +137,11 @@ void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size, bool hugePages void *mem = nullptr; if (hugePages) { - mem = mmap(0, align(size), PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | 
MAP_ANONYMOUS | MAP_POPULATE | flag_2mb, -1, 0); + mem = mmap(0, align(size), PROT_READ | PROT_WRITE | SECURE_PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE | flag_2mb, -1, 0); } if (!mem) { - mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + mem = mmap(0, size, PROT_READ | PROT_WRITE | SECURE_PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); } # endif @@ -152,7 +198,9 @@ void *xmrig::VirtualMemory::allocateOneGbPagesMemory(size_t size) void xmrig::VirtualMemory::flushInstructionCache(void *p, size_t size) { -# ifdef HAVE_BUILTIN_CLEAR_CACHE +# if defined(XMRIG_OS_APPLE) + sys_icache_invalidate(p, size); +# elif defined (HAVE_BUILTIN_CLEAR_CACHE) || defined (__GNUC__) __builtin___clear_cache(reinterpret_cast(p), reinterpret_cast(p) + size); # endif } @@ -164,18 +212,6 @@ void xmrig::VirtualMemory::freeLargePagesMemory(void *p, size_t size) } -void xmrig::VirtualMemory::protectExecutableMemory(void *p, size_t size) -{ - mprotect(p, size, PROT_READ | PROT_EXEC); -} - - -void xmrig::VirtualMemory::unprotectExecutableMemory(void *p, size_t size) -{ - mprotect(p, size, PROT_WRITE | PROT_EXEC); -} - - void xmrig::VirtualMemory::osInit(bool) { } diff --git a/src/crypto/common/VirtualMemory_win.cpp b/src/crypto/common/VirtualMemory_win.cpp index 659a94a6..fee5f585 100644 --- a/src/crypto/common/VirtualMemory_win.cpp +++ b/src/crypto/common/VirtualMemory_win.cpp @@ -1,14 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2018-2019 tevador - * Copyright 2016-2019 XMRig , + * Copyright (c) 2018-2020 tevador + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,6 +29,13 @@ #include "crypto/common/VirtualMemory.h" +#ifdef XMRIG_SECURE_JIT +# define SECURE_PAGE_EXECUTE_READWRITE PAGE_READWRITE +#else +# define SECURE_PAGE_EXECUTE_READWRITE PAGE_EXECUTE_READWRITE +#endif + + namespace xmrig { @@ -63,7 +63,7 @@ Return value: TRUE indicates success, FALSE failure. 
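Editor's note on the Win32 cleanups that follow: the "!= TRUE" and "== TRUE" comparisons are dropped because BOOL is just an int and these APIs only guarantee a nonzero result on success, so comparing against TRUE (1) can misclassify a successful call. A one-line sketch of the safer form (the wrapper name is illustrative):

    #include <windows.h>

    // Treat any nonzero BOOL as success instead of comparing against TRUE.
    static bool openAdjustToken(HANDLE &token)
    {
        return OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token) != FALSE;
    }
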
static BOOL SetLockPagesPrivilege() { HANDLE token; - if (OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token) != TRUE) { + if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token)) { return FALSE; } @@ -71,12 +71,12 @@ static BOOL SetLockPagesPrivilege() { tp.PrivilegeCount = 1; tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - if (LookupPrivilegeValue(nullptr, SE_LOCK_MEMORY_NAME, &(tp.Privileges[0].Luid)) != TRUE) { + if (!LookupPrivilegeValue(nullptr, SE_LOCK_MEMORY_NAME, &(tp.Privileges[0].Luid))) { return FALSE; } BOOL rc = AdjustTokenPrivileges(token, FALSE, (PTOKEN_PRIVILEGES) &tp, 0, nullptr, nullptr); - if (rc != TRUE || GetLastError() != ERROR_SUCCESS) { + if (!rc || GetLastError() != ERROR_SUCCESS) { return FALSE; } @@ -101,7 +101,7 @@ static BOOL ObtainLockPagesPrivilege() { HANDLE token; PTOKEN_USER user = nullptr; - if (OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &token) == TRUE) { + if (OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &token)) { DWORD size = 0; GetTokenInformation(token, TokenUser, nullptr, 0, &size); @@ -162,16 +162,40 @@ bool xmrig::VirtualMemory::isOneGbPagesAvailable() } +bool xmrig::VirtualMemory::protectRW(void *p, size_t size) +{ + DWORD oldProtect; + + return VirtualProtect(p, size, PAGE_READWRITE, &oldProtect) != 0; +} + + +bool xmrig::VirtualMemory::protectRWX(void *p, size_t size) +{ + DWORD oldProtect; + + return VirtualProtect(p, size, PAGE_EXECUTE_READWRITE, &oldProtect) != 0; +} + + +bool xmrig::VirtualMemory::protectRX(void *p, size_t size) +{ + DWORD oldProtect; + + return VirtualProtect(p, size, PAGE_EXECUTE_READ, &oldProtect) != 0; +} + + void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size, bool hugePages) { void* result = nullptr; if (hugePages) { - result = VirtualAlloc(nullptr, align(size), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_EXECUTE_READWRITE); + result = VirtualAlloc(nullptr, align(size), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, SECURE_PAGE_EXECUTE_READWRITE); } if (!result) { - result = VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); + result = VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, SECURE_PAGE_EXECUTE_READWRITE); } return result; @@ -209,20 +233,6 @@ void xmrig::VirtualMemory::freeLargePagesMemory(void *p, size_t) } -void xmrig::VirtualMemory::protectExecutableMemory(void *p, size_t size) -{ - DWORD oldProtect; - VirtualProtect(p, size, PAGE_EXECUTE_READ, &oldProtect); -} - - -void xmrig::VirtualMemory::unprotectExecutableMemory(void *p, size_t size) -{ - DWORD oldProtect; - VirtualProtect(p, size, PAGE_EXECUTE_READWRITE, &oldProtect); -} - - void xmrig::VirtualMemory::osInit(bool hugePages) { if (hugePages) { diff --git a/src/crypto/randomx/aes_hash.cpp b/src/crypto/randomx/aes_hash.cpp index 6697a4ed..206300fd 100644 --- a/src/crypto/randomx/aes_hash.cpp +++ b/src/crypto/randomx/aes_hash.cpp @@ -28,12 +28,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
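Editor's note: taken together, the Unix and Windows hunks above replace the old protect/unprotectExecutableMemory pair with an explicit W^X API. With WITH_SECURE_JIT (XMRIG_SECURE_JIT) the JIT pages are allocated without execute permission and the code generators flip them between RW and RX around each emission, pairing protectRX with an instruction-cache flush just as CnHash.cpp does earlier in this diff. A minimal sketch of that sequencing, assuming a caller that owns a JIT buffer (names are illustrative):

    #include "crypto/common/VirtualMemory.h"

    #include <cstring>

    static void emit_code_securely(void *jitMemory, size_t size, const void *blob, size_t blobSize)
    {
        using xmrig::VirtualMemory;

        VirtualMemory::protectRW(jitMemory, size);                // writable, not executable
        memcpy(jitMemory, blob, blobSize);                        // generate or patch machine code here

        VirtualMemory::protectRX(jitMemory, size);                // back to read + execute
        VirtualMemory::flushInstructionCache(jitMemory, size);    // same pairing as in CnHash.cpp above
    }
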
#include #include +#include #include "crypto/randomx/aes_hash.hpp" -#include "crypto/randomx/soft_aes.h" -#include "crypto/randomx/randomx.h" #include "base/tools/Chrono.h" -#include "base/tools/Profiler.h" +#include "crypto/randomx/randomx.h" +#include "crypto/randomx/soft_aes.h" +#include "crypto/rx/Profiler.h" #define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d #define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e @@ -371,7 +372,7 @@ hashAndFillAes1Rx4_impl* softAESImpl = &hashAndFillAes1Rx4<1,1>; void SelectSoftAESImpl(size_t threadsCount) { constexpr int test_length_ms = 100; - const std::vector impl = { + const std::array impl = { &hashAndFillAes1Rx4<1,1>, &hashAndFillAes1Rx4<2,1>, &hashAndFillAes1Rx4<2,2>, diff --git a/src/crypto/randomx/asm/program_sshash_avx2_constants.inc b/src/crypto/randomx/asm/program_sshash_avx2_constants.inc new file mode 100644 index 00000000..e2e5e0b1 --- /dev/null +++ b/src/crypto/randomx/asm/program_sshash_avx2_constants.inc @@ -0,0 +1,28 @@ +r0_avx2_increments: + db 2,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0 +mul_hi_avx2_data: + db 0,0,0,0,1,0,0,0 +r0_avx2_mul: + ;#/ 6364136223846793005 + db 45, 127, 149, 76, 45, 244, 81, 88 +r1_avx2_add: + ;#/ 9298411001130361340 + db 252, 161, 245, 89, 138, 151, 10, 129 +r2_avx2_add: + ;#/ 12065312585734608966 + db 70, 216, 194, 56, 223, 153, 112, 167 +r3_avx2_add: + ;#/ 9306329213124626780 + db 92, 73, 34, 191, 28, 185, 38, 129 +r4_avx2_add: + ;#/ 5281919268842080866 + db 98, 138, 159, 23, 151, 37, 77, 73 +r5_avx2_add: + ;#/ 10536153434571861004 + db 12, 236, 170, 206, 185, 239, 55, 146 +r6_avx2_add: + ;#/ 3398623926847679864 + db 120, 45, 230, 108, 116, 86, 42, 47 +r7_avx2_add: + ;#/ 9549104520008361294 + db 78, 229, 44, 182, 247, 59, 133, 132 \ No newline at end of file diff --git a/src/crypto/randomx/asm/program_sshash_avx2_epilogue.inc b/src/crypto/randomx/asm/program_sshash_avx2_epilogue.inc new file mode 100644 index 00000000..7ebd871f --- /dev/null +++ b/src/crypto/randomx/asm/program_sshash_avx2_epilogue.inc @@ -0,0 +1,31 @@ + add rsp, 40 + pop r9 + + movdqu xmm0, xmmword ptr [rsp] + movdqu xmm1, xmmword ptr [rsp + 16] + movdqu xmm2, xmmword ptr [rsp + 32] + movdqu xmm3, xmmword ptr [rsp + 48] + movdqu xmm4, xmmword ptr [rsp + 64] + movdqu xmm5, xmmword ptr [rsp + 80] + movdqu xmm6, xmmword ptr [rsp + 96] + movdqu xmm7, xmmword ptr [rsp + 112] + movdqu xmm8, xmmword ptr [rsp + 128] + movdqu xmm9, xmmword ptr [rsp + 144] + movdqu xmm10, xmmword ptr [rsp + 160] + movdqu xmm11, xmmword ptr [rsp + 176] + movdqu xmm12, xmmword ptr [rsp + 192] + movdqu xmm13, xmmword ptr [rsp + 208] + movdqu xmm14, xmmword ptr [rsp + 224] + movdqu xmm15, xmmword ptr [rsp + 240] + vzeroupper + add rsp, 256 + + pop r15 + pop r14 + pop r13 + pop r12 + pop rsi + pop rdi + pop rbp + pop rbx + ret diff --git a/src/crypto/randomx/asm/program_sshash_avx2_loop_begin.inc b/src/crypto/randomx/asm/program_sshash_avx2_loop_begin.inc new file mode 100644 index 00000000..8055cf28 --- /dev/null +++ b/src/crypto/randomx/asm/program_sshash_avx2_loop_begin.inc @@ -0,0 +1,37 @@ + ;# prefetch RandomX dataset lines + prefetchnta byte ptr [rsi] + prefetchnta byte ptr [rsi+64] + prefetchnta byte ptr [rsi+128] + prefetchnta byte ptr [rsi+192] + prefetchnta byte ptr [rsi+256] + + ;# prefetch RandomX cache lines + mov rbx, rbp + and rbx, RANDOMX_CACHE_MASK + shl rbx, 6 + add rbx, rdi + prefetchnta byte ptr [rbx] + lea rax, [rbp+1] + and rax, RANDOMX_CACHE_MASK + shl rax, 6 + add rax, rdi + 
prefetchnta byte ptr [rax] + mov [rsp], rax + lea rax, [rbp+2] + and rax, RANDOMX_CACHE_MASK + shl rax, 6 + add rax, rdi + prefetchnta byte ptr [rax] + mov [rsp+8], rax + lea rax, [rbp+3] + and rax, RANDOMX_CACHE_MASK + shl rax, 6 + add rax, rdi + prefetchnta byte ptr [rax] + mov [rsp+16], rax + lea rax, [rbp+4] + and rax, RANDOMX_CACHE_MASK + shl rax, 6 + add rax, rdi + prefetchnta byte ptr [rax] + mov [rsp+24], rax diff --git a/src/crypto/randomx/asm/program_sshash_avx2_loop_end.inc b/src/crypto/randomx/asm/program_sshash_avx2_loop_end.inc new file mode 100644 index 00000000..09c2deeb --- /dev/null +++ b/src/crypto/randomx/asm/program_sshash_avx2_loop_end.inc @@ -0,0 +1,38 @@ + mov qword ptr [rsi+0], r8 + vpunpcklqdq ymm8, ymm0, ymm1 + mov qword ptr [rsi+8], r9 + vpunpcklqdq ymm9, ymm2, ymm3 + mov qword ptr [rsi+16], r10 + vpunpcklqdq ymm10, ymm4, ymm5 + mov qword ptr [rsi+24], r11 + vpunpcklqdq ymm11, ymm6, ymm7 + mov qword ptr [rsi+32], r12 + vpunpckhqdq ymm12, ymm0, ymm1 + mov qword ptr [rsi+40], r13 + vpunpckhqdq ymm13, ymm2, ymm3 + mov qword ptr [rsi+48], r14 + vpunpckhqdq ymm14, ymm4, ymm5 + mov qword ptr [rsi+56], r15 + vpunpckhqdq ymm15, ymm6, ymm7 + + vperm2i128 ymm0, ymm8, ymm9, 32 + vperm2i128 ymm1, ymm10, ymm11, 32 + vmovdqu ymmword ptr [rsi+64], ymm0 + vmovdqu ymmword ptr [rsi+96], ymm1 + vperm2i128 ymm2, ymm12, ymm13, 32 + vperm2i128 ymm3, ymm14, ymm15, 32 + vmovdqu ymmword ptr [rsi+128], ymm2 + vmovdqu ymmword ptr [rsi+160], ymm3 + vperm2i128 ymm4, ymm8, ymm9, 49 + vperm2i128 ymm5, ymm10, ymm11, 49 + vmovdqu ymmword ptr [rsi+192], ymm4 + vmovdqu ymmword ptr [rsi+224], ymm5 + vperm2i128 ymm6, ymm12, ymm13, 49 + vperm2i128 ymm7, ymm14, ymm15, 49 + vmovdqu ymmword ptr [rsi+256], ymm6 + vmovdqu ymmword ptr [rsi+288], ymm7 + + add rbp, 5 + add rsi, 320 + cmp rbp, qword ptr [rsp+40] + db 15, 130, 0, 0, 0, 0 ;# jb rel32 diff --git a/src/crypto/randomx/asm/program_sshash_avx2_save_registers.inc b/src/crypto/randomx/asm/program_sshash_avx2_save_registers.inc new file mode 100644 index 00000000..a551ffa4 --- /dev/null +++ b/src/crypto/randomx/asm/program_sshash_avx2_save_registers.inc @@ -0,0 +1,27 @@ + push rbx + push rbp + push rdi + push rsi + push r12 + push r13 + push r14 + push r15 + + ;# save all XMM registers just to be safe for all calling conventions + sub rsp, 256 + movdqu xmmword ptr [rsp], xmm0 + movdqu xmmword ptr [rsp + 16], xmm1 + movdqu xmmword ptr [rsp + 32], xmm2 + movdqu xmmword ptr [rsp + 48], xmm3 + movdqu xmmword ptr [rsp + 64], xmm4 + movdqu xmmword ptr [rsp + 80], xmm5 + movdqu xmmword ptr [rsp + 96], xmm6 + movdqu xmmword ptr [rsp + 112], xmm7 + movdqu xmmword ptr [rsp + 128], xmm8 + movdqu xmmword ptr [rsp + 144], xmm9 + movdqu xmmword ptr [rsp + 160], xmm10 + movdqu xmmword ptr [rsp + 176], xmm11 + movdqu xmmword ptr [rsp + 192], xmm12 + movdqu xmmword ptr [rsp + 208], xmm13 + movdqu xmmword ptr [rsp + 224], xmm14 + movdqu xmmword ptr [rsp + 240], xmm15 diff --git a/src/crypto/randomx/asm/program_sshash_avx2_ssh_load.inc b/src/crypto/randomx/asm/program_sshash_avx2_ssh_load.inc new file mode 100644 index 00000000..bed78094 --- /dev/null +++ b/src/crypto/randomx/asm/program_sshash_avx2_ssh_load.inc @@ -0,0 +1,50 @@ + sub rsp, 40 + mov [rsp], rbx + vmovdqu ymmword ptr [rsp+8], ymm14 + + mov rax, [rsp+40] + mov rbx, [rsp+48] + mov rcx, [rsp+56] + mov rdx, [rsp+64] + + vmovdqu ymm8, ymmword ptr [rax] ;# ymm8 = r0[1], r1[1], r2[1], r3[1] + vmovdqu ymm9, ymmword ptr [rbx] ;# ymm9 = r0[2], r1[2], r2[2], r3[2] + vmovdqu ymm10, ymmword ptr [rcx] ;# ymm10 = r0[3], 
r1[3], r2[3], r3[3] + vmovdqu ymm11, ymmword ptr [rdx] ;# ymm11 = r0[4], r1[4], r2[4], r3[4] + + vpunpcklqdq ymm12, ymm8, ymm9 ;# ymm12 = r0[1], r0[2], r2[1], r2[2] + vpunpcklqdq ymm13, ymm10, ymm11 ;# ymm13 = r0[3], r0[4], r2[3], r2[4] + vperm2i128 ymm14, ymm12, ymm13, 32 ;# ymm14 = r0[1], r0[2], r0[3], r0[4] + vpxor ymm0, ymm0, ymm14 + vperm2i128 ymm14, ymm12, ymm13, 49 ;# ymm14 = r2[1], r2[2], r2[3], r2[4] + vpxor ymm2, ymm2, ymm14 + + vpunpckhqdq ymm12, ymm8, ymm9 ;# ymm12 = r1[1], r1[2], r3[1], r3[2] + vpunpckhqdq ymm13, ymm10, ymm11 ;# ymm13 = r1[3], r1[4], r3[3], r3[4] + vperm2i128 ymm14, ymm12, ymm13, 32 ;# ymm14 = r1[1], r1[2], r1[3], r1[4] + vpxor ymm1, ymm1, ymm14 + vperm2i128 ymm14, ymm12, ymm13, 49 ;# ymm14 = r3[1], r3[2], r3[3], r3[4] + vpxor ymm3, ymm3, ymm14 + + vmovdqu ymm8, ymmword ptr [rax+32] ;# ymm8 = r4[1], r5[1], r6[1], r7[1] + vmovdqu ymm9, ymmword ptr [rbx+32] ;# ymm9 = r4[2], r5[2], r6[2], r7[2] + vmovdqu ymm10, ymmword ptr [rcx+32] ;# ymm10 = r4[3], r5[3], r6[3], r7[3] + vmovdqu ymm11, ymmword ptr [rdx+32] ;# ymm11 = r4[4], r5[4], r6[4], r7[4] + + vpunpcklqdq ymm12, ymm8, ymm9 ;# ymm12 = r4[1], r4[2], r6[1], r6[2] + vpunpcklqdq ymm13, ymm10, ymm11 ;# ymm13 = r4[3], r4[4], r6[3], r6[4] + vperm2i128 ymm14, ymm12, ymm13, 32 ;# ymm14 = r4[1], r4[2], r4[3], r4[4] + vpxor ymm4, ymm4, ymm14 + vperm2i128 ymm14, ymm12, ymm13, 49 ;# ymm14 = r6[1], r6[2], r6[3], r6[4] + vpxor ymm6, ymm6, ymm14 + + vpunpckhqdq ymm12, ymm8, ymm9 ;# ymm12 = r5[1], r5[2], r7[1], r7[2] + vpunpckhqdq ymm13, ymm10, ymm11 ;# ymm13 = r5[3], r5[4], r7[3], r7[4] + vperm2i128 ymm14, ymm12, ymm13, 32 ;# ymm14 = r5[1], r5[2], r5[3], r5[4] + vpxor ymm5, ymm5, ymm14 + vperm2i128 ymm14, ymm12, ymm13, 49 ;# ymm14 = r7[1], r7[2], r7[3], r7[4] + vpxor ymm7, ymm7, ymm14 + + mov rbx, [rsp] + vmovdqu ymm14, ymmword ptr [rsp+8] + add rsp, 40 diff --git a/src/crypto/randomx/asm/program_sshash_avx2_ssh_prefetch.inc b/src/crypto/randomx/asm/program_sshash_avx2_ssh_prefetch.inc new file mode 100644 index 00000000..072de864 --- /dev/null +++ b/src/crypto/randomx/asm/program_sshash_avx2_ssh_prefetch.inc @@ -0,0 +1,29 @@ + vmovdqu ymmword ptr [rsp], ymm0 + + mov rax, [rsp] + and rax, RANDOMX_CACHE_MASK + shl rax, 6 + add rax, rdi + mov [rsp], rax + prefetchnta byte ptr [rax] + + mov rax, [rsp+8] + and rax, RANDOMX_CACHE_MASK + shl rax, 6 + add rax, rdi + mov [rsp+8], rax + prefetchnta byte ptr [rax] + + mov rax, [rsp+16] + and rax, RANDOMX_CACHE_MASK + shl rax, 6 + add rax, rdi + mov [rsp+16], rax + prefetchnta byte ptr [rax] + + mov rax, [rsp+24] + and rax, RANDOMX_CACHE_MASK + shl rax, 6 + add rax, rdi + mov [rsp+24], rax + prefetchnta byte ptr [rax] diff --git a/src/crypto/randomx/dataset.cpp b/src/crypto/randomx/dataset.cpp index ade6f2b7..f03cd3c0 100644 --- a/src/crypto/randomx/dataset.cpp +++ b/src/crypto/randomx/dataset.cpp @@ -1,5 +1,7 @@ /* -Copyright (c) 2018-2019, tevador +Copyright (c) 2018-2020, tevador +Copyright (c) 2019-2020, SChernykh +Copyright (c) 2019-2020, XMRig , All rights reserved. 
@@ -59,10 +61,11 @@ namespace randomx { template void deallocCache(randomx_cache* cache) { - if (cache->memory != nullptr) + if (cache->memory != nullptr) { Allocator::freeMemory(cache->memory, RANDOMX_CACHE_MAX_SIZE); - if (cache->jit != nullptr) - delete cache->jit; + } + + delete cache->jit; } template void deallocCache(randomx_cache* cache); @@ -77,16 +80,16 @@ namespace randomx { context.pwdlen = (uint32_t)keySize; context.salt = CONST_CAST(uint8_t *)RandomX_CurrentConfig.ArgonSalt; context.saltlen = (uint32_t)strlen(RandomX_CurrentConfig.ArgonSalt); - context.secret = NULL; + context.secret = nullptr; context.secretlen = 0; - context.ad = NULL; + context.ad = nullptr; context.adlen = 0; context.t_cost = RandomX_CurrentConfig.ArgonIterations; context.m_cost = RandomX_CurrentConfig.ArgonMemory; context.lanes = RandomX_CurrentConfig.ArgonLanes; context.threads = 1; - context.allocate_cbk = NULL; - context.free_cbk = NULL; + context.allocate_cbk = nullptr; + context.free_cbk = nullptr; context.flags = ARGON2_DEFAULT_FLAGS; context.version = ARGON2_VERSION_NUMBER; @@ -100,8 +103,18 @@ namespace randomx { void initCacheCompile(randomx_cache* cache, const void* key, size_t keySize) { initCache(cache, key, keySize); + +# ifdef XMRIG_SECURE_JIT + cache->jit->enableWriting(); +# endif + cache->jit->generateSuperscalarHash(cache->programs); cache->jit->generateDatasetInitCode(); + cache->datasetInit = cache->jit->getDatasetInitFunc(); + +# ifdef XMRIG_SECURE_JIT + cache->jit->enableExecution(); +# endif } constexpr uint64_t superscalarMul0 = 6364136223846793005ULL; diff --git a/src/crypto/randomx/dataset.hpp b/src/crypto/randomx/dataset.hpp index c15ab379..da709688 100644 --- a/src/crypto/randomx/dataset.hpp +++ b/src/crypto/randomx/dataset.hpp @@ -48,7 +48,7 @@ struct randomx_cache { randomx::DatasetInitFunc* datasetInit; randomx::SuperscalarProgram programs[RANDOMX_CACHE_MAX_ACCESSES]; - bool isInitialized() { + bool isInitialized() const { return programs[0].getSize() != 0; } }; diff --git a/src/crypto/randomx/jit_compiler_a64.cpp b/src/crypto/randomx/jit_compiler_a64.cpp index e4072381..f98e36f6 100644 --- a/src/crypto/randomx/jit_compiler_a64.cpp +++ b/src/crypto/randomx/jit_compiler_a64.cpp @@ -1,6 +1,7 @@ /* -Copyright (c) 2018-2019, tevador -Copyright (c) 2019, SChernykh +Copyright (c) 2018-2020, tevador +Copyright (c) 2019-2020, SChernykh +Copyright (c) 2019-2020, XMRig , All rights reserved. @@ -28,18 +29,25 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
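Editor's note: the initCacheCompile() hunk above is where the secure-JIT switch matters for the cache, since code generation and grabbing the dataset-init entry point must happen while the pages are writable, and execution is only re-enabled once everything has been emitted. A commented restatement of that ordering (sketch only, using the members this patch shows on randomx_cache):

    #include "crypto/randomx/dataset.hpp"

    static void compile_cache_sketch(randomx_cache *cache)
    {
    #ifdef XMRIG_SECURE_JIT
        cache->jit->enableWriting();                             // JIT pages become RW
    #endif

        cache->jit->generateSuperscalarHash(cache->programs);
        cache->jit->generateDatasetInitCode();
        cache->datasetInit = cache->jit->getDatasetInitFunc();   // grab the entry point once generated

    #ifdef XMRIG_SECURE_JIT
        cache->jit->enableExecution();                           // JIT pages go back to RX before first use
    #endif
    }
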
*/ #include "crypto/randomx/jit_compiler_a64.hpp" -#include "crypto/randomx/superscalar.hpp" +#include "crypto/common/VirtualMemory.h" #include "crypto/randomx/program.hpp" #include "crypto/randomx/reciprocal.h" +#include "crypto/randomx/superscalar.hpp" #include "crypto/randomx/virtual_memory.hpp" static bool hugePagesJIT = false; +static int optimizedDatasetInit = -1; void randomx_set_huge_pages_jit(bool hugePages) { hugePagesJIT = hugePages; } +void randomx_set_optimized_dataset_init(int value) +{ + optimizedDatasetInit = value; +} + namespace ARMV8A { constexpr uint32_t B = 0x14000000; @@ -96,37 +104,28 @@ static size_t CalcDatasetItemSize() constexpr uint32_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 }; -JitCompilerA64::JitCompilerA64(bool hugePagesEnable) - : code((uint8_t*) allocExecutableMemory(CodeSize + CalcDatasetItemSize(), hugePagesJIT && hugePagesEnable)) - , literalPos(ImulRcpLiteralsEnd) - , num32bitLiterals(0) +JitCompilerA64::JitCompilerA64(bool hugePagesEnable, bool) : + hugePages(hugePagesJIT && hugePagesEnable), + literalPos(ImulRcpLiteralsEnd) { - memset(reg_changed_offset, 0, sizeof(reg_changed_offset)); - memcpy(code, (void*) randomx_program_aarch64, CodeSize); } JitCompilerA64::~JitCompilerA64() { - freePagedMemory(code, CodeSize + CalcDatasetItemSize()); -} - -#if defined(ios_HOST_OS) || defined (darwin_HOST_OS) -void sys_icache_invalidate(void *start, size_t len); -#endif - -static void clear_code_cache(char* p1, char* p2) -{ -# if defined(ios_HOST_OS) || defined (darwin_HOST_OS) - sys_icache_invalidate(p1, static_cast(p2 - p1)); -# elif defined (HAVE_BUILTIN_CLEAR_CACHE) || defined (__GNUC__) - __builtin___clear_cache(p1, p2); -# else -# error "No clear code cache function found" -# endif + freePagedMemory(code, allocatedSize); } void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config, uint32_t) { + if (!allocatedSize) { + allocate(CodeSize); + } +#ifdef XMRIG_SECURE_JIT + else { + enableWriting(); + } +#endif + uint32_t codePos = MainLoopBegin + 4; // and w16, w10, ScratchpadL3Mask64 @@ -171,11 +170,22 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con codePos = ((uint8_t*)randomx_program_aarch64_update_spMix1) - ((uint8_t*)randomx_program_aarch64); emit32(ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16), code, codePos); - clear_code_cache(reinterpret_cast(code + MainLoopBegin), reinterpret_cast(code + codePos)); +# ifndef XMRIG_OS_APPLE + xmrig::VirtualMemory::flushInstructionCache(reinterpret_cast(code + MainLoopBegin), reinterpret_cast(code + codePos)); +# endif } void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration& config, uint32_t datasetOffset) { + if (!allocatedSize) { + allocate(CodeSize); + } +#ifdef XMRIG_SECURE_JIT + else { + enableWriting(); + } +#endif + uint32_t codePos = MainLoopBegin + 4; // and w16, w10, ScratchpadL3Mask64 @@ -226,12 +236,23 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration emit32(ARMV8A::ADD_IMM_LO | 2 | (2 << 5) | (imm_lo << 10), code, codePos); emit32(ARMV8A::ADD_IMM_HI | 2 | (2 << 5) | (imm_hi << 10), code, codePos); - clear_code_cache(reinterpret_cast(code + MainLoopBegin), reinterpret_cast(code + codePos)); +# ifndef XMRIG_OS_APPLE + xmrig::VirtualMemory::flushInstructionCache(reinterpret_cast(code + MainLoopBegin), reinterpret_cast(code + codePos)); +# endif } template void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N]) { + if 
(!allocatedSize) { + allocate(CodeSize + CalcDatasetItemSize()); + } +#ifdef XMRIG_SECURE_JIT + else { + enableWriting(); + } +#endif + uint32_t codePos = CodeSize; uint8_t* p1 = (uint8_t*)randomx_calc_dataset_item_aarch64; @@ -342,13 +363,19 @@ void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N]) memcpy(code + codePos, p1, p2 - p1); codePos += p2 - p1; - clear_code_cache(reinterpret_cast(code + CodeSize), reinterpret_cast(code + codePos)); +# ifndef XMRIG_OS_APPLE + xmrig::VirtualMemory::flushInstructionCache(reinterpret_cast(code + CodeSize), reinterpret_cast(code + codePos)); +# endif } template void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_MAX_ACCESSES]); -DatasetInitFunc* JitCompilerA64::getDatasetInitFunc() +DatasetInitFunc* JitCompilerA64::getDatasetInitFunc() const { +# ifdef XMRIG_SECURE_JIT + enableExecution(); +# endif + return (DatasetInitFunc*)(code + (((uint8_t*)randomx_init_dataset_aarch64) - ((uint8_t*)randomx_program_aarch64))); } @@ -357,6 +384,26 @@ size_t JitCompilerA64::getCodeSize() return CodeSize; } +void JitCompilerA64::enableWriting() const +{ + xmrig::VirtualMemory::protectRW(code, allocatedSize); +} + +void JitCompilerA64::enableExecution() const +{ + xmrig::VirtualMemory::protectRX(code, allocatedSize); +} + + +void JitCompilerA64::allocate(size_t size) +{ + allocatedSize = size; + code = static_cast(allocExecutableMemory(allocatedSize, hugePages)); + + memcpy(code, reinterpret_cast(randomx_program_aarch64), CodeSize); +} + + void JitCompilerA64::emitMovImmediate(uint32_t dst, uint32_t imm, uint8_t* code, uint32_t& codePos) { uint32_t k = codePos; diff --git a/src/crypto/randomx/jit_compiler_a64.hpp b/src/crypto/randomx/jit_compiler_a64.hpp index 29ce478d..32ff5166 100644 --- a/src/crypto/randomx/jit_compiler_a64.hpp +++ b/src/crypto/randomx/jit_compiler_a64.hpp @@ -1,6 +1,7 @@ /* -Copyright (c) 2018-2019, tevador -Copyright (c) 2019, SChernykh +Copyright (c) 2018-2020, tevador +Copyright (c) 2019-2020, SChernykh +Copyright (c) 2019-2020, XMRig , All rights reserved. 
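A note on the XMRIG_SECURE_JIT blocks added throughout this patch: when the option is enabled, the JIT buffer is kept either writable or executable, never both (W^X), so every code-generation path flips protection to RW first and the get*Func() entry points flip it back to RX. A minimal standalone sketch of that toggle, assuming a POSIX mprotect() primitive and a page-aligned buffer; the patch itself routes this through xmrig::VirtualMemory::protectRW()/protectRX():

#include <sys/mman.h>
#include <cstddef>
#include <cstdint>

// Hypothetical helpers for the sketch; pointers passed to mprotect() must be page-aligned.
static void protectRW(void *p, size_t size) { mprotect(p, size, PROT_READ | PROT_WRITE); }
static void protectRX(void *p, size_t size) { mprotect(p, size, PROT_READ | PROT_EXEC); }

void regenerate(uint8_t *code, size_t size)
{
    protectRW(code, size);   // writable, no longer executable
    // ... emit new machine code into `code` ...
    protectRX(code, size);   // executable again, write access dropped before running it
}
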
@@ -46,7 +47,7 @@ namespace randomx { class JitCompilerA64 { public: - explicit JitCompilerA64(bool hugePagesEnable); + explicit JitCompilerA64(bool hugePagesEnable, bool optimizedInitDatasetEnable); ~JitCompilerA64(); void prepare() {} @@ -58,16 +59,32 @@ namespace randomx { void generateDatasetInitCode() {} - ProgramFunc* getProgramFunc() { return reinterpret_cast(code); } - DatasetInitFunc* getDatasetInitFunc(); + inline ProgramFunc *getProgramFunc() const { +# ifdef XMRIG_SECURE_JIT + enableExecution(); +# endif + + return reinterpret_cast(code); + } + + DatasetInitFunc* getDatasetInitFunc() const; uint8_t* getCode() { return code; } size_t getCodeSize(); + void enableWriting() const; + void enableExecution() const; + static InstructionGeneratorA64 engine[256]; - uint32_t reg_changed_offset[8]; - uint8_t* code; + + private: + const bool hugePages; + uint32_t reg_changed_offset[8]{}; + uint8_t* code = nullptr; uint32_t literalPos; - uint32_t num32bitLiterals; + uint32_t num32bitLiterals = 0; + size_t allocatedSize = 0; + + void allocate(size_t size); static void emit32(uint32_t val, uint8_t* code, uint32_t& codePos) { @@ -90,6 +107,7 @@ namespace randomx { template void emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* code, uint32_t& codePos); + public: void h_IADD_RS(Instruction&, uint32_t&); void h_IADD_M(Instruction&, uint32_t&); void h_ISUB_R(Instruction&, uint32_t&); diff --git a/src/crypto/randomx/jit_compiler_fallback.cpp b/src/crypto/randomx/jit_compiler_fallback.cpp index 374da678..369458a4 100644 --- a/src/crypto/randomx/jit_compiler_fallback.cpp +++ b/src/crypto/randomx/jit_compiler_fallback.cpp @@ -35,3 +35,6 @@ void randomx_set_huge_pages_jit(bool) { } +void randomx_set_optimized_dataset_init(int) +{ +} diff --git a/src/crypto/randomx/jit_compiler_fallback.hpp b/src/crypto/randomx/jit_compiler_fallback.hpp index 7f643b17..cdf87cba 100644 --- a/src/crypto/randomx/jit_compiler_fallback.hpp +++ b/src/crypto/randomx/jit_compiler_fallback.hpp @@ -1,5 +1,7 @@ /* -Copyright (c) 2018-2019, tevador +Copyright (c) 2018-2020, tevador +Copyright (c) 2019-2020, SChernykh +Copyright (c) 2019-2020, XMRig , All rights reserved. @@ -41,7 +43,7 @@ namespace randomx { class JitCompilerFallback { public: - explicit JitCompilerFallback(bool) { + explicit JitCompilerFallback(bool, bool) { throw std::runtime_error("JIT compilation is not supported on this platform"); } void prepare() {} @@ -70,5 +72,7 @@ namespace randomx { size_t getCodeSize() { return 0; } + void enableWriting() {} + void enableExecution() {} }; } diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 9753701e..0e79d6a4 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -1,5 +1,7 @@ /* -Copyright (c) 2018-2019, tevador +Copyright (c) 2018-2020, tevador +Copyright (c) 2019-2020, SChernykh +Copyright (c) 2019-2020, XMRig , All rights reserved. @@ -30,14 +32,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include + #include "crypto/randomx/jit_compiler_x86.hpp" +#include "backend/cpu/Cpu.h" +#include "crypto/common/VirtualMemory.h" #include "crypto/randomx/jit_compiler_x86_static.hpp" -#include "crypto/randomx/superscalar.hpp" #include "crypto/randomx/program.hpp" #include "crypto/randomx/reciprocal.h" +#include "crypto/randomx/superscalar.hpp" #include "crypto/randomx/virtual_memory.hpp" -#include "base/tools/Profiler.h" -#include "backend/cpu/Cpu.h" +#include "crypto/rx/Profiler.h" #ifdef XMRIG_FIX_RYZEN # include "crypto/rx/Rx.h" @@ -45,17 +49,21 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef _MSC_VER # include -#else -# include #endif static bool hugePagesJIT = false; +static int optimizedDatasetInit = -1; void randomx_set_huge_pages_jit(bool hugePages) { hugePagesJIT = hugePages; } +void randomx_set_optimized_dataset_init(int value) +{ + optimizedDatasetInit = value; +} + namespace randomx { /* @@ -112,6 +120,11 @@ namespace randomx { #define codeReadDatasetLightSshInit ADDR(randomx_program_read_dataset_sshash_init) #define codeReadDatasetLightSshFin ADDR(randomx_program_read_dataset_sshash_fin) #define codeDatasetInit ADDR(randomx_dataset_init) + #define codeDatasetInitAVX2_prologue ADDR(randomx_dataset_init_avx2_prologue) + #define codeDatasetInitAVX2_loop_end ADDR(randomx_dataset_init_avx2_loop_end) + #define codeDatasetInitAVX2_loop_epilogue ADDR(randomx_dataset_init_avx2_epilogue) + #define codeDatasetInitAVX2_ssh_load ADDR(randomx_dataset_init_avx2_ssh_load) + #define codeDatasetInitAVX2_ssh_prefetch ADDR(randomx_dataset_init_avx2_ssh_prefetch) #define codeLoopStore ADDR(randomx_program_loop_store) #define codeLoopEnd ADDR(randomx_program_loop_end) #define codeEpilogue ADDR(randomx_program_epilogue) @@ -128,7 +141,12 @@ namespace randomx { #define readDatasetLightInitSize (codeReadDatasetLightSshFin - codeReadDatasetLightSshInit) #define readDatasetLightFinSize (codeLoopStore - codeReadDatasetLightSshFin) #define loopStoreSize (codeLoopEnd - codeLoopStore) - #define datasetInitSize (codeEpilogue - codeDatasetInit) + #define datasetInitSize (codeDatasetInitAVX2_prologue - codeDatasetInit) + #define datasetInitAVX2_prologue_size (codeDatasetInitAVX2_loop_end - codeDatasetInitAVX2_prologue) + #define datasetInitAVX2_loop_end_size (codeDatasetInitAVX2_loop_epilogue - codeDatasetInitAVX2_loop_end) + #define datasetInitAVX2_epilogue_size (codeDatasetInitAVX2_ssh_load - codeDatasetInitAVX2_loop_epilogue) + #define datasetInitAVX2_ssh_load_size (codeDatasetInitAVX2_ssh_prefetch - codeDatasetInitAVX2_ssh_load) + #define datasetInitAVX2_ssh_prefetch_size (codeEpilogue - codeDatasetInitAVX2_ssh_prefetch) #define epilogueSize (codeShhLoad - codeEpilogue) #define codeSshLoadSize (codeShhPrefetch - codeShhLoad) #define codeSshPrefetchSize (codeShhEnd - codeShhPrefetch) @@ -166,20 +184,27 @@ namespace randomx { {0x0F, 0x1F, 0x44, 0x00, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E}, }; + static inline uint8_t* alignToPage(uint8_t* p, size_t pageSize) { + size_t k = (size_t) p; + k -= k % pageSize; + return (uint8_t*) k; + } + size_t JitCompilerX86::getCodeSize() { return codePos < prologueSize ? 
0 : codePos - prologueSize; } - static inline void cpuid(uint32_t level, int32_t output[4]) - { - memset(output, 0, sizeof(int32_t) * 4); + void JitCompilerX86::enableWriting() const { + uint8_t* p1 = alignToPage(code, 4096); + uint8_t* p2 = code + CodeSize; + xmrig::VirtualMemory::protectRW(p1, p2 - p1); + } -# ifdef _MSC_VER - __cpuid(output, static_cast(level)); -# else - __cpuid_count(level, 0, output[0], output[1], output[2], output[3]); -# endif - } + void JitCompilerX86::enableExecution() const { + uint8_t* p1 = alignToPage(code, 4096); + uint8_t* p2 = code + CodeSize; + xmrig::VirtualMemory::protectRX(p1, p2 - p1); + } # ifdef _MSC_VER static FORCE_INLINE uint32_t rotl32(uint32_t a, int shift) { return _rotl(a, shift); } @@ -190,17 +215,68 @@ namespace randomx { static std::atomic codeOffset; constexpr size_t codeOffsetIncrement = 59 * 64; - JitCompilerX86::JitCompilerX86(bool hugePagesEnable) { + JitCompilerX86::JitCompilerX86(bool hugePagesEnable, bool optimizedInitDatasetEnable) { BranchesWithin32B = xmrig::Cpu::info()->jccErratum(); - int32_t info[4]; - cpuid(1, info); - hasAVX = ((info[2] & (1 << 27)) != 0) && ((info[2] & (1 << 28)) != 0); + hasAVX = xmrig::Cpu::info()->hasAVX(); + hasAVX2 = xmrig::Cpu::info()->hasAVX2(); - cpuid(0x80000001, info); - hasXOP = ((info[2] & (1 << 11)) != 0); + // Disable by default + initDatasetAVX2 = false; - allocatedCode = (uint8_t*)allocExecutableMemory(CodeSize * 2, hugePagesJIT && hugePagesEnable); + if (optimizedInitDatasetEnable) { + // Dataset init using AVX2: + // -1 = Auto detect + // 0 = Always disabled + // +1 = Always enabled + if (optimizedDatasetInit > 0) { + initDatasetAVX2 = true; + } + else if (optimizedDatasetInit < 0) { + xmrig::ICpuInfo::Vendor vendor = xmrig::Cpu::info()->vendor(); + xmrig::ICpuInfo::Arch arch = xmrig::Cpu::info()->arch(); + + if (vendor == xmrig::ICpuInfo::VENDOR_INTEL) { + // AVX2 init is faster on Intel CPUs without HT + initDatasetAVX2 = (xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads()); + } + else if (vendor == xmrig::ICpuInfo::VENDOR_AMD) { + switch (arch) { + case xmrig::ICpuInfo::ARCH_ZEN: + case xmrig::ICpuInfo::ARCH_ZEN_PLUS: + default: + // AVX2 init is slower on Zen/Zen+ + // Also disable it for other unknown architectures + initDatasetAVX2 = false; + break; + case xmrig::ICpuInfo::ARCH_ZEN2: + // AVX2 init is faster on Zen2 without SMT (mobile CPUs) + initDatasetAVX2 = (xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads()); + break; + case xmrig::ICpuInfo::ARCH_ZEN3: + // AVX2 init is faster on Zen3 + initDatasetAVX2 = true; + break; + } + } + } + } + + // Sorry, low-end Intel CPUs + if (!hasAVX2) { + initDatasetAVX2 = false; + } + + hasXOP = xmrig::Cpu::info()->hasXOP(); + + allocatedSize = initDatasetAVX2 ? 
(CodeSize * 4) : (CodeSize * 2); + allocatedCode = static_cast(allocExecutableMemory(allocatedSize, +# ifdef XMRIG_SECURE_JIT + false +# else + hugePagesJIT && hugePagesEnable +# endif + )); // Shift code base address to improve caching - all threads will use different L2/L3 cache sets code = allocatedCode + (codeOffset.fetch_add(codeOffsetIncrement) % CodeSize); @@ -224,7 +300,7 @@ namespace randomx { JitCompilerX86::~JitCompilerX86() { codeOffset.fetch_sub(codeOffsetIncrement); - freePagedMemory(allocatedCode, CodeSize); + freePagedMemory(allocatedCode, allocatedSize); } void JitCompilerX86::prepare() { @@ -237,6 +313,10 @@ namespace randomx { void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) { PROFILE_SCOPE(RandomX_JIT_compile); +# ifdef XMRIG_SECURE_JIT + enableWriting(); +# endif + vm_flags = flags; generateProgramPrologue(prog, pcfg); @@ -271,14 +351,49 @@ namespace randomx { template void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[N]) { + uint8_t* p = code; + if (initDatasetAVX2) { + codePos = 0; + emit(codeDatasetInitAVX2_prologue, datasetInitAVX2_prologue_size, code, codePos); + + for (unsigned j = 0; j < RandomX_CurrentConfig.CacheAccesses; ++j) { + SuperscalarProgram& prog = programs[j]; + uint32_t pos = codePos; + for (uint32_t i = 0, n = prog.getSize(); i < n; ++i) { + generateSuperscalarCode(prog(i), p, pos); + } + codePos = pos; + emit(codeShhLoad, codeSshLoadSize, code, codePos); + emit(codeDatasetInitAVX2_ssh_load, datasetInitAVX2_ssh_load_size, code, codePos); + if (j < RandomX_CurrentConfig.CacheAccesses - 1) { + *(uint32_t*)(code + codePos) = 0xd88b49 + (static_cast(prog.getAddressRegister()) << 16); + codePos += 3; + emit(RandomX_CurrentConfig.codeShhPrefetchTweaked, codeSshPrefetchSize, code, codePos); + uint8_t* p = code + codePos; + emit(codeDatasetInitAVX2_ssh_prefetch, datasetInitAVX2_ssh_prefetch_size, code, codePos); + p[3] += prog.getAddressRegister() << 3; + } + } + + emit(codeDatasetInitAVX2_loop_end, datasetInitAVX2_loop_end_size, code, codePos); + + // Number of bytes from the start of randomx_dataset_init_avx2_prologue to loop_begin label + constexpr int32_t prologue_size = 320; + *(int32_t*)(code + codePos - 4) = prologue_size - codePos; + + emit(codeDatasetInitAVX2_loop_epilogue, datasetInitAVX2_epilogue_size, code, codePos); + return; + } + memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize); codePos = superScalarHashOffset + codeSshInitSize; for (unsigned j = 0; j < RandomX_CurrentConfig.CacheAccesses; ++j) { SuperscalarProgram& prog = programs[j]; - for (unsigned i = 0; i < prog.getSize(); ++i) { - Instruction& instr = prog(i); - generateSuperscalarCode(instr); + uint32_t pos = codePos; + for (uint32_t i = 0, n = prog.getSize(); i < n; ++i) { + generateSuperscalarCode(prog(i), p, pos); } + codePos = pos; emit(codeShhLoad, codeSshLoadSize, code, codePos); if (j < RandomX_CurrentConfig.CacheAccesses - 1) { *(uint32_t*)(code + codePos) = 0xd88b49 + (static_cast(prog.getAddressRegister()) << 16); @@ -293,7 +408,10 @@ namespace randomx { void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_MAX_ACCESSES]); void JitCompilerX86::generateDatasetInitCode() { - memcpy(code, codeDatasetInit, datasetInitSize); + // AVX2 code is generated in generateSuperscalarHash() + if (!initDatasetAVX2) { + memcpy(code, codeDatasetInit, datasetInitSize); + } } void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) { @@ 
-372,101 +490,243 @@ namespace randomx { emit32(epilogueOffset - codePos - 4, code, codePos); } - void JitCompilerX86::generateSuperscalarCode(Instruction& instr) { - static constexpr uint8_t REX_SUB_RR[] = { 0x4d, 0x2b }; - static constexpr uint8_t REX_MOV_RR64[] = { 0x49, 0x8b }; - static constexpr uint8_t REX_MOV_R64R[] = { 0x4c, 0x8b }; - static constexpr uint8_t REX_IMUL_RR[] = { 0x4d, 0x0f, 0xaf }; - static constexpr uint8_t REX_IMUL_RM[] = { 0x4c, 0x0f, 0xaf }; - static constexpr uint8_t REX_MUL_R[] = { 0x49, 0xf7 }; - static constexpr uint8_t REX_81[] = { 0x49, 0x81 }; - static constexpr uint8_t MOV_RAX_I[] = { 0x48, 0xb8 }; - static constexpr uint8_t REX_LEA[] = { 0x4f, 0x8d }; - static constexpr uint8_t REX_XOR_RR[] = { 0x4D, 0x33 }; - static constexpr uint8_t REX_XOR_RI[] = { 0x49, 0x81 }; - static constexpr uint8_t REX_ROT_I8[] = { 0x49, 0xc1 }; - + template + FORCE_INLINE void JitCompilerX86::generateSuperscalarCode(Instruction& instr, uint8_t* code, uint32_t& codePos) { switch ((SuperscalarInstructionType)instr.opcode) { case randomx::SuperscalarInstructionType::ISUB_R: - emit(REX_SUB_RR, code, codePos); - emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos); + *(uint32_t*)(code + codePos) = 0x00C02B4DUL + (instr.dst << 19) + (instr.src << 16); + codePos += 3; + if (AVX2) { + emit32(0xC0FBFDC5UL + (instr.src << 24) + (instr.dst << 27) - (instr.dst << 11), code, codePos); + } break; case randomx::SuperscalarInstructionType::IXOR_R: - emit(REX_XOR_RR, code, codePos); - emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos); + *(uint32_t*)(code + codePos) = 0x00C0334DUL + (instr.dst << 19) + (instr.src << 16); + codePos += 3; + if (AVX2) { + emit32(0xC0EFFDC5UL + (instr.src << 24) + (instr.dst << 27) - (instr.dst << 11), code, codePos); + } break; case randomx::SuperscalarInstructionType::IADD_RS: - emit(REX_LEA, code, codePos); - emitByte(0x04 + 8 * instr.dst, code, codePos); - genSIB(instr.getModShift(), instr.src, instr.dst, code, codePos); + emit32(0x00048D4F + (instr.dst << 19) + (genSIB(instr.getModShift(), instr.src, instr.dst) << 24), code, codePos); + if (AVX2) { + if (instr.getModShift()) { + static const uint8_t t[] = { 0xC5, 0xBD, 0x73, 0xF0, 0x00, 0xC5, 0xBD, 0xD4, 0xC0 }; + uint8_t* p = code + codePos; + emit(t, code, codePos); + p[3] += instr.src; + p[4] = instr.getModShift(); + p[8] += instr.dst * 9; + } + else { + emit32(0xC0D4FDC5UL + (instr.src << 24) + (instr.dst << 27) - (instr.dst << 11), code, codePos); + } + } break; case randomx::SuperscalarInstructionType::IMUL_R: - emit(REX_IMUL_RR, code, codePos); - emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos); + emit32(0xC0AF0F4DUL + (instr.dst << 27) + (instr.src << 24), code, codePos); + if (AVX2) { + static const uint8_t t[] = { + 0xC5, 0xBD, 0x73, 0xD0, 0x20, + 0xC5, 0xB5, 0x73, 0xD0, 0x20, + 0xC5, 0x7D, 0xF4, 0xD0, + 0xC5, 0x35, 0xF4, 0xD8, + 0xC5, 0xBD, 0xF4, 0xC0, + 0xC4, 0xC1, 0x25, 0x73, 0xF3, 0x20, + 0xC5, 0xFD, 0x73, 0xF0, 0x20, + 0xC4, 0x41, 0x2D, 0xD4, 0xD3, + 0xC5, 0xAD, 0xD4, 0xC0 + }; + uint8_t* p = code + codePos; + emit(t, code, codePos); + p[3] += instr.dst; + p[8] += instr.src; + p[11] -= instr.dst * 8; + p[13] += instr.src; + p[17] += instr.dst; + p[21] += instr.dst * 8 + instr.src; + p[29] -= instr.dst * 8; + p[31] += instr.dst; + p[41] += instr.dst * 9; + } break; case randomx::SuperscalarInstructionType::IROR_C: - emit(REX_ROT_I8, code, codePos); - emitByte(0xc8 + instr.dst, code, codePos); - emitByte(instr.getImm32() & 63, code, codePos); + { + const uint32_t shift = 
instr.getImm32() & 63; + emit32(0x00C8C149UL + (instr.dst << 16) + (shift << 24), code, codePos); + if (AVX2) { + static const uint8_t t[] = { 0xC5, 0xBD, 0x73, 0xD0, 0x00, 0xC5, 0xB5, 0x73, 0xF0, 0x00, 0xC4, 0xC1, 0x3D, 0xEB, 0xC1 }; + uint8_t* p = code + codePos; + emit(t, code, codePos); + p[3] += instr.dst; + p[4] = shift; + p[8] += instr.dst; + p[9] = 64 - shift; + p[14] += instr.dst * 8; + } + } break; case randomx::SuperscalarInstructionType::IADD_C7: - emit(REX_81, code, codePos); - emitByte(0xc0 + instr.dst, code, codePos); - emit32(instr.getImm32(), code, codePos); + case randomx::SuperscalarInstructionType::IADD_C8: + case randomx::SuperscalarInstructionType::IADD_C9: + if (AVX2) { + static const uint8_t t[] = { 0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4C, 0x03, 0xC0, 0xC4, 0x62, 0x7D, 0x19, 0x05, 0xEC, 0xFF, 0xFF, 0xFF, 0xC4, 0xC1, 0x7D, 0xD4, 0xC0 }; + uint8_t* p = code + codePos; + emit(t, code, codePos); + *(uint64_t*)(p + 2) = signExtend2sCompl(instr.getImm32()); + p[12] += instr.dst * 8; + p[24] -= instr.dst * 8; + p[26] += instr.dst * 8; + } + else { + *(uint32_t*)(code + codePos) = 0x00C08149UL + (instr.dst << 16); + codePos += 3; + emit32(instr.getImm32(), code, codePos); + } break; case randomx::SuperscalarInstructionType::IXOR_C7: - emit(REX_XOR_RI, code, codePos); - emitByte(0xf0 + instr.dst, code, codePos); - emit32(instr.getImm32(), code, codePos); - break; - case randomx::SuperscalarInstructionType::IADD_C8: - emit(REX_81, code, codePos); - emitByte(0xc0 + instr.dst, code, codePos); - emit32(instr.getImm32(), code, codePos); - break; case randomx::SuperscalarInstructionType::IXOR_C8: - emit(REX_XOR_RI, code, codePos); - emitByte(0xf0 + instr.dst, code, codePos); - emit32(instr.getImm32(), code, codePos); - break; - case randomx::SuperscalarInstructionType::IADD_C9: - emit(REX_81, code, codePos); - emitByte(0xc0 + instr.dst, code, codePos); - emit32(instr.getImm32(), code, codePos); - break; case randomx::SuperscalarInstructionType::IXOR_C9: - emit(REX_XOR_RI, code, codePos); - emitByte(0xf0 + instr.dst, code, codePos); - emit32(instr.getImm32(), code, codePos); + if (AVX2) { + static const uint8_t t[] = { 0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4C, 0x33, 0xC0, 0xC4, 0x62, 0x7D, 0x19, 0x05, 0xEC, 0xFF, 0xFF, 0xFF, 0xC4, 0xC1, 0x7D, 0xEF, 0xC0 }; + uint8_t* p = code + codePos; + emit(t, code, codePos); + *(uint64_t*)(p + 2) = signExtend2sCompl(instr.getImm32()); + p[12] += instr.dst * 8; + p[24] -= instr.dst * 8; + p[26] += instr.dst * 8; + } + else { + *(uint32_t*)(code + codePos) = 0x00F08149UL + (instr.dst << 16); + codePos += 3; + emit32(instr.getImm32(), code, codePos); + } break; case randomx::SuperscalarInstructionType::IMULH_R: - emit(REX_MOV_RR64, code, codePos); - emitByte(0xc0 + instr.dst, code, codePos); - emit(REX_MUL_R, code, codePos); - emitByte(0xe0 + instr.src, code, codePos); - emit(REX_MOV_R64R, code, codePos); - emitByte(0xc2 + 8 * instr.dst, code, codePos); + *(uint32_t*)(code + codePos) = 0x00C08B49UL + (instr.dst << 16); + codePos += 3; + *(uint32_t*)(code + codePos) = 0x00E0F749UL + (instr.src << 16); + codePos += 3; + *(uint32_t*)(code + codePos) = 0x00C28B4CUL + (instr.dst << 19); + codePos += 3; + if (AVX2) { + static const uint8_t t[] = { + 0xC5, 0xBD, 0x73, 0xD0, 0x20, + 0xC5, 0xB5, 0x73, 0xD0, 0x20, + 0xC5, 0x7D, 0xF4, 0xD0, + 0xC5, 0x3D, 0xF4, 0xD8, + 0xC4, 0x41, 0x7D, 0xF4, 0xE1, + 0xC4, 0xC1, 0x3D, 0xF4, 0xC1, + 0xC4, 0xC1, 0x2D, 0x73, 0xD2, 0x20, + 0xC4, 0x41, 0x25, 0xEF, 0xC6, + 0xC4, 
0x41, 0x25, 0xD4, 0xDC, + 0xC4, 0x41, 0x25, 0xD4, 0xDA, + 0xC4, 0x41, 0x25, 0xEF, 0xCE, + 0xC4, 0x42, 0x3D, 0x37, 0xC1, + 0xC4, 0x41, 0x3D, 0xDB, 0xC7, + 0xC5, 0xBD, 0xD4, 0xC0, + 0xC4, 0xC1, 0x25, 0x73, 0xD3, 0x20, + 0xC5, 0xA5, 0xD4, 0xC0 + }; + uint8_t* p = code + codePos; + emit(t, code, codePos); + p[3] += instr.dst; + p[8] += instr.src; + p[11] -= instr.dst * 8; + p[13] += instr.src; + p[17] += instr.src; + p[20] -= instr.dst * 8; + p[27] += instr.dst * 8; + p[67] += instr.dst * 9; + p[77] += instr.dst * 9; + } break; case randomx::SuperscalarInstructionType::ISMULH_R: - emit(REX_MOV_RR64, code, codePos); - emitByte(0xc0 + instr.dst, code, codePos); - emit(REX_MUL_R, code, codePos); - emitByte(0xe8 + instr.src, code, codePos); - emit(REX_MOV_R64R, code, codePos); - emitByte(0xc2 + 8 * instr.dst, code, codePos); + *(uint32_t*)(code + codePos) = 0x00C08B49UL + (instr.dst << 16); + codePos += 3; + *(uint32_t*)(code + codePos) = 0x00E8F749UL + (instr.src << 16); + codePos += 3; + *(uint32_t*)(code + codePos) = 0x00C28B4CUL + (instr.dst << 19); + codePos += 3; + if (AVX2) { + static const uint8_t t[] = { + 0xC5, 0xBD, 0x73, 0xD0, 0x20, + 0xC5, 0xB5, 0x73, 0xD0, 0x20, + 0xC5, 0x7D, 0xF4, 0xD0, + 0xC5, 0x3D, 0xF4, 0xD8, + 0xC4, 0x41, 0x7D, 0xF4, 0xE1, + 0xC4, 0x41, 0x3D, 0xF4, 0xE9, + 0xC4, 0xC1, 0x2D, 0x73, 0xD2, 0x20, + 0xC4, 0x41, 0x25, 0xEF, 0xC6, + 0xC4, 0x41, 0x25, 0xD4, 0xDC, + 0xC4, 0x41, 0x25, 0xD4, 0xDA, + 0xC4, 0x41, 0x25, 0xEF, 0xCE, + 0xC4, 0x42, 0x3D, 0x37, 0xC1, + 0xC4, 0x41, 0x3D, 0xDB, 0xC7, + 0xC4, 0x41, 0x15, 0xD4, 0xE8, + 0xC4, 0xC1, 0x25, 0x73, 0xD3, 0x20, + 0xC4, 0x41, 0x15, 0xD4, 0xC3, + 0xC4, 0x41, 0x35, 0xEF, 0xC9, + 0xC4, 0x62, 0x35, 0x37, 0xD0, + 0xC4, 0x62, 0x35, 0x37, 0xD8, + 0xC5, 0x2D, 0xDB, 0xD0, + 0xC5, 0x25, 0xDB, 0xD8, + 0xC4, 0x41, 0x3D, 0xFB, 0xC2, + 0xC4, 0xC1, 0x3D, 0xFB, 0xC3 + }; + uint8_t* p = code + codePos; + emit(t, code, codePos); + p[3] += instr.dst; + p[8] += instr.src; + p[11] -= instr.dst * 8; + p[13] += instr.src; + p[17] += instr.src; + p[20] -= instr.dst * 8; + p[89] += instr.dst; + p[94] += instr.src; + p[98] += instr.src; + p[102] += instr.dst; + p[112] += instr.dst * 8; + } break; case randomx::SuperscalarInstructionType::IMUL_RCP: - emit(MOV_RAX_I, code, codePos); + *(uint32_t*)(code + codePos) = 0x0000B848UL; + codePos += 2; emit64(randomx_reciprocal_fast(instr.getImm32()), code, codePos); - emit(REX_IMUL_RM, code, codePos); - emitByte(0xc0 + 8 * instr.dst, code, codePos); + emit32(0xC0AF0F4CUL + (instr.dst << 27), code, codePos); + if (AVX2) { + static const uint8_t t[] = { + 0xC4, 0x62, 0x7D, 0x19, 0x25, 0xEB, 0xFF, 0xFF, 0xFF, + 0xC5, 0xBD, 0x73, 0xD0, 0x20, + 0xC4, 0xC1, 0x35, 0x73, 0xD4, 0x20, + 0xC4, 0x41, 0x7D, 0xF4, 0xD4, + 0xC5, 0x35, 0xF4, 0xD8, + 0xC4, 0xC1, 0x3D, 0xF4, 0xC4, + 0xC4, 0xC1, 0x25, 0x73, 0xF3, 0x20, + 0xC5, 0xFD, 0x73, 0xF0, 0x20, + 0xC4, 0x41, 0x2D, 0xD4, 0xD3, + 0xC5, 0xAD, 0xD4, 0xC0 + }; + uint8_t* p = code + codePos; + emit(t, code, codePos); + p[12] += instr.dst; + p[22] -= instr.dst * 8; + p[28] += instr.dst; + p[33] += instr.dst * 8; + p[41] -= instr.dst * 8; + p[43] += instr.dst; + p[53] += instr.dst * 9; + } break; default: UNREACHABLE; } } + template void JitCompilerX86::generateSuperscalarCode(Instruction&, uint8_t*, uint32_t&); + template void JitCompilerX86::generateSuperscalarCode(Instruction&, uint8_t*, uint32_t&); + template FORCE_INLINE void JitCompilerX86::genAddressReg(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos) { *(uint32_t*)(code + codePos) = (rax ? 
0x24808d41 : 0x24888d41) + (src << 16); @@ -546,10 +806,6 @@ namespace randomx { codePos = pos; } - void JitCompilerX86::genSIB(int scale, int index, int base, uint8_t* code, uint32_t& codePos) { - emitByte((scale << 6) | (index << 3) | base, code, codePos); - } - void JitCompilerX86::h_ISUB_R(const Instruction& instr) { uint8_t* const p = code; uint32_t pos = codePos; @@ -1088,11 +1344,11 @@ namespace randomx { pos += 14; if (jmp_offset >= -128) { - *(uint32_t*)(p + pos) = 0x74 + (jmp_offset << 8); + *(uint32_t*)(p + pos) = 0x74 + (static_cast(jmp_offset) << 8); pos += 2; } else { - *(uint64_t*)(p + pos) = 0x840f + ((static_cast(jmp_offset) - 4) << 16); + *(uint64_t*)(p + pos) = 0x840f + (static_cast(jmp_offset - 4) << 16); pos += 6; } diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp index 0a9148f9..5c43264c 100644 --- a/src/crypto/randomx/jit_compiler_x86.hpp +++ b/src/crypto/randomx/jit_compiler_x86.hpp @@ -1,5 +1,7 @@ /* -Copyright (c) 2018-2019, tevador +Copyright (c) 2018-2020, tevador +Copyright (c) 2019-2020, SChernykh +Copyright (c) 2019-2020, XMRig , All rights reserved. @@ -47,7 +49,7 @@ namespace randomx { class JitCompilerX86 { public: - explicit JitCompilerX86(bool hugePagesEnable); + explicit JitCompilerX86(bool hugePagesEnable, bool optimizedInitDatasetEnable); ~JitCompilerX86(); void prepare(); void generateProgram(Program&, ProgramConfiguration&, uint32_t); @@ -55,24 +57,38 @@ namespace randomx { template void generateSuperscalarHash(SuperscalarProgram (&programs)[N]); void generateDatasetInitCode(); - ProgramFunc* getProgramFunc() { - return (ProgramFunc*)code; + + inline ProgramFunc *getProgramFunc() const { +# ifdef XMRIG_SECURE_JIT + enableExecution(); +# endif + + return reinterpret_cast(code); } - DatasetInitFunc* getDatasetInitFunc() { + + inline DatasetInitFunc *getDatasetInitFunc() const { +# ifdef XMRIG_SECURE_JIT + enableExecution(); +# endif + return (DatasetInitFunc*)code; } + uint8_t* getCode() { return code; } size_t getCodeSize(); + void enableWriting() const; + void enableExecution() const; alignas(64) static InstructionGeneratorX86 engine[256]; - int registerUsage[RegistersCount]; - uint8_t* code; - uint32_t codePos; - uint32_t codePosFirst; - uint32_t vm_flags; + private: + int registerUsage[RegistersCount] = {}; + uint8_t* code = nullptr; + uint32_t codePos = 0; + uint32_t codePosFirst = 0; + uint32_t vm_flags = 0; # ifdef XMRIG_FIX_RYZEN std::pair mainLoopBounds; @@ -80,9 +96,12 @@ namespace randomx { bool BranchesWithin32B = false; bool hasAVX; + bool hasAVX2; + bool initDatasetAVX2; bool hasXOP; - uint8_t* allocatedCode; + uint8_t* allocatedCode = nullptr; + size_t allocatedSize = 0; void generateProgramPrologue(Program&, ProgramConfiguration&); void generateProgramEpilogue(Program&, ProgramConfiguration&); @@ -90,9 +109,10 @@ namespace randomx { static void genAddressReg(const Instruction&, const uint32_t src, uint8_t* code, uint32_t& codePos); static void genAddressRegDst(const Instruction&, uint8_t* code, uint32_t& codePos); static void genAddressImm(const Instruction&, uint8_t* code, uint32_t& codePos); - static void genSIB(int scale, int index, int base, uint8_t* code, uint32_t& codePos); + static uint32_t genSIB(int scale, int index, int base) { return (scale << 6) | (index << 3) | base; } - void generateSuperscalarCode(Instruction &); + template + void generateSuperscalarCode(Instruction& inst, uint8_t* code, uint32_t& codePos); static void emitByte(uint8_t val, uint8_t* code, uint32_t& codePos) { 
code[codePos] = val; @@ -119,6 +139,7 @@ namespace randomx { codePos += count; } + public: void h_IADD_RS(const Instruction&); void h_IADD_M(const Instruction&); void h_ISUB_R(const Instruction&); diff --git a/src/crypto/randomx/jit_compiler_x86_static.S b/src/crypto/randomx/jit_compiler_x86_static.S index 9f3a5bf1..e2177147 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.S +++ b/src/crypto/randomx/jit_compiler_x86_static.S @@ -52,6 +52,11 @@ .global DECL(randomx_program_loop_store) .global DECL(randomx_program_loop_end) .global DECL(randomx_dataset_init) +.global DECL(randomx_dataset_init_avx2_prologue) +.global DECL(randomx_dataset_init_avx2_loop_end) +.global DECL(randomx_dataset_init_avx2_epilogue) +.global DECL(randomx_dataset_init_avx2_ssh_load) +.global DECL(randomx_dataset_init_avx2_ssh_prefetch) .global DECL(randomx_program_epilogue) .global DECL(randomx_sshash_load) .global DECL(randomx_sshash_prefetch) @@ -192,6 +197,97 @@ call_offset: pop rbx ret +.balign 64 +DECL(randomx_dataset_init_avx2_prologue): + #include "asm/program_sshash_avx2_save_registers.inc" + +#if defined(WINABI) + mov rdi, qword ptr [rcx] ;# cache->memory + mov rsi, rdx ;# dataset + mov rbp, r8 ;# block index + push r9 ;# max. block index +#else + mov rdi, qword ptr [rdi] ;# cache->memory + ;# dataset in rsi + mov rbp, rdx ;# block index + push rcx ;# max. block index +#endif + sub rsp, 40 + + jmp randomx_dataset_init_avx2_prologue_loop_begin + #include "asm/program_sshash_avx2_constants.inc" + +.balign 64 +randomx_dataset_init_avx2_prologue_loop_begin: + #include "asm/program_sshash_avx2_loop_begin.inc" + + ;# init integer registers (lane 0) + lea r8, [rbp+1] + imul r8, qword ptr [r0_avx2_mul+rip] + mov r9, qword ptr [r1_avx2_add+rip] + xor r9, r8 + mov r10, qword ptr [r2_avx2_add+rip] + xor r10, r8 + mov r11, qword ptr [r3_avx2_add+rip] + xor r11, r8 + mov r12, qword ptr [r4_avx2_add+rip] + xor r12, r8 + mov r13, qword ptr [r5_avx2_add+rip] + xor r13, r8 + mov r14, qword ptr [r6_avx2_add+rip] + xor r14, r8 + mov r15, qword ptr [r7_avx2_add+rip] + xor r15, r8 + + ;# init AVX registers (lanes 1-4) + mov qword ptr [rsp+32], rbp + vbroadcastsd ymm0, qword ptr [rsp+32] + vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments+rip] + + ;# ymm0 *= r0_avx2_mul + vbroadcastsd ymm1, qword ptr [r0_avx2_mul+rip] + vpsrlq ymm8, ymm0, 32 + vpsrlq ymm9, ymm1, 32 + vpmuludq ymm10, ymm0, ymm1 + vpmuludq ymm11, ymm9, ymm0 + vpmuludq ymm0, ymm8, ymm1 + vpsllq ymm11, ymm11, 32 + vpsllq ymm0, ymm0, 32 + vpaddq ymm10, ymm10, ymm11 + vpaddq ymm0, ymm10, ymm0 + + vbroadcastsd ymm1, qword ptr [r1_avx2_add+rip] + vpxor ymm1, ymm0, ymm1 + vbroadcastsd ymm2, qword ptr [r2_avx2_add+rip] + vpxor ymm2, ymm0, ymm2 + vbroadcastsd ymm3, qword ptr [r3_avx2_add+rip] + vpxor ymm3, ymm0, ymm3 + vbroadcastsd ymm4, qword ptr [r4_avx2_add+rip] + vpxor ymm4, ymm0, ymm4 + vbroadcastsd ymm5, qword ptr [r5_avx2_add+rip] + vpxor ymm5, ymm0, ymm5 + vbroadcastsd ymm6, qword ptr [r6_avx2_add+rip] + vpxor ymm6, ymm0, ymm6 + vbroadcastsd ymm7, qword ptr [r7_avx2_add+rip] + vpxor ymm7, ymm0, ymm7 + + vbroadcastsd ymm15, qword ptr [mul_hi_avx2_data+rip] ;# carry_bit (bit 32) + vpsllq ymm14, ymm15, 31 ;# sign64 (bit 63) + + ;# generated SuperscalarHash code goes here + +DECL(randomx_dataset_init_avx2_loop_end): + #include "asm/program_sshash_avx2_loop_end.inc" + +DECL(randomx_dataset_init_avx2_epilogue): + #include "asm/program_sshash_avx2_epilogue.inc" + +DECL(randomx_dataset_init_avx2_ssh_load): + #include "asm/program_sshash_avx2_ssh_load.inc" + 
+DECL(randomx_dataset_init_avx2_ssh_prefetch): + #include "asm/program_sshash_avx2_ssh_prefetch.inc" + .balign 64 DECL(randomx_program_epilogue): #include "asm/program_epilogue_store.inc" diff --git a/src/crypto/randomx/jit_compiler_x86_static.asm b/src/crypto/randomx/jit_compiler_x86_static.asm index e36e5aaf..6e90cbf3 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.asm +++ b/src/crypto/randomx/jit_compiler_x86_static.asm @@ -41,6 +41,11 @@ PUBLIC randomx_program_read_dataset_ryzen PUBLIC randomx_program_read_dataset_sshash_init PUBLIC randomx_program_read_dataset_sshash_fin PUBLIC randomx_dataset_init +PUBLIC randomx_dataset_init_avx2_prologue +PUBLIC randomx_dataset_init_avx2_loop_end +PUBLIC randomx_dataset_init_avx2_epilogue +PUBLIC randomx_dataset_init_avx2_ssh_load +PUBLIC randomx_dataset_init_avx2_ssh_prefetch PUBLIC randomx_program_loop_store PUBLIC randomx_program_loop_end PUBLIC randomx_program_epilogue @@ -183,6 +188,94 @@ init_block_loop: randomx_dataset_init ENDP ALIGN 64 +randomx_dataset_init_avx2_prologue PROC + include asm/program_sshash_avx2_save_registers.inc + + mov rdi, qword ptr [rcx] ;# cache->memory + mov rsi, rdx ;# dataset + mov rbp, r8 ;# block index + push r9 ;# max. block index + sub rsp, 40 + + jmp loop_begin + include asm/program_sshash_avx2_constants.inc + +ALIGN 64 +loop_begin: + include asm/program_sshash_avx2_loop_begin.inc + + ;# init integer registers (lane 0) + lea r8, [rbp+1] + imul r8, qword ptr [r0_avx2_mul] + mov r9, qword ptr [r1_avx2_add] + xor r9, r8 + mov r10, qword ptr [r2_avx2_add] + xor r10, r8 + mov r11, qword ptr [r3_avx2_add] + xor r11, r8 + mov r12, qword ptr [r4_avx2_add] + xor r12, r8 + mov r13, qword ptr [r5_avx2_add] + xor r13, r8 + mov r14, qword ptr [r6_avx2_add] + xor r14, r8 + mov r15, qword ptr [r7_avx2_add] + xor r15, r8 + + ;# init AVX registers (lanes 1-4) + mov qword ptr [rsp+32], rbp + vbroadcastsd ymm0, qword ptr [rsp+32] + vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments] + + ;# ymm0 *= r0_avx2_mul + vbroadcastsd ymm1, qword ptr [r0_avx2_mul] + vpsrlq ymm8, ymm0, 32 + vpsrlq ymm9, ymm1, 32 + vpmuludq ymm10, ymm0, ymm1 + vpmuludq ymm11, ymm9, ymm0 + vpmuludq ymm0, ymm8, ymm1 + vpsllq ymm11, ymm11, 32 + vpsllq ymm0, ymm0, 32 + vpaddq ymm10, ymm10, ymm11 + vpaddq ymm0, ymm10, ymm0 + + vbroadcastsd ymm1, qword ptr [r1_avx2_add] + vpxor ymm1, ymm0, ymm1 + vbroadcastsd ymm2, qword ptr [r2_avx2_add] + vpxor ymm2, ymm0, ymm2 + vbroadcastsd ymm3, qword ptr [r3_avx2_add] + vpxor ymm3, ymm0, ymm3 + vbroadcastsd ymm4, qword ptr [r4_avx2_add] + vpxor ymm4, ymm0, ymm4 + vbroadcastsd ymm5, qword ptr [r5_avx2_add] + vpxor ymm5, ymm0, ymm5 + vbroadcastsd ymm6, qword ptr [r6_avx2_add] + vpxor ymm6, ymm0, ymm6 + vbroadcastsd ymm7, qword ptr [r7_avx2_add] + vpxor ymm7, ymm0, ymm7 + + vbroadcastsd ymm15, qword ptr [mul_hi_avx2_data] ;# carry_bit (bit 32) + vpsllq ymm14, ymm15, 31 ;# sign64 (bit 63) +randomx_dataset_init_avx2_prologue ENDP + + ;# generated SuperscalarHash code goes here + +randomx_dataset_init_avx2_loop_end PROC + include asm/program_sshash_avx2_loop_end.inc +randomx_dataset_init_avx2_loop_end ENDP + +randomx_dataset_init_avx2_epilogue PROC + include asm/program_sshash_avx2_epilogue.inc +randomx_dataset_init_avx2_epilogue ENDP + +randomx_dataset_init_avx2_ssh_load PROC + include asm/program_sshash_avx2_ssh_load.inc +randomx_dataset_init_avx2_ssh_load ENDP + +randomx_dataset_init_avx2_ssh_prefetch PROC + include asm/program_sshash_avx2_ssh_prefetch.inc +randomx_dataset_init_avx2_ssh_prefetch ENDP + 
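The generated SuperscalarHash code referenced above processes five dataset items per loop iteration: one in the scalar registers (lane 0) and four packed into ymm registers (lanes 1-4). AVX2 has no 64x64->64-bit multiply, so the "ymm0 *= r0_avx2_mul" sequence assembles it from three 32x32 partial products (vpmuludq). An intrinsics restatement of that same sequence, for illustration only and not part of the patch:

#include <immintrin.h>

// Low 64 bits of a*b in each 64-bit lane, built from 32x32->64 multiplies:
// lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32)
static inline __m256i mul64_lanes(__m256i a, __m256i b)
{
    const __m256i a_hi = _mm256_srli_epi64(a, 32);
    const __m256i b_hi = _mm256_srli_epi64(b, 32);
    const __m256i lo   = _mm256_mul_epu32(a, b);        // lo(a) * lo(b)
    const __m256i m1   = _mm256_mul_epu32(b_hi, a);     // lo(a) * hi(b)
    const __m256i m2   = _mm256_mul_epu32(a_hi, b);     // hi(a) * lo(b)
    return _mm256_add_epi64(lo, _mm256_add_epi64(_mm256_slli_epi64(m1, 32),
                                                 _mm256_slli_epi64(m2, 32)));
}
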
randomx_program_epilogue PROC include asm/program_epilogue_store.inc include asm/program_epilogue_win64.inc diff --git a/src/crypto/randomx/jit_compiler_x86_static.hpp b/src/crypto/randomx/jit_compiler_x86_static.hpp index 6523f9c4..121db5be 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.hpp +++ b/src/crypto/randomx/jit_compiler_x86_static.hpp @@ -44,6 +44,11 @@ extern "C" { void randomx_program_loop_store(); void randomx_program_loop_end(); void randomx_dataset_init(); + void randomx_dataset_init_avx2_prologue(); + void randomx_dataset_init_avx2_loop_end(); + void randomx_dataset_init_avx2_epilogue(); + void randomx_dataset_init_avx2_ssh_load(); + void randomx_dataset_init_avx2_ssh_prefetch(); void randomx_program_epilogue(); void randomx_sshash_load(); void randomx_sshash_prefetch(); diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index 9dd4aeee..9986a33f 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -47,7 +47,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "base/tools/Profiler.h" +#include "crypto/rx/Profiler.h" RandomX_ConfigurationWownero::RandomX_ConfigurationWownero() { @@ -381,9 +381,9 @@ extern "C" { break; case RANDOMX_FLAG_JIT: - cache->jit = new randomx::JitCompiler(false); + cache->jit = new randomx::JitCompiler(false, true); cache->initialize = &randomx::initCacheCompile; - cache->datasetInit = cache->jit->getDatasetInitFunc(); + cache->datasetInit = nullptr; cache->memory = memory; break; diff --git a/src/crypto/randomx/randomx.h b/src/crypto/randomx/randomx.h index 9a1fb3c7..f81df9db 100644 --- a/src/crypto/randomx/randomx.h +++ b/src/crypto/randomx/randomx.h @@ -170,6 +170,7 @@ void randomx_apply_config(const T& config) void randomx_set_scratchpad_prefetch_mode(int mode); void randomx_set_huge_pages_jit(bool hugePages); +void randomx_set_optimized_dataset_init(int value); #if defined(__cplusplus) extern "C" { diff --git a/src/crypto/randomx/superscalar.cpp b/src/crypto/randomx/superscalar.cpp index 2eca929f..98c1ea8e 100644 --- a/src/crypto/randomx/superscalar.cpp +++ b/src/crypto/randomx/superscalar.cpp @@ -196,7 +196,7 @@ namespace randomx { int latency_; int resultOp_ = 0; int dstOp_ = 0; - int srcOp_; + int srcOp_ = 0; SuperscalarInstructionInfo(const char* name) : name_(name), type_(SuperscalarInstructionType::INVALID), latency_(0) {} @@ -282,11 +282,11 @@ namespace randomx { return fetchNextDefault(gen); } private: - const char* name_; - int index_; - const int* counts_; - int opsCount_; - DecoderBuffer() : index_(-1) {} + const char* name_ = nullptr; + int index_ = -1; + const int* counts_ = nullptr; + int opsCount_ = 0; + DecoderBuffer() = default; static const DecoderBuffer decodeBuffer484; static const DecoderBuffer decodeBuffer7333; static const DecoderBuffer decodeBuffer3733; @@ -555,10 +555,10 @@ namespace randomx { const SuperscalarInstructionInfo* info_; int src_ = -1; int dst_ = -1; - int mod_; - uint32_t imm32_; - SuperscalarInstructionType opGroup_; - int opGroupPar_; + int mod_ = 0; + uint32_t imm32_ = 0; + SuperscalarInstructionType opGroup_ = SuperscalarInstructionType::INVALID; + int opGroupPar_ = 0; bool canReuse_ = false; bool groupParIsSource_ = false; diff --git a/src/crypto/randomx/superscalar_program.hpp b/src/crypto/randomx/superscalar_program.hpp index dc173591..3e40eb2b 100644 --- a/src/crypto/randomx/superscalar_program.hpp +++ b/src/crypto/randomx/superscalar_program.hpp @@ -39,13 +39,13 @@ namespace randomx { 
Instruction& operator()(int pc) { return programBuffer[pc]; } - uint32_t getSize() { + uint32_t getSize() const { return size; } void setSize(uint32_t val) { size = val; } - int getAddressRegister() { + int getAddressRegister() const { return addrReg; } void setAddressRegister(int val) { diff --git a/src/crypto/randomx/virtual_machine.cpp b/src/crypto/randomx/virtual_machine.cpp index 4a6990b2..43e19939 100644 --- a/src/crypto/randomx/virtual_machine.cpp +++ b/src/crypto/randomx/virtual_machine.cpp @@ -30,13 +30,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include "crypto/randomx/virtual_machine.hpp" -#include "crypto/randomx/common.hpp" #include "crypto/randomx/aes_hash.hpp" -#include "crypto/randomx/blake2/blake2.h" -#include "crypto/randomx/intrin_portable.h" #include "crypto/randomx/allocator.hpp" +#include "crypto/randomx/blake2/blake2.h" +#include "crypto/randomx/common.hpp" +#include "crypto/randomx/intrin_portable.h" #include "crypto/randomx/soft_aes.h" -#include "base/tools/Profiler.h" +#include "crypto/rx/Profiler.h" randomx_vm::~randomx_vm() { diff --git a/src/crypto/randomx/vm_compiled.cpp b/src/crypto/randomx/vm_compiled.cpp index c32034b3..842bfe65 100644 --- a/src/crypto/randomx/vm_compiled.cpp +++ b/src/crypto/randomx/vm_compiled.cpp @@ -1,5 +1,7 @@ /* -Copyright (c) 2018-2019, tevador +Copyright (c) 2018-2020, tevador +Copyright (c) 2019-2020, SChernykh +Copyright (c) 2019-2020, XMRig , All rights reserved. @@ -28,7 +30,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "crypto/randomx/vm_compiled.hpp" #include "crypto/randomx/common.hpp" -#include "base/tools/Profiler.h" +#include "crypto/rx/Profiler.h" namespace randomx { @@ -56,9 +58,9 @@ namespace randomx { void CompiledVm::execute() { PROFILE_SCOPE(RandomX_JIT_execute); -#ifdef XMRIG_ARM +# ifdef XMRIG_ARM memcpy(reg.f, config.eMask, sizeof(config.eMask)); -#endif +# endif compiler.getProgramFunc()(reg, mem, scratchpad, RandomX_CurrentConfig.ProgramIterations); } diff --git a/src/crypto/randomx/vm_compiled.hpp b/src/crypto/randomx/vm_compiled.hpp index 2db99c75..0824d6bd 100644 --- a/src/crypto/randomx/vm_compiled.hpp +++ b/src/crypto/randomx/vm_compiled.hpp @@ -59,7 +59,7 @@ namespace randomx { protected: void execute(); - JitCompiler compiler{ true }; + JitCompiler compiler{ true, false }; }; using CompiledVmDefault = CompiledVm<1>; diff --git a/src/crypto/randomx/vm_compiled_light.cpp b/src/crypto/randomx/vm_compiled_light.cpp index 12e8de0f..e7fdd7e9 100644 --- a/src/crypto/randomx/vm_compiled_light.cpp +++ b/src/crypto/randomx/vm_compiled_light.cpp @@ -1,5 +1,7 @@ /* -Copyright (c) 2018-2019, tevador +Copyright (c) 2018-2020, tevador +Copyright (c) 2019-2020, SChernykh +Copyright (c) 2019-2020, XMRig , All rights reserved. 
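For reference before the Rx.cpp change further below wires it up: the dataset-init AVX2 selection added earlier in the JitCompilerX86 constructor, and exposed as the "init-avx2" option later in this patch, uses a -1/0/+1 convention (auto-detect / always off / always on). A standalone restatement of that policy, assuming the per-instance optimizedInitDatasetEnable flag is set; the real code queries xmrig::Cpu::info() rather than taking the CPU facts as parameters:

// Sketch only. Mirrors the vendor/architecture heuristic from the constructor:
// AVX2 init is faster on Zen3, and on Intel or Zen2 only when SMT/HT is off;
// it is slower on Zen/Zen+ and on unknown CPUs, so those default to scalar init.
static bool useAvx2DatasetInit(int option, bool hasAVX2, bool isIntel,
                               bool isZen2, bool isZen3, bool smtEnabled)
{
    if (!hasAVX2 || option == 0) {
        return false;                 // hardware requirement, or explicit opt-out
    }
    if (option > 0) {
        return true;                  // explicit opt-in
    }
    if (isZen3) {                     // option < 0: auto-detect
        return true;
    }
    return (isIntel || isZen2) && !smtEnabled;
}
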
@@ -36,6 +38,11 @@ namespace randomx { void CompiledLightVm::setCache(randomx_cache* cache) { cachePtr = cache; mem.memory = cache->memory; + +# ifdef XMRIG_SECURE_JIT + compiler.enableWriting(); +# endif + compiler.generateSuperscalarHash(cache->programs); } @@ -43,7 +50,13 @@ namespace randomx { void CompiledLightVm::run(void* seed) { VmBase::generateProgram(seed); randomx_vm::initialize(); + +# ifdef XMRIG_SECURE_JIT + compiler.enableWriting(); +# endif + compiler.generateProgramLight(program, config, datasetOffset); + CompiledVm::execute(); } diff --git a/src/base/tools/Profiler.cpp b/src/crypto/rx/Profiler.cpp similarity index 93% rename from src/base/tools/Profiler.cpp rename to src/crypto/rx/Profiler.cpp index ac2a6d2c..9cfe72a0 100644 --- a/src/base/tools/Profiler.cpp +++ b/src/crypto/rx/Profiler.cpp @@ -1,6 +1,6 @@ /* XMRig - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,9 +17,11 @@ */ -#include "base/tools/Profiler.h" +#include "crypto/rx/Profiler.h" #include "base/io/log/Log.h" #include "base/io/log/Tags.h" + + #include #include #include diff --git a/src/base/tools/Profiler.h b/src/crypto/rx/Profiler.h similarity index 94% rename from src/base/tools/Profiler.h rename to src/crypto/rx/Profiler.h index ae3470f8..ca7d29db 100644 --- a/src/base/tools/Profiler.h +++ b/src/crypto/rx/Profiler.h @@ -1,6 +1,6 @@ /* XMRig - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/crypto/rx/Rx.cpp b/src/crypto/rx/Rx.cpp index 2fffb0e6..40d3c612 100644 --- a/src/crypto/rx/Rx.cpp +++ b/src/crypto/rx/Rx.cpp @@ -1,14 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 tevador - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2019 tevador + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -103,6 +96,7 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu randomx_set_scratchpad_prefetch_mode(config.scratchpadPrefetchMode()); randomx_set_huge_pages_jit(cpu.isHugePagesJit()); + randomx_set_optimized_dataset_init(config.initDatasetAVX2()); if (!msrInitialized) { msrEnabled = msrInit(config, cpu.threads().get(seed.algorithm()).data()); diff --git a/src/crypto/rx/Rx.h b/src/crypto/rx/Rx.h index 1c452e8b..42bd0c15 100644 --- a/src/crypto/rx/Rx.h +++ b/src/crypto/rx/Rx.h @@ -1,14 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 tevador - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2019 tevador + * Copyright (c) 2018-2020 SChernykh + * 
Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/crypto/rx/RxAlgo.cpp b/src/crypto/rx/RxAlgo.cpp index 91d2c195..b7d2b083 100644 --- a/src/crypto/rx/RxAlgo.cpp +++ b/src/crypto/rx/RxAlgo.cpp @@ -1,14 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 tevador - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright (c) 2018-2019 tevador + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/crypto/rx/RxAlgo.h b/src/crypto/rx/RxAlgo.h index 81c4687c..0cbbbd35 100644 --- a/src/crypto/rx/RxAlgo.h +++ b/src/crypto/rx/RxAlgo.h @@ -1,14 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 tevador - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2019 tevador + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/crypto/rx/RxBasicStorage.cpp b/src/crypto/rx/RxBasicStorage.cpp index 7f389666..eacb6295 100644 --- a/src/crypto/rx/RxBasicStorage.cpp +++ b/src/crypto/rx/RxBasicStorage.cpp @@ -1,14 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 tevador - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2019 tevador + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -30,7 +23,6 @@ #include "base/io/log/Log.h" #include "base/io/log/Tags.h" #include "base/tools/Chrono.h" -#include "base/tools/Object.h" #include "crypto/rx/RxAlgo.h" #include "crypto/rx/RxCache.h" #include "crypto/rx/RxDataset.h" diff --git a/src/crypto/rx/RxBasicStorage.h b/src/crypto/rx/RxBasicStorage.h index c689df83..463b479e 100644 --- a/src/crypto/rx/RxBasicStorage.h +++ b/src/crypto/rx/RxBasicStorage.h @@ -1,14 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 tevador - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2019 tevador + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -29,7 +22,6 @@ #include 
"backend/common/interfaces/IRxStorage.h" -#include "base/tools/Object.h" namespace xmrig diff --git a/src/crypto/rx/RxConfig.cpp b/src/crypto/rx/RxConfig.cpp index c501e712..ae6215df 100644 --- a/src/crypto/rx/RxConfig.cpp +++ b/src/crypto/rx/RxConfig.cpp @@ -47,6 +47,7 @@ namespace xmrig { const char *RxConfig::kInit = "init"; +const char *RxConfig::kInitAVX2 = "init-avx2"; const char *RxConfig::kField = "randomx"; const char *RxConfig::kMode = "mode"; const char *RxConfig::kOneGbPages = "1gb-pages"; @@ -86,9 +87,10 @@ static_assert (kMsrArraySize == ICpuInfo::MSR_MOD_MAX, "kMsrArraySize and MSR_MO bool xmrig::RxConfig::read(const rapidjson::Value &value) { if (value.IsObject()) { - m_threads = Json::getInt(value, kInit, m_threads); - m_mode = readMode(Json::getValue(value, kMode)); - m_rdmsr = Json::getBool(value, kRdmsr, m_rdmsr); + m_threads = Json::getInt(value, kInit, m_threads); + m_initDatasetAVX2 = Json::getInt(value, kInitAVX2, m_initDatasetAVX2); + m_mode = readMode(Json::getValue(value, kMode)); + m_rdmsr = Json::getBool(value, kRdmsr, m_rdmsr); # ifdef XMRIG_FEATURE_MSR readMSR(Json::getValue(value, kWrmsr)); @@ -122,7 +124,7 @@ bool xmrig::RxConfig::read(const rapidjson::Value &value) } # endif - const uint32_t mode = static_cast(Json::getInt(value, kScratchpadPrefetchMode, static_cast(m_scratchpadPrefetchMode))); + const auto mode = static_cast(Json::getInt(value, kScratchpadPrefetchMode, static_cast(m_scratchpadPrefetchMode))); if (mode < ScratchpadPrefetchMax) { m_scratchpadPrefetchMode = static_cast(mode); } @@ -141,6 +143,7 @@ rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const Value obj(kObjectType); obj.AddMember(StringRef(kInit), m_threads, allocator); + obj.AddMember(StringRef(kInitAVX2), m_initDatasetAVX2, allocator); obj.AddMember(StringRef(kMode), StringRef(modeName()), allocator); obj.AddMember(StringRef(kOneGbPages), m_oneGbPages, allocator); obj.AddMember(StringRef(kRdmsr), m_rdmsr, allocator); diff --git a/src/crypto/rx/RxConfig.h b/src/crypto/rx/RxConfig.h index 1e79d468..fb3a656d 100644 --- a/src/crypto/rx/RxConfig.h +++ b/src/crypto/rx/RxConfig.h @@ -61,6 +61,7 @@ public: static const char *kCacheQoS; static const char *kField; static const char *kInit; + static const char *kInitAVX2; static const char *kMode; static const char *kOneGbPages; static const char *kRdmsr; @@ -83,6 +84,7 @@ public: const char *modeName() const; uint32_t threads(uint32_t limit = 100) const; + inline int initDatasetAVX2() const { return m_initDatasetAVX2; } inline bool isOneGbPages() const { return m_oneGbPages; } inline bool rdmsr() const { return m_rdmsr; } inline bool wrmsr() const { return m_wrmsr; } @@ -111,11 +113,12 @@ private: Mode readMode(const rapidjson::Value &value) const; - bool m_numa = true; - bool m_oneGbPages = false; - bool m_rdmsr = true; - int m_threads = -1; - Mode m_mode = AutoMode; + bool m_numa = true; + bool m_oneGbPages = false; + bool m_rdmsr = true; + int m_threads = -1; + int m_initDatasetAVX2 = -1; + Mode m_mode = AutoMode; ScratchpadPrefetchMode m_scratchpadPrefetchMode = ScratchpadPrefetchT0; diff --git a/src/crypto/rx/RxDataset.cpp b/src/crypto/rx/RxDataset.cpp index c207d6f4..b47285a3 100644 --- a/src/crypto/rx/RxDataset.cpp +++ b/src/crypto/rx/RxDataset.cpp @@ -1,14 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 
tevador - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2019 tevador + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,10 +19,12 @@ #include "crypto/rx/RxDataset.h" +#include "backend/cpu/Cpu.h" #include "base/io/log/Log.h" #include "base/io/log/Tags.h" #include "base/kernel/Platform.h" #include "crypto/common/VirtualMemory.h" +#include "crypto/randomx/randomx.h" #include "crypto/rx/RxAlgo.h" #include "crypto/rx/RxCache.h" @@ -45,7 +40,13 @@ static void init_dataset_wrapper(randomx_dataset *dataset, randomx_cache *cache, { Platform::setThreadPriority(priority); - randomx_init_dataset(dataset, cache, startItem, itemCount); + if (Cpu::info()->hasAVX2() && (itemCount % 5)) { + randomx_init_dataset(dataset, cache, startItem, itemCount - (itemCount % 5)); + randomx_init_dataset(dataset, cache, startItem + itemCount - 5, 5); + } + else { + randomx_init_dataset(dataset, cache, startItem, itemCount); + } } @@ -162,6 +163,22 @@ size_t xmrig::RxDataset::size(bool cache) const } +uint8_t *xmrig::RxDataset::tryAllocateScrathpad() +{ + auto p = reinterpret_cast(raw()); + if (!p) { + return nullptr; + } + + const size_t offset = m_scratchpadOffset.fetch_add(RANDOMX_SCRATCHPAD_L3_MAX_SIZE); + if (offset + RANDOMX_SCRATCHPAD_L3_MAX_SIZE > m_scratchpadLimit) { + return nullptr; + } + + return p + offset; +} + + void *xmrig::RxDataset::raw() const { return m_dataset ? randomx_get_dataset_memory(m_dataset) : nullptr; @@ -208,19 +225,3 @@ void xmrig::RxDataset::allocate(bool hugePages, bool oneGbPages) } # endif } - - -uint8_t* xmrig::RxDataset::tryAllocateScrathpad() -{ - uint8_t* p = reinterpret_cast(raw()); - if (!p) { - return nullptr; - } - - const size_t offset = m_scratchpadOffset.fetch_add(RANDOMX_SCRATCHPAD_L3_MAX_SIZE); - if (offset + RANDOMX_SCRATCHPAD_L3_MAX_SIZE > m_scratchpadLimit) { - return nullptr; - } - - return p + offset; -} diff --git a/src/crypto/rx/RxDataset.h b/src/crypto/rx/RxDataset.h index 798ec996..1621cae1 100644 --- a/src/crypto/rx/RxDataset.h +++ b/src/crypto/rx/RxDataset.h @@ -1,14 +1,7 @@ /* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 tevador - * Copyright 2018-2020 SChernykh - * Copyright 2016-2020 XMRig , + * Copyright (c) 2018-2019 tevador + * Copyright (c) 2018-2020 SChernykh + * Copyright (c) 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -28,12 +21,10 @@ #define XMRIG_RX_DATASET_H -#include "base/crypto/Algorithm.h" #include "base/tools/Buffer.h" #include "base/tools/Object.h" #include "crypto/common/HugePagesInfo.h" #include "crypto/randomx/configuration.h" -#include "crypto/randomx/randomx.h" #include "crypto/rx/RxConfig.h" #include @@ -68,11 +59,10 @@ public: bool isOneGbPages() const; HugePagesInfo hugePages(bool cache = true) const; size_t size(bool cache = true) const; + uint8_t *tryAllocateScrathpad(); void *raw() const; void setRaw(const void *raw); - uint8_t *tryAllocateScrathpad(); - static inline constexpr size_t maxSize() { return RANDOMX_DATASET_MAX_SIZE; } private: @@ -82,10 +72,9 @@ private: const uint32_t m_node; 
     randomx_dataset *m_dataset = nullptr;
     RxCache *m_cache = nullptr;
-    VirtualMemory *m_memory = nullptr;
-
-    std::atomic<size_t> m_scratchpadOffset;
     size_t m_scratchpadLimit = 0;
+    std::atomic<size_t> m_scratchpadOffset{};
+    VirtualMemory *m_memory = nullptr;
 };
diff --git a/src/crypto/rx/RxNUMAStorage.cpp b/src/crypto/rx/RxNUMAStorage.cpp
index 4b4ab4d6..6bd5627f 100644
--- a/src/crypto/rx/RxNUMAStorage.cpp
+++ b/src/crypto/rx/RxNUMAStorage.cpp
@@ -1,14 +1,7 @@
 /* XMRig
- * Copyright 2010 Jeff Garzik
- * Copyright 2012-2014 pooler
- * Copyright 2014 Lucas Jones
- * Copyright 2014-2016 Wolf9466
- * Copyright 2016 Jay D Dee
- * Copyright 2017-2019 XMR-Stak ,
- * Copyright 2018 Lee Clagett
- * Copyright 2018-2019 tevador
- * Copyright 2018-2020 SChernykh
- * Copyright 2016-2020 XMRig ,
+ * Copyright (c) 2018-2019 tevador
+ * Copyright (c) 2018-2020 SChernykh
+ * Copyright (c) 2016-2020 XMRig ,
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -32,7 +25,6 @@
 #include "base/io/log/Tags.h"
 #include "base/kernel/Platform.h"
 #include "base/tools/Chrono.h"
-#include "base/tools/Object.h"
 #include "crypto/rx/RxAlgo.h"
 #include "crypto/rx/RxCache.h"
 #include "crypto/rx/RxDataset.h"
diff --git a/src/crypto/rx/RxNUMAStorage.h b/src/crypto/rx/RxNUMAStorage.h
index ef0d6431..29ec5fb0 100644
--- a/src/crypto/rx/RxNUMAStorage.h
+++ b/src/crypto/rx/RxNUMAStorage.h
@@ -1,14 +1,7 @@
 /* XMRig
- * Copyright 2010 Jeff Garzik
- * Copyright 2012-2014 pooler
- * Copyright 2014 Lucas Jones
- * Copyright 2014-2016 Wolf9466
- * Copyright 2016 Jay D Dee
- * Copyright 2017-2019 XMR-Stak ,
- * Copyright 2018 Lee Clagett
- * Copyright 2018-2019 tevador
- * Copyright 2018-2020 SChernykh
- * Copyright 2016-2020 XMRig ,
+ * Copyright (c) 2018-2019 tevador
+ * Copyright (c) 2018-2020 SChernykh
+ * Copyright (c) 2016-2020 XMRig ,
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -29,7 +22,6 @@
 #include "backend/common/interfaces/IRxStorage.h"
-#include "base/tools/Object.h"
 #include
diff --git a/src/crypto/rx/RxQueue.cpp b/src/crypto/rx/RxQueue.cpp
index 8badab50..0c55434d 100644
--- a/src/crypto/rx/RxQueue.cpp
+++ b/src/crypto/rx/RxQueue.cpp
@@ -30,6 +30,7 @@
 #include "base/io/Async.h"
 #include "base/io/log/Log.h"
 #include "base/io/log/Tags.h"
+#include "base/tools/Cvt.h"
 #include "crypto/rx/RxBasicStorage.h"
@@ -149,12 +150,12 @@ void xmrig::RxQueue::backgroundInit()
                  item.nodeset.size() > 1 ? "s" : "",
                  item.seed.algorithm().shortName(),
                  item.threads,
-                 Buffer::toHex(item.seed.data().data(), 8).data()
+                 Cvt::toHex(item.seed.data().data(), 8).data()
                  );
         m_storage->init(item.seed, item.threads, item.hugePages, item.oneGbPages, item.mode, item.priority);
-        lock = std::unique_lock<std::mutex>(m_mutex);
+        lock.lock();
         if (m_state == STATE_SHUTDOWN || !m_queue.empty()) {
             continue;
diff --git a/src/crypto/rx/RxVm.cpp b/src/crypto/rx/RxVm.cpp
index aa2217fc..f4b37375 100644
--- a/src/crypto/rx/RxVm.cpp
+++ b/src/crypto/rx/RxVm.cpp
@@ -1,14 +1,7 @@
 /* XMRig
- * Copyright 2010 Jeff Garzik
- * Copyright 2012-2014 pooler
- * Copyright 2014 Lucas Jones
- * Copyright 2014-2016 Wolf9466
- * Copyright 2016 Jay D Dee
- * Copyright 2017-2019 XMR-Stak ,
- * Copyright 2018 Lee Clagett
- * Copyright 2018-2019 tevador
- * Copyright 2018-2020 SChernykh
- * Copyright 2016-2020 XMRig ,
+ * Copyright (c) 2018-2019 tevador
+ * Copyright (c) 2018-2020 SChernykh
+ * Copyright (c) 2016-2020 XMRig ,
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -26,6 +19,7 @@
 #include "crypto/randomx/randomx.h"
+#include "backend/cpu/Cpu.h"
 #include "crypto/rx/RxCache.h"
 #include "crypto/rx/RxDataset.h"
 #include "crypto/rx/RxVm.h"
@@ -36,7 +30,7 @@ extern "C" uint32_t rx_blake2b_use_sse41;
 #endif
-randomx_vm* xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, uint32_t node)
+randomx_vm *xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool softAes, const Assembly &assembly, uint32_t node)
 {
     int flags = 0;
@@ -52,11 +46,8 @@ randomx_vm* xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool so
         flags |= RANDOMX_FLAG_JIT;
     }
-    if (assembly == Assembly::AUTO) {
-        assembly = Cpu::info()->assembly();
-    }
-
-    if ((assembly == Assembly::RYZEN) || (assembly == Assembly::BULLDOZER)) {
+    const auto asmId = assembly == Assembly::AUTO ? Cpu::info()->assembly() : assembly.id();
+    if ((asmId == Assembly::RYZEN) || (asmId == Assembly::BULLDOZER)) {
         flags |= RANDOMX_FLAG_AMD;
     }
diff --git a/src/crypto/rx/RxVm.h b/src/crypto/rx/RxVm.h
index 6397d726..424da209 100644
--- a/src/crypto/rx/RxVm.h
+++ b/src/crypto/rx/RxVm.h
@@ -1,14 +1,7 @@
 /* XMRig
- * Copyright 2010 Jeff Garzik
- * Copyright 2012-2014 pooler
- * Copyright 2014 Lucas Jones
- * Copyright 2014-2016 Wolf9466
- * Copyright 2016 Jay D Dee
- * Copyright 2017-2019 XMR-Stak ,
- * Copyright 2018 Lee Clagett
- * Copyright 2018-2019 tevador
- * Copyright 2018-2020 SChernykh
- * Copyright 2016-2020 XMRig ,
+ * Copyright (c) 2018-2019 tevador
+ * Copyright (c) 2018-2020 SChernykh
+ * Copyright (c) 2016-2020 XMRig ,
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -28,10 +21,6 @@
 #define XMRIG_RX_VM_H
-#include "base/tools/Object.h"
-#include "backend/cpu/Cpu.h"
-
-
 #include
@@ -42,16 +31,15 @@ namespace xmrig {
+class Assembly;
 class RxDataset;
 class RxVm
 {
 public:
-    XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxVm);
-
-    static randomx_vm* create(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, uint32_t node);
-    static void destroy(randomx_vm* vm);
+    static randomx_vm *create(RxDataset *dataset, uint8_t *scratchpad, bool softAes, const Assembly &assembly, uint32_t node);
+    static void destroy(randomx_vm *vm);
 };
diff --git a/src/crypto/rx/Rx_linux.cpp b/src/crypto/rx/Rx_linux.cpp
index 62ce936a..34a097bb 100644
--- a/src/crypto/rx/Rx_linux.cpp
+++ b/src/crypto/rx/Rx_linux.cpp
@@ -1,16 +1,9 @@
 /* XMRig
- * Copyright 2010 Jeff Garzik
- * Copyright 2012-2014 pooler
- * Copyright 2014 Lucas Jones
- * Copyright 2014-2016 Wolf9466
- * Copyright 2016 Jay D Dee
- * Copyright 2017-2019 XMR-Stak ,
- * Copyright 2018 Lee Clagett
- * Copyright 2018-2019 tevador
- * Copyright 2000 Transmeta Corporation
- * Copyright 2004-2008 H. Peter Anvin
- * Copyright 2018-2020 SChernykh
- * Copyright 2016-2020 XMRig ,
+ * Copyright (c) 2018-2019 tevador
+ * Copyright (c) 2000 Transmeta Corporation
+ * Copyright (c) 2004-2008 H. Peter Anvin
+ * Copyright (c) 2018-2020 SChernykh
+ * Copyright (c) 2016-2020 XMRig ,
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -110,7 +103,7 @@ static bool wrmsr_on_cpu(uint32_t reg, uint32_t cpu, uint64_t value, uint64_t ma
     char msr_file_name[64]{};
-    sprintf(msr_file_name, "/dev/cpu/%d/msr", cpu);
+    sprintf(msr_file_name, "/dev/cpu/%u/msr", cpu);
     int fd = open(msr_file_name, O_WRONLY);
     if (fd < 0) {
         return false;
diff --git a/src/crypto/rx/Rx_win.cpp b/src/crypto/rx/Rx_win.cpp
index b32c3dae..77e7c9b4 100644
--- a/src/crypto/rx/Rx_win.cpp
+++ b/src/crypto/rx/Rx_win.cpp
@@ -1,17 +1,10 @@
 /* XMRig
- * Copyright 2010 Jeff Garzik
- * Copyright 2012-2014 pooler
- * Copyright 2014 Lucas Jones
- * Copyright 2014-2016 Wolf9466
- * Copyright 2016 Jay D Dee
- * Copyright 2017-2019 XMR-Stak ,
- * Copyright 2018 Lee Clagett
- * Copyright 2018-2019 tevador
- * Copyright 2000 Transmeta Corporation
- * Copyright 2004-2008 H. Peter Anvin
- * Copyright 2007-2009 hiyohiyo ,
- * Copyright 2018-2020 SChernykh
- * Copyright 2016-2020 XMRig ,
+ * Copyright (c) 2018-2019 tevador
+ * Copyright (c) 2000 Transmeta Corporation
+ * Copyright (c) 2004-2008 H. Peter Anvin
+ * Copyright (c) 2007-2009 hiyohiyo ,
+ * Copyright (c) 2018-2020 SChernykh
+ * Copyright (c) 2016-2020 XMRig ,
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/src/net/JobResults.cpp b/src/net/JobResults.cpp
index 36ed7b12..4040cd9a 100644
--- a/src/net/JobResults.cpp
+++ b/src/net/JobResults.cpp
@@ -123,6 +123,7 @@ static void getResults(JobBundle &bundle, std::vector &results, uint3
     RxDataset *dataset = Rx::dataset(bundle.job, 0);
     if (dataset == nullptr) {
         errors += bundle.nonces.size();
+        delete memory;
         return;
     }
diff --git a/src/net/Network.h b/src/net/Network.h
index 0289b2d2..94d54657 100644
--- a/src/net/Network.h
+++ b/src/net/Network.h
@@ -92,7 +92,6 @@ private:
     IStrategy *m_strategy = nullptr;
     NetworkState *m_state = nullptr;
     Timer *m_timer = nullptr;
-    uint32_t m_benchSize = 0;
 };
diff --git a/src/net/strategies/DonateStrategy.cpp b/src/net/strategies/DonateStrategy.cpp
index f8a391d9..28a1b1cd 100644
--- a/src/net/strategies/DonateStrategy.cpp
+++ b/src/net/strategies/DonateStrategy.cpp
@@ -37,6 +37,7 @@
 #include "base/net/stratum/strategies/FailoverStrategy.h"
 #include "base/net/stratum/strategies/SinglePoolStrategy.h"
 #include "base/tools/Buffer.h"
+#include "base/tools/Cvt.h"
 #include "base/tools/Timer.h"
 #include "core/config/Config.h"
 #include "core/Controller.h"
@@ -65,9 +66,9 @@ xmrig::DonateStrategy::DonateStrategy(Controller *controller, IStrategyListener
 {
     uint8_t hash[200];
-    const String &user = controller->config()->pools().data().front().user();
+    const auto &user = controller->config()->pools().data().front().user();
     keccak(reinterpret_cast<const uint8_t *>(user.data()), user.size(), hash);
-    Buffer::toHex(hash, 32, m_userId);
+    Cvt::toHex(m_userId, sizeof(m_userId), hash, 32);
 # ifdef XMRIG_ALGO_KAWPOW
     constexpr Pool::Mode mode = Pool::MODE_AUTO_ETH;
diff --git a/src/version.h b/src/version.h
index 141eb08d..77ad0987 100644
--- a/src/version.h
+++ b/src/version.h
@@ -28,15 +28,15 @@
 #define APP_ID "xmrig"
 #define APP_NAME "XMRig"
 #define APP_DESC "XMRig miner"
-#define APP_VERSION "6.6.2"
+#define APP_VERSION "6.7.0-dev"
 #define APP_DOMAIN "xmrig.com"
 #define APP_SITE "www.xmrig.com"
 #define APP_COPYRIGHT "Copyright (C) 2016-2020 xmrig.com"
 #define APP_KIND "miner"
 #define APP_VER_MAJOR  6
-#define APP_VER_MINOR  6
-#define APP_VER_PATCH  2
+#define APP_VER_MINOR  7
+#define APP_VER_PATCH  0
 #ifdef _MSC_VER
 # if (_MSC_VER >= 1920)