diff --git a/CHANGELOG.md b/CHANGELOG.md
index f43e1197..4b92cb29 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,15 @@
+# v5.2.0
+- **[#1388](https://github.com/xmrig/xmrig/pull/1388) Added [1GB huge pages support](https://xmrig.com/docs/miner/hugepages#onegb-huge-pages) for Linux.**
+ - Added new option `1gb-pages` in `randomx` object with command line equivalent `--randomx-1gb-pages`.
+ - Added automatic huge pages configuration on Linux when running the miner with root privileges.
+- **Added [automatic Intel prefetchers configuration](https://xmrig.com/docs/miner/randomx-optimization-guide#intel-specific-optimizations) on Linux.**
+ - Added new option `wrmsr` in `randomx` object with command line equivalent `--randomx-wrmsr`.
+- [#1396](https://github.com/xmrig/xmrig/pull/1396) [#1401](https://github.com/xmrig/xmrig/pull/1401) New performance optimizations for Ryzen CPUs.
+- [#1385](https://github.com/xmrig/xmrig/issues/1385) Added `max-threads-hint` option support for RandomX dataset initialization threads.
+- [#1386](https://github.com/xmrig/xmrig/issues/1386) Added `priority` option support for RandomX dataset initialization threads.
+- For official builds all dependencies (libuv, hwloc, openssl) updated to recent versions.
+- Windows `msvc` builds now use Visual Studio 2019 instead of 2017.
+
# v5.1.1
- [#1365](https://github.com/xmrig/xmrig/issues/1365) Fixed various system response/stability issues.
- Added new CPU option `yield` and command line equivalent `--cpu-no-yield`.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 849c1257..b65d5337 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -30,6 +30,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
include (CheckIncludeFile)
include (cmake/cpu.cmake)
+include (cmake/os.cmake)
include (src/base/base.cmake)
include (src/backend/backend.cmake)
@@ -75,6 +76,7 @@ set(HEADERS_CRYPTO
src/crypto/cn/soft_aes.h
src/crypto/common/Algorithm.h
src/crypto/common/Coin.h
+ src/crypto/common/HugePagesInfo.h
src/crypto/common/keccak.h
src/crypto/common/MemoryPool.h
src/crypto/common/Nonce.h
@@ -114,6 +116,7 @@ set(SOURCES_CRYPTO
src/crypto/cn/CnHash.cpp
src/crypto/common/Algorithm.cpp
src/crypto/common/Coin.cpp
+ src/crypto/common/HugePagesInfo.cpp
src/crypto/common/keccak.cpp
src/crypto/common/MemoryPool.cpp
src/crypto/common/Nonce.cpp
@@ -131,40 +134,36 @@ if (WITH_HWLOC)
)
endif()
-if (WIN32)
- set(SOURCES_OS
- "${SOURCES_OS}"
+if (XMRIG_OS_WIN)
+ list(APPEND SOURCES_OS
res/app.rc
src/App_win.cpp
src/crypto/common/VirtualMemory_win.cpp
)
- add_definitions(/DWIN32)
set(EXTRA_LIBS ws2_32 psapi iphlpapi userenv)
-elseif (APPLE)
- set(SOURCES_OS
- "${SOURCES_OS}"
+elseif (XMRIG_OS_APPLE)
+ list(APPEND SOURCES_OS
src/App_unix.cpp
src/crypto/common/VirtualMemory_unix.cpp
)
else()
- set(SOURCES_OS
- "${SOURCES_OS}"
+ list(APPEND SOURCES_OS
src/App_unix.cpp
src/crypto/common/VirtualMemory_unix.cpp
)
- if (CMAKE_SYSTEM_NAME STREQUAL FreeBSD)
- set(EXTRA_LIBS kvm pthread)
- else()
- set(EXTRA_LIBS pthread rt dl)
- endif()
-endif()
+ if (XMRIG_OS_ANDROID)
+ set(EXTRA_LIBS pthread rt dl log)
+ elseif (XMRIG_OS_LINUX)
+ list(APPEND SOURCES_OS
+ src/crypto/common/LinuxMemory.h
+ src/crypto/common/LinuxMemory.cpp
+ )
-if (CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "Android")
- EXECUTE_PROCESS(COMMAND uname -o COMMAND tr -d '\n' OUTPUT_VARIABLE OPERATING_SYSTEM)
- if (OPERATING_SYSTEM MATCHES "Android")
- set(EXTRA_LIBS ${EXTRA_LIBS} log)
+ set(EXTRA_LIBS pthread rt dl)
+ elseif (XMRIG_OS_FREEBSD)
+ set(EXTRA_LIBS kvm pthread)
endif()
endif()
diff --git a/README.md b/README.md
index 7563ff2d..7ed04212 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ XMRig High performance, open source, cross platform RandomX, CryptoNight and Arg
- **OpenCL** for AMD GPUs.
- **CUDA** for NVIDIA GPUs via external [CUDA plugin](https://github.com/xmrig/xmrig-cuda).
-
+
## Download
* Binary releases: https://github.com/xmrig/xmrig/releases
@@ -65,6 +65,8 @@ CPU backend:
--randomx-init=N threads count to initialize RandomX dataset
--randomx-no-numa disable NUMA support for RandomX
--randomx-mode=MODE RandomX mode: auto, fast, light
+ --randomx-1gb-pages use 1GB hugepages for dataset (Linux only)
+ --randomx-wrmsr=N write value (0-15) to Intel MSR register 0x1a4 or do nothing (-1) (Linux only)
API:
--api-worker-id=ID custom worker-id for API
diff --git a/cmake/flags.cmake b/cmake/flags.cmake
index e9533eed..d2bc70d0 100644
--- a/cmake/flags.cmake
+++ b/cmake/flags.cmake
@@ -57,9 +57,9 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
add_definitions(/DHAVE_BUILTIN_CLEAR_CACHE)
elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ set(CMAKE_C_FLAGS_RELEASE "/MT /O2 /Oi /DNDEBUG /GL")
+ set(CMAKE_CXX_FLAGS_RELEASE "/MT /O2 /Oi /DNDEBUG /GL")
- set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Ox /Ot /Oi /MT /GL")
- set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox /Ot /Oi /MT /GL")
add_definitions(/D_CRT_SECURE_NO_WARNINGS)
add_definitions(/D_CRT_NONSTDC_NO_WARNINGS)
add_definitions(/DNOMINMAX)
diff --git a/cmake/os.cmake b/cmake/os.cmake
new file mode 100644
index 00000000..0270cc93
--- /dev/null
+++ b/cmake/os.cmake
@@ -0,0 +1,45 @@
+if (WIN32)
+ set(XMRIG_OS_WIN ON)
+elseif (APPLE)
+ set(XMRIG_OS_APPLE ON)
+
+ if (IOS OR CMAKE_SYSTEM_NAME STREQUAL iOS)
+ set(XMRIG_OS_IOS ON)
+ else()
+ set(XMRIG_OS_MACOS ON)
+ endif()
+else()
+ set(XMRIG_OS_UNIX ON)
+
+ if (ANDROID OR CMAKE_SYSTEM_NAME MATCHES "Android")
+ set(XMRIG_OS_ANDROID ON)
+ elseif(CMAKE_SYSTEM_NAME MATCHES "Linux")
+ set(XMRIG_OS_LINUX ON)
+ elseif(CMAKE_SYSTEM_NAME STREQUAL FreeBSD)
+ set(XMRIG_OS_FREEBSD ON)
+ endif()
+endif()
+
+
+if (XMRIG_OS_WIN)
+ add_definitions(/DWIN32)
+ add_definitions(/DXMRIG_OS_WIN)
+elseif(XMRIG_OS_APPLE)
+ add_definitions(/DXMRIG_OS_APPLE)
+
+ if (XMRIG_OS_IOS)
+ add_definitions(/DXMRIG_OS_IOS)
+ else()
+ add_definitions(/DXMRIG_OS_MACOS)
+ endif()
+elseif(XMRIG_OS_UNIX)
+ add_definitions(/DXMRIG_OS_UNIX)
+
+ if (XMRIG_OS_ANDROID)
+ add_definitions(/DXMRIG_OS_ANDROID)
+ elseif (XMRIG_OS_LINUX)
+ add_definitions(/DXMRIG_OS_LINUX)
+ elseif (XMRIG_OS_FREEBSD)
+ add_definitions(/DXMRIG_OS_FREEBSD)
+ endif()
+endif()
diff --git a/cmake/randomx.cmake b/cmake/randomx.cmake
index 290b8391..5a225c00 100644
--- a/cmake/randomx.cmake
+++ b/cmake/randomx.cmake
@@ -75,13 +75,12 @@ if (WITH_RANDOMX)
)
list(APPEND SOURCES_CRYPTO
- src/crypto/rx/RxConfig_hwloc.cpp
src/crypto/rx/RxNUMAStorage.cpp
)
- else()
- list(APPEND SOURCES_CRYPTO
- src/crypto/rx/RxConfig_basic.cpp
- )
+ endif()
+
+ if (XMRIG_OS_LINUX)
+ list(APPEND SOURCES_CRYPTO src/crypto/rx/Rx_linux.cpp)
endif()
else()
remove_definitions(/DXMRIG_ALGO_RANDOMX)
diff --git a/doc/screenshot_v5_2_0.png b/doc/screenshot_v5_2_0.png
new file mode 100644
index 00000000..630dbf97
Binary files /dev/null and b/doc/screenshot_v5_2_0.png differ
diff --git a/scripts/build_deps.sh b/scripts/build_deps.sh
index d8948323..7c4fc3da 100755
--- a/scripts/build_deps.sh
+++ b/scripts/build_deps.sh
@@ -1,6 +1,6 @@
#!/bin/bash -e
-UV_VERSION="1.33.1"
+UV_VERSION="1.34.0"
OPENSSL_VERSION="1.1.1d"
HWLOC_VERSION="2.1.0"
diff --git a/src/3rdparty/hwloc/AUTHORS b/src/3rdparty/hwloc/AUTHORS
index 7187a723..b4809d15 100644
--- a/src/3rdparty/hwloc/AUTHORS
+++ b/src/3rdparty/hwloc/AUTHORS
@@ -21,6 +21,7 @@ Nathalie Furmento CNRS
Bryon Gloden
Brice Goglin Inria
Gilles Gouaillardet RIST
+Valentin Hoyet Inria
Joshua Hursey UWL
Alexey Kardashevskiy IBM
Rob Latham ANL
diff --git a/src/3rdparty/hwloc/CMakeLists.txt b/src/3rdparty/hwloc/CMakeLists.txt
index 431c11eb..0e56b6fc 100644
--- a/src/3rdparty/hwloc/CMakeLists.txt
+++ b/src/3rdparty/hwloc/CMakeLists.txt
@@ -5,7 +5,7 @@ include_directories(include)
include_directories(src)
add_definitions(/D_CRT_SECURE_NO_WARNINGS)
-set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /MT")
+set(CMAKE_C_FLAGS_RELEASE "/MT /O2 /Ob2 /DNDEBUG")
set(HEADERS
include/hwloc.h
diff --git a/src/3rdparty/hwloc/NEWS b/src/3rdparty/hwloc/NEWS
index 664c8d55..99809e6a 100644
--- a/src/3rdparty/hwloc/NEWS
+++ b/src/3rdparty/hwloc/NEWS
@@ -13,8 +13,96 @@ $HEADER$
This file contains the main features as well as overviews of specific
bug fixes (and other actions) for each version of hwloc since version
-0.9 (as initially released as "libtopology", then re-branded to "hwloc"
-in v0.9.1).
+0.9.
+
+
+Version 2.1.0
+-------------
+* API
+ + Add a new "Die" object (HWLOC_OBJ_DIE) for upcoming x86 processors
+ with multiple dies per package, in the x86 and Linux backends.
+ + Add the new HWLOC_OBJ_MEMCACHE object type for memory-side caches.
+ - They are filtered-out by default, except in command-line tools.
+ - They are only available on very recent platforms running Linux 5.2+
+ and uptodate ACPI tables.
+ - The KNL MCDRAM in cache mode is still exposed as a L3 unless
+ HWLOC_KNL_MSCACHE_L3=0 in the environment.
+ + Add HWLOC_RESTRICT_FLAG_BYNODESET and _REMOVE_MEMLESS for restricting
+ topologies based on some memory nodes.
+ + Add hwloc_topology_set_components() for blacklisting some components
+ from being enabled in a topology.
+ + Add hwloc_bitmap_nr_ulongs() and hwloc_bitmap_from/to_ulongs(),
+ thanks to Junchao Zhang for the suggestion.
+ + Improve the API for dealing with disallowed resources
+ - HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM is replaced with FLAG_INCLUDE_DISALLOWED
+ and --whole-system command-line options with --disallowed.
+ . Former names are still accepted for backward compatibility.
+ - Add hwloc_topology_allow() for changing allowed sets after load().
+ - Add the HWLOC_ALLOW=all environment variable to totally ignore
+ administrative restrictions such as Linux Cgroups.
+ - Add disallowed_pu and disallowed_numa bits to the discovery support
+ structure.
+ + Group objects have a new "dont_merge" attribute to prevent them from
+ being automatically merged with identical parent or children.
+ + Add more distances-related features:
+ - Add hwloc_distances_get_name() to retrieve a string describing
+ what a distances structure contain.
+ - Add hwloc_distances_get_by_name() to retrieve distances structures
+ based on their name.
+ - Add hwloc_distances_release_remove()
+ - Distances may now cover objects of different types with new kind
+ HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES.
+* Backends
+ + Add support for Linux 5.3 new sysfs cpu topology files with Die information.
+ + Add support for Intel v2 Extended Topology Enumeration in the x86 backend.
+ + Improve memory locality on Linux by using HMAT initiators (exposed
+ since Linux 5.2+), and NUMA distances for CPU-less NUMA nodes.
+ + The x86 backend now properly handles offline CPUs.
+ + Detect the locality of NVIDIA GPU OpenCL devices.
+ + Ignore NUMA nodes that correspond to NVIDIA GPU by default.
+ - They may be unignored if HWLOC_KEEP_NVIDIA_GPU_NUMA_NODES=1 in the environment.
+ - Fix their CPU locality and add info attributes to identify them.
+ Thanks to Max Katz and Edgar Leon for the help.
+ + Add support for IBM S/390 drawers.
+ + Rework the heuristics for discovering KNL Cluster and Memory modes
+ to stop assuming all CPUs are online (required for mOS support).
+ Thanks to Sharath K Bhat for testing patches.
+ + Ignore NUMA node information from AMD topoext in the x86 backend,
+ unless HWLOC_X86_TOPOEXT_NUMANODES=1 is set in the environment.
+ + Expose Linux DAX devices as hwloc Block OS devices.
+ + Remove support for /proc/cpuinfo-only topology discovery in Linux
+ kernel prior to 2.6.16.
+ + Disable POWER device-tree-based topology on Linux by default.
+ - It may be reenabled by setting HWLOC_USE_DT=1 in the environment.
+ + Discovery components are now divided in phases that may be individually
+ blacklisted.
+ - The linuxio component has been merged back into the linux component.
+* Tools
+ + lstopo
+ - lstopo factorizes objects by default in the graphical output when
+ there are more than 4 identical children.
+ . New options --no-factorize and --factorize may be used to configure this.
+ . Hit the 'f' key to disable factorizing in interactive outputs.
+ - Both logical and OS/physical indexes are now displayed by default
+ for PU and NUMA nodes.
+ - The X11 and Windows interactive outputs support many keyboard
+ shortcuts to dynamically customize the attributes, legend, etc.
+ - Add --linespacing and change default margins and linespacing.
+ - Add --allow for changing allowed sets.
+ - Add a native SVG backend. Its graphical output may be slightly less
+ pretty than Cairo (still used by default if available) but the SVG
+ code provides attributes to manipulate objects from HTML/JS.
+ See dynamic_SVG_example.html for an example.
+ + Add --nodeset options to hwloc-calc for converting between cpusets and
+ nodesets.
+ + Add --no-smt to lstopo, hwloc-bind and hwloc-calc to ignore multiple
+ PU in SMT cores.
+ + hwloc-annotate may annotate multiple locations at once.
+ + Add a HTML/JS version of hwloc-ps. See contrib/hwloc-ps.www/README.
+ + Add bash completions.
+* Misc
+ + Add several FAQ entries in "Compatibility between hwloc versions"
+ about API version, ABI, XML, Synthetic strings, and shmem topologies.
Version 2.0.4 (also included in 1.11.13 when appropriate)
@@ -214,6 +302,54 @@ Version 2.0.0
+ hwloc now requires a C99 compliant compiler.
+Version 1.11.13 (also included in 2.0.4)
+---------------
+* Add support for Linux 5.3 new sysfs cpu topology files with Die information.
+* Add support for Intel v2 Extended Topology Enumeration in the x86 backend.
+* Tiles, Modules and Dies are exposed as Groups for now.
+ + HWLOC_DONT_MERGE_DIE_GROUPS=1 may be set in the environment to prevent
+ Die groups from being automatically merged with identical parent or children.
+* Ignore NUMA node information from AMD topoext in the x86 backend,
+ unless HWLOC_X86_TOPOEXT_NUMANODES=1 is set in the environment.
+* Group objects have a new "dont_merge" attribute to prevent them from
+ being automatically merged with identical parent or children.
+
+
+Version 1.11.12 (also included in 2.0.3)
+---------------
+* Fix a corner case of hwloc_topology_restrict() where children would
+ become out-of-order.
+* Fix the return length of export_xmlbuffer() functions to always
+ include the ending \0.
+
+
+Version 1.11.11 (also included in 2.0.2)
+---------------
+* Add support for Hygon Dhyana processors in the x86 backend,
+ thanks to Pu Wen for the patch.
+* Fix symbol renaming to also rename internal components,
+ thanks to Evan Ramos for the patch.
+* Fix build on HP-UX, thanks to Richard Lloyd for reporting the issues.
+* Detect PCI link speed without being root on Linux >= 4.13.
+
+
+Version 1.11.10 (also included in 2.0.1)
+---------------
+* Fix detection of cores and hyperthreads on Mac OS X.
+* Serialize pciaccess discovery to fix concurrent topology loads in
+ multiple threads.
+* Fix first touch area memory binding on Linux when thread memory
+ binding is different.
+* Some minor fixes to memory binding.
+* Fix hwloc-dump-hwdata to only process SMBIOS information that correspond
+ to the KNL and KNM configuration.
+* Add a heuristic for guessing KNL/KNM memory and cluster modes when
+ hwloc-dump-hwdata could not run as root earlier.
+* Fix discovery of NVMe OS devices on Linux >= 4.0.
+* Add get_area_memlocation() on Windows.
+* Add CPUVendor, Model, ... attributes on Mac OS X.
+
+
Version 1.11.9
--------------
* Add support for Zhaoxin ZX-C and ZX-D processors in the x86 backend,
@@ -941,7 +1077,7 @@ Version 1.6.0
+ Add a section about Synthetic topologies in the documentation.
-Version 1.5.2 (some of these changes are in v1.6.2 but not in v1.6)
+Version 1.5.2 (some of these changes are in 1.6.2 but not in 1.6)
-------------
* Use libpciaccess instead of pciutils/libpci by default for I/O discovery.
pciutils/libpci is only used if --enable-libpci is given to configure
@@ -1076,9 +1212,8 @@ Version 1.4.2
for most of them.
-Version 1.4.1
+Version 1.4.1 (contains all 1.3.2 changes)
-------------
-* This release contains all changes from v1.3.2.
* Fix hwloc_alloc_membind, thanks Karl Napf for reporting the issue.
* Fix memory leaks in some get_membind() functions.
* Fix helpers converting from Linux libnuma to hwloc (hwloc/linux-libnuma.h)
@@ -1091,7 +1226,7 @@ Version 1.4.1
issues.
-Version 1.4.0 (does not contain all v1.3.2 changes)
+Version 1.4.0 (does not contain all 1.3.2 changes)
-------------
* Major features
+ Add "custom" interface and "assembler" tools to build multi-node
@@ -1536,7 +1671,7 @@ Version 1.0.0
Version 0.9.4 (unreleased)
---------------------------
+-------------
* Fix reseting colors to normal in lstopo -.txt output.
* Fix Linux pthread_t binding error report.
@@ -1593,7 +1728,7 @@ Version 0.9.1
the physical location of IB devices.
-Version 0.9 (libtopology)
--------------------------
+Version 0.9 (formerly named "libtopology")
+-----------
* First release.
diff --git a/src/3rdparty/hwloc/VERSION b/src/3rdparty/hwloc/VERSION
index 5ebc6bb4..9035ed9c 100644
--- a/src/3rdparty/hwloc/VERSION
+++ b/src/3rdparty/hwloc/VERSION
@@ -8,8 +8,8 @@
# Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too.
major=2
-minor=0
-release=4
+minor=1
+release=0
# greek is used for alpha or beta release tags. If it is non-empty,
# it will be appended to the version number. It does not have to be
@@ -22,7 +22,7 @@ greek=
# The date when this release was created
-date="Jun 03, 2019"
+date="Sep 30, 2019"
# If snapshot=1, then use the value from snapshot_version as the
# entire hwloc version (i.e., ignore major, minor, release, and
@@ -41,7 +41,7 @@ snapshot_version=${major}.${minor}.${release}${greek}-git
# 2. Version numbers are described in the Libtool current:revision:age
# format.
-libhwloc_so_version=15:3:0
+libhwloc_so_version=16:0:1
libnetloc_so_version=0:0:0
# Please also update the lines in contrib/windows/libhwloc.vcxproj
diff --git a/src/3rdparty/hwloc/include/hwloc.h b/src/3rdparty/hwloc/include/hwloc.h
index ee6da6fd..e106e9cc 100644
--- a/src/3rdparty/hwloc/include/hwloc.h
+++ b/src/3rdparty/hwloc/include/hwloc.h
@@ -53,7 +53,8 @@
#ifndef HWLOC_H
#define HWLOC_H
-#include <hwloc/autogen/config.h>
+#include "hwloc/autogen/config.h"
+
#include <sys/types.h>
#include <stdio.h>
#include <string.h>
@@ -62,13 +63,13 @@
/*
* Symbol transforms
*/
-#include <hwloc/rename.h>
+#include "hwloc/rename.h"
/*
* Bitmap definitions
*/
-#include <hwloc/bitmap.h>
+#include "hwloc/bitmap.h"
#ifdef __cplusplus
@@ -86,13 +87,13 @@ extern "C" {
* actually modifies the API.
*
* Users may check for available features at build time using this number
- * (see \ref faq_upgrade).
+ * (see \ref faq_version_api).
*
* \note This should not be confused with HWLOC_VERSION, the library version.
* Two stable releases of the same series usually have the same ::HWLOC_API_VERSION
* even if their HWLOC_VERSION are different.
*/
-#define HWLOC_API_VERSION 0x00020000
+#define HWLOC_API_VERSION 0x00020100
/** \brief Indicate at runtime which hwloc API version was used at build time.
*
@@ -101,7 +102,7 @@ extern "C" {
HWLOC_DECLSPEC unsigned hwloc_get_api_version(void);
/** \brief Current component and plugin ABI version (see hwloc/plugins.h) */
-#define HWLOC_COMPONENT_ABI 5
+#define HWLOC_COMPONENT_ABI 6
/** @} */
@@ -186,7 +187,8 @@ typedef enum {
HWLOC_OBJ_PACKAGE, /**< \brief Physical package.
* The physical package that usually gets inserted
* into a socket on the motherboard.
- * A processor package usually contains multiple cores.
+ * A processor package usually contains multiple cores,
+ * and possibly some dies.
*/
HWLOC_OBJ_CORE, /**< \brief Core.
* A computation unit (may be shared by several
@@ -233,6 +235,10 @@ typedef enum {
* It is usually close to some cores (the corresponding objects
* are descendants of the NUMA node object in the hwloc tree).
*
+ * This is the smallest object representing Memory resources,
+ * it cannot have any child except Misc objects.
+ * However it may have Memory-side cache parents.
+ *
* There is always at least one such object in the topology
* even if the machine is not NUMA.
*
@@ -279,6 +285,24 @@ typedef enum {
* Misc objects have NULL CPU and node sets.
*/
+ HWLOC_OBJ_MEMCACHE, /**< \brief Memory-side cache (filtered out by default).
+ * A cache in front of a specific NUMA node.
+ *
+ * This object always has at least one NUMA node as a memory child.
+ *
+ * Memory objects are not listed in the main children list,
+ * but rather in the dedicated Memory children list.
+ *
+ * Memory-side cache have a special depth ::HWLOC_TYPE_DEPTH_MEMCACHE
+ * instead of a normal depth just like other objects in the
+ * main tree.
+ */
+
+ HWLOC_OBJ_DIE, /**< \brief Die within a physical package.
+ * A subpart of the physical package, that contains multiple cores.
+ * \hideinitializer
+ */
+
HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */
} hwloc_obj_type_t;
@@ -297,8 +321,8 @@ typedef enum hwloc_obj_bridge_type_e {
/** \brief Type of a OS device. */
typedef enum hwloc_obj_osdev_type_e {
- HWLOC_OBJ_OSDEV_BLOCK, /**< \brief Operating system block device.
- * For instance "sda" on Linux. */
+ HWLOC_OBJ_OSDEV_BLOCK, /**< \brief Operating system block device, or non-volatile memory device.
+ * For instance "sda" or "dax2.0" on Linux. */
HWLOC_OBJ_OSDEV_GPU, /**< \brief Operating system GPU device.
* For instance ":0.0" for a GL display,
* "card0" for a Linux DRM device. */
@@ -434,9 +458,15 @@ struct hwloc_obj {
* These children are listed in \p memory_first_child.
*/
struct hwloc_obj *memory_first_child; /**< \brief First Memory child.
- * NUMA nodes are listed here (\p memory_arity and \p memory_first_child)
+ * NUMA nodes and Memory-side caches are listed here
+ * (\p memory_arity and \p memory_first_child)
* instead of in the normal children list.
* See also hwloc_obj_type_is_memory().
+ *
+ * A memory hierarchy starts from a normal CPU-side object
+ * (e.g. Package) and ends with NUMA nodes as leaves.
+ * There might exist some memory-side caches between them
+ * in the middle of the memory subtree.
*/
/**@}*/
@@ -471,7 +501,7 @@ struct hwloc_obj {
* object and known how (the children path between this object and the PU
* objects).
*
- * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set,
+ * If the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED configuration flag is set,
* some of these CPUs may not be allowed for binding,
* see hwloc_topology_get_allowed_cpuset().
*
@@ -483,7 +513,7 @@ struct hwloc_obj {
*
* This may include not only the same as the cpuset field, but also some CPUs for
* which topology information is unknown or incomplete, some offlines CPUs, and
- * the CPUs that are ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag
+ * the CPUs that are ignored when the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED flag
* is not set.
* Thus no corresponding PU object may be found in the topology, because the
* precise position is undefined. It is however known that it would be somewhere
@@ -501,7 +531,7 @@ struct hwloc_obj {
*
* In the end, these nodes are those that are close to the current object.
*
- * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set,
+ * If the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED configuration flag is set,
* some of these nodes may not be allowed for allocation,
* see hwloc_topology_get_allowed_nodeset().
*
@@ -516,7 +546,7 @@ struct hwloc_obj {
*
* This may include not only the same as the nodeset field, but also some NUMA
* nodes for which topology information is unknown or incomplete, some offlines
- * nodes, and the nodes that are ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM
+ * nodes, and the nodes that are ignored when the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED
* flag is not set.
* Thus no corresponding NUMA node object may be found in the topology, because the
* precise position is undefined. It is however known that it would be
@@ -770,7 +800,8 @@ enum hwloc_get_type_depth_e {
HWLOC_TYPE_DEPTH_BRIDGE = -4, /**< \brief Virtual depth for bridge object level. \hideinitializer */
HWLOC_TYPE_DEPTH_PCI_DEVICE = -5, /**< \brief Virtual depth for PCI device object level. \hideinitializer */
HWLOC_TYPE_DEPTH_OS_DEVICE = -6, /**< \brief Virtual depth for software device object level. \hideinitializer */
- HWLOC_TYPE_DEPTH_MISC = -7 /**< \brief Virtual depth for Misc object. \hideinitializer */
+ HWLOC_TYPE_DEPTH_MISC = -7, /**< \brief Virtual depth for Misc object. \hideinitializer */
+ HWLOC_TYPE_DEPTH_MEMCACHE = -8 /**< \brief Virtual depth for MemCache object. \hideinitializer */
};
/** \brief Return the depth of parents where memory objects are attached.
@@ -1781,6 +1812,31 @@ HWLOC_DECLSPEC int hwloc_topology_set_xml(hwloc_topology_t __hwloc_restrict topo
*/
HWLOC_DECLSPEC int hwloc_topology_set_xmlbuffer(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict buffer, int size);
+/** \brief Flags to be passed to hwloc_topology_set_components()
+ */
+enum hwloc_topology_components_flag_e {
+ /** \brief Blacklist the target component from being used.
+ * \hideinitializer
+ */
+ HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST = (1UL<<0)
+};
+
+/** \brief Prevent a discovery component from being used for a topology.
+ *
+ * \p name is the name of the discovery component that should not be used
+ * when loading topology \p topology. The name is a string such as "cuda".
+ *
+ * For components with multiple phases, it may also be suffixed with the name
+ * of a phase, for instance "linux:io".
+ *
+ * \p flags should be ::HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST.
+ *
+ * This may be used to avoid expensive parts of the discovery process.
+ * For instance, CUDA-specific discovery may be expensive and unneeded
+ * while generic I/O discovery could still be useful.
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_components(hwloc_topology_t __hwloc_restrict topology, unsigned long flags, const char * __hwloc_restrict name);
+
/** @} */
@@ -1800,28 +1856,27 @@ HWLOC_DECLSPEC int hwloc_topology_set_xmlbuffer(hwloc_topology_t __hwloc_restric
* They may also be returned by hwloc_topology_get_flags().
*/
enum hwloc_topology_flags_e {
- /** \brief Detect the whole system, ignore reservations.
+ /** \brief Detect the whole system, ignore reservations, include disallowed objects.
*
* Gather all resources, even if some were disabled by the administrator.
* For instance, ignore Linux Cgroup/Cpusets and gather all processors and memory nodes.
*
* When this flag is not set, PUs and NUMA nodes that are disallowed are not added to the topology.
* Parent objects (package, core, cache, etc.) are added only if some of their children are allowed.
+ * All existing PUs and NUMA nodes in the topology are allowed.
+ * hwloc_topology_get_allowed_cpuset() and hwloc_topology_get_allowed_nodeset()
+ * are equal to the root object cpuset and nodeset.
*
* When this flag is set, the actual sets of allowed PUs and NUMA nodes are given
* by hwloc_topology_get_allowed_cpuset() and hwloc_topology_get_allowed_nodeset().
* They may be smaller than the root object cpuset and nodeset.
*
- * When this flag is not set, all existing PUs and NUMA nodes in the topology
- * are allowed. hwloc_topology_get_allowed_cpuset() and hwloc_topology_get_allowed_nodeset()
- * are equal to the root object cpuset and nodeset.
- *
* If the current topology is exported to XML and reimported later, this flag
* should be set again in the reimported topology so that disallowed resources
* are reimported as well.
* \hideinitializer
*/
- HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM = (1UL<<0),
+ HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED = (1UL<<0),
/** \brief Assume that the selected backend provides the topology for the
* system on which we are running.
@@ -1901,6 +1956,10 @@ struct hwloc_topology_discovery_support {
unsigned char numa;
/** \brief Detecting the amount of memory in NUMA nodes is supported. */
unsigned char numa_memory;
+ /** \brief Detecting and identifying PU objects that are not available to the current process is supported. */
+ unsigned char disallowed_pu;
+ /** \brief Detecting and identifying NUMA nodes that are not available to the current process is supported. */
+ unsigned char disallowed_numa;
};
/** \brief Flags describing actual PU binding support for this topology.
@@ -1998,7 +2057,7 @@ HWLOC_DECLSPEC const struct hwloc_topology_support *hwloc_topology_get_support(h
*
* By default, most objects are kept (::HWLOC_TYPE_FILTER_KEEP_ALL).
* Instruction caches, I/O and Misc objects are ignored by default (::HWLOC_TYPE_FILTER_KEEP_NONE).
- * Group levels are ignored unless they bring structure (::HWLOC_TYPE_FILTER_KEEP_STRUCTURE).
+ * Die and Group levels are ignored unless they bring structure (::HWLOC_TYPE_FILTER_KEEP_STRUCTURE).
*
* Note that group objects are also ignored individually (without the entire level)
* when they do not bring structure.
@@ -2063,11 +2122,15 @@ HWLOC_DECLSPEC int hwloc_topology_get_type_filter(hwloc_topology_t topology, hwl
*/
HWLOC_DECLSPEC int hwloc_topology_set_all_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter);
-/** \brief Set the filtering for all cache object types.
+/** \brief Set the filtering for all CPU cache object types.
+ *
+ * Memory-side caches are not involved since they are not CPU caches.
*/
HWLOC_DECLSPEC int hwloc_topology_set_cache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter);
-/** \brief Set the filtering for all instruction cache object types.
+/** \brief Set the filtering for all CPU instruction cache object types.
+ *
+ * Memory-side caches are not involved since they are not CPU caches.
*/
HWLOC_DECLSPEC int hwloc_topology_set_icache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter);
@@ -2110,6 +2173,19 @@ enum hwloc_restrict_flags_e {
*/
HWLOC_RESTRICT_FLAG_REMOVE_CPULESS = (1UL<<0),
+ /** \brief Restrict by nodeset instead of CPU set.
+ * Only keep objects whose nodeset is included or partially included in the given set.
+ * This flag may not be used with ::HWLOC_RESTRICT_FLAG_BYNODESET.
+ */
+ HWLOC_RESTRICT_FLAG_BYNODESET = (1UL<<3),
+
+ /** \brief Remove all objects that became Memory-less.
+ * By default, only objects that contain no PU and no memory are removed.
+ * This flag may only be used with ::HWLOC_RESTRICT_FLAG_BYNODESET.
+ * \hideinitializer
+ */
+ HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS = (1UL<<4),
+
/** \brief Move Misc objects to ancestors if their parents are removed during restriction.
* If this flag is not set, Misc objects are removed when their parents are removed.
* \hideinitializer
@@ -2123,28 +2199,70 @@ enum hwloc_restrict_flags_e {
HWLOC_RESTRICT_FLAG_ADAPT_IO = (1UL<<2)
};
-/** \brief Restrict the topology to the given CPU set.
+/** \brief Restrict the topology to the given CPU set or nodeset.
*
* Topology \p topology is modified so as to remove all objects that
- * are not included (or partially included) in the CPU set \p cpuset.
+ * are not included (or partially included) in the CPU set \p set.
* All objects CPU and node sets are restricted accordingly.
*
+ * If ::HWLOC_RESTRICT_FLAG_BYNODESET is passed in \p flags,
+ * \p set is considered a nodeset instead of a CPU set.
+ *
* \p flags is a OR'ed set of ::hwloc_restrict_flags_e.
*
* \note This call may not be reverted by restricting back to a larger
- * cpuset. Once dropped during restriction, objects may not be brought
+ * set. Once dropped during restriction, objects may not be brought
* back, except by loading another topology with hwloc_topology_load().
*
* \return 0 on success.
*
- * \return -1 with errno set to EINVAL if the input cpuset is invalid.
+ * \return -1 with errno set to EINVAL if the input set is invalid.
* The topology is not modified in this case.
*
* \return -1 with errno set to ENOMEM on failure to allocate internal data.
* The topology is reinitialized in this case. It should be either
* destroyed with hwloc_topology_destroy() or configured and loaded again.
*/
-HWLOC_DECLSPEC int hwloc_topology_restrict(hwloc_topology_t __hwloc_restrict topology, hwloc_const_cpuset_t cpuset, unsigned long flags);
+HWLOC_DECLSPEC int hwloc_topology_restrict(hwloc_topology_t __hwloc_restrict topology, hwloc_const_bitmap_t set, unsigned long flags);
+
+/** \brief Flags to be given to hwloc_topology_allow(). */
+enum hwloc_allow_flags_e {
+ /** \brief Mark all objects as allowed in the topology.
+ *
+ * \p cpuset and \p nodeset given to hwloc_topology_allow() must be \c NULL.
+ * \hideinitializer */
+ HWLOC_ALLOW_FLAG_ALL = (1UL<<0),
+
+ /** \brief Only allow objects that are available to the current process.
+ *
+ * The topology must have ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM so that the set
+ * of available resources can actually be retrieved from the operating system.
+ *
+ * \p cpuset and \p nodeset given to hwloc_topology_allow() must be \c NULL.
+ * \hideinitializer */
+ HWLOC_ALLOW_FLAG_LOCAL_RESTRICTIONS = (1UL<<1),
+
+ /** \brief Allow a custom set of objects, given to hwloc_topology_allow() as \p cpuset and/or \p nodeset parameters.
+ * \hideinitializer */
+ HWLOC_ALLOW_FLAG_CUSTOM = (1UL<<2)
+};
+
+/** \brief Change the sets of allowed PUs and NUMA nodes in the topology.
+ *
+ * This function only works if the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED
+ * was set on the topology. It does not modify any object, it only changes
+ * the sets returned by hwloc_topology_get_allowed_cpuset() and
+ * hwloc_topology_get_allowed_nodeset().
+ *
+ * It is notably useful when importing a topology from another process
+ * running in a different Linux Cgroup.
+ *
+ * \p flags must be set to one flag among ::hwloc_allow_flags_e.
+ *
+ * \note Removing objects from a topology should rather be performed with
+ * hwloc_topology_restrict().
+ */
+HWLOC_DECLSPEC int hwloc_topology_allow(hwloc_topology_t __hwloc_restrict topology, hwloc_const_cpuset_t cpuset, hwloc_const_nodeset_t nodeset, unsigned long flags);
/** \brief Add a MISC object as a leaf of the topology
*
@@ -2250,21 +2368,21 @@ HWLOC_DECLSPEC int hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src
/* high-level helpers */
-#include <hwloc/helper.h>
+#include "hwloc/helper.h"
/* inline code of some functions above */
-#include <hwloc/inlines.h>
+#include "hwloc/inlines.h"
/* exporting to XML or synthetic */
-#include <hwloc/export.h>
+#include "hwloc/export.h"
/* distances */
-#include <hwloc/distances.h>
+#include "hwloc/distances.h"
/* topology diffs */
-#include <hwloc/diff.h>
+#include "hwloc/diff.h"
/* deprecated headers */
-#include <hwloc/deprecated.h>
+#include "hwloc/deprecated.h"
#endif /* HWLOC_H */
diff --git a/src/3rdparty/hwloc/include/hwloc/autogen/config.h b/src/3rdparty/hwloc/include/hwloc/autogen/config.h
index 14d4481d..36669de5 100644
--- a/src/3rdparty/hwloc/include/hwloc/autogen/config.h
+++ b/src/3rdparty/hwloc/include/hwloc/autogen/config.h
@@ -11,10 +11,10 @@
#ifndef HWLOC_CONFIG_H
#define HWLOC_CONFIG_H
-#define HWLOC_VERSION "2.0.4"
+#define HWLOC_VERSION "2.1.0"
#define HWLOC_VERSION_MAJOR 2
-#define HWLOC_VERSION_MINOR 0
-#define HWLOC_VERSION_RELEASE 4
+#define HWLOC_VERSION_MINOR 1
+#define HWLOC_VERSION_RELEASE 0
#define HWLOC_VERSION_GREEK ""
#define __hwloc_restrict
diff --git a/src/3rdparty/hwloc/include/hwloc/bitmap.h b/src/3rdparty/hwloc/include/hwloc/bitmap.h
index bae623c8..d5b0ea02 100644
--- a/src/3rdparty/hwloc/include/hwloc/bitmap.h
+++ b/src/3rdparty/hwloc/include/hwloc/bitmap.h
@@ -13,7 +13,8 @@
#ifndef HWLOC_BITMAP_H
#define HWLOC_BITMAP_H
-#include <hwloc/autogen/config.h>
+#include "hwloc/autogen/config.h"
+
#include <assert.h>
@@ -198,6 +199,9 @@ HWLOC_DECLSPEC int hwloc_bitmap_from_ulong(hwloc_bitmap_t bitmap, unsigned long
/** \brief Setup bitmap \p bitmap from unsigned long \p mask used as \p i -th subset */
HWLOC_DECLSPEC int hwloc_bitmap_from_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask);
+/** \brief Setup bitmap \p bitmap from unsigned longs \p masks used as first \p nr subsets */
+HWLOC_DECLSPEC int hwloc_bitmap_from_ulongs(hwloc_bitmap_t bitmap, unsigned nr, const unsigned long *masks);
+
/*
* Modifying bitmaps.
@@ -256,6 +260,29 @@ HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ulong(hwloc_const_bitmap_t bitmap)
/** \brief Convert the \p i -th subset of bitmap \p bitmap into unsigned long mask */
HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ith_ulong(hwloc_const_bitmap_t bitmap, unsigned i) __hwloc_attribute_pure;
+/** \brief Convert the first \p nr subsets of bitmap \p bitmap into the array of \p nr unsigned long \p masks
+ *
+ * \p nr may be determined earlier with hwloc_bitmap_nr_ulongs().
+ *
+ * \return 0
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_to_ulongs(hwloc_const_bitmap_t bitmap, unsigned nr, unsigned long *masks);
+
+/** \brief Return the number of unsigned longs required for storing bitmap \p bitmap entirely
+ *
+ * This is the number of contiguous unsigned longs from the very first bit of the bitmap
+ * (even if unset) up to the last set bit.
+ * This is useful for knowing the \p nr parameter to pass to hwloc_bitmap_to_ulongs()
+ * (or which calls to hwloc_bitmap_to_ith_ulong() are needed)
+ * to entirely convert a bitmap into multiple unsigned longs.
+ *
+ * When called on the output of hwloc_topology_get_topology_cpuset(),
+ * the returned number is large enough for all cpusets of the topology.
+ *
+ * \return -1 if \p bitmap is infinite.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_nr_ulongs(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
+
/** \brief Test whether index \p id is part of bitmap \p bitmap.
*
* \return 1 if the bit at index \p id is set in bitmap \p bitmap, 0 otherwise.
diff --git a/src/3rdparty/hwloc/include/hwloc/cuda.h b/src/3rdparty/hwloc/include/hwloc/cuda.h
index 77c8473e..6f0cda4c 100644
--- a/src/3rdparty/hwloc/include/hwloc/cuda.h
+++ b/src/3rdparty/hwloc/include/hwloc/cuda.h
@@ -16,11 +16,11 @@
#ifndef HWLOC_CUDA_H
#define HWLOC_CUDA_H
-#include <hwloc.h>
-#include <hwloc/autogen/config.h>
-#include <hwloc/helper.h>
+#include "hwloc.h"
+#include "hwloc/autogen/config.h"
+#include "hwloc/helper.h"
#ifdef HWLOC_LINUX_SYS
-#include <hwloc/linux.h>
+#include "hwloc/linux.h"
#endif
#include <cuda.h>
diff --git a/src/3rdparty/hwloc/include/hwloc/cudart.h b/src/3rdparty/hwloc/include/hwloc/cudart.h
index 63c7f59c..688b8421 100644
--- a/src/3rdparty/hwloc/include/hwloc/cudart.h
+++ b/src/3rdparty/hwloc/include/hwloc/cudart.h
@@ -16,11 +16,11 @@
#ifndef HWLOC_CUDART_H
#define HWLOC_CUDART_H
-#include <hwloc.h>
-#include <hwloc/autogen/config.h>
-#include <hwloc/helper.h>
+#include "hwloc.h"
+#include "hwloc/autogen/config.h"
+#include "hwloc/helper.h"
#ifdef HWLOC_LINUX_SYS
-#include <hwloc/linux.h>
+#include "hwloc/linux.h"
#endif
#include <cuda.h> /* for CUDA_VERSION */
diff --git a/src/3rdparty/hwloc/include/hwloc/deprecated.h b/src/3rdparty/hwloc/include/hwloc/deprecated.h
index 8f3b1459..4a231f50 100644
--- a/src/3rdparty/hwloc/include/hwloc/deprecated.h
+++ b/src/3rdparty/hwloc/include/hwloc/deprecated.h
@@ -1,6 +1,6 @@
/*
* Copyright © 2009 CNRS
- * Copyright © 2009-2017 Inria. All rights reserved.
+ * Copyright © 2009-2018 Inria. All rights reserved.
* Copyright © 2009-2012 Université Bordeaux
* Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
@@ -21,6 +21,8 @@
extern "C" {
#endif
+/* backward compat with v2.0 before WHOLE_SYSTEM renaming */
+#define HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED
/* backward compat with v1.11 before System removal */
#define HWLOC_OBJ_SYSTEM HWLOC_OBJ_MACHINE
/* backward compat with v1.10 before Socket->Package renaming */
diff --git a/src/3rdparty/hwloc/include/hwloc/distances.h b/src/3rdparty/hwloc/include/hwloc/distances.h
index d523f29f..b7baed8a 100644
--- a/src/3rdparty/hwloc/include/hwloc/distances.h
+++ b/src/3rdparty/hwloc/include/hwloc/distances.h
@@ -87,7 +87,12 @@ enum hwloc_distances_kind_e {
* Such values are currently ignored for distance-based grouping.
* \hideinitializer
*/
- HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3)
+ HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3),
+
+ /** \brief This distances structure covers objects of different types.
+ * \hideinitializer
+ */
+ HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES = (1UL<<4)
};
/** \brief Retrieve distance matrices.
@@ -131,20 +136,32 @@ hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth,
*
* Identical to hwloc_distances_get() with the additional \p type filter.
*/
-static __hwloc_inline int
+HWLOC_DECLSPEC int
hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
unsigned *nr, struct hwloc_distances_s **distances,
- unsigned long kind, unsigned long flags)
-{
- int depth = hwloc_get_type_depth(topology, type);
- if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) {
- *nr = 0;
- return 0;
- }
- return hwloc_distances_get_by_depth(topology, depth, nr, distances, kind, flags);
-}
+ unsigned long kind, unsigned long flags);
-/** \brief Release a distance matrix structure previously returned by hwloc_distances_get(). */
+/** \brief Retrieve a distance matrix with the given name.
+ *
+ * Usually only one distances structure may match a given name.
+ */
+HWLOC_DECLSPEC int
+hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name,
+ unsigned *nr, struct hwloc_distances_s **distances,
+ unsigned long flags);
+
+/** \brief Get a description of what a distances structure contains.
+ *
+ * For instance "NUMALatency" for hardware-provided NUMA distances (ACPI SLIT),
+ * or NULL if unknown.
+ */
+HWLOC_DECLSPEC const char *
+hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances);
+
+/** \brief Release a distance matrix structure previously returned by hwloc_distances_get().
+ *
+ * \note This function is not required if the structure is removed with hwloc_distances_release_remove().
+ */
HWLOC_DECLSPEC void
hwloc_distances_release(hwloc_topology_t topology, struct hwloc_distances_s *distances);
@@ -221,11 +238,11 @@ enum hwloc_distances_add_flag_e {
* The distance from object i to object j is in slot i*nbobjs+j.
*
* \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e.
+ * Kind ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES will be automatically added
+ * if objects of different types are given.
*
* \p flags configures the behavior of the function using an optional OR'ed set of
* ::hwloc_distances_add_flag_e.
- *
- * Objects must be of the same type. They cannot be of type Group.
*/
HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology,
unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values,
@@ -237,7 +254,7 @@ HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology,
* gathered through the OS.
*
* If these distances were used to group objects, these additional
- *Group objects are not removed from the topology.
+ * Group objects are not removed from the topology.
*/
HWLOC_DECLSPEC int hwloc_distances_remove(hwloc_topology_t topology);
@@ -260,6 +277,12 @@ hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type)
return hwloc_distances_remove_by_depth(topology, depth);
}
+/** \brief Release and remove the given distance matrice from the topology.
+ *
+ * This function includes a call to hwloc_distances_release().
+ */
+HWLOC_DECLSPEC int hwloc_distances_release_remove(hwloc_topology_t topology, struct hwloc_distances_s *distances);
+
/** @} */
diff --git a/src/3rdparty/hwloc/include/hwloc/gl.h b/src/3rdparty/hwloc/include/hwloc/gl.h
index 3e643fa9..897ef784 100644
--- a/src/3rdparty/hwloc/include/hwloc/gl.h
+++ b/src/3rdparty/hwloc/include/hwloc/gl.h
@@ -14,7 +14,7 @@
#ifndef HWLOC_GL_H
#define HWLOC_GL_H
-#include <hwloc.h>
+#include "hwloc.h"
#include <stdio.h>
#include <string.h>
diff --git a/src/3rdparty/hwloc/include/hwloc/glibc-sched.h b/src/3rdparty/hwloc/include/hwloc/glibc-sched.h
index 1f9ba7cd..99659e03 100644
--- a/src/3rdparty/hwloc/include/hwloc/glibc-sched.h
+++ b/src/3rdparty/hwloc/include/hwloc/glibc-sched.h
@@ -17,8 +17,9 @@
#ifndef HWLOC_GLIBC_SCHED_H
#define HWLOC_GLIBC_SCHED_H
-#include <hwloc.h>
-#include <hwloc/helper.h>
+#include "hwloc.h"
+#include "hwloc/helper.h"
+
#include <sched.h>
#if !defined _GNU_SOURCE || !defined _SCHED_H || (!defined CPU_SETSIZE && !defined sched_priority)
diff --git a/src/3rdparty/hwloc/include/hwloc/helper.h b/src/3rdparty/hwloc/include/hwloc/helper.h
index d48df15f..bc27be59 100644
--- a/src/3rdparty/hwloc/include/hwloc/helper.h
+++ b/src/3rdparty/hwloc/include/hwloc/helper.h
@@ -527,30 +527,36 @@ hwloc_obj_type_is_io(hwloc_obj_type_t type);
*
* Memory objects are objects attached to their parents
* in the Memory children list.
- * This currently only includes NUMA nodes.
+ * This currently includes NUMA nodes and Memory-side caches.
*
* \return 1 if an object of type \p type is a Memory object, 0 otherwise.
*/
HWLOC_DECLSPEC int
hwloc_obj_type_is_memory(hwloc_obj_type_t type);
-/** \brief Check whether an object type is a Cache (Data, Unified or Instruction).
+/** \brief Check whether an object type is a CPU Cache (Data, Unified or Instruction).
+ *
+ * Memory-side caches are not CPU caches.
*
* \return 1 if an object of type \p type is a Cache, 0 otherwise.
*/
HWLOC_DECLSPEC int
hwloc_obj_type_is_cache(hwloc_obj_type_t type);
-/** \brief Check whether an object type is a Data or Unified Cache.
+/** \brief Check whether an object type is a CPU Data or Unified Cache.
*
- * \return 1 if an object of type \p type is a Data or Unified Cache, 0 otherwise.
+ * Memory-side caches are not CPU caches.
+ *
+ * \return 1 if an object of type \p type is a CPU Data or Unified Cache, 0 otherwise.
*/
HWLOC_DECLSPEC int
hwloc_obj_type_is_dcache(hwloc_obj_type_t type);
-/** \brief Check whether an object type is a Instruction Cache,
+/** \brief Check whether an object type is a CPU Instruction Cache,
*
- * \return 1 if an object of type \p type is a Instruction Cache, 0 otherwise.
+ * Memory-side caches are not CPU caches.
+ *
+ * \return 1 if an object of type \p type is a CPU Instruction Cache, 0 otherwise.
*/
HWLOC_DECLSPEC int
hwloc_obj_type_is_icache(hwloc_obj_type_t type);
@@ -914,7 +920,7 @@ hwloc_topology_get_complete_cpuset(hwloc_topology_t topology) __hwloc_attribute_
* \note The returned cpuset is not newly allocated and should thus not be
* changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy.
*
- * \note This is equivalent to retrieving the root object complete CPU-set.
+ * \note This is equivalent to retrieving the root object CPU-set.
*/
HWLOC_DECLSPEC hwloc_const_cpuset_t
hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure;
@@ -923,11 +929,11 @@ hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_
*
* \return the CPU set of allowed logical processors of the system.
*
- * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was not set,
+ * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was not set,
* this is identical to hwloc_topology_get_topology_cpuset(), which means
* all PUs are allowed.
*
- * \note If ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was set, applying
+ * \note If ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was set, applying
* hwloc_bitmap_intersects() on the result of this function and on an object
* cpuset checks whether there are allowed PUs inside that object.
* Applying hwloc_bitmap_and() returns the list of these allowed PUs.
@@ -945,7 +951,7 @@ hwloc_topology_get_allowed_cpuset(hwloc_topology_t topology) __hwloc_attribute_p
* \note The returned nodeset is not newly allocated and should thus not be
* changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy.
*
- * \note This is equivalent to retrieving the root object complete CPU-set.
+ * \note This is equivalent to retrieving the root object complete nodeset.
*/
HWLOC_DECLSPEC hwloc_const_nodeset_t
hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure;
@@ -959,7 +965,7 @@ hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) __hwloc_attribute
* \note The returned nodeset is not newly allocated and should thus not be
* changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy.
*
- * \note This is equivalent to retrieving the root object complete CPU-set.
+ * \note This is equivalent to retrieving the root object nodeset.
*/
HWLOC_DECLSPEC hwloc_const_nodeset_t
hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure;
@@ -968,11 +974,11 @@ hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) __hwloc_attribute
*
* \return the node set of allowed memory of the system.
*
- * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was not set,
+ * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was not set,
* this is identical to hwloc_topology_get_topology_nodeset(), which means
* all NUMA nodes are allowed.
*
- * \note If ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was set, applying
+ * \note If ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was set, applying
* hwloc_bitmap_intersects() on the result of this function and on an object
* nodeset checks whether there are allowed NUMA nodes inside that object.
* Applying hwloc_bitmap_and() returns the list of these allowed NUMA nodes.
diff --git a/src/3rdparty/hwloc/include/hwloc/intel-mic.h b/src/3rdparty/hwloc/include/hwloc/intel-mic.h
index 6f6f9d1b..c504cd7e 100644
--- a/src/3rdparty/hwloc/include/hwloc/intel-mic.h
+++ b/src/3rdparty/hwloc/include/hwloc/intel-mic.h
@@ -13,11 +13,13 @@
#ifndef HWLOC_INTEL_MIC_H
#define HWLOC_INTEL_MIC_H
-#include <hwloc.h>
-#include <hwloc/autogen/config.h>
-#include <hwloc/helper.h>
+#include "hwloc.h"
+#include "hwloc/autogen/config.h"
+#include "hwloc/helper.h"
+
#ifdef HWLOC_LINUX_SYS
-#include <hwloc/linux.h>
+#include "hwloc/linux.h"
+
#include <dirent.h>
#include <string.h>
#endif
diff --git a/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h b/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h
index 7cea4166..0e2cc19f 100644
--- a/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h
+++ b/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h
@@ -15,7 +15,8 @@
#ifndef HWLOC_LINUX_LIBNUMA_H
#define HWLOC_LINUX_LIBNUMA_H
-#include <hwloc.h>
+#include "hwloc.h"
+
#include <numa.h>
diff --git a/src/3rdparty/hwloc/include/hwloc/linux.h b/src/3rdparty/hwloc/include/hwloc/linux.h
index c409e1c2..ecc86be3 100644
--- a/src/3rdparty/hwloc/include/hwloc/linux.h
+++ b/src/3rdparty/hwloc/include/hwloc/linux.h
@@ -15,7 +15,8 @@
#ifndef HWLOC_LINUX_H
#define HWLOC_LINUX_H
-#include <hwloc.h>
+#include "hwloc.h"
+
#include <stdio.h>
diff --git a/src/3rdparty/hwloc/include/hwloc/nvml.h b/src/3rdparty/hwloc/include/hwloc/nvml.h
index 19710866..1bc2599f 100644
--- a/src/3rdparty/hwloc/include/hwloc/nvml.h
+++ b/src/3rdparty/hwloc/include/hwloc/nvml.h
@@ -13,11 +13,11 @@
#ifndef HWLOC_NVML_H
#define HWLOC_NVML_H
-#include <hwloc.h>
-#include <hwloc/autogen/config.h>
-#include <hwloc/helper.h>
+#include "hwloc.h"
+#include "hwloc/autogen/config.h"
+#include "hwloc/helper.h"
#ifdef HWLOC_LINUX_SYS
-#include <hwloc/linux.h>
+#include "hwloc/linux.h"
#endif
#include <nvml.h>
diff --git a/src/3rdparty/hwloc/include/hwloc/opencl.h b/src/3rdparty/hwloc/include/hwloc/opencl.h
index 058968d7..ebf09848 100644
--- a/src/3rdparty/hwloc/include/hwloc/opencl.h
+++ b/src/3rdparty/hwloc/include/hwloc/opencl.h
@@ -14,19 +14,17 @@
#ifndef HWLOC_OPENCL_H
#define HWLOC_OPENCL_H
-#include <hwloc.h>
-#include <hwloc/autogen/config.h>
-#include <hwloc/helper.h>
+#include "hwloc.h"
+#include "hwloc/autogen/config.h"
+#include "hwloc/helper.h"
#ifdef HWLOC_LINUX_SYS
-#include <hwloc/linux.h>
+#include "hwloc/linux.h"
#endif
#ifdef __APPLE__
#include <OpenCL/cl.h>
-#include <OpenCL/cl_ext.h>
#else
#include <CL/cl.h>
-#include <CL/cl_ext.h>
#endif
#include <stdio.h>
@@ -37,17 +35,75 @@ extern "C" {
#endif
+/* OpenCL extensions aren't always shipped with default headers, and
+ * they don't always reflect what the installed implementations support.
+ * Try everything and let the implementation return errors when non supported.
+ */
+/* Copyright (c) 2008-2018 The Khronos Group Inc. */
+
+/* needs "cl_amd_device_attribute_query" device extension, but not strictly required for clGetDeviceInfo() */
+#define HWLOC_CL_DEVICE_TOPOLOGY_AMD 0x4037
+typedef union {
+ struct { cl_uint type; cl_uint data[5]; } raw;
+ struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
+} hwloc_cl_device_topology_amd;
+#define HWLOC_CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1
+
+/* needs "cl_nv_device_attribute_query" device extension, but not strictly required for clGetDeviceInfo() */
+#define HWLOC_CL_DEVICE_PCI_BUS_ID_NV 0x4008
+#define HWLOC_CL_DEVICE_PCI_SLOT_ID_NV 0x4009
+
+
/** \defgroup hwlocality_opencl Interoperability with OpenCL
*
* This interface offers ways to retrieve topology information about
* OpenCL devices.
*
- * Only the AMD OpenCL interface currently offers useful locality information
- * about its devices.
+ * Only AMD and NVIDIA OpenCL implementations currently offer useful locality
+ * information about their devices.
*
* @{
*/
+/** \brief Return the domain, bus and device IDs of the OpenCL device \p device.
+ *
+ * Device \p device must match the local machine.
+ */
+static __hwloc_inline int
+hwloc_opencl_get_device_pci_busid(cl_device_id device,
+ unsigned *domain, unsigned *bus, unsigned *dev, unsigned *func)
+{
+ hwloc_cl_device_topology_amd amdtopo;
+ cl_uint nvbus, nvslot;
+ cl_int clret;
+
+ clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL);
+ if (CL_SUCCESS == clret
+ && HWLOC_CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD == amdtopo.raw.type) {
+ *domain = 0; /* can't do anything better */
+ *bus = (unsigned) amdtopo.pcie.bus;
+ *dev = (unsigned) amdtopo.pcie.device;
+ *func = (unsigned) amdtopo.pcie.function;
+ return 0;
+ }
+
+ clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_PCI_BUS_ID_NV, sizeof(nvbus), &nvbus, NULL);
+ if (CL_SUCCESS == clret) {
+ clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_PCI_SLOT_ID_NV, sizeof(nvslot), &nvslot, NULL);
+ if (CL_SUCCESS == clret) {
+ /* FIXME: PCI bus only uses 8bit, assume nvidia hardcodes the domain in higher bits */
+ *domain = nvbus >> 8;
+ *bus = nvbus & 0xff;
+ /* non-documented but used in many other projects */
+ *dev = nvslot >> 3;
+ *func = nvslot & 0x7;
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
/** \brief Get the CPU set of logical processors that are physically
* close to OpenCL device \p device.
*
@@ -62,7 +118,7 @@ extern "C" {
* and hwloc_opencl_get_device_osdev_by_index().
*
* This function is currently only implemented in a meaningful way for
- * Linux with the AMD OpenCL implementation; other systems will simply
+ * Linux with the AMD or NVIDIA OpenCL implementation; other systems will simply
* get a full cpuset.
*/
static __hwloc_inline int
@@ -70,35 +126,28 @@ hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unuse
cl_device_id device __hwloc_attribute_unused,
hwloc_cpuset_t set)
{
-#if (defined HWLOC_LINUX_SYS) && (defined CL_DEVICE_TOPOLOGY_AMD)
- /* If we're on Linux + AMD OpenCL, use the AMD extension + the sysfs mechanism to get the local cpus */
+#if (defined HWLOC_LINUX_SYS)
+ /* If we're on Linux, try AMD/NVIDIA extensions + the sysfs mechanism to get the local cpus */
#define HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX 128
char path[HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX];
- cl_device_topology_amd amdtopo;
- cl_int clret;
+ unsigned pcidomain, pcibus, pcidev, pcifunc;
if (!hwloc_topology_is_thissystem(topology)) {
errno = EINVAL;
return -1;
}
- clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL);
- if (CL_SUCCESS != clret) {
- hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
- return 0;
- }
- if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) {
+ if (hwloc_opencl_get_device_pci_busid(device, &pcidomain, &pcibus, &pcidev, &pcifunc) < 0) {
hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
return 0;
}
- sprintf(path, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/local_cpus",
- (unsigned) amdtopo.pcie.bus, (unsigned) amdtopo.pcie.device, (unsigned) amdtopo.pcie.function);
+ sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/local_cpus", pcidomain, pcibus, pcidev, pcifunc);
if (hwloc_linux_read_path_as_cpumask(path, set) < 0
|| hwloc_bitmap_iszero(set))
hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
#else
- /* Non-Linux + AMD OpenCL systems simply get a full cpuset */
+ /* Non-Linux systems simply get a full cpuset */
hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
#endif
return 0;
@@ -140,8 +189,8 @@ hwloc_opencl_get_device_osdev_by_index(hwloc_topology_t topology,
* Use OpenCL device attributes to find the corresponding hwloc OS device object.
* Return NULL if there is none or if useful attributes are not available.
*
- * This function currently only works on AMD OpenCL devices that support
- * the CL_DEVICE_TOPOLOGY_AMD extension. hwloc_opencl_get_device_osdev_by_index()
+ * This function currently only works on AMD and NVIDIA OpenCL devices that support
+ * relevant OpenCL extensions. hwloc_opencl_get_device_osdev_by_index()
* should be preferred whenever possible, i.e. when platform and device index
* are known.
*
@@ -159,17 +208,10 @@ static __hwloc_inline hwloc_obj_t
hwloc_opencl_get_device_osdev(hwloc_topology_t topology __hwloc_attribute_unused,
cl_device_id device __hwloc_attribute_unused)
{
-#ifdef CL_DEVICE_TOPOLOGY_AMD
hwloc_obj_t osdev;
- cl_device_topology_amd amdtopo;
- cl_int clret;
+ unsigned pcidomain, pcibus, pcidevice, pcifunc;
- clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL);
- if (CL_SUCCESS != clret) {
- errno = EINVAL;
- return NULL;
- }
- if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) {
+ if (hwloc_opencl_get_device_pci_busid(device, &pcidomain, &pcibus, &pcidevice, &pcifunc) < 0) {
errno = EINVAL;
return NULL;
}
@@ -181,18 +223,15 @@ hwloc_opencl_get_device_osdev(hwloc_topology_t topology __hwloc_attribute_unused
continue;
if (pcidev
&& pcidev->type == HWLOC_OBJ_PCI_DEVICE
- && pcidev->attr->pcidev.domain == 0
- && pcidev->attr->pcidev.bus == amdtopo.pcie.bus
- && pcidev->attr->pcidev.dev == amdtopo.pcie.device
- && pcidev->attr->pcidev.func == amdtopo.pcie.function)
+ && pcidev->attr->pcidev.domain == pcidomain
+ && pcidev->attr->pcidev.bus == pcibus
+ && pcidev->attr->pcidev.dev == pcidevice
+ && pcidev->attr->pcidev.func == pcifunc)
return osdev;
/* if PCI are filtered out, we need a info attr to match on */
}
return NULL;
-#else
- return NULL;
-#endif
}
/** @} */
diff --git a/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h b/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h
index 174ab4a5..d247a8b1 100644
--- a/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h
+++ b/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h
@@ -19,10 +19,10 @@
#ifndef HWLOC_OPENFABRICS_VERBS_H
#define HWLOC_OPENFABRICS_VERBS_H
-#include <hwloc.h>
-#include <hwloc/autogen/config.h>
+#include "hwloc.h"
+#include "hwloc/autogen/config.h"
#ifdef HWLOC_LINUX_SYS
-#include <hwloc/linux.h>
+#include "hwloc/linux.h"
#endif
#include <infiniband/verbs.h>
diff --git a/src/3rdparty/hwloc/include/hwloc/plugins.h b/src/3rdparty/hwloc/include/hwloc/plugins.h
index cb22000d..0f53ac4d 100644
--- a/src/3rdparty/hwloc/include/hwloc/plugins.h
+++ b/src/3rdparty/hwloc/include/hwloc/plugins.h
@@ -1,5 +1,5 @@
/*
- * Copyright © 2013-2017 Inria. All rights reserved.
+ * Copyright © 2013-2019 Inria. All rights reserved.
* Copyright © 2016 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
@@ -13,7 +13,8 @@
struct hwloc_backend;
-#include <hwloc.h>
+#include "hwloc.h"
+
#ifdef HWLOC_INSIDE_PLUGIN
/* needed for hwloc_plugin_check_namespace() */
#include <ltdl.h>
@@ -25,52 +26,36 @@ struct hwloc_backend;
* @{
*/
-/** \brief Discovery component type */
-typedef enum hwloc_disc_component_type_e {
- /** \brief CPU-only discovery through the OS, or generic no-OS support.
- * \hideinitializer */
- HWLOC_DISC_COMPONENT_TYPE_CPU = (1<<0),
-
- /** \brief xml or synthetic,
- * platform-specific components such as bgq.
- * Anything the discovers CPU and everything else.
- * No misc backend is expected to complement a global component.
- * \hideinitializer */
- HWLOC_DISC_COMPONENT_TYPE_GLOBAL = (1<<1),
-
- /** \brief OpenCL, Cuda, etc.
- * \hideinitializer */
- HWLOC_DISC_COMPONENT_TYPE_MISC = (1<<2)
-} hwloc_disc_component_type_t;
-
/** \brief Discovery component structure
*
* This is the major kind of components, taking care of the discovery.
* They are registered by generic components, either statically-built or as plugins.
*/
struct hwloc_disc_component {
- /** \brief Discovery component type */
- hwloc_disc_component_type_t type;
-
/** \brief Name.
* If this component is built as a plugin, this name does not have to match the plugin filename.
*/
const char *name;
- /** \brief Component types to exclude, as an OR'ed set of ::hwloc_disc_component_type_e.
+ /** \brief Discovery phases performed by this component.
+ * OR'ed set of ::hwloc_disc_phase_t
+ */
+ unsigned phases;
+
+ /** \brief Component phases to exclude, as an OR'ed set of ::hwloc_disc_phase_t.
*
- * For a GLOBAL component, this usually includes all other types (~0).
+ * For a GLOBAL component, this usually includes all other phases (\c ~UL).
*
* Other components only exclude types that may bring conflicting
* topology information. MISC components should likely not be excluded
* since they usually bring non-primary additional information.
*/
- unsigned excludes;
+ unsigned excluded_phases;
/** \brief Instantiate callback to create a backend from the component.
* Parameters data1, data2, data3 are NULL except for components
* that have special enabling routines such as hwloc_topology_set_xml(). */
- struct hwloc_backend * (*instantiate)(struct hwloc_disc_component *component, const void *data1, const void *data2, const void *data3);
+ struct hwloc_backend * (*instantiate)(struct hwloc_topology *topology, struct hwloc_disc_component *component, unsigned excluded_phases, const void *data1, const void *data2, const void *data3);
/** \brief Component priority.
* Used to sort topology->components, higher priority first.
@@ -107,6 +92,72 @@ struct hwloc_disc_component {
* @{
*/
+/** \brief Discovery phase */
+typedef enum hwloc_disc_phase_e {
+ /** \brief xml or synthetic, platform-specific components such as bgq.
+ * Discovers everything including CPU, memory, I/O and everything else.
+ * A component with a Global phase usually excludes all other phases.
+ * \hideinitializer */
+ HWLOC_DISC_PHASE_GLOBAL = (1U<<0),
+
+ /** \brief CPU discovery.
+ * \hideinitializer */
+ HWLOC_DISC_PHASE_CPU = (1U<<1),
+
+ /** \brief Attach memory to existing CPU objects.
+ * \hideinitializer */
+ HWLOC_DISC_PHASE_MEMORY = (1U<<2),
+
+ /** \brief Attach PCI devices and bridges to existing CPU objects.
+ * \hideinitializer */
+ HWLOC_DISC_PHASE_PCI = (1U<<3),
+
+ /** \brief I/O discovery that requires PCI devices (OS devices such as OpenCL, CUDA, etc.).
+ * \hideinitializer */
+ HWLOC_DISC_PHASE_IO = (1U<<4),
+
+ /** \brief Misc objects that gets added below anything else.
+ * \hideinitializer */
+ HWLOC_DISC_PHASE_MISC = (1U<<5),
+
+ /** \brief Annotating existing objects, adding distances, etc.
+ * \hideinitializer */
+ HWLOC_DISC_PHASE_ANNOTATE = (1U<<6),
+
+ /** \brief Final tweaks to a ready-to-use topology.
+ * This phase runs once the topology is loaded, before it is returned to the user.
+ * Hence it may only use the main hwloc API for modifying the topology,
+ * for instance by restricting it, adding info attributes, etc.
+ * \hideinitializer */
+ HWLOC_DISC_PHASE_TWEAK = (1U<<7)
+} hwloc_disc_phase_t;
+
+/** \brief Discovery status flags */
+enum hwloc_disc_status_flag_e {
+ /** \brief The sets of allowed resources were already retrieved \hideinitializer */
+ HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES = (1UL<<1)
+};
+
+/** \brief Discovery status structure
+ *
+ * Used by the core and backends to inform about what has been/is being done
+ * during the discovery process.
+ */
+struct hwloc_disc_status {
+ /** \brief The current discovery phase that is performed.
+ * Must match one of the phases in the component phases field.
+ */
+ hwloc_disc_phase_t phase;
+
+ /** \brief Dynamically excluded phases.
+ * If a component decides during discovery that some phases are no longer needed.
+ */
+ unsigned excluded_phases;
+
+ /** \brief OR'ed set of hwloc_disc_status_flag_e */
+ unsigned long flags;
+};
+
/** \brief Discovery backend structure
*
* A backend is the instantiation of a discovery component.
@@ -116,6 +167,14 @@ struct hwloc_disc_component {
* hwloc_backend_alloc() initializes all fields to default values
* that the component may change (except "component" and "next")
* before enabling the backend with hwloc_backend_enable().
+ *
+ * Most backends assume that the topology is_thissystem flag is
+ * set because they talk to the underlying operating system.
+ * However they may still be used in topologies without the
+ * is_thissystem flag for debugging reasons.
+ * In practice, they are usually auto-disabled in such cases
+ * (excluded by xml or synthetic backends, or by environment
+ * variables when changing the Linux fsroot or the x86 cpuid path).
*/
struct hwloc_backend {
/** \private Reserved for the core, set by hwloc_backend_alloc() */
@@ -127,12 +186,20 @@ struct hwloc_backend {
/** \private Reserved for the core. Used internally to list backends topology->backends. */
struct hwloc_backend * next;
+ /** \brief Discovery phases performed by this component, possibly without some of them if excluded by other components.
+ * OR'ed set of ::hwloc_disc_phase_t
+ */
+ unsigned phases;
+
/** \brief Backend flags, currently always 0. */
unsigned long flags;
/** \brief Backend-specific 'is_thissystem' property.
- * Set to 0 or 1 if the backend should enforce the thissystem flag when it gets enabled.
- * Set to -1 if the backend doesn't care (default). */
+ * Set to 0 if the backend disables the thissystem flag for this topology
+ * (e.g. loading from xml or synthetic string,
+ * or using a different fsroot on Linux, or a x86 CPUID dump).
+ * Set to -1 if the backend doesn't care (default).
+ */
int is_thissystem;
/** \brief Backend private data, or NULL if none. */
@@ -147,20 +214,22 @@ struct hwloc_backend {
* or because of an actual discovery/gathering failure.
* May be NULL.
*/
- int (*discover)(struct hwloc_backend *backend);
+ int (*discover)(struct hwloc_backend *backend, struct hwloc_disc_status *status);
- /** \brief Callback used by the PCI backend to retrieve the locality of a PCI object from the OS/cpu backend.
- * May be NULL. */
+ /** \brief Callback to retrieve the locality of a PCI object.
+ * Called by the PCI core when attaching PCI hierarchy to CPU objects.
+ * May be NULL.
+ */
int (*get_pci_busid_cpuset)(struct hwloc_backend *backend, struct hwloc_pcidev_attr_s *busid, hwloc_bitmap_t cpuset);
};
/** \brief Allocate a backend structure, set good default values, initialize backend->component and topology, etc.
* The caller will then modify whatever needed, and call hwloc_backend_enable().
*/
-HWLOC_DECLSPEC struct hwloc_backend * hwloc_backend_alloc(struct hwloc_disc_component *component);
+HWLOC_DECLSPEC struct hwloc_backend * hwloc_backend_alloc(struct hwloc_topology *topology, struct hwloc_disc_component *component);
/** \brief Enable a previously allocated and setup backend. */
-HWLOC_DECLSPEC int hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *backend);
+HWLOC_DECLSPEC int hwloc_backend_enable(struct hwloc_backend *backend);
/** @} */
@@ -480,7 +549,9 @@ HWLOC_DECLSPEC hwloc_obj_type_t hwloc_pcidisc_check_bridge_type(unsigned device_
*
* Returns -1 and destroys /p obj if bridge fields are invalid.
*/
-HWLOC_DECLSPEC int hwloc_pcidisc_setup_bridge_attr(hwloc_obj_t obj, const unsigned char *config);
+HWLOC_DECLSPEC int hwloc_pcidisc_find_bridge_buses(unsigned domain, unsigned bus, unsigned dev, unsigned func,
+ unsigned *secondary_busp, unsigned *subordinate_busp,
+ const unsigned char *config);
/** \brief Insert a PCI object in the given PCI tree by looking at PCI bus IDs.
*
@@ -490,10 +561,7 @@ HWLOC_DECLSPEC void hwloc_pcidisc_tree_insert_by_busid(struct hwloc_obj **treep,
/** \brief Add some hostbridges on top of the given tree of PCI objects and attach them to the topology.
*
- * For now, they will be attached to the root object. The core will move them to their actual PCI
- * locality using hwloc_pci_belowroot_apply_locality() at the end of the discovery.
- *
- * In the meantime, other backends lookup PCI objects or localities (for instance to attach OS devices)
+ * Other backends may lookup PCI objects or localities (for instance to attach OS devices)
* by using hwloc_pcidisc_find_by_busid() or hwloc_pcidisc_find_busid_parent().
*/
HWLOC_DECLSPEC int hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *tree);
@@ -507,32 +575,14 @@ HWLOC_DECLSPEC int hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, st
* @{
*/
-/** \brief Find the PCI object that matches the bus ID.
- *
- * To be used after a PCI backend added PCI devices with hwloc_pcidisc_tree_attach()
- * and before the core moves them to their actual location with hwloc_pci_belowroot_apply_locality().
- *
- * If no exactly matching object is found, return the container bridge if any, or NULL.
- *
- * On failure, it may be possible to find the PCI locality (instead of the PCI device)
- * by calling hwloc_pcidisc_find_busid_parent().
- *
- * \note This is semantically identical to hwloc_get_pcidev_by_busid() which only works
- * after the topology is fully loaded.
- */
-HWLOC_DECLSPEC struct hwloc_obj * hwloc_pcidisc_find_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func);
-
/** \brief Find the normal parent of a PCI bus ID.
*
* Look at PCI affinity to find out where the given PCI bus ID should be attached.
*
- * This function should be used to attach an I/O device directly under a normal
- * (non-I/O) object, instead of below a PCI object.
- * It is usually used by backends when hwloc_pcidisc_find_by_busid() failed
- * to find the hwloc object corresponding to this bus ID, for instance because
- * PCI discovery is not supported on this platform.
+ * This function should be used to attach an I/O device under the corresponding
+ * PCI object (if any), or under a normal (non-I/O) object with same locality.
*/
-HWLOC_DECLSPEC struct hwloc_obj * hwloc_pcidisc_find_busid_parent(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func);
+HWLOC_DECLSPEC struct hwloc_obj * hwloc_pci_find_parent_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func);
/** @} */
diff --git a/src/3rdparty/hwloc/include/hwloc/rename.h b/src/3rdparty/hwloc/include/hwloc/rename.h
index 7cef1b2e..a23738d0 100644
--- a/src/3rdparty/hwloc/include/hwloc/rename.h
+++ b/src/3rdparty/hwloc/include/hwloc/rename.h
@@ -1,13 +1,13 @@
/*
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
- * Copyright © 2010-2018 Inria. All rights reserved.
+ * Copyright © 2010-2019 Inria. All rights reserved.
* See COPYING in top-level directory.
*/
#ifndef HWLOC_RENAME_H
#define HWLOC_RENAME_H
-#include <hwloc/autogen/config.h>
+#include "hwloc/autogen/config.h"
#ifdef __cplusplus
@@ -49,7 +49,9 @@ extern "C" {
#define HWLOC_OBJ_MACHINE HWLOC_NAME_CAPS(OBJ_MACHINE)
#define HWLOC_OBJ_NUMANODE HWLOC_NAME_CAPS(OBJ_NUMANODE)
+#define HWLOC_OBJ_MEMCACHE HWLOC_NAME_CAPS(OBJ_MEMCACHE)
#define HWLOC_OBJ_PACKAGE HWLOC_NAME_CAPS(OBJ_PACKAGE)
+#define HWLOC_OBJ_DIE HWLOC_NAME_CAPS(OBJ_DIE)
#define HWLOC_OBJ_CORE HWLOC_NAME_CAPS(OBJ_CORE)
#define HWLOC_OBJ_PU HWLOC_NAME_CAPS(OBJ_PU)
#define HWLOC_OBJ_L1CACHE HWLOC_NAME_CAPS(OBJ_L1CACHE)
@@ -116,7 +118,7 @@ extern "C" {
#define hwloc_topology_flags_e HWLOC_NAME(topology_flags_e)
-#define HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_SYSTEM)
+#define HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WITH_DISALLOWED)
#define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM)
#define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)
@@ -124,6 +126,9 @@ extern "C" {
#define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic)
#define hwloc_topology_set_xml HWLOC_NAME(topology_set_xml)
#define hwloc_topology_set_xmlbuffer HWLOC_NAME(topology_set_xmlbuffer)
+#define hwloc_topology_components_flag_e HWLOC_NAME(hwloc_topology_components_flag_e)
+#define HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST HWLOC_NAME_CAPS(TOPOLOGY_COMPONENTS_FLAG_BLACKLIST)
+#define hwloc_topology_set_components HWLOC_NAME(topology_set_components)
#define hwloc_topology_set_flags HWLOC_NAME(topology_set_flags)
#define hwloc_topology_is_thissystem HWLOC_NAME(topology_is_thissystem)
@@ -151,10 +156,18 @@ extern "C" {
#define hwloc_restrict_flags_e HWLOC_NAME(restrict_flags_e)
#define HWLOC_RESTRICT_FLAG_REMOVE_CPULESS HWLOC_NAME_CAPS(RESTRICT_FLAG_REMOVE_CPULESS)
+#define HWLOC_RESTRICT_FLAG_BYNODESET HWLOC_NAME_CAPS(RESTRICT_FLAG_BYNODESET)
+#define HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS HWLOC_NAME_CAPS(RESTRICT_FLAG_REMOVE_MEMLESS)
#define HWLOC_RESTRICT_FLAG_ADAPT_MISC HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_MISC)
#define HWLOC_RESTRICT_FLAG_ADAPT_IO HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_IO)
#define hwloc_topology_restrict HWLOC_NAME(topology_restrict)
+#define hwloc_allow_flags_e HWLOC_NAME(allow_flags_e)
+#define HWLOC_ALLOW_FLAG_ALL HWLOC_NAME_CAPS(ALLOW_FLAG_ALL)
+#define HWLOC_ALLOW_FLAG_LOCAL_RESTRICTIONS HWLOC_NAME_CAPS(ALLOW_FLAG_LOCAL_RESTRICTIONS)
+#define HWLOC_ALLOW_FLAG_CUSTOM HWLOC_NAME_CAPS(ALLOW_FLAG_CUSTOM)
+#define hwloc_topology_allow HWLOC_NAME(topology_allow)
+
#define hwloc_topology_insert_misc_object HWLOC_NAME(topology_insert_misc_object)
#define hwloc_topology_alloc_group_object HWLOC_NAME(topology_alloc_group_object)
#define hwloc_topology_insert_group_object HWLOC_NAME(topology_insert_group_object)
@@ -172,6 +185,7 @@ extern "C" {
#define HWLOC_TYPE_DEPTH_OS_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_OS_DEVICE)
#define HWLOC_TYPE_DEPTH_MISC HWLOC_NAME_CAPS(TYPE_DEPTH_MISC)
#define HWLOC_TYPE_DEPTH_NUMANODE HWLOC_NAME_CAPS(TYPE_DEPTH_NUMANODE)
+#define HWLOC_TYPE_DEPTH_MEMCACHE HWLOC_NAME_CAPS(TYPE_DEPTH_MEMCACHE)
#define hwloc_get_depth_type HWLOC_NAME(get_depth_type)
#define hwloc_get_nbobjs_by_depth HWLOC_NAME(get_nbobjs_by_depth)
@@ -266,10 +280,12 @@ extern "C" {
#define hwloc_bitmap_zero HWLOC_NAME(bitmap_zero)
#define hwloc_bitmap_fill HWLOC_NAME(bitmap_fill)
#define hwloc_bitmap_from_ulong HWLOC_NAME(bitmap_from_ulong)
-
+#define hwloc_bitmap_from_ulongs HWLOC_NAME(bitmap_from_ulongs)
#define hwloc_bitmap_from_ith_ulong HWLOC_NAME(bitmap_from_ith_ulong)
#define hwloc_bitmap_to_ulong HWLOC_NAME(bitmap_to_ulong)
#define hwloc_bitmap_to_ith_ulong HWLOC_NAME(bitmap_to_ith_ulong)
+#define hwloc_bitmap_to_ulongs HWLOC_NAME(bitmap_to_ulongs)
+#define hwloc_bitmap_nr_ulongs HWLOC_NAME(bitmap_nr_ulongs)
#define hwloc_bitmap_only HWLOC_NAME(bitmap_only)
#define hwloc_bitmap_allbut HWLOC_NAME(bitmap_allbut)
#define hwloc_bitmap_set HWLOC_NAME(bitmap_set)
@@ -380,10 +396,13 @@ extern "C" {
#define HWLOC_DISTANCES_KIND_FROM_USER HWLOC_NAME_CAPS(DISTANCES_KIND_FROM_USER)
#define HWLOC_DISTANCES_KIND_MEANS_LATENCY HWLOC_NAME_CAPS(DISTANCES_KIND_MEANS_LATENCY)
#define HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH HWLOC_NAME_CAPS(DISTANCES_KIND_MEANS_BANDWIDTH)
+#define HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES HWLOC_NAME_CAPS(DISTANCES_KIND_HETEROGENEOUS_TYPES)
#define hwloc_distances_get HWLOC_NAME(distances_get)
#define hwloc_distances_get_by_depth HWLOC_NAME(distances_get_by_depth)
#define hwloc_distances_get_by_type HWLOC_NAME(distances_get_by_type)
+#define hwloc_distances_get_by_name HWLOC_NAME(distances_get_by_name)
+#define hwloc_distances_get_name HWLOC_NAME(distances_get_name)
#define hwloc_distances_release HWLOC_NAME(distances_release)
#define hwloc_distances_obj_index HWLOC_NAME(distances_obj_index)
#define hwloc_distances_obj_pair_values HWLOC_NAME(distances_pair_values)
@@ -396,6 +415,7 @@ extern "C" {
#define hwloc_distances_remove HWLOC_NAME(distances_remove)
#define hwloc_distances_remove_by_depth HWLOC_NAME(distances_remove_by_depth)
#define hwloc_distances_remove_by_type HWLOC_NAME(distances_remove_by_type)
+#define hwloc_distances_release_remove HWLOC_NAME(distances_release_remove)
/* diff.h */
@@ -469,6 +489,8 @@ extern "C" {
/* opencl.h */
+#define hwloc_cl_device_topology_amd HWLOC_NAME(cl_device_topology_amd)
+#define hwloc_opencl_get_device_pci_busid HWLOC_NAME(opencl_get_device_pci_ids)
#define hwloc_opencl_get_device_cpuset HWLOC_NAME(opencl_get_device_cpuset)
#define hwloc_opencl_get_device_osdev HWLOC_NAME(opencl_get_device_osdev)
#define hwloc_opencl_get_device_osdev_by_index HWLOC_NAME(opencl_get_device_osdev_by_index)
@@ -502,13 +524,22 @@ extern "C" {
/* hwloc/plugins.h */
-#define hwloc_disc_component_type_e HWLOC_NAME(disc_component_type_e)
-#define HWLOC_DISC_COMPONENT_TYPE_CPU HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_CPU)
-#define HWLOC_DISC_COMPONENT_TYPE_GLOBAL HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_GLOBAL)
-#define HWLOC_DISC_COMPONENT_TYPE_MISC HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_MISC)
-#define hwloc_disc_component_type_t HWLOC_NAME(disc_component_type_t)
+#define hwloc_disc_phase_e HWLOC_NAME(disc_phase_e)
+#define HWLOC_DISC_PHASE_GLOBAL HWLOC_NAME_CAPS(DISC_PHASE_GLOBAL)
+#define HWLOC_DISC_PHASE_CPU HWLOC_NAME_CAPS(DISC_PHASE_CPU)
+#define HWLOC_DISC_PHASE_MEMORY HWLOC_NAME_CAPS(DISC_PHASE_MEMORY)
+#define HWLOC_DISC_PHASE_PCI HWLOC_NAME_CAPS(DISC_PHASE_PCI)
+#define HWLOC_DISC_PHASE_IO HWLOC_NAME_CAPS(DISC_PHASE_IO)
+#define HWLOC_DISC_PHASE_MISC HWLOC_NAME_CAPS(DISC_PHASE_MISC)
+#define HWLOC_DISC_PHASE_ANNOTATE HWLOC_NAME_CAPS(DISC_PHASE_ANNOTATE)
+#define HWLOC_DISC_PHASE_TWEAK HWLOC_NAME_CAPS(DISC_PHASE_TWEAK)
+#define hwloc_disc_phase_t HWLOC_NAME(disc_phase_t)
#define hwloc_disc_component HWLOC_NAME(disc_component)
+#define hwloc_disc_status_flag_e HWLOC_NAME(disc_status_flag_e)
+#define HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES HWLOC_NAME_CAPS(DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES)
+#define hwloc_disc_status HWLOC_NAME(disc_status)
+
#define hwloc_backend HWLOC_NAME(backend)
#define hwloc_backend_alloc HWLOC_NAME(backend_alloc)
@@ -540,12 +571,11 @@ extern "C" {
#define hwloc_pcidisc_find_cap HWLOC_NAME(pcidisc_find_cap)
#define hwloc_pcidisc_find_linkspeed HWLOC_NAME(pcidisc_find_linkspeed)
#define hwloc_pcidisc_check_bridge_type HWLOC_NAME(pcidisc_check_bridge_type)
-#define hwloc_pcidisc_setup_bridge_attr HWLOC_NAME(pcidisc_setup_bridge_attr)
+#define hwloc_pcidisc_find_bridge_buses HWLOC_NAME(pcidisc_find_bridge_buses)
#define hwloc_pcidisc_tree_insert_by_busid HWLOC_NAME(pcidisc_tree_insert_by_busid)
#define hwloc_pcidisc_tree_attach HWLOC_NAME(pcidisc_tree_attach)
-#define hwloc_pcidisc_find_by_busid HWLOC_NAME(pcidisc_find_by_busid)
-#define hwloc_pcidisc_find_busid_parent HWLOC_NAME(pcidisc_find_busid_parent)
+#define hwloc_pci_find_parent_by_busid HWLOC_NAME(pcidisc_find_busid_parent)
/* hwloc/deprecated.h */
@@ -571,8 +601,9 @@ extern "C" {
/* private/misc.h */
+#ifndef HWLOC_HAVE_CORRECT_SNPRINTF
#define hwloc_snprintf HWLOC_NAME(snprintf)
-#define hwloc_namecoloncmp HWLOC_NAME(namecoloncmp)
+#endif
#define hwloc_ffsl_manual HWLOC_NAME(ffsl_manual)
#define hwloc_ffs32 HWLOC_NAME(ffs32)
#define hwloc_ffsl_from_ffs32 HWLOC_NAME(ffsl_from_ffs32)
@@ -631,8 +662,9 @@ extern "C" {
#define hwloc_backends_is_thissystem HWLOC_NAME(backends_is_thissystem)
#define hwloc_backends_find_callbacks HWLOC_NAME(backends_find_callbacks)
-#define hwloc_backends_init HWLOC_NAME(backends_init)
+#define hwloc_topology_components_init HWLOC_NAME(topology_components_init)
#define hwloc_backends_disable_all HWLOC_NAME(backends_disable_all)
+#define hwloc_topology_components_fini HWLOC_NAME(topology_components_fini)
#define hwloc_components_init HWLOC_NAME(components_init)
#define hwloc_components_fini HWLOC_NAME(components_fini)
@@ -656,7 +688,6 @@ extern "C" {
#define hwloc_cuda_component HWLOC_NAME(cuda_component)
#define hwloc_gl_component HWLOC_NAME(gl_component)
-#define hwloc_linuxio_component HWLOC_NAME(linuxio_component)
#define hwloc_nvml_component HWLOC_NAME(nvml_component)
#define hwloc_opencl_component HWLOC_NAME(opencl_component)
#define hwloc_pci_component HWLOC_NAME(pci_component)
@@ -669,6 +700,9 @@ extern "C" {
#define hwloc_special_level_s HWLOC_NAME(special_level_s)
#define hwloc_pci_forced_locality_s HWLOC_NAME(pci_forced_locality_s)
+#define hwloc_pci_locality_s HWLOC_NAME(pci_locality_s)
+
+#define hwloc_topology_forced_component_s HWLOC_NAME(topology_forced_component)
#define hwloc_alloc_root_sets HWLOC_NAME(alloc_root_sets)
#define hwloc_setup_pu_level HWLOC_NAME(setup_pu_level)
@@ -687,8 +721,8 @@ extern "C" {
#define hwloc_pci_discovery_init HWLOC_NAME(pci_discovery_init)
#define hwloc_pci_discovery_prepare HWLOC_NAME(pci_discovery_prepare)
#define hwloc_pci_discovery_exit HWLOC_NAME(pci_discovery_exit)
+#define hwloc_pci_find_by_busid HWLOC_NAME(pcidisc_find_by_busid)
#define hwloc_find_insert_io_parent_by_complete_cpuset HWLOC_NAME(hwloc_find_insert_io_parent_by_complete_cpuset)
-#define hwloc_pci_belowroot_apply_locality HWLOC_NAME(pci_belowroot_apply_locality)
#define hwloc__add_info HWLOC_NAME(_add_info)
#define hwloc__add_info_nodup HWLOC_NAME(_add_info_nodup)
diff --git a/src/3rdparty/hwloc/include/hwloc/shmem.h b/src/3rdparty/hwloc/include/hwloc/shmem.h
index 22249463..86f57b4f 100644
--- a/src/3rdparty/hwloc/include/hwloc/shmem.h
+++ b/src/3rdparty/hwloc/include/hwloc/shmem.h
@@ -10,7 +10,7 @@
#ifndef HWLOC_SHMEM_H
#define HWLOC_SHMEM_H
-#include <hwloc.h>
+#include "hwloc.h"
#ifdef __cplusplus
extern "C" {
diff --git a/src/3rdparty/hwloc/include/private/components.h b/src/3rdparty/hwloc/include/private/components.h
index 8525bbe4..e28c00b1 100644
--- a/src/3rdparty/hwloc/include/private/components.h
+++ b/src/3rdparty/hwloc/include/private/components.h
@@ -1,5 +1,5 @@
/*
- * Copyright © 2012-2015 Inria. All rights reserved.
+ * Copyright © 2012-2019 Inria. All rights reserved.
* See COPYING in top-level directory.
*/
@@ -16,13 +16,13 @@
#ifndef PRIVATE_COMPONENTS_H
#define PRIVATE_COMPONENTS_H 1
-#include
+#include "hwloc/plugins.h"
struct hwloc_topology;
extern int hwloc_disc_component_force_enable(struct hwloc_topology *topology,
int envvar_forced, /* 1 if forced through envvar, 0 if forced through API */
- int type, const char *name,
+ const char *name,
const void *data1, const void *data2, const void *data3);
extern void hwloc_disc_components_enable_others(struct hwloc_topology *topology);
@@ -30,10 +30,12 @@ extern void hwloc_disc_components_enable_others(struct hwloc_topology *topology)
extern void hwloc_backends_is_thissystem(struct hwloc_topology *topology);
extern void hwloc_backends_find_callbacks(struct hwloc_topology *topology);
-/* Initialize the list of backends used by a topology */
-extern void hwloc_backends_init(struct hwloc_topology *topology);
+/* Initialize the lists of components and backends used by a topology */
+extern void hwloc_topology_components_init(struct hwloc_topology *topology);
/* Disable and destroy all backends used by a topology */
extern void hwloc_backends_disable_all(struct hwloc_topology *topology);
+/* Cleanup the lists of components used by a topology */
+extern void hwloc_topology_components_fini(struct hwloc_topology *topology);
/* Used by the core to setup/destroy the list of components */
extern void hwloc_components_init(void); /* increases components refcount, should be called exactly once per topology (during init) */
diff --git a/src/3rdparty/hwloc/include/private/debug.h b/src/3rdparty/hwloc/include/private/debug.h
index 74b697db..637e0141 100644
--- a/src/3rdparty/hwloc/include/private/debug.h
+++ b/src/3rdparty/hwloc/include/private/debug.h
@@ -11,8 +11,8 @@
#ifndef HWLOC_DEBUG_H
#define HWLOC_DEBUG_H
-#include <private/autogen/config.h>
-#include <private/misc.h>
+#include "private/autogen/config.h"
+#include "private/misc.h"
#ifdef HWLOC_DEBUG
#include
diff --git a/src/3rdparty/hwloc/include/private/internal-components.h b/src/3rdparty/hwloc/include/private/internal-components.h
index b138a0eb..d3c89783 100644
--- a/src/3rdparty/hwloc/include/private/internal-components.h
+++ b/src/3rdparty/hwloc/include/private/internal-components.h
@@ -1,5 +1,5 @@
/*
- * Copyright © 2018 Inria. All rights reserved.
+ * Copyright © 2018-2019 Inria. All rights reserved.
*
* See COPYING in top-level directory.
*/
@@ -29,7 +29,6 @@ HWLOC_DECLSPEC extern const struct hwloc_component hwloc_x86_component;
/* I/O discovery */
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_cuda_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_gl_component;
-HWLOC_DECLSPEC extern const struct hwloc_component hwloc_linuxio_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_nvml_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_opencl_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component;
diff --git a/src/3rdparty/hwloc/include/private/misc.h b/src/3rdparty/hwloc/include/private/misc.h
index 66608bc7..6c02d793 100644
--- a/src/3rdparty/hwloc/include/private/misc.h
+++ b/src/3rdparty/hwloc/include/private/misc.h
@@ -1,6 +1,6 @@
/*
* Copyright © 2009 CNRS
- * Copyright © 2009-2018 Inria. All rights reserved.
+ * Copyright © 2009-2019 Inria. All rights reserved.
* Copyright © 2009-2012 Université Bordeaux
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
@@ -11,9 +11,9 @@
#ifndef HWLOC_PRIVATE_MISC_H
#define HWLOC_PRIVATE_MISC_H
-#include <hwloc/autogen/config.h>
-#include <private/autogen/config.h>
-#include <hwloc.h>
+#include "hwloc/autogen/config.h"
+#include "private/autogen/config.h"
+#include "hwloc.h"
#ifdef HWLOC_HAVE_DECL_STRNCASECMP
#ifdef HAVE_STRINGS_H
@@ -439,14 +439,14 @@ hwloc_linux_pci_link_speed_from_string(const char *string)
static __hwloc_inline int hwloc__obj_type_is_normal (hwloc_obj_type_t type)
{
/* type contiguity is asserted in topology_check() */
- return type <= HWLOC_OBJ_GROUP;
+ return type <= HWLOC_OBJ_GROUP || type == HWLOC_OBJ_DIE;
}
-/* Any object attached to memory children, currently only NUMA nodes */
+/* Any object attached to memory children, currently NUMA nodes or Memory-side caches */
static __hwloc_inline int hwloc__obj_type_is_memory (hwloc_obj_type_t type)
{
/* type contiguity is asserted in topology_check() */
- return type == HWLOC_OBJ_NUMANODE;
+ return type == HWLOC_OBJ_NUMANODE || type == HWLOC_OBJ_MEMCACHE;
}
/* I/O or Misc object, without cpusets or nodesets. */
@@ -463,6 +463,7 @@ static __hwloc_inline int hwloc__obj_type_is_io (hwloc_obj_type_t type)
return type >= HWLOC_OBJ_BRIDGE && type <= HWLOC_OBJ_OS_DEVICE;
}
+/* Any CPU caches (not Memory-side caches) */
static __hwloc_inline int
hwloc__obj_type_is_cache(hwloc_obj_type_t type)
{
@@ -572,12 +573,4 @@ typedef SSIZE_T ssize_t;
# endif
#endif
-#if defined HWLOC_WIN_SYS && !defined __MINGW32__ && !defined(__CYGWIN__)
-/* MSVC doesn't support C99 variable-length array */
-#include <malloc.h>
-#define HWLOC_VLA(_type, _name, _nb) _type *_name = (_type*) _alloca((_nb)*sizeof(_type))
-#else
-#define HWLOC_VLA(_type, _name, _nb) _type _name[_nb]
-#endif
-
#endif /* HWLOC_PRIVATE_MISC_H */
diff --git a/src/3rdparty/hwloc/include/private/private.h b/src/3rdparty/hwloc/include/private/private.h
index 8e3964ab..5f878937 100644
--- a/src/3rdparty/hwloc/include/private/private.h
+++ b/src/3rdparty/hwloc/include/private/private.h
@@ -22,11 +22,12 @@
#ifndef HWLOC_PRIVATE_H
#define HWLOC_PRIVATE_H
-#include <private/autogen/config.h>
-#include <hwloc.h>
-#include <hwloc/bitmap.h>
-#include <private/components.h>
-#include <private/misc.h>
+#include "private/autogen/config.h"
+#include "hwloc.h"
+#include "hwloc/bitmap.h"
+#include "private/components.h"
+#include "private/misc.h"
+
#include
#ifdef HAVE_UNISTD_H
#include <unistd.h>
@@ -39,7 +40,7 @@
#endif
#include
-#define HWLOC_TOPOLOGY_ABI 0x20000 /* version of the layout of struct topology */
+#define HWLOC_TOPOLOGY_ABI 0x20100 /* version of the layout of struct topology */
/*****************************************************
* WARNING:
@@ -67,12 +68,13 @@ struct hwloc_topology {
void *adopted_shmem_addr;
size_t adopted_shmem_length;
-#define HWLOC_NR_SLEVELS 5
+#define HWLOC_NR_SLEVELS 6
#define HWLOC_SLEVEL_NUMANODE 0
#define HWLOC_SLEVEL_BRIDGE 1
#define HWLOC_SLEVEL_PCIDEV 2
#define HWLOC_SLEVEL_OSDEV 3
#define HWLOC_SLEVEL_MISC 4
+#define HWLOC_SLEVEL_MEMCACHE 5
/* order must match negative depth, it's asserted in setup_defaults() */
#define HWLOC_SLEVEL_FROM_DEPTH(x) (HWLOC_TYPE_DEPTH_NUMANODE-(x))
#define HWLOC_SLEVEL_TO_DEPTH(x) (HWLOC_TYPE_DEPTH_NUMANODE-(x))
@@ -86,6 +88,7 @@ struct hwloc_topology {
hwloc_bitmap_t allowed_nodeset;
struct hwloc_binding_hooks {
+ /* These are actually rather OS hooks since some of them are not about binding */
int (*set_thisproc_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
int (*get_thisproc_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
int (*set_thisthread_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
@@ -127,20 +130,35 @@ struct hwloc_topology {
int userdata_not_decoded;
struct hwloc_internal_distances_s {
- hwloc_obj_type_t type;
+ char *name; /* FIXME: needs an API to set it from user */
+
+ unsigned id; /* to match the container id field of public distances structure
+ * not exported to XML, regenerated during _add()
+ */
+
+ /* if all objects have the same type, different_types is NULL and unique_type is valid.
+ * otherwise unique_type is HWLOC_OBJ_TYPE_NONE and different_types contains individual objects types.
+ */
+ hwloc_obj_type_t unique_type;
+ hwloc_obj_type_t *different_types;
+
/* add union hwloc_obj_attr_u if we ever support groups */
unsigned nbobjs;
- uint64_t *indexes; /* array of OS or GP indexes before we can convert them into objs. */
+ uint64_t *indexes; /* array of OS or GP indexes before we can convert them into objs.
+ * OS indexes for distances covering only PUs or only NUMAnodes.
+ */
+#define HWLOC_DIST_TYPE_USE_OS_INDEX(_type) ((_type) == HWLOC_OBJ_PU || (_type == HWLOC_OBJ_NUMANODE))
uint64_t *values; /* distance matrices, ordered according to the above indexes/objs array.
* distance from i to j is stored in slot i*nbnodes+j.
*/
unsigned long kind;
+#define HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID (1U<<0) /* if the objs array is valid below */
+ unsigned iflags;
+
/* objects are currently stored in physical_index order */
hwloc_obj_t *objs; /* array of objects */
- int objs_are_valid; /* set to 1 if the array objs is still valid, 0 if needs refresh */
- unsigned id; /* to match the container id field of public distances structure */
struct hwloc_internal_distances_s *prev, *next;
} *first_dist, *last_dist;
unsigned next_dist_id;
@@ -153,8 +171,9 @@ struct hwloc_topology {
/* list of enabled backends. */
struct hwloc_backend * backends;
- struct hwloc_backend * get_pci_busid_cpuset_backend;
- unsigned backend_excludes;
+ struct hwloc_backend * get_pci_busid_cpuset_backend; /* first backend that provides get_pci_busid_cpuset() callback */
+ unsigned backend_phases;
+ unsigned backend_excluded_phases;
/* memory allocator for topology objects */
struct hwloc_tma * tma;
@@ -176,7 +195,6 @@ struct hwloc_topology {
struct hwloc_numanode_attr_s machine_memory;
/* pci stuff */
- int need_pci_belowroot_apply_locality;
int pci_has_forced_locality;
unsigned pci_forced_locality_nr;
struct hwloc_pci_forced_locality_s {
@@ -185,13 +203,32 @@ struct hwloc_topology {
hwloc_bitmap_t cpuset;
} * pci_forced_locality;
+ /* component blacklisting */
+ unsigned nr_blacklisted_components;
+ struct hwloc_topology_forced_component_s {
+ struct hwloc_disc_component *component;
+ unsigned phases;
+ } *blacklisted_components;
+
+ /* FIXME: keep until topo destroy and reuse for finding specific buses */
+ struct hwloc_pci_locality_s {
+ unsigned domain;
+ unsigned bus_min;
+ unsigned bus_max;
+ hwloc_bitmap_t cpuset;
+ hwloc_obj_t parent;
+ struct hwloc_pci_locality_s *prev, *next;
+ } *first_pci_locality, *last_pci_locality;
};
extern void hwloc_alloc_root_sets(hwloc_obj_t root);
extern void hwloc_setup_pu_level(struct hwloc_topology *topology, unsigned nb_pus);
extern int hwloc_get_sysctlbyname(const char *name, int64_t *n);
extern int hwloc_get_sysctl(int name[], unsigned namelen, int *n);
-extern int hwloc_fallback_nbprocessors(struct hwloc_topology *topology);
+
+/* returns the number of CPU from the OS (only valid if thissystem) */
+#define HWLOC_FALLBACK_NBPROCESSORS_INCLUDE_OFFLINE 1 /* by default we try to get only the online CPUs */
+extern int hwloc_fallback_nbprocessors(unsigned flags);
extern int hwloc__object_cpusets_compare_first(hwloc_obj_t obj1, hwloc_obj_t obj2);
extern void hwloc__reorder_children(hwloc_obj_t parent);
@@ -208,19 +245,17 @@ extern void hwloc_pci_discovery_init(struct hwloc_topology *topology);
extern void hwloc_pci_discovery_prepare(struct hwloc_topology *topology);
extern void hwloc_pci_discovery_exit(struct hwloc_topology *topology);
+/* Look for an object matching the given domain/bus/func,
+ * either exactly or return the smallest container bridge
+ */
+extern struct hwloc_obj * hwloc_pci_find_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func);
+
/* Look for an object matching complete cpuset exactly, or insert one.
* Return NULL on failure.
* Return a good fallback (object above) on failure to insert.
*/
extern hwloc_obj_t hwloc_find_insert_io_parent_by_complete_cpuset(struct hwloc_topology *topology, hwloc_cpuset_t cpuset);
-/* Move PCI objects currently attached to the root object ot their actual location.
- * Called by the core at the end of hwloc_topology_load().
- * Prior to this call, all PCI objects may be found below the root object.
- * After this call and a reconnect of levels, all PCI objects are available through levels.
- */
-extern int hwloc_pci_belowroot_apply_locality(struct hwloc_topology *topology);
-
extern int hwloc__add_info(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value);
extern int hwloc__add_info_nodup(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value, int replace);
extern int hwloc__move_infos(struct hwloc_info_s **dst_infosp, unsigned *dst_countp, struct hwloc_info_s **src_infosp, unsigned *src_countp);
@@ -313,8 +348,8 @@ extern void hwloc_internal_distances_prepare(hwloc_topology_t topology);
extern void hwloc_internal_distances_destroy(hwloc_topology_t topology);
extern int hwloc_internal_distances_dup(hwloc_topology_t new, hwloc_topology_t old);
extern void hwloc_internal_distances_refresh(hwloc_topology_t topology);
-extern int hwloc_internal_distances_add(hwloc_topology_t topology, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags);
-extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, hwloc_obj_type_t type, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags);
+extern int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags);
+extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags);
extern void hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology);
/* encode src buffer into target buffer.
@@ -330,13 +365,19 @@ extern int hwloc_encode_to_base64(const char *src, size_t srclength, char *targe
*/
extern int hwloc_decode_from_base64(char const *src, char *target, size_t targsize);
-/* Check whether needle matches the beginning of haystack, at least n, and up
- * to a colon or \0 */
-extern int hwloc_namecoloncmp(const char *haystack, const char *needle, size_t n);
-
/* On some systems, snprintf returns the size of written data, not the actually
- * required size. hwloc_snprintf always report the actually required size. */
+ * required size. Sometimes it returns -1 on truncation too.
+ * And sometimes it doesn't like NULL output buffers.
+ * http://www.gnu.org/software/gnulib/manual/html_node/snprintf.html
+ *
+ * hwloc_snprintf behaves properly, but it's a bit overkill on the vast majority
+ * of platforms, so don't enable it unless really needed.
+ */
+#ifdef HWLOC_HAVE_CORRECT_SNPRINTF
+#define hwloc_snprintf snprintf
+#else
extern int hwloc_snprintf(char *str, size_t size, const char *format, ...) __hwloc_attribute_format(printf, 3, 4);
+#endif
/* Return the name of the currently running program, if supported.
* If not NULL, must be freed by the caller.
@@ -356,7 +397,7 @@ extern char * hwloc_progname(struct hwloc_topology *topology);
#define HWLOC_GROUP_KIND_INTEL_MODULE 102 /* no subkind */
#define HWLOC_GROUP_KIND_INTEL_TILE 103 /* no subkind */
#define HWLOC_GROUP_KIND_INTEL_DIE 104 /* no subkind */
-#define HWLOC_GROUP_KIND_S390_BOOK 110 /* no subkind */
+#define HWLOC_GROUP_KIND_S390_BOOK 110 /* subkind 0 is book, subkind 1 is drawer (group of books) */
#define HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT 120 /* no subkind */
/* then, OS-specific groups */
#define HWLOC_GROUP_KIND_SOLARIS_PG_HW_PERF 200 /* subkind is group width */
diff --git a/src/3rdparty/hwloc/include/private/xml.h b/src/3rdparty/hwloc/include/private/xml.h
index 7c73384d..f59fca1f 100644
--- a/src/3rdparty/hwloc/include/private/xml.h
+++ b/src/3rdparty/hwloc/include/private/xml.h
@@ -1,12 +1,12 @@
/*
- * Copyright © 2009-2019 Inria. All rights reserved.
+ * Copyright © 2009-2017 Inria. All rights reserved.
* See COPYING in top-level directory.
*/
#ifndef PRIVATE_XML_H
#define PRIVATE_XML_H 1
-#include
+#include "hwloc.h"
#include
@@ -54,7 +54,6 @@ struct hwloc_xml_backend_data_s {
unsigned nbnumanodes;
hwloc_obj_t first_numanode, last_numanode; /* temporary cousin-list for handling v1distances */
struct hwloc__xml_imported_v1distances_s *first_v1dist, *last_v1dist;
- int dont_merge_die_groups;
};
/**************
diff --git a/src/3rdparty/hwloc/src/base64.c b/src/3rdparty/hwloc/src/base64.c
index 7b3e1210..4df67bf9 100644
--- a/src/3rdparty/hwloc/src/base64.c
+++ b/src/3rdparty/hwloc/src/base64.c
@@ -11,7 +11,7 @@
/* include hwloc's config before anything else
* so that extensions and features are properly enabled
*/
-#include
+#include "private/private.h"
/* $OpenBSD: base64.c,v 1.5 2006/10/21 09:55:03 otto Exp $ */
diff --git a/src/3rdparty/hwloc/src/bind.c b/src/3rdparty/hwloc/src/bind.c
index b3457bc7..0bd85e25 100644
--- a/src/3rdparty/hwloc/src/bind.c
+++ b/src/3rdparty/hwloc/src/bind.c
@@ -1,15 +1,16 @@
/*
* Copyright © 2009 CNRS
- * Copyright © 2009-2018 Inria. All rights reserved.
+ * Copyright © 2009-2019 Inria. All rights reserved.
* Copyright © 2009-2010, 2012 Université Bordeaux
* Copyright © 2011-2015 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
-#include
-#include
-#include
-#include
+#include "private/autogen/config.h"
+#include "hwloc.h"
+#include "private/private.h"
+#include "hwloc/helper.h"
+
#ifdef HAVE_SYS_MMAN_H
# include
#endif
@@ -885,6 +886,8 @@ hwloc_set_binding_hooks(struct hwloc_topology *topology)
} else {
/* not this system, use dummy binding hooks that do nothing (but don't return ENOSYS) */
hwloc_set_dummy_hooks(&topology->binding_hooks, &topology->support);
+
+ /* Linux has some hooks that also work in this case, but they are not strictly needed yet. */
}
/* if not is_thissystem, set_cpubind is fake
diff --git a/src/3rdparty/hwloc/src/bitmap.c b/src/3rdparty/hwloc/src/bitmap.c
index ea1264af..5fb9cd35 100644
--- a/src/3rdparty/hwloc/src/bitmap.c
+++ b/src/3rdparty/hwloc/src/bitmap.c
@@ -1,18 +1,18 @@
/*
* Copyright © 2009 CNRS
- * Copyright © 2009-2017 Inria. All rights reserved.
+ * Copyright © 2009-2018 Inria. All rights reserved.
* Copyright © 2009-2011 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
-#include
-#include
-#include
-#include
-#include
-#include
-#include
+#include "private/autogen/config.h"
+#include "hwloc/autogen/config.h"
+#include "hwloc.h"
+#include "private/misc.h"
+#include "private/private.h"
+#include "private/debug.h"
+#include "hwloc/bitmap.h"
#include
#include
@@ -766,6 +766,21 @@ int hwloc_bitmap_from_ith_ulong(struct hwloc_bitmap_s *set, unsigned i, unsigned
return 0;
}
+int hwloc_bitmap_from_ulongs(struct hwloc_bitmap_s *set, unsigned nr, const unsigned long *masks)
+{
+ unsigned j;
+
+ HWLOC__BITMAP_CHECK(set);
+
+ if (hwloc_bitmap_reset_by_ulongs(set, nr) < 0)
+ return -1;
+
+ for(j=0; julongs[j] = masks[j];
+ set->infinite = 0;
+ return 0;
+}
+
unsigned long hwloc_bitmap_to_ulong(const struct hwloc_bitmap_s *set)
{
HWLOC__BITMAP_CHECK(set);
@@ -780,6 +795,30 @@ unsigned long hwloc_bitmap_to_ith_ulong(const struct hwloc_bitmap_s *set, unsign
return HWLOC_SUBBITMAP_READULONG(set, i);
}
+int hwloc_bitmap_to_ulongs(const struct hwloc_bitmap_s *set, unsigned nr, unsigned long *masks)
+{
+ unsigned j;
+
+ HWLOC__BITMAP_CHECK(set);
+
+ for(j=0; jinfinite)
+ return -1;
+
+ last = hwloc_bitmap_last(set);
+ return (last + HWLOC_BITS_PER_LONG-1)/HWLOC_BITS_PER_LONG;
+}
+
int hwloc_bitmap_only(struct hwloc_bitmap_s * set, unsigned cpu)
{
unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu);
diff --git a/src/3rdparty/hwloc/src/components.c b/src/3rdparty/hwloc/src/components.c
index bd7c00e3..5c2879b6 100644
--- a/src/3rdparty/hwloc/src/components.c
+++ b/src/3rdparty/hwloc/src/components.c
@@ -1,18 +1,19 @@
/*
- * Copyright © 2009-2017 Inria. All rights reserved.
+ * Copyright © 2009-2019 Inria. All rights reserved.
* Copyright © 2012 Université Bordeaux
* See COPYING in top-level directory.
*/
-#include
-#include
-#include
-#include
-#include
+#include "private/autogen/config.h"
+#include "hwloc.h"
+#include "private/private.h"
+#include "private/xml.h"
+#include "private/misc.h"
#define HWLOC_COMPONENT_STOP_NAME "stop"
#define HWLOC_COMPONENT_EXCLUDE_CHAR '-'
#define HWLOC_COMPONENT_SEPS ","
+#define HWLOC_COMPONENT_PHASESEP_CHAR ':'
/* list of all registered discovery components, sorted by priority, higher priority first.
* noos is last because its priority is 0.
@@ -232,17 +233,6 @@ hwloc_plugins_init(void)
#endif /* HWLOC_HAVE_PLUGINS */
-static const char *
-hwloc_disc_component_type_string(hwloc_disc_component_type_t type)
-{
- switch (type) {
- case HWLOC_DISC_COMPONENT_TYPE_CPU: return "cpu";
- case HWLOC_DISC_COMPONENT_TYPE_GLOBAL: return "global";
- case HWLOC_DISC_COMPONENT_TYPE_MISC: return "misc";
- default: return "**unknown**";
- }
-}
-
static int
hwloc_disc_component_register(struct hwloc_disc_component *component,
const char *filename)
@@ -256,21 +246,26 @@ hwloc_disc_component_register(struct hwloc_disc_component *component,
return -1;
}
if (strchr(component->name, HWLOC_COMPONENT_EXCLUDE_CHAR)
+ || strchr(component->name, HWLOC_COMPONENT_PHASESEP_CHAR)
|| strcspn(component->name, HWLOC_COMPONENT_SEPS) != strlen(component->name)) {
if (hwloc_components_verbose)
fprintf(stderr, "Cannot register discovery component with name `%s' containing reserved characters `%c" HWLOC_COMPONENT_SEPS "'\n",
component->name, HWLOC_COMPONENT_EXCLUDE_CHAR);
return -1;
}
- /* check that the component type is valid */
- switch ((unsigned) component->type) {
- case HWLOC_DISC_COMPONENT_TYPE_CPU:
- case HWLOC_DISC_COMPONENT_TYPE_GLOBAL:
- case HWLOC_DISC_COMPONENT_TYPE_MISC:
- break;
- default:
- fprintf(stderr, "Cannot register discovery component `%s' with unknown type %u\n",
- component->name, (unsigned) component->type);
+
+ /* check that the component phases are valid */
+ if (!component->phases
+ || (component->phases != HWLOC_DISC_PHASE_GLOBAL
+ && component->phases & ~(HWLOC_DISC_PHASE_CPU
+ |HWLOC_DISC_PHASE_MEMORY
+ |HWLOC_DISC_PHASE_PCI
+ |HWLOC_DISC_PHASE_IO
+ |HWLOC_DISC_PHASE_MISC
+ |HWLOC_DISC_PHASE_ANNOTATE
+ |HWLOC_DISC_PHASE_TWEAK))) {
+ fprintf(stderr, "Cannot register discovery component `%s' with invalid phases 0x%x\n",
+ component->name, component->phases);
return -1;
}
@@ -295,8 +290,8 @@ hwloc_disc_component_register(struct hwloc_disc_component *component,
prev = &((*prev)->next);
}
if (hwloc_components_verbose)
- fprintf(stderr, "Registered %s discovery component `%s' with priority %u (%s%s)\n",
- hwloc_disc_component_type_string(component->type), component->name, component->priority,
+ fprintf(stderr, "Registered discovery component `%s' phases 0x%x with priority %u (%s%s)\n",
+ component->name, component->phases, component->priority,
filename ? "from plugin " : "statically build", filename ? filename : "");
prev = &hwloc_disc_components;
@@ -310,7 +305,7 @@ hwloc_disc_component_register(struct hwloc_disc_component *component,
return 0;
}
-#include
+#include "static-components.h"
static void (**hwloc_component_finalize_cbs)(unsigned long);
static unsigned hwloc_component_finalize_cb_count;
@@ -415,31 +410,152 @@ hwloc_components_init(void)
}
void
-hwloc_backends_init(struct hwloc_topology *topology)
+hwloc_topology_components_init(struct hwloc_topology *topology)
{
+ topology->nr_blacklisted_components = 0;
+ topology->blacklisted_components = NULL;
+
topology->backends = NULL;
- topology->backend_excludes = 0;
+ topology->backend_phases = 0;
+ topology->backend_excluded_phases = 0;
}
+/* look for name among components, ignoring things after `:' */
static struct hwloc_disc_component *
-hwloc_disc_component_find(int type /* hwloc_disc_component_type_t or -1 if any */,
- const char *name /* name of NULL if any */)
+hwloc_disc_component_find(const char *name, const char **endp)
{
- struct hwloc_disc_component *comp = hwloc_disc_components;
+ struct hwloc_disc_component *comp;
+ size_t length;
+ const char *end = strchr(name, HWLOC_COMPONENT_PHASESEP_CHAR);
+ if (end) {
+ length = end-name;
+ if (endp)
+ *endp = end+1;
+ } else {
+ length = strlen(name);
+ if (endp)
+ *endp = NULL;
+ }
+
+ comp = hwloc_disc_components;
while (NULL != comp) {
- if ((-1 == type || type == (int) comp->type)
- && (NULL == name || !strcmp(name, comp->name)))
+ if (!strncmp(name, comp->name, length))
return comp;
comp = comp->next;
}
return NULL;
}
+static unsigned
+hwloc_phases_from_string(const char *s)
+{
+ if (!s)
+ return ~0U;
+ if (s[0]<'0' || s[0]>'9') {
+ if (!strcasecmp(s, "global"))
+ return HWLOC_DISC_PHASE_GLOBAL;
+ else if (!strcasecmp(s, "cpu"))
+ return HWLOC_DISC_PHASE_CPU;
+ if (!strcasecmp(s, "memory"))
+ return HWLOC_DISC_PHASE_MEMORY;
+ if (!strcasecmp(s, "pci"))
+ return HWLOC_DISC_PHASE_PCI;
+ if (!strcasecmp(s, "io"))
+ return HWLOC_DISC_PHASE_IO;
+ if (!strcasecmp(s, "misc"))
+ return HWLOC_DISC_PHASE_MISC;
+ if (!strcasecmp(s, "annotate"))
+ return HWLOC_DISC_PHASE_ANNOTATE;
+ if (!strcasecmp(s, "tweak"))
+ return HWLOC_DISC_PHASE_TWEAK;
+ return 0;
+ }
+ return (unsigned) strtoul(s, NULL, 0);
+}
+
+static int
+hwloc_disc_component_blacklist_one(struct hwloc_topology *topology,
+ const char *name)
+{
+ struct hwloc_topology_forced_component_s *blacklisted;
+ struct hwloc_disc_component *comp;
+ unsigned phases;
+ unsigned i;
+
+ if (!strcmp(name, "linuxpci") || !strcmp(name, "linuxio")) {
+ /* replace linuxpci and linuxio with linux (with IO phases)
+ * for backward compatibility with pre-v2.0 and v2.0 respectively */
+ if (hwloc_components_verbose)
+ fprintf(stderr, "Replacing deprecated component `%s' with `linux' IO phases in blacklisting\n", name);
+ comp = hwloc_disc_component_find("linux", NULL);
+ phases = HWLOC_DISC_PHASE_PCI | HWLOC_DISC_PHASE_IO | HWLOC_DISC_PHASE_MISC | HWLOC_DISC_PHASE_ANNOTATE;
+
+ } else {
+ /* normal lookup */
+ const char *end;
+ comp = hwloc_disc_component_find(name, &end);
+ phases = hwloc_phases_from_string(end);
+ }
+ if (!comp) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (hwloc_components_verbose)
+ fprintf(stderr, "Blacklisting component `%s` phases 0x%x\n", comp->name, phases);
+
+ for(i=0; inr_blacklisted_components; i++) {
+ if (topology->blacklisted_components[i].component == comp) {
+ topology->blacklisted_components[i].phases |= phases;
+ return 0;
+ }
+ }
+
+ blacklisted = realloc(topology->blacklisted_components, (topology->nr_blacklisted_components+1)*sizeof(*blacklisted));
+ if (!blacklisted)
+ return -1;
+
+ blacklisted[topology->nr_blacklisted_components].component = comp;
+ blacklisted[topology->nr_blacklisted_components].phases = phases;
+ topology->blacklisted_components = blacklisted;
+ topology->nr_blacklisted_components++;
+ return 0;
+}
+
+int
+hwloc_topology_set_components(struct hwloc_topology *topology,
+ unsigned long flags,
+ const char *name)
+{
+ if (topology->is_loaded) {
+ errno = EBUSY;
+ return -1;
+ }
+
+ if (flags & ~HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* this flag is strictly required for now */
+ if (flags != HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (!strncmp(name, "all", 3) && name[3] == HWLOC_COMPONENT_PHASESEP_CHAR) {
+ topology->backend_excluded_phases = hwloc_phases_from_string(name+4);
+ return 0;
+ }
+
+ return hwloc_disc_component_blacklist_one(topology, name);
+}
+
/* used by set_xml(), set_synthetic(), ... environment variables, ... to force the first backend */
int
hwloc_disc_component_force_enable(struct hwloc_topology *topology,
int envvar_forced,
- int type, const char *name,
+ const char *name,
const void *data1, const void *data2, const void *data3)
{
struct hwloc_disc_component *comp;
@@ -450,18 +566,28 @@ hwloc_disc_component_force_enable(struct hwloc_topology *topology,
return -1;
}
- comp = hwloc_disc_component_find(type, name);
+ comp = hwloc_disc_component_find(name, NULL);
if (!comp) {
errno = ENOSYS;
return -1;
}
- backend = comp->instantiate(comp, data1, data2, data3);
+ backend = comp->instantiate(topology, comp, 0U /* force-enabled don't get any phase blacklisting */,
+ data1, data2, data3);
if (backend) {
+ int err;
backend->envvar_forced = envvar_forced;
if (topology->backends)
hwloc_backends_disable_all(topology);
- return hwloc_backend_enable(topology, backend);
+ err = hwloc_backend_enable(backend);
+
+ if (comp->phases == HWLOC_DISC_PHASE_GLOBAL) {
+ char *env = getenv("HWLOC_ANNOTATE_GLOBAL_COMPONENTS");
+ if (env && atoi(env))
+ topology->backend_excluded_phases &= ~HWLOC_DISC_PHASE_ANNOTATE;
+ }
+
+ return err;
} else
return -1;
}
@@ -469,29 +595,32 @@ hwloc_disc_component_force_enable(struct hwloc_topology *topology,
static int
hwloc_disc_component_try_enable(struct hwloc_topology *topology,
struct hwloc_disc_component *comp,
- const char *comparg,
- int envvar_forced)
+ int envvar_forced,
+ unsigned blacklisted_phases)
{
struct hwloc_backend *backend;
- if (topology->backend_excludes & comp->type) {
+ if (!(comp->phases & ~(topology->backend_excluded_phases | blacklisted_phases))) {
+ /* all this backend phases are already excluded, exclude the backend entirely */
if (hwloc_components_verbose)
/* do not warn if envvar_forced since system-wide HWLOC_COMPONENTS must be silently ignored after set_xml() etc.
*/
- fprintf(stderr, "Excluding %s discovery component `%s', conflicts with excludes 0x%x\n",
- hwloc_disc_component_type_string(comp->type), comp->name, topology->backend_excludes);
+ fprintf(stderr, "Excluding discovery component `%s' phases 0x%x, conflicts with excludes 0x%x\n",
+ comp->name, comp->phases, topology->backend_excluded_phases);
return -1;
}
- backend = comp->instantiate(comp, comparg, NULL, NULL);
+ backend = comp->instantiate(topology, comp, topology->backend_excluded_phases | blacklisted_phases,
+ NULL, NULL, NULL);
if (!backend) {
if (hwloc_components_verbose || envvar_forced)
fprintf(stderr, "Failed to instantiate discovery component `%s'\n", comp->name);
return -1;
}
+ backend->phases &= ~blacklisted_phases;
backend->envvar_forced = envvar_forced;
- return hwloc_backend_enable(topology, backend);
+ return hwloc_backend_enable(backend);
}
void
@@ -502,11 +631,12 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology)
int tryall = 1;
const char *_env;
char *env; /* we'll to modify the env value, so duplicate it */
+ unsigned i;
_env = getenv("HWLOC_COMPONENTS");
env = _env ? strdup(_env) : NULL;
- /* enable explicitly listed components */
+ /* blacklist disabled components */
if (env) {
char *curenv = env;
size_t s;
@@ -516,21 +646,41 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology)
if (s) {
char c;
- /* replace linuxpci with linuxio for backward compatibility with pre-v2.0 */
- if (!strncmp(curenv, "linuxpci", 8) && s == 8) {
- curenv[5] = 'i';
- curenv[6] = 'o';
- curenv[7] = *HWLOC_COMPONENT_SEPS;
- } else if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, "linuxpci", 8) && s == 9) {
- curenv[6] = 'i';
- curenv[7] = 'o';
- curenv[8] = *HWLOC_COMPONENT_SEPS;
- /* skip this name, it's a negated one */
+ if (curenv[0] != HWLOC_COMPONENT_EXCLUDE_CHAR)
goto nextname;
- }
- if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR)
- goto nextname;
+ /* save the last char and replace with \0 */
+ c = curenv[s];
+ curenv[s] = '\0';
+
+ /* blacklist it, and just ignore failures to allocate */
+ hwloc_disc_component_blacklist_one(topology, curenv+1);
+
+ /* remove that blacklisted name from the string */
+ for(i=0; inr_blacklisted_components; i++)
+ if (comp == topology->blacklisted_components[i].component) {
+ blacklisted_phases = topology->blacklisted_components[i].phases;
+ break;
+ }
+ if (comp->phases & ~blacklisted_phases)
+ hwloc_disc_component_try_enable(topology, comp, 1 /* envvar forced */, blacklisted_phases);
} else {
- fprintf(stderr, "Cannot find discovery component `%s'\n", curenv);
+ fprintf(stderr, "Cannot find discovery component `%s'\n", name);
}
/* restore chars (the second loop below needs env to be unmodified) */
curenv[s] = c;
}
-nextname:
curenv += s;
if (*curenv)
/* Skip comma */
@@ -566,26 +729,24 @@ nextname:
if (tryall) {
comp = hwloc_disc_components;
while (NULL != comp) {
+ unsigned blacklisted_phases = 0U;
if (!comp->enabled_by_default)
goto nextcomp;
- /* check if this component was explicitly excluded in env */
- if (env) {
- char *curenv = env;
- while (*curenv) {
- size_t s = strcspn(curenv, HWLOC_COMPONENT_SEPS);
- if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, comp->name, s-1) && strlen(comp->name) == s-1) {
- if (hwloc_components_verbose)
- fprintf(stderr, "Excluding %s discovery component `%s' because of HWLOC_COMPONENTS environment variable\n",
- hwloc_disc_component_type_string(comp->type), comp->name);
- goto nextcomp;
- }
- curenv += s;
- if (*curenv)
- /* Skip comma */
- curenv++;
+ /* check if this component was blacklisted by the application */
+ for(i=0; inr_blacklisted_components; i++)
+ if (comp == topology->blacklisted_components[i].component) {
+ blacklisted_phases = topology->blacklisted_components[i].phases;
+ break;
}
+
+ if (!(comp->phases & ~blacklisted_phases)) {
+ if (hwloc_components_verbose)
+ fprintf(stderr, "Excluding blacklisted discovery component `%s' phases 0x%x\n",
+ comp->name, comp->phases);
+ goto nextcomp;
}
- hwloc_disc_component_try_enable(topology, comp, NULL, 0 /* defaults, not envvar forced */);
+
+ hwloc_disc_component_try_enable(topology, comp, 0 /* defaults, not envvar forced */, blacklisted_phases);
nextcomp:
comp = comp->next;
}
@@ -597,7 +758,7 @@ nextcomp:
backend = topology->backends;
fprintf(stderr, "Final list of enabled discovery components: ");
while (backend != NULL) {
- fprintf(stderr, "%s%s", first ? "" : ",", backend->component->name);
+ fprintf(stderr, "%s%s(0x%x)", first ? "" : ",", backend->component->name, backend->phases);
backend = backend->next;
first = 0;
}
@@ -638,7 +799,8 @@ hwloc_components_fini(void)
}
struct hwloc_backend *
-hwloc_backend_alloc(struct hwloc_disc_component *component)
+hwloc_backend_alloc(struct hwloc_topology *topology,
+ struct hwloc_disc_component *component)
{
struct hwloc_backend * backend = malloc(sizeof(*backend));
if (!backend) {
@@ -646,6 +808,12 @@ hwloc_backend_alloc(struct hwloc_disc_component *component)
return NULL;
}
backend->component = component;
+ backend->topology = topology;
+ /* filter-out component phases that are excluded */
+ backend->phases = component->phases & ~topology->backend_excluded_phases;
+ if (backend->phases != component->phases && hwloc_components_verbose)
+ fprintf(stderr, "Trying discovery component `%s' with phases 0x%x instead of 0x%x\n",
+ component->name, backend->phases, component->phases);
backend->flags = 0;
backend->discover = NULL;
backend->get_pci_busid_cpuset = NULL;
@@ -665,14 +833,15 @@ hwloc_backend_disable(struct hwloc_backend *backend)
}
int
-hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *backend)
+hwloc_backend_enable(struct hwloc_backend *backend)
{
+ struct hwloc_topology *topology = backend->topology;
struct hwloc_backend **pprev;
/* check backend flags */
if (backend->flags) {
- fprintf(stderr, "Cannot enable %s discovery component `%s' with unknown flags %lx\n",
- hwloc_disc_component_type_string(backend->component->type), backend->component->name, backend->flags);
+ fprintf(stderr, "Cannot enable discovery component `%s' phases 0x%x with unknown flags %lx\n",
+ backend->component->name, backend->component->phases, backend->flags);
return -1;
}
@@ -681,8 +850,8 @@ hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *back
while (NULL != *pprev) {
if ((*pprev)->component == backend->component) {
if (hwloc_components_verbose)
- fprintf(stderr, "Cannot enable %s discovery component `%s' twice\n",
- hwloc_disc_component_type_string(backend->component->type), backend->component->name);
+ fprintf(stderr, "Cannot enable discovery component `%s' phases 0x%x twice\n",
+ backend->component->name, backend->component->phases);
hwloc_backend_disable(backend);
errno = EBUSY;
return -1;
@@ -691,8 +860,8 @@ hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *back
}
if (hwloc_components_verbose)
- fprintf(stderr, "Enabling %s discovery component `%s'\n",
- hwloc_disc_component_type_string(backend->component->type), backend->component->name);
+ fprintf(stderr, "Enabling discovery component `%s' with phases 0x%x (among 0x%x)\n",
+ backend->component->name, backend->phases, backend->component->phases);
/* enqueue at the end */
pprev = &topology->backends;
@@ -701,8 +870,8 @@ hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *back
backend->next = *pprev;
*pprev = backend;
- backend->topology = topology;
- topology->backend_excludes |= backend->component->excludes;
+ topology->backend_phases |= backend->component->phases;
+ topology->backend_excluded_phases |= backend->component->excluded_phases;
return 0;
}
@@ -712,7 +881,7 @@ hwloc_backends_is_thissystem(struct hwloc_topology *topology)
struct hwloc_backend *backend;
const char *local_env;
- /* Apply is_thissystem topology flag before we enforce envvar backends.
+ /*
* If the application changed the backend with set_foo(),
* it may use set_flags() update the is_thissystem flag here.
* If it changes the backend with environment variables below,
@@ -775,11 +944,20 @@ hwloc_backends_disable_all(struct hwloc_topology *topology)
while (NULL != (backend = topology->backends)) {
struct hwloc_backend *next = backend->next;
if (hwloc_components_verbose)
- fprintf(stderr, "Disabling %s discovery component `%s'\n",
- hwloc_disc_component_type_string(backend->component->type), backend->component->name);
+ fprintf(stderr, "Disabling discovery component `%s'\n",
+ backend->component->name);
hwloc_backend_disable(backend);
topology->backends = next;
}
topology->backends = NULL;
- topology->backend_excludes = 0;
+ topology->backend_excluded_phases = 0;
+}
+
+void
+hwloc_topology_components_fini(struct hwloc_topology *topology)
+{
+ /* hwloc_backends_disable_all() must have been called earlier */
+ assert(!topology->backends);
+
+ free(topology->blacklisted_components);
}
diff --git a/src/3rdparty/hwloc/src/diff.c b/src/3rdparty/hwloc/src/diff.c
index 00811a7b..7794358b 100644
--- a/src/3rdparty/hwloc/src/diff.c
+++ b/src/3rdparty/hwloc/src/diff.c
@@ -1,11 +1,11 @@
/*
- * Copyright © 2013-2018 Inria. All rights reserved.
+ * Copyright © 2013-2019 Inria. All rights reserved.
* See COPYING in top-level directory.
*/
-#include
-#include
-#include
+#include "private/autogen/config.h"
+#include "private/private.h"
+#include "private/misc.h"
int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff)
{
@@ -351,7 +351,8 @@ int hwloc_topology_diff_build(hwloc_topology_t topo1,
err = 1;
break;
}
- if (dist1->type != dist2->type
+ if (dist1->unique_type != dist2->unique_type
+ || dist1->different_types || dist2->different_types /* too lazy to support this case */
|| dist1->nbobjs != dist2->nbobjs
|| dist1->kind != dist2->kind
|| memcmp(dist1->values, dist2->values, dist1->nbobjs * dist1->nbobjs * sizeof(*dist1->values))) {
@@ -463,6 +464,10 @@ int hwloc_topology_diff_apply(hwloc_topology_t topology,
errno = EINVAL;
return -1;
}
+ if (topology->adopted_shmem_addr) {
+ errno = EPERM;
+ return -1;
+ }
if (flags & ~HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE) {
errno = EINVAL;
diff --git a/src/3rdparty/hwloc/src/distances.c b/src/3rdparty/hwloc/src/distances.c
index f0b91f01..9e56a969 100644
--- a/src/3rdparty/hwloc/src/distances.c
+++ b/src/3rdparty/hwloc/src/distances.c
@@ -1,19 +1,22 @@
/*
- * Copyright © 2010-2018 Inria. All rights reserved.
+ * Copyright © 2010-2019 Inria. All rights reserved.
* Copyright © 2011-2012 Université Bordeaux
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
-#include
-#include
-#include
-#include
-#include
+#include "private/autogen/config.h"
+#include "hwloc.h"
+#include "private/private.h"
+#include "private/debug.h"
+#include "private/misc.h"
#include
#include
+static struct hwloc_internal_distances_s *
+hwloc__internal_distances_from_public(hwloc_topology_t topology, struct hwloc_distances_s *distances);
+
/******************************************************
* Global init, prepare, destroy, dup
*/
@@ -70,6 +73,8 @@ void hwloc_internal_distances_prepare(struct hwloc_topology *topology)
static void hwloc_internal_distances_free(struct hwloc_internal_distances_s *dist)
{
+ free(dist->name);
+ free(dist->different_types);
free(dist->indexes);
free(dist->objs);
free(dist->values);
@@ -96,15 +101,35 @@ static int hwloc_internal_distances_dup_one(struct hwloc_topology *new, struct h
newdist = hwloc_tma_malloc(tma, sizeof(*newdist));
if (!newdist)
return -1;
+ if (olddist->name) {
+ newdist->name = hwloc_tma_strdup(tma, olddist->name);
+ if (!newdist->name) {
+ assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */
+ hwloc_internal_distances_free(newdist);
+ return -1;
+ }
+ } else {
+ newdist->name = NULL;
+ }
- newdist->type = olddist->type;
+ if (olddist->different_types) {
+ newdist->different_types = hwloc_tma_malloc(tma, nbobjs * sizeof(*newdist->different_types));
+ if (!newdist->different_types) {
+ assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */
+ hwloc_internal_distances_free(newdist);
+ return -1;
+ }
+ memcpy(newdist->different_types, olddist->different_types, nbobjs * sizeof(*newdist->different_types));
+ } else
+ newdist->different_types = NULL;
+ newdist->unique_type = olddist->unique_type;
newdist->nbobjs = nbobjs;
newdist->kind = olddist->kind;
newdist->id = olddist->id;
newdist->indexes = hwloc_tma_malloc(tma, nbobjs * sizeof(*newdist->indexes));
newdist->objs = hwloc_tma_calloc(tma, nbobjs * sizeof(*newdist->objs));
- newdist->objs_are_valid = 0;
+ newdist->iflags = olddist->iflags & ~HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID; /* must be revalidated after dup() */
newdist->values = hwloc_tma_malloc(tma, nbobjs*nbobjs * sizeof(*newdist->values));
if (!newdist->indexes || !newdist->objs || !newdist->values) {
assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */
@@ -150,6 +175,10 @@ int hwloc_distances_remove(hwloc_topology_t topology)
errno = EINVAL;
return -1;
}
+ if (topology->adopted_shmem_addr) {
+ errno = EPERM;
+ return -1;
+ }
hwloc_internal_distances_destroy(topology);
return 0;
}
@@ -163,6 +192,10 @@ int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth)
errno = EINVAL;
return -1;
}
+ if (topology->adopted_shmem_addr) {
+ errno = EPERM;
+ return -1;
+ }
/* switch back to types since we don't support groups for now */
type = hwloc_get_depth_type(topology, depth);
@@ -174,7 +207,7 @@ int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth)
next = topology->first_dist;
while ((dist = next) != NULL) {
next = dist->next;
- if (dist->type == type) {
+ if (dist->unique_type == type) {
if (next)
next->prev = dist->prev;
else
@@ -190,6 +223,27 @@ int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth)
return 0;
}
+int hwloc_distances_release_remove(hwloc_topology_t topology,
+ struct hwloc_distances_s *distances)
+{
+ struct hwloc_internal_distances_s *dist = hwloc__internal_distances_from_public(topology, distances);
+ if (!dist) {
+ errno = EINVAL;
+ return -1;
+ }
+ if (dist->prev)
+ dist->prev->next = dist->next;
+ else
+ topology->first_dist = dist->next;
+ if (dist->next)
+ dist->next->prev = dist->prev;
+ else
+ topology->last_dist = dist->prev;
+ hwloc_internal_distances_free(dist);
+ hwloc_distances_release(topology, distances);
+ return 0;
+}
+
/******************************************************
* Add distances to the topology
*/
@@ -201,17 +255,34 @@ hwloc__groups_by_distances(struct hwloc_topology *topology, unsigned nbobjs, str
* the caller gives us the distances and objs pointers, we'll free them later.
*/
static int
-hwloc_internal_distances__add(hwloc_topology_t topology,
- hwloc_obj_type_t type, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *indexes, uint64_t *values,
- unsigned long kind)
+hwloc_internal_distances__add(hwloc_topology_t topology, const char *name,
+ hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types,
+ unsigned nbobjs, hwloc_obj_t *objs, uint64_t *indexes, uint64_t *values,
+ unsigned long kind, unsigned iflags)
{
- struct hwloc_internal_distances_s *dist = calloc(1, sizeof(*dist));
+ struct hwloc_internal_distances_s *dist;
+
+ if (different_types) {
+ kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES; /* the user isn't forced to give it */
+ } else if (kind & HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES) {
+ errno = EINVAL;
+ goto err;
+ }
+
+ dist = calloc(1, sizeof(*dist));
if (!dist)
goto err;
- dist->type = type;
+ if (name)
+ dist->name = strdup(name); /* ignore failure */
+
+ dist->unique_type = unique_type;
+ dist->different_types = different_types;
dist->nbobjs = nbobjs;
dist->kind = kind;
+ dist->iflags = iflags;
+
+ assert(!!(iflags & HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID) == !!objs);
if (!objs) {
assert(indexes);
@@ -220,18 +291,16 @@ hwloc_internal_distances__add(hwloc_topology_t topology,
dist->objs = calloc(nbobjs, sizeof(hwloc_obj_t));
if (!dist->objs)
goto err_with_dist;
- dist->objs_are_valid = 0;
} else {
unsigned i;
assert(!indexes);
/* we only have objs, generate the indexes arrays so that we can refresh objs later */
dist->objs = objs;
- dist->objs_are_valid = 1;
dist->indexes = malloc(nbobjs * sizeof(*dist->indexes));
if (!dist->indexes)
goto err_with_dist;
- if (dist->type == HWLOC_OBJ_PU || dist->type == HWLOC_OBJ_NUMANODE) {
+ if (HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type)) {
for(i=0; i<nbobjs; i++)
dist->indexes[i] = objs[i]->os_index;
} else {
@@ -256,16 +325,19 @@ hwloc_internal_distances__add(hwloc_topology_t topology,
err_with_dist:
free(dist);
err:
+ free(different_types);
free(objs);
free(indexes);
free(values);
return -1;
}
-int hwloc_internal_distances_add_by_index(hwloc_topology_t topology,
- hwloc_obj_type_t type, unsigned nbobjs, uint64_t *indexes, uint64_t *values,
+int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name,
+ hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values,
unsigned long kind, unsigned long flags)
{
+ unsigned iflags = 0; /* objs not valid */
+
if (nbobjs < 2) {
errno = EINVAL;
goto err;
@@ -279,24 +351,71 @@ int hwloc_internal_distances_add_by_index(hwloc_topology_t topology,
goto err;
}
- return hwloc_internal_distances__add(topology, type, nbobjs, NULL, indexes, values, kind);
+ return hwloc_internal_distances__add(topology, name, unique_type, different_types, nbobjs, NULL, indexes, values, kind, iflags);
err:
free(indexes);
free(values);
+ free(different_types);
return -1;
}
-int hwloc_internal_distances_add(hwloc_topology_t topology,
+static void
+hwloc_internal_distances_restrict(hwloc_obj_t *objs,
+ uint64_t *indexes,
+ uint64_t *values,
+ unsigned nbobjs, unsigned disappeared);
+
+int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name,
unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values,
unsigned long kind, unsigned long flags)
{
+ hwloc_obj_type_t unique_type, *different_types;
+ unsigned i, disappeared = 0;
+ unsigned iflags = HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID;
+
if (nbobjs < 2) {
errno = EINVAL;
goto err;
}
- if (topology->grouping && (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP)) {
+ /* is there any NULL object? (useful in case of problem during insert in backends) */
+ for(i=0; itype;
+ for(i=1; itype != unique_type) {
+ unique_type = HWLOC_OBJ_TYPE_NONE;
+ break;
+ }
+ if (unique_type == HWLOC_OBJ_TYPE_NONE) {
+ /* heterogeneous types */
+ different_types = malloc(nbobjs * sizeof(*different_types));
+ if (!different_types)
+ goto err;
+ for(i=0; i<nbobjs; i++)
+ different_types[i] = objs[i]->type;
+
+ } else {
+ /* homogeneous types */
+ different_types = NULL;
+ }
+
+ if (topology->grouping && (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) && !different_types) {
float full_accuracy = 0.f;
float *accuracies;
unsigned nbaccuracies;
@@ -310,8 +429,8 @@ int hwloc_internal_distances_add(hwloc_topology_t topology,
}
if (topology->grouping_verbose) {
- unsigned i, j;
- int gp = (objs[0]->type != HWLOC_OBJ_NUMANODE && objs[0]->type != HWLOC_OBJ_PU);
+ unsigned j;
+ int gp = !HWLOC_DIST_TYPE_USE_OS_INDEX(unique_type);
fprintf(stderr, "Trying to group objects using distance matrix:\n");
fprintf(stderr, "%s", gp ? "gp_index" : "os_index");
for(j=0; jtype, nbobjs, objs, NULL, values, kind);
+ return hwloc_internal_distances__add(topology, name, unique_type, different_types, nbobjs, objs, NULL, values, kind, iflags);
err:
free(objs);
@@ -348,7 +467,6 @@ int hwloc_distances_add(hwloc_topology_t topology,
unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values,
unsigned long kind, unsigned long flags)
{
- hwloc_obj_type_t type;
unsigned i;
uint64_t *_values;
hwloc_obj_t *_objs;
@@ -358,6 +476,10 @@ int hwloc_distances_add(hwloc_topology_t topology,
errno = EINVAL;
return -1;
}
+ if (topology->adopted_shmem_addr) {
+ errno = EPERM;
+ return -1;
+ }
if ((kind & ~HWLOC_DISTANCES_KIND_ALL)
|| hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_FROM_ALL) != 1
|| hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_MEANS_ALL) != 1
@@ -368,15 +490,8 @@ int hwloc_distances_add(hwloc_topology_t topology,
/* no strict need to check for duplicates, things shouldn't break */
- type = objs[0]->type;
- if (type == HWLOC_OBJ_GROUP) {
- /* not supported yet, would require we save the subkind together with the type. */
- errno = EINVAL;
- return -1;
- }
-
for(i=1; itype != type) {
+ if (!objs[i]) {
errno = EINVAL;
return -1;
}
@@ -389,7 +504,7 @@ int hwloc_distances_add(hwloc_topology_t topology,
memcpy(_objs, objs, nbobjs*sizeof(hwloc_obj_t));
memcpy(_values, values, nbobjs*nbobjs*sizeof(*_values));
- err = hwloc_internal_distances_add(topology, nbobjs, _objs, _values, kind, flags);
+ err = hwloc_internal_distances_add(topology, NULL, nbobjs, _objs, _values, kind, flags);
if (err < 0)
goto out; /* _objs and _values freed in hwloc_internal_distances_add() */
@@ -409,9 +524,9 @@ int hwloc_distances_add(hwloc_topology_t topology,
* Refresh objects in distances
*/
-static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index)
+static hwloc_obj_t hwloc_find_obj_by_depth_and_gp_index(hwloc_topology_t topology, unsigned depth, uint64_t gp_index)
{
- hwloc_obj_t obj = hwloc_get_obj_by_type(topology, type, 0);
+ hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, 0);
while (obj) {
if (obj->gp_index == gp_index)
return obj;
@@ -420,12 +535,31 @@ static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology
return NULL;
}
-static void
-hwloc_internal_distances_restrict(struct hwloc_internal_distances_s *dist,
- hwloc_obj_t *objs,
- unsigned disappeared)
+static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index)
+{
+ int depth = hwloc_get_type_depth(topology, type);
+ if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
+ return NULL;
+ if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) {
+ int topodepth = hwloc_topology_get_depth(topology);
+ for(depth=0; depthnbobjs;
unsigned i, newi;
unsigned j, newj;
@@ -433,7 +567,7 @@ hwloc_internal_distances_restrict(struct hwloc_internal_distances_s *dist,
if (objs[i]) {
for(j=0, newj=0; jvalues[newi*(nbobjs-disappeared)+newj] = dist->values[i*nbobjs+j];
+ values[newi*(nbobjs-disappeared)+newj] = values[i*nbobjs+j];
newj++;
}
newi++;
@@ -442,25 +576,25 @@ hwloc_internal_distances_restrict(struct hwloc_internal_distances_s *dist,
for(i=0, newi=0; iindexes[newi] = dist->indexes[i];
+ if (indexes)
+ indexes[newi] = indexes[i];
newi++;
}
-
- dist->nbobjs -= disappeared;
}
static int
hwloc_internal_distances_refresh_one(hwloc_topology_t topology,
struct hwloc_internal_distances_s *dist)
{
- hwloc_obj_type_t type = dist->type;
+ hwloc_obj_type_t unique_type = dist->unique_type;
+ hwloc_obj_type_t *different_types = dist->different_types;
unsigned nbobjs = dist->nbobjs;
hwloc_obj_t *objs = dist->objs;
uint64_t *indexes = dist->indexes;
unsigned disappeared = 0;
unsigned i;
- if (dist->objs_are_valid)
+ if (dist->iflags & HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID)
return 0;
for(i=0; iindexes, dist->values, nbobjs, disappeared);
+ dist->nbobjs -= disappeared;
+ }
- dist->objs_are_valid = 1;
+ dist->iflags |= HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID;
return 0;
}
@@ -520,32 +660,64 @@ hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology)
{
struct hwloc_internal_distances_s *dist;
for(dist = topology->first_dist; dist; dist = dist->next)
- dist->objs_are_valid = 0;
+ dist->iflags &= ~HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID;
}
/******************************************************
* User API for getting distances
*/
+/* what we actually allocate for user queries, even if we only
+ * return the distances part of it.
+ */
+struct hwloc_distances_container_s {
+ unsigned id;
+ struct hwloc_distances_s distances;
+};
+
+#define HWLOC_DISTANCES_CONTAINER_OFFSET ((char*)&((struct hwloc_distances_container_s*)NULL)->distances - (char*)NULL)
+#define HWLOC_DISTANCES_CONTAINER(_d) (struct hwloc_distances_container_s *) ( ((char*)_d) - HWLOC_DISTANCES_CONTAINER_OFFSET )
+
+static struct hwloc_internal_distances_s *
+hwloc__internal_distances_from_public(hwloc_topology_t topology, struct hwloc_distances_s *distances)
+{
+ struct hwloc_distances_container_s *cont = HWLOC_DISTANCES_CONTAINER(distances);
+ struct hwloc_internal_distances_s *dist;
+ for(dist = topology->first_dist; dist; dist = dist->next)
+ if (dist->id == cont->id)
+ return dist;
+ return NULL;
+}
+
void
hwloc_distances_release(hwloc_topology_t topology __hwloc_attribute_unused,
struct hwloc_distances_s *distances)
{
+ struct hwloc_distances_container_s *cont = HWLOC_DISTANCES_CONTAINER(distances);
free(distances->values);
free(distances->objs);
- free(distances);
+ free(cont);
+}
+
+const char *
+hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances)
+{
+ struct hwloc_internal_distances_s *dist = hwloc__internal_distances_from_public(topology, distances);
+ return dist ? dist->name : NULL;
}
static struct hwloc_distances_s *
hwloc_distances_get_one(hwloc_topology_t topology __hwloc_attribute_unused,
struct hwloc_internal_distances_s *dist)
{
+ struct hwloc_distances_container_s *cont;
struct hwloc_distances_s *distances;
unsigned nbobjs;
- distances = malloc(sizeof(*distances));
- if (!distances)
+ cont = malloc(sizeof(*cont));
+ if (!cont)
return NULL;
+ distances = &cont->distances;
nbobjs = distances->nbobjs = dist->nbobjs;
@@ -560,18 +732,20 @@ hwloc_distances_get_one(hwloc_topology_t topology __hwloc_attribute_unused,
memcpy(distances->values, dist->values, nbobjs*nbobjs*sizeof(*distances->values));
distances->kind = dist->kind;
+
+ cont->id = dist->id;
return distances;
out_with_objs:
free(distances->objs);
out:
- free(distances);
+ free(cont);
return NULL;
}
static int
hwloc__distances_get(hwloc_topology_t topology,
- hwloc_obj_type_t type,
+ const char *name, hwloc_obj_type_t type,
unsigned *nrp, struct hwloc_distances_s **distancesp,
unsigned long kind, unsigned long flags __hwloc_attribute_unused)
{
@@ -602,7 +776,10 @@ hwloc__distances_get(hwloc_topology_t topology,
unsigned long kind_from = kind & HWLOC_DISTANCES_KIND_FROM_ALL;
unsigned long kind_means = kind & HWLOC_DISTANCES_KIND_MEANS_ALL;
- if (type != HWLOC_OBJ_TYPE_NONE && type != dist->type)
+ if (name && (!dist->name || strcmp(name, dist->name)))
+ continue;
+
+ if (type != HWLOC_OBJ_TYPE_NONE && type != dist->unique_type)
continue;
if (kind_from && !(kind_from & dist->kind))
@@ -640,7 +817,7 @@ hwloc_distances_get(hwloc_topology_t topology,
return -1;
}
- return hwloc__distances_get(topology, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, kind, flags);
+ return hwloc__distances_get(topology, NULL, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, kind, flags);
}
int
@@ -655,14 +832,40 @@ hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth,
return -1;
}
- /* switch back to types since we don't support groups for now */
+ /* FIXME: passing the depth of a group level may return group distances at a different depth */
type = hwloc_get_depth_type(topology, depth);
if (type == (hwloc_obj_type_t)-1) {
errno = EINVAL;
return -1;
}
- return hwloc__distances_get(topology, type, nrp, distancesp, kind, flags);
+ return hwloc__distances_get(topology, NULL, type, nrp, distancesp, kind, flags);
+}
+
+int
+hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name,
+ unsigned *nrp, struct hwloc_distances_s **distancesp,
+ unsigned long flags)
+{
+ if (flags || !topology->is_loaded) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ return hwloc__distances_get(topology, name, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, HWLOC_DISTANCES_KIND_ALL, flags);
+}
+
+int
+hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
+ unsigned *nrp, struct hwloc_distances_s **distancesp,
+ unsigned long kind, unsigned long flags)
+{
+ if (flags || !topology->is_loaded) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ return hwloc__distances_get(topology, NULL, type, nrp, distancesp, kind, flags);
}
/******************************************************
@@ -823,10 +1026,14 @@ hwloc__groups_by_distances(struct hwloc_topology *topology,
float *accuracies,
int needcheck)
{
- HWLOC_VLA(unsigned, groupids, nbobjs);
+ unsigned *groupids;
unsigned nbgroups = 0;
unsigned i,j;
int verbose = topology->grouping_verbose;
+ hwloc_obj_t *groupobjs;
+ unsigned * groupsizes;
+ uint64_t *groupvalues;
+ unsigned failed = 0;
if (nbobjs <= 2)
return;
@@ -836,6 +1043,10 @@ hwloc__groups_by_distances(struct hwloc_topology *topology,
/* TODO hwloc__find_groups_by_max_distance() for bandwidth */
return;
+ groupids = malloc(nbobjs * sizeof(*groupids));
+ if (!groupids)
+ return;
+
for(i=0; i
-#include <private/private.h>
-#include <private/misc.h>
+#include "private/autogen/config.h"
+#include "private/private.h"
+#include "private/misc.h"
#include
#ifdef HAVE_SYS_UTSNAME_H
@@ -28,6 +28,7 @@ extern char *program_invocation_name;
extern char *__progname;
#endif
+#ifndef HWLOC_HAVE_CORRECT_SNPRINTF
int hwloc_snprintf(char *str, size_t size, const char *format, ...)
{
int ret;
@@ -77,21 +78,7 @@ int hwloc_snprintf(char *str, size_t size, const char *format, ...)
return ret;
}
-
-int hwloc_namecoloncmp(const char *haystack, const char *needle, size_t n)
-{
- size_t i = 0;
- while (*haystack && *haystack != ':') {
- int ha = *haystack++;
- int low_h = tolower(ha);
- int ne = *needle++;
- int low_n = tolower(ne);
- if (low_h != low_n)
- return 1;
- i++;
- }
- return i < n;
-}
+#endif
void hwloc_add_uname_info(struct hwloc_topology *topology __hwloc_attribute_unused,
void *cached_uname __hwloc_attribute_unused)
diff --git a/src/3rdparty/hwloc/src/pci-common.c b/src/3rdparty/hwloc/src/pci-common.c
index 00f08a9e..deca5cce 100644
--- a/src/3rdparty/hwloc/src/pci-common.c
+++ b/src/3rdparty/hwloc/src/pci-common.c
@@ -1,14 +1,14 @@
/*
- * Copyright © 2009-2018 Inria. All rights reserved.
+ * Copyright © 2009-2019 Inria. All rights reserved.
* See COPYING in top-level directory.
*/
-#include <private/autogen/config.h>
-#include <hwloc.h>
-#include <hwloc/plugins.h>
-#include <private/private.h>
-#include <private/debug.h>
-#include <private/misc.h>
+#include "private/autogen/config.h"
+#include "hwloc.h"
+#include "hwloc/plugins.h"
+#include "private/private.h"
+#include "private/debug.h"
+#include "private/misc.h"
#include <fcntl.h>
#ifdef HAVE_UNISTD_H
@@ -23,6 +23,11 @@
#define close _close
#endif
+
+/**************************************
+ * Init/Exit and Forced PCI localities
+ */
+
static void
hwloc_pci_forced_locality_parse_one(struct hwloc_topology *topology,
const char *string /* must contain a ' ' */,
@@ -109,11 +114,11 @@ hwloc_pci_forced_locality_parse(struct hwloc_topology *topology, const char *_en
void
hwloc_pci_discovery_init(struct hwloc_topology *topology)
{
- topology->need_pci_belowroot_apply_locality = 0;
-
topology->pci_has_forced_locality = 0;
topology->pci_forced_locality_nr = 0;
topology->pci_forced_locality = NULL;
+
+ topology->first_pci_locality = topology->last_pci_locality = NULL;
}
void
@@ -135,7 +140,7 @@ hwloc_pci_discovery_prepare(struct hwloc_topology *topology)
if (!err) {
if (st.st_size <= 64*1024) { /* random limit large enough to store multiple cpusets for thousands of PUs */
buffer = malloc(st.st_size+1);
- if (read(fd, buffer, st.st_size) == st.st_size) {
+ if (buffer && read(fd, buffer, st.st_size) == st.st_size) {
buffer[st.st_size] = '\0';
hwloc_pci_forced_locality_parse(topology, buffer);
}
@@ -152,16 +157,31 @@ hwloc_pci_discovery_prepare(struct hwloc_topology *topology)
}
void
-hwloc_pci_discovery_exit(struct hwloc_topology *topology __hwloc_attribute_unused)
+hwloc_pci_discovery_exit(struct hwloc_topology *topology)
{
+ struct hwloc_pci_locality_s *cur;
unsigned i;
+
for(i=0; i<topology->pci_forced_locality_nr; i++)
hwloc_bitmap_free(topology->pci_forced_locality[i].cpuset);
free(topology->pci_forced_locality);
+ cur = topology->first_pci_locality;
+ while (cur) {
+ struct hwloc_pci_locality_s *next = cur->next;
+ hwloc_bitmap_free(cur->cpuset);
+ free(cur);
+ cur = next;
+ }
+
hwloc_pci_discovery_init(topology);
}
+
+/******************************
+ * Inserting in Tree by Bus ID
+ */
+
#ifdef HWLOC_DEBUG
static void
hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused,
@@ -324,32 +344,16 @@ hwloc_pcidisc_tree_insert_by_busid(struct hwloc_obj **treep,
hwloc_pci_add_object(NULL /* no parent on top of tree */, treep, obj);
}
-int
-hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *old_tree)
+
+/**********************
+ * Attaching PCI Trees
+ */
+
+static struct hwloc_obj *
+hwloc_pcidisc_add_hostbridges(struct hwloc_topology *topology,
+ struct hwloc_obj *old_tree)
{
- struct hwloc_obj **next_hb_p;
- enum hwloc_type_filter_e bfilter;
-
- if (!old_tree)
- /* found nothing, exit */
- return 0;
-
-#ifdef HWLOC_DEBUG
- hwloc_debug("%s", "\nPCI hierarchy:\n");
- hwloc_pci_traverse(NULL, old_tree, hwloc_pci_traverse_print_cb);
- hwloc_debug("%s", "\n");
-#endif
-
- next_hb_p = &hwloc_get_root_obj(topology)->io_first_child;
- while (*next_hb_p)
- next_hb_p = &((*next_hb_p)->next_sibling);
-
- bfilter = topology->type_filter[HWLOC_OBJ_BRIDGE];
- if (bfilter == HWLOC_TYPE_FILTER_KEEP_NONE) {
- *next_hb_p = old_tree;
- topology->modified = 1;
- goto done;
- }
+ struct hwloc_obj * new = NULL, **newp = &new;
/*
* tree points to all objects connected to any upstream bus in the machine.
@@ -358,15 +362,29 @@ hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *old
*/
while (old_tree) {
/* start a new host bridge */
- struct hwloc_obj *hostbridge = hwloc_alloc_setup_object(topology, HWLOC_OBJ_BRIDGE, HWLOC_UNKNOWN_INDEX);
- struct hwloc_obj **dstnextp = &hostbridge->io_first_child;
- struct hwloc_obj **srcnextp = &old_tree;
- struct hwloc_obj *child = *srcnextp;
- unsigned short current_domain = child->attr->pcidev.domain;
- unsigned char current_bus = child->attr->pcidev.bus;
- unsigned char current_subordinate = current_bus;
+ struct hwloc_obj *hostbridge;
+ struct hwloc_obj **dstnextp;
+ struct hwloc_obj **srcnextp;
+ struct hwloc_obj *child;
+ unsigned short current_domain;
+ unsigned char current_bus;
+ unsigned char current_subordinate;
- hwloc_debug("Starting new PCI hostbridge %04x:%02x\n", current_domain, current_bus);
+ hostbridge = hwloc_alloc_setup_object(topology, HWLOC_OBJ_BRIDGE, HWLOC_UNKNOWN_INDEX);
+ if (!hostbridge) {
+ /* just queue remaining things without hostbridges and return */
+ *newp = old_tree;
+ return new;
+ }
+ dstnextp = &hostbridge->io_first_child;
+
+ srcnextp = &old_tree;
+ child = *srcnextp;
+ current_domain = child->attr->pcidev.domain;
+ current_bus = child->attr->pcidev.bus;
+ current_subordinate = current_bus;
+
+ hwloc_debug("Adding new PCI hostbridge %04x:%02x\n", current_domain, current_bus);
next_child:
/* remove next child from tree */
@@ -395,19 +413,14 @@ hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *old
hostbridge->attr->bridge.downstream.pci.domain = current_domain;
hostbridge->attr->bridge.downstream.pci.secondary_bus = current_bus;
hostbridge->attr->bridge.downstream.pci.subordinate_bus = current_subordinate;
- hwloc_debug("New PCI hostbridge %04x:[%02x-%02x]\n",
+ hwloc_debug(" new PCI hostbridge covers %04x:[%02x-%02x]\n",
current_domain, current_bus, current_subordinate);
- *next_hb_p = hostbridge;
- next_hb_p = &hostbridge->next_sibling;
- topology->modified = 1; /* needed in case somebody reconnects levels before the core calls hwloc_pci_belowroot_apply_locality()
- * or if hwloc_pci_belowroot_apply_locality() keeps hostbridges below root.
- */
+ *newp = hostbridge;
+ newp = &hostbridge->next_sibling;
}
- done:
- topology->need_pci_belowroot_apply_locality = 1;
- return 0;
+ return new;
}
static struct hwloc_obj *
@@ -458,6 +471,9 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
unsigned i;
int err;
+ hwloc_debug("Looking for parent of PCI busid %04x:%02x:%02x.%01x\n",
+ busid->domain, busid->bus, busid->dev, busid->func);
+
/* try to match a forced locality */
if (topology->pci_has_forced_locality) {
for(i=0; i<topology->pci_forced_locality_nr; i++) {
@@ -489,7 +505,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
}
if (*env) {
/* force the cpuset */
- hwloc_debug("Overriding localcpus using %s in the environment\n", envname);
+ hwloc_debug("Overriding PCI locality using %s in the environment\n", envname);
hwloc_bitmap_sscanf(cpuset, env);
forced = 1;
}
@@ -499,7 +515,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
}
if (!forced) {
- /* get the cpuset by asking the OS backend. */
+ /* get the cpuset by asking the backend that provides the relevant hook, if any. */
struct hwloc_backend *backend = topology->get_pci_busid_cpuset_backend;
if (backend)
err = backend->get_pci_busid_cpuset(backend, busid, cpuset);
@@ -510,7 +526,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
hwloc_bitmap_copy(cpuset, hwloc_topology_get_topology_cpuset(topology));
}
- hwloc_debug_bitmap("Attaching PCI tree to cpuset %s\n", cpuset);
+ hwloc_debug_bitmap(" will attach PCI bus to cpuset %s\n", cpuset);
parent = hwloc_find_insert_io_parent_by_complete_cpuset(topology, cpuset);
if (parent) {
@@ -526,11 +542,129 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
return parent;
}
+int
+hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *tree)
+{
+ enum hwloc_type_filter_e bfilter;
+
+ if (!tree)
+ /* found nothing, exit */
+ return 0;
+
+#ifdef HWLOC_DEBUG
+ hwloc_debug("%s", "\nPCI hierarchy:\n");
+ hwloc_pci_traverse(NULL, tree, hwloc_pci_traverse_print_cb);
+ hwloc_debug("%s", "\n");
+#endif
+
+ bfilter = topology->type_filter[HWLOC_OBJ_BRIDGE];
+ if (bfilter != HWLOC_TYPE_FILTER_KEEP_NONE) {
+ tree = hwloc_pcidisc_add_hostbridges(topology, tree);
+ }
+
+ while (tree) {
+ struct hwloc_obj *obj, *pciobj;
+ struct hwloc_obj *parent;
+ struct hwloc_pci_locality_s *loc;
+ unsigned domain, bus_min, bus_max;
+
+ obj = tree;
+
+ /* hostbridges don't have a PCI busid for looking up locality, use their first child */
+ if (obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST)
+ pciobj = obj->io_first_child;
+ else
+ pciobj = obj;
+ /* now we have a pci device or a pci bridge */
+ assert(pciobj->type == HWLOC_OBJ_PCI_DEVICE
+ || (pciobj->type == HWLOC_OBJ_BRIDGE && pciobj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI));
+
+ if (obj->type == HWLOC_OBJ_BRIDGE) {
+ domain = obj->attr->bridge.downstream.pci.domain;
+ bus_min = obj->attr->bridge.downstream.pci.secondary_bus;
+ bus_max = obj->attr->bridge.downstream.pci.subordinate_bus;
+ } else {
+ domain = pciobj->attr->pcidev.domain;
+ bus_min = pciobj->attr->pcidev.bus;
+ bus_max = pciobj->attr->pcidev.bus;
+ }
+
+ /* find where to attach that PCI bus */
+ parent = hwloc__pci_find_busid_parent(topology, &pciobj->attr->pcidev);
+
+ /* reuse the previous locality if possible */
+ if (topology->last_pci_locality
+ && parent == topology->last_pci_locality->parent
+ && domain == topology->last_pci_locality->domain
+ && (bus_min == topology->last_pci_locality->bus_max
+ || bus_min == topology->last_pci_locality->bus_max+1)) {
+ hwloc_debug(" Reusing PCI locality up to bus %04x:%02x\n",
+ domain, bus_max);
+ topology->last_pci_locality->bus_max = bus_max;
+ goto done;
+ }
+
+ loc = malloc(sizeof(*loc));
+ if (!loc) {
+ /* fallback to attaching to root */
+ parent = hwloc_get_root_obj(topology);
+ goto done;
+ }
+
+ loc->domain = domain;
+ loc->bus_min = bus_min;
+ loc->bus_max = bus_max;
+ loc->parent = parent;
+ loc->cpuset = hwloc_bitmap_dup(parent->cpuset);
+ if (!loc->cpuset) {
+ /* fallback to attaching to root */
+ free(loc);
+ parent = hwloc_get_root_obj(topology);
+ goto done;
+ }
+
+ hwloc_debug("Adding PCI locality %s P#%u for bus %04x:[%02x:%02x]\n",
+ hwloc_obj_type_string(parent->type), parent->os_index, loc->domain, loc->bus_min, loc->bus_max);
+ if (topology->last_pci_locality) {
+ loc->prev = topology->last_pci_locality;
+ loc->next = NULL;
+ topology->last_pci_locality->next = loc;
+ topology->last_pci_locality = loc;
+ } else {
+ loc->prev = NULL;
+ loc->next = NULL;
+ topology->first_pci_locality = loc;
+ topology->last_pci_locality = loc;
+ }
+
+ done:
+ /* dequeue this object */
+ tree = obj->next_sibling;
+ obj->next_sibling = NULL;
+ hwloc_insert_object_by_parent(topology, parent, obj);
+ }
+
+ return 0;
+}
+
+
+/*********************************
+ * Finding PCI objects or parents
+ */
+
struct hwloc_obj *
-hwloc_pcidisc_find_busid_parent(struct hwloc_topology *topology,
- unsigned domain, unsigned bus, unsigned dev, unsigned func)
+hwloc_pci_find_parent_by_busid(struct hwloc_topology *topology,
+ unsigned domain, unsigned bus, unsigned dev, unsigned func)
{
struct hwloc_pcidev_attr_s busid;
+ hwloc_obj_t parent;
+
+ /* try to find that exact busid */
+ parent = hwloc_pci_find_by_busid(topology, domain, bus, dev, func);
+ if (parent)
+ return parent;
+
+ /* try to find the locality of that bus instead */
busid.domain = domain;
busid.bus = bus;
busid.dev = dev;
@@ -538,66 +672,10 @@ hwloc_pcidisc_find_busid_parent(struct hwloc_topology *topology,
return hwloc__pci_find_busid_parent(topology, &busid);
}
-int
-hwloc_pci_belowroot_apply_locality(struct hwloc_topology *topology)
-{
- struct hwloc_obj *root = hwloc_get_root_obj(topology);
- struct hwloc_obj **listp, *obj;
-
- if (!topology->need_pci_belowroot_apply_locality)
- return 0;
- topology->need_pci_belowroot_apply_locality = 0;
-
- /* root->io_first_child contains some PCI hierarchies, any maybe some non-PCI things.
- * insert the PCI trees according to their PCI-locality.
- */
- listp = &root->io_first_child;
- while ((obj = *listp) != NULL) {
- struct hwloc_pcidev_attr_s *busid;
- struct hwloc_obj *parent;
-
- /* skip non-PCI objects */
- if (obj->type != HWLOC_OBJ_PCI_DEVICE
- && !(obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI)
- && !(obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)) {
- listp = &obj->next_sibling;
- continue;
- }
-
- if (obj->type == HWLOC_OBJ_PCI_DEVICE
- || (obj->type == HWLOC_OBJ_BRIDGE
- && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI))
- busid = &obj->attr->pcidev;
- else {
- /* hostbridges don't have a PCI busid for looking up locality, use their first child if PCI */
- hwloc_obj_t child = obj->io_first_child;
- if (child && (child->type == HWLOC_OBJ_PCI_DEVICE
- || (child->type == HWLOC_OBJ_BRIDGE
- && child->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)))
- busid = &obj->io_first_child->attr->pcidev;
- else
- continue;
- }
-
- /* attach the object (and children) where it belongs */
- parent = hwloc__pci_find_busid_parent(topology, busid);
- if (parent == root) {
- /* keep this object here */
- listp = &obj->next_sibling;
- } else {
- /* dequeue this object */
- *listp = obj->next_sibling;
- obj->next_sibling = NULL;
- hwloc_insert_object_by_parent(topology, parent, obj);
- }
- }
-
- return 0;
-}
-
+/* return the smallest object that contains the desired busid */
static struct hwloc_obj *
-hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent,
- unsigned domain, unsigned bus, unsigned dev, unsigned func)
+hwloc__pci_find_by_busid(hwloc_obj_t parent,
+ unsigned domain, unsigned bus, unsigned dev, unsigned func)
{
hwloc_obj_t child;
@@ -622,7 +700,7 @@ hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent,
&& child->attr->bridge.downstream.pci.secondary_bus <= bus
&& child->attr->bridge.downstream.pci.subordinate_bus >= bus)
/* not the right bus id, but it's included in the bus below that bridge */
- return hwloc__pci_belowroot_find_by_busid(child, domain, bus, dev, func);
+ return hwloc__pci_find_by_busid(child, domain, bus, dev, func);
} else if (child->type == HWLOC_OBJ_BRIDGE
&& child->attr->bridge.upstream_type != HWLOC_OBJ_BRIDGE_PCI
@@ -632,7 +710,7 @@ hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent,
&& child->attr->bridge.downstream.pci.secondary_bus <= bus
&& child->attr->bridge.downstream.pci.subordinate_bus >= bus) {
/* contains our bus, recurse */
- return hwloc__pci_belowroot_find_by_busid(child, domain, bus, dev, func);
+ return hwloc__pci_find_by_busid(child, domain, bus, dev, func);
}
}
/* didn't find anything, return parent */
@@ -640,17 +718,54 @@ hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent,
}
struct hwloc_obj *
-hwloc_pcidisc_find_by_busid(struct hwloc_topology *topology,
- unsigned domain, unsigned bus, unsigned dev, unsigned func)
+hwloc_pci_find_by_busid(struct hwloc_topology *topology,
+ unsigned domain, unsigned bus, unsigned dev, unsigned func)
{
+ struct hwloc_pci_locality_s *loc;
hwloc_obj_t root = hwloc_get_root_obj(topology);
- hwloc_obj_t parent = hwloc__pci_belowroot_find_by_busid(root, domain, bus, dev, func);
- if (parent == root)
+ hwloc_obj_t parent = NULL;
+
+ hwloc_debug("pcidisc looking for bus id %04x:%02x:%02x.%01x\n", domain, bus, dev, func);
+ loc = topology->first_pci_locality;
+ while (loc) {
+ if (loc->domain == domain && loc->bus_min <= bus && loc->bus_max >= bus) {
+ parent = loc->parent;
+ assert(parent);
+ hwloc_debug(" found pci locality for %04x:[%02x:%02x]\n",
+ loc->domain, loc->bus_min, loc->bus_max);
+ break;
+ }
+ loc = loc->next;
+ }
+ /* if we failed to insert localities, look at root too */
+ if (!parent)
+ parent = root;
+
+ hwloc_debug(" looking for bus %04x:%02x:%02x.%01x below %s P#%u\n",
+ domain, bus, dev, func,
+ hwloc_obj_type_string(parent->type), parent->os_index);
+ parent = hwloc__pci_find_by_busid(parent, domain, bus, dev, func);
+ if (parent == root) {
+ hwloc_debug(" found nothing better than root object, ignoring\n");
return NULL;
- else
+ } else {
+ if (parent->type == HWLOC_OBJ_PCI_DEVICE
+ || (parent->type == HWLOC_OBJ_BRIDGE && parent->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI))
+ hwloc_debug(" found busid %04x:%02x:%02x.%01x\n",
+ parent->attr->pcidev.domain, parent->attr->pcidev.bus,
+ parent->attr->pcidev.dev, parent->attr->pcidev.func);
+ else
+ hwloc_debug(" found parent %s P#%u\n",
+ hwloc_obj_type_string(parent->type), parent->os_index);
return parent;
+ }
}
+
+/*******************************
+ * Parsing the PCI Config Space
+ */
+
#define HWLOC_PCI_STATUS 0x06
#define HWLOC_PCI_STATUS_CAP_LIST 0x10
#define HWLOC_PCI_CAPABILITY_LIST 0x34
@@ -703,13 +818,14 @@ hwloc_pcidisc_find_linkspeed(const unsigned char *config,
* PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane
* PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane
* PCIe Gen4 = 16 GT/s signal-rate per lane with 128/130 encoding = 2 GB/s data-rate per lane
+ * PCIe Gen5 = 32 GT/s signal-rate per lane with 128/130 encoding = 4 GB/s data-rate per lane
*/
/* lanespeed in Gbit/s */
if (speed <= 2)
lanespeed = 2.5f * speed * 0.8f;
else
- lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen5 will be 32 GT/s and so on */
+ lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen6 will be 64 GT/s and so on */
/* linkspeed in GB/s */
*linkspeed = lanespeed * width / 8;
@@ -738,30 +854,27 @@ hwloc_pcidisc_check_bridge_type(unsigned device_class, const unsigned char *conf
#define HWLOC_PCI_SUBORDINATE_BUS 0x1a
int
-hwloc_pcidisc_setup_bridge_attr(hwloc_obj_t obj,
+hwloc_pcidisc_find_bridge_buses(unsigned domain, unsigned bus, unsigned dev, unsigned func,
+ unsigned *secondary_busp, unsigned *subordinate_busp,
const unsigned char *config)
{
- struct hwloc_bridge_attr_s *battr = &obj->attr->bridge;
- struct hwloc_pcidev_attr_s *pattr = &battr->upstream.pci;
+ unsigned secondary_bus, subordinate_bus;
- if (config[HWLOC_PCI_PRIMARY_BUS] != pattr->bus) {
+ if (config[HWLOC_PCI_PRIMARY_BUS] != bus) {
/* Sometimes the config space contains 00 instead of the actual primary bus number.
* Always trust the bus ID because it was built by the system which has more information
* to workaround such problems (e.g. ACPI information about PCI parent/children).
*/
hwloc_debug(" %04x:%02x:%02x.%01x bridge with (ignored) invalid PCI_PRIMARY_BUS %02x\n",
- pattr->domain, pattr->bus, pattr->dev, pattr->func, config[HWLOC_PCI_PRIMARY_BUS]);
+ domain, bus, dev, func, config[HWLOC_PCI_PRIMARY_BUS]);
}
- battr->upstream_type = HWLOC_OBJ_BRIDGE_PCI;
- battr->downstream_type = HWLOC_OBJ_BRIDGE_PCI;
- battr->downstream.pci.domain = pattr->domain;
- battr->downstream.pci.secondary_bus = config[HWLOC_PCI_SECONDARY_BUS];
- battr->downstream.pci.subordinate_bus = config[HWLOC_PCI_SUBORDINATE_BUS];
+ secondary_bus = config[HWLOC_PCI_SECONDARY_BUS];
+ subordinate_bus = config[HWLOC_PCI_SUBORDINATE_BUS];
- if (battr->downstream.pci.secondary_bus <= pattr->bus
- || battr->downstream.pci.subordinate_bus <= pattr->bus
- || battr->downstream.pci.secondary_bus > battr->downstream.pci.subordinate_bus) {
+ if (secondary_bus <= bus
+ || subordinate_bus <= bus
+ || secondary_bus > subordinate_bus) {
/* This should catch most cases of invalid bridge information
* (e.g. 00 for secondary and subordinate).
* Ideally we would also check that [secondary-subordinate] is included
@@ -769,15 +882,21 @@ hwloc_pcidisc_setup_bridge_attr(hwloc_obj_t obj,
* because objects may be discovered out of order (especially in the fsroot case).
*/
hwloc_debug(" %04x:%02x:%02x.%01x bridge has invalid secondary-subordinate buses [%02x-%02x]\n",
- pattr->domain, pattr->bus, pattr->dev, pattr->func,
- battr->downstream.pci.secondary_bus, battr->downstream.pci.subordinate_bus);
- hwloc_free_unlinked_object(obj);
+ domain, bus, dev, func,
+ secondary_bus, subordinate_bus);
return -1;
}
+ *secondary_busp = secondary_bus;
+ *subordinate_busp = subordinate_bus;
return 0;
}
+
+/****************
+ * Class Strings
+ */
+
const char *
hwloc_pci_class_string(unsigned short class_id)
{
diff --git a/src/3rdparty/hwloc/src/shmem.c b/src/3rdparty/hwloc/src/shmem.c
index 6c507f52..94d55eef 100644
--- a/src/3rdparty/hwloc/src/shmem.c
+++ b/src/3rdparty/hwloc/src/shmem.c
@@ -1,12 +1,12 @@
/*
- * Copyright © 2017-2018 Inria. All rights reserved.
+ * Copyright © 2017-2019 Inria. All rights reserved.
* See COPYING in top-level directory.
*/
-#include <private/autogen/config.h>
-#include <hwloc.h>
-#include <hwloc/shmem.h>
-#include <private/private.h>
+#include "private/autogen/config.h"
+#include "hwloc.h"
+#include "hwloc/shmem.h"
+#include "private/private.h"
#ifndef HWLOC_WIN_SYS
@@ -214,6 +214,8 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
new->support.discovery = malloc(sizeof(*new->support.discovery));
new->support.cpubind = malloc(sizeof(*new->support.cpubind));
new->support.membind = malloc(sizeof(*new->support.membind));
+ if (!new->support.discovery || !new->support.cpubind || !new->support.membind)
+ goto out_with_support;
memcpy(new->support.discovery, old->support.discovery, sizeof(*new->support.discovery));
memcpy(new->support.cpubind, old->support.cpubind, sizeof(*new->support.cpubind));
memcpy(new->support.membind, old->support.membind, sizeof(*new->support.membind));
@@ -230,6 +232,11 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
*topologyp = new;
return 0;
+ out_with_support:
+ free(new->support.discovery);
+ free(new->support.cpubind);
+ free(new->support.membind);
+ free(new);
out_with_components:
hwloc_components_fini();
out_with_mmap:
diff --git a/src/3rdparty/hwloc/src/topology-noos.c b/src/3rdparty/hwloc/src/topology-noos.c
index 77871eb1..174b6fd8 100644
--- a/src/3rdparty/hwloc/src/topology-noos.c
+++ b/src/3rdparty/hwloc/src/topology-noos.c
@@ -1,26 +1,34 @@
/*
* Copyright © 2009 CNRS
- * Copyright © 2009-2017 Inria. All rights reserved.
+ * Copyright © 2009-2019 Inria. All rights reserved.
* Copyright © 2009-2012 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
-#include <private/autogen/config.h>
-#include <hwloc.h>
-#include <private/private.h>
+#include "private/autogen/config.h"
+#include "hwloc.h"
+#include "private/private.h"
static int
-hwloc_look_noos(struct hwloc_backend *backend)
+hwloc_look_noos(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
{
+ /*
+ * This backend uses the underlying OS.
+ * However we don't enforce topology->is_thissystem so that
+ * we may still force use this backend when debugging with !thissystem.
+ */
+
struct hwloc_topology *topology = backend->topology;
int nbprocs;
+ assert(dstatus->phase == HWLOC_DISC_PHASE_CPU);
+
if (topology->levels[0][0]->cpuset)
/* somebody discovered things */
return -1;
- nbprocs = hwloc_fallback_nbprocessors(topology);
+ nbprocs = hwloc_fallback_nbprocessors(0);
if (nbprocs >= 1)
topology->support.discovery->pu = 1;
else
@@ -33,13 +41,15 @@ hwloc_look_noos(struct hwloc_backend *backend)
}
static struct hwloc_backend *
-hwloc_noos_component_instantiate(struct hwloc_disc_component *component,
+hwloc_noos_component_instantiate(struct hwloc_topology *topology,
+ struct hwloc_disc_component *component,
+ unsigned excluded_phases __hwloc_attribute_unused,
const void *_data1 __hwloc_attribute_unused,
const void *_data2 __hwloc_attribute_unused,
const void *_data3 __hwloc_attribute_unused)
{
struct hwloc_backend *backend;
- backend = hwloc_backend_alloc(component);
+ backend = hwloc_backend_alloc(topology, component);
if (!backend)
return NULL;
backend->discover = hwloc_look_noos;
@@ -47,9 +57,9 @@ hwloc_noos_component_instantiate(struct hwloc_disc_component *component,
}
static struct hwloc_disc_component hwloc_noos_disc_component = {
- HWLOC_DISC_COMPONENT_TYPE_CPU,
"no_os",
- HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
+ HWLOC_DISC_PHASE_CPU,
+ HWLOC_DISC_PHASE_GLOBAL,
hwloc_noos_component_instantiate,
40, /* lower than native OS component, higher than globals */
1,
diff --git a/src/3rdparty/hwloc/src/topology-synthetic.c b/src/3rdparty/hwloc/src/topology-synthetic.c
index 1fe334d1..686efce1 100644
--- a/src/3rdparty/hwloc/src/topology-synthetic.c
+++ b/src/3rdparty/hwloc/src/topology-synthetic.c
@@ -6,11 +6,11 @@
* See COPYING in top-level directory.
*/
-#include <private/autogen/config.h>
-#include <hwloc.h>
-#include <private/private.h>
-#include <private/misc.h>
-#include <private/debug.h>
+#include "private/autogen/config.h"
+#include "hwloc.h"
+#include "private/private.h"
+#include "private/misc.h"
+#include "private/debug.h"
#include <limits.h>
#include <assert.h>
@@ -122,6 +122,7 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
unsigned long nbs = 1;
unsigned j, mul;
const char *tmp;
+ struct hwloc_synthetic_intlv_loop_s *loops;
tmp = attr;
while (tmp) {
@@ -132,9 +133,10 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
tmp++;
}
- {
/* nr_loops colon-separated fields, but we may need one more at the end */
- HWLOC_VLA(struct hwloc_synthetic_intlv_loop_s, loops, nr_loops+1);
+ loops = malloc((nr_loops+1) * sizeof(*loops));
+ if (!loops)
+ goto out_with_array;
if (*attr >= '0' && *attr <= '9') {
/* interleaving as x*y:z*t:... */
@@ -148,11 +150,13 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
if (tmp2 == tmp || *tmp2 != '*') {
if (verbose)
fprintf(stderr, "Failed to read synthetic index interleaving loop '%s' without number before '*'\n", tmp);
+ free(loops);
goto out_with_array;
}
if (!step) {
if (verbose)
fprintf(stderr, "Invalid interleaving loop with step 0 at '%s'\n", tmp);
+ free(loops);
goto out_with_array;
}
tmp2++;
@@ -160,11 +164,13 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
if (tmp3 == tmp2 || (*tmp3 && *tmp3 != ':' && *tmp3 != ')' && *tmp3 != ' ')) {
if (verbose)
fprintf(stderr, "Failed to read synthetic index interleaving loop '%s' without number between '*' and ':'\n", tmp);
+ free(loops);
goto out_with_array;
}
if (!nb) {
if (verbose)
fprintf(stderr, "Invalid interleaving loop with number 0 at '%s'\n", tmp2);
+ free(loops);
goto out_with_array;
}
loops[cur_loop].step = step;
@@ -192,11 +198,13 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
if (err < 0) {
if (verbose)
fprintf(stderr, "Failed to read synthetic index interleaving loop type '%s'\n", tmp);
+ free(loops);
goto out_with_array;
}
if (type == HWLOC_OBJ_MISC || type == HWLOC_OBJ_BRIDGE || type == HWLOC_OBJ_PCI_DEVICE || type == HWLOC_OBJ_OS_DEVICE) {
if (verbose)
fprintf(stderr, "Misc object type disallowed in synthetic index interleaving loop type '%s'\n", tmp);
+ free(loops);
goto out_with_array;
}
for(i=0; ; i++) {
@@ -217,6 +225,7 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
if (verbose)
fprintf(stderr, "Failed to find level for synthetic index interleaving loop type '%s'\n",
tmp);
+ free(loops);
goto out_with_array;
}
tmp = strchr(tmp, ':');
@@ -235,6 +244,7 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
if (loops[i].level_depth == mydepth && i != cur_loop) {
if (verbose)
fprintf(stderr, "Invalid duplicate interleaving loop type in synthetic index '%s'\n", attr);
+ free(loops);
goto out_with_array;
}
if (loops[i].level_depth < mydepth
@@ -264,6 +274,7 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
} else {
if (verbose)
fprintf(stderr, "Invalid index interleaving total width %lu instead of %lu\n", nbs, total);
+ free(loops);
goto out_with_array;
}
}
@@ -278,6 +289,8 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
mul *= nb;
}
+ free(loops);
+
/* check that we have the right values (cannot pass total, cannot give duplicate 0) */
for(j=0; j<total; j++) {
if (array[j] >= total) {
@@ -293,7 +306,6 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
}
indexes->array = array;
- }
}
return;
@@ -527,7 +539,8 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
if (*pos < '0' || *pos > '9') {
if (hwloc_type_sscanf(pos, &type, &attrs, sizeof(attrs)) < 0) {
- if (!strncmp(pos, "Die", 3) || !strncmp(pos, "Tile", 4) || !strncmp(pos, "Module", 6)) {
+ if (!strncmp(pos, "Tile", 4) || !strncmp(pos, "Module", 6)) {
+ /* possible future types */
type = HWLOC_OBJ_GROUP;
} else {
/* FIXME: allow generic "Cache" string? would require to deal with possibly duplicate cache levels */
@@ -645,6 +658,12 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
errno = EINVAL;
return -1;
}
+ if (type_count[HWLOC_OBJ_DIE] > 1) {
+ if (verbose)
+ fprintf(stderr, "Synthetic string cannot have several die levels\n");
+ errno = EINVAL;
+ return -1;
+ }
if (type_count[HWLOC_OBJ_NUMANODE] > 1) {
if (verbose)
fprintf(stderr, "Synthetic string cannot have several NUMA node levels\n");
@@ -829,6 +848,7 @@ hwloc_synthetic_set_attr(struct hwloc_synthetic_attr_s *sattr,
obj->attr->numanode.page_types[0].count = sattr->memorysize / 4096;
break;
case HWLOC_OBJ_PACKAGE:
+ case HWLOC_OBJ_DIE:
break;
case HWLOC_OBJ_L1CACHE:
case HWLOC_OBJ_L2CACHE:
@@ -953,13 +973,19 @@ hwloc__look_synthetic(struct hwloc_topology *topology,
}
static int
-hwloc_look_synthetic(struct hwloc_backend *backend)
+hwloc_look_synthetic(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
{
+ /*
+ * This backend enforces !topology->is_thissystem by default.
+ */
+
struct hwloc_topology *topology = backend->topology;
struct hwloc_synthetic_backend_data_s *data = backend->private_data;
hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
unsigned i;
+ assert(dstatus->phase == HWLOC_DISC_PHASE_GLOBAL);
+
assert(!topology->levels[0][0]->cpuset);
hwloc_alloc_root_sets(topology->levels[0][0]);
@@ -1001,7 +1027,9 @@ hwloc_synthetic_backend_disable(struct hwloc_backend *backend)
}
static struct hwloc_backend *
-hwloc_synthetic_component_instantiate(struct hwloc_disc_component *component,
+hwloc_synthetic_component_instantiate(struct hwloc_topology *topology,
+ struct hwloc_disc_component *component,
+ unsigned excluded_phases __hwloc_attribute_unused,
const void *_data1,
const void *_data2 __hwloc_attribute_unused,
const void *_data3 __hwloc_attribute_unused)
@@ -1021,7 +1049,7 @@ hwloc_synthetic_component_instantiate(struct hwloc_disc_component *component,
}
}
- backend = hwloc_backend_alloc(component);
+ backend = hwloc_backend_alloc(topology, component);
if (!backend)
goto out;
@@ -1051,8 +1079,8 @@ hwloc_synthetic_component_instantiate(struct hwloc_disc_component *component,
}
static struct hwloc_disc_component hwloc_synthetic_disc_component = {
- HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
"synthetic",
+ HWLOC_DISC_PHASE_GLOBAL,
~0,
hwloc_synthetic_component_instantiate,
30,
@@ -1267,6 +1295,12 @@ hwloc__export_synthetic_obj(struct hwloc_topology * topology, unsigned long flag
/* if exporting to v1 or without extended-types, use all-v1-compatible Socket name */
res = hwloc_snprintf(tmp, tmplen, "Socket%s", aritys);
+ } else if (obj->type == HWLOC_OBJ_DIE
+ && (flags & (HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES
+ |HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1))) {
+ /* if exporting to v1 or without extended-types, use all-v1-compatible Group name */
+ res = hwloc_snprintf(tmp, tmplen, "Group%s", aritys);
+
} else if (obj->type == HWLOC_OBJ_GROUP /* don't export group depth */
|| flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES) {
res = hwloc_snprintf(tmp, tmplen, "%s%s", hwloc_obj_type_string(obj->type), aritys);
@@ -1323,16 +1357,26 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign
}
while (mchild) {
- /* v2: export all NUMA children */
-
- assert(mchild->type == HWLOC_OBJ_NUMANODE); /* only NUMA node memory children for now */
+ /* FIXME: really recurse to export memcaches and numanode,
+ * but it requires clever parsing of [ memcache [numa] [numa] ] during import,
+ * better attaching of things to describe the hierarchy.
+ */
+ hwloc_obj_t numanode = mchild;
+ /* only export the first NUMA node leaf of each memory child
+ * FIXME: This assumes mscache aren't shared between nodes, that's true in current platforms
+ */
+ while (numanode && numanode->type != HWLOC_OBJ_NUMANODE) {
+ assert(numanode->arity == 1);
+ numanode = numanode->memory_first_child;
+ }
+ assert(numanode); /* there's always a numanode at the bottom of the memory tree */
if (needprefix)
hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, ' ');
hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, '[');
- res = hwloc__export_synthetic_obj(topology, flags, mchild, (unsigned)-1, tmp, tmplen);
+ res = hwloc__export_synthetic_obj(topology, flags, numanode, (unsigned)-1, tmp, tmplen);
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)
return -1;
@@ -1366,9 +1410,8 @@ hwloc_check_memory_symmetric(struct hwloc_topology * topology)
assert(node);
first_parent = node->parent;
- assert(hwloc__obj_type_is_normal(first_parent->type)); /* only depth-1 memory children for now */
- /* check whether all object on parent's level have same number of NUMA children */
+ /* check whether all objects on parent's level have same number of NUMA bits */
for(i=0; i<hwloc_get_nbobjs_by_depth(topology, first_parent->depth); i++) {
hwloc_obj_t parent, mchild;
@@ -1379,10 +1422,9 @@ hwloc_check_memory_symmetric(struct hwloc_topology * topology)
if (parent->memory_arity != first_parent->memory_arity)
goto out_with_bitmap;
- /* clear these NUMA children from remaining_nodes */
+ /* clear children NUMA bits from remaining_nodes */
mchild = parent->memory_first_child;
while (mchild) {
- assert(mchild->type == HWLOC_OBJ_NUMANODE); /* only NUMA node memory children for now */
hwloc_bitmap_clr(remaining_nodes, mchild->os_index); /* cannot use parent->nodeset, some normal children may have other NUMA nodes */
mchild = mchild->next_sibling;
}
diff --git a/src/3rdparty/hwloc/src/topology-windows.c b/src/3rdparty/hwloc/src/topology-windows.c
index d03645c0..22521aa3 100644
--- a/src/3rdparty/hwloc/src/topology-windows.c
+++ b/src/3rdparty/hwloc/src/topology-windows.c
@@ -1,6 +1,6 @@
/*
* Copyright © 2009 CNRS
- * Copyright © 2009-2018 Inria. All rights reserved.
+ * Copyright © 2009-2019 Inria. All rights reserved.
* Copyright © 2009-2012 Université Bordeaux
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
@@ -9,10 +9,10 @@
/* To try to get all declarations duplicated below. */
#define _WIN32_WINNT 0x0601
-#include <private/autogen/config.h>
-#include <hwloc.h>
-#include <private/private.h>
-#include <private/debug.h>
+#include "private/autogen/config.h"
+#include "hwloc.h"
+#include "private/private.h"
+#include "private/debug.h"
#include <windows.h>
@@ -731,8 +731,14 @@ hwloc_win_get_area_memlocation(hwloc_topology_t topology __hwloc_attribute_unuse
*/
static int
-hwloc_look_windows(struct hwloc_backend *backend)
+hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
{
+ /*
+ * This backend uses the underlying OS.
+ * However we don't enforce topology->is_thissystem so that
+ * we may still force use this backend when debugging with !thissystem.
+ */
+
struct hwloc_topology *topology = backend->topology;
hwloc_bitmap_t groups_pu_set = NULL;
SYSTEM_INFO SystemInfo;
@@ -740,6 +746,8 @@ hwloc_look_windows(struct hwloc_backend *backend)
int gotnuma = 0;
int gotnumamemory = 0;
+ assert(dstatus->phase == HWLOC_DISC_PHASE_CPU);
+
if (topology->levels[0][0]->cpuset)
/* somebody discovered things */
return -1;
@@ -1136,13 +1144,15 @@ static void hwloc_windows_component_finalize(unsigned long flags __hwloc_attribu
}
static struct hwloc_backend *
-hwloc_windows_component_instantiate(struct hwloc_disc_component *component,
+hwloc_windows_component_instantiate(struct hwloc_topology *topology,
+ struct hwloc_disc_component *component,
+ unsigned excluded_phases __hwloc_attribute_unused,
const void *_data1 __hwloc_attribute_unused,
const void *_data2 __hwloc_attribute_unused,
const void *_data3 __hwloc_attribute_unused)
{
struct hwloc_backend *backend;
- backend = hwloc_backend_alloc(component);
+ backend = hwloc_backend_alloc(topology, component);
if (!backend)
return NULL;
backend->discover = hwloc_look_windows;
@@ -1150,9 +1160,9 @@ hwloc_windows_component_instantiate(struct hwloc_disc_component *component,
}
static struct hwloc_disc_component hwloc_windows_disc_component = {
- HWLOC_DISC_COMPONENT_TYPE_CPU,
"windows",
- HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
+ HWLOC_DISC_PHASE_CPU,
+ HWLOC_DISC_PHASE_GLOBAL,
hwloc_windows_component_instantiate,
50,
1,
@@ -1168,10 +1178,12 @@ const struct hwloc_component hwloc_windows_component = {
};
int
-hwloc_fallback_nbprocessors(struct hwloc_topology *topology __hwloc_attribute_unused) {
+hwloc_fallback_nbprocessors(unsigned flags __hwloc_attribute_unused) {
int n;
SYSTEM_INFO sysinfo;
+ /* TODO handle flags & HWLOC_FALLBACK_NBPROCESSORS_INCLUDE_OFFLINE */
+
/* by default, ignore groups (return only the number in the current group) */
GetSystemInfo(&sysinfo);
n = sysinfo.dwNumberOfProcessors; /* FIXME could be non-contigous, rather return a mask from dwActiveProcessorMask? */
diff --git a/src/3rdparty/hwloc/src/topology-x86.c b/src/3rdparty/hwloc/src/topology-x86.c
index 4aefdcf1..1060157d 100644
--- a/src/3rdparty/hwloc/src/topology-x86.c
+++ b/src/3rdparty/hwloc/src/topology-x86.c
@@ -14,13 +14,12 @@
* on various architectures, without having to use this x86-specific code.
*/
-#include <private/autogen/config.h>
-#include <hwloc.h>
-#include <private/private.h>
-#include <private/debug.h>
-#include <private/misc.h>
-
-#include <private/cpuid-x86.h>
+#include "private/autogen/config.h"
+#include "hwloc.h"
+#include "private/private.h"
+#include "private/debug.h"
+#include "private/misc.h"
+#include "private/cpuid-x86.h"
#include <sys/types.h>
#ifdef HAVE_DIRENT_H
@@ -70,6 +69,8 @@ cpuiddump_read(const char *dirpath, unsigned idx)
{
struct cpuiddump *cpuiddump;
struct cpuiddump_entry *cur;
+ size_t filenamelen;
+ char *filename;
FILE *file;
char line[128];
unsigned nr;
@@ -80,16 +81,16 @@ cpuiddump_read(const char *dirpath, unsigned idx)
goto out;
}
- {
- size_t filenamelen = strlen(dirpath) + 15;
- HWLOC_VLA(char, filename, filenamelen);
+ filenamelen = strlen(dirpath) + 15;
+ filename = malloc(filenamelen);
+ if (!filename)
+ goto out_with_dump;
snprintf(filename, filenamelen, "%s/pu%u", dirpath, idx);
file = fopen(filename, "r");
if (!file) {
fprintf(stderr, "Could not read dumped cpuid file %s, ignoring cpuiddump.\n", filename);
- goto out_with_dump;
+ goto out_with_filename;
}
- }
nr = 0;
while (fgets(line, sizeof(line), file))
@@ -117,10 +118,13 @@ cpuiddump_read(const char *dirpath, unsigned idx)
cpuiddump->nr = nr;
fclose(file);
+ free(filename);
return cpuiddump;
out_with_file:
fclose(file);
+ out_with_filename:
+ free(filename);
out_with_dump:
free(cpuiddump);
out:
@@ -170,6 +174,11 @@ static void cpuid_or_from_dump(unsigned *eax, unsigned *ebx, unsigned *ecx, unsi
* Core detection routines and structures
*/
+enum hwloc_x86_disc_flags {
+ HWLOC_X86_DISC_FLAG_FULL = (1<<0), /* discover everything instead of only annotating */
+ HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES = (1<<1) /* use AMD topoext numanode information */
+};
+
#define has_topoext(features) ((features)[6] & (1 << 22))
#define has_x2apic(features) ((features)[4] & (1 << 21))
@@ -190,12 +199,15 @@ struct cacheinfo {
struct procinfo {
unsigned present;
unsigned apicid;
- unsigned packageid;
- unsigned dieid;
- unsigned nodeid;
- unsigned unitid;
- unsigned threadid;
- unsigned coreid;
+#define PKG 0
+#define CORE 1
+#define NODE 2
+#define UNIT 3
+#define TILE 4
+#define MODULE 5
+#define DIE 6
+#define HWLOC_X86_PROCINFO_ID_NR 7
+ unsigned ids[HWLOC_X86_PROCINFO_ID_NR];
unsigned *otherids;
unsigned levels;
unsigned numcaches;
@@ -215,7 +227,8 @@ enum cpuid_type {
unknown
};
-static void fill_amd_cache(struct procinfo *infos, unsigned level, hwloc_obj_cache_type_t type, unsigned nbthreads_sharing, unsigned cpuid)
+/* AMD legacy cache information from specific CPUID 0x80000005-6 leaves */
+static void setup__amd_cache_legacy(struct procinfo *infos, unsigned level, hwloc_obj_cache_type_t type, unsigned nbthreads_sharing, unsigned cpuid)
{
struct cacheinfo *cache, *tmpcaches;
unsigned cachenum;
@@ -262,7 +275,249 @@ static void fill_amd_cache(struct procinfo *infos, unsigned level, hwloc_obj_cac
hwloc_debug("cache L%u t%u linesize %u ways %d size %luKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10);
}
-static void look_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuiddump *src_cpuiddump)
+/* AMD legacy cache information from CPUID 0x80000005-6 leaves */
+static void read_amd_caches_legacy(struct procinfo *infos, struct cpuiddump *src_cpuiddump, unsigned legacy_max_log_proc)
+{
+ unsigned eax, ebx, ecx, edx;
+
+ eax = 0x80000005;
+ cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+ setup__amd_cache_legacy(infos, 1, HWLOC_OBJ_CACHE_DATA, 1, ecx); /* private L1d */
+ setup__amd_cache_legacy(infos, 1, HWLOC_OBJ_CACHE_INSTRUCTION, 1, edx); /* private L1i */
+
+ eax = 0x80000006;
+ cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+ if (ecx & 0xf000)
+ /* This is actually supported on Intel but LinePerTag isn't returned in bits 8-11.
+ * Could be useful if some Intels (at least before Core micro-architecture)
+ * support this leaf without leaf 0x4.
+ */
+ setup__amd_cache_legacy(infos, 2, HWLOC_OBJ_CACHE_UNIFIED, 1, ecx); /* private L2u */
+ if (edx & 0xf000)
+ setup__amd_cache_legacy(infos, 3, HWLOC_OBJ_CACHE_UNIFIED, legacy_max_log_proc, edx); /* package-wide L3u */
+}
+
+/* AMD caches from CPUID 0x8000001d leaf (topoext) */
+static void read_amd_caches_topoext(struct procinfo *infos, struct cpuiddump *src_cpuiddump)
+{
+ unsigned eax, ebx, ecx, edx;
+ unsigned cachenum;
+ struct cacheinfo *cache;
+
+ /* the code below doesn't want any other cache yet */
+ assert(!infos->numcaches);
+
+ for (cachenum = 0; ; cachenum++) {
+ eax = 0x8000001d;
+ ecx = cachenum;
+ cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+ if ((eax & 0x1f) == 0)
+ break;
+ infos->numcaches++;
+ }
+
+ cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
+ if (cache) {
+ for (cachenum = 0; ; cachenum++) {
+ unsigned long linesize, linepart, ways, sets;
+ eax = 0x8000001d;
+ ecx = cachenum;
+ cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+
+ if ((eax & 0x1f) == 0)
+ break;
+ switch (eax & 0x1f) {
+ case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break;
+ case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break;
+ default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break;
+ }
+
+ cache->level = (eax >> 5) & 0x7;
+ /* Note: actually number of cores */
+ cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1;
+
+ cache->linesize = linesize = (ebx & 0xfff) + 1;
+ cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1;
+ ways = ((ebx >> 22) & 0x3ff) + 1;
+
+ if (eax & (1 << 9))
+ /* Fully associative */
+ cache->ways = -1;
+ else
+ cache->ways = ways;
+ cache->sets = sets = ecx + 1;
+ cache->size = linesize * linepart * ways * sets;
+ cache->inclusive = edx & 0x2;
+
+ hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n",
+ cachenum, cache->level,
+ cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 'i' : 'u',
+ cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10);
+
+ cache++;
+ }
+ } else {
+ infos->numcaches = 0;
+ }
+}
+
+/* Intel cache info from CPUID 0x04 leaf */
+static void read_intel_caches(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, struct cpuiddump *src_cpuiddump)
+{
+ unsigned level;
+ struct cacheinfo *tmpcaches;
+ unsigned eax, ebx, ecx, edx;
+ unsigned oldnumcaches = infos->numcaches; /* in case we got caches above */
+ unsigned cachenum;
+ struct cacheinfo *cache;
+
+ for (cachenum = 0; ; cachenum++) {
+ eax = 0x04;
+ ecx = cachenum;
+ cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+
+ hwloc_debug("cache %u type %u\n", cachenum, eax & 0x1f);
+ if ((eax & 0x1f) == 0)
+ break;
+ level = (eax >> 5) & 0x7;
+ if (data->is_knl && level == 3)
+ /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine), ignore it */
+ break;
+ infos->numcaches++;
+ }
+
+ tmpcaches = realloc(infos->cache, infos->numcaches * sizeof(*infos->cache));
+ if (!tmpcaches) {
+ infos->numcaches = oldnumcaches;
+ } else {
+ infos->cache = tmpcaches;
+ cache = &infos->cache[oldnumcaches];
+
+ for (cachenum = 0; ; cachenum++) {
+ unsigned long linesize, linepart, ways, sets;
+ eax = 0x04;
+ ecx = cachenum;
+ cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+
+ if ((eax & 0x1f) == 0)
+ break;
+ level = (eax >> 5) & 0x7;
+ if (data->is_knl && level == 3)
+ /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine), ignore it */
+ break;
+ switch (eax & 0x1f) {
+ case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break;
+ case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break;
+ default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break;
+ }
+
+ cache->level = level;
+ cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1;
+
+ cache->linesize = linesize = (ebx & 0xfff) + 1;
+ cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1;
+ ways = ((ebx >> 22) & 0x3ff) + 1;
+ if (eax & (1 << 9))
+ /* Fully associative */
+ cache->ways = -1;
+ else
+ cache->ways = ways;
+ cache->sets = sets = ecx + 1;
+ cache->size = linesize * linepart * ways * sets;
+ cache->inclusive = edx & 0x2;
+
+ hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n",
+ cachenum, cache->level,
+ cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 'i' : 'u',
+ cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10);
+ cache++;
+ }
+ }
+}
+
+/* AMD core/thread info from CPUID 0x80000008 leaf */
+static void read_amd_cores_legacy(struct procinfo *infos, struct cpuiddump *src_cpuiddump)
+{
+ unsigned eax, ebx, ecx, edx;
+ unsigned max_nbcores;
+ unsigned max_nbthreads;
+ unsigned coreidsize;
+ unsigned logprocid;
+ unsigned threadid __hwloc_attribute_unused;
+
+ eax = 0x80000008;
+ cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+
+ coreidsize = (ecx >> 12) & 0xf;
+ hwloc_debug("core ID size: %u\n", coreidsize);
+ if (!coreidsize) {
+ max_nbcores = (ecx & 0xff) + 1;
+ } else
+ max_nbcores = 1 << coreidsize;
+ hwloc_debug("Thus max # of cores: %u\n", max_nbcores);
+
+ /* No multithreaded AMD for this old CPUID leaf */
+ max_nbthreads = 1 ;
+ hwloc_debug("and max # of threads: %u\n", max_nbthreads);
+
+ /* legacy_max_log_proc is deprecated, it can be smaller than max_nbcores,
+ * which is the maximum number of cores that the processor could theoretically support
+ * (see "Multiple Core Calculation" in the AMD CPUID specification).
+ * Recompute packageid/coreid accordingly.
+ */
+ infos->ids[PKG] = infos->apicid / max_nbcores;
+ logprocid = infos->apicid % max_nbcores;
+ infos->ids[CORE] = logprocid / max_nbthreads;
+ threadid = logprocid % max_nbthreads;
+ hwloc_debug("this is thread %u of core %u\n", threadid, infos->ids[CORE]);
+}
+
+/* AMD unit/node from CPUID 0x8000001e leaf (topoext) */
+static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags, struct cpuiddump *src_cpuiddump)
+{
+ unsigned apic_id, nodes_per_proc = 0;
+ unsigned eax, ebx, ecx, edx;
+
+ eax = 0x8000001e;
+ cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+ infos->apicid = apic_id = eax;
+
+ if (flags & HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES) {
+ if (infos->cpufamilynumber == 0x16) {
+ /* ecx is reserved */
+ infos->ids[NODE] = 0;
+ nodes_per_proc = 1;
+ } else {
+ /* AMD other families or Hygon family 18h */
+ infos->ids[NODE] = ecx & 0xff;
+ nodes_per_proc = ((ecx >> 8) & 7) + 1;
+ }
+ if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2)
+ || ((infos->cpufamilynumber == 0x17 || infos->cpufamilynumber == 0x18) && nodes_per_proc > 4)) {
+ hwloc_debug("warning: undefined nodes_per_proc value %u, assuming it means %u\n", nodes_per_proc, nodes_per_proc);
+ }
+ }
+
+ if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in 0x15 and compute-units were only used in 0x15 and 0x16 */
+ unsigned cores_per_unit;
+ /* coreid was obtained from read_amd_cores_legacy() earlier */
+ infos->ids[UNIT] = ebx & 0xff;
+ cores_per_unit = ((ebx >> 8) & 0xff) + 1;
+ hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, infos->ids[NODE], cores_per_unit, infos->ids[UNIT]);
+ /* coreid and unitid are package-wide (core 0-15 and unit 0-7 on 16-core 2-NUMAnode processor).
+ * The Linux kernel reduces these to NUMA-node-wide (by applying %core_per_node and %unit_per_node respectively).
+ * It's not clear if we should do this as well.
+ */
+ } else {
+ unsigned threads_per_core;
+ infos->ids[CORE] = ebx & 0xff;
+ threads_per_core = ((ebx >> 8) & 0xff) + 1;
+ hwloc_debug("topoext %08x, %u nodes, node %u, %u threads in core %u\n", apic_id, nodes_per_proc, infos->ids[NODE], threads_per_core, infos->ids[CORE]);
+ }
+}
+
+/* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration) */
+static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuiddump *src_cpuiddump)
{
unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id;
unsigned threadid __hwloc_attribute_unused = 0; /* shut-up compiler */
@@ -302,11 +557,19 @@ static void look_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuid
/* apic_number is the actual number of threads per core */
break;
case 2:
- infos->coreid = id;
- /* apic_number is the actual number of threads per module */
+ infos->ids[CORE] = id;
+ /* apic_number is the actual number of threads per module */
+ break;
+ case 3:
+ infos->ids[MODULE] = id;
+ /* apic_number is the actual number of threads per tile */
+ break;
+ case 4:
+ infos->ids[TILE] = id;
+ /* apic_number is the actual number of threads per die */
break;
case 5:
- infos->dieid = id;
+ infos->ids[DIE] = id;
/* apic_number is the actual number of threads per package */
break;
default:
@@ -317,16 +580,16 @@ static void look_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuid
apic_shift = apic_nextshift;
}
infos->apicid = apic_id;
- infos->packageid = apic_id >> apic_shift;
- hwloc_debug("x2APIC remainder: %u\n", infos->packageid);
- hwloc_debug("this is thread %u of core %u\n", threadid, infos->coreid);
+ infos->ids[PKG] = apic_id >> apic_shift;
+ hwloc_debug("x2APIC remainder: %u\n", infos->ids[PKG]);
+ hwloc_debug("this is thread %u of core %u\n", threadid, infos->ids[CORE]);
}
}
}
/* Fetch information from the processor itself thanks to cpuid and store it in
* infos for summarize to analyze them globally */
-static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type, struct cpuiddump *src_cpuiddump)
+static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, unsigned long flags, unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type, struct cpuiddump *src_cpuiddump)
{
struct hwloc_x86_backend_data_s *data = backend->private_data;
unsigned eax, ebx, ecx = 0, edx;
@@ -348,9 +611,9 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
else
legacy_max_log_proc = 1;
hwloc_debug("APIC ID 0x%02x legacy_max_log_proc %u\n", infos->apicid, legacy_max_log_proc);
- infos->packageid = infos->apicid / legacy_max_log_proc;
+ infos->ids[PKG] = infos->apicid / legacy_max_log_proc;
legacy_log_proc_id = infos->apicid % legacy_max_log_proc;
- hwloc_debug("phys %u legacy thread %u\n", infos->packageid, legacy_log_proc_id);
+ hwloc_debug("phys %u legacy thread %u\n", infos->ids[PKG], legacy_log_proc_id);
/* Get cpu model/family/stepping numbers from same cpuid */
_model = (eax>>4) & 0xf;
@@ -397,258 +660,88 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
/* infos was calloc'ed, already ends with \0 */
}
- /* Get core/thread information from cpuid 0x80000008
- * (not supported on Intel)
- */
- if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000008) {
- unsigned max_nbcores;
- unsigned max_nbthreads;
- unsigned coreidsize;
- unsigned logprocid;
- eax = 0x80000008;
- cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
- coreidsize = (ecx >> 12) & 0xf;
- hwloc_debug("core ID size: %u\n", coreidsize);
- if (!coreidsize) {
- max_nbcores = (ecx & 0xff) + 1;
- } else
- max_nbcores = 1 << coreidsize;
- hwloc_debug("Thus max # of cores: %u\n", max_nbcores);
- /* Still no multithreaded AMD */
- max_nbthreads = 1 ;
- hwloc_debug("and max # of threads: %u\n", max_nbthreads);
- /* legacy_max_log_proc is deprecated, it can be smaller than max_nbcores,
- * which is the maximum number of cores that the processor could theoretically support
- * (see "Multiple Core Calculation" in the AMD CPUID specification).
- * Recompute packageid/threadid/coreid accordingly.
- */
- infos->packageid = infos->apicid / max_nbcores;
- logprocid = infos->apicid % max_nbcores;
- infos->threadid = logprocid % max_nbthreads;
- infos->coreid = logprocid / max_nbthreads;
- hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
- }
-
- infos->numcaches = 0;
- infos->cache = NULL;
-
- /* Get apicid, nodeid, unitid from cpuid 0x8000001e
- * and cache information from cpuid 0x8000001d
- * (AMD topology extension)
- */
- if (cpuid_type != intel && cpuid_type != zhaoxin && has_topoext(features)) {
- unsigned apic_id, node_id, nodes_per_proc;
-
- /* the code below doesn't want any other cache yet */
- assert(!infos->numcaches);
-
- eax = 0x8000001e;
- cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
- infos->apicid = apic_id = eax;
-
- if (infos->cpufamilynumber == 0x16) {
- /* ecx is reserved */
- node_id = 0;
- nodes_per_proc = 1;
- } else {
- /* AMD other families or Hygon family 18h */
- node_id = ecx & 0xff;
- nodes_per_proc = ((ecx >> 8) & 7) + 1;
- }
- infos->nodeid = node_id;
- if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2)
- || ((infos->cpufamilynumber == 0x17 || infos->cpufamilynumber == 0x18) && nodes_per_proc > 4)) {
- hwloc_debug("warning: undefined nodes_per_proc value %u, assuming it means %u\n", nodes_per_proc, nodes_per_proc);
- }
-
- if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in 0x15 and compute-units were only used in 0x15 and 0x16 */
- unsigned unit_id, cores_per_unit;
- infos->unitid = unit_id = ebx & 0xff;
- cores_per_unit = ((ebx >> 8) & 0xff) + 1;
- hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, node_id, cores_per_unit, unit_id);
- /* coreid and unitid are package-wide (core 0-15 and unit 0-7 on 16-core 2-NUMAnode processor).
- * The Linux kernel reduces theses to NUMA-node-wide (by applying %core_per_node and %unit_per node respectively).
- * It's not clear if we should do this as well.
- */
- } else {
- unsigned core_id, threads_per_core;
- infos->coreid = core_id = ebx & 0xff;
- threads_per_core = ((ebx >> 8) & 0xff) + 1;
- hwloc_debug("topoext %08x, %u nodes, node %u, %u threads in core %u\n", apic_id, nodes_per_proc, node_id, threads_per_core, core_id);
- }
-
- for (cachenum = 0; ; cachenum++) {
- eax = 0x8000001d;
- ecx = cachenum;
- cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
- if ((eax & 0x1f) == 0)
- break;
- infos->numcaches++;
- }
-
- cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
- if (cache) {
- for (cachenum = 0; ; cachenum++) {
- unsigned long linesize, linepart, ways, sets;
- eax = 0x8000001d;
- ecx = cachenum;
- cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
-
- if ((eax & 0x1f) == 0)
- break;
- switch (eax & 0x1f) {
- case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break;
- case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break;
- default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break;
- }
-
- cache->level = (eax >> 5) & 0x7;
- /* Note: actually number of cores */
- cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1;
-
- cache->linesize = linesize = (ebx & 0xfff) + 1;
- cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1;
- ways = ((ebx >> 22) & 0x3ff) + 1;
-
- if (eax & (1 << 9))
- /* Fully associative */
- cache->ways = -1;
- else
- cache->ways = ways;
- cache->sets = sets = ecx + 1;
- cache->size = linesize * linepart * ways * sets;
- cache->inclusive = edx & 0x2;
-
- hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n",
- cachenum, cache->level,
- cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 'i' : 'u',
- cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10);
-
- cache++;
- }
- } else {
- infos->numcaches = 0;
- }
- } else {
- /* If there's no topoext,
- * get cache information from cpuid 0x80000005 and 0x80000006
- * (not supported on Intel)
- */
- if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000005) {
- eax = 0x80000005;
- cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
- fill_amd_cache(infos, 1, HWLOC_OBJ_CACHE_DATA, 1, ecx); /* private L1d */
- fill_amd_cache(infos, 1, HWLOC_OBJ_CACHE_INSTRUCTION, 1, edx); /* private L1i */
- }
- if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000006) {
- eax = 0x80000006;
- cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
- if (ecx & 0xf000)
- /* This is actually supported on Intel but LinePerTag isn't returned in bits 8-11.
- * Could be useful if some Intels (at least before Core micro-architecture)
- * support this leaf without leaf 0x4.
- */
- fill_amd_cache(infos, 2, HWLOC_OBJ_CACHE_UNIFIED, 1, ecx); /* private L2u */
- if (edx & 0xf000)
- fill_amd_cache(infos, 3, HWLOC_OBJ_CACHE_UNIFIED, legacy_max_log_proc, edx); /* package-wide L3u */
- }
- }
-
- /* Get thread/core + cache information from cpuid 0x04
- * (not supported on AMD)
- */
if ((cpuid_type != amd && cpuid_type != hygon) && highest_cpuid >= 0x04) {
- unsigned max_nbcores;
- unsigned max_nbthreads;
- unsigned level;
- struct cacheinfo *tmpcaches;
- unsigned oldnumcaches = infos->numcaches; /* in case we got caches above */
-
- for (cachenum = 0; ; cachenum++) {
- eax = 0x04;
- ecx = cachenum;
- cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
-
- hwloc_debug("cache %u type %u\n", cachenum, eax & 0x1f);
- if ((eax & 0x1f) == 0)
- break;
- level = (eax >> 5) & 0x7;
- if (data->is_knl && level == 3)
- /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine, ignore it */
- break;
- infos->numcaches++;
-
- if (!cachenum) {
- /* by the way, get thread/core information from the first cache */
- max_nbcores = ((eax >> 26) & 0x3f) + 1;
- max_nbthreads = legacy_max_log_proc / max_nbcores;
- hwloc_debug("thus %u threads\n", max_nbthreads);
- infos->threadid = legacy_log_proc_id % max_nbthreads;
- infos->coreid = legacy_log_proc_id / max_nbthreads;
- hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
- }
+ /* Get core/thread information from first cache reported by cpuid 0x04
+ * (not supported on AMD)
+ */
+ eax = 0x04;
+ ecx = 0;
+ cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+ if ((eax & 0x1f) != 0) {
+ /* cache looks valid */
+ unsigned max_nbcores;
+ unsigned max_nbthreads;
+ unsigned threadid __hwloc_attribute_unused;
+ max_nbcores = ((eax >> 26) & 0x3f) + 1;
+ max_nbthreads = legacy_max_log_proc / max_nbcores;
+ hwloc_debug("thus %u threads\n", max_nbthreads);
+ threadid = legacy_log_proc_id % max_nbthreads;
+ infos->ids[CORE] = legacy_log_proc_id / max_nbthreads;
+ hwloc_debug("this is thread %u of core %u\n", threadid, infos->ids[CORE]);
}
+ }
- tmpcaches = realloc(infos->cache, infos->numcaches * sizeof(*infos->cache));
- if (!tmpcaches) {
- infos->numcaches = oldnumcaches;
- } else {
- infos->cache = tmpcaches;
- cache = &infos->cache[oldnumcaches];
+ /*********************************************************************************
+ * Get the hierarchy of thread, core, die, package, etc. from CPU-specific leaves
+ */
- for (cachenum = 0; ; cachenum++) {
- unsigned long linesize, linepart, ways, sets;
- eax = 0x04;
- ecx = cachenum;
- cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+ if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000008 && !has_x2apic(features)) {
+ /* Get core/thread information from cpuid 0x80000008
+ * (not supported on Intel)
+ * We could ignore this codepath when x2apic is supported, but we may need
+ * nodeids if HWLOC_X86_TOPOEXT_NUMANODES is set.
+ */
+ read_amd_cores_legacy(infos, src_cpuiddump);
+ }
- if ((eax & 0x1f) == 0)
- break;
- level = (eax >> 5) & 0x7;
- if (data->is_knl && level == 3)
- /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine, ignore it */
- break;
- switch (eax & 0x1f) {
- case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break;
- case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break;
- default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break;
- }
-
- cache->level = level;
- cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1;
-
- cache->linesize = linesize = (ebx & 0xfff) + 1;
- cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1;
- ways = ((ebx >> 22) & 0x3ff) + 1;
- if (eax & (1 << 9))
- /* Fully associative */
- cache->ways = -1;
- else
- cache->ways = ways;
- cache->sets = sets = ecx + 1;
- cache->size = linesize * linepart * ways * sets;
- cache->inclusive = edx & 0x2;
-
- hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n",
- cachenum, cache->level,
- cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 'i' : 'u',
- cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10);
- cache++;
- }
- }
+ if (cpuid_type != intel && cpuid_type != zhaoxin && has_topoext(features)) {
+ /* Get apicid, nodeid, unitid/coreid from cpuid 0x8000001e (AMD topology extension).
+ * Requires read_amd_cores_legacy() for coreid on family 0x15-16.
+ *
+ * Only needed when x2apic supported if NUMA nodes are needed.
+ */
+ read_amd_cores_topoext(infos, flags, src_cpuiddump);
}
if ((cpuid_type == intel) && highest_cpuid >= 0x1f) {
/* Get package/die/module/tile/core/thread information from cpuid 0x1f
* (Intel v2 Extended Topology Enumeration)
*/
- look_exttopoenum(infos, 0x1f, src_cpuiddump);
+ read_intel_cores_exttopoenum(infos, 0x1f, src_cpuiddump);
- } else if ((cpuid_type == intel || cpuid_type == zhaoxin) && highest_cpuid >= 0x0b && has_x2apic(features)) {
+ } else if ((cpuid_type == intel || cpuid_type == amd || cpuid_type == zhaoxin)
+ && highest_cpuid >= 0x0b && has_x2apic(features)) {
/* Get package/core/thread information from cpuid 0x0b
* (Intel v1 Extended Topology Enumeration)
*/
- look_exttopoenum(infos, 0x0b, src_cpuiddump);
+ read_intel_cores_exttopoenum(infos, 0x0b, src_cpuiddump);
+ }
+
+ /**************************************
+ * Get caches from CPU-specific leaves
+ */
+
+ infos->numcaches = 0;
+ infos->cache = NULL;
+
+ if (cpuid_type != intel && cpuid_type != zhaoxin && has_topoext(features)) {
+ /* Get cache information from cpuid 0x8000001d (AMD topology extension) */
+ read_amd_caches_topoext(infos, src_cpuiddump);
+
+ } else if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000006) {
+ /* If there's no topoext,
+ * get cache information from cpuid 0x80000005 and 0x80000006.
+ * (not supported on Intel)
+ * It looks like we cannot have 0x80000005 without 0x80000006.
+ */
+ read_amd_caches_legacy(infos, src_cpuiddump, legacy_max_log_proc);
+ }
+
+ if ((cpuid_type != amd && cpuid_type != hygon) && highest_cpuid >= 0x04) {
+ /* Get cache information from cpuid 0x04
+ * (not supported on AMD)
+ */
+ read_intel_caches(data, infos, src_cpuiddump);
}
/* Now that we have all info, compute cacheids and apply quirks */
@@ -736,8 +829,55 @@ hwloc_x86_add_cpuinfos(hwloc_obj_t obj, struct procinfo *info, int replace)
hwloc__add_info_nodup(&obj->infos, &obj->infos_count, "CPUStepping", number, replace);
}
+static void
+hwloc_x86_add_groups(hwloc_topology_t topology,
+ struct procinfo *infos,
+ unsigned nbprocs,
+ hwloc_bitmap_t remaining_cpuset,
+ unsigned type,
+ const char *subtype,
+ unsigned kind,
+ int dont_merge)
+{
+ hwloc_bitmap_t obj_cpuset;
+ hwloc_obj_t obj;
+ unsigned i, j;
+
+ while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
+ unsigned packageid = infos[i].ids[PKG];
+ unsigned id = infos[i].ids[type];
+
+ if (id == (unsigned)-1) {
+ hwloc_bitmap_clr(remaining_cpuset, i);
+ continue;
+ }
+
+ obj_cpuset = hwloc_bitmap_alloc();
+ for (j = i; j < nbprocs; j++) {
+ if (infos[j].ids[type] == (unsigned) -1) {
+ hwloc_bitmap_clr(remaining_cpuset, j);
+ continue;
+ }
+
+ if (infos[j].ids[PKG] == packageid && infos[j].ids[type] == id) {
+ hwloc_bitmap_set(obj_cpuset, j);
+ hwloc_bitmap_clr(remaining_cpuset, j);
+ }
+ }
+
+ obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, id);
+ obj->cpuset = obj_cpuset;
+ obj->subtype = strdup(subtype);
+ obj->attr->group.kind = kind;
+ obj->attr->group.dont_merge = dont_merge;
+ hwloc_debug_2args_bitmap("os %s %u has cpuset %s\n",
+ subtype, id, obj_cpuset);
+ hwloc_insert_object_by_cpuset(topology, obj);
+ }
+}
+
/* Analyse information stored in infos, and build/annotate topology levels accordingly */
-static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery)
+static void summarize(struct hwloc_backend *backend, struct procinfo *infos, unsigned long flags)
{
struct hwloc_topology *topology = backend->topology;
struct hwloc_x86_backend_data_s *data = backend->private_data;
@@ -747,6 +887,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int
int one = -1;
hwloc_bitmap_t remaining_cpuset;
int gotnuma = 0;
+ int fulldiscovery = (flags & HWLOC_X86_DISC_FLAG_FULL);
for (i = 0; i < nbprocs; i++)
if (infos[i].present) {
@@ -773,11 +914,11 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
if (fulldiscovery) {
- unsigned packageid = infos[i].packageid;
+ unsigned packageid = infos[i].ids[PKG];
hwloc_bitmap_t package_cpuset = hwloc_bitmap_alloc();
for (j = i; j < nbprocs; j++) {
- if (infos[j].packageid == packageid) {
+ if (infos[j].ids[PKG] == packageid) {
hwloc_bitmap_set(package_cpuset, j);
hwloc_bitmap_clr(remaining_cpuset, j);
}
@@ -811,7 +952,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int
}
/* Look for Numa nodes inside packages (cannot be filtered-out) */
- if (fulldiscovery && getenv("HWLOC_X86_TOPOEXT_NUMANODES")) {
+ if (fulldiscovery && (flags & HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES)) {
hwloc_bitmap_t node_cpuset;
hwloc_obj_t node;
@@ -819,8 +960,8 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
- unsigned packageid = infos[i].packageid;
- unsigned nodeid = infos[i].nodeid;
+ unsigned packageid = infos[i].ids[PKG];
+ unsigned nodeid = infos[i].ids[NODE];
if (nodeid == (unsigned)-1) {
hwloc_bitmap_clr(remaining_cpuset, i);
@@ -829,12 +970,12 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int
node_cpuset = hwloc_bitmap_alloc();
for (j = i; j < nbprocs; j++) {
- if (infos[j].nodeid == (unsigned) -1) {
+ if (infos[j].ids[NODE] == (unsigned) -1) {
hwloc_bitmap_clr(remaining_cpuset, j);
continue;
}
- if (infos[j].packageid == packageid && infos[j].nodeid == nodeid) {
+ if (infos[j].ids[PKG] == packageid && infos[j].ids[NODE] == nodeid) {
hwloc_bitmap_set(node_cpuset, j);
hwloc_bitmap_clr(remaining_cpuset, j);
}
@@ -852,77 +993,21 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int
if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) {
if (fulldiscovery) {
- char *env;
- int dont_merge;
- hwloc_bitmap_t unit_cpuset, die_cpuset;
- hwloc_obj_t unit, die;
-
- /* Look for Compute units inside packages */
+ /* Look for AMD Compute units inside packages */
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
- while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
- unsigned packageid = infos[i].packageid;
- unsigned unitid = infos[i].unitid;
-
- if (unitid == (unsigned)-1) {
- hwloc_bitmap_clr(remaining_cpuset, i);
- continue;
- }
-
- unit_cpuset = hwloc_bitmap_alloc();
- for (j = i; j < nbprocs; j++) {
- if (infos[j].unitid == (unsigned) -1) {
- hwloc_bitmap_clr(remaining_cpuset, j);
- continue;
- }
-
- if (infos[j].packageid == packageid && infos[j].unitid == unitid) {
- hwloc_bitmap_set(unit_cpuset, j);
- hwloc_bitmap_clr(remaining_cpuset, j);
- }
- }
- unit = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, unitid);
- unit->cpuset = unit_cpuset;
- unit->subtype = strdup("ComputeUnit");
- unit->attr->group.kind = HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT;
- hwloc_debug_1arg_bitmap("os unit %u has cpuset %s\n",
- unitid, unit_cpuset);
- hwloc_insert_object_by_cpuset(topology, unit);
- }
-
- /* Look for Dies inside packages */
- env = getenv("HWLOC_DONT_MERGE_DIE_GROUPS");
- dont_merge = env && atoi(env);
+ hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
+ UNIT, "Compute Unit",
+ HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT, 0);
+ /* Look for Intel Modules inside packages */
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
- while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
- unsigned packageid = infos[i].packageid;
- unsigned dieid = infos[i].dieid;
-
- if (dieid == (unsigned)-1) {
- hwloc_bitmap_clr(remaining_cpuset, i);
- continue;
- }
-
- die_cpuset = hwloc_bitmap_alloc();
- for (j = i; j < nbprocs; j++) {
- if (infos[j].dieid == (unsigned) -1) {
- hwloc_bitmap_clr(remaining_cpuset, j);
- continue;
- }
-
- if (infos[j].packageid == packageid && infos[j].dieid == dieid) {
- hwloc_bitmap_set(die_cpuset, j);
- hwloc_bitmap_clr(remaining_cpuset, j);
- }
- }
- die = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, dieid);
- die->cpuset = die_cpuset;
- die->subtype = strdup("Die");
- die->attr->group.kind = HWLOC_GROUP_KIND_INTEL_DIE;
- die->attr->group.dont_merge = dont_merge;
- hwloc_debug_1arg_bitmap("os die %u has cpuset %s\n",
- dieid, die_cpuset);
- hwloc_insert_object_by_cpuset(topology, die);
- }
+ hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
+ MODULE, "Module",
+ HWLOC_GROUP_KIND_INTEL_MODULE, 0);
+ /* Look for Intel Tiles inside packages */
+ hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
+ hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
+ TILE, "Tile",
+ HWLOC_GROUP_KIND_INTEL_TILE, 0);
/* Look for unknown objects */
if (infos[one].otherids) {
@@ -956,6 +1041,43 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int
}
}
+ if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_DIE)) {
+ /* Look for Intel Dies inside packages */
+ if (fulldiscovery) {
+ hwloc_bitmap_t die_cpuset;
+ hwloc_obj_t die;
+
+ hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
+ while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
+ unsigned packageid = infos[i].ids[PKG];
+ unsigned dieid = infos[i].ids[DIE];
+
+ if (dieid == (unsigned) -1) {
+ hwloc_bitmap_clr(remaining_cpuset, i);
+ continue;
+ }
+
+ die_cpuset = hwloc_bitmap_alloc();
+ for (j = i; j < nbprocs; j++) {
+ if (infos[j].ids[DIE] == (unsigned) -1) {
+ hwloc_bitmap_clr(remaining_cpuset, j);
+ continue;
+ }
+
+ if (infos[j].ids[PKG] == packageid && infos[j].ids[DIE] == dieid) {
+ hwloc_bitmap_set(die_cpuset, j);
+ hwloc_bitmap_clr(remaining_cpuset, j);
+ }
+ }
+ die = hwloc_alloc_setup_object(topology, HWLOC_OBJ_DIE, dieid);
+ die->cpuset = die_cpuset;
+ hwloc_debug_1arg_bitmap("os die %u has cpuset %s\n",
+ dieid, die_cpuset);
+ hwloc_insert_object_by_cpuset(topology, die);
+ }
+ }
+ }
+
if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
/* Look for cores */
if (fulldiscovery) {
@@ -964,9 +1086,9 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
- unsigned packageid = infos[i].packageid;
- unsigned nodeid = infos[i].nodeid;
- unsigned coreid = infos[i].coreid;
+ unsigned packageid = infos[i].ids[PKG];
+ unsigned nodeid = infos[i].ids[NODE];
+ unsigned coreid = infos[i].ids[CORE];
if (coreid == (unsigned) -1) {
hwloc_bitmap_clr(remaining_cpuset, i);
@@ -975,12 +1097,12 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int
core_cpuset = hwloc_bitmap_alloc();
for (j = i; j < nbprocs; j++) {
- if (infos[j].coreid == (unsigned) -1) {
+ if (infos[j].ids[CORE] == (unsigned) -1) {
hwloc_bitmap_clr(remaining_cpuset, j);
continue;
}
- if (infos[j].packageid == packageid && infos[j].nodeid == nodeid && infos[j].coreid == coreid) {
+ if (infos[j].ids[PKG] == packageid && infos[j].ids[NODE] == nodeid && infos[j].ids[CORE] == coreid) {
hwloc_bitmap_set(core_cpuset, j);
hwloc_bitmap_clr(remaining_cpuset, j);
}
@@ -1056,7 +1178,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int
} else {
/* Add the missing cache */
hwloc_bitmap_t cache_cpuset;
- unsigned packageid = infos[i].packageid;
+ unsigned packageid = infos[i].ids[PKG];
unsigned cacheid = infos[i].cache[l].cacheid;
/* Now look for others sharing it */
cache_cpuset = hwloc_bitmap_alloc();
@@ -1071,7 +1193,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int
hwloc_bitmap_clr(remaining_cpuset, j);
continue;
}
- if (infos[j].packageid == packageid && infos[j].cache[l2].cacheid == cacheid) {
+ if (infos[j].ids[PKG] == packageid && infos[j].cache[l2].cacheid == cacheid) {
hwloc_bitmap_set(cache_cpuset, j);
hwloc_bitmap_clr(remaining_cpuset, j);
}
@@ -1103,7 +1225,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int
}
static int
-look_procs(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery,
+look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long flags,
unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type,
int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags),
int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags))
@@ -1139,7 +1261,7 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscov
}
}
- look_proc(backend, &infos[i], highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump);
+ look_proc(backend, &infos[i], flags, highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump);
if (data->src_cpuiddump_path) {
cpuiddump_free(src_cpuiddump);
@@ -1152,10 +1274,10 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscov
hwloc_bitmap_free(orig_cpuset);
}
- if (!data->apicid_unique)
- fulldiscovery = 0;
- else
- summarize(backend, infos, fulldiscovery);
+ if (data->apicid_unique)
+ summarize(backend, infos, flags);
+ /* if !data->apicid_unique, do nothing and return success, so that the caller does nothing either */
+
return 0;
}
@@ -1223,7 +1345,7 @@ static int fake_set_cpubind(hwloc_topology_t topology __hwloc_attribute_unused,
}
static
-int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery)
+int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
{
struct hwloc_x86_backend_data_s *data = backend->private_data;
unsigned nbprocs = data->nbprocs;
@@ -1245,13 +1367,18 @@ int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery)
int ret = -1;
if (data->src_cpuiddump_path) {
- /* just read cpuid from the dump */
+ /* Just read cpuid from the dump (implies !topology->is_thissystem by default) */
src_cpuiddump = cpuiddump_read(data->src_cpuiddump_path, 0);
if (!src_cpuiddump)
goto out;
} else {
- /* otherwise check if binding works */
+ /* Using real hardware.
+ * However we don't enforce topology->is_thissystem so that
+ * we may still force use this backend when debugging with !thissystem.
+ */
+
+ /* check if binding works */
memset(&hooks, 0, sizeof(hooks));
support.membind = &memsupport;
hwloc_set_native_binding_hooks(&hooks, &support);
@@ -1281,12 +1408,13 @@ int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery)
if (NULL == infos)
goto out;
for (i = 0; i < nbprocs; i++) {
- infos[i].nodeid = (unsigned) -1;
- infos[i].packageid = (unsigned) -1;
- infos[i].dieid = (unsigned) -1;
- infos[i].unitid = (unsigned) -1;
- infos[i].coreid = (unsigned) -1;
- infos[i].threadid = (unsigned) -1;
+ infos[i].ids[PKG] = (unsigned) -1;
+ infos[i].ids[CORE] = (unsigned) -1;
+ infos[i].ids[NODE] = (unsigned) -1;
+ infos[i].ids[UNIT] = (unsigned) -1;
+ infos[i].ids[TILE] = (unsigned) -1;
+ infos[i].ids[MODULE] = (unsigned) -1;
+ infos[i].ids[DIE] = (unsigned) -1;
}
eax = 0x00;
@@ -1334,7 +1462,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery)
hwloc_x86_os_state_save(&os_state, src_cpuiddump);
- ret = look_procs(backend, infos, fulldiscovery,
+ ret = look_procs(backend, infos, flags,
highest_cpuid, highest_ext_cpuid, features, cpuid_type,
get_cpubind, set_cpubind);
if (!ret)
@@ -1343,8 +1471,8 @@ int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery)
if (nbprocs == 1) {
/* only one processor, no need to bind */
- look_proc(backend, &infos[0], highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump);
- summarize(backend, infos, fulldiscovery);
+ look_proc(backend, &infos[0], flags, highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump);
+ summarize(backend, infos, flags);
ret = 0;
}
@@ -1367,13 +1495,20 @@ out:
}
static int
-hwloc_x86_discover(struct hwloc_backend *backend)
+hwloc_x86_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
{
struct hwloc_x86_backend_data_s *data = backend->private_data;
struct hwloc_topology *topology = backend->topology;
+ unsigned long flags = 0;
int alreadypus = 0;
int ret;
+ assert(dstatus->phase == HWLOC_DISC_PHASE_CPU);
+
+ if (getenv("HWLOC_X86_TOPOEXT_NUMANODES")) {
+ flags |= HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES;
+ }
+
#if HAVE_DECL_RUNNING_ON_VALGRIND
if (RUNNING_ON_VALGRIND && !data->src_cpuiddump_path) {
fprintf(stderr, "hwloc x86 backend cannot work under Valgrind, disabling.\n"
@@ -1387,7 +1522,7 @@ hwloc_x86_discover(struct hwloc_backend *backend)
assert(data->nbprocs > 0); /* enforced by hwloc_x86_component_instantiate() */
topology->support.discovery->pu = 1;
} else {
- int nbprocs = hwloc_fallback_nbprocessors(topology);
+ int nbprocs = hwloc_fallback_nbprocessors(HWLOC_FALLBACK_NBPROCESSORS_INCLUDE_OFFLINE);
if (nbprocs >= 1)
topology->support.discovery->pu = 1;
else
@@ -1405,7 +1540,7 @@ hwloc_x86_discover(struct hwloc_backend *backend)
/* several object types were added, we can't easily complete, just do partial discovery */
hwloc_topology_reconnect(topology, 0);
- ret = hwloc_look_x86(backend, 0);
+ ret = hwloc_look_x86(backend, flags);
if (ret)
hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86");
return 0;
@@ -1415,7 +1550,7 @@ hwloc_x86_discover(struct hwloc_backend *backend)
}
fulldiscovery:
- if (hwloc_look_x86(backend, 1) < 0) {
+ if (hwloc_look_x86(backend, flags | HWLOC_X86_DISC_FLAG_FULL) < 0) {
/* if failed, create PUs */
if (!alreadypus)
hwloc_setup_pu_level(topology, data->nbprocs);
@@ -1446,6 +1581,7 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s
#if !(defined HWLOC_WIN_SYS && !defined __MINGW32__ && !defined __CYGWIN__) /* needs a lot of work */
struct dirent *dirent;
DIR *dir;
+ char *path;
FILE *file;
char line [32];
@@ -1453,23 +1589,26 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s
if (!dir)
return -1;
- char path[strlen(src_cpuiddump_path) + strlen("/hwloc-cpuid-info") + 1];
+ path = malloc(strlen(src_cpuiddump_path) + strlen("/hwloc-cpuid-info") + 1);
+ if (!path)
+ goto out_with_dir;
sprintf(path, "%s/hwloc-cpuid-info", src_cpuiddump_path);
file = fopen(path, "r");
if (!file) {
fprintf(stderr, "Couldn't open dumped cpuid summary %s\n", path);
- goto out_with_dir;
+ goto out_with_path;
}
if (!fgets(line, sizeof(line), file)) {
fprintf(stderr, "Found read dumped cpuid summary in %s\n", path);
fclose(file);
- goto out_with_dir;
+ goto out_with_path;
}
fclose(file);
if (strcmp(line, "Architecture: x86\n")) {
fprintf(stderr, "Found non-x86 dumped cpuid summary in %s: %s\n", path, line);
- goto out_with_dir;
+ goto out_with_path;
}
+ free(path);
while ((dirent = readdir(dir)) != NULL) {
if (!strncmp(dirent->d_name, "pu", 2)) {
@@ -1497,7 +1636,9 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s
return 0;
-out_with_dir:
+ out_with_path:
+ free(path);
+ out_with_dir:
closedir(dir);
#endif /* HWLOC_WIN_SYS & !__MINGW32__ needs a lot of work */
return -1;
@@ -1513,7 +1654,9 @@ hwloc_x86_backend_disable(struct hwloc_backend *backend)
}
static struct hwloc_backend *
-hwloc_x86_component_instantiate(struct hwloc_disc_component *component,
+hwloc_x86_component_instantiate(struct hwloc_topology *topology,
+ struct hwloc_disc_component *component,
+ unsigned excluded_phases __hwloc_attribute_unused,
const void *_data1 __hwloc_attribute_unused,
const void *_data2 __hwloc_attribute_unused,
const void *_data3 __hwloc_attribute_unused)
@@ -1522,7 +1665,7 @@ hwloc_x86_component_instantiate(struct hwloc_disc_component *component,
struct hwloc_x86_backend_data_s *data;
const char *src_cpuiddump_path;
- backend = hwloc_backend_alloc(component);
+ backend = hwloc_backend_alloc(topology, component);
if (!backend)
goto out;
@@ -1565,9 +1708,9 @@ hwloc_x86_component_instantiate(struct hwloc_disc_component *component,
}
static struct hwloc_disc_component hwloc_x86_disc_component = {
- HWLOC_DISC_COMPONENT_TYPE_CPU,
"x86",
- HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
+ HWLOC_DISC_PHASE_CPU,
+ HWLOC_DISC_PHASE_GLOBAL,
hwloc_x86_component_instantiate,
45, /* between native and no_os */
1,
diff --git a/src/3rdparty/hwloc/src/topology-xml-nolibxml.c b/src/3rdparty/hwloc/src/topology-xml-nolibxml.c
index 5a0d02da..d0e9ec16 100644
--- a/src/3rdparty/hwloc/src/topology-xml-nolibxml.c
+++ b/src/3rdparty/hwloc/src/topology-xml-nolibxml.c
@@ -1,18 +1,18 @@
/*
* Copyright © 2009 CNRS
- * Copyright © 2009-2018 Inria. All rights reserved.
+ * Copyright © 2009-2019 Inria. All rights reserved.
* Copyright © 2009-2011 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
-#include
-#include
-#include
-#include
-#include
-#include
-#include
+#include "private/autogen/config.h"
+#include "hwloc.h"
+#include "hwloc/plugins.h"
+#include "private/private.h"
+#include "private/misc.h"
+#include "private/xml.h"
+#include "private/debug.h"
#include
#include
@@ -27,9 +27,8 @@
*******************/
struct hwloc__nolibxml_backend_data_s {
- size_t buflen; /* size of both buffer and copy buffers, set during backend_init() */
+ size_t buflen; /* size of both buffer, set during backend_init() */
char *buffer; /* allocated and filled during backend_init() */
- char *copy; /* allocated during backend_init(), used later during actual parsing */
};
typedef struct hwloc__nolibxml_import_state_data_s {
@@ -260,14 +259,11 @@ hwloc_nolibxml_look_init(struct hwloc_xml_backend_data_s *bdata,
struct hwloc__nolibxml_backend_data_s *nbdata = bdata->data;
unsigned major, minor;
char *end;
- char *buffer;
+ char *buffer = nbdata->buffer;
+ char *tagname;
HWLOC_BUILD_ASSERT(sizeof(*nstate) <= sizeof(state->data));
- /* use a copy in the temporary buffer, we may modify during parsing */
- buffer = nbdata->copy;
- memcpy(buffer, nbdata->buffer, nbdata->buflen);
-
/* skip headers */
while (!strncmp(buffer, "version_major = major;
bdata->version_minor = minor;
end = strchr(buffer, '>') + 1;
+ tagname = "topology";
} else if (!strncmp(buffer, "", 10)) {
bdata->version_major = 1;
bdata->version_minor = 0;
end = buffer + 10;
+ tagname = "topology";
} else if (!strncmp(buffer, "", 6)) {
bdata->version_major = 0;
bdata->version_minor = 9;
end = buffer + 6;
+ tagname = "root";
} else
goto failed;
@@ -301,7 +300,7 @@ hwloc_nolibxml_look_init(struct hwloc_xml_backend_data_s *bdata,
state->parent = NULL;
nstate->closed = 0;
nstate->tagbuffer = end;
- nstate->tagname = (char *) "topology";
+ nstate->tagname = tagname;
nstate->attrbuffer = NULL;
return 0; /* success */
@@ -320,10 +319,6 @@ hwloc_nolibxml_free_buffers(struct hwloc_xml_backend_data_s *bdata)
free(nbdata->buffer);
nbdata->buffer = NULL;
}
- if (nbdata->copy) {
- free(nbdata->copy);
- nbdata->copy = NULL;
- }
}
static void
@@ -429,19 +424,11 @@ hwloc_nolibxml_backend_init(struct hwloc_xml_backend_data_s *bdata,
goto out_with_nbdata;
}
- /* allocate a temporary copy buffer that we may modify during parsing */
- nbdata->copy = malloc(nbdata->buflen+1);
- if (!nbdata->copy)
- goto out_with_buffer;
- nbdata->copy[nbdata->buflen] = '\0';
-
bdata->look_init = hwloc_nolibxml_look_init;
bdata->look_done = hwloc_nolibxml_look_done;
bdata->backend_exit = hwloc_nolibxml_backend_exit;
return 0;
-out_with_buffer:
- free(nbdata->buffer);
out_with_nbdata:
free(nbdata);
out:
@@ -666,7 +653,7 @@ hwloc__nolibxml_export_end_object(hwloc__xml_export_state_t state, const char *n
}
static void
-hwloc__nolibxml_export_add_content(hwloc__xml_export_state_t state, const char *buffer, size_t length)
+hwloc__nolibxml_export_add_content(hwloc__xml_export_state_t state, const char *buffer, size_t length __hwloc_attribute_unused)
{
hwloc__nolibxml_export_state_data_t ndata = (void *) state->data;
int res;
@@ -678,7 +665,7 @@ hwloc__nolibxml_export_add_content(hwloc__xml_export_state_t state, const char *
}
ndata->has_content = 1;
- res = hwloc_snprintf(ndata->buffer, ndata->remaining, buffer, length);
+ res = hwloc_snprintf(ndata->buffer, ndata->remaining, "%s", buffer);
hwloc__nolibxml_export_update_buffer(ndata, res);
}
@@ -799,6 +786,7 @@ hwloc___nolibxml_prepare_export_diff(hwloc_topology_diff_t diff, const char *ref
state.new_prop = hwloc__nolibxml_export_new_prop;
state.add_content = hwloc__nolibxml_export_add_content;
state.end_object = hwloc__nolibxml_export_end_object;
+ state.global = NULL;
ndata->indent = 0;
ndata->written = 0;
diff --git a/src/3rdparty/hwloc/src/topology-xml.c b/src/3rdparty/hwloc/src/topology-xml.c
index e7c5ef62..f6bb210c 100644
--- a/src/3rdparty/hwloc/src/topology-xml.c
+++ b/src/3rdparty/hwloc/src/topology-xml.c
@@ -6,12 +6,12 @@
* See COPYING in top-level directory.
*/
-#include
-#include
-#include
-#include
-#include
-#include
+#include "private/autogen/config.h"
+#include "hwloc.h"
+#include "private/xml.h"
+#include "private/private.h"
+#include "private/misc.h"
+#include "private/debug.h"
#include
@@ -158,7 +158,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
else if (!strcmp(name, "cache_size")) {
unsigned long long lvalue = strtoull(value, NULL, 10);
- if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD)
+ if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE)
obj->attr->cache.size = lvalue;
else if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring cache_size attribute for non-cache object type\n",
@@ -167,7 +167,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
else if (!strcmp(name, "cache_linesize")) {
unsigned long lvalue = strtoul(value, NULL, 10);
- if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD)
+ if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE)
obj->attr->cache.linesize = lvalue;
else if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring cache_linesize attribute for non-cache object type\n",
@@ -176,7 +176,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
else if (!strcmp(name, "cache_associativity")) {
int lvalue = atoi(value);
- if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD)
+ if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE)
obj->attr->cache.associativity = lvalue;
else if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring cache_associativity attribute for non-cache object type\n",
@@ -185,7 +185,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
else if (!strcmp(name, "cache_type")) {
unsigned long lvalue = strtoul(value, NULL, 10);
- if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) {
+ if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE) {
if (lvalue == HWLOC_OBJ_CACHE_UNIFIED
|| lvalue == HWLOC_OBJ_CACHE_DATA
|| lvalue == HWLOC_OBJ_CACHE_INSTRUCTION)
@@ -211,7 +211,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
else if (!strcmp(name, "depth")) {
unsigned long lvalue = strtoul(value, NULL, 10);
- if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) {
+ if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE) {
obj->attr->cache.depth = lvalue;
} else if (obj->type == HWLOC_OBJ_GROUP || obj->type == HWLOC_OBJ_BRIDGE) {
/* will be overwritten by the core */
@@ -805,21 +805,13 @@ hwloc__xml_import_object(hwloc_topology_t topology,
state->global->msgprefix);
goto error_with_object;
}
- } else if (!strcasecmp(attrvalue, "Die")) {
- /* deal with possible future type */
- obj->type = HWLOC_OBJ_GROUP;
- obj->subtype = strdup("Die");
- obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_DIE;
- obj->attr->group.dont_merge = data->dont_merge_die_groups;
} else if (!strcasecmp(attrvalue, "Tile")) {
/* deal with possible future type */
obj->type = HWLOC_OBJ_GROUP;
- obj->subtype = strdup("Tile");
obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_TILE;
} else if (!strcasecmp(attrvalue, "Module")) {
/* deal with possible future type */
obj->type = HWLOC_OBJ_GROUP;
- obj->subtype = strdup("Module");
obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_MODULE;
} else if (!strcasecmp(attrvalue, "MemCache")) {
/* ignore possible future type */
@@ -1053,6 +1045,13 @@ hwloc__xml_import_object(hwloc_topology_t topology,
/* end of 1.x specific checks */
}
+ /* 2.0 backward compatibility */
+ if (obj->type == HWLOC_OBJ_GROUP) {
+ if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_DIE
+ || (obj->subtype && !strcmp(obj->subtype, "Die")))
+ obj->type = HWLOC_OBJ_DIE;
+ }
+
/* check that cache attributes are coherent with the actual type */
if (hwloc__obj_type_is_cache(obj->type)
&& obj->type != hwloc_cache_type_by_depth_type(obj->attr->cache.depth, obj->attr->cache.type)) {
@@ -1212,19 +1211,24 @@ hwloc__xml_import_object(hwloc_topology_t topology,
static int
hwloc__xml_v2import_distances(hwloc_topology_t topology,
- hwloc__xml_import_state_t state)
+ hwloc__xml_import_state_t state,
+ int heterotypes)
{
- hwloc_obj_type_t type = HWLOC_OBJ_TYPE_NONE;
+ hwloc_obj_type_t unique_type = HWLOC_OBJ_TYPE_NONE;
+ hwloc_obj_type_t *different_types = NULL;
unsigned nbobjs = 0;
- int indexing = 0;
+ int indexing = heterotypes;
int os_indexing = 0;
- int gp_indexing = 0;
+ int gp_indexing = heterotypes;
+ char *name = NULL;
unsigned long kind = 0;
unsigned nr_indexes, nr_u64values;
uint64_t *indexes;
uint64_t *u64values;
int ret;
+#define _TAG_NAME (heterotypes ? "distances2hetero" : "distances2")
+
/* process attributes */
while (1) {
char *attrname, *attrvalue;
@@ -1233,8 +1237,12 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
if (!strcmp(attrname, "nbobjs"))
nbobjs = strtoul(attrvalue, NULL, 10);
else if (!strcmp(attrname, "type")) {
- if (hwloc_type_sscanf(attrvalue, &type, NULL, 0) < 0)
+ if (hwloc_type_sscanf(attrvalue, &unique_type, NULL, 0) < 0) {
+ if (hwloc__xml_verbose())
+ fprintf(stderr, "%s: unrecognized %s type %s\n",
+ state->global->msgprefix, _TAG_NAME, attrvalue);
goto out;
+ }
}
else if (!strcmp(attrname, "indexing")) {
indexing = 1;
@@ -1246,27 +1254,32 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
else if (!strcmp(attrname, "kind")) {
kind = strtoul(attrvalue, NULL, 10);
}
+ else if (!strcmp(attrname, "name")) {
+ name = attrvalue;
+ }
else {
if (hwloc__xml_verbose())
- fprintf(stderr, "%s: ignoring unknown distance attribute %s\n",
- state->global->msgprefix, attrname);
+ fprintf(stderr, "%s: ignoring unknown %s attribute %s\n",
+ state->global->msgprefix, _TAG_NAME, attrname);
}
}
/* abort if missing attribute */
- if (!nbobjs || type == HWLOC_OBJ_TYPE_NONE || !indexing || !kind) {
+ if (!nbobjs || (!heterotypes && unique_type == HWLOC_OBJ_TYPE_NONE) || !indexing || !kind) {
if (hwloc__xml_verbose())
- fprintf(stderr, "%s: distance2 missing some attributes\n",
- state->global->msgprefix);
+ fprintf(stderr, "%s: %s missing some attributes\n",
+ state->global->msgprefix, _TAG_NAME);
goto out;
}
indexes = malloc(nbobjs*sizeof(*indexes));
u64values = malloc(nbobjs*nbobjs*sizeof(*u64values));
- if (!indexes || !u64values) {
+ if (heterotypes)
+ different_types = malloc(nbobjs*sizeof(*different_types));
+ if (!indexes || !u64values || (heterotypes && !different_types)) {
if (hwloc__xml_verbose())
- fprintf(stderr, "%s: failed to allocate distances arrays for %u objects\n",
- state->global->msgprefix, nbobjs);
+ fprintf(stderr, "%s: failed to allocate %s arrays for %u objects\n",
+ state->global->msgprefix, _TAG_NAME, nbobjs);
goto out_with_arrays;
}
@@ -1290,16 +1303,16 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
is_u64values = 1;
if (!is_index && !is_u64values) {
if (hwloc__xml_verbose())
- fprintf(stderr, "%s: distance2 with unrecognized child %s\n",
- state->global->msgprefix, tag);
+ fprintf(stderr, "%s: %s with unrecognized child %s\n",
+ state->global->msgprefix, _TAG_NAME, tag);
goto out_with_arrays;
}
if (state->global->next_attr(&childstate, &attrname, &attrvalue) < 0
|| strcmp(attrname, "length")) {
if (hwloc__xml_verbose())
- fprintf(stderr, "%s: distance2 child must have length attribute\n",
- state->global->msgprefix);
+ fprintf(stderr, "%s: %s child must have length attribute\n",
+ state->global->msgprefix, _TAG_NAME);
goto out_with_arrays;
}
length = atoi(attrvalue);
@@ -1307,24 +1320,43 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
ret = state->global->get_content(&childstate, &buffer, length);
if (ret < 0) {
if (hwloc__xml_verbose())
- fprintf(stderr, "%s: distance2 child needs content of length %d\n",
- state->global->msgprefix, length);
+ fprintf(stderr, "%s: %s child needs content of length %d\n",
+ state->global->msgprefix, _TAG_NAME, length);
goto out_with_arrays;
}
if (is_index) {
/* get indexes */
- char *tmp;
+ char *tmp, *tmp2;
if (nr_indexes >= nbobjs) {
if (hwloc__xml_verbose())
- fprintf(stderr, "%s: distance2 with more than %u indexes\n",
- state->global->msgprefix, nbobjs);
+ fprintf(stderr, "%s: %s with more than %u indexes\n",
+ state->global->msgprefix, _TAG_NAME, nbobjs);
goto out_with_arrays;
}
tmp = buffer;
while (1) {
char *next;
- unsigned long long u = strtoull(tmp, &next, 0);
+ unsigned long long u;
+ if (heterotypes) {
+ hwloc_obj_type_t t = HWLOC_OBJ_TYPE_NONE;
+ if (hwloc_type_sscanf(tmp, &t, NULL, 0) < 0) {
+ if (hwloc__xml_verbose())
+ fprintf(stderr, "%s: %s with unrecognized heterogeneous type %s\n",
+ state->global->msgprefix, _TAG_NAME, tmp);
+ goto out_with_arrays;
+ }
+ tmp2 = strchr(tmp, ':');
+ if (!tmp2) {
+ if (hwloc__xml_verbose())
+ fprintf(stderr, "%s: %s with missing colon after heterogeneous type %s\n",
+ state->global->msgprefix, _TAG_NAME, tmp);
+ goto out_with_arrays;
+ }
+ tmp = tmp2+1;
+ different_types[nr_indexes] = t;
+ }
+ u = strtoull(tmp, &next, 0);
if (next == tmp)
break;
indexes[nr_indexes++] = u;
@@ -1340,8 +1372,8 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
char *tmp;
if (nr_u64values >= nbobjs*nbobjs) {
if (hwloc__xml_verbose())
- fprintf(stderr, "%s: distance2 with more than %u u64values\n",
- state->global->msgprefix, nbobjs*nbobjs);
+ fprintf(stderr, "%s: %s with more than %u u64values\n",
+ state->global->msgprefix, _TAG_NAME, nbobjs*nbobjs);
goto out_with_arrays;
}
tmp = buffer;
@@ -1364,8 +1396,8 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
ret = state->global->close_tag(&childstate);
if (ret < 0) {
if (hwloc__xml_verbose())
- fprintf(stderr, "%s: distance2 with more than %u indexes\n",
- state->global->msgprefix, nbobjs);
+ fprintf(stderr, "%s: %s with more than %u indexes\n",
+ state->global->msgprefix, _TAG_NAME, nbobjs);
goto out_with_arrays;
}
@@ -1374,56 +1406,60 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
if (nr_indexes != nbobjs) {
if (hwloc__xml_verbose())
- fprintf(stderr, "%s: distance2 with less than %u indexes\n",
- state->global->msgprefix, nbobjs);
+ fprintf(stderr, "%s: %s with less than %u indexes\n",
+ state->global->msgprefix, _TAG_NAME, nbobjs);
goto out_with_arrays;
}
if (nr_u64values != nbobjs*nbobjs) {
if (hwloc__xml_verbose())
- fprintf(stderr, "%s: distance2 with less than %u u64values\n",
- state->global->msgprefix, nbobjs*nbobjs);
+ fprintf(stderr, "%s: %s with less than %u u64values\n",
+ state->global->msgprefix, _TAG_NAME, nbobjs*nbobjs);
goto out_with_arrays;
}
if (nbobjs < 2) {
/* distances with a single object are useless, even if the XML isn't invalid */
if (hwloc__xml_verbose())
- fprintf(stderr, "%s: ignoring distances2 with only %u objects\n",
- state->global->msgprefix, nbobjs);
+ fprintf(stderr, "%s: ignoring %s with only %u objects\n",
+ state->global->msgprefix, _TAG_NAME, nbobjs);
goto out_ignore;
}
- if (type == HWLOC_OBJ_PU || type == HWLOC_OBJ_NUMANODE) {
+ if (unique_type == HWLOC_OBJ_PU || unique_type == HWLOC_OBJ_NUMANODE) {
if (!os_indexing) {
if (hwloc__xml_verbose())
- fprintf(stderr, "%s: ignoring PU or NUMA distances2 without os_indexing\n",
- state->global->msgprefix);
+ fprintf(stderr, "%s: ignoring PU or NUMA %s without os_indexing\n",
+ state->global->msgprefix, _TAG_NAME);
goto out_ignore;
}
} else {
if (!gp_indexing) {
if (hwloc__xml_verbose())
- fprintf(stderr, "%s: ignoring !PU or !NUMA distances2 without gp_indexing\n",
- state->global->msgprefix);
+ fprintf(stderr, "%s: ignoring !PU or !NUMA %s without gp_indexing\n",
+ state->global->msgprefix, _TAG_NAME);
goto out_ignore;
}
}
- hwloc_internal_distances_add_by_index(topology, type, nbobjs, indexes, u64values, kind, 0);
+ hwloc_internal_distances_add_by_index(topology, name, unique_type, different_types, nbobjs, indexes, u64values, kind, 0);
/* prevent freeing below */
indexes = NULL;
u64values = NULL;
+ different_types = NULL;
out_ignore:
+ free(different_types);
free(indexes);
free(u64values);
return state->global->close_tag(state);
out_with_arrays:
+ free(different_types);
free(indexes);
free(u64values);
out:
return -1;
+#undef _TAG_NAME
}
static int
@@ -1625,8 +1661,12 @@ hwloc_convert_from_v1dist_floats(hwloc_topology_t topology, unsigned nbobjs, flo
/* this canNOT be the first XML call */
static int
-hwloc_look_xml(struct hwloc_backend *backend)
+hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
{
+ /*
+ * This backend enforces !topology->is_thissystem by default.
+ */
+
struct hwloc_topology *topology = backend->topology;
struct hwloc_xml_backend_data_s *data = backend->private_data;
struct hwloc__xml_import_state_s state, childstate;
@@ -1634,9 +1674,10 @@ hwloc_look_xml(struct hwloc_backend *backend)
char *tag;
int gotignored = 0;
hwloc_localeswitch_declare;
- char *env;
int ret;
+ assert(dstatus->phase == HWLOC_DISC_PHASE_GLOBAL);
+
state.global = data;
assert(!root->cpuset);
@@ -1647,9 +1688,6 @@ hwloc_look_xml(struct hwloc_backend *backend)
data->first_numanode = data->last_numanode = NULL;
data->first_v1dist = data->last_v1dist = NULL;
- env = getenv("HWLOC_DONT_MERGE_DIE_GROUPS");
- data->dont_merge_die_groups = env && atoi(env);
-
ret = data->look_init(data, &state);
if (ret < 0)
goto failed;
@@ -1684,15 +1722,20 @@ hwloc_look_xml(struct hwloc_backend *backend)
goto failed;
if (!ret)
break;
- if (strcmp(tag, "distances2")) {
+ if (!strcmp(tag, "distances2")) {
+ ret = hwloc__xml_v2import_distances(topology, &childstate, 0);
+ if (ret < 0)
+ goto failed;
+ } else if (!strcmp(tag, "distances2hetero")) {
+ ret = hwloc__xml_v2import_distances(topology, &childstate, 1);
+ if (ret < 0)
+ goto failed;
+ } else {
if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring unknown tag `%s' after root object, expected `distances2'\n",
data->msgprefix, tag);
goto done;
}
- ret = hwloc__xml_v2import_distances(topology, &childstate);
- if (ret < 0)
- goto failed;
state.global->close_child(&childstate);
}
}
@@ -1742,8 +1785,8 @@ done:
inext_cousin)
objs[i] = node;
-hwloc_convert_from_v1dist_floats(topology, nbobjs, v1dist->floats, values);
- hwloc_internal_distances_add(topology, nbobjs, objs, values, v1dist->kind, 0);
+ hwloc_convert_from_v1dist_floats(topology, nbobjs, v1dist->floats, values);
+ hwloc_internal_distances_add(topology, NULL, nbobjs, objs, values, v1dist->kind, 0);
} else {
free(objs);
free(values);
@@ -1791,9 +1834,11 @@ hwloc_convert_from_v1dist_floats(topology, nbobjs, v1dist->floats, values);
/* we could add "BackendSource=XML" to notify that XML was used between the actual backend and here */
topology->support.discovery->pu = 1;
+ topology->support.discovery->disallowed_pu = 1;
if (data->nbnumanodes) {
topology->support.discovery->numa = 1;
topology->support.discovery->numa_memory = 1; // FIXME
+ topology->support.discovery->disallowed_numa = 1;
}
if (data->look_done)
@@ -1936,6 +1981,9 @@ hwloc__xml_export_safestrdup(const char *old)
char *new = malloc(strlen(old)+1);
char *dst = new;
const char *src = old;
+ if (!new)
+ return NULL;
+
while (*src) {
if (HWLOC_XML_CHAR_VALID(*src))
*(dst++) = *src;
@@ -1955,6 +2003,8 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo
if (v1export && obj->type == HWLOC_OBJ_PACKAGE)
state->new_prop(state, "type", "Socket");
+ else if (v1export && obj->type == HWLOC_OBJ_DIE)
+ state->new_prop(state, "type", "Group");
else if (v1export && hwloc__obj_type_is_cache(obj->type))
state->new_prop(state, "type", "Cache");
else
@@ -1966,8 +2016,23 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo
}
if (obj->cpuset) {
- if (v1export && obj->type == HWLOC_OBJ_NUMANODE && obj->sibling_rank > 0) {
- /* v1 non-first NUMA nodes have empty cpusets */
+ int empty_cpusets = 0;
+
+ if (v1export && obj->type == HWLOC_OBJ_NUMANODE) {
+ /* walk up this memory hierarchy to find-out if we are the first numa node.
+ * v1 non-first NUMA nodes have empty cpusets.
+ */
+ hwloc_obj_t parent = obj;
+ while (!hwloc_obj_type_is_normal(parent->type)) {
+ if (parent->sibling_rank > 0) {
+ empty_cpusets = 1;
+ break;
+ }
+ parent = parent->parent;
+ }
+ }
+
+ if (empty_cpusets) {
state->new_prop(state, "cpuset", "0x0");
state->new_prop(state, "online_cpuset", "0x0");
state->new_prop(state, "complete_cpuset", "0x0");
@@ -2024,13 +2089,17 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo
if (obj->name) {
char *name = hwloc__xml_export_safestrdup(obj->name);
- state->new_prop(state, "name", name);
- free(name);
+ if (name) {
+ state->new_prop(state, "name", name);
+ free(name);
+ }
}
if (!v1export && obj->subtype) {
char *subtype = hwloc__xml_export_safestrdup(obj->subtype);
- state->new_prop(state, "subtype", subtype);
- free(subtype);
+ if (subtype) {
+ state->new_prop(state, "subtype", subtype);
+ free(subtype);
+ }
}
switch (obj->type) {
@@ -2057,6 +2126,7 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo
case HWLOC_OBJ_L1ICACHE:
case HWLOC_OBJ_L2ICACHE:
case HWLOC_OBJ_L3ICACHE:
+ case HWLOC_OBJ_MEMCACHE:
sprintf(tmp, "%llu", (unsigned long long) obj->attr->cache.size);
state->new_prop(state, "cache_size", tmp);
sprintf(tmp, "%u", obj->attr->cache.depth);
@@ -2125,23 +2195,34 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo
for(i=0; iinfos_count; i++) {
char *name = hwloc__xml_export_safestrdup(obj->infos[i].name);
char *value = hwloc__xml_export_safestrdup(obj->infos[i].value);
- struct hwloc__xml_export_state_s childstate;
- state->new_child(state, &childstate, "info");
- childstate.new_prop(&childstate, "name", name);
- childstate.new_prop(&childstate, "value", value);
- childstate.end_object(&childstate, "info");
+ if (name && value) {
+ struct hwloc__xml_export_state_s childstate;
+ state->new_child(state, &childstate, "info");
+ childstate.new_prop(&childstate, "name", name);
+ childstate.new_prop(&childstate, "value", value);
+ childstate.end_object(&childstate, "info");
+ }
free(name);
free(value);
}
if (v1export && obj->subtype) {
char *subtype = hwloc__xml_export_safestrdup(obj->subtype);
+ if (subtype) {
+ struct hwloc__xml_export_state_s childstate;
+ int is_coproctype = (obj->type == HWLOC_OBJ_OS_DEVICE && obj->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC);
+ state->new_child(state, &childstate, "info");
+ childstate.new_prop(&childstate, "name", is_coproctype ? "CoProcType" : "Type");
+ childstate.new_prop(&childstate, "value", subtype);
+ childstate.end_object(&childstate, "info");
+ free(subtype);
+ }
+ }
+ if (v1export && obj->type == HWLOC_OBJ_DIE) {
struct hwloc__xml_export_state_s childstate;
- int is_coproctype = (obj->type == HWLOC_OBJ_OS_DEVICE && obj->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC);
state->new_child(state, &childstate, "info");
- childstate.new_prop(&childstate, "name", is_coproctype ? "CoProcType" : "Type");
- childstate.new_prop(&childstate, "value", subtype);
+ childstate.new_prop(&childstate, "name", "Type");
+ childstate.new_prop(&childstate, "value", "Die");
childstate.end_object(&childstate, "info");
- free(subtype);
}
if (v1export && !obj->parent) {
@@ -2152,19 +2233,27 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo
for(dist = topology->first_dist; dist; dist = dist->next) {
struct hwloc__xml_export_state_s childstate;
unsigned nbobjs = dist->nbobjs;
+ unsigned *logical_to_v2array;
int depth;
- if (nbobjs != (unsigned) hwloc_get_nbobjs_by_type(topology, dist->type))
+ if (nbobjs != (unsigned) hwloc_get_nbobjs_by_type(topology, dist->unique_type))
continue;
if (!(dist->kind & HWLOC_DISTANCES_KIND_MEANS_LATENCY))
continue;
- {
- HWLOC_VLA(unsigned, logical_to_v2array, nbobjs);
+ if (dist->kind & HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES)
+ continue;
+
+ logical_to_v2array = malloc(nbobjs * sizeof(*logical_to_v2array));
+ if (!logical_to_v2array) {
+ fprintf(stderr, "xml/export/v1: failed to allocated logical_to_v2array\n");
+ continue;
+ }
+
for(i=0; iobjs[i]->logical_index] = i;
/* compute the relative depth */
- if (dist->type == HWLOC_OBJ_NUMANODE) {
+ if (dist->unique_type == HWLOC_OBJ_NUMANODE) {
/* for NUMA nodes, use the highest normal-parent depth + 1 */
depth = -1;
for(i=0; itype) + parent_with_memory;
+ depth = hwloc_get_type_depth(topology, dist->unique_type) + parent_with_memory;
}
state->new_child(state, &childstate, "distances");
@@ -2210,7 +2299,7 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo
}
}
childstate.end_object(&childstate, "distances");
- }
+ free(logical_to_v2array);
}
}
@@ -2243,13 +2332,90 @@ hwloc__xml_v2export_object (hwloc__xml_export_state_t parentstate, hwloc_topolog
static void
hwloc__xml_v1export_object (hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, hwloc_obj_t obj, unsigned long flags);
+static hwloc_obj_t
+hwloc__xml_v1export_object_next_numanode(hwloc_obj_t obj, hwloc_obj_t cur)
+{
+ hwloc_obj_t parent;
+
+ if (!cur) {
+ /* first numa node is on the very bottom left */
+ cur = obj->memory_first_child;
+ goto find_first;
+ }
+
+ /* walk-up until there's a next sibling */
+ parent = cur;
+ while (1) {
+ if (parent->next_sibling) {
+ /* found a next sibling, we'll walk down-left from there */
+ cur = parent->next_sibling;
+ break;
+ }
+ parent = parent->parent;
+ if (parent == obj)
+ return NULL;
+ }
+
+ find_first:
+ while (cur->type != HWLOC_OBJ_NUMANODE)
+ cur = cur->memory_first_child;
+ assert(cur);
+ return cur;
+}
+
+static unsigned
+hwloc__xml_v1export_object_list_numanodes(hwloc_obj_t obj, hwloc_obj_t *first_p, hwloc_obj_t **nodes_p)
+{
+ hwloc_obj_t *nodes, cur;
+ int nr;
+
+ if (!obj->memory_first_child) {
+ *first_p = NULL;
+ *nodes_p = NULL;
+ return 0;
+ }
+ /* we're sure there's at least one numa node */
+
+ nr = hwloc_bitmap_weight(obj->nodeset);
+ assert(nr > 0);
+ /* these are local nodes, but some of them may be attached above instead of here */
+
+ nodes = calloc(nr, sizeof(*nodes));
+ if (!nodes) {
+ /* only return the first node */
+ cur = hwloc__xml_v1export_object_next_numanode(obj, NULL);
+ assert(cur);
+ *first_p = cur;
+ *nodes_p = NULL;
+ return 1;
+ }
+
+ nr = 0;
+ cur = NULL;
+ while (1) {
+ cur = hwloc__xml_v1export_object_next_numanode(obj, cur);
+ if (!cur)
+ break;
+ nodes[nr++] = cur;
+ }
+
+ *first_p = nodes[0];
+ *nodes_p = nodes;
+ return nr;
+}
+
static void
hwloc__xml_v1export_object_with_memory(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, hwloc_obj_t obj, unsigned long flags)
{
struct hwloc__xml_export_state_s gstate, mstate, ostate, *state = parentstate;
hwloc_obj_t child;
+ unsigned nr_numanodes;
+ hwloc_obj_t *numanodes, first_numanode;
+ unsigned i;
- if (obj->parent->arity > 1 && obj->memory_arity > 1 && parentstate->global->v1_memory_group) {
+ nr_numanodes = hwloc__xml_v1export_object_list_numanodes(obj, &first_numanode, &numanodes);
+
+ if (obj->parent->arity > 1 && nr_numanodes > 1 && parentstate->global->v1_memory_group) {
/* child has sibling, we must add a Group around those memory children */
hwloc_obj_t group = parentstate->global->v1_memory_group;
parentstate->new_child(parentstate, &gstate, "object");
@@ -2266,10 +2432,8 @@ hwloc__xml_v1export_object_with_memory(hwloc__xml_export_state_t parentstate, hw
}
/* export first memory child */
- child = obj->memory_first_child;
- assert(child->type == HWLOC_OBJ_NUMANODE);
state->new_child(state, &mstate, "object");
- hwloc__xml_export_object_contents (&mstate, topology, child, flags);
+ hwloc__xml_export_object_contents (&mstate, topology, first_numanode, flags);
/* then the actual object */
mstate.new_child(&mstate, &ostate, "object");
@@ -2288,9 +2452,10 @@ hwloc__xml_v1export_object_with_memory(hwloc__xml_export_state_t parentstate, hw
mstate.end_object(&mstate, "object");
/* now other memory children */
- for_each_memory_child(child, obj)
- if (child->sibling_rank > 0)
- hwloc__xml_v1export_object (state, topology, child, flags);
+ for(i=1; inew_child(state, &_childstate, tagname); \
+ for(_j=0; \
+ _i+_j<(nr) && _jtype), (unsigned long long) (objs)[_i+_j]->gp_index); \
+ _i += _j; \
+ sprintf(_tmp2, "%lu", (unsigned long) _len); \
+ _childstate.new_prop(&_childstate, "length", _tmp2); \
+ _childstate.add_content(&_childstate, _tmp, _len); \
+ _childstate.end_object(&_childstate, tagname); \
+ } \
+} while (0)
+
+static void
+hwloc___xml_v2export_distances(hwloc__xml_export_state_t parentstate, struct hwloc_internal_distances_s *dist)
+{
+ char tmp[255];
+ unsigned nbobjs = dist->nbobjs;
+ struct hwloc__xml_export_state_s state;
+
+ if (dist->different_types) {
+ parentstate->new_child(parentstate, &state, "distances2hetero");
+ } else {
+ parentstate->new_child(parentstate, &state, "distances2");
+ state.new_prop(&state, "type", hwloc_obj_type_string(dist->unique_type));
+ }
+
+ sprintf(tmp, "%u", nbobjs);
+ state.new_prop(&state, "nbobjs", tmp);
+ sprintf(tmp, "%lu", dist->kind);
+ state.new_prop(&state, "kind", tmp);
+ if (dist->name)
+ state.new_prop(&state, "name", dist->name);
+
+ if (!dist->different_types) {
+ state.new_prop(&state, "indexing",
+ HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type) ? "os" : "gp");
+ }
+
+ /* TODO don't hardwire 10 below. either snprintf the max to guess it, or just append until the end of the buffer */
+ if (dist->different_types) {
+ EXPORT_TYPE_GPINDEX_ARRAY(&state, nbobjs, dist->objs, "indexes", 10);
+ } else {
+ EXPORT_ARRAY(&state, unsigned long long, nbobjs, dist->indexes, "indexes", "%llu", 10);
+ }
+ EXPORT_ARRAY(&state, unsigned long long, nbobjs*nbobjs, dist->values, "u64values", "%llu", 10);
+ state.end_object(&state, dist->different_types ? "distances2hetero" : "distances2");
+}
+
static void
hwloc__xml_v2export_distances(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology)
{
struct hwloc_internal_distances_s *dist;
- for(dist = topology->first_dist; dist; dist = dist->next) {
- char tmp[255];
- unsigned nbobjs = dist->nbobjs;
- struct hwloc__xml_export_state_s state;
-
- parentstate->new_child(parentstate, &state, "distances2");
-
- state.new_prop(&state, "type", hwloc_obj_type_string(dist->type));
- sprintf(tmp, "%u", nbobjs);
- state.new_prop(&state, "nbobjs", tmp);
- sprintf(tmp, "%lu", dist->kind);
- state.new_prop(&state, "kind", tmp);
-
- state.new_prop(&state, "indexing",
- (dist->type == HWLOC_OBJ_NUMANODE || dist->type == HWLOC_OBJ_PU) ? "os" : "gp");
- /* TODO don't hardwire 10 below. either snprintf the max to guess it, or just append until the end of the buffer */
- EXPORT_ARRAY(&state, unsigned long long, nbobjs, dist->indexes, "indexes", "%llu", 10);
- EXPORT_ARRAY(&state, unsigned long long, nbobjs*nbobjs, dist->values, "u64values", "%llu", 10);
- state.end_object(&state, "distances2");
- }
+ for(dist = topology->first_dist; dist; dist = dist->next)
+ if (!dist->different_types)
+ hwloc___xml_v2export_distances(parentstate, dist);
+ /* export homogeneous distances first in case the importer doesn't support heterogeneous and stops there */
+ for(dist = topology->first_dist; dist; dist = dist->next)
+ if (dist->different_types)
+ hwloc___xml_v2export_distances(parentstate, dist);
}
void
@@ -2378,18 +2587,22 @@ hwloc__xml_export_topology(hwloc__xml_export_state_t state, hwloc_topology_t top
hwloc_obj_t root = hwloc_get_root_obj(topology);
if (flags & HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1) {
- if (root->memory_first_child) {
+ hwloc_obj_t *numanodes, first_numanode;
+ unsigned nr_numanodes;
+
+ nr_numanodes = hwloc__xml_v1export_object_list_numanodes(root, &first_numanode, &numanodes);
+
+ if (nr_numanodes) {
/* we don't use hwloc__xml_v1export_object_with_memory() because we want/can keep root above the numa node */
struct hwloc__xml_export_state_s rstate, mstate;
hwloc_obj_t child;
+ unsigned i;
/* export the root */
state->new_child(state, &rstate, "object");
hwloc__xml_export_object_contents (&rstate, topology, root, flags);
/* export first memory child */
- child = root->memory_first_child;
- assert(child->type == HWLOC_OBJ_NUMANODE);
rstate.new_child(&rstate, &mstate, "object");
- hwloc__xml_export_object_contents (&mstate, topology, child, flags);
+ hwloc__xml_export_object_contents (&mstate, topology, first_numanode, flags);
/* then its normal/io/misc children */
for_each_child(child, root)
hwloc__xml_v1export_object (&mstate, topology, child, flags);
@@ -2400,15 +2613,16 @@ hwloc__xml_export_topology(hwloc__xml_export_state_t state, hwloc_topology_t top
/* close first memory child */
mstate.end_object(&mstate, "object");
/* now other memory children */
- for_each_memory_child(child, root)
- if (child->sibling_rank > 0)
- hwloc__xml_v1export_object (&rstate, topology, child, flags);
+ for(i=1; i
+#include "private/autogen/config.h"
#define _ATFILE_SOURCE
#include
@@ -25,10 +25,10 @@
#include
#include
-#include
-#include
-#include
-#include
+#include "hwloc.h"
+#include "private/private.h"
+#include "private/debug.h"
+#include "private/misc.h"
#ifdef HAVE_MACH_MACH_INIT_H
#include
@@ -136,14 +136,28 @@ int hwloc_get_sysctl(int name[], unsigned namelen, int *ret)
}
#endif
-/* Return the OS-provided number of processors. Unlike other methods such as
- reading sysfs on Linux, this method is not virtualizable; thus it's only
- used as a fall-back method, allowing virtual backends (FSROOT, etc) to
- have the desired effect. */
+/* Return the OS-provided number of processors.
+ * Assumes topology->is_thissystem is true.
+ */
#ifndef HWLOC_WIN_SYS /* The windows implementation is in topology-windows.c */
int
-hwloc_fallback_nbprocessors(struct hwloc_topology *topology __hwloc_attribute_unused) {
+hwloc_fallback_nbprocessors(unsigned flags) {
int n;
+
+ if (flags & HWLOC_FALLBACK_NBPROCESSORS_INCLUDE_OFFLINE) {
+ /* try to get all CPUs for Linux and Solaris that can handle offline CPUs */
+#if HAVE_DECL__SC_NPROCESSORS_CONF
+ n = sysconf(_SC_NPROCESSORS_CONF);
+#elif HAVE_DECL__SC_NPROC_CONF
+ n = sysconf(_SC_NPROC_CONF);
+#else
+ n = -1;
+#endif
+ if (n != -1)
+ return n;
+ }
+
+ /* try getting only online CPUs, or whatever we can get */
#if HAVE_DECL__SC_NPROCESSORS_ONLN
n = sysconf(_SC_NPROCESSORS_ONLN);
#elif HAVE_DECL__SC_NPROC_ONLN
@@ -762,9 +776,7 @@ hwloc__duplicate_object(struct hwloc_topology *newtopology,
/* place us for real */
assert(newobj->logical_index < level_width);
level[newobj->logical_index] = newobj;
- /* link to already-inserted cousins
- * (hwloc_pci_belowroot_apply_locality() can cause out-of-order logical indexes)
- */
+ /* link to already-inserted cousins */
if (newobj->logical_index > 0 && level[newobj->logical_index-1]) {
newobj->prev_cousin = level[newobj->logical_index-1];
level[newobj->logical_index-1]->next_cousin = newobj;
@@ -991,31 +1003,35 @@ hwloc_topology_dup(hwloc_topology_t *newp,
/***** Make sure you update obj_type_priority[] below as well. *****/
static const unsigned obj_type_order[] = {
/* first entry is HWLOC_OBJ_MACHINE */ 0,
- /* next entry is HWLOC_OBJ_PACKAGE */ 3,
- /* next entry is HWLOC_OBJ_CORE */ 12,
- /* next entry is HWLOC_OBJ_PU */ 16,
- /* next entry is HWLOC_OBJ_L1CACHE */ 10,
- /* next entry is HWLOC_OBJ_L2CACHE */ 8,
- /* next entry is HWLOC_OBJ_L3CACHE */ 6,
- /* next entry is HWLOC_OBJ_L4CACHE */ 5,
- /* next entry is HWLOC_OBJ_L5CACHE */ 4,
- /* next entry is HWLOC_OBJ_L1ICACHE */ 11,
- /* next entry is HWLOC_OBJ_L2ICACHE */ 9,
- /* next entry is HWLOC_OBJ_L3ICACHE */ 7,
+ /* next entry is HWLOC_OBJ_PACKAGE */ 4,
+ /* next entry is HWLOC_OBJ_CORE */ 14,
+ /* next entry is HWLOC_OBJ_PU */ 18,
+ /* next entry is HWLOC_OBJ_L1CACHE */ 12,
+ /* next entry is HWLOC_OBJ_L2CACHE */ 10,
+ /* next entry is HWLOC_OBJ_L3CACHE */ 8,
+ /* next entry is HWLOC_OBJ_L4CACHE */ 7,
+ /* next entry is HWLOC_OBJ_L5CACHE */ 6,
+ /* next entry is HWLOC_OBJ_L1ICACHE */ 13,
+ /* next entry is HWLOC_OBJ_L2ICACHE */ 11,
+ /* next entry is HWLOC_OBJ_L3ICACHE */ 9,
/* next entry is HWLOC_OBJ_GROUP */ 1,
- /* next entry is HWLOC_OBJ_NUMANODE */ 2,
- /* next entry is HWLOC_OBJ_BRIDGE */ 13,
- /* next entry is HWLOC_OBJ_PCI_DEVICE */ 14,
- /* next entry is HWLOC_OBJ_OS_DEVICE */ 15,
- /* next entry is HWLOC_OBJ_MISC */ 17
+ /* next entry is HWLOC_OBJ_NUMANODE */ 3,
+ /* next entry is HWLOC_OBJ_BRIDGE */ 15,
+ /* next entry is HWLOC_OBJ_PCI_DEVICE */ 16,
+ /* next entry is HWLOC_OBJ_OS_DEVICE */ 17,
+ /* next entry is HWLOC_OBJ_MISC */ 19,
+ /* next entry is HWLOC_OBJ_MEMCACHE */ 2,
+ /* next entry is HWLOC_OBJ_DIE */ 5
};
#ifndef NDEBUG /* only used in debug check assert if !NDEBUG */
static const hwloc_obj_type_t obj_order_type[] = {
HWLOC_OBJ_MACHINE,
HWLOC_OBJ_GROUP,
+ HWLOC_OBJ_MEMCACHE,
HWLOC_OBJ_NUMANODE,
HWLOC_OBJ_PACKAGE,
+ HWLOC_OBJ_DIE,
HWLOC_OBJ_L5CACHE,
HWLOC_OBJ_L4CACHE,
HWLOC_OBJ_L3CACHE,
@@ -1040,6 +1056,7 @@ static const hwloc_obj_type_t obj_order_type[] = {
* Always keep Machine/NUMANode/PU/PCIDev/OSDev
* then Core
* then Package
+ * then Die
* then Cache,
* then Instruction Caches
* then always drop Group/Misc/Bridge.
@@ -1065,7 +1082,9 @@ static const int obj_type_priority[] = {
/* next entry is HWLOC_OBJ_BRIDGE */ 0,
/* next entry is HWLOC_OBJ_PCI_DEVICE */ 100,
/* next entry is HWLOC_OBJ_OS_DEVICE */ 100,
- /* next entry is HWLOC_OBJ_MISC */ 0
+ /* next entry is HWLOC_OBJ_MISC */ 0,
+ /* next entry is HWLOC_OBJ_MEMCACHE */ 19,
+ /* next entry is HWLOC_OBJ_DIE */ 30
};
int hwloc_compare_types (hwloc_obj_type_t type1, hwloc_obj_type_t type2)
@@ -1118,12 +1137,10 @@ hwloc_type_cmp(hwloc_obj_t obj1, hwloc_obj_t obj2)
/*
* How to compare objects based on cpusets.
*/
-
static int
hwloc_obj_cmp_sets(hwloc_obj_t obj1, hwloc_obj_t obj2)
{
hwloc_bitmap_t set1, set2;
- int res = HWLOC_OBJ_DIFFERENT;
assert(!hwloc__obj_type_is_special(obj1->type));
assert(!hwloc__obj_type_is_special(obj2->type));
@@ -1136,45 +1153,10 @@ hwloc_obj_cmp_sets(hwloc_obj_t obj1, hwloc_obj_t obj2)
set1 = obj1->cpuset;
set2 = obj2->cpuset;
}
- if (set1 && set2 && !hwloc_bitmap_iszero(set1) && !hwloc_bitmap_iszero(set2)) {
- res = hwloc_bitmap_compare_inclusion(set1, set2);
- if (res == HWLOC_OBJ_INTERSECTS)
- return HWLOC_OBJ_INTERSECTS;
- }
+ if (set1 && set2 && !hwloc_bitmap_iszero(set1) && !hwloc_bitmap_iszero(set2))
+ return hwloc_bitmap_compare_inclusion(set1, set2);
- /* then compare nodesets, and combine the results */
- if (obj1->complete_nodeset && obj2->complete_nodeset) {
- set1 = obj1->complete_nodeset;
- set2 = obj2->complete_nodeset;
- } else {
- set1 = obj1->nodeset;
- set2 = obj2->nodeset;
- }
- if (set1 && set2 && !hwloc_bitmap_iszero(set1) && !hwloc_bitmap_iszero(set2)) {
- int noderes = hwloc_bitmap_compare_inclusion(set1, set2);
- /* deal with conflicting cpusets/nodesets inclusions */
- if (noderes == HWLOC_OBJ_INCLUDED) {
- if (res == HWLOC_OBJ_CONTAINS)
- /* contradicting order for cpusets and nodesets */
- return HWLOC_OBJ_INTERSECTS;
- res = HWLOC_OBJ_INCLUDED;
-
- } else if (noderes == HWLOC_OBJ_CONTAINS) {
- if (res == HWLOC_OBJ_INCLUDED)
- /* contradicting order for cpusets and nodesets */
- return HWLOC_OBJ_INTERSECTS;
- res = HWLOC_OBJ_CONTAINS;
-
- } else if (noderes == HWLOC_OBJ_INTERSECTS) {
- return HWLOC_OBJ_INTERSECTS;
-
- } else {
- /* nodesets are different, keep the cpuset order */
-
- }
- }
-
- return res;
+ return HWLOC_OBJ_DIFFERENT;
}
/* Compare object cpusets based on complete_cpuset if defined (always correctly ordered),
@@ -1189,10 +1171,6 @@ hwloc__object_cpusets_compare_first(hwloc_obj_t obj1, hwloc_obj_t obj2)
return hwloc_bitmap_compare_first(obj1->complete_cpuset, obj2->complete_cpuset);
else if (obj1->cpuset && obj2->cpuset)
return hwloc_bitmap_compare_first(obj1->cpuset, obj2->cpuset);
- else if (obj1->complete_nodeset && obj2->complete_nodeset)
- return hwloc_bitmap_compare_first(obj1->complete_nodeset, obj2->complete_nodeset);
- else if (obj1->nodeset && obj2->nodeset)
- return hwloc_bitmap_compare_first(obj1->nodeset, obj2->nodeset);
return 0;
}
@@ -1346,7 +1324,11 @@ hwloc__insert_try_merge_group(hwloc_obj_t old, hwloc_obj_t new)
}
}
-/* Try to insert OBJ in CUR, recurse if needed.
+/*
+ * The main insertion routine, only used for CPU-side object (normal types)
+ * uisng cpuset only (or complete_cpuset).
+ *
+ * Try to insert OBJ in CUR, recurse if needed.
* Returns the object if it was inserted,
* the remaining object it was merged,
* NULL if failed to insert.
@@ -1546,17 +1528,116 @@ hwloc__find_insert_memory_parent(struct hwloc_topology *topology, hwloc_obj_t ob
return group;
}
-/*attach the given memory object below the given normal parent. */
+/* only works for MEMCACHE and NUMAnode with a single bit in nodeset */
+static hwloc_obj_t
+hwloc___attach_memory_object_by_nodeset(struct hwloc_topology *topology, hwloc_obj_t parent,
+ hwloc_obj_t obj,
+ hwloc_report_error_t report_error)
+{
+ hwloc_obj_t *curp = &parent->memory_first_child;
+ unsigned first = hwloc_bitmap_first(obj->nodeset);
+
+ while (*curp) {
+ hwloc_obj_t cur = *curp;
+ unsigned curfirst = hwloc_bitmap_first(cur->nodeset);
+
+ if (first < curfirst) {
+ /* insert before cur */
+ obj->next_sibling = cur;
+ *curp = obj;
+ obj->memory_first_child = NULL;
+ obj->parent = parent;
+ topology->modified = 1;
+ return obj;
+ }
+
+ if (first == curfirst) {
+ /* identical nodeset */
+ if (obj->type == HWLOC_OBJ_NUMANODE) {
+ if (cur->type == HWLOC_OBJ_NUMANODE) {
+ /* identical NUMA nodes? ignore the new one */
+ if (report_error) {
+ char curstr[512];
+ char objstr[512];
+ char msg[1100];
+ hwloc__report_error_format_obj(curstr, sizeof(curstr), cur);
+ hwloc__report_error_format_obj(objstr, sizeof(objstr), obj);
+ snprintf(msg, sizeof(msg), "%s and %s have identical nodesets!", objstr, curstr);
+ report_error(msg, __LINE__);
+ }
+ return NULL;
+ }
+ assert(cur->type == HWLOC_OBJ_MEMCACHE);
+ /* insert the new NUMA node below that existing memcache */
+ return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, report_error);
+
+ } else {
+ assert(obj->type == HWLOC_OBJ_MEMCACHE);
+ if (cur->type == HWLOC_OBJ_MEMCACHE) {
+ if (cur->attr->cache.depth == obj->attr->cache.depth)
+ /* memcache with same nodeset and depth, ignore the new one */
+ return NULL;
+ if (cur->attr->cache.depth > obj->attr->cache.depth)
+ /* memcache with higher cache depth is actually *higher* in the hierarchy
+ * (depth starts from the NUMA node).
+ * insert the new memcache below the existing one
+ */
+ return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, report_error);
+ }
+ /* insert the memcache above the existing memcache or numa node */
+ obj->next_sibling = cur->next_sibling;
+ cur->next_sibling = NULL;
+ obj->memory_first_child = cur;
+ cur->parent = obj;
+ *curp = obj;
+ obj->parent = parent;
+ topology->modified = 1;
+ return obj;
+ }
+ }
+
+ curp = &cur->next_sibling;
+ }
+
+ /* append to the end of the list */
+ obj->next_sibling = NULL;
+ *curp = obj;
+ obj->memory_first_child = NULL;
+ obj->parent = parent;
+ topology->modified = 1;
+ return obj;
+}
+
+/* Attach the given memory object below the given normal parent.
+ *
+ * Only the nodeset is used to find the location inside memory children below parent.
+ *
+ * Nodeset inclusion inside the given memory hierarchy is guaranteed by this function,
+ * but nodesets are not propagated to CPU-side parent yet. It will be done by
+ * propagate_nodeset() later.
+ */
struct hwloc_obj *
hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent,
hwloc_obj_t obj,
- hwloc_report_error_t report_error __hwloc_attribute_unused)
+ hwloc_report_error_t report_error)
{
- hwloc_obj_t *cur_children;
+ hwloc_obj_t result;
assert(parent);
assert(hwloc__obj_type_is_normal(parent->type));
+ /* Check the nodeset */
+ if (!obj->nodeset || hwloc_bitmap_iszero(obj->nodeset))
+ return NULL;
+ /* Initialize or check the complete nodeset */
+ if (!obj->complete_nodeset) {
+ obj->complete_nodeset = hwloc_bitmap_dup(obj->nodeset);
+ } else if (!hwloc_bitmap_isincluded(obj->nodeset, obj->complete_nodeset)) {
+ return NULL;
+ }
+ /* Neither ACPI nor Linux support multinode mscache */
+ assert(hwloc_bitmap_weight(obj->nodeset) == 1);
+
#if 0
/* TODO: enable this instead of hack in fixup_sets once NUMA nodes are inserted late */
/* copy the parent cpuset in case it's larger than expected.
@@ -1565,35 +1646,22 @@ hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent,
* However, the user decided the ignore Groups, so hierarchy/locality loss is expected.
*/
hwloc_bitmap_copy(obj->cpuset, parent->cpuset);
+ hwloc_bitmap_copy(obj->complete_cpuset, parent->complete_cpuset);
#endif
- /* only NUMA nodes are memory for now, just append to the end of the list */
- assert(obj->type == HWLOC_OBJ_NUMANODE);
- assert(obj->nodeset);
- cur_children = &parent->memory_first_child;
- while (*cur_children) {
- /* TODO check that things are inserted in order.
- * it's OK for KNL, the only user so far
- */
- cur_children = &(*cur_children)->next_sibling;
- }
- *cur_children = obj;
- obj->next_sibling = NULL;
-
- /* Initialize the complete nodeset if needed */
- if (!obj->complete_nodeset) {
- obj->complete_nodeset = hwloc_bitmap_dup(obj->nodeset);
- }
-
- /* Add the bit to the top sets, and to the parent CPU-side object */
- if (obj->type == HWLOC_OBJ_NUMANODE) {
- if (hwloc_bitmap_isset(obj->nodeset, obj->os_index))
+ result = hwloc___attach_memory_object_by_nodeset(topology, parent, obj, report_error);
+ if (result == obj) {
+ /* Add the bit to the top sets, and to the parent CPU-side object */
+ if (obj->type == HWLOC_OBJ_NUMANODE) {
hwloc_bitmap_set(topology->levels[0][0]->nodeset, obj->os_index);
- hwloc_bitmap_set(topology->levels[0][0]->complete_nodeset, obj->os_index);
+ hwloc_bitmap_set(topology->levels[0][0]->complete_nodeset, obj->os_index);
+ }
}
-
- topology->modified = 1;
- return obj;
+ if (result != obj) {
+ /* either failed to insert, or got merged, free the original object */
+ hwloc_free_unlinked_object(obj);
+ }
+ return result;
}
/* insertion routine that lets you change the error reporting callback */
@@ -1699,11 +1767,18 @@ hwloc_alloc_setup_object(hwloc_topology_t topology,
hwloc_obj_type_t type, unsigned os_index)
{
struct hwloc_obj *obj = hwloc_tma_malloc(topology->tma, sizeof(*obj));
+ if (!obj)
+ return NULL;
memset(obj, 0, sizeof(*obj));
obj->type = type;
obj->os_index = os_index;
obj->gp_index = topology->next_gp_index++;
obj->attr = hwloc_tma_malloc(topology->tma, sizeof(*obj->attr));
+ if (!obj->attr) {
+ assert(!topology->tma || !topology->tma->dontfree); /* this tma cannot fail to allocate */
+ free(obj);
+ return NULL;
+ }
memset(obj->attr, 0, sizeof(*obj->attr));
/* do not allocate the cpuset here, let the caller do it */
return obj;
@@ -1717,6 +1792,10 @@ hwloc_topology_alloc_group_object(struct hwloc_topology *topology)
errno = EINVAL;
return NULL;
}
+ if (topology->adopted_shmem_addr) {
+ errno = EPERM;
+ return NULL;
+ }
return hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, HWLOC_UNKNOWN_INDEX);
}
@@ -1736,6 +1815,10 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t
errno = EINVAL;
return NULL;
}
+ if (topology->adopted_shmem_addr) {
+ errno = EPERM;
+ return NULL;
+ }
if (topology->type_filter[HWLOC_OBJ_GROUP] == HWLOC_TYPE_FILTER_KEEP_NONE) {
hwloc_free_unlinked_object(obj);
@@ -1754,12 +1837,30 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t
hwloc_bitmap_and(obj->complete_nodeset, obj->complete_nodeset, root->complete_nodeset);
if ((!obj->cpuset || hwloc_bitmap_iszero(obj->cpuset))
- && (!obj->complete_cpuset || hwloc_bitmap_iszero(obj->complete_cpuset))
- && (!obj->nodeset || hwloc_bitmap_iszero(obj->nodeset))
- && (!obj->complete_nodeset || hwloc_bitmap_iszero(obj->complete_nodeset))) {
- hwloc_free_unlinked_object(obj);
- errno = EINVAL;
- return NULL;
+ && (!obj->complete_cpuset || hwloc_bitmap_iszero(obj->complete_cpuset))) {
+ /* we'll insert by cpuset, so build cpuset from the nodeset */
+ hwloc_const_bitmap_t nodeset = obj->nodeset ? obj->nodeset : obj->complete_nodeset;
+ hwloc_obj_t numa;
+
+ if ((!obj->nodeset || hwloc_bitmap_iszero(obj->nodeset))
+ && (!obj->complete_nodeset || hwloc_bitmap_iszero(obj->complete_nodeset))) {
+ hwloc_free_unlinked_object(obj);
+ errno = EINVAL;
+ return NULL;
+ }
+
+ if (!obj->cpuset) {
+ obj->cpuset = hwloc_bitmap_alloc();
+ if (!obj->cpuset) {
+ hwloc_free_unlinked_object(obj);
+ return NULL;
+ }
+ }
+
+ numa = NULL;
+ while ((numa = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, numa)) != NULL)
+ if (hwloc_bitmap_isset(nodeset, numa->os_index))
+ hwloc_bitmap_or(obj->cpuset, obj->cpuset, numa->cpuset);
}
cmp = hwloc_obj_cmp_sets(obj, root);
@@ -1806,6 +1907,10 @@ hwloc_topology_insert_misc_object(struct hwloc_topology *topology, hwloc_obj_t p
errno = EINVAL;
return NULL;
}
+ if (topology->adopted_shmem_addr) {
+ errno = EPERM;
+ return NULL;
+ }
obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MISC, HWLOC_UNKNOWN_INDEX);
if (name)
@@ -1963,6 +2068,7 @@ fixup_sets(hwloc_obj_t obj)
in_memory_list = 0;
/* iterate over normal children first, we'll come back for memory children later */
+ /* FIXME: if memory objects are inserted late, we should update their cpuset and complete_cpuset at insertion instead of here */
iterate:
while (child) {
/* our cpuset must be included in our parent's one */
@@ -1980,6 +2086,12 @@ fixup_sets(hwloc_obj_t obj)
child->complete_nodeset = hwloc_bitmap_dup(child->nodeset);
}
+ if (hwloc_obj_type_is_memory(child->type)) {
+ /* update memory children cpusets in case some CPU-side parent was removed */
+ hwloc_bitmap_copy(child->cpuset, obj->cpuset);
+ hwloc_bitmap_copy(child->complete_cpuset, obj->complete_cpuset);
+ }
+
fixup_sets(child);
child = child->next_sibling;
}
@@ -2025,9 +2137,8 @@ hwloc_obj_add_children_sets(hwloc_obj_t obj)
/* CPU objects are inserted by cpusets, we know their cpusets are properly included.
* We just need fixup_sets() to make sure they aren't too wide.
*
- * Memory objects are inserted by cpusets to find their CPU parent,
- * but nodesets are only used inside the memory hierarchy below that parent.
- * Thus we need to propagate nodesets to CPU-side parents and children.
+ * Within each memory hierarchy, nodeset are consistent as well.
+ * However they must be propagated to their CPU-side parents.
*
* A memory object nodeset consists of NUMA nodes below it.
* A normal object nodeset consists in NUMA nodes attached to any
@@ -2060,27 +2171,12 @@ propagate_nodeset(hwloc_obj_t obj)
/* now add our local nodeset */
for_each_memory_child(child, obj) {
- /* FIXME rather recurse in the memory hierarchy */
-
- /* first, update children complete_nodeset if needed */
- if (!child->complete_nodeset)
- child->complete_nodeset = hwloc_bitmap_dup(child->nodeset);
- else
- hwloc_bitmap_or(child->complete_nodeset, child->complete_nodeset, child->nodeset);
-
/* add memory children nodesets to ours */
hwloc_bitmap_or(obj->nodeset, obj->nodeset, child->nodeset);
hwloc_bitmap_or(obj->complete_nodeset, obj->complete_nodeset, child->complete_nodeset);
-
- /* by the way, copy our cpusets to memory children */
- if (child->cpuset)
- hwloc_bitmap_copy(child->cpuset, obj->cpuset);
- else
- child->cpuset = hwloc_bitmap_dup(obj->cpuset);
- if (child->complete_cpuset)
- hwloc_bitmap_copy(child->complete_cpuset, obj->complete_cpuset);
- else
- child->complete_cpuset = hwloc_bitmap_dup(obj->complete_cpuset);
+ /* no need to recurse because hwloc__attach_memory_object()
+ * makes sure nodesets are consistent within each memory hierarchy.
+ */
}
/* Propagate our nodeset to CPU children. */
@@ -2219,6 +2315,7 @@ hwloc_reset_normal_type_depths(hwloc_topology_t topology)
for (i=HWLOC_OBJ_TYPE_MIN; i<=HWLOC_OBJ_GROUP; i++)
topology->type_depth[i] = HWLOC_TYPE_DEPTH_UNKNOWN;
/* type contiguity is asserted in topology_check() */
+ topology->type_depth[HWLOC_OBJ_DIE] = HWLOC_TYPE_DEPTH_UNKNOWN;
}
static int
@@ -2245,6 +2342,8 @@ hwloc_compare_levels_structure(hwloc_topology_t topology, unsigned i)
return -1;
for(j=0; jlevel_nbobjects[i]; j++) {
+ if (topology->levels[i-1][j] != topology->levels[i][j]->parent)
+ return -1;
if (topology->levels[i-1][j]->arity != 1)
return -1;
if (checkmemory && topology->levels[i-1][j]->memory_arity)
@@ -2434,6 +2533,7 @@ hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root)
{
hwloc_obj_t child;
unsigned arity = root->arity;
+ hwloc_obj_t *array;
int ok;
/* assume we're not symmetric by default */
@@ -2465,8 +2565,9 @@ hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root)
/* now check that children subtrees are identical.
* just walk down the first child in each tree and compare their depth and arities
*/
-{
- HWLOC_VLA(hwloc_obj_t, array, arity);
+ array = malloc(arity * sizeof(*array));
+ if (!array)
+ return;
memcpy(array, root->children, arity * sizeof(*array));
while (1) {
unsigned i;
@@ -2474,8 +2575,9 @@ hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root)
for(i=1; idepth != array[0]->depth
|| array[i]->arity != array[0]->arity) {
- return;
- }
+ free(array);
+ return;
+ }
if (!array[0]->arity)
/* no more children level, we're ok */
break;
@@ -2483,7 +2585,7 @@ hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root)
for(i=0; ifirst_child;
}
-}
+ free(array);
/* everything went fine, we're symmetric */
good:
@@ -2601,57 +2703,23 @@ hwloc_connect_children(hwloc_obj_t parent)
}
/*
- * Check whether there is an object below ROOT that has the same type as OBJ
+ * Check whether there is an object strictly below ROOT that has the same type as OBJ
*/
static int
find_same_type(hwloc_obj_t root, hwloc_obj_t obj)
{
hwloc_obj_t child;
- if (hwloc_type_cmp(root, obj) == HWLOC_OBJ_EQUAL)
- return 1;
-
- for_each_child (child, root)
+ for_each_child (child, root) {
+ if (hwloc_type_cmp(child, obj) == HWLOC_OBJ_EQUAL)
+ return 1;
if (find_same_type(child, obj))
return 1;
+ }
return 0;
}
-/* traverse the array of current object and compare them with top_obj.
- * if equal, take the object and put its children into the remaining objs.
- * if not equal, put the object into the remaining objs.
- */
-static unsigned
-hwloc_level_take_objects(hwloc_obj_t top_obj,
- hwloc_obj_t *current_objs, unsigned n_current_objs,
- hwloc_obj_t *taken_objs, unsigned n_taken_objs __hwloc_attribute_unused,
- hwloc_obj_t *remaining_objs, unsigned n_remaining_objs __hwloc_attribute_unused)
-{
- unsigned taken_i = 0;
- unsigned new_i = 0;
- unsigned i, j;
-
- for (i = 0; i < n_current_objs; i++)
- if (hwloc_type_cmp(top_obj, current_objs[i]) == HWLOC_OBJ_EQUAL) {
- /* Take it, add main children. */
- taken_objs[taken_i++] = current_objs[i];
- for (j = 0; j < current_objs[i]->arity; j++)
- remaining_objs[new_i++] = current_objs[i]->children[j];
- } else {
- /* Leave it. */
- remaining_objs[new_i++] = current_objs[i];
- }
-
-#ifdef HWLOC_DEBUG
- /* Make sure we didn't mess up. */
- assert(taken_i == n_taken_objs);
- assert(new_i == n_current_objs - n_taken_objs + n_remaining_objs);
-#endif
-
- return new_i;
-}
-
static int
hwloc_build_level_from_list(struct hwloc_special_level_s *slevel)
{
@@ -2670,6 +2738,9 @@ hwloc_build_level_from_list(struct hwloc_special_level_s *slevel)
if (nb) {
/* allocate and fill level */
slevel->objs = malloc(nb * sizeof(struct hwloc_obj *));
+ if (!slevel->objs)
+ return -1;
+
obj = slevel->first;
i = 0;
while (obj) {
@@ -2709,7 +2780,17 @@ hwloc_list_special_objects(hwloc_topology_t topology, hwloc_obj_t obj)
/* Insert the main NUMA node list */
hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_NUMANODE], obj);
- /* Recurse */
+ /* Recurse, NUMA nodes only have Misc children */
+ for_each_misc_child(child, obj)
+ hwloc_list_special_objects(topology, child);
+
+ } else if (obj->type == HWLOC_OBJ_MEMCACHE) {
+ obj->next_cousin = NULL;
+ obj->depth = HWLOC_TYPE_DEPTH_MEMCACHE;
+ /* Insert the main MemCache list */
+ hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_MEMCACHE], obj);
+
+ /* Recurse, MemCaches have NUMA nodes or Misc children */
for_each_memory_child(child, obj)
hwloc_list_special_objects(topology, child);
for_each_misc_child(child, obj)
@@ -2742,6 +2823,7 @@ hwloc_list_special_objects(hwloc_topology_t topology, hwloc_obj_t obj)
/* Insert in the main osdev list */
hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_OSDEV], obj);
}
+
/* Recurse, I/O only have I/O and Misc children */
for_each_io_child(child, obj)
hwloc_list_special_objects(topology, child);
@@ -2762,7 +2844,7 @@ hwloc_list_special_objects(hwloc_topology_t topology, hwloc_obj_t obj)
}
/* Build I/O levels */
-static void
+static int
hwloc_connect_io_misc_levels(hwloc_topology_t topology)
{
unsigned i;
@@ -2773,8 +2855,12 @@ hwloc_connect_io_misc_levels(hwloc_topology_t topology)
hwloc_list_special_objects(topology, topology->levels[0][0]);
- for(i=0; islevels[i]);
+ for(i=0; islevels[i]) < 0)
+ return -1;
+ }
+
+ return 0;
}
/*
@@ -2849,32 +2935,48 @@ hwloc_connect_levels(hwloc_topology_t topology)
/* Now peek all objects of the same type, build a level with that and
* replace them with their children. */
- /* First count them. */
- n_taken_objs = 0;
- n_new_objs = 0;
- for (i = 0; i < n_objs; i++)
- if (hwloc_type_cmp(top_obj, objs[i]) == HWLOC_OBJ_EQUAL) {
- n_taken_objs++;
- n_new_objs += objs[i]->arity;
- }
-
- /* New level. */
- taken_objs = malloc((n_taken_objs + 1) * sizeof(taken_objs[0]));
- /* New list of pending objects. */
- if (n_objs - n_taken_objs + n_new_objs) {
- new_objs = malloc((n_objs - n_taken_objs + n_new_objs) * sizeof(new_objs[0]));
- } else {
-#ifdef HWLOC_DEBUG
- assert(!n_new_objs);
- assert(n_objs == n_taken_objs);
-#endif
- new_objs = NULL;
+ /* allocate enough to take all current objects and an ending NULL */
+ taken_objs = malloc((n_objs+1) * sizeof(taken_objs[0]));
+ if (!taken_objs) {
+ free(objs);
+ errno = ENOMEM;
+ return -1;
}
- n_new_objs = hwloc_level_take_objects(top_obj,
- objs, n_objs,
- taken_objs, n_taken_objs,
- new_objs, n_new_objs);
+ /* allocate enough to keep all current objects or their children */
+ n_new_objs = 0;
+ for (i = 0; i < n_objs; i++) {
+ if (objs[i]->arity)
+ n_new_objs += objs[i]->arity;
+ else
+ n_new_objs++;
+ }
+ new_objs = malloc(n_new_objs * sizeof(new_objs[0]));
+ if (!new_objs) {
+ free(objs);
+ free(taken_objs);
+ errno = ENOMEM;
+ return -1;
+ }
+
+ /* now actually take these objects */
+ n_new_objs = 0;
+ n_taken_objs = 0;
+ for (i = 0; i < n_objs; i++)
+ if (hwloc_type_cmp(top_obj, objs[i]) == HWLOC_OBJ_EQUAL) {
+ /* Take it, add main children. */
+ taken_objs[n_taken_objs++] = objs[i];
+ memcpy(&new_objs[n_new_objs], objs[i]->children, objs[i]->arity * sizeof(new_objs[0]));
+ n_new_objs += objs[i]->arity;
+ } else {
+ /* Leave it. */
+ new_objs[n_new_objs++] = objs[i];
+ }
+
+ if (!n_new_objs) {
+ free(new_objs);
+ new_objs = NULL;
+ }
/* Ok, put numbers in the level and link cousins. */
for (i = 0; i < n_taken_objs; i++) {
@@ -2964,13 +3066,69 @@ hwloc_topology_reconnect(struct hwloc_topology *topology, unsigned long flags)
if (hwloc_connect_levels(topology) < 0)
return -1;
- hwloc_connect_io_misc_levels(topology);
+ if (hwloc_connect_io_misc_levels(topology) < 0)
+ return -1;
topology->modified = 0;
return 0;
}
+/* for regression testing, make sure the order of io devices
+ * doesn't change with the dentry order in the filesystem
+ *
+ * Only needed for OSDev for now.
+ */
+static hwloc_obj_t
+hwloc_debug_insert_osdev_sorted(hwloc_obj_t queue, hwloc_obj_t obj)
+{
+ hwloc_obj_t *pcur = &queue;
+ while (*pcur && strcmp((*pcur)->name, obj->name) < 0)
+ pcur = &((*pcur)->next_sibling);
+ obj->next_sibling = *pcur;
+ *pcur = obj;
+ return queue;
+}
+
+static void
+hwloc_debug_sort_children(hwloc_obj_t root)
+{
+ hwloc_obj_t child;
+
+ if (root->io_first_child) {
+ hwloc_obj_t osdevqueue, *pchild;
+
+ pchild = &root->io_first_child;
+ osdevqueue = NULL;
+ while ((child = *pchild) != NULL) {
+ if (child->type != HWLOC_OBJ_OS_DEVICE) {
+ /* keep non-osdev untouched */
+ pchild = &child->next_sibling;
+ continue;
+ }
+
+ /* dequeue this child */
+ *pchild = child->next_sibling;
+ child->next_sibling = NULL;
+
+ /* insert in osdev queue in order */
+ osdevqueue = hwloc_debug_insert_osdev_sorted(osdevqueue, child);
+ }
+
+ /* requeue the now-sorted osdev queue */
+ *pchild = osdevqueue;
+ }
+
+ /* Recurse */
+ for_each_child(child, root)
+ hwloc_debug_sort_children(child);
+ for_each_memory_child(child, root)
+ hwloc_debug_sort_children(child);
+ for_each_io_child(child, root)
+ hwloc_debug_sort_children(child);
+ /* no I/O under Misc */
+}
+
void hwloc_alloc_root_sets(hwloc_obj_t root)
{
/*
@@ -2992,11 +3150,32 @@ void hwloc_alloc_root_sets(hwloc_obj_t root)
root->complete_nodeset = hwloc_bitmap_alloc();
}
-/* Main discovery loop */
-static int
-hwloc_discover(struct hwloc_topology *topology)
+static void
+hwloc_discover_by_phase(struct hwloc_topology *topology,
+ struct hwloc_disc_status *dstatus,
+ const char *phasename __hwloc_attribute_unused)
{
struct hwloc_backend *backend;
+ hwloc_debug("%s phase discovery...\n", phasename);
+ for(backend = topology->backends; backend; backend = backend->next) {
+ if (dstatus->phase & dstatus->excluded_phases)
+ break;
+ if (!(backend->phases & dstatus->phase))
+ continue;
+ if (!backend->discover)
+ continue;
+ hwloc_debug("%s phase discovery in component %s...\n", phasename, backend->component->name);
+ backend->discover(backend, dstatus);
+ hwloc_debug_print_objects(0, topology->levels[0][0]);
+ }
+}
+
+/* Main discovery loop */
+static int
+hwloc_discover(struct hwloc_topology *topology,
+ struct hwloc_disc_status *dstatus)
+{
+ const char *env;
topology->modified = 0; /* no need to reconnect yet */
@@ -3038,38 +3217,70 @@ hwloc_discover(struct hwloc_topology *topology)
* automatically propagated to the whole tree after detection.
*/
- /*
- * Discover CPUs first
- */
- backend = topology->backends;
- while (NULL != backend) {
- if (backend->component->type != HWLOC_DISC_COMPONENT_TYPE_CPU
- && backend->component->type != HWLOC_DISC_COMPONENT_TYPE_GLOBAL)
- /* not yet */
- goto next_cpubackend;
- if (!backend->discover)
- goto next_cpubackend;
- backend->discover(backend);
- hwloc_debug_print_objects(0, topology->levels[0][0]);
+ if (topology->backend_phases & HWLOC_DISC_PHASE_GLOBAL) {
+ /* usually, GLOBAL is alone.
+ * but HWLOC_ANNOTATE_GLOBAL_COMPONENTS=1 allows optional ANNOTATE steps.
+ */
+ struct hwloc_backend *global_backend = topology->backends;
+ assert(global_backend);
+ assert(global_backend->phases == HWLOC_DISC_PHASE_GLOBAL);
-next_cpubackend:
- backend = backend->next;
+ /*
+ * Perform the single-component-based GLOBAL discovery
+ */
+ hwloc_debug("GLOBAL phase discovery...\n");
+ hwloc_debug("GLOBAL phase discovery with component %s...\n", global_backend->component->name);
+ dstatus->phase = HWLOC_DISC_PHASE_GLOBAL;
+ global_backend->discover(global_backend, dstatus);
+ hwloc_debug_print_objects(0, topology->levels[0][0]);
+ }
+ /* Don't explicitly ignore other phases, in case there's ever
+ * a need to bring them back.
+ * The component will usually exclude them by default anyway.
+ * Except if annotating global components is explicitly requested.
+ */
+
+ if (topology->backend_phases & HWLOC_DISC_PHASE_CPU) {
+ /*
+ * Discover CPUs first
+ */
+ dstatus->phase = HWLOC_DISC_PHASE_CPU;
+ hwloc_discover_by_phase(topology, dstatus, "CPU");
+ }
+
+ if (!(topology->backend_phases & (HWLOC_DISC_PHASE_GLOBAL|HWLOC_DISC_PHASE_CPU))) {
+ hwloc_debug("No GLOBAL or CPU component phase found\n");
+ /* we'll fail below */
}
/* One backend should have called hwloc_alloc_root_sets()
* and set bits during PU and NUMA insert.
*/
if (!topology->levels[0][0]->cpuset || hwloc_bitmap_iszero(topology->levels[0][0]->cpuset)) {
- hwloc_debug("%s", "No PU added by any CPU and global backend\n");
+ hwloc_debug("%s", "No PU added by any CPU or GLOBAL component phase\n");
errno = EINVAL;
return -1;
}
- if (topology->binding_hooks.get_allowed_resources && topology->is_thissystem) {
- const char *env = getenv("HWLOC_THISSYSTEM_ALLOWED_RESOURCES");
- if ((env && atoi(env))
- || (topology->flags & HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES))
- topology->binding_hooks.get_allowed_resources(topology);
+ /*
+ * Memory-specific discovery
+ */
+ if (topology->backend_phases & HWLOC_DISC_PHASE_MEMORY) {
+ dstatus->phase = HWLOC_DISC_PHASE_MEMORY;
+ hwloc_discover_by_phase(topology, dstatus, "MEMORY");
+ }
+
+ if (/* check if getting the sets of locally allowed resources is possible */
+ topology->binding_hooks.get_allowed_resources
+ && topology->is_thissystem
+ /* check whether it has been done already */
+ && !(dstatus->flags & HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES)
+ /* check whether it was explicitly requested */
+ && ((topology->flags & HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES) != 0
+ || ((env = getenv("HWLOC_THISSYSTEM_ALLOWED_RESOURCES")) != NULL && atoi(env)))) {
+ /* OK, get the sets of locally allowed resources */
+ topology->binding_hooks.get_allowed_resources(topology);
+ dstatus->flags |= HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES;
}
/* If there's no NUMA node, add one with all the memory.
@@ -3113,7 +3324,7 @@ next_cpubackend:
hwloc_debug_print_objects(0, topology->levels[0][0]);
- if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM)) {
+ if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED)) {
hwloc_debug("%s", "\nRemoving unauthorized sets from all sets\n");
remove_unused_sets(topology, topology->levels[0][0]);
hwloc_debug_print_objects(0, topology->levels[0][0]);
@@ -3149,28 +3360,27 @@ next_cpubackend:
hwloc_debug_print_objects(0, topology->levels[0][0]);
/*
- * Additional discovery with other backends
+ * Additional discovery
*/
-
- backend = topology->backends;
- while (NULL != backend) {
- if (backend->component->type == HWLOC_DISC_COMPONENT_TYPE_CPU
- || backend->component->type == HWLOC_DISC_COMPONENT_TYPE_GLOBAL)
- /* already done above */
- goto next_noncpubackend;
- if (!backend->discover)
- goto next_noncpubackend;
- backend->discover(backend);
- hwloc_debug_print_objects(0, topology->levels[0][0]);
-
-next_noncpubackend:
- backend = backend->next;
+ if (topology->backend_phases & HWLOC_DISC_PHASE_PCI) {
+ dstatus->phase = HWLOC_DISC_PHASE_PCI;
+ hwloc_discover_by_phase(topology, dstatus, "PCI");
+ }
+ if (topology->backend_phases & HWLOC_DISC_PHASE_IO) {
+ dstatus->phase = HWLOC_DISC_PHASE_IO;
+ hwloc_discover_by_phase(topology, dstatus, "IO");
+ }
+ if (topology->backend_phases & HWLOC_DISC_PHASE_MISC) {
+ dstatus->phase = HWLOC_DISC_PHASE_MISC;
+ hwloc_discover_by_phase(topology, dstatus, "MISC");
+ }
+ if (topology->backend_phases & HWLOC_DISC_PHASE_ANNOTATE) {
+ dstatus->phase = HWLOC_DISC_PHASE_ANNOTATE;
+ hwloc_discover_by_phase(topology, dstatus, "ANNOTATE");
}
- hwloc_pci_belowroot_apply_locality(topology);
-
- hwloc_debug("%s", "\nNow reconnecting\n");
- hwloc_debug_print_objects(0, topology->levels[0][0]);
+ if (getenv("HWLOC_DEBUG_SORT_CHILDREN"))
+ hwloc_debug_sort_children(topology->levels[0][0]);
/* Remove some stuff */
@@ -3217,7 +3427,8 @@ next_noncpubackend:
/* add some identification attributes if not loading from XML */
if (topology->backends
- && strcmp(topology->backends->component->name, "xml")) {
+ && strcmp(topology->backends->component->name, "xml")
+ && !getenv("HWLOC_DONT_ADD_VERSION_INFO")) {
char *value;
/* add a hwlocVersion */
hwloc_obj_add_info(topology->levels[0][0], "hwlocVersion", HWLOC_VERSION);
@@ -3269,6 +3480,7 @@ hwloc_topology_setup_defaults(struct hwloc_topology *topology)
HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_BRIDGE == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_BRIDGE));
HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_PCIDEV == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_PCI_DEVICE));
HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_OSDEV == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_OS_DEVICE));
+ HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_MEMCACHE == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_MEMCACHE));
/* sane values to type_depth */
hwloc_reset_normal_type_depths(topology);
@@ -3277,6 +3489,7 @@ hwloc_topology_setup_defaults(struct hwloc_topology *topology)
topology->type_depth[HWLOC_OBJ_BRIDGE] = HWLOC_TYPE_DEPTH_BRIDGE;
topology->type_depth[HWLOC_OBJ_PCI_DEVICE] = HWLOC_TYPE_DEPTH_PCI_DEVICE;
topology->type_depth[HWLOC_OBJ_OS_DEVICE] = HWLOC_TYPE_DEPTH_OS_DEVICE;
+ topology->type_depth[HWLOC_OBJ_MEMCACHE] = HWLOC_TYPE_DEPTH_MEMCACHE;
/* Create the actual machine object, but don't touch its attributes yet
* since the OS backend may still change the object into something else
@@ -3303,7 +3516,7 @@ hwloc__topology_init (struct hwloc_topology **topologyp,
topology->tma = tma;
hwloc_components_init(); /* uses malloc without tma, but won't need it since dup() caller already took a reference */
- hwloc_backends_init(topology);
+ hwloc_topology_components_init(topology);
hwloc_pci_discovery_init(topology); /* make sure both dup() and load() get sane variables */
/* Setup topology context */
@@ -3320,7 +3533,7 @@ hwloc__topology_init (struct hwloc_topology **topologyp,
topology->support.cpubind = hwloc_tma_malloc(tma, sizeof(*topology->support.cpubind));
topology->support.membind = hwloc_tma_malloc(tma, sizeof(*topology->support.membind));
- topology->nb_levels_allocated = nblevels; /* enough for default 9 levels = Mach+Pack+NUMA+L3+L2+L1d+L1i+Co+PU */
+ topology->nb_levels_allocated = nblevels; /* enough for default 10 levels = Mach+Pack+Die+NUMA+L3+L2+L1d+L1i+Co+PU */
topology->levels = hwloc_tma_calloc(tma, topology->nb_levels_allocated * sizeof(*topology->levels));
topology->level_nbobjects = hwloc_tma_calloc(tma, topology->nb_levels_allocated * sizeof(*topology->level_nbobjects));
@@ -3343,7 +3556,7 @@ int
hwloc_topology_init (struct hwloc_topology **topologyp)
{
return hwloc__topology_init(topologyp,
- 16, /* 16 is enough for default 9 levels = Mach+Pack+NUMA+L3+L2+L1d+L1i+Co+PU */
+ 16, /* 16 is enough for default 10 levels = Mach+Pack+Die+NUMA+L3+L2+L1d+L1i+Co+PU */
NULL); /* no TMA for normal topologies, too many allocations to fix */
}
@@ -3376,7 +3589,7 @@ hwloc_topology_set_synthetic(struct hwloc_topology *topology, const char *descri
return hwloc_disc_component_force_enable(topology,
0 /* api */,
- -1, "synthetic",
+ "synthetic",
description, NULL, NULL);
}
@@ -3391,7 +3604,7 @@ hwloc_topology_set_xml(struct hwloc_topology *topology,
return hwloc_disc_component_force_enable(topology,
0 /* api */,
- -1, "xml",
+ "xml",
xmlpath, NULL, NULL);
}
@@ -3407,7 +3620,7 @@ hwloc_topology_set_xmlbuffer(struct hwloc_topology *topology,
return hwloc_disc_component_force_enable(topology,
0 /* api */,
- -1, "xml", NULL,
+ "xml", NULL,
xmlbuffer, (void*) (uintptr_t) size);
}
@@ -3420,7 +3633,7 @@ hwloc_topology_set_flags (struct hwloc_topology *topology, unsigned long flags)
return -1;
}
- if (flags & ~(HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)) {
+ if (flags & ~(HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)) {
errno = EINVAL;
return -1;
}
@@ -3445,6 +3658,7 @@ hwloc__topology_filter_init(struct hwloc_topology *topology)
topology->type_filter[HWLOC_OBJ_L1ICACHE] = HWLOC_TYPE_FILTER_KEEP_NONE;
topology->type_filter[HWLOC_OBJ_L2ICACHE] = HWLOC_TYPE_FILTER_KEEP_NONE;
topology->type_filter[HWLOC_OBJ_L3ICACHE] = HWLOC_TYPE_FILTER_KEEP_NONE;
+ topology->type_filter[HWLOC_OBJ_MEMCACHE] = HWLOC_TYPE_FILTER_KEEP_NONE;
topology->type_filter[HWLOC_OBJ_GROUP] = HWLOC_TYPE_FILTER_KEEP_STRUCTURE;
topology->type_filter[HWLOC_OBJ_MISC] = HWLOC_TYPE_FILTER_KEEP_NONE;
topology->type_filter[HWLOC_OBJ_BRIDGE] = HWLOC_TYPE_FILTER_KEEP_NONE;
@@ -3575,6 +3789,7 @@ hwloc_topology_destroy (struct hwloc_topology *topology)
}
hwloc_backends_disable_all(topology);
+ hwloc_topology_components_fini(topology);
hwloc_components_fini();
hwloc_topology_clear(topology);
@@ -3591,6 +3806,8 @@ hwloc_topology_destroy (struct hwloc_topology *topology)
int
hwloc_topology_load (struct hwloc_topology *topology)
{
+ struct hwloc_disc_status dstatus;
+ const char *env;
int err;
if (topology->is_loaded) {
@@ -3617,7 +3834,7 @@ hwloc_topology_load (struct hwloc_topology *topology)
if (fsroot_path_env)
hwloc_disc_component_force_enable(topology,
1 /* env force */,
- HWLOC_DISC_COMPONENT_TYPE_CPU, "linux",
+ "linux",
NULL /* backend will getenv again */, NULL, NULL);
}
if (!topology->backends) {
@@ -3625,7 +3842,7 @@ hwloc_topology_load (struct hwloc_topology *topology)
if (cpuid_path_env)
hwloc_disc_component_force_enable(topology,
1 /* env force */,
- HWLOC_DISC_COMPONENT_TYPE_CPU, "x86",
+ "x86",
NULL /* backend will getenv again */, NULL, NULL);
}
if (!topology->backends) {
@@ -3633,7 +3850,7 @@ hwloc_topology_load (struct hwloc_topology *topology)
if (synthetic_env)
hwloc_disc_component_force_enable(topology,
1 /* env force */,
- -1, "synthetic",
+ "synthetic",
synthetic_env, NULL, NULL);
}
if (!topology->backends) {
@@ -3641,11 +3858,19 @@ hwloc_topology_load (struct hwloc_topology *topology)
if (xmlpath_env)
hwloc_disc_component_force_enable(topology,
1 /* env force */,
- -1, "xml",
+ "xml",
xmlpath_env, NULL, NULL);
}
}
+ dstatus.excluded_phases = 0;
+ dstatus.flags = 0; /* did nothing yet */
+
+ env = getenv("HWLOC_ALLOW");
+ if (env && !strcmp(env, "all"))
+ /* don't retrieve the sets of allowed resources */
+ dstatus.flags |= HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES;
+
/* instantiate all possible other backends now */
hwloc_disc_components_enable_others(topology);
/* now that backends are enabled, update the thissystem flag and some callbacks */
@@ -3660,7 +3885,7 @@ hwloc_topology_load (struct hwloc_topology *topology)
hwloc_pci_discovery_prepare(topology);
/* actual topology discovery */
- err = hwloc_discover(topology);
+ err = hwloc_discover(topology, &dstatus);
if (err < 0)
goto out;
@@ -3682,6 +3907,12 @@ hwloc_topology_load (struct hwloc_topology *topology)
hwloc_internal_distances_refresh(topology);
topology->is_loaded = 1;
+
+ if (topology->backend_phases & HWLOC_DISC_PHASE_TWEAK) {
+ dstatus.phase = HWLOC_DISC_PHASE_TWEAK;
+ hwloc_discover_by_phase(topology, &dstatus, "TWEAK");
+ }
+
return 0;
out:
@@ -3740,7 +3971,75 @@ restrict_object_by_cpuset(hwloc_topology_t topology, unsigned long flags, hwloc_
&& hwloc_bitmap_iszero(obj->cpuset)
&& (obj->type != HWLOC_OBJ_NUMANODE || (flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS))) {
/* remove object */
- hwloc_debug("%s", "\nRemoving object during restrict");
+ hwloc_debug("%s", "\nRemoving object during restrict by cpuset");
+ hwloc_debug_print_object(0, obj);
+
+ if (!(flags & HWLOC_RESTRICT_FLAG_ADAPT_IO)) {
+ hwloc_free_object_siblings_and_children(obj->io_first_child);
+ obj->io_first_child = NULL;
+ }
+ if (!(flags & HWLOC_RESTRICT_FLAG_ADAPT_MISC)) {
+ hwloc_free_object_siblings_and_children(obj->misc_first_child);
+ obj->misc_first_child = NULL;
+ }
+ assert(!obj->first_child);
+ assert(!obj->memory_first_child);
+ unlink_and_free_single_object(pobj);
+ topology->modified = 1;
+ }
+}
+
+/* adjust object nodesets according the given droppednodeset,
+ * drop object whose nodeset becomes empty and that have no children,
+ * and propagate PU removal as cpuset changes in parents.
+ */
+static void
+restrict_object_by_nodeset(hwloc_topology_t topology, unsigned long flags, hwloc_obj_t *pobj,
+ hwloc_bitmap_t droppedcpuset, hwloc_bitmap_t droppednodeset)
+{
+ hwloc_obj_t obj = *pobj, child, *pchild;
+ int modified = 0;
+
+ if (hwloc_bitmap_intersects(obj->complete_nodeset, droppednodeset)) {
+ hwloc_bitmap_andnot(obj->nodeset, obj->nodeset, droppednodeset);
+ hwloc_bitmap_andnot(obj->complete_nodeset, obj->complete_nodeset, droppednodeset);
+ modified = 1;
+ } else {
+ if ((flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS)
+ && hwloc_bitmap_iszero(obj->complete_nodeset)) {
+ /* we're empty, there's a PU below us, it'll be removed this time */
+ modified = 1;
+ }
+ /* cpuset cannot intersect unless nodeset intersects or is empty */
+ if (droppedcpuset)
+ assert(!hwloc_bitmap_intersects(obj->complete_cpuset, droppedcpuset)
+ || hwloc_bitmap_iszero(obj->complete_nodeset));
+ }
+ if (droppedcpuset) {
+ hwloc_bitmap_andnot(obj->cpuset, obj->cpuset, droppedcpuset);
+ hwloc_bitmap_andnot(obj->complete_cpuset, obj->complete_cpuset, droppedcpuset);
+ }
+
+ if (modified) {
+ for_each_child_safe(child, obj, pchild)
+ restrict_object_by_nodeset(topology, flags, pchild, droppedcpuset, droppednodeset);
+ if (flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS)
+ /* cpuset may have changed above where some NUMA nodes were removed.
+ * if some hwloc_bitmap_first(child->complete_cpuset) changed, children might need to be reordered */
+ hwloc__reorder_children(obj);
+
+ for_each_memory_child_safe(child, obj, pchild)
+ restrict_object_by_nodeset(topology, flags, pchild, droppedcpuset, droppednodeset);
+ /* FIXME: we may have to reorder CPU-less groups of NUMA nodes if some of their nodes were removed */
+
+ /* Nothing to restrict under I/O or Misc */
+ }
+
+ if (!obj->first_child && !obj->memory_first_child /* arity not updated before connect_children() */
+ && hwloc_bitmap_iszero(obj->nodeset)
+ && (obj->type != HWLOC_OBJ_PU || (flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS))) {
+ /* remove object */
+ hwloc_debug("%s", "\nRemoving object during restrict by nodeset");
hwloc_debug_print_object(0, obj);
if (!(flags & HWLOC_RESTRICT_FLAG_ADAPT_IO)) {
@@ -3759,7 +4058,7 @@ restrict_object_by_cpuset(hwloc_topology_t topology, unsigned long flags, hwloc_
}
int
-hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_cpuset_t cpuset, unsigned long flags)
+hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_bitmap_t set, unsigned long flags)
{
hwloc_bitmap_t droppedcpuset, droppednodeset;
@@ -3767,15 +4066,35 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_cpuset_t cp
errno = EINVAL;
return -1;
}
+ if (topology->adopted_shmem_addr) {
+ errno = EPERM;
+ return -1;
+ }
if (flags & ~(HWLOC_RESTRICT_FLAG_REMOVE_CPULESS
- |HWLOC_RESTRICT_FLAG_ADAPT_MISC|HWLOC_RESTRICT_FLAG_ADAPT_IO)) {
+ |HWLOC_RESTRICT_FLAG_ADAPT_MISC|HWLOC_RESTRICT_FLAG_ADAPT_IO
+ |HWLOC_RESTRICT_FLAG_BYNODESET|HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS)) {
errno = EINVAL;
return -1;
}
+ if (flags & HWLOC_RESTRICT_FLAG_BYNODESET) {
+ /* cannot use CPULESS with BYNODESET */
+ if (flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) {
+ errno = EINVAL;
+ return -1;
+ }
+ } else {
+ /* cannot use MEMLESS without BYNODESET */
+ if (flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS) {
+ errno = EINVAL;
+ return -1;
+ }
+ }
+
/* make sure we'll keep something in the topology */
- if (!hwloc_bitmap_intersects(cpuset, topology->allowed_cpuset)) {
+ if (((flags & HWLOC_RESTRICT_FLAG_BYNODESET) && !hwloc_bitmap_intersects(set, topology->allowed_nodeset))
+ || (!(flags & HWLOC_RESTRICT_FLAG_BYNODESET) && !hwloc_bitmap_intersects(set, topology->allowed_cpuset))) {
errno = EINVAL; /* easy failure, just don't touch the topology */
return -1;
}
@@ -3788,39 +4107,76 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_cpuset_t cp
return -1;
}
- /* cpuset to clear */
- hwloc_bitmap_not(droppedcpuset, cpuset);
- /* nodeset to clear */
- if (flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) {
- hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0);
- do {
- /* node will be removed if nodeset gets or was empty */
- if (hwloc_bitmap_iszero(node->cpuset)
- || hwloc_bitmap_isincluded(node->cpuset, droppedcpuset))
- hwloc_bitmap_set(droppednodeset, node->os_index);
- node = node->next_cousin;
- } while (node);
+ if (flags & HWLOC_RESTRICT_FLAG_BYNODESET) {
+ /* nodeset to clear */
+ hwloc_bitmap_not(droppednodeset, set);
+ /* cpuset to clear */
+ if (flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS) {
+ hwloc_obj_t pu = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0);
+ do {
+ /* PU will be removed if cpuset gets or was empty */
+ if (hwloc_bitmap_iszero(pu->cpuset)
+ || hwloc_bitmap_isincluded(pu->nodeset, droppednodeset))
+ hwloc_bitmap_set(droppedcpuset, pu->os_index);
+ pu = pu->next_cousin;
+ } while (pu);
- /* check we're not removing all NUMA nodes */
- if (hwloc_bitmap_isincluded(topology->allowed_nodeset, droppednodeset)) {
- errno = EINVAL; /* easy failure, just don't touch the topology */
- hwloc_bitmap_free(droppedcpuset);
- hwloc_bitmap_free(droppednodeset);
- return -1;
+ /* check we're not removing all PUs */
+ if (hwloc_bitmap_isincluded(topology->allowed_cpuset, droppedcpuset)) {
+ errno = EINVAL; /* easy failure, just don't touch the topology */
+ hwloc_bitmap_free(droppedcpuset);
+ hwloc_bitmap_free(droppednodeset);
+ return -1;
+ }
+ }
+ /* remove cpuset if empty */
+ if (!(flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS)
+ || hwloc_bitmap_iszero(droppedcpuset)) {
+ hwloc_bitmap_free(droppedcpuset);
+ droppedcpuset = NULL;
}
- }
- /* remove nodeset if empty */
- if (!(flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS)
- || hwloc_bitmap_iszero(droppednodeset)) {
- hwloc_bitmap_free(droppednodeset);
- droppednodeset = NULL;
- }
- /* now recurse to filter sets and drop things */
- restrict_object_by_cpuset(topology, flags, &topology->levels[0][0], droppedcpuset, droppednodeset);
- hwloc_bitmap_andnot(topology->allowed_cpuset, topology->allowed_cpuset, droppedcpuset);
- if (droppednodeset)
+ /* now recurse to filter sets and drop things */
+ restrict_object_by_nodeset(topology, flags, &topology->levels[0][0], droppedcpuset, droppednodeset);
hwloc_bitmap_andnot(topology->allowed_nodeset, topology->allowed_nodeset, droppednodeset);
+ if (droppedcpuset)
+ hwloc_bitmap_andnot(topology->allowed_cpuset, topology->allowed_cpuset, droppedcpuset);
+
+ } else {
+ /* cpuset to clear */
+ hwloc_bitmap_not(droppedcpuset, set);
+ /* nodeset to clear */
+ if (flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) {
+ hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0);
+ do {
+ /* node will be removed if nodeset gets or was empty */
+ if (hwloc_bitmap_iszero(node->cpuset)
+ || hwloc_bitmap_isincluded(node->cpuset, droppedcpuset))
+ hwloc_bitmap_set(droppednodeset, node->os_index);
+ node = node->next_cousin;
+ } while (node);
+
+ /* check we're not removing all NUMA nodes */
+ if (hwloc_bitmap_isincluded(topology->allowed_nodeset, droppednodeset)) {
+ errno = EINVAL; /* easy failure, just don't touch the topology */
+ hwloc_bitmap_free(droppedcpuset);
+ hwloc_bitmap_free(droppednodeset);
+ return -1;
+ }
+ }
+ /* remove nodeset if empty */
+ if (!(flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS)
+ || hwloc_bitmap_iszero(droppednodeset)) {
+ hwloc_bitmap_free(droppednodeset);
+ droppednodeset = NULL;
+ }
+
+ /* now recurse to filter sets and drop things */
+ restrict_object_by_cpuset(topology, flags, &topology->levels[0][0], droppedcpuset, droppednodeset);
+ hwloc_bitmap_andnot(topology->allowed_cpuset, topology->allowed_cpuset, droppedcpuset);
+ if (droppednodeset)
+ hwloc_bitmap_andnot(topology->allowed_nodeset, topology->allowed_nodeset, droppednodeset);
+ }
hwloc_bitmap_free(droppedcpuset);
hwloc_bitmap_free(droppednodeset);
@@ -3849,6 +4205,72 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_cpuset_t cp
return -1;
}
+int
+hwloc_topology_allow(struct hwloc_topology *topology,
+ hwloc_const_cpuset_t cpuset, hwloc_const_nodeset_t nodeset,
+ unsigned long flags)
+{
+ if (!topology->is_loaded)
+ goto einval;
+
+ if (topology->adopted_shmem_addr) {
+ errno = EPERM;
+ goto error;
+ }
+
+ if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED))
+ goto einval;
+
+ if (flags & ~(HWLOC_ALLOW_FLAG_ALL|HWLOC_ALLOW_FLAG_LOCAL_RESTRICTIONS|HWLOC_ALLOW_FLAG_CUSTOM))
+ goto einval;
+
+ switch (flags) {
+ case HWLOC_ALLOW_FLAG_ALL: {
+ if (cpuset || nodeset)
+ goto einval;
+ hwloc_bitmap_copy(topology->allowed_cpuset, hwloc_get_root_obj(topology)->complete_cpuset);
+ hwloc_bitmap_copy(topology->allowed_nodeset, hwloc_get_root_obj(topology)->complete_nodeset);
+ break;
+ }
+ case HWLOC_ALLOW_FLAG_LOCAL_RESTRICTIONS: {
+ if (cpuset || nodeset)
+ goto einval;
+ if (!topology->is_thissystem)
+ goto einval;
+ if (!topology->binding_hooks.get_allowed_resources) {
+ errno = ENOSYS;
+ goto error;
+ }
+ topology->binding_hooks.get_allowed_resources(topology);
+ break;
+ }
+ case HWLOC_ALLOW_FLAG_CUSTOM: {
+ if (cpuset) {
+ /* keep the intersection with the full topology cpuset, if not empty */
+ if (!hwloc_bitmap_intersects(hwloc_get_root_obj(topology)->cpuset, cpuset))
+ goto einval;
+ hwloc_bitmap_and(topology->allowed_cpuset, hwloc_get_root_obj(topology)->cpuset, cpuset);
+ }
+ if (nodeset) {
+ /* keep the intersection with the full topology nodeset, if not empty */
+ if (!hwloc_bitmap_intersects(hwloc_get_root_obj(topology)->nodeset, nodeset))
+ goto einval;
+ hwloc_bitmap_and(topology->allowed_nodeset, hwloc_get_root_obj(topology)->nodeset, nodeset);
+ }
+ break;
+ }
+ default:
+ goto einval;
+ }
+
+ return 0;
+
+ einval:
+ errno = EINVAL;
+ error:
+ return -1;
+}
+
int
hwloc_topology_is_thissystem(struct hwloc_topology *topology)
{
@@ -4005,7 +4427,7 @@ hwloc__check_children_cpusets(hwloc_topology_t topology __hwloc_attribute_unused
assert(hwloc_bitmap_first(obj->cpuset) == (int) obj->os_index);
assert(hwloc_bitmap_weight(obj->complete_cpuset) == 1);
assert(hwloc_bitmap_first(obj->complete_cpuset) == (int) obj->os_index);
- if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM)) {
+ if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED)) {
assert(hwloc_bitmap_isset(topology->allowed_cpuset, (int) obj->os_index));
}
assert(!obj->arity);
@@ -4166,6 +4588,8 @@ hwloc__check_object(hwloc_topology_t topology, hwloc_bitmap_t gp_indexes, hwloc_
assert(obj->cpuset);
if (obj->type == HWLOC_OBJ_NUMANODE)
assert(obj->depth == HWLOC_TYPE_DEPTH_NUMANODE);
+ else if (obj->type == HWLOC_OBJ_MEMCACHE)
+ assert(obj->depth == HWLOC_TYPE_DEPTH_MEMCACHE);
else
assert(obj->depth >= 0);
}
@@ -4219,7 +4643,7 @@ hwloc__check_nodesets(hwloc_topology_t topology, hwloc_obj_t obj, hwloc_bitmap_t
assert(hwloc_bitmap_first(obj->nodeset) == (int) obj->os_index);
assert(hwloc_bitmap_weight(obj->complete_nodeset) == 1);
assert(hwloc_bitmap_first(obj->complete_nodeset) == (int) obj->os_index);
- if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM)) {
+ if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED)) {
assert(hwloc_bitmap_isset(topology->allowed_nodeset, (int) obj->os_index));
}
assert(!obj->arity);
@@ -4362,7 +4786,9 @@ hwloc_topology_check(struct hwloc_topology *topology)
HWLOC_BUILD_ASSERT(HWLOC_OBJ_BRIDGE + 1 == HWLOC_OBJ_PCI_DEVICE);
HWLOC_BUILD_ASSERT(HWLOC_OBJ_PCI_DEVICE + 1 == HWLOC_OBJ_OS_DEVICE);
HWLOC_BUILD_ASSERT(HWLOC_OBJ_OS_DEVICE + 1 == HWLOC_OBJ_MISC);
- HWLOC_BUILD_ASSERT(HWLOC_OBJ_MISC + 1 == HWLOC_OBJ_TYPE_MAX);
+ HWLOC_BUILD_ASSERT(HWLOC_OBJ_MISC + 1 == HWLOC_OBJ_MEMCACHE);
+ HWLOC_BUILD_ASSERT(HWLOC_OBJ_MEMCACHE + 1 == HWLOC_OBJ_DIE);
+ HWLOC_BUILD_ASSERT(HWLOC_OBJ_DIE + 1 == HWLOC_OBJ_TYPE_MAX);
/* make sure order and priority arrays have the right size */
HWLOC_BUILD_ASSERT(sizeof(obj_type_order)/sizeof(*obj_type_order) == HWLOC_OBJ_TYPE_MAX);
@@ -4408,6 +4834,7 @@ hwloc_topology_check(struct hwloc_topology *topology)
int d;
type = hwloc_get_depth_type(topology, j);
assert(type != HWLOC_OBJ_NUMANODE);
+ assert(type != HWLOC_OBJ_MEMCACHE);
assert(type != HWLOC_OBJ_PCI_DEVICE);
assert(type != HWLOC_OBJ_BRIDGE);
assert(type != HWLOC_OBJ_OS_DEVICE);
@@ -4423,6 +4850,9 @@ hwloc_topology_check(struct hwloc_topology *topology)
if (type == HWLOC_OBJ_NUMANODE) {
assert(d == HWLOC_TYPE_DEPTH_NUMANODE);
assert(hwloc_get_depth_type(topology, d) == HWLOC_OBJ_NUMANODE);
+ } else if (type == HWLOC_OBJ_MEMCACHE) {
+ assert(d == HWLOC_TYPE_DEPTH_MEMCACHE);
+ assert(hwloc_get_depth_type(topology, d) == HWLOC_OBJ_MEMCACHE);
} else if (type == HWLOC_OBJ_BRIDGE) {
assert(d == HWLOC_TYPE_DEPTH_BRIDGE);
assert(hwloc_get_depth_type(topology, d) == HWLOC_OBJ_BRIDGE);
@@ -4449,7 +4879,7 @@ hwloc_topology_check(struct hwloc_topology *topology)
assert(!obj->depth);
/* check that allowed sets are larger than the main sets */
- if (topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) {
+ if (topology->flags & HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED) {
assert(hwloc_bitmap_isincluded(topology->allowed_cpuset, obj->cpuset));
assert(hwloc_bitmap_isincluded(topology->allowed_nodeset, obj->nodeset));
} else {
diff --git a/src/3rdparty/hwloc/src/traversal.c b/src/3rdparty/hwloc/src/traversal.c
index 9c5e6268..0b744d78 100644
--- a/src/3rdparty/hwloc/src/traversal.c
+++ b/src/3rdparty/hwloc/src/traversal.c
@@ -1,16 +1,17 @@
/*
* Copyright © 2009 CNRS
- * Copyright © 2009-2018 Inria. All rights reserved.
+ * Copyright © 2009-2019 Inria. All rights reserved.
* Copyright © 2009-2010 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
-#include <private/autogen/config.h>
-#include <hwloc.h>
-#include <private/private.h>
-#include <private/misc.h>
-#include <private/debug.h>
+#include "private/autogen/config.h"
+#include "hwloc.h"
+#include "private/private.h"
+#include "private/misc.h"
+#include "private/debug.h"
+
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif /* HAVE_STRINGS_H */
@@ -40,6 +41,8 @@ hwloc_get_depth_type (hwloc_topology_t topology, int depth)
return HWLOC_OBJ_OS_DEVICE;
case HWLOC_TYPE_DEPTH_MISC:
return HWLOC_OBJ_MISC;
+ case HWLOC_TYPE_DEPTH_MEMCACHE:
+ return HWLOC_OBJ_MEMCACHE;
default:
return HWLOC_OBJ_TYPE_NONE;
}
@@ -237,8 +240,10 @@ hwloc_obj_type_string (hwloc_obj_type_t obj)
case HWLOC_OBJ_MACHINE: return "Machine";
case HWLOC_OBJ_MISC: return "Misc";
case HWLOC_OBJ_GROUP: return "Group";
+ case HWLOC_OBJ_MEMCACHE: return "MemCache";
case HWLOC_OBJ_NUMANODE: return "NUMANode";
case HWLOC_OBJ_PACKAGE: return "Package";
+ case HWLOC_OBJ_DIE: return "Die";
case HWLOC_OBJ_L1CACHE: return "L1Cache";
case HWLOC_OBJ_L2CACHE: return "L2Cache";
case HWLOC_OBJ_L3CACHE: return "L3Cache";
@@ -256,6 +261,41 @@ hwloc_obj_type_string (hwloc_obj_type_t obj)
}
}
+/* Check if string matches the given type at least on minmatch chars.
+ * On success, return the address of where matching stop, either pointing to \0 or to a suffix (digits, colon, etc)
+ * On error, return NULL;
+ */
+static __hwloc_inline const char *
+hwloc__type_match(const char *string,
+ const char *type, /* type must be lowercase */
+ size_t minmatch)
+{
+ const char *s, *t;
+ unsigned i;
+ for(i=0, s=string, t=type; ; i++, s++, t++) {
+ if (!*s) {
+ /* string ends before type */
+ if (i= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') || *s == '-')
+ /* valid character that doesn't match */
+ return NULL;
+ /* invalid character, we reached the end of the type name in string, stop matching here */
+ if (i= '0' && string[1] <= '9') {
+ char *suffix;
depthattr = strtol(string+1, &end, 10);
- if (*end == 'i') {
+ if (*end == 'i' || *end == 'I') {
if (depthattr >= 1 && depthattr <= 3) {
type = HWLOC_OBJ_L1ICACHE + depthattr-1;
cachetypeattr = HWLOC_OBJ_CACHE_INSTRUCTION;
+ suffix = end+1;
} else
return -1;
} else {
if (depthattr >= 1 && depthattr <= 5) {
type = HWLOC_OBJ_L1CACHE + depthattr-1;
- cachetypeattr = *end == 'd' ? HWLOC_OBJ_CACHE_DATA : HWLOC_OBJ_CACHE_UNIFIED;
+ if (*end == 'd' || *end == 'D') {
+ cachetypeattr = HWLOC_OBJ_CACHE_DATA;
+ suffix = end+1;
+ } else if (*end == 'u' || *end == 'U') {
+ cachetypeattr = HWLOC_OBJ_CACHE_UNIFIED;
+ suffix = end+1;
+ } else {
+ cachetypeattr = HWLOC_OBJ_CACHE_UNIFIED;
+ suffix = end;
+ }
} else
return -1;
}
+ /* check whether the optional suffix matches "cache" */
+ if (!hwloc__type_match(suffix, "cache", 0))
+ return -1;
- } else if (!hwloc_strncasecmp(string, "group", 2)) {
- size_t length;
+ } else if ((end = (char *) hwloc__type_match(string, "group", 2)) != NULL) {
type = HWLOC_OBJ_GROUP;
- length = strcspn(string, "0123456789");
- if (length <= 5 && !hwloc_strncasecmp(string, "group", length)
- && string[length] >= '0' && string[length] <= '9') {
- depthattr = strtol(string+length, &end, 10);
+ if (*end >= '0' && *end <= '9') {
+ depthattr = strtol(end, &end, 10);
}
} else
@@ -421,7 +477,9 @@ hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t
case HWLOC_OBJ_MISC:
case HWLOC_OBJ_MACHINE:
case HWLOC_OBJ_NUMANODE:
+ case HWLOC_OBJ_MEMCACHE:
case HWLOC_OBJ_PACKAGE:
+ case HWLOC_OBJ_DIE:
case HWLOC_OBJ_CORE:
case HWLOC_OBJ_PU:
return hwloc_snprintf(string, size, "%s", hwloc_obj_type_string(type));
@@ -523,6 +581,7 @@ hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t
case HWLOC_OBJ_L1ICACHE:
case HWLOC_OBJ_L2ICACHE:
case HWLOC_OBJ_L3ICACHE:
+ case HWLOC_OBJ_MEMCACHE:
if (verbose) {
char assoc[32];
if (obj->attr->cache.associativity == -1)
diff --git a/src/App.cpp b/src/App.cpp
index 3acccc5d..17060f96 100644
--- a/src/App.cpp
+++ b/src/App.cpp
@@ -50,6 +50,8 @@ xmrig::App::App(Process *process)
xmrig::App::~App()
{
+ Cpu::release();
+
delete m_signals;
delete m_console;
delete m_controller;
diff --git a/src/Summary.cpp b/src/Summary.cpp
index 757297c5..695138d1 100644
--- a/src/Summary.cpp
+++ b/src/Summary.cpp
@@ -39,6 +39,11 @@
#include "version.h"
+#ifdef XMRIG_ALGO_RANDOMX
+# include "crypto/rx/RxConfig.h"
+#endif
+
+
namespace xmrig {
@@ -59,24 +64,36 @@ inline static const char *asmName(Assembly::Id assembly)
#endif
-static void print_memory(Config *config) {
-# ifdef _WIN32
+static void print_memory(Config *config)
+{
+# ifdef XMRIG_OS_WIN
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s",
"HUGE PAGES", config->cpu().isHugePages() ? (VirtualMemory::isHugepagesAvailable() ? GREEN_BOLD("permission granted") : RED_BOLD("unavailable")) : RED_BOLD("disabled"));
+# else
+ Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "HUGE PAGES", config->cpu().isHugePages() ? GREEN_BOLD("supported") : RED_BOLD("disabled"));
+# endif
+
+# ifdef XMRIG_ALGO_RANDOMX
+# ifdef XMRIG_OS_LINUX
+ Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s",
+ "1GB PAGES", (VirtualMemory::isOneGbPagesAvailable() ? (config->rx().isOneGbPages() ? GREEN_BOLD("supported") : YELLOW_BOLD("disabled")) : YELLOW_BOLD("unavailable")));
+# else
+ Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "1GB PAGES", YELLOW_BOLD("unavailable"));
+# endif
# endif
}
static void print_cpu(Config *)
{
- const ICpuInfo *info = Cpu::info();
+ const auto info = Cpu::info();
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%zu)") " %sx64 %sAES",
"CPU",
info->brand(),
info->packages(),
- info->isX64() ? GREEN_BOLD_S : RED_BOLD_S "-",
- info->hasAES() ? GREEN_BOLD_S : RED_BOLD_S "-"
+ info->isX64() ? GREEN_BOLD_S : RED_BOLD_S "-",
+ info->hasAES() ? GREEN_BOLD_S : RED_BOLD_S "-"
);
# if defined(XMRIG_FEATURE_LIBCPUID) || defined (XMRIG_FEATURE_HWLOC)
Log::print(WHITE_BOLD(" %-13s") BLACK_BOLD("L2:") WHITE_BOLD("%.1f MB") BLACK_BOLD(" L3:") WHITE_BOLD("%.1f MB")
diff --git a/src/backend/common/interfaces/IRxStorage.h b/src/backend/common/interfaces/IRxStorage.h
index 9e0407b0..56788518 100644
--- a/src/backend/common/interfaces/IRxStorage.h
+++ b/src/backend/common/interfaces/IRxStorage.h
@@ -25,6 +25,7 @@
#include "crypto/rx/RxConfig.h"
+#include "crypto/common/HugePagesInfo.h"
#include
@@ -44,9 +45,9 @@ class IRxStorage
public:
virtual ~IRxStorage() = default;
- virtual RxDataset *dataset(const Job &job, uint32_t nodeId) const = 0;
- virtual std::pair hugePages() const = 0;
- virtual void init(const RxSeed &seed, uint32_t threads, bool hugePages, RxConfig::Mode mode) = 0;
+ virtual HugePagesInfo hugePages() const = 0;
+ virtual RxDataset *dataset(const Job &job, uint32_t nodeId) const = 0;
+ virtual void init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode, int priority) = 0;
};
diff --git a/src/backend/cpu/CpuBackend.cpp b/src/backend/cpu/CpuBackend.cpp
index ddee6768..ac459497 100644
--- a/src/backend/cpu/CpuBackend.cpp
+++ b/src/backend/cpu/CpuBackend.cpp
@@ -68,17 +68,15 @@ static std::mutex mutex;
struct CpuLaunchStatus
{
public:
- inline size_t hugePages() const { return m_hugePages; }
- inline size_t memory() const { return m_ways * m_memory; }
- inline size_t pages() const { return m_pages; }
- inline size_t threads() const { return m_threads; }
- inline size_t ways() const { return m_ways; }
+ inline const HugePagesInfo &hugePages() const { return m_hugePages; }
+ inline size_t memory() const { return m_ways * m_memory; }
+ inline size_t threads() const { return m_threads; }
+ inline size_t ways() const { return m_ways; }
inline void start(const std::vector<CpuLaunchData> &threads, size_t memory)
{
- m_hugePages = 0;
+ m_hugePages.reset();
m_memory = memory;
- m_pages = 0;
m_started = 0;
m_errors = 0;
m_threads = threads.size();
@@ -89,11 +87,9 @@ public:
inline bool started(IWorker *worker, bool ready)
{
if (ready) {
- auto hugePages = worker->memory()->hugePages();
-
m_started++;
- m_hugePages += hugePages.first;
- m_pages += hugePages.second;
+
+ m_hugePages += worker->memory()->hugePages();
m_ways += worker->intensity();
}
else {
@@ -115,19 +111,18 @@ public:
tag,
m_errors == 0 ? CYAN_BOLD_S : YELLOW_BOLD_S,
m_started, m_threads, m_ways,
- (m_hugePages == m_pages ? GREEN_BOLD_S : (m_hugePages == 0 ? RED_BOLD_S : YELLOW_BOLD_S)),
- m_hugePages == 0 ? 0.0 : static_cast<double>(m_hugePages) / m_pages * 100.0,
- m_hugePages, m_pages,
+ (m_hugePages.isFullyAllocated() ? GREEN_BOLD_S : (m_hugePages.allocated == 0 ? RED_BOLD_S : YELLOW_BOLD_S)),
+ m_hugePages.percent(),
+ m_hugePages.allocated, m_hugePages.total,
memory() / 1024,
Chrono::steadyMSecs() - m_ts
);
}
private:
+ HugePagesInfo m_hugePages;
size_t m_errors = 0;
- size_t m_hugePages = 0;
size_t m_memory = 0;
- size_t m_pages = 0;
size_t m_started = 0;
size_t m_threads = 0;
size_t m_ways = 0;
@@ -169,18 +164,17 @@ public:
rapidjson::Value hugePages(int version, rapidjson::Document &doc)
{
- std::pair pages(0, 0);
+ HugePagesInfo pages;
# ifdef XMRIG_ALGO_RANDOMX
if (algo.family() == Algorithm::RANDOM_X) {
- pages = Rx::hugePages();
+ pages += Rx::hugePages();
}
# endif
mutex.lock();
- pages.first += status.hugePages();
- pages.second += status.pages();
+ pages += status.hugePages();
mutex.unlock();
@@ -188,11 +182,11 @@ public:
if (version > 1) {
hugepages.SetArray();
- hugepages.PushBack(pages.first, doc.GetAllocator());
- hugepages.PushBack(pages.second, doc.GetAllocator());
+ hugepages.PushBack(static_cast<uint64_t>(pages.allocated), doc.GetAllocator());
+ hugepages.PushBack(static_cast<uint64_t>(pages.total), doc.GetAllocator());
}
else {
- hugepages = pages.first == pages.second;
+ hugepages = pages.isFullyAllocated();
}
return hugepages;
diff --git a/src/backend/cpu/CpuConfig.cpp b/src/backend/cpu/CpuConfig.cpp
index 7ebe904b..a9e10338 100644
--- a/src/backend/cpu/CpuConfig.cpp
+++ b/src/backend/cpu/CpuConfig.cpp
@@ -119,10 +119,10 @@ std::vector xmrig::CpuConfig::get(const Miner *miner, cons
void xmrig::CpuConfig::read(const rapidjson::Value &value)
{
if (value.IsObject()) {
- m_enabled = Json::getBool(value, kEnabled, m_enabled);
- m_hugePages = Json::getBool(value, kHugePages, m_hugePages);
- m_limit = Json::getUint(value, kMaxThreadsHint, m_limit);
- m_yield = Json::getBool(value, kYield, m_yield);
+ m_enabled = Json::getBool(value, kEnabled, m_enabled);
+ m_hugePages = Json::getBool(value, kHugePages, m_hugePages);
+ m_limit = Json::getUint(value, kMaxThreadsHint, m_limit);
+ m_yield = Json::getBool(value, kYield, m_yield);
setAesMode(Json::getValue(value, kHwAes));
setPriority(Json::getInt(value, kPriority, -1));
diff --git a/src/backend/cpu/CpuConfig.h b/src/backend/cpu/CpuConfig.h
index 2306057f..a9c40806 100644
--- a/src/backend/cpu/CpuConfig.h
+++ b/src/backend/cpu/CpuConfig.h
@@ -60,6 +60,7 @@ public:
inline const String &argon2Impl() const { return m_argon2Impl; }
inline const Threads &threads() const { return m_threads; }
inline int priority() const { return m_priority; }
+ inline uint32_t limit() const { return m_limit; }
private:
void generate();
diff --git a/src/backend/cpu/CpuWorker.cpp b/src/backend/cpu/CpuWorker.cpp
index 2e223a31..f93f7dac 100644
--- a/src/backend/cpu/CpuWorker.cpp
+++ b/src/backend/cpu/CpuWorker.cpp
@@ -63,7 +63,7 @@ xmrig::CpuWorker::CpuWorker(size_t id, const CpuLaunchData &data) :
m_miner(data.miner),
m_ctx()
{
- m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, true, m_node);
+ m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, false, true, m_node);
}
@@ -96,7 +96,7 @@ void xmrig::CpuWorker::allocateRandomX_VM()
}
if (!m_vm) {
- m_vm = new RxVm(dataset, m_memory->scratchpad(), !m_hwAES);
+ m_vm = new RxVm(dataset, m_memory->scratchpad(), !m_hwAES, m_assembly);
}
}
#endif
diff --git a/src/backend/cpu/interfaces/ICpuInfo.h b/src/backend/cpu/interfaces/ICpuInfo.h
index 20e72391..674668b5 100644
--- a/src/backend/cpu/interfaces/ICpuInfo.h
+++ b/src/backend/cpu/interfaces/ICpuInfo.h
@@ -37,6 +37,12 @@ namespace xmrig {
class ICpuInfo
{
public:
+ enum Vendor {
+ VENDOR_UNKNOWN,
+ VENDOR_INTEL,
+ VENDOR_AMD
+ };
+
virtual ~ICpuInfo() = default;
# if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__)
@@ -48,6 +54,7 @@ public:
virtual Assembly::Id assembly() const = 0;
virtual bool hasAES() const = 0;
virtual bool hasAVX2() const = 0;
+ virtual bool hasOneGbPages() const = 0;
virtual const char *backend() const = 0;
virtual const char *brand() const = 0;
virtual CpuThreads threads(const Algorithm &algorithm, uint32_t limit) const = 0;
@@ -57,6 +64,7 @@ public:
virtual size_t nodes() const = 0;
virtual size_t packages() const = 0;
virtual size_t threads() const = 0;
+ virtual Vendor vendor() const = 0;
};
diff --git a/src/backend/cpu/platform/AdvancedCpuInfo.cpp b/src/backend/cpu/platform/AdvancedCpuInfo.cpp
index 5cae55e2..4a3c6f62 100644
--- a/src/backend/cpu/platform/AdvancedCpuInfo.cpp
+++ b/src/backend/cpu/platform/AdvancedCpuInfo.cpp
@@ -22,6 +22,17 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include "backend/cpu/platform/AdvancedCpuInfo.h"
+#include "3rdparty/libcpuid/libcpuid.h"
+
+
+#ifdef _MSC_VER
+# include <intrin.h>
+#else
+# include <cpuid.h>
+#endif
+
+
#include
#include
#include
@@ -29,10 +40,6 @@
#include
-#include "3rdparty/libcpuid/libcpuid.h"
-#include "backend/cpu/platform/AdvancedCpuInfo.h"
-
-
namespace xmrig {
@@ -54,11 +61,38 @@ static inline void cpu_brand_string(char out[64], const char *in) {
}
+static inline void cpuid(uint32_t level, int32_t output[4])
+{
+ memset(output, 0, sizeof(int32_t) * 4);
+
+# ifdef _MSC_VER
+ __cpuid(output, static_cast(level));
+# else
+ __cpuid_count(level, 0, output[0], output[1], output[2], output[3]);
+# endif
+}
+
+
+static inline bool has_feature(uint32_t level, uint32_t reg, int32_t bit)
+{
+ int32_t cpu_info[4] = { 0 };
+ cpuid(level, cpu_info);
+
+ return (cpu_info[reg] & bit) != 0;
+}
+
+
+static inline bool has_pdpe1gb()
+{
+ return has_feature(0x80000001, 3, 1 << 26);
+}
+
+
} // namespace xmrig
xmrig::AdvancedCpuInfo::AdvancedCpuInfo() :
- m_brand()
+ m_pdpe1gb(has_pdpe1gb())
{
struct cpu_raw_data_t raw = {};
struct cpu_id_t data = {};
@@ -69,21 +103,28 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() :
cpu_brand_string(m_brand, data.brand_str);
snprintf(m_backend, sizeof m_backend, "libcpuid/%s", cpuid_lib_version());
+ if (data.vendor == ::VENDOR_INTEL) {
+ m_vendor = VENDOR_INTEL;
+ }
+ else if (data.vendor == ::VENDOR_AMD) {
+ m_vendor = VENDOR_AMD;
+ }
+
m_threads = static_cast(data.total_logical_cpus);
m_packages = std::max(threads() / static_cast(data.num_logical_cpus), 1);
m_cores = static_cast(data.num_cores) * m_packages;
m_L3 = data.l3_cache > 0 ? static_cast(data.l3_cache) * m_packages : 0;
- const size_t l2 = static_cast(data.l2_cache);
+ const auto l2 = static_cast(data.l2_cache);
// Workaround for AMD CPUs https://github.com/anrieff/libcpuid/issues/97
- if (data.vendor == VENDOR_AMD && data.ext_family >= 0x15 && data.ext_family < 0x17) {
+ if (m_vendor == VENDOR_AMD && data.ext_family >= 0x15 && data.ext_family < 0x17) {
m_L2 = l2 * (cores() / 2) * m_packages;
m_L2_exclusive = true;
}
// Workaround for Intel Pentium Dual-Core, Core Duo, Core 2 Duo, Core 2 Quad and their Xeon homologue
// These processors have L2 cache shared by 2 cores.
- else if (data.vendor == VENDOR_INTEL && data.ext_family == 0x06 && (data.ext_model == 0x0E || data.ext_model == 0x0F || data.ext_model == 0x17)) {
+ else if (m_vendor == VENDOR_INTEL && data.ext_family == 0x06 && (data.ext_model == 0x0E || data.ext_model == 0x0F || data.ext_model == 0x17)) {
size_t l2_count_per_socket = cores() > 1 ? cores() / 2 : 1;
m_L2 = data.l2_cache > 0 ? l2 * l2_count_per_socket * m_packages : 0;
}
@@ -97,10 +138,10 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() :
if (data.flags[CPU_FEATURE_AES]) {
m_aes = true;
- if (data.vendor == VENDOR_AMD) {
+ if (m_vendor == VENDOR_AMD) {
m_assembly = (data.ext_family >= 23) ? Assembly::RYZEN : Assembly::BULLDOZER;
}
- else if (data.vendor == VENDOR_INTEL) {
+ else if (m_vendor == VENDOR_INTEL) {
m_assembly = Assembly::INTEL;
}
}
diff --git a/src/backend/cpu/platform/AdvancedCpuInfo.h b/src/backend/cpu/platform/AdvancedCpuInfo.h
index e2909a91..f6691b8f 100644
--- a/src/backend/cpu/platform/AdvancedCpuInfo.h
+++ b/src/backend/cpu/platform/AdvancedCpuInfo.h
@@ -43,6 +43,7 @@ protected:
inline Assembly::Id assembly() const override { return m_assembly; }
inline bool hasAES() const override { return m_aes; }
inline bool hasAVX2() const override { return m_avx2; }
+ inline bool hasOneGbPages() const override { return m_pdpe1gb; }
inline const char *backend() const override { return m_backend; }
inline const char *brand() const override { return m_brand; }
inline size_t cores() const override { return m_cores; }
@@ -51,19 +52,22 @@ protected:
inline size_t nodes() const override { return 0; }
inline size_t packages() const override { return m_packages; }
inline size_t threads() const override { return m_threads; }
+ inline Vendor vendor() const override { return m_vendor; }
private:
Assembly m_assembly;
bool m_aes = false;
bool m_avx2 = false;
bool m_L2_exclusive = false;
- char m_backend[32];
- char m_brand[64 + 5];
+ char m_backend[32]{};
+ char m_brand[64 + 5]{};
+ const bool m_pdpe1gb = false;
size_t m_cores = 0;
size_t m_L2 = 0;
size_t m_L3 = 0;
size_t m_packages = 1;
size_t m_threads = 0;
+ Vendor m_vendor = VENDOR_UNKNOWN;
};
diff --git a/src/backend/cpu/platform/BasicCpuInfo.cpp b/src/backend/cpu/platform/BasicCpuInfo.cpp
index db3741ee..b586fad0 100644
--- a/src/backend/cpu/platform/BasicCpuInfo.cpp
+++ b/src/backend/cpu/platform/BasicCpuInfo.cpp
@@ -23,7 +23,7 @@
*/
#include
-#include
+#include
#include
@@ -45,6 +45,10 @@
# define bit_AVX2 (1 << 5)
#endif
+#ifndef bit_PDPE1GB
+# define bit_PDPE1GB (1 << 26)
+#endif
+
#include "backend/cpu/platform/BasicCpuInfo.h"
#include "crypto/common/Assembly.h"
@@ -53,6 +57,7 @@
#define VENDOR_ID (0)
#define PROCESSOR_INFO (1)
#define EXTENDED_FEATURES (7)
+#define PROCESSOR_EXT_INFO (0x80000001)
#define PROCESSOR_BRAND_STRING_1 (0x80000002)
#define PROCESSOR_BRAND_STRING_2 (0x80000003)
#define PROCESSOR_BRAND_STRING_3 (0x80000004)
@@ -108,7 +113,7 @@ static void cpu_brand_string(char out[64 + 6]) {
}
-static bool has_feature(uint32_t level, uint32_t reg, int32_t bit)
+static inline bool has_feature(uint32_t level, uint32_t reg, int32_t bit)
{
int32_t cpu_info[4] = { 0 };
cpuid(level, cpu_info);
@@ -136,15 +141,20 @@ static inline bool has_avx2()
}
+static inline bool has_pdpe1gb()
+{
+ return has_feature(PROCESSOR_EXT_INFO, EDX_Reg, bit_PDPE1GB);
+}
+
+
} // namespace xmrig
xmrig::BasicCpuInfo::BasicCpuInfo() :
- m_brand(),
m_threads(std::thread::hardware_concurrency()),
- m_assembly(Assembly::NONE),
m_aes(has_aes_ni()),
- m_avx2(has_avx2())
+ m_avx2(has_avx2()),
+ m_pdpe1gb(has_pdpe1gb())
{
cpu_brand_string(m_brand);
@@ -160,12 +170,15 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
memcpy(vendor + 8, &data[2], 4);
if (memcmp(vendor, "AuthenticAMD", 12) == 0) {
+ m_vendor = VENDOR_AMD;
+
cpuid(PROCESSOR_INFO, data);
const int32_t family = get_masked(data[EAX_Reg], 12, 8) + get_masked(data[EAX_Reg], 28, 20);
m_assembly = family >= 23 ? Assembly::RYZEN : Assembly::BULLDOZER;
}
- else {
+ else if (memcmp(vendor, "GenuineIntel", 12) == 0) {
+ m_vendor = VENDOR_INTEL;
m_assembly = Assembly::INTEL;
}
}
@@ -179,7 +192,7 @@ const char *xmrig::BasicCpuInfo::backend() const
}
-xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint32_t limit) const
+xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint32_t) const
{
const size_t count = std::thread::hardware_concurrency();
diff --git a/src/backend/cpu/platform/BasicCpuInfo.h b/src/backend/cpu/platform/BasicCpuInfo.h
index 4c68c5f8..019c1dc0 100644
--- a/src/backend/cpu/platform/BasicCpuInfo.h
+++ b/src/backend/cpu/platform/BasicCpuInfo.h
@@ -44,6 +44,7 @@ protected:
inline Assembly::Id assembly() const override { return m_assembly; }
inline bool hasAES() const override { return m_aes; }
inline bool hasAVX2() const override { return m_avx2; }
+ inline bool hasOneGbPages() const override { return m_pdpe1gb; }
inline const char *brand() const override { return m_brand; }
inline size_t cores() const override { return 0; }
inline size_t L2() const override { return 0; }
@@ -51,15 +52,18 @@ protected:
inline size_t nodes() const override { return 0; }
inline size_t packages() const override { return 1; }
inline size_t threads() const override { return m_threads; }
+ inline Vendor vendor() const override { return m_vendor; }
protected:
- char m_brand[64 + 6];
+ char m_brand[64 + 6]{};
size_t m_threads;
private:
- Assembly m_assembly;
- bool m_aes;
- const bool m_avx2;
+ Assembly m_assembly = Assembly::NONE;
+ bool m_aes = false;
+ const bool m_avx2 = false;
+ const bool m_pdpe1gb = false;
+ Vendor m_vendor = VENDOR_UNKNOWN;
};
diff --git a/src/backend/cpu/platform/BasicCpuInfo_arm.cpp b/src/backend/cpu/platform/BasicCpuInfo_arm.cpp
index e52bdf94..00f5f01f 100644
--- a/src/backend/cpu/platform/BasicCpuInfo_arm.cpp
+++ b/src/backend/cpu/platform/BasicCpuInfo_arm.cpp
@@ -22,7 +22,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include
+#include
#include
@@ -36,10 +36,7 @@
xmrig::BasicCpuInfo::BasicCpuInfo() :
- m_brand(),
- m_threads(std::thread::hardware_concurrency()),
- m_aes(false),
- m_avx2(false)
+ m_threads(std::thread::hardware_concurrency())
{
# ifdef XMRIG_ARMv8
memcpy(m_brand, "ARMv8", 5);
diff --git a/src/backend/cpu/platform/HwlocCpuInfo.cpp b/src/backend/cpu/platform/HwlocCpuInfo.cpp
index 3983e8b0..a66bf9fd 100644
--- a/src/backend/cpu/platform/HwlocCpuInfo.cpp
+++ b/src/backend/cpu/platform/HwlocCpuInfo.cpp
@@ -262,7 +262,7 @@ xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm, uint3
void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorithm &algorithm, CpuThreads &threads, size_t limit) const
{
- constexpr size_t oneMiB = 1024u * 1024u;
+ constexpr size_t oneMiB = 1024U * 1024U;
size_t PUs = countByType(cache, HWLOC_OBJ_PU);
if (PUs == 0) {
diff --git a/src/base/io/log/backends/FileLog.cpp b/src/base/io/log/backends/FileLog.cpp
index 1ff01637..c581b880 100644
--- a/src/base/io/log/backends/FileLog.cpp
+++ b/src/base/io/log/backends/FileLog.cpp
@@ -24,13 +24,14 @@
*/
-#include
-#include
-
-
#include "base/io/log/backends/FileLog.h"
+#include
+#include
+#include
+
+
xmrig::FileLog::FileLog(const char *fileName)
{
uv_fs_t req;
@@ -45,13 +46,12 @@ void xmrig::FileLog::print(int, const char *line, size_t, size_t size, bool colo
return;
}
-# ifdef _WIN32
- uv_buf_t buf = uv_buf_init(strdup(line), static_cast(size));
-# else
- uv_buf_t buf = uv_buf_init(strdup(line), size);
-# endif
+ assert(strlen(line) == size);
- uv_fs_t *req = new uv_fs_t;
+ uv_buf_t buf = uv_buf_init(new char[size], size);
+ memcpy(buf.base, line, size);
+
+ auto req = new uv_fs_t;
req->data = buf.base;
uv_fs_write(uv_default_loop(), req, m_file, &buf, 1, -1, FileLog::onWrite);
diff --git a/src/base/kernel/interfaces/IConfig.h b/src/base/kernel/interfaces/IConfig.h
index dbbf82cd..fe1a24b5 100644
--- a/src/base/kernel/interfaces/IConfig.h
+++ b/src/base/kernel/interfaces/IConfig.h
@@ -90,6 +90,8 @@ public:
RandomXInitKey = 1022,
RandomXNumaKey = 1023,
RandomXModeKey = 1029,
+ RandomX1GbPagesKey = 1031,
+ RandomXWrmsrKey = 1032,
CPUMaxThreadsKey = 1026,
MemoryPoolKey = 1027,
YieldKey = 1030,
diff --git a/src/base/net/stratum/Client.cpp b/src/base/net/stratum/Client.cpp
index 9729f3fb..f9c56e05 100644
--- a/src/base/net/stratum/Client.cpp
+++ b/src/base/net/stratum/Client.cpp
@@ -338,6 +338,10 @@ bool xmrig::Client::isCriticalError(const char *message)
return true;
}
+ if (strncasecmp(message, "Invalid job id", 14) == 0) {
+ return true;
+ }
+
return false;
}
@@ -558,7 +562,7 @@ void xmrig::Client::connect(sockaddr *addr)
{
setState(ConnectingState);
- uv_connect_t *req = new uv_connect_t;
+ auto req = new uv_connect_t;
req->data = m_storage.ptr(m_key);
m_socket = new uv_tcp_t;
@@ -799,7 +803,7 @@ void xmrig::Client::ping()
void xmrig::Client::read(ssize_t nread)
{
- const size_t size = static_cast(nread);
+ const auto size = static_cast<size_t>(nread);
if (nread > 0 && size > m_recvBuf.available()) {
nread = UV_ENOBUFS;
@@ -859,7 +863,7 @@ void xmrig::Client::reconnect()
void xmrig::Client::setState(SocketState state)
{
- LOG_DEBUG("[%s] state: \"%s\"", url(), states[state]);
+ LOG_DEBUG("[%s] state: \"%s\" -> \"%s\"", url(), states[m_state], states[state]);
if (m_state == state) {
return;
@@ -956,6 +960,12 @@ void xmrig::Client::onConnect(uv_connect_t *req, int status)
return;
}
+ if (client->state() == ConnectedState) {
+ LOG_ERR("[%s] already connected", client->url());
+
+ return;
+ }
+
client->m_stream = static_cast