From a066f9a49c8ad4dd25a4fd369341249d35250aab Mon Sep 17 00:00:00 2001 From: XMRig Date: Thu, 5 Dec 2019 12:47:31 +0700 Subject: [PATCH] hwloc for MSVC updated to v2.1.0. --- cmake/flags.cmake | 4 +- src/3rdparty/hwloc/AUTHORS | 1 + src/3rdparty/hwloc/CMakeLists.txt | 2 +- src/3rdparty/hwloc/NEWS | 153 ++- src/3rdparty/hwloc/VERSION | 8 +- src/3rdparty/hwloc/include/hwloc.h | 188 ++- .../hwloc/include/hwloc/autogen/config.h | 6 +- src/3rdparty/hwloc/include/hwloc/bitmap.h | 29 +- src/3rdparty/hwloc/include/hwloc/cuda.h | 8 +- src/3rdparty/hwloc/include/hwloc/cudart.h | 8 +- src/3rdparty/hwloc/include/hwloc/deprecated.h | 4 +- src/3rdparty/hwloc/include/hwloc/distances.h | 53 +- src/3rdparty/hwloc/include/hwloc/gl.h | 2 +- .../hwloc/include/hwloc/glibc-sched.h | 5 +- src/3rdparty/hwloc/include/hwloc/helper.h | 32 +- src/3rdparty/hwloc/include/hwloc/intel-mic.h | 10 +- .../hwloc/include/hwloc/linux-libnuma.h | 3 +- src/3rdparty/hwloc/include/hwloc/linux.h | 3 +- src/3rdparty/hwloc/include/hwloc/nvml.h | 8 +- src/3rdparty/hwloc/include/hwloc/opencl.h | 119 +- .../hwloc/include/hwloc/openfabrics-verbs.h | 6 +- src/3rdparty/hwloc/include/hwloc/plugins.h | 170 ++- src/3rdparty/hwloc/include/hwloc/rename.h | 66 +- src/3rdparty/hwloc/include/hwloc/shmem.h | 2 +- .../hwloc/include/private/components.h | 12 +- src/3rdparty/hwloc/include/private/debug.h | 4 +- .../include/private/internal-components.h | 3 +- src/3rdparty/hwloc/include/private/misc.h | 23 +- src/3rdparty/hwloc/include/private/private.h | 101 +- src/3rdparty/hwloc/include/private/xml.h | 5 +- src/3rdparty/hwloc/src/base64.c | 2 +- src/3rdparty/hwloc/src/bind.c | 13 +- src/3rdparty/hwloc/src/bitmap.c | 55 +- src/3rdparty/hwloc/src/components.c | 370 ++++-- src/3rdparty/hwloc/src/diff.c | 15 +- src/3rdparty/hwloc/src/distances.c | 369 ++++-- src/3rdparty/hwloc/src/misc.c | 25 +- src/3rdparty/hwloc/src/pci-common.c | 401 ++++--- src/3rdparty/hwloc/src/shmem.c | 17 +- src/3rdparty/hwloc/src/topology-noos.c | 30 +- src/3rdparty/hwloc/src/topology-synthetic.c | 84 +- src/3rdparty/hwloc/src/topology-windows.c | 34 +- src/3rdparty/hwloc/src/topology-x86.c | 905 +++++++++------ .../hwloc/src/topology-xml-nolibxml.c | 48 +- src/3rdparty/hwloc/src/topology-xml.c | 470 ++++++-- src/3rdparty/hwloc/src/topology.c | 1026 ++++++++++++----- src/3rdparty/hwloc/src/traversal.c | 131 ++- 47 files changed, 3481 insertions(+), 1552 deletions(-) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index e9533eed..d2bc70d0 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -57,9 +57,9 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU) add_definitions(/DHAVE_BUILTIN_CLEAR_CACHE) elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC) + set(CMAKE_C_FLAGS_RELEASE "/MT /O2 /Oi /DNDEBUG /GL") + set(CMAKE_CXX_FLAGS_RELEASE "/MT /O2 /Oi /DNDEBUG /GL") - set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Ox /Ot /Oi /MT /GL") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox /Ot /Oi /MT /GL") add_definitions(/D_CRT_SECURE_NO_WARNINGS) add_definitions(/D_CRT_NONSTDC_NO_WARNINGS) add_definitions(/DNOMINMAX) diff --git a/src/3rdparty/hwloc/AUTHORS b/src/3rdparty/hwloc/AUTHORS index 7187a723..b4809d15 100644 --- a/src/3rdparty/hwloc/AUTHORS +++ b/src/3rdparty/hwloc/AUTHORS @@ -21,6 +21,7 @@ Nathalie Furmento CNRS Bryon Gloden Brice Goglin Inria Gilles Gouaillardet RIST +Valentin Hoyet Inria Joshua Hursey UWL Alexey Kardashevskiy IBM Rob Latham ANL diff --git a/src/3rdparty/hwloc/CMakeLists.txt b/src/3rdparty/hwloc/CMakeLists.txt index 431c11eb..0e56b6fc 100644 --- 
a/src/3rdparty/hwloc/CMakeLists.txt +++ b/src/3rdparty/hwloc/CMakeLists.txt @@ -5,7 +5,7 @@ include_directories(include) include_directories(src) add_definitions(/D_CRT_SECURE_NO_WARNINGS) -set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /MT") +set(CMAKE_C_FLAGS_RELEASE "/MT /O2 /Ob2 /DNDEBUG") set(HEADERS include/hwloc.h diff --git a/src/3rdparty/hwloc/NEWS b/src/3rdparty/hwloc/NEWS index 664c8d55..99809e6a 100644 --- a/src/3rdparty/hwloc/NEWS +++ b/src/3rdparty/hwloc/NEWS @@ -13,8 +13,96 @@ $HEADER$ This file contains the main features as well as overviews of specific bug fixes (and other actions) for each version of hwloc since version -0.9 (as initially released as "libtopology", then re-branded to "hwloc" -in v0.9.1). +0.9. + + +Version 2.1.0 +------------- +* API + + Add a new "Die" object (HWLOC_OBJ_DIE) for upcoming x86 processors + with multiple dies per package, in the x86 and Linux backends. + + Add the new HWLOC_OBJ_MEMCACHE object type for memory-side caches. + - They are filtered-out by default, except in command-line tools. + - They are only available on very recent platforms running Linux 5.2+ + and uptodate ACPI tables. + - The KNL MCDRAM in cache mode is still exposed as a L3 unless + HWLOC_KNL_MSCACHE_L3=0 in the environment. + + Add HWLOC_RESTRICT_FLAG_BYNODESET and _REMOVE_MEMLESS for restricting + topologies based on some memory nodes. + + Add hwloc_topology_set_components() for blacklisting some components + from being enabled in a topology. + + Add hwloc_bitmap_nr_ulongs() and hwloc_bitmap_from/to_ulongs(), + thanks to Junchao Zhang for the suggestion. + + Improve the API for dealing with disallowed resources + - HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM is replaced with FLAG_INCLUDE_DISALLOWED + and --whole-system command-line options with --disallowed. + . Former names are still accepted for backward compatibility. + - Add hwloc_topology_allow() for changing allowed sets after load(). + - Add the HWLOC_ALLOW=all environment variable to totally ignore + administrative restrictions such as Linux Cgroups. + - Add disallowed_pu and disallowed_numa bits to the discovery support + structure. + + Group objects have a new "dont_merge" attribute to prevent them from + being automatically merged with identical parent or children. + + Add more distances-related features: + - Add hwloc_distances_get_name() to retrieve a string describing + what a distances structure contain. + - Add hwloc_distances_get_by_name() to retrieve distances structures + based on their name. + - Add hwloc_distances_release_remove() + - Distances may now cover objects of different types with new kind + HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES. +* Backends + + Add support for Linux 5.3 new sysfs cpu topology files with Die information. + + Add support for Intel v2 Extended Topology Enumeration in the x86 backend. + + Improve memory locality on Linux by using HMAT initiators (exposed + since Linux 5.2+), and NUMA distances for CPU-less NUMA nodes. + + The x86 backend now properly handles offline CPUs. + + Detect the locality of NVIDIA GPU OpenCL devices. + + Ignore NUMA nodes that correspond to NVIDIA GPU by default. + - They may be unignored if HWLOC_KEEP_NVIDIA_GPU_NUMA_NODES=1 in the environment. + - Fix their CPU locality and add info attributes to identify them. + Thanks to Max Katz and Edgar Leon for the help. + + Add support for IBM S/390 drawers. + + Rework the heuristics for discovering KNL Cluster and Memory modes + to stop assuming all CPUs are online (required for mOS support). 
+ Thanks to Sharath K Bhat for testing patches. + + Ignore NUMA node information from AMD topoext in the x86 backend, + unless HWLOC_X86_TOPOEXT_NUMANODES=1 is set in the environment. + + Expose Linux DAX devices as hwloc Block OS devices. + + Remove support for /proc/cpuinfo-only topology discovery in Linux + kernel prior to 2.6.16. + + Disable POWER device-tree-based topology on Linux by default. + - It may be reenabled by setting HWLOC_USE_DT=1 in the environment. + + Discovery components are now divided in phases that may be individually + blacklisted. + - The linuxio component has been merged back into the linux component. +* Tools + + lstopo + - lstopo factorizes objects by default in the graphical output when + there are more than 4 identical children. + . New options --no-factorize and --factorize may be used to configure this. + . Hit the 'f' key to disable factorizing in interactive outputs. + - Both logical and OS/physical indexes are now displayed by default + for PU and NUMA nodes. + - The X11 and Windows interactive outputs support many keyboard + shortcuts to dynamically customize the attributes, legend, etc. + - Add --linespacing and change default margins and linespacing. + - Add --allow for changing allowed sets. + - Add a native SVG backend. Its graphical output may be slightly less + pretty than Cairo (still used by default if available) but the SVG + code provides attributes to manipulate objects from HTML/JS. + See dynamic_SVG_example.html for an example. + + Add --nodeset options to hwloc-calc for converting between cpusets and + nodesets. + + Add --no-smt to lstopo, hwloc-bind and hwloc-calc to ignore multiple + PU in SMT cores. + + hwloc-annotate may annotate multiple locations at once. + + Add a HTML/JS version of hwloc-ps. See contrib/hwloc-ps.www/README. + + Add bash completions. +* Misc + + Add several FAQ entries in "Compatibility between hwloc versions" + about API version, ABI, XML, Synthetic strings, and shmem topologies. Version 2.0.4 (also included in 1.11.13 when appropriate) @@ -214,6 +302,54 @@ Version 2.0.0 + hwloc now requires a C99 compliant compiler. +Version 1.11.13 (also included in 2.0.4) +--------------- +* Add support for Linux 5.3 new sysfs cpu topology files with Die information. +* Add support for Intel v2 Extended Topology Enumeration in the x86 backend. +* Tiles, Modules and Dies are exposed as Groups for now. + + HWLOC_DONT_MERGE_DIE_GROUPS=1 may be set in the environment to prevent + Die groups from being automatically merged with identical parent or children. +* Ignore NUMA node information from AMD topoext in the x86 backend, + unless HWLOC_X86_TOPOEXT_NUMANODES=1 is set in the environment. +* Group objects have a new "dont_merge" attribute to prevent them from + being automatically merged with identical parent or children. + + +Version 1.11.12 (also included in 2.0.3) +--------------- +* Fix a corner case of hwloc_topology_restrict() where children would + become out-of-order. +* Fix the return length of export_xmlbuffer() functions to always + include the ending \0. + + +Version 1.11.11 (also included in 2.0.2) +--------------- +* Add support for Hygon Dhyana processors in the x86 backend, + thanks to Pu Wen for the patch. +* Fix symbol renaming to also rename internal components, + thanks to Evan Ramos for the patch. +* Fix build on HP-UX, thanks to Richard Lloyd for reporting the issues. +* Detect PCI link speed without being root on Linux >= 4.13. 
+ + +Version 1.11.10 (also included in 2.0.1) +--------------- +* Fix detection of cores and hyperthreads on Mac OS X. +* Serialize pciaccess discovery to fix concurrent topology loads in + multiple threads. +* Fix first touch area memory binding on Linux when thread memory + binding is different. +* Some minor fixes to memory binding. +* Fix hwloc-dump-hwdata to only process SMBIOS information that correspond + to the KNL and KNM configuration. +* Add a heuristic for guessing KNL/KNM memory and cluster modes when + hwloc-dump-hwdata could not run as root earlier. +* Fix discovery of NVMe OS devices on Linux >= 4.0. +* Add get_area_memlocation() on Windows. +* Add CPUVendor, Model, ... attributes on Mac OS X. + + Version 1.11.9 -------------- * Add support for Zhaoxin ZX-C and ZX-D processors in the x86 backend, @@ -941,7 +1077,7 @@ Version 1.6.0 + Add a section about Synthetic topologies in the documentation. -Version 1.5.2 (some of these changes are in v1.6.2 but not in v1.6) +Version 1.5.2 (some of these changes are in 1.6.2 but not in 1.6) ------------- * Use libpciaccess instead of pciutils/libpci by default for I/O discovery. pciutils/libpci is only used if --enable-libpci is given to configure @@ -1076,9 +1212,8 @@ Version 1.4.2 for most of them. -Version 1.4.1 +Version 1.4.1 (contains all 1.3.2 changes) ------------- -* This release contains all changes from v1.3.2. * Fix hwloc_alloc_membind, thanks Karl Napf for reporting the issue. * Fix memory leaks in some get_membind() functions. * Fix helpers converting from Linux libnuma to hwloc (hwloc/linux-libnuma.h) @@ -1091,7 +1226,7 @@ Version 1.4.1 issues. -Version 1.4.0 (does not contain all v1.3.2 changes) +Version 1.4.0 (does not contain all 1.3.2 changes) ------------- * Major features + Add "custom" interface and "assembler" tools to build multi-node @@ -1536,7 +1671,7 @@ Version 1.0.0 Version 0.9.4 (unreleased) --------------------------- +------------- * Fix reseting colors to normal in lstopo -.txt output. * Fix Linux pthread_t binding error report. @@ -1593,7 +1728,7 @@ Version 0.9.1 the physical location of IB devices. -Version 0.9 (libtopology) -------------------------- +Version 0.9 (formerly named "libtopology") +----------- * First release. diff --git a/src/3rdparty/hwloc/VERSION b/src/3rdparty/hwloc/VERSION index 5ebc6bb4..9035ed9c 100644 --- a/src/3rdparty/hwloc/VERSION +++ b/src/3rdparty/hwloc/VERSION @@ -8,8 +8,8 @@ # Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too. major=2 -minor=0 -release=4 +minor=1 +release=0 # greek is used for alpha or beta release tags. If it is non-empty, # it will be appended to the version number. It does not have to be @@ -22,7 +22,7 @@ greek= # The date when this release was created -date="Jun 03, 2019" +date="Sep 30, 2019" # If snapshot=1, then use the value from snapshot_version as the # entire hwloc version (i.e., ignore major, minor, release, and @@ -41,7 +41,7 @@ snapshot_version=${major}.${minor}.${release}${greek}-git # 2. Version numbers are described in the Libtool current:revision:age # format. 
-libhwloc_so_version=15:3:0 +libhwloc_so_version=16:0:1 libnetloc_so_version=0:0:0 # Please also update the lines in contrib/windows/libhwloc.vcxproj diff --git a/src/3rdparty/hwloc/include/hwloc.h b/src/3rdparty/hwloc/include/hwloc.h index ee6da6fd..e106e9cc 100644 --- a/src/3rdparty/hwloc/include/hwloc.h +++ b/src/3rdparty/hwloc/include/hwloc.h @@ -53,7 +53,8 @@ #ifndef HWLOC_H #define HWLOC_H -#include +#include "hwloc/autogen/config.h" + #include #include #include @@ -62,13 +63,13 @@ /* * Symbol transforms */ -#include +#include "hwloc/rename.h" /* * Bitmap definitions */ -#include +#include "hwloc/bitmap.h" #ifdef __cplusplus @@ -86,13 +87,13 @@ extern "C" { * actually modifies the API. * * Users may check for available features at build time using this number - * (see \ref faq_upgrade). + * (see \ref faq_version_api). * * \note This should not be confused with HWLOC_VERSION, the library version. * Two stable releases of the same series usually have the same ::HWLOC_API_VERSION * even if their HWLOC_VERSION are different. */ -#define HWLOC_API_VERSION 0x00020000 +#define HWLOC_API_VERSION 0x00020100 /** \brief Indicate at runtime which hwloc API version was used at build time. * @@ -101,7 +102,7 @@ extern "C" { HWLOC_DECLSPEC unsigned hwloc_get_api_version(void); /** \brief Current component and plugin ABI version (see hwloc/plugins.h) */ -#define HWLOC_COMPONENT_ABI 5 +#define HWLOC_COMPONENT_ABI 6 /** @} */ @@ -186,7 +187,8 @@ typedef enum { HWLOC_OBJ_PACKAGE, /**< \brief Physical package. * The physical package that usually gets inserted * into a socket on the motherboard. - * A processor package usually contains multiple cores. + * A processor package usually contains multiple cores, + * and possibly some dies. */ HWLOC_OBJ_CORE, /**< \brief Core. * A computation unit (may be shared by several @@ -233,6 +235,10 @@ typedef enum { * It is usually close to some cores (the corresponding objects * are descendants of the NUMA node object in the hwloc tree). * + * This is the smallest object representing Memory resources, + * it cannot have any child except Misc objects. + * However it may have Memory-side cache parents. + * * There is always at least one such object in the topology * even if the machine is not NUMA. * @@ -279,6 +285,24 @@ typedef enum { * Misc objects have NULL CPU and node sets. */ + HWLOC_OBJ_MEMCACHE, /**< \brief Memory-side cache (filtered out by default). + * A cache in front of a specific NUMA node. + * + * This object always has at least one NUMA node as a memory child. + * + * Memory objects are not listed in the main children list, + * but rather in the dedicated Memory children list. + * + * Memory-side cache have a special depth ::HWLOC_TYPE_DEPTH_MEMCACHE + * instead of a normal depth just like other objects in the + * main tree. + */ + + HWLOC_OBJ_DIE, /**< \brief Die within a physical package. + * A subpart of the physical package, that contains multiple cores. + * \hideinitializer + */ + HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */ } hwloc_obj_type_t; @@ -297,8 +321,8 @@ typedef enum hwloc_obj_bridge_type_e { /** \brief Type of a OS device. */ typedef enum hwloc_obj_osdev_type_e { - HWLOC_OBJ_OSDEV_BLOCK, /**< \brief Operating system block device. - * For instance "sda" on Linux. */ + HWLOC_OBJ_OSDEV_BLOCK, /**< \brief Operating system block device, or non-volatile memory device. + * For instance "sda" or "dax2.0" on Linux. */ HWLOC_OBJ_OSDEV_GPU, /**< \brief Operating system GPU device. 
* For instance ":0.0" for a GL display, * "card0" for a Linux DRM device. */ @@ -434,9 +458,15 @@ struct hwloc_obj { * These children are listed in \p memory_first_child. */ struct hwloc_obj *memory_first_child; /**< \brief First Memory child. - * NUMA nodes are listed here (\p memory_arity and \p memory_first_child) + * NUMA nodes and Memory-side caches are listed here + * (\p memory_arity and \p memory_first_child) * instead of in the normal children list. * See also hwloc_obj_type_is_memory(). + * + * A memory hierarchy starts from a normal CPU-side object + * (e.g. Package) and ends with NUMA nodes as leaves. + * There might exist some memory-side caches between them + * in the middle of the memory subtree. */ /**@}*/ @@ -471,7 +501,7 @@ struct hwloc_obj { * object and known how (the children path between this object and the PU * objects). * - * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, + * If the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED configuration flag is set, * some of these CPUs may not be allowed for binding, * see hwloc_topology_get_allowed_cpuset(). * @@ -483,7 +513,7 @@ struct hwloc_obj { * * This may include not only the same as the cpuset field, but also some CPUs for * which topology information is unknown or incomplete, some offlines CPUs, and - * the CPUs that are ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag + * the CPUs that are ignored when the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED flag * is not set. * Thus no corresponding PU object may be found in the topology, because the * precise position is undefined. It is however known that it would be somewhere @@ -501,7 +531,7 @@ struct hwloc_obj { * * In the end, these nodes are those that are close to the current object. * - * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, + * If the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED configuration flag is set, * some of these nodes may not be allowed for allocation, * see hwloc_topology_get_allowed_nodeset(). * @@ -516,7 +546,7 @@ struct hwloc_obj { * * This may include not only the same as the nodeset field, but also some NUMA * nodes for which topology information is unknown or incomplete, some offlines - * nodes, and the nodes that are ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM + * nodes, and the nodes that are ignored when the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED * flag is not set. * Thus no corresponding NUMA node object may be found in the topology, because the * precise position is undefined. It is however known that it would be @@ -770,7 +800,8 @@ enum hwloc_get_type_depth_e { HWLOC_TYPE_DEPTH_BRIDGE = -4, /**< \brief Virtual depth for bridge object level. \hideinitializer */ HWLOC_TYPE_DEPTH_PCI_DEVICE = -5, /**< \brief Virtual depth for PCI device object level. \hideinitializer */ HWLOC_TYPE_DEPTH_OS_DEVICE = -6, /**< \brief Virtual depth for software device object level. \hideinitializer */ - HWLOC_TYPE_DEPTH_MISC = -7 /**< \brief Virtual depth for Misc object. \hideinitializer */ + HWLOC_TYPE_DEPTH_MISC = -7, /**< \brief Virtual depth for Misc object. \hideinitializer */ + HWLOC_TYPE_DEPTH_MEMCACHE = -8 /**< \brief Virtual depth for MemCache object. \hideinitializer */ }; /** \brief Return the depth of parents where memory objects are attached. 
@@ -1781,6 +1812,31 @@ HWLOC_DECLSPEC int hwloc_topology_set_xml(hwloc_topology_t __hwloc_restrict topo */ HWLOC_DECLSPEC int hwloc_topology_set_xmlbuffer(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict buffer, int size); +/** \brief Flags to be passed to hwloc_topology_set_components() + */ +enum hwloc_topology_components_flag_e { + /** \brief Blacklist the target component from being used. + * \hideinitializer + */ + HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST = (1UL<<0) +}; + +/** \brief Prevent a discovery component from being used for a topology. + * + * \p name is the name of the discovery component that should not be used + * when loading topology \p topology. The name is a string such as "cuda". + * + * For components with multiple phases, it may also be suffixed with the name + * of a phase, for instance "linux:io". + * + * \p flags should be ::HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST. + * + * This may be used to avoid expensive parts of the discovery process. + * For instance, CUDA-specific discovery may be expensive and unneeded + * while generic I/O discovery could still be useful. + */ +HWLOC_DECLSPEC int hwloc_topology_set_components(hwloc_topology_t __hwloc_restrict topology, unsigned long flags, const char * __hwloc_restrict name); + /** @} */ @@ -1800,28 +1856,27 @@ HWLOC_DECLSPEC int hwloc_topology_set_xmlbuffer(hwloc_topology_t __hwloc_restric * They may also be returned by hwloc_topology_get_flags(). */ enum hwloc_topology_flags_e { - /** \brief Detect the whole system, ignore reservations. + /** \brief Detect the whole system, ignore reservations, include disallowed objects. * * Gather all resources, even if some were disabled by the administrator. * For instance, ignore Linux Cgroup/Cpusets and gather all processors and memory nodes. * * When this flag is not set, PUs and NUMA nodes that are disallowed are not added to the topology. * Parent objects (package, core, cache, etc.) are added only if some of their children are allowed. + * All existing PUs and NUMA nodes in the topology are allowed. + * hwloc_topology_get_allowed_cpuset() and hwloc_topology_get_allowed_nodeset() + * are equal to the root object cpuset and nodeset. * * When this flag is set, the actual sets of allowed PUs and NUMA nodes are given * by hwloc_topology_get_allowed_cpuset() and hwloc_topology_get_allowed_nodeset(). * They may be smaller than the root object cpuset and nodeset. * - * When this flag is not set, all existing PUs and NUMA nodes in the topology - * are allowed. hwloc_topology_get_allowed_cpuset() and hwloc_topology_get_allowed_nodeset() - * are equal to the root object cpuset and nodeset. - * * If the current topology is exported to XML and reimported later, this flag * should be set again in the reimported topology so that disallowed resources * are reimported as well. * \hideinitializer */ - HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM = (1UL<<0), + HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED = (1UL<<0), /** \brief Assume that the selected backend provides the topology for the * system on which we are running. @@ -1901,6 +1956,10 @@ struct hwloc_topology_discovery_support { unsigned char numa; /** \brief Detecting the amount of memory in NUMA nodes is supported. */ unsigned char numa_memory; + /** \brief Detecting and identifying PU objects that are not available to the current process is supported. */ + unsigned char disallowed_pu; + /** \brief Detecting and identifying NUMA nodes that are not available to the current process is supported. 
*/ + unsigned char disallowed_numa; }; /** \brief Flags describing actual PU binding support for this topology. @@ -1998,7 +2057,7 @@ HWLOC_DECLSPEC const struct hwloc_topology_support *hwloc_topology_get_support(h * * By default, most objects are kept (::HWLOC_TYPE_FILTER_KEEP_ALL). * Instruction caches, I/O and Misc objects are ignored by default (::HWLOC_TYPE_FILTER_KEEP_NONE). - * Group levels are ignored unless they bring structure (::HWLOC_TYPE_FILTER_KEEP_STRUCTURE). + * Die and Group levels are ignored unless they bring structure (::HWLOC_TYPE_FILTER_KEEP_STRUCTURE). * * Note that group objects are also ignored individually (without the entire level) * when they do not bring structure. @@ -2063,11 +2122,15 @@ HWLOC_DECLSPEC int hwloc_topology_get_type_filter(hwloc_topology_t topology, hwl */ HWLOC_DECLSPEC int hwloc_topology_set_all_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter); -/** \brief Set the filtering for all cache object types. +/** \brief Set the filtering for all CPU cache object types. + * + * Memory-side caches are not involved since they are not CPU caches. */ HWLOC_DECLSPEC int hwloc_topology_set_cache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter); -/** \brief Set the filtering for all instruction cache object types. +/** \brief Set the filtering for all CPU instruction cache object types. + * + * Memory-side caches are not involved since they are not CPU caches. */ HWLOC_DECLSPEC int hwloc_topology_set_icache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter); @@ -2110,6 +2173,19 @@ enum hwloc_restrict_flags_e { */ HWLOC_RESTRICT_FLAG_REMOVE_CPULESS = (1UL<<0), + /** \brief Restrict by nodeset instead of CPU set. + * Only keep objects whose nodeset is included or partially included in the given set. + * This flag may not be used with ::HWLOC_RESTRICT_FLAG_BYNODESET. + */ + HWLOC_RESTRICT_FLAG_BYNODESET = (1UL<<3), + + /** \brief Remove all objects that became Memory-less. + * By default, only objects that contain no PU and no memory are removed. + * This flag may only be used with ::HWLOC_RESTRICT_FLAG_BYNODESET. + * \hideinitializer + */ + HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS = (1UL<<4), + /** \brief Move Misc objects to ancestors if their parents are removed during restriction. * If this flag is not set, Misc objects are removed when their parents are removed. * \hideinitializer @@ -2123,28 +2199,70 @@ enum hwloc_restrict_flags_e { HWLOC_RESTRICT_FLAG_ADAPT_IO = (1UL<<2) }; -/** \brief Restrict the topology to the given CPU set. +/** \brief Restrict the topology to the given CPU set or nodeset. * * Topology \p topology is modified so as to remove all objects that - * are not included (or partially included) in the CPU set \p cpuset. + * are not included (or partially included) in the CPU set \p set. * All objects CPU and node sets are restricted accordingly. * + * If ::HWLOC_RESTRICT_FLAG_BYNODESET is passed in \p flags, + * \p set is considered a nodeset instead of a CPU set. + * * \p flags is a OR'ed set of ::hwloc_restrict_flags_e. * * \note This call may not be reverted by restricting back to a larger - * cpuset. Once dropped during restriction, objects may not be brought + * set. Once dropped during restriction, objects may not be brought * back, except by loading another topology with hwloc_topology_load(). * * \return 0 on success. * - * \return -1 with errno set to EINVAL if the input cpuset is invalid. + * \return -1 with errno set to EINVAL if the input set is invalid. 
* The topology is not modified in this case. * * \return -1 with errno set to ENOMEM on failure to allocate internal data. * The topology is reinitialized in this case. It should be either * destroyed with hwloc_topology_destroy() or configured and loaded again. */ -HWLOC_DECLSPEC int hwloc_topology_restrict(hwloc_topology_t __hwloc_restrict topology, hwloc_const_cpuset_t cpuset, unsigned long flags); +HWLOC_DECLSPEC int hwloc_topology_restrict(hwloc_topology_t __hwloc_restrict topology, hwloc_const_bitmap_t set, unsigned long flags); + +/** \brief Flags to be given to hwloc_topology_allow(). */ +enum hwloc_allow_flags_e { + /** \brief Mark all objects as allowed in the topology. + * + * \p cpuset and \p nođeset given to hwloc_topology_allow() must be \c NULL. + * \hideinitializer */ + HWLOC_ALLOW_FLAG_ALL = (1UL<<0), + + /** \brief Only allow objects that are available to the current process. + * + * The topology must have ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM so that the set + * of available resources can actually be retrieved from the operating system. + * + * \p cpuset and \p nođeset given to hwloc_topology_allow() must be \c NULL. + * \hideinitializer */ + HWLOC_ALLOW_FLAG_LOCAL_RESTRICTIONS = (1UL<<1), + + /** \brief Allow a custom set of objects, given to hwloc_topology_allow() as \p cpuset and/or \p nodeset parameters. + * \hideinitializer */ + HWLOC_ALLOW_FLAG_CUSTOM = (1UL<<2) +}; + +/** \brief Change the sets of allowed PUs and NUMA nodes in the topology. + * + * This function only works if the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED + * was set on the topology. It does not modify any object, it only changes + * the sets returned by hwloc_topology_get_allowed_cpuset() and + * hwloc_topology_get_allowed_nodeset(). + * + * It is notably useful when importing a topology from another process + * running in a different Linux Cgroup. + * + * \p flags must be set to one flag among ::hwloc_allow_flags_e. + * + * \note Removing objects from a topology should rather be performed with + * hwloc_topology_restrict(). 
+ */ +HWLOC_DECLSPEC int hwloc_topology_allow(hwloc_topology_t __hwloc_restrict topology, hwloc_const_cpuset_t cpuset, hwloc_const_nodeset_t nodeset, unsigned long flags); /** \brief Add a MISC object as a leaf of the topology * @@ -2250,21 +2368,21 @@ HWLOC_DECLSPEC int hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src /* high-level helpers */ -#include +#include "hwloc/helper.h" /* inline code of some functions above */ -#include +#include "hwloc/inlines.h" /* exporting to XML or synthetic */ -#include +#include "hwloc/export.h" /* distances */ -#include +#include "hwloc/distances.h" /* topology diffs */ -#include +#include "hwloc/diff.h" /* deprecated headers */ -#include +#include "hwloc/deprecated.h" #endif /* HWLOC_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/autogen/config.h b/src/3rdparty/hwloc/include/hwloc/autogen/config.h index 14d4481d..36669de5 100644 --- a/src/3rdparty/hwloc/include/hwloc/autogen/config.h +++ b/src/3rdparty/hwloc/include/hwloc/autogen/config.h @@ -11,10 +11,10 @@ #ifndef HWLOC_CONFIG_H #define HWLOC_CONFIG_H -#define HWLOC_VERSION "2.0.4" +#define HWLOC_VERSION "2.1.0" #define HWLOC_VERSION_MAJOR 2 -#define HWLOC_VERSION_MINOR 0 -#define HWLOC_VERSION_RELEASE 4 +#define HWLOC_VERSION_MINOR 1 +#define HWLOC_VERSION_RELEASE 0 #define HWLOC_VERSION_GREEK "" #define __hwloc_restrict diff --git a/src/3rdparty/hwloc/include/hwloc/bitmap.h b/src/3rdparty/hwloc/include/hwloc/bitmap.h index bae623c8..d5b0ea02 100644 --- a/src/3rdparty/hwloc/include/hwloc/bitmap.h +++ b/src/3rdparty/hwloc/include/hwloc/bitmap.h @@ -13,7 +13,8 @@ #ifndef HWLOC_BITMAP_H #define HWLOC_BITMAP_H -#include +#include "hwloc/autogen/config.h" + #include @@ -198,6 +199,9 @@ HWLOC_DECLSPEC int hwloc_bitmap_from_ulong(hwloc_bitmap_t bitmap, unsigned long /** \brief Setup bitmap \p bitmap from unsigned long \p mask used as \p i -th subset */ HWLOC_DECLSPEC int hwloc_bitmap_from_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask); +/** \brief Setup bitmap \p bitmap from unsigned longs \p masks used as first \p nr subsets */ +HWLOC_DECLSPEC int hwloc_bitmap_from_ulongs(hwloc_bitmap_t bitmap, unsigned nr, const unsigned long *masks); + /* * Modifying bitmaps. @@ -256,6 +260,29 @@ HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ulong(hwloc_const_bitmap_t bitmap) /** \brief Convert the \p i -th subset of bitmap \p bitmap into unsigned long mask */ HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ith_ulong(hwloc_const_bitmap_t bitmap, unsigned i) __hwloc_attribute_pure; +/** \brief Convert the first \p nr subsets of bitmap \p bitmap into the array of \p nr unsigned long \p masks + * + * \p nr may be determined earlier with hwloc_bitmap_nr_ulongs(). + * + * \return 0 + */ +HWLOC_DECLSPEC int hwloc_bitmap_to_ulongs(hwloc_const_bitmap_t bitmap, unsigned nr, unsigned long *masks); + +/** \brief Return the number of unsigned longs required for storing bitmap \p bitmap entirely + * + * This is the number of contiguous unsigned longs from the very first bit of the bitmap + * (even if unset) up to the last set bit. + * This is useful for knowing the \p nr parameter to pass to hwloc_bitmap_to_ulongs() + * (or which calls to hwloc_bitmap_to_ith_ulong() are needed) + * to entirely convert a bitmap into multiple unsigned longs. + * + * When called on the output of hwloc_topology_get_topology_cpuset(), + * the returned number is large enough for all cpusets of the topology. + * + * \return -1 if \p bitmap is infinite. 
+ */ +HWLOC_DECLSPEC int hwloc_bitmap_nr_ulongs(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; + /** \brief Test whether index \p id is part of bitmap \p bitmap. * * \return 1 if the bit at index \p id is set in bitmap \p bitmap, 0 otherwise. diff --git a/src/3rdparty/hwloc/include/hwloc/cuda.h b/src/3rdparty/hwloc/include/hwloc/cuda.h index 77c8473e..6f0cda4c 100644 --- a/src/3rdparty/hwloc/include/hwloc/cuda.h +++ b/src/3rdparty/hwloc/include/hwloc/cuda.h @@ -16,11 +16,11 @@ #ifndef HWLOC_CUDA_H #define HWLOC_CUDA_H -#include -#include -#include +#include "hwloc.h" +#include "hwloc/autogen/config.h" +#include "hwloc/helper.h" #ifdef HWLOC_LINUX_SYS -#include +#include "hwloc/linux.h" #endif #include diff --git a/src/3rdparty/hwloc/include/hwloc/cudart.h b/src/3rdparty/hwloc/include/hwloc/cudart.h index 63c7f59c..688b8421 100644 --- a/src/3rdparty/hwloc/include/hwloc/cudart.h +++ b/src/3rdparty/hwloc/include/hwloc/cudart.h @@ -16,11 +16,11 @@ #ifndef HWLOC_CUDART_H #define HWLOC_CUDART_H -#include -#include -#include +#include "hwloc.h" +#include "hwloc/autogen/config.h" +#include "hwloc/helper.h" #ifdef HWLOC_LINUX_SYS -#include +#include "hwloc/linux.h" #endif #include /* for CUDA_VERSION */ diff --git a/src/3rdparty/hwloc/include/hwloc/deprecated.h b/src/3rdparty/hwloc/include/hwloc/deprecated.h index 8f3b1459..4a231f50 100644 --- a/src/3rdparty/hwloc/include/hwloc/deprecated.h +++ b/src/3rdparty/hwloc/include/hwloc/deprecated.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2017 Inria. All rights reserved. + * Copyright © 2009-2018 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -21,6 +21,8 @@ extern "C" { #endif +/* backward compat with v2.0 before WHOLE_SYSTEM renaming */ +#define HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED /* backward compat with v1.11 before System removal */ #define HWLOC_OBJ_SYSTEM HWLOC_OBJ_MACHINE /* backward compat with v1.10 before Socket->Package renaming */ diff --git a/src/3rdparty/hwloc/include/hwloc/distances.h b/src/3rdparty/hwloc/include/hwloc/distances.h index d523f29f..b7baed8a 100644 --- a/src/3rdparty/hwloc/include/hwloc/distances.h +++ b/src/3rdparty/hwloc/include/hwloc/distances.h @@ -87,7 +87,12 @@ enum hwloc_distances_kind_e { * Such values are currently ignored for distance-based grouping. * \hideinitializer */ - HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3) + HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3), + + /** \brief This distances structure covers objects of different types. + * \hideinitializer + */ + HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES = (1UL<<4) }; /** \brief Retrieve distance matrices. @@ -131,20 +136,32 @@ hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth, * * Identical to hwloc_distances_get() with the additional \p type filter. */ -static __hwloc_inline int +HWLOC_DECLSPEC int hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type, unsigned *nr, struct hwloc_distances_s **distances, - unsigned long kind, unsigned long flags) -{ - int depth = hwloc_get_type_depth(topology, type); - if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) { - *nr = 0; - return 0; - } - return hwloc_distances_get_by_depth(topology, depth, nr, distances, kind, flags); -} + unsigned long kind, unsigned long flags); -/** \brief Release a distance matrix structure previously returned by hwloc_distances_get(). 
*/ +/** \brief Retrieve a distance matrix with the given name. + * + * Usually only one distances structure may match a given name. + */ +HWLOC_DECLSPEC int +hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name, + unsigned *nr, struct hwloc_distances_s **distances, + unsigned long flags); + +/** \brief Get a description of what a distances structure contains. + * + * For instance "NUMALatency" for hardware-provided NUMA distances (ACPI SLIT), + * or NULL if unknown. + */ +HWLOC_DECLSPEC const char * +hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances); + +/** \brief Release a distance matrix structure previously returned by hwloc_distances_get(). + * + * \note This function is not required if the structure is removed with hwloc_distances_release_remove(). + */ HWLOC_DECLSPEC void hwloc_distances_release(hwloc_topology_t topology, struct hwloc_distances_s *distances); @@ -221,11 +238,11 @@ enum hwloc_distances_add_flag_e { * The distance from object i to object j is in slot i*nbobjs+j. * * \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e. + * Kind ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES will be automatically added + * if objects of different types are given. * * \p flags configures the behavior of the function using an optional OR'ed set of * ::hwloc_distances_add_flag_e. - * - * Objects must be of the same type. They cannot be of type Group. */ HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology, unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values, @@ -237,7 +254,7 @@ HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology, * gathered through the OS. * * If these distances were used to group objects, these additional - *Group objects are not removed from the topology. + * Group objects are not removed from the topology. */ HWLOC_DECLSPEC int hwloc_distances_remove(hwloc_topology_t topology); @@ -260,6 +277,12 @@ hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type) return hwloc_distances_remove_by_depth(topology, depth); } +/** \brief Release and remove the given distance matrice from the topology. + * + * This function includes a call to hwloc_distances_release(). 
+ */ +HWLOC_DECLSPEC int hwloc_distances_release_remove(hwloc_topology_t topology, struct hwloc_distances_s *distances); + /** @} */ diff --git a/src/3rdparty/hwloc/include/hwloc/gl.h b/src/3rdparty/hwloc/include/hwloc/gl.h index 3e643fa9..897ef784 100644 --- a/src/3rdparty/hwloc/include/hwloc/gl.h +++ b/src/3rdparty/hwloc/include/hwloc/gl.h @@ -14,7 +14,7 @@ #ifndef HWLOC_GL_H #define HWLOC_GL_H -#include +#include "hwloc.h" #include #include diff --git a/src/3rdparty/hwloc/include/hwloc/glibc-sched.h b/src/3rdparty/hwloc/include/hwloc/glibc-sched.h index 1f9ba7cd..99659e03 100644 --- a/src/3rdparty/hwloc/include/hwloc/glibc-sched.h +++ b/src/3rdparty/hwloc/include/hwloc/glibc-sched.h @@ -17,8 +17,9 @@ #ifndef HWLOC_GLIBC_SCHED_H #define HWLOC_GLIBC_SCHED_H -#include -#include +#include "hwloc.h" +#include "hwloc/helper.h" + #include #if !defined _GNU_SOURCE || !defined _SCHED_H || (!defined CPU_SETSIZE && !defined sched_priority) diff --git a/src/3rdparty/hwloc/include/hwloc/helper.h b/src/3rdparty/hwloc/include/hwloc/helper.h index d48df15f..bc27be59 100644 --- a/src/3rdparty/hwloc/include/hwloc/helper.h +++ b/src/3rdparty/hwloc/include/hwloc/helper.h @@ -527,30 +527,36 @@ hwloc_obj_type_is_io(hwloc_obj_type_t type); * * Memory objects are objects attached to their parents * in the Memory children list. - * This current only includes NUMA nodes. + * This current includes NUMA nodes and Memory-side caches. * * \return 1 if an object of type \p type is a Memory object, 0 otherwise. */ HWLOC_DECLSPEC int hwloc_obj_type_is_memory(hwloc_obj_type_t type); -/** \brief Check whether an object type is a Cache (Data, Unified or Instruction). +/** \brief Check whether an object type is a CPU Cache (Data, Unified or Instruction). + * + * Memory-side caches are not CPU caches. * * \return 1 if an object of type \p type is a Cache, 0 otherwise. */ HWLOC_DECLSPEC int hwloc_obj_type_is_cache(hwloc_obj_type_t type); -/** \brief Check whether an object type is a Data or Unified Cache. +/** \brief Check whether an object type is a CPU Data or Unified Cache. * - * \return 1 if an object of type \p type is a Data or Unified Cache, 0 otherwise. + * Memory-side caches are not CPU caches. + * + * \return 1 if an object of type \p type is a CPU Data or Unified Cache, 0 otherwise. */ HWLOC_DECLSPEC int hwloc_obj_type_is_dcache(hwloc_obj_type_t type); -/** \brief Check whether an object type is a Instruction Cache, +/** \brief Check whether an object type is a CPU Instruction Cache, * - * \return 1 if an object of type \p type is a Instruction Cache, 0 otherwise. + * Memory-side caches are not CPU caches. + * + * \return 1 if an object of type \p type is a CPU Instruction Cache, 0 otherwise. */ HWLOC_DECLSPEC int hwloc_obj_type_is_icache(hwloc_obj_type_t type); @@ -914,7 +920,7 @@ hwloc_topology_get_complete_cpuset(hwloc_topology_t topology) __hwloc_attribute_ * \note The returned cpuset is not newly allocated and should thus not be * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. * - * \note This is equivalent to retrieving the root object complete CPU-set. + * \note This is equivalent to retrieving the root object CPU-set. */ HWLOC_DECLSPEC hwloc_const_cpuset_t hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure; @@ -923,11 +929,11 @@ hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_ * * \return the CPU set of allowed logical processors of the system. 
* - * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was not set, + * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was not set, * this is identical to hwloc_topology_get_topology_cpuset(), which means * all PUs are allowed. * - * \note If ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was set, applying + * \note If ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was set, applying * hwloc_bitmap_intersects() on the result of this function and on an object * cpuset checks whether there are allowed PUs inside that object. * Applying hwloc_bitmap_and() returns the list of these allowed PUs. @@ -945,7 +951,7 @@ hwloc_topology_get_allowed_cpuset(hwloc_topology_t topology) __hwloc_attribute_p * \note The returned nodeset is not newly allocated and should thus not be * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. * - * \note This is equivalent to retrieving the root object complete CPU-set. + * \note This is equivalent to retrieving the root object complete nodeset. */ HWLOC_DECLSPEC hwloc_const_nodeset_t hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure; @@ -959,7 +965,7 @@ hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) __hwloc_attribute * \note The returned nodeset is not newly allocated and should thus not be * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. * - * \note This is equivalent to retrieving the root object complete CPU-set. + * \note This is equivalent to retrieving the root object nodeset. */ HWLOC_DECLSPEC hwloc_const_nodeset_t hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure; @@ -968,11 +974,11 @@ hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) __hwloc_attribute * * \return the node set of allowed memory of the system. * - * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was not set, + * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was not set, * this is identical to hwloc_topology_get_topology_nodeset(), which means * all NUMA nodes are allowed. * - * \note If ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was set, applying + * \note If ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was set, applying * hwloc_bitmap_intersects() on the result of this function and on an object * nodeset checks whether there are allowed NUMA nodes inside that object. * Applying hwloc_bitmap_and() returns the list of these allowed NUMA nodes. 
diff --git a/src/3rdparty/hwloc/include/hwloc/intel-mic.h b/src/3rdparty/hwloc/include/hwloc/intel-mic.h index 6f6f9d1b..c504cd7e 100644 --- a/src/3rdparty/hwloc/include/hwloc/intel-mic.h +++ b/src/3rdparty/hwloc/include/hwloc/intel-mic.h @@ -13,11 +13,13 @@ #ifndef HWLOC_INTEL_MIC_H #define HWLOC_INTEL_MIC_H -#include -#include -#include +#include "hwloc.h" +#include "hwloc/autogen/config.h" +#include "hwloc/helper.h" + #ifdef HWLOC_LINUX_SYS -#include +#include "hwloc/linux.h" + #include #include #endif diff --git a/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h b/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h index 7cea4166..0e2cc19f 100644 --- a/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h +++ b/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h @@ -15,7 +15,8 @@ #ifndef HWLOC_LINUX_LIBNUMA_H #define HWLOC_LINUX_LIBNUMA_H -#include +#include "hwloc.h" + #include diff --git a/src/3rdparty/hwloc/include/hwloc/linux.h b/src/3rdparty/hwloc/include/hwloc/linux.h index c409e1c2..ecc86be3 100644 --- a/src/3rdparty/hwloc/include/hwloc/linux.h +++ b/src/3rdparty/hwloc/include/hwloc/linux.h @@ -15,7 +15,8 @@ #ifndef HWLOC_LINUX_H #define HWLOC_LINUX_H -#include +#include "hwloc.h" + #include diff --git a/src/3rdparty/hwloc/include/hwloc/nvml.h b/src/3rdparty/hwloc/include/hwloc/nvml.h index 19710866..1bc2599f 100644 --- a/src/3rdparty/hwloc/include/hwloc/nvml.h +++ b/src/3rdparty/hwloc/include/hwloc/nvml.h @@ -13,11 +13,11 @@ #ifndef HWLOC_NVML_H #define HWLOC_NVML_H -#include -#include -#include +#include "hwloc.h" +#include "hwloc/autogen/config.h" +#include "hwloc/helper.h" #ifdef HWLOC_LINUX_SYS -#include +#include "hwloc/linux.h" #endif #include diff --git a/src/3rdparty/hwloc/include/hwloc/opencl.h b/src/3rdparty/hwloc/include/hwloc/opencl.h index 058968d7..ebf09848 100644 --- a/src/3rdparty/hwloc/include/hwloc/opencl.h +++ b/src/3rdparty/hwloc/include/hwloc/opencl.h @@ -14,19 +14,17 @@ #ifndef HWLOC_OPENCL_H #define HWLOC_OPENCL_H -#include -#include -#include +#include "hwloc.h" +#include "hwloc/autogen/config.h" +#include "hwloc/helper.h" #ifdef HWLOC_LINUX_SYS -#include +#include "hwloc/linux.h" #endif #ifdef __APPLE__ #include -#include #else #include -#include #endif #include @@ -37,17 +35,75 @@ extern "C" { #endif +/* OpenCL extensions aren't always shipped with default headers, and + * they don't always reflect what the installed implementations support. + * Try everything and let the implementation return errors when non supported. + */ +/* Copyright (c) 2008-2018 The Khronos Group Inc. */ + +/* needs "cl_amd_device_attribute_query" device extension, but not strictly required for clGetDeviceInfo() */ +#define HWLOC_CL_DEVICE_TOPOLOGY_AMD 0x4037 +typedef union { + struct { cl_uint type; cl_uint data[5]; } raw; + struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; +} hwloc_cl_device_topology_amd; +#define HWLOC_CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1 + +/* needs "cl_nv_device_attribute_query" device extension, but not strictly required for clGetDeviceInfo() */ +#define HWLOC_CL_DEVICE_PCI_BUS_ID_NV 0x4008 +#define HWLOC_CL_DEVICE_PCI_SLOT_ID_NV 0x4009 + + /** \defgroup hwlocality_opencl Interoperability with OpenCL * * This interface offers ways to retrieve topology information about * OpenCL devices. * - * Only the AMD OpenCL interface currently offers useful locality information - * about its devices. + * Only AMD and NVIDIA OpenCL implementations currently offer useful locality + * information about their devices. 
* * @{ */ +/** \brief Return the domain, bus and device IDs of the OpenCL device \p device. + * + * Device \p device must match the local machine. + */ +static __hwloc_inline int +hwloc_opencl_get_device_pci_busid(cl_device_id device, + unsigned *domain, unsigned *bus, unsigned *dev, unsigned *func) +{ + hwloc_cl_device_topology_amd amdtopo; + cl_uint nvbus, nvslot; + cl_int clret; + + clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL); + if (CL_SUCCESS == clret + && HWLOC_CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD == amdtopo.raw.type) { + *domain = 0; /* can't do anything better */ + *bus = (unsigned) amdtopo.pcie.bus; + *dev = (unsigned) amdtopo.pcie.device; + *func = (unsigned) amdtopo.pcie.function; + return 0; + } + + clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_PCI_BUS_ID_NV, sizeof(nvbus), &nvbus, NULL); + if (CL_SUCCESS == clret) { + clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_PCI_SLOT_ID_NV, sizeof(nvslot), &nvslot, NULL); + if (CL_SUCCESS == clret) { + /* FIXME: PCI bus only uses 8bit, assume nvidia hardcodes the domain in higher bits */ + *domain = nvbus >> 8; + *bus = nvbus & 0xff; + /* non-documented but used in many other projects */ + *dev = nvslot >> 3; + *func = nvslot & 0x7; + return 0; + } + } + + return -1; +} + /** \brief Get the CPU set of logical processors that are physically * close to OpenCL device \p device. * @@ -62,7 +118,7 @@ extern "C" { * and hwloc_opencl_get_device_osdev_by_index(). * * This function is currently only implemented in a meaningful way for - * Linux with the AMD OpenCL implementation; other systems will simply + * Linux with the AMD or NVIDIA OpenCL implementation; other systems will simply * get a full cpuset. */ static __hwloc_inline int @@ -70,35 +126,28 @@ hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unuse cl_device_id device __hwloc_attribute_unused, hwloc_cpuset_t set) { -#if (defined HWLOC_LINUX_SYS) && (defined CL_DEVICE_TOPOLOGY_AMD) - /* If we're on Linux + AMD OpenCL, use the AMD extension + the sysfs mechanism to get the local cpus */ +#if (defined HWLOC_LINUX_SYS) + /* If we're on Linux, try AMD/NVIDIA extensions + the sysfs mechanism to get the local cpus */ #define HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX 128 char path[HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX]; - cl_device_topology_amd amdtopo; - cl_int clret; + unsigned pcidomain, pcibus, pcidev, pcifunc; if (!hwloc_topology_is_thissystem(topology)) { errno = EINVAL; return -1; } - clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL); - if (CL_SUCCESS != clret) { - hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); - return 0; - } - if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) { + if (hwloc_opencl_get_device_pci_busid(device, &pcidomain, &pcibus, &pcidev, &pcifunc) < 0) { hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); return 0; } - sprintf(path, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/local_cpus", - (unsigned) amdtopo.pcie.bus, (unsigned) amdtopo.pcie.device, (unsigned) amdtopo.pcie.function); + sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/local_cpus", pcidomain, pcibus, pcidev, pcifunc); if (hwloc_linux_read_path_as_cpumask(path, set) < 0 || hwloc_bitmap_iszero(set)) hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); #else - /* Non-Linux + AMD OpenCL systems simply get a full cpuset */ + /* Non-Linux systems simply get a full cpuset */ hwloc_bitmap_copy(set, 
hwloc_topology_get_complete_cpuset(topology)); #endif return 0; @@ -140,8 +189,8 @@ hwloc_opencl_get_device_osdev_by_index(hwloc_topology_t topology, * Use OpenCL device attributes to find the corresponding hwloc OS device object. * Return NULL if there is none or if useful attributes are not available. * - * This function currently only works on AMD OpenCL devices that support - * the CL_DEVICE_TOPOLOGY_AMD extension. hwloc_opencl_get_device_osdev_by_index() + * This function currently only works on AMD and NVIDIA OpenCL devices that support + * relevant OpenCL extensions. hwloc_opencl_get_device_osdev_by_index() * should be preferred whenever possible, i.e. when platform and device index * are known. * @@ -159,17 +208,10 @@ static __hwloc_inline hwloc_obj_t hwloc_opencl_get_device_osdev(hwloc_topology_t topology __hwloc_attribute_unused, cl_device_id device __hwloc_attribute_unused) { -#ifdef CL_DEVICE_TOPOLOGY_AMD hwloc_obj_t osdev; - cl_device_topology_amd amdtopo; - cl_int clret; + unsigned pcidomain, pcibus, pcidevice, pcifunc; - clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL); - if (CL_SUCCESS != clret) { - errno = EINVAL; - return NULL; - } - if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) { + if (hwloc_opencl_get_device_pci_busid(device, &pcidomain, &pcibus, &pcidevice, &pcifunc) < 0) { errno = EINVAL; return NULL; } @@ -181,18 +223,15 @@ hwloc_opencl_get_device_osdev(hwloc_topology_t topology __hwloc_attribute_unused continue; if (pcidev && pcidev->type == HWLOC_OBJ_PCI_DEVICE - && pcidev->attr->pcidev.domain == 0 - && pcidev->attr->pcidev.bus == amdtopo.pcie.bus - && pcidev->attr->pcidev.dev == amdtopo.pcie.device - && pcidev->attr->pcidev.func == amdtopo.pcie.function) + && pcidev->attr->pcidev.domain == pcidomain + && pcidev->attr->pcidev.bus == pcibus + && pcidev->attr->pcidev.dev == pcidevice + && pcidev->attr->pcidev.func == pcifunc) return osdev; /* if PCI are filtered out, we need a info attr to match on */ } return NULL; -#else - return NULL; -#endif } /** @} */ diff --git a/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h b/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h index 174ab4a5..d247a8b1 100644 --- a/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h +++ b/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h @@ -19,10 +19,10 @@ #ifndef HWLOC_OPENFABRICS_VERBS_H #define HWLOC_OPENFABRICS_VERBS_H -#include -#include +#include "hwloc.h" +#include "hwloc/autogen/config.h" #ifdef HWLOC_LINUX_SYS -#include +#include "hwloc/linux.h" #endif #include diff --git a/src/3rdparty/hwloc/include/hwloc/plugins.h b/src/3rdparty/hwloc/include/hwloc/plugins.h index cb22000d..0f53ac4d 100644 --- a/src/3rdparty/hwloc/include/hwloc/plugins.h +++ b/src/3rdparty/hwloc/include/hwloc/plugins.h @@ -1,5 +1,5 @@ /* - * Copyright © 2013-2017 Inria. All rights reserved. + * Copyright © 2013-2019 Inria. All rights reserved. * Copyright © 2016 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ @@ -13,7 +13,8 @@ struct hwloc_backend; -#include +#include "hwloc.h" + #ifdef HWLOC_INSIDE_PLUGIN /* needed for hwloc_plugin_check_namespace() */ #include @@ -25,52 +26,36 @@ struct hwloc_backend; * @{ */ -/** \brief Discovery component type */ -typedef enum hwloc_disc_component_type_e { - /** \brief CPU-only discovery through the OS, or generic no-OS support. - * \hideinitializer */ - HWLOC_DISC_COMPONENT_TYPE_CPU = (1<<0), - - /** \brief xml or synthetic, - * platform-specific components such as bgq. 
- * Anything the discovers CPU and everything else. - * No misc backend is expected to complement a global component. - * \hideinitializer */ - HWLOC_DISC_COMPONENT_TYPE_GLOBAL = (1<<1), - - /** \brief OpenCL, Cuda, etc. - * \hideinitializer */ - HWLOC_DISC_COMPONENT_TYPE_MISC = (1<<2) -} hwloc_disc_component_type_t; - /** \brief Discovery component structure * * This is the major kind of components, taking care of the discovery. * They are registered by generic components, either statically-built or as plugins. */ struct hwloc_disc_component { - /** \brief Discovery component type */ - hwloc_disc_component_type_t type; - /** \brief Name. * If this component is built as a plugin, this name does not have to match the plugin filename. */ const char *name; - /** \brief Component types to exclude, as an OR'ed set of ::hwloc_disc_component_type_e. + /** \brief Discovery phases performed by this component. + * OR'ed set of ::hwloc_disc_phase_t + */ + unsigned phases; + + /** \brief Component phases to exclude, as an OR'ed set of ::hwloc_disc_phase_t. * - * For a GLOBAL component, this usually includes all other types (~0). + * For a GLOBAL component, this usually includes all other phases (\c ~UL). * * Other components only exclude types that may bring conflicting * topology information. MISC components should likely not be excluded * since they usually bring non-primary additional information. */ - unsigned excludes; + unsigned excluded_phases; /** \brief Instantiate callback to create a backend from the component. * Parameters data1, data2, data3 are NULL except for components * that have special enabling routines such as hwloc_topology_set_xml(). */ - struct hwloc_backend * (*instantiate)(struct hwloc_disc_component *component, const void *data1, const void *data2, const void *data3); + struct hwloc_backend * (*instantiate)(struct hwloc_topology *topology, struct hwloc_disc_component *component, unsigned excluded_phases, const void *data1, const void *data2, const void *data3); /** \brief Component priority. * Used to sort topology->components, higher priority first. @@ -107,6 +92,72 @@ struct hwloc_disc_component { * @{ */ +/** \brief Discovery phase */ +typedef enum hwloc_disc_phase_e { + /** \brief xml or synthetic, platform-specific components such as bgq. + * Discovers everything including CPU, memory, I/O and everything else. + * A component with a Global phase usually excludes all other phases. + * \hideinitializer */ + HWLOC_DISC_PHASE_GLOBAL = (1U<<0), + + /** \brief CPU discovery. + * \hideinitializer */ + HWLOC_DISC_PHASE_CPU = (1U<<1), + + /** \brief Attach memory to existing CPU objects. + * \hideinitializer */ + HWLOC_DISC_PHASE_MEMORY = (1U<<2), + + /** \brief Attach PCI devices and bridges to existing CPU objects. + * \hideinitializer */ + HWLOC_DISC_PHASE_PCI = (1U<<3), + + /** \brief I/O discovery that requires PCI devices (OS devices such as OpenCL, CUDA, etc.). + * \hideinitializer */ + HWLOC_DISC_PHASE_IO = (1U<<4), + + /** \brief Misc objects that gets added below anything else. + * \hideinitializer */ + HWLOC_DISC_PHASE_MISC = (1U<<5), + + /** \brief Annotating existing objects, adding distances, etc. + * \hideinitializer */ + HWLOC_DISC_PHASE_ANNOTATE = (1U<<6), + + /** \brief Final tweaks to a ready-to-use topology. + * This phase runs once the topology is loaded, before it is returned to the topology. + * Hence it may only use the main hwloc API for modifying the topology, + * for instance by restricting it, adding info attributes, etc. 
+ * \hideinitializer */ + HWLOC_DISC_PHASE_TWEAK = (1U<<7) +} hwloc_disc_phase_t; + +/** \brief Discovery status flags */ +enum hwloc_disc_status_flag_e { + /** \brief The sets of allowed resources were already retrieved \hideinitializer */ + HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES = (1UL<<1) +}; + +/** \brief Discovery status structure + * + * Used by the core and backends to inform about what has been/is being done + * during the discovery process. + */ +struct hwloc_disc_status { + /** \brief The current discovery phase that is performed. + * Must match one of the phases in the component phases field. + */ + hwloc_disc_phase_t phase; + + /** \brief Dynamically excluded phases. + * If a component decides during discovery that some phases are no longer needed. + */ + unsigned excluded_phases; + + /** \brief OR'ed set of hwloc_disc_status_flag_e */ + unsigned long flags; +}; + /** \brief Discovery backend structure * * A backend is the instantiation of a discovery component. @@ -116,6 +167,14 @@ struct hwloc_disc_component { * hwloc_backend_alloc() initializes all fields to default values * that the component may change (except "component" and "next") * before enabling the backend with hwloc_backend_enable(). + * + * Most backends assume that the topology is_thissystem flag is + * set because they talk to the underlying operating system. + * However they may still be used in topologies without the + * is_thissystem flag for debugging reasons. + * In practice, they are usually auto-disabled in such cases + * (excluded by xml or synthetic backends, or by environment + * variables when changing the Linux fsroot or the x86 cpuid path). */ struct hwloc_backend { /** \private Reserved for the core, set by hwloc_backend_alloc() */ @@ -127,12 +186,20 @@ struct hwloc_backend { /** \private Reserved for the core. Used internally to list backends topology->backends. */ struct hwloc_backend * next; + /** \brief Discovery phases performed by this component, possibly without some of them if excluded by other components. + * OR'ed set of ::hwloc_disc_phase_t + */ + unsigned phases; + /** \brief Backend flags, currently always 0. */ unsigned long flags; /** \brief Backend-specific 'is_thissystem' property. - * Set to 0 or 1 if the backend should enforce the thissystem flag when it gets enabled. - * Set to -1 if the backend doesn't care (default). */ + * Set to 0 if the backend disables the thissystem flag for this topology + * (e.g. loading from xml or synthetic string, + * or using a different fsroot on Linux, or a x86 CPUID dump). + * Set to -1 if the backend doesn't care (default). + */ int is_thissystem; /** \brief Backend private data, or NULL if none. */ @@ -147,20 +214,22 @@ struct hwloc_backend { * or because of an actual discovery/gathering failure. * May be NULL. */ - int (*discover)(struct hwloc_backend *backend); + int (*discover)(struct hwloc_backend *backend, struct hwloc_disc_status *status); - /** \brief Callback used by the PCI backend to retrieve the locality of a PCI object from the OS/cpu backend. - * May be NULL. */ + /** \brief Callback to retrieve the locality of a PCI object. + * Called by the PCI core when attaching PCI hierarchy to CPU objects. + * May be NULL. + */ int (*get_pci_busid_cpuset)(struct hwloc_backend *backend, struct hwloc_pcidev_attr_s *busid, hwloc_bitmap_t cpuset); }; /** \brief Allocate a backend structure, set good default values, initialize backend->component and topology, etc. 
* The caller will then modify whatever needed, and call hwloc_backend_enable(). */ -HWLOC_DECLSPEC struct hwloc_backend * hwloc_backend_alloc(struct hwloc_disc_component *component); +HWLOC_DECLSPEC struct hwloc_backend * hwloc_backend_alloc(struct hwloc_topology *topology, struct hwloc_disc_component *component); /** \brief Enable a previously allocated and setup backend. */ -HWLOC_DECLSPEC int hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *backend); +HWLOC_DECLSPEC int hwloc_backend_enable(struct hwloc_backend *backend); /** @} */ @@ -480,7 +549,9 @@ HWLOC_DECLSPEC hwloc_obj_type_t hwloc_pcidisc_check_bridge_type(unsigned device_ * * Returns -1 and destroys /p obj if bridge fields are invalid. */ -HWLOC_DECLSPEC int hwloc_pcidisc_setup_bridge_attr(hwloc_obj_t obj, const unsigned char *config); +HWLOC_DECLSPEC int hwloc_pcidisc_find_bridge_buses(unsigned domain, unsigned bus, unsigned dev, unsigned func, + unsigned *secondary_busp, unsigned *subordinate_busp, + const unsigned char *config); /** \brief Insert a PCI object in the given PCI tree by looking at PCI bus IDs. * @@ -490,10 +561,7 @@ HWLOC_DECLSPEC void hwloc_pcidisc_tree_insert_by_busid(struct hwloc_obj **treep, /** \brief Add some hostbridges on top of the given tree of PCI objects and attach them to the topology. * - * For now, they will be attached to the root object. The core will move them to their actual PCI - * locality using hwloc_pci_belowroot_apply_locality() at the end of the discovery. - * - * In the meantime, other backends lookup PCI objects or localities (for instance to attach OS devices) + * Other backends may lookup PCI objects or localities (for instance to attach OS devices) * by using hwloc_pcidisc_find_by_busid() or hwloc_pcidisc_find_busid_parent(). */ HWLOC_DECLSPEC int hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *tree); @@ -507,32 +575,14 @@ HWLOC_DECLSPEC int hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, st * @{ */ -/** \brief Find the PCI object that matches the bus ID. - * - * To be used after a PCI backend added PCI devices with hwloc_pcidisc_tree_attach() - * and before the core moves them to their actual location with hwloc_pci_belowroot_apply_locality(). - * - * If no exactly matching object is found, return the container bridge if any, or NULL. - * - * On failure, it may be possible to find the PCI locality (instead of the PCI device) - * by calling hwloc_pcidisc_find_busid_parent(). - * - * \note This is semantically identical to hwloc_get_pcidev_by_busid() which only works - * after the topology is fully loaded. - */ -HWLOC_DECLSPEC struct hwloc_obj * hwloc_pcidisc_find_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func); - /** \brief Find the normal parent of a PCI bus ID. * * Look at PCI affinity to find out where the given PCI bus ID should be attached. * - * This function should be used to attach an I/O device directly under a normal - * (non-I/O) object, instead of below a PCI object. - * It is usually used by backends when hwloc_pcidisc_find_by_busid() failed - * to find the hwloc object corresponding to this bus ID, for instance because - * PCI discovery is not supported on this platform. + * This function should be used to attach an I/O device under the corresponding + * PCI object (if any), or under a normal (non-I/O) object with same locality. 
*/ -HWLOC_DECLSPEC struct hwloc_obj * hwloc_pcidisc_find_busid_parent(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func); +HWLOC_DECLSPEC struct hwloc_obj * hwloc_pci_find_parent_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func); /** @} */ diff --git a/src/3rdparty/hwloc/include/hwloc/rename.h b/src/3rdparty/hwloc/include/hwloc/rename.h index 7cef1b2e..a23738d0 100644 --- a/src/3rdparty/hwloc/include/hwloc/rename.h +++ b/src/3rdparty/hwloc/include/hwloc/rename.h @@ -1,13 +1,13 @@ /* * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. - * Copyright © 2010-2018 Inria. All rights reserved. + * Copyright © 2010-2019 Inria. All rights reserved. * See COPYING in top-level directory. */ #ifndef HWLOC_RENAME_H #define HWLOC_RENAME_H -#include +#include "hwloc/autogen/config.h" #ifdef __cplusplus @@ -49,7 +49,9 @@ extern "C" { #define HWLOC_OBJ_MACHINE HWLOC_NAME_CAPS(OBJ_MACHINE) #define HWLOC_OBJ_NUMANODE HWLOC_NAME_CAPS(OBJ_NUMANODE) +#define HWLOC_OBJ_MEMCACHE HWLOC_NAME_CAPS(OBJ_MEMCACHE) #define HWLOC_OBJ_PACKAGE HWLOC_NAME_CAPS(OBJ_PACKAGE) +#define HWLOC_OBJ_DIE HWLOC_NAME_CAPS(OBJ_DIE) #define HWLOC_OBJ_CORE HWLOC_NAME_CAPS(OBJ_CORE) #define HWLOC_OBJ_PU HWLOC_NAME_CAPS(OBJ_PU) #define HWLOC_OBJ_L1CACHE HWLOC_NAME_CAPS(OBJ_L1CACHE) @@ -116,7 +118,7 @@ extern "C" { #define hwloc_topology_flags_e HWLOC_NAME(topology_flags_e) -#define HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_SYSTEM) +#define HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WITH_DISALLOWED) #define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM) #define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES) @@ -124,6 +126,9 @@ extern "C" { #define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic) #define hwloc_topology_set_xml HWLOC_NAME(topology_set_xml) #define hwloc_topology_set_xmlbuffer HWLOC_NAME(topology_set_xmlbuffer) +#define hwloc_topology_components_flag_e HWLOC_NAME(hwloc_topology_components_flag_e) +#define HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST HWLOC_NAME_CAPS(TOPOLOGY_COMPONENTS_FLAG_BLACKLIST) +#define hwloc_topology_set_components HWLOC_NAME(topology_set_components) #define hwloc_topology_set_flags HWLOC_NAME(topology_set_flags) #define hwloc_topology_is_thissystem HWLOC_NAME(topology_is_thissystem) @@ -151,10 +156,18 @@ extern "C" { #define hwloc_restrict_flags_e HWLOC_NAME(restrict_flags_e) #define HWLOC_RESTRICT_FLAG_REMOVE_CPULESS HWLOC_NAME_CAPS(RESTRICT_FLAG_REMOVE_CPULESS) +#define HWLOC_RESTRICT_FLAG_BYNODESET HWLOC_NAME_CAPS(RESTRICT_FLAG_BYNODESET) +#define HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS HWLOC_NAME_CAPS(RESTRICT_FLAG_REMOVE_MEMLESS) #define HWLOC_RESTRICT_FLAG_ADAPT_MISC HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_MISC) #define HWLOC_RESTRICT_FLAG_ADAPT_IO HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_IO) #define hwloc_topology_restrict HWLOC_NAME(topology_restrict) +#define hwloc_allow_flags_e HWLOC_NAME(allow_flags_e) +#define HWLOC_ALLOW_FLAG_ALL HWLOC_NAME_CAPS(ALLOW_FLAG_ALL) +#define HWLOC_ALLOW_FLAG_LOCAL_RESTRICTIONS HWLOC_NAME_CAPS(ALLOW_FLAG_LOCAL_RESTRICTIONS) +#define HWLOC_ALLOW_FLAG_CUSTOM HWLOC_NAME_CAPS(ALLOW_FLAG_CUSTOM) +#define hwloc_topology_allow HWLOC_NAME(topology_allow) + #define hwloc_topology_insert_misc_object HWLOC_NAME(topology_insert_misc_object) #define hwloc_topology_alloc_group_object HWLOC_NAME(topology_alloc_group_object) 
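As an illustration only (not taken from the upstream patch), the plugins.h hunks above replace the old component-type enum with discovery phases and extend the instantiate()/discover() signatures; a minimal discovery component written against the patched header could look roughly like the sketch below. The hwloc_disc_*/hwloc_backend_* names come from the header changes above, while my_discover, my_instantiate and "my-annotator" are hypothetical, and the exact field list of struct hwloc_disc_component should be checked against the installed plugins.h (designated initializers are used so the sketch does not rely on field order).

#include "hwloc.h"
#include "hwloc/plugins.h"

/* Hypothetical discovery callback: only acts during the ANNOTATE phase requested below. */
static int
my_discover(struct hwloc_backend *backend __hwloc_attribute_unused,
            struct hwloc_disc_status *dstatus)
{
  if (dstatus->phase != HWLOC_DISC_PHASE_ANNOTATE)
    return 0;
  /* ... annotate existing objects of backend->topology here ... */
  return 0;
}

/* Instantiate callback with the new signature: the topology and the excluded phases are now passed in. */
static struct hwloc_backend *
my_instantiate(struct hwloc_topology *topology,
               struct hwloc_disc_component *component,
               unsigned excluded_phases __hwloc_attribute_unused,
               const void *data1 __hwloc_attribute_unused,
               const void *data2 __hwloc_attribute_unused,
               const void *data3 __hwloc_attribute_unused)
{
  struct hwloc_backend *backend = hwloc_backend_alloc(topology, component);
  if (!backend)
    return NULL;
  backend->discover = my_discover;
  return backend; /* the core then enables it with hwloc_backend_enable(backend) */
}

/* Phase-based component description; the old .type/.excludes fields are gone. */
static struct hwloc_disc_component my_annotator_component = {
  .name = "my-annotator",
  .phases = HWLOC_DISC_PHASE_ANNOTATE,
  .excluded_phases = 0,
  .instantiate = my_instantiate,
  .priority = 10,
  .enabled_by_default = 1
};

Applications that merely want to disable a component or some of its phases do not need a plugin at all: the hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "opencl") call renamed in the rename.h hunk above (and implemented in the components.c changes later in this patch) blacklists a component before hwloc_topology_load(), and a "name:phase" string such as "linux:io" restricts the blacklist to specific phases.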
#define hwloc_topology_insert_group_object HWLOC_NAME(topology_insert_group_object) @@ -172,6 +185,7 @@ extern "C" { #define HWLOC_TYPE_DEPTH_OS_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_OS_DEVICE) #define HWLOC_TYPE_DEPTH_MISC HWLOC_NAME_CAPS(TYPE_DEPTH_MISC) #define HWLOC_TYPE_DEPTH_NUMANODE HWLOC_NAME_CAPS(TYPE_DEPTH_NUMANODE) +#define HWLOC_TYPE_DEPTH_MEMCACHE HWLOC_NAME_CAPS(TYPE_DEPTH_MEMCACHE) #define hwloc_get_depth_type HWLOC_NAME(get_depth_type) #define hwloc_get_nbobjs_by_depth HWLOC_NAME(get_nbobjs_by_depth) @@ -266,10 +280,12 @@ extern "C" { #define hwloc_bitmap_zero HWLOC_NAME(bitmap_zero) #define hwloc_bitmap_fill HWLOC_NAME(bitmap_fill) #define hwloc_bitmap_from_ulong HWLOC_NAME(bitmap_from_ulong) - +#define hwloc_bitmap_from_ulongs HWLOC_NAME(bitmap_from_ulongs) #define hwloc_bitmap_from_ith_ulong HWLOC_NAME(bitmap_from_ith_ulong) #define hwloc_bitmap_to_ulong HWLOC_NAME(bitmap_to_ulong) #define hwloc_bitmap_to_ith_ulong HWLOC_NAME(bitmap_to_ith_ulong) +#define hwloc_bitmap_to_ulongs HWLOC_NAME(bitmap_to_ulongs) +#define hwloc_bitmap_nr_ulongs HWLOC_NAME(bitmap_nr_ulongs) #define hwloc_bitmap_only HWLOC_NAME(bitmap_only) #define hwloc_bitmap_allbut HWLOC_NAME(bitmap_allbut) #define hwloc_bitmap_set HWLOC_NAME(bitmap_set) @@ -380,10 +396,13 @@ extern "C" { #define HWLOC_DISTANCES_KIND_FROM_USER HWLOC_NAME_CAPS(DISTANCES_KIND_FROM_USER) #define HWLOC_DISTANCES_KIND_MEANS_LATENCY HWLOC_NAME_CAPS(DISTANCES_KIND_MEANS_LATENCY) #define HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH HWLOC_NAME_CAPS(DISTANCES_KIND_MEANS_BANDWIDTH) +#define HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES HWLOC_NAME_CAPS(DISTANCES_KIND_HETEROGENEOUS_TYPES) #define hwloc_distances_get HWLOC_NAME(distances_get) #define hwloc_distances_get_by_depth HWLOC_NAME(distances_get_by_depth) #define hwloc_distances_get_by_type HWLOC_NAME(distances_get_by_type) +#define hwloc_distances_get_by_name HWLOC_NAME(distances_get_by_name) +#define hwloc_distances_get_name HWLOC_NAME(distances_get_name) #define hwloc_distances_release HWLOC_NAME(distances_release) #define hwloc_distances_obj_index HWLOC_NAME(distances_obj_index) #define hwloc_distances_obj_pair_values HWLOC_NAME(distances_pair_values) @@ -396,6 +415,7 @@ extern "C" { #define hwloc_distances_remove HWLOC_NAME(distances_remove) #define hwloc_distances_remove_by_depth HWLOC_NAME(distances_remove_by_depth) #define hwloc_distances_remove_by_type HWLOC_NAME(distances_remove_by_type) +#define hwloc_distances_release_remove HWLOC_NAME(distances_release_remove) /* diff.h */ @@ -469,6 +489,8 @@ extern "C" { /* opencl.h */ +#define hwloc_cl_device_topology_amd HWLOC_NAME(cl_device_topology_amd) +#define hwloc_opencl_get_device_pci_busid HWLOC_NAME(opencl_get_device_pci_ids) #define hwloc_opencl_get_device_cpuset HWLOC_NAME(opencl_get_device_cpuset) #define hwloc_opencl_get_device_osdev HWLOC_NAME(opencl_get_device_osdev) #define hwloc_opencl_get_device_osdev_by_index HWLOC_NAME(opencl_get_device_osdev_by_index) @@ -502,13 +524,22 @@ extern "C" { /* hwloc/plugins.h */ -#define hwloc_disc_component_type_e HWLOC_NAME(disc_component_type_e) -#define HWLOC_DISC_COMPONENT_TYPE_CPU HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_CPU) -#define HWLOC_DISC_COMPONENT_TYPE_GLOBAL HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_GLOBAL) -#define HWLOC_DISC_COMPONENT_TYPE_MISC HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_MISC) -#define hwloc_disc_component_type_t HWLOC_NAME(disc_component_type_t) +#define hwloc_disc_phase_e HWLOC_NAME(disc_phase_e) +#define HWLOC_DISC_PHASE_GLOBAL HWLOC_NAME_CAPS(DISC_PHASE_GLOBAL) +#define 
HWLOC_DISC_PHASE_CPU HWLOC_NAME_CAPS(DISC_PHASE_CPU) +#define HWLOC_DISC_PHASE_MEMORY HWLOC_NAME_CAPS(DISC_PHASE_MEMORY) +#define HWLOC_DISC_PHASE_PCI HWLOC_NAME_CAPS(DISC_PHASE_PCI) +#define HWLOC_DISC_PHASE_IO HWLOC_NAME_CAPS(DISC_PHASE_IO) +#define HWLOC_DISC_PHASE_MISC HWLOC_NAME_CAPS(DISC_PHASE_MISC) +#define HWLOC_DISC_PHASE_ANNOTATE HWLOC_NAME_CAPS(DISC_PHASE_ANNOTATE) +#define HWLOC_DISC_PHASE_TWEAK HWLOC_NAME_CAPS(DISC_PHASE_TWEAK) +#define hwloc_disc_phase_t HWLOC_NAME(disc_phase_t) #define hwloc_disc_component HWLOC_NAME(disc_component) +#define hwloc_disc_status_flag_e HWLOC_NAME(disc_status_flag_e) +#define HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES HWLOC_NAME_CAPS(DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES) +#define hwloc_disc_status HWLOC_NAME(disc_status) + #define hwloc_backend HWLOC_NAME(backend) #define hwloc_backend_alloc HWLOC_NAME(backend_alloc) @@ -540,12 +571,11 @@ extern "C" { #define hwloc_pcidisc_find_cap HWLOC_NAME(pcidisc_find_cap) #define hwloc_pcidisc_find_linkspeed HWLOC_NAME(pcidisc_find_linkspeed) #define hwloc_pcidisc_check_bridge_type HWLOC_NAME(pcidisc_check_bridge_type) -#define hwloc_pcidisc_setup_bridge_attr HWLOC_NAME(pcidisc_setup_bridge_attr) +#define hwloc_pcidisc_find_bridge_buses HWLOC_NAME(pcidisc_find_bridge_buses) #define hwloc_pcidisc_tree_insert_by_busid HWLOC_NAME(pcidisc_tree_insert_by_busid) #define hwloc_pcidisc_tree_attach HWLOC_NAME(pcidisc_tree_attach) -#define hwloc_pcidisc_find_by_busid HWLOC_NAME(pcidisc_find_by_busid) -#define hwloc_pcidisc_find_busid_parent HWLOC_NAME(pcidisc_find_busid_parent) +#define hwloc_pci_find_parent_by_busid HWLOC_NAME(pcidisc_find_busid_parent) /* hwloc/deprecated.h */ @@ -571,8 +601,9 @@ extern "C" { /* private/misc.h */ +#ifndef HWLOC_HAVE_CORRECT_SNPRINTF #define hwloc_snprintf HWLOC_NAME(snprintf) -#define hwloc_namecoloncmp HWLOC_NAME(namecoloncmp) +#endif #define hwloc_ffsl_manual HWLOC_NAME(ffsl_manual) #define hwloc_ffs32 HWLOC_NAME(ffs32) #define hwloc_ffsl_from_ffs32 HWLOC_NAME(ffsl_from_ffs32) @@ -631,8 +662,9 @@ extern "C" { #define hwloc_backends_is_thissystem HWLOC_NAME(backends_is_thissystem) #define hwloc_backends_find_callbacks HWLOC_NAME(backends_find_callbacks) -#define hwloc_backends_init HWLOC_NAME(backends_init) +#define hwloc_topology_components_init HWLOC_NAME(topology_components_init) #define hwloc_backends_disable_all HWLOC_NAME(backends_disable_all) +#define hwloc_topology_components_fini HWLOC_NAME(topology_components_fini) #define hwloc_components_init HWLOC_NAME(components_init) #define hwloc_components_fini HWLOC_NAME(components_fini) @@ -656,7 +688,6 @@ extern "C" { #define hwloc_cuda_component HWLOC_NAME(cuda_component) #define hwloc_gl_component HWLOC_NAME(gl_component) -#define hwloc_linuxio_component HWLOC_NAME(linuxio_component) #define hwloc_nvml_component HWLOC_NAME(nvml_component) #define hwloc_opencl_component HWLOC_NAME(opencl_component) #define hwloc_pci_component HWLOC_NAME(pci_component) @@ -669,6 +700,9 @@ extern "C" { #define hwloc_special_level_s HWLOC_NAME(special_level_s) #define hwloc_pci_forced_locality_s HWLOC_NAME(pci_forced_locality_s) +#define hwloc_pci_locality_s HWLOC_NAME(pci_locality_s) + +#define hwloc_topology_forced_component_s HWLOC_NAME(topology_forced_component) #define hwloc_alloc_root_sets HWLOC_NAME(alloc_root_sets) #define hwloc_setup_pu_level HWLOC_NAME(setup_pu_level) @@ -687,8 +721,8 @@ extern "C" { #define hwloc_pci_discovery_init HWLOC_NAME(pci_discovery_init) #define hwloc_pci_discovery_prepare HWLOC_NAME(pci_discovery_prepare) 
#define hwloc_pci_discovery_exit HWLOC_NAME(pci_discovery_exit) +#define hwloc_pci_find_by_busid HWLOC_NAME(pcidisc_find_by_busid) #define hwloc_find_insert_io_parent_by_complete_cpuset HWLOC_NAME(hwloc_find_insert_io_parent_by_complete_cpuset) -#define hwloc_pci_belowroot_apply_locality HWLOC_NAME(pci_belowroot_apply_locality) #define hwloc__add_info HWLOC_NAME(_add_info) #define hwloc__add_info_nodup HWLOC_NAME(_add_info_nodup) diff --git a/src/3rdparty/hwloc/include/hwloc/shmem.h b/src/3rdparty/hwloc/include/hwloc/shmem.h index 22249463..86f57b4f 100644 --- a/src/3rdparty/hwloc/include/hwloc/shmem.h +++ b/src/3rdparty/hwloc/include/hwloc/shmem.h @@ -10,7 +10,7 @@ #ifndef HWLOC_SHMEM_H #define HWLOC_SHMEM_H -#include +#include "hwloc.h" #ifdef __cplusplus extern "C" { diff --git a/src/3rdparty/hwloc/include/private/components.h b/src/3rdparty/hwloc/include/private/components.h index 8525bbe4..e28c00b1 100644 --- a/src/3rdparty/hwloc/include/private/components.h +++ b/src/3rdparty/hwloc/include/private/components.h @@ -1,5 +1,5 @@ /* - * Copyright © 2012-2015 Inria. All rights reserved. + * Copyright © 2012-2019 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -16,13 +16,13 @@ #ifndef PRIVATE_COMPONENTS_H #define PRIVATE_COMPONENTS_H 1 -#include +#include "hwloc/plugins.h" struct hwloc_topology; extern int hwloc_disc_component_force_enable(struct hwloc_topology *topology, int envvar_forced, /* 1 if forced through envvar, 0 if forced through API */ - int type, const char *name, + const char *name, const void *data1, const void *data2, const void *data3); extern void hwloc_disc_components_enable_others(struct hwloc_topology *topology); @@ -30,10 +30,12 @@ extern void hwloc_disc_components_enable_others(struct hwloc_topology *topology) extern void hwloc_backends_is_thissystem(struct hwloc_topology *topology); extern void hwloc_backends_find_callbacks(struct hwloc_topology *topology); -/* Initialize the list of backends used by a topology */ -extern void hwloc_backends_init(struct hwloc_topology *topology); +/* Initialize the lists of components and backends used by a topology */ +extern void hwloc_topology_components_init(struct hwloc_topology *topology); /* Disable and destroy all backends used by a topology */ extern void hwloc_backends_disable_all(struct hwloc_topology *topology); +/* Cleanup the lists of components used by a topology */ +extern void hwloc_topology_components_fini(struct hwloc_topology *topology); /* Used by the core to setup/destroy the list of components */ extern void hwloc_components_init(void); /* increases components refcount, should be called exactly once per topology (during init) */ diff --git a/src/3rdparty/hwloc/include/private/debug.h b/src/3rdparty/hwloc/include/private/debug.h index 74b697db..637e0141 100644 --- a/src/3rdparty/hwloc/include/private/debug.h +++ b/src/3rdparty/hwloc/include/private/debug.h @@ -11,8 +11,8 @@ #ifndef HWLOC_DEBUG_H #define HWLOC_DEBUG_H -#include -#include +#include "private/autogen/config.h" +#include "private/misc.h" #ifdef HWLOC_DEBUG #include diff --git a/src/3rdparty/hwloc/include/private/internal-components.h b/src/3rdparty/hwloc/include/private/internal-components.h index b138a0eb..d3c89783 100644 --- a/src/3rdparty/hwloc/include/private/internal-components.h +++ b/src/3rdparty/hwloc/include/private/internal-components.h @@ -1,5 +1,5 @@ /* - * Copyright © 2018 Inria. All rights reserved. + * Copyright © 2018-2019 Inria. All rights reserved. * * See COPYING in top-level directory. 
*/ @@ -29,7 +29,6 @@ HWLOC_DECLSPEC extern const struct hwloc_component hwloc_x86_component; /* I/O discovery */ HWLOC_DECLSPEC extern const struct hwloc_component hwloc_cuda_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_gl_component; -HWLOC_DECLSPEC extern const struct hwloc_component hwloc_linuxio_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_nvml_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_opencl_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component; diff --git a/src/3rdparty/hwloc/include/private/misc.h b/src/3rdparty/hwloc/include/private/misc.h index 66608bc7..6c02d793 100644 --- a/src/3rdparty/hwloc/include/private/misc.h +++ b/src/3rdparty/hwloc/include/private/misc.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -11,9 +11,9 @@ #ifndef HWLOC_PRIVATE_MISC_H #define HWLOC_PRIVATE_MISC_H -#include -#include -#include +#include "hwloc/autogen/config.h" +#include "private/autogen/config.h" +#include "hwloc.h" #ifdef HWLOC_HAVE_DECL_STRNCASECMP #ifdef HAVE_STRINGS_H @@ -439,14 +439,14 @@ hwloc_linux_pci_link_speed_from_string(const char *string) static __hwloc_inline int hwloc__obj_type_is_normal (hwloc_obj_type_t type) { /* type contiguity is asserted in topology_check() */ - return type <= HWLOC_OBJ_GROUP; + return type <= HWLOC_OBJ_GROUP || type == HWLOC_OBJ_DIE; } -/* Any object attached to memory children, currently only NUMA nodes */ +/* Any object attached to memory children, currently NUMA nodes or Memory-side caches */ static __hwloc_inline int hwloc__obj_type_is_memory (hwloc_obj_type_t type) { /* type contiguity is asserted in topology_check() */ - return type == HWLOC_OBJ_NUMANODE; + return type == HWLOC_OBJ_NUMANODE || type == HWLOC_OBJ_MEMCACHE; } /* I/O or Misc object, without cpusets or nodesets. 
*/ @@ -463,6 +463,7 @@ static __hwloc_inline int hwloc__obj_type_is_io (hwloc_obj_type_t type) return type >= HWLOC_OBJ_BRIDGE && type <= HWLOC_OBJ_OS_DEVICE; } +/* Any CPU caches (not Memory-side caches) */ static __hwloc_inline int hwloc__obj_type_is_cache(hwloc_obj_type_t type) { @@ -572,12 +573,4 @@ typedef SSIZE_T ssize_t; # endif #endif -#if defined HWLOC_WIN_SYS && !defined __MINGW32__ && !defined(__CYGWIN__) -/* MSVC doesn't support C99 variable-length array */ -#include -#define HWLOC_VLA(_type, _name, _nb) _type *_name = (_type*) _alloca((_nb)*sizeof(_type)) -#else -#define HWLOC_VLA(_type, _name, _nb) _type _name[_nb] -#endif - #endif /* HWLOC_PRIVATE_MISC_H */ diff --git a/src/3rdparty/hwloc/include/private/private.h b/src/3rdparty/hwloc/include/private/private.h index 8e3964ab..5f878937 100644 --- a/src/3rdparty/hwloc/include/private/private.h +++ b/src/3rdparty/hwloc/include/private/private.h @@ -22,11 +22,12 @@ #ifndef HWLOC_PRIVATE_H #define HWLOC_PRIVATE_H -#include -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "hwloc/bitmap.h" +#include "private/components.h" +#include "private/misc.h" + #include #ifdef HAVE_UNISTD_H #include @@ -39,7 +40,7 @@ #endif #include -#define HWLOC_TOPOLOGY_ABI 0x20000 /* version of the layout of struct topology */ +#define HWLOC_TOPOLOGY_ABI 0x20100 /* version of the layout of struct topology */ /***************************************************** * WARNING: @@ -67,12 +68,13 @@ struct hwloc_topology { void *adopted_shmem_addr; size_t adopted_shmem_length; -#define HWLOC_NR_SLEVELS 5 +#define HWLOC_NR_SLEVELS 6 #define HWLOC_SLEVEL_NUMANODE 0 #define HWLOC_SLEVEL_BRIDGE 1 #define HWLOC_SLEVEL_PCIDEV 2 #define HWLOC_SLEVEL_OSDEV 3 #define HWLOC_SLEVEL_MISC 4 +#define HWLOC_SLEVEL_MEMCACHE 5 /* order must match negative depth, it's asserted in setup_defaults() */ #define HWLOC_SLEVEL_FROM_DEPTH(x) (HWLOC_TYPE_DEPTH_NUMANODE-(x)) #define HWLOC_SLEVEL_TO_DEPTH(x) (HWLOC_TYPE_DEPTH_NUMANODE-(x)) @@ -86,6 +88,7 @@ struct hwloc_topology { hwloc_bitmap_t allowed_nodeset; struct hwloc_binding_hooks { + /* These are actually rather OS hooks since some of them are not about binding */ int (*set_thisproc_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags); int (*get_thisproc_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags); int (*set_thisthread_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags); @@ -127,20 +130,35 @@ struct hwloc_topology { int userdata_not_decoded; struct hwloc_internal_distances_s { - hwloc_obj_type_t type; + char *name; /* FIXME: needs an API to set it from user */ + + unsigned id; /* to match the container id field of public distances structure + * not exported to XML, regenerated during _add() + */ + + /* if all objects have the same type, different_types is NULL and unique_type is valid. + * otherwise unique_type is HWLOC_OBJ_TYPE_NONE and different_types contains individual objects types. + */ + hwloc_obj_type_t unique_type; + hwloc_obj_type_t *different_types; + /* add union hwloc_obj_attr_u if we ever support groups */ unsigned nbobjs; - uint64_t *indexes; /* array of OS or GP indexes before we can convert them into objs. */ + uint64_t *indexes; /* array of OS or GP indexes before we can convert them into objs. + * OS indexes for distances covering only PUs or only NUMAnodes. 
+ */ +#define HWLOC_DIST_TYPE_USE_OS_INDEX(_type) ((_type) == HWLOC_OBJ_PU || (_type == HWLOC_OBJ_NUMANODE)) uint64_t *values; /* distance matrices, ordered according to the above indexes/objs array. * distance from i to j is stored in slot i*nbnodes+j. */ unsigned long kind; +#define HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID (1U<<0) /* if the objs array is valid below */ + unsigned iflags; + /* objects are currently stored in physical_index order */ hwloc_obj_t *objs; /* array of objects */ - int objs_are_valid; /* set to 1 if the array objs is still valid, 0 if needs refresh */ - unsigned id; /* to match the container id field of public distances structure */ struct hwloc_internal_distances_s *prev, *next; } *first_dist, *last_dist; unsigned next_dist_id; @@ -153,8 +171,9 @@ struct hwloc_topology { /* list of enabled backends. */ struct hwloc_backend * backends; - struct hwloc_backend * get_pci_busid_cpuset_backend; - unsigned backend_excludes; + struct hwloc_backend * get_pci_busid_cpuset_backend; /* first backend that provides get_pci_busid_cpuset() callback */ + unsigned backend_phases; + unsigned backend_excluded_phases; /* memory allocator for topology objects */ struct hwloc_tma * tma; @@ -176,7 +195,6 @@ struct hwloc_topology { struct hwloc_numanode_attr_s machine_memory; /* pci stuff */ - int need_pci_belowroot_apply_locality; int pci_has_forced_locality; unsigned pci_forced_locality_nr; struct hwloc_pci_forced_locality_s { @@ -185,13 +203,32 @@ struct hwloc_topology { hwloc_bitmap_t cpuset; } * pci_forced_locality; + /* component blacklisting */ + unsigned nr_blacklisted_components; + struct hwloc_topology_forced_component_s { + struct hwloc_disc_component *component; + unsigned phases; + } *blacklisted_components; + + /* FIXME: keep until topo destroy and reuse for finding specific buses */ + struct hwloc_pci_locality_s { + unsigned domain; + unsigned bus_min; + unsigned bus_max; + hwloc_bitmap_t cpuset; + hwloc_obj_t parent; + struct hwloc_pci_locality_s *prev, *next; + } *first_pci_locality, *last_pci_locality; }; extern void hwloc_alloc_root_sets(hwloc_obj_t root); extern void hwloc_setup_pu_level(struct hwloc_topology *topology, unsigned nb_pus); extern int hwloc_get_sysctlbyname(const char *name, int64_t *n); extern int hwloc_get_sysctl(int name[], unsigned namelen, int *n); -extern int hwloc_fallback_nbprocessors(struct hwloc_topology *topology); + +/* returns the number of CPU from the OS (only valid if thissystem) */ +#define HWLOC_FALLBACK_NBPROCESSORS_INCLUDE_OFFLINE 1 /* by default we try to get only the online CPUs */ +extern int hwloc_fallback_nbprocessors(unsigned flags); extern int hwloc__object_cpusets_compare_first(hwloc_obj_t obj1, hwloc_obj_t obj2); extern void hwloc__reorder_children(hwloc_obj_t parent); @@ -208,19 +245,17 @@ extern void hwloc_pci_discovery_init(struct hwloc_topology *topology); extern void hwloc_pci_discovery_prepare(struct hwloc_topology *topology); extern void hwloc_pci_discovery_exit(struct hwloc_topology *topology); +/* Look for an object matching the given domain/bus/func, + * either exactly or return the smallest container bridge + */ +extern struct hwloc_obj * hwloc_pci_find_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func); + /* Look for an object matching complete cpuset exactly, or insert one. * Return NULL on failure. * Return a good fallback (object above) on failure to insert. 
*/ extern hwloc_obj_t hwloc_find_insert_io_parent_by_complete_cpuset(struct hwloc_topology *topology, hwloc_cpuset_t cpuset); -/* Move PCI objects currently attached to the root object ot their actual location. - * Called by the core at the end of hwloc_topology_load(). - * Prior to this call, all PCI objects may be found below the root object. - * After this call and a reconnect of levels, all PCI objects are available through levels. - */ -extern int hwloc_pci_belowroot_apply_locality(struct hwloc_topology *topology); - extern int hwloc__add_info(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value); extern int hwloc__add_info_nodup(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value, int replace); extern int hwloc__move_infos(struct hwloc_info_s **dst_infosp, unsigned *dst_countp, struct hwloc_info_s **src_infosp, unsigned *src_countp); @@ -313,8 +348,8 @@ extern void hwloc_internal_distances_prepare(hwloc_topology_t topology); extern void hwloc_internal_distances_destroy(hwloc_topology_t topology); extern int hwloc_internal_distances_dup(hwloc_topology_t new, hwloc_topology_t old); extern void hwloc_internal_distances_refresh(hwloc_topology_t topology); -extern int hwloc_internal_distances_add(hwloc_topology_t topology, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags); -extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, hwloc_obj_type_t type, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags); +extern int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags); +extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags); extern void hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology); /* encode src buffer into target buffer. @@ -330,13 +365,19 @@ extern int hwloc_encode_to_base64(const char *src, size_t srclength, char *targe */ extern int hwloc_decode_from_base64(char const *src, char *target, size_t targsize); -/* Check whether needle matches the beginning of haystack, at least n, and up - * to a colon or \0 */ -extern int hwloc_namecoloncmp(const char *haystack, const char *needle, size_t n); - /* On some systems, snprintf returns the size of written data, not the actually - * required size. hwloc_snprintf always report the actually required size. */ + * required size. Sometimes it returns -1 on truncation too. + * And sometimes it doesn't like NULL output buffers. + * http://www.gnu.org/software/gnulib/manual/html_node/snprintf.html + * + * hwloc_snprintf behaves properly, but it's a bit overkill on the vast majority + * of platforms, so don't enable it unless really needed. + */ +#ifdef HWLOC_HAVE_CORRECT_SNPRINTF +#define hwloc_snprintf snprintf +#else extern int hwloc_snprintf(char *str, size_t size, const char *format, ...) __hwloc_attribute_format(printf, 3, 4); +#endif /* Return the name of the currently running program, if supported. * If not NULL, must be freed by the caller. 
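For readers of the private/misc.h and private/private.h hunks above, which wire the new Die and memory-side cache objects into the type predicates and special levels, here is a short sketch (not part of the upstream patch) of how the public API exposes them. It only uses long-standing hwloc calls plus the HWLOC_OBJ_DIE/HWLOC_OBJ_MEMCACHE enum values added by this update, and it assumes memory-side caches may be filtered out by default, hence the explicit type filter.

#include <stdio.h>
#include "hwloc.h"

/* Sketch: count Die and memory-side cache objects in the local machine's topology. */
int main(void)
{
  hwloc_topology_t topology;
  int nr_dies, nr_memcaches, i;

  hwloc_topology_init(&topology);
  /* Memory-side caches are likely filtered out by default; keep them for this demo. */
  hwloc_topology_set_type_filter(topology, HWLOC_OBJ_MEMCACHE, HWLOC_TYPE_FILTER_KEEP_ALL);
  hwloc_topology_load(topology);

  nr_dies = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_DIE);
  nr_memcaches = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_MEMCACHE);
  printf("%d die(s), %d memory-side cache(s)\n", nr_dies, nr_memcaches);

  for(i = 0; i < nr_dies; i++) {
    hwloc_obj_t die = hwloc_get_obj_by_type(topology, HWLOC_OBJ_DIE, i);
    printf("Die L#%u contains %d PU(s)\n",
           die->logical_index, hwloc_bitmap_weight(die->cpuset));
  }

  hwloc_topology_destroy(topology);
  return 0;
}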
@@ -356,7 +397,7 @@ extern char * hwloc_progname(struct hwloc_topology *topology); #define HWLOC_GROUP_KIND_INTEL_MODULE 102 /* no subkind */ #define HWLOC_GROUP_KIND_INTEL_TILE 103 /* no subkind */ #define HWLOC_GROUP_KIND_INTEL_DIE 104 /* no subkind */ -#define HWLOC_GROUP_KIND_S390_BOOK 110 /* no subkind */ +#define HWLOC_GROUP_KIND_S390_BOOK 110 /* subkind 0 is book, subkind 1 is drawer (group of books) */ #define HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT 120 /* no subkind */ /* then, OS-specific groups */ #define HWLOC_GROUP_KIND_SOLARIS_PG_HW_PERF 200 /* subkind is group width */ diff --git a/src/3rdparty/hwloc/include/private/xml.h b/src/3rdparty/hwloc/include/private/xml.h index 7c73384d..f59fca1f 100644 --- a/src/3rdparty/hwloc/include/private/xml.h +++ b/src/3rdparty/hwloc/include/private/xml.h @@ -1,12 +1,12 @@ /* - * Copyright © 2009-2019 Inria. All rights reserved. + * Copyright © 2009-2017 Inria. All rights reserved. * See COPYING in top-level directory. */ #ifndef PRIVATE_XML_H #define PRIVATE_XML_H 1 -#include +#include "hwloc.h" #include @@ -54,7 +54,6 @@ struct hwloc_xml_backend_data_s { unsigned nbnumanodes; hwloc_obj_t first_numanode, last_numanode; /* temporary cousin-list for handling v1distances */ struct hwloc__xml_imported_v1distances_s *first_v1dist, *last_v1dist; - int dont_merge_die_groups; }; /************** diff --git a/src/3rdparty/hwloc/src/base64.c b/src/3rdparty/hwloc/src/base64.c index 7b3e1210..4df67bf9 100644 --- a/src/3rdparty/hwloc/src/base64.c +++ b/src/3rdparty/hwloc/src/base64.c @@ -11,7 +11,7 @@ /* include hwloc's config before anything else * so that extensions and features are properly enabled */ -#include +#include "private/private.h" /* $OpenBSD: base64.c,v 1.5 2006/10/21 09:55:03 otto Exp $ */ diff --git a/src/3rdparty/hwloc/src/bind.c b/src/3rdparty/hwloc/src/bind.c index b3457bc7..0bd85e25 100644 --- a/src/3rdparty/hwloc/src/bind.c +++ b/src/3rdparty/hwloc/src/bind.c @@ -1,15 +1,16 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * Copyright © 2009-2010, 2012 Université Bordeaux * Copyright © 2011-2015 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/private.h" +#include "hwloc/helper.h" + #ifdef HAVE_SYS_MMAN_H # include #endif @@ -885,6 +886,8 @@ hwloc_set_binding_hooks(struct hwloc_topology *topology) } else { /* not this system, use dummy binding hooks that do nothing (but don't return ENOSYS) */ hwloc_set_dummy_hooks(&topology->binding_hooks, &topology->support); + + /* Linux has some hooks that also work in this case, but they are not strictly needed yet. */ } /* if not is_thissystem, set_cpubind is fake diff --git a/src/3rdparty/hwloc/src/bitmap.c b/src/3rdparty/hwloc/src/bitmap.c index ea1264af..5fb9cd35 100644 --- a/src/3rdparty/hwloc/src/bitmap.c +++ b/src/3rdparty/hwloc/src/bitmap.c @@ -1,18 +1,18 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2017 Inria. All rights reserved. + * Copyright © 2009-2018 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
*/ -#include -#include -#include -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc/autogen/config.h" +#include "hwloc.h" +#include "private/misc.h" +#include "private/private.h" +#include "private/debug.h" +#include "hwloc/bitmap.h" #include #include @@ -766,6 +766,21 @@ int hwloc_bitmap_from_ith_ulong(struct hwloc_bitmap_s *set, unsigned i, unsigned return 0; } +int hwloc_bitmap_from_ulongs(struct hwloc_bitmap_s *set, unsigned nr, const unsigned long *masks) +{ + unsigned j; + + HWLOC__BITMAP_CHECK(set); + + if (hwloc_bitmap_reset_by_ulongs(set, nr) < 0) + return -1; + + for(j=0; julongs[j] = masks[j]; + set->infinite = 0; + return 0; +} + unsigned long hwloc_bitmap_to_ulong(const struct hwloc_bitmap_s *set) { HWLOC__BITMAP_CHECK(set); @@ -780,6 +795,30 @@ unsigned long hwloc_bitmap_to_ith_ulong(const struct hwloc_bitmap_s *set, unsign return HWLOC_SUBBITMAP_READULONG(set, i); } +int hwloc_bitmap_to_ulongs(const struct hwloc_bitmap_s *set, unsigned nr, unsigned long *masks) +{ + unsigned j; + + HWLOC__BITMAP_CHECK(set); + + for(j=0; jinfinite) + return -1; + + last = hwloc_bitmap_last(set); + return (last + HWLOC_BITS_PER_LONG-1)/HWLOC_BITS_PER_LONG; +} + int hwloc_bitmap_only(struct hwloc_bitmap_s * set, unsigned cpu) { unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu); diff --git a/src/3rdparty/hwloc/src/components.c b/src/3rdparty/hwloc/src/components.c index bd7c00e3..5c2879b6 100644 --- a/src/3rdparty/hwloc/src/components.c +++ b/src/3rdparty/hwloc/src/components.c @@ -1,18 +1,19 @@ /* - * Copyright © 2009-2017 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * Copyright © 2012 Université Bordeaux * See COPYING in top-level directory. */ -#include -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/private.h" +#include "private/xml.h" +#include "private/misc.h" #define HWLOC_COMPONENT_STOP_NAME "stop" #define HWLOC_COMPONENT_EXCLUDE_CHAR '-' #define HWLOC_COMPONENT_SEPS "," +#define HWLOC_COMPONENT_PHASESEP_CHAR ':' /* list of all registered discovery components, sorted by priority, higher priority first. * noos is last because its priority is 0. 
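The bitmap.c hunk above adds hwloc_bitmap_from_ulongs(), hwloc_bitmap_to_ulongs() and hwloc_bitmap_nr_ulongs(). As a small usage sketch (not part of the upstream patch), the helper below round-trips a finite bitmap through a flat array of unsigned longs, for instance to hand a cpuset to a foreign API; roundtrip() is a hypothetical name and error handling is kept minimal.

#include <stdio.h>
#include <stdlib.h>
#include "hwloc.h"

/* Sketch: serialize a finite bitmap into unsigned longs and rebuild it. */
static void roundtrip(hwloc_const_bitmap_t set)
{
  int nr = hwloc_bitmap_nr_ulongs(set);   /* -1 for infinitely-set bitmaps */
  unsigned long *masks;
  hwloc_bitmap_t copy;

  if (nr < 0)
    return;

  masks = malloc(nr * sizeof(*masks));
  if (!masks)
    return;
  hwloc_bitmap_to_ulongs(set, (unsigned) nr, masks);

  copy = hwloc_bitmap_alloc();
  if (copy) {
    hwloc_bitmap_from_ulongs(copy, (unsigned) nr, masks);
    /* for finite sets, copy now equals the original */
    printf("round-trip equal: %d\n", hwloc_bitmap_isequal(set, copy));
    hwloc_bitmap_free(copy);
  }
  free(masks);
}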
@@ -232,17 +233,6 @@ hwloc_plugins_init(void) #endif /* HWLOC_HAVE_PLUGINS */ -static const char * -hwloc_disc_component_type_string(hwloc_disc_component_type_t type) -{ - switch (type) { - case HWLOC_DISC_COMPONENT_TYPE_CPU: return "cpu"; - case HWLOC_DISC_COMPONENT_TYPE_GLOBAL: return "global"; - case HWLOC_DISC_COMPONENT_TYPE_MISC: return "misc"; - default: return "**unknown**"; - } -} - static int hwloc_disc_component_register(struct hwloc_disc_component *component, const char *filename) @@ -256,21 +246,26 @@ hwloc_disc_component_register(struct hwloc_disc_component *component, return -1; } if (strchr(component->name, HWLOC_COMPONENT_EXCLUDE_CHAR) + || strchr(component->name, HWLOC_COMPONENT_PHASESEP_CHAR) || strcspn(component->name, HWLOC_COMPONENT_SEPS) != strlen(component->name)) { if (hwloc_components_verbose) fprintf(stderr, "Cannot register discovery component with name `%s' containing reserved characters `%c" HWLOC_COMPONENT_SEPS "'\n", component->name, HWLOC_COMPONENT_EXCLUDE_CHAR); return -1; } - /* check that the component type is valid */ - switch ((unsigned) component->type) { - case HWLOC_DISC_COMPONENT_TYPE_CPU: - case HWLOC_DISC_COMPONENT_TYPE_GLOBAL: - case HWLOC_DISC_COMPONENT_TYPE_MISC: - break; - default: - fprintf(stderr, "Cannot register discovery component `%s' with unknown type %u\n", - component->name, (unsigned) component->type); + + /* check that the component phases are valid */ + if (!component->phases + || (component->phases != HWLOC_DISC_PHASE_GLOBAL + && component->phases & ~(HWLOC_DISC_PHASE_CPU + |HWLOC_DISC_PHASE_MEMORY + |HWLOC_DISC_PHASE_PCI + |HWLOC_DISC_PHASE_IO + |HWLOC_DISC_PHASE_MISC + |HWLOC_DISC_PHASE_ANNOTATE + |HWLOC_DISC_PHASE_TWEAK))) { + fprintf(stderr, "Cannot register discovery component `%s' with invalid phases 0x%x\n", + component->name, component->phases); return -1; } @@ -295,8 +290,8 @@ hwloc_disc_component_register(struct hwloc_disc_component *component, prev = &((*prev)->next); } if (hwloc_components_verbose) - fprintf(stderr, "Registered %s discovery component `%s' with priority %u (%s%s)\n", - hwloc_disc_component_type_string(component->type), component->name, component->priority, + fprintf(stderr, "Registered discovery component `%s' phases 0x%x with priority %u (%s%s)\n", + component->name, component->phases, component->priority, filename ? "from plugin " : "statically build", filename ? 
filename : ""); prev = &hwloc_disc_components; @@ -310,7 +305,7 @@ hwloc_disc_component_register(struct hwloc_disc_component *component, return 0; } -#include +#include "static-components.h" static void (**hwloc_component_finalize_cbs)(unsigned long); static unsigned hwloc_component_finalize_cb_count; @@ -415,31 +410,152 @@ hwloc_components_init(void) } void -hwloc_backends_init(struct hwloc_topology *topology) +hwloc_topology_components_init(struct hwloc_topology *topology) { + topology->nr_blacklisted_components = 0; + topology->blacklisted_components = NULL; + topology->backends = NULL; - topology->backend_excludes = 0; + topology->backend_phases = 0; + topology->backend_excluded_phases = 0; } +/* look for name among components, ignoring things after `:' */ static struct hwloc_disc_component * -hwloc_disc_component_find(int type /* hwloc_disc_component_type_t or -1 if any */, - const char *name /* name of NULL if any */) +hwloc_disc_component_find(const char *name, const char **endp) { - struct hwloc_disc_component *comp = hwloc_disc_components; + struct hwloc_disc_component *comp; + size_t length; + const char *end = strchr(name, HWLOC_COMPONENT_PHASESEP_CHAR); + if (end) { + length = end-name; + if (endp) + *endp = end+1; + } else { + length = strlen(name); + if (endp) + *endp = NULL; + } + + comp = hwloc_disc_components; while (NULL != comp) { - if ((-1 == type || type == (int) comp->type) - && (NULL == name || !strcmp(name, comp->name))) + if (!strncmp(name, comp->name, length)) return comp; comp = comp->next; } return NULL; } +static unsigned +hwloc_phases_from_string(const char *s) +{ + if (!s) + return ~0U; + if (s[0]<'0' || s[0]>'9') { + if (!strcasecmp(s, "global")) + return HWLOC_DISC_PHASE_GLOBAL; + else if (!strcasecmp(s, "cpu")) + return HWLOC_DISC_PHASE_CPU; + if (!strcasecmp(s, "memory")) + return HWLOC_DISC_PHASE_MEMORY; + if (!strcasecmp(s, "pci")) + return HWLOC_DISC_PHASE_PCI; + if (!strcasecmp(s, "io")) + return HWLOC_DISC_PHASE_IO; + if (!strcasecmp(s, "misc")) + return HWLOC_DISC_PHASE_MISC; + if (!strcasecmp(s, "annotate")) + return HWLOC_DISC_PHASE_ANNOTATE; + if (!strcasecmp(s, "tweak")) + return HWLOC_DISC_PHASE_TWEAK; + return 0; + } + return (unsigned) strtoul(s, NULL, 0); +} + +static int +hwloc_disc_component_blacklist_one(struct hwloc_topology *topology, + const char *name) +{ + struct hwloc_topology_forced_component_s *blacklisted; + struct hwloc_disc_component *comp; + unsigned phases; + unsigned i; + + if (!strcmp(name, "linuxpci") || !strcmp(name, "linuxio")) { + /* replace linuxpci and linuxio with linux (with IO phases) + * for backward compatibility with pre-v2.0 and v2.0 respectively */ + if (hwloc_components_verbose) + fprintf(stderr, "Replacing deprecated component `%s' with `linux' IO phases in blacklisting\n", name); + comp = hwloc_disc_component_find("linux", NULL); + phases = HWLOC_DISC_PHASE_PCI | HWLOC_DISC_PHASE_IO | HWLOC_DISC_PHASE_MISC | HWLOC_DISC_PHASE_ANNOTATE; + + } else { + /* normal lookup */ + const char *end; + comp = hwloc_disc_component_find(name, &end); + phases = hwloc_phases_from_string(end); + } + if (!comp) { + errno = EINVAL; + return -1; + } + + if (hwloc_components_verbose) + fprintf(stderr, "Blacklisting component `%s` phases 0x%x\n", comp->name, phases); + + for(i=0; inr_blacklisted_components; i++) { + if (topology->blacklisted_components[i].component == comp) { + topology->blacklisted_components[i].phases |= phases; + return 0; + } + } + + blacklisted = realloc(topology->blacklisted_components, 
(topology->nr_blacklisted_components+1)*sizeof(*blacklisted)); + if (!blacklisted) + return -1; + + blacklisted[topology->nr_blacklisted_components].component = comp; + blacklisted[topology->nr_blacklisted_components].phases = phases; + topology->blacklisted_components = blacklisted; + topology->nr_blacklisted_components++; + return 0; +} + +int +hwloc_topology_set_components(struct hwloc_topology *topology, + unsigned long flags, + const char *name) +{ + if (topology->is_loaded) { + errno = EBUSY; + return -1; + } + + if (flags & ~HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST) { + errno = EINVAL; + return -1; + } + + /* this flag is strictly required for now */ + if (flags != HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST) { + errno = EINVAL; + return -1; + } + + if (!strncmp(name, "all", 3) && name[3] == HWLOC_COMPONENT_PHASESEP_CHAR) { + topology->backend_excluded_phases = hwloc_phases_from_string(name+4); + return 0; + } + + return hwloc_disc_component_blacklist_one(topology, name); +} + /* used by set_xml(), set_synthetic(), ... environment variables, ... to force the first backend */ int hwloc_disc_component_force_enable(struct hwloc_topology *topology, int envvar_forced, - int type, const char *name, + const char *name, const void *data1, const void *data2, const void *data3) { struct hwloc_disc_component *comp; @@ -450,18 +566,28 @@ hwloc_disc_component_force_enable(struct hwloc_topology *topology, return -1; } - comp = hwloc_disc_component_find(type, name); + comp = hwloc_disc_component_find(name, NULL); if (!comp) { errno = ENOSYS; return -1; } - backend = comp->instantiate(comp, data1, data2, data3); + backend = comp->instantiate(topology, comp, 0U /* force-enabled don't get any phase blacklisting */, + data1, data2, data3); if (backend) { + int err; backend->envvar_forced = envvar_forced; if (topology->backends) hwloc_backends_disable_all(topology); - return hwloc_backend_enable(topology, backend); + err = hwloc_backend_enable(backend); + + if (comp->phases == HWLOC_DISC_PHASE_GLOBAL) { + char *env = getenv("HWLOC_ANNOTATE_GLOBAL_COMPONENTS"); + if (env && atoi(env)) + topology->backend_excluded_phases &= ~HWLOC_DISC_PHASE_ANNOTATE; + } + + return err; } else return -1; } @@ -469,29 +595,32 @@ hwloc_disc_component_force_enable(struct hwloc_topology *topology, static int hwloc_disc_component_try_enable(struct hwloc_topology *topology, struct hwloc_disc_component *comp, - const char *comparg, - int envvar_forced) + int envvar_forced, + unsigned blacklisted_phases) { struct hwloc_backend *backend; - if (topology->backend_excludes & comp->type) { + if (!(comp->phases & ~(topology->backend_excluded_phases | blacklisted_phases))) { + /* all this backend phases are already excluded, exclude the backend entirely */ if (hwloc_components_verbose) /* do not warn if envvar_forced since system-wide HWLOC_COMPONENTS must be silently ignored after set_xml() etc. 
*/ - fprintf(stderr, "Excluding %s discovery component `%s', conflicts with excludes 0x%x\n", - hwloc_disc_component_type_string(comp->type), comp->name, topology->backend_excludes); + fprintf(stderr, "Excluding discovery component `%s' phases 0x%x, conflicts with excludes 0x%x\n", + comp->name, comp->phases, topology->backend_excluded_phases); return -1; } - backend = comp->instantiate(comp, comparg, NULL, NULL); + backend = comp->instantiate(topology, comp, topology->backend_excluded_phases | blacklisted_phases, + NULL, NULL, NULL); if (!backend) { if (hwloc_components_verbose || envvar_forced) fprintf(stderr, "Failed to instantiate discovery component `%s'\n", comp->name); return -1; } + backend->phases &= ~blacklisted_phases; backend->envvar_forced = envvar_forced; - return hwloc_backend_enable(topology, backend); + return hwloc_backend_enable(backend); } void @@ -502,11 +631,12 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology) int tryall = 1; const char *_env; char *env; /* we'll to modify the env value, so duplicate it */ + unsigned i; _env = getenv("HWLOC_COMPONENTS"); env = _env ? strdup(_env) : NULL; - /* enable explicitly listed components */ + /* blacklist disabled components */ if (env) { char *curenv = env; size_t s; @@ -516,21 +646,41 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology) if (s) { char c; - /* replace linuxpci with linuxio for backward compatibility with pre-v2.0 */ - if (!strncmp(curenv, "linuxpci", 8) && s == 8) { - curenv[5] = 'i'; - curenv[6] = 'o'; - curenv[7] = *HWLOC_COMPONENT_SEPS; - } else if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, "linuxpci", 8) && s == 9) { - curenv[6] = 'i'; - curenv[7] = 'o'; - curenv[8] = *HWLOC_COMPONENT_SEPS; - /* skip this name, it's a negated one */ + if (curenv[0] != HWLOC_COMPONENT_EXCLUDE_CHAR) goto nextname; - } - if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR) - goto nextname; + /* save the last char and replace with \0 */ + c = curenv[s]; + curenv[s] = '\0'; + + /* blacklist it, and just ignore failures to allocate */ + hwloc_disc_component_blacklist_one(topology, curenv+1); + + /* remove that blacklisted name from the string */ + for(i=0; inr_blacklisted_components; i++) + if (comp == topology->blacklisted_components[i].component) { + blacklisted_phases = topology->blacklisted_components[i].phases; + break; + } + if (comp->phases & ~blacklisted_phases) + hwloc_disc_component_try_enable(topology, comp, 1 /* envvar forced */, blacklisted_phases); } else { - fprintf(stderr, "Cannot find discovery component `%s'\n", curenv); + fprintf(stderr, "Cannot find discovery component `%s'\n", name); } /* restore chars (the second loop below needs env to be unmodified) */ curenv[s] = c; } -nextname: curenv += s; if (*curenv) /* Skip comma */ @@ -566,26 +729,24 @@ nextname: if (tryall) { comp = hwloc_disc_components; while (NULL != comp) { + unsigned blacklisted_phases = 0U; if (!comp->enabled_by_default) goto nextcomp; - /* check if this component was explicitly excluded in env */ - if (env) { - char *curenv = env; - while (*curenv) { - size_t s = strcspn(curenv, HWLOC_COMPONENT_SEPS); - if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, comp->name, s-1) && strlen(comp->name) == s-1) { - if (hwloc_components_verbose) - fprintf(stderr, "Excluding %s discovery component `%s' because of HWLOC_COMPONENTS environment variable\n", - hwloc_disc_component_type_string(comp->type), comp->name); - goto nextcomp; - } - curenv += s; - if (*curenv) - /* Skip comma */ 
- curenv++; + /* check if this component was blacklisted by the application */ + for(i=0; inr_blacklisted_components; i++) + if (comp == topology->blacklisted_components[i].component) { + blacklisted_phases = topology->blacklisted_components[i].phases; + break; } + + if (!(comp->phases & ~blacklisted_phases)) { + if (hwloc_components_verbose) + fprintf(stderr, "Excluding blacklisted discovery component `%s' phases 0x%x\n", + comp->name, comp->phases); + goto nextcomp; } - hwloc_disc_component_try_enable(topology, comp, NULL, 0 /* defaults, not envvar forced */); + + hwloc_disc_component_try_enable(topology, comp, 0 /* defaults, not envvar forced */, blacklisted_phases); nextcomp: comp = comp->next; } @@ -597,7 +758,7 @@ nextcomp: backend = topology->backends; fprintf(stderr, "Final list of enabled discovery components: "); while (backend != NULL) { - fprintf(stderr, "%s%s", first ? "" : ",", backend->component->name); + fprintf(stderr, "%s%s(0x%x)", first ? "" : ",", backend->component->name, backend->phases); backend = backend->next; first = 0; } @@ -638,7 +799,8 @@ hwloc_components_fini(void) } struct hwloc_backend * -hwloc_backend_alloc(struct hwloc_disc_component *component) +hwloc_backend_alloc(struct hwloc_topology *topology, + struct hwloc_disc_component *component) { struct hwloc_backend * backend = malloc(sizeof(*backend)); if (!backend) { @@ -646,6 +808,12 @@ hwloc_backend_alloc(struct hwloc_disc_component *component) return NULL; } backend->component = component; + backend->topology = topology; + /* filter-out component phases that are excluded */ + backend->phases = component->phases & ~topology->backend_excluded_phases; + if (backend->phases != component->phases && hwloc_components_verbose) + fprintf(stderr, "Trying discovery component `%s' with phases 0x%x instead of 0x%x\n", + component->name, backend->phases, component->phases); backend->flags = 0; backend->discover = NULL; backend->get_pci_busid_cpuset = NULL; @@ -665,14 +833,15 @@ hwloc_backend_disable(struct hwloc_backend *backend) } int -hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *backend) +hwloc_backend_enable(struct hwloc_backend *backend) { + struct hwloc_topology *topology = backend->topology; struct hwloc_backend **pprev; /* check backend flags */ if (backend->flags) { - fprintf(stderr, "Cannot enable %s discovery component `%s' with unknown flags %lx\n", - hwloc_disc_component_type_string(backend->component->type), backend->component->name, backend->flags); + fprintf(stderr, "Cannot enable discovery component `%s' phases 0x%x with unknown flags %lx\n", + backend->component->name, backend->component->phases, backend->flags); return -1; } @@ -681,8 +850,8 @@ hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *back while (NULL != *pprev) { if ((*pprev)->component == backend->component) { if (hwloc_components_verbose) - fprintf(stderr, "Cannot enable %s discovery component `%s' twice\n", - hwloc_disc_component_type_string(backend->component->type), backend->component->name); + fprintf(stderr, "Cannot enable discovery component `%s' phases 0x%x twice\n", + backend->component->name, backend->component->phases); hwloc_backend_disable(backend); errno = EBUSY; return -1; @@ -691,8 +860,8 @@ hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *back } if (hwloc_components_verbose) - fprintf(stderr, "Enabling %s discovery component `%s'\n", - hwloc_disc_component_type_string(backend->component->type), backend->component->name); + fprintf(stderr, 
"Enabling discovery component `%s' with phases 0x%x (among 0x%x)\n", + backend->component->name, backend->phases, backend->component->phases); /* enqueue at the end */ pprev = &topology->backends; @@ -701,8 +870,8 @@ hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *back backend->next = *pprev; *pprev = backend; - backend->topology = topology; - topology->backend_excludes |= backend->component->excludes; + topology->backend_phases |= backend->component->phases; + topology->backend_excluded_phases |= backend->component->excluded_phases; return 0; } @@ -712,7 +881,7 @@ hwloc_backends_is_thissystem(struct hwloc_topology *topology) struct hwloc_backend *backend; const char *local_env; - /* Apply is_thissystem topology flag before we enforce envvar backends. + /* * If the application changed the backend with set_foo(), * it may use set_flags() update the is_thissystem flag here. * If it changes the backend with environment variables below, @@ -775,11 +944,20 @@ hwloc_backends_disable_all(struct hwloc_topology *topology) while (NULL != (backend = topology->backends)) { struct hwloc_backend *next = backend->next; if (hwloc_components_verbose) - fprintf(stderr, "Disabling %s discovery component `%s'\n", - hwloc_disc_component_type_string(backend->component->type), backend->component->name); + fprintf(stderr, "Disabling discovery component `%s'\n", + backend->component->name); hwloc_backend_disable(backend); topology->backends = next; } topology->backends = NULL; - topology->backend_excludes = 0; + topology->backend_excluded_phases = 0; +} + +void +hwloc_topology_components_fini(struct hwloc_topology *topology) +{ + /* hwloc_backends_disable_all() must have been called earlier */ + assert(!topology->backends); + + free(topology->blacklisted_components); } diff --git a/src/3rdparty/hwloc/src/diff.c b/src/3rdparty/hwloc/src/diff.c index 00811a7b..7794358b 100644 --- a/src/3rdparty/hwloc/src/diff.c +++ b/src/3rdparty/hwloc/src/diff.c @@ -1,11 +1,11 @@ /* - * Copyright © 2013-2018 Inria. All rights reserved. + * Copyright © 2013-2019 Inria. All rights reserved. * See COPYING in top-level directory. */ -#include -#include -#include +#include "private/autogen/config.h" +#include "private/private.h" +#include "private/misc.h" int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff) { @@ -351,7 +351,8 @@ int hwloc_topology_diff_build(hwloc_topology_t topo1, err = 1; break; } - if (dist1->type != dist2->type + if (dist1->unique_type != dist2->unique_type + || dist1->different_types || dist2->different_types /* too lazy to support this case */ || dist1->nbobjs != dist2->nbobjs || dist1->kind != dist2->kind || memcmp(dist1->values, dist2->values, dist1->nbobjs * dist1->nbobjs * sizeof(*dist1->values))) { @@ -463,6 +464,10 @@ int hwloc_topology_diff_apply(hwloc_topology_t topology, errno = EINVAL; return -1; } + if (topology->adopted_shmem_addr) { + errno = EPERM; + return -1; + } if (flags & ~HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE) { errno = EINVAL; diff --git a/src/3rdparty/hwloc/src/distances.c b/src/3rdparty/hwloc/src/distances.c index f0b91f01..9e56a969 100644 --- a/src/3rdparty/hwloc/src/distances.c +++ b/src/3rdparty/hwloc/src/distances.c @@ -1,19 +1,22 @@ /* - * Copyright © 2010-2018 Inria. All rights reserved. + * Copyright © 2010-2019 Inria. All rights reserved. * Copyright © 2011-2012 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
*/ -#include -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/private.h" +#include "private/debug.h" +#include "private/misc.h" #include #include +static struct hwloc_internal_distances_s * +hwloc__internal_distances_from_public(hwloc_topology_t topology, struct hwloc_distances_s *distances); + /****************************************************** * Global init, prepare, destroy, dup */ @@ -70,6 +73,8 @@ void hwloc_internal_distances_prepare(struct hwloc_topology *topology) static void hwloc_internal_distances_free(struct hwloc_internal_distances_s *dist) { + free(dist->name); + free(dist->different_types); free(dist->indexes); free(dist->objs); free(dist->values); @@ -96,15 +101,35 @@ static int hwloc_internal_distances_dup_one(struct hwloc_topology *new, struct h newdist = hwloc_tma_malloc(tma, sizeof(*newdist)); if (!newdist) return -1; + if (olddist->name) { + newdist->name = hwloc_tma_strdup(tma, olddist->name); + if (!newdist->name) { + assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */ + hwloc_internal_distances_free(newdist); + return -1; + } + } else { + newdist->name = NULL; + } - newdist->type = olddist->type; + if (olddist->different_types) { + newdist->different_types = hwloc_tma_malloc(tma, nbobjs * sizeof(*newdist->different_types)); + if (!newdist->different_types) { + assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */ + hwloc_internal_distances_free(newdist); + return -1; + } + memcpy(newdist->different_types, olddist->different_types, nbobjs * sizeof(*newdist->different_types)); + } else + newdist->different_types = NULL; + newdist->unique_type = olddist->unique_type; newdist->nbobjs = nbobjs; newdist->kind = olddist->kind; newdist->id = olddist->id; newdist->indexes = hwloc_tma_malloc(tma, nbobjs * sizeof(*newdist->indexes)); newdist->objs = hwloc_tma_calloc(tma, nbobjs * sizeof(*newdist->objs)); - newdist->objs_are_valid = 0; + newdist->iflags = olddist->iflags & ~HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID; /* must be revalidated after dup() */ newdist->values = hwloc_tma_malloc(tma, nbobjs*nbobjs * sizeof(*newdist->values)); if (!newdist->indexes || !newdist->objs || !newdist->values) { assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */ @@ -150,6 +175,10 @@ int hwloc_distances_remove(hwloc_topology_t topology) errno = EINVAL; return -1; } + if (topology->adopted_shmem_addr) { + errno = EPERM; + return -1; + } hwloc_internal_distances_destroy(topology); return 0; } @@ -163,6 +192,10 @@ int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth) errno = EINVAL; return -1; } + if (topology->adopted_shmem_addr) { + errno = EPERM; + return -1; + } /* switch back to types since we don't support groups for now */ type = hwloc_get_depth_type(topology, depth); @@ -174,7 +207,7 @@ int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth) next = topology->first_dist; while ((dist = next) != NULL) { next = dist->next; - if (dist->type == type) { + if (dist->unique_type == type) { if (next) next->prev = dist->prev; else @@ -190,6 +223,27 @@ int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth) return 0; } +int hwloc_distances_release_remove(hwloc_topology_t topology, + struct hwloc_distances_s *distances) +{ + struct hwloc_internal_distances_s *dist = hwloc__internal_distances_from_public(topology, distances); + if (!dist) { + errno = EINVAL; + return -1; + } + if (dist->prev) + dist->prev->next = dist->next; + 
else + topology->first_dist = dist->next; + if (dist->next) + dist->next->prev = dist->prev; + else + topology->last_dist = dist->prev; + hwloc_internal_distances_free(dist); + hwloc_distances_release(topology, distances); + return 0; +} + /****************************************************** * Add distances to the topology */ @@ -201,17 +255,34 @@ hwloc__groups_by_distances(struct hwloc_topology *topology, unsigned nbobjs, str * the caller gives us the distances and objs pointers, we'll free them later. */ static int -hwloc_internal_distances__add(hwloc_topology_t topology, - hwloc_obj_type_t type, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *indexes, uint64_t *values, - unsigned long kind) +hwloc_internal_distances__add(hwloc_topology_t topology, const char *name, + hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, + unsigned nbobjs, hwloc_obj_t *objs, uint64_t *indexes, uint64_t *values, + unsigned long kind, unsigned iflags) { - struct hwloc_internal_distances_s *dist = calloc(1, sizeof(*dist)); + struct hwloc_internal_distances_s *dist; + + if (different_types) { + kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES; /* the user isn't forced to give it */ + } else if (kind & HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES) { + errno = EINVAL; + goto err; + } + + dist = calloc(1, sizeof(*dist)); if (!dist) goto err; - dist->type = type; + if (name) + dist->name = strdup(name); /* ignore failure */ + + dist->unique_type = unique_type; + dist->different_types = different_types; dist->nbobjs = nbobjs; dist->kind = kind; + dist->iflags = iflags; + + assert(!!(iflags & HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID) == !!objs); if (!objs) { assert(indexes); @@ -220,18 +291,16 @@ hwloc_internal_distances__add(hwloc_topology_t topology, dist->objs = calloc(nbobjs, sizeof(hwloc_obj_t)); if (!dist->objs) goto err_with_dist; - dist->objs_are_valid = 0; } else { unsigned i; assert(!indexes); /* we only have objs, generate the indexes arrays so that we can refresh objs later */ dist->objs = objs; - dist->objs_are_valid = 1; dist->indexes = malloc(nbobjs * sizeof(*dist->indexes)); if (!dist->indexes) goto err_with_dist; - if (dist->type == HWLOC_OBJ_PU || dist->type == HWLOC_OBJ_NUMANODE) { + if (HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type)) { for(i=0; iindexes[i] = objs[i]->os_index; } else { @@ -256,16 +325,19 @@ hwloc_internal_distances__add(hwloc_topology_t topology, err_with_dist: free(dist); err: + free(different_types); free(objs); free(indexes); free(values); return -1; } -int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, - hwloc_obj_type_t type, unsigned nbobjs, uint64_t *indexes, uint64_t *values, +int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, + hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags) { + unsigned iflags = 0; /* objs not valid */ + if (nbobjs < 2) { errno = EINVAL; goto err; @@ -279,24 +351,71 @@ int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, goto err; } - return hwloc_internal_distances__add(topology, type, nbobjs, NULL, indexes, values, kind); + return hwloc_internal_distances__add(topology, name, unique_type, different_types, nbobjs, NULL, indexes, values, kind, iflags); err: free(indexes); free(values); + free(different_types); return -1; } -int hwloc_internal_distances_add(hwloc_topology_t topology, +static void +hwloc_internal_distances_restrict(hwloc_obj_t *objs, + uint64_t *indexes, 
+ uint64_t *values, + unsigned nbobjs, unsigned disappeared); + +int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags) { + hwloc_obj_type_t unique_type, *different_types; + unsigned i, disappeared = 0; + unsigned iflags = HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID; + if (nbobjs < 2) { errno = EINVAL; goto err; } - if (topology->grouping && (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP)) { + /* is there any NULL object? (useful in case of problem during insert in backends) */ + for(i=0; itype; + for(i=1; itype != unique_type) { + unique_type = HWLOC_OBJ_TYPE_NONE; + break; + } + if (unique_type == HWLOC_OBJ_TYPE_NONE) { + /* heterogeneous types */ + different_types = malloc(nbobjs * sizeof(*different_types)); + if (!different_types) + goto err; + for(i=0; itype; + + } else { + /* homogeneous types */ + different_types = NULL; + } + + if (topology->grouping && (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) && !different_types) { float full_accuracy = 0.f; float *accuracies; unsigned nbaccuracies; @@ -310,8 +429,8 @@ int hwloc_internal_distances_add(hwloc_topology_t topology, } if (topology->grouping_verbose) { - unsigned i, j; - int gp = (objs[0]->type != HWLOC_OBJ_NUMANODE && objs[0]->type != HWLOC_OBJ_PU); + unsigned j; + int gp = !HWLOC_DIST_TYPE_USE_OS_INDEX(unique_type); fprintf(stderr, "Trying to group objects using distance matrix:\n"); fprintf(stderr, "%s", gp ? "gp_index" : "os_index"); for(j=0; jtype, nbobjs, objs, NULL, values, kind); + return hwloc_internal_distances__add(topology, name, unique_type, different_types, nbobjs, objs, NULL, values, kind, iflags); err: free(objs); @@ -348,7 +467,6 @@ int hwloc_distances_add(hwloc_topology_t topology, unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values, unsigned long kind, unsigned long flags) { - hwloc_obj_type_t type; unsigned i; uint64_t *_values; hwloc_obj_t *_objs; @@ -358,6 +476,10 @@ int hwloc_distances_add(hwloc_topology_t topology, errno = EINVAL; return -1; } + if (topology->adopted_shmem_addr) { + errno = EPERM; + return -1; + } if ((kind & ~HWLOC_DISTANCES_KIND_ALL) || hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_FROM_ALL) != 1 || hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_MEANS_ALL) != 1 @@ -368,15 +490,8 @@ int hwloc_distances_add(hwloc_topology_t topology, /* no strict need to check for duplicates, things shouldn't break */ - type = objs[0]->type; - if (type == HWLOC_OBJ_GROUP) { - /* not supported yet, would require we save the subkind together with the type. 
*/ - errno = EINVAL; - return -1; - } - for(i=1; itype != type) { + if (!objs[i]) { errno = EINVAL; return -1; } @@ -389,7 +504,7 @@ int hwloc_distances_add(hwloc_topology_t topology, memcpy(_objs, objs, nbobjs*sizeof(hwloc_obj_t)); memcpy(_values, values, nbobjs*nbobjs*sizeof(*_values)); - err = hwloc_internal_distances_add(topology, nbobjs, _objs, _values, kind, flags); + err = hwloc_internal_distances_add(topology, NULL, nbobjs, _objs, _values, kind, flags); if (err < 0) goto out; /* _objs and _values freed in hwloc_internal_distances_add() */ @@ -409,9 +524,9 @@ int hwloc_distances_add(hwloc_topology_t topology, * Refresh objects in distances */ -static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index) +static hwloc_obj_t hwloc_find_obj_by_depth_and_gp_index(hwloc_topology_t topology, unsigned depth, uint64_t gp_index) { - hwloc_obj_t obj = hwloc_get_obj_by_type(topology, type, 0); + hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, 0); while (obj) { if (obj->gp_index == gp_index) return obj; @@ -420,12 +535,31 @@ static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology return NULL; } -static void -hwloc_internal_distances_restrict(struct hwloc_internal_distances_s *dist, - hwloc_obj_t *objs, - unsigned disappeared) +static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) + return NULL; + if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) { + int topodepth = hwloc_topology_get_depth(topology); + for(depth=0; depthnbobjs; unsigned i, newi; unsigned j, newj; @@ -433,7 +567,7 @@ hwloc_internal_distances_restrict(struct hwloc_internal_distances_s *dist, if (objs[i]) { for(j=0, newj=0; jvalues[newi*(nbobjs-disappeared)+newj] = dist->values[i*nbobjs+j]; + values[newi*(nbobjs-disappeared)+newj] = values[i*nbobjs+j]; newj++; } newi++; @@ -442,25 +576,25 @@ hwloc_internal_distances_restrict(struct hwloc_internal_distances_s *dist, for(i=0, newi=0; iindexes[newi] = dist->indexes[i]; + if (indexes) + indexes[newi] = indexes[i]; newi++; } - - dist->nbobjs -= disappeared; } static int hwloc_internal_distances_refresh_one(hwloc_topology_t topology, struct hwloc_internal_distances_s *dist) { - hwloc_obj_type_t type = dist->type; + hwloc_obj_type_t unique_type = dist->unique_type; + hwloc_obj_type_t *different_types = dist->different_types; unsigned nbobjs = dist->nbobjs; hwloc_obj_t *objs = dist->objs; uint64_t *indexes = dist->indexes; unsigned disappeared = 0; unsigned i; - if (dist->objs_are_valid) + if (dist->iflags & HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID) return 0; for(i=0; iindexes, dist->values, nbobjs, disappeared); + dist->nbobjs -= disappeared; + } - dist->objs_are_valid = 1; + dist->iflags |= HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID; return 0; } @@ -520,32 +660,64 @@ hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology) { struct hwloc_internal_distances_s *dist; for(dist = topology->first_dist; dist; dist = dist->next) - dist->objs_are_valid = 0; + dist->iflags &= ~HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID; } /****************************************************** * User API for getting distances */ +/* what we actually allocate for user queries, even if we only + * return the distances part of it. 
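/*
 * A minimal sketch of feeding a user-provided matrix through the public entry
 * point above, assuming the topology exposes at least two NUMA nodes; the objs
 * and values arrays are copied internally, so stack storage is fine.
 */
#include <hwloc.h>
#include <hwloc/distances.h>

static int add_numa_latency_matrix(hwloc_topology_t topology)
{
  hwloc_obj_t objs[2];
  hwloc_uint64_t values[4] = { 10, 20,
                               20, 10 }; /* relative latencies, row-major */
  objs[0] = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0);
  objs[1] = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 1);
  if (!objs[0] || !objs[1])
    return -1;
  return hwloc_distances_add(topology, 2, objs, values,
                             HWLOC_DISTANCES_KIND_FROM_USER | HWLOC_DISTANCES_KIND_MEANS_LATENCY,
                             HWLOC_DISTANCES_ADD_FLAG_GROUP);
}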
+ */ +struct hwloc_distances_container_s { + unsigned id; + struct hwloc_distances_s distances; +}; + +#define HWLOC_DISTANCES_CONTAINER_OFFSET ((char*)&((struct hwloc_distances_container_s*)NULL)->distances - (char*)NULL) +#define HWLOC_DISTANCES_CONTAINER(_d) (struct hwloc_distances_container_s *) ( ((char*)_d) - HWLOC_DISTANCES_CONTAINER_OFFSET ) + +static struct hwloc_internal_distances_s * +hwloc__internal_distances_from_public(hwloc_topology_t topology, struct hwloc_distances_s *distances) +{ + struct hwloc_distances_container_s *cont = HWLOC_DISTANCES_CONTAINER(distances); + struct hwloc_internal_distances_s *dist; + for(dist = topology->first_dist; dist; dist = dist->next) + if (dist->id == cont->id) + return dist; + return NULL; +} + void hwloc_distances_release(hwloc_topology_t topology __hwloc_attribute_unused, struct hwloc_distances_s *distances) { + struct hwloc_distances_container_s *cont = HWLOC_DISTANCES_CONTAINER(distances); free(distances->values); free(distances->objs); - free(distances); + free(cont); +} + +const char * +hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances) +{ + struct hwloc_internal_distances_s *dist = hwloc__internal_distances_from_public(topology, distances); + return dist ? dist->name : NULL; } static struct hwloc_distances_s * hwloc_distances_get_one(hwloc_topology_t topology __hwloc_attribute_unused, struct hwloc_internal_distances_s *dist) { + struct hwloc_distances_container_s *cont; struct hwloc_distances_s *distances; unsigned nbobjs; - distances = malloc(sizeof(*distances)); - if (!distances) + cont = malloc(sizeof(*cont)); + if (!cont) return NULL; + distances = &cont->distances; nbobjs = distances->nbobjs = dist->nbobjs; @@ -560,18 +732,20 @@ hwloc_distances_get_one(hwloc_topology_t topology __hwloc_attribute_unused, memcpy(distances->values, dist->values, nbobjs*nbobjs*sizeof(*distances->values)); distances->kind = dist->kind; + + cont->id = dist->id; return distances; out_with_objs: free(distances->objs); out: - free(distances); + free(cont); return NULL; } static int hwloc__distances_get(hwloc_topology_t topology, - hwloc_obj_type_t type, + const char *name, hwloc_obj_type_t type, unsigned *nrp, struct hwloc_distances_s **distancesp, unsigned long kind, unsigned long flags __hwloc_attribute_unused) { @@ -602,7 +776,10 @@ hwloc__distances_get(hwloc_topology_t topology, unsigned long kind_from = kind & HWLOC_DISTANCES_KIND_FROM_ALL; unsigned long kind_means = kind & HWLOC_DISTANCES_KIND_MEANS_ALL; - if (type != HWLOC_OBJ_TYPE_NONE && type != dist->type) + if (name && (!dist->name || strcmp(name, dist->name))) + continue; + + if (type != HWLOC_OBJ_TYPE_NONE && type != dist->unique_type) continue; if (kind_from && !(kind_from & dist->kind)) @@ -640,7 +817,7 @@ hwloc_distances_get(hwloc_topology_t topology, return -1; } - return hwloc__distances_get(topology, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, kind, flags); + return hwloc__distances_get(topology, NULL, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, kind, flags); } int @@ -655,14 +832,40 @@ hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth, return -1; } - /* switch back to types since we don't support groups for now */ + /* FIXME: passing the depth of a group level may return group distances at a different depth */ type = hwloc_get_depth_type(topology, depth); if (type == (hwloc_obj_type_t)-1) { errno = EINVAL; return -1; } - return hwloc__distances_get(topology, type, nrp, distancesp, kind, flags); + return hwloc__distances_get(topology, NULL, 
type, nrp, distancesp, kind, flags); +} + +int +hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name, + unsigned *nrp, struct hwloc_distances_s **distancesp, + unsigned long flags) +{ + if (flags || !topology->is_loaded) { + errno = EINVAL; + return -1; + } + + return hwloc__distances_get(topology, name, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, HWLOC_DISTANCES_KIND_ALL, flags); +} + +int +hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type, + unsigned *nrp, struct hwloc_distances_s **distancesp, + unsigned long kind, unsigned long flags) +{ + if (flags || !topology->is_loaded) { + errno = EINVAL; + return -1; + } + + return hwloc__distances_get(topology, NULL, type, nrp, distancesp, kind, flags); } /****************************************************** @@ -823,10 +1026,14 @@ hwloc__groups_by_distances(struct hwloc_topology *topology, float *accuracies, int needcheck) { - HWLOC_VLA(unsigned, groupids, nbobjs); + unsigned *groupids; unsigned nbgroups = 0; unsigned i,j; int verbose = topology->grouping_verbose; + hwloc_obj_t *groupobjs; + unsigned * groupsizes; + uint64_t *groupvalues; + unsigned failed = 0; if (nbobjs <= 2) return; @@ -836,6 +1043,10 @@ hwloc__groups_by_distances(struct hwloc_topology *topology, /* TODO hwloc__find_groups_by_max_distance() for bandwidth */ return; + groupids = malloc(nbobjs * sizeof(*groupids)); + if (!groupids) + return; + for(i=0; i -#include -#include +#include "private/autogen/config.h" +#include "private/private.h" +#include "private/misc.h" #include #ifdef HAVE_SYS_UTSNAME_H @@ -28,6 +28,7 @@ extern char *program_invocation_name; extern char *__progname; #endif +#ifndef HWLOC_HAVE_CORRECT_SNPRINTF int hwloc_snprintf(char *str, size_t size, const char *format, ...) { int ret; @@ -77,21 +78,7 @@ int hwloc_snprintf(char *str, size_t size, const char *format, ...) return ret; } - -int hwloc_namecoloncmp(const char *haystack, const char *needle, size_t n) -{ - size_t i = 0; - while (*haystack && *haystack != ':') { - int ha = *haystack++; - int low_h = tolower(ha); - int ne = *needle++; - int low_n = tolower(ne); - if (low_h != low_n) - return 1; - i++; - } - return i < n; -} +#endif void hwloc_add_uname_info(struct hwloc_topology *topology __hwloc_attribute_unused, void *cached_uname __hwloc_attribute_unused) diff --git a/src/3rdparty/hwloc/src/pci-common.c b/src/3rdparty/hwloc/src/pci-common.c index 00f08a9e..deca5cce 100644 --- a/src/3rdparty/hwloc/src/pci-common.c +++ b/src/3rdparty/hwloc/src/pci-common.c @@ -1,14 +1,14 @@ /* - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * See COPYING in top-level directory. 
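/*
 * A minimal caller-side sketch of the query API defined above, assuming the
 * public hwloc/distances.h prototypes match these definitions; it lists the
 * NUMA distance matrices, prints the new name attribute, and releases them.
 */
#include <stdio.h>
#include <hwloc.h>
#include <hwloc/distances.h>

static void print_numa_distance_names(hwloc_topology_t topology)
{
  struct hwloc_distances_s *dist[8];
  unsigned nr = 8, i;
  if (hwloc_distances_get_by_type(topology, HWLOC_OBJ_NUMANODE, &nr, dist,
                                  HWLOC_DISTANCES_KIND_FROM_OS, 0) < 0)
    return;
  if (nr > 8)
    nr = 8; /* only the first 8 matrices were actually stored */
  for (i = 0; i < nr; i++) {
    const char *name = hwloc_distances_get_name(topology, dist[i]);
    printf("matrix #%u \"%s\" covers %u objects\n",
           i, name ? name : "(anonymous)", dist[i]->nbobjs);
    /* hwloc_distances_release_remove(topology, dist[i]) would drop it for good;
     * here we only release the query result */
    hwloc_distances_release(topology, dist[i]);
  }
}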
*/ -#include -#include -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "hwloc/plugins.h" +#include "private/private.h" +#include "private/debug.h" +#include "private/misc.h" #include #ifdef HAVE_UNISTD_H @@ -23,6 +23,11 @@ #define close _close #endif + +/************************************** + * Init/Exit and Forced PCI localities + */ + static void hwloc_pci_forced_locality_parse_one(struct hwloc_topology *topology, const char *string /* must contain a ' ' */, @@ -109,11 +114,11 @@ hwloc_pci_forced_locality_parse(struct hwloc_topology *topology, const char *_en void hwloc_pci_discovery_init(struct hwloc_topology *topology) { - topology->need_pci_belowroot_apply_locality = 0; - topology->pci_has_forced_locality = 0; topology->pci_forced_locality_nr = 0; topology->pci_forced_locality = NULL; + + topology->first_pci_locality = topology->last_pci_locality = NULL; } void @@ -135,7 +140,7 @@ hwloc_pci_discovery_prepare(struct hwloc_topology *topology) if (!err) { if (st.st_size <= 64*1024) { /* random limit large enough to store multiple cpusets for thousands of PUs */ buffer = malloc(st.st_size+1); - if (read(fd, buffer, st.st_size) == st.st_size) { + if (buffer && read(fd, buffer, st.st_size) == st.st_size) { buffer[st.st_size] = '\0'; hwloc_pci_forced_locality_parse(topology, buffer); } @@ -152,16 +157,31 @@ hwloc_pci_discovery_prepare(struct hwloc_topology *topology) } void -hwloc_pci_discovery_exit(struct hwloc_topology *topology __hwloc_attribute_unused) +hwloc_pci_discovery_exit(struct hwloc_topology *topology) { + struct hwloc_pci_locality_s *cur; unsigned i; + for(i=0; ipci_forced_locality_nr; i++) hwloc_bitmap_free(topology->pci_forced_locality[i].cpuset); free(topology->pci_forced_locality); + cur = topology->first_pci_locality; + while (cur) { + struct hwloc_pci_locality_s *next = cur->next; + hwloc_bitmap_free(cur->cpuset); + free(cur); + cur = next; + } + hwloc_pci_discovery_init(topology); } + +/****************************** + * Inserting in Tree by Bus ID + */ + #ifdef HWLOC_DEBUG static void hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused, @@ -324,32 +344,16 @@ hwloc_pcidisc_tree_insert_by_busid(struct hwloc_obj **treep, hwloc_pci_add_object(NULL /* no parent on top of tree */, treep, obj); } -int -hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *old_tree) + +/********************** + * Attaching PCI Trees + */ + +static struct hwloc_obj * +hwloc_pcidisc_add_hostbridges(struct hwloc_topology *topology, + struct hwloc_obj *old_tree) { - struct hwloc_obj **next_hb_p; - enum hwloc_type_filter_e bfilter; - - if (!old_tree) - /* found nothing, exit */ - return 0; - -#ifdef HWLOC_DEBUG - hwloc_debug("%s", "\nPCI hierarchy:\n"); - hwloc_pci_traverse(NULL, old_tree, hwloc_pci_traverse_print_cb); - hwloc_debug("%s", "\n"); -#endif - - next_hb_p = &hwloc_get_root_obj(topology)->io_first_child; - while (*next_hb_p) - next_hb_p = &((*next_hb_p)->next_sibling); - - bfilter = topology->type_filter[HWLOC_OBJ_BRIDGE]; - if (bfilter == HWLOC_TYPE_FILTER_KEEP_NONE) { - *next_hb_p = old_tree; - topology->modified = 1; - goto done; - } + struct hwloc_obj * new = NULL, **newp = &new; /* * tree points to all objects connected to any upstream bus in the machine. 
@@ -358,15 +362,29 @@ hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *old */ while (old_tree) { /* start a new host bridge */ - struct hwloc_obj *hostbridge = hwloc_alloc_setup_object(topology, HWLOC_OBJ_BRIDGE, HWLOC_UNKNOWN_INDEX); - struct hwloc_obj **dstnextp = &hostbridge->io_first_child; - struct hwloc_obj **srcnextp = &old_tree; - struct hwloc_obj *child = *srcnextp; - unsigned short current_domain = child->attr->pcidev.domain; - unsigned char current_bus = child->attr->pcidev.bus; - unsigned char current_subordinate = current_bus; + struct hwloc_obj *hostbridge; + struct hwloc_obj **dstnextp; + struct hwloc_obj **srcnextp; + struct hwloc_obj *child; + unsigned short current_domain; + unsigned char current_bus; + unsigned char current_subordinate; - hwloc_debug("Starting new PCI hostbridge %04x:%02x\n", current_domain, current_bus); + hostbridge = hwloc_alloc_setup_object(topology, HWLOC_OBJ_BRIDGE, HWLOC_UNKNOWN_INDEX); + if (!hostbridge) { + /* just queue remaining things without hostbridges and return */ + *newp = old_tree; + return new; + } + dstnextp = &hostbridge->io_first_child; + + srcnextp = &old_tree; + child = *srcnextp; + current_domain = child->attr->pcidev.domain; + current_bus = child->attr->pcidev.bus; + current_subordinate = current_bus; + + hwloc_debug("Adding new PCI hostbridge %04x:%02x\n", current_domain, current_bus); next_child: /* remove next child from tree */ @@ -395,19 +413,14 @@ hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *old hostbridge->attr->bridge.downstream.pci.domain = current_domain; hostbridge->attr->bridge.downstream.pci.secondary_bus = current_bus; hostbridge->attr->bridge.downstream.pci.subordinate_bus = current_subordinate; - hwloc_debug("New PCI hostbridge %04x:[%02x-%02x]\n", + hwloc_debug(" new PCI hostbridge covers %04x:[%02x-%02x]\n", current_domain, current_bus, current_subordinate); - *next_hb_p = hostbridge; - next_hb_p = &hostbridge->next_sibling; - topology->modified = 1; /* needed in case somebody reconnects levels before the core calls hwloc_pci_belowroot_apply_locality() - * or if hwloc_pci_belowroot_apply_locality() keeps hostbridges below root. - */ + *newp = hostbridge; + newp = &hostbridge->next_sibling; } - done: - topology->need_pci_belowroot_apply_locality = 1; - return 0; + return new; } static struct hwloc_obj * @@ -458,6 +471,9 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide unsigned i; int err; + hwloc_debug("Looking for parent of PCI busid %04x:%02x:%02x.%01x\n", + busid->domain, busid->bus, busid->dev, busid->func); + /* try to match a forced locality */ if (topology->pci_has_forced_locality) { for(i=0; ipci_forced_locality_nr; i++) { @@ -489,7 +505,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide } if (*env) { /* force the cpuset */ - hwloc_debug("Overriding localcpus using %s in the environment\n", envname); + hwloc_debug("Overriding PCI locality using %s in the environment\n", envname); hwloc_bitmap_sscanf(cpuset, env); forced = 1; } @@ -499,7 +515,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide } if (!forced) { - /* get the cpuset by asking the OS backend. */ + /* get the cpuset by asking the backend that provides the relevant hook, if any. 
*/ struct hwloc_backend *backend = topology->get_pci_busid_cpuset_backend; if (backend) err = backend->get_pci_busid_cpuset(backend, busid, cpuset); @@ -510,7 +526,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide hwloc_bitmap_copy(cpuset, hwloc_topology_get_topology_cpuset(topology)); } - hwloc_debug_bitmap("Attaching PCI tree to cpuset %s\n", cpuset); + hwloc_debug_bitmap(" will attach PCI bus to cpuset %s\n", cpuset); parent = hwloc_find_insert_io_parent_by_complete_cpuset(topology, cpuset); if (parent) { @@ -526,11 +542,129 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide return parent; } +int +hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *tree) +{ + enum hwloc_type_filter_e bfilter; + + if (!tree) + /* found nothing, exit */ + return 0; + +#ifdef HWLOC_DEBUG + hwloc_debug("%s", "\nPCI hierarchy:\n"); + hwloc_pci_traverse(NULL, tree, hwloc_pci_traverse_print_cb); + hwloc_debug("%s", "\n"); +#endif + + bfilter = topology->type_filter[HWLOC_OBJ_BRIDGE]; + if (bfilter != HWLOC_TYPE_FILTER_KEEP_NONE) { + tree = hwloc_pcidisc_add_hostbridges(topology, tree); + } + + while (tree) { + struct hwloc_obj *obj, *pciobj; + struct hwloc_obj *parent; + struct hwloc_pci_locality_s *loc; + unsigned domain, bus_min, bus_max; + + obj = tree; + + /* hostbridges don't have a PCI busid for looking up locality, use their first child */ + if (obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) + pciobj = obj->io_first_child; + else + pciobj = obj; + /* now we have a pci device or a pci bridge */ + assert(pciobj->type == HWLOC_OBJ_PCI_DEVICE + || (pciobj->type == HWLOC_OBJ_BRIDGE && pciobj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)); + + if (obj->type == HWLOC_OBJ_BRIDGE) { + domain = obj->attr->bridge.downstream.pci.domain; + bus_min = obj->attr->bridge.downstream.pci.secondary_bus; + bus_max = obj->attr->bridge.downstream.pci.subordinate_bus; + } else { + domain = pciobj->attr->pcidev.domain; + bus_min = pciobj->attr->pcidev.bus; + bus_max = pciobj->attr->pcidev.bus; + } + + /* find where to attach that PCI bus */ + parent = hwloc__pci_find_busid_parent(topology, &pciobj->attr->pcidev); + + /* reuse the previous locality if possible */ + if (topology->last_pci_locality + && parent == topology->last_pci_locality->parent + && domain == topology->last_pci_locality->domain + && (bus_min == topology->last_pci_locality->bus_max + || bus_min == topology->last_pci_locality->bus_max+1)) { + hwloc_debug(" Reusing PCI locality up to bus %04x:%02x\n", + domain, bus_max); + topology->last_pci_locality->bus_max = bus_max; + goto done; + } + + loc = malloc(sizeof(*loc)); + if (!loc) { + /* fallback to attaching to root */ + parent = hwloc_get_root_obj(topology); + goto done; + } + + loc->domain = domain; + loc->bus_min = bus_min; + loc->bus_max = bus_max; + loc->parent = parent; + loc->cpuset = hwloc_bitmap_dup(parent->cpuset); + if (!loc->cpuset) { + /* fallback to attaching to root */ + free(loc); + parent = hwloc_get_root_obj(topology); + goto done; + } + + hwloc_debug("Adding PCI locality %s P#%u for bus %04x:[%02x:%02x]\n", + hwloc_obj_type_string(parent->type), parent->os_index, loc->domain, loc->bus_min, loc->bus_max); + if (topology->last_pci_locality) { + loc->prev = topology->last_pci_locality; + loc->next = NULL; + topology->last_pci_locality->next = loc; + topology->last_pci_locality = loc; + } else { + loc->prev = NULL; + loc->next = NULL; + 
topology->first_pci_locality = loc; + topology->last_pci_locality = loc; + } + + done: + /* dequeue this object */ + tree = obj->next_sibling; + obj->next_sibling = NULL; + hwloc_insert_object_by_parent(topology, parent, obj); + } + + return 0; +} + + +/********************************* + * Finding PCI objects or parents + */ + struct hwloc_obj * -hwloc_pcidisc_find_busid_parent(struct hwloc_topology *topology, - unsigned domain, unsigned bus, unsigned dev, unsigned func) +hwloc_pci_find_parent_by_busid(struct hwloc_topology *topology, + unsigned domain, unsigned bus, unsigned dev, unsigned func) { struct hwloc_pcidev_attr_s busid; + hwloc_obj_t parent; + + /* try to find that exact busid */ + parent = hwloc_pci_find_by_busid(topology, domain, bus, dev, func); + if (parent) + return parent; + + /* try to find the locality of that bus instead */ busid.domain = domain; busid.bus = bus; busid.dev = dev; @@ -538,66 +672,10 @@ hwloc_pcidisc_find_busid_parent(struct hwloc_topology *topology, return hwloc__pci_find_busid_parent(topology, &busid); } -int -hwloc_pci_belowroot_apply_locality(struct hwloc_topology *topology) -{ - struct hwloc_obj *root = hwloc_get_root_obj(topology); - struct hwloc_obj **listp, *obj; - - if (!topology->need_pci_belowroot_apply_locality) - return 0; - topology->need_pci_belowroot_apply_locality = 0; - - /* root->io_first_child contains some PCI hierarchies, any maybe some non-PCI things. - * insert the PCI trees according to their PCI-locality. - */ - listp = &root->io_first_child; - while ((obj = *listp) != NULL) { - struct hwloc_pcidev_attr_s *busid; - struct hwloc_obj *parent; - - /* skip non-PCI objects */ - if (obj->type != HWLOC_OBJ_PCI_DEVICE - && !(obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI) - && !(obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)) { - listp = &obj->next_sibling; - continue; - } - - if (obj->type == HWLOC_OBJ_PCI_DEVICE - || (obj->type == HWLOC_OBJ_BRIDGE - && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)) - busid = &obj->attr->pcidev; - else { - /* hostbridges don't have a PCI busid for looking up locality, use their first child if PCI */ - hwloc_obj_t child = obj->io_first_child; - if (child && (child->type == HWLOC_OBJ_PCI_DEVICE - || (child->type == HWLOC_OBJ_BRIDGE - && child->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI))) - busid = &obj->io_first_child->attr->pcidev; - else - continue; - } - - /* attach the object (and children) where it belongs */ - parent = hwloc__pci_find_busid_parent(topology, busid); - if (parent == root) { - /* keep this object here */ - listp = &obj->next_sibling; - } else { - /* dequeue this object */ - *listp = obj->next_sibling; - obj->next_sibling = NULL; - hwloc_insert_object_by_parent(topology, parent, obj); - } - } - - return 0; -} - +/* return the smallest object that contains the desired busid */ static struct hwloc_obj * -hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent, - unsigned domain, unsigned bus, unsigned dev, unsigned func) +hwloc__pci_find_by_busid(hwloc_obj_t parent, + unsigned domain, unsigned bus, unsigned dev, unsigned func) { hwloc_obj_t child; @@ -622,7 +700,7 @@ hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent, && child->attr->bridge.downstream.pci.secondary_bus <= bus && child->attr->bridge.downstream.pci.subordinate_bus >= bus) /* not the right bus id, but it's included in the bus below that bridge */ - return hwloc__pci_belowroot_find_by_busid(child, domain, bus, dev, 
func); + return hwloc__pci_find_by_busid(child, domain, bus, dev, func); } else if (child->type == HWLOC_OBJ_BRIDGE && child->attr->bridge.upstream_type != HWLOC_OBJ_BRIDGE_PCI @@ -632,7 +710,7 @@ hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent, && child->attr->bridge.downstream.pci.secondary_bus <= bus && child->attr->bridge.downstream.pci.subordinate_bus >= bus) { /* contains our bus, recurse */ - return hwloc__pci_belowroot_find_by_busid(child, domain, bus, dev, func); + return hwloc__pci_find_by_busid(child, domain, bus, dev, func); } } /* didn't find anything, return parent */ @@ -640,17 +718,54 @@ hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent, } struct hwloc_obj * -hwloc_pcidisc_find_by_busid(struct hwloc_topology *topology, - unsigned domain, unsigned bus, unsigned dev, unsigned func) +hwloc_pci_find_by_busid(struct hwloc_topology *topology, + unsigned domain, unsigned bus, unsigned dev, unsigned func) { + struct hwloc_pci_locality_s *loc; hwloc_obj_t root = hwloc_get_root_obj(topology); - hwloc_obj_t parent = hwloc__pci_belowroot_find_by_busid(root, domain, bus, dev, func); - if (parent == root) + hwloc_obj_t parent = NULL; + + hwloc_debug("pcidisc looking for bus id %04x:%02x:%02x.%01x\n", domain, bus, dev, func); + loc = topology->first_pci_locality; + while (loc) { + if (loc->domain == domain && loc->bus_min <= bus && loc->bus_max >= bus) { + parent = loc->parent; + assert(parent); + hwloc_debug(" found pci locality for %04x:[%02x:%02x]\n", + loc->domain, loc->bus_min, loc->bus_max); + break; + } + loc = loc->next; + } + /* if we failed to insert localities, look at root too */ + if (!parent) + parent = root; + + hwloc_debug(" looking for bus %04x:%02x:%02x.%01x below %s P#%u\n", + domain, bus, dev, func, + hwloc_obj_type_string(parent->type), parent->os_index); + parent = hwloc__pci_find_by_busid(parent, domain, bus, dev, func); + if (parent == root) { + hwloc_debug(" found nothing better than root object, ignoring\n"); return NULL; - else + } else { + if (parent->type == HWLOC_OBJ_PCI_DEVICE + || (parent->type == HWLOC_OBJ_BRIDGE && parent->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)) + hwloc_debug(" found busid %04x:%02x:%02x.%01x\n", + parent->attr->pcidev.domain, parent->attr->pcidev.bus, + parent->attr->pcidev.dev, parent->attr->pcidev.func); + else + hwloc_debug(" found parent %s P#%u\n", + hwloc_obj_type_string(parent->type), parent->os_index); return parent; + } } + +/******************************* + * Parsing the PCI Config Space + */ + #define HWLOC_PCI_STATUS 0x06 #define HWLOC_PCI_STATUS_CAP_LIST 0x10 #define HWLOC_PCI_CAPABILITY_LIST 0x34 @@ -703,13 +818,14 @@ hwloc_pcidisc_find_linkspeed(const unsigned char *config, * PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane * PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane * PCIe Gen4 = 16 GT/s signal-rate per lane with 128/130 encoding = 2 GB/s data-rate per lane + * PCIe Gen5 = 32 GT/s signal-rate per lane with 128/130 encoding = 4 GB/s data-rate per lane */ /* lanespeed in Gbit/s */ if (speed <= 2) lanespeed = 2.5f * speed * 0.8f; else - lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen5 will be 32 GT/s and so on */ + lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen6 will be 64 GT/s and so on */ /* linkspeed in GB/s */ *linkspeed = lanespeed * width / 8; @@ -738,30 +854,27 @@ hwloc_pcidisc_check_bridge_type(unsigned device_class, const unsigned char *conf #define HWLOC_PCI_SUBORDINATE_BUS 0x1a int 
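/*
 * Worked example of the link-speed computation above (numbers follow directly
 * from the formula, the device itself is hypothetical): a Gen3 x16 link has
 * speed=3 and width=16, so
 *   lanespeed = 8.0 * (1 << 0) * 128/130 ~= 7.88 Gbit/s per lane
 *   linkspeed = 7.88 * 16 / 8            ~= 15.75 GB/s
 * and a Gen4 x16 link (speed=4) doubles the per-lane rate, giving ~= 31.5 GB/s,
 * consistent with the per-lane data rates listed in the comment.
 */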
-hwloc_pcidisc_setup_bridge_attr(hwloc_obj_t obj, +hwloc_pcidisc_find_bridge_buses(unsigned domain, unsigned bus, unsigned dev, unsigned func, + unsigned *secondary_busp, unsigned *subordinate_busp, const unsigned char *config) { - struct hwloc_bridge_attr_s *battr = &obj->attr->bridge; - struct hwloc_pcidev_attr_s *pattr = &battr->upstream.pci; + unsigned secondary_bus, subordinate_bus; - if (config[HWLOC_PCI_PRIMARY_BUS] != pattr->bus) { + if (config[HWLOC_PCI_PRIMARY_BUS] != bus) { /* Sometimes the config space contains 00 instead of the actual primary bus number. * Always trust the bus ID because it was built by the system which has more information * to workaround such problems (e.g. ACPI information about PCI parent/children). */ hwloc_debug(" %04x:%02x:%02x.%01x bridge with (ignored) invalid PCI_PRIMARY_BUS %02x\n", - pattr->domain, pattr->bus, pattr->dev, pattr->func, config[HWLOC_PCI_PRIMARY_BUS]); + domain, bus, dev, func, config[HWLOC_PCI_PRIMARY_BUS]); } - battr->upstream_type = HWLOC_OBJ_BRIDGE_PCI; - battr->downstream_type = HWLOC_OBJ_BRIDGE_PCI; - battr->downstream.pci.domain = pattr->domain; - battr->downstream.pci.secondary_bus = config[HWLOC_PCI_SECONDARY_BUS]; - battr->downstream.pci.subordinate_bus = config[HWLOC_PCI_SUBORDINATE_BUS]; + secondary_bus = config[HWLOC_PCI_SECONDARY_BUS]; + subordinate_bus = config[HWLOC_PCI_SUBORDINATE_BUS]; - if (battr->downstream.pci.secondary_bus <= pattr->bus - || battr->downstream.pci.subordinate_bus <= pattr->bus - || battr->downstream.pci.secondary_bus > battr->downstream.pci.subordinate_bus) { + if (secondary_bus <= bus + || subordinate_bus <= bus + || secondary_bus > subordinate_bus) { /* This should catch most cases of invalid bridge information * (e.g. 00 for secondary and subordinate). * Ideally we would also check that [secondary-subordinate] is included @@ -769,15 +882,21 @@ hwloc_pcidisc_setup_bridge_attr(hwloc_obj_t obj, * because objects may be discovered out of order (especially in the fsroot case). */ hwloc_debug(" %04x:%02x:%02x.%01x bridge has invalid secondary-subordinate buses [%02x-%02x]\n", - pattr->domain, pattr->bus, pattr->dev, pattr->func, - battr->downstream.pci.secondary_bus, battr->downstream.pci.subordinate_bus); - hwloc_free_unlinked_object(obj); + domain, bus, dev, func, + secondary_bus, subordinate_bus); return -1; } + *secondary_busp = secondary_bus; + *subordinate_busp = subordinate_bus; return 0; } + +/**************** + * Class Strings + */ + const char * hwloc_pci_class_string(unsigned short class_id) { diff --git a/src/3rdparty/hwloc/src/shmem.c b/src/3rdparty/hwloc/src/shmem.c index 6c507f52..94d55eef 100644 --- a/src/3rdparty/hwloc/src/shmem.c +++ b/src/3rdparty/hwloc/src/shmem.c @@ -1,12 +1,12 @@ /* - * Copyright © 2017-2018 Inria. All rights reserved. + * Copyright © 2017-2019 Inria. All rights reserved. * See COPYING in top-level directory. 
*/ -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "hwloc/shmem.h" +#include "private/private.h" #ifndef HWLOC_WIN_SYS @@ -214,6 +214,8 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp, new->support.discovery = malloc(sizeof(*new->support.discovery)); new->support.cpubind = malloc(sizeof(*new->support.cpubind)); new->support.membind = malloc(sizeof(*new->support.membind)); + if (!new->support.discovery || !new->support.cpubind || !new->support.membind) + goto out_with_support; memcpy(new->support.discovery, old->support.discovery, sizeof(*new->support.discovery)); memcpy(new->support.cpubind, old->support.cpubind, sizeof(*new->support.cpubind)); memcpy(new->support.membind, old->support.membind, sizeof(*new->support.membind)); @@ -230,6 +232,11 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp, *topologyp = new; return 0; + out_with_support: + free(new->support.discovery); + free(new->support.cpubind); + free(new->support.membind); + free(new); out_with_components: hwloc_components_fini(); out_with_mmap: diff --git a/src/3rdparty/hwloc/src/topology-noos.c b/src/3rdparty/hwloc/src/topology-noos.c index 77871eb1..174b6fd8 100644 --- a/src/3rdparty/hwloc/src/topology-noos.c +++ b/src/3rdparty/hwloc/src/topology-noos.c @@ -1,26 +1,34 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2017 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/private.h" static int -hwloc_look_noos(struct hwloc_backend *backend) +hwloc_look_noos(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { + /* + * This backend uses the underlying OS. + * However we don't enforce topology->is_thissystem so that + * we may still force use this backend when debugging with !thissystem. 
+ */ + struct hwloc_topology *topology = backend->topology; int nbprocs; + assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); + if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return -1; - nbprocs = hwloc_fallback_nbprocessors(topology); + nbprocs = hwloc_fallback_nbprocessors(0); if (nbprocs >= 1) topology->support.discovery->pu = 1; else @@ -33,13 +41,15 @@ hwloc_look_noos(struct hwloc_backend *backend) } static struct hwloc_backend * -hwloc_noos_component_instantiate(struct hwloc_disc_component *component, +hwloc_noos_component_instantiate(struct hwloc_topology *topology, + struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) { struct hwloc_backend *backend; - backend = hwloc_backend_alloc(component); + backend = hwloc_backend_alloc(topology, component); if (!backend) return NULL; backend->discover = hwloc_look_noos; @@ -47,9 +57,9 @@ hwloc_noos_component_instantiate(struct hwloc_disc_component *component, } static struct hwloc_disc_component hwloc_noos_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_CPU, "no_os", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_CPU, + HWLOC_DISC_PHASE_GLOBAL, hwloc_noos_component_instantiate, 40, /* lower than native OS component, higher than globals */ 1, diff --git a/src/3rdparty/hwloc/src/topology-synthetic.c b/src/3rdparty/hwloc/src/topology-synthetic.c index 1fe334d1..686efce1 100644 --- a/src/3rdparty/hwloc/src/topology-synthetic.c +++ b/src/3rdparty/hwloc/src/topology-synthetic.c @@ -6,11 +6,11 @@ * See COPYING in top-level directory. */ -#include -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/private.h" +#include "private/misc.h" +#include "private/debug.h" #include #include @@ -122,6 +122,7 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, unsigned long nbs = 1; unsigned j, mul; const char *tmp; + struct hwloc_synthetic_intlv_loop_s *loops; tmp = attr; while (tmp) { @@ -132,9 +133,10 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, tmp++; } - { /* nr_loops colon-separated fields, but we may need one more at the end */ - HWLOC_VLA(struct hwloc_synthetic_intlv_loop_s, loops, nr_loops+1); + loops = malloc((nr_loops+1) * sizeof(*loops)); + if (!loops) + goto out_with_array; if (*attr >= '0' && *attr <= '9') { /* interleaving as x*y:z*t:... 
*/ @@ -148,11 +150,13 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, if (tmp2 == tmp || *tmp2 != '*') { if (verbose) fprintf(stderr, "Failed to read synthetic index interleaving loop '%s' without number before '*'\n", tmp); + free(loops); goto out_with_array; } if (!step) { if (verbose) fprintf(stderr, "Invalid interleaving loop with step 0 at '%s'\n", tmp); + free(loops); goto out_with_array; } tmp2++; @@ -160,11 +164,13 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, if (tmp3 == tmp2 || (*tmp3 && *tmp3 != ':' && *tmp3 != ')' && *tmp3 != ' ')) { if (verbose) fprintf(stderr, "Failed to read synthetic index interleaving loop '%s' without number between '*' and ':'\n", tmp); + free(loops); goto out_with_array; } if (!nb) { if (verbose) fprintf(stderr, "Invalid interleaving loop with number 0 at '%s'\n", tmp2); + free(loops); goto out_with_array; } loops[cur_loop].step = step; @@ -192,11 +198,13 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, if (err < 0) { if (verbose) fprintf(stderr, "Failed to read synthetic index interleaving loop type '%s'\n", tmp); + free(loops); goto out_with_array; } if (type == HWLOC_OBJ_MISC || type == HWLOC_OBJ_BRIDGE || type == HWLOC_OBJ_PCI_DEVICE || type == HWLOC_OBJ_OS_DEVICE) { if (verbose) fprintf(stderr, "Misc object type disallowed in synthetic index interleaving loop type '%s'\n", tmp); + free(loops); goto out_with_array; } for(i=0; ; i++) { @@ -217,6 +225,7 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, if (verbose) fprintf(stderr, "Failed to find level for synthetic index interleaving loop type '%s'\n", tmp); + free(loops); goto out_with_array; } tmp = strchr(tmp, ':'); @@ -235,6 +244,7 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, if (loops[i].level_depth == mydepth && i != cur_loop) { if (verbose) fprintf(stderr, "Invalid duplicate interleaving loop type in synthetic index '%s'\n", attr); + free(loops); goto out_with_array; } if (loops[i].level_depth < mydepth @@ -264,6 +274,7 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, } else { if (verbose) fprintf(stderr, "Invalid index interleaving total width %lu instead of %lu\n", nbs, total); + free(loops); goto out_with_array; } } @@ -278,6 +289,8 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, mul *= nb; } + free(loops); + /* check that we have the right values (cannot pass total, cannot give duplicate 0) */ for(j=0; j= total) { @@ -293,7 +306,6 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, } indexes->array = array; - } } return; @@ -527,7 +539,8 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, if (*pos < '0' || *pos > '9') { if (hwloc_type_sscanf(pos, &type, &attrs, sizeof(attrs)) < 0) { - if (!strncmp(pos, "Die", 3) || !strncmp(pos, "Tile", 4) || !strncmp(pos, "Module", 6)) { + if (!strncmp(pos, "Tile", 4) || !strncmp(pos, "Module", 6)) { + /* possible future types */ type = HWLOC_OBJ_GROUP; } else { /* FIXME: allow generic "Cache" string? 
would require to deal with possibly duplicate cache levels */ @@ -645,6 +658,12 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, errno = EINVAL; return -1; } + if (type_count[HWLOC_OBJ_DIE] > 1) { + if (verbose) + fprintf(stderr, "Synthetic string cannot have several die levels\n"); + errno = EINVAL; + return -1; + } if (type_count[HWLOC_OBJ_NUMANODE] > 1) { if (verbose) fprintf(stderr, "Synthetic string cannot have several NUMA node levels\n"); @@ -829,6 +848,7 @@ hwloc_synthetic_set_attr(struct hwloc_synthetic_attr_s *sattr, obj->attr->numanode.page_types[0].count = sattr->memorysize / 4096; break; case HWLOC_OBJ_PACKAGE: + case HWLOC_OBJ_DIE: break; case HWLOC_OBJ_L1CACHE: case HWLOC_OBJ_L2CACHE: @@ -953,13 +973,19 @@ hwloc__look_synthetic(struct hwloc_topology *topology, } static int -hwloc_look_synthetic(struct hwloc_backend *backend) +hwloc_look_synthetic(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { + /* + * This backend enforces !topology->is_thissystem by default. + */ + struct hwloc_topology *topology = backend->topology; struct hwloc_synthetic_backend_data_s *data = backend->private_data; hwloc_bitmap_t cpuset = hwloc_bitmap_alloc(); unsigned i; + assert(dstatus->phase == HWLOC_DISC_PHASE_GLOBAL); + assert(!topology->levels[0][0]->cpuset); hwloc_alloc_root_sets(topology->levels[0][0]); @@ -1001,7 +1027,9 @@ hwloc_synthetic_backend_disable(struct hwloc_backend *backend) } static struct hwloc_backend * -hwloc_synthetic_component_instantiate(struct hwloc_disc_component *component, +hwloc_synthetic_component_instantiate(struct hwloc_topology *topology, + struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -1021,7 +1049,7 @@ hwloc_synthetic_component_instantiate(struct hwloc_disc_component *component, } } - backend = hwloc_backend_alloc(component); + backend = hwloc_backend_alloc(topology, component); if (!backend) goto out; @@ -1051,8 +1079,8 @@ hwloc_synthetic_component_instantiate(struct hwloc_disc_component *component, } static struct hwloc_disc_component hwloc_synthetic_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, "synthetic", + HWLOC_DISC_PHASE_GLOBAL, ~0, hwloc_synthetic_component_instantiate, 30, @@ -1267,6 +1295,12 @@ hwloc__export_synthetic_obj(struct hwloc_topology * topology, unsigned long flag /* if exporting to v1 or without extended-types, use all-v1-compatible Socket name */ res = hwloc_snprintf(tmp, tmplen, "Socket%s", aritys); + } else if (obj->type == HWLOC_OBJ_DIE + && (flags & (HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES + |HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1))) { + /* if exporting to v1 or without extended-types, use all-v1-compatible Group name */ + res = hwloc_snprintf(tmp, tmplen, "Group%s", aritys); + } else if (obj->type == HWLOC_OBJ_GROUP /* don't export group depth */ || flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES) { res = hwloc_snprintf(tmp, tmplen, "%s%s", hwloc_obj_type_string(obj->type), aritys); @@ -1323,16 +1357,26 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign } while (mchild) { - /* v2: export all NUMA children */ - - assert(mchild->type == HWLOC_OBJ_NUMANODE); /* only NUMA node memory children for now */ + /* FIXME: really recurse to export memcaches and numanode, + * but it requires clever parsing of [ memcache [numa] [numa] ] during import, + * better attaching of 
things to describe the hierarchy. + */ + hwloc_obj_t numanode = mchild; + /* only export the first NUMA node leaf of each memory child + * FIXME: This assumes mscache aren't shared between nodes, that's true in current platforms + */ + while (numanode && numanode->type != HWLOC_OBJ_NUMANODE) { + assert(numanode->arity == 1); + numanode = numanode->memory_first_child; + } + assert(numanode); /* there's always a numanode at the bottom of the memory tree */ if (needprefix) hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, ' '); hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, '['); - res = hwloc__export_synthetic_obj(topology, flags, mchild, (unsigned)-1, tmp, tmplen); + res = hwloc__export_synthetic_obj(topology, flags, numanode, (unsigned)-1, tmp, tmplen); if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) return -1; @@ -1366,9 +1410,8 @@ hwloc_check_memory_symmetric(struct hwloc_topology * topology) assert(node); first_parent = node->parent; - assert(hwloc__obj_type_is_normal(first_parent->type)); /* only depth-1 memory children for now */ - /* check whether all object on parent's level have same number of NUMA children */ + /* check whether all object on parent's level have same number of NUMA bits */ for(i=0; idepth); i++) { hwloc_obj_t parent, mchild; @@ -1379,10 +1422,9 @@ hwloc_check_memory_symmetric(struct hwloc_topology * topology) if (parent->memory_arity != first_parent->memory_arity) goto out_with_bitmap; - /* clear these NUMA children from remaining_nodes */ + /* clear children NUMA bits from remaining_nodes */ mchild = parent->memory_first_child; while (mchild) { - assert(mchild->type == HWLOC_OBJ_NUMANODE); /* only NUMA node memory children for now */ hwloc_bitmap_clr(remaining_nodes, mchild->os_index); /* cannot use parent->nodeset, some normal children may have other NUMA nodes */ mchild = mchild->next_sibling; } diff --git a/src/3rdparty/hwloc/src/topology-windows.c b/src/3rdparty/hwloc/src/topology-windows.c index d03645c0..22521aa3 100644 --- a/src/3rdparty/hwloc/src/topology-windows.c +++ b/src/3rdparty/hwloc/src/topology-windows.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -9,10 +9,10 @@ /* To try to get all declarations duplicated below. */ #define _WIN32_WINNT 0x0601 -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/private.h" +#include "private/debug.h" #include @@ -731,8 +731,14 @@ hwloc_win_get_area_memlocation(hwloc_topology_t topology __hwloc_attribute_unuse */ static int -hwloc_look_windows(struct hwloc_backend *backend) +hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { + /* + * This backend uses the underlying OS. + * However we don't enforce topology->is_thissystem so that + * we may still force use this backend when debugging with !thissystem. 
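/*
 * A minimal sketch of the new Die level in synthetic descriptions, assuming the
 * public hwloc_topology_set_synthetic() entry point; the description string is
 * only an example (2 packages x 2 dies x 4 cores x 2 PUs).
 */
#include <hwloc.h>

static int build_synthetic_with_dies(hwloc_topology_t *out)
{
  hwloc_topology_t topology;
  if (hwloc_topology_init(&topology) < 0)
    return -1;
  if (hwloc_topology_set_synthetic(topology, "pack:2 die:2 core:4 pu:2") < 0
      || hwloc_topology_load(topology) < 0) {
    hwloc_topology_destroy(topology);
    return -1;
  }
  *out = topology;
  return 0;
}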
+ */ + struct hwloc_topology *topology = backend->topology; hwloc_bitmap_t groups_pu_set = NULL; SYSTEM_INFO SystemInfo; @@ -740,6 +746,8 @@ hwloc_look_windows(struct hwloc_backend *backend) int gotnuma = 0; int gotnumamemory = 0; + assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); + if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return -1; @@ -1136,13 +1144,15 @@ static void hwloc_windows_component_finalize(unsigned long flags __hwloc_attribu } static struct hwloc_backend * -hwloc_windows_component_instantiate(struct hwloc_disc_component *component, +hwloc_windows_component_instantiate(struct hwloc_topology *topology, + struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) { struct hwloc_backend *backend; - backend = hwloc_backend_alloc(component); + backend = hwloc_backend_alloc(topology, component); if (!backend) return NULL; backend->discover = hwloc_look_windows; @@ -1150,9 +1160,9 @@ hwloc_windows_component_instantiate(struct hwloc_disc_component *component, } static struct hwloc_disc_component hwloc_windows_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_CPU, "windows", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_CPU, + HWLOC_DISC_PHASE_GLOBAL, hwloc_windows_component_instantiate, 50, 1, @@ -1168,10 +1178,12 @@ const struct hwloc_component hwloc_windows_component = { }; int -hwloc_fallback_nbprocessors(struct hwloc_topology *topology __hwloc_attribute_unused) { +hwloc_fallback_nbprocessors(unsigned flags __hwloc_attribute_unused) { int n; SYSTEM_INFO sysinfo; + /* TODO handle flags & HWLOC_FALLBACK_NBPROCESSORS_INCLUDE_OFFLINE */ + /* by default, ignore groups (return only the number in the current group) */ GetSystemInfo(&sysinfo); n = sysinfo.dwNumberOfProcessors; /* FIXME could be non-contigous, rather return a mask from dwActiveProcessorMask? */ diff --git a/src/3rdparty/hwloc/src/topology-x86.c b/src/3rdparty/hwloc/src/topology-x86.c index 4aefdcf1..1060157d 100644 --- a/src/3rdparty/hwloc/src/topology-x86.c +++ b/src/3rdparty/hwloc/src/topology-x86.c @@ -14,13 +14,12 @@ * on various architectures, without having to use this x86-specific code. 
*/ -#include -#include -#include -#include -#include - -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/private.h" +#include "private/debug.h" +#include "private/misc.h" +#include "private/cpuid-x86.h" #include #ifdef HAVE_DIRENT_H @@ -70,6 +69,8 @@ cpuiddump_read(const char *dirpath, unsigned idx) { struct cpuiddump *cpuiddump; struct cpuiddump_entry *cur; + size_t filenamelen; + char *filename; FILE *file; char line[128]; unsigned nr; @@ -80,16 +81,16 @@ cpuiddump_read(const char *dirpath, unsigned idx) goto out; } - { - size_t filenamelen = strlen(dirpath) + 15; - HWLOC_VLA(char, filename, filenamelen); + filenamelen = strlen(dirpath) + 15; + filename = malloc(filenamelen); + if (!filename) + goto out_with_dump; snprintf(filename, filenamelen, "%s/pu%u", dirpath, idx); file = fopen(filename, "r"); if (!file) { fprintf(stderr, "Could not read dumped cpuid file %s, ignoring cpuiddump.\n", filename); - goto out_with_dump; + goto out_with_filename; } - } nr = 0; while (fgets(line, sizeof(line), file)) @@ -117,10 +118,13 @@ cpuiddump_read(const char *dirpath, unsigned idx) cpuiddump->nr = nr; fclose(file); + free(filename); return cpuiddump; out_with_file: fclose(file); + out_with_filename: + free(filename); out_with_dump: free(cpuiddump); out: @@ -170,6 +174,11 @@ static void cpuid_or_from_dump(unsigned *eax, unsigned *ebx, unsigned *ecx, unsi * Core detection routines and structures */ +enum hwloc_x86_disc_flags { + HWLOC_X86_DISC_FLAG_FULL = (1<<0), /* discover everything instead of only annotating */ + HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES = (1<<1) /* use AMD topoext numanode information */ +}; + #define has_topoext(features) ((features)[6] & (1 << 22)) #define has_x2apic(features) ((features)[4] & (1 << 21)) @@ -190,12 +199,15 @@ struct cacheinfo { struct procinfo { unsigned present; unsigned apicid; - unsigned packageid; - unsigned dieid; - unsigned nodeid; - unsigned unitid; - unsigned threadid; - unsigned coreid; +#define PKG 0 +#define CORE 1 +#define NODE 2 +#define UNIT 3 +#define TILE 4 +#define MODULE 5 +#define DIE 6 +#define HWLOC_X86_PROCINFO_ID_NR 7 + unsigned ids[HWLOC_X86_PROCINFO_ID_NR]; unsigned *otherids; unsigned levels; unsigned numcaches; @@ -215,7 +227,8 @@ enum cpuid_type { unknown }; -static void fill_amd_cache(struct procinfo *infos, unsigned level, hwloc_obj_cache_type_t type, unsigned nbthreads_sharing, unsigned cpuid) +/* AMD legacy cache information from specific CPUID 0x80000005-6 leaves */ +static void setup__amd_cache_legacy(struct procinfo *infos, unsigned level, hwloc_obj_cache_type_t type, unsigned nbthreads_sharing, unsigned cpuid) { struct cacheinfo *cache, *tmpcaches; unsigned cachenum; @@ -262,7 +275,249 @@ static void fill_amd_cache(struct procinfo *infos, unsigned level, hwloc_obj_cac hwloc_debug("cache L%u t%u linesize %u ways %d size %luKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10); } -static void look_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuiddump *src_cpuiddump) +/* AMD legacy cache information from CPUID 0x80000005-6 leaves */ +static void read_amd_caches_legacy(struct procinfo *infos, struct cpuiddump *src_cpuiddump, unsigned legacy_max_log_proc) +{ + unsigned eax, ebx, ecx, edx; + + eax = 0x80000005; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + setup__amd_cache_legacy(infos, 1, HWLOC_OBJ_CACHE_DATA, 1, ecx); /* private L1d */ + setup__amd_cache_legacy(infos, 1, HWLOC_OBJ_CACHE_INSTRUCTION, 1, edx); /* private L1i */ 
+ + eax = 0x80000006; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + if (ecx & 0xf000) + /* This is actually supported on Intel but LinePerTag isn't returned in bits 8-11. + * Could be useful if some Intels (at least before Core micro-architecture) + * support this leaf without leaf 0x4. + */ + setup__amd_cache_legacy(infos, 2, HWLOC_OBJ_CACHE_UNIFIED, 1, ecx); /* private L2u */ + if (edx & 0xf000) + setup__amd_cache_legacy(infos, 3, HWLOC_OBJ_CACHE_UNIFIED, legacy_max_log_proc, edx); /* package-wide L3u */ +} + +/* AMD caches from CPUID 0x8000001d leaf (topoext) */ +static void read_amd_caches_topoext(struct procinfo *infos, struct cpuiddump *src_cpuiddump) +{ + unsigned eax, ebx, ecx, edx; + unsigned cachenum; + struct cacheinfo *cache; + + /* the code below doesn't want any other cache yet */ + assert(!infos->numcaches); + + for (cachenum = 0; ; cachenum++) { + eax = 0x8000001d; + ecx = cachenum; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + if ((eax & 0x1f) == 0) + break; + infos->numcaches++; + } + + cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache)); + if (cache) { + for (cachenum = 0; ; cachenum++) { + unsigned long linesize, linepart, ways, sets; + eax = 0x8000001d; + ecx = cachenum; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + + if ((eax & 0x1f) == 0) + break; + switch (eax & 0x1f) { + case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break; + case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break; + default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break; + } + + cache->level = (eax >> 5) & 0x7; + /* Note: actually number of cores */ + cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1; + + cache->linesize = linesize = (ebx & 0xfff) + 1; + cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1; + ways = ((ebx >> 22) & 0x3ff) + 1; + + if (eax & (1 << 9)) + /* Fully associative */ + cache->ways = -1; + else + cache->ways = ways; + cache->sets = sets = ecx + 1; + cache->size = linesize * linepart * ways * sets; + cache->inclusive = edx & 0x2; + + hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", + cachenum, cache->level, + cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 
'i' : 'u', + cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10); + + cache++; + } + } else { + infos->numcaches = 0; + } +} + +/* Intel cache info from CPUID 0x04 leaf */ +static void read_intel_caches(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, struct cpuiddump *src_cpuiddump) +{ + unsigned level; + struct cacheinfo *tmpcaches; + unsigned eax, ebx, ecx, edx; + unsigned oldnumcaches = infos->numcaches; /* in case we got caches above */ + unsigned cachenum; + struct cacheinfo *cache; + + for (cachenum = 0; ; cachenum++) { + eax = 0x04; + ecx = cachenum; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + + hwloc_debug("cache %u type %u\n", cachenum, eax & 0x1f); + if ((eax & 0x1f) == 0) + break; + level = (eax >> 5) & 0x7; + if (data->is_knl && level == 3) + /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine, ignore it */ + break; + infos->numcaches++; + } + + tmpcaches = realloc(infos->cache, infos->numcaches * sizeof(*infos->cache)); + if (!tmpcaches) { + infos->numcaches = oldnumcaches; + } else { + infos->cache = tmpcaches; + cache = &infos->cache[oldnumcaches]; + + for (cachenum = 0; ; cachenum++) { + unsigned long linesize, linepart, ways, sets; + eax = 0x04; + ecx = cachenum; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + + if ((eax & 0x1f) == 0) + break; + level = (eax >> 5) & 0x7; + if (data->is_knl && level == 3) + /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine, ignore it */ + break; + switch (eax & 0x1f) { + case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break; + case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break; + default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break; + } + + cache->level = level; + cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1; + + cache->linesize = linesize = (ebx & 0xfff) + 1; + cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1; + ways = ((ebx >> 22) & 0x3ff) + 1; + if (eax & (1 << 9)) + /* Fully associative */ + cache->ways = -1; + else + cache->ways = ways; + cache->sets = sets = ecx + 1; + cache->size = linesize * linepart * ways * sets; + cache->inclusive = edx & 0x2; + + hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", + cachenum, cache->level, + cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 'i' : 'u', + cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10); + cache++; + } + } +} + +/* AMD core/thread info from CPUID 0x80000008 leaf */ +static void read_amd_cores_legacy(struct procinfo *infos, struct cpuiddump *src_cpuiddump) +{ + unsigned eax, ebx, ecx, edx; + unsigned max_nbcores; + unsigned max_nbthreads; + unsigned coreidsize; + unsigned logprocid; + unsigned threadid __hwloc_attribute_unused; + + eax = 0x80000008; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + + coreidsize = (ecx >> 12) & 0xf; + hwloc_debug("core ID size: %u\n", coreidsize); + if (!coreidsize) { + max_nbcores = (ecx & 0xff) + 1; + } else + max_nbcores = 1 << coreidsize; + hwloc_debug("Thus max # of cores: %u\n", max_nbcores); + + /* No multithreaded AMD for this old CPUID leaf */ + max_nbthreads = 1 ; + hwloc_debug("and max # of threads: %u\n", max_nbthreads); + + /* legacy_max_log_proc is deprecated, it can be smaller than max_nbcores, + * which is the maximum number of cores that the processor could theoretically support + * (see "Multiple Core Calculation" in the AMD CPUID specification). 
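In the legacy AMD path above, identifiers come purely from APIC-ID arithmetic: with max_nbcores = 8 and max_nbthreads = 1, APIC ID 11 gives package 1, logical processor 3 within the package, and core 3. A rough sketch of that split, mirroring the divisions in this hunk (the struct and function names are hypothetical):

/* Split a legacy APIC ID into package/core/thread indexes.
 * max_nbcores/max_nbthreads come from CPUID 0x80000008 as in the hunk above. */
struct legacy_ids { unsigned pkg, core, thread; };

static struct legacy_ids split_legacy_apicid(unsigned apicid,
                                             unsigned max_nbcores,
                                             unsigned max_nbthreads)
{
    struct legacy_ids ids;
    unsigned logprocid = apicid % max_nbcores;   /* position inside the package */

    ids.pkg    = apicid / max_nbcores;
    ids.core   = logprocid / max_nbthreads;
    ids.thread = logprocid % max_nbthreads;
    return ids;
}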
+ * Recompute packageid/coreid accordingly. + */ + infos->ids[PKG] = infos->apicid / max_nbcores; + logprocid = infos->apicid % max_nbcores; + infos->ids[CORE] = logprocid / max_nbthreads; + threadid = logprocid % max_nbthreads; + hwloc_debug("this is thread %u of core %u\n", threadid, infos->ids[CORE]); +} + +/* AMD unit/node from CPUID 0x8000001e leaf (topoext) */ +static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags, struct cpuiddump *src_cpuiddump) +{ + unsigned apic_id, nodes_per_proc = 0; + unsigned eax, ebx, ecx, edx; + + eax = 0x8000001e; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + infos->apicid = apic_id = eax; + + if (flags & HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES) { + if (infos->cpufamilynumber == 0x16) { + /* ecx is reserved */ + infos->ids[NODE] = 0; + nodes_per_proc = 1; + } else { + /* AMD other families or Hygon family 18h */ + infos->ids[NODE] = ecx & 0xff; + nodes_per_proc = ((ecx >> 8) & 7) + 1; + } + if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2) + || ((infos->cpufamilynumber == 0x17 || infos->cpufamilynumber == 0x18) && nodes_per_proc > 4)) { + hwloc_debug("warning: undefined nodes_per_proc value %u, assuming it means %u\n", nodes_per_proc, nodes_per_proc); + } + } + + if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in 0x15 and compute-units were only used in 0x15 and 0x16 */ + unsigned cores_per_unit; + /* coreid was obtained from read_amd_cores_legacy() earlier */ + infos->ids[UNIT] = ebx & 0xff; + cores_per_unit = ((ebx >> 8) & 0xff) + 1; + hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, infos->ids[NODE], cores_per_unit, infos->ids[UNIT]); + /* coreid and unitid are package-wide (core 0-15 and unit 0-7 on 16-core 2-NUMAnode processor). + * The Linux kernel reduces theses to NUMA-node-wide (by applying %core_per_node and %unit_per node respectively). + * It's not clear if we should do this as well. 
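read_amd_cores_topoext() above unpacks leaf 0x8000001e as follows: EAX holds the extended APIC ID, ECX holds the NUMA node ID in bits 7:0 and the nodes-per-processor count (stored minus one) in bits 10:8, and EBX holds the compute-unit ID (families 0x15/0x16) or core ID (later families) in bits 7:0 with the sharing count (stored minus one) in bits 15:8. A compact decode sketch reusing the same bitfield accesses; the struct and function names are hypothetical:

struct topoext_ids {
    unsigned apicid;
    unsigned nodeid;
    unsigned nodes_per_proc;
    unsigned unit_or_core;   /* compute unit on family 0x15/0x16, core otherwise */
    unsigned sharing;        /* cores per unit, or threads per core */
};

/* Decode the registers returned by CPUID leaf 0x8000001e (AMD topoext). */
static void decode_topoext(unsigned eax, unsigned ebx, unsigned ecx,
                           struct topoext_ids *out)
{
    out->apicid         = eax;
    out->nodeid         = ecx & 0xff;
    out->nodes_per_proc = ((ecx >> 8) & 7) + 1;
    out->unit_or_core   = ebx & 0xff;
    out->sharing        = ((ebx >> 8) & 0xff) + 1;
}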
+ */ + } else { + unsigned threads_per_core; + infos->ids[CORE] = ebx & 0xff; + threads_per_core = ((ebx >> 8) & 0xff) + 1; + hwloc_debug("topoext %08x, %u nodes, node %u, %u threads in core %u\n", apic_id, nodes_per_proc, infos->ids[NODE], threads_per_core, infos->ids[CORE]); + } +} + +/* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration) */ +static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuiddump *src_cpuiddump) { unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id; unsigned threadid __hwloc_attribute_unused = 0; /* shut-up compiler */ @@ -302,11 +557,19 @@ static void look_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuid /* apic_number is the actual number of threads per core */ break; case 2: - infos->coreid = id; - /* apic_number is the actual number of threads per module */ + infos->ids[CORE] = id; + /* apic_number is the actual number of threads per die */ + break; + case 3: + infos->ids[MODULE] = id; + /* apic_number is the actual number of threads per tile */ + break; + case 4: + infos->ids[TILE] = id; + /* apic_number is the actual number of threads per die */ break; case 5: - infos->dieid = id; + infos->ids[DIE] = id; /* apic_number is the actual number of threads per package */ break; default: @@ -317,16 +580,16 @@ static void look_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuid apic_shift = apic_nextshift; } infos->apicid = apic_id; - infos->packageid = apic_id >> apic_shift; - hwloc_debug("x2APIC remainder: %u\n", infos->packageid); - hwloc_debug("this is thread %u of core %u\n", threadid, infos->coreid); + infos->ids[PKG] = apic_id >> apic_shift; + hwloc_debug("x2APIC remainder: %u\n", infos->ids[PKG]); + hwloc_debug("this is thread %u of core %u\n", threadid, infos->ids[CORE]); } } } /* Fetch information from the processor itself thanks to cpuid and store it in * infos for summarize to analyze them globally */ -static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type, struct cpuiddump *src_cpuiddump) +static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, unsigned long flags, unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type, struct cpuiddump *src_cpuiddump) { struct hwloc_x86_backend_data_s *data = backend->private_data; unsigned eax, ebx, ecx = 0, edx; @@ -348,9 +611,9 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns else legacy_max_log_proc = 1; hwloc_debug("APIC ID 0x%02x legacy_max_log_proc %u\n", infos->apicid, legacy_max_log_proc); - infos->packageid = infos->apicid / legacy_max_log_proc; + infos->ids[PKG] = infos->apicid / legacy_max_log_proc; legacy_log_proc_id = infos->apicid % legacy_max_log_proc; - hwloc_debug("phys %u legacy thread %u\n", infos->packageid, legacy_log_proc_id); + hwloc_debug("phys %u legacy thread %u\n", infos->ids[PKG], legacy_log_proc_id); /* Get cpu model/family/stepping numbers from same cpuid */ _model = (eax>>4) & 0xf; @@ -397,258 +660,88 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns /* infos was calloc'ed, already ends with \0 */ } - /* Get core/thread information from cpuid 0x80000008 - * (not supported on Intel) - */ - if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000008) { - 
unsigned max_nbcores; - unsigned max_nbthreads; - unsigned coreidsize; - unsigned logprocid; - eax = 0x80000008; - cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); - coreidsize = (ecx >> 12) & 0xf; - hwloc_debug("core ID size: %u\n", coreidsize); - if (!coreidsize) { - max_nbcores = (ecx & 0xff) + 1; - } else - max_nbcores = 1 << coreidsize; - hwloc_debug("Thus max # of cores: %u\n", max_nbcores); - /* Still no multithreaded AMD */ - max_nbthreads = 1 ; - hwloc_debug("and max # of threads: %u\n", max_nbthreads); - /* legacy_max_log_proc is deprecated, it can be smaller than max_nbcores, - * which is the maximum number of cores that the processor could theoretically support - * (see "Multiple Core Calculation" in the AMD CPUID specification). - * Recompute packageid/threadid/coreid accordingly. - */ - infos->packageid = infos->apicid / max_nbcores; - logprocid = infos->apicid % max_nbcores; - infos->threadid = logprocid % max_nbthreads; - infos->coreid = logprocid / max_nbthreads; - hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid); - } - - infos->numcaches = 0; - infos->cache = NULL; - - /* Get apicid, nodeid, unitid from cpuid 0x8000001e - * and cache information from cpuid 0x8000001d - * (AMD topology extension) - */ - if (cpuid_type != intel && cpuid_type != zhaoxin && has_topoext(features)) { - unsigned apic_id, node_id, nodes_per_proc; - - /* the code below doesn't want any other cache yet */ - assert(!infos->numcaches); - - eax = 0x8000001e; - cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); - infos->apicid = apic_id = eax; - - if (infos->cpufamilynumber == 0x16) { - /* ecx is reserved */ - node_id = 0; - nodes_per_proc = 1; - } else { - /* AMD other families or Hygon family 18h */ - node_id = ecx & 0xff; - nodes_per_proc = ((ecx >> 8) & 7) + 1; - } - infos->nodeid = node_id; - if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2) - || ((infos->cpufamilynumber == 0x17 || infos->cpufamilynumber == 0x18) && nodes_per_proc > 4)) { - hwloc_debug("warning: undefined nodes_per_proc value %u, assuming it means %u\n", nodes_per_proc, nodes_per_proc); - } - - if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in 0x15 and compute-units were only used in 0x15 and 0x16 */ - unsigned unit_id, cores_per_unit; - infos->unitid = unit_id = ebx & 0xff; - cores_per_unit = ((ebx >> 8) & 0xff) + 1; - hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, node_id, cores_per_unit, unit_id); - /* coreid and unitid are package-wide (core 0-15 and unit 0-7 on 16-core 2-NUMAnode processor). - * The Linux kernel reduces theses to NUMA-node-wide (by applying %core_per_node and %unit_per node respectively). - * It's not clear if we should do this as well. 
- */ - } else { - unsigned core_id, threads_per_core; - infos->coreid = core_id = ebx & 0xff; - threads_per_core = ((ebx >> 8) & 0xff) + 1; - hwloc_debug("topoext %08x, %u nodes, node %u, %u threads in core %u\n", apic_id, nodes_per_proc, node_id, threads_per_core, core_id); - } - - for (cachenum = 0; ; cachenum++) { - eax = 0x8000001d; - ecx = cachenum; - cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); - if ((eax & 0x1f) == 0) - break; - infos->numcaches++; - } - - cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache)); - if (cache) { - for (cachenum = 0; ; cachenum++) { - unsigned long linesize, linepart, ways, sets; - eax = 0x8000001d; - ecx = cachenum; - cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); - - if ((eax & 0x1f) == 0) - break; - switch (eax & 0x1f) { - case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break; - case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break; - default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break; - } - - cache->level = (eax >> 5) & 0x7; - /* Note: actually number of cores */ - cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1; - - cache->linesize = linesize = (ebx & 0xfff) + 1; - cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1; - ways = ((ebx >> 22) & 0x3ff) + 1; - - if (eax & (1 << 9)) - /* Fully associative */ - cache->ways = -1; - else - cache->ways = ways; - cache->sets = sets = ecx + 1; - cache->size = linesize * linepart * ways * sets; - cache->inclusive = edx & 0x2; - - hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", - cachenum, cache->level, - cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 'i' : 'u', - cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10); - - cache++; - } - } else { - infos->numcaches = 0; - } - } else { - /* If there's no topoext, - * get cache information from cpuid 0x80000005 and 0x80000006 - * (not supported on Intel) - */ - if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000005) { - eax = 0x80000005; - cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); - fill_amd_cache(infos, 1, HWLOC_OBJ_CACHE_DATA, 1, ecx); /* private L1d */ - fill_amd_cache(infos, 1, HWLOC_OBJ_CACHE_INSTRUCTION, 1, edx); /* private L1i */ - } - if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000006) { - eax = 0x80000006; - cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); - if (ecx & 0xf000) - /* This is actually supported on Intel but LinePerTag isn't returned in bits 8-11. - * Could be useful if some Intels (at least before Core micro-architecture) - * support this leaf without leaf 0x4. 
- */ - fill_amd_cache(infos, 2, HWLOC_OBJ_CACHE_UNIFIED, 1, ecx); /* private L2u */ - if (edx & 0xf000) - fill_amd_cache(infos, 3, HWLOC_OBJ_CACHE_UNIFIED, legacy_max_log_proc, edx); /* package-wide L3u */ - } - } - - /* Get thread/core + cache information from cpuid 0x04 - * (not supported on AMD) - */ if ((cpuid_type != amd && cpuid_type != hygon) && highest_cpuid >= 0x04) { - unsigned max_nbcores; - unsigned max_nbthreads; - unsigned level; - struct cacheinfo *tmpcaches; - unsigned oldnumcaches = infos->numcaches; /* in case we got caches above */ - - for (cachenum = 0; ; cachenum++) { - eax = 0x04; - ecx = cachenum; - cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); - - hwloc_debug("cache %u type %u\n", cachenum, eax & 0x1f); - if ((eax & 0x1f) == 0) - break; - level = (eax >> 5) & 0x7; - if (data->is_knl && level == 3) - /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine, ignore it */ - break; - infos->numcaches++; - - if (!cachenum) { - /* by the way, get thread/core information from the first cache */ - max_nbcores = ((eax >> 26) & 0x3f) + 1; - max_nbthreads = legacy_max_log_proc / max_nbcores; - hwloc_debug("thus %u threads\n", max_nbthreads); - infos->threadid = legacy_log_proc_id % max_nbthreads; - infos->coreid = legacy_log_proc_id / max_nbthreads; - hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid); - } + /* Get core/thread information from first cache reported by cpuid 0x04 + * (not supported on AMD) + */ + eax = 0x04; + ecx = 0; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + if ((eax & 0x1f) != 0) { + /* cache looks valid */ + unsigned max_nbcores; + unsigned max_nbthreads; + unsigned threadid __hwloc_attribute_unused; + max_nbcores = ((eax >> 26) & 0x3f) + 1; + max_nbthreads = legacy_max_log_proc / max_nbcores; + hwloc_debug("thus %u threads\n", max_nbthreads); + threadid = legacy_log_proc_id % max_nbthreads; + infos->ids[CORE] = legacy_log_proc_id / max_nbthreads; + hwloc_debug("this is thread %u of core %u\n", threadid, infos->ids[CORE]); } + } - tmpcaches = realloc(infos->cache, infos->numcaches * sizeof(*infos->cache)); - if (!tmpcaches) { - infos->numcaches = oldnumcaches; - } else { - infos->cache = tmpcaches; - cache = &infos->cache[oldnumcaches]; + /********************************************************************************* + * Get the hierarchy of thread, core, die, package, etc. from CPU-specific leaves + */ - for (cachenum = 0; ; cachenum++) { - unsigned long linesize, linepart, ways, sets; - eax = 0x04; - ecx = cachenum; - cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000008 && !has_x2apic(features)) { + /* Get core/thread information from cpuid 0x80000008 + * (not supported on Intel) + * We could ignore this codepath when x2apic is supported, but we may need + * nodeids if HWLOC_X86_TOPOEXT_NUMANODES is set. 
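The HWLOC_X86_TOPOEXT_NUMANODES environment variable referenced in this comment is read in hwloc_x86_discover() further down and turned into HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES. A minimal usage sketch with the public hwloc API; setenv() is assumed to be available (POSIX), with _putenv_s() as the rough MSVC equivalent:

#include <stdlib.h>
#include <hwloc.h>

int main(void)
{
    hwloc_topology_t topology;

    /* Must be set before the topology is loaded so the x86 backend sees it. */
    setenv("HWLOC_X86_TOPOEXT_NUMANODES", "1", 1);

    hwloc_topology_init(&topology);
    hwloc_topology_load(topology);      /* NUMA nodes from AMD topoext, if any */
    hwloc_topology_destroy(topology);
    return 0;
}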
+ */ + read_amd_cores_legacy(infos, src_cpuiddump); + } - if ((eax & 0x1f) == 0) - break; - level = (eax >> 5) & 0x7; - if (data->is_knl && level == 3) - /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine, ignore it */ - break; - switch (eax & 0x1f) { - case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break; - case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break; - default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break; - } - - cache->level = level; - cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1; - - cache->linesize = linesize = (ebx & 0xfff) + 1; - cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1; - ways = ((ebx >> 22) & 0x3ff) + 1; - if (eax & (1 << 9)) - /* Fully associative */ - cache->ways = -1; - else - cache->ways = ways; - cache->sets = sets = ecx + 1; - cache->size = linesize * linepart * ways * sets; - cache->inclusive = edx & 0x2; - - hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", - cachenum, cache->level, - cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 'i' : 'u', - cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10); - cache++; - } - } + if (cpuid_type != intel && cpuid_type != zhaoxin && has_topoext(features)) { + /* Get apicid, nodeid, unitid/coreid from cpuid 0x8000001e (AMD topology extension). + * Requires read_amd_cores_legacy() for coreid on family 0x15-16. + * + * Only needed when x2apic supported if NUMA nodes are needed. + */ + read_amd_cores_topoext(infos, flags, src_cpuiddump); } if ((cpuid_type == intel) && highest_cpuid >= 0x1f) { /* Get package/die/module/tile/core/thread information from cpuid 0x1f * (Intel v2 Extended Topology Enumeration) */ - look_exttopoenum(infos, 0x1f, src_cpuiddump); + read_intel_cores_exttopoenum(infos, 0x1f, src_cpuiddump); - } else if ((cpuid_type == intel || cpuid_type == zhaoxin) && highest_cpuid >= 0x0b && has_x2apic(features)) { + } else if ((cpuid_type == intel || cpuid_type == amd || cpuid_type == zhaoxin) + && highest_cpuid >= 0x0b && has_x2apic(features)) { /* Get package/core/thread information from cpuid 0x0b * (Intel v1 Extended Topology Enumeration) */ - look_exttopoenum(infos, 0x0b, src_cpuiddump); + read_intel_cores_exttopoenum(infos, 0x0b, src_cpuiddump); + } + + /************************************** + * Get caches from CPU-specific leaves + */ + + infos->numcaches = 0; + infos->cache = NULL; + + if (cpuid_type != intel && cpuid_type != zhaoxin && has_topoext(features)) { + /* Get cache information from cpuid 0x8000001d (AMD topology extension) */ + read_amd_caches_topoext(infos, src_cpuiddump); + + } else if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000006) { + /* If there's no topoext, + * get cache information from cpuid 0x80000005 and 0x80000006. + * (not supported on Intel) + * It looks like we cannot have 0x80000005 without 0x80000006. 
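Both read_amd_caches_topoext() and read_intel_caches() above reduce each reported cache to line size * line partitions * ways * sets, with every field stored minus one in EBX/ECX. A standalone sketch of that computation using the same bitfields as the code above (the function name is hypothetical):

/* Compute a cache size from the EBX/ECX registers of CPUID leaf 0x04
 * (Intel) or 0x8000001d (AMD topoext); all fields are stored minus one. */
static unsigned long cache_size_from_cpuid(unsigned ebx, unsigned ecx)
{
    unsigned long linesize = (ebx & 0xfff) + 1;
    unsigned long linepart = ((ebx >> 12) & 0x3ff) + 1;
    unsigned long ways     = ((ebx >> 22) & 0x3ff) + 1;
    unsigned long sets     = (unsigned long) ecx + 1;

    return linesize * linepart * ways * sets;   /* bytes */
}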
+ */ + read_amd_caches_legacy(infos, src_cpuiddump, legacy_max_log_proc); + } + + if ((cpuid_type != amd && cpuid_type != hygon) && highest_cpuid >= 0x04) { + /* Get cache information from cpuid 0x04 + * (not supported on AMD) + */ + read_intel_caches(data, infos, src_cpuiddump); } /* Now that we have all info, compute cacheids and apply quirks */ @@ -736,8 +829,55 @@ hwloc_x86_add_cpuinfos(hwloc_obj_t obj, struct procinfo *info, int replace) hwloc__add_info_nodup(&obj->infos, &obj->infos_count, "CPUStepping", number, replace); } +static void +hwloc_x86_add_groups(hwloc_topology_t topology, + struct procinfo *infos, + unsigned nbprocs, + hwloc_bitmap_t remaining_cpuset, + unsigned type, + const char *subtype, + unsigned kind, + int dont_merge) +{ + hwloc_bitmap_t obj_cpuset; + hwloc_obj_t obj; + unsigned i, j; + + while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { + unsigned packageid = infos[i].ids[PKG]; + unsigned id = infos[i].ids[type]; + + if (id == (unsigned)-1) { + hwloc_bitmap_clr(remaining_cpuset, i); + continue; + } + + obj_cpuset = hwloc_bitmap_alloc(); + for (j = i; j < nbprocs; j++) { + if (infos[j].ids[type] == (unsigned) -1) { + hwloc_bitmap_clr(remaining_cpuset, j); + continue; + } + + if (infos[j].ids[PKG] == packageid && infos[j].ids[type] == id) { + hwloc_bitmap_set(obj_cpuset, j); + hwloc_bitmap_clr(remaining_cpuset, j); + } + } + + obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, id); + obj->cpuset = obj_cpuset; + obj->subtype = strdup(subtype); + obj->attr->group.kind = kind; + obj->attr->group.dont_merge = dont_merge; + hwloc_debug_2args_bitmap("os %s %u has cpuset %s\n", + subtype, id, obj_cpuset); + hwloc_insert_object_by_cpuset(topology, obj); + } +} + /* Analyse information stored in infos, and build/annotate topology levels accordingly */ -static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery) +static void summarize(struct hwloc_backend *backend, struct procinfo *infos, unsigned long flags) { struct hwloc_topology *topology = backend->topology; struct hwloc_x86_backend_data_s *data = backend->private_data; @@ -747,6 +887,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int int one = -1; hwloc_bitmap_t remaining_cpuset; int gotnuma = 0; + int fulldiscovery = (flags & HWLOC_X86_DISC_FLAG_FULL); for (i = 0; i < nbprocs; i++) if (infos[i].present) { @@ -773,11 +914,11 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { if (fulldiscovery) { - unsigned packageid = infos[i].packageid; + unsigned packageid = infos[i].ids[PKG]; hwloc_bitmap_t package_cpuset = hwloc_bitmap_alloc(); for (j = i; j < nbprocs; j++) { - if (infos[j].packageid == packageid) { + if (infos[j].ids[PKG] == packageid) { hwloc_bitmap_set(package_cpuset, j); hwloc_bitmap_clr(remaining_cpuset, j); } @@ -811,7 +952,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int } /* Look for Numa nodes inside packages (cannot be filtered-out) */ - if (fulldiscovery && getenv("HWLOC_X86_TOPOEXT_NUMANODES")) { + if (fulldiscovery && (flags & HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES)) { hwloc_bitmap_t node_cpuset; hwloc_obj_t node; @@ -819,8 +960,8 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); while ((i = hwloc_bitmap_first(remaining_cpuset)) 
!= (unsigned) -1) { - unsigned packageid = infos[i].packageid; - unsigned nodeid = infos[i].nodeid; + unsigned packageid = infos[i].ids[PKG]; + unsigned nodeid = infos[i].ids[NODE]; if (nodeid == (unsigned)-1) { hwloc_bitmap_clr(remaining_cpuset, i); @@ -829,12 +970,12 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int node_cpuset = hwloc_bitmap_alloc(); for (j = i; j < nbprocs; j++) { - if (infos[j].nodeid == (unsigned) -1) { + if (infos[j].ids[NODE] == (unsigned) -1) { hwloc_bitmap_clr(remaining_cpuset, j); continue; } - if (infos[j].packageid == packageid && infos[j].nodeid == nodeid) { + if (infos[j].ids[PKG] == packageid && infos[j].ids[NODE] == nodeid) { hwloc_bitmap_set(node_cpuset, j); hwloc_bitmap_clr(remaining_cpuset, j); } @@ -852,77 +993,21 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) { if (fulldiscovery) { - char *env; - int dont_merge; - hwloc_bitmap_t unit_cpuset, die_cpuset; - hwloc_obj_t unit, die; - - /* Look for Compute units inside packages */ + /* Look for AMD Compute units inside packages */ hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); - while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { - unsigned packageid = infos[i].packageid; - unsigned unitid = infos[i].unitid; - - if (unitid == (unsigned)-1) { - hwloc_bitmap_clr(remaining_cpuset, i); - continue; - } - - unit_cpuset = hwloc_bitmap_alloc(); - for (j = i; j < nbprocs; j++) { - if (infos[j].unitid == (unsigned) -1) { - hwloc_bitmap_clr(remaining_cpuset, j); - continue; - } - - if (infos[j].packageid == packageid && infos[j].unitid == unitid) { - hwloc_bitmap_set(unit_cpuset, j); - hwloc_bitmap_clr(remaining_cpuset, j); - } - } - unit = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, unitid); - unit->cpuset = unit_cpuset; - unit->subtype = strdup("ComputeUnit"); - unit->attr->group.kind = HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT; - hwloc_debug_1arg_bitmap("os unit %u has cpuset %s\n", - unitid, unit_cpuset); - hwloc_insert_object_by_cpuset(topology, unit); - } - - /* Look for Dies inside packages */ - env = getenv("HWLOC_DONT_MERGE_DIE_GROUPS"); - dont_merge = env && atoi(env); + hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, + UNIT, "Compute Unit", + HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT, 0); + /* Look for Intel Modules inside packages */ hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); - while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { - unsigned packageid = infos[i].packageid; - unsigned dieid = infos[i].dieid; - - if (dieid == (unsigned)-1) { - hwloc_bitmap_clr(remaining_cpuset, i); - continue; - } - - die_cpuset = hwloc_bitmap_alloc(); - for (j = i; j < nbprocs; j++) { - if (infos[j].dieid == (unsigned) -1) { - hwloc_bitmap_clr(remaining_cpuset, j); - continue; - } - - if (infos[j].packageid == packageid && infos[j].dieid == dieid) { - hwloc_bitmap_set(die_cpuset, j); - hwloc_bitmap_clr(remaining_cpuset, j); - } - } - die = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, dieid); - die->cpuset = die_cpuset; - die->subtype = strdup("Die"); - die->attr->group.kind = HWLOC_GROUP_KIND_INTEL_DIE; - die->attr->group.dont_merge = dont_merge; - hwloc_debug_1arg_bitmap("os die %u has cpuset %s\n", - dieid, die_cpuset); - hwloc_insert_object_by_cpuset(topology, die); - } + hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, + MODULE, "Module", + HWLOC_GROUP_KIND_INTEL_MODULE, 0); + /* Look for Intel Tiles 
inside packages */ + hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); + hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, + TILE, "Tile", + HWLOC_GROUP_KIND_INTEL_TILE, 0); /* Look for unknown objects */ if (infos[one].otherids) { @@ -956,6 +1041,43 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int } } + if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_DIE)) { + /* Look for Intel Dies inside packages */ + if (fulldiscovery) { + hwloc_bitmap_t die_cpuset; + hwloc_obj_t die; + + hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); + while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { + unsigned packageid = infos[i].ids[PKG]; + unsigned dieid = infos[i].ids[DIE]; + + if (dieid == (unsigned) -1) { + hwloc_bitmap_clr(remaining_cpuset, i); + continue; + } + + die_cpuset = hwloc_bitmap_alloc(); + for (j = i; j < nbprocs; j++) { + if (infos[j].ids[DIE] == (unsigned) -1) { + hwloc_bitmap_clr(remaining_cpuset, j); + continue; + } + + if (infos[j].ids[PKG] == packageid && infos[j].ids[DIE] == dieid) { + hwloc_bitmap_set(die_cpuset, j); + hwloc_bitmap_clr(remaining_cpuset, j); + } + } + die = hwloc_alloc_setup_object(topology, HWLOC_OBJ_DIE, dieid); + die->cpuset = die_cpuset; + hwloc_debug_1arg_bitmap("os die %u has cpuset %s\n", + dieid, die_cpuset); + hwloc_insert_object_by_cpuset(topology, die); + } + } + } + if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) { /* Look for cores */ if (fulldiscovery) { @@ -964,9 +1086,9 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { - unsigned packageid = infos[i].packageid; - unsigned nodeid = infos[i].nodeid; - unsigned coreid = infos[i].coreid; + unsigned packageid = infos[i].ids[PKG]; + unsigned nodeid = infos[i].ids[NODE]; + unsigned coreid = infos[i].ids[CORE]; if (coreid == (unsigned) -1) { hwloc_bitmap_clr(remaining_cpuset, i); @@ -975,12 +1097,12 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int core_cpuset = hwloc_bitmap_alloc(); for (j = i; j < nbprocs; j++) { - if (infos[j].coreid == (unsigned) -1) { + if (infos[j].ids[CORE] == (unsigned) -1) { hwloc_bitmap_clr(remaining_cpuset, j); continue; } - if (infos[j].packageid == packageid && infos[j].nodeid == nodeid && infos[j].coreid == coreid) { + if (infos[j].ids[PKG] == packageid && infos[j].ids[NODE] == nodeid && infos[j].ids[CORE] == coreid) { hwloc_bitmap_set(core_cpuset, j); hwloc_bitmap_clr(remaining_cpuset, j); } @@ -1056,7 +1178,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int } else { /* Add the missing cache */ hwloc_bitmap_t cache_cpuset; - unsigned packageid = infos[i].packageid; + unsigned packageid = infos[i].ids[PKG]; unsigned cacheid = infos[i].cache[l].cacheid; /* Now look for others sharing it */ cache_cpuset = hwloc_bitmap_alloc(); @@ -1071,7 +1193,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int hwloc_bitmap_clr(remaining_cpuset, j); continue; } - if (infos[j].packageid == packageid && infos[j].cache[l2].cacheid == cacheid) { + if (infos[j].ids[PKG] == packageid && infos[j].cache[l2].cacheid == cacheid) { hwloc_bitmap_set(cache_cpuset, j); hwloc_bitmap_clr(remaining_cpuset, j); } @@ -1103,7 +1225,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int } static int -look_procs(struct 
hwloc_backend *backend, struct procinfo *infos, int fulldiscovery, +look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long flags, unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type, int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags), int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags)) @@ -1139,7 +1261,7 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscov } } - look_proc(backend, &infos[i], highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump); + look_proc(backend, &infos[i], flags, highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump); if (data->src_cpuiddump_path) { cpuiddump_free(src_cpuiddump); @@ -1152,10 +1274,10 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscov hwloc_bitmap_free(orig_cpuset); } - if (!data->apicid_unique) - fulldiscovery = 0; - else - summarize(backend, infos, fulldiscovery); + if (data->apicid_unique) + summarize(backend, infos, flags); + /* if !data->apicid_unique, do nothing and return success, so that the caller does nothing either */ + return 0; } @@ -1223,7 +1345,7 @@ static int fake_set_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, } static -int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery) +int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags) { struct hwloc_x86_backend_data_s *data = backend->private_data; unsigned nbprocs = data->nbprocs; @@ -1245,13 +1367,18 @@ int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery) int ret = -1; if (data->src_cpuiddump_path) { - /* just read cpuid from the dump */ + /* Just read cpuid from the dump (implies !topology->is_thissystem by default) */ src_cpuiddump = cpuiddump_read(data->src_cpuiddump_path, 0); if (!src_cpuiddump) goto out; } else { - /* otherwise check if binding works */ + /* Using real hardware. + * However we don't enforce topology->is_thissystem so that + * we may still force use this backend when debugging with !thissystem. 
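look_procs() saves the caller's CPU binding, binds to each PU in turn so that CPUID executes on that PU, and restores the binding afterwards. A rough sketch of that pattern using hwloc's public binding API; cpuid_query() is a placeholder stub standing in for an arch-specific CPUID wrapper such as the backend's cpuid_or_from_dump():

#include <hwloc.h>

/* Placeholder: a real implementation would execute the CPUID instruction
 * (e.g. via __get_cpuid() or __cpuid()) on the currently bound PU. */
static void cpuid_query(unsigned leaf, unsigned regs[4])
{
    regs[0] = regs[1] = regs[2] = regs[3] = leaf;   /* stub */
}

static void query_each_pu(hwloc_topology_t topology)
{
    hwloc_cpuset_t orig = hwloc_bitmap_alloc();
    int nbpus = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
    int i;

    hwloc_get_cpubind(topology, orig, HWLOC_CPUBIND_THREAD);    /* save binding */
    for (i = 0; i < nbpus; i++) {
        hwloc_obj_t pu = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, i);
        unsigned regs[4];
        if (hwloc_set_cpubind(topology, pu->cpuset, HWLOC_CPUBIND_THREAD) < 0)
            continue;                    /* binding refused, skip this PU */
        cpuid_query(0x0, regs);          /* runs on the PU we just bound to */
    }
    hwloc_set_cpubind(topology, orig, HWLOC_CPUBIND_THREAD);     /* restore */
    hwloc_bitmap_free(orig);
}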
+ */ + + /* check if binding works */ memset(&hooks, 0, sizeof(hooks)); support.membind = &memsupport; hwloc_set_native_binding_hooks(&hooks, &support); @@ -1281,12 +1408,13 @@ int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery) if (NULL == infos) goto out; for (i = 0; i < nbprocs; i++) { - infos[i].nodeid = (unsigned) -1; - infos[i].packageid = (unsigned) -1; - infos[i].dieid = (unsigned) -1; - infos[i].unitid = (unsigned) -1; - infos[i].coreid = (unsigned) -1; - infos[i].threadid = (unsigned) -1; + infos[i].ids[PKG] = (unsigned) -1; + infos[i].ids[CORE] = (unsigned) -1; + infos[i].ids[NODE] = (unsigned) -1; + infos[i].ids[UNIT] = (unsigned) -1; + infos[i].ids[TILE] = (unsigned) -1; + infos[i].ids[MODULE] = (unsigned) -1; + infos[i].ids[DIE] = (unsigned) -1; } eax = 0x00; @@ -1334,7 +1462,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery) hwloc_x86_os_state_save(&os_state, src_cpuiddump); - ret = look_procs(backend, infos, fulldiscovery, + ret = look_procs(backend, infos, flags, highest_cpuid, highest_ext_cpuid, features, cpuid_type, get_cpubind, set_cpubind); if (!ret) @@ -1343,8 +1471,8 @@ int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery) if (nbprocs == 1) { /* only one processor, no need to bind */ - look_proc(backend, &infos[0], highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump); - summarize(backend, infos, fulldiscovery); + look_proc(backend, &infos[0], flags, highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump); + summarize(backend, infos, flags); ret = 0; } @@ -1367,13 +1495,20 @@ out: } static int -hwloc_x86_discover(struct hwloc_backend *backend) +hwloc_x86_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { struct hwloc_x86_backend_data_s *data = backend->private_data; struct hwloc_topology *topology = backend->topology; + unsigned long flags = 0; int alreadypus = 0; int ret; + assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); + + if (getenv("HWLOC_X86_TOPOEXT_NUMANODES")) { + flags |= HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES; + } + #if HAVE_DECL_RUNNING_ON_VALGRIND if (RUNNING_ON_VALGRIND && !data->src_cpuiddump_path) { fprintf(stderr, "hwloc x86 backend cannot work under Valgrind, disabling.\n" @@ -1387,7 +1522,7 @@ hwloc_x86_discover(struct hwloc_backend *backend) assert(data->nbprocs > 0); /* enforced by hwloc_x86_component_instantiate() */ topology->support.discovery->pu = 1; } else { - int nbprocs = hwloc_fallback_nbprocessors(topology); + int nbprocs = hwloc_fallback_nbprocessors(HWLOC_FALLBACK_NBPROCESSORS_INCLUDE_OFFLINE); if (nbprocs >= 1) topology->support.discovery->pu = 1; else @@ -1405,7 +1540,7 @@ hwloc_x86_discover(struct hwloc_backend *backend) /* several object types were added, we can't easily complete, just do partial discovery */ hwloc_topology_reconnect(topology, 0); - ret = hwloc_look_x86(backend, 0); + ret = hwloc_look_x86(backend, flags); if (ret) hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86"); return 0; @@ -1415,7 +1550,7 @@ hwloc_x86_discover(struct hwloc_backend *backend) } fulldiscovery: - if (hwloc_look_x86(backend, 1) < 0) { + if (hwloc_look_x86(backend, flags | HWLOC_X86_DISC_FLAG_FULL) < 0) { /* if failed, create PUs */ if (!alreadypus) hwloc_setup_pu_level(topology, data->nbprocs); @@ -1446,6 +1581,7 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s #if !(defined HWLOC_WIN_SYS && !defined __MINGW32__ && !defined __CYGWIN__) /* needs a lot of work */ struct dirent *dirent; DIR *dir; + 
char *path; FILE *file; char line [32]; @@ -1453,23 +1589,26 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s if (!dir) return -1; - char path[strlen(src_cpuiddump_path) + strlen("/hwloc-cpuid-info") + 1]; + path = malloc(strlen(src_cpuiddump_path) + strlen("/hwloc-cpuid-info") + 1); + if (!path) + goto out_with_dir; sprintf(path, "%s/hwloc-cpuid-info", src_cpuiddump_path); file = fopen(path, "r"); if (!file) { fprintf(stderr, "Couldn't open dumped cpuid summary %s\n", path); - goto out_with_dir; + goto out_with_path; } if (!fgets(line, sizeof(line), file)) { fprintf(stderr, "Found read dumped cpuid summary in %s\n", path); fclose(file); - goto out_with_dir; + goto out_with_path; } fclose(file); if (strcmp(line, "Architecture: x86\n")) { fprintf(stderr, "Found non-x86 dumped cpuid summary in %s: %s\n", path, line); - goto out_with_dir; + goto out_with_path; } + free(path); while ((dirent = readdir(dir)) != NULL) { if (!strncmp(dirent->d_name, "pu", 2)) { @@ -1497,7 +1636,9 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s return 0; -out_with_dir: + out_with_path: + free(path); + out_with_dir: closedir(dir); #endif /* HWLOC_WIN_SYS & !__MINGW32__ needs a lot of work */ return -1; @@ -1513,7 +1654,9 @@ hwloc_x86_backend_disable(struct hwloc_backend *backend) } static struct hwloc_backend * -hwloc_x86_component_instantiate(struct hwloc_disc_component *component, +hwloc_x86_component_instantiate(struct hwloc_topology *topology, + struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -1522,7 +1665,7 @@ hwloc_x86_component_instantiate(struct hwloc_disc_component *component, struct hwloc_x86_backend_data_s *data; const char *src_cpuiddump_path; - backend = hwloc_backend_alloc(component); + backend = hwloc_backend_alloc(topology, component); if (!backend) goto out; @@ -1565,9 +1708,9 @@ hwloc_x86_component_instantiate(struct hwloc_disc_component *component, } static struct hwloc_disc_component hwloc_x86_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_CPU, "x86", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_CPU, + HWLOC_DISC_PHASE_GLOBAL, hwloc_x86_component_instantiate, 45, /* between native and no_os */ 1, diff --git a/src/3rdparty/hwloc/src/topology-xml-nolibxml.c b/src/3rdparty/hwloc/src/topology-xml-nolibxml.c index 5a0d02da..d0e9ec16 100644 --- a/src/3rdparty/hwloc/src/topology-xml-nolibxml.c +++ b/src/3rdparty/hwloc/src/topology-xml-nolibxml.c @@ -1,18 +1,18 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
*/ -#include -#include -#include -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "hwloc/plugins.h" +#include "private/private.h" +#include "private/misc.h" +#include "private/xml.h" +#include "private/debug.h" #include #include @@ -27,9 +27,8 @@ *******************/ struct hwloc__nolibxml_backend_data_s { - size_t buflen; /* size of both buffer and copy buffers, set during backend_init() */ + size_t buflen; /* size of both buffer, set during backend_init() */ char *buffer; /* allocated and filled during backend_init() */ - char *copy; /* allocated during backend_init(), used later during actual parsing */ }; typedef struct hwloc__nolibxml_import_state_data_s { @@ -260,14 +259,11 @@ hwloc_nolibxml_look_init(struct hwloc_xml_backend_data_s *bdata, struct hwloc__nolibxml_backend_data_s *nbdata = bdata->data; unsigned major, minor; char *end; - char *buffer; + char *buffer = nbdata->buffer; + char *tagname; HWLOC_BUILD_ASSERT(sizeof(*nstate) <= sizeof(state->data)); - /* use a copy in the temporary buffer, we may modify during parsing */ - buffer = nbdata->copy; - memcpy(buffer, nbdata->buffer, nbdata->buflen); - /* skip headers */ while (!strncmp(buffer, "version_major = major; bdata->version_minor = minor; end = strchr(buffer, '>') + 1; + tagname = "topology"; } else if (!strncmp(buffer, "", 10)) { bdata->version_major = 1; bdata->version_minor = 0; end = buffer + 10; + tagname = "topology"; } else if (!strncmp(buffer, "", 6)) { bdata->version_major = 0; bdata->version_minor = 9; end = buffer + 6; + tagname = "root"; } else goto failed; @@ -301,7 +300,7 @@ hwloc_nolibxml_look_init(struct hwloc_xml_backend_data_s *bdata, state->parent = NULL; nstate->closed = 0; nstate->tagbuffer = end; - nstate->tagname = (char *) "topology"; + nstate->tagname = tagname; nstate->attrbuffer = NULL; return 0; /* success */ @@ -320,10 +319,6 @@ hwloc_nolibxml_free_buffers(struct hwloc_xml_backend_data_s *bdata) free(nbdata->buffer); nbdata->buffer = NULL; } - if (nbdata->copy) { - free(nbdata->copy); - nbdata->copy = NULL; - } } static void @@ -429,19 +424,11 @@ hwloc_nolibxml_backend_init(struct hwloc_xml_backend_data_s *bdata, goto out_with_nbdata; } - /* allocate a temporary copy buffer that we may modify during parsing */ - nbdata->copy = malloc(nbdata->buflen+1); - if (!nbdata->copy) - goto out_with_buffer; - nbdata->copy[nbdata->buflen] = '\0'; - bdata->look_init = hwloc_nolibxml_look_init; bdata->look_done = hwloc_nolibxml_look_done; bdata->backend_exit = hwloc_nolibxml_backend_exit; return 0; -out_with_buffer: - free(nbdata->buffer); out_with_nbdata: free(nbdata); out: @@ -666,7 +653,7 @@ hwloc__nolibxml_export_end_object(hwloc__xml_export_state_t state, const char *n } static void -hwloc__nolibxml_export_add_content(hwloc__xml_export_state_t state, const char *buffer, size_t length) +hwloc__nolibxml_export_add_content(hwloc__xml_export_state_t state, const char *buffer, size_t length __hwloc_attribute_unused) { hwloc__nolibxml_export_state_data_t ndata = (void *) state->data; int res; @@ -678,7 +665,7 @@ hwloc__nolibxml_export_add_content(hwloc__xml_export_state_t state, const char * } ndata->has_content = 1; - res = hwloc_snprintf(ndata->buffer, ndata->remaining, buffer, length); + res = hwloc_snprintf(ndata->buffer, ndata->remaining, "%s", buffer); hwloc__nolibxml_export_update_buffer(ndata, res); } @@ -799,6 +786,7 @@ hwloc___nolibxml_prepare_export_diff(hwloc_topology_diff_t diff, const char *ref state.new_prop = 
hwloc__nolibxml_export_new_prop; state.add_content = hwloc__nolibxml_export_add_content; state.end_object = hwloc__nolibxml_export_end_object; + state.global = NULL; ndata->indent = 0; ndata->written = 0; diff --git a/src/3rdparty/hwloc/src/topology-xml.c b/src/3rdparty/hwloc/src/topology-xml.c index e7c5ef62..f6bb210c 100644 --- a/src/3rdparty/hwloc/src/topology-xml.c +++ b/src/3rdparty/hwloc/src/topology-xml.c @@ -6,12 +6,12 @@ * See COPYING in top-level directory. */ -#include -#include -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/xml.h" +#include "private/private.h" +#include "private/misc.h" +#include "private/debug.h" #include @@ -158,7 +158,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, else if (!strcmp(name, "cache_size")) { unsigned long long lvalue = strtoull(value, NULL, 10); - if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) + if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE) obj->attr->cache.size = lvalue; else if (hwloc__xml_verbose()) fprintf(stderr, "%s: ignoring cache_size attribute for non-cache object type\n", @@ -167,7 +167,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, else if (!strcmp(name, "cache_linesize")) { unsigned long lvalue = strtoul(value, NULL, 10); - if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) + if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE) obj->attr->cache.linesize = lvalue; else if (hwloc__xml_verbose()) fprintf(stderr, "%s: ignoring cache_linesize attribute for non-cache object type\n", @@ -176,7 +176,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, else if (!strcmp(name, "cache_associativity")) { int lvalue = atoi(value); - if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) + if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE) obj->attr->cache.associativity = lvalue; else if (hwloc__xml_verbose()) fprintf(stderr, "%s: ignoring cache_associativity attribute for non-cache object type\n", @@ -185,7 +185,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, else if (!strcmp(name, "cache_type")) { unsigned long lvalue = strtoul(value, NULL, 10); - if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) { + if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE) { if (lvalue == HWLOC_OBJ_CACHE_UNIFIED || lvalue == HWLOC_OBJ_CACHE_DATA || lvalue == HWLOC_OBJ_CACHE_INSTRUCTION) @@ -211,7 +211,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, else if (!strcmp(name, "depth")) { unsigned long lvalue = strtoul(value, NULL, 10); - if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) { + if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE) { obj->attr->cache.depth = lvalue; } else if (obj->type == HWLOC_OBJ_GROUP || obj->type == HWLOC_OBJ_BRIDGE) { /* will be overwritten by the core */ @@ -805,21 +805,13 @@ hwloc__xml_import_object(hwloc_topology_t topology, state->global->msgprefix); goto error_with_object; } - } else if (!strcasecmp(attrvalue, "Die")) { - /* deal with possible future type */ - obj->type = HWLOC_OBJ_GROUP; - obj->subtype = strdup("Die"); - 
obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_DIE; - obj->attr->group.dont_merge = data->dont_merge_die_groups; } else if (!strcasecmp(attrvalue, "Tile")) { /* deal with possible future type */ obj->type = HWLOC_OBJ_GROUP; - obj->subtype = strdup("Tile"); obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_TILE; } else if (!strcasecmp(attrvalue, "Module")) { /* deal with possible future type */ obj->type = HWLOC_OBJ_GROUP; - obj->subtype = strdup("Module"); obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_MODULE; } else if (!strcasecmp(attrvalue, "MemCache")) { /* ignore possible future type */ @@ -1053,6 +1045,13 @@ hwloc__xml_import_object(hwloc_topology_t topology, /* end of 1.x specific checks */ } + /* 2.0 backward compatibility */ + if (obj->type == HWLOC_OBJ_GROUP) { + if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_DIE + || (obj->subtype && !strcmp(obj->subtype, "Die"))) + obj->type = HWLOC_OBJ_DIE; + } + /* check that cache attributes are coherent with the actual type */ if (hwloc__obj_type_is_cache(obj->type) && obj->type != hwloc_cache_type_by_depth_type(obj->attr->cache.depth, obj->attr->cache.type)) { @@ -1212,19 +1211,24 @@ hwloc__xml_import_object(hwloc_topology_t topology, static int hwloc__xml_v2import_distances(hwloc_topology_t topology, - hwloc__xml_import_state_t state) + hwloc__xml_import_state_t state, + int heterotypes) { - hwloc_obj_type_t type = HWLOC_OBJ_TYPE_NONE; + hwloc_obj_type_t unique_type = HWLOC_OBJ_TYPE_NONE; + hwloc_obj_type_t *different_types = NULL; unsigned nbobjs = 0; - int indexing = 0; + int indexing = heterotypes; int os_indexing = 0; - int gp_indexing = 0; + int gp_indexing = heterotypes; + char *name = NULL; unsigned long kind = 0; unsigned nr_indexes, nr_u64values; uint64_t *indexes; uint64_t *u64values; int ret; +#define _TAG_NAME (heterotypes ? 
"distances2hetero" : "distances2") + /* process attributes */ while (1) { char *attrname, *attrvalue; @@ -1233,8 +1237,12 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, if (!strcmp(attrname, "nbobjs")) nbobjs = strtoul(attrvalue, NULL, 10); else if (!strcmp(attrname, "type")) { - if (hwloc_type_sscanf(attrvalue, &type, NULL, 0) < 0) + if (hwloc_type_sscanf(attrvalue, &unique_type, NULL, 0) < 0) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: unrecognized %s type %s\n", + state->global->msgprefix, _TAG_NAME, attrvalue); goto out; + } } else if (!strcmp(attrname, "indexing")) { indexing = 1; @@ -1246,27 +1254,32 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, else if (!strcmp(attrname, "kind")) { kind = strtoul(attrvalue, NULL, 10); } + else if (!strcmp(attrname, "name")) { + name = attrvalue; + } else { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: ignoring unknown distance attribute %s\n", - state->global->msgprefix, attrname); + fprintf(stderr, "%s: ignoring unknown %s attribute %s\n", + state->global->msgprefix, _TAG_NAME, attrname); } } /* abort if missing attribute */ - if (!nbobjs || type == HWLOC_OBJ_TYPE_NONE || !indexing || !kind) { + if (!nbobjs || (!heterotypes && unique_type == HWLOC_OBJ_TYPE_NONE) || !indexing || !kind) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 missing some attributes\n", - state->global->msgprefix); + fprintf(stderr, "%s: %s missing some attributes\n", + state->global->msgprefix, _TAG_NAME); goto out; } indexes = malloc(nbobjs*sizeof(*indexes)); u64values = malloc(nbobjs*nbobjs*sizeof(*u64values)); - if (!indexes || !u64values) { + if (heterotypes) + different_types = malloc(nbobjs*sizeof(*different_types)); + if (!indexes || !u64values || (heterotypes && !different_types)) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: failed to allocate distances arrays for %u objects\n", - state->global->msgprefix, nbobjs); + fprintf(stderr, "%s: failed to allocate %s arrays for %u objects\n", + state->global->msgprefix, _TAG_NAME, nbobjs); goto out_with_arrays; } @@ -1290,16 +1303,16 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, is_u64values = 1; if (!is_index && !is_u64values) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with unrecognized child %s\n", - state->global->msgprefix, tag); + fprintf(stderr, "%s: %s with unrecognized child %s\n", + state->global->msgprefix, _TAG_NAME, tag); goto out_with_arrays; } if (state->global->next_attr(&childstate, &attrname, &attrvalue) < 0 || strcmp(attrname, "length")) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 child must have length attribute\n", - state->global->msgprefix); + fprintf(stderr, "%s: %s child must have length attribute\n", + state->global->msgprefix, _TAG_NAME); goto out_with_arrays; } length = atoi(attrvalue); @@ -1307,24 +1320,43 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, ret = state->global->get_content(&childstate, &buffer, length); if (ret < 0) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 child needs content of length %d\n", - state->global->msgprefix, length); + fprintf(stderr, "%s: %s child needs content of length %d\n", + state->global->msgprefix, _TAG_NAME, length); goto out_with_arrays; } if (is_index) { /* get indexes */ - char *tmp; + char *tmp, *tmp2; if (nr_indexes >= nbobjs) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with more than %u indexes\n", - state->global->msgprefix, nbobjs); + fprintf(stderr, "%s: %s with more than %u indexes\n", + 
state->global->msgprefix, _TAG_NAME, nbobjs); goto out_with_arrays; } tmp = buffer; while (1) { char *next; - unsigned long long u = strtoull(tmp, &next, 0); + unsigned long long u; + if (heterotypes) { + hwloc_obj_type_t t = HWLOC_OBJ_TYPE_NONE; + if (hwloc_type_sscanf(tmp, &t, NULL, 0) < 0) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: %s with unrecognized heterogeneous type %s\n", + state->global->msgprefix, _TAG_NAME, tmp); + goto out_with_arrays; + } + tmp2 = strchr(tmp, ':'); + if (!tmp2) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: %s with missing colon after heterogeneous type %s\n", + state->global->msgprefix, _TAG_NAME, tmp); + goto out_with_arrays; + } + tmp = tmp2+1; + different_types[nr_indexes] = t; + } + u = strtoull(tmp, &next, 0); if (next == tmp) break; indexes[nr_indexes++] = u; @@ -1340,8 +1372,8 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, char *tmp; if (nr_u64values >= nbobjs*nbobjs) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with more than %u u64values\n", - state->global->msgprefix, nbobjs*nbobjs); + fprintf(stderr, "%s: %s with more than %u u64values\n", + state->global->msgprefix, _TAG_NAME, nbobjs*nbobjs); goto out_with_arrays; } tmp = buffer; @@ -1364,8 +1396,8 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, ret = state->global->close_tag(&childstate); if (ret < 0) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with more than %u indexes\n", - state->global->msgprefix, nbobjs); + fprintf(stderr, "%s: %s with more than %u indexes\n", + state->global->msgprefix, _TAG_NAME, nbobjs); goto out_with_arrays; } @@ -1374,56 +1406,60 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, if (nr_indexes != nbobjs) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with less than %u indexes\n", - state->global->msgprefix, nbobjs); + fprintf(stderr, "%s: %s with less than %u indexes\n", + state->global->msgprefix, _TAG_NAME, nbobjs); goto out_with_arrays; } if (nr_u64values != nbobjs*nbobjs) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with less than %u u64values\n", - state->global->msgprefix, nbobjs*nbobjs); + fprintf(stderr, "%s: %s with less than %u u64values\n", + state->global->msgprefix, _TAG_NAME, nbobjs*nbobjs); goto out_with_arrays; } if (nbobjs < 2) { /* distances with a single object are useless, even if the XML isn't invalid */ if (hwloc__xml_verbose()) - fprintf(stderr, "%s: ignoring distances2 with only %u objects\n", - state->global->msgprefix, nbobjs); + fprintf(stderr, "%s: ignoring %s with only %u objects\n", + state->global->msgprefix, _TAG_NAME, nbobjs); goto out_ignore; } - if (type == HWLOC_OBJ_PU || type == HWLOC_OBJ_NUMANODE) { + if (unique_type == HWLOC_OBJ_PU || unique_type == HWLOC_OBJ_NUMANODE) { if (!os_indexing) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: ignoring PU or NUMA distances2 without os_indexing\n", - state->global->msgprefix); + fprintf(stderr, "%s: ignoring PU or NUMA %s without os_indexing\n", + state->global->msgprefix, _TAG_NAME); goto out_ignore; } } else { if (!gp_indexing) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: ignoring !PU or !NUMA distances2 without gp_indexing\n", - state->global->msgprefix); + fprintf(stderr, "%s: ignoring !PU or !NUMA %s without gp_indexing\n", + state->global->msgprefix, _TAG_NAME); goto out_ignore; } } - hwloc_internal_distances_add_by_index(topology, type, nbobjs, indexes, u64values, kind, 0); + hwloc_internal_distances_add_by_index(topology, name, unique_type, different_types, 
nbobjs, indexes, u64values, kind, 0); /* prevent freeing below */ indexes = NULL; u64values = NULL; + different_types = NULL; out_ignore: + free(different_types); free(indexes); free(u64values); return state->global->close_tag(state); out_with_arrays: + free(different_types); free(indexes); free(u64values); out: return -1; +#undef _TAG_NAME } static int @@ -1625,8 +1661,12 @@ hwloc_convert_from_v1dist_floats(hwloc_topology_t topology, unsigned nbobjs, flo /* this canNOT be the first XML call */ static int -hwloc_look_xml(struct hwloc_backend *backend) +hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { + /* + * This backend enforces !topology->is_thissystem by default. + */ + struct hwloc_topology *topology = backend->topology; struct hwloc_xml_backend_data_s *data = backend->private_data; struct hwloc__xml_import_state_s state, childstate; @@ -1634,9 +1674,10 @@ hwloc_look_xml(struct hwloc_backend *backend) char *tag; int gotignored = 0; hwloc_localeswitch_declare; - char *env; int ret; + assert(dstatus->phase == HWLOC_DISC_PHASE_GLOBAL); + state.global = data; assert(!root->cpuset); @@ -1647,9 +1688,6 @@ hwloc_look_xml(struct hwloc_backend *backend) data->first_numanode = data->last_numanode = NULL; data->first_v1dist = data->last_v1dist = NULL; - env = getenv("HWLOC_DONT_MERGE_DIE_GROUPS"); - data->dont_merge_die_groups = env && atoi(env); - ret = data->look_init(data, &state); if (ret < 0) goto failed; @@ -1684,15 +1722,20 @@ hwloc_look_xml(struct hwloc_backend *backend) goto failed; if (!ret) break; - if (strcmp(tag, "distances2")) { + if (!strcmp(tag, "distances2")) { + ret = hwloc__xml_v2import_distances(topology, &childstate, 0); + if (ret < 0) + goto failed; + } else if (!strcmp(tag, "distances2hetero")) { + ret = hwloc__xml_v2import_distances(topology, &childstate, 1); + if (ret < 0) + goto failed; + } else { if (hwloc__xml_verbose()) fprintf(stderr, "%s: ignoring unknown tag `%s' after root object, expected `distances2'\n", data->msgprefix, tag); goto done; } - ret = hwloc__xml_v2import_distances(topology, &childstate); - if (ret < 0) - goto failed; state.global->close_child(&childstate); } } @@ -1742,8 +1785,8 @@ done: inext_cousin) objs[i] = node; -hwloc_convert_from_v1dist_floats(topology, nbobjs, v1dist->floats, values); - hwloc_internal_distances_add(topology, nbobjs, objs, values, v1dist->kind, 0); + hwloc_convert_from_v1dist_floats(topology, nbobjs, v1dist->floats, values); + hwloc_internal_distances_add(topology, NULL, nbobjs, objs, values, v1dist->kind, 0); } else { free(objs); free(values); @@ -1791,9 +1834,11 @@ hwloc_convert_from_v1dist_floats(topology, nbobjs, v1dist->floats, values); /* we could add "BackendSource=XML" to notify that XML was used between the actual backend and here */ topology->support.discovery->pu = 1; + topology->support.discovery->disallowed_pu = 1; if (data->nbnumanodes) { topology->support.discovery->numa = 1; topology->support.discovery->numa_memory = 1; // FIXME + topology->support.discovery->disallowed_numa = 1; } if (data->look_done) @@ -1936,6 +1981,9 @@ hwloc__xml_export_safestrdup(const char *old) char *new = malloc(strlen(old)+1); char *dst = new; const char *src = old; + if (!new) + return NULL; + while (*src) { if (HWLOC_XML_CHAR_VALID(*src)) *(dst++) = *src; @@ -1955,6 +2003,8 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo if (v1export && obj->type == HWLOC_OBJ_PACKAGE) state->new_prop(state, "type", "Socket"); + else if (v1export && obj->type == HWLOC_OBJ_DIE) + 
state->new_prop(state, "type", "Group"); else if (v1export && hwloc__obj_type_is_cache(obj->type)) state->new_prop(state, "type", "Cache"); else @@ -1966,8 +2016,23 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo } if (obj->cpuset) { - if (v1export && obj->type == HWLOC_OBJ_NUMANODE && obj->sibling_rank > 0) { - /* v1 non-first NUMA nodes have empty cpusets */ + int empty_cpusets = 0; + + if (v1export && obj->type == HWLOC_OBJ_NUMANODE) { + /* walk up this memory hierarchy to find-out if we are the first numa node. + * v1 non-first NUMA nodes have empty cpusets. + */ + hwloc_obj_t parent = obj; + while (!hwloc_obj_type_is_normal(parent->type)) { + if (parent->sibling_rank > 0) { + empty_cpusets = 1; + break; + } + parent = parent->parent; + } + } + + if (empty_cpusets) { state->new_prop(state, "cpuset", "0x0"); state->new_prop(state, "online_cpuset", "0x0"); state->new_prop(state, "complete_cpuset", "0x0"); @@ -2024,13 +2089,17 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo if (obj->name) { char *name = hwloc__xml_export_safestrdup(obj->name); - state->new_prop(state, "name", name); - free(name); + if (name) { + state->new_prop(state, "name", name); + free(name); + } } if (!v1export && obj->subtype) { char *subtype = hwloc__xml_export_safestrdup(obj->subtype); - state->new_prop(state, "subtype", subtype); - free(subtype); + if (subtype) { + state->new_prop(state, "subtype", subtype); + free(subtype); + } } switch (obj->type) { @@ -2057,6 +2126,7 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo case HWLOC_OBJ_L1ICACHE: case HWLOC_OBJ_L2ICACHE: case HWLOC_OBJ_L3ICACHE: + case HWLOC_OBJ_MEMCACHE: sprintf(tmp, "%llu", (unsigned long long) obj->attr->cache.size); state->new_prop(state, "cache_size", tmp); sprintf(tmp, "%u", obj->attr->cache.depth); @@ -2125,23 +2195,34 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo for(i=0; iinfos_count; i++) { char *name = hwloc__xml_export_safestrdup(obj->infos[i].name); char *value = hwloc__xml_export_safestrdup(obj->infos[i].value); - struct hwloc__xml_export_state_s childstate; - state->new_child(state, &childstate, "info"); - childstate.new_prop(&childstate, "name", name); - childstate.new_prop(&childstate, "value", value); - childstate.end_object(&childstate, "info"); + if (name && value) { + struct hwloc__xml_export_state_s childstate; + state->new_child(state, &childstate, "info"); + childstate.new_prop(&childstate, "name", name); + childstate.new_prop(&childstate, "value", value); + childstate.end_object(&childstate, "info"); + } free(name); free(value); } if (v1export && obj->subtype) { char *subtype = hwloc__xml_export_safestrdup(obj->subtype); + if (subtype) { + struct hwloc__xml_export_state_s childstate; + int is_coproctype = (obj->type == HWLOC_OBJ_OS_DEVICE && obj->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC); + state->new_child(state, &childstate, "info"); + childstate.new_prop(&childstate, "name", is_coproctype ? "CoProcType" : "Type"); + childstate.new_prop(&childstate, "value", subtype); + childstate.end_object(&childstate, "info"); + free(subtype); + } + } + if (v1export && obj->type == HWLOC_OBJ_DIE) { struct hwloc__xml_export_state_s childstate; - int is_coproctype = (obj->type == HWLOC_OBJ_OS_DEVICE && obj->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC); state->new_child(state, &childstate, "info"); - childstate.new_prop(&childstate, "name", is_coproctype ? 
"CoProcType" : "Type"); - childstate.new_prop(&childstate, "value", subtype); + childstate.new_prop(&childstate, "name", "Type"); + childstate.new_prop(&childstate, "value", "Die"); childstate.end_object(&childstate, "info"); - free(subtype); } if (v1export && !obj->parent) { @@ -2152,19 +2233,27 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo for(dist = topology->first_dist; dist; dist = dist->next) { struct hwloc__xml_export_state_s childstate; unsigned nbobjs = dist->nbobjs; + unsigned *logical_to_v2array; int depth; - if (nbobjs != (unsigned) hwloc_get_nbobjs_by_type(topology, dist->type)) + if (nbobjs != (unsigned) hwloc_get_nbobjs_by_type(topology, dist->unique_type)) continue; if (!(dist->kind & HWLOC_DISTANCES_KIND_MEANS_LATENCY)) continue; - { - HWLOC_VLA(unsigned, logical_to_v2array, nbobjs); + if (dist->kind & HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES) + continue; + + logical_to_v2array = malloc(nbobjs * sizeof(*logical_to_v2array)); + if (!logical_to_v2array) { + fprintf(stderr, "xml/export/v1: failed to allocated logical_to_v2array\n"); + continue; + } + for(i=0; iobjs[i]->logical_index] = i; /* compute the relative depth */ - if (dist->type == HWLOC_OBJ_NUMANODE) { + if (dist->unique_type == HWLOC_OBJ_NUMANODE) { /* for NUMA nodes, use the highest normal-parent depth + 1 */ depth = -1; for(i=0; itype) + parent_with_memory; + depth = hwloc_get_type_depth(topology, dist->unique_type) + parent_with_memory; } state->new_child(state, &childstate, "distances"); @@ -2210,7 +2299,7 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo } } childstate.end_object(&childstate, "distances"); - } + free(logical_to_v2array); } } @@ -2243,13 +2332,90 @@ hwloc__xml_v2export_object (hwloc__xml_export_state_t parentstate, hwloc_topolog static void hwloc__xml_v1export_object (hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, hwloc_obj_t obj, unsigned long flags); +static hwloc_obj_t +hwloc__xml_v1export_object_next_numanode(hwloc_obj_t obj, hwloc_obj_t cur) +{ + hwloc_obj_t parent; + + if (!cur) { + /* first numa node is on the very bottom left */ + cur = obj->memory_first_child; + goto find_first; + } + + /* walk-up until there's a next sibling */ + parent = cur; + while (1) { + if (parent->next_sibling) { + /* found a next sibling, we'll walk down-left from there */ + cur = parent->next_sibling; + break; + } + parent = parent->parent; + if (parent == obj) + return NULL; + } + + find_first: + while (cur->type != HWLOC_OBJ_NUMANODE) + cur = cur->memory_first_child; + assert(cur); + return cur; +} + +static unsigned +hwloc__xml_v1export_object_list_numanodes(hwloc_obj_t obj, hwloc_obj_t *first_p, hwloc_obj_t **nodes_p) +{ + hwloc_obj_t *nodes, cur; + int nr; + + if (!obj->memory_first_child) { + *first_p = NULL; + *nodes_p = NULL; + return 0; + } + /* we're sure there's at least one numa node */ + + nr = hwloc_bitmap_weight(obj->nodeset); + assert(nr > 0); + /* these are local nodes, but some of them may be attached above instead of here */ + + nodes = calloc(nr, sizeof(*nodes)); + if (!nodes) { + /* only return the first node */ + cur = hwloc__xml_v1export_object_next_numanode(obj, NULL); + assert(cur); + *first_p = cur; + *nodes_p = NULL; + return 1; + } + + nr = 0; + cur = NULL; + while (1) { + cur = hwloc__xml_v1export_object_next_numanode(obj, cur); + if (!cur) + break; + nodes[nr++] = cur; + } + + *first_p = nodes[0]; + *nodes_p = nodes; + return nr; +} + static void 
hwloc__xml_v1export_object_with_memory(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, hwloc_obj_t obj, unsigned long flags) { struct hwloc__xml_export_state_s gstate, mstate, ostate, *state = parentstate; hwloc_obj_t child; + unsigned nr_numanodes; + hwloc_obj_t *numanodes, first_numanode; + unsigned i; - if (obj->parent->arity > 1 && obj->memory_arity > 1 && parentstate->global->v1_memory_group) { + nr_numanodes = hwloc__xml_v1export_object_list_numanodes(obj, &first_numanode, &numanodes); + + if (obj->parent->arity > 1 && nr_numanodes > 1 && parentstate->global->v1_memory_group) { /* child has sibling, we must add a Group around those memory children */ hwloc_obj_t group = parentstate->global->v1_memory_group; parentstate->new_child(parentstate, &gstate, "object"); @@ -2266,10 +2432,8 @@ hwloc__xml_v1export_object_with_memory(hwloc__xml_export_state_t parentstate, hw } /* export first memory child */ - child = obj->memory_first_child; - assert(child->type == HWLOC_OBJ_NUMANODE); state->new_child(state, &mstate, "object"); - hwloc__xml_export_object_contents (&mstate, topology, child, flags); + hwloc__xml_export_object_contents (&mstate, topology, first_numanode, flags); /* then the actual object */ mstate.new_child(&mstate, &ostate, "object"); @@ -2288,9 +2452,10 @@ hwloc__xml_v1export_object_with_memory(hwloc__xml_export_state_t parentstate, hw mstate.end_object(&mstate, "object"); /* now other memory children */ - for_each_memory_child(child, obj) - if (child->sibling_rank > 0) - hwloc__xml_v1export_object (state, topology, child, flags); + for(i=1; inew_child(state, &_childstate, tagname); \ + for(_j=0; \ + _i+_j<(nr) && _jtype), (unsigned long long) (objs)[_i+_j]->gp_index); \ + _i += _j; \ + sprintf(_tmp2, "%lu", (unsigned long) _len); \ + _childstate.new_prop(&_childstate, "length", _tmp2); \ + _childstate.add_content(&_childstate, _tmp, _len); \ + _childstate.end_object(&_childstate, tagname); \ + } \ +} while (0) + +static void +hwloc___xml_v2export_distances(hwloc__xml_export_state_t parentstate, struct hwloc_internal_distances_s *dist) +{ + char tmp[255]; + unsigned nbobjs = dist->nbobjs; + struct hwloc__xml_export_state_s state; + + if (dist->different_types) { + parentstate->new_child(parentstate, &state, "distances2hetero"); + } else { + parentstate->new_child(parentstate, &state, "distances2"); + state.new_prop(&state, "type", hwloc_obj_type_string(dist->unique_type)); + } + + sprintf(tmp, "%u", nbobjs); + state.new_prop(&state, "nbobjs", tmp); + sprintf(tmp, "%lu", dist->kind); + state.new_prop(&state, "kind", tmp); + if (dist->name) + state.new_prop(&state, "name", dist->name); + + if (!dist->different_types) { + state.new_prop(&state, "indexing", + HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type) ? "os" : "gp"); + } + + /* TODO don't hardwire 10 below. either snprintf the max to guess it, or just append until the end of the buffer */ + if (dist->different_types) { + EXPORT_TYPE_GPINDEX_ARRAY(&state, nbobjs, dist->objs, "indexes", 10); + } else { + EXPORT_ARRAY(&state, unsigned long long, nbobjs, dist->indexes, "indexes", "%llu", 10); + } + EXPORT_ARRAY(&state, unsigned long long, nbobjs*nbobjs, dist->values, "u64values", "%llu", 10); + state.end_object(&state, dist->different_types ? 
"distances2hetero" : "distances2"); +} + static void hwloc__xml_v2export_distances(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology) { struct hwloc_internal_distances_s *dist; - for(dist = topology->first_dist; dist; dist = dist->next) { - char tmp[255]; - unsigned nbobjs = dist->nbobjs; - struct hwloc__xml_export_state_s state; - - parentstate->new_child(parentstate, &state, "distances2"); - - state.new_prop(&state, "type", hwloc_obj_type_string(dist->type)); - sprintf(tmp, "%u", nbobjs); - state.new_prop(&state, "nbobjs", tmp); - sprintf(tmp, "%lu", dist->kind); - state.new_prop(&state, "kind", tmp); - - state.new_prop(&state, "indexing", - (dist->type == HWLOC_OBJ_NUMANODE || dist->type == HWLOC_OBJ_PU) ? "os" : "gp"); - /* TODO don't hardwire 10 below. either snprintf the max to guess it, or just append until the end of the buffer */ - EXPORT_ARRAY(&state, unsigned long long, nbobjs, dist->indexes, "indexes", "%llu", 10); - EXPORT_ARRAY(&state, unsigned long long, nbobjs*nbobjs, dist->values, "u64values", "%llu", 10); - state.end_object(&state, "distances2"); - } + for(dist = topology->first_dist; dist; dist = dist->next) + if (!dist->different_types) + hwloc___xml_v2export_distances(parentstate, dist); + /* export homogeneous distances first in case the importer doesn't support heterogeneous and stops there */ + for(dist = topology->first_dist; dist; dist = dist->next) + if (dist->different_types) + hwloc___xml_v2export_distances(parentstate, dist); } void @@ -2378,18 +2587,22 @@ hwloc__xml_export_topology(hwloc__xml_export_state_t state, hwloc_topology_t top hwloc_obj_t root = hwloc_get_root_obj(topology); if (flags & HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1) { - if (root->memory_first_child) { + hwloc_obj_t *numanodes, first_numanode; + unsigned nr_numanodes; + + nr_numanodes = hwloc__xml_v1export_object_list_numanodes(root, &first_numanode, &numanodes); + + if (nr_numanodes) { /* we don't use hwloc__xml_v1export_object_with_memory() because we want/can keep root above the numa node */ struct hwloc__xml_export_state_s rstate, mstate; hwloc_obj_t child; + unsigned i; /* export the root */ state->new_child(state, &rstate, "object"); hwloc__xml_export_object_contents (&rstate, topology, root, flags); /* export first memory child */ - child = root->memory_first_child; - assert(child->type == HWLOC_OBJ_NUMANODE); rstate.new_child(&rstate, &mstate, "object"); - hwloc__xml_export_object_contents (&mstate, topology, child, flags); + hwloc__xml_export_object_contents (&mstate, topology, first_numanode, flags); /* then its normal/io/misc children */ for_each_child(child, root) hwloc__xml_v1export_object (&mstate, topology, child, flags); @@ -2400,15 +2613,16 @@ hwloc__xml_export_topology(hwloc__xml_export_state_t state, hwloc_topology_t top /* close first memory child */ mstate.end_object(&mstate, "object"); /* now other memory children */ - for_each_memory_child(child, root) - if (child->sibling_rank > 0) - hwloc__xml_v1export_object (&rstate, topology, child, flags); + for(i=1; i +#include "private/autogen/config.h" #define _ATFILE_SOURCE #include @@ -25,10 +25,10 @@ #include #include -#include -#include -#include -#include +#include "hwloc.h" +#include "private/private.h" +#include "private/debug.h" +#include "private/misc.h" #ifdef HAVE_MACH_MACH_INIT_H #include @@ -136,14 +136,28 @@ int hwloc_get_sysctl(int name[], unsigned namelen, int *ret) } #endif -/* Return the OS-provided number of processors. 
Unlike other methods such as - reading sysfs on Linux, this method is not virtualizable; thus it's only - used as a fall-back method, allowing virtual backends (FSROOT, etc) to - have the desired effect. */ +/* Return the OS-provided number of processors. + * Assumes topology->is_thissystem is true. + */ #ifndef HWLOC_WIN_SYS /* The windows implementation is in topology-windows.c */ int -hwloc_fallback_nbprocessors(struct hwloc_topology *topology __hwloc_attribute_unused) { +hwloc_fallback_nbprocessors(unsigned flags) { int n; + + if (flags & HWLOC_FALLBACK_NBPROCESSORS_INCLUDE_OFFLINE) { + /* try to get all CPUs for Linux and Solaris that can handle offline CPUs */ +#if HAVE_DECL__SC_NPROCESSORS_CONF + n = sysconf(_SC_NPROCESSORS_CONF); +#elif HAVE_DECL__SC_NPROC_CONF + n = sysconf(_SC_NPROC_CONF); +#else + n = -1; +#endif + if (n != -1) + return n; + } + + /* try getting only online CPUs, or whatever we can get */ #if HAVE_DECL__SC_NPROCESSORS_ONLN n = sysconf(_SC_NPROCESSORS_ONLN); #elif HAVE_DECL__SC_NPROC_ONLN @@ -762,9 +776,7 @@ hwloc__duplicate_object(struct hwloc_topology *newtopology, /* place us for real */ assert(newobj->logical_index < level_width); level[newobj->logical_index] = newobj; - /* link to already-inserted cousins - * (hwloc_pci_belowroot_apply_locality() can cause out-of-order logical indexes) - */ + /* link to already-inserted cousins */ if (newobj->logical_index > 0 && level[newobj->logical_index-1]) { newobj->prev_cousin = level[newobj->logical_index-1]; level[newobj->logical_index-1]->next_cousin = newobj; @@ -991,31 +1003,35 @@ hwloc_topology_dup(hwloc_topology_t *newp, /***** Make sure you update obj_type_priority[] below as well. *****/ static const unsigned obj_type_order[] = { /* first entry is HWLOC_OBJ_MACHINE */ 0, - /* next entry is HWLOC_OBJ_PACKAGE */ 3, - /* next entry is HWLOC_OBJ_CORE */ 12, - /* next entry is HWLOC_OBJ_PU */ 16, - /* next entry is HWLOC_OBJ_L1CACHE */ 10, - /* next entry is HWLOC_OBJ_L2CACHE */ 8, - /* next entry is HWLOC_OBJ_L3CACHE */ 6, - /* next entry is HWLOC_OBJ_L4CACHE */ 5, - /* next entry is HWLOC_OBJ_L5CACHE */ 4, - /* next entry is HWLOC_OBJ_L1ICACHE */ 11, - /* next entry is HWLOC_OBJ_L2ICACHE */ 9, - /* next entry is HWLOC_OBJ_L3ICACHE */ 7, + /* next entry is HWLOC_OBJ_PACKAGE */ 4, + /* next entry is HWLOC_OBJ_CORE */ 14, + /* next entry is HWLOC_OBJ_PU */ 18, + /* next entry is HWLOC_OBJ_L1CACHE */ 12, + /* next entry is HWLOC_OBJ_L2CACHE */ 10, + /* next entry is HWLOC_OBJ_L3CACHE */ 8, + /* next entry is HWLOC_OBJ_L4CACHE */ 7, + /* next entry is HWLOC_OBJ_L5CACHE */ 6, + /* next entry is HWLOC_OBJ_L1ICACHE */ 13, + /* next entry is HWLOC_OBJ_L2ICACHE */ 11, + /* next entry is HWLOC_OBJ_L3ICACHE */ 9, /* next entry is HWLOC_OBJ_GROUP */ 1, - /* next entry is HWLOC_OBJ_NUMANODE */ 2, - /* next entry is HWLOC_OBJ_BRIDGE */ 13, - /* next entry is HWLOC_OBJ_PCI_DEVICE */ 14, - /* next entry is HWLOC_OBJ_OS_DEVICE */ 15, - /* next entry is HWLOC_OBJ_MISC */ 17 + /* next entry is HWLOC_OBJ_NUMANODE */ 3, + /* next entry is HWLOC_OBJ_BRIDGE */ 15, + /* next entry is HWLOC_OBJ_PCI_DEVICE */ 16, + /* next entry is HWLOC_OBJ_OS_DEVICE */ 17, + /* next entry is HWLOC_OBJ_MISC */ 19, + /* next entry is HWLOC_OBJ_MEMCACHE */ 2, + /* next entry is HWLOC_OBJ_DIE */ 5 }; #ifndef NDEBUG /* only used in debug check assert if !NDEBUG */ static const hwloc_obj_type_t obj_order_type[] = { HWLOC_OBJ_MACHINE, HWLOC_OBJ_GROUP, + HWLOC_OBJ_MEMCACHE, HWLOC_OBJ_NUMANODE, HWLOC_OBJ_PACKAGE, + HWLOC_OBJ_DIE, HWLOC_OBJ_L5CACHE, 
HWLOC_OBJ_L4CACHE, HWLOC_OBJ_L3CACHE, @@ -1040,6 +1056,7 @@ static const hwloc_obj_type_t obj_order_type[] = { * Always keep Machine/NUMANode/PU/PCIDev/OSDev * then Core * then Package + * then Die * then Cache, * then Instruction Caches * then always drop Group/Misc/Bridge. @@ -1065,7 +1082,9 @@ static const int obj_type_priority[] = { /* next entry is HWLOC_OBJ_BRIDGE */ 0, /* next entry is HWLOC_OBJ_PCI_DEVICE */ 100, /* next entry is HWLOC_OBJ_OS_DEVICE */ 100, - /* next entry is HWLOC_OBJ_MISC */ 0 + /* next entry is HWLOC_OBJ_MISC */ 0, + /* next entry is HWLOC_OBJ_MEMCACHE */ 19, + /* next entry is HWLOC_OBJ_DIE */ 30 }; int hwloc_compare_types (hwloc_obj_type_t type1, hwloc_obj_type_t type2) @@ -1118,12 +1137,10 @@ hwloc_type_cmp(hwloc_obj_t obj1, hwloc_obj_t obj2) /* * How to compare objects based on cpusets. */ - static int hwloc_obj_cmp_sets(hwloc_obj_t obj1, hwloc_obj_t obj2) { hwloc_bitmap_t set1, set2; - int res = HWLOC_OBJ_DIFFERENT; assert(!hwloc__obj_type_is_special(obj1->type)); assert(!hwloc__obj_type_is_special(obj2->type)); @@ -1136,45 +1153,10 @@ hwloc_obj_cmp_sets(hwloc_obj_t obj1, hwloc_obj_t obj2) set1 = obj1->cpuset; set2 = obj2->cpuset; } - if (set1 && set2 && !hwloc_bitmap_iszero(set1) && !hwloc_bitmap_iszero(set2)) { - res = hwloc_bitmap_compare_inclusion(set1, set2); - if (res == HWLOC_OBJ_INTERSECTS) - return HWLOC_OBJ_INTERSECTS; - } + if (set1 && set2 && !hwloc_bitmap_iszero(set1) && !hwloc_bitmap_iszero(set2)) + return hwloc_bitmap_compare_inclusion(set1, set2); - /* then compare nodesets, and combine the results */ - if (obj1->complete_nodeset && obj2->complete_nodeset) { - set1 = obj1->complete_nodeset; - set2 = obj2->complete_nodeset; - } else { - set1 = obj1->nodeset; - set2 = obj2->nodeset; - } - if (set1 && set2 && !hwloc_bitmap_iszero(set1) && !hwloc_bitmap_iszero(set2)) { - int noderes = hwloc_bitmap_compare_inclusion(set1, set2); - /* deal with conflicting cpusets/nodesets inclusions */ - if (noderes == HWLOC_OBJ_INCLUDED) { - if (res == HWLOC_OBJ_CONTAINS) - /* contradicting order for cpusets and nodesets */ - return HWLOC_OBJ_INTERSECTS; - res = HWLOC_OBJ_INCLUDED; - - } else if (noderes == HWLOC_OBJ_CONTAINS) { - if (res == HWLOC_OBJ_INCLUDED) - /* contradicting order for cpusets and nodesets */ - return HWLOC_OBJ_INTERSECTS; - res = HWLOC_OBJ_CONTAINS; - - } else if (noderes == HWLOC_OBJ_INTERSECTS) { - return HWLOC_OBJ_INTERSECTS; - - } else { - /* nodesets are different, keep the cpuset order */ - - } - } - - return res; + return HWLOC_OBJ_DIFFERENT; } /* Compare object cpusets based on complete_cpuset if defined (always correctly ordered), @@ -1189,10 +1171,6 @@ hwloc__object_cpusets_compare_first(hwloc_obj_t obj1, hwloc_obj_t obj2) return hwloc_bitmap_compare_first(obj1->complete_cpuset, obj2->complete_cpuset); else if (obj1->cpuset && obj2->cpuset) return hwloc_bitmap_compare_first(obj1->cpuset, obj2->cpuset); - else if (obj1->complete_nodeset && obj2->complete_nodeset) - return hwloc_bitmap_compare_first(obj1->complete_nodeset, obj2->complete_nodeset); - else if (obj1->nodeset && obj2->nodeset) - return hwloc_bitmap_compare_first(obj1->nodeset, obj2->nodeset); return 0; } @@ -1346,7 +1324,11 @@ hwloc__insert_try_merge_group(hwloc_obj_t old, hwloc_obj_t new) } } -/* Try to insert OBJ in CUR, recurse if needed. +/* + * The main insertion routine, only used for CPU-side objects (normal types) + * using cpusets only (or complete_cpuset). + * + * Try to insert OBJ in CUR, recurse if needed.
* Returns the object if it was inserted, * the remaining object it was merged, * NULL if failed to insert. @@ -1546,17 +1528,116 @@ hwloc__find_insert_memory_parent(struct hwloc_topology *topology, hwloc_obj_t ob return group; } -/*attach the given memory object below the given normal parent. */ +/* only works for MEMCACHE and NUMAnode with a single bit in nodeset */ +static hwloc_obj_t +hwloc___attach_memory_object_by_nodeset(struct hwloc_topology *topology, hwloc_obj_t parent, + hwloc_obj_t obj, + hwloc_report_error_t report_error) +{ + hwloc_obj_t *curp = &parent->memory_first_child; + unsigned first = hwloc_bitmap_first(obj->nodeset); + + while (*curp) { + hwloc_obj_t cur = *curp; + unsigned curfirst = hwloc_bitmap_first(cur->nodeset); + + if (first < curfirst) { + /* insert before cur */ + obj->next_sibling = cur; + *curp = obj; + obj->memory_first_child = NULL; + obj->parent = parent; + topology->modified = 1; + return obj; + } + + if (first == curfirst) { + /* identical nodeset */ + if (obj->type == HWLOC_OBJ_NUMANODE) { + if (cur->type == HWLOC_OBJ_NUMANODE) { + /* identical NUMA nodes? ignore the new one */ + if (report_error) { + char curstr[512]; + char objstr[512]; + char msg[1100]; + hwloc__report_error_format_obj(curstr, sizeof(curstr), cur); + hwloc__report_error_format_obj(objstr, sizeof(objstr), obj); + snprintf(msg, sizeof(msg), "%s and %s have identical nodesets!", objstr, curstr); + report_error(msg, __LINE__); + } + return NULL; + } + assert(cur->type == HWLOC_OBJ_MEMCACHE); + /* insert the new NUMA node below that existing memcache */ + return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, report_error); + + } else { + assert(obj->type == HWLOC_OBJ_MEMCACHE); + if (cur->type == HWLOC_OBJ_MEMCACHE) { + if (cur->attr->cache.depth == obj->attr->cache.depth) + /* memcache with same nodeset and depth, ignore the new one */ + return NULL; + if (cur->attr->cache.depth > obj->attr->cache.depth) + /* memcache with higher cache depth is actually *higher* in the hierarchy + * (depth starts from the NUMA node). + * insert the new memcache below the existing one + */ + return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, report_error); + } + /* insert the memcache above the existing memcache or numa node */ + obj->next_sibling = cur->next_sibling; + cur->next_sibling = NULL; + obj->memory_first_child = cur; + cur->parent = obj; + *curp = obj; + obj->parent = parent; + topology->modified = 1; + return obj; + } + } + + curp = &cur->next_sibling; + } + + /* append to the end of the list */ + obj->next_sibling = NULL; + *curp = obj; + obj->memory_first_child = NULL; + obj->parent = parent; + topology->modified = 1; + return obj; +} + +/* Attach the given memory object below the given normal parent. + * + * Only the nodeset is used to find the location inside memory children below parent. + * + * Nodeset inclusion inside the given memory hierarchy is guaranteed by this function, + * but nodesets are not propagated to CPU-side parent yet. It will be done by + * propagate_nodeset() later. 
+ */ struct hwloc_obj * hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent, hwloc_obj_t obj, - hwloc_report_error_t report_error __hwloc_attribute_unused) + hwloc_report_error_t report_error) { - hwloc_obj_t *cur_children; + hwloc_obj_t result; assert(parent); assert(hwloc__obj_type_is_normal(parent->type)); + /* Check the nodeset */ + if (!obj->nodeset || hwloc_bitmap_iszero(obj->nodeset)) + return NULL; + /* Initialize or check the complete nodeset */ + if (!obj->complete_nodeset) { + obj->complete_nodeset = hwloc_bitmap_dup(obj->nodeset); + } else if (!hwloc_bitmap_isincluded(obj->nodeset, obj->complete_nodeset)) { + return NULL; + } + /* Neither ACPI nor Linux support multinode mscache */ + assert(hwloc_bitmap_weight(obj->nodeset) == 1); + #if 0 /* TODO: enable this instead of hack in fixup_sets once NUMA nodes are inserted late */ /* copy the parent cpuset in case it's larger than expected. @@ -1565,35 +1646,22 @@ hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent, * However, the user decided the ignore Groups, so hierarchy/locality loss is expected. */ hwloc_bitmap_copy(obj->cpuset, parent->cpuset); + hwloc_bitmap_copy(obj->complete_cpuset, parent->complete_cpuset); #endif - /* only NUMA nodes are memory for now, just append to the end of the list */ - assert(obj->type == HWLOC_OBJ_NUMANODE); - assert(obj->nodeset); - cur_children = &parent->memory_first_child; - while (*cur_children) { - /* TODO check that things are inserted in order. - * it's OK for KNL, the only user so far - */ - cur_children = &(*cur_children)->next_sibling; - } - *cur_children = obj; - obj->next_sibling = NULL; - - /* Initialize the complete nodeset if needed */ - if (!obj->complete_nodeset) { - obj->complete_nodeset = hwloc_bitmap_dup(obj->nodeset); - } - - /* Add the bit to the top sets, and to the parent CPU-side object */ - if (obj->type == HWLOC_OBJ_NUMANODE) { - if (hwloc_bitmap_isset(obj->nodeset, obj->os_index)) + result = hwloc___attach_memory_object_by_nodeset(topology, parent, obj, report_error); + if (result == obj) { + /* Add the bit to the top sets, and to the parent CPU-side object */ + if (obj->type == HWLOC_OBJ_NUMANODE) { hwloc_bitmap_set(topology->levels[0][0]->nodeset, obj->os_index); - hwloc_bitmap_set(topology->levels[0][0]->complete_nodeset, obj->os_index); + hwloc_bitmap_set(topology->levels[0][0]->complete_nodeset, obj->os_index); + } } - - topology->modified = 1; - return obj; + if (result != obj) { + /* either failed to insert, or got merged, free the original object */ + hwloc_free_unlinked_object(obj); + } + return result; } /* insertion routine that lets you change the error reporting callback */ @@ -1699,11 +1767,18 @@ hwloc_alloc_setup_object(hwloc_topology_t topology, hwloc_obj_type_t type, unsigned os_index) { struct hwloc_obj *obj = hwloc_tma_malloc(topology->tma, sizeof(*obj)); + if (!obj) + return NULL; memset(obj, 0, sizeof(*obj)); obj->type = type; obj->os_index = os_index; obj->gp_index = topology->next_gp_index++; obj->attr = hwloc_tma_malloc(topology->tma, sizeof(*obj->attr)); + if (!obj->attr) { + assert(!topology->tma || !topology->tma->dontfree); /* this tma cannot fail to allocate */ + free(obj); + return NULL; + } memset(obj->attr, 0, sizeof(*obj->attr)); /* do not allocate the cpuset here, let the caller do it */ return obj; @@ -1717,6 +1792,10 @@ hwloc_topology_alloc_group_object(struct hwloc_topology *topology) errno = EINVAL; return NULL; } + if (topology->adopted_shmem_addr) { + errno = EPERM; + return 
NULL; + } return hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, HWLOC_UNKNOWN_INDEX); } @@ -1736,6 +1815,10 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t errno = EINVAL; return NULL; } + if (topology->adopted_shmem_addr) { + errno = EPERM; + return NULL; + } if (topology->type_filter[HWLOC_OBJ_GROUP] == HWLOC_TYPE_FILTER_KEEP_NONE) { hwloc_free_unlinked_object(obj); @@ -1754,12 +1837,30 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t hwloc_bitmap_and(obj->complete_nodeset, obj->complete_nodeset, root->complete_nodeset); if ((!obj->cpuset || hwloc_bitmap_iszero(obj->cpuset)) - && (!obj->complete_cpuset || hwloc_bitmap_iszero(obj->complete_cpuset)) - && (!obj->nodeset || hwloc_bitmap_iszero(obj->nodeset)) - && (!obj->complete_nodeset || hwloc_bitmap_iszero(obj->complete_nodeset))) { - hwloc_free_unlinked_object(obj); - errno = EINVAL; - return NULL; + && (!obj->complete_cpuset || hwloc_bitmap_iszero(obj->complete_cpuset))) { + /* we'll insert by cpuset, so build cpuset from the nodeset */ + hwloc_const_bitmap_t nodeset = obj->nodeset ? obj->nodeset : obj->complete_nodeset; + hwloc_obj_t numa; + + if ((!obj->nodeset || hwloc_bitmap_iszero(obj->nodeset)) + && (!obj->complete_nodeset || hwloc_bitmap_iszero(obj->complete_nodeset))) { + hwloc_free_unlinked_object(obj); + errno = EINVAL; + return NULL; + } + + if (!obj->cpuset) { + obj->cpuset = hwloc_bitmap_alloc(); + if (!obj->cpuset) { + hwloc_free_unlinked_object(obj); + return NULL; + } + } + + numa = NULL; + while ((numa = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, numa)) != NULL) + if (hwloc_bitmap_isset(nodeset, numa->os_index)) + hwloc_bitmap_or(obj->cpuset, obj->cpuset, numa->cpuset); } cmp = hwloc_obj_cmp_sets(obj, root); @@ -1806,6 +1907,10 @@ hwloc_topology_insert_misc_object(struct hwloc_topology *topology, hwloc_obj_t p errno = EINVAL; return NULL; } + if (topology->adopted_shmem_addr) { + errno = EPERM; + return NULL; + } obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MISC, HWLOC_UNKNOWN_INDEX); if (name) @@ -1963,6 +2068,7 @@ fixup_sets(hwloc_obj_t obj) in_memory_list = 0; /* iterate over normal children first, we'll come back for memory children later */ + /* FIXME: if memory objects are inserted late, we should update their cpuset and complete_cpuset at insertion instead of here */ iterate: while (child) { /* our cpuset must be included in our parent's one */ @@ -1980,6 +2086,12 @@ fixup_sets(hwloc_obj_t obj) child->complete_nodeset = hwloc_bitmap_dup(child->nodeset); } + if (hwloc_obj_type_is_memory(child->type)) { + /* update memory children cpusets in case some CPU-side parent was removed */ + hwloc_bitmap_copy(child->cpuset, obj->cpuset); + hwloc_bitmap_copy(child->complete_cpuset, obj->complete_cpuset); + } + fixup_sets(child); child = child->next_sibling; } @@ -2025,9 +2137,8 @@ hwloc_obj_add_children_sets(hwloc_obj_t obj) /* CPU objects are inserted by cpusets, we know their cpusets are properly included. * We just need fixup_sets() to make sure they aren't too wide. * - * Memory objects are inserted by cpusets to find their CPU parent, - * but nodesets are only used inside the memory hierarchy below that parent. - * Thus we need to propagate nodesets to CPU-side parents and children. + * Within each memory hierarchy, nodesets are consistent as well. + * However, they must be propagated to their CPU-side parents. * * A memory object nodeset consists of NUMA nodes below it.
* A normal object nodeset consists in NUMA nodes attached to any @@ -2060,27 +2171,12 @@ propagate_nodeset(hwloc_obj_t obj) /* now add our local nodeset */ for_each_memory_child(child, obj) { - /* FIXME rather recurse in the memory hierarchy */ - - /* first, update children complete_nodeset if needed */ - if (!child->complete_nodeset) - child->complete_nodeset = hwloc_bitmap_dup(child->nodeset); - else - hwloc_bitmap_or(child->complete_nodeset, child->complete_nodeset, child->nodeset); - /* add memory children nodesets to ours */ hwloc_bitmap_or(obj->nodeset, obj->nodeset, child->nodeset); hwloc_bitmap_or(obj->complete_nodeset, obj->complete_nodeset, child->complete_nodeset); - - /* by the way, copy our cpusets to memory children */ - if (child->cpuset) - hwloc_bitmap_copy(child->cpuset, obj->cpuset); - else - child->cpuset = hwloc_bitmap_dup(obj->cpuset); - if (child->complete_cpuset) - hwloc_bitmap_copy(child->complete_cpuset, obj->complete_cpuset); - else - child->complete_cpuset = hwloc_bitmap_dup(obj->complete_cpuset); + /* no need to recurse because hwloc__attach_memory_object() + * makes sure nodesets are consistent within each memory hierarchy. + */ } /* Propagate our nodeset to CPU children. */ @@ -2219,6 +2315,7 @@ hwloc_reset_normal_type_depths(hwloc_topology_t topology) for (i=HWLOC_OBJ_TYPE_MIN; i<=HWLOC_OBJ_GROUP; i++) topology->type_depth[i] = HWLOC_TYPE_DEPTH_UNKNOWN; /* type contiguity is asserted in topology_check() */ + topology->type_depth[HWLOC_OBJ_DIE] = HWLOC_TYPE_DEPTH_UNKNOWN; } static int @@ -2245,6 +2342,8 @@ hwloc_compare_levels_structure(hwloc_topology_t topology, unsigned i) return -1; for(j=0; jlevel_nbobjects[i]; j++) { + if (topology->levels[i-1][j] != topology->levels[i][j]->parent) + return -1; if (topology->levels[i-1][j]->arity != 1) return -1; if (checkmemory && topology->levels[i-1][j]->memory_arity) @@ -2434,6 +2533,7 @@ hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root) { hwloc_obj_t child; unsigned arity = root->arity; + hwloc_obj_t *array; int ok; /* assume we're not symmetric by default */ @@ -2465,8 +2565,9 @@ hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root) /* now check that children subtrees are identical. 
* just walk down the first child in each tree and compare their depth and arities */ -{ - HWLOC_VLA(hwloc_obj_t, array, arity); + array = malloc(arity * sizeof(*array)); + if (!array) + return; memcpy(array, root->children, arity * sizeof(*array)); while (1) { unsigned i; @@ -2474,8 +2575,9 @@ hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root) for(i=1; idepth != array[0]->depth || array[i]->arity != array[0]->arity) { - return; - } + free(array); + return; + } if (!array[0]->arity) /* no more children level, we're ok */ break; @@ -2483,7 +2585,7 @@ hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root) for(i=0; ifirst_child; } -} + free(array); /* everything went fine, we're symmetric */ good: @@ -2601,57 +2703,23 @@ hwloc_connect_children(hwloc_obj_t parent) } /* - * Check whether there is an object below ROOT that has the same type as OBJ + * Check whether there is an object strictly below ROOT that has the same type as OBJ */ static int find_same_type(hwloc_obj_t root, hwloc_obj_t obj) { hwloc_obj_t child; - if (hwloc_type_cmp(root, obj) == HWLOC_OBJ_EQUAL) - return 1; - - for_each_child (child, root) + for_each_child (child, root) { + if (hwloc_type_cmp(child, obj) == HWLOC_OBJ_EQUAL) + return 1; if (find_same_type(child, obj)) return 1; + } return 0; } -/* traverse the array of current object and compare them with top_obj. - * if equal, take the object and put its children into the remaining objs. - * if not equal, put the object into the remaining objs. - */ -static unsigned -hwloc_level_take_objects(hwloc_obj_t top_obj, - hwloc_obj_t *current_objs, unsigned n_current_objs, - hwloc_obj_t *taken_objs, unsigned n_taken_objs __hwloc_attribute_unused, - hwloc_obj_t *remaining_objs, unsigned n_remaining_objs __hwloc_attribute_unused) -{ - unsigned taken_i = 0; - unsigned new_i = 0; - unsigned i, j; - - for (i = 0; i < n_current_objs; i++) - if (hwloc_type_cmp(top_obj, current_objs[i]) == HWLOC_OBJ_EQUAL) { - /* Take it, add main children. */ - taken_objs[taken_i++] = current_objs[i]; - for (j = 0; j < current_objs[i]->arity; j++) - remaining_objs[new_i++] = current_objs[i]->children[j]; - } else { - /* Leave it. */ - remaining_objs[new_i++] = current_objs[i]; - } - -#ifdef HWLOC_DEBUG - /* Make sure we didn't mess up. 
*/ - assert(taken_i == n_taken_objs); - assert(new_i == n_current_objs - n_taken_objs + n_remaining_objs); -#endif - - return new_i; -} - static int hwloc_build_level_from_list(struct hwloc_special_level_s *slevel) { @@ -2670,6 +2738,9 @@ hwloc_build_level_from_list(struct hwloc_special_level_s *slevel) if (nb) { /* allocate and fill level */ slevel->objs = malloc(nb * sizeof(struct hwloc_obj *)); + if (!slevel->objs) + return -1; + obj = slevel->first; i = 0; while (obj) { @@ -2709,7 +2780,17 @@ hwloc_list_special_objects(hwloc_topology_t topology, hwloc_obj_t obj) /* Insert the main NUMA node list */ hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_NUMANODE], obj); - /* Recurse */ + /* Recurse, NUMA nodes only have Misc children */ + for_each_misc_child(child, obj) + hwloc_list_special_objects(topology, child); + + } else if (obj->type == HWLOC_OBJ_MEMCACHE) { + obj->next_cousin = NULL; + obj->depth = HWLOC_TYPE_DEPTH_MEMCACHE; + /* Insert the main MemCache list */ + hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_MEMCACHE], obj); + + /* Recurse, MemCaches have NUMA nodes or Misc children */ for_each_memory_child(child, obj) hwloc_list_special_objects(topology, child); for_each_misc_child(child, obj) @@ -2742,6 +2823,7 @@ hwloc_list_special_objects(hwloc_topology_t topology, hwloc_obj_t obj) /* Insert in the main osdev list */ hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_OSDEV], obj); } + /* Recurse, I/O only have I/O and Misc children */ for_each_io_child(child, obj) hwloc_list_special_objects(topology, child); @@ -2762,7 +2844,7 @@ hwloc_list_special_objects(hwloc_topology_t topology, hwloc_obj_t obj) } /* Build I/O levels */ -static void +static int hwloc_connect_io_misc_levels(hwloc_topology_t topology) { unsigned i; @@ -2773,8 +2855,12 @@ hwloc_connect_io_misc_levels(hwloc_topology_t topology) hwloc_list_special_objects(topology, topology->levels[0][0]); - for(i=0; islevels[i]); + for(i=0; islevels[i]) < 0) + return -1; + } + + return 0; } /* @@ -2849,32 +2935,48 @@ hwloc_connect_levels(hwloc_topology_t topology) /* Now peek all objects of the same type, build a level with that and * replace them with their children. */ - /* First count them. */ - n_taken_objs = 0; - n_new_objs = 0; - for (i = 0; i < n_objs; i++) - if (hwloc_type_cmp(top_obj, objs[i]) == HWLOC_OBJ_EQUAL) { - n_taken_objs++; - n_new_objs += objs[i]->arity; - } - - /* New level. */ - taken_objs = malloc((n_taken_objs + 1) * sizeof(taken_objs[0])); - /* New list of pending objects. 
*/ - if (n_objs - n_taken_objs + n_new_objs) { - new_objs = malloc((n_objs - n_taken_objs + n_new_objs) * sizeof(new_objs[0])); - } else { -#ifdef HWLOC_DEBUG - assert(!n_new_objs); - assert(n_objs == n_taken_objs); -#endif - new_objs = NULL; + /* allocate enough to take all current objects and an ending NULL */ + taken_objs = malloc((n_objs+1) * sizeof(taken_objs[0])); + if (!taken_objs) { + free(objs); + errno = ENOMEM; + return -1; } - n_new_objs = hwloc_level_take_objects(top_obj, - objs, n_objs, - taken_objs, n_taken_objs, - new_objs, n_new_objs); + /* allocate enough to keep all current objects or their children */ + n_new_objs = 0; + for (i = 0; i < n_objs; i++) { + if (objs[i]->arity) + n_new_objs += objs[i]->arity; + else + n_new_objs++; + } + new_objs = malloc(n_new_objs * sizeof(new_objs[0])); + if (!new_objs) { + free(objs); + free(taken_objs); + errno = ENOMEM; + return -1; + } + + /* now actually take these objects */ + n_new_objs = 0; + n_taken_objs = 0; + for (i = 0; i < n_objs; i++) + if (hwloc_type_cmp(top_obj, objs[i]) == HWLOC_OBJ_EQUAL) { + /* Take it, add main children. */ + taken_objs[n_taken_objs++] = objs[i]; + memcpy(&new_objs[n_new_objs], objs[i]->children, objs[i]->arity * sizeof(new_objs[0])); + n_new_objs += objs[i]->arity; + } else { + /* Leave it. */ + new_objs[n_new_objs++] = objs[i]; + } + + if (!n_new_objs) { + free(new_objs); + new_objs = NULL; + } /* Ok, put numbers in the level and link cousins. */ for (i = 0; i < n_taken_objs; i++) { @@ -2964,13 +3066,69 @@ hwloc_topology_reconnect(struct hwloc_topology *topology, unsigned long flags) if (hwloc_connect_levels(topology) < 0) return -1; - hwloc_connect_io_misc_levels(topology); + if (hwloc_connect_io_misc_levels(topology) < 0) + return -1; topology->modified = 0; return 0; } +/* for regression testing, make sure the order of io devices + * doesn't change with the dentry order in the filesystem + * + * Only needed for OSDev for now. 
+ */ +static hwloc_obj_t +hwloc_debug_insert_osdev_sorted(hwloc_obj_t queue, hwloc_obj_t obj) +{ + hwloc_obj_t *pcur = &queue; + while (*pcur && strcmp((*pcur)->name, obj->name) < 0) + pcur = &((*pcur)->next_sibling); + obj->next_sibling = *pcur; + *pcur = obj; + return queue; +} + +static void +hwloc_debug_sort_children(hwloc_obj_t root) +{ + hwloc_obj_t child; + + if (root->io_first_child) { + hwloc_obj_t osdevqueue, *pchild; + + pchild = &root->io_first_child; + osdevqueue = NULL; + while ((child = *pchild) != NULL) { + if (child->type != HWLOC_OBJ_OS_DEVICE) { + /* keep non-osdev untouched */ + pchild = &child->next_sibling; + continue; + } + + /* dequeue this child */ + *pchild = child->next_sibling; + child->next_sibling = NULL; + + /* insert in osdev queue in order */ + osdevqueue = hwloc_debug_insert_osdev_sorted(osdevqueue, child); + } + + /* requeue the now-sorted osdev queue */ + *pchild = osdevqueue; + } + + /* Recurse */ + for_each_child(child, root) + hwloc_debug_sort_children(child); + for_each_memory_child(child, root) + hwloc_debug_sort_children(child); + for_each_io_child(child, root) + hwloc_debug_sort_children(child); + /* no I/O under Misc */ +} + void hwloc_alloc_root_sets(hwloc_obj_t root) { /* @@ -2992,11 +3150,32 @@ void hwloc_alloc_root_sets(hwloc_obj_t root) root->complete_nodeset = hwloc_bitmap_alloc(); } -/* Main discovery loop */ -static int -hwloc_discover(struct hwloc_topology *topology) +static void +hwloc_discover_by_phase(struct hwloc_topology *topology, + struct hwloc_disc_status *dstatus, + const char *phasename __hwloc_attribute_unused) { struct hwloc_backend *backend; + hwloc_debug("%s phase discovery...\n", phasename); + for(backend = topology->backends; backend; backend = backend->next) { + if (dstatus->phase & dstatus->excluded_phases) + break; + if (!(backend->phases & dstatus->phase)) + continue; + if (!backend->discover) + continue; + hwloc_debug("%s phase discovery in component %s...\n", phasename, backend->component->name); + backend->discover(backend, dstatus); + hwloc_debug_print_objects(0, topology->levels[0][0]); + } +} + +/* Main discovery loop */ +static int +hwloc_discover(struct hwloc_topology *topology, + struct hwloc_disc_status *dstatus) +{ + const char *env; topology->modified = 0; /* no need to reconnect yet */ @@ -3038,38 +3217,70 @@ hwloc_discover(struct hwloc_topology *topology) * automatically propagated to the whole tree after detection. */ - /* - * Discover CPUs first - */ - backend = topology->backends; - while (NULL != backend) { - if (backend->component->type != HWLOC_DISC_COMPONENT_TYPE_CPU - && backend->component->type != HWLOC_DISC_COMPONENT_TYPE_GLOBAL) - /* not yet */ - goto next_cpubackend; - if (!backend->discover) - goto next_cpubackend; - backend->discover(backend); - hwloc_debug_print_objects(0, topology->levels[0][0]); + if (topology->backend_phases & HWLOC_DISC_PHASE_GLOBAL) { + /* usually, GLOBAL is alone. + * but HWLOC_ANNOTATE_GLOBAL_COMPONENTS=1 allows optional ANNOTATE steps. 
+ */ struct hwloc_backend *global_backend = topology->backends; + assert(global_backend); + assert(global_backend->phases == HWLOC_DISC_PHASE_GLOBAL); -next_cpubackend: - backend = backend->next; + /* + * Perform the single-component-based GLOBAL discovery + */ + hwloc_debug("GLOBAL phase discovery...\n"); + hwloc_debug("GLOBAL phase discovery with component %s...\n", global_backend->component->name); + dstatus->phase = HWLOC_DISC_PHASE_GLOBAL; + global_backend->discover(global_backend, dstatus); + hwloc_debug_print_objects(0, topology->levels[0][0]); + } + /* Don't explicitly ignore other phases, in case there's ever + * a need to bring them back. + * The component will usually exclude them by default anyway. + * Except if annotating global components is explicitly requested. + */ + + if (topology->backend_phases & HWLOC_DISC_PHASE_CPU) { + /* + * Discover CPUs first + */ + dstatus->phase = HWLOC_DISC_PHASE_CPU; + hwloc_discover_by_phase(topology, dstatus, "CPU"); + } + + if (!(topology->backend_phases & (HWLOC_DISC_PHASE_GLOBAL|HWLOC_DISC_PHASE_CPU))) { + hwloc_debug("No GLOBAL or CPU component phase found\n"); + /* we'll fail below */ } /* One backend should have called hwloc_alloc_root_sets() * and set bits during PU and NUMA insert. */ if (!topology->levels[0][0]->cpuset || hwloc_bitmap_iszero(topology->levels[0][0]->cpuset)) { - hwloc_debug("%s", "No PU added by any CPU and global backend\n"); + hwloc_debug("%s", "No PU added by any CPU or GLOBAL component phase\n"); errno = EINVAL; return -1; } - if (topology->binding_hooks.get_allowed_resources && topology->is_thissystem) { - const char *env = getenv("HWLOC_THISSYSTEM_ALLOWED_RESOURCES"); - if ((env && atoi(env)) - || (topology->flags & HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)) - topology->binding_hooks.get_allowed_resources(topology); + /* + * Memory-specific discovery + */ + if (topology->backend_phases & HWLOC_DISC_PHASE_MEMORY) { + dstatus->phase = HWLOC_DISC_PHASE_MEMORY; + hwloc_discover_by_phase(topology, dstatus, "MEMORY"); + } + + if (/* check if getting the sets of locally allowed resources is possible */ + topology->binding_hooks.get_allowed_resources + && topology->is_thissystem + /* check whether it has been done already */ + && !(dstatus->flags & HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES) + /* check whether it was explicitly requested */ + && ((topology->flags & HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES) != 0 + || ((env = getenv("HWLOC_THISSYSTEM_ALLOWED_RESOURCES")) != NULL && atoi(env)))) { + /* OK, get the sets of locally allowed resources */ + topology->binding_hooks.get_allowed_resources(topology); + dstatus->flags |= HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES; } /* If there's no NUMA node, add one with all the memory.
@@ -3113,7 +3324,7 @@ next_cpubackend: hwloc_debug_print_objects(0, topology->levels[0][0]); - if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM)) { + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED)) { hwloc_debug("%s", "\nRemoving unauthorized sets from all sets\n"); remove_unused_sets(topology, topology->levels[0][0]); hwloc_debug_print_objects(0, topology->levels[0][0]); @@ -3149,28 +3360,27 @@ next_cpubackend: hwloc_debug_print_objects(0, topology->levels[0][0]); /* - * Additional discovery with other backends + * Additional discovery */ - - backend = topology->backends; - while (NULL != backend) { - if (backend->component->type == HWLOC_DISC_COMPONENT_TYPE_CPU - || backend->component->type == HWLOC_DISC_COMPONENT_TYPE_GLOBAL) - /* already done above */ - goto next_noncpubackend; - if (!backend->discover) - goto next_noncpubackend; - backend->discover(backend); - hwloc_debug_print_objects(0, topology->levels[0][0]); - -next_noncpubackend: - backend = backend->next; + if (topology->backend_phases & HWLOC_DISC_PHASE_PCI) { + dstatus->phase = HWLOC_DISC_PHASE_PCI; + hwloc_discover_by_phase(topology, dstatus, "PCI"); + } + if (topology->backend_phases & HWLOC_DISC_PHASE_IO) { + dstatus->phase = HWLOC_DISC_PHASE_IO; + hwloc_discover_by_phase(topology, dstatus, "IO"); + } + if (topology->backend_phases & HWLOC_DISC_PHASE_MISC) { + dstatus->phase = HWLOC_DISC_PHASE_MISC; + hwloc_discover_by_phase(topology, dstatus, "MISC"); + } + if (topology->backend_phases & HWLOC_DISC_PHASE_ANNOTATE) { + dstatus->phase = HWLOC_DISC_PHASE_ANNOTATE; + hwloc_discover_by_phase(topology, dstatus, "ANNOTATE"); } - hwloc_pci_belowroot_apply_locality(topology); - - hwloc_debug("%s", "\nNow reconnecting\n"); - hwloc_debug_print_objects(0, topology->levels[0][0]); + if (getenv("HWLOC_DEBUG_SORT_CHILDREN")) + hwloc_debug_sort_children(topology->levels[0][0]); /* Remove some stuff */ @@ -3217,7 +3427,8 @@ next_noncpubackend: /* add some identification attributes if not loading from XML */ if (topology->backends - && strcmp(topology->backends->component->name, "xml")) { + && strcmp(topology->backends->component->name, "xml") + && !getenv("HWLOC_DONT_ADD_VERSION_INFO")) { char *value; /* add a hwlocVersion */ hwloc_obj_add_info(topology->levels[0][0], "hwlocVersion", HWLOC_VERSION); @@ -3269,6 +3480,7 @@ hwloc_topology_setup_defaults(struct hwloc_topology *topology) HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_BRIDGE == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_BRIDGE)); HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_PCIDEV == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_PCI_DEVICE)); HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_OSDEV == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_OS_DEVICE)); + HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_MEMCACHE == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_MEMCACHE)); /* sane values to type_depth */ hwloc_reset_normal_type_depths(topology); @@ -3277,6 +3489,7 @@ hwloc_topology_setup_defaults(struct hwloc_topology *topology) topology->type_depth[HWLOC_OBJ_BRIDGE] = HWLOC_TYPE_DEPTH_BRIDGE; topology->type_depth[HWLOC_OBJ_PCI_DEVICE] = HWLOC_TYPE_DEPTH_PCI_DEVICE; topology->type_depth[HWLOC_OBJ_OS_DEVICE] = HWLOC_TYPE_DEPTH_OS_DEVICE; + topology->type_depth[HWLOC_OBJ_MEMCACHE] = HWLOC_TYPE_DEPTH_MEMCACHE; /* Create the actual machine object, but don't touch its attributes yet * since the OS backend may still change the object into something else @@ -3303,7 +3516,7 @@ hwloc__topology_init (struct hwloc_topology **topologyp, topology->tma = tma; hwloc_components_init(); /* uses malloc without tma, but won't need it since dup() 
caller already took a reference */ - hwloc_backends_init(topology); + hwloc_topology_components_init(topology); hwloc_pci_discovery_init(topology); /* make sure both dup() and load() get sane variables */ /* Setup topology context */ @@ -3320,7 +3533,7 @@ hwloc__topology_init (struct hwloc_topology **topologyp, topology->support.cpubind = hwloc_tma_malloc(tma, sizeof(*topology->support.cpubind)); topology->support.membind = hwloc_tma_malloc(tma, sizeof(*topology->support.membind)); - topology->nb_levels_allocated = nblevels; /* enough for default 9 levels = Mach+Pack+NUMA+L3+L2+L1d+L1i+Co+PU */ + topology->nb_levels_allocated = nblevels; /* enough for default 10 levels = Mach+Pack+Die+NUMA+L3+L2+L1d+L1i+Co+PU */ topology->levels = hwloc_tma_calloc(tma, topology->nb_levels_allocated * sizeof(*topology->levels)); topology->level_nbobjects = hwloc_tma_calloc(tma, topology->nb_levels_allocated * sizeof(*topology->level_nbobjects)); @@ -3343,7 +3556,7 @@ int hwloc_topology_init (struct hwloc_topology **topologyp) { return hwloc__topology_init(topologyp, - 16, /* 16 is enough for default 9 levels = Mach+Pack+NUMA+L3+L2+L1d+L1i+Co+PU */ + 16, /* 16 is enough for default 10 levels = Mach+Pack+Die+NUMA+L3+L2+L1d+L1i+Co+PU */ NULL); /* no TMA for normal topologies, too many allocations to fix */ } @@ -3376,7 +3589,7 @@ hwloc_topology_set_synthetic(struct hwloc_topology *topology, const char *descri return hwloc_disc_component_force_enable(topology, 0 /* api */, - -1, "synthetic", + "synthetic", description, NULL, NULL); } @@ -3391,7 +3604,7 @@ hwloc_topology_set_xml(struct hwloc_topology *topology, return hwloc_disc_component_force_enable(topology, 0 /* api */, - -1, "xml", + "xml", xmlpath, NULL, NULL); } @@ -3407,7 +3620,7 @@ hwloc_topology_set_xmlbuffer(struct hwloc_topology *topology, return hwloc_disc_component_force_enable(topology, 0 /* api */, - -1, "xml", NULL, + "xml", NULL, xmlbuffer, (void*) (uintptr_t) size); } @@ -3420,7 +3633,7 @@ hwloc_topology_set_flags (struct hwloc_topology *topology, unsigned long flags) return -1; } - if (flags & ~(HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)) { + if (flags & ~(HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)) { errno = EINVAL; return -1; } @@ -3445,6 +3658,7 @@ hwloc__topology_filter_init(struct hwloc_topology *topology) topology->type_filter[HWLOC_OBJ_L1ICACHE] = HWLOC_TYPE_FILTER_KEEP_NONE; topology->type_filter[HWLOC_OBJ_L2ICACHE] = HWLOC_TYPE_FILTER_KEEP_NONE; topology->type_filter[HWLOC_OBJ_L3ICACHE] = HWLOC_TYPE_FILTER_KEEP_NONE; + topology->type_filter[HWLOC_OBJ_MEMCACHE] = HWLOC_TYPE_FILTER_KEEP_NONE; topology->type_filter[HWLOC_OBJ_GROUP] = HWLOC_TYPE_FILTER_KEEP_STRUCTURE; topology->type_filter[HWLOC_OBJ_MISC] = HWLOC_TYPE_FILTER_KEEP_NONE; topology->type_filter[HWLOC_OBJ_BRIDGE] = HWLOC_TYPE_FILTER_KEEP_NONE; @@ -3575,6 +3789,7 @@ hwloc_topology_destroy (struct hwloc_topology *topology) } hwloc_backends_disable_all(topology); + hwloc_topology_components_fini(topology); hwloc_components_fini(); hwloc_topology_clear(topology); @@ -3591,6 +3806,8 @@ hwloc_topology_destroy (struct hwloc_topology *topology) int hwloc_topology_load (struct hwloc_topology *topology) { + struct hwloc_disc_status dstatus; + const char *env; int err; if (topology->is_loaded) { @@ -3617,7 +3834,7 @@ hwloc_topology_load (struct hwloc_topology *topology) if (fsroot_path_env) hwloc_disc_component_force_enable(topology, 1 
/* env force */, - HWLOC_DISC_COMPONENT_TYPE_CPU, "linux", + "linux", NULL /* backend will getenv again */, NULL, NULL); } if (!topology->backends) { @@ -3625,7 +3842,7 @@ hwloc_topology_load (struct hwloc_topology *topology) if (cpuid_path_env) hwloc_disc_component_force_enable(topology, 1 /* env force */, - HWLOC_DISC_COMPONENT_TYPE_CPU, "x86", + "x86", NULL /* backend will getenv again */, NULL, NULL); } if (!topology->backends) { @@ -3633,7 +3850,7 @@ hwloc_topology_load (struct hwloc_topology *topology) if (synthetic_env) hwloc_disc_component_force_enable(topology, 1 /* env force */, - -1, "synthetic", + "synthetic", synthetic_env, NULL, NULL); } if (!topology->backends) { @@ -3641,11 +3858,19 @@ hwloc_topology_load (struct hwloc_topology *topology) if (xmlpath_env) hwloc_disc_component_force_enable(topology, 1 /* env force */, - -1, "xml", + "xml", xmlpath_env, NULL, NULL); } } + dstatus.excluded_phases = 0; + dstatus.flags = 0; /* did nothing yet */ + + env = getenv("HWLOC_ALLOW"); + if (env && !strcmp(env, "all")) + /* don't retrieve the sets of allowed resources */ + dstatus.flags |= HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES; + /* instantiate all possible other backends now */ hwloc_disc_components_enable_others(topology); /* now that backends are enabled, update the thissystem flag and some callbacks */ @@ -3660,7 +3885,7 @@ hwloc_topology_load (struct hwloc_topology *topology) hwloc_pci_discovery_prepare(topology); /* actual topology discovery */ - err = hwloc_discover(topology); + err = hwloc_discover(topology, &dstatus); if (err < 0) goto out; @@ -3682,6 +3907,12 @@ hwloc_topology_load (struct hwloc_topology *topology) hwloc_internal_distances_refresh(topology); topology->is_loaded = 1; + + if (topology->backend_phases & HWLOC_DISC_PHASE_TWEAK) { + dstatus.phase = HWLOC_DISC_PHASE_TWEAK; + hwloc_discover_by_phase(topology, &dstatus, "TWEAK"); + } + return 0; out: @@ -3740,7 +3971,75 @@ restrict_object_by_cpuset(hwloc_topology_t topology, unsigned long flags, hwloc_ && hwloc_bitmap_iszero(obj->cpuset) && (obj->type != HWLOC_OBJ_NUMANODE || (flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS))) { /* remove object */ - hwloc_debug("%s", "\nRemoving object during restrict"); + hwloc_debug("%s", "\nRemoving object during restrict by cpuset"); + hwloc_debug_print_object(0, obj); + + if (!(flags & HWLOC_RESTRICT_FLAG_ADAPT_IO)) { + hwloc_free_object_siblings_and_children(obj->io_first_child); + obj->io_first_child = NULL; + } + if (!(flags & HWLOC_RESTRICT_FLAG_ADAPT_MISC)) { + hwloc_free_object_siblings_and_children(obj->misc_first_child); + obj->misc_first_child = NULL; + } + assert(!obj->first_child); + assert(!obj->memory_first_child); + unlink_and_free_single_object(pobj); + topology->modified = 1; + } +} + +/* adjust object nodesets according the given droppednodeset, + * drop object whose nodeset becomes empty and that have no children, + * and propagate PU removal as cpuset changes in parents. 
+ */ +static void +restrict_object_by_nodeset(hwloc_topology_t topology, unsigned long flags, hwloc_obj_t *pobj, + hwloc_bitmap_t droppedcpuset, hwloc_bitmap_t droppednodeset) +{ + hwloc_obj_t obj = *pobj, child, *pchild; + int modified = 0; + + if (hwloc_bitmap_intersects(obj->complete_nodeset, droppednodeset)) { + hwloc_bitmap_andnot(obj->nodeset, obj->nodeset, droppednodeset); + hwloc_bitmap_andnot(obj->complete_nodeset, obj->complete_nodeset, droppednodeset); + modified = 1; + } else { + if ((flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS) + && hwloc_bitmap_iszero(obj->complete_nodeset)) { + /* we're empty, there's a PU below us, it'll be removed this time */ + modified = 1; + } + /* cpuset cannot intersect unless nodeset intersects or is empty */ + if (droppedcpuset) + assert(!hwloc_bitmap_intersects(obj->complete_cpuset, droppedcpuset) + || hwloc_bitmap_iszero(obj->complete_nodeset)); + } + if (droppedcpuset) { + hwloc_bitmap_andnot(obj->cpuset, obj->cpuset, droppedcpuset); + hwloc_bitmap_andnot(obj->complete_cpuset, obj->complete_cpuset, droppedcpuset); + } + + if (modified) { + for_each_child_safe(child, obj, pchild) + restrict_object_by_nodeset(topology, flags, pchild, droppedcpuset, droppednodeset); + if (flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS) + /* cpuset may have changed above where some NUMA nodes were removed. + * if some hwloc_bitmap_first(child->complete_cpuset) changed, children might need to be reordered */ + hwloc__reorder_children(obj); + + for_each_memory_child_safe(child, obj, pchild) + restrict_object_by_nodeset(topology, flags, pchild, droppedcpuset, droppednodeset); + /* FIXME: we may have to reorder CPU-less groups of NUMA nodes if some of their nodes were removed */ + + /* Nothing to restrict under I/O or Misc */ + } + + if (!obj->first_child && !obj->memory_first_child /* arity not updated before connect_children() */ + && hwloc_bitmap_iszero(obj->nodeset) + && (obj->type != HWLOC_OBJ_PU || (flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS))) { + /* remove object */ + hwloc_debug("%s", "\nRemoving object during restrict by nodeset"); hwloc_debug_print_object(0, obj); if (!(flags & HWLOC_RESTRICT_FLAG_ADAPT_IO)) { @@ -3759,7 +4058,7 @@ restrict_object_by_cpuset(hwloc_topology_t topology, unsigned long flags, hwloc_ } int -hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_cpuset_t cpuset, unsigned long flags) +hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_bitmap_t set, unsigned long flags) { hwloc_bitmap_t droppedcpuset, droppednodeset; @@ -3767,15 +4066,35 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_cpuset_t cp errno = EINVAL; return -1; } + if (topology->adopted_shmem_addr) { + errno = EPERM; + return -1; + } if (flags & ~(HWLOC_RESTRICT_FLAG_REMOVE_CPULESS - |HWLOC_RESTRICT_FLAG_ADAPT_MISC|HWLOC_RESTRICT_FLAG_ADAPT_IO)) { + |HWLOC_RESTRICT_FLAG_ADAPT_MISC|HWLOC_RESTRICT_FLAG_ADAPT_IO + |HWLOC_RESTRICT_FLAG_BYNODESET|HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS)) { errno = EINVAL; return -1; } + if (flags & HWLOC_RESTRICT_FLAG_BYNODESET) { + /* cannot use CPULESS with BYNODESET */ + if (flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) { + errno = EINVAL; + return -1; + } + } else { + /* cannot use MEMLESS without BYNODESET */ + if (flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS) { + errno = EINVAL; + return -1; + } + } + /* make sure we'll keep something in the topology */ - if (!hwloc_bitmap_intersects(cpuset, topology->allowed_cpuset)) { + if (((flags & HWLOC_RESTRICT_FLAG_BYNODESET) && !hwloc_bitmap_intersects(set, 
topology->allowed_nodeset)) + || (!(flags & HWLOC_RESTRICT_FLAG_BYNODESET) && !hwloc_bitmap_intersects(set, topology->allowed_cpuset))) { errno = EINVAL; /* easy failure, just don't touch the topology */ return -1; } @@ -3788,39 +4107,76 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_cpuset_t cp return -1; } - /* cpuset to clear */ - hwloc_bitmap_not(droppedcpuset, cpuset); - /* nodeset to clear */ - if (flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) { - hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0); - do { - /* node will be removed if nodeset gets or was empty */ - if (hwloc_bitmap_iszero(node->cpuset) - || hwloc_bitmap_isincluded(node->cpuset, droppedcpuset)) - hwloc_bitmap_set(droppednodeset, node->os_index); - node = node->next_cousin; - } while (node); + if (flags & HWLOC_RESTRICT_FLAG_BYNODESET) { + /* nodeset to clear */ + hwloc_bitmap_not(droppednodeset, set); + /* cpuset to clear */ + if (flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS) { + hwloc_obj_t pu = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0); + do { + /* PU will be removed if cpuset gets or was empty */ + if (hwloc_bitmap_iszero(pu->cpuset) + || hwloc_bitmap_isincluded(pu->nodeset, droppednodeset)) + hwloc_bitmap_set(droppedcpuset, pu->os_index); + pu = pu->next_cousin; + } while (pu); - /* check we're not removing all NUMA nodes */ - if (hwloc_bitmap_isincluded(topology->allowed_nodeset, droppednodeset)) { - errno = EINVAL; /* easy failure, just don't touch the topology */ - hwloc_bitmap_free(droppedcpuset); - hwloc_bitmap_free(droppednodeset); - return -1; + /* check we're not removing all PUs */ + if (hwloc_bitmap_isincluded(topology->allowed_cpuset, droppedcpuset)) { + errno = EINVAL; /* easy failure, just don't touch the topology */ + hwloc_bitmap_free(droppedcpuset); + hwloc_bitmap_free(droppednodeset); + return -1; + } + } + /* remove cpuset if empty */ + if (!(flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS) + || hwloc_bitmap_iszero(droppedcpuset)) { + hwloc_bitmap_free(droppedcpuset); + droppedcpuset = NULL; } - } - /* remove nodeset if empty */ - if (!(flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) - || hwloc_bitmap_iszero(droppednodeset)) { - hwloc_bitmap_free(droppednodeset); - droppednodeset = NULL; - } - /* now recurse to filter sets and drop things */ - restrict_object_by_cpuset(topology, flags, &topology->levels[0][0], droppedcpuset, droppednodeset); - hwloc_bitmap_andnot(topology->allowed_cpuset, topology->allowed_cpuset, droppedcpuset); - if (droppednodeset) + /* now recurse to filter sets and drop things */ + restrict_object_by_nodeset(topology, flags, &topology->levels[0][0], droppedcpuset, droppednodeset); hwloc_bitmap_andnot(topology->allowed_nodeset, topology->allowed_nodeset, droppednodeset); + if (droppedcpuset) + hwloc_bitmap_andnot(topology->allowed_cpuset, topology->allowed_cpuset, droppedcpuset); + + } else { + /* cpuset to clear */ + hwloc_bitmap_not(droppedcpuset, set); + /* nodeset to clear */ + if (flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) { + hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0); + do { + /* node will be removed if nodeset gets or was empty */ + if (hwloc_bitmap_iszero(node->cpuset) + || hwloc_bitmap_isincluded(node->cpuset, droppedcpuset)) + hwloc_bitmap_set(droppednodeset, node->os_index); + node = node->next_cousin; + } while (node); + + /* check we're not removing all NUMA nodes */ + if (hwloc_bitmap_isincluded(topology->allowed_nodeset, droppednodeset)) { + errno = EINVAL; /* easy failure, just don't 
touch the topology */ + hwloc_bitmap_free(droppedcpuset); + hwloc_bitmap_free(droppednodeset); + return -1; + } + } + /* remove nodeset if empty */ + if (!(flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) + || hwloc_bitmap_iszero(droppednodeset)) { + hwloc_bitmap_free(droppednodeset); + droppednodeset = NULL; + } + + /* now recurse to filter sets and drop things */ + restrict_object_by_cpuset(topology, flags, &topology->levels[0][0], droppedcpuset, droppednodeset); + hwloc_bitmap_andnot(topology->allowed_cpuset, topology->allowed_cpuset, droppedcpuset); + if (droppednodeset) + hwloc_bitmap_andnot(topology->allowed_nodeset, topology->allowed_nodeset, droppednodeset); + } hwloc_bitmap_free(droppedcpuset); hwloc_bitmap_free(droppednodeset); @@ -3849,6 +4205,72 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_cpuset_t cp return -1; } +int +hwloc_topology_allow(struct hwloc_topology *topology, + hwloc_const_cpuset_t cpuset, hwloc_const_nodeset_t nodeset, + unsigned long flags) +{ + if (!topology->is_loaded) + goto einval; + + if (topology->adopted_shmem_addr) { + errno = EPERM; + goto error; + } + + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED)) + goto einval; + + if (flags & ~(HWLOC_ALLOW_FLAG_ALL|HWLOC_ALLOW_FLAG_LOCAL_RESTRICTIONS|HWLOC_ALLOW_FLAG_CUSTOM)) + goto einval; + + switch (flags) { + case HWLOC_ALLOW_FLAG_ALL: { + if (cpuset || nodeset) + goto einval; + hwloc_bitmap_copy(topology->allowed_cpuset, hwloc_get_root_obj(topology)->complete_cpuset); + hwloc_bitmap_copy(topology->allowed_nodeset, hwloc_get_root_obj(topology)->complete_nodeset); + break; + } + case HWLOC_ALLOW_FLAG_LOCAL_RESTRICTIONS: { + if (cpuset || nodeset) + goto einval; + if (!topology->is_thissystem) + goto einval; + if (!topology->binding_hooks.get_allowed_resources) { + errno = ENOSYS; + goto error; + } + topology->binding_hooks.get_allowed_resources(topology); + break; + } + case HWLOC_ALLOW_FLAG_CUSTOM: { + if (cpuset) { + /* keep the intersection with the full topology cpuset, if not empty */ + if (!hwloc_bitmap_intersects(hwloc_get_root_obj(topology)->cpuset, cpuset)) + goto einval; + hwloc_bitmap_and(topology->allowed_cpuset, hwloc_get_root_obj(topology)->cpuset, cpuset); + } + if (nodeset) { + /* keep the intersection with the full topology nodeset, if not empty */ + if (!hwloc_bitmap_intersects(hwloc_get_root_obj(topology)->nodeset, nodeset)) + goto einval; + hwloc_bitmap_and(topology->allowed_nodeset, hwloc_get_root_obj(topology)->nodeset, nodeset); + } + break; + } + default: + goto einval; + } + + return 0; + + einval: + errno = EINVAL; + error: + return -1; +} + int hwloc_topology_is_thissystem(struct hwloc_topology *topology) { @@ -4005,7 +4427,7 @@ hwloc__check_children_cpusets(hwloc_topology_t topology __hwloc_attribute_unused assert(hwloc_bitmap_first(obj->cpuset) == (int) obj->os_index); assert(hwloc_bitmap_weight(obj->complete_cpuset) == 1); assert(hwloc_bitmap_first(obj->complete_cpuset) == (int) obj->os_index); - if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM)) { + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED)) { assert(hwloc_bitmap_isset(topology->allowed_cpuset, (int) obj->os_index)); } assert(!obj->arity); @@ -4166,6 +4588,8 @@ hwloc__check_object(hwloc_topology_t topology, hwloc_bitmap_t gp_indexes, hwloc_ assert(obj->cpuset); if (obj->type == HWLOC_OBJ_NUMANODE) assert(obj->depth == HWLOC_TYPE_DEPTH_NUMANODE); + else if (obj->type == HWLOC_OBJ_MEMCACHE) + assert(obj->depth == HWLOC_TYPE_DEPTH_MEMCACHE); else assert(obj->depth 
>= 0); } @@ -4219,7 +4643,7 @@ hwloc__check_nodesets(hwloc_topology_t topology, hwloc_obj_t obj, hwloc_bitmap_t assert(hwloc_bitmap_first(obj->nodeset) == (int) obj->os_index); assert(hwloc_bitmap_weight(obj->complete_nodeset) == 1); assert(hwloc_bitmap_first(obj->complete_nodeset) == (int) obj->os_index); - if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM)) { + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED)) { assert(hwloc_bitmap_isset(topology->allowed_nodeset, (int) obj->os_index)); } assert(!obj->arity); @@ -4362,7 +4786,9 @@ hwloc_topology_check(struct hwloc_topology *topology) HWLOC_BUILD_ASSERT(HWLOC_OBJ_BRIDGE + 1 == HWLOC_OBJ_PCI_DEVICE); HWLOC_BUILD_ASSERT(HWLOC_OBJ_PCI_DEVICE + 1 == HWLOC_OBJ_OS_DEVICE); HWLOC_BUILD_ASSERT(HWLOC_OBJ_OS_DEVICE + 1 == HWLOC_OBJ_MISC); - HWLOC_BUILD_ASSERT(HWLOC_OBJ_MISC + 1 == HWLOC_OBJ_TYPE_MAX); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_MISC + 1 == HWLOC_OBJ_MEMCACHE); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_MEMCACHE + 1 == HWLOC_OBJ_DIE); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_DIE + 1 == HWLOC_OBJ_TYPE_MAX); /* make sure order and priority arrays have the right size */ HWLOC_BUILD_ASSERT(sizeof(obj_type_order)/sizeof(*obj_type_order) == HWLOC_OBJ_TYPE_MAX); @@ -4408,6 +4834,7 @@ hwloc_topology_check(struct hwloc_topology *topology) int d; type = hwloc_get_depth_type(topology, j); assert(type != HWLOC_OBJ_NUMANODE); + assert(type != HWLOC_OBJ_MEMCACHE); assert(type != HWLOC_OBJ_PCI_DEVICE); assert(type != HWLOC_OBJ_BRIDGE); assert(type != HWLOC_OBJ_OS_DEVICE); @@ -4423,6 +4850,9 @@ hwloc_topology_check(struct hwloc_topology *topology) if (type == HWLOC_OBJ_NUMANODE) { assert(d == HWLOC_TYPE_DEPTH_NUMANODE); assert(hwloc_get_depth_type(topology, d) == HWLOC_OBJ_NUMANODE); + } else if (type == HWLOC_OBJ_MEMCACHE) { + assert(d == HWLOC_TYPE_DEPTH_MEMCACHE); + assert(hwloc_get_depth_type(topology, d) == HWLOC_OBJ_MEMCACHE); } else if (type == HWLOC_OBJ_BRIDGE) { assert(d == HWLOC_TYPE_DEPTH_BRIDGE); assert(hwloc_get_depth_type(topology, d) == HWLOC_OBJ_BRIDGE); @@ -4449,7 +4879,7 @@ hwloc_topology_check(struct hwloc_topology *topology) assert(!obj->depth); /* check that allowed sets are larger than the main sets */ - if (topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) { + if (topology->flags & HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED) { assert(hwloc_bitmap_isincluded(topology->allowed_cpuset, obj->cpuset)); assert(hwloc_bitmap_isincluded(topology->allowed_nodeset, obj->nodeset)); } else { diff --git a/src/3rdparty/hwloc/src/traversal.c b/src/3rdparty/hwloc/src/traversal.c index 9c5e6268..0b744d78 100644 --- a/src/3rdparty/hwloc/src/traversal.c +++ b/src/3rdparty/hwloc/src/traversal.c @@ -1,16 +1,17 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * Copyright © 2009-2010 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
*/ -#include -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/private.h" +#include "private/misc.h" +#include "private/debug.h" + #ifdef HAVE_STRINGS_H #include #endif /* HAVE_STRINGS_H */ @@ -40,6 +41,8 @@ hwloc_get_depth_type (hwloc_topology_t topology, int depth) return HWLOC_OBJ_OS_DEVICE; case HWLOC_TYPE_DEPTH_MISC: return HWLOC_OBJ_MISC; + case HWLOC_TYPE_DEPTH_MEMCACHE: + return HWLOC_OBJ_MEMCACHE; default: return HWLOC_OBJ_TYPE_NONE; } @@ -237,8 +240,10 @@ hwloc_obj_type_string (hwloc_obj_type_t obj) case HWLOC_OBJ_MACHINE: return "Machine"; case HWLOC_OBJ_MISC: return "Misc"; case HWLOC_OBJ_GROUP: return "Group"; + case HWLOC_OBJ_MEMCACHE: return "MemCache"; case HWLOC_OBJ_NUMANODE: return "NUMANode"; case HWLOC_OBJ_PACKAGE: return "Package"; + case HWLOC_OBJ_DIE: return "Die"; case HWLOC_OBJ_L1CACHE: return "L1Cache"; case HWLOC_OBJ_L2CACHE: return "L2Cache"; case HWLOC_OBJ_L3CACHE: return "L3Cache"; @@ -256,6 +261,41 @@ hwloc_obj_type_string (hwloc_obj_type_t obj) } } +/* Check if string matches the given type at least on minmatch chars. + * On success, return the address of where matching stop, either pointing to \0 or to a suffix (digits, colon, etc) + * On error, return NULL; + */ +static __hwloc_inline const char * +hwloc__type_match(const char *string, + const char *type, /* type must be lowercase */ + size_t minmatch) +{ + const char *s, *t; + unsigned i; + for(i=0, s=string, t=type; ; i++, s++, t++) { + if (!*s) { + /* string ends before type */ + if (i= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') || *s == '-') + /* valid character that doesn't match */ + return NULL; + /* invalid character, we reached the end of the type namein string, stop matching here */ + if (i= '0' && string[1] <= '9') { + char *suffix; depthattr = strtol(string+1, &end, 10); - if (*end == 'i') { + if (*end == 'i' || *end == 'I') { if (depthattr >= 1 && depthattr <= 3) { type = HWLOC_OBJ_L1ICACHE + depthattr-1; cachetypeattr = HWLOC_OBJ_CACHE_INSTRUCTION; + suffix = end+1; } else return -1; } else { if (depthattr >= 1 && depthattr <= 5) { type = HWLOC_OBJ_L1CACHE + depthattr-1; - cachetypeattr = *end == 'd' ? 
HWLOC_OBJ_CACHE_DATA : HWLOC_OBJ_CACHE_UNIFIED; + if (*end == 'd' || *end == 'D') { + cachetypeattr = HWLOC_OBJ_CACHE_DATA; + suffix = end+1; + } else if (*end == 'u' || *end == 'U') { + cachetypeattr = HWLOC_OBJ_CACHE_UNIFIED; + suffix = end+1; + } else { + cachetypeattr = HWLOC_OBJ_CACHE_UNIFIED; + suffix = end; + } } else return -1; } + /* check whether the optional suffix matches "cache" */ + if (!hwloc__type_match(suffix, "cache", 0)) + return -1; - } else if (!hwloc_strncasecmp(string, "group", 2)) { - size_t length; + } else if ((end = (char *) hwloc__type_match(string, "group", 2)) != NULL) { type = HWLOC_OBJ_GROUP; - length = strcspn(string, "0123456789"); - if (length <= 5 && !hwloc_strncasecmp(string, "group", length) - && string[length] >= '0' && string[length] <= '9') { - depthattr = strtol(string+length, &end, 10); + if (*end >= '0' && *end <= '9') { + depthattr = strtol(end, &end, 10); } } else @@ -421,7 +477,9 @@ hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t case HWLOC_OBJ_MISC: case HWLOC_OBJ_MACHINE: case HWLOC_OBJ_NUMANODE: + case HWLOC_OBJ_MEMCACHE: case HWLOC_OBJ_PACKAGE: + case HWLOC_OBJ_DIE: case HWLOC_OBJ_CORE: case HWLOC_OBJ_PU: return hwloc_snprintf(string, size, "%s", hwloc_obj_type_string(type)); @@ -523,6 +581,7 @@ hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t case HWLOC_OBJ_L1ICACHE: case HWLOC_OBJ_L2ICACHE: case HWLOC_OBJ_L3ICACHE: + case HWLOC_OBJ_MEMCACHE: if (verbose) { char assoc[32]; if (obj->attr->cache.associativity == -1)
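
Note on the hwloc_topology_restrict() rework above: the restricting bitmap may now be interpreted either as a cpuset (default) or as a nodeset (HWLOC_RESTRICT_FLAG_BYNODESET), and HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS is only valid in the nodeset case. The caller-side sketch below is not part of the patch; the helper name restrict_to_first_node is made up for illustration and error handling is reduced to asserts.

    #include <assert.h>
    #include "hwloc.h"

    /* Illustrative helper (not from the patch): keep only the resources
     * local to the first NUMA node, using the new nodeset-based path. */
    static void restrict_to_first_node(hwloc_topology_t topology)
    {
      hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0);
      hwloc_bitmap_t nodes = hwloc_bitmap_alloc();
      assert(node && nodes);
      hwloc_bitmap_copy(nodes, node->nodeset);
      /* BYNODESET makes the bitmap a nodeset; REMOVE_MEMLESS additionally
       * drops objects left without any local NUMA node. Combining BYNODESET
       * with REMOVE_CPULESS fails with EINVAL, as enforced in the hunk above. */
      assert(!hwloc_topology_restrict(topology, nodes,
                                      HWLOC_RESTRICT_FLAG_BYNODESET
                                      | HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS));
      hwloc_bitmap_free(nodes);
    }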
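
The new hwloc_topology_allow() above only operates on a loaded, non-shmem-adopted topology whose HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED flag was set, and it expects exactly one of the ALL, LOCAL_RESTRICTIONS or CUSTOM flags; the HWLOC_ALLOW=all environment variable handled in hwloc_topology_load() has a related effect at discovery time. A minimal sketch of the ALL case follows; the function name widen_allowed_sets is illustrative only.

    #include "hwloc.h"

    /* Illustrative only: load a topology that keeps disallowed objects,
     * then mark every PU and NUMA node as allowed again. With
     * HWLOC_ALLOW_FLAG_ALL, cpuset and nodeset must be NULL. */
    static int widen_allowed_sets(void)
    {
      hwloc_topology_t topology;
      int err;
      if (hwloc_topology_init(&topology) < 0)
        return -1;
      if (hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED) < 0
          || hwloc_topology_load(topology) < 0) {
        hwloc_topology_destroy(topology);
        return -1;
      }
      err = hwloc_topology_allow(topology, NULL, NULL, HWLOC_ALLOW_FLAG_ALL);
      hwloc_topology_destroy(topology);
      return err;
    }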
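
As set in hwloc__topology_filter_init() above, HWLOC_OBJ_MEMCACHE defaults to HWLOC_TYPE_FILTER_KEEP_NONE, so memory-side caches only appear when the filter is relaxed before load(). The snippet below is a sketch of that, not part of the patch; MemCache objects sit at the virtual depth HWLOC_TYPE_DEPTH_MEMCACHE, as asserted in hwloc_topology_check().

    #include <stdio.h>
    #include "hwloc.h"

    /* Illustrative only: re-enable memory-side caches and list them. */
    static void show_memcaches(void)
    {
      hwloc_topology_t topology;
      hwloc_obj_t obj = NULL;
      hwloc_topology_init(&topology);
      hwloc_topology_set_type_filter(topology, HWLOC_OBJ_MEMCACHE,
                                     HWLOC_TYPE_FILTER_KEEP_ALL);
      hwloc_topology_load(topology);
      while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_MEMCACHE, obj)) != NULL)
        printf("%s: %llu bytes\n", hwloc_obj_type_string(obj->type),
               (unsigned long long) obj->attr->cache.size);
      hwloc_topology_destroy(topology);
    }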
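
The traversal.c hunks above extend the type-string parser (assumed here to be hwloc_type_sscanf()) with the new hwloc__type_match() helper, an optional "Cache" suffix, and d/u/i cache-type letters. A hedged sketch of what the parser is expected to accept after this change:

    #include <assert.h>
    #include "hwloc.h"

    /* Illustrative only: parse the new type names added in this patch.
     * The exact accepted strings are an assumption based on the hunks above. */
    static void check_type_parsing(void)
    {
      hwloc_obj_type_t type;
      union hwloc_obj_attr_u attr;
      assert(!hwloc_type_sscanf("Die", &type, &attr, sizeof(attr)));
      assert(type == HWLOC_OBJ_DIE);
      assert(!hwloc_type_sscanf("L2dCache", &type, &attr, sizeof(attr)));
      assert(type == HWLOC_OBJ_L2CACHE);
      assert(attr.cache.type == HWLOC_OBJ_CACHE_DATA);
    }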