1426 lines
40 KiB
C
1426 lines
40 KiB
C
/*
|
|
* Copyright © 2020-2022 Inria. All rights reserved.
|
|
* See COPYING in top-level directory.
|
|
*/
|
|
|
|
#include "private/autogen/config.h"
|
|
#include "hwloc.h"
|
|
#include "private/private.h"
|
|
#include "private/debug.h"
|
|
|
|
|
|
/*****************************
|
|
* Attributes
|
|
*/
|
|
|
|
static __hwloc_inline
|
|
hwloc_uint64_t hwloc__memattr_get_convenience_value(hwloc_memattr_id_t id,
|
|
hwloc_obj_t node)
|
|
{
|
|
if (id == HWLOC_MEMATTR_ID_CAPACITY)
|
|
return node->attr->numanode.local_memory;
|
|
else if (id == HWLOC_MEMATTR_ID_LOCALITY)
|
|
return hwloc_bitmap_weight(node->cpuset);
|
|
else
|
|
assert(0);
|
|
return 0; /* shut up the compiler */
|
|
}
|
|
|
|
void
|
|
hwloc_internal_memattrs_init(struct hwloc_topology *topology)
|
|
{
|
|
topology->nr_memattrs = 0;
|
|
topology->memattrs = NULL;
|
|
}
|
|
|
|
static void
|
|
hwloc__setup_memattr(struct hwloc_internal_memattr_s *imattr,
|
|
char *name,
|
|
unsigned long flags,
|
|
unsigned long iflags)
|
|
{
|
|
imattr->name = name;
|
|
imattr->flags = flags;
|
|
imattr->iflags = iflags;
|
|
|
|
imattr->nr_targets = 0;
|
|
imattr->targets = NULL;
|
|
}
|
|
|
|
void
|
|
hwloc_internal_memattrs_prepare(struct hwloc_topology *topology)
|
|
{
|
|
topology->memattrs = malloc(HWLOC_MEMATTR_ID_MAX * sizeof(*topology->memattrs));
|
|
if (!topology->memattrs)
|
|
return;
|
|
|
|
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_CAPACITY],
|
|
(char *) "Capacity",
|
|
HWLOC_MEMATTR_FLAG_HIGHER_FIRST,
|
|
HWLOC_IMATTR_FLAG_STATIC_NAME|HWLOC_IMATTR_FLAG_CONVENIENCE);
|
|
|
|
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_LOCALITY],
|
|
(char *) "Locality",
|
|
HWLOC_MEMATTR_FLAG_LOWER_FIRST,
|
|
HWLOC_IMATTR_FLAG_STATIC_NAME|HWLOC_IMATTR_FLAG_CONVENIENCE);
|
|
|
|
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH],
|
|
(char *) "Bandwidth",
|
|
HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
|
|
HWLOC_IMATTR_FLAG_STATIC_NAME);
|
|
|
|
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_READ_BANDWIDTH],
|
|
(char *) "ReadBandwidth",
|
|
HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
|
|
HWLOC_IMATTR_FLAG_STATIC_NAME);
|
|
|
|
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_WRITE_BANDWIDTH],
|
|
(char *) "WriteBandwidth",
|
|
HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
|
|
HWLOC_IMATTR_FLAG_STATIC_NAME);
|
|
|
|
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_LATENCY],
|
|
(char *) "Latency",
|
|
HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
|
|
HWLOC_IMATTR_FLAG_STATIC_NAME);
|
|
|
|
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_READ_LATENCY],
|
|
(char *) "ReadLatency",
|
|
HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
|
|
HWLOC_IMATTR_FLAG_STATIC_NAME);
|
|
|
|
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_WRITE_LATENCY],
|
|
(char *) "WriteLatency",
|
|
HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
|
|
HWLOC_IMATTR_FLAG_STATIC_NAME);
|
|
|
|
topology->nr_memattrs = HWLOC_MEMATTR_ID_MAX;
|
|
}
|
|
|
|
static void
|
|
hwloc__imi_destroy(struct hwloc_internal_memattr_initiator_s *imi)
|
|
{
|
|
if (imi->initiator.type == HWLOC_LOCATION_TYPE_CPUSET)
|
|
hwloc_bitmap_free(imi->initiator.location.cpuset);
|
|
}
|
|
|
|
static void
|
|
hwloc__imtg_destroy(struct hwloc_internal_memattr_s *imattr,
|
|
struct hwloc_internal_memattr_target_s *imtg)
|
|
{
|
|
if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
|
|
/* only attributes with initiators may have something to free() in the array */
|
|
unsigned k;
|
|
for(k=0; k<imtg->nr_initiators; k++)
|
|
hwloc__imi_destroy(&imtg->initiators[k]);
|
|
}
|
|
free(imtg->initiators);
|
|
}
|
|
|
|
void
|
|
hwloc_internal_memattrs_destroy(struct hwloc_topology *topology)
|
|
{
|
|
unsigned id;
|
|
for(id=0; id<topology->nr_memattrs; id++) {
|
|
struct hwloc_internal_memattr_s *imattr = &topology->memattrs[id];
|
|
unsigned j;
|
|
for(j=0; j<imattr->nr_targets; j++)
|
|
hwloc__imtg_destroy(imattr, &imattr->targets[j]);
|
|
free(imattr->targets);
|
|
if (!(imattr->iflags & HWLOC_IMATTR_FLAG_STATIC_NAME))
|
|
free(imattr->name);
|
|
}
|
|
free(topology->memattrs);
|
|
|
|
topology->memattrs = NULL;
|
|
topology->nr_memattrs = 0;
|
|
}
|
|
|
|
int
|
|
hwloc_internal_memattrs_dup(struct hwloc_topology *new, struct hwloc_topology *old)
|
|
{
|
|
struct hwloc_tma *tma = new->tma;
|
|
struct hwloc_internal_memattr_s *imattrs;
|
|
hwloc_memattr_id_t id;
|
|
|
|
/* old->nr_memattrs is always > 0 thanks to default memattrs */
|
|
|
|
imattrs = hwloc_tma_malloc(tma, old->nr_memattrs * sizeof(*imattrs));
|
|
if (!imattrs)
|
|
return -1;
|
|
new->memattrs = imattrs;
|
|
new->nr_memattrs = old->nr_memattrs;
|
|
memcpy(imattrs, old->memattrs, old->nr_memattrs * sizeof(*imattrs));
|
|
|
|
for(id=0; id<old->nr_memattrs; id++) {
|
|
struct hwloc_internal_memattr_s *oimattr = &old->memattrs[id];
|
|
struct hwloc_internal_memattr_s *nimattr = &imattrs[id];
|
|
unsigned j;
|
|
|
|
assert(oimattr->name);
|
|
nimattr->name = hwloc_tma_strdup(tma, oimattr->name);
|
|
if (!nimattr->name) {
|
|
assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */
|
|
new->nr_memattrs = id;
|
|
goto failed;
|
|
}
|
|
nimattr->iflags &= ~HWLOC_IMATTR_FLAG_STATIC_NAME;
|
|
nimattr->iflags &= ~HWLOC_IMATTR_FLAG_CACHE_VALID; /* cache will need refresh */
|
|
|
|
if (!oimattr->nr_targets)
|
|
continue;
|
|
|
|
nimattr->targets = hwloc_tma_malloc(tma, oimattr->nr_targets * sizeof(*nimattr->targets));
|
|
if (!nimattr->targets) {
|
|
free(nimattr->name);
|
|
new->nr_memattrs = id;
|
|
goto failed;
|
|
}
|
|
memcpy(nimattr->targets, oimattr->targets, oimattr->nr_targets * sizeof(*nimattr->targets));
|
|
|
|
for(j=0; j<oimattr->nr_targets; j++) {
|
|
struct hwloc_internal_memattr_target_s *oimtg = &oimattr->targets[j];
|
|
struct hwloc_internal_memattr_target_s *nimtg = &nimattr->targets[j];
|
|
unsigned k;
|
|
|
|
nimtg->obj = NULL; /* cache will need refresh */
|
|
|
|
if (!oimtg->nr_initiators)
|
|
continue;
|
|
|
|
nimtg->initiators = hwloc_tma_malloc(tma, oimtg->nr_initiators * sizeof(*nimtg->initiators));
|
|
if (!nimtg->initiators) {
|
|
nimattr->nr_targets = j;
|
|
new->nr_memattrs = id+1;
|
|
goto failed;
|
|
}
|
|
memcpy(nimtg->initiators, oimtg->initiators, oimtg->nr_initiators * sizeof(*nimtg->initiators));
|
|
|
|
for(k=0; k<oimtg->nr_initiators; k++) {
|
|
struct hwloc_internal_memattr_initiator_s *oimi = &oimtg->initiators[k];
|
|
struct hwloc_internal_memattr_initiator_s *nimi = &nimtg->initiators[k];
|
|
if (oimi->initiator.type == HWLOC_LOCATION_TYPE_CPUSET) {
|
|
nimi->initiator.location.cpuset = hwloc_bitmap_tma_dup(tma, oimi->initiator.location.cpuset);
|
|
if (!nimi->initiator.location.cpuset) {
|
|
nimtg->nr_initiators = k;
|
|
nimattr->nr_targets = j+1;
|
|
new->nr_memattrs = id+1;
|
|
goto failed;
|
|
}
|
|
} else if (oimi->initiator.type == HWLOC_LOCATION_TYPE_OBJECT) {
|
|
nimi->initiator.location.object.obj = NULL; /* cache will need refresh */
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
|
|
failed:
|
|
hwloc_internal_memattrs_destroy(new);
|
|
return -1;
|
|
}
|
|
|
|
int
|
|
hwloc_memattr_get_by_name(hwloc_topology_t topology,
|
|
const char *name,
|
|
hwloc_memattr_id_t *idp)
|
|
{
|
|
unsigned id;
|
|
for(id=0; id<topology->nr_memattrs; id++) {
|
|
if (!strcmp(topology->memattrs[id].name, name)) {
|
|
*idp = id;
|
|
return 0;
|
|
}
|
|
}
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
int
|
|
hwloc_memattr_get_name(hwloc_topology_t topology,
|
|
hwloc_memattr_id_t id,
|
|
const char **namep)
|
|
{
|
|
if (id >= topology->nr_memattrs) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
*namep = topology->memattrs[id].name;
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
hwloc_memattr_get_flags(hwloc_topology_t topology,
|
|
hwloc_memattr_id_t id,
|
|
unsigned long *flagsp)
|
|
{
|
|
if (id >= topology->nr_memattrs) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
*flagsp = topology->memattrs[id].flags;
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
hwloc_memattr_register(hwloc_topology_t topology,
|
|
const char *_name,
|
|
unsigned long flags,
|
|
hwloc_memattr_id_t *id)
|
|
{
|
|
struct hwloc_internal_memattr_s *newattrs;
|
|
char *name;
|
|
unsigned i;
|
|
|
|
/* check flags */
|
|
if (flags & ~(HWLOC_MEMATTR_FLAG_NEED_INITIATOR|HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_HIGHER_FIRST)) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
if (!(flags & (HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_HIGHER_FIRST))) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
if ((flags & (HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_HIGHER_FIRST))
|
|
== (HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_HIGHER_FIRST)) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
if (!_name) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
/* check name isn't already used */
|
|
for(i=0; i<topology->nr_memattrs; i++) {
|
|
if (!strcmp(_name, topology->memattrs[i].name)) {
|
|
errno = EBUSY;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
name = strdup(_name);
|
|
if (!name)
|
|
return -1;
|
|
|
|
newattrs = realloc(topology->memattrs, (topology->nr_memattrs + 1) * sizeof(*topology->memattrs));
|
|
if (!newattrs) {
|
|
free(name);
|
|
return -1;
|
|
}
|
|
|
|
hwloc__setup_memattr(&newattrs[topology->nr_memattrs],
|
|
name, flags, 0);
|
|
|
|
/* memattr valid when just created */
|
|
newattrs[topology->nr_memattrs].iflags |= HWLOC_IMATTR_FLAG_CACHE_VALID;
|
|
|
|
*id = topology->nr_memattrs;
|
|
topology->nr_memattrs++;
|
|
topology->memattrs = newattrs;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/***************************
|
|
* Internal Locations
|
|
*/
|
|
|
|
/* return 1 if cpuset/obj matchs the existing initiator location,
|
|
* for instance if the cpuset of query is included in the cpuset of existing
|
|
*/
|
|
static int
|
|
match_internal_location(struct hwloc_internal_location_s *iloc,
|
|
struct hwloc_internal_memattr_initiator_s *imi)
|
|
{
|
|
if (iloc->type != imi->initiator.type)
|
|
return 0;
|
|
switch (iloc->type) {
|
|
case HWLOC_LOCATION_TYPE_CPUSET:
|
|
return hwloc_bitmap_isincluded(iloc->location.cpuset, imi->initiator.location.cpuset);
|
|
case HWLOC_LOCATION_TYPE_OBJECT:
|
|
return iloc->location.object.type == imi->initiator.location.object.type
|
|
&& iloc->location.object.gp_index == imi->initiator.location.object.gp_index;
|
|
default:
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
static int
|
|
to_internal_location(struct hwloc_internal_location_s *iloc,
|
|
struct hwloc_location *location)
|
|
{
|
|
iloc->type = location->type;
|
|
|
|
switch (location->type) {
|
|
case HWLOC_LOCATION_TYPE_CPUSET:
|
|
if (!location->location.cpuset || hwloc_bitmap_iszero(location->location.cpuset)) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
iloc->location.cpuset = location->location.cpuset;
|
|
return 0;
|
|
case HWLOC_LOCATION_TYPE_OBJECT:
|
|
if (!location->location.object) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
iloc->location.object.gp_index = location->location.object->gp_index;
|
|
iloc->location.object.type = location->location.object->type;
|
|
return 0;
|
|
default:
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
static int
|
|
from_internal_location(struct hwloc_internal_location_s *iloc,
|
|
struct hwloc_location *location)
|
|
{
|
|
location->type = iloc->type;
|
|
|
|
switch (iloc->type) {
|
|
case HWLOC_LOCATION_TYPE_CPUSET:
|
|
location->location.cpuset = iloc->location.cpuset;
|
|
return 0;
|
|
case HWLOC_LOCATION_TYPE_OBJECT:
|
|
/* requires the cache to be refreshed */
|
|
location->location.object = iloc->location.object.obj;
|
|
if (!location->location.object)
|
|
return -1;
|
|
return 0;
|
|
default:
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
|
|
/************************
|
|
* Refreshing
|
|
*/
|
|
|
|
static int
|
|
hwloc__imi_refresh(struct hwloc_topology *topology,
|
|
struct hwloc_internal_memattr_initiator_s *imi)
|
|
{
|
|
switch (imi->initiator.type) {
|
|
case HWLOC_LOCATION_TYPE_CPUSET: {
|
|
hwloc_bitmap_and(imi->initiator.location.cpuset, imi->initiator.location.cpuset, topology->levels[0][0]->cpuset);
|
|
if (hwloc_bitmap_iszero(imi->initiator.location.cpuset)) {
|
|
hwloc__imi_destroy(imi);
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
case HWLOC_LOCATION_TYPE_OBJECT: {
|
|
hwloc_obj_t obj = hwloc_get_obj_by_type_and_gp_index(topology,
|
|
imi->initiator.location.object.type,
|
|
imi->initiator.location.object.gp_index);
|
|
if (!obj) {
|
|
hwloc__imi_destroy(imi);
|
|
return -1;
|
|
}
|
|
imi->initiator.location.object.obj = obj;
|
|
return 0;
|
|
}
|
|
default:
|
|
assert(0);
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
static int
|
|
hwloc__imtg_refresh(struct hwloc_topology *topology,
|
|
struct hwloc_internal_memattr_s *imattr,
|
|
struct hwloc_internal_memattr_target_s *imtg)
|
|
{
|
|
hwloc_obj_t node;
|
|
|
|
/* no need to refresh convenience memattrs */
|
|
assert(!(imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE));
|
|
|
|
/* check the target object */
|
|
if (imtg->gp_index == (hwloc_uint64_t) -1) {
|
|
/* only NUMA and PU may work with os_index, and only NUMA is currently used internally */
|
|
if (imtg->type == HWLOC_OBJ_NUMANODE)
|
|
node = hwloc_get_numanode_obj_by_os_index(topology, imtg->os_index);
|
|
else if (imtg->type == HWLOC_OBJ_PU)
|
|
node = hwloc_get_pu_obj_by_os_index(topology, imtg->os_index);
|
|
else
|
|
node = NULL;
|
|
} else {
|
|
node = hwloc_get_obj_by_type_and_gp_index(topology, imtg->type, imtg->gp_index);
|
|
}
|
|
if (!node) {
|
|
hwloc__imtg_destroy(imattr, imtg);
|
|
return -1;
|
|
}
|
|
|
|
/* save the gp_index in case it wasn't initialized yet */
|
|
imtg->gp_index = node->gp_index;
|
|
/* cache the object */
|
|
imtg->obj = node;
|
|
|
|
if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
|
|
/* check the initiators */
|
|
unsigned k, l;
|
|
for(k=0, l=0; k<imtg->nr_initiators; k++) {
|
|
int err = hwloc__imi_refresh(topology, &imtg->initiators[k]);
|
|
if (err < 0)
|
|
continue;
|
|
if (k != l)
|
|
memcpy(&imtg->initiators[l], &imtg->initiators[k], sizeof(*imtg->initiators));
|
|
l++;
|
|
}
|
|
imtg->nr_initiators = l;
|
|
if (!imtg->nr_initiators) {
|
|
hwloc__imtg_destroy(imattr, imtg);
|
|
return -1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
hwloc__imattr_refresh(struct hwloc_topology *topology,
|
|
struct hwloc_internal_memattr_s *imattr)
|
|
{
|
|
unsigned j, k;
|
|
for(j=0, k=0; j<imattr->nr_targets; j++) {
|
|
int ret = hwloc__imtg_refresh(topology, imattr, &imattr->targets[j]);
|
|
if (!ret) {
|
|
/* target still valid, move it if some former targets were removed */
|
|
if (j != k)
|
|
memcpy(&imattr->targets[k], &imattr->targets[j], sizeof(*imattr->targets));
|
|
k++;
|
|
}
|
|
}
|
|
imattr->nr_targets = k;
|
|
imattr->iflags |= HWLOC_IMATTR_FLAG_CACHE_VALID;
|
|
}
|
|
|
|
void
|
|
hwloc_internal_memattrs_refresh(struct hwloc_topology *topology)
|
|
{
|
|
unsigned id;
|
|
for(id=0; id<topology->nr_memattrs; id++) {
|
|
struct hwloc_internal_memattr_s *imattr = &topology->memattrs[id];
|
|
if (imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID)
|
|
/* nothing to refresh */
|
|
continue;
|
|
hwloc__imattr_refresh(topology, imattr);
|
|
}
|
|
}
|
|
|
|
void
|
|
hwloc_internal_memattrs_need_refresh(struct hwloc_topology *topology)
|
|
{
|
|
unsigned id;
|
|
for(id=0; id<topology->nr_memattrs; id++) {
|
|
struct hwloc_internal_memattr_s *imattr = &topology->memattrs[id];
|
|
if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE)
|
|
/* no need to refresh convenience memattrs */
|
|
continue;
|
|
imattr->iflags &= ~HWLOC_IMATTR_FLAG_CACHE_VALID;
|
|
}
|
|
}
|
|
|
|
|
|
/********************************
|
|
* Targets
|
|
*/
|
|
|
|
static struct hwloc_internal_memattr_target_s *
|
|
hwloc__memattr_get_target(struct hwloc_internal_memattr_s *imattr,
|
|
hwloc_obj_type_t target_type,
|
|
hwloc_uint64_t target_gp_index,
|
|
unsigned target_os_index,
|
|
int create)
|
|
{
|
|
struct hwloc_internal_memattr_target_s *news, *new;
|
|
unsigned j;
|
|
|
|
for(j=0; j<imattr->nr_targets; j++) {
|
|
if (target_type == imattr->targets[j].type)
|
|
if ((target_gp_index != (hwloc_uint64_t)-1 && target_gp_index == imattr->targets[j].gp_index)
|
|
|| (target_os_index != (unsigned)-1 && target_os_index == imattr->targets[j].os_index))
|
|
return &imattr->targets[j];
|
|
}
|
|
if (!create)
|
|
return NULL;
|
|
|
|
news = realloc(imattr->targets, (imattr->nr_targets+1)*sizeof(*imattr->targets));
|
|
if (!news)
|
|
return NULL;
|
|
imattr->targets = news;
|
|
|
|
/* FIXME sort targets? by logical index at the end of load? */
|
|
|
|
new = &news[imattr->nr_targets];
|
|
new->type = target_type;
|
|
new->gp_index = target_gp_index;
|
|
new->os_index = target_os_index;
|
|
|
|
/* cached object will be refreshed later on actual access */
|
|
new->obj = NULL;
|
|
imattr->iflags &= ~HWLOC_IMATTR_FLAG_CACHE_VALID;
|
|
/* When setting a value after load(), the caller has the target object
|
|
* (and initiator object, if not CPU set). Hence, we could avoid invalidating
|
|
* the cache here.
|
|
* The overhead of the imattr-wide refresh isn't high enough so far
|
|
* to justify making the cache management more complex.
|
|
*/
|
|
|
|
new->nr_initiators = 0;
|
|
new->initiators = NULL;
|
|
new->noinitiator_value = 0;
|
|
imattr->nr_targets++;
|
|
return new;
|
|
}
|
|
|
|
/* forward declaration, the definition comes after its first use */
static struct hwloc_internal_memattr_initiator_s *
hwloc__memattr_get_initiator_from_location(struct hwloc_internal_memattr_s *imattr,
                                           struct hwloc_internal_memattr_target_s *imtg,
                                           struct hwloc_location *location);
|
|
|
|
int
|
|
hwloc_memattr_get_targets(hwloc_topology_t topology,
|
|
hwloc_memattr_id_t id,
|
|
struct hwloc_location *initiator,
|
|
unsigned long flags,
|
|
unsigned *nrp, hwloc_obj_t *targets, hwloc_uint64_t *values)
|
|
{
|
|
struct hwloc_internal_memattr_s *imattr;
|
|
unsigned i, found = 0, max;
|
|
|
|
if (flags) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
if (!nrp || (*nrp && !targets)) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
max = *nrp;
|
|
|
|
if (id >= topology->nr_memattrs) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
imattr = &topology->memattrs[id];
|
|
|
|
if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) {
|
|
/* convenience attributes */
|
|
for(i=0; ; i++) {
|
|
hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
|
|
if (!node)
|
|
break;
|
|
if (found<max) {
|
|
targets[found] = node;
|
|
if (values)
|
|
values[found] = hwloc__memattr_get_convenience_value(id, node);
|
|
}
|
|
found++;
|
|
}
|
|
goto done;
|
|
}
|
|
|
|
/* normal attributes */
|
|
|
|
if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
|
|
hwloc__imattr_refresh(topology, imattr);
|
|
|
|
for(i=0; i<imattr->nr_targets; i++) {
|
|
struct hwloc_internal_memattr_target_s *imtg = &imattr->targets[i];
|
|
hwloc_uint64_t value = 0;
|
|
|
|
if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
|
|
if (initiator) {
|
|
/* find a matching initiator */
|
|
struct hwloc_internal_memattr_initiator_s *imi = hwloc__memattr_get_initiator_from_location(imattr, imtg, initiator);
|
|
if (!imi)
|
|
continue;
|
|
value = imi->value;
|
|
}
|
|
} else {
|
|
value = imtg->noinitiator_value;
|
|
}
|
|
|
|
if (found<max) {
|
|
targets[found] = imtg->obj;
|
|
if (values)
|
|
values[found] = value;
|
|
}
|
|
found++;
|
|
}
|
|
|
|
done:
|
|
*nrp = found;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/************************
|
|
* Initiators
|
|
*/
|
|
|
|
static struct hwloc_internal_memattr_initiator_s *
|
|
hwloc__memattr_target_get_initiator(struct hwloc_internal_memattr_target_s *imtg,
|
|
struct hwloc_internal_location_s *iloc,
|
|
int create)
|
|
{
|
|
struct hwloc_internal_memattr_initiator_s *news, *new;
|
|
unsigned k;
|
|
|
|
for(k=0; k<imtg->nr_initiators; k++) {
|
|
struct hwloc_internal_memattr_initiator_s *imi = &imtg->initiators[k];
|
|
if (match_internal_location(iloc, imi)) {
|
|
return imi;
|
|
}
|
|
}
|
|
|
|
if (!create)
|
|
return NULL;
|
|
|
|
news = realloc(imtg->initiators, (imtg->nr_initiators+1)*sizeof(*imtg->initiators));
|
|
if (!news)
|
|
return NULL;
|
|
new = &news[imtg->nr_initiators];
|
|
|
|
new->initiator = *iloc;
|
|
if (iloc->type == HWLOC_LOCATION_TYPE_CPUSET) {
|
|
new->initiator.location.cpuset = hwloc_bitmap_dup(iloc->location.cpuset);
|
|
if (!new->initiator.location.cpuset)
|
|
goto out_with_realloc;
|
|
}
|
|
|
|
imtg->nr_initiators++;
|
|
imtg->initiators = news;
|
|
return new;
|
|
|
|
out_with_realloc:
|
|
imtg->initiators = news;
|
|
return NULL;
|
|
}
|
|
|
|
static struct hwloc_internal_memattr_initiator_s *
|
|
hwloc__memattr_get_initiator_from_location(struct hwloc_internal_memattr_s *imattr,
|
|
struct hwloc_internal_memattr_target_s *imtg,
|
|
struct hwloc_location *location)
|
|
{
|
|
struct hwloc_internal_memattr_initiator_s *imi;
|
|
struct hwloc_internal_location_s iloc;
|
|
|
|
assert(imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR);
|
|
|
|
/* use the initiator value */
|
|
if (!location) {
|
|
errno = EINVAL;
|
|
return NULL;
|
|
}
|
|
|
|
if (to_internal_location(&iloc, location) < 0) {
|
|
errno = EINVAL;
|
|
return NULL;
|
|
}
|
|
|
|
imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0);
|
|
if (!imi) {
|
|
errno = EINVAL;
|
|
return NULL;
|
|
}
|
|
|
|
return imi;
|
|
}
|
|
|
|
int
|
|
hwloc_memattr_get_initiators(hwloc_topology_t topology,
|
|
hwloc_memattr_id_t id,
|
|
hwloc_obj_t target_node,
|
|
unsigned long flags,
|
|
unsigned *nrp, struct hwloc_location *initiators, hwloc_uint64_t *values)
|
|
{
|
|
struct hwloc_internal_memattr_s *imattr;
|
|
struct hwloc_internal_memattr_target_s *imtg;
|
|
unsigned i, max;
|
|
|
|
if (flags) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
if (!nrp || (*nrp && !initiators)) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
max = *nrp;
|
|
|
|
if (id >= topology->nr_memattrs) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
imattr = &topology->memattrs[id];
|
|
if (!(imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR)) {
|
|
*nrp = 0;
|
|
return 0;
|
|
}
|
|
|
|
/* all convenience attributes have no initiators */
|
|
assert(!(imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE));
|
|
|
|
if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
|
|
hwloc__imattr_refresh(topology, imattr);
|
|
|
|
imtg = hwloc__memattr_get_target(imattr, target_node->type, target_node->gp_index, target_node->os_index, 0);
|
|
if (!imtg) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
for(i=0; i<imtg->nr_initiators && i<max; i++) {
|
|
struct hwloc_internal_memattr_initiator_s *imi = &imtg->initiators[i];
|
|
int err = from_internal_location(&imi->initiator, &initiators[i]);
|
|
assert(!err);
|
|
if (values)
|
|
/* no need to handle capacity/locality special cases here, those are initiator-less attributes */
|
|
values[i] = imi->value;
|
|
}
|
|
|
|
*nrp = imtg->nr_initiators;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**************************
|
|
* Values
|
|
*/
|
|
|
|
int
|
|
hwloc_memattr_get_value(hwloc_topology_t topology,
|
|
hwloc_memattr_id_t id,
|
|
hwloc_obj_t target_node,
|
|
struct hwloc_location *initiator,
|
|
unsigned long flags,
|
|
hwloc_uint64_t *valuep)
|
|
{
|
|
struct hwloc_internal_memattr_s *imattr;
|
|
struct hwloc_internal_memattr_target_s *imtg;
|
|
|
|
if (flags) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
if (id >= topology->nr_memattrs) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
imattr = &topology->memattrs[id];
|
|
|
|
if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) {
|
|
/* convenience attributes */
|
|
*valuep = hwloc__memattr_get_convenience_value(id, target_node);
|
|
return 0;
|
|
}
|
|
|
|
/* normal attributes */
|
|
|
|
if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
|
|
hwloc__imattr_refresh(topology, imattr);
|
|
|
|
imtg = hwloc__memattr_get_target(imattr, target_node->type, target_node->gp_index, target_node->os_index, 0);
|
|
if (!imtg) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
|
|
/* find the initiator and set its value */
|
|
struct hwloc_internal_memattr_initiator_s *imi = hwloc__memattr_get_initiator_from_location(imattr, imtg, initiator);
|
|
if (!imi)
|
|
return -1;
|
|
*valuep = imi->value;
|
|
} else {
|
|
/* get the no-initiator value */
|
|
*valuep = imtg->noinitiator_value;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
hwloc__internal_memattr_set_value(hwloc_topology_t topology,
|
|
hwloc_memattr_id_t id,
|
|
hwloc_obj_type_t target_type,
|
|
hwloc_uint64_t target_gp_index,
|
|
unsigned target_os_index,
|
|
struct hwloc_internal_location_s *initiator,
|
|
hwloc_uint64_t value)
|
|
{
|
|
struct hwloc_internal_memattr_s *imattr;
|
|
struct hwloc_internal_memattr_target_s *imtg;
|
|
|
|
if (id >= topology->nr_memattrs) {
|
|
/* something bad happened during init */
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
imattr = &topology->memattrs[id];
|
|
|
|
if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
|
|
/* check given initiator */
|
|
if (!initiator) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) {
|
|
/* convenience attributes are read-only */
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
if (topology->is_loaded && !(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
|
|
/* don't refresh when adding values during load (some nodes might not be ready yet),
|
|
* we'll refresh later
|
|
*/
|
|
hwloc__imattr_refresh(topology, imattr);
|
|
|
|
imtg = hwloc__memattr_get_target(imattr, target_type, target_gp_index, target_os_index, 1);
|
|
if (!imtg)
|
|
return -1;
|
|
|
|
if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
|
|
/* find/add the initiator and set its value */
|
|
// FIXME what if cpuset is larger than an existing one ?
|
|
struct hwloc_internal_memattr_initiator_s *imi = hwloc__memattr_target_get_initiator(imtg, initiator, 1);
|
|
if (!imi)
|
|
return -1;
|
|
imi->value = value;
|
|
|
|
} else {
|
|
/* set the no-initiator value */
|
|
imtg->noinitiator_value = value;
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
int
|
|
hwloc_internal_memattr_set_value(hwloc_topology_t topology,
|
|
hwloc_memattr_id_t id,
|
|
hwloc_obj_type_t target_type,
|
|
hwloc_uint64_t target_gp_index,
|
|
unsigned target_os_index,
|
|
struct hwloc_internal_location_s *initiator,
|
|
hwloc_uint64_t value)
|
|
{
|
|
assert(id != HWLOC_MEMATTR_ID_CAPACITY);
|
|
assert(id != HWLOC_MEMATTR_ID_LOCALITY);
|
|
|
|
return hwloc__internal_memattr_set_value(topology, id, target_type, target_gp_index, target_os_index, initiator, value);
|
|
}
|
|
|
|
int
|
|
hwloc_memattr_set_value(hwloc_topology_t topology,
|
|
hwloc_memattr_id_t id,
|
|
hwloc_obj_t target_node,
|
|
struct hwloc_location *initiator,
|
|
unsigned long flags,
|
|
hwloc_uint64_t value)
|
|
{
|
|
struct hwloc_internal_location_s iloc, *ilocp;
|
|
|
|
if (flags) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
if (initiator) {
|
|
if (to_internal_location(&iloc, initiator) < 0) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
ilocp = &iloc;
|
|
} else {
|
|
ilocp = NULL;
|
|
}
|
|
|
|
return hwloc__internal_memattr_set_value(topology, id, target_node->type, target_node->gp_index, target_node->os_index, ilocp, value);
|
|
}
|
|
|
|
|
|
/**********************
|
|
* Best target
|
|
*/
|
|
|
|
static void
|
|
hwloc__update_best_target(hwloc_obj_t *best_obj, hwloc_uint64_t *best_value, int *found,
|
|
hwloc_obj_t new_obj, hwloc_uint64_t new_value,
|
|
int keep_highest)
|
|
{
|
|
if (*found) {
|
|
if (keep_highest) {
|
|
if (new_value <= *best_value)
|
|
return;
|
|
} else {
|
|
if (new_value >= *best_value)
|
|
return;
|
|
}
|
|
}
|
|
|
|
*best_obj = new_obj;
|
|
*best_value = new_value;
|
|
*found = 1;
|
|
}
|
|
|
|
int
|
|
hwloc_memattr_get_best_target(hwloc_topology_t topology,
|
|
hwloc_memattr_id_t id,
|
|
struct hwloc_location *initiator,
|
|
unsigned long flags,
|
|
hwloc_obj_t *bestp, hwloc_uint64_t *valuep)
|
|
{
|
|
struct hwloc_internal_memattr_s *imattr;
|
|
hwloc_uint64_t best_value = 0; /* shutup the compiler */
|
|
hwloc_obj_t best = NULL;
|
|
int found = 0;
|
|
unsigned j;
|
|
|
|
if (flags) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
if (id >= topology->nr_memattrs) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
imattr = &topology->memattrs[id];
|
|
|
|
if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) {
|
|
/* convenience attributes */
|
|
for(j=0; ; j++) {
|
|
hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, j);
|
|
hwloc_uint64_t value;
|
|
if (!node)
|
|
break;
|
|
value = hwloc__memattr_get_convenience_value(id, node);
|
|
hwloc__update_best_target(&best, &best_value, &found,
|
|
node, value,
|
|
imattr->flags & HWLOC_MEMATTR_FLAG_HIGHER_FIRST);
|
|
}
|
|
goto done;
|
|
}
|
|
|
|
/* normal attributes */
|
|
|
|
if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
|
|
/* not strictly need */
|
|
hwloc__imattr_refresh(topology, imattr);
|
|
|
|
for(j=0; j<imattr->nr_targets; j++) {
|
|
struct hwloc_internal_memattr_target_s *imtg = &imattr->targets[j];
|
|
hwloc_uint64_t value;
|
|
if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
|
|
/* find the initiator and set its value */
|
|
struct hwloc_internal_memattr_initiator_s *imi = hwloc__memattr_get_initiator_from_location(imattr, imtg, initiator);
|
|
if (!imi)
|
|
continue;
|
|
value = imi->value;
|
|
} else {
|
|
/* get the no-initiator value */
|
|
value = imtg->noinitiator_value;
|
|
}
|
|
hwloc__update_best_target(&best, &best_value, &found,
|
|
imtg->obj, value,
|
|
imattr->flags & HWLOC_MEMATTR_FLAG_HIGHER_FIRST);
|
|
}
|
|
|
|
done:
|
|
if (found) {
|
|
assert(best);
|
|
*bestp = best;
|
|
if (valuep)
|
|
*valuep = best_value;
|
|
return 0;
|
|
} else {
|
|
errno = ENOENT;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/**********************
|
|
* Best initiators
|
|
*/
|
|
|
|
static void
|
|
hwloc__update_best_initiator(struct hwloc_internal_location_s *best_initiator, hwloc_uint64_t *best_value, int *found,
|
|
struct hwloc_internal_location_s *new_initiator, hwloc_uint64_t new_value,
|
|
int keep_highest)
|
|
{
|
|
if (*found) {
|
|
if (keep_highest) {
|
|
if (new_value <= *best_value)
|
|
return;
|
|
} else {
|
|
if (new_value >= *best_value)
|
|
return;
|
|
}
|
|
}
|
|
|
|
*best_initiator = *new_initiator;
|
|
*best_value = new_value;
|
|
*found = 1;
|
|
}
|
|
|
|
int
|
|
hwloc_memattr_get_best_initiator(hwloc_topology_t topology,
|
|
hwloc_memattr_id_t id,
|
|
hwloc_obj_t target_node,
|
|
unsigned long flags,
|
|
struct hwloc_location *bestp, hwloc_uint64_t *valuep)
|
|
{
|
|
struct hwloc_internal_memattr_s *imattr;
|
|
struct hwloc_internal_memattr_target_s *imtg;
|
|
struct hwloc_internal_location_s best_initiator;
|
|
hwloc_uint64_t best_value;
|
|
int found;
|
|
unsigned i;
|
|
|
|
if (flags) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
if (id >= topology->nr_memattrs) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
imattr = &topology->memattrs[id];
|
|
|
|
if (!(imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR)) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
|
|
/* not strictly need */
|
|
hwloc__imattr_refresh(topology, imattr);
|
|
|
|
imtg = hwloc__memattr_get_target(imattr, target_node->type, target_node->gp_index, target_node->os_index, 0);
|
|
if (!imtg) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
found = 0;
|
|
for(i=0; i<imtg->nr_initiators; i++) {
|
|
struct hwloc_internal_memattr_initiator_s *imi = &imtg->initiators[i];
|
|
hwloc__update_best_initiator(&best_initiator, &best_value, &found,
|
|
&imi->initiator, imi->value,
|
|
imattr->flags & HWLOC_MEMATTR_FLAG_HIGHER_FIRST);
|
|
}
|
|
|
|
if (found) {
|
|
if (valuep)
|
|
*valuep = best_value;
|
|
return from_internal_location(&best_initiator, bestp);
|
|
} else {
|
|
errno = ENOENT;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/****************************
|
|
* Listing local nodes
|
|
*/
|
|
|
|
static __hwloc_inline int
|
|
match_local_obj_cpuset(hwloc_obj_t node, hwloc_cpuset_t cpuset, unsigned long flags)
|
|
{
|
|
if (flags & HWLOC_LOCAL_NUMANODE_FLAG_ALL)
|
|
return 1;
|
|
if ((flags & HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY)
|
|
&& hwloc_bitmap_isincluded(cpuset, node->cpuset))
|
|
return 1;
|
|
if ((flags & HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY)
|
|
&& hwloc_bitmap_isincluded(node->cpuset, cpuset))
|
|
return 1;
|
|
return hwloc_bitmap_isequal(node->cpuset, cpuset);
|
|
}
|
|
|
|
int
|
|
hwloc_get_local_numanode_objs(hwloc_topology_t topology,
|
|
struct hwloc_location *location,
|
|
unsigned *nrp,
|
|
hwloc_obj_t *nodes,
|
|
unsigned long flags)
|
|
{
|
|
hwloc_cpuset_t cpuset;
|
|
hwloc_obj_t node;
|
|
unsigned i;
|
|
|
|
if (flags & ~(HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY
|
|
|HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY
|
|
| HWLOC_LOCAL_NUMANODE_FLAG_ALL)) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
if (!nrp || (*nrp && !nodes)) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
if (!location) {
|
|
if (!(flags & HWLOC_LOCAL_NUMANODE_FLAG_ALL)) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
cpuset = NULL; /* unused */
|
|
|
|
} else {
|
|
if (location->type == HWLOC_LOCATION_TYPE_CPUSET) {
|
|
cpuset = location->location.cpuset;
|
|
} else if (location->type == HWLOC_LOCATION_TYPE_OBJECT) {
|
|
hwloc_obj_t obj = location->location.object;
|
|
while (!obj->cpuset)
|
|
obj = obj->parent;
|
|
cpuset = obj->cpuset;
|
|
} else {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
i = 0;
|
|
for(node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0);
|
|
node;
|
|
node = node->next_cousin) {
|
|
if (!match_local_obj_cpuset(node, cpuset, flags))
|
|
continue;
|
|
if (i < *nrp)
|
|
nodes[i] = node;
|
|
i++;
|
|
}
|
|
|
|
*nrp = i;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**************************************
|
|
* Using memattrs to identify HBM/DRAM
|
|
*/
|
|
|
|
struct hwloc_memory_tier_s {
|
|
hwloc_obj_t node;
|
|
uint64_t local_bw;
|
|
enum hwloc_memory_tier_type_e {
|
|
/* warning the order is important for guess_memory_tiers() after qsort() */
|
|
HWLOC_MEMORY_TIER_UNKNOWN,
|
|
HWLOC_MEMORY_TIER_DRAM,
|
|
HWLOC_MEMORY_TIER_HBM,
|
|
HWLOC_MEMORY_TIER_SPM, /* Specific-Purpose Memory is usually HBM, we'll use BW to confirm */
|
|
HWLOC_MEMORY_TIER_NVM,
|
|
HWLOC_MEMORY_TIER_GPU,
|
|
} type;
|
|
};
|
|
|
|
static int compare_tiers(const void *_a, const void *_b)
|
|
{
|
|
const struct hwloc_memory_tier_s *a = _a, *b = _b;
|
|
/* sort by type of tier first */
|
|
if (a->type != b->type)
|
|
return a->type - b->type;
|
|
/* then by bandwidth */
|
|
if (a->local_bw > b->local_bw)
|
|
return -1;
|
|
else if (a->local_bw < b->local_bw)
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology)
|
|
{
|
|
struct hwloc_internal_memattr_s *imattr;
|
|
struct hwloc_memory_tier_s *tiers;
|
|
unsigned i, j, n;
|
|
const char *env;
|
|
int spm_is_hbm = -1; /* -1 will guess from BW, 0 no, 1 forced */
|
|
int mark_dram = 1;
|
|
unsigned first_spm, first_nvm;
|
|
hwloc_uint64_t max_unknown_bw, min_spm_bw;
|
|
|
|
env = getenv("HWLOC_MEMTIERS_GUESS");
|
|
if (env) {
|
|
if (!strcmp(env, "none")) {
|
|
return 0;
|
|
} else if (!strcmp(env, "default")) {
|
|
/* nothing */
|
|
} else if (!strcmp(env, "spm_is_hbm")) {
|
|
hwloc_debug("Assuming SPM-tier is HBM, ignore bandwidth\n");
|
|
spm_is_hbm = 1;
|
|
} else if (HWLOC_SHOW_CRITICAL_ERRORS()) {
|
|
fprintf(stderr, "hwloc: Failed to recognize HWLOC_MEMTIERS_GUESS value %s\n", env);
|
|
}
|
|
}
|
|
|
|
imattr = &topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH];
|
|
|
|
if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
|
|
hwloc__imattr_refresh(topology, imattr);
|
|
|
|
n = hwloc_get_nbobjs_by_depth(topology, HWLOC_TYPE_DEPTH_NUMANODE);
|
|
assert(n);
|
|
|
|
tiers = malloc(n * sizeof(*tiers));
|
|
if (!tiers)
|
|
return -1;
|
|
|
|
for(i=0; i<n; i++) {
|
|
hwloc_obj_t node;
|
|
const char *daxtype;
|
|
struct hwloc_internal_location_s iloc;
|
|
struct hwloc_internal_memattr_target_s *imtg = NULL;
|
|
struct hwloc_internal_memattr_initiator_s *imi;
|
|
|
|
node = hwloc_get_obj_by_depth(topology, HWLOC_TYPE_DEPTH_NUMANODE, i);
|
|
assert(node);
|
|
tiers[i].node = node;
|
|
|
|
/* defaults */
|
|
tiers[i].type = HWLOC_MEMORY_TIER_UNKNOWN;
|
|
tiers[i].local_bw = 0; /* unknown */
|
|
|
|
daxtype = hwloc_obj_get_info_by_name(node, "DAXType");
|
|
/* mark NVM, SPM and GPU nodes */
|
|
if (daxtype && !strcmp(daxtype, "NVM"))
|
|
tiers[i].type = HWLOC_MEMORY_TIER_NVM;
|
|
if (daxtype && !strcmp(daxtype, "SPM"))
|
|
tiers[i].type = HWLOC_MEMORY_TIER_SPM;
|
|
if (node->subtype && !strcmp(node->subtype, "GPUMemory"))
|
|
tiers[i].type = HWLOC_MEMORY_TIER_GPU;
|
|
|
|
if (spm_is_hbm == -1) {
|
|
for(j=0; j<imattr->nr_targets; j++)
|
|
if (imattr->targets[j].obj == node) {
|
|
imtg = &imattr->targets[j];
|
|
break;
|
|
}
|
|
if (imtg && !hwloc_bitmap_iszero(node->cpuset)) {
|
|
iloc.type = HWLOC_LOCATION_TYPE_CPUSET;
|
|
iloc.location.cpuset = node->cpuset;
|
|
imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0);
|
|
if (imi)
|
|
tiers[i].local_bw = imi->value;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* sort tiers */
|
|
qsort(tiers, n, sizeof(*tiers), compare_tiers);
|
|
hwloc_debug("Sorting memory tiers...\n");
|
|
for(i=0; i<n; i++)
|
|
hwloc_debug(" tier %u = node L#%u P#%u with tier type %d and local BW #%llu\n",
|
|
i,
|
|
tiers[i].node->logical_index, tiers[i].node->os_index,
|
|
tiers[i].type, (unsigned long long) tiers[i].local_bw);
|
|
|
|
/* now we have UNKNOWN tiers (sorted by BW), then SPM tiers (sorted by BW), then NVM, then GPU */
|
|
|
|
/* iterate over UNKNOWN tiers, and find their BW */
|
|
for(i=0; i<n; i++) {
|
|
if (tiers[i].type > HWLOC_MEMORY_TIER_UNKNOWN)
|
|
break;
|
|
}
|
|
first_spm = i;
|
|
/* get max BW from first */
|
|
if (first_spm > 0)
|
|
max_unknown_bw = tiers[0].local_bw;
|
|
else
|
|
max_unknown_bw = 0;
|
|
|
|
/* there are no DRAM or HBM tiers yet */
|
|
|
|
/* iterate over SPM tiers, and find their BW */
|
|
for(i=first_spm; i<n; i++) {
|
|
if (tiers[i].type > HWLOC_MEMORY_TIER_SPM)
|
|
break;
|
|
}
|
|
first_nvm = i;
|
|
/* get min BW from last */
|
|
if (first_nvm > first_spm)
|
|
min_spm_bw = tiers[first_nvm-1].local_bw;
|
|
else
|
|
min_spm_bw = 0;
|
|
|
|
/* FIXME: if there's more than 10% between some sets of nodes inside a tier, split it? */
|
|
/* FIXME: if there are cpuset-intersecting nodes in same tier, abort? */
|
|
|
|
if (spm_is_hbm == -1) {
|
|
/* if we have BW for all SPM and UNKNOWN
|
|
* and all SPM BW are 2x superior to all UNKNOWN BW
|
|
*/
|
|
hwloc_debug("UNKNOWN-memory-tier max bandwidth %llu\n", (unsigned long long) max_unknown_bw);
|
|
hwloc_debug("SPM-memory-tier min bandwidth %llu\n", (unsigned long long) min_spm_bw);
|
|
if (max_unknown_bw > 0 && min_spm_bw > 0 && max_unknown_bw*2 < min_spm_bw) {
|
|
hwloc_debug("assuming SPM means HBM and !SPM means DRAM since bandwidths are very different\n");
|
|
spm_is_hbm = 1;
|
|
} else {
|
|
hwloc_debug("cannot assume SPM means HBM\n");
|
|
spm_is_hbm = 0;
|
|
}
|
|
}
|
|
|
|
if (spm_is_hbm) {
|
|
for(i=0; i<first_spm; i++)
|
|
tiers[i].type = HWLOC_MEMORY_TIER_DRAM;
|
|
for(i=first_spm; i<first_nvm; i++)
|
|
tiers[i].type = HWLOC_MEMORY_TIER_HBM;
|
|
}
|
|
|
|
if (first_spm == n)
|
|
mark_dram = 0;
|
|
|
|
/* now apply subtypes */
|
|
for(i=0; i<n; i++) {
|
|
const char *type = NULL;
|
|
if (tiers[i].node->subtype) /* don't overwrite the existing subtype */
|
|
continue;
|
|
switch (tiers[i].type) {
|
|
case HWLOC_MEMORY_TIER_DRAM:
|
|
if (mark_dram)
|
|
type = "DRAM";
|
|
break;
|
|
case HWLOC_MEMORY_TIER_HBM:
|
|
type = "HBM";
|
|
break;
|
|
case HWLOC_MEMORY_TIER_SPM:
|
|
type = "SPM";
|
|
break;
|
|
case HWLOC_MEMORY_TIER_NVM:
|
|
type = "NVM";
|
|
break;
|
|
default:
|
|
/* GPU memory is already marked with subtype="GPUMemory",
|
|
* UNKNOWN doesn't deserve any subtype
|
|
*/
|
|
break;
|
|
}
|
|
if (type) {
|
|
hwloc_debug("Marking node L#%u P#%u as %s\n", tiers[i].node->logical_index, tiers[i].node->os_index, type);
|
|
tiers[i].node->subtype = strdup(type);
|
|
}
|
|
}
|
|
|
|
free(tiers);
|
|
return 0;
|
|
}
|