941 lines
30 KiB
C
941 lines
30 KiB
C
/*
 * Copyright © 2009-2018 Inria. All rights reserved.
 * See COPYING in top-level directory.
 */
|
|
|
|
#include <private/autogen/config.h>
|
|
#include <hwloc.h>
|
|
#include <hwloc/plugins.h>
|
|
#include <private/private.h>
|
|
#include <private/debug.h>
|
|
#include <private/misc.h>
|
|
|
|
#include <fcntl.h>
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h>
|
|
#endif
|
|
#include <sys/stat.h>
|
|
|
|
#if defined(HWLOC_WIN_SYS) && !defined(__CYGWIN__)
|
|
#include <io.h>
|
|
#define open _open
|
|
#define read _read
|
|
#define close _close
|
|
#endif
|
|
|
|
static void
|
|
hwloc_pci_forced_locality_parse_one(struct hwloc_topology *topology,
|
|
const char *string /* must contain a ' ' */,
|
|
unsigned *allocated)
|
|
{
|
|
unsigned nr = topology->pci_forced_locality_nr;
|
|
unsigned domain, bus_first, bus_last, dummy;
|
|
hwloc_bitmap_t set;
|
|
char *tmp;
|
|
|
|
if (sscanf(string, "%x:%x-%x %x", &domain, &bus_first, &bus_last, &dummy) == 4) {
|
|
/* fine */
|
|
} else if (sscanf(string, "%x:%x %x", &domain, &bus_first, &dummy) == 3) {
|
|
bus_last = bus_first;
|
|
} else if (sscanf(string, "%x %x", &domain, &dummy) == 2) {
|
|
bus_first = 0;
|
|
bus_last = 255;
|
|
} else
|
|
return;
|
|
|
|
tmp = strchr(string, ' ');
|
|
if (!tmp)
|
|
return;
|
|
tmp++;
|
|
|
|
set = hwloc_bitmap_alloc();
|
|
hwloc_bitmap_sscanf(set, tmp);
|
|
|
|
if (!*allocated) {
|
|
topology->pci_forced_locality = malloc(sizeof(*topology->pci_forced_locality));
|
|
if (!topology->pci_forced_locality)
|
|
goto out_with_set; /* failed to allocate, ignore this forced locality */
|
|
*allocated = 1;
|
|
} else if (nr >= *allocated) {
|
|
struct hwloc_pci_forced_locality_s *tmplocs;
|
|
tmplocs = realloc(topology->pci_forced_locality,
|
|
2 * *allocated * sizeof(*topology->pci_forced_locality));
|
|
if (!tmplocs)
|
|
goto out_with_set; /* failed to allocate, ignore this forced locality */
|
|
topology->pci_forced_locality = tmplocs;
|
|
*allocated *= 2;
|
|
}
|
|
|
|
topology->pci_forced_locality[nr].domain = domain;
|
|
topology->pci_forced_locality[nr].bus_first = bus_first;
|
|
topology->pci_forced_locality[nr].bus_last = bus_last;
|
|
topology->pci_forced_locality[nr].cpuset = set;
|
|
topology->pci_forced_locality_nr++;
|
|
return;
|
|
|
|
out_with_set:
|
|
hwloc_bitmap_free(set);
|
|
return;
|
|
}
|
|
|
|
/*
 * Split a forced-locality description into entries separated by ';',
 * '\r' or '\n' and pass each of them to
 * hwloc_pci_forced_locality_parse_one().
 *
 * The input is duplicated first because the separators are overwritten
 * with '\0' while splitting. If the duplication fails, nothing is parsed.
 */
static void
hwloc_pci_forced_locality_parse(struct hwloc_topology *topology, const char *_env)
{
  char *env = strdup(_env);
  unsigned allocated = 0;
  char *tmp = env;

  if (!env)
    /* out of memory, ignore the forced localities */
    return;

  while (1) {
    size_t len = strcspn(tmp, ";\r\n");
    char *next = NULL;

    if (tmp[len] != '\0') {
      /* terminate this entry and remember where the next one starts, if any */
      tmp[len] = '\0';
      if (tmp[len+1] != '\0')
        next = &tmp[len]+1;
    }

    hwloc_pci_forced_locality_parse_one(topology, tmp, &allocated);

    if (next)
      tmp = next;
    else
      break;
  }

  free(env);
}
|
|
|
|
void
|
|
hwloc_pci_discovery_init(struct hwloc_topology *topology)
|
|
{
|
|
topology->need_pci_belowroot_apply_locality = 0;
|
|
|
|
topology->pci_has_forced_locality = 0;
|
|
topology->pci_forced_locality_nr = 0;
|
|
topology->pci_forced_locality = NULL;
|
|
}
|
|
|
|
void
|
|
hwloc_pci_discovery_prepare(struct hwloc_topology *topology)
|
|
{
|
|
char *env;
|
|
|
|
env = getenv("HWLOC_PCI_LOCALITY");
|
|
if (env) {
|
|
int fd;
|
|
|
|
topology->pci_has_forced_locality = 1;
|
|
|
|
fd = open(env, O_RDONLY);
|
|
if (fd >= 0) {
|
|
struct stat st;
|
|
char *buffer;
|
|
int err = fstat(fd, &st);
|
|
if (!err) {
|
|
if (st.st_size <= 64*1024) { /* random limit large enough to store multiple cpusets for thousands of PUs */
|
|
buffer = malloc(st.st_size+1);
|
|
if (read(fd, buffer, st.st_size) == st.st_size) {
|
|
buffer[st.st_size] = '\0';
|
|
hwloc_pci_forced_locality_parse(topology, buffer);
|
|
}
|
|
free(buffer);
|
|
} else {
|
|
fprintf(stderr, "Ignoring HWLOC_PCI_LOCALITY file `%s' too large (%lu bytes)\n",
|
|
env, (unsigned long) st.st_size);
|
|
}
|
|
}
|
|
close(fd);
|
|
} else
|
|
hwloc_pci_forced_locality_parse(topology, env);
|
|
}
|
|
}
|
|
|
|
void
|
|
hwloc_pci_discovery_exit(struct hwloc_topology *topology __hwloc_attribute_unused)
|
|
{
|
|
unsigned i;
|
|
for(i=0; i<topology->pci_forced_locality_nr; i++)
|
|
hwloc_bitmap_free(topology->pci_forced_locality[i].cpuset);
|
|
free(topology->pci_forced_locality);
|
|
|
|
hwloc_pci_discovery_init(topology);
|
|
}
|
|
|
|
#ifdef HWLOC_DEBUG
|
|
static void
|
|
hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused,
|
|
struct hwloc_obj *pcidev)
|
|
{
|
|
char busid[14];
|
|
hwloc_obj_t parent;
|
|
|
|
/* indent */
|
|
parent = pcidev->parent;
|
|
while (parent) {
|
|
hwloc_debug("%s", " ");
|
|
parent = parent->parent;
|
|
}
|
|
|
|
snprintf(busid, sizeof(busid), "%04x:%02x:%02x.%01x",
|
|
pcidev->attr->pcidev.domain, pcidev->attr->pcidev.bus, pcidev->attr->pcidev.dev, pcidev->attr->pcidev.func);
|
|
|
|
if (pcidev->type == HWLOC_OBJ_BRIDGE) {
|
|
if (pcidev->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST)
|
|
hwloc_debug("HostBridge");
|
|
else
|
|
hwloc_debug("%s Bridge [%04x:%04x]", busid,
|
|
pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id);
|
|
hwloc_debug(" to %04x:[%02x:%02x]\n",
|
|
pcidev->attr->bridge.downstream.pci.domain, pcidev->attr->bridge.downstream.pci.secondary_bus, pcidev->attr->bridge.downstream.pci.subordinate_bus);
|
|
} else
|
|
hwloc_debug("%s Device [%04x:%04x (%04x:%04x) rev=%02x class=%04x]\n", busid,
|
|
pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id,
|
|
pcidev->attr->pcidev.subvendor_id, pcidev->attr->pcidev.subdevice_id,
|
|
pcidev->attr->pcidev.revision, pcidev->attr->pcidev.class_id);
|
|
}
|
|
|
|
static void
|
|
hwloc_pci_traverse(void * cbdata, struct hwloc_obj *tree,
|
|
void (*cb)(void * cbdata, struct hwloc_obj *))
|
|
{
|
|
hwloc_obj_t child;
|
|
cb(cbdata, tree);
|
|
for_each_io_child(child, tree) {
|
|
if (child->type == HWLOC_OBJ_BRIDGE)
|
|
hwloc_pci_traverse(cbdata, child, cb);
|
|
}
|
|
}
|
|
#endif /* HWLOC_DEBUG */
|
|
|
|
/* Result of hwloc_pci_compare_busids(a, b), describing a with respect to b. */
enum hwloc_pci_busid_comparison_e {
  /* a has a lower domain/bus/dev/func than b */
  HWLOC_PCI_BUSID_LOWER = 0,
  /* a has a higher domain/bus/dev/func than b */
  HWLOC_PCI_BUSID_HIGHER = 1,
  /* b is a bridge whose downstream bus range contains the bus of a */
  HWLOC_PCI_BUSID_INCLUDED = 2,
  /* a is a bridge whose downstream bus range contains the bus of b */
  HWLOC_PCI_BUSID_SUPERSET = 3
};
|
|
|
|
static enum hwloc_pci_busid_comparison_e
|
|
hwloc_pci_compare_busids(struct hwloc_obj *a, struct hwloc_obj *b)
|
|
{
|
|
#ifdef HWLOC_DEBUG
|
|
if (a->type == HWLOC_OBJ_BRIDGE)
|
|
assert(a->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI);
|
|
if (b->type == HWLOC_OBJ_BRIDGE)
|
|
assert(b->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI);
|
|
#endif
|
|
|
|
if (a->attr->pcidev.domain < b->attr->pcidev.domain)
|
|
return HWLOC_PCI_BUSID_LOWER;
|
|
if (a->attr->pcidev.domain > b->attr->pcidev.domain)
|
|
return HWLOC_PCI_BUSID_HIGHER;
|
|
|
|
if (a->type == HWLOC_OBJ_BRIDGE
|
|
&& b->attr->pcidev.bus >= a->attr->bridge.downstream.pci.secondary_bus
|
|
&& b->attr->pcidev.bus <= a->attr->bridge.downstream.pci.subordinate_bus)
|
|
return HWLOC_PCI_BUSID_SUPERSET;
|
|
if (b->type == HWLOC_OBJ_BRIDGE
|
|
&& a->attr->pcidev.bus >= b->attr->bridge.downstream.pci.secondary_bus
|
|
&& a->attr->pcidev.bus <= b->attr->bridge.downstream.pci.subordinate_bus)
|
|
return HWLOC_PCI_BUSID_INCLUDED;
|
|
|
|
if (a->attr->pcidev.bus < b->attr->pcidev.bus)
|
|
return HWLOC_PCI_BUSID_LOWER;
|
|
if (a->attr->pcidev.bus > b->attr->pcidev.bus)
|
|
return HWLOC_PCI_BUSID_HIGHER;
|
|
|
|
if (a->attr->pcidev.dev < b->attr->pcidev.dev)
|
|
return HWLOC_PCI_BUSID_LOWER;
|
|
if (a->attr->pcidev.dev > b->attr->pcidev.dev)
|
|
return HWLOC_PCI_BUSID_HIGHER;
|
|
|
|
if (a->attr->pcidev.func < b->attr->pcidev.func)
|
|
return HWLOC_PCI_BUSID_LOWER;
|
|
if (a->attr->pcidev.func > b->attr->pcidev.func)
|
|
return HWLOC_PCI_BUSID_HIGHER;
|
|
|
|
/* Should never reach here. Abort on both debug builds and
|
|
non-debug builds */
|
|
assert(0);
|
|
fprintf(stderr, "Bad assertion in hwloc %s:%d (aborting)\n", __FILE__, __LINE__);
|
|
exit(1);
|
|
}
|
|
|
|
static void
|
|
hwloc_pci_add_object(struct hwloc_obj *parent, struct hwloc_obj **parent_io_first_child_p, struct hwloc_obj *new)
|
|
{
|
|
struct hwloc_obj **curp, **childp;
|
|
|
|
curp = parent_io_first_child_p;
|
|
while (*curp) {
|
|
enum hwloc_pci_busid_comparison_e comp = hwloc_pci_compare_busids(new, *curp);
|
|
switch (comp) {
|
|
case HWLOC_PCI_BUSID_HIGHER:
|
|
/* go further */
|
|
curp = &(*curp)->next_sibling;
|
|
continue;
|
|
case HWLOC_PCI_BUSID_INCLUDED:
|
|
/* insert new below current bridge */
|
|
hwloc_pci_add_object(*curp, &(*curp)->io_first_child, new);
|
|
return;
|
|
case HWLOC_PCI_BUSID_LOWER:
|
|
case HWLOC_PCI_BUSID_SUPERSET: {
|
|
/* insert new before current */
|
|
new->next_sibling = *curp;
|
|
*curp = new;
|
|
new->parent = parent;
|
|
if (new->type == HWLOC_OBJ_BRIDGE) {
|
|
/* look at remaining siblings and move some below new */
|
|
childp = &new->io_first_child;
|
|
curp = &new->next_sibling;
|
|
while (*curp) {
|
|
hwloc_obj_t cur = *curp;
|
|
if (hwloc_pci_compare_busids(new, cur) == HWLOC_PCI_BUSID_LOWER) {
|
|
/* this sibling remains under root, after new. */
|
|
if (cur->attr->pcidev.domain > new->attr->pcidev.domain
|
|
|| cur->attr->pcidev.bus > new->attr->bridge.downstream.pci.subordinate_bus)
|
|
/* this sibling is even above new's subordinate bus, no other sibling could go below new */
|
|
return;
|
|
curp = &cur->next_sibling;
|
|
} else {
|
|
/* this sibling goes under new */
|
|
*childp = cur;
|
|
*curp = cur->next_sibling;
|
|
(*childp)->parent = new;
|
|
(*childp)->next_sibling = NULL;
|
|
childp = &(*childp)->next_sibling;
|
|
}
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
/* add to the end of the list if higher than everybody */
|
|
new->parent = parent;
|
|
new->next_sibling = NULL;
|
|
*curp = new;
|
|
}
|
|
|
|
/*
 * Insert obj in the PCI tree rooted at *treep, ordered by bus ID.
 * Objects at the top of this tree get a NULL parent.
 */
void
hwloc_pcidisc_tree_insert_by_busid(struct hwloc_obj **treep,
                                   struct hwloc_obj *obj)
{
  struct hwloc_obj *no_parent = NULL; /* no parent on top of tree */

  hwloc_pci_add_object(no_parent, treep, obj);
}
|
|
|
|
int
|
|
hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *old_tree)
|
|
{
|
|
struct hwloc_obj **next_hb_p;
|
|
enum hwloc_type_filter_e bfilter;
|
|
|
|
if (!old_tree)
|
|
/* found nothing, exit */
|
|
return 0;
|
|
|
|
#ifdef HWLOC_DEBUG
|
|
hwloc_debug("%s", "\nPCI hierarchy:\n");
|
|
hwloc_pci_traverse(NULL, old_tree, hwloc_pci_traverse_print_cb);
|
|
hwloc_debug("%s", "\n");
|
|
#endif
|
|
|
|
next_hb_p = &hwloc_get_root_obj(topology)->io_first_child;
|
|
while (*next_hb_p)
|
|
next_hb_p = &((*next_hb_p)->next_sibling);
|
|
|
|
bfilter = topology->type_filter[HWLOC_OBJ_BRIDGE];
|
|
if (bfilter == HWLOC_TYPE_FILTER_KEEP_NONE) {
|
|
*next_hb_p = old_tree;
|
|
topology->modified = 1;
|
|
goto done;
|
|
}
|
|
|
|
/*
|
|
* tree points to all objects connected to any upstream bus in the machine.
|
|
* We now create one real hostbridge object per upstream bus.
|
|
* It's not actually a PCI device so we have to create it.
|
|
*/
|
|
while (old_tree) {
|
|
/* start a new host bridge */
|
|
struct hwloc_obj *hostbridge = hwloc_alloc_setup_object(topology, HWLOC_OBJ_BRIDGE, HWLOC_UNKNOWN_INDEX);
|
|
struct hwloc_obj **dstnextp = &hostbridge->io_first_child;
|
|
struct hwloc_obj **srcnextp = &old_tree;
|
|
struct hwloc_obj *child = *srcnextp;
|
|
unsigned short current_domain = child->attr->pcidev.domain;
|
|
unsigned char current_bus = child->attr->pcidev.bus;
|
|
unsigned char current_subordinate = current_bus;
|
|
|
|
hwloc_debug("Starting new PCI hostbridge %04x:%02x\n", current_domain, current_bus);
|
|
|
|
next_child:
|
|
/* remove next child from tree */
|
|
*srcnextp = child->next_sibling;
|
|
/* append it to hostbridge */
|
|
*dstnextp = child;
|
|
child->parent = hostbridge;
|
|
child->next_sibling = NULL;
|
|
dstnextp = &child->next_sibling;
|
|
|
|
/* compute hostbridge secondary/subordinate buses */
|
|
if (child->type == HWLOC_OBJ_BRIDGE
|
|
&& child->attr->bridge.downstream.pci.subordinate_bus > current_subordinate)
|
|
current_subordinate = child->attr->bridge.downstream.pci.subordinate_bus;
|
|
|
|
/* use next child if it has the same domains/bus */
|
|
child = *srcnextp;
|
|
if (child
|
|
&& child->attr->pcidev.domain == current_domain
|
|
&& child->attr->pcidev.bus == current_bus)
|
|
goto next_child;
|
|
|
|
/* finish setting up this hostbridge */
|
|
hostbridge->attr->bridge.upstream_type = HWLOC_OBJ_BRIDGE_HOST;
|
|
hostbridge->attr->bridge.downstream_type = HWLOC_OBJ_BRIDGE_PCI;
|
|
hostbridge->attr->bridge.downstream.pci.domain = current_domain;
|
|
hostbridge->attr->bridge.downstream.pci.secondary_bus = current_bus;
|
|
hostbridge->attr->bridge.downstream.pci.subordinate_bus = current_subordinate;
|
|
hwloc_debug("New PCI hostbridge %04x:[%02x-%02x]\n",
|
|
current_domain, current_bus, current_subordinate);
|
|
|
|
*next_hb_p = hostbridge;
|
|
next_hb_p = &hostbridge->next_sibling;
|
|
topology->modified = 1; /* needed in case somebody reconnects levels before the core calls hwloc_pci_belowroot_apply_locality()
|
|
* or if hwloc_pci_belowroot_apply_locality() keeps hostbridges below root.
|
|
*/
|
|
}
|
|
|
|
done:
|
|
topology->need_pci_belowroot_apply_locality = 1;
|
|
return 0;
|
|
}
|
|
|
|
static struct hwloc_obj *
|
|
hwloc_pci_fixup_busid_parent(struct hwloc_topology *topology __hwloc_attribute_unused,
|
|
struct hwloc_pcidev_attr_s *busid,
|
|
struct hwloc_obj *parent)
|
|
{
|
|
/* Xeon E5v3 in cluster-on-die mode only have PCI on the first NUMA node of each package.
|
|
* but many dual-processor host report the second PCI hierarchy on 2nd NUMA of first package.
|
|
*/
|
|
if (parent->depth >= 2
|
|
&& parent->type == HWLOC_OBJ_NUMANODE
|
|
&& parent->sibling_rank == 1 && parent->parent->arity == 2
|
|
&& parent->parent->type == HWLOC_OBJ_PACKAGE
|
|
&& parent->parent->sibling_rank == 0 && parent->parent->parent->arity == 2) {
|
|
const char *cpumodel = hwloc_obj_get_info_by_name(parent->parent, "CPUModel");
|
|
if (cpumodel && strstr(cpumodel, "Xeon")) {
|
|
if (!hwloc_hide_errors()) {
|
|
fprintf(stderr, "****************************************************************************\n");
|
|
fprintf(stderr, "* hwloc %s has encountered an incorrect PCI locality information.\n", HWLOC_VERSION);
|
|
fprintf(stderr, "* PCI bus %04x:%02x is supposedly close to 2nd NUMA node of 1st package,\n",
|
|
busid->domain, busid->bus);
|
|
fprintf(stderr, "* however hwloc believes this is impossible on this architecture.\n");
|
|
fprintf(stderr, "* Therefore the PCI bus will be moved to 1st NUMA node of 2nd package.\n");
|
|
fprintf(stderr, "*\n");
|
|
fprintf(stderr, "* If you feel this fixup is wrong, disable it by setting in your environment\n");
|
|
fprintf(stderr, "* HWLOC_PCI_%04x_%02x_LOCALCPUS= (empty value), and report the problem\n",
|
|
busid->domain, busid->bus);
|
|
fprintf(stderr, "* to the hwloc's user mailing list together with the XML output of lstopo.\n");
|
|
fprintf(stderr, "*\n");
|
|
fprintf(stderr, "* You may silence this message by setting HWLOC_HIDE_ERRORS=1 in your environment.\n");
|
|
fprintf(stderr, "****************************************************************************\n");
|
|
}
|
|
return parent->parent->next_sibling->first_child;
|
|
}
|
|
}
|
|
|
|
return parent;
|
|
}
|
|
|
|
static struct hwloc_obj *
|
|
hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcidev_attr_s *busid)
|
|
{
|
|
hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
|
|
hwloc_obj_t parent;
|
|
int forced = 0;
|
|
int noquirks = 0;
|
|
unsigned i;
|
|
int err;
|
|
|
|
/* try to match a forced locality */
|
|
if (topology->pci_has_forced_locality) {
|
|
for(i=0; i<topology->pci_forced_locality_nr; i++) {
|
|
if (busid->domain == topology->pci_forced_locality[i].domain
|
|
&& busid->bus >= topology->pci_forced_locality[i].bus_first
|
|
&& busid->bus <= topology->pci_forced_locality[i].bus_last) {
|
|
hwloc_bitmap_copy(cpuset, topology->pci_forced_locality[i].cpuset);
|
|
forced = 1;
|
|
break;
|
|
}
|
|
}
|
|
/* if pci locality was forced, even empty, don't let quirks change what the OS reports */
|
|
noquirks = 1;
|
|
}
|
|
|
|
/* deprecated force locality variables */
|
|
if (!forced) {
|
|
const char *env;
|
|
char envname[256];
|
|
/* override the cpuset with the environment if given */
|
|
snprintf(envname, sizeof(envname), "HWLOC_PCI_%04x_%02x_LOCALCPUS",
|
|
busid->domain, busid->bus);
|
|
env = getenv(envname);
|
|
if (env) {
|
|
static int reported = 0;
|
|
if (!topology->pci_has_forced_locality && !reported) {
|
|
fprintf(stderr, "Environment variable %s is deprecated, please use HWLOC_PCI_LOCALITY instead.\n", env);
|
|
reported = 1;
|
|
}
|
|
if (*env) {
|
|
/* force the cpuset */
|
|
hwloc_debug("Overriding localcpus using %s in the environment\n", envname);
|
|
hwloc_bitmap_sscanf(cpuset, env);
|
|
forced = 1;
|
|
}
|
|
/* if env exists, even empty, don't let quirks change what the OS reports */
|
|
noquirks = 1;
|
|
}
|
|
}
|
|
|
|
if (!forced) {
|
|
/* get the cpuset by asking the OS backend. */
|
|
struct hwloc_backend *backend = topology->get_pci_busid_cpuset_backend;
|
|
if (backend)
|
|
err = backend->get_pci_busid_cpuset(backend, busid, cpuset);
|
|
else
|
|
err = -1;
|
|
if (err < 0)
|
|
/* if we got nothing, assume this PCI bus is attached to the top of hierarchy */
|
|
hwloc_bitmap_copy(cpuset, hwloc_topology_get_topology_cpuset(topology));
|
|
}
|
|
|
|
hwloc_debug_bitmap("Attaching PCI tree to cpuset %s\n", cpuset);
|
|
|
|
parent = hwloc_find_insert_io_parent_by_complete_cpuset(topology, cpuset);
|
|
if (parent) {
|
|
if (!noquirks)
|
|
/* We found a valid parent. Check that the OS didn't report invalid locality */
|
|
parent = hwloc_pci_fixup_busid_parent(topology, busid, parent);
|
|
} else {
|
|
/* Fallback to root */
|
|
parent = hwloc_get_root_obj(topology);
|
|
}
|
|
|
|
hwloc_bitmap_free(cpuset);
|
|
return parent;
|
|
}
|
|
|
|
struct hwloc_obj *
|
|
hwloc_pcidisc_find_busid_parent(struct hwloc_topology *topology,
|
|
unsigned domain, unsigned bus, unsigned dev, unsigned func)
|
|
{
|
|
struct hwloc_pcidev_attr_s busid;
|
|
busid.domain = domain;
|
|
busid.bus = bus;
|
|
busid.dev = dev;
|
|
busid.func = func;
|
|
return hwloc__pci_find_busid_parent(topology, &busid);
|
|
}
|
|
|
|
int
|
|
hwloc_pci_belowroot_apply_locality(struct hwloc_topology *topology)
|
|
{
|
|
struct hwloc_obj *root = hwloc_get_root_obj(topology);
|
|
struct hwloc_obj **listp, *obj;
|
|
|
|
if (!topology->need_pci_belowroot_apply_locality)
|
|
return 0;
|
|
topology->need_pci_belowroot_apply_locality = 0;
|
|
|
|
/* root->io_first_child contains some PCI hierarchies, any maybe some non-PCI things.
|
|
* insert the PCI trees according to their PCI-locality.
|
|
*/
|
|
listp = &root->io_first_child;
|
|
while ((obj = *listp) != NULL) {
|
|
struct hwloc_pcidev_attr_s *busid;
|
|
struct hwloc_obj *parent;
|
|
|
|
/* skip non-PCI objects */
|
|
if (obj->type != HWLOC_OBJ_PCI_DEVICE
|
|
&& !(obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI)
|
|
&& !(obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)) {
|
|
listp = &obj->next_sibling;
|
|
continue;
|
|
}
|
|
|
|
if (obj->type == HWLOC_OBJ_PCI_DEVICE
|
|
|| (obj->type == HWLOC_OBJ_BRIDGE
|
|
&& obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI))
|
|
busid = &obj->attr->pcidev;
|
|
else {
|
|
/* hostbridges don't have a PCI busid for looking up locality, use their first child if PCI */
|
|
hwloc_obj_t child = obj->io_first_child;
|
|
if (child && (child->type == HWLOC_OBJ_PCI_DEVICE
|
|
|| (child->type == HWLOC_OBJ_BRIDGE
|
|
&& child->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)))
|
|
busid = &obj->io_first_child->attr->pcidev;
|
|
else
|
|
continue;
|
|
}
|
|
|
|
/* attach the object (and children) where it belongs */
|
|
parent = hwloc__pci_find_busid_parent(topology, busid);
|
|
if (parent == root) {
|
|
/* keep this object here */
|
|
listp = &obj->next_sibling;
|
|
} else {
|
|
/* dequeue this object */
|
|
*listp = obj->next_sibling;
|
|
obj->next_sibling = NULL;
|
|
hwloc_insert_object_by_parent(topology, parent, obj);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct hwloc_obj *
|
|
hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent,
|
|
unsigned domain, unsigned bus, unsigned dev, unsigned func)
|
|
{
|
|
hwloc_obj_t child;
|
|
|
|
for_each_io_child(child, parent) {
|
|
if (child->type == HWLOC_OBJ_PCI_DEVICE
|
|
|| (child->type == HWLOC_OBJ_BRIDGE
|
|
&& child->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)) {
|
|
if (child->attr->pcidev.domain == domain
|
|
&& child->attr->pcidev.bus == bus
|
|
&& child->attr->pcidev.dev == dev
|
|
&& child->attr->pcidev.func == func)
|
|
/* that's the right bus id */
|
|
return child;
|
|
if (child->attr->pcidev.domain > domain
|
|
|| (child->attr->pcidev.domain == domain
|
|
&& child->attr->pcidev.bus > bus))
|
|
/* bus id too high, won't find anything later, return parent */
|
|
return parent;
|
|
if (child->type == HWLOC_OBJ_BRIDGE
|
|
&& child->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
|
|
&& child->attr->bridge.downstream.pci.domain == domain
|
|
&& child->attr->bridge.downstream.pci.secondary_bus <= bus
|
|
&& child->attr->bridge.downstream.pci.subordinate_bus >= bus)
|
|
/* not the right bus id, but it's included in the bus below that bridge */
|
|
return hwloc__pci_belowroot_find_by_busid(child, domain, bus, dev, func);
|
|
|
|
} else if (child->type == HWLOC_OBJ_BRIDGE
|
|
&& child->attr->bridge.upstream_type != HWLOC_OBJ_BRIDGE_PCI
|
|
&& child->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
|
|
/* non-PCI to PCI bridge, just look at the subordinate bus */
|
|
&& child->attr->bridge.downstream.pci.domain == domain
|
|
&& child->attr->bridge.downstream.pci.secondary_bus <= bus
|
|
&& child->attr->bridge.downstream.pci.subordinate_bus >= bus) {
|
|
/* contains our bus, recurse */
|
|
return hwloc__pci_belowroot_find_by_busid(child, domain, bus, dev, func);
|
|
}
|
|
}
|
|
/* didn't find anything, return parent */
|
|
return parent;
|
|
}
|
|
|
|
struct hwloc_obj *
|
|
hwloc_pcidisc_find_by_busid(struct hwloc_topology *topology,
|
|
unsigned domain, unsigned bus, unsigned dev, unsigned func)
|
|
{
|
|
hwloc_obj_t root = hwloc_get_root_obj(topology);
|
|
hwloc_obj_t parent = hwloc__pci_belowroot_find_by_busid(root, domain, bus, dev, func);
|
|
if (parent == root)
|
|
return NULL;
|
|
else
|
|
return parent;
|
|
}
|
|
|
|
#define HWLOC_PCI_STATUS 0x06
#define HWLOC_PCI_STATUS_CAP_LIST 0x10
#define HWLOC_PCI_CAPABILITY_LIST 0x34
#define HWLOC_PCI_CAP_LIST_ID 0
#define HWLOC_PCI_CAP_LIST_NEXT 1

/*
 * Walk the capability list stored in a PCI config space buffer and return
 * the offset of the capability whose ID is cap, or 0 if it is not found.
 *
 * config must provide the first 256 bytes of the config space.
 * The walk stops on a null next pointer, on an ID equal to 0xff,
 * or when an offset is visited twice.
 */
unsigned
hwloc_pcidisc_find_cap(const unsigned char *config, unsigned cap)
{
  unsigned char visited[256] = { 0 };
  unsigned char pos; /* unsigned char to make sure we stay within the 256-byte config space */

  /* no capability list at all? */
  if ((config[HWLOC_PCI_STATUS] & HWLOC_PCI_STATUS_CAP_LIST) == 0)
    return 0;

  /* pointers have their 2 lowest bits masked out */
  pos = (unsigned char) (config[HWLOC_PCI_CAPABILITY_LIST] & 0xfc);
  while (pos != 0) {
    unsigned char capid;

    /* Looped around! */
    if (visited[pos])
      break;
    visited[pos] = 1;

    capid = config[pos + HWLOC_PCI_CAP_LIST_ID];
    if (capid == cap)
      return pos;
    if (capid == 0xff)
      /* invalid ID, stop here */
      break;

    pos = (unsigned char) (config[pos + HWLOC_PCI_CAP_LIST_NEXT] & 0xfc);
  }

  return 0;
}
|
|
|
|
#define HWLOC_PCI_EXP_LNKSTA 0x12
#define HWLOC_PCI_EXP_LNKSTA_SPEED 0x000f
#define HWLOC_PCI_EXP_LNKSTA_WIDTH 0x03f0

/*
 * Compute the PCIe link speed, in GB/s, from the Link Status register found
 * at offset HWLOC_PCI_EXP_LNKSTA inside the PCI Express capability located
 * at byte `offset' of the config space buffer.
 *
 * The result is stored in *linkspeed. Always returns 0.
 */
int
hwloc_pcidisc_find_linkspeed(const unsigned char *config,
                             unsigned offset, float *linkspeed)
{
  unsigned linksta, speed, width;
  float lanespeed;

  /* Link Status is a 16-bit register and the config space is little-endian.
   * Assemble it from its 2 bytes so that the result does not depend on the
   * host byte order, and so that nothing is read beyond the register.
   */
  linksta = (unsigned) config[offset + HWLOC_PCI_EXP_LNKSTA]
    | ((unsigned) config[offset + HWLOC_PCI_EXP_LNKSTA + 1] << 8);
  speed = linksta & HWLOC_PCI_EXP_LNKSTA_SPEED; /* PCIe generation */
  width = (linksta & HWLOC_PCI_EXP_LNKSTA_WIDTH) >> 4; /* how many lanes */
  /* PCIe Gen1 = 2.5GT/s signal-rate per lane with 8/10 encoding    = 0.25GB/s data-rate per lane
   * PCIe Gen2 = 5  GT/s signal-rate per lane with 8/10 encoding    = 0.5 GB/s data-rate per lane
   * PCIe Gen3 = 8  GT/s signal-rate per lane with 128/130 encoding = 1   GB/s data-rate per lane
   * PCIe Gen4 = 16 GT/s signal-rate per lane with 128/130 encoding = 2   GB/s data-rate per lane
   */

  /* lanespeed in Gbit/s */
  if (speed <= 2)
    lanespeed = 2.5f * speed * 0.8f;
  else
    lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen5 will be 32 GT/s and so on */

  /* linkspeed in GB/s */
  *linkspeed = lanespeed * width / 8;
  return 0;
}
|
|
|
|
#define HWLOC_PCI_HEADER_TYPE 0x0e
|
|
#define HWLOC_PCI_HEADER_TYPE_BRIDGE 1
|
|
#define HWLOC_PCI_CLASS_BRIDGE_PCI 0x0604
|
|
|
|
hwloc_obj_type_t
|
|
hwloc_pcidisc_check_bridge_type(unsigned device_class, const unsigned char *config)
|
|
{
|
|
unsigned char headertype;
|
|
|
|
if (device_class != HWLOC_PCI_CLASS_BRIDGE_PCI)
|
|
return HWLOC_OBJ_PCI_DEVICE;
|
|
|
|
headertype = config[HWLOC_PCI_HEADER_TYPE] & 0x7f;
|
|
return (headertype == HWLOC_PCI_HEADER_TYPE_BRIDGE)
|
|
? HWLOC_OBJ_BRIDGE : HWLOC_OBJ_PCI_DEVICE;
|
|
}
|
|
|
|
#define HWLOC_PCI_PRIMARY_BUS 0x18
|
|
#define HWLOC_PCI_SECONDARY_BUS 0x19
|
|
#define HWLOC_PCI_SUBORDINATE_BUS 0x1a
|
|
|
|
int
|
|
hwloc_pcidisc_setup_bridge_attr(hwloc_obj_t obj,
|
|
const unsigned char *config)
|
|
{
|
|
struct hwloc_bridge_attr_s *battr = &obj->attr->bridge;
|
|
struct hwloc_pcidev_attr_s *pattr = &battr->upstream.pci;
|
|
|
|
if (config[HWLOC_PCI_PRIMARY_BUS] != pattr->bus) {
|
|
/* Sometimes the config space contains 00 instead of the actual primary bus number.
|
|
* Always trust the bus ID because it was built by the system which has more information
|
|
* to workaround such problems (e.g. ACPI information about PCI parent/children).
|
|
*/
|
|
hwloc_debug(" %04x:%02x:%02x.%01x bridge with (ignored) invalid PCI_PRIMARY_BUS %02x\n",
|
|
pattr->domain, pattr->bus, pattr->dev, pattr->func, config[HWLOC_PCI_PRIMARY_BUS]);
|
|
}
|
|
|
|
battr->upstream_type = HWLOC_OBJ_BRIDGE_PCI;
|
|
battr->downstream_type = HWLOC_OBJ_BRIDGE_PCI;
|
|
battr->downstream.pci.domain = pattr->domain;
|
|
battr->downstream.pci.secondary_bus = config[HWLOC_PCI_SECONDARY_BUS];
|
|
battr->downstream.pci.subordinate_bus = config[HWLOC_PCI_SUBORDINATE_BUS];
|
|
|
|
if (battr->downstream.pci.secondary_bus <= pattr->bus
|
|
|| battr->downstream.pci.subordinate_bus <= pattr->bus
|
|
|| battr->downstream.pci.secondary_bus > battr->downstream.pci.subordinate_bus) {
|
|
/* This should catch most cases of invalid bridge information
|
|
* (e.g. 00 for secondary and subordinate).
|
|
* Ideally we would also check that [secondary-subordinate] is included
|
|
* in the parent bridge [secondary+1:subordinate]. But that's hard to do
|
|
* because objects may be discovered out of order (especially in the fsroot case).
|
|
*/
|
|
hwloc_debug(" %04x:%02x:%02x.%01x bridge has invalid secondary-subordinate buses [%02x-%02x]\n",
|
|
pattr->domain, pattr->bus, pattr->dev, pattr->func,
|
|
battr->downstream.pci.secondary_bus, battr->downstream.pci.subordinate_bus);
|
|
hwloc_free_unlinked_object(obj);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Return a short static name for a 16-bit PCI class ID
 * (base class in the high byte, subclass in the low byte).
 *
 * A known class ID gets its specific name, otherwise the generic name of
 * the base class is returned, or "Other" if the base class is unknown.
 * The returned string must not be modified or freed.
 */
const char *
hwloc_pci_class_string(unsigned short class_id)
{
  /* See https://pci-ids.ucw.cz/read/PD/ */
  switch ((class_id & 0xff00) >> 8) {
    case 0x00:
      switch (class_id) {
        case 0x0001: return "VGA";
      }
      /* no generic name for this base class, use "Other" */
      break;
    case 0x01:
      switch (class_id) {
        case 0x0100: return "SCSI";
        case 0x0101: return "IDE";
        case 0x0102: return "Floppy";
        case 0x0103: return "IPI";
        case 0x0104: return "RAID";
        case 0x0105: return "ATA";
        case 0x0106: return "SATA";
        case 0x0107: return "SAS";
        case 0x0108: return "NVMExp";
      }
      return "Storage";
    case 0x02:
      switch (class_id) {
        case 0x0200: return "Ethernet";
        case 0x0201: return "TokenRing";
        case 0x0202: return "FDDI";
        case 0x0203: return "ATM";
        case 0x0204: return "ISDN";
        case 0x0205: return "WorldFip";
        case 0x0206: return "PICMG";
        case 0x0207: return "InfiniBand";
        case 0x0208: return "Fabric";
      }
      return "Network";
    case 0x03:
      switch (class_id) {
        case 0x0300: return "VGA";
        case 0x0301: return "XGA";
        case 0x0302: return "3D";
      }
      return "Display";
    case 0x04:
      switch (class_id) {
        case 0x0400: return "MultimediaVideo";
        case 0x0401: return "MultimediaAudio";
        case 0x0402: return "Telephony";
        case 0x0403: return "AudioDevice";
      }
      return "Multimedia";
    case 0x05:
      switch (class_id) {
        case 0x0500: return "RAM";
        case 0x0501: return "Flash";
      }
      return "Memory";
    case 0x06:
      switch (class_id) {
        case 0x0600: return "HostBridge";
        case 0x0601: return "ISABridge";
        case 0x0602: return "EISABridge";
        case 0x0603: return "MicroChannelBridge";
        case 0x0604: return "PCIBridge";
        case 0x0605: return "PCMCIABridge";
        case 0x0606: return "NubusBridge";
        case 0x0607: return "CardBusBridge";
        case 0x0608: return "RACEwayBridge";
        case 0x0609: return "SemiTransparentPCIBridge";
        case 0x060a: return "InfiniBandPCIHostBridge";
      }
      return "Bridge";
    case 0x07:
      switch (class_id) {
        case 0x0700: return "Serial";
        case 0x0701: return "Parallel";
        case 0x0702: return "MultiportSerial";
        case 0x0703: return "Modem";
        case 0x0704: return "GPIB";
        case 0x0705: return "SmartCard";
      }
      return "Communication";
    case 0x08:
      switch (class_id) {
        case 0x0800: return "PIC";
        case 0x0801: return "DMA";
        case 0x0802: return "Timer";
        case 0x0803: return "RTC";
        case 0x0804: return "PCIHotPlug";
        case 0x0805: return "SDHost";
        case 0x0806: return "IOMMU";
      }
      return "SystemPeripheral";
    case 0x09:
      switch (class_id) {
        case 0x0900: return "Keyboard";
        case 0x0901: return "DigitizerPen";
        case 0x0902: return "Mouse";
        case 0x0903: return "Scanner";
        case 0x0904: return "Gameport";
      }
      return "Input";
    case 0x0a:
      return "DockingStation";
    case 0x0b:
      switch (class_id) {
        case 0x0b00: return "386";
        case 0x0b01: return "486";
        case 0x0b02: return "Pentium";
        /* 0x0b03 and 0x0b04 might be Pentium and P6 ? */
        case 0x0b10: return "Alpha";
        case 0x0b20: return "PowerPC";
        case 0x0b30: return "MIPS";
        case 0x0b40: return "Co-Processor";
      }
      return "Processor";
    case 0x0c:
      switch (class_id) {
        case 0x0c00: return "FireWire";
        case 0x0c01: return "ACCESS";
        case 0x0c02: return "SSA";
        case 0x0c03: return "USB";
        case 0x0c04: return "FibreChannel";
        case 0x0c05: return "SMBus";
        case 0x0c06: return "InfiniBand";
        case 0x0c07: return "IPMI-SMIC";
        case 0x0c08: return "SERCOS";
        case 0x0c09: return "CANBUS";
      }
      return "SerialBus";
    case 0x0d:
      switch (class_id) {
        case 0x0d00: return "IRDA";
        case 0x0d01: return "ConsumerIR";
        case 0x0d10: return "RF";
        case 0x0d11: return "Bluetooth";
        case 0x0d12: return "Broadband";
        case 0x0d20: return "802.1a";
        case 0x0d21: return "802.1b";
      }
      return "Wireless";
    case 0x0e:
      switch (class_id) {
        case 0x0e00: return "I2O";
      }
      return "Intelligent";
    case 0x0f:
      return "Satellite";
    case 0x10:
      return "Encryption";
    case 0x11:
      return "SignalProcessing";
    case 0x12:
      return "ProcessingAccelerator";
    case 0x13:
      return "Instrumentation";
    case 0x40:
      return "Co-Processor";
    default:
      break;
  }
  return "Other";
}
|