1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2023 Oxide Computer Company
14 */
15
16 /*
17 * The purpose of this module is to build topology information for the
18 * 'pcie' scheme. It works in conjunction with the XML file that corresponds to
19 * a given hardware platform. That XML file provides the basic structure and
20 * passes responsibility for enumerating various parts of the PCIe topology
21 * tree to this module.
22 *
23 * Since the PCIe topology tree structure depends very much on the underlying
24 * hardware and its configuration, almost all of the tree is built
25 * programmatically. The static XML file defines just the root of the tree, a
26 * CPU enumerator, and then a PCIe root complex enumerator under each CPU.
27 * Each root complex is discovered, attached to the corresponding CPU and then
28 * recursively enumerated to discover bridges, switches, devices, etc. that lie
29 * underneath. Platform-specific modules can augment the discovered tree by
30 * adding labels or nodes that cannot be discovered, such as re-timers.
31 *
32 * When this module is first asked to enumerate, it traverses the devinfo tree
33 * and builds an interim tree view of the various PCI and PCIe devices found
34 * there. This interim tree is used to drive building topology nodes on this
35 * and subsequent enumerations.
36 */
37
38 #include <fcntl.h>
39 #include <libdevinfo.h>
40 #include <priv.h>
41 #include <stdbool.h>
42 #include <strings.h>
43 #include <unistd.h>
44 #include <sys/debug.h>
45 #include <sys/pci.h>
46 #include <sys/pcie.h>
47 #include <sys/stat.h>
48 #include <sys/types.h>
49 #include <sys/devfm.h>
50
51 #include <sys/fm/protocol.h>
52 #include <fm/topo_mod.h>
53 #include <fm/topo_hc.h>
54 #include <fm/topo_list.h>
55 #include <fm/topo_method.h>
56 #include <fm/fmd_agent.h>
57
58 #include <io/amdzen/amdzen_topo.h>
59
60 #include "topo_pcie_impl.h"
61
/*
 * State handed to pcie_rootnexus_enum_cb() through the devinfo walker's
 * opaque argument.
 */
typedef struct cbdata {
	pcie_t *cbd_pcie;	/* module state; receives discovered nexuses */
	topo_mod_t *cbd_mod;	/* module handle for logging and allocation */
	bool cbd_fatal;		/* set by the callback if node creation fails */
} cbdata_t;
67
/*
 * Signature shared by the enumeration handlers stored in a pcie_enum_t.
 * pcie_enum() calls a handler with the module, the module state, the matching
 * table entry, the parent node, the node being revisited (NULL on the initial
 * range enumeration) and the instance range [min, max].
 */
typedef struct pcie_enum pcie_enum_t;
typedef int (*pcie_enum_f)(topo_mod_t *, pcie_t *, const pcie_enum_t *,
    tnode_t *, tnode_t *, topo_instance_t, topo_instance_t);
71
/*
 * Flags stored in pcie_enum_t.pe_flags and consulted by pcie_enum().
 */
typedef enum {
	/*
	 * This flag is used to indicate that we are okay operating on a range
	 * of instances. This should only happen during the range enumeration
	 * phase, not during the post-creation enumeration phase.
	 */
	PCIE_ENUM_F_MULTI_RANGE = 1 << 0,
} pcie_enum_flags_t;
80
/*
 * One entry of the enumerator table: ties a node name to its handlers.
 */
struct pcie_enum {
	const char *pe_name;		/* name matched by pcie_enum() */
	pcie_enum_flags_t pe_flags;	/* PCIE_ENUM_F_* flags */
	pcie_enum_f pe_range_enum;	/* initial (range) enumeration */
	pcie_enum_f pe_post_enum;	/* post pass handler; may be NULL */
};
87
/* Forward declaration; the definition appears further down in this file. */
static tnode_t *pcie_topo_add_bridge(topo_mod_t *, pcie_t *, tnode_t *,
    pcie_node_t *);
90
91 static void
pcie_node_print(topo_mod_t * mod,pcie_t * pcie,topo_list_t * list,uint_t indent)92 pcie_node_print(topo_mod_t *mod, pcie_t *pcie, topo_list_t *list, uint_t indent)
93 {
94 pcie_node_t *node;
95
96 for (node = topo_list_next(list); node != NULL;
97 node = topo_list_next(node)) {
98 char suffix[0x10] = "";
99
100 if (node->pn_type == PCIE_NODE_ROOTNEXUS) {
101 (void) snprintf(suffix, sizeof (suffix), " cpu%" PRIu64,
102 node->pn_cpu);
103 }
104 topo_mod_dprintf(mod, "%*s[%x/%x/%x] %s [%s%d] {%x/%x/%x} %s%s",
105 indent, "",
106 node->pn_bus, node->pn_dev, node->pn_func,
107 pcie_type_name(node->pn_type),
108 node->pn_drvname == NULL ? "" : node->pn_drvname,
109 node->pn_drvinst,
110 node->pn_class, node->pn_subclass, node->pn_intf,
111 node->pn_path, suffix);
112 pcie_node_print(mod, pcie, &node->pn_children, indent + 4);
113 }
114 }
115
116 static void
pcie_node_free(topo_mod_t * mod,pcie_t * pcie,pcie_node_t * node)117 pcie_node_free(topo_mod_t *mod, pcie_t *pcie, pcie_node_t *node)
118 {
119 topo_mod_strfree(mod, node->pn_drvname);
120 if (node->pn_path != NULL)
121 di_devfs_path_free((char *)node->pn_path);
122 topo_mod_free(mod, node, sizeof (*node));
123 }
124
125 static pcie_node_t *
pcie_node_create(topo_mod_t * mod,pcie_t * pcie,di_node_t did,pcie_node_type_t type,pcie_node_t * parent)126 pcie_node_create(topo_mod_t *mod, pcie_t *pcie, di_node_t did,
127 pcie_node_type_t type, pcie_node_t *parent)
128 {
129 char *drvname;
130 pcie_node_t *node;
131 char *path;
132 int inst;
133
134 drvname = di_driver_name(did);
135 if (drvname != NULL) {
136 drvname = topo_mod_strdup(mod, drvname);
137 if (drvname == NULL) {
138 (void) topo_mod_seterrno(mod, EMOD_NOMEM);
139 topo_mod_dprintf(mod,
140 "failed to duplicate driver name");
141 return (NULL);
142 }
143 }
144
145 inst = di_instance(did);
146 path = di_devfs_path(did);
147
148 if (path == NULL) {
149 topo_mod_dprintf(mod, "failed to get /devices path for "
150 "%s%d: %s", drvname == NULL ? "<unknown>" : drvname, inst,
151 strerror(errno));
152 (void) topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM);
153 topo_mod_strfree(mod, drvname);
154 return (NULL);
155 }
156
157 if ((node = topo_mod_zalloc(mod, sizeof (*node))) == NULL) {
158 (void) topo_mod_seterrno(mod, EMOD_NOMEM);
159 topo_mod_strfree(mod, node->pn_drvname);
160 di_devfs_path_free(path);
161 return (NULL);
162 }
163
164 node->pn_pcie = pcie;
165 node->pn_did = did;
166 node->pn_type = type;
167 node->pn_path = path;
168 node->pn_drvname = drvname;
169 node->pn_drvinst = inst;
170
171 if (parent != NULL) {
172 topo_list_append(&parent->pn_children, node);
173 node->pn_parent = parent;
174 }
175
176 return (node);
177 }
178
179 static tnode_t *
pcie_topo_node_create(topo_mod_t * mod,pcie_t * pcie,tnode_t * parent,pcie_node_t * node,const char * name,topo_instance_t inst)180 pcie_topo_node_create(topo_mod_t *mod, pcie_t *pcie, tnode_t *parent,
181 pcie_node_t *node, const char *name, topo_instance_t inst)
182 {
183 nvlist_t *fmri, *auth;
184 tnode_t *tn, *dtn;
185
186 topo_mod_dprintf(mod, "topo node create %s=%" PRIu64 " (%s)",
187 name, inst, node == NULL ? "" : node->pn_path);
188
189 auth = mod_pcie_platform_auth(mod, pcie, parent);
190 if (auth == NULL) {
191 topo_mod_dprintf(mod, "could not get auth from parent: %s",
192 topo_mod_errmsg(mod));
193 return (NULL);
194 }
195 fmri = topo_mod_pciefmri(mod, parent, FM_PCIE_SCHEME_VERSION,
196 name, inst, auth);
197
198 if (fmri == NULL) {
199 topo_mod_dprintf(mod, "fmri creation failed: %s",
200 topo_mod_errmsg(mod));
201 nvlist_free(auth);
202 return (NULL);
203 }
204
205 tn = topo_node_bind(mod, parent, name, inst, fmri);
206 if (tn == NULL) {
207 topo_mod_dprintf(mod, "bind failed: %s", topo_mod_errmsg(mod));
208 goto error;
209 }
210
211 topo_node_setspecific(tn, (void *)node);
212
213 if (node != NULL) {
214 if (!topo_pcie_set_pci_props(mod, pcie, node, tn))
215 goto error;
216
217 node->pn_inst = inst;
218 }
219
220 topo_pgroup_hcset(tn, auth);
221
222 /*
223 * Give the platform-specific module an opportunity to decorate or
224 * extend the new topology node.
225 */
226 dtn = mod_pcie_platform_topo_node_decorate(mod, pcie, node, tn);
227 if (dtn == NULL)
228 goto error;
229
230 nvlist_free(auth);
231 nvlist_free(fmri);
232 return (dtn);
233
234 error:
235
236 nvlist_free(fmri);
237 topo_node_unbind(tn);
238 return (NULL);
239 }
240
/*
 * Node release callback (installed as tmo_release in pcie_ops). Clears the
 * node-specific pointer on 'tn'; nothing is freed here and the module handle
 * is not used.
 */
static void
pcie_topo_node_free(topo_mod_t *mod __unused, tnode_t *tn)
{
	topo_node_setspecific(tn, NULL);
}
246
247 static void
pcie_socket_map(topo_mod_t * mod,pcie_t * pcie,pcie_node_t * node)248 pcie_socket_map(topo_mod_t *mod, pcie_t *pcie, pcie_node_t *node)
249 {
250 int32_t *busrange;
251 nvlist_t **dfs;
252 uint_t ndfs;
253 int nval;
254 int err;
255
256 node->pn_cpu = 0;
257
258 if (pcie->tp_cpupcidata == NULL)
259 return;
260
261 nval = di_prop_lookup_ints(DDI_DEV_T_ANY, node->pn_did,
262 DI_BUSRANGE, &busrange);
263 if (nval != 2) {
264 topo_mod_dprintf(mod, "failed to retrieve nexus bus range");
265 return;
266 }
267
268 err = nvlist_lookup_nvlist_array(pcie->tp_cpupcidata,
269 FM_PCI_DATA_DFS, &dfs, &ndfs);
270 if (err != 0 || dfs == NULL) {
271 topo_mod_dprintf(mod, "CPU PCI data does not contain %s",
272 FM_PCI_DATA_DFS);
273 return;
274 }
275
276 for (uint_t i = 0; i < ndfs; i++) {
277 uint32_t nb_busno;
278 int32_t sockid;
279
280 err = nvlist_lookup_int32(dfs[i], FM_PCI_DATA_CHIP_ID,
281 &sockid);
282 if (err != 0) {
283 topo_mod_dprintf(mod, "DF[%d] is missing key '%s'",
284 i, FM_PCI_DATA_CHIP_ID);
285 continue;
286 }
287 err = nvlist_lookup_uint32(dfs[i], FM_PCI_DATA_NB_BUSNO,
288 &nb_busno);
289 if (err != 0) {
290 topo_mod_dprintf(mod, "DF[%d] is missing key '%s'",
291 i, FM_PCI_DATA_NB_BUSNO);
292 continue;
293 }
294 if (busrange[0] <= nb_busno) {
295 node->pn_cpu = sockid;
296 topo_mod_dprintf(mod,
297 "Mapped root complex %s to socket %" PRIu64,
298 node->pn_path, node->pn_cpu);
299 break;
300 }
301 }
302 }
303
304 static bool
pcie_physcpu_enum(topo_mod_t * mod,pcie_t * pcie)305 pcie_physcpu_enum(topo_mod_t *mod, pcie_t *pcie)
306 {
307 fmd_agent_hdl_t *hdl;
308
309 if ((hdl = fmd_agent_open(FMD_AGENT_VERSION)) == NULL) {
310 topo_mod_dprintf(mod, "failed to open fmd agent interface: %s",
311 strerror(errno));
312 return (false);
313 }
314
315 if (fmd_agent_chip_count(hdl, &pcie->tp_nchip) != 0) {
316 topo_mod_dprintf(mod,
317 "failed to retrieve physical CPU count: %s",
318 fmd_agent_errmsg(hdl));
319 fmd_agent_close(hdl);
320 return (false);
321 }
322
323 if (fmd_agent_physcpu_pci(hdl, &pcie->tp_cpupcidata) != 0) {
324 topo_mod_dprintf(mod,
325 "failed to retrieve physical CPU PCI data: %s",
326 fmd_agent_errmsg(hdl));
327 /*
328 * This is not fatal, we just won't be able to properly map
329 * root complexes to physical CPUs and will report them all
330 * as being under the first CPU.
331 */
332 }
333
334 fmd_agent_close(hdl);
335
336 return (true);
337 }
338
339 static int
pcie_rootnexus_enum_cb(di_node_t did,void * arg)340 pcie_rootnexus_enum_cb(di_node_t did, void *arg)
341 {
342 cbdata_t *cbd = arg;
343 topo_mod_t *mod = cbd->cbd_mod;
344 pcie_t *pcie = cbd->cbd_pcie;
345 char *compat;
346 bool found = false;
347 int *ents, nents;
348
349 nents = di_prop_lookup_strings(DDI_DEV_T_ANY, did, DI_COMPATPROP,
350 &compat);
351 if (nents > 0) {
352 for (uint_t i = 0; i < nents; i++) {
353 if (strcmp(PCIE_ROOT_NEXUS, compat) == 0) {
354 found = true;
355 break;
356 }
357 compat += strlen(compat) + 1;
358 }
359 }
360
361 if (!found) {
362 /* Check for a PCI nexus */
363 char *drv = di_driver_name(did);
364
365 if (drv != NULL && strcmp(drv, "pci") == 0 &&
366 di_prop_lookup_ints(DDI_DEV_T_ANY, did, DI_BUSRANGE,
367 &ents) == 2) {
368 found = true;
369 }
370 }
371
372 if (!found)
373 return (DI_WALK_CONTINUE);
374
375 pcie_node_t *node = pcie_node_create(mod, pcie, did,
376 PCIE_NODE_ROOTNEXUS, NULL);
377 if (node == NULL) {
378 topo_mod_dprintf(mod,
379 "failed to create root nexus pcie node: %s",
380 topo_mod_errmsg(mod));
381 cbd->cbd_fatal = true;
382 }
383
384 pcie_socket_map(mod, pcie, node);
385
386 topo_list_append(&pcie->tp_rootnexus, node);
387
388 return (DI_WALK_PRUNECHILD);
389 }
390
391 static bool
pcie_rootnexus_enum(topo_mod_t * mod,pcie_t * pcie)392 pcie_rootnexus_enum(topo_mod_t *mod, pcie_t *pcie)
393 {
394 cbdata_t cbd = {
395 .cbd_pcie = pcie,
396 .cbd_mod = mod,
397 .cbd_fatal = false
398 };
399
400 (void) di_walk_node(pcie->tp_devinfo, DI_WALK_CLDFIRST,
401 &cbd, pcie_rootnexus_enum_cb);
402
403 return (!cbd.cbd_fatal);
404 }
405
406 static pcie_node_t *
pcie_process_node(topo_mod_t * mod,pcie_t * pcie,pcie_node_t * parent,di_node_t did)407 pcie_process_node(topo_mod_t *mod, pcie_t *pcie, pcie_node_t *parent,
408 di_node_t did)
409 {
410 int nents;
411 int *ents;
412 int class, subclass, interface, bus, dev, func;
413 uint16_t pcie_type;
414 bool is_pcie, is_pcibr;
415 pcie_node_t *node = NULL;
416 pcie_node_type_t type;
417
418 nents = di_prop_lookup_ints(DDI_DEV_T_ANY, did, DI_CLASSPROP, &ents);
419 if (nents != 1)
420 return (NULL);
421
422 class = GETCLASS(ents[0]);
423 subclass = GETSUBCLASS(ents[0]);
424 interface = GETINTF(ents[0]);
425
426 nents = di_prop_lookup_ints(DDI_DEV_T_ANY, did, DI_REGPROP, &ents);
427 if (nents > 0) {
428 bus = PCI_REG_BUS_G(ents[0]);
429 dev = PCI_REG_DEV_G(ents[0]);
430 func = PCI_REG_FUNC_G(ents[0]);
431 } else {
432 bus = dev = func = -1;
433 }
434
435 is_pcibr = (class == PCI_CLASS_BRIDGE && subclass == PCI_BRIDGE_PCI);
436
437 nents = di_prop_lookup_ints(DDI_DEV_T_ANY, did, DI_PCIETYPPROP, &ents);
438 if (nents == 1) {
439 is_pcie = true;
440 pcie_type = (uint16_t)(ents[0] << PCIE_PCIECAP_DEV_TYPE_SHIFT);
441 } else {
442 is_pcie = false;
443 pcie_type = PCIE_PCIECAP_DEV_TYPE_PCI_DEV;
444 }
445
446 type = PCIE_NODE_PCI_DEV;
447 if (!is_pcie) {
448 type = is_pcibr ? PCIE_NODE_PCIE_PCI : PCIE_NODE_PCI_DEV;
449 } else if (is_pcibr) {
450 switch (pcie_type) {
451 case PCIE_PCIECAP_DEV_TYPE_ROOT:
452 type = PCIE_NODE_ROOTPORT;
453 break;
454 case PCIE_PCIECAP_DEV_TYPE_UP:
455 type = PCIE_NODE_SWITCH_UP;
456 break;
457 case PCIE_PCIECAP_DEV_TYPE_DOWN:
458 type = PCIE_NODE_SWITCH_DOWN;
459 break;
460 case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI:
461 type = PCIE_NODE_PCIE_PCI;
462 break;
463 case PCIE_PCIECAP_DEV_TYPE_PCI2PCIE:
464 type = PCIE_NODE_PCI_PCIE;
465 break;
466 default:
467 topo_mod_dprintf(mod,
468 "pcie_type unhandled bridge type 0x%x", pcie_type);
469 abort();
470 }
471 } else {
472 switch (pcie_type) {
473 case PCIE_PCIECAP_DEV_TYPE_ROOT:
474 type = PCIE_NODE_ROOTPORT;
475 break;
476 case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV:
477 type = PCIE_NODE_PCIE_DEV;
478 break;
479 case PCIE_PCIECAP_DEV_TYPE_PCI_DEV:
480 type = PCIE_NODE_PCI_DEV;
481 break;
482 case PCIE_PCIECAP_DEV_TYPE_RC_IEP:
483 case PCIE_PCIECAP_DEV_TYPE_RC_EC:
484 /* These types are not currently handled */
485 return (NULL);
486 default:
487 topo_mod_dprintf(mod, "pcie_type unhandled type 0x%x",
488 pcie_type);
489 abort();
490 }
491 }
492
493 node = pcie_node_create(mod, pcie, did, type, parent);
494
495 node->pn_class = class;
496 node->pn_subclass = subclass;
497 node->pn_intf = interface;
498 node->pn_bus = bus;
499 node->pn_dev = dev;
500 node->pn_func = func;
501 node->pn_is_pcie = is_pcie;
502
503 return (node);
504 }
505
506 static void
pcie_enum_children(topo_mod_t * mod,pcie_t * pcie,pcie_node_t * parent)507 pcie_enum_children(topo_mod_t *mod, pcie_t *pcie, pcie_node_t *parent)
508 {
509 di_node_t did;
510
511 for (did = di_child_node(parent->pn_did); did != DI_NODE_NIL;
512 did = di_sibling_node(did)) {
513 pcie_node_t *node;
514
515 node = pcie_process_node(mod, pcie, parent, did);
516 if (node != NULL)
517 pcie_enum_children(mod, pcie, node);
518 }
519 }
520
521 static void
pcie_rootnexus_enum_children(topo_mod_t * mod,pcie_t * pcie)522 pcie_rootnexus_enum_children(topo_mod_t *mod, pcie_t *pcie)
523 {
524 pcie_node_t *nexus;
525
526 for (nexus = topo_list_next(&pcie->tp_rootnexus); nexus != NULL;
527 nexus = topo_list_next(nexus)) {
528 topo_mod_dprintf(mod, "enumerate nexus %s", nexus->pn_path);
529 pcie_enum_children(mod, pcie, nexus);
530 }
531 }
532
533 static bool
pcie_gather(topo_mod_t * mod,pcie_t * pcie)534 pcie_gather(topo_mod_t *mod, pcie_t *pcie)
535 {
536 if (pcie->tp_enumdone)
537 return (true);
538
539 if (!pcie_physcpu_enum(mod, pcie))
540 return (false);
541
542 if (!pcie_rootnexus_enum(mod, pcie))
543 return (false);
544
545 pcie_rootnexus_enum_children(mod, pcie);
546
547 pcie->tp_enumdone = true;
548
549 pcie_node_print(mod, pcie, &pcie->tp_rootnexus, 0);
550
551 return (true);
552 }
553
554 static tnode_t *
pcie_topo_add_link(topo_mod_t * mod,pcie_t * pcie,tnode_t * pnode,pcie_node_t * node)555 pcie_topo_add_link(topo_mod_t *mod, pcie_t *pcie, tnode_t *pnode,
556 pcie_node_t *node)
557 {
558 tnode_t *tn;
559
560 if (!pcie_topo_range_create(mod, pnode, "link", 0, 0))
561 return (NULL);
562
563 tn = pcie_topo_node_create(mod, pcie, pnode, NULL, "link", 0);
564 if (tn == NULL)
565 return (NULL);
566
567 (void) topo_pcie_set_link_props(mod, pcie, node, tn);
568
569 return (tn);
570 }
571
572 static tnode_t *
pcie_topo_add_bridge(topo_mod_t * mod,pcie_t * pcie,tnode_t * pnode,pcie_node_t * node)573 pcie_topo_add_bridge(topo_mod_t *mod, pcie_t *pcie, tnode_t *pnode,
574 pcie_node_t *node)
575 {
576 tnode_t *tn = pnode;
577
578 if (!pcie_topo_range_create(mod, tn, "port", 0, 0))
579 return (NULL);
580
581 tn = pcie_topo_node_create(mod, pcie, tn, NULL, "port", 0);
582 if (tn == NULL)
583 return (NULL);
584
585 (void) topo_pcie_set_port_props(mod, pcie, node, tn,
586 TOPO_PORT_DOWNSTREAM);
587
588 tn = pcie_topo_add_link(mod, pcie, tn, node);
589
590 return (tn);
591 }
592
593 static tnode_t *
pcie_topo_add_device(topo_mod_t * mod,pcie_t * pcie,tnode_t * pnode,pcie_node_t * node,topo_instance_t * inst)594 pcie_topo_add_device(topo_mod_t *mod, pcie_t *pcie, tnode_t *pnode,
595 pcie_node_t *node, topo_instance_t *inst)
596 {
597 topo_instance_t fninst;
598 tnode_t *dev, *func;
599
600 topo_mod_dprintf(mod, "add_device: %s (%s)",
601 node->pn_path, pcie_type_name(node->pn_type));
602
603 if (node->pn_parent->pn_type == PCIE_NODE_ROOTNEXUS) {
604 /*
605 * Devices directly under under the artificial root complex
606 * node are presented as functions, without an intermediate
607 * device or any attempt to represent the internal fabric.
608 */
609 dev = pnode;
610 fninst = *inst;
611 (*inst)++;
612 } else {
613 pcie_node_t *parent;
614 size_t idx;
615
616 if (node->pn_type == PCIE_NODE_SWITCH_DOWN) {
617 /*
618 * A downstream switch is a child of an upstream one
619 * in the devinfo tree, but is represented in the topo
620 * tree underneath the same device as the upstream
621 * bridge.
622 */
623 VERIFY(node->pn_parent->pn_type == PCIE_NODE_SWITCH_UP);
624 parent = node->pn_parent->pn_parent;
625 idx = node->pn_parent->pn_dev;
626 } else {
627 parent = node->pn_parent;
628 idx = node->pn_dev;
629 }
630
631 VERIFY3U(idx, <=, sizeof (parent->pn_devices));
632
633 dev = parent->pn_devices[idx];
634
635 if (dev == NULL) {
636 topo_instance_t max;
637 tnode_t *port;
638
639 max = topo_list_size(&parent->pn_children);
640 if (node->pn_type == PCIE_NODE_SWITCH_UP)
641 max += topo_list_size(&node->pn_children);
642
643 if (!pcie_topo_range_create(mod, pnode, "port", 0, max))
644 return (NULL);
645 port = pcie_topo_node_create(mod, pcie, pnode, NULL,
646 "port", *inst);
647
648 if (port == NULL)
649 return (NULL);
650
651 (void) topo_pcie_set_port_props(mod, pcie, node, port,
652 TOPO_PORT_UPSTREAM);
653
654 if (!pcie_topo_range_create(mod, port, "device", 0,
655 max)) {
656 return (NULL);
657 }
658
659 dev = pcie_topo_node_create(mod, pcie, port, NULL,
660 "device", *inst);
661 if (dev == NULL)
662 return (NULL);
663 (*inst)++;
664 parent->pn_devices[idx] = dev;
665 parent->pn_devfunc[idx] = 0;
666 if (!pcie_topo_range_create(mod, dev, "function", 0,
667 max)) {
668 return (NULL);
669 }
670 }
671 fninst = parent->pn_devfunc[idx];
672 /*
673 * pn_devfunc records the next available function number for
674 * this device.
675 */
676 parent->pn_devfunc[idx]++;
677 }
678
679 func = pcie_topo_node_create(mod, pcie, dev, node, "function", fninst);
680
681 return (func);
682 }
683
684 static bool
pcie_topo_process_functions(topo_mod_t * mod,pcie_t * pcie,tnode_t * pnode,pcie_node_t * node,topo_instance_t * fninst)685 pcie_topo_process_functions(topo_mod_t *mod, pcie_t *pcie, tnode_t *pnode,
686 pcie_node_t *node, topo_instance_t *fninst)
687 {
688 pcie_node_t *func;
689 topo_instance_t i;
690 size_t children = topo_list_size(&node->pn_children);
691
692 topo_mod_dprintf(mod,
693 "-> functions for %s (%s) [%"PRIu64"] children %zd",
694 node->pn_path, pcie_type_name(node->pn_type), *fninst, children);
695
696 for (func = topo_list_next(&node->pn_children); func != NULL;
697 func = topo_list_next(func)) {
698 tnode_t *fn;
699
700 topo_mod_dprintf(mod, " fn %s (%s)",
701 func->pn_path, pcie_type_name(func->pn_type));
702
703 switch (func->pn_type) {
704 case PCIE_NODE_ROOTPORT:
705 fn = pcie_topo_node_create(mod, pcie, pnode, func,
706 "function", *fninst);
707 (*fninst)++;
708 fn = pcie_topo_add_bridge(mod, pcie, fn, func);
709 if (fn == NULL)
710 return (false);
711 if (topo_list_size(&func->pn_children) == 0)
712 break;
713 i = 0;
714 if (!pcie_topo_process_functions(mod, pcie, fn,
715 func, &i)) {
716 return (false);
717 }
718 break;
719 case PCIE_NODE_PCIE_PCI:
720 case PCIE_NODE_PCI_PCIE:
721 fn = pcie_topo_add_device(mod, pcie, pnode, func,
722 fninst);
723 if (fn == NULL)
724 return (false);
725 fn = pcie_topo_add_bridge(mod, pcie, fn, func);
726 if (fn == NULL)
727 return (false);
728 i = 0;
729 if (!pcie_topo_process_functions(mod, pcie, fn,
730 func, &i)) {
731 return (false);
732 }
733 break;
734 case PCIE_NODE_SWITCH_UP:
735 fn = pcie_topo_add_device(mod, pcie, pnode, func,
736 fninst);
737 if (fn == NULL)
738 return (false);
739 if (!pcie_topo_process_functions(mod, pcie, pnode,
740 func, fninst)) {
741 return (false);
742 }
743 break;
744 case PCIE_NODE_SWITCH_DOWN:
745 fn = pcie_topo_add_device(mod, pcie, pnode, func,
746 fninst);
747 if (fn == NULL)
748 return (false);
749 if (topo_list_size(&func->pn_children) == 0)
750 break;
751 fn = pcie_topo_add_bridge(mod, pcie, fn, func);
752 if (fn == NULL)
753 return (false);
754 i = 0;
755 if (!pcie_topo_process_functions(mod, pcie, fn,
756 func, &i)) {
757 return (false);
758 }
759 break;
760 case PCIE_NODE_PCI_DEV:
761 case PCIE_NODE_PCIE_DEV:
762 fn = pcie_topo_add_device(mod, pcie, pnode, func,
763 fninst);
764 if (fn == NULL)
765 return (false);
766 break;
767 default:
768 topo_mod_dprintf(mod, "unhandled device type %u (%s)",
769 func->pn_type, pcie_type_name(func->pn_type));
770 abort();
771 }
772 }
773
774 return (true);
775 }
776
777 static int
pcie_topo_enum_cpu(topo_mod_t * mod,pcie_t * pcie,const pcie_enum_t * pe,tnode_t * pnode,tnode_t * tnode,topo_instance_t min,topo_instance_t max)778 pcie_topo_enum_cpu(topo_mod_t *mod, pcie_t *pcie, const pcie_enum_t *pe,
779 tnode_t *pnode, tnode_t *tnode, topo_instance_t min, topo_instance_t max)
780 {
781 int ret = 0;
782
783 topo_mod_dprintf(mod, "physical CPU count: %u", pcie->tp_nchip);
784
785 for (uint_t chipid = 0; chipid < pcie->tp_nchip; chipid++) {
786 tnode_t *cpu;
787
788 if (chipid < min || chipid > max) {
789 topo_mod_dprintf(mod, "cpu chipid %" PRId32 " "
790 "out of range [%" PRIu64 ", %" PRIu64 "]",
791 chipid, min, max);
792 continue;
793 }
794
795 topo_mod_dprintf(mod, "creating CPU chip %" PRId32, chipid);
796
797 cpu = pcie_topo_node_create(mod, pcie, pnode, NULL,
798 pe->pe_name, chipid);
799 if (cpu == NULL) {
800 ret = -1;
801 break;
802 }
803 }
804
805 return (ret);
806 }
807
808 static int
pcie_topo_enum_root_complex(topo_mod_t * mod,pcie_t * pcie,const pcie_enum_t * pe,tnode_t * pnode,tnode_t * tnode,topo_instance_t min,topo_instance_t max)809 pcie_topo_enum_root_complex(topo_mod_t *mod, pcie_t *pcie,
810 const pcie_enum_t *pe, tnode_t *pnode, tnode_t *tnode,
811 topo_instance_t min, topo_instance_t max)
812 {
813 topo_instance_t rcinst, cpuinst;
814 pcie_node_t *rc;
815
816 cpuinst = topo_node_instance(pnode);
817
818 for (rcinst = min, rc = topo_list_next(&pcie->tp_rootnexus);
819 rc != NULL; rc = topo_list_next(rc)) {
820 tnode_t *rcnode;
821 topo_instance_t fninst = 0;
822
823 if (rc->pn_cpu != cpuinst)
824 continue;
825
826 if (rcinst > max)
827 return (-1);
828
829 rcnode = pcie_topo_node_create(mod, pcie, pnode, rc,
830 pe->pe_name, rcinst);
831
832 if (rcnode == NULL)
833 return (-1);
834
835 if (!pcie_topo_range_create(mod, rcnode, "function", 0,
836 topo_list_size(&rc->pn_children))) {
837 return (-1);
838 }
839
840 if (!pcie_topo_process_functions(mod, pcie, rcnode, rc,
841 &fninst)) {
842 return (-1);
843 }
844
845 rcinst++;
846 }
847
848 return (0);
849 }
850
851 /*
852 * At present, the XML file only goes as far as:
853 * cpu -> root-complex
854 * with the "root-complex" enumerator iterating to complete the tree.
855 *
856 * This is because the tree structure is variable below each root complex.
857 */
858 const pcie_enum_t pcie_enum_common[] = {
859 {
860 .pe_name = CPU,
861 .pe_range_enum = pcie_topo_enum_cpu,
862 .pe_flags = PCIE_ENUM_F_MULTI_RANGE,
863 },
864 {
865 .pe_name = "root-complex",
866 .pe_range_enum = pcie_topo_enum_root_complex,
867 .pe_flags = PCIE_ENUM_F_MULTI_RANGE,
868 },
869 };
870
871 /*
872 * This is our module's primary enumerator entry point. All types that we
873 * declare and handle ourselves enter this function. In general, this is driven
874 * by the corresponding topology map and this means that we are called
875 * potentially twice by the XML processing logic for each range.
876 *
877 * 1) The first time we will be called is when we are being asked to enumerate
878 * a range declaration. The range declarations give us a number of different
879 * entries that we can possibly process and will ask us to create as many as
880 * we believe we make sense.
881 *
882 * 2) There is a second phase where we can be called into to take action. This
883 * occurs if there are XML <node> entries that are used to declare
884 * information about the node. The most common use case here is to decorate
885 * specific nodes with properties and property groups. When we are called
886 * this time, our instance tnode_t point directly to the node itself and not
887 * to the parent.
888 *
889 * In general, since the PCIe topology is somewhat arbitrary after the first
890 * couple of levels (CPUs, internal functions and root ports), we only provide
891 * enumerators into these first two levels of the tree. XML files are just
892 * wrappers around these enumerators and do not generally contain static
893 * annotations.
894 *
895 * In the current implementation, no handlers are registered for the second
896 * pass (that is, none of the entries in pcie_enum_common have a pe_post_enum
897 * callback).
898 */
899 static int
pcie_enum(topo_mod_t * mod,tnode_t * pnode,const char * name,topo_instance_t min,topo_instance_t max,void * modarg,void * data)900 pcie_enum(topo_mod_t *mod, tnode_t *pnode, const char *name,
901 topo_instance_t min, topo_instance_t max, void *modarg, void *data)
902 {
903 pcie_t *pcie;
904 const pcie_enum_t *pe;
905 const char *pname;
906 tnode_t *tn = NULL;
907 bool post, range;
908
909 topo_mod_dprintf(mod, "enum: %s [%" PRIu64 ", %" PRIu64 "] on "
910 "%s%" PRIu64, name, min, max,
911 topo_node_name(pnode), topo_node_instance(pnode));
912
913 /*
914 * Enumerating pcie requires PRIV_SYS_CONFIG. If the caller does not
915 * have that, we cannot enumerate. We return success so as not to block
916 * enumerating other schemes.
917 */
918 if (!priv_ineffect(PRIV_SYS_CONFIG)) {
919 topo_mod_dprintf(mod, "privilege %s is not in effect",
920 PRIV_SYS_CONFIG);
921 return (0);
922 }
923
924 if ((pcie = topo_mod_getspecific(mod)) == NULL)
925 return (-1);
926
927 if (!pcie_gather(mod, pcie))
928 return (-1);
929
930 /*
931 * Look for whether we are in the case where we've been asked to come
932 * back over our specific node. In this case the range's min/max will
933 * stay the same, but our node will have our own name. This means that
934 * we can't really have children as a parent right this moment.
935 */
936 pname = topo_node_name(pnode);
937 range = (min != max);
938 post = false;
939 if (strcmp(pname, name) == 0) {
940 topo_instance_t pinst = topo_node_instance(pnode);
941
942 VERIFY3U(pinst, >=, min);
943 VERIFY3U(pinst, <=, max);
944
945 tn = pnode;
946 pnode = topo_node_parent(tn);
947 pname = topo_node_name(pnode);
948 post = true;
949 }
950
951 topo_mod_dprintf(mod, "enum: %s for %s (parent %s=%" PRIu64 ")",
952 post ? "post" : "initial", name,
953 pname, topo_node_instance(pnode));
954
955 pe = NULL;
956 for (size_t i = 0; i < ARRAY_SIZE(pcie_enum_common); i++) {
957 if (strcmp(pcie_enum_common[i].pe_name, name) == 0) {
958 pe = &pcie_enum_common[i];
959 break;
960 }
961 }
962
963 if (pe == NULL) {
964 topo_mod_dprintf(mod, "enum: component %s unknown", name);
965 return (-1);
966 }
967
968 if (range && !post && (pe->pe_flags & PCIE_ENUM_F_MULTI_RANGE) == 0) {
969 topo_mod_dprintf(mod,
970 "enum: multi-instance range enumeration not supported");
971 return (topo_mod_seterrno(mod, EMOD_NODE_RANGE));
972 }
973
974 if (post) {
975 if (pe->pe_post_enum == NULL) {
976 topo_mod_dprintf(mod,
977 "enum: skipping post: no processing function");
978 return (0);
979 }
980 return (pe->pe_post_enum(mod, pcie, pe, pnode, tn, min, max));
981 }
982
983 /*
984 * While there are cases that we might get called into post-enumeration
985 * just because of how we've constructed the topo map even if we don't
986 * need to do anything (but we want to make sure it doesn't go to some
987 * other module), we pretty much always expect to have something for
988 * initial enumeration right now.
989 */
990 if (pe->pe_range_enum == NULL) {
991 topo_mod_dprintf(mod,
992 "enum: missing initial enumeration function!");
993 return (-1);
994 }
995
996 return (pe->pe_range_enum(mod, pcie, pe, pnode, tn, min, max));
997 }
998
/*
 * Store an opaque private-data pointer in the module state, replacing any
 * previous value. Always returns true.
 */
bool
pcie_set_platdata(pcie_t *pcie, void *val)
{
	pcie->tp_privdata = val;
	return (true);
}
1005
/*
 * Return the private-data pointer previously stored with
 * pcie_set_platdata().
 */
void *
pcie_get_platdata(const pcie_t *pcie)
{
	return (pcie->tp_privdata);
}
1011
1012 static void
pcie_free_tree(topo_mod_t * mod,pcie_t * pcie,pcie_node_t * node)1013 pcie_free_tree(topo_mod_t *mod, pcie_t *pcie, pcie_node_t *node)
1014 {
1015 pcie_node_t *child, *nchild;
1016
1017 for (child = topo_list_next(&node->pn_children); child != NULL;
1018 child = nchild) {
1019 nchild = topo_list_next(child);
1020 pcie_free_tree(mod, pcie, child);
1021
1022 }
1023
1024 pcie_node_free(mod, pcie, node);
1025 }
1026
1027 static void
pcie_free(topo_mod_t * mod,pcie_t * pcie)1028 pcie_free(topo_mod_t *mod, pcie_t *pcie)
1029 {
1030 pcie_node_t *nexus, *nnexus;
1031
1032 if (pcie == NULL)
1033 return;
1034 /* The devinfo handle came from fm, don't do anything ourselves. */
1035 pcie->tp_devinfo = DI_NODE_NIL;
1036
1037 if (pcie->tp_pcidb_hdl != NULL)
1038 pcidb_close(pcie->tp_pcidb_hdl);
1039
1040 for (nexus = topo_list_next(&pcie->tp_rootnexus); nexus != NULL;
1041 nexus = nnexus) {
1042 nnexus = topo_list_next(nexus);
1043 pcie_free_tree(mod, pcie, nexus);
1044 }
1045
1046 nvlist_free(pcie->tp_cpupcidata);
1047
1048 topo_mod_free(mod, pcie, sizeof (*pcie));
1049 }
1050
1051 static pcie_t *
pcie_alloc(topo_mod_t * mod)1052 pcie_alloc(topo_mod_t *mod)
1053 {
1054 pcie_t *pcie;
1055
1056 if ((pcie = topo_mod_zalloc(mod, sizeof (*pcie))) == NULL) {
1057 topo_mod_dprintf(mod,
1058 "Could not allocate memory for pcie_t: %s",
1059 topo_strerror(EMOD_NOMEM));
1060 return (NULL);
1061 }
1062
1063 if ((pcie->tp_devinfo = topo_mod_devinfo(mod)) == DI_NODE_NIL) {
1064 topo_mod_dprintf(mod, "No devinfo node from framework");
1065 pcie_free(mod, pcie);
1066 return (NULL);
1067 }
1068
1069 if ((pcie->tp_pcidb_hdl = pcidb_open(PCIDB_VERSION)) == NULL) {
1070 topo_mod_dprintf(mod, "Failed to open pcidb");
1071 /* Carry on regardless, we just won't have descriptions. */
1072 }
1073
1074 return (pcie);
1075 }
1076
/*
 * Module operations vector: pcie_enum() performs enumeration and
 * pcie_topo_node_free() is the node release callback.
 */
static const topo_modops_t pcie_ops = {
	.tmo_enum = pcie_enum,
	.tmo_release = pcie_topo_node_free,
};
1081
1082 static topo_modinfo_t pcie_mod = {
1083 .tmi_desc = PCIE,
1084 .tmi_scheme = FM_FMRI_SCHEME_PCIE,
1085 PCIE_VERSION,
1086 &pcie_ops,
1087 };
1088
1089 int
_topo_init(topo_mod_t * mod,topo_version_t version)1090 _topo_init(topo_mod_t *mod, topo_version_t version)
1091 {
1092 pcie_t *pcie;
1093
1094 if (getenv("TOPOPCIEDEBUG") != NULL)
1095 topo_mod_setdebug(mod);
1096
1097 topo_mod_dprintf(mod, "initialising %s enumerator\n", PCIE);
1098
1099 if (version != PCIE_VERSION)
1100 return (-1);
1101
1102 if ((pcie = pcie_alloc(mod)) == NULL)
1103 return (-1);
1104
1105 if (topo_mod_register(mod, &pcie_mod, TOPO_VERSION) != 0) {
1106 topo_mod_dprintf(mod, "failed to register module");
1107 pcie_free(mod, pcie);
1108 return (-1);
1109 }
1110
1111 if (!mod_pcie_platform_init(mod, pcie)) {
1112 topo_mod_unregister(mod);
1113 pcie_free(mod, pcie);
1114 return (-1);
1115 }
1116
1117 topo_mod_setspecific(mod, pcie);
1118
1119 return (0);
1120 }
1121
1122 void
_topo_fini(topo_mod_t * mod)1123 _topo_fini(topo_mod_t *mod)
1124 {
1125 pcie_t *pcie;
1126
1127 pcie = topo_mod_getspecific(mod);
1128
1129 if (pcie != NULL) {
1130 topo_mod_setspecific(mod, NULL);
1131 mod_pcie_platform_fini(mod, pcie);
1132 pcie_free(mod, pcie);
1133 }
1134
1135 topo_mod_unregister(mod);
1136 }
1137