xref: /illumos-gate/usr/src/lib/fm/topo/modules/common/pcibus/pcibus.c (revision a92282e44f968185a6bba094d1e5fece2da819cf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2020 Joyent, Inc.
25  */
26 
27 #include <sys/fm/protocol.h>
28 #include <assert.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <strings.h>
33 #include <alloca.h>
34 #include <fcntl.h>
35 #include <unistd.h>
36 #include <sys/param.h>
37 #include <sys/pci.h>
38 #include <sys/pcie.h>
39 #include <libdevinfo.h>
40 #include <libnvpair.h>
41 #include <fm/topo_mod.h>
42 #include <fm/topo_hc.h>
43 #include <sys/ddi_ufm.h>
44 #include <sys/stat.h>
45 #include <sys/types.h>
46 
47 #include <hostbridge.h>
48 #include <pcibus.h>
49 #include <did.h>
50 #include <did_props.h>
51 #include <util.h>
52 #include <topo_nic.h>
53 #include <topo_usb.h>
54 
55 extern txprop_t Bus_common_props[];
56 extern txprop_t Dev_common_props[];
57 extern txprop_t Fn_common_props[];
58 extern int Bus_propcnt;
59 extern int Dev_propcnt;
60 extern int Fn_propcnt;
61 
62 extern int platform_pci_label(topo_mod_t *mod, tnode_t *, nvlist_t *,
63     nvlist_t **);
64 extern int platform_pci_fru(topo_mod_t *mod, tnode_t *, nvlist_t *,
65     nvlist_t **);
66 static void pci_release(topo_mod_t *, tnode_t *);
67 static int pci_enum(topo_mod_t *, tnode_t *, const char *, topo_instance_t,
68     topo_instance_t, void *, void *);
69 static int pci_label(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
70     nvlist_t **);
71 static int pci_fru(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
72     nvlist_t **);
73 
74 static const topo_modops_t Pci_ops =
75 	{ pci_enum, pci_release };
76 static const topo_modinfo_t Pci_info =
77 	{ PCI_BUS, FM_FMRI_SCHEME_HC, PCI_ENUMR_VERS, &Pci_ops };
78 
79 static const topo_method_t Pci_methods[] = {
80 	{ TOPO_METH_LABEL, TOPO_METH_LABEL_DESC,
81 	    TOPO_METH_LABEL_VERSION, TOPO_STABILITY_INTERNAL, pci_label },
82 	{ TOPO_METH_FRU_COMPUTE, TOPO_METH_FRU_COMPUTE_DESC,
83 	    TOPO_METH_FRU_COMPUTE_VERSION, TOPO_STABILITY_INTERNAL, pci_fru },
84 	{ NULL }
85 };
86 
87 int
88 _topo_init(topo_mod_t *modhdl, topo_version_t version)
89 {
90 	/*
91 	 * Turn on module debugging output
92 	 */
93 	if (getenv("TOPOPCIDBG") != NULL)
94 		topo_mod_setdebug(modhdl);
95 	topo_mod_dprintf(modhdl, "initializing pcibus builtin\n");
96 
97 	if (version != PCI_ENUMR_VERS)
98 		return (topo_mod_seterrno(modhdl, EMOD_VER_NEW));
99 
100 	if (topo_mod_register(modhdl, &Pci_info, TOPO_VERSION) != 0) {
101 		topo_mod_dprintf(modhdl, "failed to register module");
102 		return (-1);
103 	}
104 	topo_mod_dprintf(modhdl, "PCI Enumr initd\n");
105 
106 	return (0);
107 }
108 
109 void
110 _topo_fini(topo_mod_t *modhdl)
111 {
112 	topo_mod_unregister(modhdl);
113 }
114 
115 static int
116 pci_label(topo_mod_t *mp, tnode_t *node, topo_version_t version,
117     nvlist_t *in, nvlist_t **out)
118 {
119 	if (version > TOPO_METH_LABEL_VERSION)
120 		return (topo_mod_seterrno(mp, EMOD_VER_NEW));
121 	return (platform_pci_label(mp, node, in, out));
122 }
123 static int
124 pci_fru(topo_mod_t *mp, tnode_t *node, topo_version_t version,
125     nvlist_t *in, nvlist_t **out)
126 {
127 	if (version > TOPO_METH_FRU_COMPUTE_VERSION)
128 		return (topo_mod_seterrno(mp, EMOD_VER_NEW));
129 	return (platform_pci_fru(mp, node, in, out));
130 }
131 static tnode_t *
132 pci_tnode_create(topo_mod_t *mod, tnode_t *parent,
133     const char *name, topo_instance_t i, void *priv)
134 {
135 	tnode_t *ntn;
136 
137 	if ((ntn = tnode_create(mod, parent, name, i, priv)) == NULL)
138 		return (NULL);
139 	if (topo_method_register(mod, ntn, Pci_methods) < 0) {
140 		topo_mod_dprintf(mod, "topo_method_register failed: %s\n",
141 		    topo_strerror(topo_mod_errno(mod)));
142 		topo_node_unbind(ntn);
143 		return (NULL);
144 	}
145 	return (ntn);
146 }
147 
148 /*ARGSUSED*/
149 static int
150 hostbridge_asdevice(topo_mod_t *mod, tnode_t *bus)
151 {
152 	di_node_t di;
153 	tnode_t *dev32;
154 
155 	di = topo_node_getspecific(bus);
156 	assert(di != DI_NODE_NIL);
157 
158 	if ((dev32 = pcidev_declare(mod, bus, di, 32)) == NULL)
159 		return (-1);
160 	if (pcifn_declare(mod, dev32, di, 0) == NULL) {
161 		topo_node_unbind(dev32);
162 		return (-1);
163 	}
164 	return (0);
165 }
166 
167 static int
168 pciexfn_add_ufm(topo_mod_t *mod, tnode_t *parent, tnode_t *node)
169 {
170 	char *devpath = NULL;
171 	ufm_ioc_getcaps_t ugc = { 0 };
172 	ufm_ioc_bufsz_t ufbz = { 0 };
173 	ufm_ioc_report_t ufmr = { 0 };
174 	nvlist_t *ufminfo = NULL, **images;
175 	uint_t nimages;
176 	int err, fd, ret = -1;
177 	tnode_t *create;
178 
179 	if (topo_prop_get_string(node, TOPO_PGROUP_IO, TOPO_IO_DEV, &devpath,
180 	    &err) != 0) {
181 		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
182 	}
183 	if (strlen(devpath) >= MAXPATHLEN) {
184 		topo_mod_dprintf(mod, "devpath is too long: %s", devpath);
185 		topo_mod_strfree(mod, devpath);
186 		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
187 	}
188 
189 	if ((fd = open(DDI_UFM_DEV, O_RDONLY)) < 0) {
190 		topo_mod_dprintf(mod, "%s: failed to open %s", __func__,
191 		    DDI_UFM_DEV);
192 		topo_mod_strfree(mod, devpath);
193 		return (0);
194 	}
195 	/*
196 	 * Make an ioctl to probe if the driver for this function is
197 	 * UFM-capable.  If the ioctl fails or if it doesn't advertise the
198 	 * DDI_UFM_CAP_REPORT capability, we bail out.
199 	 */
200 	ugc.ufmg_version = DDI_UFM_CURRENT_VERSION;
201 	(void) strlcpy(ugc.ufmg_devpath, devpath, MAXPATHLEN);
202 	if (ioctl(fd, UFM_IOC_GETCAPS, &ugc) < 0) {
203 		topo_mod_dprintf(mod, "UFM_IOC_GETCAPS failed: %s",
204 		    strerror(errno));
205 		(void) close(fd);
206 		topo_mod_strfree(mod, devpath);
207 		return (0);
208 	}
209 	if ((ugc.ufmg_caps & DDI_UFM_CAP_REPORT) == 0) {
210 		topo_mod_dprintf(mod, "driver doesn't advertise "
211 		    "DDI_UFM_CAP_REPORT");
212 		(void) close(fd);
213 		topo_mod_strfree(mod, devpath);
214 		return (0);
215 	}
216 
217 	/*
218 	 * If we made it this far, then the driver is indeed UFM-capable and
219 	 * is capable of reporting its firmware information.  First step is to
220 	 * make an ioctl to query the size of the report data so that we can
221 	 * allocate a buffer large enough to hold it.
222 	 */
223 	ufbz.ufbz_version = DDI_UFM_CURRENT_VERSION;
224 	(void) strlcpy(ufbz.ufbz_devpath, devpath, MAXPATHLEN);
225 	if (ioctl(fd, UFM_IOC_REPORTSZ, &ufbz) < 0) {
226 		topo_mod_dprintf(mod, "UFM_IOC_REPORTSZ failed: %s\n",
227 		    strerror(errno));
228 		(void) close(fd);
229 		topo_mod_strfree(mod, devpath);
230 		return (0);
231 	}
232 
233 	ufmr.ufmr_version = DDI_UFM_CURRENT_VERSION;
234 	if ((ufmr.ufmr_buf = topo_mod_alloc(mod, ufbz.ufbz_size)) == NULL) {
235 		topo_mod_dprintf(mod, "failed to alloc %u bytes\n",
236 		    ufbz.ufbz_size);
237 		(void) close(fd);
238 		topo_mod_strfree(mod, devpath);
239 		return (topo_mod_seterrno(mod, EMOD_NOMEM));
240 	}
241 	ufmr.ufmr_bufsz = ufbz.ufbz_size;
242 	(void) strlcpy(ufmr.ufmr_devpath, devpath, MAXPATHLEN);
243 	topo_mod_strfree(mod, devpath);
244 
245 	/*
246 	 * Now, make the ioctl to retrieve the actual report data.  The data
247 	 * is stored as a packed nvlist.
248 	 */
249 	if (ioctl(fd, UFM_IOC_REPORT, &ufmr) < 0) {
250 		topo_mod_dprintf(mod, "UFM_IOC_REPORT failed: %s\n",
251 		    strerror(errno));
252 		topo_mod_free(mod, ufmr.ufmr_buf, ufmr.ufmr_bufsz);
253 		(void) close(fd);
254 		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
255 	}
256 	(void) close(fd);
257 
258 	if (nvlist_unpack(ufmr.ufmr_buf, ufmr.ufmr_bufsz, &ufminfo,
259 	    NV_ENCODE_NATIVE) != 0) {
260 		topo_mod_dprintf(mod, "failed to unpack nvlist\n");
261 		topo_mod_free(mod, ufmr.ufmr_buf, ufmr.ufmr_bufsz);
262 		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
263 	}
264 	topo_mod_free(mod, ufmr.ufmr_buf, ufmr.ufmr_bufsz);
265 
266 	if (nvlist_lookup_nvlist_array(ufminfo, DDI_UFM_NV_IMAGES, &images,
267 	    &nimages) != 0) {
268 		topo_mod_dprintf(mod, "failed to lookup %s nvpair",
269 		    DDI_UFM_NV_IMAGES);
270 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
271 		goto err;
272 	}
273 
274 	/*
275 	 * There's nothing for us to do if there are no images.
276 	 */
277 	if (nimages == 0) {
278 		ret = 0;
279 		goto err;
280 	}
281 
282 	/*
283 	 * In general, almost all UFMs are device-wide. That is, in a
284 	 * multi-function device, there is still a single global firmware image.
285 	 * At this time, we default to putting the UFM data always on the device
286 	 * node. However, if someone creates a UFM on something that's not the
287 	 * first function, we'll create a UFM under that function for now. If we
288 	 * add support for hardware that has per-function UFMs, then we should
289 	 * update the UFM API to convey that scope.
290 	 */
291 	if (topo_node_instance(node) != 0) {
292 		create = node;
293 	} else {
294 		create = parent;
295 	}
296 
297 	if (topo_node_range_create(mod, create, UFM, 0, (nimages - 1)) != 0) {
298 		topo_mod_dprintf(mod, "failed to create %s range", UFM);
299 		/* errno set */
300 		goto err;
301 	}
302 	for (uint_t i = 0; i < nimages; i++) {
303 		tnode_t *ufmnode = NULL;
304 		char *descr;
305 		uint_t nslots;
306 		nvlist_t **slots;
307 
308 		if (nvlist_lookup_string(images[i], DDI_UFM_NV_IMAGE_DESC,
309 		    &descr) != 0 ||
310 		    nvlist_lookup_nvlist_array(images[i],
311 		    DDI_UFM_NV_IMAGE_SLOTS, &slots, &nslots) != 0) {
312 			(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
313 			goto err;
314 		}
315 
316 		if ((ufmnode = topo_mod_create_ufm(mod, create, descr, NULL)) ==
317 		    NULL) {
318 			topo_mod_dprintf(mod, "failed to create ufm nodes for "
319 			    "%s", descr);
320 			/* errno set */
321 			goto err;
322 		}
323 		for (uint_t s = 0; s < nslots; s++) {
324 			topo_ufm_slot_info_t slotinfo = { 0 };
325 			uint32_t slotattrs;
326 
327 			if (nvlist_lookup_string(slots[s],
328 			    DDI_UFM_NV_SLOT_VERSION,
329 			    (char **)&slotinfo.usi_version) != 0 ||
330 			    nvlist_lookup_uint32(slots[s],
331 			    DDI_UFM_NV_SLOT_ATTR, &slotattrs) != 0) {
332 				topo_node_unbind(ufmnode);
333 				topo_mod_dprintf(mod, "malformed slot nvlist");
334 				(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
335 				goto err;
336 			}
337 			(void) nvlist_lookup_nvlist(slots[s],
338 			    DDI_UFM_NV_SLOT_MISC, &slotinfo.usi_extra);
339 
340 			if (slotattrs & DDI_UFM_ATTR_READABLE &&
341 			    slotattrs & DDI_UFM_ATTR_WRITEABLE)
342 				slotinfo.usi_mode = TOPO_UFM_SLOT_MODE_RW;
343 			else if (slotattrs & DDI_UFM_ATTR_READABLE)
344 				slotinfo.usi_mode = TOPO_UFM_SLOT_MODE_RO;
345 			else if (slotattrs & DDI_UFM_ATTR_WRITEABLE)
346 				slotinfo.usi_mode = TOPO_UFM_SLOT_MODE_WO;
347 			else
348 				slotinfo.usi_mode = TOPO_UFM_SLOT_MODE_NONE;
349 
350 			if (slotattrs & DDI_UFM_ATTR_ACTIVE)
351 				slotinfo.usi_active = B_TRUE;
352 
353 			if (topo_node_range_create(mod, ufmnode, SLOT, 0,
354 			    (nslots - 1)) < 0) {
355 				topo_mod_dprintf(mod, "failed to create %s "
356 				    "range", SLOT);
357 				/* errno set */
358 				goto err;
359 			}
360 			if (topo_mod_create_ufm_slot(mod, ufmnode,
361 			    &slotinfo) == NULL) {
362 				topo_node_unbind(ufmnode);
363 				topo_mod_dprintf(mod, "failed to create ufm "
364 				    "slot %d for %s", s, descr);
365 				/* errno set */
366 				goto err;
367 			}
368 		}
369 	}
370 	ret = 0;
371 err:
372 	nvlist_free(ufminfo);
373 	return (ret);
374 }
375 
376 tnode_t *
377 pciexfn_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
378     topo_instance_t i)
379 {
380 	did_t *pd;
381 	tnode_t *ntn, *ptn;
382 	di_node_t pdn;
383 	uint_t class, subclass;
384 	char *devtyp, *pdevtyp;
385 	int pcie_devtyp, pexcap;
386 	boolean_t dev_is_pcie, pdev_is_pcie;
387 
388 	/* We need the parent's dev info node for some of the info */
389 	ptn = find_predecessor(parent, PCIEX_FUNCTION);
390 	/* If this is the first child under root, get root's ptn */
391 	if (ptn == NULL)
392 		ptn = find_predecessor(parent, PCIEX_ROOT);
393 	if (ptn == NULL)
394 		return (NULL);
395 	pdn = topo_node_getspecific(ptn);
396 
397 	/* Get the required info to populate the excap */
398 	(void) pci_classcode_get(mod, dn, &class, &subclass);
399 	devtyp = pci_devtype_get(mod, dn);
400 	pdevtyp = pci_devtype_get(mod, pdn);
401 	pexcap = pciex_cap_get(mod, pdn);
402 
403 	dev_is_pcie = devtyp && (strcmp(devtyp, "pciex") == 0);
404 	pdev_is_pcie = pdevtyp && (strcmp(pdevtyp, "pciex") == 0);
405 
406 	/*
407 	 * Populate the excap with correct PCIe device type.
408 	 *
409 	 * Device	Parent		Device		Parent	Device
410 	 * excap	device-type	device-type	excap	Class Code
411 	 * -------------------------------------------------------------------
412 	 * PCI(default)	pci		N/A		N/A	!= bridge
413 	 * PCIe		pciex		N/A		N/A	!= bridge
414 	 * Root Port	Defined in hostbridge
415 	 * Switch Up	pciex		pciex		!= up	= bridge
416 	 * Switch Down	pciex		pciex		= up	= bridge
417 	 * PCIe-PCI	pciex		pci		N/A	= bridge
418 	 * PCI-PCIe	pci		pciex		N/A	= bridge
419 	 */
420 	pcie_devtyp = PCIE_PCIECAP_DEV_TYPE_PCI_DEV;
421 	if (class == PCI_CLASS_BRIDGE && subclass == PCI_BRIDGE_PCI) {
422 		if (pdev_is_pcie) {
423 			if (dev_is_pcie) {
424 				if (pexcap != PCIE_PCIECAP_DEV_TYPE_UP)
425 					pcie_devtyp = PCIE_PCIECAP_DEV_TYPE_UP;
426 				else
427 					pcie_devtyp =
428 					    PCIE_PCIECAP_DEV_TYPE_DOWN;
429 			} else {
430 				pcie_devtyp = PCIE_PCIECAP_DEV_TYPE_PCIE2PCI;
431 			}
432 		} else {
433 			if (dev_is_pcie)
434 				pcie_devtyp = PCIE_PCIECAP_DEV_TYPE_PCI2PCIE;
435 		}
436 	} else {
437 		if (pdev_is_pcie)
438 			pcie_devtyp = PCIE_PCIECAP_DEV_TYPE_PCIE_DEV;
439 	}
440 
441 	if ((pd = did_find(mod, dn)) == NULL)
442 		return (NULL);
443 	did_excap_set(pd, pcie_devtyp);
444 
445 	if ((ntn = pci_tnode_create(mod, parent, PCIEX_FUNCTION, i, dn))
446 	    == NULL)
447 		return (NULL);
448 	if (did_props_set(ntn, pd, Fn_common_props, Fn_propcnt) < 0) {
449 		topo_node_unbind(ntn);
450 		return (NULL);
451 	}
452 
453 	/*
454 	 * Check if the driver associated with this function exports firmware
455 	 * information via the DDI UFM subsystem and, if so, create the
456 	 * corresponding ufm topo nodes.
457 	 */
458 	if (pciexfn_add_ufm(mod, parent, ntn) != 0) {
459 		topo_node_unbind(ntn);
460 		return (NULL);
461 	}
462 
463 	/*
464 	 * We may find pci-express buses or plain-pci buses beneath a function
465 	 */
466 	if (child_range_add(mod, ntn, PCIEX_BUS, 0, MAX_HB_BUSES) < 0) {
467 		topo_node_unbind(ntn);
468 		return (NULL);
469 	}
470 	if (child_range_add(mod, ntn, PCI_BUS, 0, MAX_HB_BUSES) < 0) {
471 		topo_node_range_destroy(ntn, PCIEX_BUS);
472 		topo_node_unbind(ntn);
473 		return (NULL);
474 	}
475 	return (ntn);
476 }
477 
478 tnode_t *
479 pciexdev_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
480     topo_instance_t i)
481 {
482 	did_t *pd;
483 	tnode_t *ntn;
484 
485 	if ((pd = did_find(mod, dn)) == NULL)
486 		return (NULL);
487 	did_settnode(pd, parent);
488 
489 	if ((ntn = pci_tnode_create(mod, parent, PCIEX_DEVICE, i, dn)) == NULL)
490 		return (NULL);
491 	if (did_props_set(ntn, pd, Dev_common_props, Dev_propcnt) < 0) {
492 		topo_node_unbind(ntn);
493 		return (NULL);
494 	}
495 
496 	if (pci_create_dev_sensors(mod, ntn) < 0) {
497 		topo_node_unbind(ntn);
498 		return (NULL);
499 	}
500 
501 	/*
502 	 * We can expect to find pci-express functions beneath the device
503 	 */
504 	if (child_range_add(mod,
505 	    ntn, PCIEX_FUNCTION, 0, MAX_PCIDEV_FNS) < 0) {
506 		topo_node_unbind(ntn);
507 		return (NULL);
508 	}
509 	return (ntn);
510 }
511 
512 tnode_t *
513 pciexbus_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
514     topo_instance_t i)
515 {
516 	did_t *pd;
517 	tnode_t *ntn;
518 
519 	if ((pd = did_find(mod, dn)) == NULL)
520 		return (NULL);
521 	did_settnode(pd, parent);
522 	if ((ntn = pci_tnode_create(mod, parent, PCIEX_BUS, i, dn)) == NULL)
523 		return (NULL);
524 	if (did_props_set(ntn, pd, Bus_common_props, Bus_propcnt) < 0) {
525 		topo_node_unbind(ntn);
526 		return (NULL);
527 	}
528 	/*
529 	 * We can expect to find pci-express devices beneath the bus
530 	 */
531 	if (child_range_add(mod,
532 	    ntn, PCIEX_DEVICE, 0, MAX_PCIBUS_DEVS) < 0) {
533 		topo_node_unbind(ntn);
534 		return (NULL);
535 	}
536 	return (ntn);
537 }
538 
539 tnode_t *
540 pcifn_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
541     topo_instance_t i)
542 {
543 	did_t *pd;
544 	tnode_t *ntn;
545 
546 	if ((pd = did_find(mod, dn)) == NULL)
547 		return (NULL);
548 	did_excap_set(pd, PCIE_PCIECAP_DEV_TYPE_PCI_DEV);
549 
550 	if ((ntn = pci_tnode_create(mod, parent, PCI_FUNCTION, i, dn)) == NULL)
551 		return (NULL);
552 	if (did_props_set(ntn, pd, Fn_common_props, Fn_propcnt) < 0) {
553 		topo_node_unbind(ntn);
554 		return (NULL);
555 	}
556 	/*
557 	 * We may find pci buses beneath a function
558 	 */
559 	if (child_range_add(mod, ntn, PCI_BUS, 0, MAX_HB_BUSES) < 0) {
560 		topo_node_unbind(ntn);
561 		return (NULL);
562 	}
563 	return (ntn);
564 }
565 
566 tnode_t *
567 pcidev_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
568     topo_instance_t i)
569 {
570 	did_t *pd;
571 	tnode_t *ntn;
572 
573 	if ((pd = did_find(mod, dn)) == NULL)
574 		return (NULL);
575 	/* remember parent tnode */
576 	did_settnode(pd, parent);
577 
578 	if ((ntn = pci_tnode_create(mod, parent, PCI_DEVICE, i, dn)) == NULL)
579 		return (NULL);
580 	if (did_props_set(ntn, pd, Dev_common_props, Dev_propcnt) < 0) {
581 		topo_node_unbind(ntn);
582 		return (NULL);
583 	}
584 
585 	if (pci_create_dev_sensors(mod, ntn) < 0) {
586 		topo_node_unbind(ntn);
587 		return (NULL);
588 	}
589 
590 	/*
591 	 * We can expect to find pci functions beneath the device
592 	 */
593 	if (child_range_add(mod, ntn, PCI_FUNCTION, 0, MAX_PCIDEV_FNS) < 0) {
594 		topo_node_unbind(ntn);
595 		return (NULL);
596 	}
597 	return (ntn);
598 }
599 
600 tnode_t *
601 pcibus_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
602     topo_instance_t i)
603 {
604 	did_t *pd;
605 	tnode_t *ntn;
606 	int hbchild = 0;
607 
608 	if ((pd = did_find(mod, dn)) == NULL)
609 		return (NULL);
610 	did_settnode(pd, parent);
611 	if ((ntn = pci_tnode_create(mod, parent, PCI_BUS, i, dn)) == NULL)
612 		return (NULL);
613 	/*
614 	 * If our devinfo node is lacking certain information of its
615 	 * own, and our parent topology node is a hostbridge, we may
616 	 * need/want to inherit information available in the
617 	 * hostbridge node's private data.
618 	 */
619 	if (strcmp(topo_node_name(parent), HOSTBRIDGE) == 0)
620 		hbchild = 1;
621 	if (did_props_set(ntn, pd, Bus_common_props, Bus_propcnt) < 0) {
622 		topo_node_unbind(ntn);
623 		return (NULL);
624 	}
625 	/*
626 	 * We can expect to find pci devices beneath the bus
627 	 */
628 	if (child_range_add(mod, ntn, PCI_DEVICE, 0, MAX_PCIBUS_DEVS) < 0) {
629 		topo_node_unbind(ntn);
630 		return (NULL);
631 	}
632 	/*
633 	 * On each bus child of the hostbridge, we represent the
634 	 * hostbridge as a device outside the range of legal device
635 	 * numbers.
636 	 */
637 	if (hbchild == 1) {
638 		if (hostbridge_asdevice(mod, ntn) < 0) {
639 			topo_node_range_destroy(ntn, PCI_DEVICE);
640 			topo_node_unbind(ntn);
641 			return (NULL);
642 		}
643 	}
644 	return (ntn);
645 }
646 
647 static int
648 pci_bridge_declare(topo_mod_t *mod, tnode_t *fn, di_node_t din, int board,
649     int bridge, int rc, int depth)
650 {
651 	int err;
652 	char *devtyp;
653 
654 	devtyp = pci_devtype_get(mod, din);
655 	/* Check if the children are PCI or PCIe */
656 	if (devtyp && (strcmp(devtyp, "pciex") == 0))
657 		err = pci_children_instantiate(mod, fn, din, board, bridge,
658 		    rc, TRUST_BDF, depth + 1);
659 	else
660 		err = pci_children_instantiate(mod, fn, din, board, bridge,
661 		    rc - TO_PCI, TRUST_BDF, depth + 1);
662 	return (err);
663 }
664 
665 static void
666 declare_dev_and_fn(topo_mod_t *mod, tnode_t *bus, tnode_t **dev, di_node_t din,
667     int board, int bridge, int rc, int devno, int fnno, int depth)
668 {
669 	int dcnt = 0, rcnt, err;
670 	char *propstr, *label = NULL, *pdev = NULL;
671 	tnode_t *fn;
672 	uint_t class, subclass;
673 	uint_t vid, did;
674 	uint_t pdev_sz;
675 	did_t *dp = NULL;
676 
677 	if (*dev == NULL) {
678 		if (rc >= 0)
679 			*dev = pciexdev_declare(mod, bus, din, devno);
680 		else
681 			*dev = pcidev_declare(mod, bus, din, devno);
682 		if (*dev == NULL)
683 			return;
684 		++dcnt;
685 	}
686 	if (rc >= 0)
687 		fn = pciexfn_declare(mod, *dev, din, fnno);
688 	else
689 		fn = pcifn_declare(mod, *dev, din, fnno);
690 
691 	if (fn == NULL) {
692 		if (dcnt) {
693 			topo_node_unbind(*dev);
694 			*dev = NULL;
695 		}
696 		return;
697 	}
698 
699 	if (pci_classcode_get(mod, din, &class, &subclass) < 0) {
700 		topo_node_unbind(fn);
701 		if (dcnt)
702 			topo_node_unbind(*dev);
703 		return;
704 	}
705 
706 	/*
707 	 * This function may be a bridge.  If not, check for a possible
708 	 * topology map file and kick off its enumeration of lower-level
709 	 * devices.
710 	 */
711 	if (class == PCI_CLASS_BRIDGE && subclass == PCI_BRIDGE_PCI) {
712 		(void) pci_bridge_declare(mod, fn, din, board, bridge, rc,
713 		    depth);
714 	}
715 
716 	/*
717 	 * Check for a Neptune-based NIC. This could either be a Neptune
718 	 * adapter card or an Neptune ASIC on a board (e.g. motherboard)
719 	 *
720 	 * For Netpune adapter cards, use xfp-hc-topology.xml to expand
721 	 * topology to include the XFP optical module, which is a FRU on
722 	 * the Neptune based 10giga fiber NICs.
723 	 *
724 	 * For Neptune ASICs, use the XAUI enumerator to expand topology.
725 	 * The 10giga ports are externalized by a XAUI cards, which
726 	 * are FRUs. The XAUI enumerator in turn instantiates the XFP
727 	 * optical module FRUs.
728 	 */
729 	else if (class == PCI_CLASS_NET &&
730 	    di_uintprop_get(mod, din, DI_VENDIDPROP, &vid) >= 0 &&
731 	    di_uintprop_get(mod, din, DI_DEVIDPROP, &did) >= 0 &&
732 	    vid == SUN_VENDOR_ID && did == NEPTUNE_DEVICE_ID) {
733 		/*
734 		 * Is this an adapter card? Check the bus's physlot
735 		 */
736 		dp = did_find(mod, topo_node_getspecific(bus));
737 		if (did_physlot(dp) >= 0) {
738 			topo_mod_dprintf(mod, "Found Neptune slot\n");
739 			(void) topo_mod_enummap(mod, fn,
740 			    "xfp", FM_FMRI_SCHEME_HC);
741 		} else {
742 			topo_mod_dprintf(mod, "Found Neptune ASIC\n");
743 			if (topo_mod_load(mod, XAUI, TOPO_VERSION) == NULL) {
744 				topo_mod_dprintf(mod, "pcibus enum "
745 				    "could not load xaui enum\n");
746 				(void) topo_mod_seterrno(mod,
747 				    EMOD_PARTIAL_ENUM);
748 				return;
749 			} else {
750 				if (topo_node_range_create(mod, fn,
751 				    XAUI, 0, 1) < 0) {
752 					topo_mod_dprintf(mod,
753 					    "child_range_add for "
754 					    "XAUI failed: %s\n",
755 					    topo_strerror(
756 					    topo_mod_errno(mod)));
757 					return;
758 				}
759 				(void) topo_mod_enumerate(mod, fn,
760 				    XAUI, XAUI, fnno, fnno, fn);
761 			}
762 		}
763 	} else if (class == PCI_CLASS_NET) {
764 		/*
765 		 * Ask the nic module if there are any nodes that need to be
766 		 * enumerated under this device. This might include things like
767 		 * transceivers or some day, LEDs.
768 		 */
769 		if (topo_mod_load(mod, NIC, NIC_VERSION) == NULL) {
770 			topo_mod_dprintf(mod, "pcibus enum could not load "
771 			    "nic enum\n");
772 			(void) topo_mod_seterrno(mod, EMOD_PARTIAL_ENUM);
773 			return;
774 		}
775 
776 		(void) topo_mod_enumerate(mod, fn, NIC, NIC, 0, 0, din);
777 	} else if (class == PCI_CLASS_SERIALBUS && subclass == PCI_SERIAL_USB) {
778 		/*
779 		 * If we encounter a USB controller, make sure to enumerate all
780 		 * of its USB ports.
781 		 */
782 		if (topo_mod_load(mod, USB, USB_VERSION) == NULL) {
783 			topo_mod_dprintf(mod, "pcibus enum could not load "
784 			    "usb enum\n");
785 			(void) topo_mod_seterrno(mod, EMOD_PARTIAL_ENUM);
786 			return;
787 		}
788 
789 		(void) topo_mod_enumerate(mod, fn, USB, USB_PCI, 0, 0, din);
790 	} else if (class == PCI_CLASS_MASS) {
791 		di_node_t cn;
792 		int niports = 0;
793 		extern void pci_iports_instantiate(topo_mod_t *, tnode_t *,
794 		    di_node_t, int);
795 		extern void pci_receptacle_instantiate(topo_mod_t *, tnode_t *,
796 		    di_node_t);
797 
798 		for (cn = di_child_node(din); cn != DI_NODE_NIL;
799 		    cn = di_sibling_node(cn)) {
800 			if (strcmp(di_node_name(cn), IPORT) == 0)
801 				niports++;
802 		}
803 		if (niports > 0)
804 			pci_iports_instantiate(mod, fn, din, niports);
805 
806 		if ((rcnt = di_prop_lookup_strings(DDI_DEV_T_ANY, din,
807 		    DI_RECEPTACLE_PHYMASK, &propstr)) > 0) {
808 			if (topo_node_range_create(mod, fn, RECEPTACLE, 0,
809 			    rcnt) >= 0)
810 				pci_receptacle_instantiate(mod, fn, din);
811 		}
812 	}
813 
814 	/*
815 	 * If this is an NVMe device and if the FRU label indicates it's not an
816 	 * onboard device then invoke the disk enumerator to enumerate the NVMe
817 	 * controller and associated namespaces.
818 	 *
819 	 * We skip NVMe devices that appear to be onboard as those are likely
820 	 * M.2 or U.2 devices and so should be enumerated via a
821 	 * platform-specific XML map so that they can be associated with the
822 	 * correct physical bay/slot.  This code is intended to pick up NVMe
823 	 * devices that are part of PCIe add-in cards.
824 	 */
825 	if (topo_node_label(fn, &label, &err) != 0) {
826 		topo_mod_dprintf(mod, "%s: failed to lookup FRU label on %s=%d",
827 		    __func__, topo_node_name(fn), topo_node_instance(fn));
828 		goto out;
829 	}
830 
831 	if (class == PCI_CLASS_MASS && subclass == PCI_MASS_NVME &&
832 	    strcmp(label, "MB") != 0) {
833 		char *driver = di_driver_name(din);
834 		char *slash;
835 		topo_pgroup_info_t pgi;
836 
837 		if (topo_prop_get_string(fn, TOPO_PGROUP_IO, TOPO_IO_DEV,
838 		    &pdev, &err) != 0) {
839 			topo_mod_dprintf(mod, "%s: failed to lookup %s on "
840 			    "%s=%d", __func__, TOPO_IO_DEV, topo_node_name(fn),
841 			    topo_node_instance(fn));
842 			goto out;
843 		}
844 
845 		/*
846 		 * Add the binding properties that are required by the disk
847 		 * enumerator to discover the accociated NVMe controller.
848 		 */
849 		pdev_sz = strlen(pdev) + 1;
850 		if ((slash = strrchr(pdev, '/')) == NULL) {
851 			topo_mod_dprintf(mod, "%s: malformed dev path\n",
852 			    __func__);
853 			(void) topo_mod_seterrno(mod, EMOD_PARTIAL_ENUM);
854 			goto out;
855 		}
856 		*slash = '\0';
857 
858 		pgi.tpi_name = TOPO_PGROUP_BINDING;
859 		pgi.tpi_namestab = TOPO_STABILITY_PRIVATE;
860 		pgi.tpi_datastab = TOPO_STABILITY_PRIVATE;
861 		pgi.tpi_version = TOPO_VERSION;
862 		if (topo_pgroup_create(fn, &pgi, &err) != 0 ||
863 		    topo_prop_set_string(fn, TOPO_PGROUP_BINDING,
864 		    TOPO_BINDING_DRIVER, TOPO_PROP_IMMUTABLE, driver,
865 		    &err) != 0 ||
866 		    topo_prop_set_string(fn, TOPO_PGROUP_BINDING,
867 		    TOPO_BINDING_PARENT_DEV, TOPO_PROP_IMMUTABLE, pdev,
868 		    &err) != 0) {
869 			topo_mod_dprintf(mod, "%s: failed to set binding "
870 			    "props", __func__);
871 			(void) topo_mod_seterrno(mod, EMOD_PARTIAL_ENUM);
872 			goto out;
873 		}
874 
875 		/*
876 		 * Load and invoke the disk enumerator module.
877 		 */
878 		if (topo_mod_load(mod, DISK, TOPO_VERSION) == NULL) {
879 			topo_mod_dprintf(mod, "pcibus enum could not load "
880 			    "disk enum\n");
881 			(void) topo_mod_seterrno(mod, EMOD_PARTIAL_ENUM);
882 			goto out;
883 		}
884 		(void) topo_mod_enumerate(mod, fn, DISK, NVME, 0, 0, NULL);
885 	}
886 out:
887 	if (pdev != NULL) {
888 		topo_mod_free(mod, pdev, pdev_sz);
889 	}
890 	topo_mod_strfree(mod, label);
891 }
892 
893 int
894 pci_children_instantiate(topo_mod_t *mod, tnode_t *parent, di_node_t pn,
895     int board, int bridge, int rc, int bover, int depth)
896 {
897 	did_t *pps[MAX_PCIBUS_DEVS][MAX_PCIDEV_FNS];
898 	did_t *bp = NULL;
899 	did_t *np;
900 	di_node_t sib;
901 	di_node_t din;
902 	tnode_t *bn = NULL;
903 	tnode_t *dn = NULL;
904 	int pb = -1;
905 	int b, d, f;
906 
907 	for (d = 0; d < MAX_PCIBUS_DEVS; d++)
908 		for (f = 0; f < MAX_PCIDEV_FNS; f++)
909 			pps[d][f] = NULL;
910 
911 	/* start at the parent's first sibling */
912 	sib = di_child_node(pn);
913 	while (sib != DI_NODE_NIL) {
914 		np = did_create(mod, sib, board, bridge, rc, bover);
915 		if (np == NULL)
916 			return (-1);
917 		did_BDF(np, &b, &d, &f);
918 		pps[d][f] = np;
919 		if (bp == NULL)
920 			bp = np;
921 		if (pb < 0)
922 			pb = ((bover == TRUST_BDF) ? b : bover);
923 		sib = di_sibling_node(sib);
924 	}
925 	if (pb < 0 && bover < 0)
926 		return (0);
927 	if (rc >= 0)
928 		bn = pciexbus_declare(mod, parent, pn, ((pb < 0) ? bover : pb));
929 	else
930 		bn = pcibus_declare(mod, parent, pn, ((pb < 0) ? bover : pb));
931 	if (bn == NULL)
932 		return (-1);
933 	if (pb < 0)
934 		return (0);
935 
936 	for (d = 0; d < MAX_PCIBUS_DEVS; d++) {
937 		for (f = 0; f < MAX_PCIDEV_FNS; f++) {
938 			if (pps[d][f] == NULL)
939 				continue;
940 			din = did_dinode(pps[d][f]);
941 
942 			/*
943 			 * Try to enumerate as many devices and functions as
944 			 * possible.  If we fail to declare a device, break
945 			 * out of the function loop.
946 			 */
947 			declare_dev_and_fn(mod, bn,
948 			    &dn, din, board, bridge, rc, d, f, depth);
949 			did_rele(pps[d][f]);
950 
951 			if (dn == NULL)
952 				break;
953 		}
954 		dn = NULL;
955 	}
956 	return (0);
957 }
958 
959 static int
960 pciexbus_enum(topo_mod_t *mp, tnode_t *ptn, char *pnm, topo_instance_t min,
961     topo_instance_t max)
962 {
963 	di_node_t pdn;
964 	int rc, hb;
965 	tnode_t *hbtn;
966 	int retval;
967 
968 	/*
969 	 * PCI-Express; parent node's private data is a simple di_node_t
970 	 * and we have to construct our own did hash and did_t.
971 	 */
972 	rc = topo_node_instance(ptn);
973 	if ((hbtn = topo_node_parent(ptn)) != NULL)
974 		hb = topo_node_instance(hbtn);
975 	else
976 		hb = rc;
977 
978 	if ((pdn = topo_node_getspecific(ptn)) == DI_NODE_NIL) {
979 		topo_mod_dprintf(mp,
980 		    "Parent %s node missing private data.\n"
981 		    "Unable to proceed with %s enumeration.\n", pnm, PCIEX_BUS);
982 		return (0);
983 	}
984 	if (did_hash_init(mp) != 0)
985 		return (-1);
986 	if ((did_create(mp, pdn, 0, hb, rc, TRUST_BDF)) == NULL)
987 		return (-1);	/* errno already set */
988 
989 	retval = pci_children_instantiate(mp, ptn, pdn, 0, hb, rc,
990 	    (min == max) ? min : TRUST_BDF, 0);
991 	did_hash_fini(mp);
992 
993 	return (retval);
994 }
995 
996 static int
997 pcibus_enum(topo_mod_t *mp, tnode_t *ptn, char *pnm, topo_instance_t min,
998     topo_instance_t max, void *data)
999 {
1000 	did_t *didp, *hbdid = (did_t *)data;
1001 	int retval;
1002 
1003 	/*
1004 	 * XXTOPO: we should not be sharing private node data with another
1005 	 * module. PCI Bus; Parent node's private data is a did_t.  We'll
1006 	 * use the did hash established by the parent.
1007 	 */
1008 	did_setspecific(mp, data);
1009 
1010 	/*
1011 	 * If we're looking for a specific bus-instance, find the right
1012 	 * did_t in the chain, otherwise, there should be only one did_t.
1013 	 */
1014 	if (min == max) {
1015 		int b;
1016 		didp = hbdid;
1017 		while (didp != NULL) {
1018 			did_BDF(didp, &b, NULL, NULL);
1019 			if (b == min)
1020 				break;
1021 			didp = did_link_get(didp);
1022 		}
1023 		if (didp == NULL) {
1024 			topo_mod_dprintf(mp,
1025 			    "Parent %s node missing private data related\n"
1026 			    "to %s instance %d.\n", pnm, PCI_BUS, min);
1027 			topo_mod_setspecific(mp, NULL);
1028 			return (0);
1029 		}
1030 	} else {
1031 		assert(did_link_get(hbdid) == NULL);
1032 		didp = hbdid;
1033 	}
1034 	retval = pci_children_instantiate(mp, ptn, did_dinode(didp),
1035 	    did_board(didp), did_bridge(didp), did_rc(didp),
1036 	    (min == max) ? min : TRUST_BDF, 0);
1037 
1038 	topo_mod_setspecific(mp, NULL);
1039 
1040 	return (retval);
1041 }
1042 
1043 /*ARGSUSED*/
1044 static int
1045 pci_enum(topo_mod_t *mod, tnode_t *ptn, const char *name,
1046     topo_instance_t min, topo_instance_t max, void *notused, void *data)
1047 {
1048 	int retval;
1049 	char *pname;
1050 
1051 	topo_mod_dprintf(mod, "Enumerating pci!\n");
1052 
1053 	if (strcmp(name, PCI_BUS) != 0 && strcmp(name, PCIEX_BUS) != 0) {
1054 		topo_mod_dprintf(mod,
1055 		    "Currently only know how to enumerate %s or %s.\n",
1056 		    PCI_BUS, PCIEX_BUS);
1057 		return (0);
1058 	}
1059 	pname = topo_node_name(ptn);
1060 	if (strcmp(pname, HOSTBRIDGE) != 0 && strcmp(pname, PCIEX_ROOT) != 0) {
1061 		topo_mod_dprintf(mod,
1062 		    "Currently can only enumerate a %s or %s directly\n",
1063 		    PCI_BUS, PCIEX_BUS);
1064 		topo_mod_dprintf(mod,
1065 		    "descended from a %s or %s node.\n",
1066 		    HOSTBRIDGE, PCIEX_ROOT);
1067 		return (0);
1068 	}
1069 
1070 	if (strcmp(name, PCI_BUS) == 0) {
1071 		retval = pcibus_enum(mod, ptn, pname, min, max, data);
1072 	} else if (strcmp(name, PCIEX_BUS) == 0) {
1073 		retval = pciexbus_enum(mod, ptn, pname, min, max);
1074 	} else {
1075 		topo_mod_dprintf(mod,
1076 		    "Currently only know how to enumerate %s or %s not %s.\n",
1077 		    PCI_BUS, PCIEX_BUS, name);
1078 		return (0);
1079 	}
1080 
1081 	return (retval);
1082 }
1083 
1084 /*ARGSUSED*/
1085 static void
1086 pci_release(topo_mod_t *mp, tnode_t *node)
1087 {
1088 	topo_method_unregister_all(mp, node);
1089 
1090 	/*
1091 	 * node private data (did_t) for this node is destroyed in
1092 	 * did_hash_destroy()
1093 	 */
1094 
1095 	topo_node_unbind(node);
1096 }
1097