xref: /illumos-gate/usr/src/lib/fm/topo/modules/common/pcibus/pcibus.c (revision e9a9d42d6b4da41cc01b726a4d919abc09841750)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2020 Joyent, Inc.
25  */
26 
27 #include <sys/fm/protocol.h>
28 #include <assert.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <strings.h>
33 #include <alloca.h>
34 #include <fcntl.h>
35 #include <unistd.h>
36 #include <sys/param.h>
37 #include <sys/pci.h>
38 #include <sys/pcie.h>
39 #include <libdevinfo.h>
40 #include <libnvpair.h>
41 #include <fm/topo_mod.h>
42 #include <fm/topo_hc.h>
43 #include <sys/ddi_ufm.h>
44 #include <sys/stat.h>
45 #include <sys/types.h>
46 
47 #include <hostbridge.h>
48 #include <pcibus.h>
49 #include <did.h>
50 #include <did_props.h>
51 #include <util.h>
52 #include <topo_nic.h>
53 #include <topo_usb.h>
54 
/*
 * Property translation tables, and the number of entries in each, that
 * are handed to did_props_set() when bus, device and function nodes are
 * declared below.  They are defined outside this file.
 */
extern txprop_t Bus_common_props[];
extern txprop_t Dev_common_props[];
extern txprop_t Fn_common_props[];
extern int Bus_propcnt;
extern int Dev_propcnt;
extern int Fn_propcnt;

/*
 * Platform-provided label and FRU routines; pci_label() and pci_fru()
 * forward to these after checking the method version.
 */
extern int platform_pci_label(topo_mod_t *mod, tnode_t *, nvlist_t *,
    nvlist_t **);
extern int platform_pci_fru(topo_mod_t *mod, tnode_t *, nvlist_t *,
    nvlist_t **);
/* Forward declarations for the module and method entry points. */
static void pci_release(topo_mod_t *, tnode_t *);
static int pci_enum(topo_mod_t *, tnode_t *, const char *, topo_instance_t,
    topo_instance_t, void *, void *);
static int pci_label(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
    nvlist_t **);
static int pci_fru(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
    nvlist_t **);

/* Enumeration and release entry points for this module. */
static const topo_modops_t Pci_ops =
	{ pci_enum, pci_release };
/* Registration record passed to topo_mod_register() by _topo_init(). */
static const topo_modinfo_t Pci_info =
	{ PCI_BUS, FM_FMRI_SCHEME_HC, PCI_ENUMR_VERS, &Pci_ops };

/*
 * Methods registered on every node created through pci_tnode_create().
 * The table is terminated by an entry whose name is NULL.
 */
static const topo_method_t Pci_methods[] = {
	{ TOPO_METH_LABEL, TOPO_METH_LABEL_DESC,
	    TOPO_METH_LABEL_VERSION, TOPO_STABILITY_INTERNAL, pci_label },
	{ TOPO_METH_FRU_COMPUTE, TOPO_METH_FRU_COMPUTE_DESC,
	    TOPO_METH_FRU_COMPUTE_VERSION, TOPO_STABILITY_INTERNAL, pci_fru },
	{ NULL }
};
86 
87 int
88 _topo_init(topo_mod_t *modhdl, topo_version_t version)
89 {
90 	/*
91 	 * Turn on module debugging output
92 	 */
93 	if (getenv("TOPOPCIDBG") != NULL)
94 		topo_mod_setdebug(modhdl);
95 	topo_mod_dprintf(modhdl, "initializing pcibus builtin\n");
96 
97 	if (version != PCI_ENUMR_VERS)
98 		return (topo_mod_seterrno(modhdl, EMOD_VER_NEW));
99 
100 	if (topo_mod_register(modhdl, &Pci_info, TOPO_VERSION) != 0) {
101 		topo_mod_dprintf(modhdl, "failed to register module");
102 		return (-1);
103 	}
104 	topo_mod_dprintf(modhdl, "PCI Enumr initd\n");
105 
106 	return (0);
107 }
108 
109 void
110 _topo_fini(topo_mod_t *modhdl)
111 {
112 	topo_mod_unregister(modhdl);
113 }
114 
115 static int
116 pci_label(topo_mod_t *mp, tnode_t *node, topo_version_t version,
117     nvlist_t *in, nvlist_t **out)
118 {
119 	if (version > TOPO_METH_LABEL_VERSION)
120 		return (topo_mod_seterrno(mp, EMOD_VER_NEW));
121 	return (platform_pci_label(mp, node, in, out));
122 }
123 static int
124 pci_fru(topo_mod_t *mp, tnode_t *node, topo_version_t version,
125     nvlist_t *in, nvlist_t **out)
126 {
127 	if (version > TOPO_METH_FRU_COMPUTE_VERSION)
128 		return (topo_mod_seterrno(mp, EMOD_VER_NEW));
129 	return (platform_pci_fru(mp, node, in, out));
130 }
131 static tnode_t *
132 pci_tnode_create(topo_mod_t *mod, tnode_t *parent,
133     const char *name, topo_instance_t i, void *priv)
134 {
135 	tnode_t *ntn;
136 
137 	if ((ntn = tnode_create(mod, parent, name, i, priv)) == NULL)
138 		return (NULL);
139 	if (topo_method_register(mod, ntn, Pci_methods) < 0) {
140 		topo_mod_dprintf(mod, "topo_method_register failed: %s\n",
141 		    topo_strerror(topo_mod_errno(mod)));
142 		topo_node_unbind(ntn);
143 		return (NULL);
144 	}
145 	return (ntn);
146 }
147 
148 /*ARGSUSED*/
149 static int
150 hostbridge_asdevice(topo_mod_t *mod, tnode_t *bus)
151 {
152 	di_node_t di;
153 	tnode_t *dev32;
154 
155 	di = topo_node_getspecific(bus);
156 	assert(di != DI_NODE_NIL);
157 
158 	if ((dev32 = pcidev_declare(mod, bus, di, 32)) == NULL)
159 		return (-1);
160 	if (pcifn_declare(mod, dev32, di, 0) == NULL) {
161 		topo_node_unbind(dev32);
162 		return (-1);
163 	}
164 	return (0);
165 }
166 
167 static int
168 pciexfn_add_ufm(topo_mod_t *mod, tnode_t *parent, tnode_t *node)
169 {
170 	char *devpath = NULL;
171 	ufm_ioc_getcaps_t ugc = { 0 };
172 	ufm_ioc_bufsz_t ufbz = { 0 };
173 	ufm_ioc_report_t ufmr = { 0 };
174 	nvlist_t *ufminfo = NULL, **images;
175 	uint_t nimages;
176 	int err, fd, ret = -1;
177 	tnode_t *create;
178 
179 	if (topo_prop_get_string(node, TOPO_PGROUP_IO, TOPO_IO_DEV, &devpath,
180 	    &err) != 0) {
181 		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
182 	}
183 	if (strlen(devpath) >= MAXPATHLEN) {
184 		topo_mod_dprintf(mod, "devpath is too long: %s", devpath);
185 		topo_mod_strfree(mod, devpath);
186 		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
187 	}
188 
189 	if ((fd = open(DDI_UFM_DEV, O_RDONLY)) < 0) {
190 		topo_mod_dprintf(mod, "%s: failed to open %s", __func__,
191 		    DDI_UFM_DEV);
192 		topo_mod_strfree(mod, devpath);
193 		return (0);
194 	}
195 	/*
196 	 * Make an ioctl to probe if the driver for this function is
197 	 * UFM-capable.  If the ioctl fails or if it doesn't advertise the
198 	 * DDI_UFM_CAP_REPORT capability, we bail out.
199 	 */
200 	ugc.ufmg_version = DDI_UFM_CURRENT_VERSION;
201 	(void) strlcpy(ugc.ufmg_devpath, devpath, MAXPATHLEN);
202 	if (ioctl(fd, UFM_IOC_GETCAPS, &ugc) < 0) {
203 		topo_mod_dprintf(mod, "UFM_IOC_GETCAPS failed: %s",
204 		    strerror(errno));
205 		(void) close(fd);
206 		topo_mod_strfree(mod, devpath);
207 		return (0);
208 	}
209 	if ((ugc.ufmg_caps & DDI_UFM_CAP_REPORT) == 0) {
210 		topo_mod_dprintf(mod, "driver doesn't advertise "
211 		    "DDI_UFM_CAP_REPORT");
212 		(void) close(fd);
213 		topo_mod_strfree(mod, devpath);
214 		return (0);
215 	}
216 
217 	/*
218 	 * If we made it this far, then the driver is indeed UFM-capable and
219 	 * is capable of reporting its firmware information.  First step is to
220 	 * make an ioctl to query the size of the report data so that we can
221 	 * allocate a buffer large enough to hold it.
222 	 */
223 	ufbz.ufbz_version = DDI_UFM_CURRENT_VERSION;
224 	(void) strlcpy(ufbz.ufbz_devpath, devpath, MAXPATHLEN);
225 	if (ioctl(fd, UFM_IOC_REPORTSZ, &ufbz) < 0) {
226 		topo_mod_dprintf(mod, "UFM_IOC_REPORTSZ failed: %s\n",
227 		    strerror(errno));
228 		(void) close(fd);
229 		topo_mod_strfree(mod, devpath);
230 		return (0);
231 	}
232 
233 	ufmr.ufmr_version = DDI_UFM_CURRENT_VERSION;
234 	if ((ufmr.ufmr_buf = topo_mod_alloc(mod, ufbz.ufbz_size)) == NULL) {
235 		topo_mod_dprintf(mod, "failed to alloc %u bytes\n",
236 		    ufbz.ufbz_size);
237 		(void) close(fd);
238 		topo_mod_strfree(mod, devpath);
239 		return (topo_mod_seterrno(mod, EMOD_NOMEM));
240 	}
241 	ufmr.ufmr_bufsz = ufbz.ufbz_size;
242 	(void) strlcpy(ufmr.ufmr_devpath, devpath, MAXPATHLEN);
243 	topo_mod_strfree(mod, devpath);
244 
245 	/*
246 	 * Now, make the ioctl to retrieve the actual report data.  The data
247 	 * is stored as a packed nvlist.
248 	 */
249 	if (ioctl(fd, UFM_IOC_REPORT, &ufmr) < 0) {
250 		topo_mod_dprintf(mod, "UFM_IOC_REPORT failed: %s\n",
251 		    strerror(errno));
252 		topo_mod_free(mod, ufmr.ufmr_buf, ufmr.ufmr_bufsz);
253 		(void) close(fd);
254 		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
255 	}
256 	(void) close(fd);
257 
258 	if (nvlist_unpack(ufmr.ufmr_buf, ufmr.ufmr_bufsz, &ufminfo, 0) != 0) {
259 		topo_mod_dprintf(mod, "failed to unpack nvlist\n");
260 		topo_mod_free(mod, ufmr.ufmr_buf, ufmr.ufmr_bufsz);
261 		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
262 	}
263 	topo_mod_free(mod, ufmr.ufmr_buf, ufmr.ufmr_bufsz);
264 
265 	if (nvlist_lookup_nvlist_array(ufminfo, DDI_UFM_NV_IMAGES, &images,
266 	    &nimages) != 0) {
267 		topo_mod_dprintf(mod, "failed to lookup %s nvpair",
268 		    DDI_UFM_NV_IMAGES);
269 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
270 		goto err;
271 	}
272 
273 	/*
274 	 * There's nothing for us to do if there are no images.
275 	 */
276 	if (nimages == 0) {
277 		ret = 0;
278 		goto err;
279 	}
280 
281 	/*
282 	 * In general, almost all UFMs are device-wide. That is, in a
283 	 * multi-function device, there is still a single global firmware image.
284 	 * At this time, we default to putting the UFM data always on the device
285 	 * node. However, if someone creates a UFM on something that's not the
286 	 * first function, we'll create a UFM under that function for now. If we
287 	 * add support for hardware that has per-function UFMs, then we should
288 	 * update the UFM API to convey that scope.
289 	 */
290 	if (topo_node_instance(node) != 0) {
291 		create = node;
292 	} else {
293 		create = parent;
294 	}
295 
296 	if (topo_node_range_create(mod, create, UFM, 0, (nimages - 1)) != 0) {
297 		topo_mod_dprintf(mod, "failed to create %s range", UFM);
298 		/* errno set */
299 		goto err;
300 	}
301 	for (uint_t i = 0; i < nimages; i++) {
302 		tnode_t *ufmnode = NULL;
303 		char *descr;
304 		uint_t nslots;
305 		nvlist_t **slots;
306 
307 		if (nvlist_lookup_string(images[i], DDI_UFM_NV_IMAGE_DESC,
308 		    &descr) != 0 ||
309 		    nvlist_lookup_nvlist_array(images[i],
310 		    DDI_UFM_NV_IMAGE_SLOTS, &slots, &nslots) != 0) {
311 			(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
312 			goto err;
313 		}
314 
315 		if ((ufmnode = topo_mod_create_ufm(mod, create, descr, NULL)) ==
316 		    NULL) {
317 			topo_mod_dprintf(mod, "failed to create ufm nodes for "
318 			    "%s", descr);
319 			/* errno set */
320 			goto err;
321 		}
322 		for (uint_t s = 0; s < nslots; s++) {
323 			topo_ufm_slot_info_t slotinfo = { 0 };
324 			uint32_t slotattrs;
325 
326 			if (nvlist_lookup_string(slots[s],
327 			    DDI_UFM_NV_SLOT_VERSION,
328 			    (char **)&slotinfo.usi_version) != 0 ||
329 			    nvlist_lookup_uint32(slots[s],
330 			    DDI_UFM_NV_SLOT_ATTR, &slotattrs) != 0) {
331 				topo_node_unbind(ufmnode);
332 				topo_mod_dprintf(mod, "malformed slot nvlist");
333 				(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
334 				goto err;
335 			}
336 			(void) nvlist_lookup_nvlist(slots[s],
337 			    DDI_UFM_NV_SLOT_MISC, &slotinfo.usi_extra);
338 
339 			if (slotattrs & DDI_UFM_ATTR_READABLE &&
340 			    slotattrs & DDI_UFM_ATTR_WRITEABLE)
341 				slotinfo.usi_mode = TOPO_UFM_SLOT_MODE_RW;
342 			else if (slotattrs & DDI_UFM_ATTR_READABLE)
343 				slotinfo.usi_mode = TOPO_UFM_SLOT_MODE_RO;
344 			else if (slotattrs & DDI_UFM_ATTR_WRITEABLE)
345 				slotinfo.usi_mode = TOPO_UFM_SLOT_MODE_WO;
346 			else
347 				slotinfo.usi_mode = TOPO_UFM_SLOT_MODE_NONE;
348 
349 			if (slotattrs & DDI_UFM_ATTR_ACTIVE)
350 				slotinfo.usi_active = B_TRUE;
351 
352 			if (topo_node_range_create(mod, ufmnode, SLOT, 0,
353 			    (nslots - 1)) < 0) {
354 				topo_mod_dprintf(mod, "failed to create %s "
355 				    "range", SLOT);
356 				/* errno set */
357 				goto err;
358 			}
359 			if (topo_mod_create_ufm_slot(mod, ufmnode,
360 			    &slotinfo) == NULL) {
361 				topo_node_unbind(ufmnode);
362 				topo_mod_dprintf(mod, "failed to create ufm "
363 				    "slot %d for %s", s, descr);
364 				/* errno set */
365 				goto err;
366 			}
367 		}
368 	}
369 	ret = 0;
370 err:
371 	nvlist_free(ufminfo);
372 	return (ret);
373 }
374 
375 tnode_t *
376 pciexfn_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
377     topo_instance_t i)
378 {
379 	did_t *pd;
380 	tnode_t *ntn, *ptn;
381 	di_node_t pdn;
382 	uint_t class, subclass;
383 	char *devtyp, *pdevtyp;
384 	int pcie_devtyp, pexcap;
385 	boolean_t dev_is_pcie, pdev_is_pcie;
386 
387 	/* We need the parent's dev info node for some of the info */
388 	ptn = find_predecessor(parent, PCIEX_FUNCTION);
389 	/* If this is the first child under root, get root's ptn */
390 	if (ptn == NULL)
391 		ptn = find_predecessor(parent, PCIEX_ROOT);
392 	if (ptn == NULL)
393 		return (NULL);
394 	pdn = topo_node_getspecific(ptn);
395 
396 	/* Get the required info to populate the excap */
397 	(void) pci_classcode_get(mod, dn, &class, &subclass);
398 	devtyp = pci_devtype_get(mod, dn);
399 	pdevtyp = pci_devtype_get(mod, pdn);
400 	pexcap = pciex_cap_get(mod, pdn);
401 
402 	dev_is_pcie = devtyp && (strcmp(devtyp, "pciex") == 0);
403 	pdev_is_pcie = pdevtyp && (strcmp(pdevtyp, "pciex") == 0);
404 
405 	/*
406 	 * Populate the excap with correct PCIe device type.
407 	 *
408 	 * Device	Parent		Device		Parent	Device
409 	 * excap	device-type	device-type	excap	Class Code
410 	 * -------------------------------------------------------------------
411 	 * PCI(default)	pci		N/A		N/A	!= bridge
412 	 * PCIe		pciex		N/A		N/A	!= bridge
413 	 * Root Port	Defined in hostbridge
414 	 * Switch Up	pciex		pciex		!= up	= bridge
415 	 * Switch Down	pciex		pciex		= up	= bridge
416 	 * PCIe-PCI	pciex		pci		N/A	= bridge
417 	 * PCI-PCIe	pci		pciex		N/A	= bridge
418 	 */
419 	pcie_devtyp = PCIE_PCIECAP_DEV_TYPE_PCI_DEV;
420 	if (class == PCI_CLASS_BRIDGE && subclass == PCI_BRIDGE_PCI) {
421 		if (pdev_is_pcie) {
422 			if (dev_is_pcie) {
423 				if (pexcap != PCIE_PCIECAP_DEV_TYPE_UP)
424 					pcie_devtyp = PCIE_PCIECAP_DEV_TYPE_UP;
425 				else
426 					pcie_devtyp =
427 					    PCIE_PCIECAP_DEV_TYPE_DOWN;
428 			} else {
429 				pcie_devtyp = PCIE_PCIECAP_DEV_TYPE_PCIE2PCI;
430 			}
431 		} else {
432 			if (dev_is_pcie)
433 				pcie_devtyp = PCIE_PCIECAP_DEV_TYPE_PCI2PCIE;
434 		}
435 	} else {
436 		if (pdev_is_pcie)
437 			pcie_devtyp = PCIE_PCIECAP_DEV_TYPE_PCIE_DEV;
438 	}
439 
440 	if ((pd = did_find(mod, dn)) == NULL)
441 		return (NULL);
442 	did_excap_set(pd, pcie_devtyp);
443 
444 	if ((ntn = pci_tnode_create(mod, parent, PCIEX_FUNCTION, i, dn))
445 	    == NULL)
446 		return (NULL);
447 	if (did_props_set(ntn, pd, Fn_common_props, Fn_propcnt) < 0) {
448 		topo_node_unbind(ntn);
449 		return (NULL);
450 	}
451 
452 	/*
453 	 * Check if the driver associated with this function exports firmware
454 	 * information via the DDI UFM subsystem and, if so, create the
455 	 * corresponding ufm topo nodes.
456 	 */
457 	if (pciexfn_add_ufm(mod, parent, ntn) != 0) {
458 		topo_node_unbind(ntn);
459 		return (NULL);
460 	}
461 
462 	/*
463 	 * We may find pci-express buses or plain-pci buses beneath a function
464 	 */
465 	if (child_range_add(mod, ntn, PCIEX_BUS, 0, MAX_HB_BUSES) < 0) {
466 		topo_node_unbind(ntn);
467 		return (NULL);
468 	}
469 	if (child_range_add(mod, ntn, PCI_BUS, 0, MAX_HB_BUSES) < 0) {
470 		topo_node_range_destroy(ntn, PCIEX_BUS);
471 		topo_node_unbind(ntn);
472 		return (NULL);
473 	}
474 	return (ntn);
475 }
476 
477 tnode_t *
478 pciexdev_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
479     topo_instance_t i)
480 {
481 	did_t *pd;
482 	tnode_t *ntn;
483 
484 	if ((pd = did_find(mod, dn)) == NULL)
485 		return (NULL);
486 	did_settnode(pd, parent);
487 
488 	if ((ntn = pci_tnode_create(mod, parent, PCIEX_DEVICE, i, dn)) == NULL)
489 		return (NULL);
490 	if (did_props_set(ntn, pd, Dev_common_props, Dev_propcnt) < 0) {
491 		topo_node_unbind(ntn);
492 		return (NULL);
493 	}
494 
495 	if (pci_create_dev_sensors(mod, ntn) < 0) {
496 		topo_node_unbind(ntn);
497 		return (NULL);
498 	}
499 
500 	/*
501 	 * We can expect to find pci-express functions beneath the device
502 	 */
503 	if (child_range_add(mod,
504 	    ntn, PCIEX_FUNCTION, 0, MAX_PCIDEV_FNS) < 0) {
505 		topo_node_unbind(ntn);
506 		return (NULL);
507 	}
508 	return (ntn);
509 }
510 
511 tnode_t *
512 pciexbus_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
513     topo_instance_t i)
514 {
515 	did_t *pd;
516 	tnode_t *ntn;
517 
518 	if ((pd = did_find(mod, dn)) == NULL)
519 		return (NULL);
520 	did_settnode(pd, parent);
521 	if ((ntn = pci_tnode_create(mod, parent, PCIEX_BUS, i, dn)) == NULL)
522 		return (NULL);
523 	if (did_props_set(ntn, pd, Bus_common_props, Bus_propcnt) < 0) {
524 		topo_node_unbind(ntn);
525 		return (NULL);
526 	}
527 	/*
528 	 * We can expect to find pci-express devices beneath the bus
529 	 */
530 	if (child_range_add(mod,
531 	    ntn, PCIEX_DEVICE, 0, MAX_PCIBUS_DEVS) < 0) {
532 		topo_node_unbind(ntn);
533 		return (NULL);
534 	}
535 	return (ntn);
536 }
537 
538 tnode_t *
539 pcifn_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
540     topo_instance_t i)
541 {
542 	did_t *pd;
543 	tnode_t *ntn;
544 
545 	if ((pd = did_find(mod, dn)) == NULL)
546 		return (NULL);
547 	did_excap_set(pd, PCIE_PCIECAP_DEV_TYPE_PCI_DEV);
548 
549 	if ((ntn = pci_tnode_create(mod, parent, PCI_FUNCTION, i, dn)) == NULL)
550 		return (NULL);
551 	if (did_props_set(ntn, pd, Fn_common_props, Fn_propcnt) < 0) {
552 		topo_node_unbind(ntn);
553 		return (NULL);
554 	}
555 	/*
556 	 * We may find pci buses beneath a function
557 	 */
558 	if (child_range_add(mod, ntn, PCI_BUS, 0, MAX_HB_BUSES) < 0) {
559 		topo_node_unbind(ntn);
560 		return (NULL);
561 	}
562 	return (ntn);
563 }
564 
565 tnode_t *
566 pcidev_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
567     topo_instance_t i)
568 {
569 	did_t *pd;
570 	tnode_t *ntn;
571 
572 	if ((pd = did_find(mod, dn)) == NULL)
573 		return (NULL);
574 	/* remember parent tnode */
575 	did_settnode(pd, parent);
576 
577 	if ((ntn = pci_tnode_create(mod, parent, PCI_DEVICE, i, dn)) == NULL)
578 		return (NULL);
579 	if (did_props_set(ntn, pd, Dev_common_props, Dev_propcnt) < 0) {
580 		topo_node_unbind(ntn);
581 		return (NULL);
582 	}
583 
584 	if (pci_create_dev_sensors(mod, ntn) < 0) {
585 		topo_node_unbind(ntn);
586 		return (NULL);
587 	}
588 
589 	/*
590 	 * We can expect to find pci functions beneath the device
591 	 */
592 	if (child_range_add(mod, ntn, PCI_FUNCTION, 0, MAX_PCIDEV_FNS) < 0) {
593 		topo_node_unbind(ntn);
594 		return (NULL);
595 	}
596 	return (ntn);
597 }
598 
599 tnode_t *
600 pcibus_declare(topo_mod_t *mod, tnode_t *parent, di_node_t dn,
601     topo_instance_t i)
602 {
603 	did_t *pd;
604 	tnode_t *ntn;
605 	int hbchild = 0;
606 
607 	if ((pd = did_find(mod, dn)) == NULL)
608 		return (NULL);
609 	did_settnode(pd, parent);
610 	if ((ntn = pci_tnode_create(mod, parent, PCI_BUS, i, dn)) == NULL)
611 		return (NULL);
612 	/*
613 	 * If our devinfo node is lacking certain information of its
614 	 * own, and our parent topology node is a hostbridge, we may
615 	 * need/want to inherit information available in the
616 	 * hostbridge node's private data.
617 	 */
618 	if (strcmp(topo_node_name(parent), HOSTBRIDGE) == 0)
619 		hbchild = 1;
620 	if (did_props_set(ntn, pd, Bus_common_props, Bus_propcnt) < 0) {
621 		topo_node_unbind(ntn);
622 		return (NULL);
623 	}
624 	/*
625 	 * We can expect to find pci devices beneath the bus
626 	 */
627 	if (child_range_add(mod, ntn, PCI_DEVICE, 0, MAX_PCIBUS_DEVS) < 0) {
628 		topo_node_unbind(ntn);
629 		return (NULL);
630 	}
631 	/*
632 	 * On each bus child of the hostbridge, we represent the
633 	 * hostbridge as a device outside the range of legal device
634 	 * numbers.
635 	 */
636 	if (hbchild == 1) {
637 		if (hostbridge_asdevice(mod, ntn) < 0) {
638 			topo_node_range_destroy(ntn, PCI_DEVICE);
639 			topo_node_unbind(ntn);
640 			return (NULL);
641 		}
642 	}
643 	return (ntn);
644 }
645 
646 static int
647 pci_bridge_declare(topo_mod_t *mod, tnode_t *fn, di_node_t din, int board,
648     int bridge, int rc, int depth)
649 {
650 	int err;
651 	char *devtyp;
652 
653 	devtyp = pci_devtype_get(mod, din);
654 	/* Check if the children are PCI or PCIe */
655 	if (devtyp && (strcmp(devtyp, "pciex") == 0))
656 		err = pci_children_instantiate(mod, fn, din, board, bridge,
657 		    rc, TRUST_BDF, depth + 1);
658 	else
659 		err = pci_children_instantiate(mod, fn, din, board, bridge,
660 		    rc - TO_PCI, TRUST_BDF, depth + 1);
661 	return (err);
662 }
663 
664 static void
665 declare_dev_and_fn(topo_mod_t *mod, tnode_t *bus, tnode_t **dev, di_node_t din,
666     int board, int bridge, int rc, int devno, int fnno, int depth)
667 {
668 	int dcnt = 0, rcnt, err;
669 	char *propstr, *label = NULL, *pdev = NULL;
670 	tnode_t *fn;
671 	uint_t class, subclass;
672 	uint_t vid, did;
673 	uint_t pdev_sz = 0;
674 	did_t *dp = NULL;
675 
676 	if (*dev == NULL) {
677 		if (rc >= 0)
678 			*dev = pciexdev_declare(mod, bus, din, devno);
679 		else
680 			*dev = pcidev_declare(mod, bus, din, devno);
681 		if (*dev == NULL)
682 			return;
683 		++dcnt;
684 	}
685 	if (rc >= 0)
686 		fn = pciexfn_declare(mod, *dev, din, fnno);
687 	else
688 		fn = pcifn_declare(mod, *dev, din, fnno);
689 
690 	if (fn == NULL) {
691 		if (dcnt) {
692 			topo_node_unbind(*dev);
693 			*dev = NULL;
694 		}
695 		return;
696 	}
697 
698 	if (pci_classcode_get(mod, din, &class, &subclass) < 0) {
699 		topo_node_unbind(fn);
700 		if (dcnt)
701 			topo_node_unbind(*dev);
702 		return;
703 	}
704 
705 	/*
706 	 * This function may be a bridge.  If not, check for a possible
707 	 * topology map file and kick off its enumeration of lower-level
708 	 * devices.
709 	 */
710 	if (class == PCI_CLASS_BRIDGE && subclass == PCI_BRIDGE_PCI) {
711 		(void) pci_bridge_declare(mod, fn, din, board, bridge, rc,
712 		    depth);
713 	}
714 
715 	/*
716 	 * Check for a Neptune-based NIC. This could either be a Neptune
717 	 * adapter card or an Neptune ASIC on a board (e.g. motherboard)
718 	 *
719 	 * For Netpune adapter cards, use xfp-hc-topology.xml to expand
720 	 * topology to include the XFP optical module, which is a FRU on
721 	 * the Neptune based 10giga fiber NICs.
722 	 *
723 	 * For Neptune ASICs, use the XAUI enumerator to expand topology.
724 	 * The 10giga ports are externalized by a XAUI cards, which
725 	 * are FRUs. The XAUI enumerator in turn instantiates the XFP
726 	 * optical module FRUs.
727 	 */
728 	else if (class == PCI_CLASS_NET &&
729 	    di_uintprop_get(mod, din, DI_VENDIDPROP, &vid) >= 0 &&
730 	    di_uintprop_get(mod, din, DI_DEVIDPROP, &did) >= 0 &&
731 	    vid == SUN_VENDOR_ID && did == NEPTUNE_DEVICE_ID) {
732 		/*
733 		 * Is this an adapter card? Check the bus's physlot
734 		 */
735 		dp = did_find(mod, topo_node_getspecific(bus));
736 		if (did_physlot(dp) >= 0) {
737 			topo_mod_dprintf(mod, "Found Neptune slot\n");
738 			(void) topo_mod_enummap(mod, fn,
739 			    "xfp", FM_FMRI_SCHEME_HC);
740 		} else {
741 			topo_mod_dprintf(mod, "Found Neptune ASIC\n");
742 			if (topo_mod_load(mod, XAUI, TOPO_VERSION) == NULL) {
743 				topo_mod_dprintf(mod, "pcibus enum "
744 				    "could not load xaui enum\n");
745 				(void) topo_mod_seterrno(mod,
746 				    EMOD_PARTIAL_ENUM);
747 				return;
748 			} else {
749 				if (topo_node_range_create(mod, fn,
750 				    XAUI, 0, 1) < 0) {
751 					topo_mod_dprintf(mod,
752 					    "child_range_add for "
753 					    "XAUI failed: %s\n",
754 					    topo_strerror(
755 					    topo_mod_errno(mod)));
756 					return;
757 				}
758 				(void) topo_mod_enumerate(mod, fn,
759 				    XAUI, XAUI, fnno, fnno, fn);
760 			}
761 		}
762 	} else if (class == PCI_CLASS_NET) {
763 		/*
764 		 * Ask the nic module if there are any nodes that need to be
765 		 * enumerated under this device. This might include things like
766 		 * transceivers or some day, LEDs.
767 		 */
768 		if (topo_mod_load(mod, NIC, NIC_VERSION) == NULL) {
769 			topo_mod_dprintf(mod, "pcibus enum could not load "
770 			    "nic enum\n");
771 			(void) topo_mod_seterrno(mod, EMOD_PARTIAL_ENUM);
772 			return;
773 		}
774 
775 		(void) topo_mod_enumerate(mod, fn, NIC, NIC, 0, 0, din);
776 	} else if (class == PCI_CLASS_SERIALBUS && subclass == PCI_SERIAL_USB) {
777 		/*
778 		 * If we encounter a USB controller, make sure to enumerate all
779 		 * of its USB ports.
780 		 */
781 		if (topo_mod_load(mod, USB, USB_VERSION) == NULL) {
782 			topo_mod_dprintf(mod, "pcibus enum could not load "
783 			    "usb enum\n");
784 			(void) topo_mod_seterrno(mod, EMOD_PARTIAL_ENUM);
785 			return;
786 		}
787 
788 		(void) topo_mod_enumerate(mod, fn, USB, USB_PCI, 0, 0, din);
789 	} else if (class == PCI_CLASS_MASS) {
790 		di_node_t cn;
791 		int niports = 0;
792 		extern void pci_iports_instantiate(topo_mod_t *, tnode_t *,
793 		    di_node_t, int);
794 		extern void pci_receptacle_instantiate(topo_mod_t *, tnode_t *,
795 		    di_node_t);
796 
797 		for (cn = di_child_node(din); cn != DI_NODE_NIL;
798 		    cn = di_sibling_node(cn)) {
799 			if (strcmp(di_node_name(cn), IPORT) == 0)
800 				niports++;
801 		}
802 		if (niports > 0)
803 			pci_iports_instantiate(mod, fn, din, niports);
804 
805 		if ((rcnt = di_prop_lookup_strings(DDI_DEV_T_ANY, din,
806 		    DI_RECEPTACLE_PHYMASK, &propstr)) > 0) {
807 			if (topo_node_range_create(mod, fn, RECEPTACLE, 0,
808 			    rcnt) >= 0)
809 				pci_receptacle_instantiate(mod, fn, din);
810 		}
811 	}
812 
813 	/*
814 	 * If this is an NVMe device and if the FRU label indicates it's not an
815 	 * onboard device then invoke the disk enumerator to enumerate the NVMe
816 	 * controller and associated namespaces.
817 	 *
818 	 * We skip NVMe devices that appear to be onboard as those are likely
819 	 * M.2 or U.2 devices and so should be enumerated via a
820 	 * platform-specific XML map so that they can be associated with the
821 	 * correct physical bay/slot.  This code is intended to pick up NVMe
822 	 * devices that are part of PCIe add-in cards.
823 	 */
824 	if (topo_node_label(fn, &label, &err) != 0) {
825 		topo_mod_dprintf(mod, "%s: failed to lookup FRU label on %s=%d",
826 		    __func__, topo_node_name(fn), topo_node_instance(fn));
827 		goto out;
828 	}
829 
830 	if (class == PCI_CLASS_MASS && subclass == PCI_MASS_NVME &&
831 	    strcmp(label, "MB") != 0) {
832 		char *driver = di_driver_name(din);
833 		char *slash;
834 		topo_pgroup_info_t pgi;
835 
836 		if (topo_prop_get_string(fn, TOPO_PGROUP_IO, TOPO_IO_DEV,
837 		    &pdev, &err) != 0) {
838 			topo_mod_dprintf(mod, "%s: failed to lookup %s on "
839 			    "%s=%d", __func__, TOPO_IO_DEV, topo_node_name(fn),
840 			    topo_node_instance(fn));
841 			goto out;
842 		}
843 
844 		/*
845 		 * Add the binding properties that are required by the disk
846 		 * enumerator to discover the accociated NVMe controller.
847 		 */
848 		pdev_sz = strlen(pdev) + 1;
849 		if ((slash = strrchr(pdev, '/')) == NULL) {
850 			topo_mod_dprintf(mod, "%s: malformed dev path\n",
851 			    __func__);
852 			(void) topo_mod_seterrno(mod, EMOD_PARTIAL_ENUM);
853 			goto out;
854 		}
855 		*slash = '\0';
856 
857 		pgi.tpi_name = TOPO_PGROUP_BINDING;
858 		pgi.tpi_namestab = TOPO_STABILITY_PRIVATE;
859 		pgi.tpi_datastab = TOPO_STABILITY_PRIVATE;
860 		pgi.tpi_version = TOPO_VERSION;
861 		if (topo_pgroup_create(fn, &pgi, &err) != 0 ||
862 		    topo_prop_set_string(fn, TOPO_PGROUP_BINDING,
863 		    TOPO_BINDING_DRIVER, TOPO_PROP_IMMUTABLE, driver,
864 		    &err) != 0 ||
865 		    topo_prop_set_string(fn, TOPO_PGROUP_BINDING,
866 		    TOPO_BINDING_PARENT_DEV, TOPO_PROP_IMMUTABLE, pdev,
867 		    &err) != 0) {
868 			topo_mod_dprintf(mod, "%s: failed to set binding "
869 			    "props", __func__);
870 			(void) topo_mod_seterrno(mod, EMOD_PARTIAL_ENUM);
871 			goto out;
872 		}
873 
874 		/*
875 		 * Load and invoke the disk enumerator module.
876 		 */
877 		if (topo_mod_load(mod, DISK, TOPO_VERSION) == NULL) {
878 			topo_mod_dprintf(mod, "pcibus enum could not load "
879 			    "disk enum\n");
880 			(void) topo_mod_seterrno(mod, EMOD_PARTIAL_ENUM);
881 			goto out;
882 		}
883 		(void) topo_mod_enumerate(mod, fn, DISK, NVME, 0, 0, NULL);
884 	}
885 out:
886 	if (pdev != NULL) {
887 		topo_mod_free(mod, pdev, pdev_sz);
888 	}
889 	topo_mod_strfree(mod, label);
890 }
891 
892 int
893 pci_children_instantiate(topo_mod_t *mod, tnode_t *parent, di_node_t pn,
894     int board, int bridge, int rc, int bover, int depth)
895 {
896 	did_t *pps[MAX_PCIBUS_DEVS][MAX_PCIDEV_FNS];
897 	did_t *bp = NULL;
898 	did_t *np;
899 	di_node_t sib;
900 	di_node_t din;
901 	tnode_t *bn = NULL;
902 	tnode_t *dn = NULL;
903 	int pb = -1;
904 	int b, d, f;
905 
906 	for (d = 0; d < MAX_PCIBUS_DEVS; d++)
907 		for (f = 0; f < MAX_PCIDEV_FNS; f++)
908 			pps[d][f] = NULL;
909 
910 	/* start at the parent's first sibling */
911 	sib = di_child_node(pn);
912 	while (sib != DI_NODE_NIL) {
913 		np = did_create(mod, sib, board, bridge, rc, bover);
914 		if (np == NULL)
915 			return (-1);
916 		did_BDF(np, &b, &d, &f);
917 		pps[d][f] = np;
918 		if (bp == NULL)
919 			bp = np;
920 		if (pb < 0)
921 			pb = ((bover == TRUST_BDF) ? b : bover);
922 		sib = di_sibling_node(sib);
923 	}
924 	if (pb < 0 && bover < 0)
925 		return (0);
926 	if (rc >= 0)
927 		bn = pciexbus_declare(mod, parent, pn, ((pb < 0) ? bover : pb));
928 	else
929 		bn = pcibus_declare(mod, parent, pn, ((pb < 0) ? bover : pb));
930 	if (bn == NULL)
931 		return (-1);
932 	if (pb < 0)
933 		return (0);
934 
935 	for (d = 0; d < MAX_PCIBUS_DEVS; d++) {
936 		for (f = 0; f < MAX_PCIDEV_FNS; f++) {
937 			if (pps[d][f] == NULL)
938 				continue;
939 			din = did_dinode(pps[d][f]);
940 
941 			/*
942 			 * Try to enumerate as many devices and functions as
943 			 * possible.  If we fail to declare a device, break
944 			 * out of the function loop.
945 			 */
946 			declare_dev_and_fn(mod, bn,
947 			    &dn, din, board, bridge, rc, d, f, depth);
948 			did_rele(pps[d][f]);
949 
950 			if (dn == NULL)
951 				break;
952 		}
953 		dn = NULL;
954 	}
955 	return (0);
956 }
957 
958 static int
959 pciexbus_enum(topo_mod_t *mp, tnode_t *ptn, char *pnm, topo_instance_t min,
960     topo_instance_t max)
961 {
962 	di_node_t pdn;
963 	int rc, hb;
964 	tnode_t *hbtn;
965 	int retval;
966 
967 	/*
968 	 * PCI-Express; parent node's private data is a simple di_node_t
969 	 * and we have to construct our own did hash and did_t.
970 	 */
971 	rc = topo_node_instance(ptn);
972 	if ((hbtn = topo_node_parent(ptn)) != NULL)
973 		hb = topo_node_instance(hbtn);
974 	else
975 		hb = rc;
976 
977 	if ((pdn = topo_node_getspecific(ptn)) == DI_NODE_NIL) {
978 		topo_mod_dprintf(mp,
979 		    "Parent %s node missing private data.\n"
980 		    "Unable to proceed with %s enumeration.\n", pnm, PCIEX_BUS);
981 		return (0);
982 	}
983 	if (did_hash_init(mp) != 0)
984 		return (-1);
985 	if ((did_create(mp, pdn, 0, hb, rc, TRUST_BDF)) == NULL)
986 		return (-1);	/* errno already set */
987 
988 	retval = pci_children_instantiate(mp, ptn, pdn, 0, hb, rc,
989 	    (min == max) ? min : TRUST_BDF, 0);
990 	did_hash_fini(mp);
991 
992 	return (retval);
993 }
994 
995 static int
996 pcibus_enum(topo_mod_t *mp, tnode_t *ptn, char *pnm, topo_instance_t min,
997     topo_instance_t max, void *data)
998 {
999 	did_t *didp, *hbdid = (did_t *)data;
1000 	int retval;
1001 
1002 	/*
1003 	 * XXTOPO: we should not be sharing private node data with another
1004 	 * module. PCI Bus; Parent node's private data is a did_t.  We'll
1005 	 * use the did hash established by the parent.
1006 	 */
1007 	did_setspecific(mp, data);
1008 
1009 	/*
1010 	 * If we're looking for a specific bus-instance, find the right
1011 	 * did_t in the chain, otherwise, there should be only one did_t.
1012 	 */
1013 	if (min == max) {
1014 		int b;
1015 		didp = hbdid;
1016 		while (didp != NULL) {
1017 			did_BDF(didp, &b, NULL, NULL);
1018 			if (b == min)
1019 				break;
1020 			didp = did_link_get(didp);
1021 		}
1022 		if (didp == NULL) {
1023 			topo_mod_dprintf(mp,
1024 			    "Parent %s node missing private data related\n"
1025 			    "to %s instance %d.\n", pnm, PCI_BUS, min);
1026 			topo_mod_setspecific(mp, NULL);
1027 			return (0);
1028 		}
1029 	} else {
1030 		assert(did_link_get(hbdid) == NULL);
1031 		didp = hbdid;
1032 	}
1033 	retval = pci_children_instantiate(mp, ptn, did_dinode(didp),
1034 	    did_board(didp), did_bridge(didp), did_rc(didp),
1035 	    (min == max) ? min : TRUST_BDF, 0);
1036 
1037 	topo_mod_setspecific(mp, NULL);
1038 
1039 	return (retval);
1040 }
1041 
1042 /*ARGSUSED*/
1043 static int
1044 pci_enum(topo_mod_t *mod, tnode_t *ptn, const char *name,
1045     topo_instance_t min, topo_instance_t max, void *notused, void *data)
1046 {
1047 	int retval;
1048 	char *pname;
1049 
1050 	topo_mod_dprintf(mod, "Enumerating pci!\n");
1051 
1052 	if (strcmp(name, PCI_BUS) != 0 && strcmp(name, PCIEX_BUS) != 0) {
1053 		topo_mod_dprintf(mod,
1054 		    "Currently only know how to enumerate %s or %s.\n",
1055 		    PCI_BUS, PCIEX_BUS);
1056 		return (0);
1057 	}
1058 	pname = topo_node_name(ptn);
1059 	if (strcmp(pname, HOSTBRIDGE) != 0 && strcmp(pname, PCIEX_ROOT) != 0) {
1060 		topo_mod_dprintf(mod,
1061 		    "Currently can only enumerate a %s or %s directly\n",
1062 		    PCI_BUS, PCIEX_BUS);
1063 		topo_mod_dprintf(mod,
1064 		    "descended from a %s or %s node.\n",
1065 		    HOSTBRIDGE, PCIEX_ROOT);
1066 		return (0);
1067 	}
1068 
1069 	if (strcmp(name, PCI_BUS) == 0) {
1070 		retval = pcibus_enum(mod, ptn, pname, min, max, data);
1071 	} else if (strcmp(name, PCIEX_BUS) == 0) {
1072 		retval = pciexbus_enum(mod, ptn, pname, min, max);
1073 	} else {
1074 		topo_mod_dprintf(mod,
1075 		    "Currently only know how to enumerate %s or %s not %s.\n",
1076 		    PCI_BUS, PCIEX_BUS, name);
1077 		return (0);
1078 	}
1079 
1080 	return (retval);
1081 }
1082 
1083 /*ARGSUSED*/
1084 static void
1085 pci_release(topo_mod_t *mp, tnode_t *node)
1086 {
1087 	topo_method_unregister_all(mp, node);
1088 
1089 	/*
1090 	 * node private data (did_t) for this node is destroyed in
1091 	 * did_hash_destroy()
1092 	 */
1093 
1094 	topo_node_unbind(node);
1095 }
1096