xref: /illumos-gate/usr/src/lib/fm/topo/modules/common/disk/disk_nvme.c (revision 744642a2e129e386f3b3718abdfedc2ba3e38716)
13c6ffbabSRob Johnston /*
23c6ffbabSRob Johnston  * This file and its contents are supplied under the terms of the
33c6ffbabSRob Johnston  * Common Development and Distribution License ("CDDL"), version 1.0.
43c6ffbabSRob Johnston  * You may only use this file in accordance with the terms of version
53c6ffbabSRob Johnston  * 1.0 of the CDDL.
63c6ffbabSRob Johnston  *
73c6ffbabSRob Johnston  * A full copy of the text of the CDDL should have accompanied this
83c6ffbabSRob Johnston  * source.  A copy of the CDDL is also available via the Internet at
93c6ffbabSRob Johnston  * http://www.illumos.org/license/CDDL.
103c6ffbabSRob Johnston  */
113c6ffbabSRob Johnston 
123c6ffbabSRob Johnston /*
133c6ffbabSRob Johnston  * Copyright 2020 Joyent, Inc.
14153f3212SHans Rosenfeld  * Copyright 2022 Tintri by DDN, Inc. All rights reserved.
15*744642a2SRobert Mustacchi  * Copyright 2023 Oxide Computer Company
163c6ffbabSRob Johnston  */
173c6ffbabSRob Johnston 
183c6ffbabSRob Johnston /*
193c6ffbabSRob Johnston  * This file drives topo node enumeration of NVMe controllers.  A single "nvme"
203c6ffbabSRob Johnston  * node is enumerated for each NVMe controller.   Child "disk" nodes are then
21*744642a2SRobert Mustacchi  * enumerated for each active or attached NVMe namespace.
223c6ffbabSRob Johnston  *
233c6ffbabSRob Johnston  * nvme nodes are expected to be enumerated under either a "bay" node (for U.2
243c6ffbabSRob Johnston  * devices) or a "slot" node (for M.2 devices) or a "pciexfn" node (for AIC
253c6ffbabSRob Johnston  * devices).
263c6ffbabSRob Johnston  *
273c6ffbabSRob Johnston  * Enumeration of NVMe controllers on PCIe add-in cards is automatically driven
283c6ffbabSRob Johnston  * by the pcibus topo module.
293c6ffbabSRob Johnston  *
303c6ffbabSRob Johnston  * In order to allow for associating a given NVMe controller with a physical
313c6ffbabSRob Johnston  * location, enumeration of U.2 and M.2 devices should be driven by a
323c6ffbabSRob Johnston  * platform-specific topo map which statically sets the following two
333c6ffbabSRob Johnston  * properties on the parent "bay" or "slot" node:
343c6ffbabSRob Johnston  *
353c6ffbabSRob Johnston  * propgroup        property        description
363c6ffbabSRob Johnston  * ---------        --------        ------------
373c6ffbabSRob Johnston  * binding          driver          "nvme"
383c6ffbabSRob Johnston  * binding          parent-device   devpath of parent PCIe device
393c6ffbabSRob Johnston  *
403c6ffbabSRob Johnston  * for example:
413c6ffbabSRob Johnston  *
423c6ffbabSRob Johnston  * <propgroup name="binding" version="1" name-stability="Private"
433c6ffbabSRob Johnston  *   data-stability="Private">
443c6ffbabSRob Johnston  *     <propval name="driver" type="string" value="nvme"/>
453c6ffbabSRob Johnston  *     <propval name="parent-device" type="string"
463c6ffbabSRob Johnston  *       value="/pci@0,0/pci8086,6f09@3,1"/>
473c6ffbabSRob Johnston  * </propgroup>
483c6ffbabSRob Johnston  * <dependents grouping="children">
493c6ffbabSRob Johnston  *     <range name="nvme" min="0" max="0">
503c6ffbabSRob Johnston  *         <enum-method name="disk" version="1"/>
513c6ffbabSRob Johnston  *     </range>
523c6ffbabSRob Johnston  * </dependents>
533c6ffbabSRob Johnston  */
543c6ffbabSRob Johnston #include <stdlib.h>
553c6ffbabSRob Johnston #include <sys/types.h>
563c6ffbabSRob Johnston #include <sys/stat.h>
573c6ffbabSRob Johnston #include <fcntl.h>
583c6ffbabSRob Johnston #include <unistd.h>
593c6ffbabSRob Johnston #include <string.h>
603c6ffbabSRob Johnston #include <strings.h>
61*744642a2SRobert Mustacchi #include <stdbool.h>
623c6ffbabSRob Johnston 
633c6ffbabSRob Johnston #include <sys/fm/protocol.h>
643c6ffbabSRob Johnston #include <fm/topo_hc.h>
653c6ffbabSRob Johnston #include <fm/topo_mod.h>
66*744642a2SRobert Mustacchi #include <topo_ufm.h>
673c6ffbabSRob Johnston 
683c6ffbabSRob Johnston #include <sys/dkio.h>
693c6ffbabSRob Johnston #include <sys/scsi/generic/inquiry.h>
703c6ffbabSRob Johnston 
713c6ffbabSRob Johnston #include <sys/nvme.h>
723c6ffbabSRob Johnston #include "disk.h"
733c6ffbabSRob Johnston #include "disk_drivers.h"
743c6ffbabSRob Johnston 
753c6ffbabSRob Johnston typedef struct nvme_enum_info {
763c6ffbabSRob Johnston 	topo_mod_t		*nei_mod;
773c6ffbabSRob Johnston 	di_node_t		nei_dinode;
783c6ffbabSRob Johnston 	nvme_identify_ctrl_t	*nei_idctl;
793c6ffbabSRob Johnston 	nvme_version_t		nei_vers;
803c6ffbabSRob Johnston 	tnode_t			*nei_parent;
813c6ffbabSRob Johnston 	tnode_t			*nei_nvme;
823c6ffbabSRob Johnston 	nvlist_t		*nei_nvme_fmri;
833c6ffbabSRob Johnston 	const char		*nei_nvme_path;
843c6ffbabSRob Johnston 	int			nei_fd;
853c6ffbabSRob Johnston } nvme_enum_info_t;
863c6ffbabSRob Johnston 
873c6ffbabSRob Johnston typedef struct devlink_arg {
883c6ffbabSRob Johnston 	topo_mod_t		*dla_mod;
893c6ffbabSRob Johnston 	char			*dla_logical_disk;
903c6ffbabSRob Johnston 	uint_t			dla_strsz;
913c6ffbabSRob Johnston } devlink_arg_t;
923c6ffbabSRob Johnston 
933c6ffbabSRob Johnston static int
943c6ffbabSRob Johnston devlink_cb(di_devlink_t dl, void *arg)
953c6ffbabSRob Johnston {
963c6ffbabSRob Johnston 	devlink_arg_t *dlarg = (devlink_arg_t *)arg;
973c6ffbabSRob Johnston 	topo_mod_t *mod = dlarg->dla_mod;
983c6ffbabSRob Johnston 	const char *devpath;
993c6ffbabSRob Johnston 	char *slice, *ctds;
1003c6ffbabSRob Johnston 
1013c6ffbabSRob Johnston 	if ((devpath = di_devlink_path(dl)) == NULL ||
1023c6ffbabSRob Johnston 	    (dlarg->dla_logical_disk = topo_mod_strdup(mod, devpath)) ==
1033c6ffbabSRob Johnston 	    NULL) {
1043c6ffbabSRob Johnston 		return (DI_WALK_TERMINATE);
1053c6ffbabSRob Johnston 	}
1063c6ffbabSRob Johnston 
1073c6ffbabSRob Johnston 	/*
1083c6ffbabSRob Johnston 	 * We need to keep track of the original string size before we
1093c6ffbabSRob Johnston 	 * truncate it with a NUL, so that we can free the right number of
1103c6ffbabSRob Johnston 	 * bytes when we're done, otherwise libumem will complain.
1113c6ffbabSRob Johnston 	 */
1123c6ffbabSRob Johnston 	dlarg->dla_strsz = strlen(dlarg->dla_logical_disk) + 1;
1133c6ffbabSRob Johnston 
1143c6ffbabSRob Johnston 	/* trim the slice off the public name */
1153c6ffbabSRob Johnston 	if (((ctds = strrchr(dlarg->dla_logical_disk, '/')) != NULL) &&
1163c6ffbabSRob Johnston 	    ((slice = strchr(ctds, 's')) != NULL))
1173c6ffbabSRob Johnston 		*slice = '\0';
1183c6ffbabSRob Johnston 
1193c6ffbabSRob Johnston 	return (DI_WALK_TERMINATE);
1203c6ffbabSRob Johnston }
1213c6ffbabSRob Johnston 
1223c6ffbabSRob Johnston static char *
1233c6ffbabSRob Johnston get_logical_disk(topo_mod_t *mod, const char *devpath, uint_t *bufsz)
1243c6ffbabSRob Johnston {
1253c6ffbabSRob Johnston 	di_devlink_handle_t devhdl;
1263c6ffbabSRob Johnston 	devlink_arg_t dlarg = { 0 };
1273c6ffbabSRob Johnston 	char *minorpath = NULL;
1283c6ffbabSRob Johnston 
1293c6ffbabSRob Johnston 	if (asprintf(&minorpath, "%s:a", devpath) < 0) {
1303c6ffbabSRob Johnston 		return (NULL);
1313c6ffbabSRob Johnston 	}
1323c6ffbabSRob Johnston 
1333c6ffbabSRob Johnston 	if ((devhdl = di_devlink_init(NULL, 0)) == DI_NODE_NIL) {
1343c6ffbabSRob Johnston 		topo_mod_dprintf(mod, "%s: di_devlink_init failed", __func__);
1353c6ffbabSRob Johnston 		free(minorpath);
1363c6ffbabSRob Johnston 		return (NULL);
1373c6ffbabSRob Johnston 	}
1383c6ffbabSRob Johnston 
1393c6ffbabSRob Johnston 	dlarg.dla_mod = mod;
1403c6ffbabSRob Johnston 
1413c6ffbabSRob Johnston 	(void) di_devlink_walk(devhdl, "^dsk/", minorpath, DI_PRIMARY_LINK,
1423c6ffbabSRob Johnston 	    &dlarg, devlink_cb);
1433c6ffbabSRob Johnston 
1443c6ffbabSRob Johnston 	(void) di_devlink_fini(&devhdl);
1453c6ffbabSRob Johnston 	free(minorpath);
1463c6ffbabSRob Johnston 
1473c6ffbabSRob Johnston 	*bufsz = dlarg.dla_strsz;
1483c6ffbabSRob Johnston 	return (dlarg.dla_logical_disk);
1493c6ffbabSRob Johnston }
1503c6ffbabSRob Johnston 
151*744642a2SRobert Mustacchi static bool
152*744642a2SRobert Mustacchi disk_nvme_make_ns_serial(topo_mod_t *mod, const nvme_identify_nsid_t *id,
153*744642a2SRobert Mustacchi     uint32_t nsid, char *buf, size_t buflen)
1543c6ffbabSRob Johnston {
155*744642a2SRobert Mustacchi 	uint8_t zero_guid[16] = { 0 };
156*744642a2SRobert Mustacchi 	int ret;
1573c6ffbabSRob Johnston 
158*744642a2SRobert Mustacchi 	if (bcmp(zero_guid, id->id_nguid, sizeof (id->id_nguid)) != 0) {
159*744642a2SRobert Mustacchi 		ret = snprintf(buf, buflen, "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X"
160*744642a2SRobert Mustacchi 		    "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X",
161*744642a2SRobert Mustacchi 		    id->id_nguid[0], id->id_nguid[1], id->id_nguid[2],
162*744642a2SRobert Mustacchi 		    id->id_nguid[3], id->id_nguid[4], id->id_nguid[5],
163*744642a2SRobert Mustacchi 		    id->id_nguid[6], id->id_nguid[7], id->id_nguid[8],
164*744642a2SRobert Mustacchi 		    id->id_nguid[9], id->id_nguid[10], id->id_nguid[11],
165*744642a2SRobert Mustacchi 		    id->id_nguid[12], id->id_nguid[13], id->id_nguid[14],
166*744642a2SRobert Mustacchi 		    id->id_nguid[15]);
167*744642a2SRobert Mustacchi 	} else if (bcmp(zero_guid, id->id_eui64, sizeof (id->id_eui64)) != 0) {
168*744642a2SRobert Mustacchi 		ret = snprintf(buf, buflen,
169*744642a2SRobert Mustacchi 		    "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X",
170*744642a2SRobert Mustacchi 		    id->id_eui64[0], id->id_eui64[1], id->id_eui64[2],
171*744642a2SRobert Mustacchi 		    id->id_eui64[3], id->id_eui64[4], id->id_eui64[5],
172*744642a2SRobert Mustacchi 		    id->id_eui64[6], id->id_eui64[7]);
1733c6ffbabSRob Johnston 	} else {
174*744642a2SRobert Mustacchi 		ret = snprintf(buf, buflen, "%u", nsid);
175*744642a2SRobert Mustacchi 	}
176*744642a2SRobert Mustacchi 
177*744642a2SRobert Mustacchi 	if ((size_t)ret >= buflen) {
178*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "overflowed serial number for nsid %u: "
179*744642a2SRobert Mustacchi 		    "needed %zu bytes, got %d", nsid, buflen, ret);
180*744642a2SRobert Mustacchi 		return (false);
181*744642a2SRobert Mustacchi 	}
182*744642a2SRobert Mustacchi 
183*744642a2SRobert Mustacchi 	return (true);
1843c6ffbabSRob Johnston }
1853c6ffbabSRob Johnston 
1863c6ffbabSRob Johnston /*
187*744642a2SRobert Mustacchi  * Create the common I/O property group properties that are shared between
188*744642a2SRobert Mustacchi  * controllers and namespaces. We assume the property group was already created.
1893c6ffbabSRob Johnston  */
190*744642a2SRobert Mustacchi static bool
191*744642a2SRobert Mustacchi disk_nvme_common_io(topo_mod_t *mod, tnode_t *tn, di_node_t di)
192*744642a2SRobert Mustacchi {
193*744642a2SRobert Mustacchi 	int err;
194*744642a2SRobert Mustacchi 	int inst = di_instance(di);
195*744642a2SRobert Mustacchi 	const char *drv = di_driver_name(di);
196*744642a2SRobert Mustacchi 	char *path;
197*744642a2SRobert Mustacchi 	const char *ppaths[1];
1983c6ffbabSRob Johnston 
199*744642a2SRobert Mustacchi 	if (inst != -1 && topo_prop_set_uint32(tn, TOPO_PGROUP_IO,
200*744642a2SRobert Mustacchi 	    TOPO_IO_INSTANCE, TOPO_PROP_IMMUTABLE, (uint32_t)inst, &err) != 0) {
201*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: "
202*744642a2SRobert Mustacchi 		    "%s", TOPO_PGROUP_IO, TOPO_IO_INSTANCE, topo_node_name(tn),
203*744642a2SRobert Mustacchi 		    topo_node_instance(tn), topo_strerror(err));
204*744642a2SRobert Mustacchi 		return (false);
2053c6ffbabSRob Johnston 	}
2063c6ffbabSRob Johnston 
207*744642a2SRobert Mustacchi 	if (drv != NULL && topo_prop_set_string(tn, TOPO_PGROUP_IO,
208*744642a2SRobert Mustacchi 	    TOPO_IO_DRIVER, TOPO_PROP_IMMUTABLE, drv, &err) != 0) {
209*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: "
210*744642a2SRobert Mustacchi 		    "%s", TOPO_PGROUP_IO, TOPO_IO_DRIVER, topo_node_name(tn),
211*744642a2SRobert Mustacchi 		    topo_node_instance(tn), topo_strerror(err));
212*744642a2SRobert Mustacchi 		return (false);
213*744642a2SRobert Mustacchi 	}
2143c6ffbabSRob Johnston 
215*744642a2SRobert Mustacchi 	if (drv != NULL) {
216*744642a2SRobert Mustacchi 		nvlist_t *fmri = topo_mod_modfmri(mod, FM_MOD_SCHEME_VERSION,
217*744642a2SRobert Mustacchi 		    drv);
218*744642a2SRobert Mustacchi 		if (mod != NULL && topo_prop_set_fmri(tn, TOPO_PGROUP_IO,
219*744642a2SRobert Mustacchi 		    TOPO_IO_MODULE, TOPO_PROP_IMMUTABLE, fmri, &err) != 0) {
220*744642a2SRobert Mustacchi 			topo_mod_dprintf(mod, "failed to set %s:%s on %s[%"
221*744642a2SRobert Mustacchi 			    PRIu64 "]: %s", TOPO_PGROUP_IO, TOPO_IO_MODULE,
222*744642a2SRobert Mustacchi 			    topo_node_name(tn), topo_node_instance(tn),
2233c6ffbabSRob Johnston 			    topo_strerror(err));
224*744642a2SRobert Mustacchi 			nvlist_free(fmri);
225*744642a2SRobert Mustacchi 			return (false);
226*744642a2SRobert Mustacchi 		}
227*744642a2SRobert Mustacchi 		nvlist_free(fmri);
2283c6ffbabSRob Johnston 	}
2293c6ffbabSRob Johnston 
230*744642a2SRobert Mustacchi 	path = di_devfs_path(di);
2313c6ffbabSRob Johnston 	ppaths[0] = path;
232*744642a2SRobert Mustacchi 	if (path != NULL && topo_prop_set_string(tn, TOPO_PGROUP_IO,
233*744642a2SRobert Mustacchi 	    TOPO_IO_DEV_PATH, TOPO_PROP_IMMUTABLE, path, &err) != 0) {
234*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: "
235*744642a2SRobert Mustacchi 		    "%s", TOPO_PGROUP_IO, TOPO_IO_DRIVER, topo_node_name(tn),
236*744642a2SRobert Mustacchi 		    topo_node_instance(tn), topo_strerror(err));
237*744642a2SRobert Mustacchi 		di_devfs_path_free(path);
238*744642a2SRobert Mustacchi 		return (false);
239*744642a2SRobert Mustacchi 	}
240*744642a2SRobert Mustacchi 
241*744642a2SRobert Mustacchi 	if (path != NULL && topo_prop_set_string_array(tn, TOPO_PGROUP_IO,
242*744642a2SRobert Mustacchi 	    TOPO_IO_PHYS_PATH, TOPO_PROP_IMMUTABLE, ppaths, 1, &err) != 0) {
243*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: "
244*744642a2SRobert Mustacchi 		    "%s", TOPO_PGROUP_IO, TOPO_IO_PHYS_PATH, topo_node_name(tn),
245*744642a2SRobert Mustacchi 		    topo_node_instance(tn), topo_strerror(err));
246*744642a2SRobert Mustacchi 		di_devfs_path_free(path);
247*744642a2SRobert Mustacchi 		return (false);
248*744642a2SRobert Mustacchi 	}
249*744642a2SRobert Mustacchi 	di_devfs_path_free(path);
250*744642a2SRobert Mustacchi 
251*744642a2SRobert Mustacchi 	return (true);
252*744642a2SRobert Mustacchi }
2533c6ffbabSRob Johnston 
2543c6ffbabSRob Johnston /*
255*744642a2SRobert Mustacchi  * Add the various storage and I/O property group items that are appropriate
256*744642a2SRobert Mustacchi  * given that we have a devinfo node. The storage property group has already
257*744642a2SRobert Mustacchi  * been created, but the I/O property group has not.
2583c6ffbabSRob Johnston  */
259*744642a2SRobert Mustacchi static void
260*744642a2SRobert Mustacchi disk_nvme_make_ns_di_props(topo_mod_t *mod, tnode_t *tn, di_node_t di)
261*744642a2SRobert Mustacchi {
262*744642a2SRobert Mustacchi 	int err;
263*744642a2SRobert Mustacchi 	char *devid, *mfg, *model, *rev, *serial, *log, *path;
264*744642a2SRobert Mustacchi 	uint_t buflen;
265*744642a2SRobert Mustacchi 
266*744642a2SRobert Mustacchi 	if (di_prop_lookup_strings(DDI_DEV_T_ANY, di, DEVID_PROP_NAME,
267*744642a2SRobert Mustacchi 	    &devid) != 1 ||
268*744642a2SRobert Mustacchi 	    di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_VENDOR_ID,
269*744642a2SRobert Mustacchi 	    &mfg) != 1 ||
270*744642a2SRobert Mustacchi 	    di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_PRODUCT_ID,
271*744642a2SRobert Mustacchi 	    &model) != 1 ||
272*744642a2SRobert Mustacchi 	    di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_REVISION_ID,
273*744642a2SRobert Mustacchi 	    &rev) != 1 ||
274*744642a2SRobert Mustacchi 	    di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_SERIAL_NO,
275*744642a2SRobert Mustacchi 	    &serial) != 1) {
276*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "failed to get devinfo props for %s[%"
277*744642a2SRobert Mustacchi 		    PRIu64 "]", topo_node_name(tn), topo_node_instance(tn));
278*744642a2SRobert Mustacchi 		return;
2793c6ffbabSRob Johnston 	}
2803c6ffbabSRob Johnston 
281*744642a2SRobert Mustacchi 	/*
282*744642a2SRobert Mustacchi 	 * Set the basic storage manufacturer information. Yes, this is
283*744642a2SRobert Mustacchi 	 * information really about the NVMe controller and not the namespace.
284*744642a2SRobert Mustacchi 	 * That's how the storage property group basically works here.
285*744642a2SRobert Mustacchi 	 */
286*744642a2SRobert Mustacchi 	if (topo_prop_set_string(tn, TOPO_PGROUP_STORAGE,
287*744642a2SRobert Mustacchi 	    TOPO_STORAGE_MANUFACTURER, TOPO_PROP_IMMUTABLE, mfg, &err) != 0 ||
288*744642a2SRobert Mustacchi 	    topo_prop_set_string(tn, TOPO_PGROUP_STORAGE,
289*744642a2SRobert Mustacchi 	    TOPO_STORAGE_SERIAL_NUM, TOPO_PROP_IMMUTABLE, serial, &err) != 0 ||
290*744642a2SRobert Mustacchi 	    topo_prop_set_string(tn, TOPO_PGROUP_STORAGE,
291*744642a2SRobert Mustacchi 	    TOPO_STORAGE_FIRMWARE_REV, TOPO_PROP_IMMUTABLE, rev, &err) != 0 ||
292*744642a2SRobert Mustacchi 	    topo_prop_set_string(tn, TOPO_PGROUP_STORAGE,
293*744642a2SRobert Mustacchi 	    TOPO_STORAGE_MODEL, TOPO_PROP_IMMUTABLE, model, &err) != 0) {
294*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "failed to set storage properties on "
295*744642a2SRobert Mustacchi 		    "%s[%" PRIu64 "]: %s", topo_node_name(tn),
296*744642a2SRobert Mustacchi 		    topo_node_instance(tn), topo_strerror(err));
297*744642a2SRobert Mustacchi 		return;
2983c6ffbabSRob Johnston 	}
2993c6ffbabSRob Johnston 
300*744642a2SRobert Mustacchi 	if (topo_pgroup_create(tn, &io_pgroup, &err) != 0) {
301*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "failed to create I/O property "
302*744642a2SRobert Mustacchi 		    "group on %s[%" PRIu64 "]: %s",  topo_node_name(tn),
303*744642a2SRobert Mustacchi 		    topo_node_instance(tn), topo_strerror(err));
304*744642a2SRobert Mustacchi 	}
3053c6ffbabSRob Johnston 
306*744642a2SRobert Mustacchi 	if (!disk_nvme_common_io(mod, tn, di)) {
307*744642a2SRobert Mustacchi 		return;
308*744642a2SRobert Mustacchi 	}
309*744642a2SRobert Mustacchi 
310*744642a2SRobert Mustacchi 	/*
311*744642a2SRobert Mustacchi 	 * The last property that we'd like to attempt to create for a namespace
312*744642a2SRobert Mustacchi 	 * is a mapping back to its corresponding logical disk entry in /dev.
313*744642a2SRobert Mustacchi 	 * The logical disk will be everything past the trailing /, i.e. a
314*744642a2SRobert Mustacchi 	 * cXtXdX value.
315*744642a2SRobert Mustacchi 	 */
316*744642a2SRobert Mustacchi 	path = di_devfs_path(di);
317*744642a2SRobert Mustacchi 	if (path == NULL) {
318*744642a2SRobert Mustacchi 		return;
319*744642a2SRobert Mustacchi 	}
320*744642a2SRobert Mustacchi 	log = get_logical_disk(mod, path, &buflen);
3213c6ffbabSRob Johnston 	di_devfs_path_free(path);
322*744642a2SRobert Mustacchi 	if (log == NULL) {
323*744642a2SRobert Mustacchi 		return;
324*744642a2SRobert Mustacchi 	}
325*744642a2SRobert Mustacchi 	path = strrchr(log, '/');
326*744642a2SRobert Mustacchi 	if (path != NULL && path[1] != '\0' &&
327*744642a2SRobert Mustacchi 	    topo_prop_set_string(tn, TOPO_PGROUP_STORAGE,
328*744642a2SRobert Mustacchi 	    TOPO_STORAGE_LOGICAL_DISK_NAME, TOPO_PROP_IMMUTABLE, path + 1,
329*744642a2SRobert Mustacchi 	    &err) != 0) {
330*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "failed to set %s:%s on %s[%"
331*744642a2SRobert Mustacchi 		    PRIu64 "]: %s", TOPO_PGROUP_STORAGE,
332*744642a2SRobert Mustacchi 		    TOPO_STORAGE_LOGICAL_DISK_NAME, topo_node_name(tn),
333*744642a2SRobert Mustacchi 		    topo_node_instance(tn), topo_strerror(err));
334*744642a2SRobert Mustacchi 	}
335*744642a2SRobert Mustacchi 	topo_mod_free(mod, log, buflen);
336*744642a2SRobert Mustacchi }
337*744642a2SRobert Mustacchi 
338*744642a2SRobert Mustacchi static void
339*744642a2SRobert Mustacchi disk_nvme_make_ns(nvme_enum_info_t *nei, uint32_t nsid)
340*744642a2SRobert Mustacchi {
341*744642a2SRobert Mustacchi 	topo_mod_t *mod = nei->nei_mod;
342*744642a2SRobert Mustacchi 	nvlist_t *auth = NULL, *fmri = NULL;
343*744642a2SRobert Mustacchi 	const topo_instance_t inst = nsid - 1;
344*744642a2SRobert Mustacchi 	nvme_ns_info_t info;
345*744642a2SRobert Mustacchi 	nvme_ioctl_t ioc;
346*744642a2SRobert Mustacchi 	char serial[64], capstr[64];
347*744642a2SRobert Mustacchi 	uint64_t cap, blksz;
348*744642a2SRobert Mustacchi 	tnode_t *tn;
349*744642a2SRobert Mustacchi 	uint8_t lba;
350*744642a2SRobert Mustacchi 	int err;
351*744642a2SRobert Mustacchi 
352*744642a2SRobert Mustacchi 	bzero(&ioc, sizeof (ioc));
353*744642a2SRobert Mustacchi 	bzero(&info, sizeof (info));
354*744642a2SRobert Mustacchi 	ioc.n_len = sizeof (nvme_ns_info_t);
355*744642a2SRobert Mustacchi 	ioc.n_buf = (uintptr_t)&info;
356*744642a2SRobert Mustacchi 	ioc.n_arg = nsid;
357*744642a2SRobert Mustacchi 
358*744642a2SRobert Mustacchi 	if (ioctl(nei->nei_fd, NVME_IOC_NS_INFO, &ioc) != 0) {
359*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "failed to get namespace info for ns %u: "
360*744642a2SRobert Mustacchi 		    "%s", nsid, strerror(errno));
361*744642a2SRobert Mustacchi 		return;
362*744642a2SRobert Mustacchi 	}
363*744642a2SRobert Mustacchi 
364*744642a2SRobert Mustacchi 	if ((info.nni_state & NVME_NS_STATE_IGNORED) != 0) {
365*744642a2SRobert Mustacchi 		return;
366*744642a2SRobert Mustacchi 	}
367*744642a2SRobert Mustacchi 
368*744642a2SRobert Mustacchi 	if ((info.nni_state &
369*744642a2SRobert Mustacchi 	    (NVME_NS_STATE_ACTIVE | NVME_NS_STATE_ATTACHED)) == 0) {
370*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "skipping nsid %u because it is not "
371*744642a2SRobert Mustacchi 		    "active or attached (state: 0x%x)", nsid, info.nni_state);
372*744642a2SRobert Mustacchi 		return;
373*744642a2SRobert Mustacchi 	}
374*744642a2SRobert Mustacchi 
375*744642a2SRobert Mustacchi 	auth = topo_mod_auth(mod, nei->nei_nvme);
376*744642a2SRobert Mustacchi 	if (auth == NULL) {
377*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "failed to get auth for nsid %u from "
378*744642a2SRobert Mustacchi 		    "parent %s[%" PRIu64 "]: %s", nsid,
379*744642a2SRobert Mustacchi 		    topo_node_name(nei->nei_nvme),
380*744642a2SRobert Mustacchi 		    topo_node_instance(nei->nei_nvme), topo_mod_errmsg(mod));
381*744642a2SRobert Mustacchi 		goto done;
382*744642a2SRobert Mustacchi 	}
383*744642a2SRobert Mustacchi 
384*744642a2SRobert Mustacchi 	/*
385*744642a2SRobert Mustacchi 	 * We want to construct the FMRI for the namespace. The namespace is a
386*744642a2SRobert Mustacchi 	 * little awkward in terms of things like the model, revision, and
387*744642a2SRobert Mustacchi 	 * serial. While blkdev sets up standard inquiry properties to map these
388*744642a2SRobert Mustacchi 	 * to the parent device which makes sense in the context of trying to
389*744642a2SRobert Mustacchi 	 * use this as a normal block device, it's not really appropriate here.
390*744642a2SRobert Mustacchi 	 * The namespace is not the NVMe controller. We construct the namespace
391*744642a2SRobert Mustacchi 	 * serial number from the preferential ordering of information that
392*744642a2SRobert Mustacchi 	 * we're given of the NGUID, EUI64, and then fall back to the namespace
393*744642a2SRobert Mustacchi 	 * number.
394*744642a2SRobert Mustacchi 	 */
395*744642a2SRobert Mustacchi 	if (!disk_nvme_make_ns_serial(mod, &info.nni_id, nsid, serial,
396*744642a2SRobert Mustacchi 	    sizeof (serial))) {
397*744642a2SRobert Mustacchi 		goto done;
398*744642a2SRobert Mustacchi 	}
399*744642a2SRobert Mustacchi 	fmri = topo_mod_hcfmri(mod, nei->nei_nvme, FM_HC_SCHEME_VERSION,
400*744642a2SRobert Mustacchi 	    DISK, inst, NULL, auth, NULL, NULL, serial);
401*744642a2SRobert Mustacchi 	if (fmri == NULL) {
402*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "failed to make fmri for %s[%" PRIu64
403*744642a2SRobert Mustacchi 		    "] on nsid %u: %s", DISK, inst, nsid, topo_mod_errmsg(mod));
404*744642a2SRobert Mustacchi 		goto done;
405*744642a2SRobert Mustacchi 	}
406*744642a2SRobert Mustacchi 
407*744642a2SRobert Mustacchi 	tn = topo_node_bind(mod, nei->nei_nvme, DISK, inst, fmri);
408*744642a2SRobert Mustacchi 	if (tn == NULL) {
409*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "failed to bind fmri for %s[%" PRIu64
410*744642a2SRobert Mustacchi 		    "] on nsid %u: %s", DISK, inst, nsid, topo_mod_errmsg(mod));
411*744642a2SRobert Mustacchi 		goto done;
412*744642a2SRobert Mustacchi 	}
413*744642a2SRobert Mustacchi 
414*744642a2SRobert Mustacchi 	/*
415*744642a2SRobert Mustacchi 	 * Always inherit our parent's FRU. The namespace is just a part of the
416*744642a2SRobert Mustacchi 	 * device in reality.
417*744642a2SRobert Mustacchi 	 */
418*744642a2SRobert Mustacchi 	if (topo_node_fru_set(tn, NULL, 0, &err) != 0) {
419*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "failed to set FRU for %s[%" PRIu64
420*744642a2SRobert Mustacchi 		    "] on nsid %u: %s", DISK, inst, nsid, topo_strerror(err));
421*744642a2SRobert Mustacchi 		goto done;
422*744642a2SRobert Mustacchi 
423*744642a2SRobert Mustacchi 	}
424*744642a2SRobert Mustacchi 
425*744642a2SRobert Mustacchi 	/*
426*744642a2SRobert Mustacchi 	 * Our namespace may or may not be attached. From the namespace we will
427*744642a2SRobert Mustacchi 	 * always get the capacity and block information. The rest of it will
428*744642a2SRobert Mustacchi 	 * end up being filled in if we find a devinfo node.
429*744642a2SRobert Mustacchi 	 */
430*744642a2SRobert Mustacchi 	if (topo_pgroup_create(tn, &storage_pgroup, &err) != 0) {
431*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "failed to create storage property "
432*744642a2SRobert Mustacchi 		    "group on %s[%" PRIu64 "]: %s", DISK, inst,
433*744642a2SRobert Mustacchi 		    topo_strerror(err));
434*744642a2SRobert Mustacchi 	}
435*744642a2SRobert Mustacchi 
436*744642a2SRobert Mustacchi 	lba = info.nni_id.id_flbas.lba_format;
437*744642a2SRobert Mustacchi 	blksz = 1ULL << info.nni_id.id_lbaf[lba].lbaf_lbads;
438*744642a2SRobert Mustacchi 	if (blksz != 0 && topo_prop_set_uint64(tn, TOPO_PGROUP_STORAGE,
439*744642a2SRobert Mustacchi 	    TOPO_STORAGE_LOG_BLOCK_SIZE, TOPO_PROP_IMMUTABLE, blksz, &err) !=
440*744642a2SRobert Mustacchi 	    0) {
441*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "failed to create property %s:%s on %s[%"
442*744642a2SRobert Mustacchi 		    PRIu64 "]: %s", TOPO_PGROUP_STORAGE,
443*744642a2SRobert Mustacchi 		    TOPO_STORAGE_LOG_BLOCK_SIZE, DISK, inst,
444*744642a2SRobert Mustacchi 		    topo_strerror(err));
445*744642a2SRobert Mustacchi 		goto done;
446*744642a2SRobert Mustacchi 	}
447*744642a2SRobert Mustacchi 
448*744642a2SRobert Mustacchi 	cap = blksz * info.nni_id.id_nsize;
449*744642a2SRobert Mustacchi 	if (snprintf(capstr, sizeof (capstr), "%" PRIu64, cap) >=
450*744642a2SRobert Mustacchi 	    sizeof (capstr)) {
451*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "overflowed capacity calculation on "
452*744642a2SRobert Mustacchi 		    "nsid %u", nsid);
453*744642a2SRobert Mustacchi 		goto done;
454*744642a2SRobert Mustacchi 	}
455*744642a2SRobert Mustacchi 
456*744642a2SRobert Mustacchi 	/*
457*744642a2SRobert Mustacchi 	 * Finally attempt to find a child node that has a matching name and go
458*744642a2SRobert Mustacchi 	 * from there. Sorry, this does result in node creation being O(n^2),
459*744642a2SRobert Mustacchi 	 * but at least n is usually small today.
460*744642a2SRobert Mustacchi 	 */
461*744642a2SRobert Mustacchi 	for (di_node_t di = di_child_node(nei->nei_dinode); di != DI_NODE_NIL;
462*744642a2SRobert Mustacchi 	    di = di_sibling_node(di)) {
463*744642a2SRobert Mustacchi 		const char *addr = di_bus_addr(di);
464*744642a2SRobert Mustacchi 		if (addr != NULL && strcmp(addr, info.nni_addr) == 0) {
465*744642a2SRobert Mustacchi 			disk_nvme_make_ns_di_props(mod, tn, di);
466*744642a2SRobert Mustacchi 		}
467*744642a2SRobert Mustacchi 	}
468*744642a2SRobert Mustacchi 
469*744642a2SRobert Mustacchi done:
4703c6ffbabSRob Johnston 	nvlist_free(auth);
4713c6ffbabSRob Johnston 	nvlist_free(fmri);
472*744642a2SRobert Mustacchi }
473*744642a2SRobert Mustacchi 
474*744642a2SRobert Mustacchi /*
475*744642a2SRobert Mustacchi  * Attempt to make a ufm node, but swallow the error so we can try to get as
476*744642a2SRobert Mustacchi  * much of the disk information as possible.
477*744642a2SRobert Mustacchi  */
478*744642a2SRobert Mustacchi static void
479*744642a2SRobert Mustacchi disk_nvme_make_ufm(topo_mod_t *mod, nvme_enum_info_t *nei)
480*744642a2SRobert Mustacchi {
481*744642a2SRobert Mustacchi 	topo_ufm_devinfo_t tud;
482*744642a2SRobert Mustacchi 	char *path = di_devfs_path(nei->nei_dinode);
483*744642a2SRobert Mustacchi 	if (path == NULL) {
484*744642a2SRobert Mustacchi 		return;
485*744642a2SRobert Mustacchi 	}
486*744642a2SRobert Mustacchi 
487*744642a2SRobert Mustacchi 	tud.tud_method = TOPO_UFM_M_DEVINFO;
488*744642a2SRobert Mustacchi 	tud.tud_path = path;
489*744642a2SRobert Mustacchi 	if (topo_mod_load(mod, TOPO_MOD_UFM, TOPO_VERSION) == NULL) {
490*744642a2SRobert Mustacchi 		topo_mod_dprintf(mod, "disk enum could not load ufm module");
491*744642a2SRobert Mustacchi 		di_devfs_path_free(path);
492*744642a2SRobert Mustacchi 		return;
493*744642a2SRobert Mustacchi 	}
494*744642a2SRobert Mustacchi 
495*744642a2SRobert Mustacchi 	(void) topo_mod_enumerate(mod, nei->nei_nvme, TOPO_MOD_UFM, UFM, 0, 0,
496*744642a2SRobert Mustacchi 	    &tud);
497*744642a2SRobert Mustacchi 	di_devfs_path_free(path);
4983c6ffbabSRob Johnston }
4993c6ffbabSRob Johnston 
5003c6ffbabSRob Johnston static const topo_pgroup_info_t nvme_pgroup = {
5013c6ffbabSRob Johnston 	TOPO_PGROUP_NVME,
5023c6ffbabSRob Johnston 	TOPO_STABILITY_PRIVATE,
5033c6ffbabSRob Johnston 	TOPO_STABILITY_PRIVATE,
5043c6ffbabSRob Johnston 	1
5053c6ffbabSRob Johnston };
5063c6ffbabSRob Johnston 
5073c6ffbabSRob Johnston static int
5083c6ffbabSRob Johnston make_nvme_node(nvme_enum_info_t *nvme_info)
5093c6ffbabSRob Johnston {
5103c6ffbabSRob Johnston 	topo_mod_t *mod = nvme_info->nei_mod;
5113c6ffbabSRob Johnston 	nvlist_t *auth = NULL, *fmri = NULL, *fru;
5123c6ffbabSRob Johnston 	tnode_t *nvme;
5133c6ffbabSRob Johnston 	char raw_rev[NVME_FWVER_SZ + 1], raw_model[NVME_MODEL_SZ + 1];
5143c6ffbabSRob Johnston 	char raw_serial[NVME_SERIAL_SZ + 1];
5153c6ffbabSRob Johnston 	char *rev = NULL, *model = NULL, *serial = NULL, *vers = NULL;
5163c6ffbabSRob Johnston 	char *pname = topo_node_name(nvme_info->nei_parent);
5173c6ffbabSRob Johnston 	char *label = NULL;
5183c6ffbabSRob Johnston 	topo_instance_t pinst = topo_node_instance(nvme_info->nei_parent);
5193c6ffbabSRob Johnston 	int err = 0, ret = -1;
5203c6ffbabSRob Johnston 
5213c6ffbabSRob Johnston 	/*
5223c6ffbabSRob Johnston 	 * The raw strings returned by the IDENTIFY CONTROLLER command are
5233c6ffbabSRob Johnston 	 * not NUL-terminated, so we fix that up.
5243c6ffbabSRob Johnston 	 */
5253c6ffbabSRob Johnston 	(void) strncpy(raw_rev, nvme_info->nei_idctl->id_fwrev, NVME_FWVER_SZ);
5263c6ffbabSRob Johnston 	raw_rev[NVME_FWVER_SZ] = '\0';
5273c6ffbabSRob Johnston 	(void) strncpy(raw_model, nvme_info->nei_idctl->id_model,
5283c6ffbabSRob Johnston 	    NVME_MODEL_SZ);
5293c6ffbabSRob Johnston 	raw_model[NVME_MODEL_SZ] = '\0';
5303c6ffbabSRob Johnston 	(void) strncpy(raw_serial, nvme_info->nei_idctl->id_serial,
5313c6ffbabSRob Johnston 	    NVME_SERIAL_SZ);
5323c6ffbabSRob Johnston 	raw_serial[NVME_SERIAL_SZ] = '\0';
5333c6ffbabSRob Johnston 
5343c6ffbabSRob Johnston 	/*
5353c6ffbabSRob Johnston 	 * Next we pass the strings through a function that sanitizes them of
5363c6ffbabSRob Johnston 	 * any characters that can't be used in an FMRI string.
5373c6ffbabSRob Johnston 	 */
5383c6ffbabSRob Johnston 	rev = topo_mod_clean_str(mod, raw_rev);
5393c6ffbabSRob Johnston 	model = topo_mod_clean_str(mod, raw_model);
5403c6ffbabSRob Johnston 	serial = topo_mod_clean_str(mod, raw_serial);
5413c6ffbabSRob Johnston 
5423c6ffbabSRob Johnston 	auth = topo_mod_auth(mod, nvme_info->nei_parent);
5433c6ffbabSRob Johnston 	fmri = topo_mod_hcfmri(mod, nvme_info->nei_parent, FM_HC_SCHEME_VERSION,
5443c6ffbabSRob Johnston 	    NVME, 0, NULL, auth, model, rev, serial);
5453c6ffbabSRob Johnston 
5463c6ffbabSRob Johnston 	if (fmri == NULL) {
5473c6ffbabSRob Johnston 		/* errno set */
5486597d6fcSRobert Mustacchi 		topo_mod_dprintf(mod, "%s: hcfmri failed for %s=%" PRIu64
5496597d6fcSRobert Mustacchi 		    "/%s=0", __func__, pname, pinst, NVME);
5503c6ffbabSRob Johnston 		goto error;
5513c6ffbabSRob Johnston 	}
5523c6ffbabSRob Johnston 
5533c6ffbabSRob Johnston 	/*
5543c6ffbabSRob Johnston 	 * If our parent is a pciexfn node, then we need to create a nvme range
555*744642a2SRobert Mustacchi 	 * underneath it to hold the nvme hierarchy.  For other cases, where
5563c6ffbabSRob Johnston 	 * enumeration is being driven by a topo map file, this range will have
5573c6ffbabSRob Johnston 	 * already been statically defined in the XML.
5583c6ffbabSRob Johnston 	 */
5593c6ffbabSRob Johnston 	if (strcmp(pname, PCIEX_FUNCTION) == 0) {
5603c6ffbabSRob Johnston 		if (topo_node_range_create(mod, nvme_info->nei_parent, NVME, 0,
5613c6ffbabSRob Johnston 		    0) < 0) {
5623c6ffbabSRob Johnston 			/* errno set */
5633c6ffbabSRob Johnston 			topo_mod_dprintf(mod, "%s: error creating %s range",
5643c6ffbabSRob Johnston 			    __func__, NVME);
5653c6ffbabSRob Johnston 			goto error;
5663c6ffbabSRob Johnston 		}
5673c6ffbabSRob Johnston 	}
5683c6ffbabSRob Johnston 
5693c6ffbabSRob Johnston 	/*
5703c6ffbabSRob Johnston 	 * Create a new topo node to represent the NVMe controller and bind it
5713c6ffbabSRob Johnston 	 * to the parent node.
5723c6ffbabSRob Johnston 	 */
5733c6ffbabSRob Johnston 	if ((nvme = topo_node_bind(mod, nvme_info->nei_parent, NVME, 0,
5743c6ffbabSRob Johnston 	    fmri)) == NULL) {
5753c6ffbabSRob Johnston 		/* errno set */
5766597d6fcSRobert Mustacchi 		topo_mod_dprintf(mod, "%s: bind failed for %s=%" PRIu64
5776597d6fcSRobert Mustacchi 		    "/%s=0", __func__, pname, pinst, NVME);
5783c6ffbabSRob Johnston 		goto error;
5793c6ffbabSRob Johnston 	}
5803c6ffbabSRob Johnston 	nvme_info->nei_nvme = nvme;
5813c6ffbabSRob Johnston 	nvme_info->nei_nvme_fmri = fmri;
5823c6ffbabSRob Johnston 
5833c6ffbabSRob Johnston 	/*
5843c6ffbabSRob Johnston 	 * If our parent node is a "pciexfn" node then this is a NVMe device on
5853c6ffbabSRob Johnston 	 * a PCIe AIC, so we inherit our parent's FRU.  Otherwise, we set the
5863c6ffbabSRob Johnston 	 * FRU to ourself.
5873c6ffbabSRob Johnston 	 */
5883c6ffbabSRob Johnston 	if (strcmp(topo_node_name(nvme_info->nei_parent), PCIEX_FUNCTION) == 0)
5893c6ffbabSRob Johnston 		fru = NULL;
5903c6ffbabSRob Johnston 	else
5913c6ffbabSRob Johnston 		fru = fmri;
5923c6ffbabSRob Johnston 
5933c6ffbabSRob Johnston 	if (topo_node_fru_set(nvme, fru, 0, &err) != 0) {
5943c6ffbabSRob Johnston 		topo_mod_dprintf(mod, "%s: failed to set FRU: %s", __func__,
5953c6ffbabSRob Johnston 		    topo_strerror(err));
5963c6ffbabSRob Johnston 		(void) topo_mod_seterrno(mod, err);
5973c6ffbabSRob Johnston 		goto error;
5983c6ffbabSRob Johnston 	}
5993c6ffbabSRob Johnston 
6003c6ffbabSRob Johnston 	/*
6013c6ffbabSRob Johnston 	 * Clone the label from our parent node.  We can't inherit the property
6023c6ffbabSRob Johnston 	 * because the label prop is mutable on bay nodes and only immutable
6033c6ffbabSRob Johnston 	 * properties can be inherited.
6043c6ffbabSRob Johnston 	 */
6053c6ffbabSRob Johnston 	if ((topo_node_label(nvme_info->nei_parent, &label, &err) != 0 &&
6063c6ffbabSRob Johnston 	    err != ETOPO_PROP_NOENT) ||
6073c6ffbabSRob Johnston 	    topo_node_label_set(nvme, label, &err) != 0) {
6083c6ffbabSRob Johnston 		topo_mod_dprintf(mod, "%s: failed to set label: %s",
6093c6ffbabSRob Johnston 		    __func__, topo_strerror(err));
6103c6ffbabSRob Johnston 		(void) topo_mod_seterrno(mod, err);
6113c6ffbabSRob Johnston 		goto error;
6123c6ffbabSRob Johnston 	}
6133c6ffbabSRob Johnston 
614*744642a2SRobert Mustacchi 	/*
615*744642a2SRobert Mustacchi 	 * Ensure that we have a UFM property set based on our devinfo path.
616*744642a2SRobert Mustacchi 	 * This is a little repetitive if our parent actually did so as well,
617*744642a2SRobert Mustacchi 	 * but given that the majority of such nodes are under bays and slots
618*744642a2SRobert Mustacchi 	 * right now, it's a worthwhile tradeoff.
619*744642a2SRobert Mustacchi 	 */
620*744642a2SRobert Mustacchi 	disk_nvme_make_ufm(mod, nvme_info);
621*744642a2SRobert Mustacchi 
6223c6ffbabSRob Johnston 	if (topo_pgroup_create(nvme, &nvme_pgroup, &err) != 0) {
6233c6ffbabSRob Johnston 		topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s",
6243c6ffbabSRob Johnston 		    __func__, TOPO_PGROUP_NVME, topo_strerror(err));
6253c6ffbabSRob Johnston 		(void) topo_mod_seterrno(mod, err);
6263c6ffbabSRob Johnston 		goto error;
6273c6ffbabSRob Johnston 	}
6283c6ffbabSRob Johnston 
6293c6ffbabSRob Johnston 	if (asprintf(&vers, "%u.%u", nvme_info->nei_vers.v_major,
6303c6ffbabSRob Johnston 	    nvme_info->nei_vers.v_minor) < 0) {
6313c6ffbabSRob Johnston 		topo_mod_dprintf(mod, "%s: failed to alloc string", __func__);
6323c6ffbabSRob Johnston 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
6333c6ffbabSRob Johnston 		goto error;
6343c6ffbabSRob Johnston 	}
6353c6ffbabSRob Johnston 	if (topo_prop_set_string(nvme, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER,
6363c6ffbabSRob Johnston 	    TOPO_PROP_IMMUTABLE, vers, &err) != 0) {
6373c6ffbabSRob Johnston 		topo_mod_dprintf(mod, "%s: failed to set %s/%s property",
6383c6ffbabSRob Johnston 		    __func__, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER);
6393c6ffbabSRob Johnston 		(void) topo_mod_seterrno(mod, err);
6403c6ffbabSRob Johnston 		goto error;
6413c6ffbabSRob Johnston 	}
6423c6ffbabSRob Johnston 
6433c6ffbabSRob Johnston 	if (topo_pgroup_create(nvme, &io_pgroup, &err) != 0) {
6443c6ffbabSRob Johnston 		topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s",
6453c6ffbabSRob Johnston 		    __func__, TOPO_PGROUP_IO, topo_strerror(err));
6463c6ffbabSRob Johnston 		(void) topo_mod_seterrno(mod, err);
6473c6ffbabSRob Johnston 		goto error;
6483c6ffbabSRob Johnston 	}
649*744642a2SRobert Mustacchi 
650*744642a2SRobert Mustacchi 	if (!disk_nvme_common_io(mod, nvme, nvme_info->nei_dinode)) {
6513c6ffbabSRob Johnston 		goto error;
6523c6ffbabSRob Johnston 	}
6533c6ffbabSRob Johnston 
6543c6ffbabSRob Johnston 	/*
6553c6ffbabSRob Johnston 	 * Create a child disk node for each namespace.
6563c6ffbabSRob Johnston 	 */
6573c6ffbabSRob Johnston 	if (topo_node_range_create(mod, nvme, DISK, 0,
6583c6ffbabSRob Johnston 	    (nvme_info->nei_idctl->id_nn - 1)) < 0) {
6593c6ffbabSRob Johnston 		/* errno set */
6603c6ffbabSRob Johnston 		topo_mod_dprintf(mod, "%s: error creating %s range", __func__,
6613c6ffbabSRob Johnston 		    DISK);
6623c6ffbabSRob Johnston 		goto error;
6633c6ffbabSRob Johnston 	}
6643c6ffbabSRob Johnston 
6653c6ffbabSRob Johnston 	/*
666*744642a2SRobert Mustacchi 	 * Iterate over each namespace to see if it's a candidate for inclusion.
667*744642a2SRobert Mustacchi 	 * Namespaces start at index 1 and not every namespace will be included.
668*744642a2SRobert Mustacchi 	 * We map things such that a disk instance is always namespace - 1 to
669*744642a2SRobert Mustacchi 	 * fit into the above mapping.
6703c6ffbabSRob Johnston 	 */
671*744642a2SRobert Mustacchi 	for (uint32_t i = 1; i <= nvme_info->nei_idctl->id_nn; i++) {
672*744642a2SRobert Mustacchi 		disk_nvme_make_ns(nvme_info, i);
6733c6ffbabSRob Johnston 	}
6743c6ffbabSRob Johnston 	ret = 0;
6753c6ffbabSRob Johnston 
6763c6ffbabSRob Johnston error:
6773c6ffbabSRob Johnston 	free(vers);
6783c6ffbabSRob Johnston 	nvlist_free(auth);
6793c6ffbabSRob Johnston 	nvlist_free(fmri);
6803c6ffbabSRob Johnston 	topo_mod_strfree(mod, rev);
6813c6ffbabSRob Johnston 	topo_mod_strfree(mod, model);
6823c6ffbabSRob Johnston 	topo_mod_strfree(mod, serial);
6833c6ffbabSRob Johnston 	topo_mod_strfree(mod, label);
6843c6ffbabSRob Johnston 	return (ret);
6853c6ffbabSRob Johnston }
6863c6ffbabSRob Johnston 
6873c6ffbabSRob Johnston struct diwalk_arg {
6883c6ffbabSRob Johnston 	topo_mod_t	*diwk_mod;
6893c6ffbabSRob Johnston 	tnode_t		*diwk_parent;
6903c6ffbabSRob Johnston };
6913c6ffbabSRob Johnston 
6923c6ffbabSRob Johnston /*
6933c6ffbabSRob Johnston  * This function gathers identity information from the NVMe controller and
6943c6ffbabSRob Johnston  * stores it in a struct.  This struct is passed to make_nvme_node(), which
6953c6ffbabSRob Johnston  * does the actual topo node creation.
6963c6ffbabSRob Johnston  */
6973c6ffbabSRob Johnston static int
6983c6ffbabSRob Johnston discover_nvme_ctl(di_node_t node, di_minor_t minor, void *arg)
6993c6ffbabSRob Johnston {
7003c6ffbabSRob Johnston 	struct diwalk_arg *wkarg = arg;
7013c6ffbabSRob Johnston 	topo_mod_t *mod = wkarg->diwk_mod;
7023c6ffbabSRob Johnston 	char *path = NULL, *devctl = NULL;
7033c6ffbabSRob Johnston 	nvme_ioctl_t nioc = { 0 };
7043c6ffbabSRob Johnston 	nvme_identify_ctrl_t *idctl = NULL;
7053c6ffbabSRob Johnston 	nvme_enum_info_t nvme_info = { 0 };
7063c6ffbabSRob Johnston 	int fd = -1, ret = DI_WALK_TERMINATE;
7073c6ffbabSRob Johnston 
7083c6ffbabSRob Johnston 	if ((path = di_devfs_minor_path(minor)) == NULL) {
7093c6ffbabSRob Johnston 		topo_mod_dprintf(mod, "failed to get minor path");
7103c6ffbabSRob Johnston 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
7113c6ffbabSRob Johnston 		return (ret);
7123c6ffbabSRob Johnston 	}
7133c6ffbabSRob Johnston 
7146597d6fcSRobert Mustacchi 	topo_mod_dprintf(mod, "%s=%" PRIu64 ": found nvme controller: %s",
7153c6ffbabSRob Johnston 	    topo_node_name(wkarg->diwk_parent),
7163c6ffbabSRob Johnston 	    topo_node_instance(wkarg->diwk_parent), path);
7173c6ffbabSRob Johnston 
7183c6ffbabSRob Johnston 	if (asprintf(&devctl, "/devices%s", path) < 0) {
7193c6ffbabSRob Johnston 		topo_mod_dprintf(mod, "failed to alloc string");
7203c6ffbabSRob Johnston 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
7213c6ffbabSRob Johnston 		goto error;
7223c6ffbabSRob Johnston 	}
7233c6ffbabSRob Johnston 
7243c6ffbabSRob Johnston 	if ((fd = open(devctl, O_RDWR)) < 0) {
7253c6ffbabSRob Johnston 		topo_mod_dprintf(mod, "failed to open %s", devctl);
7263c6ffbabSRob Johnston 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
7273c6ffbabSRob Johnston 		goto error;
7283c6ffbabSRob Johnston 	}
7293c6ffbabSRob Johnston 	if ((idctl = topo_mod_zalloc(mod, NVME_IDENTIFY_BUFSIZE)) == NULL) {
7303c6ffbabSRob Johnston 		topo_mod_dprintf(mod, "zalloc failed");
7313c6ffbabSRob Johnston 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
7323c6ffbabSRob Johnston 		goto error;
7333c6ffbabSRob Johnston 	}
7343c6ffbabSRob Johnston 	nioc.n_len = NVME_IDENTIFY_BUFSIZE;
7353c6ffbabSRob Johnston 	nioc.n_buf = (uintptr_t)idctl;
736153f3212SHans Rosenfeld 	nioc.n_arg = NVME_IDENTIFY_CTRL;
7373c6ffbabSRob Johnston 
738153f3212SHans Rosenfeld 	if (ioctl(fd, NVME_IOC_IDENTIFY, &nioc) != 0) {
739153f3212SHans Rosenfeld 		topo_mod_dprintf(mod, "NVME_IOC_IDENTIFY ioctl "
7403c6ffbabSRob Johnston 		    "failed: %s", strerror(errno));
7413c6ffbabSRob Johnston 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
7423c6ffbabSRob Johnston 		goto error;
7433c6ffbabSRob Johnston 	}
7443c6ffbabSRob Johnston 
7453c6ffbabSRob Johnston 	nioc.n_len = sizeof (nvme_version_t);
7463c6ffbabSRob Johnston 	nioc.n_buf = (uintptr_t)&nvme_info.nei_vers;
747153f3212SHans Rosenfeld 	nioc.n_arg = 0;
7483c6ffbabSRob Johnston 
7493c6ffbabSRob Johnston 	if (ioctl(fd, NVME_IOC_VERSION, &nioc) != 0) {
7503c6ffbabSRob Johnston 		topo_mod_dprintf(mod, "NVME_IOC_VERSION ioctl failed: %s",
7513c6ffbabSRob Johnston 		    strerror(errno));
7523c6ffbabSRob Johnston 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
7533c6ffbabSRob Johnston 		goto error;
7543c6ffbabSRob Johnston 	}
7553c6ffbabSRob Johnston 
7563c6ffbabSRob Johnston 	nvme_info.nei_mod = mod;
7573c6ffbabSRob Johnston 	nvme_info.nei_nvme_path = path;
7583c6ffbabSRob Johnston 	nvme_info.nei_dinode = node;
7593c6ffbabSRob Johnston 	nvme_info.nei_idctl = idctl;
7603c6ffbabSRob Johnston 	nvme_info.nei_parent = wkarg->diwk_parent;
7613c6ffbabSRob Johnston 	nvme_info.nei_fd = fd;
7623c6ffbabSRob Johnston 
7633c6ffbabSRob Johnston 	if (make_nvme_node(&nvme_info) != 0) {
7643c6ffbabSRob Johnston 		/* errno set */
7653c6ffbabSRob Johnston 		goto error;
7663c6ffbabSRob Johnston 	}
7673c6ffbabSRob Johnston 
7683c6ffbabSRob Johnston 	ret = DI_WALK_CONTINUE;
7693c6ffbabSRob Johnston 
7703c6ffbabSRob Johnston error:
7713c6ffbabSRob Johnston 	if (fd > 0)
7723c6ffbabSRob Johnston 		(void) close(fd);
7733c6ffbabSRob Johnston 	di_devfs_path_free(path);
7743c6ffbabSRob Johnston 	free(devctl);
7753c6ffbabSRob Johnston 	if (idctl != NULL)
7763c6ffbabSRob Johnston 		topo_mod_free(mod, idctl, NVME_IDENTIFY_BUFSIZE);
7773c6ffbabSRob Johnston 	return (ret);
7783c6ffbabSRob Johnston }
7793c6ffbabSRob Johnston 
7803c6ffbabSRob Johnston int
7813c6ffbabSRob Johnston disk_nvme_enum_disk(topo_mod_t *mod, tnode_t *pnode)
7823c6ffbabSRob Johnston {
7833c6ffbabSRob Johnston 	char *parent = NULL;
7843c6ffbabSRob Johnston 	int err;
7853c6ffbabSRob Johnston 	di_node_t devtree;
7863c6ffbabSRob Johnston 	di_node_t dnode;
7873c6ffbabSRob Johnston 	struct diwalk_arg wkarg = { 0 };
7883c6ffbabSRob Johnston 	int ret = -1;
7893c6ffbabSRob Johnston 
7903c6ffbabSRob Johnston 	/*
7913c6ffbabSRob Johnston 	 * Lookup a property containing the devfs path of the parent PCIe
7923c6ffbabSRob Johnston 	 * device of the NVMe device we're attempting to enumerate.  This
7933c6ffbabSRob Johnston 	 * property is hard-coded in per-platform topo XML maps that are
7943c6ffbabSRob Johnston 	 * delivered with the OS.  This hard-coded path allows topo to map a
7953c6ffbabSRob Johnston 	 * given NVMe controller to a physical location (bay or slot) on the
7963c6ffbabSRob Johnston 	 * platform, when generating the topo snapshot.
7973c6ffbabSRob Johnston 	 */
7983c6ffbabSRob Johnston 	if (topo_prop_get_string(pnode, TOPO_PGROUP_BINDING,
7993c6ffbabSRob Johnston 	    TOPO_BINDING_PARENT_DEV, &parent, &err) != 0) {
8003c6ffbabSRob Johnston 		topo_mod_dprintf(mod, "parent node was missing nvme binding "
8013c6ffbabSRob Johnston 		    "properties\n");
8023c6ffbabSRob Johnston 		(void) topo_mod_seterrno(mod, err);
8033c6ffbabSRob Johnston 		goto out;
8043c6ffbabSRob Johnston 	}
8053c6ffbabSRob Johnston 	if ((devtree = topo_mod_devinfo(mod)) == DI_NODE_NIL) {
8063c6ffbabSRob Johnston 		topo_mod_dprintf(mod, "failed to get devinfo snapshot");
8073c6ffbabSRob Johnston 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
8083c6ffbabSRob Johnston 		goto out;
8093c6ffbabSRob Johnston 	}
8103c6ffbabSRob Johnston 
8113c6ffbabSRob Johnston 	/*
8123c6ffbabSRob Johnston 	 * Walk the devinfo tree looking NVMe devices. For each NVMe device,
8133c6ffbabSRob Johnston 	 * check if the devfs path of the parent matches the one specified in
8143c6ffbabSRob Johnston 	 * TOPO_BINDING_PARENT_DEV.
8153c6ffbabSRob Johnston 	 */
8163c6ffbabSRob Johnston 	wkarg.diwk_mod = mod;
8173c6ffbabSRob Johnston 	wkarg.diwk_parent = pnode;
8183c6ffbabSRob Johnston 	dnode = di_drv_first_node(NVME_DRV, devtree);
8193c6ffbabSRob Johnston 	while (dnode != DI_NODE_NIL) {
8203c6ffbabSRob Johnston 		char *path;
8213c6ffbabSRob Johnston 
8223c6ffbabSRob Johnston 		if ((path = di_devfs_path(di_parent_node(dnode))) == NULL) {
8233c6ffbabSRob Johnston 			topo_mod_dprintf(mod, "failed to get dev path");
8243c6ffbabSRob Johnston 			(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
8253c6ffbabSRob Johnston 			goto out;
8263c6ffbabSRob Johnston 		}
8273c6ffbabSRob Johnston 		if (strcmp(parent, path) == 0) {
8283c6ffbabSRob Johnston 			if (di_walk_minor(dnode, DDI_NT_NVME_NEXUS, 0,
8293c6ffbabSRob Johnston 			    &wkarg, discover_nvme_ctl) < 0) {
8303c6ffbabSRob Johnston 				di_devfs_path_free(path);
8313c6ffbabSRob Johnston 				goto out;
8323c6ffbabSRob Johnston 			}
8333c6ffbabSRob Johnston 		}
8343c6ffbabSRob Johnston 		di_devfs_path_free(path);
8353c6ffbabSRob Johnston 		dnode = di_drv_next_node(dnode);
8363c6ffbabSRob Johnston 	}
8373c6ffbabSRob Johnston 	ret = 0;
8383c6ffbabSRob Johnston 
8393c6ffbabSRob Johnston out:
8403c6ffbabSRob Johnston 	topo_mod_strfree(mod, parent);
8413c6ffbabSRob Johnston 	return (ret);
8423c6ffbabSRob Johnston }
843