xref: /illumos-gate/usr/src/lib/fm/topo/modules/common/disk/disk_nvme.c (revision 5d9d9091f564c198a760790b0bfa72c44e17912b)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2020 Joyent, Inc.
14  * Copyright 2022 Tintri by DDN, Inc. All rights reserved.
15  */
16 
17 /*
18  * This file drives topo node enumeration of NVMe controllers.  A single "nvme"
19  * node is enumerated for each NVMe controller.   Child "disk" nodes are then
20  * enumerated for each configured NVMe namespace.
21  *
22  * nvme nodes are expected to be enumerated under either a "bay" node (for U.2
23  * devices) or a "slot" node (for M.2 devices) or a "pciexfn" node (for AIC
24  * devices).
25  *
26  * Enumeration of NVMe controllers on PCIe add-in cards is automatically driven
27  * by the pcibus topo module.
28  *
29  * In order to allow for associating a given NVMe controller with a physical
30  * location, enumeration of U.2 and M.2 devices should be driven by a
31  * platform-specific topo map which statically sets the following two
32  * properties on the parent "bay" or "slot" node:
33  *
34  * propgroup        property        description
35  * ---------        --------        ------------
36  * binding          driver          "nvme"
37  * binding          parent-device   devpath of parent PCIe device
38  *
39  * for example:
40  *
41  * <propgroup name="binding" version="1" name-stability="Private"
42  *   data-stability="Private">
43  *     <propval name="driver" type="string" value="nvme"/>
44  *     <propval name="parent-device" type="string"
45  *       value="/pci@0,0/pci8086,6f09@3,1"/>
46  * </propgroup>
47  * <dependents grouping="children">
48  *     <range name="nvme" min="0" max="0">
49  *         <enum-method name="disk" version="1"/>
50  *     </range>
51  * </dependents>
52  */
53 #include <stdlib.h>
54 #include <sys/types.h>
55 #include <sys/stat.h>
56 #include <fcntl.h>
57 #include <unistd.h>
58 #include <string.h>
59 #include <strings.h>
60 
61 #include <sys/fm/protocol.h>
62 #include <fm/topo_hc.h>
63 #include <fm/topo_mod.h>
64 
65 #include <sys/dkio.h>
66 #include <sys/scsi/generic/inquiry.h>
67 
68 #include <sys/nvme.h>
69 #include "disk.h"
70 #include "disk_drivers.h"
71 
72 typedef struct nvme_enum_info {
73 	topo_mod_t		*nei_mod;
74 	di_node_t		nei_dinode;
75 	nvme_identify_ctrl_t	*nei_idctl;
76 	nvme_version_t		nei_vers;
77 	tnode_t			*nei_parent;
78 	tnode_t			*nei_nvme;
79 	nvlist_t		*nei_nvme_fmri;
80 	const char		*nei_nvme_path;
81 	int			nei_fd;
82 } nvme_enum_info_t;
83 
84 typedef struct devlink_arg {
85 	topo_mod_t		*dla_mod;
86 	char			*dla_logical_disk;
87 	uint_t			dla_strsz;
88 } devlink_arg_t;
89 
90 static int
91 devlink_cb(di_devlink_t dl, void *arg)
92 {
93 	devlink_arg_t *dlarg = (devlink_arg_t *)arg;
94 	topo_mod_t *mod = dlarg->dla_mod;
95 	const char *devpath;
96 	char *slice, *ctds;
97 
98 	if ((devpath = di_devlink_path(dl)) == NULL ||
99 	    (dlarg->dla_logical_disk = topo_mod_strdup(mod, devpath)) ==
100 	    NULL) {
101 		return (DI_WALK_TERMINATE);
102 	}
103 
104 	/*
105 	 * We need to keep track of the original string size before we
106 	 * truncate it with a NUL, so that we can free the right number of
107 	 * bytes when we're done, otherwise libumem will complain.
108 	 */
109 	dlarg->dla_strsz = strlen(dlarg->dla_logical_disk) + 1;
110 
111 	/* trim the slice off the public name */
112 	if (((ctds = strrchr(dlarg->dla_logical_disk, '/')) != NULL) &&
113 	    ((slice = strchr(ctds, 's')) != NULL))
114 		*slice = '\0';
115 
116 	return (DI_WALK_TERMINATE);
117 }
118 
119 static char *
120 get_logical_disk(topo_mod_t *mod, const char *devpath, uint_t *bufsz)
121 {
122 	di_devlink_handle_t devhdl;
123 	devlink_arg_t dlarg = { 0 };
124 	char *minorpath = NULL;
125 
126 	if (asprintf(&minorpath, "%s:a", devpath) < 0) {
127 		return (NULL);
128 	}
129 
130 	if ((devhdl = di_devlink_init(NULL, 0)) == DI_NODE_NIL) {
131 		topo_mod_dprintf(mod, "%s: di_devlink_init failed", __func__);
132 		free(minorpath);
133 		return (NULL);
134 	}
135 
136 	dlarg.dla_mod = mod;
137 
138 	(void) di_devlink_walk(devhdl, "^dsk/", minorpath, DI_PRIMARY_LINK,
139 	    &dlarg, devlink_cb);
140 
141 	(void) di_devlink_fini(&devhdl);
142 	free(minorpath);
143 
144 	*bufsz = dlarg.dla_strsz;
145 	return (dlarg.dla_logical_disk);
146 }
147 
148 static int
149 make_disk_node(nvme_enum_info_t *nvme_info, di_node_t dinode,
150     topo_instance_t inst)
151 {
152 	topo_mod_t *mod = nvme_info->nei_mod;
153 	nvlist_t *auth = NULL, *fmri = NULL;
154 	tnode_t *disk;
155 	char *rev = NULL, *model = NULL, *serial = NULL, *path;
156 	char *logical_disk = NULL, *devid, *manuf, *ctd = NULL;
157 	char *cap_bytes_str = NULL, full_path[MAXPATHLEN + 1];
158 	char *pname = topo_node_name(nvme_info->nei_parent);
159 	topo_instance_t pinst = topo_node_instance(nvme_info->nei_parent);
160 	const char **ppaths = NULL;
161 	struct dk_minfo minfo;
162 	uint64_t cap_bytes;
163 	uint_t bufsz;
164 	int fd = -1, err, ret = -1, r;
165 
166 	if ((path = di_devfs_path(dinode)) == NULL) {
167 		topo_mod_dprintf(mod, "%s: failed to get dev path", __func__);
168 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
169 		return (ret);
170 	}
171 
172 	topo_mod_dprintf(mod, "%s: found nvme namespace: %s", __func__, path);
173 
174 	/*
175 	 * Issue the DKIOCGMEDIAINFO ioctl to get the capacity
176 	 */
177 	(void) snprintf(full_path, MAXPATHLEN, "/devices%s%s", path,
178 	    PHYS_EXTN);
179 	if ((fd = open(full_path, O_RDWR)) < 0 ||
180 	    ioctl(fd, DKIOCGMEDIAINFO, &minfo) < 0) {
181 		topo_mod_dprintf(mod, "failed to get blkdev capacity (%s)",
182 		    strerror(errno));
183 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
184 		goto error;
185 	}
186 
187 	cap_bytes = minfo.dki_lbsize * minfo.dki_capacity;
188 
189 	if (asprintf(&cap_bytes_str, "%" PRIu64, cap_bytes) < 0) {
190 		topo_mod_dprintf(mod, "%s: failed to alloc string", __func__);
191 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
192 		goto error;
193 	}
194 
195 	/*
196 	 * Gather the FRU identity information from the devinfo properties
197 	 */
198 	if (di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, DEVID_PROP_NAME,
199 	    &devid) == -1 ||
200 	    di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, INQUIRY_VENDOR_ID,
201 	    &manuf) == -1 ||
202 	    di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, INQUIRY_PRODUCT_ID,
203 	    &model) == -1 ||
204 	    di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, INQUIRY_REVISION_ID,
205 	    &rev) == -1 ||
206 	    di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, INQUIRY_SERIAL_NO,
207 	    &serial) == -1) {
208 		topo_mod_dprintf(mod, "%s: failed to lookup devinfo props on "
209 		    "%s", __func__, path);
210 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
211 		goto error;
212 	}
213 
214 	model = topo_mod_clean_str(mod, model);
215 	rev = topo_mod_clean_str(mod, rev);
216 	serial = topo_mod_clean_str(mod, serial);
217 
218 	/*
219 	 * Lookup the /dev/dsk/c#t#d# disk device name from the blkdev path
220 	 */
221 	if ((logical_disk = get_logical_disk(mod, path, &bufsz)) == NULL) {
222 		topo_mod_dprintf(mod, "failed to find logical disk");
223 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
224 		goto error;
225 	}
226 
227 	/*
228 	 * If we were able to look up the logical disk path for this namespace
229 	 * then set ctd to be that pathname, minus the "/dev/dsk/" portion.
230 	 */
231 	if ((ctd = strrchr(logical_disk, '/')) !=  NULL) {
232 		ctd = ctd + 1;
233 	} else {
234 		topo_mod_dprintf(mod, "malformed logical disk path: %s",
235 		    logical_disk);
236 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
237 		goto error;
238 	}
239 
240 	/*
241 	 * Build the FMRI and then bind the disk node to the parent nvme node.
242 	 */
243 	auth = topo_mod_auth(mod, nvme_info->nei_nvme);
244 	fmri = topo_mod_hcfmri(mod, nvme_info->nei_nvme, FM_HC_SCHEME_VERSION,
245 	    DISK, inst, NULL, auth, model, rev, serial);
246 
247 	if (fmri == NULL) {
248 		/* errno set */
249 		topo_mod_dprintf(mod, "%s: hcfmri failed for %s=%u/%s=0/%s=%u",
250 		    __func__, pname, pinst, NVME, DISK, inst);
251 		goto error;
252 	}
253 	if ((disk = topo_node_bind(mod, nvme_info->nei_nvme, DISK, inst,
254 	    fmri)) == NULL) {
255 		/* errno set */
256 		topo_mod_dprintf(mod, "%s: bind failed for %s=%u/%s=0/%s=%u",
257 		    __func__, pname, pinst, NVME, DISK, inst);
258 		goto error;
259 	}
260 
261 	/* Create authority and system propgroups */
262 	topo_pgroup_hcset(disk, auth);
263 
264 	/*
265 	 * As the "disk" in this case is simply a logical construct
266 	 * representing an NVMe namespace, we inherit the FRU from the parent
267 	 * node.
268 	 */
269 	if (topo_node_fru_set(disk, NULL, 0, &err) != 0) {
270 		topo_mod_dprintf(mod, "%s: failed to set FRU: %s", __func__,
271 		    topo_strerror(err));
272 		(void) topo_mod_seterrno(mod, err);
273 		goto error;
274 	}
275 
276 	if ((ppaths = topo_mod_zalloc(mod, sizeof (char *))) == NULL) {
277 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
278 		goto error;
279 	}
280 	ppaths[0] = path;
281 
282 	/*
283 	 * Create the "storage" and "io" property groups and then fill them
284 	 * with the standard set of properties for "disk" nodes.
285 	 */
286 	if (topo_pgroup_create(disk, &io_pgroup, &err) != 0 ||
287 	    topo_pgroup_create(disk, &storage_pgroup, &err) != 0) {
288 		topo_mod_dprintf(mod, "%s: failed to create propgroups: %s",
289 		    __func__, topo_strerror(err));
290 		(void) topo_mod_seterrno(mod, err);
291 		goto error;
292 	}
293 
294 	r = topo_prop_set_string(disk, TOPO_PGROUP_IO, TOPO_IO_DEV_PATH,
295 	    TOPO_PROP_IMMUTABLE, path, &err);
296 
297 	r += topo_prop_set_string_array(disk, TOPO_PGROUP_IO,
298 	    TOPO_IO_PHYS_PATH, TOPO_PROP_IMMUTABLE, ppaths, 1, &err);
299 
300 	r += topo_prop_set_string(disk, TOPO_PGROUP_IO, TOPO_IO_DEVID,
301 	    TOPO_PROP_IMMUTABLE, devid, &err);
302 
303 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
304 	    TOPO_STORAGE_MANUFACTURER, TOPO_PROP_IMMUTABLE, manuf, &err);
305 
306 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
307 	    TOPO_STORAGE_CAPACITY, TOPO_PROP_IMMUTABLE, cap_bytes_str,
308 	    &err);
309 
310 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
311 	    TOPO_STORAGE_SERIAL_NUM, TOPO_PROP_IMMUTABLE, serial, &err);
312 
313 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
314 	    TOPO_STORAGE_MODEL, TOPO_PROP_IMMUTABLE, model, &err);
315 
316 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
317 	    TOPO_STORAGE_FIRMWARE_REV, TOPO_PROP_IMMUTABLE, rev, &err);
318 
319 	r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE,
320 	    TOPO_STORAGE_LOGICAL_DISK_NAME, TOPO_PROP_IMMUTABLE, ctd, &err);
321 
322 	if (r != 0) {
323 		topo_mod_dprintf(mod, "%s: failed to create properties: %s",
324 		    __func__, topo_strerror(err));
325 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
326 		goto error;
327 	}
328 
329 	ret = 0;
330 
331 error:
332 	free(cap_bytes_str);
333 	if (fd > 0)
334 		(void) close(fd);
335 	if (ppaths != NULL)
336 		topo_mod_free(mod, ppaths, sizeof (char *));
337 	di_devfs_path_free(path);
338 	nvlist_free(auth);
339 	nvlist_free(fmri);
340 	topo_mod_strfree(mod, rev);
341 	topo_mod_strfree(mod, model);
342 	topo_mod_strfree(mod, serial);
343 	topo_mod_free(mod, logical_disk, bufsz);
344 	return (ret);
345 }
346 
347 static const topo_pgroup_info_t nvme_pgroup = {
348 	TOPO_PGROUP_NVME,
349 	TOPO_STABILITY_PRIVATE,
350 	TOPO_STABILITY_PRIVATE,
351 	1
352 };
353 
354 
355 static int
356 make_nvme_node(nvme_enum_info_t *nvme_info)
357 {
358 	topo_mod_t *mod = nvme_info->nei_mod;
359 	nvlist_t *auth = NULL, *fmri = NULL, *fru;
360 	tnode_t *nvme;
361 	char raw_rev[NVME_FWVER_SZ + 1], raw_model[NVME_MODEL_SZ + 1];
362 	char raw_serial[NVME_SERIAL_SZ + 1];
363 	char *rev = NULL, *model = NULL, *serial = NULL, *vers = NULL;
364 	char *pname = topo_node_name(nvme_info->nei_parent);
365 	char *label = NULL;
366 	topo_instance_t pinst = topo_node_instance(nvme_info->nei_parent);
367 	int err = 0, ret = -1;
368 	di_node_t cn;
369 	uint_t i;
370 
371 	/*
372 	 * The raw strings returned by the IDENTIFY CONTROLLER command are
373 	 * not NUL-terminated, so we fix that up.
374 	 */
375 	(void) strncpy(raw_rev, nvme_info->nei_idctl->id_fwrev, NVME_FWVER_SZ);
376 	raw_rev[NVME_FWVER_SZ] = '\0';
377 	(void) strncpy(raw_model, nvme_info->nei_idctl->id_model,
378 	    NVME_MODEL_SZ);
379 	raw_model[NVME_MODEL_SZ] = '\0';
380 	(void) strncpy(raw_serial, nvme_info->nei_idctl->id_serial,
381 	    NVME_SERIAL_SZ);
382 	raw_serial[NVME_SERIAL_SZ] = '\0';
383 
384 	/*
385 	 * Next we pass the strings through a function that sanitizes them of
386 	 * any characters that can't be used in an FMRI string.
387 	 */
388 	rev = topo_mod_clean_str(mod, raw_rev);
389 	model = topo_mod_clean_str(mod, raw_model);
390 	serial = topo_mod_clean_str(mod, raw_serial);
391 
392 	auth = topo_mod_auth(mod, nvme_info->nei_parent);
393 	fmri = topo_mod_hcfmri(mod, nvme_info->nei_parent, FM_HC_SCHEME_VERSION,
394 	    NVME, 0, NULL, auth, model, rev, serial);
395 
396 	if (fmri == NULL) {
397 		/* errno set */
398 		topo_mod_dprintf(mod, "%s: hcfmri failed for %s=%u/%s=0",
399 		    __func__, pname, pinst, NVME);
400 		goto error;
401 	}
402 
403 	/*
404 	 * If our parent is a pciexfn node, then we need to create a nvme range
405 	 * underneath it to hold the nvme heirarchy.  For other cases, where
406 	 * enumeration is being driven by a topo map file, this range will have
407 	 * already been statically defined in the XML.
408 	 */
409 	if (strcmp(pname, PCIEX_FUNCTION) == 0) {
410 		if (topo_node_range_create(mod, nvme_info->nei_parent, NVME, 0,
411 		    0) < 0) {
412 			/* errno set */
413 			topo_mod_dprintf(mod, "%s: error creating %s range",
414 			    __func__, NVME);
415 			goto error;
416 		}
417 	}
418 
419 	/*
420 	 * Create a new topo node to represent the NVMe controller and bind it
421 	 * to the parent node.
422 	 */
423 	if ((nvme = topo_node_bind(mod, nvme_info->nei_parent, NVME, 0,
424 	    fmri)) == NULL) {
425 		/* errno set */
426 		topo_mod_dprintf(mod, "%s: bind failed for %s=%u/%s=0",
427 		    __func__, pname, pinst, NVME);
428 		goto error;
429 	}
430 	nvme_info->nei_nvme = nvme;
431 	nvme_info->nei_nvme_fmri = fmri;
432 
433 	/*
434 	 * If our parent node is a "pciexfn" node then this is a NVMe device on
435 	 * a PCIe AIC, so we inherit our parent's FRU.  Otherwise, we set the
436 	 * FRU to ourself.
437 	 */
438 	if (strcmp(topo_node_name(nvme_info->nei_parent), PCIEX_FUNCTION) == 0)
439 		fru = NULL;
440 	else
441 		fru = fmri;
442 
443 	if (topo_node_fru_set(nvme, fru, 0, &err) != 0) {
444 		topo_mod_dprintf(mod, "%s: failed to set FRU: %s", __func__,
445 		    topo_strerror(err));
446 		(void) topo_mod_seterrno(mod, err);
447 		goto error;
448 	}
449 
450 	/*
451 	 * Clone the label from our parent node.  We can't inherit the property
452 	 * because the label prop is mutable on bay nodes and only immutable
453 	 * properties can be inherited.
454 	 */
455 	if ((topo_node_label(nvme_info->nei_parent, &label, &err) != 0 &&
456 	    err != ETOPO_PROP_NOENT) ||
457 	    topo_node_label_set(nvme, label, &err) != 0) {
458 		topo_mod_dprintf(mod, "%s: failed to set label: %s",
459 		    __func__, topo_strerror(err));
460 		(void) topo_mod_seterrno(mod, err);
461 		goto error;
462 	}
463 
464 	if (topo_pgroup_create(nvme, &nvme_pgroup, &err) != 0) {
465 		topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s",
466 		    __func__, TOPO_PGROUP_NVME, topo_strerror(err));
467 		(void) topo_mod_seterrno(mod, err);
468 		goto error;
469 	}
470 
471 	if (asprintf(&vers, "%u.%u", nvme_info->nei_vers.v_major,
472 	    nvme_info->nei_vers.v_minor) < 0) {
473 		topo_mod_dprintf(mod, "%s: failed to alloc string", __func__);
474 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
475 		goto error;
476 	}
477 	if (topo_prop_set_string(nvme, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER,
478 	    TOPO_PROP_IMMUTABLE, vers, &err) != 0) {
479 		topo_mod_dprintf(mod, "%s: failed to set %s/%s property",
480 		    __func__, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER);
481 		(void) topo_mod_seterrno(mod, err);
482 		goto error;
483 	}
484 
485 	if (topo_pgroup_create(nvme, &io_pgroup, &err) != 0) {
486 		topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s",
487 		    __func__, TOPO_PGROUP_IO, topo_strerror(err));
488 		(void) topo_mod_seterrno(mod, err);
489 		goto error;
490 	}
491 	if (topo_prop_set_string(nvme, TOPO_PGROUP_IO, TOPO_IO_DEV_PATH,
492 	    TOPO_PROP_IMMUTABLE, nvme_info->nei_nvme_path, &err) != 0) {
493 		topo_mod_dprintf(mod, "%s: failed to set %s/%s property",
494 		    __func__, TOPO_PGROUP_IO, TOPO_IO_DEV_PATH);
495 		(void) topo_mod_seterrno(mod, err);
496 		goto error;
497 	}
498 
499 	/*
500 	 * Create a child disk node for each namespace.
501 	 */
502 	if (topo_node_range_create(mod, nvme, DISK, 0,
503 	    (nvme_info->nei_idctl->id_nn - 1)) < 0) {
504 		/* errno set */
505 		topo_mod_dprintf(mod, "%s: error creating %s range", __func__,
506 		    DISK);
507 		goto error;
508 	}
509 
510 	for (i = 0, cn = di_child_node(nvme_info->nei_dinode);
511 	    cn != DI_NODE_NIL;
512 	    i++, cn = di_sibling_node(cn)) {
513 
514 		if (make_disk_node(nvme_info, cn, i) != 0) {
515 			char *path = di_devfs_path(cn);
516 			/*
517 			 * We note the failure, but attempt to forge ahead and
518 			 * enumerate any other namespaces.
519 			 */
520 			topo_mod_dprintf(mod, "%s: make_disk_node() failed "
521 			    "for %s\n", __func__,
522 			    path ? path : "unknown path");
523 			di_devfs_path_free(path);
524 		}
525 	}
526 	ret = 0;
527 
528 error:
529 	free(vers);
530 	nvlist_free(auth);
531 	nvlist_free(fmri);
532 	topo_mod_strfree(mod, rev);
533 	topo_mod_strfree(mod, model);
534 	topo_mod_strfree(mod, serial);
535 	topo_mod_strfree(mod, label);
536 	return (ret);
537 }
538 
539 struct diwalk_arg {
540 	topo_mod_t	*diwk_mod;
541 	tnode_t		*diwk_parent;
542 };
543 
544 /*
545  * This function gathers identity information from the NVMe controller and
546  * stores it in a struct.  This struct is passed to make_nvme_node(), which
547  * does the actual topo node creation.
548  */
549 static int
550 discover_nvme_ctl(di_node_t node, di_minor_t minor, void *arg)
551 {
552 	struct diwalk_arg *wkarg = arg;
553 	topo_mod_t *mod = wkarg->diwk_mod;
554 	char *path = NULL, *devctl = NULL;
555 	nvme_ioctl_t nioc = { 0 };
556 	nvme_identify_ctrl_t *idctl = NULL;
557 	nvme_enum_info_t nvme_info = { 0 };
558 	int fd = -1, ret = DI_WALK_TERMINATE;
559 
560 	if ((path = di_devfs_minor_path(minor)) == NULL) {
561 		topo_mod_dprintf(mod, "failed to get minor path");
562 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
563 		return (ret);
564 	}
565 
566 	topo_mod_dprintf(mod, "%s=%u: found nvme controller: %s",
567 	    topo_node_name(wkarg->diwk_parent),
568 	    topo_node_instance(wkarg->diwk_parent), path);
569 
570 	if (asprintf(&devctl, "/devices%s", path) < 0) {
571 		topo_mod_dprintf(mod, "failed to alloc string");
572 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
573 		goto error;
574 	}
575 
576 	if ((fd = open(devctl, O_RDWR)) < 0) {
577 		topo_mod_dprintf(mod, "failed to open %s", devctl);
578 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
579 		goto error;
580 	}
581 	if ((idctl = topo_mod_zalloc(mod, NVME_IDENTIFY_BUFSIZE)) == NULL) {
582 		topo_mod_dprintf(mod, "zalloc failed");
583 		(void) topo_mod_seterrno(mod, EMOD_NOMEM);
584 		goto error;
585 	}
586 	nioc.n_len = NVME_IDENTIFY_BUFSIZE;
587 	nioc.n_buf = (uintptr_t)idctl;
588 	nioc.n_arg = NVME_IDENTIFY_CTRL;
589 
590 	if (ioctl(fd, NVME_IOC_IDENTIFY, &nioc) != 0) {
591 		topo_mod_dprintf(mod, "NVME_IOC_IDENTIFY ioctl "
592 		    "failed: %s", strerror(errno));
593 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
594 		goto error;
595 	}
596 
597 	nioc.n_len = sizeof (nvme_version_t);
598 	nioc.n_buf = (uintptr_t)&nvme_info.nei_vers;
599 	nioc.n_arg = 0;
600 
601 	if (ioctl(fd, NVME_IOC_VERSION, &nioc) != 0) {
602 		topo_mod_dprintf(mod, "NVME_IOC_VERSION ioctl failed: %s",
603 		    strerror(errno));
604 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
605 		goto error;
606 	}
607 
608 	nvme_info.nei_mod = mod;
609 	nvme_info.nei_nvme_path = path;
610 	nvme_info.nei_dinode = node;
611 	nvme_info.nei_idctl = idctl;
612 	nvme_info.nei_parent = wkarg->diwk_parent;
613 	nvme_info.nei_fd = fd;
614 
615 	if (make_nvme_node(&nvme_info) != 0) {
616 		/* errno set */
617 		goto error;
618 	}
619 
620 	ret = DI_WALK_CONTINUE;
621 
622 error:
623 	if (fd > 0)
624 		(void) close(fd);
625 	di_devfs_path_free(path);
626 	free(devctl);
627 	if (idctl != NULL)
628 		topo_mod_free(mod, idctl, NVME_IDENTIFY_BUFSIZE);
629 	return (ret);
630 }
631 
632 int
633 disk_nvme_enum_disk(topo_mod_t *mod, tnode_t *pnode)
634 {
635 	char *parent = NULL;
636 	int err;
637 	di_node_t devtree;
638 	di_node_t dnode;
639 	struct diwalk_arg wkarg = { 0 };
640 	int ret = -1;
641 
642 	/*
643 	 * Lookup a property containing the devfs path of the parent PCIe
644 	 * device of the NVMe device we're attempting to enumerate.  This
645 	 * property is hard-coded in per-platform topo XML maps that are
646 	 * delivered with the OS.  This hard-coded path allows topo to map a
647 	 * given NVMe controller to a physical location (bay or slot) on the
648 	 * platform, when generating the topo snapshot.
649 	 */
650 	if (topo_prop_get_string(pnode, TOPO_PGROUP_BINDING,
651 	    TOPO_BINDING_PARENT_DEV, &parent, &err) != 0) {
652 		topo_mod_dprintf(mod, "parent node was missing nvme binding "
653 		    "properties\n");
654 		(void) topo_mod_seterrno(mod, err);
655 		goto out;
656 	}
657 	if ((devtree = topo_mod_devinfo(mod)) == DI_NODE_NIL) {
658 		topo_mod_dprintf(mod, "failed to get devinfo snapshot");
659 		(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
660 		goto out;
661 	}
662 
663 	/*
664 	 * Walk the devinfo tree looking NVMe devices. For each NVMe device,
665 	 * check if the devfs path of the parent matches the one specified in
666 	 * TOPO_BINDING_PARENT_DEV.
667 	 */
668 	wkarg.diwk_mod = mod;
669 	wkarg.diwk_parent = pnode;
670 	dnode = di_drv_first_node(NVME_DRV, devtree);
671 	while (dnode != DI_NODE_NIL) {
672 		char *path;
673 
674 		if ((path = di_devfs_path(di_parent_node(dnode))) == NULL) {
675 			topo_mod_dprintf(mod, "failed to get dev path");
676 			(void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
677 			goto out;
678 		}
679 		if (strcmp(parent, path) == 0) {
680 			if (di_walk_minor(dnode, DDI_NT_NVME_NEXUS, 0,
681 			    &wkarg, discover_nvme_ctl) < 0) {
682 				di_devfs_path_free(path);
683 				goto out;
684 			}
685 		}
686 		di_devfs_path_free(path);
687 		dnode = di_drv_next_node(dnode);
688 	}
689 	ret = 0;
690 
691 out:
692 	topo_mod_strfree(mod, parent);
693 	return (ret);
694 }
695