1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2020 Joyent, Inc.
14 * Copyright 2022 Tintri by DDN, Inc. All rights reserved.
15 * Copyright 2024 Oxide Computer Company
16 */
17
18 /*
19 * This file drives topo node enumeration of NVMe controllers. A single "nvme"
20 * node is enumerated for each NVMe controller. Child "disk" nodes are then
21 * enumerated for each active or attached NVMe namespace.
22 *
23 * nvme nodes are expected to be enumerated under either a "bay" node (for U.2
24 * devices) or a "slot" node (for M.2 devices) or a "pciexfn" node (for AIC
25 * devices).
26 *
27 * Enumeration of NVMe controllers on PCIe add-in cards is automatically driven
28 * by the pcibus topo module.
29 *
30 * In order to allow for associating a given NVMe controller with a physical
31 * location, enumeration of U.2 and M.2 devices should be driven by a
32 * platform-specific topo map which statically sets the following two
33 * properties on the parent "bay" or "slot" node:
34 *
35 * propgroup property description
36 * --------- -------- ------------
37 * binding driver "nvme"
38 * binding parent-device devpath of parent PCIe device
39 *
40 * for example:
41 *
42 * <propgroup name="binding" version="1" name-stability="Private"
43 * data-stability="Private">
44 * <propval name="driver" type="string" value="nvme"/>
45 * <propval name="parent-device" type="string"
46 * value="/pci@0,0/pci8086,6f09@3,1"/>
47 * </propgroup>
48 * <dependents grouping="children">
49 * <range name="nvme" min="0" max="0">
50 * <enum-method name="disk" version="1"/>
51 * </range>
52 * </dependents>
53 */
54 #include <stdlib.h>
55 #include <sys/types.h>
56 #include <sys/stat.h>
57 #include <fcntl.h>
58 #include <unistd.h>
59 #include <string.h>
60 #include <strings.h>
61 #include <stdbool.h>
62
63 #include <sys/fm/protocol.h>
64 #include <fm/topo_hc.h>
65 #include <fm/topo_mod.h>
66 #include <topo_ufm.h>
67
68 #include <sys/dkio.h>
69 #include <sys/scsi/generic/inquiry.h>
70
71 #include <libnvme.h>
72 #include "disk.h"
73 #include "disk_drivers.h"
74
75 typedef struct nvme_enum_info {
76 topo_mod_t *nei_mod;
77 di_node_t nei_dinode;
78 nvme_t *nei_libnvme;
79 nvme_ctrl_t *nei_ctrl;
80 nvme_ctrl_info_t *nei_ctrl_info;
81 const nvme_version_t *nei_vers;
82 tnode_t *nei_parent;
83 tnode_t *nei_nvme;
84 nvlist_t *nei_nvme_fmri;
85 int nei_fd;
86 } nvme_enum_info_t;
87
88 typedef struct devlink_arg {
89 topo_mod_t *dla_mod;
90 char *dla_logical_disk;
91 uint_t dla_strsz;
92 } devlink_arg_t;
93
94 static int
devlink_cb(di_devlink_t dl,void * arg)95 devlink_cb(di_devlink_t dl, void *arg)
96 {
97 devlink_arg_t *dlarg = (devlink_arg_t *)arg;
98 topo_mod_t *mod = dlarg->dla_mod;
99 const char *devpath;
100 char *slice, *ctds;
101
102 if ((devpath = di_devlink_path(dl)) == NULL ||
103 (dlarg->dla_logical_disk = topo_mod_strdup(mod, devpath)) ==
104 NULL) {
105 return (DI_WALK_TERMINATE);
106 }
107
108 /*
109 * We need to keep track of the original string size before we
110 * truncate it with a NUL, so that we can free the right number of
111 * bytes when we're done, otherwise libumem will complain.
112 */
113 dlarg->dla_strsz = strlen(dlarg->dla_logical_disk) + 1;
114
115 /* trim the slice off the public name */
116 if (((ctds = strrchr(dlarg->dla_logical_disk, '/')) != NULL) &&
117 ((slice = strchr(ctds, 's')) != NULL))
118 *slice = '\0';
119
120 return (DI_WALK_TERMINATE);
121 }
122
123 static char *
get_logical_disk(topo_mod_t * mod,const char * devpath,uint_t * bufsz)124 get_logical_disk(topo_mod_t *mod, const char *devpath, uint_t *bufsz)
125 {
126 di_devlink_handle_t devhdl;
127 devlink_arg_t dlarg = { 0 };
128 char *minorpath = NULL;
129
130 if (asprintf(&minorpath, "%s:a", devpath) < 0) {
131 return (NULL);
132 }
133
134 if ((devhdl = di_devlink_init(NULL, 0)) == DI_NODE_NIL) {
135 topo_mod_dprintf(mod, "%s: di_devlink_init failed", __func__);
136 free(minorpath);
137 return (NULL);
138 }
139
140 dlarg.dla_mod = mod;
141
142 (void) di_devlink_walk(devhdl, "^dsk/", minorpath, DI_PRIMARY_LINK,
143 &dlarg, devlink_cb);
144
145 (void) di_devlink_fini(&devhdl);
146 free(minorpath);
147
148 *bufsz = dlarg.dla_strsz;
149 return (dlarg.dla_logical_disk);
150 }
151
152 static bool
disk_nvme_make_ns_serial(topo_mod_t * mod,nvme_ns_info_t * ns_info,char * buf,size_t buflen)153 disk_nvme_make_ns_serial(topo_mod_t *mod, nvme_ns_info_t *ns_info, char *buf,
154 size_t buflen)
155 {
156 uint8_t nguid[16], eui64[8];
157 int ret;
158
159 if (nvme_ns_info_nguid(ns_info, nguid)) {
160 ret = snprintf(buf, buflen, "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X"
161 "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X",
162 nguid[0], nguid[1], nguid[2], nguid[3], nguid[4],
163 nguid[5], nguid[6], nguid[7], nguid[8], nguid[9],
164 nguid[10], nguid[11], nguid[12], nguid[13], nguid[14],
165 nguid[15]);
166 } else if (nvme_ns_info_eui64(ns_info, eui64)) {
167 ret = snprintf(buf, buflen,
168 "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X",
169 eui64[0], eui64[1], eui64[2], eui64[3], eui64[4],
170 eui64[5], eui64[6], eui64[7]);
171 } else {
172 ret = snprintf(buf, buflen, "%u", nvme_ns_info_nsid(ns_info));
173 }
174
175 if ((size_t)ret >= buflen) {
176 topo_mod_dprintf(mod, "overflowed serial number for nsid %u: "
177 "needed %zu bytes, got %d", nvme_ns_info_nsid(ns_info),
178 buflen, ret);
179 return (false);
180 }
181
182 return (true);
183 }
184
185 /*
186 * Create the common I/O property group properties that are shared between
187 * controllers and namespaces. We assume the property group was already created.
188 */
189 static bool
disk_nvme_common_io(topo_mod_t * mod,tnode_t * tn,di_node_t di)190 disk_nvme_common_io(topo_mod_t *mod, tnode_t *tn, di_node_t di)
191 {
192 int err;
193 int inst = di_instance(di);
194 const char *drv = di_driver_name(di);
195 char *path;
196 const char *ppaths[1];
197
198 if (inst != -1 && topo_prop_set_uint32(tn, TOPO_PGROUP_IO,
199 TOPO_IO_INSTANCE, TOPO_PROP_IMMUTABLE, (uint32_t)inst, &err) != 0) {
200 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: "
201 "%s", TOPO_PGROUP_IO, TOPO_IO_INSTANCE, topo_node_name(tn),
202 topo_node_instance(tn), topo_strerror(err));
203 return (false);
204 }
205
206 if (drv != NULL && topo_prop_set_string(tn, TOPO_PGROUP_IO,
207 TOPO_IO_DRIVER, TOPO_PROP_IMMUTABLE, drv, &err) != 0) {
208 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: "
209 "%s", TOPO_PGROUP_IO, TOPO_IO_DRIVER, topo_node_name(tn),
210 topo_node_instance(tn), topo_strerror(err));
211 return (false);
212 }
213
214 if (drv != NULL) {
215 nvlist_t *fmri = topo_mod_modfmri(mod, FM_MOD_SCHEME_VERSION,
216 drv);
217 if (mod != NULL && topo_prop_set_fmri(tn, TOPO_PGROUP_IO,
218 TOPO_IO_MODULE, TOPO_PROP_IMMUTABLE, fmri, &err) != 0) {
219 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%"
220 PRIu64 "]: %s", TOPO_PGROUP_IO, TOPO_IO_MODULE,
221 topo_node_name(tn), topo_node_instance(tn),
222 topo_strerror(err));
223 nvlist_free(fmri);
224 return (false);
225 }
226 nvlist_free(fmri);
227 }
228
229 path = di_devfs_path(di);
230 ppaths[0] = path;
231 if (path != NULL && topo_prop_set_string(tn, TOPO_PGROUP_IO,
232 TOPO_IO_DEV_PATH, TOPO_PROP_IMMUTABLE, path, &err) != 0) {
233 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: "
234 "%s", TOPO_PGROUP_IO, TOPO_IO_DRIVER, topo_node_name(tn),
235 topo_node_instance(tn), topo_strerror(err));
236 di_devfs_path_free(path);
237 return (false);
238 }
239
240 if (path != NULL && topo_prop_set_string_array(tn, TOPO_PGROUP_IO,
241 TOPO_IO_PHYS_PATH, TOPO_PROP_IMMUTABLE, ppaths, 1, &err) != 0) {
242 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: "
243 "%s", TOPO_PGROUP_IO, TOPO_IO_PHYS_PATH, topo_node_name(tn),
244 topo_node_instance(tn), topo_strerror(err));
245 di_devfs_path_free(path);
246 return (false);
247 }
248 di_devfs_path_free(path);
249
250 return (true);
251 }
252
253 /*
254 * Add the various storage and I/O property group items that are appropriate
255 * given that we have a devinfo node. The storage property group has already
256 * been created, but the I/O property group has not.
257 */
258 static void
disk_nvme_make_ns_di_props(topo_mod_t * mod,tnode_t * tn,di_node_t di)259 disk_nvme_make_ns_di_props(topo_mod_t *mod, tnode_t *tn, di_node_t di)
260 {
261 int err;
262 char *devid, *mfg, *model, *rev, *serial, *log, *path;
263 uint_t buflen;
264
265 if (di_prop_lookup_strings(DDI_DEV_T_ANY, di, DEVID_PROP_NAME,
266 &devid) != 1 ||
267 di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_VENDOR_ID,
268 &mfg) != 1 ||
269 di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_PRODUCT_ID,
270 &model) != 1 ||
271 di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_REVISION_ID,
272 &rev) != 1 ||
273 di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_SERIAL_NO,
274 &serial) != 1) {
275 topo_mod_dprintf(mod, "failed to get devinfo props for %s[%"
276 PRIu64 "]", topo_node_name(tn), topo_node_instance(tn));
277 return;
278 }
279
280 /*
281 * Set the basic storage manufacturer information. Yes, this is
282 * information really about the NVMe controller and not the namespace.
283 * That's how the storage property group basically works here.
284 */
285 if (topo_prop_set_string(tn, TOPO_PGROUP_STORAGE,
286 TOPO_STORAGE_MANUFACTURER, TOPO_PROP_IMMUTABLE, mfg, &err) != 0 ||
287 topo_prop_set_string(tn, TOPO_PGROUP_STORAGE,
288 TOPO_STORAGE_SERIAL_NUM, TOPO_PROP_IMMUTABLE, serial, &err) != 0 ||
289 topo_prop_set_string(tn, TOPO_PGROUP_STORAGE,
290 TOPO_STORAGE_FIRMWARE_REV, TOPO_PROP_IMMUTABLE, rev, &err) != 0 ||
291 topo_prop_set_string(tn, TOPO_PGROUP_STORAGE,
292 TOPO_STORAGE_MODEL, TOPO_PROP_IMMUTABLE, model, &err) != 0) {
293 topo_mod_dprintf(mod, "failed to set storage properties on "
294 "%s[%" PRIu64 "]: %s", topo_node_name(tn),
295 topo_node_instance(tn), topo_strerror(err));
296 return;
297 }
298
299 if (topo_pgroup_create(tn, &io_pgroup, &err) != 0) {
300 topo_mod_dprintf(mod, "failed to create I/O property "
301 "group on %s[%" PRIu64 "]: %s", topo_node_name(tn),
302 topo_node_instance(tn), topo_strerror(err));
303 }
304
305 if (!disk_nvme_common_io(mod, tn, di)) {
306 return;
307 }
308
309 /*
310 * The last property that we'd like to attempt to create for a namespace
311 * is a mapping back to its corresponding logical disk entry in /dev.
312 * The logical disk will be everything past the trailing /, i.e. a
313 * cXtXdX value.
314 */
315 path = di_devfs_path(di);
316 if (path == NULL) {
317 return;
318 }
319 log = get_logical_disk(mod, path, &buflen);
320 di_devfs_path_free(path);
321 if (log == NULL) {
322 return;
323 }
324 path = strrchr(log, '/');
325 if (path != NULL && path[1] != '\0' &&
326 topo_prop_set_string(tn, TOPO_PGROUP_STORAGE,
327 TOPO_STORAGE_LOGICAL_DISK_NAME, TOPO_PROP_IMMUTABLE, path + 1,
328 &err) != 0) {
329 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%"
330 PRIu64 "]: %s", TOPO_PGROUP_STORAGE,
331 TOPO_STORAGE_LOGICAL_DISK_NAME, topo_node_name(tn),
332 topo_node_instance(tn), topo_strerror(err));
333 }
334 topo_mod_free(mod, log, buflen);
335 }
336
337 static void
disk_nvme_make_ns(nvme_enum_info_t * nei,nvme_ns_info_t * ns_info)338 disk_nvme_make_ns(nvme_enum_info_t *nei, nvme_ns_info_t *ns_info)
339 {
340 topo_mod_t *mod = nei->nei_mod;
341 nvlist_t *auth = NULL, *fmri = NULL;
342 const uint32_t nsid = nvme_ns_info_nsid(ns_info);
343 const topo_instance_t inst = nsid - 1;
344 char serial[64], capstr[64];
345 const nvme_nvm_lba_fmt_t *fmt;
346 const char *bd_addr;
347 uint64_t cap, blksz, capblks;
348 tnode_t *tn;
349 int err;
350
351 auth = topo_mod_auth(mod, nei->nei_nvme);
352 if (auth == NULL) {
353 topo_mod_dprintf(mod, "failed to get auth for nsid %u from "
354 "parent %s[%" PRIu64 "]: %s", nsid,
355 topo_node_name(nei->nei_nvme),
356 topo_node_instance(nei->nei_nvme), topo_mod_errmsg(mod));
357 goto done;
358 }
359
360 /*
361 * We want to construct the FMRI for the namespace. The namespace is a
362 * little awkward in terms of things like the model, revision, and
363 * serial. While blkdev sets up standard inquiry properties to map these
364 * to the parent device which makes sense in the context of trying to
365 * use this as a normal block device, it's not really appropriate here.
366 * The namespace is not the NVMe controller. We construct the namespace
367 * serial number from the preferential ordering of information that
368 * we're given of the NGUID, EUI64, and then fall back to the namespace
369 * number.
370 */
371 if (!disk_nvme_make_ns_serial(mod, ns_info, serial, sizeof (serial))) {
372 goto done;
373 }
374 fmri = topo_mod_hcfmri(mod, nei->nei_nvme, FM_HC_SCHEME_VERSION,
375 DISK, inst, NULL, auth, NULL, NULL, serial);
376 if (fmri == NULL) {
377 topo_mod_dprintf(mod, "failed to make fmri for %s[%" PRIu64
378 "] on nsid %u: %s", DISK, inst, nsid, topo_mod_errmsg(mod));
379 goto done;
380 }
381
382 tn = topo_node_bind(mod, nei->nei_nvme, DISK, inst, fmri);
383 if (tn == NULL) {
384 topo_mod_dprintf(mod, "failed to bind fmri for %s[%" PRIu64
385 "] on nsid %u: %s", DISK, inst, nsid, topo_mod_errmsg(mod));
386 goto done;
387 }
388
389 /*
390 * Always inherit our parent's FRU. The namespace is just a part of the
391 * device in reality.
392 */
393 if (topo_node_fru_set(tn, NULL, 0, &err) != 0) {
394 topo_mod_dprintf(mod, "failed to set FRU for %s[%" PRIu64
395 "] on nsid %u: %s", DISK, inst, nsid, topo_strerror(err));
396 goto done;
397
398 }
399
400 /*
401 * Our namespace may or may not be attached. From the namespace we will
402 * always get the capacity and block information. The rest of it will
403 * end up being filled in if we find a devinfo node.
404 */
405 if (topo_pgroup_create(tn, &storage_pgroup, &err) != 0) {
406 topo_mod_dprintf(mod, "failed to create storage property "
407 "group on %s[%" PRIu64 "]: %s", DISK, inst,
408 topo_strerror(err));
409 }
410
411 if (!nvme_ns_info_curformat(ns_info, &fmt)) {
412 topo_mod_dprintf(mod, "failed to get current namespace "
413 "format: %s", nvme_ns_info_errmsg(ns_info));
414 goto done;
415 }
416
417 blksz = nvme_nvm_lba_fmt_data_size(fmt);
418 if (topo_prop_set_uint64(tn, TOPO_PGROUP_STORAGE,
419 TOPO_STORAGE_LOG_BLOCK_SIZE, TOPO_PROP_IMMUTABLE, blksz, &err) !=
420 0) {
421 topo_mod_dprintf(mod, "failed to create property %s:%s on %s[%"
422 PRIu64 "]: %s", TOPO_PGROUP_STORAGE,
423 TOPO_STORAGE_LOG_BLOCK_SIZE, DISK, inst,
424 topo_strerror(err));
425 goto done;
426 }
427
428 if (!nvme_ns_info_cap(ns_info, &capblks)) {
429 topo_mod_dprintf(mod, "failed to get namespace capacity: %s",
430 nvme_ns_info_errmsg(ns_info));
431 goto done;
432 }
433
434 cap = blksz * capblks;
435 if (snprintf(capstr, sizeof (capstr), "%" PRIu64, cap) >=
436 sizeof (capstr)) {
437 topo_mod_dprintf(mod, "overflowed capacity calculation on "
438 "nsid %u", nsid);
439 goto done;
440 }
441
442 /*
443 * Finally attempt to find a child node that has a matching name and go
444 * from there. Sorry, this does result in node creation being O(n^2),
445 * but at least n is usually small today. Note, we may not have a blkdev
446 * address because the disk may not be attached.
447 */
448 if (!nvme_ns_info_bd_addr(ns_info, &bd_addr)) {
449 if (nvme_ns_info_err(ns_info) != NVME_INFO_ERR_NS_NO_BLKDEV) {
450 topo_mod_dprintf(mod, "failed to get namespace blkdev "
451 "address: %s", nvme_ns_info_errmsg(ns_info));
452 }
453 goto done;
454 }
455
456 for (di_node_t di = di_child_node(nei->nei_dinode); di != DI_NODE_NIL;
457 di = di_sibling_node(di)) {
458 const char *addr = di_bus_addr(di);
459 if (addr != NULL && strcmp(addr, bd_addr) == 0) {
460 disk_nvme_make_ns_di_props(mod, tn, di);
461 }
462 }
463
464 done:
465 nvlist_free(auth);
466 nvlist_free(fmri);
467 }
468
469 /*
470 * Attempt to make a ufm node, but swallow the error so we can try to get as
471 * much of the disk information as possible.
472 */
473 static void
disk_nvme_make_ufm(topo_mod_t * mod,nvme_enum_info_t * nei)474 disk_nvme_make_ufm(topo_mod_t *mod, nvme_enum_info_t *nei)
475 {
476 topo_ufm_devinfo_t tud;
477 char *path = di_devfs_path(nei->nei_dinode);
478 if (path == NULL) {
479 return;
480 }
481
482 tud.tud_method = TOPO_UFM_M_DEVINFO;
483 tud.tud_path = path;
484 if (topo_mod_load(mod, TOPO_MOD_UFM, TOPO_VERSION) == NULL) {
485 topo_mod_dprintf(mod, "disk enum could not load ufm module");
486 di_devfs_path_free(path);
487 return;
488 }
489
490 (void) topo_mod_enumerate(mod, nei->nei_nvme, TOPO_MOD_UFM, UFM, 0, 0,
491 &tud);
492 di_devfs_path_free(path);
493 }
494
495 static const topo_pgroup_info_t nvme_pgroup = {
496 TOPO_PGROUP_NVME,
497 TOPO_STABILITY_PRIVATE,
498 TOPO_STABILITY_PRIVATE,
499 1
500 };
501
502 static int
make_nvme_node(nvme_enum_info_t * nvme_info)503 make_nvme_node(nvme_enum_info_t *nvme_info)
504 {
505 topo_mod_t *mod = nvme_info->nei_mod;
506 nvme_ctrl_info_t *info = nvme_info->nei_ctrl_info;
507 nvme_ns_iter_t *iter = NULL;
508 nvme_iter_t nret;
509 const nvme_ns_disc_t *disc;
510 nvlist_t *auth = NULL, *fmri = NULL, *fru;
511 tnode_t *nvme;
512 char *model = NULL, *serial = NULL, *vers = NULL;
513 char *pname = topo_node_name(nvme_info->nei_parent);
514 char *label = NULL;
515 topo_instance_t pinst = topo_node_instance(nvme_info->nei_parent);
516 int err = 0, ret = -1;
517
518 /*
519 * Pass the model and serial strings through a function that sanitizes
520 * them of any characters that can't be used in an FMRI string. Note, we
521 * do not use the firmware revision here because that's not really a
522 * device property that should be part of the FMRI (it can be changed at
523 * runtime).
524 */
525 model = topo_mod_clean_str(mod, nvme_ctrl_info_model(info));
526 serial = topo_mod_clean_str(mod, nvme_ctrl_info_serial(info));
527
528 auth = topo_mod_auth(mod, nvme_info->nei_parent);
529 fmri = topo_mod_hcfmri(mod, nvme_info->nei_parent, FM_HC_SCHEME_VERSION,
530 NVME, 0, NULL, auth, model, NULL, serial);
531
532 if (fmri == NULL) {
533 /* errno set */
534 topo_mod_dprintf(mod, "%s: hcfmri failed for %s=%" PRIu64
535 "/%s=0", __func__, pname, pinst, NVME);
536 goto error;
537 }
538
539 /*
540 * If our parent is a pciexfn node, then we need to create a nvme range
541 * underneath it to hold the nvme hierarchy. For other cases, where
542 * enumeration is being driven by a topo map file, this range will have
543 * already been statically defined in the XML.
544 */
545 if (strcmp(pname, PCIEX_FUNCTION) == 0) {
546 if (topo_node_range_create(mod, nvme_info->nei_parent, NVME, 0,
547 0) < 0) {
548 /* errno set */
549 topo_mod_dprintf(mod, "%s: error creating %s range",
550 __func__, NVME);
551 goto error;
552 }
553 }
554
555 /*
556 * Create a new topo node to represent the NVMe controller and bind it
557 * to the parent node.
558 */
559 if ((nvme = topo_node_bind(mod, nvme_info->nei_parent, NVME, 0,
560 fmri)) == NULL) {
561 /* errno set */
562 topo_mod_dprintf(mod, "%s: bind failed for %s=%" PRIu64
563 "/%s=0", __func__, pname, pinst, NVME);
564 goto error;
565 }
566 nvme_info->nei_nvme = nvme;
567 nvme_info->nei_nvme_fmri = fmri;
568
569 /*
570 * If our parent node is a "pciexfn" node then this is a NVMe device on
571 * a PCIe AIC, so we inherit our parent's FRU. Otherwise, we set the
572 * FRU to ourself.
573 */
574 if (strcmp(topo_node_name(nvme_info->nei_parent), PCIEX_FUNCTION) == 0)
575 fru = NULL;
576 else
577 fru = fmri;
578
579 if (topo_node_fru_set(nvme, fru, 0, &err) != 0) {
580 topo_mod_dprintf(mod, "%s: failed to set FRU: %s", __func__,
581 topo_strerror(err));
582 (void) topo_mod_seterrno(mod, err);
583 goto error;
584 }
585
586 /*
587 * Clone the label from our parent node. We can't inherit the property
588 * because the label prop is mutable on bay nodes and only immutable
589 * properties can be inherited.
590 */
591 if ((topo_node_label(nvme_info->nei_parent, &label, &err) != 0 &&
592 err != ETOPO_PROP_NOENT) ||
593 topo_node_label_set(nvme, label, &err) != 0) {
594 topo_mod_dprintf(mod, "%s: failed to set label: %s",
595 __func__, topo_strerror(err));
596 (void) topo_mod_seterrno(mod, err);
597 goto error;
598 }
599
600 /*
601 * Ensure that we have a UFM property set based on our devinfo path.
602 * This is a little repetitive if our parent actually did so as well,
603 * but given that the majority of such nodes are under bays and slots
604 * right now, it's a worthwhile tradeoff.
605 */
606 disk_nvme_make_ufm(mod, nvme_info);
607
608 if (topo_pgroup_create(nvme, &nvme_pgroup, &err) != 0) {
609 topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s",
610 __func__, TOPO_PGROUP_NVME, topo_strerror(err));
611 (void) topo_mod_seterrno(mod, err);
612 goto error;
613 }
614
615 if (asprintf(&vers, "%u.%u", nvme_info->nei_vers->v_major,
616 nvme_info->nei_vers->v_minor) < 0) {
617 topo_mod_dprintf(mod, "%s: failed to alloc string", __func__);
618 (void) topo_mod_seterrno(mod, EMOD_NOMEM);
619 goto error;
620 }
621 if (topo_prop_set_string(nvme, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER,
622 TOPO_PROP_IMMUTABLE, vers, &err) != 0) {
623 topo_mod_dprintf(mod, "%s: failed to set %s/%s property",
624 __func__, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER);
625 (void) topo_mod_seterrno(mod, err);
626 goto error;
627 }
628
629 if (topo_pgroup_create(nvme, &io_pgroup, &err) != 0) {
630 topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s",
631 __func__, TOPO_PGROUP_IO, topo_strerror(err));
632 (void) topo_mod_seterrno(mod, err);
633 goto error;
634 }
635
636 if (!disk_nvme_common_io(mod, nvme, nvme_info->nei_dinode)) {
637 goto error;
638 }
639
640 /*
641 * Create a child disk node for each namespace.
642 */
643 if (topo_node_range_create(mod, nvme, DISK, 0,
644 nvme_ctrl_info_nns(info) - 1) < 0) {
645 /* errno set */
646 topo_mod_dprintf(mod, "%s: error creating %s range", __func__,
647 DISK);
648 goto error;
649 }
650
651 /*
652 * Iterate over each namespace to see if it's a candidate for inclusion.
653 * Namespaces start at index 1 and not every namespace will be included.
654 * We map things such that a disk instance is always namespace - 1 to
655 * fit into the above mapping.
656 */
657 if (!nvme_ns_discover_init(nvme_info->nei_ctrl,
658 NVME_NS_DISC_F_NOT_IGNORED, &iter)) {
659 topo_mod_dprintf(mod, "failed to initialize namespace "
660 "discovery: %s", nvme_errmsg(nvme_info->nei_libnvme));
661 ret = topo_mod_seterrno(mod, EMOD_UNKNOWN);
662 goto error;
663 }
664
665 for (nret = nvme_ns_discover_step(iter, &disc); nret == NVME_ITER_VALID;
666 nret = nvme_ns_discover_step(iter, &disc)) {
667 nvme_ns_info_t *ns_info;
668 uint32_t nsid = nvme_ns_disc_nsid(disc);
669
670 if (!nvme_ctrl_ns_info_snap(nvme_info->nei_ctrl, nsid,
671 &ns_info)) {
672 topo_mod_dprintf(mod, "failed to get namespace "
673 "information for ns %u: %s", nsid,
674 nvme_errmsg(nvme_info->nei_libnvme));
675 ret = topo_mod_seterrno(mod, EMOD_UNKNOWN);
676 goto error;
677 }
678
679 disk_nvme_make_ns(nvme_info, ns_info);
680 nvme_ns_info_free(ns_info);
681 }
682
683 if (nret == NVME_ITER_ERROR) {
684 topo_mod_dprintf(mod, "namespace discovery failed: %s",
685 nvme_errmsg(nvme_info->nei_libnvme));
686 ret = topo_mod_seterrno(mod, EMOD_UNKNOWN);
687 }
688 ret = 0;
689
690 error:
691 nvme_ns_discover_fini(iter);
692 free(vers);
693 nvlist_free(auth);
694 nvlist_free(fmri);
695 topo_mod_strfree(mod, model);
696 topo_mod_strfree(mod, serial);
697 topo_mod_strfree(mod, label);
698 return (ret);
699 }
700
701 /*
702 * This function gathers identity information from the NVMe controller and
703 * stores it in a struct. This struct is passed to make_nvme_node(), which
704 * does the actual topo node creation.
705 */
706 static int
discover_nvme_ctl(topo_mod_t * mod,tnode_t * pnode,di_node_t dinode)707 discover_nvme_ctl(topo_mod_t *mod, tnode_t *pnode, di_node_t dinode)
708 {
709 topo_disk_t *disk = topo_mod_getspecific(mod);
710 nvme_enum_info_t nvme_info = { 0 };
711 int ret;
712
713 nvme_info.nei_mod = mod;
714 nvme_info.nei_dinode = dinode;
715 nvme_info.nei_parent = pnode;
716 nvme_info.nei_libnvme = disk->td_nvme;
717
718 if (!nvme_ctrl_init(disk->td_nvme, dinode, &nvme_info.nei_ctrl)) {
719 topo_mod_dprintf(mod, "failed to initialize nvme_ctrl_t: %s",
720 nvme_errmsg(disk->td_nvme));
721 return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
722 }
723
724 if (!nvme_ctrl_info_snap(nvme_info.nei_ctrl,
725 &nvme_info.nei_ctrl_info)) {
726 topo_mod_dprintf(mod, "failed to initialize nvme_ctrl_t: %s",
727 nvme_errmsg(disk->td_nvme));
728 ret = topo_mod_seterrno(mod, EMOD_UNKNOWN);
729 goto error;
730 }
731
732 nvme_info.nei_vers = nvme_ctrl_info_version(nvme_info.nei_ctrl_info);
733
734 if ((ret = make_nvme_node(&nvme_info)) != 0) {
735 goto error;
736 }
737
738 error:
739 if (nvme_info.nei_ctrl_info != NULL)
740 nvme_ctrl_info_free(nvme_info.nei_ctrl_info);
741 if (nvme_info.nei_ctrl != NULL)
742 nvme_ctrl_fini(nvme_info.nei_ctrl);
743 return (ret);
744 }
745
746 int
disk_nvme_enum_disk(topo_mod_t * mod,tnode_t * pnode)747 disk_nvme_enum_disk(topo_mod_t *mod, tnode_t *pnode)
748 {
749 char *parent = NULL;
750 int err;
751 di_node_t devtree;
752 di_node_t dnode;
753 int ret = -1;
754
755 /*
756 * Lookup a property containing the devfs path of the parent PCIe
757 * device of the NVMe device we're attempting to enumerate. This
758 * property is hard-coded in per-platform topo XML maps that are
759 * delivered with the OS. This hard-coded path allows topo to map a
760 * given NVMe controller to a physical location (bay or slot) on the
761 * platform, when generating the topo snapshot.
762 */
763 if (topo_prop_get_string(pnode, TOPO_PGROUP_BINDING,
764 TOPO_BINDING_PARENT_DEV, &parent, &err) != 0) {
765 topo_mod_dprintf(mod, "parent node was missing nvme binding "
766 "properties\n");
767 (void) topo_mod_seterrno(mod, err);
768 goto out;
769 }
770 if ((devtree = topo_mod_devinfo(mod)) == DI_NODE_NIL) {
771 topo_mod_dprintf(mod, "failed to get devinfo snapshot");
772 (void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
773 goto out;
774 }
775
776 /*
777 * Walk the devinfo tree looking NVMe devices. For each NVMe device,
778 * check if the devfs path of the parent matches the one specified in
779 * TOPO_BINDING_PARENT_DEV.
780 */
781 dnode = di_drv_first_node(NVME_DRV, devtree);
782 while (dnode != DI_NODE_NIL) {
783 char *path;
784
785 if ((path = di_devfs_path(di_parent_node(dnode))) == NULL) {
786 topo_mod_dprintf(mod, "failed to get dev path");
787 (void) topo_mod_seterrno(mod, EMOD_UNKNOWN);
788 goto out;
789 }
790 if (strcmp(parent, path) == 0) {
791 ret = discover_nvme_ctl(mod, pnode, dnode);
792 di_devfs_path_free(path);
793 goto out;
794 }
795 di_devfs_path_free(path);
796 dnode = di_drv_next_node(dnode);
797 }
798 ret = 0;
799
800 out:
801 topo_mod_strfree(mod, parent);
802 return (ret);
803 }
804