13c6ffbabSRob Johnston /* 23c6ffbabSRob Johnston * This file and its contents are supplied under the terms of the 33c6ffbabSRob Johnston * Common Development and Distribution License ("CDDL"), version 1.0. 43c6ffbabSRob Johnston * You may only use this file in accordance with the terms of version 53c6ffbabSRob Johnston * 1.0 of the CDDL. 63c6ffbabSRob Johnston * 73c6ffbabSRob Johnston * A full copy of the text of the CDDL should have accompanied this 83c6ffbabSRob Johnston * source. A copy of the CDDL is also available via the Internet at 93c6ffbabSRob Johnston * http://www.illumos.org/license/CDDL. 103c6ffbabSRob Johnston */ 113c6ffbabSRob Johnston 123c6ffbabSRob Johnston /* 133c6ffbabSRob Johnston * Copyright 2020 Joyent, Inc. 14153f3212SHans Rosenfeld * Copyright 2022 Tintri by DDN, Inc. All rights reserved. 15*744642a2SRobert Mustacchi * Copyright 2023 Oxide Computer Company 163c6ffbabSRob Johnston */ 173c6ffbabSRob Johnston 183c6ffbabSRob Johnston /* 193c6ffbabSRob Johnston * This file drives topo node enumeration of NVMe controllers. A single "nvme" 203c6ffbabSRob Johnston * node is enumerated for each NVMe controller. Child "disk" nodes are then 21*744642a2SRobert Mustacchi * enumerated for each active or attached NVMe namespace. 223c6ffbabSRob Johnston * 233c6ffbabSRob Johnston * nvme nodes are expected to be enumerated under either a "bay" node (for U.2 243c6ffbabSRob Johnston * devices) or a "slot" node (for M.2 devices) or a "pciexfn" node (for AIC 253c6ffbabSRob Johnston * devices). 263c6ffbabSRob Johnston * 273c6ffbabSRob Johnston * Enumeration of NVMe controllers on PCIe add-in cards is automatically driven 283c6ffbabSRob Johnston * by the pcibus topo module. 293c6ffbabSRob Johnston * 303c6ffbabSRob Johnston * In order to allow for associating a given NVMe controller with a physical 313c6ffbabSRob Johnston * location, enumeration of U.2 and M.2 devices should be driven by a 323c6ffbabSRob Johnston * platform-specific topo map which statically sets the following two 333c6ffbabSRob Johnston * properties on the parent "bay" or "slot" node: 343c6ffbabSRob Johnston * 353c6ffbabSRob Johnston * propgroup property description 363c6ffbabSRob Johnston * --------- -------- ------------ 373c6ffbabSRob Johnston * binding driver "nvme" 383c6ffbabSRob Johnston * binding parent-device devpath of parent PCIe device 393c6ffbabSRob Johnston * 403c6ffbabSRob Johnston * for example: 413c6ffbabSRob Johnston * 423c6ffbabSRob Johnston * <propgroup name="binding" version="1" name-stability="Private" 433c6ffbabSRob Johnston * data-stability="Private"> 443c6ffbabSRob Johnston * <propval name="driver" type="string" value="nvme"/> 453c6ffbabSRob Johnston * <propval name="parent-device" type="string" 463c6ffbabSRob Johnston * value="/pci@0,0/pci8086,6f09@3,1"/> 473c6ffbabSRob Johnston * </propgroup> 483c6ffbabSRob Johnston * <dependents grouping="children"> 493c6ffbabSRob Johnston * <range name="nvme" min="0" max="0"> 503c6ffbabSRob Johnston * <enum-method name="disk" version="1"/> 513c6ffbabSRob Johnston * </range> 523c6ffbabSRob Johnston * </dependents> 533c6ffbabSRob Johnston */ 543c6ffbabSRob Johnston #include <stdlib.h> 553c6ffbabSRob Johnston #include <sys/types.h> 563c6ffbabSRob Johnston #include <sys/stat.h> 573c6ffbabSRob Johnston #include <fcntl.h> 583c6ffbabSRob Johnston #include <unistd.h> 593c6ffbabSRob Johnston #include <string.h> 603c6ffbabSRob Johnston #include <strings.h> 61*744642a2SRobert Mustacchi #include <stdbool.h> 623c6ffbabSRob Johnston 633c6ffbabSRob Johnston #include <sys/fm/protocol.h> 643c6ffbabSRob Johnston #include <fm/topo_hc.h> 653c6ffbabSRob Johnston #include <fm/topo_mod.h> 66*744642a2SRobert Mustacchi #include <topo_ufm.h> 673c6ffbabSRob Johnston 683c6ffbabSRob Johnston #include <sys/dkio.h> 693c6ffbabSRob Johnston #include <sys/scsi/generic/inquiry.h> 703c6ffbabSRob Johnston 713c6ffbabSRob Johnston #include <sys/nvme.h> 723c6ffbabSRob Johnston #include "disk.h" 733c6ffbabSRob Johnston #include "disk_drivers.h" 743c6ffbabSRob Johnston 753c6ffbabSRob Johnston typedef struct nvme_enum_info { 763c6ffbabSRob Johnston topo_mod_t *nei_mod; 773c6ffbabSRob Johnston di_node_t nei_dinode; 783c6ffbabSRob Johnston nvme_identify_ctrl_t *nei_idctl; 793c6ffbabSRob Johnston nvme_version_t nei_vers; 803c6ffbabSRob Johnston tnode_t *nei_parent; 813c6ffbabSRob Johnston tnode_t *nei_nvme; 823c6ffbabSRob Johnston nvlist_t *nei_nvme_fmri; 833c6ffbabSRob Johnston const char *nei_nvme_path; 843c6ffbabSRob Johnston int nei_fd; 853c6ffbabSRob Johnston } nvme_enum_info_t; 863c6ffbabSRob Johnston 873c6ffbabSRob Johnston typedef struct devlink_arg { 883c6ffbabSRob Johnston topo_mod_t *dla_mod; 893c6ffbabSRob Johnston char *dla_logical_disk; 903c6ffbabSRob Johnston uint_t dla_strsz; 913c6ffbabSRob Johnston } devlink_arg_t; 923c6ffbabSRob Johnston 933c6ffbabSRob Johnston static int 943c6ffbabSRob Johnston devlink_cb(di_devlink_t dl, void *arg) 953c6ffbabSRob Johnston { 963c6ffbabSRob Johnston devlink_arg_t *dlarg = (devlink_arg_t *)arg; 973c6ffbabSRob Johnston topo_mod_t *mod = dlarg->dla_mod; 983c6ffbabSRob Johnston const char *devpath; 993c6ffbabSRob Johnston char *slice, *ctds; 1003c6ffbabSRob Johnston 1013c6ffbabSRob Johnston if ((devpath = di_devlink_path(dl)) == NULL || 1023c6ffbabSRob Johnston (dlarg->dla_logical_disk = topo_mod_strdup(mod, devpath)) == 1033c6ffbabSRob Johnston NULL) { 1043c6ffbabSRob Johnston return (DI_WALK_TERMINATE); 1053c6ffbabSRob Johnston } 1063c6ffbabSRob Johnston 1073c6ffbabSRob Johnston /* 1083c6ffbabSRob Johnston * We need to keep track of the original string size before we 1093c6ffbabSRob Johnston * truncate it with a NUL, so that we can free the right number of 1103c6ffbabSRob Johnston * bytes when we're done, otherwise libumem will complain. 1113c6ffbabSRob Johnston */ 1123c6ffbabSRob Johnston dlarg->dla_strsz = strlen(dlarg->dla_logical_disk) + 1; 1133c6ffbabSRob Johnston 1143c6ffbabSRob Johnston /* trim the slice off the public name */ 1153c6ffbabSRob Johnston if (((ctds = strrchr(dlarg->dla_logical_disk, '/')) != NULL) && 1163c6ffbabSRob Johnston ((slice = strchr(ctds, 's')) != NULL)) 1173c6ffbabSRob Johnston *slice = '\0'; 1183c6ffbabSRob Johnston 1193c6ffbabSRob Johnston return (DI_WALK_TERMINATE); 1203c6ffbabSRob Johnston } 1213c6ffbabSRob Johnston 1223c6ffbabSRob Johnston static char * 1233c6ffbabSRob Johnston get_logical_disk(topo_mod_t *mod, const char *devpath, uint_t *bufsz) 1243c6ffbabSRob Johnston { 1253c6ffbabSRob Johnston di_devlink_handle_t devhdl; 1263c6ffbabSRob Johnston devlink_arg_t dlarg = { 0 }; 1273c6ffbabSRob Johnston char *minorpath = NULL; 1283c6ffbabSRob Johnston 1293c6ffbabSRob Johnston if (asprintf(&minorpath, "%s:a", devpath) < 0) { 1303c6ffbabSRob Johnston return (NULL); 1313c6ffbabSRob Johnston } 1323c6ffbabSRob Johnston 1333c6ffbabSRob Johnston if ((devhdl = di_devlink_init(NULL, 0)) == DI_NODE_NIL) { 1343c6ffbabSRob Johnston topo_mod_dprintf(mod, "%s: di_devlink_init failed", __func__); 1353c6ffbabSRob Johnston free(minorpath); 1363c6ffbabSRob Johnston return (NULL); 1373c6ffbabSRob Johnston } 1383c6ffbabSRob Johnston 1393c6ffbabSRob Johnston dlarg.dla_mod = mod; 1403c6ffbabSRob Johnston 1413c6ffbabSRob Johnston (void) di_devlink_walk(devhdl, "^dsk/", minorpath, DI_PRIMARY_LINK, 1423c6ffbabSRob Johnston &dlarg, devlink_cb); 1433c6ffbabSRob Johnston 1443c6ffbabSRob Johnston (void) di_devlink_fini(&devhdl); 1453c6ffbabSRob Johnston free(minorpath); 1463c6ffbabSRob Johnston 1473c6ffbabSRob Johnston *bufsz = dlarg.dla_strsz; 1483c6ffbabSRob Johnston return (dlarg.dla_logical_disk); 1493c6ffbabSRob Johnston } 1503c6ffbabSRob Johnston 151*744642a2SRobert Mustacchi static bool 152*744642a2SRobert Mustacchi disk_nvme_make_ns_serial(topo_mod_t *mod, const nvme_identify_nsid_t *id, 153*744642a2SRobert Mustacchi uint32_t nsid, char *buf, size_t buflen) 1543c6ffbabSRob Johnston { 155*744642a2SRobert Mustacchi uint8_t zero_guid[16] = { 0 }; 156*744642a2SRobert Mustacchi int ret; 1573c6ffbabSRob Johnston 158*744642a2SRobert Mustacchi if (bcmp(zero_guid, id->id_nguid, sizeof (id->id_nguid)) != 0) { 159*744642a2SRobert Mustacchi ret = snprintf(buf, buflen, "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X" 160*744642a2SRobert Mustacchi "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X", 161*744642a2SRobert Mustacchi id->id_nguid[0], id->id_nguid[1], id->id_nguid[2], 162*744642a2SRobert Mustacchi id->id_nguid[3], id->id_nguid[4], id->id_nguid[5], 163*744642a2SRobert Mustacchi id->id_nguid[6], id->id_nguid[7], id->id_nguid[8], 164*744642a2SRobert Mustacchi id->id_nguid[9], id->id_nguid[10], id->id_nguid[11], 165*744642a2SRobert Mustacchi id->id_nguid[12], id->id_nguid[13], id->id_nguid[14], 166*744642a2SRobert Mustacchi id->id_nguid[15]); 167*744642a2SRobert Mustacchi } else if (bcmp(zero_guid, id->id_eui64, sizeof (id->id_eui64)) != 0) { 168*744642a2SRobert Mustacchi ret = snprintf(buf, buflen, 169*744642a2SRobert Mustacchi "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X", 170*744642a2SRobert Mustacchi id->id_eui64[0], id->id_eui64[1], id->id_eui64[2], 171*744642a2SRobert Mustacchi id->id_eui64[3], id->id_eui64[4], id->id_eui64[5], 172*744642a2SRobert Mustacchi id->id_eui64[6], id->id_eui64[7]); 1733c6ffbabSRob Johnston } else { 174*744642a2SRobert Mustacchi ret = snprintf(buf, buflen, "%u", nsid); 175*744642a2SRobert Mustacchi } 176*744642a2SRobert Mustacchi 177*744642a2SRobert Mustacchi if ((size_t)ret >= buflen) { 178*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "overflowed serial number for nsid %u: " 179*744642a2SRobert Mustacchi "needed %zu bytes, got %d", nsid, buflen, ret); 180*744642a2SRobert Mustacchi return (false); 181*744642a2SRobert Mustacchi } 182*744642a2SRobert Mustacchi 183*744642a2SRobert Mustacchi return (true); 1843c6ffbabSRob Johnston } 1853c6ffbabSRob Johnston 1863c6ffbabSRob Johnston /* 187*744642a2SRobert Mustacchi * Create the common I/O property group properties that are shared between 188*744642a2SRobert Mustacchi * controllers and namespaces. We assume the property group was already created. 1893c6ffbabSRob Johnston */ 190*744642a2SRobert Mustacchi static bool 191*744642a2SRobert Mustacchi disk_nvme_common_io(topo_mod_t *mod, tnode_t *tn, di_node_t di) 192*744642a2SRobert Mustacchi { 193*744642a2SRobert Mustacchi int err; 194*744642a2SRobert Mustacchi int inst = di_instance(di); 195*744642a2SRobert Mustacchi const char *drv = di_driver_name(di); 196*744642a2SRobert Mustacchi char *path; 197*744642a2SRobert Mustacchi const char *ppaths[1]; 1983c6ffbabSRob Johnston 199*744642a2SRobert Mustacchi if (inst != -1 && topo_prop_set_uint32(tn, TOPO_PGROUP_IO, 200*744642a2SRobert Mustacchi TOPO_IO_INSTANCE, TOPO_PROP_IMMUTABLE, (uint32_t)inst, &err) != 0) { 201*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: " 202*744642a2SRobert Mustacchi "%s", TOPO_PGROUP_IO, TOPO_IO_INSTANCE, topo_node_name(tn), 203*744642a2SRobert Mustacchi topo_node_instance(tn), topo_strerror(err)); 204*744642a2SRobert Mustacchi return (false); 2053c6ffbabSRob Johnston } 2063c6ffbabSRob Johnston 207*744642a2SRobert Mustacchi if (drv != NULL && topo_prop_set_string(tn, TOPO_PGROUP_IO, 208*744642a2SRobert Mustacchi TOPO_IO_DRIVER, TOPO_PROP_IMMUTABLE, drv, &err) != 0) { 209*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: " 210*744642a2SRobert Mustacchi "%s", TOPO_PGROUP_IO, TOPO_IO_DRIVER, topo_node_name(tn), 211*744642a2SRobert Mustacchi topo_node_instance(tn), topo_strerror(err)); 212*744642a2SRobert Mustacchi return (false); 213*744642a2SRobert Mustacchi } 2143c6ffbabSRob Johnston 215*744642a2SRobert Mustacchi if (drv != NULL) { 216*744642a2SRobert Mustacchi nvlist_t *fmri = topo_mod_modfmri(mod, FM_MOD_SCHEME_VERSION, 217*744642a2SRobert Mustacchi drv); 218*744642a2SRobert Mustacchi if (mod != NULL && topo_prop_set_fmri(tn, TOPO_PGROUP_IO, 219*744642a2SRobert Mustacchi TOPO_IO_MODULE, TOPO_PROP_IMMUTABLE, fmri, &err) != 0) { 220*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" 221*744642a2SRobert Mustacchi PRIu64 "]: %s", TOPO_PGROUP_IO, TOPO_IO_MODULE, 222*744642a2SRobert Mustacchi topo_node_name(tn), topo_node_instance(tn), 2233c6ffbabSRob Johnston topo_strerror(err)); 224*744642a2SRobert Mustacchi nvlist_free(fmri); 225*744642a2SRobert Mustacchi return (false); 226*744642a2SRobert Mustacchi } 227*744642a2SRobert Mustacchi nvlist_free(fmri); 2283c6ffbabSRob Johnston } 2293c6ffbabSRob Johnston 230*744642a2SRobert Mustacchi path = di_devfs_path(di); 2313c6ffbabSRob Johnston ppaths[0] = path; 232*744642a2SRobert Mustacchi if (path != NULL && topo_prop_set_string(tn, TOPO_PGROUP_IO, 233*744642a2SRobert Mustacchi TOPO_IO_DEV_PATH, TOPO_PROP_IMMUTABLE, path, &err) != 0) { 234*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: " 235*744642a2SRobert Mustacchi "%s", TOPO_PGROUP_IO, TOPO_IO_DRIVER, topo_node_name(tn), 236*744642a2SRobert Mustacchi topo_node_instance(tn), topo_strerror(err)); 237*744642a2SRobert Mustacchi di_devfs_path_free(path); 238*744642a2SRobert Mustacchi return (false); 239*744642a2SRobert Mustacchi } 240*744642a2SRobert Mustacchi 241*744642a2SRobert Mustacchi if (path != NULL && topo_prop_set_string_array(tn, TOPO_PGROUP_IO, 242*744642a2SRobert Mustacchi TOPO_IO_PHYS_PATH, TOPO_PROP_IMMUTABLE, ppaths, 1, &err) != 0) { 243*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: " 244*744642a2SRobert Mustacchi "%s", TOPO_PGROUP_IO, TOPO_IO_PHYS_PATH, topo_node_name(tn), 245*744642a2SRobert Mustacchi topo_node_instance(tn), topo_strerror(err)); 246*744642a2SRobert Mustacchi di_devfs_path_free(path); 247*744642a2SRobert Mustacchi return (false); 248*744642a2SRobert Mustacchi } 249*744642a2SRobert Mustacchi di_devfs_path_free(path); 250*744642a2SRobert Mustacchi 251*744642a2SRobert Mustacchi return (true); 252*744642a2SRobert Mustacchi } 2533c6ffbabSRob Johnston 2543c6ffbabSRob Johnston /* 255*744642a2SRobert Mustacchi * Add the various storage and I/O property group items that are appropriate 256*744642a2SRobert Mustacchi * given that we have a devinfo node. The storage property group has already 257*744642a2SRobert Mustacchi * been created, but the I/O property group has not. 2583c6ffbabSRob Johnston */ 259*744642a2SRobert Mustacchi static void 260*744642a2SRobert Mustacchi disk_nvme_make_ns_di_props(topo_mod_t *mod, tnode_t *tn, di_node_t di) 261*744642a2SRobert Mustacchi { 262*744642a2SRobert Mustacchi int err; 263*744642a2SRobert Mustacchi char *devid, *mfg, *model, *rev, *serial, *log, *path; 264*744642a2SRobert Mustacchi uint_t buflen; 265*744642a2SRobert Mustacchi 266*744642a2SRobert Mustacchi if (di_prop_lookup_strings(DDI_DEV_T_ANY, di, DEVID_PROP_NAME, 267*744642a2SRobert Mustacchi &devid) != 1 || 268*744642a2SRobert Mustacchi di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_VENDOR_ID, 269*744642a2SRobert Mustacchi &mfg) != 1 || 270*744642a2SRobert Mustacchi di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_PRODUCT_ID, 271*744642a2SRobert Mustacchi &model) != 1 || 272*744642a2SRobert Mustacchi di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_REVISION_ID, 273*744642a2SRobert Mustacchi &rev) != 1 || 274*744642a2SRobert Mustacchi di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_SERIAL_NO, 275*744642a2SRobert Mustacchi &serial) != 1) { 276*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "failed to get devinfo props for %s[%" 277*744642a2SRobert Mustacchi PRIu64 "]", topo_node_name(tn), topo_node_instance(tn)); 278*744642a2SRobert Mustacchi return; 2793c6ffbabSRob Johnston } 2803c6ffbabSRob Johnston 281*744642a2SRobert Mustacchi /* 282*744642a2SRobert Mustacchi * Set the basic storage manufacturer information. Yes, this is 283*744642a2SRobert Mustacchi * information really about the NVMe controller and not the namespace. 284*744642a2SRobert Mustacchi * That's how the storage property group basically works here. 285*744642a2SRobert Mustacchi */ 286*744642a2SRobert Mustacchi if (topo_prop_set_string(tn, TOPO_PGROUP_STORAGE, 287*744642a2SRobert Mustacchi TOPO_STORAGE_MANUFACTURER, TOPO_PROP_IMMUTABLE, mfg, &err) != 0 || 288*744642a2SRobert Mustacchi topo_prop_set_string(tn, TOPO_PGROUP_STORAGE, 289*744642a2SRobert Mustacchi TOPO_STORAGE_SERIAL_NUM, TOPO_PROP_IMMUTABLE, serial, &err) != 0 || 290*744642a2SRobert Mustacchi topo_prop_set_string(tn, TOPO_PGROUP_STORAGE, 291*744642a2SRobert Mustacchi TOPO_STORAGE_FIRMWARE_REV, TOPO_PROP_IMMUTABLE, rev, &err) != 0 || 292*744642a2SRobert Mustacchi topo_prop_set_string(tn, TOPO_PGROUP_STORAGE, 293*744642a2SRobert Mustacchi TOPO_STORAGE_MODEL, TOPO_PROP_IMMUTABLE, model, &err) != 0) { 294*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "failed to set storage properties on " 295*744642a2SRobert Mustacchi "%s[%" PRIu64 "]: %s", topo_node_name(tn), 296*744642a2SRobert Mustacchi topo_node_instance(tn), topo_strerror(err)); 297*744642a2SRobert Mustacchi return; 2983c6ffbabSRob Johnston } 2993c6ffbabSRob Johnston 300*744642a2SRobert Mustacchi if (topo_pgroup_create(tn, &io_pgroup, &err) != 0) { 301*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "failed to create I/O property " 302*744642a2SRobert Mustacchi "group on %s[%" PRIu64 "]: %s", topo_node_name(tn), 303*744642a2SRobert Mustacchi topo_node_instance(tn), topo_strerror(err)); 304*744642a2SRobert Mustacchi } 3053c6ffbabSRob Johnston 306*744642a2SRobert Mustacchi if (!disk_nvme_common_io(mod, tn, di)) { 307*744642a2SRobert Mustacchi return; 308*744642a2SRobert Mustacchi } 309*744642a2SRobert Mustacchi 310*744642a2SRobert Mustacchi /* 311*744642a2SRobert Mustacchi * The last property that we'd like to attempt to create for a namespace 312*744642a2SRobert Mustacchi * is a mapping back to its corresponding logical disk entry in /dev. 313*744642a2SRobert Mustacchi * The logical disk will be everything past the trailing /, i.e. a 314*744642a2SRobert Mustacchi * cXtXdX value. 315*744642a2SRobert Mustacchi */ 316*744642a2SRobert Mustacchi path = di_devfs_path(di); 317*744642a2SRobert Mustacchi if (path == NULL) { 318*744642a2SRobert Mustacchi return; 319*744642a2SRobert Mustacchi } 320*744642a2SRobert Mustacchi log = get_logical_disk(mod, path, &buflen); 3213c6ffbabSRob Johnston di_devfs_path_free(path); 322*744642a2SRobert Mustacchi if (log == NULL) { 323*744642a2SRobert Mustacchi return; 324*744642a2SRobert Mustacchi } 325*744642a2SRobert Mustacchi path = strrchr(log, '/'); 326*744642a2SRobert Mustacchi if (path != NULL && path[1] != '\0' && 327*744642a2SRobert Mustacchi topo_prop_set_string(tn, TOPO_PGROUP_STORAGE, 328*744642a2SRobert Mustacchi TOPO_STORAGE_LOGICAL_DISK_NAME, TOPO_PROP_IMMUTABLE, path + 1, 329*744642a2SRobert Mustacchi &err) != 0) { 330*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" 331*744642a2SRobert Mustacchi PRIu64 "]: %s", TOPO_PGROUP_STORAGE, 332*744642a2SRobert Mustacchi TOPO_STORAGE_LOGICAL_DISK_NAME, topo_node_name(tn), 333*744642a2SRobert Mustacchi topo_node_instance(tn), topo_strerror(err)); 334*744642a2SRobert Mustacchi } 335*744642a2SRobert Mustacchi topo_mod_free(mod, log, buflen); 336*744642a2SRobert Mustacchi } 337*744642a2SRobert Mustacchi 338*744642a2SRobert Mustacchi static void 339*744642a2SRobert Mustacchi disk_nvme_make_ns(nvme_enum_info_t *nei, uint32_t nsid) 340*744642a2SRobert Mustacchi { 341*744642a2SRobert Mustacchi topo_mod_t *mod = nei->nei_mod; 342*744642a2SRobert Mustacchi nvlist_t *auth = NULL, *fmri = NULL; 343*744642a2SRobert Mustacchi const topo_instance_t inst = nsid - 1; 344*744642a2SRobert Mustacchi nvme_ns_info_t info; 345*744642a2SRobert Mustacchi nvme_ioctl_t ioc; 346*744642a2SRobert Mustacchi char serial[64], capstr[64]; 347*744642a2SRobert Mustacchi uint64_t cap, blksz; 348*744642a2SRobert Mustacchi tnode_t *tn; 349*744642a2SRobert Mustacchi uint8_t lba; 350*744642a2SRobert Mustacchi int err; 351*744642a2SRobert Mustacchi 352*744642a2SRobert Mustacchi bzero(&ioc, sizeof (ioc)); 353*744642a2SRobert Mustacchi bzero(&info, sizeof (info)); 354*744642a2SRobert Mustacchi ioc.n_len = sizeof (nvme_ns_info_t); 355*744642a2SRobert Mustacchi ioc.n_buf = (uintptr_t)&info; 356*744642a2SRobert Mustacchi ioc.n_arg = nsid; 357*744642a2SRobert Mustacchi 358*744642a2SRobert Mustacchi if (ioctl(nei->nei_fd, NVME_IOC_NS_INFO, &ioc) != 0) { 359*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "failed to get namespace info for ns %u: " 360*744642a2SRobert Mustacchi "%s", nsid, strerror(errno)); 361*744642a2SRobert Mustacchi return; 362*744642a2SRobert Mustacchi } 363*744642a2SRobert Mustacchi 364*744642a2SRobert Mustacchi if ((info.nni_state & NVME_NS_STATE_IGNORED) != 0) { 365*744642a2SRobert Mustacchi return; 366*744642a2SRobert Mustacchi } 367*744642a2SRobert Mustacchi 368*744642a2SRobert Mustacchi if ((info.nni_state & 369*744642a2SRobert Mustacchi (NVME_NS_STATE_ACTIVE | NVME_NS_STATE_ATTACHED)) == 0) { 370*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "skipping nsid %u because it is not " 371*744642a2SRobert Mustacchi "active or attached (state: 0x%x)", nsid, info.nni_state); 372*744642a2SRobert Mustacchi return; 373*744642a2SRobert Mustacchi } 374*744642a2SRobert Mustacchi 375*744642a2SRobert Mustacchi auth = topo_mod_auth(mod, nei->nei_nvme); 376*744642a2SRobert Mustacchi if (auth == NULL) { 377*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "failed to get auth for nsid %u from " 378*744642a2SRobert Mustacchi "parent %s[%" PRIu64 "]: %s", nsid, 379*744642a2SRobert Mustacchi topo_node_name(nei->nei_nvme), 380*744642a2SRobert Mustacchi topo_node_instance(nei->nei_nvme), topo_mod_errmsg(mod)); 381*744642a2SRobert Mustacchi goto done; 382*744642a2SRobert Mustacchi } 383*744642a2SRobert Mustacchi 384*744642a2SRobert Mustacchi /* 385*744642a2SRobert Mustacchi * We want to construct the FMRI for the namespace. The namespace is a 386*744642a2SRobert Mustacchi * little awkward in terms of things like the model, revision, and 387*744642a2SRobert Mustacchi * serial. While blkdev sets up standard inquiry properties to map these 388*744642a2SRobert Mustacchi * to the parent device which makes sense in the context of trying to 389*744642a2SRobert Mustacchi * use this as a normal block device, it's not really appropriate here. 390*744642a2SRobert Mustacchi * The namespace is not the NVMe controller. We construct the namespace 391*744642a2SRobert Mustacchi * serial number from the preferential ordering of information that 392*744642a2SRobert Mustacchi * we're given of the NGUID, EUI64, and then fall back to the namespace 393*744642a2SRobert Mustacchi * number. 394*744642a2SRobert Mustacchi */ 395*744642a2SRobert Mustacchi if (!disk_nvme_make_ns_serial(mod, &info.nni_id, nsid, serial, 396*744642a2SRobert Mustacchi sizeof (serial))) { 397*744642a2SRobert Mustacchi goto done; 398*744642a2SRobert Mustacchi } 399*744642a2SRobert Mustacchi fmri = topo_mod_hcfmri(mod, nei->nei_nvme, FM_HC_SCHEME_VERSION, 400*744642a2SRobert Mustacchi DISK, inst, NULL, auth, NULL, NULL, serial); 401*744642a2SRobert Mustacchi if (fmri == NULL) { 402*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "failed to make fmri for %s[%" PRIu64 403*744642a2SRobert Mustacchi "] on nsid %u: %s", DISK, inst, nsid, topo_mod_errmsg(mod)); 404*744642a2SRobert Mustacchi goto done; 405*744642a2SRobert Mustacchi } 406*744642a2SRobert Mustacchi 407*744642a2SRobert Mustacchi tn = topo_node_bind(mod, nei->nei_nvme, DISK, inst, fmri); 408*744642a2SRobert Mustacchi if (tn == NULL) { 409*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "failed to bind fmri for %s[%" PRIu64 410*744642a2SRobert Mustacchi "] on nsid %u: %s", DISK, inst, nsid, topo_mod_errmsg(mod)); 411*744642a2SRobert Mustacchi goto done; 412*744642a2SRobert Mustacchi } 413*744642a2SRobert Mustacchi 414*744642a2SRobert Mustacchi /* 415*744642a2SRobert Mustacchi * Always inherit our parent's FRU. The namespace is just a part of the 416*744642a2SRobert Mustacchi * device in reality. 417*744642a2SRobert Mustacchi */ 418*744642a2SRobert Mustacchi if (topo_node_fru_set(tn, NULL, 0, &err) != 0) { 419*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "failed to set FRU for %s[%" PRIu64 420*744642a2SRobert Mustacchi "] on nsid %u: %s", DISK, inst, nsid, topo_strerror(err)); 421*744642a2SRobert Mustacchi goto done; 422*744642a2SRobert Mustacchi 423*744642a2SRobert Mustacchi } 424*744642a2SRobert Mustacchi 425*744642a2SRobert Mustacchi /* 426*744642a2SRobert Mustacchi * Our namespace may or may not be attached. From the namespace we will 427*744642a2SRobert Mustacchi * always get the capacity and block information. The rest of it will 428*744642a2SRobert Mustacchi * end up being filled in if we find a devinfo node. 429*744642a2SRobert Mustacchi */ 430*744642a2SRobert Mustacchi if (topo_pgroup_create(tn, &storage_pgroup, &err) != 0) { 431*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "failed to create storage property " 432*744642a2SRobert Mustacchi "group on %s[%" PRIu64 "]: %s", DISK, inst, 433*744642a2SRobert Mustacchi topo_strerror(err)); 434*744642a2SRobert Mustacchi } 435*744642a2SRobert Mustacchi 436*744642a2SRobert Mustacchi lba = info.nni_id.id_flbas.lba_format; 437*744642a2SRobert Mustacchi blksz = 1ULL << info.nni_id.id_lbaf[lba].lbaf_lbads; 438*744642a2SRobert Mustacchi if (blksz != 0 && topo_prop_set_uint64(tn, TOPO_PGROUP_STORAGE, 439*744642a2SRobert Mustacchi TOPO_STORAGE_LOG_BLOCK_SIZE, TOPO_PROP_IMMUTABLE, blksz, &err) != 440*744642a2SRobert Mustacchi 0) { 441*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "failed to create property %s:%s on %s[%" 442*744642a2SRobert Mustacchi PRIu64 "]: %s", TOPO_PGROUP_STORAGE, 443*744642a2SRobert Mustacchi TOPO_STORAGE_LOG_BLOCK_SIZE, DISK, inst, 444*744642a2SRobert Mustacchi topo_strerror(err)); 445*744642a2SRobert Mustacchi goto done; 446*744642a2SRobert Mustacchi } 447*744642a2SRobert Mustacchi 448*744642a2SRobert Mustacchi cap = blksz * info.nni_id.id_nsize; 449*744642a2SRobert Mustacchi if (snprintf(capstr, sizeof (capstr), "%" PRIu64, cap) >= 450*744642a2SRobert Mustacchi sizeof (capstr)) { 451*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "overflowed capacity calculation on " 452*744642a2SRobert Mustacchi "nsid %u", nsid); 453*744642a2SRobert Mustacchi goto done; 454*744642a2SRobert Mustacchi } 455*744642a2SRobert Mustacchi 456*744642a2SRobert Mustacchi /* 457*744642a2SRobert Mustacchi * Finally attempt to find a child node that has a matching name and go 458*744642a2SRobert Mustacchi * from there. Sorry, this does result in node creation being O(n^2), 459*744642a2SRobert Mustacchi * but at least n is usually small today. 460*744642a2SRobert Mustacchi */ 461*744642a2SRobert Mustacchi for (di_node_t di = di_child_node(nei->nei_dinode); di != DI_NODE_NIL; 462*744642a2SRobert Mustacchi di = di_sibling_node(di)) { 463*744642a2SRobert Mustacchi const char *addr = di_bus_addr(di); 464*744642a2SRobert Mustacchi if (addr != NULL && strcmp(addr, info.nni_addr) == 0) { 465*744642a2SRobert Mustacchi disk_nvme_make_ns_di_props(mod, tn, di); 466*744642a2SRobert Mustacchi } 467*744642a2SRobert Mustacchi } 468*744642a2SRobert Mustacchi 469*744642a2SRobert Mustacchi done: 4703c6ffbabSRob Johnston nvlist_free(auth); 4713c6ffbabSRob Johnston nvlist_free(fmri); 472*744642a2SRobert Mustacchi } 473*744642a2SRobert Mustacchi 474*744642a2SRobert Mustacchi /* 475*744642a2SRobert Mustacchi * Attempt to make a ufm node, but swallow the error so we can try to get as 476*744642a2SRobert Mustacchi * much of the disk information as possible. 477*744642a2SRobert Mustacchi */ 478*744642a2SRobert Mustacchi static void 479*744642a2SRobert Mustacchi disk_nvme_make_ufm(topo_mod_t *mod, nvme_enum_info_t *nei) 480*744642a2SRobert Mustacchi { 481*744642a2SRobert Mustacchi topo_ufm_devinfo_t tud; 482*744642a2SRobert Mustacchi char *path = di_devfs_path(nei->nei_dinode); 483*744642a2SRobert Mustacchi if (path == NULL) { 484*744642a2SRobert Mustacchi return; 485*744642a2SRobert Mustacchi } 486*744642a2SRobert Mustacchi 487*744642a2SRobert Mustacchi tud.tud_method = TOPO_UFM_M_DEVINFO; 488*744642a2SRobert Mustacchi tud.tud_path = path; 489*744642a2SRobert Mustacchi if (topo_mod_load(mod, TOPO_MOD_UFM, TOPO_VERSION) == NULL) { 490*744642a2SRobert Mustacchi topo_mod_dprintf(mod, "disk enum could not load ufm module"); 491*744642a2SRobert Mustacchi di_devfs_path_free(path); 492*744642a2SRobert Mustacchi return; 493*744642a2SRobert Mustacchi } 494*744642a2SRobert Mustacchi 495*744642a2SRobert Mustacchi (void) topo_mod_enumerate(mod, nei->nei_nvme, TOPO_MOD_UFM, UFM, 0, 0, 496*744642a2SRobert Mustacchi &tud); 497*744642a2SRobert Mustacchi di_devfs_path_free(path); 4983c6ffbabSRob Johnston } 4993c6ffbabSRob Johnston 5003c6ffbabSRob Johnston static const topo_pgroup_info_t nvme_pgroup = { 5013c6ffbabSRob Johnston TOPO_PGROUP_NVME, 5023c6ffbabSRob Johnston TOPO_STABILITY_PRIVATE, 5033c6ffbabSRob Johnston TOPO_STABILITY_PRIVATE, 5043c6ffbabSRob Johnston 1 5053c6ffbabSRob Johnston }; 5063c6ffbabSRob Johnston 5073c6ffbabSRob Johnston static int 5083c6ffbabSRob Johnston make_nvme_node(nvme_enum_info_t *nvme_info) 5093c6ffbabSRob Johnston { 5103c6ffbabSRob Johnston topo_mod_t *mod = nvme_info->nei_mod; 5113c6ffbabSRob Johnston nvlist_t *auth = NULL, *fmri = NULL, *fru; 5123c6ffbabSRob Johnston tnode_t *nvme; 5133c6ffbabSRob Johnston char raw_rev[NVME_FWVER_SZ + 1], raw_model[NVME_MODEL_SZ + 1]; 5143c6ffbabSRob Johnston char raw_serial[NVME_SERIAL_SZ + 1]; 5153c6ffbabSRob Johnston char *rev = NULL, *model = NULL, *serial = NULL, *vers = NULL; 5163c6ffbabSRob Johnston char *pname = topo_node_name(nvme_info->nei_parent); 5173c6ffbabSRob Johnston char *label = NULL; 5183c6ffbabSRob Johnston topo_instance_t pinst = topo_node_instance(nvme_info->nei_parent); 5193c6ffbabSRob Johnston int err = 0, ret = -1; 5203c6ffbabSRob Johnston 5213c6ffbabSRob Johnston /* 5223c6ffbabSRob Johnston * The raw strings returned by the IDENTIFY CONTROLLER command are 5233c6ffbabSRob Johnston * not NUL-terminated, so we fix that up. 5243c6ffbabSRob Johnston */ 5253c6ffbabSRob Johnston (void) strncpy(raw_rev, nvme_info->nei_idctl->id_fwrev, NVME_FWVER_SZ); 5263c6ffbabSRob Johnston raw_rev[NVME_FWVER_SZ] = '\0'; 5273c6ffbabSRob Johnston (void) strncpy(raw_model, nvme_info->nei_idctl->id_model, 5283c6ffbabSRob Johnston NVME_MODEL_SZ); 5293c6ffbabSRob Johnston raw_model[NVME_MODEL_SZ] = '\0'; 5303c6ffbabSRob Johnston (void) strncpy(raw_serial, nvme_info->nei_idctl->id_serial, 5313c6ffbabSRob Johnston NVME_SERIAL_SZ); 5323c6ffbabSRob Johnston raw_serial[NVME_SERIAL_SZ] = '\0'; 5333c6ffbabSRob Johnston 5343c6ffbabSRob Johnston /* 5353c6ffbabSRob Johnston * Next we pass the strings through a function that sanitizes them of 5363c6ffbabSRob Johnston * any characters that can't be used in an FMRI string. 5373c6ffbabSRob Johnston */ 5383c6ffbabSRob Johnston rev = topo_mod_clean_str(mod, raw_rev); 5393c6ffbabSRob Johnston model = topo_mod_clean_str(mod, raw_model); 5403c6ffbabSRob Johnston serial = topo_mod_clean_str(mod, raw_serial); 5413c6ffbabSRob Johnston 5423c6ffbabSRob Johnston auth = topo_mod_auth(mod, nvme_info->nei_parent); 5433c6ffbabSRob Johnston fmri = topo_mod_hcfmri(mod, nvme_info->nei_parent, FM_HC_SCHEME_VERSION, 5443c6ffbabSRob Johnston NVME, 0, NULL, auth, model, rev, serial); 5453c6ffbabSRob Johnston 5463c6ffbabSRob Johnston if (fmri == NULL) { 5473c6ffbabSRob Johnston /* errno set */ 5486597d6fcSRobert Mustacchi topo_mod_dprintf(mod, "%s: hcfmri failed for %s=%" PRIu64 5496597d6fcSRobert Mustacchi "/%s=0", __func__, pname, pinst, NVME); 5503c6ffbabSRob Johnston goto error; 5513c6ffbabSRob Johnston } 5523c6ffbabSRob Johnston 5533c6ffbabSRob Johnston /* 5543c6ffbabSRob Johnston * If our parent is a pciexfn node, then we need to create a nvme range 555*744642a2SRobert Mustacchi * underneath it to hold the nvme hierarchy. For other cases, where 5563c6ffbabSRob Johnston * enumeration is being driven by a topo map file, this range will have 5573c6ffbabSRob Johnston * already been statically defined in the XML. 5583c6ffbabSRob Johnston */ 5593c6ffbabSRob Johnston if (strcmp(pname, PCIEX_FUNCTION) == 0) { 5603c6ffbabSRob Johnston if (topo_node_range_create(mod, nvme_info->nei_parent, NVME, 0, 5613c6ffbabSRob Johnston 0) < 0) { 5623c6ffbabSRob Johnston /* errno set */ 5633c6ffbabSRob Johnston topo_mod_dprintf(mod, "%s: error creating %s range", 5643c6ffbabSRob Johnston __func__, NVME); 5653c6ffbabSRob Johnston goto error; 5663c6ffbabSRob Johnston } 5673c6ffbabSRob Johnston } 5683c6ffbabSRob Johnston 5693c6ffbabSRob Johnston /* 5703c6ffbabSRob Johnston * Create a new topo node to represent the NVMe controller and bind it 5713c6ffbabSRob Johnston * to the parent node. 5723c6ffbabSRob Johnston */ 5733c6ffbabSRob Johnston if ((nvme = topo_node_bind(mod, nvme_info->nei_parent, NVME, 0, 5743c6ffbabSRob Johnston fmri)) == NULL) { 5753c6ffbabSRob Johnston /* errno set */ 5766597d6fcSRobert Mustacchi topo_mod_dprintf(mod, "%s: bind failed for %s=%" PRIu64 5776597d6fcSRobert Mustacchi "/%s=0", __func__, pname, pinst, NVME); 5783c6ffbabSRob Johnston goto error; 5793c6ffbabSRob Johnston } 5803c6ffbabSRob Johnston nvme_info->nei_nvme = nvme; 5813c6ffbabSRob Johnston nvme_info->nei_nvme_fmri = fmri; 5823c6ffbabSRob Johnston 5833c6ffbabSRob Johnston /* 5843c6ffbabSRob Johnston * If our parent node is a "pciexfn" node then this is a NVMe device on 5853c6ffbabSRob Johnston * a PCIe AIC, so we inherit our parent's FRU. Otherwise, we set the 5863c6ffbabSRob Johnston * FRU to ourself. 5873c6ffbabSRob Johnston */ 5883c6ffbabSRob Johnston if (strcmp(topo_node_name(nvme_info->nei_parent), PCIEX_FUNCTION) == 0) 5893c6ffbabSRob Johnston fru = NULL; 5903c6ffbabSRob Johnston else 5913c6ffbabSRob Johnston fru = fmri; 5923c6ffbabSRob Johnston 5933c6ffbabSRob Johnston if (topo_node_fru_set(nvme, fru, 0, &err) != 0) { 5943c6ffbabSRob Johnston topo_mod_dprintf(mod, "%s: failed to set FRU: %s", __func__, 5953c6ffbabSRob Johnston topo_strerror(err)); 5963c6ffbabSRob Johnston (void) topo_mod_seterrno(mod, err); 5973c6ffbabSRob Johnston goto error; 5983c6ffbabSRob Johnston } 5993c6ffbabSRob Johnston 6003c6ffbabSRob Johnston /* 6013c6ffbabSRob Johnston * Clone the label from our parent node. We can't inherit the property 6023c6ffbabSRob Johnston * because the label prop is mutable on bay nodes and only immutable 6033c6ffbabSRob Johnston * properties can be inherited. 6043c6ffbabSRob Johnston */ 6053c6ffbabSRob Johnston if ((topo_node_label(nvme_info->nei_parent, &label, &err) != 0 && 6063c6ffbabSRob Johnston err != ETOPO_PROP_NOENT) || 6073c6ffbabSRob Johnston topo_node_label_set(nvme, label, &err) != 0) { 6083c6ffbabSRob Johnston topo_mod_dprintf(mod, "%s: failed to set label: %s", 6093c6ffbabSRob Johnston __func__, topo_strerror(err)); 6103c6ffbabSRob Johnston (void) topo_mod_seterrno(mod, err); 6113c6ffbabSRob Johnston goto error; 6123c6ffbabSRob Johnston } 6133c6ffbabSRob Johnston 614*744642a2SRobert Mustacchi /* 615*744642a2SRobert Mustacchi * Ensure that we have a UFM property set based on our devinfo path. 616*744642a2SRobert Mustacchi * This is a little repetitive if our parent actually did so as well, 617*744642a2SRobert Mustacchi * but given that the majority of such nodes are under bays and slots 618*744642a2SRobert Mustacchi * right now, it's a worthwhile tradeoff. 619*744642a2SRobert Mustacchi */ 620*744642a2SRobert Mustacchi disk_nvme_make_ufm(mod, nvme_info); 621*744642a2SRobert Mustacchi 6223c6ffbabSRob Johnston if (topo_pgroup_create(nvme, &nvme_pgroup, &err) != 0) { 6233c6ffbabSRob Johnston topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s", 6243c6ffbabSRob Johnston __func__, TOPO_PGROUP_NVME, topo_strerror(err)); 6253c6ffbabSRob Johnston (void) topo_mod_seterrno(mod, err); 6263c6ffbabSRob Johnston goto error; 6273c6ffbabSRob Johnston } 6283c6ffbabSRob Johnston 6293c6ffbabSRob Johnston if (asprintf(&vers, "%u.%u", nvme_info->nei_vers.v_major, 6303c6ffbabSRob Johnston nvme_info->nei_vers.v_minor) < 0) { 6313c6ffbabSRob Johnston topo_mod_dprintf(mod, "%s: failed to alloc string", __func__); 6323c6ffbabSRob Johnston (void) topo_mod_seterrno(mod, EMOD_NOMEM); 6333c6ffbabSRob Johnston goto error; 6343c6ffbabSRob Johnston } 6353c6ffbabSRob Johnston if (topo_prop_set_string(nvme, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER, 6363c6ffbabSRob Johnston TOPO_PROP_IMMUTABLE, vers, &err) != 0) { 6373c6ffbabSRob Johnston topo_mod_dprintf(mod, "%s: failed to set %s/%s property", 6383c6ffbabSRob Johnston __func__, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER); 6393c6ffbabSRob Johnston (void) topo_mod_seterrno(mod, err); 6403c6ffbabSRob Johnston goto error; 6413c6ffbabSRob Johnston } 6423c6ffbabSRob Johnston 6433c6ffbabSRob Johnston if (topo_pgroup_create(nvme, &io_pgroup, &err) != 0) { 6443c6ffbabSRob Johnston topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s", 6453c6ffbabSRob Johnston __func__, TOPO_PGROUP_IO, topo_strerror(err)); 6463c6ffbabSRob Johnston (void) topo_mod_seterrno(mod, err); 6473c6ffbabSRob Johnston goto error; 6483c6ffbabSRob Johnston } 649*744642a2SRobert Mustacchi 650*744642a2SRobert Mustacchi if (!disk_nvme_common_io(mod, nvme, nvme_info->nei_dinode)) { 6513c6ffbabSRob Johnston goto error; 6523c6ffbabSRob Johnston } 6533c6ffbabSRob Johnston 6543c6ffbabSRob Johnston /* 6553c6ffbabSRob Johnston * Create a child disk node for each namespace. 6563c6ffbabSRob Johnston */ 6573c6ffbabSRob Johnston if (topo_node_range_create(mod, nvme, DISK, 0, 6583c6ffbabSRob Johnston (nvme_info->nei_idctl->id_nn - 1)) < 0) { 6593c6ffbabSRob Johnston /* errno set */ 6603c6ffbabSRob Johnston topo_mod_dprintf(mod, "%s: error creating %s range", __func__, 6613c6ffbabSRob Johnston DISK); 6623c6ffbabSRob Johnston goto error; 6633c6ffbabSRob Johnston } 6643c6ffbabSRob Johnston 6653c6ffbabSRob Johnston /* 666*744642a2SRobert Mustacchi * Iterate over each namespace to see if it's a candidate for inclusion. 667*744642a2SRobert Mustacchi * Namespaces start at index 1 and not every namespace will be included. 668*744642a2SRobert Mustacchi * We map things such that a disk instance is always namespace - 1 to 669*744642a2SRobert Mustacchi * fit into the above mapping. 6703c6ffbabSRob Johnston */ 671*744642a2SRobert Mustacchi for (uint32_t i = 1; i <= nvme_info->nei_idctl->id_nn; i++) { 672*744642a2SRobert Mustacchi disk_nvme_make_ns(nvme_info, i); 6733c6ffbabSRob Johnston } 6743c6ffbabSRob Johnston ret = 0; 6753c6ffbabSRob Johnston 6763c6ffbabSRob Johnston error: 6773c6ffbabSRob Johnston free(vers); 6783c6ffbabSRob Johnston nvlist_free(auth); 6793c6ffbabSRob Johnston nvlist_free(fmri); 6803c6ffbabSRob Johnston topo_mod_strfree(mod, rev); 6813c6ffbabSRob Johnston topo_mod_strfree(mod, model); 6823c6ffbabSRob Johnston topo_mod_strfree(mod, serial); 6833c6ffbabSRob Johnston topo_mod_strfree(mod, label); 6843c6ffbabSRob Johnston return (ret); 6853c6ffbabSRob Johnston } 6863c6ffbabSRob Johnston 6873c6ffbabSRob Johnston struct diwalk_arg { 6883c6ffbabSRob Johnston topo_mod_t *diwk_mod; 6893c6ffbabSRob Johnston tnode_t *diwk_parent; 6903c6ffbabSRob Johnston }; 6913c6ffbabSRob Johnston 6923c6ffbabSRob Johnston /* 6933c6ffbabSRob Johnston * This function gathers identity information from the NVMe controller and 6943c6ffbabSRob Johnston * stores it in a struct. This struct is passed to make_nvme_node(), which 6953c6ffbabSRob Johnston * does the actual topo node creation. 6963c6ffbabSRob Johnston */ 6973c6ffbabSRob Johnston static int 6983c6ffbabSRob Johnston discover_nvme_ctl(di_node_t node, di_minor_t minor, void *arg) 6993c6ffbabSRob Johnston { 7003c6ffbabSRob Johnston struct diwalk_arg *wkarg = arg; 7013c6ffbabSRob Johnston topo_mod_t *mod = wkarg->diwk_mod; 7023c6ffbabSRob Johnston char *path = NULL, *devctl = NULL; 7033c6ffbabSRob Johnston nvme_ioctl_t nioc = { 0 }; 7043c6ffbabSRob Johnston nvme_identify_ctrl_t *idctl = NULL; 7053c6ffbabSRob Johnston nvme_enum_info_t nvme_info = { 0 }; 7063c6ffbabSRob Johnston int fd = -1, ret = DI_WALK_TERMINATE; 7073c6ffbabSRob Johnston 7083c6ffbabSRob Johnston if ((path = di_devfs_minor_path(minor)) == NULL) { 7093c6ffbabSRob Johnston topo_mod_dprintf(mod, "failed to get minor path"); 7103c6ffbabSRob Johnston (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); 7113c6ffbabSRob Johnston return (ret); 7123c6ffbabSRob Johnston } 7133c6ffbabSRob Johnston 7146597d6fcSRobert Mustacchi topo_mod_dprintf(mod, "%s=%" PRIu64 ": found nvme controller: %s", 7153c6ffbabSRob Johnston topo_node_name(wkarg->diwk_parent), 7163c6ffbabSRob Johnston topo_node_instance(wkarg->diwk_parent), path); 7173c6ffbabSRob Johnston 7183c6ffbabSRob Johnston if (asprintf(&devctl, "/devices%s", path) < 0) { 7193c6ffbabSRob Johnston topo_mod_dprintf(mod, "failed to alloc string"); 7203c6ffbabSRob Johnston (void) topo_mod_seterrno(mod, EMOD_NOMEM); 7213c6ffbabSRob Johnston goto error; 7223c6ffbabSRob Johnston } 7233c6ffbabSRob Johnston 7243c6ffbabSRob Johnston if ((fd = open(devctl, O_RDWR)) < 0) { 7253c6ffbabSRob Johnston topo_mod_dprintf(mod, "failed to open %s", devctl); 7263c6ffbabSRob Johnston (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); 7273c6ffbabSRob Johnston goto error; 7283c6ffbabSRob Johnston } 7293c6ffbabSRob Johnston if ((idctl = topo_mod_zalloc(mod, NVME_IDENTIFY_BUFSIZE)) == NULL) { 7303c6ffbabSRob Johnston topo_mod_dprintf(mod, "zalloc failed"); 7313c6ffbabSRob Johnston (void) topo_mod_seterrno(mod, EMOD_NOMEM); 7323c6ffbabSRob Johnston goto error; 7333c6ffbabSRob Johnston } 7343c6ffbabSRob Johnston nioc.n_len = NVME_IDENTIFY_BUFSIZE; 7353c6ffbabSRob Johnston nioc.n_buf = (uintptr_t)idctl; 736153f3212SHans Rosenfeld nioc.n_arg = NVME_IDENTIFY_CTRL; 7373c6ffbabSRob Johnston 738153f3212SHans Rosenfeld if (ioctl(fd, NVME_IOC_IDENTIFY, &nioc) != 0) { 739153f3212SHans Rosenfeld topo_mod_dprintf(mod, "NVME_IOC_IDENTIFY ioctl " 7403c6ffbabSRob Johnston "failed: %s", strerror(errno)); 7413c6ffbabSRob Johnston (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); 7423c6ffbabSRob Johnston goto error; 7433c6ffbabSRob Johnston } 7443c6ffbabSRob Johnston 7453c6ffbabSRob Johnston nioc.n_len = sizeof (nvme_version_t); 7463c6ffbabSRob Johnston nioc.n_buf = (uintptr_t)&nvme_info.nei_vers; 747153f3212SHans Rosenfeld nioc.n_arg = 0; 7483c6ffbabSRob Johnston 7493c6ffbabSRob Johnston if (ioctl(fd, NVME_IOC_VERSION, &nioc) != 0) { 7503c6ffbabSRob Johnston topo_mod_dprintf(mod, "NVME_IOC_VERSION ioctl failed: %s", 7513c6ffbabSRob Johnston strerror(errno)); 7523c6ffbabSRob Johnston (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); 7533c6ffbabSRob Johnston goto error; 7543c6ffbabSRob Johnston } 7553c6ffbabSRob Johnston 7563c6ffbabSRob Johnston nvme_info.nei_mod = mod; 7573c6ffbabSRob Johnston nvme_info.nei_nvme_path = path; 7583c6ffbabSRob Johnston nvme_info.nei_dinode = node; 7593c6ffbabSRob Johnston nvme_info.nei_idctl = idctl; 7603c6ffbabSRob Johnston nvme_info.nei_parent = wkarg->diwk_parent; 7613c6ffbabSRob Johnston nvme_info.nei_fd = fd; 7623c6ffbabSRob Johnston 7633c6ffbabSRob Johnston if (make_nvme_node(&nvme_info) != 0) { 7643c6ffbabSRob Johnston /* errno set */ 7653c6ffbabSRob Johnston goto error; 7663c6ffbabSRob Johnston } 7673c6ffbabSRob Johnston 7683c6ffbabSRob Johnston ret = DI_WALK_CONTINUE; 7693c6ffbabSRob Johnston 7703c6ffbabSRob Johnston error: 7713c6ffbabSRob Johnston if (fd > 0) 7723c6ffbabSRob Johnston (void) close(fd); 7733c6ffbabSRob Johnston di_devfs_path_free(path); 7743c6ffbabSRob Johnston free(devctl); 7753c6ffbabSRob Johnston if (idctl != NULL) 7763c6ffbabSRob Johnston topo_mod_free(mod, idctl, NVME_IDENTIFY_BUFSIZE); 7773c6ffbabSRob Johnston return (ret); 7783c6ffbabSRob Johnston } 7793c6ffbabSRob Johnston 7803c6ffbabSRob Johnston int 7813c6ffbabSRob Johnston disk_nvme_enum_disk(topo_mod_t *mod, tnode_t *pnode) 7823c6ffbabSRob Johnston { 7833c6ffbabSRob Johnston char *parent = NULL; 7843c6ffbabSRob Johnston int err; 7853c6ffbabSRob Johnston di_node_t devtree; 7863c6ffbabSRob Johnston di_node_t dnode; 7873c6ffbabSRob Johnston struct diwalk_arg wkarg = { 0 }; 7883c6ffbabSRob Johnston int ret = -1; 7893c6ffbabSRob Johnston 7903c6ffbabSRob Johnston /* 7913c6ffbabSRob Johnston * Lookup a property containing the devfs path of the parent PCIe 7923c6ffbabSRob Johnston * device of the NVMe device we're attempting to enumerate. This 7933c6ffbabSRob Johnston * property is hard-coded in per-platform topo XML maps that are 7943c6ffbabSRob Johnston * delivered with the OS. This hard-coded path allows topo to map a 7953c6ffbabSRob Johnston * given NVMe controller to a physical location (bay or slot) on the 7963c6ffbabSRob Johnston * platform, when generating the topo snapshot. 7973c6ffbabSRob Johnston */ 7983c6ffbabSRob Johnston if (topo_prop_get_string(pnode, TOPO_PGROUP_BINDING, 7993c6ffbabSRob Johnston TOPO_BINDING_PARENT_DEV, &parent, &err) != 0) { 8003c6ffbabSRob Johnston topo_mod_dprintf(mod, "parent node was missing nvme binding " 8013c6ffbabSRob Johnston "properties\n"); 8023c6ffbabSRob Johnston (void) topo_mod_seterrno(mod, err); 8033c6ffbabSRob Johnston goto out; 8043c6ffbabSRob Johnston } 8053c6ffbabSRob Johnston if ((devtree = topo_mod_devinfo(mod)) == DI_NODE_NIL) { 8063c6ffbabSRob Johnston topo_mod_dprintf(mod, "failed to get devinfo snapshot"); 8073c6ffbabSRob Johnston (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); 8083c6ffbabSRob Johnston goto out; 8093c6ffbabSRob Johnston } 8103c6ffbabSRob Johnston 8113c6ffbabSRob Johnston /* 8123c6ffbabSRob Johnston * Walk the devinfo tree looking NVMe devices. For each NVMe device, 8133c6ffbabSRob Johnston * check if the devfs path of the parent matches the one specified in 8143c6ffbabSRob Johnston * TOPO_BINDING_PARENT_DEV. 8153c6ffbabSRob Johnston */ 8163c6ffbabSRob Johnston wkarg.diwk_mod = mod; 8173c6ffbabSRob Johnston wkarg.diwk_parent = pnode; 8183c6ffbabSRob Johnston dnode = di_drv_first_node(NVME_DRV, devtree); 8193c6ffbabSRob Johnston while (dnode != DI_NODE_NIL) { 8203c6ffbabSRob Johnston char *path; 8213c6ffbabSRob Johnston 8223c6ffbabSRob Johnston if ((path = di_devfs_path(di_parent_node(dnode))) == NULL) { 8233c6ffbabSRob Johnston topo_mod_dprintf(mod, "failed to get dev path"); 8243c6ffbabSRob Johnston (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); 8253c6ffbabSRob Johnston goto out; 8263c6ffbabSRob Johnston } 8273c6ffbabSRob Johnston if (strcmp(parent, path) == 0) { 8283c6ffbabSRob Johnston if (di_walk_minor(dnode, DDI_NT_NVME_NEXUS, 0, 8293c6ffbabSRob Johnston &wkarg, discover_nvme_ctl) < 0) { 8303c6ffbabSRob Johnston di_devfs_path_free(path); 8313c6ffbabSRob Johnston goto out; 8323c6ffbabSRob Johnston } 8333c6ffbabSRob Johnston } 8343c6ffbabSRob Johnston di_devfs_path_free(path); 8353c6ffbabSRob Johnston dnode = di_drv_next_node(dnode); 8363c6ffbabSRob Johnston } 8373c6ffbabSRob Johnston ret = 0; 8383c6ffbabSRob Johnston 8393c6ffbabSRob Johnston out: 8403c6ffbabSRob Johnston topo_mod_strfree(mod, parent); 8413c6ffbabSRob Johnston return (ret); 8423c6ffbabSRob Johnston } 843