/* * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. */ /* * Copyright 2020 Joyent, Inc. */ /* * This file drives topo node enumeration of NVMe controllers. A single "nvme" * node is enumerated for each NVMe controller. Child "disk" nodes are then * enumerated for each configured NVMe namespace. * * nvme nodes are expected to be enumerated under either a "bay" node (for U.2 * devices) or a "slot" node (for M.2 devices) or a "pciexfn" node (for AIC * devices). * * Enumeration of NVMe controllers on PCIe add-in cards is automatically driven * by the pcibus topo module. * * In order to allow for associating a given NVMe controller with a physical * location, enumeration of U.2 and M.2 devices should be driven by a * platform-specific topo map which statically sets the following two * properties on the parent "bay" or "slot" node: * * propgroup property description * --------- -------- ------------ * binding driver "nvme" * binding parent-device devpath of parent PCIe device * * for example: * * * * * * * * * * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "disk.h" #include "disk_drivers.h" typedef struct nvme_enum_info { topo_mod_t *nei_mod; di_node_t nei_dinode; nvme_identify_ctrl_t *nei_idctl; nvme_version_t nei_vers; tnode_t *nei_parent; tnode_t *nei_nvme; nvlist_t *nei_nvme_fmri; const char *nei_nvme_path; int nei_fd; } nvme_enum_info_t; typedef struct devlink_arg { topo_mod_t *dla_mod; char *dla_logical_disk; uint_t dla_strsz; } devlink_arg_t; static int devlink_cb(di_devlink_t dl, void *arg) { devlink_arg_t *dlarg = (devlink_arg_t *)arg; topo_mod_t *mod = dlarg->dla_mod; const char *devpath; char *slice, *ctds; if ((devpath = di_devlink_path(dl)) == NULL || (dlarg->dla_logical_disk = topo_mod_strdup(mod, devpath)) == NULL) { return (DI_WALK_TERMINATE); } /* * We need to keep track of the original string size before we * truncate it with a NUL, so that we can free the right number of * bytes when we're done, otherwise libumem will complain. */ dlarg->dla_strsz = strlen(dlarg->dla_logical_disk) + 1; /* trim the slice off the public name */ if (((ctds = strrchr(dlarg->dla_logical_disk, '/')) != NULL) && ((slice = strchr(ctds, 's')) != NULL)) *slice = '\0'; return (DI_WALK_TERMINATE); } static char * get_logical_disk(topo_mod_t *mod, const char *devpath, uint_t *bufsz) { di_devlink_handle_t devhdl; devlink_arg_t dlarg = { 0 }; char *minorpath = NULL; if (asprintf(&minorpath, "%s:a", devpath) < 0) { return (NULL); } if ((devhdl = di_devlink_init(NULL, 0)) == DI_NODE_NIL) { topo_mod_dprintf(mod, "%s: di_devlink_init failed", __func__); free(minorpath); return (NULL); } dlarg.dla_mod = mod; (void) di_devlink_walk(devhdl, "^dsk/", minorpath, DI_PRIMARY_LINK, &dlarg, devlink_cb); (void) di_devlink_fini(&devhdl); free(minorpath); *bufsz = dlarg.dla_strsz; return (dlarg.dla_logical_disk); } static int make_disk_node(nvme_enum_info_t *nvme_info, di_node_t dinode, topo_instance_t inst) { topo_mod_t *mod = nvme_info->nei_mod; nvlist_t *auth = NULL, *fmri = NULL; tnode_t *disk; char *rev = NULL, *model = NULL, *serial = NULL, *path; char *logical_disk = NULL, *devid, *manuf, *ctd = NULL; char *cap_bytes_str = NULL, full_path[MAXPATHLEN + 1]; char *pname = topo_node_name(nvme_info->nei_parent); topo_instance_t pinst = topo_node_instance(nvme_info->nei_parent); const char **ppaths = NULL; struct dk_minfo minfo; uint64_t cap_bytes; uint_t bufsz; int fd = -1, err, ret = -1, r; if ((path = di_devfs_path(dinode)) == NULL) { topo_mod_dprintf(mod, "%s: failed to get dev path", __func__); (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); return (ret); } topo_mod_dprintf(mod, "%s: found nvme namespace: %s", __func__, path); /* * Issue the DKIOCGMEDIAINFO ioctl to get the capacity */ (void) snprintf(full_path, MAXPATHLEN, "/devices%s%s", path, PHYS_EXTN); if ((fd = open(full_path, O_RDWR)) < 0 || ioctl(fd, DKIOCGMEDIAINFO, &minfo) < 0) { topo_mod_dprintf(mod, "failed to get blkdev capacity (%s)", strerror(errno)); (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); goto error; } cap_bytes = minfo.dki_lbsize * minfo.dki_capacity; if (asprintf(&cap_bytes_str, "%" PRIu64, cap_bytes) < 0) { topo_mod_dprintf(mod, "%s: failed to alloc string", __func__); (void) topo_mod_seterrno(mod, EMOD_NOMEM); goto error; } /* * Gather the FRU identity information from the devinfo properties */ if (di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, DEVID_PROP_NAME, &devid) == -1 || di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, INQUIRY_VENDOR_ID, &manuf) == -1 || di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, INQUIRY_PRODUCT_ID, &model) == -1 || di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, INQUIRY_REVISION_ID, &rev) == -1 || di_prop_lookup_strings(DDI_DEV_T_ANY, dinode, INQUIRY_SERIAL_NO, &serial) == -1) { topo_mod_dprintf(mod, "%s: failed to lookup devinfo props on " "%s", __func__, path); (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); goto error; } model = topo_mod_clean_str(mod, model); rev = topo_mod_clean_str(mod, rev); serial = topo_mod_clean_str(mod, serial); /* * Lookup the /dev/dsk/c#t#d# disk device name from the blkdev path */ if ((logical_disk = get_logical_disk(mod, path, &bufsz)) == NULL) { topo_mod_dprintf(mod, "failed to find logical disk"); (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); goto error; } /* * If we were able to look up the logical disk path for this namespace * then set ctd to be that pathname, minus the "/dev/dsk/" portion. */ if ((ctd = strrchr(logical_disk, '/')) != NULL) { ctd = ctd + 1; } else { topo_mod_dprintf(mod, "malformed logical disk path: %s", logical_disk); (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); goto error; } /* * Build the FMRI and then bind the disk node to the parent nvme node. */ auth = topo_mod_auth(mod, nvme_info->nei_nvme); fmri = topo_mod_hcfmri(mod, nvme_info->nei_nvme, FM_HC_SCHEME_VERSION, DISK, inst, NULL, auth, model, rev, serial); if (fmri == NULL) { /* errno set */ topo_mod_dprintf(mod, "%s: hcfmri failed for %s=%u/%s=0/%s=%u", __func__, pname, pinst, NVME, DISK, inst); goto error; } if ((disk = topo_node_bind(mod, nvme_info->nei_nvme, DISK, inst, fmri)) == NULL) { /* errno set */ topo_mod_dprintf(mod, "%s: bind failed for %s=%u/%s=0/%s=%u", __func__, pname, pinst, NVME, DISK, inst); goto error; } /* Create authority and system propgroups */ topo_pgroup_hcset(disk, auth); /* * As the "disk" in this case is simply a logical construct * representing an NVMe namespace, we inherit the FRU from the parent * node. */ if (topo_node_fru_set(disk, NULL, 0, &err) != 0) { topo_mod_dprintf(mod, "%s: failed to set FRU: %s", __func__, topo_strerror(err)); (void) topo_mod_seterrno(mod, err); goto error; } if ((ppaths = topo_mod_zalloc(mod, sizeof (char *))) == NULL) { (void) topo_mod_seterrno(mod, EMOD_NOMEM); goto error; } ppaths[0] = path; /* * Create the "storage" and "io" property groups and then fill them * with the standard set of properties for "disk" nodes. */ if (topo_pgroup_create(disk, &io_pgroup, &err) != 0 || topo_pgroup_create(disk, &storage_pgroup, &err) != 0) { topo_mod_dprintf(mod, "%s: failed to create propgroups: %s", __func__, topo_strerror(err)); (void) topo_mod_seterrno(mod, err); goto error; } r = topo_prop_set_string(disk, TOPO_PGROUP_IO, TOPO_IO_DEV_PATH, TOPO_PROP_IMMUTABLE, path, &err); r += topo_prop_set_string_array(disk, TOPO_PGROUP_IO, TOPO_IO_PHYS_PATH, TOPO_PROP_IMMUTABLE, ppaths, 1, &err); r += topo_prop_set_string(disk, TOPO_PGROUP_IO, TOPO_IO_DEVID, TOPO_PROP_IMMUTABLE, devid, &err); r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE, TOPO_STORAGE_MANUFACTURER, TOPO_PROP_IMMUTABLE, manuf, &err); r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE, TOPO_STORAGE_CAPACITY, TOPO_PROP_IMMUTABLE, cap_bytes_str, &err); r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE, TOPO_STORAGE_SERIAL_NUM, TOPO_PROP_IMMUTABLE, serial, &err); r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE, TOPO_STORAGE_MODEL, TOPO_PROP_IMMUTABLE, model, &err); r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE, TOPO_STORAGE_FIRMWARE_REV, TOPO_PROP_IMMUTABLE, rev, &err); r += topo_prop_set_string(disk, TOPO_PGROUP_STORAGE, TOPO_STORAGE_LOGICAL_DISK_NAME, TOPO_PROP_IMMUTABLE, ctd, &err); if (r != 0) { topo_mod_dprintf(mod, "%s: failed to create properties: %s", __func__, topo_strerror(err)); (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); goto error; } ret = 0; error: free(cap_bytes_str); if (fd > 0) (void) close(fd); if (ppaths != NULL) topo_mod_free(mod, ppaths, sizeof (char *)); di_devfs_path_free(path); nvlist_free(auth); nvlist_free(fmri); topo_mod_strfree(mod, rev); topo_mod_strfree(mod, model); topo_mod_strfree(mod, serial); topo_mod_free(mod, logical_disk, bufsz); return (ret); } static const topo_pgroup_info_t nvme_pgroup = { TOPO_PGROUP_NVME, TOPO_STABILITY_PRIVATE, TOPO_STABILITY_PRIVATE, 1 }; static int make_nvme_node(nvme_enum_info_t *nvme_info) { topo_mod_t *mod = nvme_info->nei_mod; nvlist_t *auth = NULL, *fmri = NULL, *fru; tnode_t *nvme; char raw_rev[NVME_FWVER_SZ + 1], raw_model[NVME_MODEL_SZ + 1]; char raw_serial[NVME_SERIAL_SZ + 1]; char *rev = NULL, *model = NULL, *serial = NULL, *vers = NULL; char *pname = topo_node_name(nvme_info->nei_parent); char *label = NULL; topo_instance_t pinst = topo_node_instance(nvme_info->nei_parent); int err = 0, ret = -1; di_node_t cn; uint_t i; /* * The raw strings returned by the IDENTIFY CONTROLLER command are * not NUL-terminated, so we fix that up. */ (void) strncpy(raw_rev, nvme_info->nei_idctl->id_fwrev, NVME_FWVER_SZ); raw_rev[NVME_FWVER_SZ] = '\0'; (void) strncpy(raw_model, nvme_info->nei_idctl->id_model, NVME_MODEL_SZ); raw_model[NVME_MODEL_SZ] = '\0'; (void) strncpy(raw_serial, nvme_info->nei_idctl->id_serial, NVME_SERIAL_SZ); raw_serial[NVME_SERIAL_SZ] = '\0'; /* * Next we pass the strings through a function that sanitizes them of * any characters that can't be used in an FMRI string. */ rev = topo_mod_clean_str(mod, raw_rev); model = topo_mod_clean_str(mod, raw_model); serial = topo_mod_clean_str(mod, raw_serial); auth = topo_mod_auth(mod, nvme_info->nei_parent); fmri = topo_mod_hcfmri(mod, nvme_info->nei_parent, FM_HC_SCHEME_VERSION, NVME, 0, NULL, auth, model, rev, serial); if (fmri == NULL) { /* errno set */ topo_mod_dprintf(mod, "%s: hcfmri failed for %s=%u/%s=0", __func__, pname, pinst, NVME); goto error; } /* * If our parent is a pciexfn node, then we need to create a nvme range * underneath it to hold the nvme heirarchy. For other cases, where * enumeration is being driven by a topo map file, this range will have * already been statically defined in the XML. */ if (strcmp(pname, PCIEX_FUNCTION) == 0) { if (topo_node_range_create(mod, nvme_info->nei_parent, NVME, 0, 0) < 0) { /* errno set */ topo_mod_dprintf(mod, "%s: error creating %s range", __func__, NVME); goto error; } } /* * Create a new topo node to represent the NVMe controller and bind it * to the parent node. */ if ((nvme = topo_node_bind(mod, nvme_info->nei_parent, NVME, 0, fmri)) == NULL) { /* errno set */ topo_mod_dprintf(mod, "%s: bind failed for %s=%u/%s=0", __func__, pname, pinst, NVME); goto error; } nvme_info->nei_nvme = nvme; nvme_info->nei_nvme_fmri = fmri; /* * If our parent node is a "pciexfn" node then this is a NVMe device on * a PCIe AIC, so we inherit our parent's FRU. Otherwise, we set the * FRU to ourself. */ if (strcmp(topo_node_name(nvme_info->nei_parent), PCIEX_FUNCTION) == 0) fru = NULL; else fru = fmri; if (topo_node_fru_set(nvme, fru, 0, &err) != 0) { topo_mod_dprintf(mod, "%s: failed to set FRU: %s", __func__, topo_strerror(err)); (void) topo_mod_seterrno(mod, err); goto error; } /* * Clone the label from our parent node. We can't inherit the property * because the label prop is mutable on bay nodes and only immutable * properties can be inherited. */ if ((topo_node_label(nvme_info->nei_parent, &label, &err) != 0 && err != ETOPO_PROP_NOENT) || topo_node_label_set(nvme, label, &err) != 0) { topo_mod_dprintf(mod, "%s: failed to set label: %s", __func__, topo_strerror(err)); (void) topo_mod_seterrno(mod, err); goto error; } if (topo_pgroup_create(nvme, &nvme_pgroup, &err) != 0) { topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s", __func__, TOPO_PGROUP_NVME, topo_strerror(err)); (void) topo_mod_seterrno(mod, err); goto error; } if (asprintf(&vers, "%u.%u", nvme_info->nei_vers.v_major, nvme_info->nei_vers.v_minor) < 0) { topo_mod_dprintf(mod, "%s: failed to alloc string", __func__); (void) topo_mod_seterrno(mod, EMOD_NOMEM); goto error; } if (topo_prop_set_string(nvme, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER, TOPO_PROP_IMMUTABLE, vers, &err) != 0) { topo_mod_dprintf(mod, "%s: failed to set %s/%s property", __func__, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER); (void) topo_mod_seterrno(mod, err); goto error; } if (topo_pgroup_create(nvme, &io_pgroup, &err) != 0) { topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s", __func__, TOPO_PGROUP_IO, topo_strerror(err)); (void) topo_mod_seterrno(mod, err); goto error; } if (topo_prop_set_string(nvme, TOPO_PGROUP_IO, TOPO_IO_DEV_PATH, TOPO_PROP_IMMUTABLE, nvme_info->nei_nvme_path, &err) != 0) { topo_mod_dprintf(mod, "%s: failed to set %s/%s property", __func__, TOPO_PGROUP_IO, TOPO_IO_DEV_PATH); (void) topo_mod_seterrno(mod, err); goto error; } /* * Create a child disk node for each namespace. */ if (topo_node_range_create(mod, nvme, DISK, 0, (nvme_info->nei_idctl->id_nn - 1)) < 0) { /* errno set */ topo_mod_dprintf(mod, "%s: error creating %s range", __func__, DISK); goto error; } for (i = 0, cn = di_child_node(nvme_info->nei_dinode); cn != DI_NODE_NIL; i++, cn = di_sibling_node(cn)) { if (make_disk_node(nvme_info, cn, i) != 0) { char *path = di_devfs_path(cn); /* * We note the failure, but attempt to forge ahead and * enumerate any other namespaces. */ topo_mod_dprintf(mod, "%s: make_disk_node() failed " "for %s\n", __func__, path ? path : "unknown path"); di_devfs_path_free(path); } } ret = 0; error: free(vers); nvlist_free(auth); nvlist_free(fmri); topo_mod_strfree(mod, rev); topo_mod_strfree(mod, model); topo_mod_strfree(mod, serial); topo_mod_strfree(mod, label); return (ret); } struct diwalk_arg { topo_mod_t *diwk_mod; tnode_t *diwk_parent; }; /* * This function gathers identity information from the NVMe controller and * stores it in a struct. This struct is passed to make_nvme_node(), which * does the actual topo node creation. */ static int discover_nvme_ctl(di_node_t node, di_minor_t minor, void *arg) { struct diwalk_arg *wkarg = arg; topo_mod_t *mod = wkarg->diwk_mod; char *path = NULL, *devctl = NULL; nvme_ioctl_t nioc = { 0 }; nvme_identify_ctrl_t *idctl = NULL; nvme_enum_info_t nvme_info = { 0 }; int fd = -1, ret = DI_WALK_TERMINATE; if ((path = di_devfs_minor_path(minor)) == NULL) { topo_mod_dprintf(mod, "failed to get minor path"); (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); return (ret); } topo_mod_dprintf(mod, "%s=%u: found nvme controller: %s", topo_node_name(wkarg->diwk_parent), topo_node_instance(wkarg->diwk_parent), path); if (asprintf(&devctl, "/devices%s", path) < 0) { topo_mod_dprintf(mod, "failed to alloc string"); (void) topo_mod_seterrno(mod, EMOD_NOMEM); goto error; } if ((fd = open(devctl, O_RDWR)) < 0) { topo_mod_dprintf(mod, "failed to open %s", devctl); (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); goto error; } if ((idctl = topo_mod_zalloc(mod, NVME_IDENTIFY_BUFSIZE)) == NULL) { topo_mod_dprintf(mod, "zalloc failed"); (void) topo_mod_seterrno(mod, EMOD_NOMEM); goto error; } nioc.n_len = NVME_IDENTIFY_BUFSIZE; nioc.n_buf = (uintptr_t)idctl; if (ioctl(fd, NVME_IOC_IDENTIFY_CTRL, &nioc) != 0) { topo_mod_dprintf(mod, "NVME_IOC_IDENTIFY_CTRL ioctl " "failed: %s", strerror(errno)); (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); goto error; } nioc.n_len = sizeof (nvme_version_t); nioc.n_buf = (uintptr_t)&nvme_info.nei_vers; if (ioctl(fd, NVME_IOC_VERSION, &nioc) != 0) { topo_mod_dprintf(mod, "NVME_IOC_VERSION ioctl failed: %s", strerror(errno)); (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); goto error; } nvme_info.nei_mod = mod; nvme_info.nei_nvme_path = path; nvme_info.nei_dinode = node; nvme_info.nei_idctl = idctl; nvme_info.nei_parent = wkarg->diwk_parent; nvme_info.nei_fd = fd; if (make_nvme_node(&nvme_info) != 0) { /* errno set */ goto error; } ret = DI_WALK_CONTINUE; error: if (fd > 0) (void) close(fd); di_devfs_path_free(path); free(devctl); if (idctl != NULL) topo_mod_free(mod, idctl, NVME_IDENTIFY_BUFSIZE); return (ret); } int disk_nvme_enum_disk(topo_mod_t *mod, tnode_t *pnode) { char *parent = NULL; int err; di_node_t devtree; di_node_t dnode; struct diwalk_arg wkarg = { 0 }; int ret = -1; /* * Lookup a property containing the devfs path of the parent PCIe * device of the NVMe device we're attempting to enumerate. This * property is hard-coded in per-platform topo XML maps that are * delivered with the OS. This hard-coded path allows topo to map a * given NVMe controller to a physical location (bay or slot) on the * platform, when generating the topo snapshot. */ if (topo_prop_get_string(pnode, TOPO_PGROUP_BINDING, TOPO_BINDING_PARENT_DEV, &parent, &err) != 0) { topo_mod_dprintf(mod, "parent node was missing nvme binding " "properties\n"); (void) topo_mod_seterrno(mod, err); goto out; } if ((devtree = topo_mod_devinfo(mod)) == DI_NODE_NIL) { topo_mod_dprintf(mod, "failed to get devinfo snapshot"); (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); goto out; } /* * Walk the devinfo tree looking NVMe devices. For each NVMe device, * check if the devfs path of the parent matches the one specified in * TOPO_BINDING_PARENT_DEV. */ wkarg.diwk_mod = mod; wkarg.diwk_parent = pnode; dnode = di_drv_first_node(NVME_DRV, devtree); while (dnode != DI_NODE_NIL) { char *path; if ((path = di_devfs_path(di_parent_node(dnode))) == NULL) { topo_mod_dprintf(mod, "failed to get dev path"); (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); goto out; } if (strcmp(parent, path) == 0) { if (di_walk_minor(dnode, DDI_NT_NVME_NEXUS, 0, &wkarg, discover_nvme_ctl) < 0) { di_devfs_path_free(path); goto out; } } di_devfs_path_free(path); dnode = di_drv_next_node(dnode); } ret = 0; out: topo_mod_strfree(mod, parent); return (ret); }