/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2025 Oxide Computer Company
 */

/*
 * Programmatic interface to NVMe Devices
 *
 * libnvme exists to provide a means of performing non-I/O related operations
 * on an NVMe device. This is intended to allow software, regardless of whether
 * it is part of illumos or not, to operate on NVMe devices and perform most of
 * the administrative and operator tasks that might come up. This library does
 * not provide a stable interface yet. The rest of this block comment describes
 * the library's organization and the background for why it looks the way it
 * does.
 *
 * --------------------
 * Library Organization
 * --------------------
 *
 * There are two large classes of source files that make up this library
 * currently:
 *
 * 1. Source code that implements the library's interfaces is found alongside
 *    this file in lib/libnvme/common. This code is generally organized based
 *    around the portion of the NVMe specification that it implements. So for
 *    example, code that implements logic related to the features is found
 *    in libnvme_feature.c, formatting namespaces in libnvme_format.c, log
 *    pages in libnvme_log.c, etc. All files in the library begin with
 *    'libnvme_' as a way to help namespace the file names from the second set
 *    of files.
 *
 * 2. Validation logic that is shared between libnvme and the kernel is found
 *    in common/nvme/. While the kernel must validate requests regardless, we
 *    leverage this shared information as a means for trying to ensure that we
 *    have useful errors early. That code is factored in a way to facilitate
 *    easier unit testing.
 *
 * Because of the nature of this split, all of the opaque structures that we
 * create and their relationships are all maintained in the library (group 1).
 * All of the logic in group 2 is designed as constant data tables and
 * functions that are fed information about the controller being operated on
 * so they can answer questions about it.
 *
 * There are several general classes of interfaces and related structures that
 * we have in the library. We break them into the following general categories
 * based on their purpose:
 *
 * DISCOVERY
 *
 * One of the large responsibilities of this library is helping someone
 * discover information about something, whether that be a controller, a
 * namespace, a log page, a feature, a unique command, etc. Information about
 * one of these items is contained in a generally opaque discovery structure.
 * For example, the nvme_log_disc_t.
 *
 * The goal of these structures is to contain all of the metadata for working
 * with the object in question. Continuing on the log page discovery example,
 * it can tell us information about what fields are required, whether or not
 * the log might be supported, whether it operates on a controller, a
 * namespace, or something else, as well as more human-usable things such as
 * names and descriptions.
 *
 * Discovery objects are both for humans and for programmatic consumption.
 * There are several cases where requests can be created directly from
 * discovery objects. A well designed discovery object can allow a general
 * implementation of a consumer such as nvmeadm to build up a request without
 * having to hardcode everything about what is needed for each request (though
 * most consumers still need to have information about the actual contents,
 * meaning, and semantics of a log or feature).
 *
 * Discovery objects are obtained in two general ways. The first is using one
 * of the iterator/callback based functions to discover a given class of data.
 * The second path is that several of the functions which operate based on the
 * name of something, e.g. nvme_log_req_init_by_name(),
 * nvme_get_feat_req_init_by_name(), etc. will return a discovery object.
 *
 * When a discovery object is returned based on iteration (more below), the
 * memory is owned by the iterator. When it is returned by a request
 * initialization function, then it has its own lifetime and must be freed.
 * We try to make this distinction clear in the API based on whether or not
 * the discovery object is 'const'.
 *
 * All discovery objects should be fully filled out before they are handed
 * back to a caller. It is an explicit design goal that every function that
 * gets data from the discovery structure operates on a const version of the
 * pointer. This is the hint that you cannot perform additional I/O or related
 * operations after handing out the discovery structure. Attempts to loosen
 * this constraint should be considered carefully due to how we communicate
 * ownership.
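 *
 * As a flavor of what programmatic consumption looks like, an iteration
 * callback handed a const discovery object can only read from it. The
 * following is an illustrative sketch only: the callback shape mirrors the
 * controller and namespace discovery code in this file, while the
 * nvme_log_disc_name() and nvme_log_disc_desc() accessor names are assumed
 * here rather than documented (see libnvme_log.c for the real set):
 *
 *	static bool
 *	dump_log_cb(nvme_ctrl_t *ctrl, const nvme_log_disc_t *disc, void *arg)
 *	{
 *		(void) printf("%s: %s\n", nvme_log_disc_name(disc),
 *		    nvme_log_disc_desc(disc));
 *		return (true);
 *	}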
 *
 * ITERATORS
 *
 * A common pattern in the library is iterating over items. This includes
 * controllers and namespaces, but also discovering which specific logs,
 * commands, features, etc. are actually supported by the device. Iteration
 * always follows the same general pattern:
 *
 * 1. An iterator is initialized with a call to nvme_<name>_discover_init().
 *    This will generally return a structure of the form nvme_<name>_iter_t.
 *    This structure contains the memory for the corresponding value that is
 *    returned from the stepping function in (2).
 *
 * 2. To actually pull values out of an iterator, one must call the
 *    nvme_<name>_step() function for the iterator. This will return a
 *    corresponding nvme_<name>_disc_t structure that is opaque and has a
 *    suite of functions that are usable for getting information out of it.
 *    This structure is valid only until the next time nvme_<name>_step() is
 *    called. The return value of step indicates the state of the data:
 *    whether there was an error, the iterator has finished, or we
 *    successfully stepped and the data is filled out.
 *
 * If discovery data needs to outlive a given iteration, then it can be
 * duplicated, which will give it a separate lifetime, though that comes with
 * the responsibility that it must then be freed.
 *
 * 3. To finish using iterators, one finally calls the corresponding
 *    nvme_<name>_discover_fini(). That will deallocate the iterator structure
 *    and finish everything up.
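 *
 * Putting those three steps together, walking every controller in the system
 * looks roughly like the following sketch (error handling elided; these
 * functions are implemented later in this file):
 *
 *	nvme_ctrl_iter_t *iter;
 *	const nvme_ctrl_disc_t *disc;
 *	nvme_iter_t ret;
 *
 *	if (!nvme_ctrl_discover_init(nvme, &iter)) {
 *		... the error is set on the nvme_t ...
 *	}
 *	while ((ret = nvme_ctrl_discover_step(iter, &disc)) ==
 *	    NVME_ITER_VALID) {
 *		... use disc; it is only valid until the next step ...
 *	}
 *	nvme_ctrl_discover_fini(iter);
 *	if (ret == NVME_ITER_ERROR) {
 *		... the error is set on the nvme_t ...
 *	}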
 *
 * REQUESTS
 *
 * One of the chief goals of this library is to be able to perform requests.
 * Each request has a structure that can be initialized, filled out, and then
 * executed. A request structure can be reused multiple times with minor
 * adjustments in-between (though changes aren't required). Request structures
 * are either initialized in a blank mode where every value must be filled out
 * or they can be initialized through their discovery object (or the common
 * name of such an object).
 *
 * When a request structure is initialized through a discovery object, it
 * automatically sets several of the fields, knows which ones are still
 * required to be set, and which fields cannot be set. For example, if you
 * create a get log page request from a log discovery object, it will not
 * allow you to change the log page you're requesting; however, in return you
 * don't have to specify the command set interface or log identifier.
 *
 * Request objects are tied to a controller. See 'Parallelism, Thread Safety,
 * and Errors' for more information.
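 *
 * As a sketch of the discovery-initialized path, fetching a log page by its
 * common name might look like the following. The nvme_log_req_* and
 * nvme_log_disc_* names and signatures shown here are assumptions made for
 * illustration and should be checked against libnvme_log.c rather than
 * trusted:
 *
 *	nvme_log_disc_t *disc;
 *	nvme_log_req_t *req;
 *
 *	if (!nvme_log_req_init_by_name(ctrl, "health", 0, &disc, &req)) {
 *		... the error is set on the nvme_ctrl_t ...
 *	}
 *	... set the output buffer and any fields the discovery object says
 *	    are still required, then execute the request ...
 *	nvme_log_req_fini(req);
 *	nvme_log_disc_free(disc);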
 *
 * INFORMATION SNAPSHOTS
 *
 * To get information about a namespace or controller, one has to take an
 * information snapshot. Once an information snapshot is obtained, this
 * snapshot answers all questions about the controller with a mostly
 * consistent set of point-in-time data. The main reason for this design was
 * to try and simplify where errors can occur and to provide a straightforward
 * serialization point so that the raw underlying data could be gathered on
 * one system and then interpreted later on another.
 *
 * The only fallible operations on a snapshot are those that cover data which
 * is not guaranteed to exist for all NVMe controllers.
 *
 * LIBRARY, CONTROLLER, NAMESPACE and SNAPSHOT HANDLES
 *
 * The last major set of types used in this library are opaque handles. As you
 * might have guessed given the request structures, all of the objects which
 * represent something are opaque. Each library handle is independent of one
 * another and each controller handle is independent of one another. In
 * general, it is expected that only a single controller handle is used at a
 * given time for a given library handle, but this is not currently enforced.
 * Error information and parallelism are tied into this, see 'Parallelism,
 * Thread Safety, and Errors' for more information.
 *
 * -----------------
 * Opaque Structures
 * -----------------
 *
 * One of the things that might stand out in libnvme is the use of opaque
 * structures everywhere with functions to access every arbitrary piece of
 * data. This and the function pattern around building up a request were done
 * to try and deal with the evolutionary nature of the NVMe specification. If
 * you look at the various requests, with the exception of firmware download,
 * almost every request has added additional features through the spec
 * revisions. NVMe 2.0 changed most things again with the requirement to
 * specify the command set interface.
 *
 * While the way that the NVMe specification has done this is quite
 * reasonable, it makes it much more difficult to use a traditional series of
 * arguments to functions or a structure without having to try to version the
 * symbol through clever games. If instead we accept that the specification
 * will change and that the specification is always taking these additional
 * arguments out of values that must be zero, then an opaque request structure
 * where you have to make an explicit function call and recompile to get
 * slightly different behavior is mostly reasonable. We may not be able to be
 * perfect given we're at the mercy of the specification, but at least this is
 * better than the alternative.
 *
 * This is ultimately why all the request structures are opaque and use a
 * pseudo-builder pattern to fill out the request information. Further
 * evidence to this point is that there was no way to avoid changing every
 * kernel structure here while retaining semantic operations. No one wants to
 * manually assemble cdw12-15 here. That's not how we can add value for the
 * library.
 *
 * Similarly, for all discovery objects we ended up utilizing opaque objects.
 * The main reason here is that we want to be able to embed this library as a
 * committed interface in other languages, and having the discovery structures
 * be something that everyone can see means it'll be harder to extend them.
 * While this concern is somewhat more theoretical given the iterator pattern,
 * given the other bits in the request structure we decided to lean into the
 * opaqueness.
 *
 * --------------------------------------
 * Parallelism, Thread Safety, and Errors
 * --------------------------------------
 *
 * Major design points of the library are how we achieve thread-safety, how
 * ownership works, where errors appear, and what degree of parallelism is
 * achievable. To work through this we look at a few different things:
 *
 * 1. The degree to which the hardware allows for parallelism
 * 2. The degree to which users might desire parallelism
 * 3. The ergonomics of getting and storing errors
 *
 * The NVMe specification allows for different degrees of admin command
 * parallelism on a per-command basis. This is discoverable, but the main
 * point is that there is a class of commands where only one can be
 * outstanding at a time, which likely covers most of the destructive commands
 * like Format NVM, Activate Firmware, etc. Our expectation to some extent is
 * that most admin queue commands don't need to be issued in parallel;
 * however, beyond how we structure the library and error handling, we don't
 * try to enforce that here. The kernel does do some enforcement by requiring
 * mandatory write locks to perform some operations.
 *
 * When we get to how folks want to use this, during the initial design phase
 * we mostly theorized based on how nvmeadm is using it today and how various
 * daemons like a FRU monitor or an appliance kit's software might want to
 * interact with it. Our general starting assumption is that it's very
 * reasonable for each discovered controller to be handled in parallel, but
 * that operations on a controller itself are likely serial given that we're
 * not issuing I/O through this mechanism. If we were, then that'd be an
 * entirely different set of constraints.
 *
 * To discuss the perceived ergonomics, we need to first discuss what error
 * information we want to be able to have. It's an important goal of both the
 * NVMe driver and this library to give useful semantic errors. In particular,
 * for any operation we want to make sure that we include the following
 * information:
 *
 * o A hopefully distinguishable semantic error
 * o The saved errno as a system error if relevant (e.g. if open(2) failed)
 * o A message for humans that gives more specifics about what happened and
 *   is intended to be passed along to the output of a command or another
 *   error message.
 * o If a controller error occurs, we want to be able to provide the
 *   controller's sc (status code) and sct (status code type).
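 *
 * As a sketch of what retrieving that looks like to a consumer (the
 * nvme_ctrl_err(), nvme_ctrl_errmsg(), and nvme_ctrl_syserr() accessor names
 * are assumed here for illustration; the error always lives on the handle
 * that the operation was performed against):
 *
 *	if (!nvme_ns_bd_attach(ns)) {
 *		(void) fprintf(stderr, "failed to attach blkdev: %s "
 *		    "(libnvme: 0x%x, sys: %d)\n", nvme_ctrl_errmsg(ctrl),
 *		    nvme_ctrl_err(ctrl), nvme_ctrl_syserr(ctrl));
 *	}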
 *
 * With this we get to the questions around ergonomics, which are entirely
 * subjective: given that we want to capture that information, how do we best
 * do so with the tooling that we have? When the library was first being
 * prototyped all errors were on the nvme_t, basically the top-level handle.
 * This meant that each operation on a controller had to be done serially or
 * you would have to use different handles. However, the simplicity was that
 * there was one thing to check.
 *
 * This evolution changed slightly when we introduced information snapshots.
 * Because the information snapshots are meant to be separate entities whose
 * lifetime can extend beyond the nvme_t library handle, they ended up
 * developing their own error codes and functions. This has been okay because
 * there aren't too many use cases there, though the need to duplicate error
 * handling functions is a bit painful.
 *
 * From there, we did consider what if each request had its own error
 * information that could be extracted. That would turn into a lot of
 * functions to get at that data. The controller's allowed parallelism for
 * admin commands varies based on each command. Some commands must occur when
 * there are no other admin commands on the controller and others when there
 * is nothing on the namespace. However, due to that nuance, it would lead to
 * forcing the consumer to understand the controller's specifics more than is
 * often necessary for a given request. To add to that, it'd also just be a
 * pain to try to get all the error information out in a different way, and
 * the consumers we started writing in this fashion were not looking good.
 *
 * We also considered whether we could consolidate all the error functions on
 * each request into one structure that we get, but that didn't move the
 * needle too much. It also raised some more concerns around how we minimize
 * races and how data changes around that.
 *
 * So all of this led us to our current compromise position: we allow for
 * parallelism at the controller level. More specifically:
 *
 * 1. Operations which take the nvme_t handle set errors on it and must
 *    operate serially. That is, the nvme_t should only be used from one
 *    thread at any time, but may move between threads. Errors are set on it.
 *
 * 2. The nvme_ctrl_t has its own error information. A given nvme_ctrl_t
 *    should only be used serially; however, different ones can be used in
 *    parallel. A controller doesn't guarantee exclusivity. That requires an
 *    explicit locking operation, as sketched below.
 *
 * 3. Both request structures and namespaces place their errors on the
 *    corresponding controller that they were created from. Therefore the
 *    per-controller serialization in (2) applies here as well. If two
 *    requests are tied to different controllers, they can proceed in
 *    parallel.
 *
 * 4. Once a controller or namespace snapshot is obtained, they fall into a
 *    similar pattern: each one can be operated on in parallel, but generally
 *    one should only operate on a single one serially.
 *
 * Other than the constraints defined above, the library does not care which
 * thread an operation occurs on; handles may be moved to wherever they need
 * to be. Locking and related enforcement in the kernel is based on the open
 * file descriptor to the controller.
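 *
 * For example, a consumer that needs the exclusivity mentioned in (2) before
 * performing a destructive operation can use the locking functions defined at
 * the bottom of this file (a sketch; error handling elided):
 *
 *	if (!nvme_ctrl_lock(ctrl, NVME_LOCK_L_WRITE,
 *	    NVME_LOCK_F_DONT_BLOCK)) {
 *		... another consumer holds the controller; see the error on
 *		    the nvme_ctrl_t ...
 *	}
 *	... issue the destructive request(s) ...
 *	nvme_ctrl_unlock(ctrl);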
 *
 * ----------------
 * Field Validation
 * ----------------
 *
 * Every request is made up of fields that correspond to parts of the NVMe
 * specification. Our requests operate in terms of the logical fields that we
 * opt to expose and that the kernel knows how to consume. In general, we
 * don't expose the raw cdw values that make up the commands (except for the
 * vendor unique commands or arguments that are explicitly that way a la get
 * features). While operating on raw cdw arguments would be a simple way to
 * create ABI stability, it would leave everyone having to break up all the
 * fields themselves and would, we believe, end up somewhat more error prone
 * than the interfaces we expose today.
 *
 * Requests are created in one of two ways today: they are either initialized
 * from corresponding discovery data, e.g. nvme_log_req_init_by_disc() and
 * nvme_get_feat_req_init_by_name(), or one creates a raw request a la
 * nvme_get_feat_req_init(). In the former cases, we fill out a bunch of the
 * fields that would normally need to be set such as the log or feature ID.
 * We also will note which fields are allowed and expected. For example, the
 * health log page does not take or expect an lsp (log specific parameter) or
 * related and therefore we can flag that with an _UNUSE class error.
 * Conversely, requests that are created from their raw form will not have any
 * such error checking performed until they are finalized and checked by the
 * kernel. The set of fields that can be set in a request is usually tracked
 * in the structure with a member of the form <prefix>_allow.
 *
 * One set of library error checking that is uniform between both types is
 * that of missing fields. There are minimum fields that must be set for
 * different types of requests. That check will always be performed regardless
 * of the path that is taken through the system. Tracking which members must
 * still be set is done by a member of the form <prefix>_need.
 *
 * When we perform validation, we try to push the vast majority of it into the
 * common validation code that is shared between the kernel and userland. This
 * is wrapped up through the nvme_field_check_one() logic. The common code
 * will check if the field is supported by the controller (generating an
 * _UNSUP class error if not) and if the value of the field is within a valid
 * range (generating a _RANGE class error if not).
 *
 * While we try to fold as many of these checks into the common code as
 * possible, it isn't perfect and some things have to be checked outside of
 * that. Those consist of the following general cases:
 *
 * 1) Items that are not semantically fields in the actual command but are
 * things that we are tracking ourselves in the library. An example of this
 * would be fields in the vuc request structure that we are synthesizing
 * ourselves.
 *
 * 2) While the field logic has the specifics of what controller is being
 * operated upon, it doesn't have all the knowledge of what things can be
 * combined or not. It can answer the specifics about its field, but cannot
 * look at the broader request.
 *
 * As a result, there are some duplicated checks in the library and the
 * kernel, though several are left just to the kernel. However, the vast
 * majority of validation does happen through these common routines, which
 * leaves the library's nvme_<type>_req_set_<field> functions as generally
 * thin wrappers that call the common checking code and update our tracking of
 * which fields are set so we can issue an ioctl.
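 *
 * To make the raw path concrete, a from-scratch get features request would
 * look roughly like the sketch below. The nvme_get_feat_req_* names are
 * assumed here for illustration and should be checked against
 * libnvme_feature.c; the point is where the two classes of checks fire:
 *
 *	nvme_get_feat_req_t *req;
 *
 *	if (!nvme_get_feat_req_init(ctrl, &req)) {
 *		... the error is set on the nvme_ctrl_t ...
 *	}
 *	if (!nvme_get_feat_req_set_fid(req, fid)) {
 *		... _UNSUP and _RANGE class errors fire here, via the
 *		    common nvme_field_check_one() logic ...
 *	}
 *	if (!nvme_get_feat_req_exec(req)) {
 *		... missing-field (<prefix>_need) errors fire here ...
 *	}
 *	nvme_get_feat_req_fini(req);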
 */

#include <stdlib.h>
#include <stdarg.h>
#include <libdevinfo.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <upanic.h>

#include "libnvme_impl.h"

bool
nvme_vers_ctrl_atleast(const nvme_ctrl_t *ctrl, const nvme_version_t *targ)
{
	return (nvme_vers_atleast(&ctrl->nc_vers, targ));
}

bool
nvme_vers_ctrl_info_atleast(const nvme_ctrl_info_t *ci,
    const nvme_version_t *targ)
{
	return (nvme_vers_atleast(&ci->nci_vers, targ));
}

bool
nvme_vers_ns_info_atleast(const nvme_ns_info_t *info,
    const nvme_version_t *targ)
{
	return (nvme_vers_atleast(&info->nni_vers, targ));
}

bool
nvme_guid_valid(const nvme_ctrl_t *ctrl, const uint8_t guid[16])
{
	const uint8_t zero_guid[16] = { 0 };

	return (nvme_vers_ctrl_atleast(ctrl, &nvme_vers_1v2) &&
	    memcmp(zero_guid, guid, sizeof (zero_guid)) != 0);
}

bool
nvme_eui64_valid(const nvme_ctrl_t *ctrl, const uint8_t eui64[8])
{
	const uint8_t zero_eui[8] = { 0 };

	return (nvme_vers_ctrl_atleast(ctrl, &nvme_vers_1v1) &&
	    memcmp(zero_eui, eui64, sizeof (zero_eui)) != 0);
}

int
nvme_format_nguid(const uint8_t nguid[16], char *buf, size_t len)
{
	return (snprintf(buf, len, "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X"
	    "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X",
	    nguid[0], nguid[1], nguid[2], nguid[3], nguid[4], nguid[5],
	    nguid[6], nguid[7], nguid[8], nguid[9], nguid[10], nguid[11],
	    nguid[12], nguid[13], nguid[14], nguid[15]));
}

int
nvme_format_eui64(const uint8_t eui64[8], char *buf, size_t len)
{
	return (snprintf(buf, len, "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X",
	    eui64[0], eui64[1], eui64[2], eui64[3], eui64[4], eui64[5],
	    eui64[6], eui64[7]));
}

void
nvme_fini(nvme_t *nvme)
{
	if (nvme == NULL)
		return;

	if (nvme->nh_devinfo != DI_NODE_NIL) {
		di_fini(nvme->nh_devinfo);
	}

	free(nvme);
}

nvme_t *
nvme_init(void)
{
	nvme_t *nvme;

	nvme = calloc(1, sizeof (nvme_t));
	if (nvme == NULL) {
		return (NULL);
	}

	nvme->nh_devinfo = di_init("/", DINFOCPYALL);
	if (nvme->nh_devinfo == DI_NODE_NIL) {
		nvme_fini(nvme);
		return (NULL);
	}

	return (nvme);
}

void
nvme_ctrl_discover_fini(nvme_ctrl_iter_t *iter)
{
	free(iter);
}
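
/*
 * Step the controller iterator: walk successive devinfo nodes bound to the
 * "nvme" driver, yielding only those that expose an NVMe nexus minor node.
 * The discovery data handed back lives inside the iterator and is only valid
 * until the next step.
 */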
nvme_iter_t
nvme_ctrl_discover_step(nvme_ctrl_iter_t *iter, const nvme_ctrl_disc_t **discp)
{
	di_minor_t m;

	*discp = NULL;
	if (iter->ni_done) {
		return (NVME_ITER_DONE);
	}

	for (;;) {
		if (iter->ni_cur == NULL) {
			iter->ni_cur = di_drv_first_node("nvme",
			    iter->ni_nvme->nh_devinfo);
		} else {
			iter->ni_cur = di_drv_next_node(iter->ni_cur);
		}

		if (iter->ni_cur == NULL) {
			iter->ni_done = true;
			return (NVME_ITER_DONE);
		}

		for (m = di_minor_next(iter->ni_cur, DI_MINOR_NIL);
		    m != DI_MINOR_NIL; m = di_minor_next(iter->ni_cur, m)) {
			if (strcmp(di_minor_nodetype(m),
			    DDI_NT_NVME_NEXUS) == 0) {
				break;
			}
		}

		if (m == DI_MINOR_NIL) {
			continue;
		}

		iter->ni_disc.ncd_devi = iter->ni_cur;
		iter->ni_disc.ncd_minor = m;
		*discp = &iter->ni_disc;
		return (NVME_ITER_VALID);
	}
}

bool
nvme_ctrl_discover_init(nvme_t *nvme, nvme_ctrl_iter_t **iterp)
{
	nvme_ctrl_iter_t *iter;

	if (iterp == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid nvme_ctrl_iter_t output pointer: %p", iterp));
	}

	iter = calloc(1, sizeof (nvme_ctrl_iter_t));
	if (iter == NULL) {
		int e = errno;
		return (nvme_error(nvme, NVME_ERR_NO_MEM, e, "failed to "
		    "allocate memory for a new nvme_ctrl_iter_t: %s",
		    strerror(e)));
	}
	iter->ni_nvme = nvme;
	*iterp = iter;
	return (nvme_success(nvme));
}

bool
nvme_ctrl_discover(nvme_t *nvme, nvme_ctrl_disc_f func, void *arg)
{
	nvme_ctrl_iter_t *iter;
	const nvme_ctrl_disc_t *disc;
	nvme_iter_t ret;

	if (func == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid nvme_ctrl_disc_f function pointer: %p", func));
	}

	if (!nvme_ctrl_discover_init(nvme, &iter)) {
		return (false);
	}

	while ((ret = nvme_ctrl_discover_step(iter, &disc)) ==
	    NVME_ITER_VALID) {
		if (!func(nvme, disc, arg))
			break;
	}

	nvme_ctrl_discover_fini(iter);
	if (ret == NVME_ITER_ERROR) {
		return (false);
	}

	return (nvme_success(nvme));
}

di_node_t
nvme_ctrl_disc_devi(const nvme_ctrl_disc_t *discp)
{
	return (discp->ncd_devi);
}

di_minor_t
nvme_ctrl_disc_minor(const nvme_ctrl_disc_t *discp)
{
	return (discp->ncd_minor);
}

void
nvme_ctrl_fini(nvme_ctrl_t *ctrl)
{
	if (ctrl == NULL) {
		return;
	}

	if (ctrl->nc_sup_logs != NULL) {
		free(ctrl->nc_sup_logs);
	}

	if (ctrl->nc_sup_logs_err != NULL) {
		free(ctrl->nc_sup_logs_err);
	}

	if (ctrl->nc_devi_path != NULL) {
		di_devfs_path_free(ctrl->nc_devi_path);
	}

	if (ctrl->nc_fd >= 0) {
		(void) close(ctrl->nc_fd);
		ctrl->nc_fd = -1;
	}

	free(ctrl);
}
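
/*
 * Construct a controller handle from a devinfo node. This validates that the
 * node is an attached nvme(4D) instance with an NVMe nexus minor, opens that
 * minor, and snapshots the version and identify-controller information via
 * ioctl so that later operations can answer version questions without
 * additional I/O.
 */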
bool
nvme_ctrl_init(nvme_t *nvme, di_node_t di, nvme_ctrl_t **outp)
{
	const char *drv;
	int32_t inst;
	di_minor_t minor;
	char *path, buf[PATH_MAX];
	nvme_ctrl_t *ctrl;
	nvme_ioctl_ctrl_info_t ctrl_info;

	if (di == DI_NODE_NIL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid di_node_t: %p", di));
	}

	if (outp == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid nvme_ctrl_t output pointer: %p", outp));
	}
	*outp = NULL;

	drv = di_driver_name(di);
	inst = di_instance(di);
	if (drv == NULL || inst < 0) {
		return (nvme_error(nvme, NVME_ERR_BAD_DEVI, 0, "devi %s has "
		    "no driver attached", di_node_name(di)));
	}

	if (strcmp(drv, "nvme") != 0) {
		return (nvme_error(nvme, NVME_ERR_BAD_DEVI, 0, "devi %s isn't "
		    "attached to nvme, found %s", di_node_name(di), drv));
	}

	/*
	 * We have an NVMe node. Find the right minor that corresponds to the
	 * attachment point. Once we find that then we can go ahead and open a
	 * path to that and construct the device.
	 */
	minor = DI_MINOR_NIL;
	while ((minor = di_minor_next(di, minor)) != DI_MINOR_NIL) {
		if (strcmp(di_minor_nodetype(minor), DDI_NT_NVME_NEXUS) == 0) {
			break;
		}
	}

	if (minor == DI_MINOR_NIL) {
		return (nvme_error(nvme, NVME_ERR_BAD_DEVI, 0, "devi %s has "
		    "no NVMe nexus minor node", di_node_name(di)));
	}

	path = di_devfs_minor_path(minor);
	if (path == NULL) {
		int e = errno;
		return (nvme_error(nvme, NVME_ERR_LIBDEVINFO, e, "failed to "
		    "obtain /devices path for the requested minor: %s",
		    strerror(e)));
	}

	if (snprintf(buf, sizeof (buf), "/devices%s", path) >= sizeof (buf)) {
		di_devfs_path_free(path);
		return (nvme_error(nvme, NVME_ERR_INTERNAL, 0, "failed to "
		    "construct full /devices minor path, would have overflown "
		    "internal buffer"));
	}
	di_devfs_path_free(path);

	ctrl = calloc(1, sizeof (*ctrl));
	if (ctrl == NULL) {
		int e = errno;
		return (nvme_error(nvme, NVME_ERR_NO_MEM, e, "failed to "
		    "allocate memory for a new nvme_ctrl_t: %s", strerror(e)));
	}

	ctrl->nc_nvme = nvme;
	ctrl->nc_devi = di;
	ctrl->nc_minor = minor;
	ctrl->nc_inst = inst;
	ctrl->nc_fd = open(buf, O_RDWR | O_CLOEXEC);
	if (ctrl->nc_fd < 0) {
		int e = errno;
		nvme_ctrl_fini(ctrl);
		return (nvme_error(nvme, NVME_ERR_OPEN_DEV, e, "failed to "
		    "open device path %s: %s", buf, strerror(e)));
	}

	ctrl->nc_devi_path = di_devfs_path(di);
	if (ctrl->nc_devi_path == NULL) {
		int e = errno;
		nvme_ctrl_fini(ctrl);
		return (nvme_error(nvme, NVME_ERR_LIBDEVINFO, e, "failed to "
		    "obtain /devices path for the controller: %s",
		    strerror(e)));
	}

	if (!nvme_ioc_ctrl_info(ctrl, &ctrl_info)) {
		nvme_err_data_t err;

		nvme_ctrl_err_save(ctrl, &err);
		nvme_err_set(nvme, &err);
		nvme_ctrl_fini(ctrl);
		return (false);
	}

	ctrl->nc_vers = ctrl_info.nci_vers;
	ctrl->nc_info = ctrl_info.nci_ctrl_id;

	nvme_vendor_map_ctrl(ctrl);

	*outp = ctrl;
	return (nvme_success(nvme));
}
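
/*
 * State threaded through nvme_ctrl_discover() when looking a controller up by
 * its instance number; see nvme_ctrl_init_by_instance() below.
 */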
typedef struct {
	bool ncia_found;
	int32_t ncia_inst;
	nvme_ctrl_t *ncia_ctrl;
	nvme_err_data_t ncia_err;
} nvme_ctrl_init_arg_t;

bool
nvme_ctrl_init_by_instance_cb(nvme_t *nvme, const nvme_ctrl_disc_t *disc,
    void *arg)
{
	nvme_ctrl_init_arg_t *init = arg;

	if (di_instance(disc->ncd_devi) != init->ncia_inst) {
		return (true);
	}

	/*
	 * If we fail to open the controller, we need to save the error
	 * information as it will otherwise be clobbered: this is a callback
	 * function surrounded by other libnvme calls.
	 */
	init->ncia_found = true;
	if (!nvme_ctrl_init(nvme, disc->ncd_devi, &init->ncia_ctrl)) {
		nvme_err_save(nvme, &init->ncia_err);
	}

	return (false);
}

bool
nvme_ctrl_init_by_instance(nvme_t *nvme, int32_t inst, nvme_ctrl_t **outp)
{
	nvme_ctrl_init_arg_t init;

	if (inst < 0) {
		return (nvme_error(nvme, NVME_ERR_ILLEGAL_INSTANCE, 0,
		    "encountered illegal negative instance number: %d", inst));
	}

	if (outp == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid nvme_ctrl_t output pointer: %p", outp));
	}

	init.ncia_found = false;
	init.ncia_inst = inst;
	init.ncia_ctrl = NULL;

	if (!nvme_ctrl_discover(nvme, nvme_ctrl_init_by_instance_cb, &init)) {
		return (false);
	}

	if (!init.ncia_found) {
		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
		    "failed to find NVMe controller nvme%d", inst));
	}

	/*
	 * If we don't have an NVMe controller structure but we did find the
	 * instance, then we must have had an error constructing it. We have
	 * to restore that error from the saved information, as
	 * nvme_ctrl_discover() will have clobbered what was on our handle.
	 */
	if (init.ncia_ctrl == NULL) {
		nvme_err_set(nvme, &init.ncia_err);
		return (false);
	}

	*outp = init.ncia_ctrl;
	return (nvme_success(nvme));
}

bool
nvme_ctrl_devi(nvme_ctrl_t *ctrl, di_node_t *devip)
{
	*devip = ctrl->nc_devi;
	return (nvme_ctrl_success(ctrl));
}

bool
nvme_ioc_ctrl_info(nvme_ctrl_t *ctrl, nvme_ioctl_ctrl_info_t *info)
{
	(void) memset(info, 0, sizeof (nvme_ioctl_ctrl_info_t));

	if (ioctl(ctrl->nc_fd, NVME_IOC_CTRL_INFO, info) != 0) {
		int e = errno;
		return (nvme_ioctl_syserror(ctrl, e, "controller info"));
	}

	if (info->nci_common.nioc_drv_err != NVME_IOCTL_E_OK) {
		return (nvme_ioctl_error(ctrl, &info->nci_common,
		    "controller info"));
	}

	return (true);
}

bool
nvme_ioc_ns_info(nvme_ctrl_t *ctrl, uint32_t nsid, nvme_ioctl_ns_info_t *info)
{
	(void) memset(info, 0, sizeof (nvme_ioctl_ns_info_t));
	info->nni_common.nioc_nsid = nsid;

	if (ioctl(ctrl->nc_fd, NVME_IOC_NS_INFO, info) != 0) {
		int e = errno;
		return (nvme_ioctl_syserror(ctrl, e, "namespace info"));
	}

	if (info->nni_common.nioc_drv_err != NVME_IOCTL_E_OK) {
		return (nvme_ioctl_error(ctrl, &info->nni_common,
		    "namespace info"));
	}

	return (true);
}

const char *
nvme_tporttostr(nvme_ctrl_transport_t tport)
{
	switch (tport) {
	case NVME_CTRL_TRANSPORT_PCI:
		return ("PCI");
	case NVME_CTRL_TRANSPORT_TCP:
		return ("TCP");
	case NVME_CTRL_TRANSPORT_RDMA:
		return ("RDMA");
	default:
		return ("unknown transport");
	}
}

static bool
nvme_ns_discover_validate(nvme_ctrl_t *ctrl, nvme_ns_disc_level_t level)
{
	switch (level) {
	case NVME_NS_DISC_F_ALL:
	case NVME_NS_DISC_F_ALLOCATED:
	case NVME_NS_DISC_F_ACTIVE:
	case NVME_NS_DISC_F_NOT_IGNORED:
	case NVME_NS_DISC_F_BLKDEV:
		return (true);
	default:
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_FLAG, 0, "invalid "
		    "namespace discovery level specified: 0x%x", level));
	}
}

void
nvme_ns_discover_fini(nvme_ns_iter_t *iter)
{
	free(iter);
}

const char *
nvme_nsleveltostr(nvme_ns_disc_level_t level)
{
	switch (level) {
	case NVME_NS_DISC_F_ALL:
		return ("unallocated");
	case NVME_NS_DISC_F_ALLOCATED:
		return ("allocated");
	case NVME_NS_DISC_F_ACTIVE:
		return ("active");
	case NVME_NS_DISC_F_NOT_IGNORED:
		return ("not ignored");
	case NVME_NS_DISC_F_BLKDEV:
		return ("blkdev");
	default:
		return ("unknown level");
	}
}
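
/*
 * Map the kernel's namespace state flags onto the discovery levels. The
 * levels form a hierarchy in which each level implies the previous one:
 * unallocated, allocated, active, not ignored, and finally attached to
 * blkdev.
 */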
nvme_ns_disc_level_t
nvme_ns_state_to_disc_level(nvme_ns_state_t state)
{
	if ((state & NVME_NS_STATE_ALLOCATED) == 0) {
		return (NVME_NS_DISC_F_ALL);
	}

	if ((state & NVME_NS_STATE_ACTIVE) == 0) {
		return (NVME_NS_DISC_F_ALLOCATED);
	}

	if ((state & NVME_NS_STATE_IGNORED) != 0) {
		return (NVME_NS_DISC_F_ACTIVE);
	}

	if ((state & NVME_NS_STATE_ATTACHED) == 0) {
		return (NVME_NS_DISC_F_NOT_IGNORED);
	} else {
		return (NVME_NS_DISC_F_BLKDEV);
	}
}

nvme_iter_t
nvme_ns_discover_step(nvme_ns_iter_t *iter, const nvme_ns_disc_t **discp)
{
	nvme_ctrl_t *ctrl = iter->nni_ctrl;

	if (iter->nni_err) {
		return (NVME_ITER_ERROR);
	}

	if (iter->nni_done) {
		return (NVME_ITER_DONE);
	}

	while (iter->nni_cur_idx <= ctrl->nc_info.id_nn) {
		uint32_t nsid = iter->nni_cur_idx;
		nvme_ioctl_ns_info_t ns_info = { 0 };
		nvme_ns_disc_level_t level;

		if (!nvme_ioc_ns_info(ctrl, nsid, &ns_info)) {
			iter->nni_err = true;
			return (NVME_ITER_ERROR);
		}

		iter->nni_cur_idx++;
		level = nvme_ns_state_to_disc_level(ns_info.nni_state);
		if (iter->nni_level > level) {
			continue;
		}

		(void) memset(&iter->nni_disc, 0, sizeof (nvme_ns_disc_t));
		iter->nni_disc.nnd_nsid = nsid;
		iter->nni_disc.nnd_level = level;

		if (nvme_guid_valid(ctrl, ns_info.nni_id.id_nguid)) {
			iter->nni_disc.nnd_flags |= NVME_NS_DISC_F_NGUID_VALID;
			(void) memcpy(iter->nni_disc.nnd_nguid,
			    ns_info.nni_id.id_nguid,
			    sizeof (ns_info.nni_id.id_nguid));
		}

		if (nvme_eui64_valid(ctrl, ns_info.nni_id.id_eui64)) {
			iter->nni_disc.nnd_flags |= NVME_NS_DISC_F_EUI64_VALID;
			(void) memcpy(iter->nni_disc.nnd_eui64,
			    ns_info.nni_id.id_eui64,
			    sizeof (ns_info.nni_id.id_eui64));
		}

		*discp = &iter->nni_disc;
		return (NVME_ITER_VALID);
	}

	iter->nni_done = true;
	return (NVME_ITER_DONE);
}

bool
nvme_ns_discover_init(nvme_ctrl_t *ctrl, nvme_ns_disc_level_t level,
    nvme_ns_iter_t **iterp)
{
	nvme_ns_iter_t *iter;

	if (!nvme_ns_discover_validate(ctrl, level)) {
		return (false);
	}

	if (iterp == NULL) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
		    "encountered invalid nvme_ns_iter_t output pointer: %p",
		    iterp));
	}

	iter = calloc(1, sizeof (nvme_ns_iter_t));
	if (iter == NULL) {
		int e = errno;
		return (nvme_ctrl_error(ctrl, NVME_ERR_NO_MEM, e, "failed to "
		    "allocate memory for a new nvme_ns_iter_t: %s",
		    strerror(e)));
	}

	iter->nni_ctrl = ctrl;
	iter->nni_level = level;
	iter->nni_cur_idx = 1;

	*iterp = iter;
	return (nvme_ctrl_success(ctrl));
}
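
/*
 * Callback-based wrapper around the namespace iterator. The discovery data
 * handed to 'func' is owned by the iterator and is only valid for the
 * duration of the callback; it must be duplicated to outlive it.
 */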
bool
nvme_ns_discover(nvme_ctrl_t *ctrl, nvme_ns_disc_level_t level,
    nvme_ns_disc_f func, void *arg)
{
	nvme_ns_iter_t *iter;
	nvme_iter_t ret;
	const nvme_ns_disc_t *disc;

	if (!nvme_ns_discover_validate(ctrl, level)) {
		return (false);
	}

	if (func == NULL) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
		    "encountered invalid nvme_ns_disc_f function pointer: %p",
		    func));
	}

	if (!nvme_ns_discover_init(ctrl, level, &iter)) {
		return (false);
	}

	while ((ret = nvme_ns_discover_step(iter, &disc)) == NVME_ITER_VALID) {
		if (!func(ctrl, disc, arg))
			break;
	}

	nvme_ns_discover_fini(iter);
	if (ret == NVME_ITER_ERROR) {
		return (false);
	}

	return (nvme_ctrl_success(ctrl));
}

uint32_t
nvme_ns_disc_nsid(const nvme_ns_disc_t *discp)
{
	return (discp->nnd_nsid);
}

nvme_ns_disc_level_t
nvme_ns_disc_level(const nvme_ns_disc_t *discp)
{
	return (discp->nnd_level);
}

nvme_ns_disc_flags_t
nvme_ns_disc_flags(const nvme_ns_disc_t *discp)
{
	return (discp->nnd_flags);
}

const uint8_t *
nvme_ns_disc_eui64(const nvme_ns_disc_t *discp)
{
	if ((discp->nnd_flags & NVME_NS_DISC_F_EUI64_VALID) == 0) {
		return (NULL);
	}

	return (discp->nnd_eui64);
}

const uint8_t *
nvme_ns_disc_nguid(const nvme_ns_disc_t *discp)
{
	if ((discp->nnd_flags & NVME_NS_DISC_F_NGUID_VALID) == 0) {
		return (NULL);
	}

	return (discp->nnd_nguid);
}

void
nvme_ns_fini(nvme_ns_t *ns)
{
	free(ns);
}

bool
nvme_ns_init(nvme_ctrl_t *ctrl, uint32_t nsid, nvme_ns_t **nsp)
{
	nvme_ns_t *ns;

	if (nsp == NULL) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
		    "encountered invalid nvme_ns_t output pointer: %p", nsp));
	}

	if (nsid < NVME_NSID_MIN || nsid > ctrl->nc_info.id_nn) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_NS_RANGE, 0, "requested "
		    "namespace 0x%x is invalid, valid namespaces are [0x%x, "
		    "0x%x]", nsid, NVME_NSID_MIN, ctrl->nc_info.id_nn));
	}

	ns = calloc(1, sizeof (nvme_ns_t));
	if (ns == NULL) {
		int e = errno;
		return (nvme_ctrl_error(ctrl, NVME_ERR_NO_MEM, e, "failed to "
		    "allocate memory for a new nvme_ns_t: %s", strerror(e)));
	}

	ns->nn_ctrl = ctrl;
	ns->nn_nsid = nsid;

	*nsp = ns;
	return (nvme_ctrl_success(ctrl));
}

typedef struct {
	nvme_ctrl_t *nnia_ctrl;
	const char *nnia_name;
	bool nnia_found;
	nvme_ns_t *nnia_ns;
	nvme_err_data_t nnia_err;
} nvme_ns_init_arg_t;

static bool
nvme_ns_init_by_name_cb(nvme_ctrl_t *ctrl, const nvme_ns_disc_t *disc,
    void *arg)
{
	nvme_ns_init_arg_t *init = arg;
	char buf[NVME_NGUID_NAMELEN];
	CTASSERT(NVME_NGUID_NAMELEN > NVME_EUI64_NAMELEN);

	if ((disc->nnd_flags & NVME_NS_DISC_F_NGUID_VALID) != 0) {
		(void) nvme_format_nguid(disc->nnd_nguid, buf, sizeof (buf));
		if (strcasecmp(init->nnia_name, buf) == 0)
			goto match;
	}

	if ((disc->nnd_flags & NVME_NS_DISC_F_EUI64_VALID) != 0) {
		(void) nvme_format_eui64(disc->nnd_eui64, buf, sizeof (buf));
		if (strcasecmp(init->nnia_name, buf) == 0)
			goto match;
	}

	(void) snprintf(buf, sizeof (buf), "%u", disc->nnd_nsid);
	if (strcasecmp(init->nnia_name, buf) == 0)
		goto match;

	return (true);

match:
	init->nnia_found = true;
	if (!nvme_ns_init(ctrl, disc->nnd_nsid, &init->nnia_ns)) {
		nvme_ctrl_err_save(ctrl, &init->nnia_err);
	}

	return (false);
}

/*
 * Attempt to find a namespace by 'name'. A name could be the NGUID, EUI64, or
 * just the plain old namespace ID.
 */
bool
nvme_ns_init_by_name(nvme_ctrl_t *ctrl, const char *ns_name, nvme_ns_t **nsp)
{
	nvme_ns_init_arg_t init;

	if (ns_name == NULL) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
		    "encountered invalid namespace name: %p", ns_name));
	}

	if (nsp == NULL) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
		    "encountered invalid nvme_ns_t output pointer: %p", nsp));
	}

	init.nnia_ctrl = ctrl;
	init.nnia_name = ns_name;
	init.nnia_found = false;
	init.nnia_ns = NULL;

	if (!nvme_ns_discover(ctrl, NVME_NS_DISC_F_ALL,
	    nvme_ns_init_by_name_cb, &init)) {
		return (false);
	}

	if (!init.nnia_found) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_NS_RANGE, 0, "failed to "
		    "find NVMe namespace %s on nvme%d", ns_name,
		    ctrl->nc_inst));
	}

	if (init.nnia_ns == NULL) {
		nvme_ctrl_err_set(ctrl, &init.nnia_err);
		return (false);
	}

	*nsp = init.nnia_ns;
	return (nvme_ctrl_success(ctrl));
}
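
/*
 * Parse a combined name of the form "nvme<instance>" or
 * "nvme<instance>/<namespace>", where the namespace portion may be anything
 * that nvme_ns_init_by_name() accepts, and return the corresponding handles.
 */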
bool
nvme_ctrl_ns_init(nvme_t *nvme, const char *name, nvme_ctrl_t **ctrlp,
    nvme_ns_t **nsp)
{
	const char *slash, *ns_name;
	char *eptr;
	nvme_ctrl_t *ctrl;
	nvme_ns_t *ns;
	unsigned long inst;
	size_t ctrl_namelen;

	if (name == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid name to search for: %p", name));
	}

	/*
	 * We require a controller, but the namespace output pointer is only
	 * required if we end up having a namespace present.
	 */
	if (ctrlp == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid nvme_ctrl_t output pointer: %p", ctrlp));
	}

	slash = strchr(name, '/');
	if (slash != NULL) {
		ctrl_namelen = (uintptr_t)slash - (uintptr_t)name;
		ns_name = slash + 1;

		if (nsp == NULL) {
			return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0,
			    "encountered invalid nvme_ns_t output pointer: %p",
			    nsp));
		}
	} else {
		ctrl_namelen = strlen(name);
		ns_name = NULL;
	}

	*ctrlp = NULL;
	if (nsp != NULL) {
		*nsp = NULL;
	}

	if (strncmp(name, "nvme", 4) != 0) {
		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0, "unable "
		    "to map controller '%.*s' to a known device class, "
		    "expected the controller to start with 'nvme'",
		    (int)ctrl_namelen, name));
	}

	/*
	 * Before we go ahead and try to parse this with strtoul we need to
	 * manually check two things that strtoul will not:
	 *
	 * 1) If the instance portion is empty, then we'd otherwise just get a
	 * 0 back.
	 * 2) If the instance has leading zeros, that's an error. We don't
	 * want to conflate 001 and 1 as the same here. The only valid use of
	 * a leading '0' is 'nvme0' itself, which is 5 characters long, hence
	 * the check below.
	 */
	if (ctrl_namelen == 4) {
		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
		    "no controller instance specified in %.*s",
		    (int)ctrl_namelen, name));
	}

	if (name[4] == '0' && ctrl_namelen > 5) {
		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
		    "leading zeros aren't allowed for the instance specified "
		    "in %.*s", (int)ctrl_namelen, name));
	}

	errno = 0;
	inst = strtoul(name + 4, &eptr, 10);
	if (errno != 0 || (*eptr != '\0' && eptr != slash)) {
		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
		    "failed to parse controller instance from %.*s",
		    (int)ctrl_namelen, name));
	}

	if (inst > INT32_MAX) {
		return (nvme_error(nvme, NVME_ERR_ILLEGAL_INSTANCE, 0,
		    "parsed controller instance %lu is outside the valid "
		    "range [0, %d]", inst, INT32_MAX));
	}

	if (!nvme_ctrl_init_by_instance(nvme, (int32_t)inst, &ctrl)) {
		return (false);
	}

	if (ns_name == NULL) {
		*ctrlp = ctrl;
		return (nvme_success(nvme));
	}

	if (!nvme_ns_init_by_name(ctrl, ns_name, &ns)) {
		nvme_err_data_t err;

		nvme_ctrl_err_save(ctrl, &err);
		nvme_err_set(nvme, &err);
		nvme_ctrl_fini(ctrl);
		return (false);
	}

	*ctrlp = ctrl;
	*nsp = ns;

	return (nvme_success(nvme));
}

bool
nvme_ns_bd_attach(nvme_ns_t *ns)
{
	nvme_ctrl_t *ctrl = ns->nn_ctrl;
	nvme_ioctl_common_t com;

	(void) memset(&com, 0, sizeof (com));
	com.nioc_nsid = ns->nn_nsid;

	if (ioctl(ns->nn_ctrl->nc_fd, NVME_IOC_ATTACH, &com) != 0) {
		int e = errno;
		return (nvme_ioctl_syserror(ctrl, e, "namespace attach"));
	}

	if (com.nioc_drv_err != NVME_IOCTL_E_OK) {
		return (nvme_ioctl_error(ctrl, &com, "namespace attach"));
	}

	return (nvme_ctrl_success(ctrl));
}

bool
nvme_ns_bd_detach(nvme_ns_t *ns)
{
	nvme_ctrl_t *ctrl = ns->nn_ctrl;
	nvme_ioctl_common_t com;

	(void) memset(&com, 0, sizeof (com));
	com.nioc_nsid = ns->nn_nsid;

	if (ioctl(ns->nn_ctrl->nc_fd, NVME_IOC_DETACH, &com) != 0) {
		int e = errno;
		return (nvme_ioctl_syserror(ctrl, e, "namespace detach"));
	}

	if (com.nioc_drv_err != NVME_IOCTL_E_OK) {
		return (nvme_ioctl_error(ctrl, &com, "namespace detach"));
	}

	return (nvme_ctrl_success(ctrl));
}

/*
 * Check for a lock programming error and upanic() if so.
 */
static void
nvme_lock_check(nvme_ctrl_t *ctrl)
{
	char msg[1024];
	int ret;
	const char *up;
	size_t ulen;
	const char *base = "fatal libnvme locking error detected";

	if (ctrl->nc_err.ne_err != NVME_ERR_LOCK_PROG) {
		return;
	}

	ret = snprintf(msg, sizeof (msg), "%s: %s (controller %p)", base,
	    ctrl->nc_err.ne_errmsg, ctrl);
	if (ret <= 0) {
		ulen = strlen(base) + 1;
		up = base;
	} else if ((size_t)ret >= sizeof (msg)) {
		ulen = sizeof (msg);
		up = msg;
	} else {
		ulen = (size_t)ret + 1;
		up = msg;
	}

	upanic(up, ulen);
}

static bool
nvme_lock_common(nvme_ctrl_t *ctrl, uint32_t nsid, nvme_lock_level_t level,
    nvme_lock_flags_t flags)
{
	nvme_ioctl_lock_t lock;
	const nvme_lock_flags_t all_flags = NVME_LOCK_F_DONT_BLOCK;

	if (level != NVME_LOCK_L_READ && level != NVME_LOCK_L_WRITE) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_FLAG, 0, "unknown "
		    "lock level: 0x%x", level));
	}

	if ((flags & ~all_flags) != 0) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_FLAG, 0, "unknown "
		    "lock flags: 0x%x", flags & ~all_flags));
	}

	(void) memset(&lock, 0, sizeof (lock));
	lock.nil_common.nioc_nsid = nsid;
	if (nsid != 0) {
		lock.nil_ent = NVME_LOCK_E_NS;
	} else {
		lock.nil_ent = NVME_LOCK_E_CTRL;
	}
	lock.nil_level = level;
	lock.nil_flags = flags;

	if (ioctl(ctrl->nc_fd, NVME_IOC_LOCK, &lock) != 0) {
		int e = errno;
		return (nvme_ioctl_syserror(ctrl, e, "lock"));
	}

	if (lock.nil_common.nioc_drv_err != NVME_IOCTL_E_OK) {
		(void) nvme_ioctl_error(ctrl, &lock.nil_common, "lock");
		nvme_lock_check(ctrl);
		return (false);
	}

	return (nvme_ctrl_success(ctrl));
}

/*
 * You may reasonably be wondering why this returns void and why we basically
 * panic everywhere. The reality is twofold. The first part of this is that we
 * know from experience in libc that error checking mutexes are not the most
 * common and the kernel simplicity of mutex_enter() and mutex_exit() is
 * really a boon. The second piece here is that, given the way the ioctl path
 * works, only programming errors or mischief in the library could cause this
 * to fail at the raw ioctl / errno level. That is, EBADF, EFAULT, etc. are
 * our fault, and if you cannot unlock because of that you're not going to get
 * much further.
 */
void
nvme_unlock_common(nvme_ctrl_t *ctrl, uint32_t nsid)
{
	nvme_ioctl_unlock_t unlock;

	(void) memset(&unlock, 0, sizeof (unlock));
	unlock.niu_common.nioc_nsid = nsid;
	if (nsid != 0) {
		unlock.niu_ent = NVME_LOCK_E_NS;
	} else {
		unlock.niu_ent = NVME_LOCK_E_CTRL;
	}

	/*
	 * Because all unlock ioctl errors are promoted to a lock programming
	 * error, we don't bother calling nvme_ioctl_syserror() here.
	 */
	if (ioctl(ctrl->nc_fd, NVME_IOC_UNLOCK, &unlock) != 0) {
		int e = errno;
		(void) nvme_ctrl_error(ctrl, NVME_ERR_LOCK_PROG, e, "internal "
		    "programming error: failed to issue unlock ioctl: %s",
		    strerror(e));
		nvme_lock_check(ctrl);
		return;
	}

	if (unlock.niu_common.nioc_drv_err != NVME_IOCTL_E_OK) {
		(void) nvme_ioctl_error(ctrl, &unlock.niu_common, "unlock");
		/*
		 * Promote any other failure to a new fatal failure. Consumers
		 * expect this to have worked.
		 */
		if (ctrl->nc_err.ne_err != NVME_ERR_LOCK_PROG) {
			nvme_err_data_t err;
			nvme_ctrl_err_save(ctrl, &err);
			(void) nvme_ctrl_error(ctrl, NVME_ERR_LOCK_PROG, 0,
			    "internal programming error: received unexpected "
			    "libnvme error 0x%x: %s", err.ne_err,
			    err.ne_errmsg);
		}
		nvme_lock_check(ctrl);
		return;
	}

	(void) nvme_ctrl_success(ctrl);
}

bool
nvme_ctrl_lock(nvme_ctrl_t *ctrl, nvme_lock_level_t level,
    nvme_lock_flags_t flags)
{
	return (nvme_lock_common(ctrl, 0, level, flags));
}

bool
nvme_ns_lock(nvme_ns_t *ns, nvme_lock_level_t level,
    nvme_lock_flags_t flags)
{
	return (nvme_lock_common(ns->nn_ctrl, ns->nn_nsid, level, flags));
}

void
nvme_ctrl_unlock(nvme_ctrl_t *ctrl)
{
	nvme_unlock_common(ctrl, 0);
}

void
nvme_ns_unlock(nvme_ns_t *ns)
{
	nvme_unlock_common(ns->nn_ctrl, ns->nn_nsid);
}