/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2024 Oxide Computer Company
 */

/*
 * Programmatic interface to NVMe Devices
 *
 * libnvme exists to provide a means of performing non-I/O related operations on
 * an NVMe device. This is intended to allow software, regardless of whether it
 * is part of illumos or not, to operate on NVMe devices and perform most of the
 * administrative and operator tasks that might come up. This library does not
 * provide a stable interface yet. The rest of this block comment describes the
 * organization of the library and the background for why it looks the way it
 * does.
 *
 * --------------------
 * Library Organization
 * --------------------
 *
 * There are two large classes of source files that make up this library
 * currently:
 *
 * 1. Source code that implements the library's interfaces is found alongside
 *    this file in lib/libnvme/common. This code is generally organized based
 *    around the portion of the NVMe specification that it implements. So for
 *    example, code that implements logic related to features is found in
 *    libnvme_feature.c, formatting namespaces in libnvme_format.c, log pages
 *    in libnvme_log.c, etc. All files in the library begin with 'libnvme_' as
 *    a way to help namespace the file names from the second set of files.
 *
 * 2. Validation logic that is shared between libnvme and the kernel is found
 *    in common/nvme/. While the kernel must validate requests regardless, we
 *    leverage this shared information as a means for trying to ensure that we
 *    have useful errors early. That code is factored in a way to facilitate
 *    easier unit testing.
 *
 * Because of the nature of this split, all of the opaque structures that we
 * create and their relationships are all maintained in the library (group 1).
 * All of the logic in group 2 is designed to be constant data tables and
 * functions that are fed information about the controller they are operating
 * on in order to answer questions about it.
 *
 * There are several general classes of interfaces and related structures that
 * we have in the library. We break them into the following general categories
 * based on their purpose:
 *
 * DISCOVERY
 *
 * One of the large responsibilities of this library is helping someone discover
 * information about something, whether that be a controller, a namespace, a log
 * page, a feature, a unique command, etc. Information about one of these items
 * is contained in a generally opaque discovery structure. For example, the
 * nvme_log_disc_t.
 *
 * The goal of these structures is to contain all of the metadata for working
 * with the object in question. Continuing with the log page discovery example,
 * it can tell us information about what fields are required, whether or not the
 * log might be supported, whether it operates on a controller, a namespace, or
 * something else, as well as more human-usable things such as names and
 * descriptions.
 *
 * Discovery objects are both for humans and for programmatic consumption. There
 * are several cases where requests can be created directly from discovery
 * objects. A well designed discovery object can allow a general implementation
 * of a consumer such as nvmeadm to build up a request without having to
 * hardcode everything about what is needed for each request (though most
 * consumers still need to have information about the actual contents, meaning,
 * and semantics of a log or feature).
 *
 * Discovery objects are obtained in two general ways. The first is using one of
 * the iterator/callback based functions to discover a given class of data. The
 * second path is that several of the functions which operate based on the name
 * of something, e.g. nvme_log_req_init_by_name(),
 * nvme_get_feat_req_init_by_name(), etc. will return a discovery object.
 *
 * When a discovery object is returned based on iteration (more below), the
 * memory is owned by the iterator. When it is returned by a request
 * initialization function, then it has its own lifetime and must be freed.
 * We try to make this distinction clear in the API based on whether or not the
 * discovery object is 'const'.
 *
 * All discovery objects should be fully filled out before they are handed back
 * to a caller. It is an explicit design goal that every function that gets data
 * from the discovery structure operates on a const version of the pointer. This
 * is the hint that you cannot perform additional I/O or related operations
 * after handing out the discovery structure. Attempts to loosen this constraint
 * should be considered carefully due to how we communicate ownership.
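 *
 * To make the lifetime rule concrete, a sketch using the generic <name>
 * convention from the ITERATORS section below (the exact per-type signatures
 * are not repeated here; they live in libnvme.h):
 *
 *	const nvme_<name>_disc_t *disc;	// borrowed, owned by the iterator
 *	nvme_<name>_disc_t *dup;	// duplicated, separately owned, and
 *					// must eventually be freed
 *
 * Every accessor takes a 'const nvme_<name>_disc_t *'; nothing that accepts
 * the const form will perform additional I/O or modify the object.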
 *
 * ITERATORS
 *
 * A common pattern of the library is iterating over items. This includes
 * controllers and namespaces, but also as part of discovering what specific
 * logs, commands, features, etc. are actually supported by the device.
 * Iteration always follows the same general pattern:
 *
 * 1. An iterator is initialized with a call to nvme_<name>_discover_init().
 * This will generally return a structure of the form nvme_<name>_iter_t. This
 * structure contains the memory for the corresponding value that is returned
 * from the step function in (2).
 *
 * 2. To actually pull values out of an iterator, one must call the
 * nvme_<name>_step() function for the iterator. This will return a
 * corresponding nvme_<name>_disc_t structure that is opaque and has a suite of
 * functions that are usable for getting information out of it. This structure
 * is valid only until the next time nvme_<name>_step() is called. The return
 * value of step indicates the state of the data: whether there was an error,
 * the iterator has finished, or we successfully stepped and the data is filled
 * out.
 *
 * If discovery data needs to outlive a given iteration, then it can be
 * duplicated, which will give it a separate lifetime, though that comes with
 * the responsibility that it must then be freed.
 *
 * 3. To finish using iterators, one finally calls the corresponding
 * nvme_<name>_discover_fini(). That will deallocate the iterator structure and
 * finish everything up. A concrete example of the full pattern follows.
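 *
 * For example, iterating NVMe controllers (all of these functions are defined
 * later in this file) looks roughly like:
 *
 *	nvme_ctrl_iter_t *iter;
 *	const nvme_ctrl_disc_t *disc;
 *	nvme_iter_t ret;
 *
 *	if (!nvme_ctrl_discover_init(nvme, &iter)) {
 *		...				// error is set on the nvme_t
 *	}
 *	while ((ret = nvme_ctrl_discover_step(iter, &disc)) ==
 *	    NVME_ITER_VALID) {
 *		...				// use accessors such as
 *						// nvme_ctrl_disc_devi(disc)
 *	}
 *	nvme_ctrl_discover_fini(iter);		// disc is invalid past this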
 *
 * REQUESTS
 *
 * One of the chief goals of this library is to be able to perform requests.
 * Each request has a structure that can be initialized, filled out, and then
 * executed. A request structure can be reused multiple times with minor
 * adjustments in-between (though changes aren't required). Request structures
 * are either initialized in a blank mode where every value must be filled out
 * or they can be initialized through their discovery object (or the common name
 * of such an object).
 *
 * When a request structure is initialized through a discovery object, it
 * automatically sets several of the fields, knows which ones are still required
 * to be set, and which fields cannot be set. For example, if you create a get
 * log page request from a log discovery object, it will not allow you to change
 * the log page you're requesting; however, in return you don't have to specify
 * the command set interface or log identifier.
 *
 * Request objects are tied to a controller. See 'Parallelism, Thread Safety,
 * and Errors' for more information.
 *
 * INFORMATION SNAPSHOTS
 *
 * To get information about a namespace or controller, one has to take an
 * information snapshot. Once an information snapshot is obtained, this snapshot
 * answers all questions about the controller with a mostly consistent set of
 * point-in-time data. The main reason for this design was to try and simplify
 * where errors can occur and to provide a straightforward serialization point
 * so that the raw underlying data could be gathered on one system and then
 * interpreted later on another.
 *
 * The only fallible operations on a snapshot are those that ask about things
 * that are not guaranteed to exist for all NVMe controllers.
 *
 * LIBRARY, CONTROLLER, NAMESPACE and SNAPSHOT HANDLES
 *
 * The last major set of types used in this library are opaque handles. As you
 * might have guessed given the request structures, all of the objects which
 * represent something are opaque. Each library handle is independent of one
 * another and each controller handle is independent of one another. In general,
 * it is expected that only a single controller handle is used at a given time
 * for a given library handle, but this is not currently enforced. Error
 * information and parallelism are tied into this, see 'Parallelism, Thread
 * Safety, and Errors' for more information.
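 *
 * The basic handle lifecycle, using only functions defined in this file, looks
 * like:
 *
 *	nvme_t *nvme = nvme_init();		// library handle
 *	nvme_ctrl_t *ctrl;
 *
 *	if (nvme == NULL)
 *		...				// allocation/devinfo failure
 *	if (!nvme_ctrl_init_by_instance(nvme, 0, &ctrl))
 *		...				// error is set on nvme
 *	...					// issue requests against ctrl
 *	nvme_ctrl_fini(ctrl);
 *	nvme_fini(nvme);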
 *
 * -----------------
 * Opaque Structures
 * -----------------
 *
 * One of the things that might stand out in libnvme is the use of opaque
 * structures everywhere with functions to access every arbitrary piece of data.
 * This and the function pattern around building up a request were done to try
 * and deal with the evolutionary nature of the NVMe specification. If you look
 * at the various requests, with the exception of firmware download, almost
 * every request has added additional features through the spec revisions. NVMe
 * 2.0 changed most things again with the requirement to specify the command set
 * interface.
 *
 * While the way that the NVMe specification has done this is quite reasonable,
 * it makes it much more difficult to use a traditional series of arguments to
 * functions or a structure without having to try to version the symbol through
 * clever games. If instead we accept that the specification will change and
 * that the specification is always taking these additional arguments out of
 * values that must be zero, then an opaque request structure where you have to
 * make an explicit function call and recompile to get slightly different
 * behavior is mostly reasonable. We may not be able to be perfect given we're
 * at the mercy of the specification, but at least this is better than the
 * alternative.
 *
 * This is ultimately why all the request structures are opaque and use a
 * pseudo-builder pattern to fill out the request information. Further evidence
 * for this point is that there was no way to avoid changing every kernel
 * structure here while retaining semantic operations. No one wants to manually
 * assemble cdw12-15 here. That's not how we can add value for the library.
 *
 * Similarly, for all discovery objects we ended up utilizing opaque objects.
 * The main reason here is that we want to be able to embed this library as a
 * committed interface in other languages and having the discovery structures be
 * something that everyone can see means it'll be harder to extend it. While
 * this concern is somewhat more theoretical given the iterator pattern, given
 * the other bits in the request structure we decided to lean into the
 * opaqueness.
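 *
 * To illustrate the resulting pseudo-builder pattern, a sketch of a get
 * features request (the exec and fini names and exact signatures here are
 * assumptions; the authoritative prototypes live in libnvme.h):
 *
 *	nvme_get_feat_req_t *req;
 *
 *	if (!nvme_get_feat_req_init(ctrl, &req))
 *		...			// error is set on ctrl
 *	...				// call nvme_get_feat_req_set_*()
 *					// functions to fill in each field
 *	if (!nvme_get_feat_req_exec(req))
 *		...			// _UNSUP/_RANGE/missing-field errors
 *	nvme_get_feat_req_fini(req);
 *
 * Adding a field in a future spec revision then means adding one new set
 * function, rather than changing an argument list or structure layout.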
 *
 * --------------------------------------
 * Parallelism, Thread Safety, and Errors
 * --------------------------------------
 *
 * One of the library's major design points is how we achieve thread safety,
 * how ownership works, where errors appear, and what degree of parallelism is
 * achievable. To work through this we look at a few different things:
 *
 * 1. The degree to which the hardware allows for parallelism
 * 2. The degree to which users might desire parallelism
 * 3. The ergonomics of getting and storing errors
 *
 * The NVMe specification allows for different degrees of admin command
 * parallelism on a per-command basis. This is discoverable, but the main point
 * is that there is a class of commands where only one can be outstanding at a
 * time, which likely includes most of the destructive commands like Format
 * NVM, Activate Firmware, etc. Our expectation to some extent is that most
 * admin queue commands don't need to be issued in parallel; however, beyond how
 * we structure the library and error handling, we don't try to enforce that
 * here. The kernel does do some enforcement by requiring mandatory write locks
 * to perform some operations.
 *
 * When we get to how folks want to use this, during the initial design phase we
 * mostly theorized based on how nvmeadm is using it today and how various
 * daemons like a FRU monitor or an appliance kit's software might want to
 * interact with it. Our general starting assumption is that it's very
 * reasonable for each discovered controller to be handled in parallel, but that
 * operations on a controller itself are likely serial given that we're not
 * issuing I/O through this mechanism. If we were, then that'd be an entirely
 * different set of constraints.
 *
 * To discuss the perceived ergonomics, we need to first discuss what error
 * information we want to be able to have. It's an important goal of both the
 * NVMe driver and this library to give useful semantic errors. In particular,
 * for any operation we want to make sure that we include the following
 * information:
 *
 * o A hopefully distinguishable semantic error
 * o The errno saved as a system error, if relevant (e.g. if open(2) failed)
 * o A message for humans that gives more specifics about what happened and is
 *   intended to be passed along to the output of a command or another error
 *   message.
 * o If a controller error occurs, we want to be able to provide the
 *   controller's sc (status code) and sct (status code type).
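 *
 * As a sketch of how a consumer sees this (the specific error accessors are
 * defined elsewhere in the library and are not named here), every fallible
 * operation reports success or failure through its return value, and the
 * details are then read back from the handle that owns the error:
 *
 *	if (!nvme_ctrl_lock(ctrl, NVME_LOCK_L_READ, 0)) {
 *		...	// consult the error accessors on ctrl: the semantic
 *			// error code, any saved errno, the human-readable
 *			// message, and, for controller errors, the sc/sct
 *			// pair
 *	}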
 *
 * With this we get to the questions around ergonomics, which are entirely
 * subjective: given that we want to capture that information, how do we best do
 * so with the tooling that we have? When the library was first being prototyped
 * all errors were on the nvme_t, basically the top-level handle. This meant
 * that each operation on a controller had to be done serially or you would have
 * to use different handles. However, the simplicity was that there was one
 * thing to check.
 *
 * This evolution changed slightly when we introduced information snapshots.
 * Because the information snapshots are meant to be separate entities whose
 * lifetime can extend beyond the nvme_t library handle, they ended up
 * developing their own error codes and functions. This has been okay because
 * there aren't too many use cases there, though the need to duplicate error
 * handling functions is a bit painful.
 *
 * From there, we did consider what if each request had its own error
 * information that could be extracted. That would turn into a lot of functions
 * to get at that data. The controller's allowed parallelism for admin commands
 * varies based on each command. Some commands must occur when there are no
 * other admin commands on the controller and others when there is nothing on
 * the namespace. However, due to that nuance, it would lead to forcing the
 * consumer to understand the controller's specifics more than is often
 * necessary for a given request. To add to that, it'd also just be a pain to
 * try to get all the error information out in a different way and the consumers
 * we started writing in this fashion were not looking good.
 *
 * We also considered whether we could consolidate all the error functions on
 * each request into one structure that we get, but that didn't move the needle
 * too much. It also raised some more concerns around how we minimize races and
 * how data changes around that.
 *
 * So all of this led us to our current compromise position: we allow for
 * parallelism at the controller level. More specifically:
 *
 * 1. Operations which take the nvme_t handle set errors on it and must operate
 *    serially. That is, the nvme_t should only be used from one thread at any
 *    time, but may move between threads.
 *
 * 2. The nvme_ctrl_t has its own error information. A given nvme_ctrl_t should
 *    only be used serially; however, different ones can be used in parallel. A
 *    controller doesn't guarantee exclusivity. That requires an explicit
 *    locking operation.
 *
 * 3. Both request structures and namespaces place their errors on the
 *    corresponding controller that they were created from. Therefore the
 *    per-controller serialization in (2) applies here as well. If two requests
 *    are tied to different controllers, they can proceed in parallel.
 *
 * 4. Once a controller or namespace snapshot is obtained, they fall into a
 *    similar pattern: different snapshots can be used in parallel, but a
 *    single snapshot should only be operated on serially.
 *
 * Other than the constraints defined above, the library does not care which
 * thread an operation occurs on; operations can be moved to whatever thread
 * needs to perform them. Locking and related enforcement in the kernel is
 * based on the open file descriptor to the controller.
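 *
 * A sketch of rule (3), showing where the error lands for a namespace
 * operation (everything used here is defined in this file):
 *
 *	nvme_ns_t *ns;
 *
 *	if (!nvme_ns_init(ctrl, 1, &ns))
 *		...			// error was set on ctrl, not ns
 *	if (!nvme_ns_bd_attach(ns))
 *		...			// ditto: consult ctrl for details
 *	nvme_ns_fini(ns);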
 *
 * ----------------
 * Field Validation
 * ----------------
 *
 * Every request is made up of fields that correspond to parts of the NVMe
 * specification. Our requests operate in terms of the logical fields that we
 * opt to expose and that the kernel knows how to consume. In general, we don't
 * expose the raw cdw values that make up the commands (except for the vendor
 * unique commands or arguments that are explicitly that way a la get features).
 * While operating on raw cdw arguments would be a simple way to create ABI
 * stability, it would leave everyone having to break up all the fields
 * themselves and, we believe, end up somewhat more error prone than the
 * interfaces we expose today.
 *
 * Requests are created in one of two ways today: they are either initialized
 * from corresponding discovery data, e.g. nvme_log_req_init_by_disc() and
 * nvme_get_feat_req_init_by_name(), or one creates a raw request a la
 * nvme_get_feat_req_init(). In the former cases, we fill out a bunch of the
 * fields that would normally need to be set such as the log or feature ID. We
 * also will note which fields are allowed and expected. For example, the health
 * log page does not take or expect an lsp (log specific parameter) or related,
 * and therefore we can flag that with an _UNUSE class error. Conversely,
 * requests that are created from their raw form will not have any such error
 * checking performed until they are finalized and checked by the kernel. The
 * set of fields that can be set in a request is usually tracked in the
 * structure with a member of the form <prefix>_allow.
 *
 * One set of library error checking that is uniform between both types is that
 * of missing fields. There are minimum fields that must be set for different
 * types of requests. That check will always be performed regardless of the path
 * that is taken through the system. Tracking which members must still be set is
 * done by a member of the form <prefix>_need.
 *
 * When we perform validation, we try to push the vast majority of it into the
 * common validation code that is shared between the kernel and userland. This
 * is wrapped up through the nvme_field_check_one() logic. The common code will
 * check if the field is supported by the controller (generating an _UNSUP class
 * error if not) and if the value of the field is within a valid range
 * (generating a _RANGE class error if not).
 *
 * While we try to fold as many of these checks into the common code as
 * possible, it isn't perfect and some things have to be checked outside of
 * that. Those consist of the following general cases:
 *
 * 1) Items that are not semantically fields in the actual command but are
 * things that we are tracking ourselves in the library. An example of this
 * would be fields in the vuc request structure that we are synthesizing
 * ourselves.
 *
 * 2) While the field logic has the specifics of what controller is being
 * operated upon, it doesn't have all the knowledge of what things can be
 * combined or not. It can answer the specifics about its field, but cannot look
 * at the broader request.
 *
 * As a result, there are some duplicated checks in the library and the kernel,
 * though several are left just to the kernel. However, the vast majority of
 * validation does happen through these common routines, which leaves the
 * library's nvme_<type>_req_set_<field> functions as, generally, wrappers that
 * call the common checking code and then update our tracking of which fields
 * have been set so we can issue an ioctl.
 */
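
/*
 * The general shape of such a set function, as a sketch (the field table,
 * request members, and the exact nvme_field_check_one() argument list here are
 * illustrative assumptions; only the <prefix>_allow and <prefix>_need
 * conventions and the function name come from the description above):
 *
 *	bool
 *	nvme_xxx_req_set_foo(nvme_xxx_req_t *req, uint32_t foo)
 *	{
 *		if (!nvme_field_check_one(req->xr_ctrl, foo, "foo",
 *		    &field_table[NVME_XXX_REQ_FIELD_FOO], req->xr_allow)) {
 *			return (false);		// _UNSUP or _RANGE error was
 *						// set on the controller
 *		}
 *
 *		req->xr_foo = foo;
 *		req->xr_need &= ~(1 << NVME_XXX_REQ_FIELD_FOO);
 *		return (true);
 *	}
 */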

#include <stdlib.h>
#include <stdarg.h>
#include <libdevinfo.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <upanic.h>

#include "libnvme_impl.h"

bool
nvme_vers_ctrl_atleast(const nvme_ctrl_t *ctrl, const nvme_version_t *targ)
{
	return (nvme_vers_atleast(&ctrl->nc_vers, targ));
}

bool
nvme_vers_ctrl_info_atleast(const nvme_ctrl_info_t *ci,
    const nvme_version_t *targ)
{
	return (nvme_vers_atleast(&ci->nci_vers, targ));
}

bool
nvme_vers_ns_info_atleast(const nvme_ns_info_t *info,
    const nvme_version_t *targ)
{
	return (nvme_vers_atleast(&info->nni_vers, targ));
}

/*
 * The NGUID was added in NVMe 1.2 and the EUI64 in NVMe 1.1. On older
 * controllers, or when unsupported, these identifiers read as all zeros,
 * which we treat as invalid.
 */
bool
nvme_guid_valid(const nvme_ctrl_t *ctrl, const uint8_t guid[16])
{
	const uint8_t zero_guid[16] = { 0 };

	return (nvme_vers_ctrl_atleast(ctrl, &nvme_vers_1v2) &&
	    memcmp(zero_guid, guid, sizeof (zero_guid)) != 0);
}

bool
nvme_eui64_valid(const nvme_ctrl_t *ctrl, const uint8_t eui64[8])
{
	const uint8_t zero_eui[8] = { 0 };

	return (nvme_vers_ctrl_atleast(ctrl, &nvme_vers_1v1) &&
	    memcmp(zero_eui, eui64, sizeof (zero_eui)) != 0);
}

int
nvme_format_nguid(const uint8_t nguid[16], char *buf, size_t len)
{
	return (snprintf(buf, len, "%02X%02X%02X%02X%02X%02X"
	    "%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X",
	    nguid[0], nguid[1], nguid[2], nguid[3], nguid[4], nguid[5],
	    nguid[6], nguid[7], nguid[8], nguid[9], nguid[10], nguid[11],
	    nguid[12], nguid[13], nguid[14], nguid[15]));
}

int
nvme_format_eui64(const uint8_t eui64[8], char *buf, size_t len)
{
	return (snprintf(buf, len, "%02X%02X%02X%02X%02X%02X%02X%02X",
	    eui64[0], eui64[1], eui64[2], eui64[3], eui64[4], eui64[5],
	    eui64[6], eui64[7]));
}

void
nvme_fini(nvme_t *nvme)
{
	if (nvme == NULL)
		return;

	if (nvme->nh_devinfo != DI_NODE_NIL) {
		di_fini(nvme->nh_devinfo);
	}

	free(nvme);
}

nvme_t *
nvme_init(void)
{
	nvme_t *nvme;

	nvme = calloc(1, sizeof (nvme_t));
	if (nvme == NULL) {
		return (NULL);
	}

	nvme->nh_devinfo = di_init("/", DINFOCPYALL);
	if (nvme->nh_devinfo == DI_NODE_NIL) {
		nvme_fini(nvme);
		return (NULL);
	}

	return (nvme);
}

void
nvme_ctrl_discover_fini(nvme_ctrl_iter_t *iter)
{
	free(iter);
}

nvme_iter_t
nvme_ctrl_discover_step(nvme_ctrl_iter_t *iter, const nvme_ctrl_disc_t **discp)
{
	di_minor_t m;

	*discp = NULL;
	if (iter->ni_done) {
		return (NVME_ITER_DONE);
	}

	for (;;) {
		if (iter->ni_cur == NULL) {
			iter->ni_cur = di_drv_first_node("nvme",
			    iter->ni_nvme->nh_devinfo);
		} else {
			iter->ni_cur = di_drv_next_node(iter->ni_cur);
		}

		if (iter->ni_cur == NULL) {
			iter->ni_done = true;
			return (NVME_ITER_DONE);
		}

		for (m = di_minor_next(iter->ni_cur, DI_MINOR_NIL);
		    m != DI_MINOR_NIL; m = di_minor_next(iter->ni_cur, m)) {
			if (strcmp(di_minor_nodetype(m),
			    DDI_NT_NVME_NEXUS) == 0) {
				break;
			}
		}

		if (m == DI_MINOR_NIL) {
			continue;
		}

		iter->ni_disc.ncd_devi = iter->ni_cur;
		iter->ni_disc.ncd_minor = m;
		*discp = &iter->ni_disc;
		return (NVME_ITER_VALID);
	}
}

bool
nvme_ctrl_discover_init(nvme_t *nvme, nvme_ctrl_iter_t **iterp)
{
	nvme_ctrl_iter_t *iter;

	if (iterp == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid nvme_ctrl_iter_t output pointer: %p", iterp));
	}

	iter = calloc(1, sizeof (nvme_ctrl_iter_t));
	if (iter == NULL) {
		int e = errno;
		return (nvme_error(nvme, NVME_ERR_NO_MEM, e, "failed to "
		    "allocate memory for a new nvme_ctrl_iter_t: %s",
		    strerror(e)));
	}
	iter->ni_nvme = nvme;
	*iterp = iter;
	return (nvme_success(nvme));
}

bool
nvme_ctrl_discover(nvme_t *nvme, nvme_ctrl_disc_f func, void *arg)
{
	nvme_ctrl_iter_t *iter;
	const nvme_ctrl_disc_t *disc;
	nvme_iter_t ret;

	if (func == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid nvme_ctrl_disc_f function pointer: %p", func));
	}

	if (!nvme_ctrl_discover_init(nvme, &iter)) {
		return (false);
	}

	while ((ret = nvme_ctrl_discover_step(iter, &disc)) ==
	    NVME_ITER_VALID) {
		if (!func(nvme, disc, arg))
			break;
	}

	nvme_ctrl_discover_fini(iter);
	if (ret == NVME_ITER_ERROR) {
		return (false);
	}

	return (nvme_success(nvme));
}

di_node_t
nvme_ctrl_disc_devi(const nvme_ctrl_disc_t *discp)
{
	return (discp->ncd_devi);
}

di_minor_t
nvme_ctrl_disc_minor(const nvme_ctrl_disc_t *discp)
{
	return (discp->ncd_minor);
}
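
/*
 * As an example of the callback flavor of discovery, printing the instance
 * number of every controller (a sketch; the callback body is illustrative):
 *
 *	static bool
 *	print_ctrl_cb(nvme_t *nvme, const nvme_ctrl_disc_t *disc, void *arg)
 *	{
 *		(void) printf("nvme%d\n",
 *		    di_instance(nvme_ctrl_disc_devi(disc)));
 *		return (true);		// returning false stops iteration
 *	}
 *
 *	if (!nvme_ctrl_discover(nvme, print_ctrl_cb, NULL))
 *		...			// error is set on the nvme_t
 */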

void
nvme_ctrl_fini(nvme_ctrl_t *ctrl)
{
	if (ctrl == NULL) {
		return;
	}

	if (ctrl->nc_devi_path != NULL) {
		di_devfs_path_free(ctrl->nc_devi_path);
	}

	if (ctrl->nc_fd >= 0) {
		(void) close(ctrl->nc_fd);
		ctrl->nc_fd = -1;
	}

	free(ctrl);
}

bool
nvme_ctrl_init(nvme_t *nvme, di_node_t di, nvme_ctrl_t **outp)
{
	const char *drv;
	int32_t inst;
	di_minor_t minor;
	char *path, buf[PATH_MAX];
	nvme_ctrl_t *ctrl;
	nvme_ioctl_ctrl_info_t ctrl_info;

	if (di == DI_NODE_NIL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid di_node_t: %p", di));
	}

	if (outp == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid nvme_ctrl_t output pointer: %p", outp));
	}
	*outp = NULL;

	drv = di_driver_name(di);
	inst = di_instance(di);
	if (drv == NULL || inst < 0) {
		return (nvme_error(nvme, NVME_ERR_BAD_DEVI, 0, "devi %s has "
		    "no driver attached", di_node_name(di)));
	}

	if (strcmp(drv, "nvme") != 0) {
		return (nvme_error(nvme, NVME_ERR_BAD_DEVI, 0, "devi %s isn't "
		    "attached to nvme, found %s", di_node_name(di), drv));
	}

	/*
	 * We have an NVMe node. Find the right minor that corresponds to the
	 * attachment point. Once we find that then we can go ahead and open a
	 * path to that and construct the device.
	 */
	minor = DI_MINOR_NIL;
	while ((minor = di_minor_next(di, minor)) != DI_MINOR_NIL) {
		if (strcmp(di_minor_nodetype(minor), DDI_NT_NVME_NEXUS) == 0) {
			break;
		}
	}

	if (minor == DI_MINOR_NIL) {
		return (nvme_error(nvme, NVME_ERR_BAD_DEVI, 0, "devi %s has "
		    "no NVMe nexus minor node", di_node_name(di)));
	}

	path = di_devfs_minor_path(minor);
	if (path == NULL) {
		int e = errno;
		return (nvme_error(nvme, NVME_ERR_LIBDEVINFO, e, "failed to "
		    "obtain /devices path for the requested minor: %s",
		    strerror(e)));
	}

	if (snprintf(buf, sizeof (buf), "/devices%s", path) >= sizeof (buf)) {
		di_devfs_path_free(path);
		return (nvme_error(nvme, NVME_ERR_INTERNAL, 0, "failed to "
		    "construct full /devices minor path, would have overflown "
		    "internal buffer"));
	}
	di_devfs_path_free(path);

	ctrl = calloc(1, sizeof (*ctrl));
	if (ctrl == NULL) {
		int e = errno;
		return (nvme_error(nvme, NVME_ERR_NO_MEM, e, "failed to "
		    "allocate memory for a new nvme_ctrl_t: %s", strerror(e)));
	}

	ctrl->nc_nvme = nvme;
	ctrl->nc_devi = di;
	ctrl->nc_minor = minor;
	ctrl->nc_inst = inst;
	ctrl->nc_fd = open(buf, O_RDWR | O_CLOEXEC);
	if (ctrl->nc_fd < 0) {
		int e = errno;
		nvme_ctrl_fini(ctrl);
		return (nvme_error(nvme, NVME_ERR_OPEN_DEV, e, "failed to "
		    "open device path %s: %s", buf, strerror(e)));
	}

	ctrl->nc_devi_path = di_devfs_path(di);
	if (ctrl->nc_devi_path == NULL) {
		int e = errno;
		nvme_ctrl_fini(ctrl);
		return (nvme_error(nvme, NVME_ERR_LIBDEVINFO, e, "failed to "
		    "obtain /devices path for the controller: %s",
		    strerror(e)));
	}

	if (!nvme_ioc_ctrl_info(ctrl, &ctrl_info)) {
		nvme_err_data_t err;

		nvme_ctrl_err_save(ctrl, &err);
		nvme_err_set(nvme, &err);
		nvme_ctrl_fini(ctrl);
		return (false);
	}

	ctrl->nc_vers = ctrl_info.nci_vers;
	ctrl->nc_info = ctrl_info.nci_ctrl_id;

	nvme_vendor_map_ctrl(ctrl);

	*outp = ctrl;
	return (nvme_success(nvme));
}

typedef struct {
	bool ncia_found;
	int32_t ncia_inst;
	nvme_ctrl_t *ncia_ctrl;
	nvme_err_data_t ncia_err;
} nvme_ctrl_init_arg_t;

bool
nvme_ctrl_init_by_instance_cb(nvme_t *nvme, const nvme_ctrl_disc_t *disc,
    void *arg)
{
	nvme_ctrl_init_arg_t *init = arg;

	if (di_instance(disc->ncd_devi) != init->ncia_inst) {
		return (true);
	}

	/*
	 * If we fail to open the controller, we need to save the error
	 * information because it's going to end up clobbered: this is a
	 * callback function surrounded by other libnvme callers.
	 */
	init->ncia_found = true;
	if (!nvme_ctrl_init(nvme, disc->ncd_devi, &init->ncia_ctrl)) {
		nvme_err_save(nvme, &init->ncia_err);
	}

	return (false);
}

bool
nvme_ctrl_init_by_instance(nvme_t *nvme, int32_t inst, nvme_ctrl_t **outp)
{
	nvme_ctrl_init_arg_t init;

	if (inst < 0) {
		return (nvme_error(nvme, NVME_ERR_ILLEGAL_INSTANCE, 0,
		    "encountered illegal negative instance number: %d", inst));
	}

	if (outp == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid nvme_ctrl_t output pointer: %p", outp));
	}

	init.ncia_found = false;
	init.ncia_inst = inst;
	init.ncia_ctrl = NULL;

	if (!nvme_ctrl_discover(nvme, nvme_ctrl_init_by_instance_cb, &init)) {
		return (false);
	}

	if (!init.ncia_found) {
		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
		    "failed to find NVMe controller nvme%d", inst));
	}

	/*
	 * If we don't have an NVMe controller structure but we did find the
	 * instance, then we must have had an error constructing it, which the
	 * callback saved for us. We have to reconstruct the error on our
	 * handle from that saved information as nvme_ctrl_discover() will
	 * have clobbered it.
	 */
	if (init.ncia_ctrl == NULL) {
		nvme_err_set(nvme, &init.ncia_err);
		return (false);
	}

	*outp = init.ncia_ctrl;
	return (nvme_success(nvme));
}

bool
nvme_ctrl_devi(nvme_ctrl_t *ctrl, di_node_t *devip)
{
	*devip = ctrl->nc_devi;
	return (nvme_ctrl_success(ctrl));
}

bool
nvme_ioc_ctrl_info(nvme_ctrl_t *ctrl, nvme_ioctl_ctrl_info_t *info)
{
	(void) memset(info, 0, sizeof (nvme_ioctl_ctrl_info_t));

	if (ioctl(ctrl->nc_fd, NVME_IOC_CTRL_INFO, info) != 0) {
		int e = errno;
		return (nvme_ioctl_syserror(ctrl, e, "controller info"));
	}

	if (info->nci_common.nioc_drv_err != NVME_IOCTL_E_OK) {
		return (nvme_ioctl_error(ctrl, &info->nci_common,
		    "controller info"));
	}

	return (true);
}

bool
nvme_ioc_ns_info(nvme_ctrl_t *ctrl, uint32_t nsid, nvme_ioctl_ns_info_t *info)
{
	(void) memset(info, 0, sizeof (nvme_ioctl_ns_info_t));
	info->nni_common.nioc_nsid = nsid;

	if (ioctl(ctrl->nc_fd, NVME_IOC_NS_INFO, info) != 0) {
		int e = errno;
		return (nvme_ioctl_syserror(ctrl, e, "namespace info"));
	}

	if (info->nni_common.nioc_drv_err != NVME_IOCTL_E_OK) {
		return (nvme_ioctl_error(ctrl, &info->nni_common,
		    "namespace info"));
	}

	return (true);
}

const char *
nvme_tporttostr(nvme_ctrl_transport_t tport)
{
	switch (tport) {
	case NVME_CTRL_TRANSPORT_PCI:
		return ("PCI");
	case NVME_CTRL_TRANSPORT_TCP:
		return ("TCP");
	case NVME_CTRL_TRANSPORT_RDMA:
		return ("RDMA");
	default:
		return ("unknown transport");
	}
}

static bool
nvme_ns_discover_validate(nvme_ctrl_t *ctrl, nvme_ns_disc_level_t level)
{
	switch (level) {
	case NVME_NS_DISC_F_ALL:
	case NVME_NS_DISC_F_ALLOCATED:
	case NVME_NS_DISC_F_ACTIVE:
	case NVME_NS_DISC_F_NOT_IGNORED:
	case NVME_NS_DISC_F_BLKDEV:
		return (true);
	default:
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_FLAG, 0, "invalid "
		    "namespace discovery level specified: 0x%x", level));
	}
}

void
nvme_ns_discover_fini(nvme_ns_iter_t *iter)
{
	free(iter);
}

const char *
nvme_nsleveltostr(nvme_ns_disc_level_t level)
{
	switch (level) {
	case NVME_NS_DISC_F_ALL:
		return ("unallocated");
	case NVME_NS_DISC_F_ALLOCATED:
		return ("allocated");
	case NVME_NS_DISC_F_ACTIVE:
		return ("active");
	case NVME_NS_DISC_F_NOT_IGNORED:
		return ("not ignored");
	case NVME_NS_DISC_F_BLKDEV:
		return ("blkdev");
	default:
		return ("unknown level");
	}
}

/*
 * Map a namespace's kernel-reported state to the strictest discovery level
 * that would still include it.
 */
nvme_ns_disc_level_t
nvme_ns_state_to_disc_level(nvme_ns_state_t state)
{
	if ((state & NVME_NS_STATE_ALLOCATED) == 0) {
		return (NVME_NS_DISC_F_ALL);
	}

	if ((state & NVME_NS_STATE_ACTIVE) == 0) {
		return (NVME_NS_DISC_F_ALLOCATED);
	}

	if ((state & NVME_NS_STATE_IGNORED) != 0) {
		return (NVME_NS_DISC_F_ACTIVE);
	}

	if ((state & NVME_NS_STATE_ATTACHED) == 0) {
		return (NVME_NS_DISC_F_NOT_IGNORED);
	} else {
		return (NVME_NS_DISC_F_BLKDEV);
	}
}

nvme_iter_t
nvme_ns_discover_step(nvme_ns_iter_t *iter, const nvme_ns_disc_t **discp)
{
	nvme_ctrl_t *ctrl = iter->nni_ctrl;

	if (iter->nni_err) {
		return (NVME_ITER_ERROR);
	}

	if (iter->nni_done) {
		return (NVME_ITER_DONE);
	}

	while (iter->nni_cur_idx <= ctrl->nc_info.id_nn) {
		uint32_t nsid = iter->nni_cur_idx;
		nvme_ioctl_ns_info_t ns_info = { 0 };
		nvme_ns_disc_level_t level;

		if (!nvme_ioc_ns_info(ctrl, nsid, &ns_info)) {
			iter->nni_err = true;
			return (NVME_ITER_ERROR);
		}

		iter->nni_cur_idx++;
		level = nvme_ns_state_to_disc_level(ns_info.nni_state);
		if (iter->nni_level > level) {
			continue;
		}

		(void) memset(&iter->nni_disc, 0, sizeof (nvme_ns_disc_t));
		iter->nni_disc.nnd_nsid = nsid;
		iter->nni_disc.nnd_level = level;

		if (nvme_guid_valid(ctrl, ns_info.nni_id.id_nguid)) {
			iter->nni_disc.nnd_flags |= NVME_NS_DISC_F_NGUID_VALID;
			(void) memcpy(iter->nni_disc.nnd_nguid,
			    ns_info.nni_id.id_nguid,
			    sizeof (ns_info.nni_id.id_nguid));
		}

		if (nvme_eui64_valid(ctrl, ns_info.nni_id.id_eui64)) {
			iter->nni_disc.nnd_flags |= NVME_NS_DISC_F_EUI64_VALID;
			(void) memcpy(iter->nni_disc.nnd_eui64,
			    ns_info.nni_id.id_eui64,
			    sizeof (ns_info.nni_id.id_eui64));
		}

		*discp = &iter->nni_disc;
		return (NVME_ITER_VALID);
	}

	iter->nni_done = true;
	return (NVME_ITER_DONE);
}

bool
nvme_ns_discover_init(nvme_ctrl_t *ctrl, nvme_ns_disc_level_t level,
    nvme_ns_iter_t **iterp)
{
	nvme_ns_iter_t *iter;

	if (!nvme_ns_discover_validate(ctrl, level)) {
		return (false);
	}

	if (iterp == NULL) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
		    "encountered invalid nvme_ns_iter_t output pointer: %p",
		    iterp));
	}

	iter = calloc(1, sizeof (nvme_ns_iter_t));
	if (iter == NULL) {
		int e = errno;
		return (nvme_ctrl_error(ctrl, NVME_ERR_NO_MEM, e, "failed to "
		    "allocate memory for a new nvme_ns_iter_t: %s",
		    strerror(e)));
	}

	iter->nni_ctrl = ctrl;
	iter->nni_level = level;
	iter->nni_cur_idx = 1;

	*iterp = iter;
	return (nvme_ctrl_success(ctrl));
}
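
/*
 * As with controllers, namespace discovery has a callback flavor built on the
 * iterator. For example, to visit every active namespace (a sketch; the
 * callback body is illustrative):
 *
 *	static bool
 *	print_ns_cb(nvme_ctrl_t *ctrl, const nvme_ns_disc_t *disc, void *arg)
 *	{
 *		(void) printf("nsid %u\n", nvme_ns_disc_nsid(disc));
 *		return (true);
 *	}
 *
 *	if (!nvme_ns_discover(ctrl, NVME_NS_DISC_F_ACTIVE, print_ns_cb, NULL))
 *		...			// error is set on ctrl
 */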

bool
nvme_ns_discover(nvme_ctrl_t *ctrl, nvme_ns_disc_level_t level,
    nvme_ns_disc_f func, void *arg)
{
	nvme_ns_iter_t *iter;
	nvme_iter_t ret;
	const nvme_ns_disc_t *disc;

	if (!nvme_ns_discover_validate(ctrl, level)) {
		return (false);
	}

	if (func == NULL) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
		    "encountered invalid nvme_ns_disc_f function pointer: %p",
		    func));
	}

	if (!nvme_ns_discover_init(ctrl, level, &iter)) {
		return (false);
	}

	while ((ret = nvme_ns_discover_step(iter, &disc)) == NVME_ITER_VALID) {
		if (!func(ctrl, disc, arg))
			break;
	}

	nvme_ns_discover_fini(iter);
	if (ret == NVME_ITER_ERROR) {
		return (false);
	}

	return (nvme_ctrl_success(ctrl));
}

uint32_t
nvme_ns_disc_nsid(const nvme_ns_disc_t *discp)
{
	return (discp->nnd_nsid);
}

nvme_ns_disc_level_t
nvme_ns_disc_level(const nvme_ns_disc_t *discp)
{
	return (discp->nnd_level);
}

nvme_ns_disc_flags_t
nvme_ns_disc_flags(const nvme_ns_disc_t *discp)
{
	return (discp->nnd_flags);
}

const uint8_t *
nvme_ns_disc_eui64(const nvme_ns_disc_t *discp)
{
	if ((discp->nnd_flags & NVME_NS_DISC_F_EUI64_VALID) == 0) {
		return (NULL);
	}

	return (discp->nnd_eui64);
}

const uint8_t *
nvme_ns_disc_nguid(const nvme_ns_disc_t *discp)
{
	if ((discp->nnd_flags & NVME_NS_DISC_F_NGUID_VALID) == 0) {
		return (NULL);
	}

	return (discp->nnd_nguid);
}

void
nvme_ns_fini(nvme_ns_t *ns)
{
	free(ns);
}

bool
nvme_ns_init(nvme_ctrl_t *ctrl, uint32_t nsid, nvme_ns_t **nsp)
{
	nvme_ns_t *ns;

	if (nsp == NULL) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
		    "encountered invalid nvme_ns_t output pointer: %p", nsp));
	}

	if (nsid < NVME_NSID_MIN || nsid > ctrl->nc_info.id_nn) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_NS_RANGE, 0, "requested "
		    "namespace 0x%x is invalid, valid namespaces are [0x%x, "
		    "0x%x]", nsid, NVME_NSID_MIN, ctrl->nc_info.id_nn));
	}

	ns = calloc(1, sizeof (nvme_ns_t));
	if (ns == NULL) {
		int e = errno;
		return (nvme_ctrl_error(ctrl, NVME_ERR_NO_MEM, e, "failed to "
		    "allocate memory for a new nvme_ns_t: %s", strerror(e)));
	}

	ns->nn_ctrl = ctrl;
	ns->nn_nsid = nsid;

	*nsp = ns;
	return (nvme_ctrl_success(ctrl));
}

typedef struct {
	nvme_ctrl_t *nnia_ctrl;
	const char *nnia_name;
	bool nnia_found;
	nvme_ns_t *nnia_ns;
	nvme_err_data_t nnia_err;
} nvme_ns_init_arg_t;

static bool
nvme_ns_init_by_name_cb(nvme_ctrl_t *ctrl, const nvme_ns_disc_t *disc,
    void *arg)
{
	nvme_ns_init_arg_t *init = arg;
	char buf[NVME_NGUID_NAMELEN];
	CTASSERT(NVME_NGUID_NAMELEN > NVME_EUI64_NAMELEN);

	if ((disc->nnd_flags & NVME_NS_DISC_F_NGUID_VALID) != 0) {
		(void) nvme_format_nguid(disc->nnd_nguid, buf, sizeof (buf));
		if (strcasecmp(init->nnia_name, buf) == 0)
			goto match;
	}

	if ((disc->nnd_flags & NVME_NS_DISC_F_EUI64_VALID) != 0) {
		(void) nvme_format_eui64(disc->nnd_eui64, buf, sizeof (buf));
		if (strcasecmp(init->nnia_name, buf) == 0)
			goto match;
	}

	(void) snprintf(buf, sizeof (buf), "%u", disc->nnd_nsid);
	if (strcasecmp(init->nnia_name, buf) == 0)
		goto match;

	return (true);

match:
	init->nnia_found = true;
	if (!nvme_ns_init(ctrl, disc->nnd_nsid, &init->nnia_ns)) {
		nvme_ctrl_err_save(ctrl, &init->nnia_err);
	}

	return (false);
}

/*
 * Attempt to find a namespace by 'name'. A name could be the NGUID, EUI64, or
 * just the plain old namespace ID.
 */
bool
nvme_ns_init_by_name(nvme_ctrl_t *ctrl, const char *ns_name, nvme_ns_t **nsp)
{
	nvme_ns_init_arg_t init;

	if (ns_name == NULL) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
		    "encountered invalid namespace name: %p", ns_name));
	}

	if (nsp == NULL) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
		    "encountered invalid nvme_ns_t output pointer: %p", nsp));
	}

	init.nnia_ctrl = ctrl;
	init.nnia_name = ns_name;
	init.nnia_found = false;
	init.nnia_ns = NULL;

	if (!nvme_ns_discover(ctrl, NVME_NS_DISC_F_ALL, nvme_ns_init_by_name_cb,
	    &init)) {
		return (false);
	}

	if (!init.nnia_found) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_NS_RANGE, 0, "failed to "
		    "find NVMe namespace %s on nvme%d", ns_name,
		    ctrl->nc_inst));
	}

	/*
	 * If we found a match but have no namespace handle, then constructing
	 * it failed and the callback saved the error for us to restore here.
	 */
	if (init.nnia_ns == NULL) {
		nvme_ctrl_err_set(ctrl, &init.nnia_err);
		return (false);
	}

	*nsp = init.nnia_ns;
	return (nvme_ctrl_success(ctrl));
}
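
/*
 * For example, each of the following would name the same namespace, assuming
 * it is namespace 1 and reports the corresponding identifiers (the ID values
 * here are illustrative):
 *
 *	nvme_ns_init_by_name(ctrl, "1", &ns);
 *	nvme_ns_init_by_name(ctrl, "0102030405060708", &ns);	// EUI64
 *	nvme_ns_init_by_name(ctrl,
 *	    "0102030405060708090A0B0C0D0E0F10", &ns);		// NGUID
 *
 * nvme_ctrl_ns_init() below accepts the combined "nvme<inst>/<ns name>" form,
 * e.g. "nvme0/1".
 */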

bool
nvme_ctrl_ns_init(nvme_t *nvme, const char *name, nvme_ctrl_t **ctrlp,
    nvme_ns_t **nsp)
{
	const char *slash, *ns_name;
	char *eptr;
	nvme_ctrl_t *ctrl;
	nvme_ns_t *ns;
	unsigned long inst;
	size_t ctrl_namelen;

	if (name == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid name to search for: %p", name));
	}

	/*
	 * We require a controller, but the namespace output pointer is only
	 * required if we end up having a namespace present.
	 */
	if (ctrlp == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid nvme_ctrl_t output pointer: %p", ctrlp));
	}

	slash = strchr(name, '/');
	if (slash != NULL) {
		ctrl_namelen = (uintptr_t)slash - (uintptr_t)name;
		ns_name = slash + 1;

		if (nsp == NULL) {
			return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0,
			    "encountered invalid nvme_ns_t output pointer: %p",
			    nsp));
		}
	} else {
		ctrl_namelen = strlen(name);
		ns_name = NULL;
	}

	*ctrlp = NULL;
	if (nsp != NULL) {
		*nsp = NULL;
	}

	if (strncmp(name, "nvme", 4) != 0) {
		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0, "unable "
		    "to map controller '%.*s' to a known device class, "
		    "expected the controller to start with 'nvme'",
		    (int)ctrl_namelen, name));
	}

	/*
	 * Before we go ahead and try to parse this with strtoul we need to
	 * manually check two things that strtoul will not:
	 *
	 * 1) If name + 4 points at the null terminator, strtoul will happily
	 * return 0 rather than indicate an error.
	 * 2) If there is a leading zero followed by more digits, that's an
	 * error. We don't want to conflate 001 and 1 as the same here. The
	 * only valid name starting with a zero is 'nvme0', which is 5
	 * characters long, hence the check below.
	 */
	if (ctrl_namelen == 4) {
		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
		    "no controller instance specified in %.*s",
		    (int)ctrl_namelen, name));
	}

	if (name[4] == '0' && ctrl_namelen > 5) {
		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
		    "leading zeros aren't allowed for the instance specified "
		    "in %.*s", (int)ctrl_namelen, name));
	}

	errno = 0;
	inst = strtoul(name + 4, &eptr, 10);
	if (errno != 0 || (*eptr != '\0' && eptr != slash)) {
		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
		    "failed to parse controller instance from %.*s",
		    (int)ctrl_namelen, name));
	}

	if (inst > INT32_MAX) {
		return (nvme_error(nvme, NVME_ERR_ILLEGAL_INSTANCE, 0,
		    "parsed controller instance %lu is outside the valid "
		    "range [0, %d]", inst, INT32_MAX));
	}

	if (!nvme_ctrl_init_by_instance(nvme, (int32_t)inst, &ctrl)) {
		return (false);
	}

	if (ns_name == NULL) {
		*ctrlp = ctrl;
		return (nvme_success(nvme));
	}

	if (!nvme_ns_init_by_name(ctrl, ns_name, &ns)) {
		nvme_err_data_t err;

		nvme_ctrl_err_save(ctrl, &err);
		nvme_err_set(nvme, &err);
		nvme_ctrl_fini(ctrl);
		return (false);
	}

	*ctrlp = ctrl;
	*nsp = ns;

	return (nvme_success(nvme));
}

bool
nvme_ns_bd_attach(nvme_ns_t *ns)
{
	nvme_ctrl_t *ctrl = ns->nn_ctrl;
	nvme_ioctl_common_t com;

	(void) memset(&com, 0, sizeof (com));
	com.nioc_nsid = ns->nn_nsid;

	if (ioctl(ctrl->nc_fd, NVME_IOC_ATTACH, &com) != 0) {
		int e = errno;
		return (nvme_ioctl_syserror(ctrl, e, "namespace attach"));
	}

	if (com.nioc_drv_err != NVME_IOCTL_E_OK) {
		return (nvme_ioctl_error(ctrl, &com, "namespace attach"));
	}

	return (nvme_ctrl_success(ctrl));
}

bool
nvme_ns_bd_detach(nvme_ns_t *ns)
{
	nvme_ctrl_t *ctrl = ns->nn_ctrl;
	nvme_ioctl_common_t com;

	(void) memset(&com, 0, sizeof (com));
	com.nioc_nsid = ns->nn_nsid;

	if (ioctl(ctrl->nc_fd, NVME_IOC_DETACH, &com) != 0) {
		int e = errno;
		return (nvme_ioctl_syserror(ctrl, e, "namespace detach"));
	}

	if (com.nioc_drv_err != NVME_IOCTL_E_OK) {
		return (nvme_ioctl_error(ctrl, &com, "namespace detach"));
	}

	return (nvme_ctrl_success(ctrl));
}
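
/*
 * A sketch of a typical blkdev attach flow, taking an explicit write lock
 * around the operation (whether the kernel mandates a lock for any given
 * operation is kernel policy; the lock here is illustrative, see the locking
 * functions below):
 *
 *	nvme_ns_t *ns;
 *
 *	if (!nvme_ns_init_by_name(ctrl, "1", &ns))
 *		...
 *	if (!nvme_ns_lock(ns, NVME_LOCK_L_WRITE, NVME_LOCK_F_DONT_BLOCK))
 *		...			// e.g. someone else holds the lock
 *	if (!nvme_ns_bd_attach(ns))
 *		...
 *	nvme_ns_unlock(ns);
 *	nvme_ns_fini(ns);
 */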

/*
 * Check for a lock programming error and upanic() if so.
 */
static void
nvme_lock_check(nvme_ctrl_t *ctrl)
{
	char msg[1024];
	int ret;
	const char *up;
	size_t ulen;
	const char *base = "fatal libnvme locking error detected";

	if (ctrl->nc_err.ne_err != NVME_ERR_LOCK_PROG) {
		return;
	}

	ret = snprintf(msg, sizeof (msg), "%s: %s (controller %p)", base,
	    ctrl->nc_err.ne_errmsg, ctrl);
	if (ret <= 0) {
		ulen = strlen(base) + 1;
		up = base;
	} else if ((size_t)ret >= sizeof (msg)) {
		ulen = sizeof (msg);
		up = msg;
	} else {
		ulen = (size_t)ret + 1;
		up = msg;
	}

	upanic(up, ulen);
}

static bool
nvme_lock_common(nvme_ctrl_t *ctrl, uint32_t nsid, nvme_lock_level_t level,
    nvme_lock_flags_t flags)
{
	nvme_ioctl_lock_t lock;
	const nvme_lock_flags_t all_flags = NVME_LOCK_F_DONT_BLOCK;

	if (level != NVME_LOCK_L_READ && level != NVME_LOCK_L_WRITE) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_FLAG, 0, "unknown "
		    "lock level: 0x%x", level));
	}

	if ((flags & ~all_flags) != 0) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_FLAG, 0, "unknown "
		    "lock flags: 0x%x", flags & ~all_flags));
	}

	(void) memset(&lock, 0, sizeof (lock));
	lock.nil_common.nioc_nsid = nsid;
	if (nsid != 0) {
		lock.nil_ent = NVME_LOCK_E_NS;
	} else {
		lock.nil_ent = NVME_LOCK_E_CTRL;
	}
	lock.nil_level = level;
	lock.nil_flags = flags;

	if (ioctl(ctrl->nc_fd, NVME_IOC_LOCK, &lock) != 0) {
		int e = errno;
		return (nvme_ioctl_syserror(ctrl, e, "lock"));
	}

	if (lock.nil_common.nioc_drv_err != NVME_IOCTL_E_OK) {
		(void) nvme_ioctl_error(ctrl, &lock.nil_common, "lock");
		nvme_lock_check(ctrl);
		return (false);
	}

	return (nvme_ctrl_success(ctrl));
}
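
/*
 * Usage sketch for the locking entry points defined below: a non-blocking
 * write lock on a whole controller, released once the exclusive work is done:
 *
 *	if (!nvme_ctrl_lock(ctrl, NVME_LOCK_L_WRITE, NVME_LOCK_F_DONT_BLOCK))
 *		...			// e.g. someone else holds the lock
 *	...				// perform exclusive operations
 *	nvme_ctrl_unlock(ctrl);
 */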

/*
 * You may reasonably be wondering why unlock returns void and why we basically
 * panic everywhere. The reality is twofold. The first part of this is that we
 * know from experience in libc that error checking mutexes are not the most
 * common and the kernel simplicity of mutex_enter() and mutex_exit() is really
 * a boon. The second piece here is that, the way the ioctl path works here,
 * only programming errors or mischief in the library could cause this to fail
 * at the raw ioctl / errno level. That is, EBADF/EFAULT, etc. are our fault,
 * and if you cannot unlock because of that you're not going to get much
 * further.
 */
void
nvme_unlock_common(nvme_ctrl_t *ctrl, uint32_t nsid)
{
	nvme_ioctl_unlock_t unlock;

	(void) memset(&unlock, 0, sizeof (unlock));
	unlock.niu_common.nioc_nsid = nsid;
	if (nsid != 0) {
		unlock.niu_ent = NVME_LOCK_E_NS;
	} else {
		unlock.niu_ent = NVME_LOCK_E_CTRL;
	}

	/*
	 * Because all unlock ioctl errors are promoted to a fatal programming
	 * error below, we don't bother calling nvme_ioctl_syserror() here.
	 */
	if (ioctl(ctrl->nc_fd, NVME_IOC_UNLOCK, &unlock) != 0) {
		int e = errno;
		(void) nvme_ctrl_error(ctrl, NVME_ERR_LOCK_PROG, e, "internal "
		    "programming error: failed to issue unlock ioctl: %s",
		    strerror(e));
		nvme_lock_check(ctrl);
		return;
	}

	if (unlock.niu_common.nioc_drv_err != NVME_IOCTL_E_OK) {
		(void) nvme_ioctl_error(ctrl, &unlock.niu_common, "unlock");
		/*
		 * Promote any other failure to a new fatal failure. Consumers
		 * expect this to have worked.
		 */
		if (ctrl->nc_err.ne_err != NVME_ERR_LOCK_PROG) {
			nvme_err_data_t err;
			nvme_ctrl_err_save(ctrl, &err);
			(void) nvme_ctrl_error(ctrl, NVME_ERR_LOCK_PROG, 0,
			    "internal programming error: received unexpected "
			    "libnvme error 0x%x: %s", err.ne_err,
			    err.ne_errmsg);
		}
		nvme_lock_check(ctrl);
		return;
	}

	(void) nvme_ctrl_success(ctrl);
}

bool
nvme_ctrl_lock(nvme_ctrl_t *ctrl, nvme_lock_level_t level,
    nvme_lock_flags_t flags)
{
	return (nvme_lock_common(ctrl, 0, level, flags));
}

bool
nvme_ns_lock(nvme_ns_t *ns, nvme_lock_level_t level,
    nvme_lock_flags_t flags)
{
	return (nvme_lock_common(ns->nn_ctrl, ns->nn_nsid, level, flags));
}

void
nvme_ctrl_unlock(nvme_ctrl_t *ctrl)
{
	nvme_unlock_common(ctrl, 0);
}

void
nvme_ns_unlock(nvme_ns_t *ns)
{
	nvme_unlock_common(ns->nn_ctrl, ns->nn_nsid);
}