/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2025 Oxide Computer Company
 */

/*
 * Programmatic interface to NVMe Devices
 *
 * libnvme exists to provide a means of performing non-I/O related operations
 * on an NVMe device. This is intended to allow software, regardless of whether
 * it is part of illumos or not, to operate on NVMe devices and perform most of
 * the administrative and operator tasks that might come up. This library does
 * not provide a stable interface yet. The rest of this block comment describes
 * the organization of the library and the background on why it looks the way
 * it does.
 *
 * --------------------
 * Library Organization
 * --------------------
 *
 * There are two large classes of source files that make up this library
 * currently:
 *
 * 1. Source code that implements the library's interfaces is found alongside
 *    this file in lib/libnvme/common. This code is generally organized around
 *    the portion of the NVMe specification that it implements. So for example,
 *    code that implements logic related to features is found in
 *    libnvme_feature.c, formatting namespaces in libnvme_format.c, log pages
 *    in libnvme_log.c, etc. All files in the library begin with 'libnvme_' as
 *    a way to help namespace the file names from the second set of files.
 *
 * 2. Validation logic that is shared between libnvme and the kernel is found
 *    in common/nvme/. While the kernel must validate requests regardless, we
 *    leverage this shared information as a means for trying to ensure that we
 *    have useful errors early. That code is factored in a way to facilitate
 *    easier unit testing.
 *
 * Because of the nature of this split, all of the opaque structures that we
 * create and their relationships are maintained in the library (group 1). All
 * of the logic in group 2 is designed as constant data tables and functions
 * that are fed information about the controller they are operating on in
 * order to answer questions about it.
 *
 * There are several general classes of interfaces and related structures that
 * we have in the library. We break them into the following general categories
 * based on their purpose:
 *
 * DISCOVERY
 *
 * One of the large responsibilities of this library is helping someone
 * discover information about something, whether that be a controller, a
 * namespace, a log page, a feature, a unique command, etc. Information about
 * one of these items is contained in a generally opaque discovery structure,
 * for example, the nvme_log_disc_t.
 *
 * The goal of these structures is to contain all of the metadata for working
 * with the object in question. Continuing with the log page discovery example,
 * it can tell us information about what fields are required, whether or not
 * the log might be supported, whether it operates on a controller, a
 * namespace, or something else, as well as more human-usable things such as
 * names and descriptions.
 *
 * Discovery objects are both for humans and for programmatic consumption.
 * There are several cases where requests can be created directly from
 * discovery objects. A well designed discovery object can allow a general
 * implementation of a consumer such as nvmeadm to build up a request without
 * having to hardcode everything about what is needed for each request (though
 * most consumers still need to have information about the actual contents,
 * meaning, and semantics of a log or feature).
 *
 * Discovery objects are obtained in two general ways. The first is using one
 * of the iterator/callback based functions to discover a given class of data.
 * The second path is that several of the functions which operate based on the
 * name of something, e.g. nvme_log_req_init_by_name(),
 * nvme_get_feat_req_init_by_name(), etc. will return a discovery object.
 *
 * When a discovery object is returned based on iteration (more below), the
 * memory is owned by the iterator. When it is returned by a request
 * initialization function, then it has its own lifetime and must be freed. We
 * try to make this distinction clear in the API based on whether or not the
 * discovery object is 'const'.
 *
 * All discovery objects should be fully filled out before they are handed back
 * to a caller. It is an explicit design goal that every function that gets
 * data from the discovery structure operates on a const version of the
 * pointer. This is the hint that you cannot perform additional I/O or related
 * operations after handing out the discovery structure. Attempts to loosen
 * this constraint should be considered carefully due to how we communicate
 * ownership.
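 *
 * As a rough sketch of the second path (error handling elided), a discovery
 * object handed back by a request initialization function such as
 * nvme_log_req_init_by_name() is owned by the caller and must be freed, here
 * assuming nvme_log_disc_free() as the matching free function:
 *
 *	nvme_log_disc_t *disc;
 *	nvme_log_req_t *req;
 *
 *	if (!nvme_log_req_init_by_name(ctrl, "health", 0, &disc, &req)) {
 *		... error information is on the nvme_ctrl_t ...
 *	}
 *
 *	... use disc to learn about the log page, use req to fetch it ...
 *
 *	nvme_log_req_fini(req);
 *	nvme_log_disc_free(disc);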
 *
 * ITERATORS
 *
 * A common pattern in the library is iterating over items. This includes
 * controllers and namespaces, but also discovering which specific logs,
 * commands, features, etc. are actually supported by the device. Iteration
 * always follows the same general pattern:
 *
 * 1. An iterator is initialized with a call to nvme_<name>_discover_init().
 *    This will generally return a structure of the form nvme_<name>_iter_t.
 *    This structure contains the memory for the corresponding value that is
 *    returned from the step function in (2).
 *
 * 2. To actually pull values out of an iterator, one must call the
 *    nvme_<name>_step() function for the iterator. This will return a
 *    corresponding nvme_<name>_disc_t structure that is opaque and has a
 *    suite of functions that are usable for getting information out of it.
 *    This structure is valid only until the next time nvme_<name>_step() is
 *    called. The return value of step indicates the state of the data: whether
 *    an error occurred, the iterator has finished, or we successfully stepped
 *    and the data is filled out.
 *
 * If discovery data needs to outlive a given iteration, then it can be
 * duplicated, which gives it a separate lifetime, though that comes with the
 * responsibility that it must then be freed.
 *
 * 3. To finish using iterators, one finally calls the corresponding
 *    nvme_<name>_discover_fini(). That will deallocate the iterator structure
 *    and finish everything up. A concrete example of this pattern follows.
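 *
 * Using the controller discovery functions implemented in this file, the
 * pattern looks roughly as follows (error handling abbreviated):
 *
 *	nvme_ctrl_iter_t *iter;
 *	const nvme_ctrl_disc_t *disc;
 *	nvme_iter_t ret;
 *
 *	if (!nvme_ctrl_discover_init(nvme, &iter)) {
 *		... error information is on the nvme_t ...
 *	}
 *
 *	while ((ret = nvme_ctrl_discover_step(iter, &disc)) ==
 *	    NVME_ITER_VALID) {
 *		... use disc, e.g. nvme_ctrl_disc_devi(disc) ...
 *	}
 *
 *	nvme_ctrl_discover_fini(iter);
 *	if (ret == NVME_ITER_ERROR) {
 *		... the error has been set on the nvme_t ...
 *	}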
 *
 * REQUESTS
 *
 * One of the chief goals of this library is to be able to perform requests.
 * Each request has a structure that can be initialized, filled out, and then
 * executed. A request structure can be reused multiple times with minor
 * adjustments in-between (though changes aren't required). Request structures
 * are either initialized in a blank mode where every value must be filled out,
 * or they can be initialized through their discovery object (or the common
 * name of such an object).
 *
 * When a request structure is initialized through a discovery object, it
 * automatically sets several of the fields, knows which ones are still
 * required to be set, and which fields cannot be set. For example, if you
 * create a get log page request from a log discovery object, it will not allow
 * you to change the log page you're requesting; however, in return you don't
 * have to specify the command set interface or log identifier.
 *
 * Request objects are tied to a controller. See 'Parallelism, Thread Safety,
 * and Errors' for more information.
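 *
 * As a sketch (again using the log request interfaces named above, with error
 * handling elided), fetching the 512-byte health log page might look roughly
 * like the following:
 *
 *	nvme_log_disc_t *disc;
 *	nvme_log_req_t *req;
 *	uint8_t buf[512];
 *
 *	if (!nvme_log_req_init_by_name(ctrl, "health", 0, &disc, &req)) {
 *		...
 *	}
 *
 *	if (!nvme_log_req_set_output(req, buf, sizeof (buf)) ||
 *	    !nvme_log_req_exec(req)) {
 *		...
 *	}
 *
 *	nvme_log_req_fini(req);
 *	nvme_log_disc_free(disc);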
 *
 * INFORMATION SNAPSHOTS
 *
 * To get information about a namespace or controller, one has to take an
 * information snapshot. Once an information snapshot is obtained, this
 * snapshot answers all questions about the controller with a mostly consistent
 * set of point-in-time data. The main reason for this design was to try and
 * simplify where errors can occur and to provide a straightforward
 * serialization point, so that the raw underlying data could be gathered on
 * one system and then interpreted later on another.
 *
 * The only fallible operations on a snapshot are those that ask about things
 * which are not guaranteed to exist for all NVMe controllers.
 *
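 * For example, version-gated questions can be asked of a snapshot without any
 * further device I/O (a sketch assuming the snapshot entry points
 * nvme_ctrl_info_snap() and nvme_ctrl_info_free();
 * nvme_vers_ctrl_info_atleast() is implemented in this file):
 *
 *	nvme_ctrl_info_t *info;
 *
 *	if (!nvme_ctrl_info_snap(ctrl, &info)) {
 *		... error information is on the nvme_ctrl_t ...
 *	}
 *
 *	if (nvme_vers_ctrl_info_atleast(info, &nvme_vers_1v2)) {
 *		... ask NVMe 1.2-specific questions of the snapshot ...
 *	}
 *
 *	nvme_ctrl_info_free(info);
 *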
 * LIBRARY, CONTROLLER, NAMESPACE, and SNAPSHOT HANDLES
 *
 * The last major set of types used in this library are opaque handles. As you
 * might have guessed given the request structures, all of the objects which
 * represent something are opaque. Each library handle is independent of the
 * others and each controller handle is independent of the others. In general,
 * it is expected that only a single controller handle is used at a given time
 * for a given library handle, but this is not currently enforced. Error
 * information and parallelism are tied into this; see 'Parallelism, Thread
 * Safety, and Errors' for more information.
 *
 * -----------------
 * Opaque Structures
 * -----------------
 *
 * One of the things that might stand out in libnvme is the use of opaque
 * structures everywhere, with functions to access every arbitrary piece of
 * data. This, and the function pattern around building up a request, were
 * done to try and deal with the evolutionary nature of the NVMe
 * specification. If you look at the various requests, with the exception of
 * firmware download, almost every request has added additional features
 * through the spec revisions. NVMe 2.0 changed most things again with the
 * requirement to specify the command set interface.
 *
 * While the way that the NVMe specification has done this is quite reasonable,
 * it makes it much more difficult to use a traditional series of arguments to
 * functions or a structure without having to try to version the symbol through
 * clever games. If instead we accept that the specification will change and
 * that the specification is always taking these additional arguments out of
 * values that must be zero, then an opaque request structure where you have to
 * make an explicit function call and recompile to get slightly different
 * behavior is mostly reasonable. We may not be able to be perfect given we're
 * at the mercy of the specification, but at least this is better than the
 * alternative.
 *
 * This is ultimately why all the request structures are opaque and use a
 * pseudo-builder pattern to fill out the request information. Further evidence
 * for this point is that there was no way to avoid changing every kernel
 * structure here while retaining semantic operations. No one wants to manually
 * assemble cdw12-15 here. That's not how we can add value in the library.
 *
 * Similarly, for all discovery objects we ended up utilizing opaque objects.
 * The main reason here is that we want to be able to embed this library as a
 * committed interface in other languages, and having the discovery structures
 * be something that everyone can see means it'll be harder to extend them.
 * While this concern is somewhat more theoretical given the iterator pattern,
 * given the other bits in the request structure we decided to lean into the
 * opaqueness.
 *
 * --------------------------------------
 * Parallelism, Thread Safety, and Errors
 * --------------------------------------
 *
 * One of the library's major design points is how we achieve thread safety,
 * how ownership works, where errors appear, and what degree of parallelism is
 * achievable. To work through this we look at a few different things:
 *
 * 1. The degree to which the hardware allows for parallelism
 * 2. The degree to which users might desire parallelism
 * 3. The ergonomics of getting and storing errors
 *
 * The NVMe specification allows for different degrees of admin command
 * parallelism on a per-command basis. This is discoverable, but the main point
 * is that there is a class of commands where only one can be outstanding at a
 * time, which likely covers most of the destructive commands like Format NVM,
 * Activate Firmware, etc. Our expectation to some extent is that most admin
 * queue commands don't need to be issued in parallel; however, beyond how we
 * structure the library and error handling, we don't try to enforce that here.
 * The kernel does do some enforcement by requiring mandatory write locks to
 * perform some operations.
 *
 * When we get to how folks want to use this, during the initial design phase
 * we mostly theorized based on how nvmeadm is using it today and how various
 * daemons like a FRU monitor or an appliance kit's software might want to
 * interact with it. Our general starting assumption is that it's very
 * reasonable for each discovered controller to be handled in parallel, but
 * that operations on a controller itself are likely serial, given that we're
 * not issuing I/O through this mechanism. If we were, then that'd be an
 * entirely different set of constraints.
 *
 * To discuss the perceived ergonomics, we need to first discuss what error
 * information we want to be able to have. It's an important goal of both the
 * NVMe driver and this library to give useful semantic errors. In particular,
 * for any operation we want to make sure that we include the following
 * information:
 *
 * o A hopefully distinguishable semantic error
 * o Saving errno as a system error if relevant (e.g. if open(2) failed)
 * o A message for humans that gives more specifics about what happened and is
 *   intended to be passed along to the output of a command or another error
 *   message.
 * o If a controller error occurs, we want to be able to provide the
 *   controller's sc (status code) and sct (status code type).
 *
 * With this we get to the questions around ergonomics and related matters,
 * which are entirely subjective: given that we want to capture that
 * information, how do we best do so with the tooling that we have? When the
 * library was first being prototyped, all errors were on the nvme_t, basically
 * the top-level handle. This meant that each operation on a controller had to
 * be done serially or you would have to use different handles. However, the
 * simplicity was that there was one thing to check.
 *
 * This evolution changed slightly when we introduced information snapshots.
 * Because the information snapshots are meant to be separate entities whose
 * lifetime can extend beyond the nvme_t library handle, they ended up
 * developing their own error codes and functions. This has been okay because
 * there aren't too many use cases there, though the need to duplicate error
 * handling functions is a bit painful.
 *
 * From there, we considered what it would look like if each request had its
 * own error information that could be extracted. That would turn into a lot
 * of functions to get at that data. The controller's allowed parallelism for
 * admin commands also varies with each command: some commands may only occur
 * when there are no other admin commands outstanding on the controller, and
 * others only when there is nothing outstanding on the namespace. Due to that
 * nuance, per-request errors would force the consumer to understand the
 * controller's specifics more than is often necessary for a given request. To
 * add to that, it'd also just be a pain to get all the error information out
 * in a different way, and the consumers we started writing in this fashion
 * were not looking good.
 *
 * We also considered whether we could consolidate all the error functions on
 * each request into one structure that we get, but that didn't move the needle
 * too much. It also raised some more concerns around how we minimize races and
 * how data changes around that.
 *
 * So all of this led us to our current compromise position: we allow for
 * parallelism at the controller level. More specifically:
 *
 * 1. Operations which take the nvme_t handle set errors on it and must
 *    operate serially. That is, the nvme_t should only be used from one
 *    thread at any given time, but it may move between threads. Errors are
 *    set on it.
 *
 * 2. The nvme_ctrl_t has its own error information. A given nvme_ctrl_t
 *    should only be used serially; however, different ones can be used in
 *    parallel. A controller doesn't guarantee exclusivity. That requires an
 *    explicit locking operation.
 *
 * 3. Both request structures and namespaces place their errors on the
 *    corresponding controller that they were created from. Therefore the
 *    per-controller serialization in (2) applies here as well. If two
 *    requests are tied to different controllers, they can proceed in
 *    parallel.
 *
 * 4. Once a controller or namespace snapshot is obtained, they fall into a
 *    similar pattern: each one can be operated on in parallel, but generally
 *    one should only operate on a single one serially.
 *
 * Other than the constraints defined above, the library does not care which
 * thread an operation occurs on; handles can be moved to wherever they need
 * to be. Locking and related enforcement in the kernel is based on the open
 * file descriptor to the controller.
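 *
 * Point (3) above means that error checking after a failed request looks
 * roughly like the following (a sketch assuming per-controller error
 * accessors named nvme_ctrl_err() and nvme_ctrl_errmsg()):
 *
 *	if (!nvme_log_req_exec(req)) {
 *		(void) fprintf(stderr, "failed to get log page: %s (0x%x)\n",
 *		    nvme_ctrl_errmsg(ctrl), nvme_ctrl_err(ctrl));
 *	}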
 *
 * ----------------
 * Field Validation
 * ----------------
 *
 * Every request is made up of fields that correspond to parts of the NVMe
 * specification. Our requests operate in terms of the logical fields that we
 * opt to expose and that the kernel knows how to consume. In general, we don't
 * expose the raw cdw values that make up the commands (except for the vendor
 * unique commands or arguments that are explicitly that way a la get
 * features). While operating on raw cdw arguments would be a simple way to
 * create ABI stability, it would leave everyone having to break up all the
 * fields themselves and, we believe, end up somewhat more error prone than
 * the interfaces we expose today.
 *
 * Requests are created in one of two ways today: they are either initialized
 * from corresponding discovery data, e.g. nvme_log_req_init_by_disc() and
 * nvme_get_feat_req_init_by_name(), or one creates a raw request a la
 * nvme_get_feat_req_init(). In the former cases, we fill out a bunch of the
 * fields that would normally need to be set, such as the log or feature ID.
 * We also note which fields are allowed and expected. For example, the health
 * log page does not take or expect an lsp (log specific parameter) or
 * related, and therefore we can flag that with an _UNUSE class error.
 * Conversely, requests that are created from their raw form will not have any
 * such error checking performed until they are finalized and checked by the
 * kernel. The set of fields that can be set in a request is usually tracked
 * in the structure with a member of the form <prefix>_allow.
 *
 * One set of library error checking that is uniform between both types is
 * that of missing fields. There are minimum fields that must be set for
 * different types of requests. That check will always be performed regardless
 * of the path that is taken through the system. Tracking which members must
 * still be set is done by a member of the form <prefix>_need.
 *
 * When we perform validation, we try to push the vast majority of it into the
 * common validation code that is shared between the kernel and userland. This
 * is wrapped up through the nvme_field_check_one() logic. The common code
 * will check if the field is supported by the controller (generating an
 * _UNSUP class error if not) and if the value of the field is within a valid
 * range (generating a _RANGE class error if not).
 *
 * While we try to fold as many of these checks into the common code as
 * possible, it isn't perfect and some things have to be checked outside of
 * that. Those consist of the following general cases:
 *
 * 1) Items that are not semantically fields in the actual command but are
 * things that we are tracking ourselves in the library. An example of this
 * would be fields in the vuc request structure that we are synthesizing
 * ourselves.
 *
 * 2) While the field logic has the specifics of what controller is being
 * operated upon, it doesn't have all the knowledge of what things can be
 * combined or not. It can answer the specifics about its field, but cannot
 * look at the broader request.
 *
 * As a result, there are some duplicated checks in the library and the
 * kernel, though several are left just to the kernel. However, the vast
 * majority of validation does happen through these common routines, which
 * leaves the library's nvme_<type>_req_set_<field> functions generally
 * wrappers around calling the common checking code and updating our tracking
 * of which fields are set or not so we can issue an ioctl.
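 *
 * In other words, a typical setter is a thin wrapper shaped roughly like the
 * following (a hypothetical sketch; the request type, member, and constant
 * names here are illustrative, not actual definitions):
 *
 *	bool
 *	nvme_xxx_req_set_foo(nvme_xxx_req_t *req, uint32_t foo)
 *	{
 *		if (!nvme_field_check_one(<controller, field, and value>)) {
 *			return (false);
 *		}
 *
 *		req->xr_foo = foo;
 *		req->xr_need &= ~(1 << NVME_XXX_REQ_FIELD_FOO);
 *		return (nvme_ctrl_success(req->xr_ctrl));
 *	}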
 */

#include <stdlib.h>
#include <stdarg.h>
#include <libdevinfo.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <upanic.h>

#include "libnvme_impl.h"

bool
nvme_vers_ctrl_atleast(const nvme_ctrl_t *ctrl, const nvme_version_t *targ)
{
	return (nvme_vers_atleast(&ctrl->nc_vers, targ));
}

bool
nvme_vers_ctrl_info_atleast(const nvme_ctrl_info_t *ci,
    const nvme_version_t *targ)
{
	return (nvme_vers_atleast(&ci->nci_vers, targ));
}

bool
nvme_vers_ns_info_atleast(const nvme_ns_info_t *info,
    const nvme_version_t *targ)
{
	return (nvme_vers_atleast(&info->nni_vers, targ));
}

bool
nvme_guid_valid(const nvme_ctrl_t *ctrl, const uint8_t guid[16])
{
	const uint8_t zero_guid[16] = { 0 };

	return (nvme_vers_ctrl_atleast(ctrl, &nvme_vers_1v2) &&
	    memcmp(zero_guid, guid, sizeof (zero_guid)) != 0);
}

bool
nvme_eui64_valid(const nvme_ctrl_t *ctrl, const uint8_t eui64[8])
{
	const uint8_t zero_eui[8] = { 0 };

	return (nvme_vers_ctrl_atleast(ctrl, &nvme_vers_1v1) &&
	    memcmp(zero_eui, eui64, sizeof (zero_eui)) != 0);
}

int
nvme_format_nguid(const uint8_t nguid[16], char *buf, size_t len)
{
	return (snprintf(buf, len, "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X"
	    "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X",
	    nguid[0], nguid[1], nguid[2], nguid[3], nguid[4], nguid[5],
	    nguid[6], nguid[7], nguid[8], nguid[9], nguid[10], nguid[11],
	    nguid[12], nguid[13], nguid[14], nguid[15]));
}

int
nvme_format_eui64(const uint8_t eui64[8], char *buf, size_t len)
{
	return (snprintf(buf, len, "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X",
	    eui64[0], eui64[1], eui64[2], eui64[3], eui64[4], eui64[5],
	    eui64[6], eui64[7]));
}

void
nvme_fini(nvme_t *nvme)
{
	if (nvme == NULL)
		return;

	if (nvme->nh_devinfo != DI_NODE_NIL) {
		di_fini(nvme->nh_devinfo);
	}

	free(nvme);
}

nvme_t *
nvme_init(void)
{
	nvme_t *nvme;

	nvme = calloc(1, sizeof (nvme_t));
	if (nvme == NULL) {
		return (NULL);
	}

	nvme->nh_devinfo = di_init("/", DINFOCPYALL);
	if (nvme->nh_devinfo == DI_NODE_NIL) {
		nvme_fini(nvme);
		return (NULL);
	}

	return (nvme);
}

void
nvme_ctrl_discover_fini(nvme_ctrl_iter_t *iter)
{
	free(iter);
}

nvme_iter_t
nvme_ctrl_discover_step(nvme_ctrl_iter_t *iter, const nvme_ctrl_disc_t **discp)
{
	di_minor_t m;

	*discp = NULL;
	if (iter->ni_done) {
		return (NVME_ITER_DONE);
	}

	for (;;) {
		if (iter->ni_cur == NULL) {
			iter->ni_cur = di_drv_first_node("nvme",
			    iter->ni_nvme->nh_devinfo);
		} else {
			iter->ni_cur = di_drv_next_node(iter->ni_cur);
		}

		if (iter->ni_cur == NULL) {
			iter->ni_done = true;
			return (NVME_ITER_DONE);
		}

		for (m = di_minor_next(iter->ni_cur, DI_MINOR_NIL);
		    m != DI_MINOR_NIL; m = di_minor_next(iter->ni_cur, m)) {
			if (strcmp(di_minor_nodetype(m),
			    DDI_NT_NVME_NEXUS) == 0) {
				break;
			}
		}

		if (m == DI_MINOR_NIL) {
			continue;
		}

		iter->ni_disc.ncd_devi = iter->ni_cur;
		iter->ni_disc.ncd_minor = m;
		*discp = &iter->ni_disc;
		return (NVME_ITER_VALID);
	}

	return (NVME_ITER_DONE);
}

bool
nvme_ctrl_discover_init(nvme_t *nvme, nvme_ctrl_iter_t **iterp)
{
	nvme_ctrl_iter_t *iter;

	if (iterp == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid nvme_ctrl_iter_t output pointer: %p", iterp));
	}

	iter = calloc(1, sizeof (nvme_ctrl_iter_t));
	if (iter == NULL) {
		int e = errno;
		return (nvme_error(nvme, NVME_ERR_NO_MEM, e, "failed to "
		    "allocate memory for a new nvme_ctrl_iter_t: %s",
		    strerror(e)));
	}
	iter->ni_nvme = nvme;
	*iterp = iter;
	return (nvme_success(nvme));
}

bool
nvme_ctrl_discover(nvme_t *nvme, nvme_ctrl_disc_f func, void *arg)
{
	nvme_ctrl_iter_t *iter;
	const nvme_ctrl_disc_t *disc;
	nvme_iter_t ret;

	if (func == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid nvme_ctrl_disc_f function pointer: %p", func));
	}

	if (!nvme_ctrl_discover_init(nvme, &iter)) {
		return (false);
	}

	while ((ret = nvme_ctrl_discover_step(iter, &disc)) ==
	    NVME_ITER_VALID) {
		if (!func(nvme, disc, arg))
			break;
	}

	nvme_ctrl_discover_fini(iter);
	if (ret == NVME_ITER_ERROR) {
		return (false);
	}

	return (nvme_success(nvme));
}

di_node_t
nvme_ctrl_disc_devi(const nvme_ctrl_disc_t *discp)
{
	return (discp->ncd_devi);
}

di_minor_t
nvme_ctrl_disc_minor(const nvme_ctrl_disc_t *discp)
{
	return (discp->ncd_minor);
}

void
nvme_ctrl_fini(nvme_ctrl_t *ctrl)
{
	if (ctrl == NULL) {
		return;
	}

	if (ctrl->nc_sup_logs != NULL) {
		free(ctrl->nc_sup_logs);
	}

	if (ctrl->nc_sup_logs_err != NULL) {
		free(ctrl->nc_sup_logs_err);
	}

	if (ctrl->nc_devi_path != NULL) {
		di_devfs_path_free(ctrl->nc_devi_path);
	}

	if (ctrl->nc_fd >= 0) {
		(void) close(ctrl->nc_fd);
		ctrl->nc_fd = -1;
	}

	free(ctrl);
}

bool
nvme_ctrl_init(nvme_t *nvme, di_node_t di, nvme_ctrl_t **outp)
{
	const char *drv;
	int32_t inst;
	di_minor_t minor;
	char *path, buf[PATH_MAX];
	nvme_ctrl_t *ctrl;
	nvme_ioctl_ctrl_info_t ctrl_info;

	if (di == DI_NODE_NIL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid di_node_t: %p", di));
	}

	if (outp == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid nvme_ctrl_t output pointer: %p", outp));
	}
	*outp = NULL;

	drv = di_driver_name(di);
	inst = di_instance(di);
	if (drv == NULL || inst < 0) {
		return (nvme_error(nvme, NVME_ERR_BAD_DEVI, 0, "devi %s has "
		    "no driver attached", di_node_name(di)));
	}

	if (strcmp(drv, "nvme") != 0) {
		return (nvme_error(nvme, NVME_ERR_BAD_DEVI, 0, "devi %s isn't "
		    "attached to nvme, found %s", di_node_name(di), drv));
	}

	/*
	 * We have an NVMe node. Find the right minor that corresponds to the
	 * attachment point. Once we find that then we can go ahead and open a
	 * path to that and construct the device.
	 */
	minor = DI_MINOR_NIL;
	while ((minor = di_minor_next(di, minor)) != DI_MINOR_NIL) {
		if (strcmp(di_minor_nodetype(minor), DDI_NT_NVME_NEXUS) == 0) {
			break;
		}
	}

	if (minor == DI_MINOR_NIL) {
		return (nvme_error(nvme, NVME_ERR_BAD_DEVI, 0, "devi %s has "
		    "no NVMe nexus minor node", di_node_name(di)));
	}

	path = di_devfs_minor_path(minor);
	if (path == NULL) {
		int e = errno;
		return (nvme_error(nvme, NVME_ERR_LIBDEVINFO, e, "failed to "
		    "obtain /devices path for the requested minor: %s",
		    strerror(e)));
	}

	if (snprintf(buf, sizeof (buf), "/devices%s", path) >= sizeof (buf)) {
		di_devfs_path_free(path);
		return (nvme_error(nvme, NVME_ERR_INTERNAL, 0, "failed to "
		    "construct full /devices minor path, would have overflown "
		    "internal buffer"));
	}
	di_devfs_path_free(path);

	ctrl = calloc(1, sizeof (*ctrl));
	if (ctrl == NULL) {
		int e = errno;
		return (nvme_error(nvme, NVME_ERR_NO_MEM, e, "failed to "
		    "allocate memory for a new nvme_ctrl_t: %s", strerror(e)));
	}

	ctrl->nc_nvme = nvme;
	ctrl->nc_devi = di;
	ctrl->nc_minor = minor;
	ctrl->nc_inst = inst;
	ctrl->nc_fd = open(buf, O_RDWR | O_CLOEXEC);
	if (ctrl->nc_fd < 0) {
		int e = errno;
		nvme_ctrl_fini(ctrl);
		return (nvme_error(nvme, NVME_ERR_OPEN_DEV, e, "failed to "
		    "open device path %s: %s", buf, strerror(e)));
	}

	ctrl->nc_devi_path = di_devfs_path(di);
	if (ctrl->nc_devi_path == NULL) {
		int e = errno;
		nvme_ctrl_fini(ctrl);
		return (nvme_error(nvme, NVME_ERR_LIBDEVINFO, e, "failed to "
		    "obtain /devices path for the controller: %s",
		    strerror(e)));
	}

	if (!nvme_ioc_ctrl_info(ctrl, &ctrl_info)) {
		nvme_err_data_t err;

		nvme_ctrl_err_save(ctrl, &err);
		nvme_err_set(nvme, &err);
		nvme_ctrl_fini(ctrl);
		return (false);
	}

	ctrl->nc_vers = ctrl_info.nci_vers;
	ctrl->nc_info = ctrl_info.nci_ctrl_id;

	nvme_vendor_map_ctrl(ctrl);

	*outp = ctrl;
	return (nvme_success(nvme));
}

typedef struct {
	bool ncia_found;
	int32_t ncia_inst;
	nvme_ctrl_t *ncia_ctrl;
	nvme_err_data_t ncia_err;
} nvme_ctrl_init_arg_t;

bool
nvme_ctrl_init_by_instance_cb(nvme_t *nvme, const nvme_ctrl_disc_t *disc,
    void *arg)
{
	nvme_ctrl_init_arg_t *init = arg;

	if (di_instance(disc->ncd_devi) != init->ncia_inst) {
		return (true);
	}

	/*
	 * If we fail to open the controller, we need to save the error
	 * information because it's going to end up being clobbered, as this
	 * is a callback function surrounded by other libnvme callers.
	 */
	init->ncia_found = true;
	if (!nvme_ctrl_init(nvme, disc->ncd_devi, &init->ncia_ctrl)) {
		nvme_err_save(nvme, &init->ncia_err);
	}

	return (false);
}

bool
nvme_ctrl_init_by_instance(nvme_t *nvme, int32_t inst, nvme_ctrl_t **outp)
{
	nvme_ctrl_init_arg_t init;

	if (inst < 0) {
		return (nvme_error(nvme, NVME_ERR_ILLEGAL_INSTANCE, 0,
		    "encountered illegal negative instance number: %d", inst));
	}

	if (outp == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid nvme_ctrl_t output pointer: %p", outp));
	}

	init.ncia_found = false;
	init.ncia_inst = inst;
	init.ncia_ctrl = NULL;

	if (!nvme_ctrl_discover(nvme, nvme_ctrl_init_by_instance_cb, &init)) {
		return (false);
	}

	if (!init.ncia_found) {
		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
		    "failed to find NVMe controller nvme%d", inst));
	}

	/*
	 * If we don't have an NVMe controller structure but we did find the
	 * instance, then we must have had an error constructing it, which
	 * will be on our handle. We have to reconstruct the error from saved
	 * information as nvme_ctrl_discover will have clobbered it.
	 */
	if (init.ncia_ctrl == NULL) {
		nvme_err_set(nvme, &init.ncia_err);
		return (false);
	}

	*outp = init.ncia_ctrl;
	return (nvme_success(nvme));
}

bool
nvme_ctrl_devi(nvme_ctrl_t *ctrl, di_node_t *devip)
{
	*devip = ctrl->nc_devi;
	return (nvme_ctrl_success(ctrl));
}

bool
nvme_ioc_ctrl_info(nvme_ctrl_t *ctrl, nvme_ioctl_ctrl_info_t *info)
{
	(void) memset(info, 0, sizeof (nvme_ioctl_ctrl_info_t));

	if (ioctl(ctrl->nc_fd, NVME_IOC_CTRL_INFO, info) != 0) {
		int e = errno;
		return (nvme_ioctl_syserror(ctrl, e, "controller info"));
	}

	if (info->nci_common.nioc_drv_err != NVME_IOCTL_E_OK) {
		return (nvme_ioctl_error(ctrl, &info->nci_common,
		    "controller info"));
	}

	return (true);
}

bool
nvme_ioc_ns_info(nvme_ctrl_t *ctrl, uint32_t nsid, nvme_ioctl_ns_info_t *info)
{
	(void) memset(info, 0, sizeof (nvme_ioctl_ns_info_t));
	info->nni_common.nioc_nsid = nsid;

	if (ioctl(ctrl->nc_fd, NVME_IOC_NS_INFO, info) != 0) {
		int e = errno;
		return (nvme_ioctl_syserror(ctrl, e, "namespace info"));
	}

	if (info->nni_common.nioc_drv_err != NVME_IOCTL_E_OK) {
		return (nvme_ioctl_error(ctrl, &info->nni_common,
		    "namespace info"));
	}

	return (true);
}

const char *
nvme_tporttostr(nvme_ctrl_transport_t tport)
{
	switch (tport) {
	case NVME_CTRL_TRANSPORT_PCI:
		return ("PCI");
	case NVME_CTRL_TRANSPORT_TCP:
		return ("TCP");
	case NVME_CTRL_TRANSPORT_RDMA:
		return ("RDMA");
	default:
		return ("unknown transport");
	}
}

static bool
nvme_ns_discover_validate(nvme_ctrl_t *ctrl, nvme_ns_disc_level_t level)
{
	switch (level) {
	case NVME_NS_DISC_F_ALL:
	case NVME_NS_DISC_F_ALLOCATED:
	case NVME_NS_DISC_F_ACTIVE:
	case NVME_NS_DISC_F_NOT_IGNORED:
	case NVME_NS_DISC_F_BLKDEV:
		return (true);
	default:
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_FLAG, 0, "invalid "
		    "namespace discovery level specified: 0x%x", level));
	}
}

void
nvme_ns_discover_fini(nvme_ns_iter_t *iter)
{
	free(iter);
}

const char *
nvme_nsleveltostr(nvme_ns_disc_level_t level)
{
	switch (level) {
	case NVME_NS_DISC_F_ALL:
		return ("unallocated");
	case NVME_NS_DISC_F_ALLOCATED:
		return ("allocated");
	case NVME_NS_DISC_F_ACTIVE:
		return ("active");
	case NVME_NS_DISC_F_NOT_IGNORED:
		return ("not ignored");
	case NVME_NS_DISC_F_BLKDEV:
		return ("blkdev");
	default:
		return ("unknown level");
	}
}

nvme_ns_disc_level_t
nvme_ns_state_to_disc_level(nvme_ns_state_t state)
{
	if ((state & NVME_NS_STATE_ALLOCATED) == 0) {
		return (NVME_NS_DISC_F_ALL);
	}

	if ((state & NVME_NS_STATE_ACTIVE) == 0) {
		return (NVME_NS_DISC_F_ALLOCATED);
	}

	if ((state & NVME_NS_STATE_IGNORED) != 0) {
		return (NVME_NS_DISC_F_ACTIVE);
	}

	if ((state & NVME_NS_STATE_ATTACHED) == 0) {
		return (NVME_NS_DISC_F_NOT_IGNORED);
	} else {
		return (NVME_NS_DISC_F_BLKDEV);
	}
}

nvme_iter_t
nvme_ns_discover_step(nvme_ns_iter_t *iter, const nvme_ns_disc_t **discp)
{
	nvme_ctrl_t *ctrl = iter->nni_ctrl;

	if (iter->nni_err) {
		return (NVME_ITER_ERROR);
	}

	if (iter->nni_done) {
		return (NVME_ITER_DONE);
	}

	while (iter->nni_cur_idx <= ctrl->nc_info.id_nn) {
		uint32_t nsid = iter->nni_cur_idx;
		nvme_ioctl_ns_info_t ns_info = { 0 };
		nvme_ns_disc_level_t level;

		if (!nvme_ioc_ns_info(ctrl, nsid, &ns_info)) {
			iter->nni_err = true;
			return (NVME_ITER_ERROR);
		}

		iter->nni_cur_idx++;
		level = nvme_ns_state_to_disc_level(ns_info.nni_state);
		if (iter->nni_level > level) {
			continue;
		}

		(void) memset(&iter->nni_disc, 0, sizeof (nvme_ns_disc_t));
		iter->nni_disc.nnd_nsid = nsid;
		iter->nni_disc.nnd_level = level;

		if (nvme_guid_valid(ctrl, ns_info.nni_id.id_nguid)) {
			iter->nni_disc.nnd_flags |= NVME_NS_DISC_F_NGUID_VALID;
			(void) memcpy(iter->nni_disc.nnd_nguid,
			    ns_info.nni_id.id_nguid,
			    sizeof (ns_info.nni_id.id_nguid));
		}

		if (nvme_eui64_valid(ctrl, ns_info.nni_id.id_eui64)) {
			iter->nni_disc.nnd_flags |= NVME_NS_DISC_F_EUI64_VALID;
			(void) memcpy(iter->nni_disc.nnd_eui64,
			    ns_info.nni_id.id_eui64,
			    sizeof (ns_info.nni_id.id_eui64));
		}

		*discp = &iter->nni_disc;
		return (NVME_ITER_VALID);
	}

	iter->nni_done = true;
	return (NVME_ITER_DONE);
}

bool
nvme_ns_discover_init(nvme_ctrl_t *ctrl, nvme_ns_disc_level_t level,
    nvme_ns_iter_t **iterp)
{
	nvme_ns_iter_t *iter;

	if (!nvme_ns_discover_validate(ctrl, level)) {
		return (false);
	}

	if (iterp == NULL) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
		    "encountered invalid nvme_ns_iter_t output pointer: %p",
		    iterp));
	}

	iter = calloc(1, sizeof (nvme_ns_iter_t));
	if (iter == NULL) {
		int e = errno;
		return (nvme_ctrl_error(ctrl, NVME_ERR_NO_MEM, e, "failed to "
		    "allocate memory for a new nvme_ns_iter_t: %s",
		    strerror(e)));
	}

	iter->nni_ctrl = ctrl;
	iter->nni_level = level;
	iter->nni_cur_idx = 1;

	*iterp = iter;
	return (nvme_ctrl_success(ctrl));
}

bool
nvme_ns_discover(nvme_ctrl_t *ctrl, nvme_ns_disc_level_t level,
    nvme_ns_disc_f func, void *arg)
{
	nvme_ns_iter_t *iter;
	nvme_iter_t ret;
	const nvme_ns_disc_t *disc;

	if (!nvme_ns_discover_validate(ctrl, level)) {
		return (false);
	}

	if (func == NULL) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
		    "encountered invalid nvme_ns_disc_f function pointer: %p",
		    func));
	}

	if (!nvme_ns_discover_init(ctrl, level, &iter)) {
		return (false);
	}

	while ((ret = nvme_ns_discover_step(iter, &disc)) == NVME_ITER_VALID) {
		if (!func(ctrl, disc, arg))
			break;
	}

	nvme_ns_discover_fini(iter);
	if (ret == NVME_ITER_ERROR) {
		return (false);
	}

	return (nvme_ctrl_success(ctrl));
}

uint32_t
nvme_ns_disc_nsid(const nvme_ns_disc_t *discp)
{
	return (discp->nnd_nsid);
}

nvme_ns_disc_level_t
nvme_ns_disc_level(const nvme_ns_disc_t *discp)
{
	return (discp->nnd_level);
}

nvme_ns_disc_flags_t
nvme_ns_disc_flags(const nvme_ns_disc_t *discp)
{
	return (discp->nnd_flags);
}

const uint8_t *
nvme_ns_disc_eui64(const nvme_ns_disc_t *discp)
{
	if ((discp->nnd_flags & NVME_NS_DISC_F_EUI64_VALID) == 0) {
		return (NULL);
	}

	return (discp->nnd_eui64);
}

const uint8_t *
nvme_ns_disc_nguid(const nvme_ns_disc_t *discp)
{
	if ((discp->nnd_flags & NVME_NS_DISC_F_NGUID_VALID) == 0) {
		return (NULL);
	}

	return (discp->nnd_nguid);
}

void
nvme_ns_fini(nvme_ns_t *ns)
{
	free(ns);
}

bool
nvme_ns_init(nvme_ctrl_t *ctrl, uint32_t nsid, nvme_ns_t **nsp)
{
	nvme_ns_t *ns;

	if (nsp == NULL) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
		    "encountered invalid nvme_ns_t output pointer: %p", nsp));
	}

	if (nsid < NVME_NSID_MIN || nsid > ctrl->nc_info.id_nn) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_NS_RANGE, 0,
		    "requested namespace 0x%x is invalid, valid namespaces "
		    "are [0x%x, 0x%x]", nsid, NVME_NSID_MIN,
		    ctrl->nc_info.id_nn));
	}

	ns = calloc(1, sizeof (nvme_ns_t));
	if (ns == NULL) {
		int e = errno;
		return (nvme_ctrl_error(ctrl, NVME_ERR_NO_MEM, e, "failed to "
		    "allocate memory for a new nvme_ns_t: %s", strerror(e)));
	}

	ns->nn_ctrl = ctrl;
	ns->nn_nsid = nsid;

	*nsp = ns;
	return (nvme_ctrl_success(ctrl));
}

typedef struct {
	nvme_ctrl_t *nnia_ctrl;
	const char *nnia_name;
	bool nnia_found;
	nvme_ns_t *nnia_ns;
	nvme_err_data_t nnia_err;
} nvme_ns_init_arg_t;

static bool
nvme_ns_init_by_name_cb(nvme_ctrl_t *ctrl, const nvme_ns_disc_t *disc,
    void *arg)
{
	nvme_ns_init_arg_t *init = arg;
	char buf[NVME_NGUID_NAMELEN];
	CTASSERT(NVME_NGUID_NAMELEN > NVME_EUI64_NAMELEN);

	if ((disc->nnd_flags & NVME_NS_DISC_F_NGUID_VALID) != 0) {
		(void) nvme_format_nguid(disc->nnd_nguid, buf, sizeof (buf));
		if (strcasecmp(init->nnia_name, buf) == 0)
			goto match;
	}

	if ((disc->nnd_flags & NVME_NS_DISC_F_EUI64_VALID) != 0) {
		(void) nvme_format_eui64(disc->nnd_eui64, buf, sizeof (buf));
		if (strcasecmp(init->nnia_name, buf) == 0)
			goto match;
	}

	(void) snprintf(buf, sizeof (buf), "%u", disc->nnd_nsid);
	if (strcasecmp(init->nnia_name, buf) == 0)
		goto match;

	return (true);

match:
	init->nnia_found = true;
	if (!nvme_ns_init(ctrl, disc->nnd_nsid, &init->nnia_ns)) {
		nvme_ctrl_err_save(ctrl, &init->nnia_err);
	}

	return (false);
}

/*
 * Attempt to find a namespace by 'name'. A name could be the NGUID, EUI64, or
 * just the plain old namespace ID.
 */
bool
nvme_ns_init_by_name(nvme_ctrl_t *ctrl, const char *ns_name, nvme_ns_t **nsp)
{
	nvme_ns_init_arg_t init;

	if (ns_name == NULL) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
		    "encountered invalid namespace name: %p", ns_name));
	}

	if (nsp == NULL) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
		    "encountered invalid nvme_ns_t output pointer: %p", nsp));
	}

	init.nnia_ctrl = ctrl;
	init.nnia_name = ns_name;
	init.nnia_found = false;
	init.nnia_ns = NULL;

	if (!nvme_ns_discover(ctrl, NVME_NS_DISC_F_ALL,
	    nvme_ns_init_by_name_cb, &init)) {
		return (false);
	}

	if (!init.nnia_found) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_NS_RANGE, 0, "failed "
		    "to find NVMe namespace %s on nvme%d", ns_name,
		    ctrl->nc_inst));
	}

	if (init.nnia_ns == NULL) {
		nvme_ctrl_err_set(ctrl, &init.nnia_err);
		return (false);
	}

	*nsp = init.nnia_ns;
	return (nvme_ctrl_success(ctrl));
}

bool
nvme_ctrl_ns_init(nvme_t *nvme, const char *name, nvme_ctrl_t **ctrlp,
    nvme_ns_t **nsp)
{
	const char *slash, *ns_name;
	char *eptr;
	nvme_ctrl_t *ctrl;
	nvme_ns_t *ns;
	unsigned long inst;
	size_t ctrl_namelen;

	if (name == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid name to search for: %p", name));
	}

	/*
	 * We require a controller, but the namespace output pointer is only
	 * required if we end up having a namespace present.
	 */
	if (ctrlp == NULL) {
		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
		    "invalid nvme_ctrl_t output pointer: %p", ctrlp));
	}

	slash = strchr(name, '/');
	if (slash != NULL) {
		ctrl_namelen = (uintptr_t)slash - (uintptr_t)name;
		ns_name = slash + 1;

		if (nsp == NULL) {
			return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0,
			    "encountered invalid nvme_ns_t output pointer: %p",
			    nsp));
		}
	} else {
		ctrl_namelen = strlen(name);
		ns_name = NULL;
	}

	*ctrlp = NULL;
	if (nsp != NULL) {
		*nsp = NULL;
	}

	if (strncmp(name, "nvme", 4) != 0) {
		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0, "unable "
		    "to map controller '%.*s' to a known device class, "
		    "expected the controller to start with 'nvme'",
		    (int)ctrl_namelen, name));
	}

	/*
	 * Before we go ahead and try to parse this with strtoul we need to
	 * manually check two things that strtoul will not:
	 *
	 * 1) If there is nothing after 'nvme', strtoul would just return 0
	 * rather than failing.
	 * 2) If there are multiple leading zeros in a row then that's an
	 * error. We don't want to conflate 001 and 1 as the same here. The
	 * only valid case with a leading zero is 'nvme0' itself, which is 5
	 * characters long, hence the check below.
	 */
	if (ctrl_namelen == 4) {
		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
		    "no controller instance specified in %.*s",
		    (int)ctrl_namelen, name));
	}

	if (name[4] == '0' && ctrl_namelen > 5) {
		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
		    "leading zeros aren't allowed for the instance specified "
		    "in %.*s", (int)ctrl_namelen, name));
	}

	errno = 0;
	inst = strtoul(name + 4, &eptr, 10);
	if (errno != 0 || (*eptr != '\0' && eptr != slash)) {
		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
		    "failed to parse controller instance from %.*s",
		    (int)ctrl_namelen, name));
	}

	if (inst > INT32_MAX) {
		return (nvme_error(nvme, NVME_ERR_ILLEGAL_INSTANCE, 0,
		    "parsed controller instance %lu is outside the valid "
		    "range [0, %d]", inst, INT32_MAX));
	}

	if (!nvme_ctrl_init_by_instance(nvme, (int32_t)inst, &ctrl)) {
		return (false);
	}

	if (ns_name == NULL) {
		*ctrlp = ctrl;
		return (nvme_success(nvme));
	}

	if (!nvme_ns_init_by_name(ctrl, ns_name, &ns)) {
		nvme_err_data_t err;

		nvme_ctrl_err_save(ctrl, &err);
		nvme_err_set(nvme, &err);
		nvme_ctrl_fini(ctrl);
		return (false);
	}

	*ctrlp = ctrl;
	*nsp = ns;

	return (nvme_success(nvme));
}

bool
nvme_ns_bd_attach(nvme_ns_t *ns)
{
	nvme_ctrl_t *ctrl = ns->nn_ctrl;
	nvme_ioctl_common_t com;

	(void) memset(&com, 0, sizeof (com));
	com.nioc_nsid = ns->nn_nsid;

	if (ioctl(ns->nn_ctrl->nc_fd, NVME_IOC_ATTACH, &com) != 0) {
		int e = errno;
		return (nvme_ioctl_syserror(ctrl, e, "namespace attach"));
	}

	if (com.nioc_drv_err != NVME_IOCTL_E_OK) {
		return (nvme_ioctl_error(ctrl, &com, "namespace attach"));
	}

	return (nvme_ctrl_success(ctrl));
}

bool
nvme_ns_bd_detach(nvme_ns_t *ns)
{
	nvme_ctrl_t *ctrl = ns->nn_ctrl;
	nvme_ioctl_common_t com;

	(void) memset(&com, 0, sizeof (com));
	com.nioc_nsid = ns->nn_nsid;

	if (ioctl(ns->nn_ctrl->nc_fd, NVME_IOC_DETACH, &com) != 0) {
		int e = errno;
		return (nvme_ioctl_syserror(ctrl, e, "namespace detach"));
	}

	if (com.nioc_drv_err != NVME_IOCTL_E_OK) {
		return (nvme_ioctl_error(ctrl, &com, "namespace detach"));
	}

	return (nvme_ctrl_success(ctrl));
}

/*
 * Check for a lock programming error and upanic() if so.
 */
static void
nvme_lock_check(nvme_ctrl_t *ctrl)
{
	char msg[1024];
	int ret;
	const char *up;
	size_t ulen;
	const char *base = "fatal libnvme locking error detected";

	if (ctrl->nc_err.ne_err != NVME_ERR_LOCK_PROG) {
		return;
	}

	ret = snprintf(msg, sizeof (msg), "%s: %s (controller %p)", base,
	    ctrl->nc_err.ne_errmsg, ctrl);
	if (ret <= 0) {
		ulen = strlen(base) + 1;
		up = base;
	} else if ((size_t)ret >= sizeof (msg)) {
		ulen = sizeof (msg);
		up = msg;
	} else {
		ulen = (size_t)ret + 1;
		up = msg;
	}

	upanic(up, ulen);
}

static bool
nvme_lock_common(nvme_ctrl_t *ctrl, uint32_t nsid, nvme_lock_level_t level,
    nvme_lock_flags_t flags)
{
	nvme_ioctl_lock_t lock;
	const nvme_lock_flags_t all_flags = NVME_LOCK_F_DONT_BLOCK;

	if (level != NVME_LOCK_L_READ && level != NVME_LOCK_L_WRITE) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_FLAG, 0, "unknown "
		    "lock level: 0x%x", level));
	}

	if ((flags & ~all_flags) != 0) {
		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_FLAG, 0, "unknown "
		    "lock flags: 0x%x", flags & ~all_flags));
	}

	(void) memset(&lock, 0, sizeof (lock));
	lock.nil_common.nioc_nsid = nsid;
	if (nsid != 0) {
		lock.nil_ent = NVME_LOCK_E_NS;
	} else {
		lock.nil_ent = NVME_LOCK_E_CTRL;
	}
	lock.nil_level = level;
	lock.nil_flags = flags;

	if (ioctl(ctrl->nc_fd, NVME_IOC_LOCK, &lock) != 0) {
		int e = errno;
		return (nvme_ioctl_syserror(ctrl, e, "lock"));
	}

	if (lock.nil_common.nioc_drv_err != NVME_IOCTL_E_OK) {
		(void) nvme_ioctl_error(ctrl, &lock.nil_common, "lock");
		nvme_lock_check(ctrl);
		return (false);
	}

	return (nvme_ctrl_success(ctrl));
}

/*
 * You may reasonably be wondering why this returns void and why we basically
 * panic everywhere. The reality is twofold. The first part of this is that we
 * know from experience in libc that error checking mutexes are not the most
 * common, and the kernel simplicity of mutex_enter() and mutex_exit() is
 * really a boon. The second piece here is that, given the way the ioctl path
 * works, only programming errors or mischief in the library could cause this
 * to fail at the raw ioctl / errno level. That is, EBADF/EFAULT, etc. are our
 * fault, and if you cannot unlock because of that you're not going to get
 * much further.
 */
void
nvme_unlock_common(nvme_ctrl_t *ctrl, uint32_t nsid)
{
	nvme_ioctl_unlock_t unlock;

	(void) memset(&unlock, 0, sizeof (unlock));
	unlock.niu_common.nioc_nsid = nsid;
	if (nsid != 0) {
		unlock.niu_ent = NVME_LOCK_E_NS;
	} else {
		unlock.niu_ent = NVME_LOCK_E_CTRL;
	}

	/*
	 * Because all unlock ioctl errors are promoted to a fatal locking
	 * error, we don't bother calling nvme_ioctl_syserror() here.
	 */
	if (ioctl(ctrl->nc_fd, NVME_IOC_UNLOCK, &unlock) != 0) {
		int e = errno;
		(void) nvme_ctrl_error(ctrl, NVME_ERR_LOCK_PROG, e, "internal "
		    "programming error: failed to issue unlock ioctl: %s",
		    strerror(e));
		nvme_lock_check(ctrl);
		return;
	}

	if (unlock.niu_common.nioc_drv_err != NVME_IOCTL_E_OK) {
		(void) nvme_ioctl_error(ctrl, &unlock.niu_common, "unlock");
		/*
		 * Promote any other failure to a new fatal failure. Consumers
		 * expect this to have worked.
		 */
		if (ctrl->nc_err.ne_err != NVME_ERR_LOCK_PROG) {
			nvme_err_data_t err;
			nvme_ctrl_err_save(ctrl, &err);
			(void) nvme_ctrl_error(ctrl, NVME_ERR_LOCK_PROG, 0,
			    "internal programming error: received unexpected "
			    "libnvme error 0x%x: %s", err.ne_err,
			    err.ne_errmsg);
		}
		nvme_lock_check(ctrl);
		return;
	}

	(void) nvme_ctrl_success(ctrl);
}

bool
nvme_ctrl_lock(nvme_ctrl_t *ctrl, nvme_lock_level_t level,
    nvme_lock_flags_t flags)
{
	return (nvme_lock_common(ctrl, 0, level, flags));
}

bool
nvme_ns_lock(nvme_ns_t *ns, nvme_lock_level_t level, nvme_lock_flags_t flags)
{
	return (nvme_lock_common(ns->nn_ctrl, ns->nn_nsid, level, flags));
}

void
nvme_ctrl_unlock(nvme_ctrl_t *ctrl)
{
	nvme_unlock_common(ctrl, 0);
}

void
nvme_ns_unlock(nvme_ns_t *ns)
{
	nvme_unlock_common(ns->nn_ctrl, ns->nn_nsid);
}