xref: /illumos-gate/usr/src/lib/libnvme/common/libnvme.c (revision 533affcbc7fc4d0c8132976ea454aaa715fe2307)
1*533affcbSRobert Mustacchi /*
2*533affcbSRobert Mustacchi  * This file and its contents are supplied under the terms of the
3*533affcbSRobert Mustacchi  * Common Development and Distribution License ("CDDL"), version 1.0.
4*533affcbSRobert Mustacchi  * You may only use this file in accordance with the terms of version
5*533affcbSRobert Mustacchi  * 1.0 of the CDDL.
6*533affcbSRobert Mustacchi  *
7*533affcbSRobert Mustacchi  * A full copy of the text of the CDDL should have accompanied this
8*533affcbSRobert Mustacchi  * source.  A copy of the CDDL is also available via the Internet at
9*533affcbSRobert Mustacchi  * http://www.illumos.org/license/CDDL.
10*533affcbSRobert Mustacchi  */
11*533affcbSRobert Mustacchi 
12*533affcbSRobert Mustacchi /*
13*533affcbSRobert Mustacchi  * Copyright 2024 Oxide Computer Company
14*533affcbSRobert Mustacchi  */
15*533affcbSRobert Mustacchi 
16*533affcbSRobert Mustacchi /*
17*533affcbSRobert Mustacchi  * Programmatic interface to NVMe Devices
18*533affcbSRobert Mustacchi  *
19*533affcbSRobert Mustacchi  * libnvme exists to provide a means of performing non-I/O related operations on
20*533affcbSRobert Mustacchi  * an NVMe device. This is intended to allow software, regardless of whether it
21*533affcbSRobert Mustacchi  * is part of illumos or not, to operate on NVMe devices and perform most of the
22*533affcbSRobert Mustacchi  * administrative and operator tasks that might come up. This library does not
23*533affcbSRobert Mustacchi  * provide a stable interface yet. The rest of this block comment goes into the
24*533affcbSRobert Mustacchi  * organization and background into why it looks the way it does.
25*533affcbSRobert Mustacchi  *
26*533affcbSRobert Mustacchi  * --------------------
27*533affcbSRobert Mustacchi  * Library Organization
28*533affcbSRobert Mustacchi  * --------------------
29*533affcbSRobert Mustacchi  *
30*533affcbSRobert Mustacchi  * There are two large classes of source files that make up this library
31*533affcbSRobert Mustacchi  * currently:
32*533affcbSRobert Mustacchi  *
33*533affcbSRobert Mustacchi  *   1. Source code that implements the library's interfaces is found alongside
34*533affcbSRobert Mustacchi  *      this file in lib/libnvme/common. This code is generally organized based
35*533affcbSRobert Mustacchi  *      around the portion of the NVMe specification that it implements. So for
36*533affcbSRobert Mustacchi  *      example, code that implements logic related to the features is found
37*533affcbSRobert Mustacchi  *      in libnvme_feature.c, formatting namespaces in libnvme_format.c, log
38*533affcbSRobert Mustacchi  *      pages in libnvme_log.c, etc. All files in the library begin with
39*533affcbSRobert Mustacchi  *      'libnvme_' as a way to help namespace the file names from the second set
40*533affcbSRobert Mustacchi  *      of files.
41*533affcbSRobert Mustacchi  *
42*533affcbSRobert Mustacchi  *   2. Validation logic that is shared between libnvme and the kernel is found
43*533affcbSRobert Mustacchi  *      in common/nvme/. While the kernel must validate requests regardless, we
44*533affcbSRobert Mustacchi  *      leverage this shared information as a means for trying to ensure that we
45*533affcbSRobert Mustacchi  *      have useful errors early. That code is factored in a way to facilitate
46*533affcbSRobert Mustacchi  *      easier unit testing.
47*533affcbSRobert Mustacchi  *
48*533affcbSRobert Mustacchi  * Because of the nature of this split, all of the opaque structures that we
49*533affcbSRobert Mustacchi  * create and their relationships are all maintained in the library (group 1).
50*533affcbSRobert Mustacchi  * All of the logic in group 2 is designed to be constant data tables and
51*533affcbSRobert Mustacchi  * functions that are fed information about the controller they are operating on
52*533affcbSRobert Mustacchi  * to answer them.
53*533affcbSRobert Mustacchi  *
54*533affcbSRobert Mustacchi  * There are several general classes of interfaces and related structures that
55*533affcbSRobert Mustacchi  * we have in the library. We break them into the following general categories
56*533affcbSRobert Mustacchi  * based on their purpose:
57*533affcbSRobert Mustacchi  *
58*533affcbSRobert Mustacchi  * DISCOVERY
59*533affcbSRobert Mustacchi  *
60*533affcbSRobert Mustacchi  * One of the large responsibilities of this library is helping someone discover
61*533affcbSRobert Mustacchi  * information about something, whether that be a controller, a namespace, a log
62*533affcbSRobert Mustacchi  * page, a feature, a unique command, etc. Information about one of these items
63*533affcbSRobert Mustacchi  * is contained in a generally opaque discovery structure. For example, the
64*533affcbSRobert Mustacchi  * nvme_log_disc_t.
65*533affcbSRobert Mustacchi  *
66*533affcbSRobert Mustacchi  * The goal of these structures is to contain all of the metadata for working
67*533affcbSRobert Mustacchi  * with the object in question. Continuing on the log page discovery example, it
68*533affcbSRobert Mustacchi  * can tell us information about what fields are required, whether or not the
69*533affcbSRobert Mustacchi  * log might be supported, whether it operates on a controller, a namespace, or
70*533affcbSRobert Mustacchi  * something else, as well as more human-usable things such as names and
71*533affcbSRobert Mustacchi  * descriptions.
72*533affcbSRobert Mustacchi  *
73*533affcbSRobert Mustacchi  * Discovery objects are both for humans and for programmatic consumption. There
74*533affcbSRobert Mustacchi  * are several cases where requests can be created directly from discovery
75*533affcbSRobert Mustacchi  * objects. A well designed discovery object can allow a general implementation
76*533affcbSRobert Mustacchi  * of a consumer such as nvmeadm to build up a request without having to
77*533affcbSRobert Mustacchi  * hardcode everything about what is needed for each request (though most
78*533affcbSRobert Mustacchi  * consumers still need to have information about the actual contents, meaning,
79*533affcbSRobert Mustacchi  * and semantics of a log or feature).
80*533affcbSRobert Mustacchi  *
81*533affcbSRobert Mustacchi  * Discovery objects are obtained in two general ways. The first is using one of
82*533affcbSRobert Mustacchi  * the iterator/callback based functions to discover a given class of data. The
83*533affcbSRobert Mustacchi  * second path is that several of the functions which operate based on the name
84*533affcbSRobert Mustacchi  * of something, e.g. nvme_log_req_init_by_name(),
85*533affcbSRobert Mustacchi  * nvme_get_feat_req_init_by_name(), etc. will return a discovery object.
86*533affcbSRobert Mustacchi  *
87*533affcbSRobert Mustacchi  * When a discovery object is returned based on iteration (more below), the
88*533affcbSRobert Mustacchi  * memory is owned by the iterator. When it is returned by a request
89*533affcbSRobert Mustacchi  * initialization function, then it has its own life time and must be freed.
90*533affcbSRobert Mustacchi  * We try to make this distinction clear in the API based on whether or not the
91*533affcbSRobert Mustacchi  * discovery object is 'const'.
92*533affcbSRobert Mustacchi  *
93*533affcbSRobert Mustacchi  * All discovery objects should be fully filled out before they are handed back
94*533affcbSRobert Mustacchi  * to a caller. It is an explicit design goal that every function that gets data
95*533affcbSRobert Mustacchi  * from the discovery structure operates on a const version of the pointer. This
96*533affcbSRobert Mustacchi  * is the hint that you cannot perform additional I/O or related after handing
97*533affcbSRobert Mustacchi  * out the discovery structure. Attempts to loosen this constraint should be
98*533affcbSRobert Mustacchi  * considered carefully due to how we communicate ownership.
99*533affcbSRobert Mustacchi  *
100*533affcbSRobert Mustacchi  * ITERATORS
101*533affcbSRobert Mustacchi  *
102*533affcbSRobert Mustacchi  * A common pattern of the library is iterating over items. This includes
103*533affcbSRobert Mustacchi  * controllers and namespaces, but also as part of discovering what specific
104*533affcbSRobert Mustacchi  * logs, commands, features, etc. are actually supported by the device.
105*533affcbSRobert Mustacchi  * Iteration always follows the same general pattern:
106*533affcbSRobert Mustacchi  *
107*533affcbSRobert Mustacchi  * 1. An iterator is initialized with a call to nvme_<name>_discover_init().
108*533affcbSRobert Mustacchi  * This will generally return a structure of the form nvme_<name>_iter_t. This
109*533affcbSRobert Mustacchi  * structure contains the memory for the corresponding value that is returned
110*533affcbSRobert Mustacchi  * from step (2).
111*533affcbSRobert Mustacchi  *
112*533affcbSRobert Mustacchi  * 2. To actually pull values out of an iterator, one must call the
113*533affcbSRobert Mustacchi  * nvme_<name>_step() function for the iterator. This will return a
114*533affcbSRobert Mustacchi  * corresponding nvme_<name>_disc_t structure that is opaque and has a suite of
115*533affcbSRobert Mustacchi  * functions that are usable for getting information out from it. This structure
116*533affcbSRobert Mustacchi  * is valid only until the next time the nvme_<name>_step() is called. The
117*533affcbSRobert Mustacchi  * return value of step indicates the state of the data and indicates whether or
118*533affcbSRobert Mustacchi  * not there is an error, the iterator has finished, or we successfully stepped
119*533affcbSRobert Mustacchi  * and the data is filled out.
120*533affcbSRobert Mustacchi  *
121*533affcbSRobert Mustacchi  * If discovery data needs to outlive a given iteration, then it can be
122*533affcbSRobert Mustacchi  * duplicated which will give it a separate lifetime, though that comes with
123*533affcbSRobert Mustacchi  * the responsibility that it must then be freed.
124*533affcbSRobert Mustacchi  *
125*533affcbSRobert Mustacchi  * 3. To finish using iterators, one finally calls the corresponding
126*533affcbSRobert Mustacchi  * nvme_<name>_discover_fini(). That will deallocate the iterator structure and
127*533affcbSRobert Mustacchi  * finish everything up.
128*533affcbSRobert Mustacchi  *
129*533affcbSRobert Mustacchi  * REQUESTS
130*533affcbSRobert Mustacchi  *
131*533affcbSRobert Mustacchi  * One of the chief goals of this library is to be able to perform requests.
132*533affcbSRobert Mustacchi  * Each request has a structure that can be initialized, filled out, and then
133*533affcbSRobert Mustacchi  * executed. A request structure can be reused multiple times with minor
134*533affcbSRobert Mustacchi  * adjustments in-between (though changes aren't required). Request structures
135*533affcbSRobert Mustacchi  * are either initialized in a blank mode where every value must be filled out
136*533affcbSRobert Mustacchi  * or they can be initialized through their discovery object (or the common name
137*533affcbSRobert Mustacchi  * of such an object).
138*533affcbSRobert Mustacchi  *
139*533affcbSRobert Mustacchi  * When a request structure is initialized through a discovery object, it
140*533affcbSRobert Mustacchi  * automatically sets several of the fields, knows which ones are still required
141*533affcbSRobert Mustacchi  * to be set, and which fields cannot be set. For example, if you create a get
142*533affcbSRobert Mustacchi  * log page request from a log discovery object, it will not allow you to change
143*533affcbSRobert Mustacchi  * the log page you're requesting; however, in return you don't have to specify
144*533affcbSRobert Mustacchi  * the command set interface or log identifier.
145*533affcbSRobert Mustacchi  *
146*533affcbSRobert Mustacchi  * Request objects are tied to a controller. See 'Parallelism, Thread Safety,
147*533affcbSRobert Mustacchi  * and Errors' for more information.
148*533affcbSRobert Mustacchi  *
149*533affcbSRobert Mustacchi  * INFORMATION SNAPSHOTS
150*533affcbSRobert Mustacchi  *
151*533affcbSRobert Mustacchi  * To get information about a namespace or controller, one has to take an
152*533affcbSRobert Mustacchi  * information snapshot. Once an information snapshot is obtained, this snapshot
153*533affcbSRobert Mustacchi  * answers all questions about the controller with a mostly consistent set of
154*533affcbSRobert Mustacchi  * point-in-time data. The main reason for this design was to try and simplify
155*533affcbSRobert Mustacchi  * where errors can occur and to provide a straightforward serialization point
156*533affcbSRobert Mustacchi  * so that way the raw underlying data could be gathered at one system and then
157*533affcbSRobert Mustacchi  * interpreted later on another.
158*533affcbSRobert Mustacchi  *
159*533affcbSRobert Mustacchi  * The only reason that there are some fallible operations on the snapshot is
160*533affcbSRobert Mustacchi  * that some things are not guaranteed to exist for all such NVMe controllers.
161*533affcbSRobert Mustacchi  *
162*533affcbSRobert Mustacchi  * LIBRARY, CONTROLLER, NAMESPACE and SNAPSHOT HANDLES
163*533affcbSRobert Mustacchi  *
164*533affcbSRobert Mustacchi  * The last major set of types used in this library are opaque handles. As you
165*533affcbSRobert Mustacchi  * might have guessed given the request structures, all of the objects which
166*533affcbSRobert Mustacchi  * represent something are opaque. Each library handle is independent of one
167*533affcbSRobert Mustacchi  * another and each controller handle is independent of one another. In general,
168*533affcbSRobert Mustacchi  * it is expected that only a single controller handle is used at a given time
169*533affcbSRobert Mustacchi  * for a given library handle, but this is not currently enforced.  Error
170*533affcbSRobert Mustacchi  * information and parallelism is tied into this, see 'Parallelism, Thread
171*533affcbSRobert Mustacchi  * Safety, and Errors' for more information.
172*533affcbSRobert Mustacchi  *
173*533affcbSRobert Mustacchi  * -----------------
174*533affcbSRobert Mustacchi  * Opaque Structures
175*533affcbSRobert Mustacchi  * -----------------
176*533affcbSRobert Mustacchi  *
177*533affcbSRobert Mustacchi  * One of the things that might stand out in libnvme is the use of opaque
178*533affcbSRobert Mustacchi  * structures everywhere with functions to access every arbitrary piece of data.
179*533affcbSRobert Mustacchi  * This and the function pattern around building up a request were done to try
180*533affcbSRobert Mustacchi  * and deal with the evolutionary nature of the NVMe specification. If you look
181*533affcbSRobert Mustacchi  * at the various requests, with the exception of firmware download, almost
182*533affcbSRobert Mustacchi  * every request has added additional features through the spec revisions. NVMe
183*533affcbSRobert Mustacchi  * 2.0 changed most things again with the requirement to specify the command set
184*533affcbSRobert Mustacchi  * interface.
185*533affcbSRobert Mustacchi  *
186*533affcbSRobert Mustacchi  * While the way that the NVMe specification has done this is quite reasonable,
187*533affcbSRobert Mustacchi  * it makes it much more difficult to use a traditional series of arguments to
188*533affcbSRobert Mustacchi  * functions or a structure without having to try to version the symbol through
189*533affcbSRobert Mustacchi  * clever games. If instead we accept that the specification will change and
190*533affcbSRobert Mustacchi  * that the specification is always taking these additional arguments out of
191*533affcbSRobert Mustacchi  * values that must be zero, then an opaque request structure where you have to
192*533affcbSRobert Mustacchi  * make an explicit function call and recompile to get slightly different
193*533affcbSRobert Mustacchi  * behavior is mostly reasonable. We may not be able to be perfect given we're
194*533affcbSRobert Mustacchi  * at the mercy of the specification, but at least this is better than the
195*533affcbSRobert Mustacchi  * alternative.
196*533affcbSRobert Mustacchi  *
197*533affcbSRobert Mustacchi  * This is ultimately why all the request structures are opaque and use a
198*533affcbSRobert Mustacchi  * pseudo-builder pattern to fill out the request information. Further evidence
199*533affcbSRobert Mustacchi  * to this point is that there was no way to avoid changing every kernel
200*533affcbSRobert Mustacchi  * structure here while retaining semantic operations. No one wants to manually
201*533affcbSRobert Mustacchi  * assemble cdw12-15 here. That's not how we can add value for the library.
202*533affcbSRobert Mustacchi  *
203*533affcbSRobert Mustacchi  * Similarly, for all discovery objects we ended up utilizing opaque objects.
204*533affcbSRobert Mustacchi  * The main reason here is that we want to be able to embed this library as a
205*533affcbSRobert Mustacchi  * committed interface in other languages and having the discovery structures be
206*533affcbSRobert Mustacchi  * something that everyone can see means it'll be harder to extend it. While
207*533affcbSRobert Mustacchi  * this concern is somewhat more theoretical given the iterator pattern, given
208*533affcbSRobert Mustacchi  * the other bits in the request structure we decided to lean into the
209*533affcbSRobert Mustacchi  * opaqueness.
210*533affcbSRobert Mustacchi  *
211*533affcbSRobert Mustacchi  * --------------------------------------
212*533affcbSRobert Mustacchi  * Parallelism, Thread Safety, and Errors
213*533affcbSRobert Mustacchi  * --------------------------------------
214*533affcbSRobert Mustacchi  *
215*533affcbSRobert Mustacchi  * One of the library's major design points is how do we achieve thread-safety,
216*533affcbSRobert Mustacchi  * how does ownership work, where do errors appear, and what is the degree of
217*533affcbSRobert Mustacchi  * parallelism that is achievable. To work through this we look at a few
218*533affcbSRobert Mustacchi  * different things:
219*533affcbSRobert Mustacchi  *
220*533affcbSRobert Mustacchi  * 1. The degree to which the hardware allows for parallelism
221*533affcbSRobert Mustacchi  * 2. The degree to which users might desire parallelism
222*533affcbSRobert Mustacchi  * 3. The ergonomics of getting and storing errors
223*533affcbSRobert Mustacchi  *
224*533affcbSRobert Mustacchi  * The NVMe specification allows for different degrees of admin command
225*533affcbSRobert Mustacchi  * parallelism on a per-command basis. This is discoverable, but the main point
226*533affcbSRobert Mustacchi  * is that there are a class of commands where only one can be outstanding at a
227*533affcbSRobert Mustacchi  * time, which likely fall into the case of most of the destructive commands
228*533affcbSRobert Mustacchi  * like Format NVM, Activate Firmware, etc. Our expectation to some extent is
229*533affcbSRobert Mustacchi  * that most admin queue commands don't need to be issued in parallel; however,
230*533affcbSRobert Mustacchi  * beyond how we structure the library and error handling, we don't try to
231*533affcbSRobert Mustacchi  * enforce that here. The kernel does do some enforcement through requiring
232*533affcbSRobert Mustacchi  * mandatory write locks to perform some operations.
233*533affcbSRobert Mustacchi  *
234*533affcbSRobert Mustacchi  * When we get to how do folks want to use this, during the initial design phase
235*533affcbSRobert Mustacchi  * we mostly theorized based on how nvmeadm is using it today and how various
236*533affcbSRobert Mustacchi  * daemons like a FRU monitor or an appliance kit's software might want to
237*533affcbSRobert Mustacchi  * interact with it. Our general starting assumption is that it's very
238*533affcbSRobert Mustacchi  * reasonable for each discovered controller to be handled in parallel, but that
239*533affcbSRobert Mustacchi  * operations on a controller itself are likely serial given that we're not
240*533affcbSRobert Mustacchi  * issuing I/O through this mechanism. If we were, then that'd be an entirely
241*533affcbSRobert Mustacchi  * different set of constraints.
242*533affcbSRobert Mustacchi  *
243*533affcbSRobert Mustacchi  * To discuss the perceived ergonomics, we need to first discuss what error
244*533affcbSRobert Mustacchi  * information we want to be able to have. It's an important goal of both the
245*533affcbSRobert Mustacchi  * NVMe driver and this library to give useful semantic errors. In particular,
246*533affcbSRobert Mustacchi  * for any operation we want to make sure that we include the following
247*533affcbSRobert Mustacchi  * information:
248*533affcbSRobert Mustacchi  *
249*533affcbSRobert Mustacchi  *   o A hopefully distinguishable semantic error
250*533affcbSRobert Mustacchi  *   o Saving errno as a system error if relevant (e.g. if open(2) failed)
251*533affcbSRobert Mustacchi  *   o A message for humans that gives more specifics about what happened and is
252*533affcbSRobert Mustacchi  *     intended to be passed along to the output of a command or another error
253*533affcbSRobert Mustacchi  *     message.
254*533affcbSRobert Mustacchi  *   o If a controller error occurs, we want to be able to provide the
255*533affcbSRobert Mustacchi  *     controller's sc (status code) and sct (status code type).
256*533affcbSRobert Mustacchi  *
257*533affcbSRobert Mustacchi  * With this we get to the questions around ergonomics and related which are
258*533affcbSRobert Mustacchi  * entirely subjective. Given that we want to capture that information how do we
259*533affcbSRobert Mustacchi  * best do this given the tooling that we have. When the library was first being
260*533affcbSRobert Mustacchi  * prototyped all errors were on the nvme_t, basically the top-level handle.
261*533affcbSRobert Mustacchi  * This meant that each operation on a controller had to be done serially or you
262*533affcbSRobert Mustacchi  * would have to use different handles. However, the simplicity was that there
263*533affcbSRobert Mustacchi  * was one thing to check.
264*533affcbSRobert Mustacchi  *
265*533affcbSRobert Mustacchi  * This evolution changed slightly when we introduced information snapshots.
266*533affcbSRobert Mustacchi  * Because the information snapshots are meant to be separate entities whose
267*533affcbSRobert Mustacchi  * lifetime can extend beyond the nvme_t library handle, they ended up
268*533affcbSRobert Mustacchi  * developing their own error codes and functions. This has been okay because
269*533affcbSRobert Mustacchi  * there aren't too many use cases there, though the need to duplicate error
270*533affcbSRobert Mustacchi  * handling functions is a bit painful.
271*533affcbSRobert Mustacchi  *
272*533affcbSRobert Mustacchi  * From there, we did consider what if each request had its own error
273*533affcbSRobert Mustacchi  * information that could be extracted. That would turn into a lot of functions
274*533affcbSRobert Mustacchi  * to get at that data. The controller's allowed parallelism for admin commands
275*533affcbSRobert Mustacchi  * varies based on each command. Some commands must occur when there are no
276*533affcbSRobert Mustacchi  * other admin commands on the controller and others when there is nothing
277*533affcbSRobert Mustacchi  * on the namespace. However, due to that nuance, it would lead to forcing the
278*533affcbSRobert Mustacchi  * consumer to understand the controller's specifics more than is often
279*533affcbSRobert Mustacchi  * necessary for a given request. To add to that, it'd also just be a pain to
280*533affcbSRobert Mustacchi  * try to get all the error information out in a different way and the consumers
281*533affcbSRobert Mustacchi  * we started writing in this fashion were not looking good.
282*533affcbSRobert Mustacchi  *
283*533affcbSRobert Mustacchi  * We also considered whether we could consolidate all the error functions on
284*533affcbSRobert Mustacchi  * each request into one structure that we get, but that didn't move the needle
285*533affcbSRobert Mustacchi  * too much. It also raised some more concerns around how we minimize races and
286*533affcbSRobert Mustacchi  * how data changes around that.
287*533affcbSRobert Mustacchi  *
288*533affcbSRobert Mustacchi  * So all of this led us to our current compromise position: we allow for
289*533affcbSRobert Mustacchi  * parallelism at the controller level. More specifically:
290*533affcbSRobert Mustacchi  *
291*533affcbSRobert Mustacchi  * 1. Operations which take the nvme_t handle set errors on it and must operate
292*533affcbSRobert Mustacchi  *    serially. That is the nvme_t should only be used from one thread at any
293*533affcbSRobert Mustacchi  *    time, but may move between threads. Errors are set on it.
294*533affcbSRobert Mustacchi  *
295*533affcbSRobert Mustacchi  * 2. The nvme_ctrl_t has its own error information. A given nvme_ctrl_t should
296*533affcbSRobert Mustacchi  *    only be used serially; however, different ones can be used in parallel. A
297*533affcbSRobert Mustacchi  *    controller doesn't guarantee exclusivity. That requires an explicit
298*533affcbSRobert Mustacchi  *    locking operation.
299*533affcbSRobert Mustacchi  *
300*533affcbSRobert Mustacchi  * 3. Both request structures and namespaces place their errors on the
301*533affcbSRobert Mustacchi  *    corresponding controller that they were created from. Therefore the
302*533affcbSRobert Mustacchi  *    per-controller serialization in (2) applies here as well. If two requests
303*533affcbSRobert Mustacchi  *    are tied to different controllers, they can proceed in parallel.
304*533affcbSRobert Mustacchi  *
305*533affcbSRobert Mustacchi  * 4. Once a controller or namespace snapshot is obtained, they fall into a
306*533affcbSRobert Mustacchi  *    similar pattern: each one can be operated on in parallel, but generally
307*533affcbSRobert Mustacchi  *    one should only operate on a single one serially.
308*533affcbSRobert Mustacchi  *
309*533affcbSRobert Mustacchi  * Other than the constraints defined above, the library does not care which
310*533affcbSRobert Mustacchi  * thread an operation occurs on. These can be moved to wherever they need
311*533affcbSRobert Mustacchi  * to be. Locking and related in the kernel is based on the open file descriptor
312*533affcbSRobert Mustacchi  * to the controller.
313*533affcbSRobert Mustacchi  *
314*533affcbSRobert Mustacchi  * ----------------
315*533affcbSRobert Mustacchi  * Field Validation
316*533affcbSRobert Mustacchi  * ----------------
317*533affcbSRobert Mustacchi  *
318*533affcbSRobert Mustacchi  * Every request is made up of fields that correspond to parts of the NVMe
319*533affcbSRobert Mustacchi  * specification. Our requests operate in terms of the logical fields that we
320*533affcbSRobert Mustacchi  * opt to expose and that the kernel knows how to consume. In general, we don't
321*533affcbSRobert Mustacchi  * expose the raw cdw values that make up the commands (except for the vendor
322*533affcbSRobert Mustacchi  * unique commands or arguments that are explicitly that way ala get features).
323*533affcbSRobert Mustacchi  * While operating on raw cdw arguments would be a simple way to create ABI
324*533affcbSRobert Mustacchi  * stability, it would leave everyone having to break up all the fields
325*533affcbSRobert Mustacchi  * themselves and we believe end up somewhat more error prone than the
326*533affcbSRobert Mustacchi  * interfaces we expose today.
327*533affcbSRobert Mustacchi  *
328*533affcbSRobert Mustacchi  * Requests are created in one of two ways today: they are either initialized
329*533affcbSRobert Mustacchi  * from corresponding discovery data e.g. nvme_log_req_init_by_disc() and
330*533affcbSRobert Mustacchi  * nvme_get_feat_req_init_by_name(), or one creates a raw request ala
331*533affcbSRobert Mustacchi  * nvme_get_feat_req_init(). In the former cases, we fill out a bunch of the
332*533affcbSRobert Mustacchi  * fields that would normally need to be set such as the log or feature ID. We
333*533affcbSRobert Mustacchi  * also will note which fields are allowed and expected. For example, the health
334*533affcbSRobert Mustacchi  * log page does not take or expect a lsp (log specific parameter) or related
335*533affcbSRobert Mustacchi  * and therefore we can flag that with an _UNUSE class error. Conversely,
336*533affcbSRobert Mustacchi  * requests that are created from their raw form will not have any such error
337*533affcbSRobert Mustacchi  * checking performed until they are finalized and checked by the kernel. The
338*533affcbSRobert Mustacchi  * set of fields that can be set in a request is usually tracked in the
339*533affcbSRobert Mustacchi  * structure with a member of the form <prefix>_allow.
340*533affcbSRobert Mustacchi  *
341*533affcbSRobert Mustacchi  * One set of library error checking that is uniform between both types is that
342*533affcbSRobert Mustacchi  * of missing fields. There are minimum fields that must be set for different
343*533affcbSRobert Mustacchi  * types of requests. That check will always be performed regardless of the path
344*533affcbSRobert Mustacchi  * that is taken through the system. Tracking which members must still be set is
345*533affcbSRobert Mustacchi  * done by a member of the form <prefix>_need.
346*533affcbSRobert Mustacchi  *
347*533affcbSRobert Mustacchi  * When we perform validation, we try to push the vast majority of it into the
348*533affcbSRobert Mustacchi  * common validation code that is shared between the kernel and userland. This
349*533affcbSRobert Mustacchi  * is wrapped up through the nvme_field_check_one() logic. The common code will
350*533affcbSRobert Mustacchi  * check if the field is supported by the controller (generating an _UNSUP class
351*533affcbSRobert Mustacchi  * error if not) and if the value of the field is within a valid range
352*533affcbSRobert Mustacchi  * (generating a _RANGE class error if not).
353*533affcbSRobert Mustacchi  *
354*533affcbSRobert Mustacchi  * While we try to fold as many such checks into the common code as
355*533affcbSRobert Mustacchi  * possible, it isn't perfect and some things have to be checked outside of
356*533affcbSRobert Mustacchi  * that. Those consist of the following general cases:
357*533affcbSRobert Mustacchi  *
358*533affcbSRobert Mustacchi  * 1) Items that are not semantically fields in the actual command but are
359*533affcbSRobert Mustacchi  * things that we are tracking ourselves in the library. An example of this
360*533affcbSRobert Mustacchi  * would be fields in the vuc request structure that we are synthesizing
361*533affcbSRobert Mustacchi  * ourselves.
362*533affcbSRobert Mustacchi  *
363*533affcbSRobert Mustacchi  * 2) While the field logic has the specifics of what controller is being
364*533affcbSRobert Mustacchi  * operated upon, it doesn't have all the knowledge of what things can be
365*533affcbSRobert Mustacchi  * combined or not. It can answer the specifics about its field, but cannot look
366*533affcbSRobert Mustacchi  * at the broader request.
367*533affcbSRobert Mustacchi  *
368*533affcbSRobert Mustacchi  * As a result, there are some duplicated checks in the library and the kernel,
369*533affcbSRobert Mustacchi  * though several are left just to the kernel. However, the vast majority of
370*533affcbSRobert Mustacchi  * validation does happen through these common routines which leaves the library
371*533affcbSRobert Mustacchi  * nvme_<type>_req_set_<field> functions generally wrappers around checking
372*533affcbSRobert Mustacchi  * common code and updating our tracking around what fields are set or not so we
373*533affcbSRobert Mustacchi  * can issue an ioctl.
374*533affcbSRobert Mustacchi  */
375*533affcbSRobert Mustacchi 
376*533affcbSRobert Mustacchi #include <stdlib.h>
377*533affcbSRobert Mustacchi #include <stdarg.h>
378*533affcbSRobert Mustacchi #include <libdevinfo.h>
379*533affcbSRobert Mustacchi #include <unistd.h>
380*533affcbSRobert Mustacchi #include <string.h>
381*533affcbSRobert Mustacchi #include <sys/types.h>
382*533affcbSRobert Mustacchi #include <sys/stat.h>
383*533affcbSRobert Mustacchi #include <fcntl.h>
384*533affcbSRobert Mustacchi #include <upanic.h>
385*533affcbSRobert Mustacchi 
386*533affcbSRobert Mustacchi #include "libnvme_impl.h"
387*533affcbSRobert Mustacchi 
388*533affcbSRobert Mustacchi bool
nvme_vers_ctrl_atleast(const nvme_ctrl_t * ctrl,const nvme_version_t * targ)389*533affcbSRobert Mustacchi nvme_vers_ctrl_atleast(const nvme_ctrl_t *ctrl, const nvme_version_t *targ)
390*533affcbSRobert Mustacchi {
391*533affcbSRobert Mustacchi 	return (nvme_vers_atleast(&ctrl->nc_vers, targ));
392*533affcbSRobert Mustacchi }
393*533affcbSRobert Mustacchi 
394*533affcbSRobert Mustacchi bool
nvme_vers_ctrl_info_atleast(const nvme_ctrl_info_t * ci,const nvme_version_t * targ)395*533affcbSRobert Mustacchi nvme_vers_ctrl_info_atleast(const nvme_ctrl_info_t *ci,
396*533affcbSRobert Mustacchi     const nvme_version_t *targ)
397*533affcbSRobert Mustacchi {
398*533affcbSRobert Mustacchi 	return (nvme_vers_atleast(&ci->nci_vers, targ));
399*533affcbSRobert Mustacchi }
400*533affcbSRobert Mustacchi 
401*533affcbSRobert Mustacchi bool
nvme_vers_ns_info_atleast(const nvme_ns_info_t * info,const nvme_version_t * targ)402*533affcbSRobert Mustacchi nvme_vers_ns_info_atleast(const nvme_ns_info_t *info,
403*533affcbSRobert Mustacchi     const nvme_version_t *targ)
404*533affcbSRobert Mustacchi {
405*533affcbSRobert Mustacchi 	return (nvme_vers_atleast(&info->nni_vers, targ));
406*533affcbSRobert Mustacchi }
407*533affcbSRobert Mustacchi 
408*533affcbSRobert Mustacchi bool
nvme_guid_valid(const nvme_ctrl_t * ctrl,const uint8_t guid[16])409*533affcbSRobert Mustacchi nvme_guid_valid(const nvme_ctrl_t *ctrl, const uint8_t guid[16])
410*533affcbSRobert Mustacchi {
411*533affcbSRobert Mustacchi 	const uint8_t zero_guid[16] = { 0 };
412*533affcbSRobert Mustacchi 
413*533affcbSRobert Mustacchi 	return (nvme_vers_ctrl_atleast(ctrl, &nvme_vers_1v2) &&
414*533affcbSRobert Mustacchi 	    memcmp(zero_guid, guid, sizeof (zero_guid)) != 0);
415*533affcbSRobert Mustacchi }
416*533affcbSRobert Mustacchi 
417*533affcbSRobert Mustacchi bool
nvme_eui64_valid(const nvme_ctrl_t * ctrl,const uint8_t eui64[8])418*533affcbSRobert Mustacchi nvme_eui64_valid(const nvme_ctrl_t *ctrl, const uint8_t eui64[8])
419*533affcbSRobert Mustacchi {
420*533affcbSRobert Mustacchi 	const uint8_t zero_eui[8] = { 0 };
421*533affcbSRobert Mustacchi 
422*533affcbSRobert Mustacchi 	return (nvme_vers_ctrl_atleast(ctrl, &nvme_vers_1v1) &&
423*533affcbSRobert Mustacchi 	    memcmp(zero_eui, eui64, sizeof (zero_eui)) != 0);
424*533affcbSRobert Mustacchi }
425*533affcbSRobert Mustacchi 
/*
 * Render a 16-byte namespace GUID into buf as 32 upper-case hexadecimal
 * digits, most significant byte (index 0) first. At most len bytes are
 * written, including the terminator. The return value is whatever snprintf()
 * returns, i.e. the length the full string requires, which lets callers
 * detect truncation.
 */
int
nvme_format_nguid(const uint8_t nguid[16], char *buf, size_t len)
{
	int needed;

	needed = snprintf(buf, len,
	    "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X"
	    "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X",
	    nguid[0], nguid[1], nguid[2], nguid[3],
	    nguid[4], nguid[5], nguid[6], nguid[7],
	    nguid[8], nguid[9], nguid[10], nguid[11],
	    nguid[12], nguid[13], nguid[14], nguid[15]);

	return (needed);
}
435*533affcbSRobert Mustacchi 
/*
 * Render an 8-byte EUI64 into buf as 16 upper-case hexadecimal digits, byte
 * index 0 first. At most len bytes are written, including the terminator.
 * The return value is whatever snprintf() returns, i.e. the length the full
 * string requires, which lets callers detect truncation.
 */
int
nvme_format_eui64(const uint8_t eui64[8], char *buf, size_t len)
{
	int needed;

	needed = snprintf(buf, len, "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X",
	    eui64[0], eui64[1], eui64[2], eui64[3],
	    eui64[4], eui64[5], eui64[6], eui64[7]);

	return (needed);
}
443*533affcbSRobert Mustacchi 
444*533affcbSRobert Mustacchi void
nvme_fini(nvme_t * nvme)445*533affcbSRobert Mustacchi nvme_fini(nvme_t *nvme)
446*533affcbSRobert Mustacchi {
447*533affcbSRobert Mustacchi 	if (nvme == NULL)
448*533affcbSRobert Mustacchi 		return;
449*533affcbSRobert Mustacchi 
450*533affcbSRobert Mustacchi 	if (nvme->nh_devinfo != DI_NODE_NIL) {
451*533affcbSRobert Mustacchi 		di_fini(nvme->nh_devinfo);
452*533affcbSRobert Mustacchi 	}
453*533affcbSRobert Mustacchi 
454*533affcbSRobert Mustacchi 	free(nvme);
455*533affcbSRobert Mustacchi }
456*533affcbSRobert Mustacchi 
457*533affcbSRobert Mustacchi nvme_t *
nvme_init(void)458*533affcbSRobert Mustacchi nvme_init(void)
459*533affcbSRobert Mustacchi {
460*533affcbSRobert Mustacchi 	nvme_t *nvme;
461*533affcbSRobert Mustacchi 
462*533affcbSRobert Mustacchi 	nvme = calloc(1, sizeof (nvme_t));
463*533affcbSRobert Mustacchi 	if (nvme == NULL) {
464*533affcbSRobert Mustacchi 		return (NULL);
465*533affcbSRobert Mustacchi 	}
466*533affcbSRobert Mustacchi 
467*533affcbSRobert Mustacchi 	nvme->nh_devinfo = di_init("/", DINFOCPYALL);
468*533affcbSRobert Mustacchi 	if (nvme->nh_devinfo == DI_NODE_NIL) {
469*533affcbSRobert Mustacchi 		nvme_fini(nvme);
470*533affcbSRobert Mustacchi 		return (NULL);
471*533affcbSRobert Mustacchi 	}
472*533affcbSRobert Mustacchi 
473*533affcbSRobert Mustacchi 	return (nvme);
474*533affcbSRobert Mustacchi }
475*533affcbSRobert Mustacchi 
476*533affcbSRobert Mustacchi void
nvme_ctrl_discover_fini(nvme_ctrl_iter_t * iter)477*533affcbSRobert Mustacchi nvme_ctrl_discover_fini(nvme_ctrl_iter_t *iter)
478*533affcbSRobert Mustacchi {
479*533affcbSRobert Mustacchi 	free(iter);
480*533affcbSRobert Mustacchi }
481*533affcbSRobert Mustacchi 
482*533affcbSRobert Mustacchi nvme_iter_t
nvme_ctrl_discover_step(nvme_ctrl_iter_t * iter,const nvme_ctrl_disc_t ** discp)483*533affcbSRobert Mustacchi nvme_ctrl_discover_step(nvme_ctrl_iter_t *iter, const nvme_ctrl_disc_t **discp)
484*533affcbSRobert Mustacchi {
485*533affcbSRobert Mustacchi 	di_minor_t m;
486*533affcbSRobert Mustacchi 
487*533affcbSRobert Mustacchi 	*discp = NULL;
488*533affcbSRobert Mustacchi 	if (iter->ni_done) {
489*533affcbSRobert Mustacchi 		return (NVME_ITER_DONE);
490*533affcbSRobert Mustacchi 	}
491*533affcbSRobert Mustacchi 
492*533affcbSRobert Mustacchi 	for (;;) {
493*533affcbSRobert Mustacchi 		if (iter->ni_cur == NULL) {
494*533affcbSRobert Mustacchi 			iter->ni_cur = di_drv_first_node("nvme",
495*533affcbSRobert Mustacchi 			    iter->ni_nvme->nh_devinfo);
496*533affcbSRobert Mustacchi 		} else {
497*533affcbSRobert Mustacchi 			iter->ni_cur = di_drv_next_node(iter->ni_cur);
498*533affcbSRobert Mustacchi 		}
499*533affcbSRobert Mustacchi 
500*533affcbSRobert Mustacchi 		if (iter->ni_cur == NULL) {
501*533affcbSRobert Mustacchi 			iter->ni_done = true;
502*533affcbSRobert Mustacchi 			return (NVME_ITER_DONE);
503*533affcbSRobert Mustacchi 		}
504*533affcbSRobert Mustacchi 
505*533affcbSRobert Mustacchi 		for (m = di_minor_next(iter->ni_cur, DI_MINOR_NIL);
506*533affcbSRobert Mustacchi 		    m != DI_MINOR_NIL; m = di_minor_next(iter->ni_cur, m)) {
507*533affcbSRobert Mustacchi 			if (strcmp(di_minor_nodetype(m),
508*533affcbSRobert Mustacchi 			    DDI_NT_NVME_NEXUS) == 0) {
509*533affcbSRobert Mustacchi 				break;
510*533affcbSRobert Mustacchi 			}
511*533affcbSRobert Mustacchi 		}
512*533affcbSRobert Mustacchi 
513*533affcbSRobert Mustacchi 		if (m == DI_MINOR_NIL) {
514*533affcbSRobert Mustacchi 			continue;
515*533affcbSRobert Mustacchi 		}
516*533affcbSRobert Mustacchi 
517*533affcbSRobert Mustacchi 		iter->ni_disc.ncd_devi = iter->ni_cur;
518*533affcbSRobert Mustacchi 		iter->ni_disc.ncd_minor = m;
519*533affcbSRobert Mustacchi 		*discp = &iter->ni_disc;
520*533affcbSRobert Mustacchi 		return (NVME_ITER_VALID);
521*533affcbSRobert Mustacchi 	}
522*533affcbSRobert Mustacchi 
523*533affcbSRobert Mustacchi 	return (NVME_ITER_DONE);
524*533affcbSRobert Mustacchi }
525*533affcbSRobert Mustacchi 
526*533affcbSRobert Mustacchi bool
nvme_ctrl_discover_init(nvme_t * nvme,nvme_ctrl_iter_t ** iterp)527*533affcbSRobert Mustacchi nvme_ctrl_discover_init(nvme_t *nvme, nvme_ctrl_iter_t **iterp)
528*533affcbSRobert Mustacchi {
529*533affcbSRobert Mustacchi 	nvme_ctrl_iter_t *iter;
530*533affcbSRobert Mustacchi 
531*533affcbSRobert Mustacchi 	if (iterp == NULL) {
532*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
533*533affcbSRobert Mustacchi 		    "invalid nvme_ctrl_iter_t output pointer: %p", iterp));
534*533affcbSRobert Mustacchi 	}
535*533affcbSRobert Mustacchi 
536*533affcbSRobert Mustacchi 	iter = calloc(1, sizeof (nvme_ctrl_iter_t));
537*533affcbSRobert Mustacchi 	if (iter == NULL) {
538*533affcbSRobert Mustacchi 		int e = errno;
539*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_NO_MEM, e, "failed to "
540*533affcbSRobert Mustacchi 		    "allocate memory for a new nvme_ctrl_iter_t: %s",
541*533affcbSRobert Mustacchi 		    strerror(e)));
542*533affcbSRobert Mustacchi 	}
543*533affcbSRobert Mustacchi 	iter->ni_nvme = nvme;
544*533affcbSRobert Mustacchi 	*iterp = iter;
545*533affcbSRobert Mustacchi 	return (nvme_success(nvme));
546*533affcbSRobert Mustacchi }
547*533affcbSRobert Mustacchi 
548*533affcbSRobert Mustacchi bool
nvme_ctrl_discover(nvme_t * nvme,nvme_ctrl_disc_f func,void * arg)549*533affcbSRobert Mustacchi nvme_ctrl_discover(nvme_t *nvme, nvme_ctrl_disc_f func, void *arg)
550*533affcbSRobert Mustacchi {
551*533affcbSRobert Mustacchi 	nvme_ctrl_iter_t *iter;
552*533affcbSRobert Mustacchi 	const nvme_ctrl_disc_t *disc;
553*533affcbSRobert Mustacchi 	nvme_iter_t ret;
554*533affcbSRobert Mustacchi 
555*533affcbSRobert Mustacchi 	if (func == NULL) {
556*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
557*533affcbSRobert Mustacchi 		    "invalid nvme_ctrl_disc_f function pointer: %p", func));
558*533affcbSRobert Mustacchi 	}
559*533affcbSRobert Mustacchi 
560*533affcbSRobert Mustacchi 	if (!nvme_ctrl_discover_init(nvme, &iter)) {
561*533affcbSRobert Mustacchi 		return (false);
562*533affcbSRobert Mustacchi 	}
563*533affcbSRobert Mustacchi 
564*533affcbSRobert Mustacchi 	while ((ret = nvme_ctrl_discover_step(iter, &disc)) ==
565*533affcbSRobert Mustacchi 	    NVME_ITER_VALID) {
566*533affcbSRobert Mustacchi 		if (!func(nvme, disc, arg))
567*533affcbSRobert Mustacchi 			break;
568*533affcbSRobert Mustacchi 	}
569*533affcbSRobert Mustacchi 
570*533affcbSRobert Mustacchi 	nvme_ctrl_discover_fini(iter);
571*533affcbSRobert Mustacchi 	if (ret == NVME_ITER_ERROR) {
572*533affcbSRobert Mustacchi 		return (false);
573*533affcbSRobert Mustacchi 	}
574*533affcbSRobert Mustacchi 
575*533affcbSRobert Mustacchi 	return (nvme_success(nvme));
576*533affcbSRobert Mustacchi }
577*533affcbSRobert Mustacchi 
578*533affcbSRobert Mustacchi di_node_t
nvme_ctrl_disc_devi(const nvme_ctrl_disc_t * discp)579*533affcbSRobert Mustacchi nvme_ctrl_disc_devi(const nvme_ctrl_disc_t *discp)
580*533affcbSRobert Mustacchi {
581*533affcbSRobert Mustacchi 	return (discp->ncd_devi);
582*533affcbSRobert Mustacchi }
583*533affcbSRobert Mustacchi 
584*533affcbSRobert Mustacchi di_minor_t
nvme_ctrl_disc_minor(const nvme_ctrl_disc_t * discp)585*533affcbSRobert Mustacchi nvme_ctrl_disc_minor(const nvme_ctrl_disc_t *discp)
586*533affcbSRobert Mustacchi {
587*533affcbSRobert Mustacchi 	return (discp->ncd_minor);
588*533affcbSRobert Mustacchi }
589*533affcbSRobert Mustacchi 
590*533affcbSRobert Mustacchi void
nvme_ctrl_fini(nvme_ctrl_t * ctrl)591*533affcbSRobert Mustacchi nvme_ctrl_fini(nvme_ctrl_t *ctrl)
592*533affcbSRobert Mustacchi {
593*533affcbSRobert Mustacchi 	if (ctrl == NULL) {
594*533affcbSRobert Mustacchi 		return;
595*533affcbSRobert Mustacchi 	}
596*533affcbSRobert Mustacchi 
597*533affcbSRobert Mustacchi 	if (ctrl->nc_devi_path != NULL) {
598*533affcbSRobert Mustacchi 		di_devfs_path_free(ctrl->nc_devi_path);
599*533affcbSRobert Mustacchi 	}
600*533affcbSRobert Mustacchi 
601*533affcbSRobert Mustacchi 	if (ctrl->nc_fd >= 0) {
602*533affcbSRobert Mustacchi 		(void) close(ctrl->nc_fd);
603*533affcbSRobert Mustacchi 		ctrl->nc_fd = -1;
604*533affcbSRobert Mustacchi 	}
605*533affcbSRobert Mustacchi 
606*533affcbSRobert Mustacchi 	free(ctrl);
607*533affcbSRobert Mustacchi }
608*533affcbSRobert Mustacchi 
609*533affcbSRobert Mustacchi bool
nvme_ctrl_init(nvme_t * nvme,di_node_t di,nvme_ctrl_t ** outp)610*533affcbSRobert Mustacchi nvme_ctrl_init(nvme_t *nvme, di_node_t di, nvme_ctrl_t **outp)
611*533affcbSRobert Mustacchi {
612*533affcbSRobert Mustacchi 	const char *drv;
613*533affcbSRobert Mustacchi 	int32_t inst;
614*533affcbSRobert Mustacchi 	di_minor_t minor;
615*533affcbSRobert Mustacchi 	char *path, buf[PATH_MAX];
616*533affcbSRobert Mustacchi 	nvme_ctrl_t *ctrl;
617*533affcbSRobert Mustacchi 	nvme_ioctl_ctrl_info_t ctrl_info;
618*533affcbSRobert Mustacchi 
619*533affcbSRobert Mustacchi 	if (di == DI_NODE_NIL) {
620*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
621*533affcbSRobert Mustacchi 		    "invalid di_node_t: %p", di));
622*533affcbSRobert Mustacchi 	}
623*533affcbSRobert Mustacchi 
624*533affcbSRobert Mustacchi 	if (outp == NULL) {
625*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
626*533affcbSRobert Mustacchi 		    "invalid nvme_ctrl_t output pointer: %p", outp));
627*533affcbSRobert Mustacchi 	}
628*533affcbSRobert Mustacchi 	*outp = NULL;
629*533affcbSRobert Mustacchi 
630*533affcbSRobert Mustacchi 	drv = di_driver_name(di);
631*533affcbSRobert Mustacchi 	inst = di_instance(di);
632*533affcbSRobert Mustacchi 	if (drv == NULL || inst < 0) {
633*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_BAD_DEVI, 0, "devi %s has "
634*533affcbSRobert Mustacchi 		    "no driver attached", di_node_name(di)));
635*533affcbSRobert Mustacchi 	}
636*533affcbSRobert Mustacchi 
637*533affcbSRobert Mustacchi 	if (strcmp(drv, "nvme") != 0) {
638*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_BAD_DEVI, 0, "devi %s isn't "
639*533affcbSRobert Mustacchi 		    "attached to nvme, found %s", di_node_name(di), drv));
640*533affcbSRobert Mustacchi 	}
641*533affcbSRobert Mustacchi 
642*533affcbSRobert Mustacchi 	/*
643*533affcbSRobert Mustacchi 	 * We have an NVMe node. Find the right minor that corresponds to the
644*533affcbSRobert Mustacchi 	 * attachment point. Once we find that then we can go ahead and open a
645*533affcbSRobert Mustacchi 	 * path to that and construct the device.
646*533affcbSRobert Mustacchi 	 */
647*533affcbSRobert Mustacchi 	minor = DI_MINOR_NIL;
648*533affcbSRobert Mustacchi 	while ((minor = di_minor_next(di, minor)) != DI_MINOR_NIL) {
649*533affcbSRobert Mustacchi 		if (strcmp(di_minor_nodetype(minor), DDI_NT_NVME_NEXUS) == 0) {
650*533affcbSRobert Mustacchi 			break;
651*533affcbSRobert Mustacchi 		}
652*533affcbSRobert Mustacchi 	}
653*533affcbSRobert Mustacchi 
654*533affcbSRobert Mustacchi 	if (minor == DI_MINOR_NIL) {
655*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_BAD_DEVI, 0, "devi %s isn't "
656*533affcbSRobert Mustacchi 		    "attached to nvme, found %s", di_node_name(di), drv));
657*533affcbSRobert Mustacchi 	}
658*533affcbSRobert Mustacchi 
659*533affcbSRobert Mustacchi 	path = di_devfs_minor_path(minor);
660*533affcbSRobert Mustacchi 	if (path == NULL) {
661*533affcbSRobert Mustacchi 		int e = errno;
662*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_LIBDEVINFO, e, "failed to "
663*533affcbSRobert Mustacchi 		    "obtain /devices path for the requested minor: %s",
664*533affcbSRobert Mustacchi 		    strerror(e)));
665*533affcbSRobert Mustacchi 	}
666*533affcbSRobert Mustacchi 
667*533affcbSRobert Mustacchi 	if (snprintf(buf, sizeof (buf), "/devices%s", path) >= sizeof (buf)) {
668*533affcbSRobert Mustacchi 		di_devfs_path_free(path);
669*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_INTERNAL, 0, "failed to "
670*533affcbSRobert Mustacchi 		    "construct full /devices minor path, would have overflown "
671*533affcbSRobert Mustacchi 		    "internal buffer"));
672*533affcbSRobert Mustacchi 	}
673*533affcbSRobert Mustacchi 	di_devfs_path_free(path);
674*533affcbSRobert Mustacchi 
675*533affcbSRobert Mustacchi 	ctrl = calloc(1, sizeof (*ctrl));
676*533affcbSRobert Mustacchi 	if (ctrl == NULL) {
677*533affcbSRobert Mustacchi 		int e = errno;
678*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_NO_MEM, e, "failed to "
679*533affcbSRobert Mustacchi 		    "allocate memory for a new nvme_ctrl_t: %s", strerror(e)));
680*533affcbSRobert Mustacchi 	}
681*533affcbSRobert Mustacchi 
682*533affcbSRobert Mustacchi 	ctrl->nc_nvme = nvme;
683*533affcbSRobert Mustacchi 	ctrl->nc_devi = di;
684*533affcbSRobert Mustacchi 	ctrl->nc_minor = minor;
685*533affcbSRobert Mustacchi 	ctrl->nc_inst = inst;
686*533affcbSRobert Mustacchi 	ctrl->nc_fd = open(buf, O_RDWR | O_CLOEXEC);
687*533affcbSRobert Mustacchi 	if (ctrl->nc_fd < 0) {
688*533affcbSRobert Mustacchi 		int e = errno;
689*533affcbSRobert Mustacchi 		nvme_ctrl_fini(ctrl);
690*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_OPEN_DEV, e, "failed to open "
691*533affcbSRobert Mustacchi 		    "device path %s: %s", buf, strerror(e)));
692*533affcbSRobert Mustacchi 	}
693*533affcbSRobert Mustacchi 
694*533affcbSRobert Mustacchi 	ctrl->nc_devi_path = di_devfs_path(di);
695*533affcbSRobert Mustacchi 	if (ctrl->nc_devi_path == NULL) {
696*533affcbSRobert Mustacchi 		int e = errno;
697*533affcbSRobert Mustacchi 		nvme_ctrl_fini(ctrl);
698*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_LIBDEVINFO, e, "failed to "
699*533affcbSRobert Mustacchi 		    "obtain /devices path for the controller: %s",
700*533affcbSRobert Mustacchi 		    strerror(e)));
701*533affcbSRobert Mustacchi 	}
702*533affcbSRobert Mustacchi 
703*533affcbSRobert Mustacchi 	if (!nvme_ioc_ctrl_info(ctrl, &ctrl_info)) {
704*533affcbSRobert Mustacchi 		nvme_err_data_t err;
705*533affcbSRobert Mustacchi 
706*533affcbSRobert Mustacchi 		nvme_ctrl_err_save(ctrl, &err);
707*533affcbSRobert Mustacchi 		nvme_err_set(nvme, &err);
708*533affcbSRobert Mustacchi 		nvme_ctrl_fini(ctrl);
709*533affcbSRobert Mustacchi 		return (false);
710*533affcbSRobert Mustacchi 	}
711*533affcbSRobert Mustacchi 
712*533affcbSRobert Mustacchi 	ctrl->nc_vers = ctrl_info.nci_vers;
713*533affcbSRobert Mustacchi 	ctrl->nc_info = ctrl_info.nci_ctrl_id;
714*533affcbSRobert Mustacchi 
715*533affcbSRobert Mustacchi 	nvme_vendor_map_ctrl(ctrl);
716*533affcbSRobert Mustacchi 
717*533affcbSRobert Mustacchi 	*outp = ctrl;
718*533affcbSRobert Mustacchi 	return (nvme_success(nvme));
719*533affcbSRobert Mustacchi }
720*533affcbSRobert Mustacchi 
721*533affcbSRobert Mustacchi typedef struct {
722*533affcbSRobert Mustacchi 	bool ncia_found;
723*533affcbSRobert Mustacchi 	int32_t ncia_inst;
724*533affcbSRobert Mustacchi 	nvme_ctrl_t *ncia_ctrl;
725*533affcbSRobert Mustacchi 	nvme_err_data_t ncia_err;
726*533affcbSRobert Mustacchi } nvme_ctrl_init_arg_t;
727*533affcbSRobert Mustacchi 
728*533affcbSRobert Mustacchi bool
nvme_ctrl_init_by_instance_cb(nvme_t * nvme,const nvme_ctrl_disc_t * disc,void * arg)729*533affcbSRobert Mustacchi nvme_ctrl_init_by_instance_cb(nvme_t *nvme, const nvme_ctrl_disc_t *disc,
730*533affcbSRobert Mustacchi     void *arg)
731*533affcbSRobert Mustacchi {
732*533affcbSRobert Mustacchi 	nvme_ctrl_init_arg_t *init = arg;
733*533affcbSRobert Mustacchi 
734*533affcbSRobert Mustacchi 	if (di_instance(disc->ncd_devi) != init->ncia_inst) {
735*533affcbSRobert Mustacchi 		return (true);
736*533affcbSRobert Mustacchi 	}
737*533affcbSRobert Mustacchi 
738*533affcbSRobert Mustacchi 	/*
739*533affcbSRobert Mustacchi 	 * If we fail to open the controller, we need to save the error
740*533affcbSRobert Mustacchi 	 * information because it's going to end up being clobbered because this
741*533affcbSRobert Mustacchi 	 * is a callback function surrounded by other libnvme callers.
742*533affcbSRobert Mustacchi 	 */
743*533affcbSRobert Mustacchi 	init->ncia_found = true;
744*533affcbSRobert Mustacchi 	if (!nvme_ctrl_init(nvme, disc->ncd_devi, &init->ncia_ctrl)) {
745*533affcbSRobert Mustacchi 		nvme_err_save(nvme, &init->ncia_err);
746*533affcbSRobert Mustacchi 	}
747*533affcbSRobert Mustacchi 
748*533affcbSRobert Mustacchi 	return (false);
749*533affcbSRobert Mustacchi }
750*533affcbSRobert Mustacchi 
751*533affcbSRobert Mustacchi bool
nvme_ctrl_init_by_instance(nvme_t * nvme,int32_t inst,nvme_ctrl_t ** outp)752*533affcbSRobert Mustacchi nvme_ctrl_init_by_instance(nvme_t *nvme, int32_t inst, nvme_ctrl_t **outp)
753*533affcbSRobert Mustacchi {
754*533affcbSRobert Mustacchi 	nvme_ctrl_init_arg_t init;
755*533affcbSRobert Mustacchi 
756*533affcbSRobert Mustacchi 	if (inst < 0) {
757*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_ILLEGAL_INSTANCE, 0,
758*533affcbSRobert Mustacchi 		    "encountered illegal negative instance number: %d", inst));
759*533affcbSRobert Mustacchi 	}
760*533affcbSRobert Mustacchi 
761*533affcbSRobert Mustacchi 	if (outp == NULL) {
762*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
763*533affcbSRobert Mustacchi 		    "invalid nvme_ctrl_t output pointer: %p", outp));
764*533affcbSRobert Mustacchi 	}
765*533affcbSRobert Mustacchi 
766*533affcbSRobert Mustacchi 	init.ncia_found = false;
767*533affcbSRobert Mustacchi 	init.ncia_inst = inst;
768*533affcbSRobert Mustacchi 	init.ncia_ctrl = NULL;
769*533affcbSRobert Mustacchi 
770*533affcbSRobert Mustacchi 	if (!nvme_ctrl_discover(nvme, nvme_ctrl_init_by_instance_cb, &init)) {
771*533affcbSRobert Mustacchi 		return (false);
772*533affcbSRobert Mustacchi 	}
773*533affcbSRobert Mustacchi 
774*533affcbSRobert Mustacchi 	if (!init.ncia_found) {
775*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
776*533affcbSRobert Mustacchi 		    "failed to find NVMe controller nvme%d", inst));
777*533affcbSRobert Mustacchi 	}
778*533affcbSRobert Mustacchi 
779*533affcbSRobert Mustacchi 	/*
780*533affcbSRobert Mustacchi 	 * If we don't have an NVMe controller structure but we did find the
781*533affcbSRobert Mustacchi 	 * instance, then we must have had an error constructing this will which
782*533affcbSRobert Mustacchi 	 * be on our handle. We have to reconstruct the error from saved
783*533affcbSRobert Mustacchi 	 * information as nvme_ctrl_discover will have clobbered it.
784*533affcbSRobert Mustacchi 	 */
785*533affcbSRobert Mustacchi 	if (init.ncia_ctrl == NULL) {
786*533affcbSRobert Mustacchi 		nvme_err_set(nvme, &init.ncia_err);
787*533affcbSRobert Mustacchi 		return (false);
788*533affcbSRobert Mustacchi 	}
789*533affcbSRobert Mustacchi 
790*533affcbSRobert Mustacchi 	*outp = init.ncia_ctrl;
791*533affcbSRobert Mustacchi 	return (nvme_success(nvme));
792*533affcbSRobert Mustacchi }
793*533affcbSRobert Mustacchi 
794*533affcbSRobert Mustacchi bool
nvme_ctrl_devi(nvme_ctrl_t * ctrl,di_node_t * devip)795*533affcbSRobert Mustacchi nvme_ctrl_devi(nvme_ctrl_t *ctrl, di_node_t *devip)
796*533affcbSRobert Mustacchi {
797*533affcbSRobert Mustacchi 	*devip = ctrl->nc_devi;
798*533affcbSRobert Mustacchi 	return (nvme_ctrl_success(ctrl));
799*533affcbSRobert Mustacchi }
800*533affcbSRobert Mustacchi 
801*533affcbSRobert Mustacchi bool
nvme_ioc_ctrl_info(nvme_ctrl_t * ctrl,nvme_ioctl_ctrl_info_t * info)802*533affcbSRobert Mustacchi nvme_ioc_ctrl_info(nvme_ctrl_t *ctrl, nvme_ioctl_ctrl_info_t *info)
803*533affcbSRobert Mustacchi {
804*533affcbSRobert Mustacchi 	(void) memset(info, 0, sizeof (nvme_ioctl_ctrl_info_t));
805*533affcbSRobert Mustacchi 
806*533affcbSRobert Mustacchi 	if (ioctl(ctrl->nc_fd, NVME_IOC_CTRL_INFO, info) != 0) {
807*533affcbSRobert Mustacchi 		int e = errno;
808*533affcbSRobert Mustacchi 		return (nvme_ioctl_syserror(ctrl, e, "controller info"));
809*533affcbSRobert Mustacchi 	}
810*533affcbSRobert Mustacchi 
811*533affcbSRobert Mustacchi 	if (info->nci_common.nioc_drv_err != NVME_IOCTL_E_OK) {
812*533affcbSRobert Mustacchi 		return (nvme_ioctl_error(ctrl, &info->nci_common,
813*533affcbSRobert Mustacchi 		    "controller info"));
814*533affcbSRobert Mustacchi 	}
815*533affcbSRobert Mustacchi 
816*533affcbSRobert Mustacchi 	return (true);
817*533affcbSRobert Mustacchi }
818*533affcbSRobert Mustacchi 
819*533affcbSRobert Mustacchi bool
nvme_ioc_ns_info(nvme_ctrl_t * ctrl,uint32_t nsid,nvme_ioctl_ns_info_t * info)820*533affcbSRobert Mustacchi nvme_ioc_ns_info(nvme_ctrl_t *ctrl, uint32_t nsid, nvme_ioctl_ns_info_t *info)
821*533affcbSRobert Mustacchi {
822*533affcbSRobert Mustacchi 	(void) memset(info, 0, sizeof (nvme_ioctl_ns_info_t));
823*533affcbSRobert Mustacchi 	info->nni_common.nioc_nsid = nsid;
824*533affcbSRobert Mustacchi 
825*533affcbSRobert Mustacchi 	if (ioctl(ctrl->nc_fd, NVME_IOC_NS_INFO, info) != 0) {
826*533affcbSRobert Mustacchi 		int e = errno;
827*533affcbSRobert Mustacchi 		return (nvme_ioctl_syserror(ctrl, e, "namespace info"));
828*533affcbSRobert Mustacchi 	}
829*533affcbSRobert Mustacchi 
830*533affcbSRobert Mustacchi 	if (info->nni_common.nioc_drv_err != NVME_IOCTL_E_OK) {
831*533affcbSRobert Mustacchi 		return (nvme_ioctl_error(ctrl, &info->nni_common,
832*533affcbSRobert Mustacchi 		    "namespace info"));
833*533affcbSRobert Mustacchi 	}
834*533affcbSRobert Mustacchi 
835*533affcbSRobert Mustacchi 	return (true);
836*533affcbSRobert Mustacchi }
837*533affcbSRobert Mustacchi 
838*533affcbSRobert Mustacchi const char *
nvme_tporttostr(nvme_ctrl_transport_t tport)839*533affcbSRobert Mustacchi nvme_tporttostr(nvme_ctrl_transport_t tport)
840*533affcbSRobert Mustacchi {
841*533affcbSRobert Mustacchi 	switch (tport) {
842*533affcbSRobert Mustacchi 	case NVME_CTRL_TRANSPORT_PCI:
843*533affcbSRobert Mustacchi 		return ("PCI");
844*533affcbSRobert Mustacchi 	case NVME_CTRL_TRANSPORT_TCP:
845*533affcbSRobert Mustacchi 		return ("TCP");
846*533affcbSRobert Mustacchi 	case NVME_CTRL_TRANSPORT_RDMA:
847*533affcbSRobert Mustacchi 		return ("RDMA");
848*533affcbSRobert Mustacchi 	default:
849*533affcbSRobert Mustacchi 		return ("unknown transport");
850*533affcbSRobert Mustacchi 	}
851*533affcbSRobert Mustacchi }
852*533affcbSRobert Mustacchi 
853*533affcbSRobert Mustacchi static bool
nvme_ns_discover_validate(nvme_ctrl_t * ctrl,nvme_ns_disc_level_t level)854*533affcbSRobert Mustacchi nvme_ns_discover_validate(nvme_ctrl_t *ctrl, nvme_ns_disc_level_t level)
855*533affcbSRobert Mustacchi {
856*533affcbSRobert Mustacchi 	switch (level) {
857*533affcbSRobert Mustacchi 	case NVME_NS_DISC_F_ALL:
858*533affcbSRobert Mustacchi 	case NVME_NS_DISC_F_ALLOCATED:
859*533affcbSRobert Mustacchi 	case NVME_NS_DISC_F_ACTIVE:
860*533affcbSRobert Mustacchi 	case NVME_NS_DISC_F_NOT_IGNORED:
861*533affcbSRobert Mustacchi 	case NVME_NS_DISC_F_BLKDEV:
862*533affcbSRobert Mustacchi 		return (true);
863*533affcbSRobert Mustacchi 	default:
864*533affcbSRobert Mustacchi 		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_FLAG, 0, "invalid "
865*533affcbSRobert Mustacchi 		    "namespace discovery level specified: 0x%x", level));
866*533affcbSRobert Mustacchi 	}
867*533affcbSRobert Mustacchi }
868*533affcbSRobert Mustacchi 
869*533affcbSRobert Mustacchi void
nvme_ns_discover_fini(nvme_ns_iter_t * iter)870*533affcbSRobert Mustacchi nvme_ns_discover_fini(nvme_ns_iter_t *iter)
871*533affcbSRobert Mustacchi {
872*533affcbSRobert Mustacchi 	free(iter);
873*533affcbSRobert Mustacchi }
874*533affcbSRobert Mustacchi 
875*533affcbSRobert Mustacchi const char *
nvme_nsleveltostr(nvme_ns_disc_level_t level)876*533affcbSRobert Mustacchi nvme_nsleveltostr(nvme_ns_disc_level_t level)
877*533affcbSRobert Mustacchi {
878*533affcbSRobert Mustacchi 	switch (level) {
879*533affcbSRobert Mustacchi 	case NVME_NS_DISC_F_ALL:
880*533affcbSRobert Mustacchi 		return ("unallocated");
881*533affcbSRobert Mustacchi 	case NVME_NS_DISC_F_ALLOCATED:
882*533affcbSRobert Mustacchi 		return ("allocated");
883*533affcbSRobert Mustacchi 	case NVME_NS_DISC_F_ACTIVE:
884*533affcbSRobert Mustacchi 		return ("active");
885*533affcbSRobert Mustacchi 	case NVME_NS_DISC_F_NOT_IGNORED:
886*533affcbSRobert Mustacchi 		return ("not ignored");
887*533affcbSRobert Mustacchi 	case NVME_NS_DISC_F_BLKDEV:
888*533affcbSRobert Mustacchi 		return ("blkdev");
889*533affcbSRobert Mustacchi 	default:
890*533affcbSRobert Mustacchi 		return ("unknown level");
891*533affcbSRobert Mustacchi 	}
892*533affcbSRobert Mustacchi }
893*533affcbSRobert Mustacchi 
894*533affcbSRobert Mustacchi nvme_ns_disc_level_t
nvme_ns_state_to_disc_level(nvme_ns_state_t state)895*533affcbSRobert Mustacchi nvme_ns_state_to_disc_level(nvme_ns_state_t state)
896*533affcbSRobert Mustacchi {
897*533affcbSRobert Mustacchi 	if ((state & NVME_NS_STATE_ALLOCATED) == 0) {
898*533affcbSRobert Mustacchi 		return (NVME_NS_DISC_F_ALL);
899*533affcbSRobert Mustacchi 	}
900*533affcbSRobert Mustacchi 
901*533affcbSRobert Mustacchi 	if ((state & NVME_NS_STATE_ACTIVE) == 0) {
902*533affcbSRobert Mustacchi 		return (NVME_NS_DISC_F_ALLOCATED);
903*533affcbSRobert Mustacchi 	}
904*533affcbSRobert Mustacchi 
905*533affcbSRobert Mustacchi 	if ((state & NVME_NS_STATE_IGNORED) != 0) {
906*533affcbSRobert Mustacchi 		return (NVME_NS_DISC_F_ACTIVE);
907*533affcbSRobert Mustacchi 	}
908*533affcbSRobert Mustacchi 
909*533affcbSRobert Mustacchi 	if ((state & NVME_NS_STATE_ATTACHED) == 0) {
910*533affcbSRobert Mustacchi 		return (NVME_NS_DISC_F_NOT_IGNORED);
911*533affcbSRobert Mustacchi 	} else {
912*533affcbSRobert Mustacchi 		return (NVME_NS_DISC_F_BLKDEV);
913*533affcbSRobert Mustacchi 	}
914*533affcbSRobert Mustacchi }
915*533affcbSRobert Mustacchi 
916*533affcbSRobert Mustacchi nvme_iter_t
nvme_ns_discover_step(nvme_ns_iter_t * iter,const nvme_ns_disc_t ** discp)917*533affcbSRobert Mustacchi nvme_ns_discover_step(nvme_ns_iter_t *iter, const nvme_ns_disc_t **discp)
918*533affcbSRobert Mustacchi {
919*533affcbSRobert Mustacchi 	nvme_ctrl_t *ctrl = iter->nni_ctrl;
920*533affcbSRobert Mustacchi 
921*533affcbSRobert Mustacchi 	if (iter->nni_err) {
922*533affcbSRobert Mustacchi 		return (NVME_ITER_ERROR);
923*533affcbSRobert Mustacchi 	}
924*533affcbSRobert Mustacchi 
925*533affcbSRobert Mustacchi 	if (iter->nni_done) {
926*533affcbSRobert Mustacchi 		return (NVME_ITER_DONE);
927*533affcbSRobert Mustacchi 	}
928*533affcbSRobert Mustacchi 
929*533affcbSRobert Mustacchi 	while (iter->nni_cur_idx <= ctrl->nc_info.id_nn) {
930*533affcbSRobert Mustacchi 		uint32_t nsid = iter->nni_cur_idx;
931*533affcbSRobert Mustacchi 		nvme_ioctl_ns_info_t ns_info = { 0 };
932*533affcbSRobert Mustacchi 		nvme_ns_disc_level_t level;
933*533affcbSRobert Mustacchi 
934*533affcbSRobert Mustacchi 		if (!nvme_ioc_ns_info(ctrl, nsid, &ns_info)) {
935*533affcbSRobert Mustacchi 			iter->nni_err = true;
936*533affcbSRobert Mustacchi 			return (NVME_ITER_ERROR);
937*533affcbSRobert Mustacchi 		}
938*533affcbSRobert Mustacchi 
939*533affcbSRobert Mustacchi 		iter->nni_cur_idx++;
940*533affcbSRobert Mustacchi 		level = nvme_ns_state_to_disc_level(ns_info.nni_state);
941*533affcbSRobert Mustacchi 		if (iter->nni_level > level) {
942*533affcbSRobert Mustacchi 			continue;
943*533affcbSRobert Mustacchi 		}
944*533affcbSRobert Mustacchi 
945*533affcbSRobert Mustacchi 		(void) memset(&iter->nni_disc, 0, sizeof (nvme_ns_disc_t));
946*533affcbSRobert Mustacchi 		iter->nni_disc.nnd_nsid = nsid;
947*533affcbSRobert Mustacchi 		iter->nni_disc.nnd_level = level;
948*533affcbSRobert Mustacchi 
949*533affcbSRobert Mustacchi 		if (nvme_guid_valid(ctrl, ns_info.nni_id.id_nguid)) {
950*533affcbSRobert Mustacchi 			iter->nni_disc.nnd_flags |= NVME_NS_DISC_F_NGUID_VALID;
951*533affcbSRobert Mustacchi 			(void) memcpy(iter->nni_disc.nnd_nguid,
952*533affcbSRobert Mustacchi 			    ns_info.nni_id.id_nguid,
953*533affcbSRobert Mustacchi 			    sizeof (ns_info.nni_id.id_nguid));
954*533affcbSRobert Mustacchi 		}
955*533affcbSRobert Mustacchi 
956*533affcbSRobert Mustacchi 		if (nvme_eui64_valid(ctrl, ns_info.nni_id.id_eui64)) {
957*533affcbSRobert Mustacchi 			iter->nni_disc.nnd_flags |= NVME_NS_DISC_F_EUI64_VALID;
958*533affcbSRobert Mustacchi 			(void) memcpy(iter->nni_disc.nnd_eui64,
959*533affcbSRobert Mustacchi 			    ns_info.nni_id.id_eui64,
960*533affcbSRobert Mustacchi 			    sizeof (ns_info.nni_id.id_eui64));
961*533affcbSRobert Mustacchi 		}
962*533affcbSRobert Mustacchi 
963*533affcbSRobert Mustacchi 		*discp = &iter->nni_disc;
964*533affcbSRobert Mustacchi 		return (NVME_ITER_VALID);
965*533affcbSRobert Mustacchi 	}
966*533affcbSRobert Mustacchi 
967*533affcbSRobert Mustacchi 	iter->nni_done = true;
968*533affcbSRobert Mustacchi 	return (NVME_ITER_DONE);
969*533affcbSRobert Mustacchi }
970*533affcbSRobert Mustacchi 
971*533affcbSRobert Mustacchi bool
nvme_ns_discover_init(nvme_ctrl_t * ctrl,nvme_ns_disc_level_t level,nvme_ns_iter_t ** iterp)972*533affcbSRobert Mustacchi nvme_ns_discover_init(nvme_ctrl_t *ctrl, nvme_ns_disc_level_t level,
973*533affcbSRobert Mustacchi     nvme_ns_iter_t **iterp)
974*533affcbSRobert Mustacchi {
975*533affcbSRobert Mustacchi 	nvme_ns_iter_t *iter;
976*533affcbSRobert Mustacchi 
977*533affcbSRobert Mustacchi 	if (!nvme_ns_discover_validate(ctrl, level)) {
978*533affcbSRobert Mustacchi 		return (false);
979*533affcbSRobert Mustacchi 	}
980*533affcbSRobert Mustacchi 
981*533affcbSRobert Mustacchi 	if (iterp == NULL) {
982*533affcbSRobert Mustacchi 		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
983*533affcbSRobert Mustacchi 		    "encountered invalid nvme_ns_iter_t output pointer: %p",
984*533affcbSRobert Mustacchi 		    iterp));
985*533affcbSRobert Mustacchi 	}
986*533affcbSRobert Mustacchi 
987*533affcbSRobert Mustacchi 	iter = calloc(1, sizeof (nvme_ns_iter_t));
988*533affcbSRobert Mustacchi 	if (iter == NULL) {
989*533affcbSRobert Mustacchi 		int e = errno;
990*533affcbSRobert Mustacchi 		return (nvme_ctrl_error(ctrl, NVME_ERR_NO_MEM, e, "failed to "
991*533affcbSRobert Mustacchi 		    "allocate memory for a new nvme_ns_iter_t: %s",
992*533affcbSRobert Mustacchi 		    strerror(e)));
993*533affcbSRobert Mustacchi 	}
994*533affcbSRobert Mustacchi 
995*533affcbSRobert Mustacchi 	iter->nni_ctrl = ctrl;
996*533affcbSRobert Mustacchi 	iter->nni_level = level;
997*533affcbSRobert Mustacchi 	iter->nni_cur_idx = 1;
998*533affcbSRobert Mustacchi 
999*533affcbSRobert Mustacchi 	*iterp = iter;
1000*533affcbSRobert Mustacchi 	return (nvme_ctrl_success(ctrl));
1001*533affcbSRobert Mustacchi }
1002*533affcbSRobert Mustacchi 
1003*533affcbSRobert Mustacchi bool
nvme_ns_discover(nvme_ctrl_t * ctrl,nvme_ns_disc_level_t level,nvme_ns_disc_f func,void * arg)1004*533affcbSRobert Mustacchi nvme_ns_discover(nvme_ctrl_t *ctrl, nvme_ns_disc_level_t level,
1005*533affcbSRobert Mustacchi     nvme_ns_disc_f func, void *arg)
1006*533affcbSRobert Mustacchi {
1007*533affcbSRobert Mustacchi 	nvme_ns_iter_t *iter;
1008*533affcbSRobert Mustacchi 	nvme_iter_t ret;
1009*533affcbSRobert Mustacchi 	const nvme_ns_disc_t *disc;
1010*533affcbSRobert Mustacchi 
1011*533affcbSRobert Mustacchi 	if (!nvme_ns_discover_validate(ctrl, level)) {
1012*533affcbSRobert Mustacchi 		return (false);
1013*533affcbSRobert Mustacchi 	}
1014*533affcbSRobert Mustacchi 
1015*533affcbSRobert Mustacchi 	if (func == NULL) {
1016*533affcbSRobert Mustacchi 		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
1017*533affcbSRobert Mustacchi 		    "encountered invalid nvme_ns_disc_f function pointer: %p",
1018*533affcbSRobert Mustacchi 		    func));
1019*533affcbSRobert Mustacchi 	}
1020*533affcbSRobert Mustacchi 
1021*533affcbSRobert Mustacchi 	if (!nvme_ns_discover_init(ctrl, level, &iter)) {
1022*533affcbSRobert Mustacchi 		return (false);
1023*533affcbSRobert Mustacchi 	}
1024*533affcbSRobert Mustacchi 
1025*533affcbSRobert Mustacchi 	while ((ret = nvme_ns_discover_step(iter, &disc)) == NVME_ITER_VALID) {
1026*533affcbSRobert Mustacchi 		if (!func(ctrl, disc, arg))
1027*533affcbSRobert Mustacchi 			break;
1028*533affcbSRobert Mustacchi 	}
1029*533affcbSRobert Mustacchi 
1030*533affcbSRobert Mustacchi 	nvme_ns_discover_fini(iter);
1031*533affcbSRobert Mustacchi 	if (ret == NVME_ITER_ERROR) {
1032*533affcbSRobert Mustacchi 		return (false);
1033*533affcbSRobert Mustacchi 	}
1034*533affcbSRobert Mustacchi 
1035*533affcbSRobert Mustacchi 	return (nvme_ctrl_success(ctrl));
1036*533affcbSRobert Mustacchi }
1037*533affcbSRobert Mustacchi 
1038*533affcbSRobert Mustacchi uint32_t
nvme_ns_disc_nsid(const nvme_ns_disc_t * discp)1039*533affcbSRobert Mustacchi nvme_ns_disc_nsid(const nvme_ns_disc_t *discp)
1040*533affcbSRobert Mustacchi {
1041*533affcbSRobert Mustacchi 	return (discp->nnd_nsid);
1042*533affcbSRobert Mustacchi }
1043*533affcbSRobert Mustacchi 
1044*533affcbSRobert Mustacchi nvme_ns_disc_level_t
nvme_ns_disc_level(const nvme_ns_disc_t * discp)1045*533affcbSRobert Mustacchi nvme_ns_disc_level(const nvme_ns_disc_t *discp)
1046*533affcbSRobert Mustacchi {
1047*533affcbSRobert Mustacchi 	return (discp->nnd_level);
1048*533affcbSRobert Mustacchi }
1049*533affcbSRobert Mustacchi 
1050*533affcbSRobert Mustacchi nvme_ns_disc_flags_t
nvme_ns_disc_flags(const nvme_ns_disc_t * discp)1051*533affcbSRobert Mustacchi nvme_ns_disc_flags(const nvme_ns_disc_t *discp)
1052*533affcbSRobert Mustacchi {
1053*533affcbSRobert Mustacchi 	return (discp->nnd_flags);
1054*533affcbSRobert Mustacchi }
1055*533affcbSRobert Mustacchi 
1056*533affcbSRobert Mustacchi const uint8_t *
nvme_ns_disc_eui64(const nvme_ns_disc_t * discp)1057*533affcbSRobert Mustacchi nvme_ns_disc_eui64(const nvme_ns_disc_t *discp)
1058*533affcbSRobert Mustacchi {
1059*533affcbSRobert Mustacchi 	if ((discp->nnd_flags & NVME_NS_DISC_F_EUI64_VALID) == 0) {
1060*533affcbSRobert Mustacchi 		return (NULL);
1061*533affcbSRobert Mustacchi 	}
1062*533affcbSRobert Mustacchi 
1063*533affcbSRobert Mustacchi 	return (discp->nnd_eui64);
1064*533affcbSRobert Mustacchi }
1065*533affcbSRobert Mustacchi 
1066*533affcbSRobert Mustacchi const uint8_t *
nvme_ns_disc_nguid(const nvme_ns_disc_t * discp)1067*533affcbSRobert Mustacchi nvme_ns_disc_nguid(const nvme_ns_disc_t *discp)
1068*533affcbSRobert Mustacchi {
1069*533affcbSRobert Mustacchi 	if ((discp->nnd_flags & NVME_NS_DISC_F_NGUID_VALID) == 0) {
1070*533affcbSRobert Mustacchi 		return (NULL);
1071*533affcbSRobert Mustacchi 	}
1072*533affcbSRobert Mustacchi 
1073*533affcbSRobert Mustacchi 	return (discp->nnd_nguid);
1074*533affcbSRobert Mustacchi }
1075*533affcbSRobert Mustacchi 
1076*533affcbSRobert Mustacchi void
nvme_ns_fini(nvme_ns_t * ns)1077*533affcbSRobert Mustacchi nvme_ns_fini(nvme_ns_t *ns)
1078*533affcbSRobert Mustacchi {
1079*533affcbSRobert Mustacchi 	free(ns);
1080*533affcbSRobert Mustacchi }
1081*533affcbSRobert Mustacchi 
1082*533affcbSRobert Mustacchi bool
nvme_ns_init(nvme_ctrl_t * ctrl,uint32_t nsid,nvme_ns_t ** nsp)1083*533affcbSRobert Mustacchi nvme_ns_init(nvme_ctrl_t *ctrl, uint32_t nsid, nvme_ns_t **nsp)
1084*533affcbSRobert Mustacchi {
1085*533affcbSRobert Mustacchi 	nvme_ns_t *ns;
1086*533affcbSRobert Mustacchi 
1087*533affcbSRobert Mustacchi 	if (nsp == NULL) {
1088*533affcbSRobert Mustacchi 		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
1089*533affcbSRobert Mustacchi 		    "encountered invalid nvme_ns_t output pointer: %p", nsp));
1090*533affcbSRobert Mustacchi 	}
1091*533affcbSRobert Mustacchi 
1092*533affcbSRobert Mustacchi 	if (nsid < NVME_NSID_MIN || nsid > ctrl->nc_info.id_nn) {
1093*533affcbSRobert Mustacchi 		return (nvme_ctrl_error(ctrl, NVME_ERR_NS_RANGE, 0, "requested "
1094*533affcbSRobert Mustacchi 		    "namespace 0x%x is invalid, valid namespaces are [0x%x, "
1095*533affcbSRobert Mustacchi 		    "0x%x]", nsid, NVME_NSID_MIN, ctrl->nc_info.id_nn));
1096*533affcbSRobert Mustacchi 	}
1097*533affcbSRobert Mustacchi 
1098*533affcbSRobert Mustacchi 	ns = calloc(1, sizeof (nvme_ns_t));
1099*533affcbSRobert Mustacchi 	if (ns == NULL) {
1100*533affcbSRobert Mustacchi 		int e = errno;
1101*533affcbSRobert Mustacchi 		return (nvme_ctrl_error(ctrl, NVME_ERR_NO_MEM, e, "failed to "
1102*533affcbSRobert Mustacchi 		    "allocate memory for a new nvme_ns_t: %s", strerror(e)));
1103*533affcbSRobert Mustacchi 	}
1104*533affcbSRobert Mustacchi 
1105*533affcbSRobert Mustacchi 	ns->nn_ctrl = ctrl;
1106*533affcbSRobert Mustacchi 	ns->nn_nsid = nsid;
1107*533affcbSRobert Mustacchi 
1108*533affcbSRobert Mustacchi 	*nsp = ns;
1109*533affcbSRobert Mustacchi 	return (nvme_ctrl_success(ctrl));
1110*533affcbSRobert Mustacchi }
1111*533affcbSRobert Mustacchi 
1112*533affcbSRobert Mustacchi typedef struct {
1113*533affcbSRobert Mustacchi 	nvme_ctrl_t *nnia_ctrl;
1114*533affcbSRobert Mustacchi 	const char *nnia_name;
1115*533affcbSRobert Mustacchi 	bool nnia_found;
1116*533affcbSRobert Mustacchi 	nvme_ns_t *nnia_ns;
1117*533affcbSRobert Mustacchi 	nvme_err_data_t nnia_err;
1118*533affcbSRobert Mustacchi } nvme_ns_init_arg_t;
1119*533affcbSRobert Mustacchi 
1120*533affcbSRobert Mustacchi static bool
nvme_ns_init_by_name_cb(nvme_ctrl_t * ctrl,const nvme_ns_disc_t * disc,void * arg)1121*533affcbSRobert Mustacchi nvme_ns_init_by_name_cb(nvme_ctrl_t *ctrl, const nvme_ns_disc_t *disc,
1122*533affcbSRobert Mustacchi     void *arg)
1123*533affcbSRobert Mustacchi {
1124*533affcbSRobert Mustacchi 	nvme_ns_init_arg_t *init = arg;
1125*533affcbSRobert Mustacchi 	char buf[NVME_NGUID_NAMELEN];
1126*533affcbSRobert Mustacchi 	CTASSERT(NVME_NGUID_NAMELEN > NVME_EUI64_NAMELEN);
1127*533affcbSRobert Mustacchi 
1128*533affcbSRobert Mustacchi 	if ((disc->nnd_flags & NVME_NS_DISC_F_NGUID_VALID) != 0) {
1129*533affcbSRobert Mustacchi 		(void) nvme_format_nguid(disc->nnd_nguid, buf, sizeof (buf));
1130*533affcbSRobert Mustacchi 		if (strcasecmp(init->nnia_name, buf) == 0)
1131*533affcbSRobert Mustacchi 			goto match;
1132*533affcbSRobert Mustacchi 	}
1133*533affcbSRobert Mustacchi 
1134*533affcbSRobert Mustacchi 	if ((disc->nnd_flags & NVME_NS_DISC_F_EUI64_VALID) != 0) {
1135*533affcbSRobert Mustacchi 		(void) nvme_format_eui64(disc->nnd_eui64, buf, sizeof (buf));
1136*533affcbSRobert Mustacchi 		if (strcasecmp(init->nnia_name, buf) == 0)
1137*533affcbSRobert Mustacchi 			goto match;
1138*533affcbSRobert Mustacchi 	}
1139*533affcbSRobert Mustacchi 
1140*533affcbSRobert Mustacchi 	(void) snprintf(buf, sizeof (buf), "%u", disc->nnd_nsid);
1141*533affcbSRobert Mustacchi 	if (strcasecmp(init->nnia_name, buf) == 0)
1142*533affcbSRobert Mustacchi 		goto match;
1143*533affcbSRobert Mustacchi 
1144*533affcbSRobert Mustacchi 	return (true);
1145*533affcbSRobert Mustacchi 
1146*533affcbSRobert Mustacchi match:
1147*533affcbSRobert Mustacchi 	init->nnia_found = true;
1148*533affcbSRobert Mustacchi 	if (!nvme_ns_init(ctrl, disc->nnd_nsid, &init->nnia_ns)) {
1149*533affcbSRobert Mustacchi 		nvme_ctrl_err_save(ctrl, &init->nnia_err);
1150*533affcbSRobert Mustacchi 	}
1151*533affcbSRobert Mustacchi 
1152*533affcbSRobert Mustacchi 	return (false);
1153*533affcbSRobert Mustacchi }
1154*533affcbSRobert Mustacchi 
1155*533affcbSRobert Mustacchi /*
1156*533affcbSRobert Mustacchi  * Attempt to find a namespace by 'name'. A name could be the NGUID, EUI64, or
1157*533affcbSRobert Mustacchi  * just the plain old namespace ID.
1158*533affcbSRobert Mustacchi  */
1159*533affcbSRobert Mustacchi bool
nvme_ns_init_by_name(nvme_ctrl_t * ctrl,const char * ns_name,nvme_ns_t ** nsp)1160*533affcbSRobert Mustacchi nvme_ns_init_by_name(nvme_ctrl_t *ctrl, const char *ns_name, nvme_ns_t **nsp)
1161*533affcbSRobert Mustacchi {
1162*533affcbSRobert Mustacchi 	nvme_ns_init_arg_t init;
1163*533affcbSRobert Mustacchi 
1164*533affcbSRobert Mustacchi 	if (ns_name == NULL) {
1165*533affcbSRobert Mustacchi 		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
1166*533affcbSRobert Mustacchi 		    "encountered invalid namespace name: %p", ns_name));
1167*533affcbSRobert Mustacchi 	}
1168*533affcbSRobert Mustacchi 
1169*533affcbSRobert Mustacchi 	if (nsp == NULL) {
1170*533affcbSRobert Mustacchi 		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
1171*533affcbSRobert Mustacchi 		    "encountered invalid nvme_ns_t output pointer: %p", nsp));
1172*533affcbSRobert Mustacchi 	}
1173*533affcbSRobert Mustacchi 
1174*533affcbSRobert Mustacchi 	init.nnia_ctrl = ctrl;
1175*533affcbSRobert Mustacchi 	init.nnia_name = ns_name;
1176*533affcbSRobert Mustacchi 	init.nnia_found = false;
1177*533affcbSRobert Mustacchi 	init.nnia_ns = NULL;
1178*533affcbSRobert Mustacchi 
1179*533affcbSRobert Mustacchi 	if (!nvme_ns_discover(ctrl, NVME_NS_DISC_F_ALL, nvme_ns_init_by_name_cb,
1180*533affcbSRobert Mustacchi 	    &init)) {
1181*533affcbSRobert Mustacchi 		return (false);
1182*533affcbSRobert Mustacchi 	}
1183*533affcbSRobert Mustacchi 
1184*533affcbSRobert Mustacchi 	if (!init.nnia_found) {
1185*533affcbSRobert Mustacchi 		return (nvme_ctrl_error(ctrl, NVME_ERR_NS_RANGE, 0, "failed to "
1186*533affcbSRobert Mustacchi 		    "find NVMe namespace %s on nvme%d", ns_name,
1187*533affcbSRobert Mustacchi 		    ctrl->nc_inst));
1188*533affcbSRobert Mustacchi 	}
1189*533affcbSRobert Mustacchi 
1190*533affcbSRobert Mustacchi 	if (init.nnia_ns == NULL) {
1191*533affcbSRobert Mustacchi 		nvme_ctrl_err_set(ctrl, &init.nnia_err);
1192*533affcbSRobert Mustacchi 		return (false);
1193*533affcbSRobert Mustacchi 	}
1194*533affcbSRobert Mustacchi 
1195*533affcbSRobert Mustacchi 	*nsp = init.nnia_ns;
1196*533affcbSRobert Mustacchi 	return (nvme_ctrl_success(ctrl));
1197*533affcbSRobert Mustacchi }
1198*533affcbSRobert Mustacchi 
1199*533affcbSRobert Mustacchi bool
nvme_ctrl_ns_init(nvme_t * nvme,const char * name,nvme_ctrl_t ** ctrlp,nvme_ns_t ** nsp)1200*533affcbSRobert Mustacchi nvme_ctrl_ns_init(nvme_t *nvme, const char *name, nvme_ctrl_t **ctrlp,
1201*533affcbSRobert Mustacchi     nvme_ns_t **nsp)
1202*533affcbSRobert Mustacchi {
1203*533affcbSRobert Mustacchi 	const char *slash, *ns_name;
1204*533affcbSRobert Mustacchi 	char *eptr;
1205*533affcbSRobert Mustacchi 	nvme_ctrl_t *ctrl;
1206*533affcbSRobert Mustacchi 	nvme_ns_t *ns;
1207*533affcbSRobert Mustacchi 	unsigned long inst;
1208*533affcbSRobert Mustacchi 	size_t ctrl_namelen;
1209*533affcbSRobert Mustacchi 
1210*533affcbSRobert Mustacchi 	if (name == NULL) {
1211*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
1212*533affcbSRobert Mustacchi 		    "invalid name to search for: %p", name));
1213*533affcbSRobert Mustacchi 	}
1214*533affcbSRobert Mustacchi 
1215*533affcbSRobert Mustacchi 	/*
1216*533affcbSRobert Mustacchi 	 * We require a controller, but the namespace output pointer is only
1217*533affcbSRobert Mustacchi 	 * required if we end up having a namespace present.
1218*533affcbSRobert Mustacchi 	 */
1219*533affcbSRobert Mustacchi 	if (ctrlp == NULL) {
1220*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
1221*533affcbSRobert Mustacchi 		    "invalid nvme_ctrl_t output pointer: %p", ctrlp));
1222*533affcbSRobert Mustacchi 	}
1223*533affcbSRobert Mustacchi 
1224*533affcbSRobert Mustacchi 	slash = strchr(name, '/');
1225*533affcbSRobert Mustacchi 	if (slash != NULL) {
1226*533affcbSRobert Mustacchi 		ctrl_namelen = (uintptr_t)slash - (uintptr_t)name;
1227*533affcbSRobert Mustacchi 		ns_name = slash + 1;
1228*533affcbSRobert Mustacchi 
1229*533affcbSRobert Mustacchi 		if (nsp == NULL) {
1230*533affcbSRobert Mustacchi 			return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0,
1231*533affcbSRobert Mustacchi 			    "encountered invalid nvme_ns_t output pointer: %p",
1232*533affcbSRobert Mustacchi 			    nsp));
1233*533affcbSRobert Mustacchi 		}
1234*533affcbSRobert Mustacchi 
1235*533affcbSRobert Mustacchi 	} else {
1236*533affcbSRobert Mustacchi 		ctrl_namelen = strlen(name);
1237*533affcbSRobert Mustacchi 		ns_name = NULL;
1238*533affcbSRobert Mustacchi 	}
1239*533affcbSRobert Mustacchi 
1240*533affcbSRobert Mustacchi 	*ctrlp = NULL;
1241*533affcbSRobert Mustacchi 	if (nsp != NULL) {
1242*533affcbSRobert Mustacchi 		*nsp = NULL;
1243*533affcbSRobert Mustacchi 	}
1244*533affcbSRobert Mustacchi 
1245*533affcbSRobert Mustacchi 	if (strncmp(name, "nvme", 4) != 0) {
1246*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0, "unable "
1247*533affcbSRobert Mustacchi 		    "to map controller '%.*s' to a known device class, "
1248*533affcbSRobert Mustacchi 		    "expected the controller to start with 'nvme'",
1249*533affcbSRobert Mustacchi 		    (int)ctrl_namelen, name));
1250*533affcbSRobert Mustacchi 	}
1251*533affcbSRobert Mustacchi 
1252*533affcbSRobert Mustacchi 	/*
1253*533affcbSRobert Mustacchi 	 * Before we go ahead and try to parse this with strtoul we need to
1254*533affcbSRobert Mustacchi 	 * manually check two things that strtoul will not:
1255*533affcbSRobert Mustacchi 	 *
1256*533affcbSRobert Mustacchi 	 * 1) If we have a null terminator, then we'll just get a 0 back.
1257*533affcbSRobert Mustacchi 	 * 2) If there are multiple leading zeros in a row then that's an error.
1258*533affcbSRobert Mustacchi 	 * We don't want to conflate 001 and 1 as the same here. The only valid
1259*533affcbSRobert Mustacchi 	 * case is 'nvme0' which is 5 characters long, hence the check below.
1260*533affcbSRobert Mustacchi 	 */
1261*533affcbSRobert Mustacchi 	if (ctrl_namelen == 4) {
1262*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
1263*533affcbSRobert Mustacchi 		    "no controller instance specified in %.*s",
1264*533affcbSRobert Mustacchi 		    (int)ctrl_namelen, name));
1265*533affcbSRobert Mustacchi 	}
1266*533affcbSRobert Mustacchi 
1267*533affcbSRobert Mustacchi 	if (name[4] == '0' && ctrl_namelen > 5) {
1268*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
1269*533affcbSRobert Mustacchi 		    "leading zeros aren't allowed for the instance specified "
1270*533affcbSRobert Mustacchi 		    "in %.*s", (int)ctrl_namelen, name));
1271*533affcbSRobert Mustacchi 	}
1272*533affcbSRobert Mustacchi 
1273*533affcbSRobert Mustacchi 	errno = 0;
1274*533affcbSRobert Mustacchi 	inst = strtoul(name + 4, &eptr, 10);
1275*533affcbSRobert Mustacchi 	if (errno != 0 || (*eptr != '\0' && eptr != slash)) {
1276*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
1277*533affcbSRobert Mustacchi 		    "failed to parse controller instance from %.*s",
1278*533affcbSRobert Mustacchi 		    (int)ctrl_namelen, name));
1279*533affcbSRobert Mustacchi 	}
1280*533affcbSRobert Mustacchi 
1281*533affcbSRobert Mustacchi 	if (inst > INT32_MAX) {
1282*533affcbSRobert Mustacchi 		return (nvme_error(nvme, NVME_ERR_ILLEGAL_INSTANCE, 0,
1283*533affcbSRobert Mustacchi 		    "parsed controller instance %lu is outside the valid "
1284*533affcbSRobert Mustacchi 		    "range [0, %d]", inst, INT32_MAX));
1285*533affcbSRobert Mustacchi 	}
1286*533affcbSRobert Mustacchi 
1287*533affcbSRobert Mustacchi 	if (!nvme_ctrl_init_by_instance(nvme, (int32_t)inst, &ctrl)) {
1288*533affcbSRobert Mustacchi 		return (false);
1289*533affcbSRobert Mustacchi 	}
1290*533affcbSRobert Mustacchi 
1291*533affcbSRobert Mustacchi 	if (ns_name == NULL) {
1292*533affcbSRobert Mustacchi 		*ctrlp = ctrl;
1293*533affcbSRobert Mustacchi 		return (nvme_success(nvme));
1294*533affcbSRobert Mustacchi 	}
1295*533affcbSRobert Mustacchi 
1296*533affcbSRobert Mustacchi 	if (!nvme_ns_init_by_name(ctrl, ns_name, &ns)) {
1297*533affcbSRobert Mustacchi 		nvme_err_data_t err;
1298*533affcbSRobert Mustacchi 
1299*533affcbSRobert Mustacchi 		nvme_ctrl_err_save(ctrl, &err);
1300*533affcbSRobert Mustacchi 		nvme_err_set(nvme, &err);
1301*533affcbSRobert Mustacchi 		nvme_ctrl_fini(ctrl);
1302*533affcbSRobert Mustacchi 		return (false);
1303*533affcbSRobert Mustacchi 	}
1304*533affcbSRobert Mustacchi 
1305*533affcbSRobert Mustacchi 	*ctrlp = ctrl;
1306*533affcbSRobert Mustacchi 	*nsp = ns;
1307*533affcbSRobert Mustacchi 
1308*533affcbSRobert Mustacchi 	return (nvme_success(nvme));
1309*533affcbSRobert Mustacchi }
1310*533affcbSRobert Mustacchi 
1311*533affcbSRobert Mustacchi bool
nvme_ns_bd_attach(nvme_ns_t * ns)1312*533affcbSRobert Mustacchi nvme_ns_bd_attach(nvme_ns_t *ns)
1313*533affcbSRobert Mustacchi {
1314*533affcbSRobert Mustacchi 	nvme_ctrl_t *ctrl = ns->nn_ctrl;
1315*533affcbSRobert Mustacchi 	nvme_ioctl_common_t com;
1316*533affcbSRobert Mustacchi 
1317*533affcbSRobert Mustacchi 	(void) memset(&com, 0, sizeof (com));
1318*533affcbSRobert Mustacchi 	com.nioc_nsid = ns->nn_nsid;
1319*533affcbSRobert Mustacchi 
1320*533affcbSRobert Mustacchi 	if (ioctl(ns->nn_ctrl->nc_fd, NVME_IOC_ATTACH, &com) != 0) {
1321*533affcbSRobert Mustacchi 		int e = errno;
1322*533affcbSRobert Mustacchi 		return (nvme_ioctl_syserror(ctrl, e, "namespace attach"));
1323*533affcbSRobert Mustacchi 	}
1324*533affcbSRobert Mustacchi 
1325*533affcbSRobert Mustacchi 	if (com.nioc_drv_err != NVME_IOCTL_E_OK) {
1326*533affcbSRobert Mustacchi 		return (nvme_ioctl_error(ctrl, &com, "namespace attach"));
1327*533affcbSRobert Mustacchi 	}
1328*533affcbSRobert Mustacchi 
1329*533affcbSRobert Mustacchi 	return (nvme_ctrl_success(ctrl));
1330*533affcbSRobert Mustacchi }
1331*533affcbSRobert Mustacchi 
1332*533affcbSRobert Mustacchi bool
nvme_ns_bd_detach(nvme_ns_t * ns)1333*533affcbSRobert Mustacchi nvme_ns_bd_detach(nvme_ns_t *ns)
1334*533affcbSRobert Mustacchi {
1335*533affcbSRobert Mustacchi 	nvme_ctrl_t *ctrl = ns->nn_ctrl;
1336*533affcbSRobert Mustacchi 	nvme_ioctl_common_t com;
1337*533affcbSRobert Mustacchi 
1338*533affcbSRobert Mustacchi 	(void) memset(&com, 0, sizeof (com));
1339*533affcbSRobert Mustacchi 	com.nioc_nsid = ns->nn_nsid;
1340*533affcbSRobert Mustacchi 
1341*533affcbSRobert Mustacchi 	if (ioctl(ns->nn_ctrl->nc_fd, NVME_IOC_DETACH, &com) != 0) {
1342*533affcbSRobert Mustacchi 		int e = errno;
1343*533affcbSRobert Mustacchi 		return (nvme_ioctl_syserror(ctrl, e, "namespace detach"));
1344*533affcbSRobert Mustacchi 	}
1345*533affcbSRobert Mustacchi 
1346*533affcbSRobert Mustacchi 	if (com.nioc_drv_err != NVME_IOCTL_E_OK) {
1347*533affcbSRobert Mustacchi 		return (nvme_ioctl_error(ctrl, &com, "namespace detach"));
1348*533affcbSRobert Mustacchi 	}
1349*533affcbSRobert Mustacchi 
1350*533affcbSRobert Mustacchi 	return (nvme_ctrl_success(ctrl));
1351*533affcbSRobert Mustacchi }
1352*533affcbSRobert Mustacchi 
1353*533affcbSRobert Mustacchi /*
1354*533affcbSRobert Mustacchi  * Check for a lock programming error and upanic() if so.
1355*533affcbSRobert Mustacchi  */
1356*533affcbSRobert Mustacchi static void
nvme_lock_check(nvme_ctrl_t * ctrl)1357*533affcbSRobert Mustacchi nvme_lock_check(nvme_ctrl_t *ctrl)
1358*533affcbSRobert Mustacchi {
1359*533affcbSRobert Mustacchi 	char msg[1024];
1360*533affcbSRobert Mustacchi 	int ret;
1361*533affcbSRobert Mustacchi 	const char *up;
1362*533affcbSRobert Mustacchi 	size_t ulen;
1363*533affcbSRobert Mustacchi 	const char *base = "fatal libnvme locking error detected";
1364*533affcbSRobert Mustacchi 
1365*533affcbSRobert Mustacchi 	if (ctrl->nc_err.ne_err != NVME_ERR_LOCK_PROG) {
1366*533affcbSRobert Mustacchi 		return;
1367*533affcbSRobert Mustacchi 	}
1368*533affcbSRobert Mustacchi 
1369*533affcbSRobert Mustacchi 	ret = snprintf(msg, sizeof (msg), "%s: %s (controller %p)", base,
1370*533affcbSRobert Mustacchi 	    ctrl->nc_err.ne_errmsg, ctrl);
1371*533affcbSRobert Mustacchi 	if (ret >= sizeof (msg)) {
1372*533affcbSRobert Mustacchi 		ulen = sizeof (msg);
1373*533affcbSRobert Mustacchi 		up = msg;
1374*533affcbSRobert Mustacchi 	} else if (ret <= 0) {
1375*533affcbSRobert Mustacchi 		ulen = strlen(base) + 1;
1376*533affcbSRobert Mustacchi 		up = base;
1377*533affcbSRobert Mustacchi 	} else {
1378*533affcbSRobert Mustacchi 		ulen = (size_t)ret + 1;
1379*533affcbSRobert Mustacchi 		up = msg;
1380*533affcbSRobert Mustacchi 	}
1381*533affcbSRobert Mustacchi 
1382*533affcbSRobert Mustacchi 	upanic(up, ulen);
1383*533affcbSRobert Mustacchi }
1384*533affcbSRobert Mustacchi 
1385*533affcbSRobert Mustacchi static bool
nvme_lock_common(nvme_ctrl_t * ctrl,uint32_t nsid,nvme_lock_level_t level,nvme_lock_flags_t flags)1386*533affcbSRobert Mustacchi nvme_lock_common(nvme_ctrl_t *ctrl, uint32_t nsid, nvme_lock_level_t level,
1387*533affcbSRobert Mustacchi     nvme_lock_flags_t flags)
1388*533affcbSRobert Mustacchi {
1389*533affcbSRobert Mustacchi 	nvme_ioctl_lock_t lock;
1390*533affcbSRobert Mustacchi 	const nvme_lock_flags_t all_flags = NVME_LOCK_F_DONT_BLOCK;
1391*533affcbSRobert Mustacchi 
1392*533affcbSRobert Mustacchi 	if (level != NVME_LOCK_L_READ && level != NVME_LOCK_L_WRITE) {
1393*533affcbSRobert Mustacchi 		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_FLAG, 0, "unknown "
1394*533affcbSRobert Mustacchi 		    "lock level: 0x%x", level));
1395*533affcbSRobert Mustacchi 	}
1396*533affcbSRobert Mustacchi 
1397*533affcbSRobert Mustacchi 	if ((flags & ~all_flags) != 0) {
1398*533affcbSRobert Mustacchi 		return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_FLAG, 0, "unknown "
1399*533affcbSRobert Mustacchi 		    "lock flags: 0x%x", flags & ~all_flags));
1400*533affcbSRobert Mustacchi 	}
1401*533affcbSRobert Mustacchi 
1402*533affcbSRobert Mustacchi 	(void) memset(&lock, 0, sizeof (lock));
1403*533affcbSRobert Mustacchi 	lock.nil_common.nioc_nsid = nsid;
1404*533affcbSRobert Mustacchi 	if (nsid != 0) {
1405*533affcbSRobert Mustacchi 		lock.nil_ent = NVME_LOCK_E_NS;
1406*533affcbSRobert Mustacchi 	} else {
1407*533affcbSRobert Mustacchi 		lock.nil_ent = NVME_LOCK_E_CTRL;
1408*533affcbSRobert Mustacchi 	}
1409*533affcbSRobert Mustacchi 	lock.nil_level = level;
1410*533affcbSRobert Mustacchi 	lock.nil_flags = flags;
1411*533affcbSRobert Mustacchi 
1412*533affcbSRobert Mustacchi 	if (ioctl(ctrl->nc_fd, NVME_IOC_LOCK, &lock) != 0) {
1413*533affcbSRobert Mustacchi 		int e = errno;
1414*533affcbSRobert Mustacchi 		return (nvme_ioctl_syserror(ctrl, e, "lock"));
1415*533affcbSRobert Mustacchi 	}
1416*533affcbSRobert Mustacchi 
1417*533affcbSRobert Mustacchi 	if (lock.nil_common.nioc_drv_err != NVME_IOCTL_E_OK) {
1418*533affcbSRobert Mustacchi 		(void) nvme_ioctl_error(ctrl, &lock.nil_common, "lock");
1419*533affcbSRobert Mustacchi 		nvme_lock_check(ctrl);
1420*533affcbSRobert Mustacchi 		return (false);
1421*533affcbSRobert Mustacchi 	}
1422*533affcbSRobert Mustacchi 
1423*533affcbSRobert Mustacchi 	return (nvme_ctrl_success(ctrl));
1424*533affcbSRobert Mustacchi }
1425*533affcbSRobert Mustacchi 
1426*533affcbSRobert Mustacchi /*
1427*533affcbSRobert Mustacchi  * You may reasonably be wondering why does this return and why do we basically
1428*533affcbSRobert Mustacchi  * panic everywhere. The reality is twofold. The first part of this is that we
1429*533affcbSRobert Mustacchi  * know from experience in libc that error checking mutexes are not the most
1430*533affcbSRobert Mustacchi  * common and the kernel simplicity of mutex_enter() and mutex_exit() are really
1431*533affcbSRobert Mustacchi  * a boon. The second piece here is that the way that the ioctl path works here,
1432*533affcbSRobert Mustacchi  * only programming errors or mischief in the library could cause this to fail
1433*533affcbSRobert Mustacchi  * at the raw ioctl / errno level. That is EBADF/EFAULT, etc. are our fault and
1434*533affcbSRobert Mustacchi  * if you cannot unlock because of that you're not going to get much further.
1435*533affcbSRobert Mustacchi  */
1436*533affcbSRobert Mustacchi void
nvme_unlock_common(nvme_ctrl_t * ctrl,uint32_t nsid)1437*533affcbSRobert Mustacchi nvme_unlock_common(nvme_ctrl_t *ctrl, uint32_t nsid)
1438*533affcbSRobert Mustacchi {
1439*533affcbSRobert Mustacchi 	nvme_ioctl_unlock_t unlock;
1440*533affcbSRobert Mustacchi 
1441*533affcbSRobert Mustacchi 	(void) memset(&unlock, 0, sizeof (unlock));
1442*533affcbSRobert Mustacchi 	unlock.niu_common.nioc_nsid = nsid;
1443*533affcbSRobert Mustacchi 	if (nsid != 0) {
1444*533affcbSRobert Mustacchi 		unlock.niu_ent = NVME_LOCK_E_NS;
1445*533affcbSRobert Mustacchi 	} else {
1446*533affcbSRobert Mustacchi 		unlock.niu_ent = NVME_LOCK_E_CTRL;
1447*533affcbSRobert Mustacchi 	}
1448*533affcbSRobert Mustacchi 
1449*533affcbSRobert Mustacchi 	/*
1450*533affcbSRobert Mustacchi 	 * Because all unlock ioctls errors are promoted to an error, we don't
1451*533affcbSRobert Mustacchi 	 * bother calling nvme_ioctl_syserror() here.
1452*533affcbSRobert Mustacchi 	 */
1453*533affcbSRobert Mustacchi 	if (ioctl(ctrl->nc_fd, NVME_IOC_UNLOCK, &unlock) != 0) {
1454*533affcbSRobert Mustacchi 		int e = errno;
1455*533affcbSRobert Mustacchi 		(void) nvme_ctrl_error(ctrl, NVME_ERR_LOCK_PROG, e, "internal "
1456*533affcbSRobert Mustacchi 		    "programming error: failed to issue unlock ioctl: %s",
1457*533affcbSRobert Mustacchi 		    strerror(e));
1458*533affcbSRobert Mustacchi 		nvme_lock_check(ctrl);
1459*533affcbSRobert Mustacchi 		return;
1460*533affcbSRobert Mustacchi 	}
1461*533affcbSRobert Mustacchi 
1462*533affcbSRobert Mustacchi 	if (unlock.niu_common.nioc_drv_err != NVME_IOCTL_E_OK) {
1463*533affcbSRobert Mustacchi 		(void) nvme_ioctl_error(ctrl, &unlock.niu_common, "unlock");
1464*533affcbSRobert Mustacchi 		/*
1465*533affcbSRobert Mustacchi 		 * Promote any other failure to a new fatal failure. Consumers
1466*533affcbSRobert Mustacchi 		 * expect this to have worked.
1467*533affcbSRobert Mustacchi 		 */
1468*533affcbSRobert Mustacchi 		if (ctrl->nc_err.ne_err != NVME_ERR_LOCK_PROG) {
1469*533affcbSRobert Mustacchi 			nvme_err_data_t err;
1470*533affcbSRobert Mustacchi 			nvme_ctrl_err_save(ctrl, &err);
1471*533affcbSRobert Mustacchi 			(void) nvme_ctrl_error(ctrl, NVME_ERR_LOCK_PROG, 0,
1472*533affcbSRobert Mustacchi 			    "internal programming error: received unexpected "
1473*533affcbSRobert Mustacchi 			    "libnvme error 0x%x: %s", err.ne_err,
1474*533affcbSRobert Mustacchi 			    err.ne_errmsg);
1475*533affcbSRobert Mustacchi 		}
1476*533affcbSRobert Mustacchi 		nvme_lock_check(ctrl);
1477*533affcbSRobert Mustacchi 		return;
1478*533affcbSRobert Mustacchi 	}
1479*533affcbSRobert Mustacchi 
1480*533affcbSRobert Mustacchi 	(void) nvme_ctrl_success(ctrl);
1481*533affcbSRobert Mustacchi }
1482*533affcbSRobert Mustacchi 
1483*533affcbSRobert Mustacchi bool
nvme_ctrl_lock(nvme_ctrl_t * ctrl,nvme_lock_level_t level,nvme_lock_flags_t flags)1484*533affcbSRobert Mustacchi nvme_ctrl_lock(nvme_ctrl_t *ctrl, nvme_lock_level_t level,
1485*533affcbSRobert Mustacchi     nvme_lock_flags_t flags)
1486*533affcbSRobert Mustacchi {
1487*533affcbSRobert Mustacchi 	return (nvme_lock_common(ctrl, 0, level, flags));
1488*533affcbSRobert Mustacchi }
1489*533affcbSRobert Mustacchi 
1490*533affcbSRobert Mustacchi bool
nvme_ns_lock(nvme_ns_t * ns,nvme_lock_level_t level,nvme_lock_flags_t flags)1491*533affcbSRobert Mustacchi nvme_ns_lock(nvme_ns_t *ns, nvme_lock_level_t level,
1492*533affcbSRobert Mustacchi     nvme_lock_flags_t flags)
1493*533affcbSRobert Mustacchi {
1494*533affcbSRobert Mustacchi 	return (nvme_lock_common(ns->nn_ctrl, ns->nn_nsid, level, flags));
1495*533affcbSRobert Mustacchi }
1496*533affcbSRobert Mustacchi 
1497*533affcbSRobert Mustacchi void
nvme_ctrl_unlock(nvme_ctrl_t * ctrl)1498*533affcbSRobert Mustacchi nvme_ctrl_unlock(nvme_ctrl_t *ctrl)
1499*533affcbSRobert Mustacchi {
1500*533affcbSRobert Mustacchi 	nvme_unlock_common(ctrl, 0);
1501*533affcbSRobert Mustacchi }
1502*533affcbSRobert Mustacchi 
1503*533affcbSRobert Mustacchi void
nvme_ns_unlock(nvme_ns_t * ns)1504*533affcbSRobert Mustacchi nvme_ns_unlock(nvme_ns_t *ns)
1505*533affcbSRobert Mustacchi {
1506*533affcbSRobert Mustacchi 	nvme_unlock_common(ns->nn_ctrl, ns->nn_nsid);
1507*533affcbSRobert Mustacchi }
1508