1*533affcbSRobert Mustacchi /*
2*533affcbSRobert Mustacchi * This file and its contents are supplied under the terms of the
3*533affcbSRobert Mustacchi * Common Development and Distribution License ("CDDL"), version 1.0.
4*533affcbSRobert Mustacchi * You may only use this file in accordance with the terms of version
5*533affcbSRobert Mustacchi * 1.0 of the CDDL.
6*533affcbSRobert Mustacchi *
7*533affcbSRobert Mustacchi * A full copy of the text of the CDDL should have accompanied this
8*533affcbSRobert Mustacchi * source. A copy of the CDDL is also available via the Internet at
9*533affcbSRobert Mustacchi * http://www.illumos.org/license/CDDL.
10*533affcbSRobert Mustacchi */
11*533affcbSRobert Mustacchi
12*533affcbSRobert Mustacchi /*
13*533affcbSRobert Mustacchi * Copyright 2024 Oxide Computer Company
14*533affcbSRobert Mustacchi */
15*533affcbSRobert Mustacchi
16*533affcbSRobert Mustacchi /*
17*533affcbSRobert Mustacchi * Programmatic interface to NVMe Devices
18*533affcbSRobert Mustacchi *
19*533affcbSRobert Mustacchi * libnvme exists to provide a means of performing non-I/O related operations on
20*533affcbSRobert Mustacchi * an NVMe device. This is intended to allow software, regardless of whether it
21*533affcbSRobert Mustacchi * is part of illumos or not, to operate on NVMe devices and perform most of the
22*533affcbSRobert Mustacchi * administrative and operator tasks that might come up. This library does not
23*533affcbSRobert Mustacchi * provide a stable interface yet. The rest of this block comment goes into the
24*533affcbSRobert Mustacchi * organization and background into why it looks the way it does.
25*533affcbSRobert Mustacchi *
26*533affcbSRobert Mustacchi * --------------------
27*533affcbSRobert Mustacchi * Library Organization
28*533affcbSRobert Mustacchi * --------------------
29*533affcbSRobert Mustacchi *
30*533affcbSRobert Mustacchi * There are two large classes of source files that make up this library
31*533affcbSRobert Mustacchi * currently:
32*533affcbSRobert Mustacchi *
33*533affcbSRobert Mustacchi * 1. Source code that implements the library's interfaces is found alongside
34*533affcbSRobert Mustacchi * this file in lib/libnvme/common. This code is generally organized based
35*533affcbSRobert Mustacchi * around the portion of the NVMe specification that it implements. So for
36*533affcbSRobert Mustacchi * example, code that implements logic related to the features is found
37*533affcbSRobert Mustacchi * in libnvme_feature.c, formatting namespaces in libnvme_format.c, log
38*533affcbSRobert Mustacchi * pages in libnvme_log.c, etc. All files in the library begin with
39*533affcbSRobert Mustacchi * 'libnvme_' as a way to help namespace the file names from the second set
40*533affcbSRobert Mustacchi * of files.
41*533affcbSRobert Mustacchi *
42*533affcbSRobert Mustacchi * 2. Validation logic that is shared between libnvme and the kernel is found
43*533affcbSRobert Mustacchi * in common/nvme/. While the kernel must validate requests regardless, we
44*533affcbSRobert Mustacchi * leverage this shared information as a means for trying to ensure that we
45*533affcbSRobert Mustacchi * have useful errors early. That code is factored in a way to facilitate
46*533affcbSRobert Mustacchi * easier unit testing.
47*533affcbSRobert Mustacchi *
48*533affcbSRobert Mustacchi * Because of the nature of this split, all of the opaque structures that we
49*533affcbSRobert Mustacchi * create and their relationships are all maintained in the library (group 1).
50*533affcbSRobert Mustacchi * All of the logic in group 2 is designed to be constant data tables and
51*533affcbSRobert Mustacchi * functions that are fed information about the controller they are operating on
52*533affcbSRobert Mustacchi * to answer them.
53*533affcbSRobert Mustacchi *
54*533affcbSRobert Mustacchi * There are several general classes of interfaces and related structures that
55*533affcbSRobert Mustacchi * we have in the library. We break them into the following general categories
56*533affcbSRobert Mustacchi * based on their purpose:
57*533affcbSRobert Mustacchi *
58*533affcbSRobert Mustacchi * DISCOVERY
59*533affcbSRobert Mustacchi *
60*533affcbSRobert Mustacchi * One of the large responsibilities of this library is helping someone discover
61*533affcbSRobert Mustacchi * information about something, whether that be a controller, a namespace, a log
62*533affcbSRobert Mustacchi * page, a feature, a unique command, etc. Information about one of these items
63*533affcbSRobert Mustacchi * is contained in a generally opaque discovery structure. For example, the
64*533affcbSRobert Mustacchi * nvme_log_disc_t.
65*533affcbSRobert Mustacchi *
66*533affcbSRobert Mustacchi * The goal of these structures is to contain all of the metadata for working
67*533affcbSRobert Mustacchi * with the object in question. Continuing on the log page discovery example, it
68*533affcbSRobert Mustacchi * can tell us information about what fields are required, whether or not the
69*533affcbSRobert Mustacchi * log might be supported, whether it operates on a controller, a namespace, or
70*533affcbSRobert Mustacchi * something else, as well as more human-usable things such as names and
71*533affcbSRobert Mustacchi * descriptions.
72*533affcbSRobert Mustacchi *
73*533affcbSRobert Mustacchi * Discovery objects are both for humans and for programmatic consumption. There
74*533affcbSRobert Mustacchi * are several cases where requests can be created directly from discovery
75*533affcbSRobert Mustacchi * objects. A well designed discovery object can allow a general implementation
76*533affcbSRobert Mustacchi * of a consumer such as nvmeadm to build up a request without having to
77*533affcbSRobert Mustacchi * hardcode everything about what is needed for each request (though most
78*533affcbSRobert Mustacchi * consumers still need to have information about the actual contents, meaning,
79*533affcbSRobert Mustacchi * and semantics of a log or feature).
80*533affcbSRobert Mustacchi *
81*533affcbSRobert Mustacchi * Discovery objects are obtained in two general ways. The first is using one of
82*533affcbSRobert Mustacchi * the iterator/callback based functions to discover a given class of data. The
83*533affcbSRobert Mustacchi * second path is that several of the functions which operate based on the name
84*533affcbSRobert Mustacchi * of something, e.g. nvme_log_req_init_by_name(),
85*533affcbSRobert Mustacchi * nvme_get_feat_req_init_by_name(), etc. will return a discovery object.
86*533affcbSRobert Mustacchi *
87*533affcbSRobert Mustacchi * When a discovery object is returned based on iteration (more below), the
88*533affcbSRobert Mustacchi * memory is owned by the iterator. When it is returned by a request
89*533affcbSRobert Mustacchi * initialization function, then it has its own life time and must be freed.
90*533affcbSRobert Mustacchi * We try to make this distinction clear in the API based on whether or not the
91*533affcbSRobert Mustacchi * discovery object is 'const'.
92*533affcbSRobert Mustacchi *
93*533affcbSRobert Mustacchi * All discovery objects should be fully filled out before they are handed back
94*533affcbSRobert Mustacchi * to a caller. It is an explicit design goal that every function that gets data
95*533affcbSRobert Mustacchi * from the discovery structure operates on a const version of the pointer. This
96*533affcbSRobert Mustacchi * is the hint that you cannot perform additional I/O or related after handing
97*533affcbSRobert Mustacchi * out the discovery structure. Attempts to loosen this constraint should be
98*533affcbSRobert Mustacchi * considered carefully due to how we communicate ownership.
99*533affcbSRobert Mustacchi *
100*533affcbSRobert Mustacchi * ITERATORS
101*533affcbSRobert Mustacchi *
102*533affcbSRobert Mustacchi * A common pattern of the library is iterating over items. This includes
103*533affcbSRobert Mustacchi * controllers and namespaces, but also as part of discovering what specific
104*533affcbSRobert Mustacchi * logs, commands, features, etc. are actually supported by the device.
105*533affcbSRobert Mustacchi * Iteration always follows the same general pattern:
106*533affcbSRobert Mustacchi *
107*533affcbSRobert Mustacchi * 1. An iterator is initialized with a call to nvme_<name>_discover_init().
108*533affcbSRobert Mustacchi * This will generally return a structure of the form nvme_<name>_iter_t. This
109*533affcbSRobert Mustacchi * structure contains the memory for the corresponding value that is returned
110*533affcbSRobert Mustacchi * from step in (2).
111*533affcbSRobert Mustacchi *
112*533affcbSRobert Mustacchi * 2. To actually pull values out of an iterator, one must call the
113*533affcbSRobert Mustacchi * nvme_<name>_step() function for the iterator. This will return a
114*533affcbSRobert Mustacchi * corresponding nvme_<name>_disc_t structure that is opaque and has a suite of
115*533affcbSRobert Mustacchi * functions that are usable for getting information out from it. This structure
116*533affcbSRobert Mustacchi * is valid only until the next time the nvme_<name>_step() is called. The
117*533affcbSRobert Mustacchi * return value of step indicates the state of the data and indicates whether or
118*533affcbSRobert Mustacchi * not there is an error, the iterator has finished, or we successfully stepped
119*533affcbSRobert Mustacchi * and the data is filled out.
120*533affcbSRobert Mustacchi *
121*533affcbSRobert Mustacchi * If discovery data needs to outlive a given iteration, then it can be
122*533affcbSRobert Mustacchi * duplicated which will give it a separate lifetime, though that comes with
123*533affcbSRobert Mustacchi * the responsibility that it must then be freed.
124*533affcbSRobert Mustacchi *
125*533affcbSRobert Mustacchi * 3. To finish using iterators, one finally calls the corresponding
126*533affcbSRobert Mustacchi * nvme_<name>_discover_fini(). That will deallocate the iterator structure and
127*533affcbSRobert Mustacchi * finish everything up.
128*533affcbSRobert Mustacchi *
129*533affcbSRobert Mustacchi * REQUESTS
130*533affcbSRobert Mustacchi *
131*533affcbSRobert Mustacchi * One of the chief goals of this library is to be able to perform requests.
132*533affcbSRobert Mustacchi * Each request has a structure that can be initialized, filled out, and then
133*533affcbSRobert Mustacchi * executed. A request structure can be reused multiple times with minor
134*533affcbSRobert Mustacchi * adjustments in-between (though changes aren't required). Request structures
135*533affcbSRobert Mustacchi * are either initialized in a blank mode where every value must be filled out
136*533affcbSRobert Mustacchi * or they can be initialized through their discovery object (or the common name
137*533affcbSRobert Mustacchi * of such an object).
138*533affcbSRobert Mustacchi *
139*533affcbSRobert Mustacchi * When a request structure is initialized through a discovery object, it
140*533affcbSRobert Mustacchi * automatically sets several of the fields, knows which ones are still required
141*533affcbSRobert Mustacchi * to be set, and which fields cannot be set. For example, if you create a get
142*533affcbSRobert Mustacchi * log page request from a log discovery object, it will not allow you to change
143*533affcbSRobert Mustacchi * the log page you're requesting; however, in return you don't have to specify
144*533affcbSRobert Mustacchi * the command set interface or log identifier.
145*533affcbSRobert Mustacchi *
146*533affcbSRobert Mustacchi * Request objects are tied to a controller. See 'Parallelism, Thread Safety,
147*533affcbSRobert Mustacchi * and Errors' for more information.
148*533affcbSRobert Mustacchi *
149*533affcbSRobert Mustacchi * INFORMATION SNAPSHOTS
150*533affcbSRobert Mustacchi *
151*533affcbSRobert Mustacchi * To get information about a namespace or controller, one has to take an
152*533affcbSRobert Mustacchi * information snapshot. Once an information snapshot is obtained, this snapshot
153*533affcbSRobert Mustacchi * answers all questions about the controller with a mostly consistent set of
154*533affcbSRobert Mustacchi * point-in-time data. The main reason for this design was to try and simplify
155*533affcbSRobert Mustacchi * where errors can occur and to provide a straightforward serialization point
156*533affcbSRobert Mustacchi * so that way the raw underlying data could be gathered at one system and then
157*533affcbSRobert Mustacchi * interpreted later on another.
158*533affcbSRobert Mustacchi *
 * The only operations on a snapshot that are fallible are those that ask about
 * things that are not guaranteed to exist for all such NVMe controllers.
161*533affcbSRobert Mustacchi *
162*533affcbSRobert Mustacchi * LIBRARY, CONTROLLER, NAMESPACE and SNAPSHOT HANDLES
163*533affcbSRobert Mustacchi *
164*533affcbSRobert Mustacchi * The last major set of types used in this library are opaque handles. As you
165*533affcbSRobert Mustacchi * might have guessed given the request structures, all of the objects which
166*533affcbSRobert Mustacchi * represent something are opaque. Each library handle is independent of one
167*533affcbSRobert Mustacchi * another and each controller handle is independent of one another. In general,
168*533affcbSRobert Mustacchi * it is expected that only a single controller handle is used at a given time
169*533affcbSRobert Mustacchi * for a given library handle, but this is not currently enforced. Error
170*533affcbSRobert Mustacchi * information and parallelism is tied into this, see 'Parallelism, Thread
171*533affcbSRobert Mustacchi * Safety, and Errors' for more information.
172*533affcbSRobert Mustacchi *
173*533affcbSRobert Mustacchi * -----------------
174*533affcbSRobert Mustacchi * Opaque Structures
175*533affcbSRobert Mustacchi * -----------------
176*533affcbSRobert Mustacchi *
177*533affcbSRobert Mustacchi * One of the things that might stand out in libnvme is the use of opaque
178*533affcbSRobert Mustacchi * structures everywhere with functions to access every arbitrary piece of data.
179*533affcbSRobert Mustacchi * This and the function pattern around building up a request were done to try
180*533affcbSRobert Mustacchi * and deal with the evolutionary nature of the NVMe specification. If you look
181*533affcbSRobert Mustacchi * at the various requests, with the exception of firmware download, almost
182*533affcbSRobert Mustacchi * every request has added additional features through the spec revisions. NVMe
183*533affcbSRobert Mustacchi * 2.0 changed most things again with the requirement to specify the command set
184*533affcbSRobert Mustacchi * interface.
185*533affcbSRobert Mustacchi *
186*533affcbSRobert Mustacchi * While the way that the NVMe specification has done this is quite reasonable,
187*533affcbSRobert Mustacchi * it makes it much more difficult to use a traditional series of arguments to
188*533affcbSRobert Mustacchi * functions or a structure without having to try to version the symbol through
189*533affcbSRobert Mustacchi * clever games. If instead we accept that the specification will change and
190*533affcbSRobert Mustacchi * that the specification is always taking these additional arguments out of
191*533affcbSRobert Mustacchi * values that must be zero, then an opaque request structure where you have to
192*533affcbSRobert Mustacchi * make an explicit function call and recompile to get slightly different
193*533affcbSRobert Mustacchi * behavior is mostly reasonable. We may not be able to be perfect given we're
194*533affcbSRobert Mustacchi * at the mercy of the specification, but at least this is better than the
195*533affcbSRobert Mustacchi * alternative.
196*533affcbSRobert Mustacchi *
197*533affcbSRobert Mustacchi * This is ultimately why all the request structures are opaque and use a
198*533affcbSRobert Mustacchi * pseudo-builder pattern to fill out the request information. Further evidence
199*533affcbSRobert Mustacchi * to this point is that there was no way to avoid changing every kernel
200*533affcbSRobert Mustacchi * structure here while retaining semantic operations. No one wants to manually
201*533affcbSRobert Mustacchi * assemble cdw12-15 here. That's not how we can add value for the library.
202*533affcbSRobert Mustacchi *
203*533affcbSRobert Mustacchi * Similarly, for all discovery objects we ended up utilizing opaque objects.
204*533affcbSRobert Mustacchi * The main reason here is that we want to be able to embed this library as a
205*533affcbSRobert Mustacchi * committed interface in other languages and having the discovery structures be
206*533affcbSRobert Mustacchi * something that everyone can see means it'll be harder to extend it. While
207*533affcbSRobert Mustacchi * this concern is somewhat more theoretical given the iterator pattern, given
208*533affcbSRobert Mustacchi * the other bits in the request structure we decided to lean into the
209*533affcbSRobert Mustacchi * opaqueness.
210*533affcbSRobert Mustacchi *
211*533affcbSRobert Mustacchi * --------------------------------------
212*533affcbSRobert Mustacchi * Parallelism, Thread Safety, and Errors
213*533affcbSRobert Mustacchi * --------------------------------------
214*533affcbSRobert Mustacchi *
215*533affcbSRobert Mustacchi * One of the library's major design points is how do we achieve thread-safety,
216*533affcbSRobert Mustacchi * how does ownership work, where do errors appear, and what is the degree of
217*533affcbSRobert Mustacchi * parallelism that is achievable. To work through this we look at a few
218*533affcbSRobert Mustacchi * different things:
219*533affcbSRobert Mustacchi *
220*533affcbSRobert Mustacchi * 1. The degree to which the hardware allows for parallelism
221*533affcbSRobert Mustacchi * 2. The degree to which users might desire parallelism
222*533affcbSRobert Mustacchi * 3. The ergonomics of getting and storing errors
223*533affcbSRobert Mustacchi *
224*533affcbSRobert Mustacchi * The NVMe specification allows for different degrees of admin command
225*533affcbSRobert Mustacchi * parallelism on a per-command basis. This is discoverable, but the main point
226*533affcbSRobert Mustacchi * is that there are a class of commands where only one can be outstanding at a
227*533affcbSRobert Mustacchi * time, which likely fall into the case of most of the destructive commands
228*533affcbSRobert Mustacchi * like Format NVM, Activate Firmware, etc. Our expectation to some extent is
229*533affcbSRobert Mustacchi * that most admin queue commands don't need to be issued in parallel; however,
230*533affcbSRobert Mustacchi * beyond how we structure the library and error handling, we don't try to
231*533affcbSRobert Mustacchi * enforce that here. The kernel does do some enforcement through requiring
232*533affcbSRobert Mustacchi * mandatory write locks to perform some operations.
233*533affcbSRobert Mustacchi *
234*533affcbSRobert Mustacchi * When we get to how do folks want to use this, during the initial design phase
235*533affcbSRobert Mustacchi * we mostly theorized based on how nvmeadm is using it today and how various
236*533affcbSRobert Mustacchi * daemons like a FRU monitor or an appliance kit's software might want to
237*533affcbSRobert Mustacchi * interact with it. Our general starting assumption is that it's very
238*533affcbSRobert Mustacchi * reasonable for each discovered controller to be handled in parallel, but that
239*533affcbSRobert Mustacchi * operations on a controller itself are likely serial given that we're not
240*533affcbSRobert Mustacchi * issuing I/O through this mechanism. If we were, then that'd be an entirely
241*533affcbSRobert Mustacchi * different set of constraints.
242*533affcbSRobert Mustacchi *
243*533affcbSRobert Mustacchi * To discuss the perceived ergonomics, we need to first discuss what error
244*533affcbSRobert Mustacchi * information we want to be able to have. It's an important goal of both the
245*533affcbSRobert Mustacchi * NVMe driver and this library to give useful semantic errors. In particular,
246*533affcbSRobert Mustacchi * for any operation we want to make sure that we include the following
247*533affcbSRobert Mustacchi * information:
248*533affcbSRobert Mustacchi *
249*533affcbSRobert Mustacchi * o A hopefully distinguishable semantic error
 *  o Saving errno as a system error if relevant (e.g. if open(2) failed)
251*533affcbSRobert Mustacchi * o A message for humans that gives more specifics about what happened and is
252*533affcbSRobert Mustacchi * intended to be passed along to the output of a command or another error
253*533affcbSRobert Mustacchi * message.
254*533affcbSRobert Mustacchi * o If a controller error occurs, we want to be able to provide the
255*533affcbSRobert Mustacchi * controller's sc (status code) and sct (status code type).
256*533affcbSRobert Mustacchi *
257*533affcbSRobert Mustacchi * With this we get to the questions around ergonomics and related which are
258*533affcbSRobert Mustacchi * entirely subjective. Given that we want to capture that information how do we
259*533affcbSRobert Mustacchi * best do this given the tooling that we have. When the library was first being
260*533affcbSRobert Mustacchi * prototyped all errors were on the nvme_t, basically the top-level handle.
261*533affcbSRobert Mustacchi * This meant that each operation on a controller had to be done serially or you
262*533affcbSRobert Mustacchi * would have to use different handles. However, the simplicity was that there
263*533affcbSRobert Mustacchi * was one thing to check.
264*533affcbSRobert Mustacchi *
265*533affcbSRobert Mustacchi * This evolution changed slightly when we introduced information snapshots.
266*533affcbSRobert Mustacchi * Because the information snapshots are meant to be separate entities whose
267*533affcbSRobert Mustacchi * lifetime can extend beyond the nvme_t library handle, they ended up
268*533affcbSRobert Mustacchi * developing their own error codes and functions. This has been okay because
269*533affcbSRobert Mustacchi * there aren't too many use cases there, though the need to duplicate error
270*533affcbSRobert Mustacchi * handling functions is a bit painful.
271*533affcbSRobert Mustacchi *
272*533affcbSRobert Mustacchi * From there, we did consider what if each request had its own error
273*533affcbSRobert Mustacchi * information that could be extracted. That would turn into a lot of functions
274*533affcbSRobert Mustacchi * to get at that data. The controller's allowed parallelism for admin commands
275*533affcbSRobert Mustacchi * varies based on each command. Some commands must occur when there are no
 * other admin commands on the controller and others when there is nothing
277*533affcbSRobert Mustacchi * on the namespace. However, due to that nuance, it would lead to forcing the
278*533affcbSRobert Mustacchi * consumer to understand the controller's specifics more than is often
279*533affcbSRobert Mustacchi * necessary for a given request. To add to that, it'd also just be a pain to
280*533affcbSRobert Mustacchi * try to get all the error information out in a different way and the consumers
281*533affcbSRobert Mustacchi * we started writing in this fashion were not looking good.
282*533affcbSRobert Mustacchi *
283*533affcbSRobert Mustacchi * We also considered whether we could consolidate all the error functions on
284*533affcbSRobert Mustacchi * each request into one structure that we get, but that didn't move the needle
285*533affcbSRobert Mustacchi * too much. It also raised some more concerns around how we minimize races and
286*533affcbSRobert Mustacchi * how data changes around that.
287*533affcbSRobert Mustacchi *
288*533affcbSRobert Mustacchi * So all of this led us to our current compromise position: we allow for
289*533affcbSRobert Mustacchi * parallelism at the controller level. More specifically:
290*533affcbSRobert Mustacchi *
291*533affcbSRobert Mustacchi * 1. Operations which take the nvme_t handle set errors on it and must operate
292*533affcbSRobert Mustacchi * serially. That is the nvme_t should only be used from one thread at any
293*533affcbSRobert Mustacchi * time, but may move between threads. Errors are set on it.
294*533affcbSRobert Mustacchi *
295*533affcbSRobert Mustacchi * 2. The nvme_ctrl_t has its own error information. A given nvme_ctrl_t should
296*533affcbSRobert Mustacchi * only be used serially; however, different ones can be used in parallel. A
297*533affcbSRobert Mustacchi * controller doesn't guarantee exclusivity. That requires an explicit
298*533affcbSRobert Mustacchi * locking operation.
299*533affcbSRobert Mustacchi *
300*533affcbSRobert Mustacchi * 3. Both request structures and namespaces place their errors on the
301*533affcbSRobert Mustacchi * corresponding controller that they were created from. Therefore the
302*533affcbSRobert Mustacchi * per-controller serialization in (2) applies here as well. If two requests
303*533affcbSRobert Mustacchi * are tied to different controllers, they can proceed in parallel.
304*533affcbSRobert Mustacchi *
305*533affcbSRobert Mustacchi * 4. Once a controller or namespace snapshot is obtained, they fall into a
306*533affcbSRobert Mustacchi * similar pattern: each one can be operated on in parallel, but generally
307*533affcbSRobert Mustacchi * one should only operate on a single one serially.
308*533affcbSRobert Mustacchi *
 * Other than the constraints defined above, the library does not care which
 * thread an operation occurs on. These can be moved to wherever they need to
 * be. Locking and related in the kernel is based on the open file descriptor
 * to the controller.
313*533affcbSRobert Mustacchi *
314*533affcbSRobert Mustacchi * ----------------
315*533affcbSRobert Mustacchi * Field Validation
316*533affcbSRobert Mustacchi * ----------------
317*533affcbSRobert Mustacchi *
318*533affcbSRobert Mustacchi * Every request is made up of fields that correspond to parts of the NVMe
319*533affcbSRobert Mustacchi * specification. Our requests operate in terms of the logical fields that we
320*533affcbSRobert Mustacchi * opt to expose and that the kernel knows how to consume. In general, we don't
321*533affcbSRobert Mustacchi * expose the raw cdw values that make up the commands (except for the vendor
322*533affcbSRobert Mustacchi * unique commands or arguments that are explicitly that way ala get features).
323*533affcbSRobert Mustacchi * While operating on raw cdw arguments would be a simple way to create ABI
324*533affcbSRobert Mustacchi * stability, it would leave everyone having to break up all the fields
325*533affcbSRobert Mustacchi * themselves and we believe end up somewhat more error prone than the
326*533affcbSRobert Mustacchi * interfaces we expose today.
327*533affcbSRobert Mustacchi *
328*533affcbSRobert Mustacchi * Requests are created in one of two ways today: they are either initialized
329*533affcbSRobert Mustacchi * from corresponding discovery data e.g. nvme_log_req_init_by_disc() and
330*533affcbSRobert Mustacchi * nvme_get_feat_req_init_by_name(), or one creates a raw request ala
331*533affcbSRobert Mustacchi * nvme_get_feat_req_init(). In the former cases, we fill out a bunch of the
332*533affcbSRobert Mustacchi * fields that would normally need to be set such as the log or feature ID. We
333*533affcbSRobert Mustacchi * also will note which fields are allowed and expected. For example, the health
334*533affcbSRobert Mustacchi * log page does not take or expect a lsp (log specific parameter) or related
335*533affcbSRobert Mustacchi * and therefore we can flag that with an _UNUSE class error. Conversely,
336*533affcbSRobert Mustacchi * requests that are created from their raw form will not have any such error
337*533affcbSRobert Mustacchi * checking performed until they are finalized and checked by the kernel. The
338*533affcbSRobert Mustacchi * set of fields that can be set in a request is usually tracked in the
339*533affcbSRobert Mustacchi * structure with a member of the form <prefix>_allow.
340*533affcbSRobert Mustacchi *
341*533affcbSRobert Mustacchi * One set of library error checking that is uniform between both types is that
342*533affcbSRobert Mustacchi * of missing fields. There are minimum fields that must be set for different
343*533affcbSRobert Mustacchi * types of requests. That check will always be performed regardless of the path
344*533affcbSRobert Mustacchi * that is taken through the system. Tracking which members must still be set is
345*533affcbSRobert Mustacchi * done by a member of the form <prefix>_need.
346*533affcbSRobert Mustacchi *
347*533affcbSRobert Mustacchi * When we perform validation, we try to push the vast majority of it into the
348*533affcbSRobert Mustacchi * common validation code that is shared between the kernel and userland. This
349*533affcbSRobert Mustacchi * is wrapped up through the nvme_field_check_one() logic. The common code will
350*533affcbSRobert Mustacchi * check if the field is supported by the controller (generating an _UNSUP class
351*533affcbSRobert Mustacchi * error if not) and if the value of the field is within a valid range
352*533affcbSRobert Mustacchi * (generating a _RANGE class error if not).
353*533affcbSRobert Mustacchi *
 * While we try to fold as many such checks into the common code as possible,
 * it isn't perfect and some things have to be checked outside of that. Those
 * consist of the following general cases:
357*533affcbSRobert Mustacchi *
358*533affcbSRobert Mustacchi * 1) Items that are not semantically fields in the actual command but are
359*533affcbSRobert Mustacchi * things that we are tracking ourselves in the library. An example of this
360*533affcbSRobert Mustacchi * would be fields in the vuc request structure that we are synthesizing
361*533affcbSRobert Mustacchi * ourselves.
362*533affcbSRobert Mustacchi *
363*533affcbSRobert Mustacchi * 2) While the field logic has the specifics of what controller is being
364*533affcbSRobert Mustacchi * operated upon, it doesn't have all the knowledge of what things can be
365*533affcbSRobert Mustacchi * combined or not. It can answer the specifics about its field, but cannot look
366*533affcbSRobert Mustacchi * at the broader request.
367*533affcbSRobert Mustacchi *
368*533affcbSRobert Mustacchi * As a result, there are some duplicated checks in the library and the kernel,
369*533affcbSRobert Mustacchi * though several are left just to the kernel. However, the vast majority of
370*533affcbSRobert Mustacchi * validation does happen through these common routines which leaves the library
371*533affcbSRobert Mustacchi * nvme_<type>_req_set_<field> functions generally wrappers around checking
372*533affcbSRobert Mustacchi * common code and updating our tracking around what fields are set or not so we
373*533affcbSRobert Mustacchi * can issue an ioctl.
374*533affcbSRobert Mustacchi */
375*533affcbSRobert Mustacchi
376*533affcbSRobert Mustacchi #include <stdlib.h>
377*533affcbSRobert Mustacchi #include <stdarg.h>
378*533affcbSRobert Mustacchi #include <libdevinfo.h>
379*533affcbSRobert Mustacchi #include <unistd.h>
380*533affcbSRobert Mustacchi #include <string.h>
381*533affcbSRobert Mustacchi #include <sys/types.h>
382*533affcbSRobert Mustacchi #include <sys/stat.h>
383*533affcbSRobert Mustacchi #include <fcntl.h>
384*533affcbSRobert Mustacchi #include <upanic.h>
385*533affcbSRobert Mustacchi
386*533affcbSRobert Mustacchi #include "libnvme_impl.h"
387*533affcbSRobert Mustacchi
388*533affcbSRobert Mustacchi bool
nvme_vers_ctrl_atleast(const nvme_ctrl_t * ctrl,const nvme_version_t * targ)389*533affcbSRobert Mustacchi nvme_vers_ctrl_atleast(const nvme_ctrl_t *ctrl, const nvme_version_t *targ)
390*533affcbSRobert Mustacchi {
391*533affcbSRobert Mustacchi return (nvme_vers_atleast(&ctrl->nc_vers, targ));
392*533affcbSRobert Mustacchi }
393*533affcbSRobert Mustacchi
394*533affcbSRobert Mustacchi bool
nvme_vers_ctrl_info_atleast(const nvme_ctrl_info_t * ci,const nvme_version_t * targ)395*533affcbSRobert Mustacchi nvme_vers_ctrl_info_atleast(const nvme_ctrl_info_t *ci,
396*533affcbSRobert Mustacchi const nvme_version_t *targ)
397*533affcbSRobert Mustacchi {
398*533affcbSRobert Mustacchi return (nvme_vers_atleast(&ci->nci_vers, targ));
399*533affcbSRobert Mustacchi }
400*533affcbSRobert Mustacchi
401*533affcbSRobert Mustacchi bool
nvme_vers_ns_info_atleast(const nvme_ns_info_t * info,const nvme_version_t * targ)402*533affcbSRobert Mustacchi nvme_vers_ns_info_atleast(const nvme_ns_info_t *info,
403*533affcbSRobert Mustacchi const nvme_version_t *targ)
404*533affcbSRobert Mustacchi {
405*533affcbSRobert Mustacchi return (nvme_vers_atleast(&info->nni_vers, targ));
406*533affcbSRobert Mustacchi }
407*533affcbSRobert Mustacchi
408*533affcbSRobert Mustacchi bool
nvme_guid_valid(const nvme_ctrl_t * ctrl,const uint8_t guid[16])409*533affcbSRobert Mustacchi nvme_guid_valid(const nvme_ctrl_t *ctrl, const uint8_t guid[16])
410*533affcbSRobert Mustacchi {
411*533affcbSRobert Mustacchi const uint8_t zero_guid[16] = { 0 };
412*533affcbSRobert Mustacchi
413*533affcbSRobert Mustacchi return (nvme_vers_ctrl_atleast(ctrl, &nvme_vers_1v2) &&
414*533affcbSRobert Mustacchi memcmp(zero_guid, guid, sizeof (zero_guid)) != 0);
415*533affcbSRobert Mustacchi }
416*533affcbSRobert Mustacchi
417*533affcbSRobert Mustacchi bool
nvme_eui64_valid(const nvme_ctrl_t * ctrl,const uint8_t eui64[8])418*533affcbSRobert Mustacchi nvme_eui64_valid(const nvme_ctrl_t *ctrl, const uint8_t eui64[8])
419*533affcbSRobert Mustacchi {
420*533affcbSRobert Mustacchi const uint8_t zero_eui[8] = { 0 };
421*533affcbSRobert Mustacchi
422*533affcbSRobert Mustacchi return (nvme_vers_ctrl_atleast(ctrl, &nvme_vers_1v1) &&
423*533affcbSRobert Mustacchi memcmp(zero_eui, eui64, sizeof (zero_eui)) != 0);
424*533affcbSRobert Mustacchi }
425*533affcbSRobert Mustacchi
/*
 * Render a 16-byte namespace GUID into 'buf' as 32 upper-case hexadecimal
 * digits, two per byte, in array order. The return value is that of
 * snprintf(): the number of characters the full string requires, which the
 * caller can compare against 'len' to detect truncation.
 */
int
nvme_format_nguid(const uint8_t nguid[16], char *buf, size_t len)
{
	int ret;

	ret = snprintf(buf, len, "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X"
	    "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X",
	    nguid[0], nguid[1], nguid[2], nguid[3],
	    nguid[4], nguid[5], nguid[6], nguid[7],
	    nguid[8], nguid[9], nguid[10], nguid[11],
	    nguid[12], nguid[13], nguid[14], nguid[15]);

	return (ret);
}
435*533affcbSRobert Mustacchi
/*
 * Render an 8-byte EUI-64 into 'buf' as 16 upper-case hexadecimal digits, two
 * per byte, in array order. The return value is that of snprintf(): the number
 * of characters the full string requires.
 */
int
nvme_format_eui64(const uint8_t eui64[8], char *buf, size_t len)
{
	int ret;

	ret = snprintf(buf, len, "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X",
	    eui64[0], eui64[1], eui64[2], eui64[3],
	    eui64[4], eui64[5], eui64[6], eui64[7]);

	return (ret);
}
443*533affcbSRobert Mustacchi
444*533affcbSRobert Mustacchi void
nvme_fini(nvme_t * nvme)445*533affcbSRobert Mustacchi nvme_fini(nvme_t *nvme)
446*533affcbSRobert Mustacchi {
447*533affcbSRobert Mustacchi if (nvme == NULL)
448*533affcbSRobert Mustacchi return;
449*533affcbSRobert Mustacchi
450*533affcbSRobert Mustacchi if (nvme->nh_devinfo != DI_NODE_NIL) {
451*533affcbSRobert Mustacchi di_fini(nvme->nh_devinfo);
452*533affcbSRobert Mustacchi }
453*533affcbSRobert Mustacchi
454*533affcbSRobert Mustacchi free(nvme);
455*533affcbSRobert Mustacchi }
456*533affcbSRobert Mustacchi
457*533affcbSRobert Mustacchi nvme_t *
nvme_init(void)458*533affcbSRobert Mustacchi nvme_init(void)
459*533affcbSRobert Mustacchi {
460*533affcbSRobert Mustacchi nvme_t *nvme;
461*533affcbSRobert Mustacchi
462*533affcbSRobert Mustacchi nvme = calloc(1, sizeof (nvme_t));
463*533affcbSRobert Mustacchi if (nvme == NULL) {
464*533affcbSRobert Mustacchi return (NULL);
465*533affcbSRobert Mustacchi }
466*533affcbSRobert Mustacchi
467*533affcbSRobert Mustacchi nvme->nh_devinfo = di_init("/", DINFOCPYALL);
468*533affcbSRobert Mustacchi if (nvme->nh_devinfo == DI_NODE_NIL) {
469*533affcbSRobert Mustacchi nvme_fini(nvme);
470*533affcbSRobert Mustacchi return (NULL);
471*533affcbSRobert Mustacchi }
472*533affcbSRobert Mustacchi
473*533affcbSRobert Mustacchi return (nvme);
474*533affcbSRobert Mustacchi }
475*533affcbSRobert Mustacchi
476*533affcbSRobert Mustacchi void
nvme_ctrl_discover_fini(nvme_ctrl_iter_t * iter)477*533affcbSRobert Mustacchi nvme_ctrl_discover_fini(nvme_ctrl_iter_t *iter)
478*533affcbSRobert Mustacchi {
479*533affcbSRobert Mustacchi free(iter);
480*533affcbSRobert Mustacchi }
481*533affcbSRobert Mustacchi
482*533affcbSRobert Mustacchi nvme_iter_t
nvme_ctrl_discover_step(nvme_ctrl_iter_t * iter,const nvme_ctrl_disc_t ** discp)483*533affcbSRobert Mustacchi nvme_ctrl_discover_step(nvme_ctrl_iter_t *iter, const nvme_ctrl_disc_t **discp)
484*533affcbSRobert Mustacchi {
485*533affcbSRobert Mustacchi di_minor_t m;
486*533affcbSRobert Mustacchi
487*533affcbSRobert Mustacchi *discp = NULL;
488*533affcbSRobert Mustacchi if (iter->ni_done) {
489*533affcbSRobert Mustacchi return (NVME_ITER_DONE);
490*533affcbSRobert Mustacchi }
491*533affcbSRobert Mustacchi
492*533affcbSRobert Mustacchi for (;;) {
493*533affcbSRobert Mustacchi if (iter->ni_cur == NULL) {
494*533affcbSRobert Mustacchi iter->ni_cur = di_drv_first_node("nvme",
495*533affcbSRobert Mustacchi iter->ni_nvme->nh_devinfo);
496*533affcbSRobert Mustacchi } else {
497*533affcbSRobert Mustacchi iter->ni_cur = di_drv_next_node(iter->ni_cur);
498*533affcbSRobert Mustacchi }
499*533affcbSRobert Mustacchi
500*533affcbSRobert Mustacchi if (iter->ni_cur == NULL) {
501*533affcbSRobert Mustacchi iter->ni_done = true;
502*533affcbSRobert Mustacchi return (NVME_ITER_DONE);
503*533affcbSRobert Mustacchi }
504*533affcbSRobert Mustacchi
505*533affcbSRobert Mustacchi for (m = di_minor_next(iter->ni_cur, DI_MINOR_NIL);
506*533affcbSRobert Mustacchi m != DI_MINOR_NIL; m = di_minor_next(iter->ni_cur, m)) {
507*533affcbSRobert Mustacchi if (strcmp(di_minor_nodetype(m),
508*533affcbSRobert Mustacchi DDI_NT_NVME_NEXUS) == 0) {
509*533affcbSRobert Mustacchi break;
510*533affcbSRobert Mustacchi }
511*533affcbSRobert Mustacchi }
512*533affcbSRobert Mustacchi
513*533affcbSRobert Mustacchi if (m == DI_MINOR_NIL) {
514*533affcbSRobert Mustacchi continue;
515*533affcbSRobert Mustacchi }
516*533affcbSRobert Mustacchi
517*533affcbSRobert Mustacchi iter->ni_disc.ncd_devi = iter->ni_cur;
518*533affcbSRobert Mustacchi iter->ni_disc.ncd_minor = m;
519*533affcbSRobert Mustacchi *discp = &iter->ni_disc;
520*533affcbSRobert Mustacchi return (NVME_ITER_VALID);
521*533affcbSRobert Mustacchi }
522*533affcbSRobert Mustacchi
523*533affcbSRobert Mustacchi return (NVME_ITER_DONE);
524*533affcbSRobert Mustacchi }
525*533affcbSRobert Mustacchi
526*533affcbSRobert Mustacchi bool
nvme_ctrl_discover_init(nvme_t * nvme,nvme_ctrl_iter_t ** iterp)527*533affcbSRobert Mustacchi nvme_ctrl_discover_init(nvme_t *nvme, nvme_ctrl_iter_t **iterp)
528*533affcbSRobert Mustacchi {
529*533affcbSRobert Mustacchi nvme_ctrl_iter_t *iter;
530*533affcbSRobert Mustacchi
531*533affcbSRobert Mustacchi if (iterp == NULL) {
532*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
533*533affcbSRobert Mustacchi "invalid nvme_ctrl_iter_t output pointer: %p", iterp));
534*533affcbSRobert Mustacchi }
535*533affcbSRobert Mustacchi
536*533affcbSRobert Mustacchi iter = calloc(1, sizeof (nvme_ctrl_iter_t));
537*533affcbSRobert Mustacchi if (iter == NULL) {
538*533affcbSRobert Mustacchi int e = errno;
539*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_NO_MEM, e, "failed to "
540*533affcbSRobert Mustacchi "allocate memory for a new nvme_ctrl_iter_t: %s",
541*533affcbSRobert Mustacchi strerror(e)));
542*533affcbSRobert Mustacchi }
543*533affcbSRobert Mustacchi iter->ni_nvme = nvme;
544*533affcbSRobert Mustacchi *iterp = iter;
545*533affcbSRobert Mustacchi return (nvme_success(nvme));
546*533affcbSRobert Mustacchi }
547*533affcbSRobert Mustacchi
548*533affcbSRobert Mustacchi bool
nvme_ctrl_discover(nvme_t * nvme,nvme_ctrl_disc_f func,void * arg)549*533affcbSRobert Mustacchi nvme_ctrl_discover(nvme_t *nvme, nvme_ctrl_disc_f func, void *arg)
550*533affcbSRobert Mustacchi {
551*533affcbSRobert Mustacchi nvme_ctrl_iter_t *iter;
552*533affcbSRobert Mustacchi const nvme_ctrl_disc_t *disc;
553*533affcbSRobert Mustacchi nvme_iter_t ret;
554*533affcbSRobert Mustacchi
555*533affcbSRobert Mustacchi if (func == NULL) {
556*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
557*533affcbSRobert Mustacchi "invalid nvme_ctrl_disc_f function pointer: %p", func));
558*533affcbSRobert Mustacchi }
559*533affcbSRobert Mustacchi
560*533affcbSRobert Mustacchi if (!nvme_ctrl_discover_init(nvme, &iter)) {
561*533affcbSRobert Mustacchi return (false);
562*533affcbSRobert Mustacchi }
563*533affcbSRobert Mustacchi
564*533affcbSRobert Mustacchi while ((ret = nvme_ctrl_discover_step(iter, &disc)) ==
565*533affcbSRobert Mustacchi NVME_ITER_VALID) {
566*533affcbSRobert Mustacchi if (!func(nvme, disc, arg))
567*533affcbSRobert Mustacchi break;
568*533affcbSRobert Mustacchi }
569*533affcbSRobert Mustacchi
570*533affcbSRobert Mustacchi nvme_ctrl_discover_fini(iter);
571*533affcbSRobert Mustacchi if (ret == NVME_ITER_ERROR) {
572*533affcbSRobert Mustacchi return (false);
573*533affcbSRobert Mustacchi }
574*533affcbSRobert Mustacchi
575*533affcbSRobert Mustacchi return (nvme_success(nvme));
576*533affcbSRobert Mustacchi }
577*533affcbSRobert Mustacchi
578*533affcbSRobert Mustacchi di_node_t
nvme_ctrl_disc_devi(const nvme_ctrl_disc_t * discp)579*533affcbSRobert Mustacchi nvme_ctrl_disc_devi(const nvme_ctrl_disc_t *discp)
580*533affcbSRobert Mustacchi {
581*533affcbSRobert Mustacchi return (discp->ncd_devi);
582*533affcbSRobert Mustacchi }
583*533affcbSRobert Mustacchi
584*533affcbSRobert Mustacchi di_minor_t
nvme_ctrl_disc_minor(const nvme_ctrl_disc_t * discp)585*533affcbSRobert Mustacchi nvme_ctrl_disc_minor(const nvme_ctrl_disc_t *discp)
586*533affcbSRobert Mustacchi {
587*533affcbSRobert Mustacchi return (discp->ncd_minor);
588*533affcbSRobert Mustacchi }
589*533affcbSRobert Mustacchi
590*533affcbSRobert Mustacchi void
nvme_ctrl_fini(nvme_ctrl_t * ctrl)591*533affcbSRobert Mustacchi nvme_ctrl_fini(nvme_ctrl_t *ctrl)
592*533affcbSRobert Mustacchi {
593*533affcbSRobert Mustacchi if (ctrl == NULL) {
594*533affcbSRobert Mustacchi return;
595*533affcbSRobert Mustacchi }
596*533affcbSRobert Mustacchi
597*533affcbSRobert Mustacchi if (ctrl->nc_devi_path != NULL) {
598*533affcbSRobert Mustacchi di_devfs_path_free(ctrl->nc_devi_path);
599*533affcbSRobert Mustacchi }
600*533affcbSRobert Mustacchi
601*533affcbSRobert Mustacchi if (ctrl->nc_fd >= 0) {
602*533affcbSRobert Mustacchi (void) close(ctrl->nc_fd);
603*533affcbSRobert Mustacchi ctrl->nc_fd = -1;
604*533affcbSRobert Mustacchi }
605*533affcbSRobert Mustacchi
606*533affcbSRobert Mustacchi free(ctrl);
607*533affcbSRobert Mustacchi }
608*533affcbSRobert Mustacchi
609*533affcbSRobert Mustacchi bool
nvme_ctrl_init(nvme_t * nvme,di_node_t di,nvme_ctrl_t ** outp)610*533affcbSRobert Mustacchi nvme_ctrl_init(nvme_t *nvme, di_node_t di, nvme_ctrl_t **outp)
611*533affcbSRobert Mustacchi {
612*533affcbSRobert Mustacchi const char *drv;
613*533affcbSRobert Mustacchi int32_t inst;
614*533affcbSRobert Mustacchi di_minor_t minor;
615*533affcbSRobert Mustacchi char *path, buf[PATH_MAX];
616*533affcbSRobert Mustacchi nvme_ctrl_t *ctrl;
617*533affcbSRobert Mustacchi nvme_ioctl_ctrl_info_t ctrl_info;
618*533affcbSRobert Mustacchi
619*533affcbSRobert Mustacchi if (di == DI_NODE_NIL) {
620*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
621*533affcbSRobert Mustacchi "invalid di_node_t: %p", di));
622*533affcbSRobert Mustacchi }
623*533affcbSRobert Mustacchi
624*533affcbSRobert Mustacchi if (outp == NULL) {
625*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
626*533affcbSRobert Mustacchi "invalid nvme_ctrl_t output pointer: %p", outp));
627*533affcbSRobert Mustacchi }
628*533affcbSRobert Mustacchi *outp = NULL;
629*533affcbSRobert Mustacchi
630*533affcbSRobert Mustacchi drv = di_driver_name(di);
631*533affcbSRobert Mustacchi inst = di_instance(di);
632*533affcbSRobert Mustacchi if (drv == NULL || inst < 0) {
633*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_BAD_DEVI, 0, "devi %s has "
634*533affcbSRobert Mustacchi "no driver attached", di_node_name(di)));
635*533affcbSRobert Mustacchi }
636*533affcbSRobert Mustacchi
637*533affcbSRobert Mustacchi if (strcmp(drv, "nvme") != 0) {
638*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_BAD_DEVI, 0, "devi %s isn't "
639*533affcbSRobert Mustacchi "attached to nvme, found %s", di_node_name(di), drv));
640*533affcbSRobert Mustacchi }
641*533affcbSRobert Mustacchi
642*533affcbSRobert Mustacchi /*
643*533affcbSRobert Mustacchi * We have an NVMe node. Find the right minor that corresponds to the
644*533affcbSRobert Mustacchi * attachment point. Once we find that then we can go ahead and open a
645*533affcbSRobert Mustacchi * path to that and construct the device.
646*533affcbSRobert Mustacchi */
647*533affcbSRobert Mustacchi minor = DI_MINOR_NIL;
648*533affcbSRobert Mustacchi while ((minor = di_minor_next(di, minor)) != DI_MINOR_NIL) {
649*533affcbSRobert Mustacchi if (strcmp(di_minor_nodetype(minor), DDI_NT_NVME_NEXUS) == 0) {
650*533affcbSRobert Mustacchi break;
651*533affcbSRobert Mustacchi }
652*533affcbSRobert Mustacchi }
653*533affcbSRobert Mustacchi
654*533affcbSRobert Mustacchi if (minor == DI_MINOR_NIL) {
655*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_BAD_DEVI, 0, "devi %s isn't "
656*533affcbSRobert Mustacchi "attached to nvme, found %s", di_node_name(di), drv));
657*533affcbSRobert Mustacchi }
658*533affcbSRobert Mustacchi
659*533affcbSRobert Mustacchi path = di_devfs_minor_path(minor);
660*533affcbSRobert Mustacchi if (path == NULL) {
661*533affcbSRobert Mustacchi int e = errno;
662*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_LIBDEVINFO, e, "failed to "
663*533affcbSRobert Mustacchi "obtain /devices path for the requested minor: %s",
664*533affcbSRobert Mustacchi strerror(e)));
665*533affcbSRobert Mustacchi }
666*533affcbSRobert Mustacchi
667*533affcbSRobert Mustacchi if (snprintf(buf, sizeof (buf), "/devices%s", path) >= sizeof (buf)) {
668*533affcbSRobert Mustacchi di_devfs_path_free(path);
669*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_INTERNAL, 0, "failed to "
670*533affcbSRobert Mustacchi "construct full /devices minor path, would have overflown "
671*533affcbSRobert Mustacchi "internal buffer"));
672*533affcbSRobert Mustacchi }
673*533affcbSRobert Mustacchi di_devfs_path_free(path);
674*533affcbSRobert Mustacchi
675*533affcbSRobert Mustacchi ctrl = calloc(1, sizeof (*ctrl));
676*533affcbSRobert Mustacchi if (ctrl == NULL) {
677*533affcbSRobert Mustacchi int e = errno;
678*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_NO_MEM, e, "failed to "
679*533affcbSRobert Mustacchi "allocate memory for a new nvme_ctrl_t: %s", strerror(e)));
680*533affcbSRobert Mustacchi }
681*533affcbSRobert Mustacchi
682*533affcbSRobert Mustacchi ctrl->nc_nvme = nvme;
683*533affcbSRobert Mustacchi ctrl->nc_devi = di;
684*533affcbSRobert Mustacchi ctrl->nc_minor = minor;
685*533affcbSRobert Mustacchi ctrl->nc_inst = inst;
686*533affcbSRobert Mustacchi ctrl->nc_fd = open(buf, O_RDWR | O_CLOEXEC);
687*533affcbSRobert Mustacchi if (ctrl->nc_fd < 0) {
688*533affcbSRobert Mustacchi int e = errno;
689*533affcbSRobert Mustacchi nvme_ctrl_fini(ctrl);
690*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_OPEN_DEV, e, "failed to open "
691*533affcbSRobert Mustacchi "device path %s: %s", buf, strerror(e)));
692*533affcbSRobert Mustacchi }
693*533affcbSRobert Mustacchi
694*533affcbSRobert Mustacchi ctrl->nc_devi_path = di_devfs_path(di);
695*533affcbSRobert Mustacchi if (ctrl->nc_devi_path == NULL) {
696*533affcbSRobert Mustacchi int e = errno;
697*533affcbSRobert Mustacchi nvme_ctrl_fini(ctrl);
698*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_LIBDEVINFO, e, "failed to "
699*533affcbSRobert Mustacchi "obtain /devices path for the controller: %s",
700*533affcbSRobert Mustacchi strerror(e)));
701*533affcbSRobert Mustacchi }
702*533affcbSRobert Mustacchi
703*533affcbSRobert Mustacchi if (!nvme_ioc_ctrl_info(ctrl, &ctrl_info)) {
704*533affcbSRobert Mustacchi nvme_err_data_t err;
705*533affcbSRobert Mustacchi
706*533affcbSRobert Mustacchi nvme_ctrl_err_save(ctrl, &err);
707*533affcbSRobert Mustacchi nvme_err_set(nvme, &err);
708*533affcbSRobert Mustacchi nvme_ctrl_fini(ctrl);
709*533affcbSRobert Mustacchi return (false);
710*533affcbSRobert Mustacchi }
711*533affcbSRobert Mustacchi
712*533affcbSRobert Mustacchi ctrl->nc_vers = ctrl_info.nci_vers;
713*533affcbSRobert Mustacchi ctrl->nc_info = ctrl_info.nci_ctrl_id;
714*533affcbSRobert Mustacchi
715*533affcbSRobert Mustacchi nvme_vendor_map_ctrl(ctrl);
716*533affcbSRobert Mustacchi
717*533affcbSRobert Mustacchi *outp = ctrl;
718*533affcbSRobert Mustacchi return (nvme_success(nvme));
719*533affcbSRobert Mustacchi }
720*533affcbSRobert Mustacchi
721*533affcbSRobert Mustacchi typedef struct {
722*533affcbSRobert Mustacchi bool ncia_found;
723*533affcbSRobert Mustacchi int32_t ncia_inst;
724*533affcbSRobert Mustacchi nvme_ctrl_t *ncia_ctrl;
725*533affcbSRobert Mustacchi nvme_err_data_t ncia_err;
726*533affcbSRobert Mustacchi } nvme_ctrl_init_arg_t;
727*533affcbSRobert Mustacchi
728*533affcbSRobert Mustacchi bool
nvme_ctrl_init_by_instance_cb(nvme_t * nvme,const nvme_ctrl_disc_t * disc,void * arg)729*533affcbSRobert Mustacchi nvme_ctrl_init_by_instance_cb(nvme_t *nvme, const nvme_ctrl_disc_t *disc,
730*533affcbSRobert Mustacchi void *arg)
731*533affcbSRobert Mustacchi {
732*533affcbSRobert Mustacchi nvme_ctrl_init_arg_t *init = arg;
733*533affcbSRobert Mustacchi
734*533affcbSRobert Mustacchi if (di_instance(disc->ncd_devi) != init->ncia_inst) {
735*533affcbSRobert Mustacchi return (true);
736*533affcbSRobert Mustacchi }
737*533affcbSRobert Mustacchi
738*533affcbSRobert Mustacchi /*
739*533affcbSRobert Mustacchi * If we fail to open the controller, we need to save the error
740*533affcbSRobert Mustacchi * information because it's going to end up being clobbered because this
741*533affcbSRobert Mustacchi * is a callback function surrounded by other libnvme callers.
742*533affcbSRobert Mustacchi */
743*533affcbSRobert Mustacchi init->ncia_found = true;
744*533affcbSRobert Mustacchi if (!nvme_ctrl_init(nvme, disc->ncd_devi, &init->ncia_ctrl)) {
745*533affcbSRobert Mustacchi nvme_err_save(nvme, &init->ncia_err);
746*533affcbSRobert Mustacchi }
747*533affcbSRobert Mustacchi
748*533affcbSRobert Mustacchi return (false);
749*533affcbSRobert Mustacchi }
750*533affcbSRobert Mustacchi
751*533affcbSRobert Mustacchi bool
nvme_ctrl_init_by_instance(nvme_t * nvme,int32_t inst,nvme_ctrl_t ** outp)752*533affcbSRobert Mustacchi nvme_ctrl_init_by_instance(nvme_t *nvme, int32_t inst, nvme_ctrl_t **outp)
753*533affcbSRobert Mustacchi {
754*533affcbSRobert Mustacchi nvme_ctrl_init_arg_t init;
755*533affcbSRobert Mustacchi
756*533affcbSRobert Mustacchi if (inst < 0) {
757*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_ILLEGAL_INSTANCE, 0,
758*533affcbSRobert Mustacchi "encountered illegal negative instance number: %d", inst));
759*533affcbSRobert Mustacchi }
760*533affcbSRobert Mustacchi
761*533affcbSRobert Mustacchi if (outp == NULL) {
762*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
763*533affcbSRobert Mustacchi "invalid nvme_ctrl_t output pointer: %p", outp));
764*533affcbSRobert Mustacchi }
765*533affcbSRobert Mustacchi
766*533affcbSRobert Mustacchi init.ncia_found = false;
767*533affcbSRobert Mustacchi init.ncia_inst = inst;
768*533affcbSRobert Mustacchi init.ncia_ctrl = NULL;
769*533affcbSRobert Mustacchi
770*533affcbSRobert Mustacchi if (!nvme_ctrl_discover(nvme, nvme_ctrl_init_by_instance_cb, &init)) {
771*533affcbSRobert Mustacchi return (false);
772*533affcbSRobert Mustacchi }
773*533affcbSRobert Mustacchi
774*533affcbSRobert Mustacchi if (!init.ncia_found) {
775*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
776*533affcbSRobert Mustacchi "failed to find NVMe controller nvme%d", inst));
777*533affcbSRobert Mustacchi }
778*533affcbSRobert Mustacchi
779*533affcbSRobert Mustacchi /*
780*533affcbSRobert Mustacchi * If we don't have an NVMe controller structure but we did find the
781*533affcbSRobert Mustacchi * instance, then we must have had an error constructing this will which
782*533affcbSRobert Mustacchi * be on our handle. We have to reconstruct the error from saved
783*533affcbSRobert Mustacchi * information as nvme_ctrl_discover will have clobbered it.
784*533affcbSRobert Mustacchi */
785*533affcbSRobert Mustacchi if (init.ncia_ctrl == NULL) {
786*533affcbSRobert Mustacchi nvme_err_set(nvme, &init.ncia_err);
787*533affcbSRobert Mustacchi return (false);
788*533affcbSRobert Mustacchi }
789*533affcbSRobert Mustacchi
790*533affcbSRobert Mustacchi *outp = init.ncia_ctrl;
791*533affcbSRobert Mustacchi return (nvme_success(nvme));
792*533affcbSRobert Mustacchi }
793*533affcbSRobert Mustacchi
794*533affcbSRobert Mustacchi bool
nvme_ctrl_devi(nvme_ctrl_t * ctrl,di_node_t * devip)795*533affcbSRobert Mustacchi nvme_ctrl_devi(nvme_ctrl_t *ctrl, di_node_t *devip)
796*533affcbSRobert Mustacchi {
797*533affcbSRobert Mustacchi *devip = ctrl->nc_devi;
798*533affcbSRobert Mustacchi return (nvme_ctrl_success(ctrl));
799*533affcbSRobert Mustacchi }
800*533affcbSRobert Mustacchi
801*533affcbSRobert Mustacchi bool
nvme_ioc_ctrl_info(nvme_ctrl_t * ctrl,nvme_ioctl_ctrl_info_t * info)802*533affcbSRobert Mustacchi nvme_ioc_ctrl_info(nvme_ctrl_t *ctrl, nvme_ioctl_ctrl_info_t *info)
803*533affcbSRobert Mustacchi {
804*533affcbSRobert Mustacchi (void) memset(info, 0, sizeof (nvme_ioctl_ctrl_info_t));
805*533affcbSRobert Mustacchi
806*533affcbSRobert Mustacchi if (ioctl(ctrl->nc_fd, NVME_IOC_CTRL_INFO, info) != 0) {
807*533affcbSRobert Mustacchi int e = errno;
808*533affcbSRobert Mustacchi return (nvme_ioctl_syserror(ctrl, e, "controller info"));
809*533affcbSRobert Mustacchi }
810*533affcbSRobert Mustacchi
811*533affcbSRobert Mustacchi if (info->nci_common.nioc_drv_err != NVME_IOCTL_E_OK) {
812*533affcbSRobert Mustacchi return (nvme_ioctl_error(ctrl, &info->nci_common,
813*533affcbSRobert Mustacchi "controller info"));
814*533affcbSRobert Mustacchi }
815*533affcbSRobert Mustacchi
816*533affcbSRobert Mustacchi return (true);
817*533affcbSRobert Mustacchi }
818*533affcbSRobert Mustacchi
819*533affcbSRobert Mustacchi bool
nvme_ioc_ns_info(nvme_ctrl_t * ctrl,uint32_t nsid,nvme_ioctl_ns_info_t * info)820*533affcbSRobert Mustacchi nvme_ioc_ns_info(nvme_ctrl_t *ctrl, uint32_t nsid, nvme_ioctl_ns_info_t *info)
821*533affcbSRobert Mustacchi {
822*533affcbSRobert Mustacchi (void) memset(info, 0, sizeof (nvme_ioctl_ns_info_t));
823*533affcbSRobert Mustacchi info->nni_common.nioc_nsid = nsid;
824*533affcbSRobert Mustacchi
825*533affcbSRobert Mustacchi if (ioctl(ctrl->nc_fd, NVME_IOC_NS_INFO, info) != 0) {
826*533affcbSRobert Mustacchi int e = errno;
827*533affcbSRobert Mustacchi return (nvme_ioctl_syserror(ctrl, e, "namespace info"));
828*533affcbSRobert Mustacchi }
829*533affcbSRobert Mustacchi
830*533affcbSRobert Mustacchi if (info->nni_common.nioc_drv_err != NVME_IOCTL_E_OK) {
831*533affcbSRobert Mustacchi return (nvme_ioctl_error(ctrl, &info->nni_common,
832*533affcbSRobert Mustacchi "namespace info"));
833*533affcbSRobert Mustacchi }
834*533affcbSRobert Mustacchi
835*533affcbSRobert Mustacchi return (true);
836*533affcbSRobert Mustacchi }
837*533affcbSRobert Mustacchi
838*533affcbSRobert Mustacchi const char *
nvme_tporttostr(nvme_ctrl_transport_t tport)839*533affcbSRobert Mustacchi nvme_tporttostr(nvme_ctrl_transport_t tport)
840*533affcbSRobert Mustacchi {
841*533affcbSRobert Mustacchi switch (tport) {
842*533affcbSRobert Mustacchi case NVME_CTRL_TRANSPORT_PCI:
843*533affcbSRobert Mustacchi return ("PCI");
844*533affcbSRobert Mustacchi case NVME_CTRL_TRANSPORT_TCP:
845*533affcbSRobert Mustacchi return ("TCP");
846*533affcbSRobert Mustacchi case NVME_CTRL_TRANSPORT_RDMA:
847*533affcbSRobert Mustacchi return ("RDMA");
848*533affcbSRobert Mustacchi default:
849*533affcbSRobert Mustacchi return ("unknown transport");
850*533affcbSRobert Mustacchi }
851*533affcbSRobert Mustacchi }
852*533affcbSRobert Mustacchi
853*533affcbSRobert Mustacchi static bool
nvme_ns_discover_validate(nvme_ctrl_t * ctrl,nvme_ns_disc_level_t level)854*533affcbSRobert Mustacchi nvme_ns_discover_validate(nvme_ctrl_t *ctrl, nvme_ns_disc_level_t level)
855*533affcbSRobert Mustacchi {
856*533affcbSRobert Mustacchi switch (level) {
857*533affcbSRobert Mustacchi case NVME_NS_DISC_F_ALL:
858*533affcbSRobert Mustacchi case NVME_NS_DISC_F_ALLOCATED:
859*533affcbSRobert Mustacchi case NVME_NS_DISC_F_ACTIVE:
860*533affcbSRobert Mustacchi case NVME_NS_DISC_F_NOT_IGNORED:
861*533affcbSRobert Mustacchi case NVME_NS_DISC_F_BLKDEV:
862*533affcbSRobert Mustacchi return (true);
863*533affcbSRobert Mustacchi default:
864*533affcbSRobert Mustacchi return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_FLAG, 0, "invalid "
865*533affcbSRobert Mustacchi "namespace discovery level specified: 0x%x", level));
866*533affcbSRobert Mustacchi }
867*533affcbSRobert Mustacchi }
868*533affcbSRobert Mustacchi
869*533affcbSRobert Mustacchi void
nvme_ns_discover_fini(nvme_ns_iter_t * iter)870*533affcbSRobert Mustacchi nvme_ns_discover_fini(nvme_ns_iter_t *iter)
871*533affcbSRobert Mustacchi {
872*533affcbSRobert Mustacchi free(iter);
873*533affcbSRobert Mustacchi }
874*533affcbSRobert Mustacchi
875*533affcbSRobert Mustacchi const char *
nvme_nsleveltostr(nvme_ns_disc_level_t level)876*533affcbSRobert Mustacchi nvme_nsleveltostr(nvme_ns_disc_level_t level)
877*533affcbSRobert Mustacchi {
878*533affcbSRobert Mustacchi switch (level) {
879*533affcbSRobert Mustacchi case NVME_NS_DISC_F_ALL:
880*533affcbSRobert Mustacchi return ("unallocated");
881*533affcbSRobert Mustacchi case NVME_NS_DISC_F_ALLOCATED:
882*533affcbSRobert Mustacchi return ("allocated");
883*533affcbSRobert Mustacchi case NVME_NS_DISC_F_ACTIVE:
884*533affcbSRobert Mustacchi return ("active");
885*533affcbSRobert Mustacchi case NVME_NS_DISC_F_NOT_IGNORED:
886*533affcbSRobert Mustacchi return ("not ignored");
887*533affcbSRobert Mustacchi case NVME_NS_DISC_F_BLKDEV:
888*533affcbSRobert Mustacchi return ("blkdev");
889*533affcbSRobert Mustacchi default:
890*533affcbSRobert Mustacchi return ("unknown level");
891*533affcbSRobert Mustacchi }
892*533affcbSRobert Mustacchi }
893*533affcbSRobert Mustacchi
894*533affcbSRobert Mustacchi nvme_ns_disc_level_t
nvme_ns_state_to_disc_level(nvme_ns_state_t state)895*533affcbSRobert Mustacchi nvme_ns_state_to_disc_level(nvme_ns_state_t state)
896*533affcbSRobert Mustacchi {
897*533affcbSRobert Mustacchi if ((state & NVME_NS_STATE_ALLOCATED) == 0) {
898*533affcbSRobert Mustacchi return (NVME_NS_DISC_F_ALL);
899*533affcbSRobert Mustacchi }
900*533affcbSRobert Mustacchi
901*533affcbSRobert Mustacchi if ((state & NVME_NS_STATE_ACTIVE) == 0) {
902*533affcbSRobert Mustacchi return (NVME_NS_DISC_F_ALLOCATED);
903*533affcbSRobert Mustacchi }
904*533affcbSRobert Mustacchi
905*533affcbSRobert Mustacchi if ((state & NVME_NS_STATE_IGNORED) != 0) {
906*533affcbSRobert Mustacchi return (NVME_NS_DISC_F_ACTIVE);
907*533affcbSRobert Mustacchi }
908*533affcbSRobert Mustacchi
909*533affcbSRobert Mustacchi if ((state & NVME_NS_STATE_ATTACHED) == 0) {
910*533affcbSRobert Mustacchi return (NVME_NS_DISC_F_NOT_IGNORED);
911*533affcbSRobert Mustacchi } else {
912*533affcbSRobert Mustacchi return (NVME_NS_DISC_F_BLKDEV);
913*533affcbSRobert Mustacchi }
914*533affcbSRobert Mustacchi }
915*533affcbSRobert Mustacchi
916*533affcbSRobert Mustacchi nvme_iter_t
nvme_ns_discover_step(nvme_ns_iter_t * iter,const nvme_ns_disc_t ** discp)917*533affcbSRobert Mustacchi nvme_ns_discover_step(nvme_ns_iter_t *iter, const nvme_ns_disc_t **discp)
918*533affcbSRobert Mustacchi {
919*533affcbSRobert Mustacchi nvme_ctrl_t *ctrl = iter->nni_ctrl;
920*533affcbSRobert Mustacchi
921*533affcbSRobert Mustacchi if (iter->nni_err) {
922*533affcbSRobert Mustacchi return (NVME_ITER_ERROR);
923*533affcbSRobert Mustacchi }
924*533affcbSRobert Mustacchi
925*533affcbSRobert Mustacchi if (iter->nni_done) {
926*533affcbSRobert Mustacchi return (NVME_ITER_DONE);
927*533affcbSRobert Mustacchi }
928*533affcbSRobert Mustacchi
929*533affcbSRobert Mustacchi while (iter->nni_cur_idx <= ctrl->nc_info.id_nn) {
930*533affcbSRobert Mustacchi uint32_t nsid = iter->nni_cur_idx;
931*533affcbSRobert Mustacchi nvme_ioctl_ns_info_t ns_info = { 0 };
932*533affcbSRobert Mustacchi nvme_ns_disc_level_t level;
933*533affcbSRobert Mustacchi
934*533affcbSRobert Mustacchi if (!nvme_ioc_ns_info(ctrl, nsid, &ns_info)) {
935*533affcbSRobert Mustacchi iter->nni_err = true;
936*533affcbSRobert Mustacchi return (NVME_ITER_ERROR);
937*533affcbSRobert Mustacchi }
938*533affcbSRobert Mustacchi
939*533affcbSRobert Mustacchi iter->nni_cur_idx++;
940*533affcbSRobert Mustacchi level = nvme_ns_state_to_disc_level(ns_info.nni_state);
941*533affcbSRobert Mustacchi if (iter->nni_level > level) {
942*533affcbSRobert Mustacchi continue;
943*533affcbSRobert Mustacchi }
944*533affcbSRobert Mustacchi
945*533affcbSRobert Mustacchi (void) memset(&iter->nni_disc, 0, sizeof (nvme_ns_disc_t));
946*533affcbSRobert Mustacchi iter->nni_disc.nnd_nsid = nsid;
947*533affcbSRobert Mustacchi iter->nni_disc.nnd_level = level;
948*533affcbSRobert Mustacchi
949*533affcbSRobert Mustacchi if (nvme_guid_valid(ctrl, ns_info.nni_id.id_nguid)) {
950*533affcbSRobert Mustacchi iter->nni_disc.nnd_flags |= NVME_NS_DISC_F_NGUID_VALID;
951*533affcbSRobert Mustacchi (void) memcpy(iter->nni_disc.nnd_nguid,
952*533affcbSRobert Mustacchi ns_info.nni_id.id_nguid,
953*533affcbSRobert Mustacchi sizeof (ns_info.nni_id.id_nguid));
954*533affcbSRobert Mustacchi }
955*533affcbSRobert Mustacchi
956*533affcbSRobert Mustacchi if (nvme_eui64_valid(ctrl, ns_info.nni_id.id_eui64)) {
957*533affcbSRobert Mustacchi iter->nni_disc.nnd_flags |= NVME_NS_DISC_F_EUI64_VALID;
958*533affcbSRobert Mustacchi (void) memcpy(iter->nni_disc.nnd_eui64,
959*533affcbSRobert Mustacchi ns_info.nni_id.id_eui64,
960*533affcbSRobert Mustacchi sizeof (ns_info.nni_id.id_eui64));
961*533affcbSRobert Mustacchi }
962*533affcbSRobert Mustacchi
963*533affcbSRobert Mustacchi *discp = &iter->nni_disc;
964*533affcbSRobert Mustacchi return (NVME_ITER_VALID);
965*533affcbSRobert Mustacchi }
966*533affcbSRobert Mustacchi
967*533affcbSRobert Mustacchi iter->nni_done = true;
968*533affcbSRobert Mustacchi return (NVME_ITER_DONE);
969*533affcbSRobert Mustacchi }
970*533affcbSRobert Mustacchi
971*533affcbSRobert Mustacchi bool
nvme_ns_discover_init(nvme_ctrl_t * ctrl,nvme_ns_disc_level_t level,nvme_ns_iter_t ** iterp)972*533affcbSRobert Mustacchi nvme_ns_discover_init(nvme_ctrl_t *ctrl, nvme_ns_disc_level_t level,
973*533affcbSRobert Mustacchi nvme_ns_iter_t **iterp)
974*533affcbSRobert Mustacchi {
975*533affcbSRobert Mustacchi nvme_ns_iter_t *iter;
976*533affcbSRobert Mustacchi
977*533affcbSRobert Mustacchi if (!nvme_ns_discover_validate(ctrl, level)) {
978*533affcbSRobert Mustacchi return (false);
979*533affcbSRobert Mustacchi }
980*533affcbSRobert Mustacchi
981*533affcbSRobert Mustacchi if (iterp == NULL) {
982*533affcbSRobert Mustacchi return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
983*533affcbSRobert Mustacchi "encountered invalid nvme_ns_iter_t output pointer: %p",
984*533affcbSRobert Mustacchi iterp));
985*533affcbSRobert Mustacchi }
986*533affcbSRobert Mustacchi
987*533affcbSRobert Mustacchi iter = calloc(1, sizeof (nvme_ns_iter_t));
988*533affcbSRobert Mustacchi if (iter == NULL) {
989*533affcbSRobert Mustacchi int e = errno;
990*533affcbSRobert Mustacchi return (nvme_ctrl_error(ctrl, NVME_ERR_NO_MEM, e, "failed to "
991*533affcbSRobert Mustacchi "allocate memory for a new nvme_ns_iter_t: %s",
992*533affcbSRobert Mustacchi strerror(e)));
993*533affcbSRobert Mustacchi }
994*533affcbSRobert Mustacchi
995*533affcbSRobert Mustacchi iter->nni_ctrl = ctrl;
996*533affcbSRobert Mustacchi iter->nni_level = level;
997*533affcbSRobert Mustacchi iter->nni_cur_idx = 1;
998*533affcbSRobert Mustacchi
999*533affcbSRobert Mustacchi *iterp = iter;
1000*533affcbSRobert Mustacchi return (nvme_ctrl_success(ctrl));
1001*533affcbSRobert Mustacchi }
1002*533affcbSRobert Mustacchi
1003*533affcbSRobert Mustacchi bool
nvme_ns_discover(nvme_ctrl_t * ctrl,nvme_ns_disc_level_t level,nvme_ns_disc_f func,void * arg)1004*533affcbSRobert Mustacchi nvme_ns_discover(nvme_ctrl_t *ctrl, nvme_ns_disc_level_t level,
1005*533affcbSRobert Mustacchi nvme_ns_disc_f func, void *arg)
1006*533affcbSRobert Mustacchi {
1007*533affcbSRobert Mustacchi nvme_ns_iter_t *iter;
1008*533affcbSRobert Mustacchi nvme_iter_t ret;
1009*533affcbSRobert Mustacchi const nvme_ns_disc_t *disc;
1010*533affcbSRobert Mustacchi
1011*533affcbSRobert Mustacchi if (!nvme_ns_discover_validate(ctrl, level)) {
1012*533affcbSRobert Mustacchi return (false);
1013*533affcbSRobert Mustacchi }
1014*533affcbSRobert Mustacchi
1015*533affcbSRobert Mustacchi if (func == NULL) {
1016*533affcbSRobert Mustacchi return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
1017*533affcbSRobert Mustacchi "encountered invalid nvme_ns_disc_f function pointer: %p",
1018*533affcbSRobert Mustacchi func));
1019*533affcbSRobert Mustacchi }
1020*533affcbSRobert Mustacchi
1021*533affcbSRobert Mustacchi if (!nvme_ns_discover_init(ctrl, level, &iter)) {
1022*533affcbSRobert Mustacchi return (false);
1023*533affcbSRobert Mustacchi }
1024*533affcbSRobert Mustacchi
1025*533affcbSRobert Mustacchi while ((ret = nvme_ns_discover_step(iter, &disc)) == NVME_ITER_VALID) {
1026*533affcbSRobert Mustacchi if (!func(ctrl, disc, arg))
1027*533affcbSRobert Mustacchi break;
1028*533affcbSRobert Mustacchi }
1029*533affcbSRobert Mustacchi
1030*533affcbSRobert Mustacchi nvme_ns_discover_fini(iter);
1031*533affcbSRobert Mustacchi if (ret == NVME_ITER_ERROR) {
1032*533affcbSRobert Mustacchi return (false);
1033*533affcbSRobert Mustacchi }
1034*533affcbSRobert Mustacchi
1035*533affcbSRobert Mustacchi return (nvme_ctrl_success(ctrl));
1036*533affcbSRobert Mustacchi }
1037*533affcbSRobert Mustacchi
1038*533affcbSRobert Mustacchi uint32_t
nvme_ns_disc_nsid(const nvme_ns_disc_t * discp)1039*533affcbSRobert Mustacchi nvme_ns_disc_nsid(const nvme_ns_disc_t *discp)
1040*533affcbSRobert Mustacchi {
1041*533affcbSRobert Mustacchi return (discp->nnd_nsid);
1042*533affcbSRobert Mustacchi }
1043*533affcbSRobert Mustacchi
1044*533affcbSRobert Mustacchi nvme_ns_disc_level_t
nvme_ns_disc_level(const nvme_ns_disc_t * discp)1045*533affcbSRobert Mustacchi nvme_ns_disc_level(const nvme_ns_disc_t *discp)
1046*533affcbSRobert Mustacchi {
1047*533affcbSRobert Mustacchi return (discp->nnd_level);
1048*533affcbSRobert Mustacchi }
1049*533affcbSRobert Mustacchi
1050*533affcbSRobert Mustacchi nvme_ns_disc_flags_t
nvme_ns_disc_flags(const nvme_ns_disc_t * discp)1051*533affcbSRobert Mustacchi nvme_ns_disc_flags(const nvme_ns_disc_t *discp)
1052*533affcbSRobert Mustacchi {
1053*533affcbSRobert Mustacchi return (discp->nnd_flags);
1054*533affcbSRobert Mustacchi }
1055*533affcbSRobert Mustacchi
1056*533affcbSRobert Mustacchi const uint8_t *
nvme_ns_disc_eui64(const nvme_ns_disc_t * discp)1057*533affcbSRobert Mustacchi nvme_ns_disc_eui64(const nvme_ns_disc_t *discp)
1058*533affcbSRobert Mustacchi {
1059*533affcbSRobert Mustacchi if ((discp->nnd_flags & NVME_NS_DISC_F_EUI64_VALID) == 0) {
1060*533affcbSRobert Mustacchi return (NULL);
1061*533affcbSRobert Mustacchi }
1062*533affcbSRobert Mustacchi
1063*533affcbSRobert Mustacchi return (discp->nnd_eui64);
1064*533affcbSRobert Mustacchi }
1065*533affcbSRobert Mustacchi
1066*533affcbSRobert Mustacchi const uint8_t *
nvme_ns_disc_nguid(const nvme_ns_disc_t * discp)1067*533affcbSRobert Mustacchi nvme_ns_disc_nguid(const nvme_ns_disc_t *discp)
1068*533affcbSRobert Mustacchi {
1069*533affcbSRobert Mustacchi if ((discp->nnd_flags & NVME_NS_DISC_F_NGUID_VALID) == 0) {
1070*533affcbSRobert Mustacchi return (NULL);
1071*533affcbSRobert Mustacchi }
1072*533affcbSRobert Mustacchi
1073*533affcbSRobert Mustacchi return (discp->nnd_nguid);
1074*533affcbSRobert Mustacchi }
1075*533affcbSRobert Mustacchi
1076*533affcbSRobert Mustacchi void
nvme_ns_fini(nvme_ns_t * ns)1077*533affcbSRobert Mustacchi nvme_ns_fini(nvme_ns_t *ns)
1078*533affcbSRobert Mustacchi {
1079*533affcbSRobert Mustacchi free(ns);
1080*533affcbSRobert Mustacchi }
1081*533affcbSRobert Mustacchi
1082*533affcbSRobert Mustacchi bool
nvme_ns_init(nvme_ctrl_t * ctrl,uint32_t nsid,nvme_ns_t ** nsp)1083*533affcbSRobert Mustacchi nvme_ns_init(nvme_ctrl_t *ctrl, uint32_t nsid, nvme_ns_t **nsp)
1084*533affcbSRobert Mustacchi {
1085*533affcbSRobert Mustacchi nvme_ns_t *ns;
1086*533affcbSRobert Mustacchi
1087*533affcbSRobert Mustacchi if (nsp == NULL) {
1088*533affcbSRobert Mustacchi return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
1089*533affcbSRobert Mustacchi "encountered invalid nvme_ns_t output pointer: %p", nsp));
1090*533affcbSRobert Mustacchi }
1091*533affcbSRobert Mustacchi
1092*533affcbSRobert Mustacchi if (nsid < NVME_NSID_MIN || nsid > ctrl->nc_info.id_nn) {
1093*533affcbSRobert Mustacchi return (nvme_ctrl_error(ctrl, NVME_ERR_NS_RANGE, 0, "requested "
1094*533affcbSRobert Mustacchi "namespace 0x%x is invalid, valid namespaces are [0x%x, "
1095*533affcbSRobert Mustacchi "0x%x]", nsid, NVME_NSID_MIN, ctrl->nc_info.id_nn));
1096*533affcbSRobert Mustacchi }
1097*533affcbSRobert Mustacchi
1098*533affcbSRobert Mustacchi ns = calloc(1, sizeof (nvme_ns_t));
1099*533affcbSRobert Mustacchi if (ns == NULL) {
1100*533affcbSRobert Mustacchi int e = errno;
1101*533affcbSRobert Mustacchi return (nvme_ctrl_error(ctrl, NVME_ERR_NO_MEM, e, "failed to "
1102*533affcbSRobert Mustacchi "allocate memory for a new nvme_ns_t: %s", strerror(e)));
1103*533affcbSRobert Mustacchi }
1104*533affcbSRobert Mustacchi
1105*533affcbSRobert Mustacchi ns->nn_ctrl = ctrl;
1106*533affcbSRobert Mustacchi ns->nn_nsid = nsid;
1107*533affcbSRobert Mustacchi
1108*533affcbSRobert Mustacchi *nsp = ns;
1109*533affcbSRobert Mustacchi return (nvme_ctrl_success(ctrl));
1110*533affcbSRobert Mustacchi }
1111*533affcbSRobert Mustacchi
1112*533affcbSRobert Mustacchi typedef struct {
1113*533affcbSRobert Mustacchi nvme_ctrl_t *nnia_ctrl;
1114*533affcbSRobert Mustacchi const char *nnia_name;
1115*533affcbSRobert Mustacchi bool nnia_found;
1116*533affcbSRobert Mustacchi nvme_ns_t *nnia_ns;
1117*533affcbSRobert Mustacchi nvme_err_data_t nnia_err;
1118*533affcbSRobert Mustacchi } nvme_ns_init_arg_t;
1119*533affcbSRobert Mustacchi
1120*533affcbSRobert Mustacchi static bool
nvme_ns_init_by_name_cb(nvme_ctrl_t * ctrl,const nvme_ns_disc_t * disc,void * arg)1121*533affcbSRobert Mustacchi nvme_ns_init_by_name_cb(nvme_ctrl_t *ctrl, const nvme_ns_disc_t *disc,
1122*533affcbSRobert Mustacchi void *arg)
1123*533affcbSRobert Mustacchi {
1124*533affcbSRobert Mustacchi nvme_ns_init_arg_t *init = arg;
1125*533affcbSRobert Mustacchi char buf[NVME_NGUID_NAMELEN];
1126*533affcbSRobert Mustacchi CTASSERT(NVME_NGUID_NAMELEN > NVME_EUI64_NAMELEN);
1127*533affcbSRobert Mustacchi
1128*533affcbSRobert Mustacchi if ((disc->nnd_flags & NVME_NS_DISC_F_NGUID_VALID) != 0) {
1129*533affcbSRobert Mustacchi (void) nvme_format_nguid(disc->nnd_nguid, buf, sizeof (buf));
1130*533affcbSRobert Mustacchi if (strcasecmp(init->nnia_name, buf) == 0)
1131*533affcbSRobert Mustacchi goto match;
1132*533affcbSRobert Mustacchi }
1133*533affcbSRobert Mustacchi
1134*533affcbSRobert Mustacchi if ((disc->nnd_flags & NVME_NS_DISC_F_EUI64_VALID) != 0) {
1135*533affcbSRobert Mustacchi (void) nvme_format_eui64(disc->nnd_eui64, buf, sizeof (buf));
1136*533affcbSRobert Mustacchi if (strcasecmp(init->nnia_name, buf) == 0)
1137*533affcbSRobert Mustacchi goto match;
1138*533affcbSRobert Mustacchi }
1139*533affcbSRobert Mustacchi
1140*533affcbSRobert Mustacchi (void) snprintf(buf, sizeof (buf), "%u", disc->nnd_nsid);
1141*533affcbSRobert Mustacchi if (strcasecmp(init->nnia_name, buf) == 0)
1142*533affcbSRobert Mustacchi goto match;
1143*533affcbSRobert Mustacchi
1144*533affcbSRobert Mustacchi return (true);
1145*533affcbSRobert Mustacchi
1146*533affcbSRobert Mustacchi match:
1147*533affcbSRobert Mustacchi init->nnia_found = true;
1148*533affcbSRobert Mustacchi if (!nvme_ns_init(ctrl, disc->nnd_nsid, &init->nnia_ns)) {
1149*533affcbSRobert Mustacchi nvme_ctrl_err_save(ctrl, &init->nnia_err);
1150*533affcbSRobert Mustacchi }
1151*533affcbSRobert Mustacchi
1152*533affcbSRobert Mustacchi return (false);
1153*533affcbSRobert Mustacchi }
1154*533affcbSRobert Mustacchi
1155*533affcbSRobert Mustacchi /*
1156*533affcbSRobert Mustacchi * Attempt to find a namespace by 'name'. A name could be the NGUID, EUI64, or
1157*533affcbSRobert Mustacchi * just the plain old namespace ID.
1158*533affcbSRobert Mustacchi */
1159*533affcbSRobert Mustacchi bool
nvme_ns_init_by_name(nvme_ctrl_t * ctrl,const char * ns_name,nvme_ns_t ** nsp)1160*533affcbSRobert Mustacchi nvme_ns_init_by_name(nvme_ctrl_t *ctrl, const char *ns_name, nvme_ns_t **nsp)
1161*533affcbSRobert Mustacchi {
1162*533affcbSRobert Mustacchi nvme_ns_init_arg_t init;
1163*533affcbSRobert Mustacchi
1164*533affcbSRobert Mustacchi if (ns_name == NULL) {
1165*533affcbSRobert Mustacchi return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
1166*533affcbSRobert Mustacchi "encountered invalid namespace name: %p", ns_name));
1167*533affcbSRobert Mustacchi }
1168*533affcbSRobert Mustacchi
1169*533affcbSRobert Mustacchi if (nsp == NULL) {
1170*533affcbSRobert Mustacchi return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_PTR, 0,
1171*533affcbSRobert Mustacchi "encountered invalid nvme_ns_t output pointer: %p", nsp));
1172*533affcbSRobert Mustacchi }
1173*533affcbSRobert Mustacchi
1174*533affcbSRobert Mustacchi init.nnia_ctrl = ctrl;
1175*533affcbSRobert Mustacchi init.nnia_name = ns_name;
1176*533affcbSRobert Mustacchi init.nnia_found = false;
1177*533affcbSRobert Mustacchi init.nnia_ns = NULL;
1178*533affcbSRobert Mustacchi
1179*533affcbSRobert Mustacchi if (!nvme_ns_discover(ctrl, NVME_NS_DISC_F_ALL, nvme_ns_init_by_name_cb,
1180*533affcbSRobert Mustacchi &init)) {
1181*533affcbSRobert Mustacchi return (false);
1182*533affcbSRobert Mustacchi }
1183*533affcbSRobert Mustacchi
1184*533affcbSRobert Mustacchi if (!init.nnia_found) {
1185*533affcbSRobert Mustacchi return (nvme_ctrl_error(ctrl, NVME_ERR_NS_RANGE, 0, "failed to "
1186*533affcbSRobert Mustacchi "find NVMe namespace %s on nvme%d", ns_name,
1187*533affcbSRobert Mustacchi ctrl->nc_inst));
1188*533affcbSRobert Mustacchi }
1189*533affcbSRobert Mustacchi
1190*533affcbSRobert Mustacchi if (init.nnia_ns == NULL) {
1191*533affcbSRobert Mustacchi nvme_ctrl_err_set(ctrl, &init.nnia_err);
1192*533affcbSRobert Mustacchi return (false);
1193*533affcbSRobert Mustacchi }
1194*533affcbSRobert Mustacchi
1195*533affcbSRobert Mustacchi *nsp = init.nnia_ns;
1196*533affcbSRobert Mustacchi return (nvme_ctrl_success(ctrl));
1197*533affcbSRobert Mustacchi }
1198*533affcbSRobert Mustacchi
1199*533affcbSRobert Mustacchi bool
nvme_ctrl_ns_init(nvme_t * nvme,const char * name,nvme_ctrl_t ** ctrlp,nvme_ns_t ** nsp)1200*533affcbSRobert Mustacchi nvme_ctrl_ns_init(nvme_t *nvme, const char *name, nvme_ctrl_t **ctrlp,
1201*533affcbSRobert Mustacchi nvme_ns_t **nsp)
1202*533affcbSRobert Mustacchi {
1203*533affcbSRobert Mustacchi const char *slash, *ns_name;
1204*533affcbSRobert Mustacchi char *eptr;
1205*533affcbSRobert Mustacchi nvme_ctrl_t *ctrl;
1206*533affcbSRobert Mustacchi nvme_ns_t *ns;
1207*533affcbSRobert Mustacchi unsigned long inst;
1208*533affcbSRobert Mustacchi size_t ctrl_namelen;
1209*533affcbSRobert Mustacchi
1210*533affcbSRobert Mustacchi if (name == NULL) {
1211*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
1212*533affcbSRobert Mustacchi "invalid name to search for: %p", name));
1213*533affcbSRobert Mustacchi }
1214*533affcbSRobert Mustacchi
1215*533affcbSRobert Mustacchi /*
1216*533affcbSRobert Mustacchi * We require a controller, but the namespace output pointer is only
1217*533affcbSRobert Mustacchi * required if we end up having a namespace present.
1218*533affcbSRobert Mustacchi */
1219*533affcbSRobert Mustacchi if (ctrlp == NULL) {
1220*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0, "encountered "
1221*533affcbSRobert Mustacchi "invalid nvme_ctrl_t output pointer: %p", ctrlp));
1222*533affcbSRobert Mustacchi }
1223*533affcbSRobert Mustacchi
1224*533affcbSRobert Mustacchi slash = strchr(name, '/');
1225*533affcbSRobert Mustacchi if (slash != NULL) {
1226*533affcbSRobert Mustacchi ctrl_namelen = (uintptr_t)slash - (uintptr_t)name;
1227*533affcbSRobert Mustacchi ns_name = slash + 1;
1228*533affcbSRobert Mustacchi
1229*533affcbSRobert Mustacchi if (nsp == NULL) {
1230*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_BAD_PTR, 0,
1231*533affcbSRobert Mustacchi "encountered invalid nvme_ns_t output pointer: %p",
1232*533affcbSRobert Mustacchi nsp));
1233*533affcbSRobert Mustacchi }
1234*533affcbSRobert Mustacchi
1235*533affcbSRobert Mustacchi } else {
1236*533affcbSRobert Mustacchi ctrl_namelen = strlen(name);
1237*533affcbSRobert Mustacchi ns_name = NULL;
1238*533affcbSRobert Mustacchi }
1239*533affcbSRobert Mustacchi
1240*533affcbSRobert Mustacchi *ctrlp = NULL;
1241*533affcbSRobert Mustacchi if (nsp != NULL) {
1242*533affcbSRobert Mustacchi *nsp = NULL;
1243*533affcbSRobert Mustacchi }
1244*533affcbSRobert Mustacchi
1245*533affcbSRobert Mustacchi if (strncmp(name, "nvme", 4) != 0) {
1246*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0, "unable "
1247*533affcbSRobert Mustacchi "to map controller '%.*s' to a known device class, "
1248*533affcbSRobert Mustacchi "expected the controller to start with 'nvme'",
1249*533affcbSRobert Mustacchi (int)ctrl_namelen, name));
1250*533affcbSRobert Mustacchi }
1251*533affcbSRobert Mustacchi
1252*533affcbSRobert Mustacchi /*
1253*533affcbSRobert Mustacchi * Before we go ahead and try to parse this with strtoul we need to
1254*533affcbSRobert Mustacchi * manually check two things that strtoul will not:
1255*533affcbSRobert Mustacchi *
1256*533affcbSRobert Mustacchi * 1) If we have a null terminator, then we'll just get a 0 back.
1257*533affcbSRobert Mustacchi * 2) If there are multiple leading zeros in a row then that's an error.
1258*533affcbSRobert Mustacchi * We don't want to conflate 001 and 1 as the same here. The only valid
1259*533affcbSRobert Mustacchi * case is 'nvme0' which is 5 characters long, hence the check below.
1260*533affcbSRobert Mustacchi */
1261*533affcbSRobert Mustacchi if (ctrl_namelen == 4) {
1262*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
1263*533affcbSRobert Mustacchi "no controller instance specified in %.*s",
1264*533affcbSRobert Mustacchi (int)ctrl_namelen, name));
1265*533affcbSRobert Mustacchi }
1266*533affcbSRobert Mustacchi
1267*533affcbSRobert Mustacchi if (name[4] == '0' && ctrl_namelen > 5) {
1268*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
1269*533affcbSRobert Mustacchi "leading zeros aren't allowed for the instance specified "
1270*533affcbSRobert Mustacchi "in %.*s", (int)ctrl_namelen, name));
1271*533affcbSRobert Mustacchi }
1272*533affcbSRobert Mustacchi
1273*533affcbSRobert Mustacchi errno = 0;
1274*533affcbSRobert Mustacchi inst = strtoul(name + 4, &eptr, 10);
1275*533affcbSRobert Mustacchi if (errno != 0 || (*eptr != '\0' && eptr != slash)) {
1276*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_BAD_CONTROLLER, 0,
1277*533affcbSRobert Mustacchi "failed to parse controller instance from %.*s",
1278*533affcbSRobert Mustacchi (int)ctrl_namelen, name));
1279*533affcbSRobert Mustacchi }
1280*533affcbSRobert Mustacchi
1281*533affcbSRobert Mustacchi if (inst > INT32_MAX) {
1282*533affcbSRobert Mustacchi return (nvme_error(nvme, NVME_ERR_ILLEGAL_INSTANCE, 0,
1283*533affcbSRobert Mustacchi "parsed controller instance %lu is outside the valid "
1284*533affcbSRobert Mustacchi "range [0, %d]", inst, INT32_MAX));
1285*533affcbSRobert Mustacchi }
1286*533affcbSRobert Mustacchi
1287*533affcbSRobert Mustacchi if (!nvme_ctrl_init_by_instance(nvme, (int32_t)inst, &ctrl)) {
1288*533affcbSRobert Mustacchi return (false);
1289*533affcbSRobert Mustacchi }
1290*533affcbSRobert Mustacchi
1291*533affcbSRobert Mustacchi if (ns_name == NULL) {
1292*533affcbSRobert Mustacchi *ctrlp = ctrl;
1293*533affcbSRobert Mustacchi return (nvme_success(nvme));
1294*533affcbSRobert Mustacchi }
1295*533affcbSRobert Mustacchi
1296*533affcbSRobert Mustacchi if (!nvme_ns_init_by_name(ctrl, ns_name, &ns)) {
1297*533affcbSRobert Mustacchi nvme_err_data_t err;
1298*533affcbSRobert Mustacchi
1299*533affcbSRobert Mustacchi nvme_ctrl_err_save(ctrl, &err);
1300*533affcbSRobert Mustacchi nvme_err_set(nvme, &err);
1301*533affcbSRobert Mustacchi nvme_ctrl_fini(ctrl);
1302*533affcbSRobert Mustacchi return (false);
1303*533affcbSRobert Mustacchi }
1304*533affcbSRobert Mustacchi
1305*533affcbSRobert Mustacchi *ctrlp = ctrl;
1306*533affcbSRobert Mustacchi *nsp = ns;
1307*533affcbSRobert Mustacchi
1308*533affcbSRobert Mustacchi return (nvme_success(nvme));
1309*533affcbSRobert Mustacchi }
1310*533affcbSRobert Mustacchi
1311*533affcbSRobert Mustacchi bool
nvme_ns_bd_attach(nvme_ns_t * ns)1312*533affcbSRobert Mustacchi nvme_ns_bd_attach(nvme_ns_t *ns)
1313*533affcbSRobert Mustacchi {
1314*533affcbSRobert Mustacchi nvme_ctrl_t *ctrl = ns->nn_ctrl;
1315*533affcbSRobert Mustacchi nvme_ioctl_common_t com;
1316*533affcbSRobert Mustacchi
1317*533affcbSRobert Mustacchi (void) memset(&com, 0, sizeof (com));
1318*533affcbSRobert Mustacchi com.nioc_nsid = ns->nn_nsid;
1319*533affcbSRobert Mustacchi
1320*533affcbSRobert Mustacchi if (ioctl(ns->nn_ctrl->nc_fd, NVME_IOC_ATTACH, &com) != 0) {
1321*533affcbSRobert Mustacchi int e = errno;
1322*533affcbSRobert Mustacchi return (nvme_ioctl_syserror(ctrl, e, "namespace attach"));
1323*533affcbSRobert Mustacchi }
1324*533affcbSRobert Mustacchi
1325*533affcbSRobert Mustacchi if (com.nioc_drv_err != NVME_IOCTL_E_OK) {
1326*533affcbSRobert Mustacchi return (nvme_ioctl_error(ctrl, &com, "namespace attach"));
1327*533affcbSRobert Mustacchi }
1328*533affcbSRobert Mustacchi
1329*533affcbSRobert Mustacchi return (nvme_ctrl_success(ctrl));
1330*533affcbSRobert Mustacchi }
1331*533affcbSRobert Mustacchi
1332*533affcbSRobert Mustacchi bool
nvme_ns_bd_detach(nvme_ns_t * ns)1333*533affcbSRobert Mustacchi nvme_ns_bd_detach(nvme_ns_t *ns)
1334*533affcbSRobert Mustacchi {
1335*533affcbSRobert Mustacchi nvme_ctrl_t *ctrl = ns->nn_ctrl;
1336*533affcbSRobert Mustacchi nvme_ioctl_common_t com;
1337*533affcbSRobert Mustacchi
1338*533affcbSRobert Mustacchi (void) memset(&com, 0, sizeof (com));
1339*533affcbSRobert Mustacchi com.nioc_nsid = ns->nn_nsid;
1340*533affcbSRobert Mustacchi
1341*533affcbSRobert Mustacchi if (ioctl(ns->nn_ctrl->nc_fd, NVME_IOC_DETACH, &com) != 0) {
1342*533affcbSRobert Mustacchi int e = errno;
1343*533affcbSRobert Mustacchi return (nvme_ioctl_syserror(ctrl, e, "namespace detach"));
1344*533affcbSRobert Mustacchi }
1345*533affcbSRobert Mustacchi
1346*533affcbSRobert Mustacchi if (com.nioc_drv_err != NVME_IOCTL_E_OK) {
1347*533affcbSRobert Mustacchi return (nvme_ioctl_error(ctrl, &com, "namespace detach"));
1348*533affcbSRobert Mustacchi }
1349*533affcbSRobert Mustacchi
1350*533affcbSRobert Mustacchi return (nvme_ctrl_success(ctrl));
1351*533affcbSRobert Mustacchi }
1352*533affcbSRobert Mustacchi
1353*533affcbSRobert Mustacchi /*
1354*533affcbSRobert Mustacchi * Check for a lock programming error and upanic() if so.
1355*533affcbSRobert Mustacchi */
1356*533affcbSRobert Mustacchi static void
nvme_lock_check(nvme_ctrl_t * ctrl)1357*533affcbSRobert Mustacchi nvme_lock_check(nvme_ctrl_t *ctrl)
1358*533affcbSRobert Mustacchi {
1359*533affcbSRobert Mustacchi char msg[1024];
1360*533affcbSRobert Mustacchi int ret;
1361*533affcbSRobert Mustacchi const char *up;
1362*533affcbSRobert Mustacchi size_t ulen;
1363*533affcbSRobert Mustacchi const char *base = "fatal libnvme locking error detected";
1364*533affcbSRobert Mustacchi
1365*533affcbSRobert Mustacchi if (ctrl->nc_err.ne_err != NVME_ERR_LOCK_PROG) {
1366*533affcbSRobert Mustacchi return;
1367*533affcbSRobert Mustacchi }
1368*533affcbSRobert Mustacchi
1369*533affcbSRobert Mustacchi ret = snprintf(msg, sizeof (msg), "%s: %s (controller %p)", base,
1370*533affcbSRobert Mustacchi ctrl->nc_err.ne_errmsg, ctrl);
1371*533affcbSRobert Mustacchi if (ret >= sizeof (msg)) {
1372*533affcbSRobert Mustacchi ulen = sizeof (msg);
1373*533affcbSRobert Mustacchi up = msg;
1374*533affcbSRobert Mustacchi } else if (ret <= 0) {
1375*533affcbSRobert Mustacchi ulen = strlen(base) + 1;
1376*533affcbSRobert Mustacchi up = base;
1377*533affcbSRobert Mustacchi } else {
1378*533affcbSRobert Mustacchi ulen = (size_t)ret + 1;
1379*533affcbSRobert Mustacchi up = msg;
1380*533affcbSRobert Mustacchi }
1381*533affcbSRobert Mustacchi
1382*533affcbSRobert Mustacchi upanic(up, ulen);
1383*533affcbSRobert Mustacchi }
1384*533affcbSRobert Mustacchi
1385*533affcbSRobert Mustacchi static bool
nvme_lock_common(nvme_ctrl_t * ctrl,uint32_t nsid,nvme_lock_level_t level,nvme_lock_flags_t flags)1386*533affcbSRobert Mustacchi nvme_lock_common(nvme_ctrl_t *ctrl, uint32_t nsid, nvme_lock_level_t level,
1387*533affcbSRobert Mustacchi nvme_lock_flags_t flags)
1388*533affcbSRobert Mustacchi {
1389*533affcbSRobert Mustacchi nvme_ioctl_lock_t lock;
1390*533affcbSRobert Mustacchi const nvme_lock_flags_t all_flags = NVME_LOCK_F_DONT_BLOCK;
1391*533affcbSRobert Mustacchi
1392*533affcbSRobert Mustacchi if (level != NVME_LOCK_L_READ && level != NVME_LOCK_L_WRITE) {
1393*533affcbSRobert Mustacchi return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_FLAG, 0, "unknown "
1394*533affcbSRobert Mustacchi "lock level: 0x%x", level));
1395*533affcbSRobert Mustacchi }
1396*533affcbSRobert Mustacchi
1397*533affcbSRobert Mustacchi if ((flags & ~all_flags) != 0) {
1398*533affcbSRobert Mustacchi return (nvme_ctrl_error(ctrl, NVME_ERR_BAD_FLAG, 0, "unknown "
1399*533affcbSRobert Mustacchi "lock flags: 0x%x", flags & ~all_flags));
1400*533affcbSRobert Mustacchi }
1401*533affcbSRobert Mustacchi
1402*533affcbSRobert Mustacchi (void) memset(&lock, 0, sizeof (lock));
1403*533affcbSRobert Mustacchi lock.nil_common.nioc_nsid = nsid;
1404*533affcbSRobert Mustacchi if (nsid != 0) {
1405*533affcbSRobert Mustacchi lock.nil_ent = NVME_LOCK_E_NS;
1406*533affcbSRobert Mustacchi } else {
1407*533affcbSRobert Mustacchi lock.nil_ent = NVME_LOCK_E_CTRL;
1408*533affcbSRobert Mustacchi }
1409*533affcbSRobert Mustacchi lock.nil_level = level;
1410*533affcbSRobert Mustacchi lock.nil_flags = flags;
1411*533affcbSRobert Mustacchi
1412*533affcbSRobert Mustacchi if (ioctl(ctrl->nc_fd, NVME_IOC_LOCK, &lock) != 0) {
1413*533affcbSRobert Mustacchi int e = errno;
1414*533affcbSRobert Mustacchi return (nvme_ioctl_syserror(ctrl, e, "lock"));
1415*533affcbSRobert Mustacchi }
1416*533affcbSRobert Mustacchi
1417*533affcbSRobert Mustacchi if (lock.nil_common.nioc_drv_err != NVME_IOCTL_E_OK) {
1418*533affcbSRobert Mustacchi (void) nvme_ioctl_error(ctrl, &lock.nil_common, "lock");
1419*533affcbSRobert Mustacchi nvme_lock_check(ctrl);
1420*533affcbSRobert Mustacchi return (false);
1421*533affcbSRobert Mustacchi }
1422*533affcbSRobert Mustacchi
1423*533affcbSRobert Mustacchi return (nvme_ctrl_success(ctrl));
1424*533affcbSRobert Mustacchi }
1425*533affcbSRobert Mustacchi
1426*533affcbSRobert Mustacchi /*
1427*533affcbSRobert Mustacchi * You may reasonably be wondering why does this return and why do we basically
1428*533affcbSRobert Mustacchi * panic everywhere. The reality is twofold. The first part of this is that we
1429*533affcbSRobert Mustacchi * know from experience in libc that error checking mutexes are not the most
1430*533affcbSRobert Mustacchi * common and the kernel simplicity of mutex_enter() and mutex_exit() are really
1431*533affcbSRobert Mustacchi * a boon. The second piece here is that the way that the ioctl path works here,
1432*533affcbSRobert Mustacchi * only programming errors or mischief in the library could cause this to fail
1433*533affcbSRobert Mustacchi * at the raw ioctl / errno level. That is EBADF/EFAULT, etc. are our fault and
1434*533affcbSRobert Mustacchi * if you cannot unlock because of that you're not going to get much further.
1435*533affcbSRobert Mustacchi */
1436*533affcbSRobert Mustacchi void
nvme_unlock_common(nvme_ctrl_t * ctrl,uint32_t nsid)1437*533affcbSRobert Mustacchi nvme_unlock_common(nvme_ctrl_t *ctrl, uint32_t nsid)
1438*533affcbSRobert Mustacchi {
1439*533affcbSRobert Mustacchi nvme_ioctl_unlock_t unlock;
1440*533affcbSRobert Mustacchi
1441*533affcbSRobert Mustacchi (void) memset(&unlock, 0, sizeof (unlock));
1442*533affcbSRobert Mustacchi unlock.niu_common.nioc_nsid = nsid;
1443*533affcbSRobert Mustacchi if (nsid != 0) {
1444*533affcbSRobert Mustacchi unlock.niu_ent = NVME_LOCK_E_NS;
1445*533affcbSRobert Mustacchi } else {
1446*533affcbSRobert Mustacchi unlock.niu_ent = NVME_LOCK_E_CTRL;
1447*533affcbSRobert Mustacchi }
1448*533affcbSRobert Mustacchi
1449*533affcbSRobert Mustacchi /*
1450*533affcbSRobert Mustacchi * Because all unlock ioctls errors are promoted to an error, we don't
1451*533affcbSRobert Mustacchi * bother calling nvme_ioctl_syserror() here.
1452*533affcbSRobert Mustacchi */
1453*533affcbSRobert Mustacchi if (ioctl(ctrl->nc_fd, NVME_IOC_UNLOCK, &unlock) != 0) {
1454*533affcbSRobert Mustacchi int e = errno;
1455*533affcbSRobert Mustacchi (void) nvme_ctrl_error(ctrl, NVME_ERR_LOCK_PROG, e, "internal "
1456*533affcbSRobert Mustacchi "programming error: failed to issue unlock ioctl: %s",
1457*533affcbSRobert Mustacchi strerror(e));
1458*533affcbSRobert Mustacchi nvme_lock_check(ctrl);
1459*533affcbSRobert Mustacchi return;
1460*533affcbSRobert Mustacchi }
1461*533affcbSRobert Mustacchi
1462*533affcbSRobert Mustacchi if (unlock.niu_common.nioc_drv_err != NVME_IOCTL_E_OK) {
1463*533affcbSRobert Mustacchi (void) nvme_ioctl_error(ctrl, &unlock.niu_common, "unlock");
1464*533affcbSRobert Mustacchi /*
1465*533affcbSRobert Mustacchi * Promote any other failure to a new fatal failure. Consumers
1466*533affcbSRobert Mustacchi * expect this to have worked.
1467*533affcbSRobert Mustacchi */
1468*533affcbSRobert Mustacchi if (ctrl->nc_err.ne_err != NVME_ERR_LOCK_PROG) {
1469*533affcbSRobert Mustacchi nvme_err_data_t err;
1470*533affcbSRobert Mustacchi nvme_ctrl_err_save(ctrl, &err);
1471*533affcbSRobert Mustacchi (void) nvme_ctrl_error(ctrl, NVME_ERR_LOCK_PROG, 0,
1472*533affcbSRobert Mustacchi "internal programming error: received unexpected "
1473*533affcbSRobert Mustacchi "libnvme error 0x%x: %s", err.ne_err,
1474*533affcbSRobert Mustacchi err.ne_errmsg);
1475*533affcbSRobert Mustacchi }
1476*533affcbSRobert Mustacchi nvme_lock_check(ctrl);
1477*533affcbSRobert Mustacchi return;
1478*533affcbSRobert Mustacchi }
1479*533affcbSRobert Mustacchi
1480*533affcbSRobert Mustacchi (void) nvme_ctrl_success(ctrl);
1481*533affcbSRobert Mustacchi }
1482*533affcbSRobert Mustacchi
1483*533affcbSRobert Mustacchi bool
nvme_ctrl_lock(nvme_ctrl_t * ctrl,nvme_lock_level_t level,nvme_lock_flags_t flags)1484*533affcbSRobert Mustacchi nvme_ctrl_lock(nvme_ctrl_t *ctrl, nvme_lock_level_t level,
1485*533affcbSRobert Mustacchi nvme_lock_flags_t flags)
1486*533affcbSRobert Mustacchi {
1487*533affcbSRobert Mustacchi return (nvme_lock_common(ctrl, 0, level, flags));
1488*533affcbSRobert Mustacchi }
1489*533affcbSRobert Mustacchi
1490*533affcbSRobert Mustacchi bool
nvme_ns_lock(nvme_ns_t * ns,nvme_lock_level_t level,nvme_lock_flags_t flags)1491*533affcbSRobert Mustacchi nvme_ns_lock(nvme_ns_t *ns, nvme_lock_level_t level,
1492*533affcbSRobert Mustacchi nvme_lock_flags_t flags)
1493*533affcbSRobert Mustacchi {
1494*533affcbSRobert Mustacchi return (nvme_lock_common(ns->nn_ctrl, ns->nn_nsid, level, flags));
1495*533affcbSRobert Mustacchi }
1496*533affcbSRobert Mustacchi
1497*533affcbSRobert Mustacchi void
nvme_ctrl_unlock(nvme_ctrl_t * ctrl)1498*533affcbSRobert Mustacchi nvme_ctrl_unlock(nvme_ctrl_t *ctrl)
1499*533affcbSRobert Mustacchi {
1500*533affcbSRobert Mustacchi nvme_unlock_common(ctrl, 0);
1501*533affcbSRobert Mustacchi }
1502*533affcbSRobert Mustacchi
1503*533affcbSRobert Mustacchi void
nvme_ns_unlock(nvme_ns_t * ns)1504*533affcbSRobert Mustacchi nvme_ns_unlock(nvme_ns_t *ns)
1505*533affcbSRobert Mustacchi {
1506*533affcbSRobert Mustacchi nvme_unlock_common(ns->nn_ctrl, ns->nn_nsid);
1507*533affcbSRobert Mustacchi }
1508