xref: /illumos-gate/usr/src/uts/common/sys/nvme.h (revision e00bdde3c6d406f40f53f3025defadc22f7ec31a)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2016 Nexenta Systems, Inc.
14  * Copyright 2020 Joyent, Inc.
15  * Copyright 2019 Western Digital Corporation
16  * Copyright 2024 Oxide Computer Company
17  * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
18  */
19 
20 #ifndef _SYS_NVME_H
21 #define	_SYS_NVME_H
22 
23 #include <sys/types.h>
24 #include <sys/debug.h>
25 
26 #ifdef _KERNEL
27 #include <sys/types32.h>
28 #else
29 #include <sys/uuid.h>
30 #include <stdint.h>
31 #endif
32 
33 /*
34  * Declarations used for communication between nvmeadm(8) and nvme(4D)
35  */
36 
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40 
41 /*
42  * NVMe ioctl definitions
43  */
44 
/*
 * Every NVMe ioctl command is the NVME_IOC base ('N', 'V', 'M' in the upper
 * three bytes) OR-ed with a small command index held in the low byte.
 */
#define	NVME_IOC			(('N' << 24) | ('V' << 16) | ('M' << 8))
#define	NVME_IOC_CTRL_INFO		(NVME_IOC | 0)
#define	NVME_IOC_IDENTIFY		(NVME_IOC | 1)
#define	NVME_IOC_GET_LOGPAGE		(NVME_IOC | 2)
#define	NVME_IOC_GET_FEATURE		(NVME_IOC | 3)
#define	NVME_IOC_FORMAT			(NVME_IOC | 4)
#define	NVME_IOC_DETACH			(NVME_IOC | 5)
#define	NVME_IOC_ATTACH			(NVME_IOC | 6)
#define	NVME_IOC_FIRMWARE_DOWNLOAD	(NVME_IOC | 7)
#define	NVME_IOC_FIRMWARE_COMMIT	(NVME_IOC | 8)
#define	NVME_IOC_PASSTHRU		(NVME_IOC | 9)
#define	NVME_IOC_NS_INFO		(NVME_IOC | 10)
#define	NVME_IOC_LOCK			(NVME_IOC | 11)
#define	NVME_IOC_UNLOCK			(NVME_IOC | 12)
/*
 * NVME_IOC_MAX must name the highest-numbered command defined above so that
 * IS_NVME_IOC() accepts every command. Update it whenever a command is added.
 */
#define	NVME_IOC_MAX			NVME_IOC_UNLOCK

/*
 * IS_NVME_IOC(x) is non-zero when x is one of the commands defined above. The
 * lower bound is inclusive because NVME_IOC_CTRL_INFO has index 0 and is
 * therefore numerically equal to NVME_IOC itself. Note that x is evaluated
 * twice.
 *
 * NVME_IOC_CMD(x) extracts the command index (the low byte) from a command.
 */
#define	IS_NVME_IOC(x)			((x) >= NVME_IOC && (x) <= NVME_IOC_MAX)
#define	NVME_IOC_CMD(x)			((x) & 0xff)
63 
64 /*
65  * This represents the set of all possible errors that can be returned from an
66  * ioctl. Our general rule of thumb is that we only will use an errno value to
67  * indicate that certain processing failed: a lack of privileges, bad minor, or
68  * failure to copy in and out the initial ioctl structure. However, if we get
69  * far enough that there is any other failure (including a failure to copy in
70  * and out nested data such as the identify command payload) then we will issue
71  * an error here. Put differently, our basic promise is that there should be a
72  * single straightforward meaning for any errno returned and instead all the
73  * nuance is here. Our goal is that no one should guess what of two dozen things
74  * an EINVAL might have referred to.
75  *
76  * When we are dealing with field parameters, there are three general classes of
77  * errors that we define that are common across all request structures:
78  *
79  *   <REQ>_<FIELD>_RANGE	RANGE class errors indicate that the value
80  *				passed in is outside the range that the device
81  *				supports. The range may vary based on the
82  *				specification. This is used both for issues like
83  *				bad alignment in a value (e.g. not 4-byte
84  *				aligned) or a value that is larger than the
85  *				maximum possible size. Because the namespace ID
86  *				is shared in every request in the controller and
87  *				is part of our standard ioctl handling, we use a
88  *				single set of errors for that.
89  *
90  *   <REQ>_<FIELD>_UNSUP	This indicates that the controller cannot
91  *				support any value in the given field. This is
92  *				either because the field was introduced in an
93  *				NVMe specification later than the controller
94  *				supports or because there is an explicit feature
95  *				bit that indicates whether or not this field is
96  *				valid. Entries here may or may not have a
97  *				namespace unsupported entry due to the fact that
98  *				this is command specific.
99  *
100  *  <REQ>_<FIELD>_UNUSE		This class is perhaps the weirdest. This
101  *				represents a case where a given field cannot be
102  *				set because it is not used based on the
103  *				specifics of the request. For example, if you're
104  *				getting the health log page, you may not set the
105  *				LSP or LSI for that log page, even if you have
106  *				an NVMe 1.4 controller that supports both fields
107  *				because they have no meaning. A similar example
108  *				would be setting a controller ID when it has no
109  *				meaning in a particular identify request.
110  *
 * While every field will have a RANGE class error, some fields will not have an
 * UNSUP or UNUSE class error depending on the specifics. Fields that have
 * always been present since NVMe 1.0, such as the log page ID field for a get
 * log page request or the length of a firmware download request, are currently
 * always valid. It is possible that future revisions to the specification or
 * our logic may change this.
117  */
typedef enum {
	/* The command completed successfully. */
	NVME_IOCTL_E_OK	= 0,
	/*
	 * The controller itself failed the command, so the controller-specific
	 * status (SCT/SC) is available. Those fields are reserved for every
	 * other error.
	 */
	NVME_IOCTL_E_CTRL_ERROR,
	/*
	 * DEAD: the system considers the controller dead and hence unusable.
	 * GONE: the controller was removed from the system (hotplug or
	 * similar); this variant exists to tell the two cases apart.
	 */
	NVME_IOCTL_E_CTRL_DEAD,
	NVME_IOCTL_E_CTRL_GONE,
	/*
	 * A bad namespace was requested, generally one that lies outside of
	 * the controller's range.
	 */
	NVME_IOCTL_E_NS_RANGE,
	/* A namespace cannot be used in this context. */
	NVME_IOCTL_E_NS_UNUSE,
	/*
	 * The request came in on a namespace minor but asked to operate on a
	 * different namespace.
	 */
	NVME_IOCTL_E_MINOR_WRONG_NS,
	/*
	 * The ioctl is only valid on the controller minor, but a namespace
	 * minor was used. Not used for other kinds of incorrect namespace
	 * requests.
	 */
	NVME_IOCTL_E_NOT_CTRL,
	/*
	 * The request targets the broadcast namespace, whether explicitly or
	 * as the result of transforming the request, and this operation does
	 * not support the broadcast namespace.
	 */
	NVME_IOCTL_E_NO_BCAST_NS,
	/*
	 * The operation needs a controller or namespace write lock that the
	 * caller does not hold.
	 */
	NVME_IOCTL_E_NEED_CTRL_WRLOCK,
	NVME_IOCTL_E_NEED_NS_WRLOCK,
	/*
	 * Someone else currently has exclusive access to the controller or
	 * namespace, so this request (which does not itself need exclusive
	 * access) could not proceed.
	 */
	NVME_IOCTL_E_CTRL_LOCKED,
	NVME_IOCTL_E_NS_LOCKED,
	/* A standard log page was requested that the kernel does not know. */
	NVME_IOCTL_E_UNKNOWN_LOG_PAGE,
	/*
	 * The kernel knows about the requested log page, but the controller
	 * does not support it.
	 */
	NVME_IOCTL_E_UNSUP_LOG_PAGE,
	/*
	 * The scope of the log page does not match what was requested, e.g.
	 * asking for the firmware information page on a namespace.
	 */
	NVME_IOCTL_E_BAD_LOG_SCOPE,
	/* Get log page: fields with out-of-range values. */
	NVME_IOCTL_E_LOG_CSI_RANGE,
	NVME_IOCTL_E_LOG_LID_RANGE,
	NVME_IOCTL_E_LOG_LSP_RANGE,
	NVME_IOCTL_E_LOG_LSI_RANGE,
	NVME_IOCTL_E_LOG_RAE_RANGE,
	NVME_IOCTL_E_LOG_SIZE_RANGE,
	NVME_IOCTL_E_LOG_OFFSET_RANGE,
	/* Get log page: fields that may not be supported. */
	NVME_IOCTL_E_LOG_CSI_UNSUP,
	NVME_IOCTL_E_LOG_LSP_UNSUP,
	NVME_IOCTL_E_LOG_LSI_UNSUP,
	NVME_IOCTL_E_LOG_RAE_UNSUP,
	NVME_IOCTL_E_LOG_OFFSET_UNSUP,
	/* Get log page: fields that may be unusable depending on context. */
	NVME_IOCTL_E_LOG_LSP_UNUSE,
	NVME_IOCTL_E_LOG_LSI_UNUSE,
	NVME_IOCTL_E_LOG_RAE_UNUSE,
	/* No DMA memory was available for the request. */
	NVME_IOCTL_E_NO_DMA_MEM,
	/* No kernel memory was available for the request. */
	NVME_IOCTL_E_NO_KERN_MEM,
	/* An error occurred while filling out the DMA PRP. */
	NVME_IOCTL_E_BAD_PRP,
	/*
	 * A pointer to user data that had to be read or written was invalid
	 * and faulted. This covers the items an ioctl structure points to.
	 */
	NVME_IOCTL_E_BAD_USER_DATA,
	/* The kernel does not know about the requested identify command. */
	NVME_IOCTL_E_UNKNOWN_IDENTIFY,
	/* The controller does not support the requested identify command. */
	NVME_IOCTL_E_UNSUP_IDENTIFY,
	/*
	 * Identify: an argument is outside of its supported range. There is no
	 * CNS entry here because the two higher-level identify errors right
	 * above cover it.
	 */
	NVME_IOCTL_E_IDENTIFY_CTRLID_RANGE,
	/*
	 * Identify: unsupported and unusable fields. The nsid has existed
	 * since NVMe 1.0 and so is never unsupported. The controller ID, in
	 * contrast, needs controller support and is unusable in several
	 * requests.
	 */
	NVME_IOCTL_E_IDENTIFY_CTRLID_UNSUP,
	NVME_IOCTL_E_IDENTIFY_CTRLID_UNUSE,
	/*
	 * The controller does not support the NVMe specification's general
	 * vendor unique command format.
	 */
	NVME_IOCTL_E_CTRL_VUC_UNSUP,
	/*
	 * Vendor unique command: fields with bad values. The cdw1[2-5] fields
	 * have no entries because there is nothing about them we can validate.
	 */
	NVME_IOCTL_E_VUC_TIMEOUT_RANGE,
	NVME_IOCTL_E_VUC_OPCODE_RANGE,
	NVME_IOCTL_E_VUC_FLAGS_RANGE,
	NVME_IOCTL_E_VUC_IMPACT_RANGE,
	NVME_IOCTL_E_VUC_NDT_RANGE,
	/*
	 * Vendor unique command: the named pair of fields do not agree with
	 * each other.
	 */
	NVME_IOCTL_E_INCONSIST_VUC_FLAGS_NDT,
	NVME_IOCTL_E_INCONSIST_VUC_BUF_NDT,
	/*
	 * The operation failed because blkdev could not detach, most often
	 * because the device node is busy: the device is in a zpool, a file
	 * system is mounted, a process has the block device open, etc.
	 */
	NVME_IOCTL_E_BLKDEV_DETACH,
	/*
	 * The operation failed because a new blkdev child could not be created
	 * and onlined.
	 */
	NVME_IOCTL_E_BLKDEV_ATTACH,
	/*
	 * The namespace requested for an attach is not supported by the system
	 * due to properties of the namespace itself (e.g. it uses metadata
	 * sectors).
	 */
	NVME_IOCTL_E_UNSUP_ATTACH_NS,
	/* The controller does not support the format operation at all. */
	NVME_IOCTL_E_CTRL_FORMAT_UNSUP,
	/* The controller cannot perform a cryptographic secure erase. */
	NVME_IOCTL_E_CTRL_CRYPTO_SE_UNSUP,
	/*
	 * A format operation targets a single namespace, but the controller
	 * cannot format an individual namespace (NS_FORMAT) or cannot
	 * secure-erase an individual namespace (NS_SE).
	 */
	NVME_IOCTL_E_CTRL_NS_FORMAT_UNSUP,
	NVME_IOCTL_E_CTRL_NS_SE_UNSUP,
	/* Format NVM: fields with bad values. */
	NVME_IOCTL_E_FORMAT_LBAF_RANGE,
	NVME_IOCTL_E_FORMAT_SES_RANGE,
	/*
	 * The requested LBA format is not supported because it uses metadata.
	 */
	NVME_IOCTL_E_UNSUP_LBAF_META,
	/* The controller does not support firmware commands at all. */
	NVME_IOCTL_E_CTRL_FW_UNSUP,
	/*
	 * The controller reported a firmware update granularity larger than
	 * the calculated / driver supported maximum DMA transfer size, which
	 * makes the operation impossible.
	 */
	NVME_IOCTL_E_FW_LOAD_IMPOS_GRAN,
	/* Firmware load: bad length or offset. */
	NVME_IOCTL_E_FW_LOAD_LEN_RANGE,
	NVME_IOCTL_E_FW_LOAD_OFFSET_RANGE,
	/* Firmware commit: bad slot or action. */
	NVME_IOCTL_E_FW_COMMIT_SLOT_RANGE,
	NVME_IOCTL_E_FW_COMMIT_ACTION_RANGE,
	/*
	 * An explicit attempt was made to download an image into a read-only
	 * slot. Some instances cannot be caught before the command is issued
	 * to the controller (commit action 0b11 may be used whether or not
	 * there is a staged image); those surface as a controller error
	 * instead.
	 */
	NVME_IOCTL_E_RO_FW_SLOT,
	/*
	 * The kernel does not know about the NVMe feature in question and so
	 * cannot proceed.
	 */
	NVME_IOCTL_E_UNKNOWN_FEATURE,
	/*
	 * The system knows about the feature, but the controller does not
	 * support it.
	 */
	NVME_IOCTL_E_UNSUP_FEATURE,
	/* Get feature: a field's value is outside of the supported range. */
	NVME_IOCTL_E_GET_FEAT_SEL_RANGE,
	NVME_IOCTL_E_GET_FEAT_CDW11_RANGE,
	NVME_IOCTL_E_GET_FEAT_DATA_RANGE,
	/*
	 * Get feature: the field is not supported. Possible causes are that
	 * the specific get feature command does not allow setting the value,
	 * that this revision of the controller lacks the field, or similar.
	 */
	NVME_IOCTL_E_GET_FEAT_SEL_UNSUP,
	/* Get feature: fields that may be circumstantially unusable. */
	NVME_IOCTL_E_GET_FEAT_CDW11_UNUSE,
	NVME_IOCTL_E_GET_FEAT_DATA_UNUSE,
	/* Lock requests: a bad lock type was specified. */
	NVME_IOCTL_E_BAD_LOCK_ENTITY,
	NVME_IOCTL_E_BAD_LOCK_LEVEL,
	NVME_IOCTL_E_BAD_LOCK_FLAGS,
	/* A namespace open cannot lock or unlock a controller. */
	NVME_IOCTL_E_NS_CANNOT_LOCK_CTRL,
	NVME_IOCTL_E_NS_CANNOT_UNLOCK_CTRL,
	/* The caller already holds this lock. */
	NVME_IOCTL_E_LOCK_ALREADY_HELD,
	/*
	 * The controller lock cannot be taken because the caller already has
	 * an active namespace lock.
	 */
	NVME_IOCTL_E_LOCK_NO_CTRL_WITH_NS,
	/*
	 * A namespace lock cannot be taken because a controller write lock
	 * already exists. Unlike its siblings, this name has no _E_ component;
	 * it is left as is because it is part of the existing interface.
	 */
	NVME_IOCTL_LOCK_NO_NS_WITH_CTRL_WRLOCK,
	/*
	 * A namespace lock cannot be taken because the caller already has one.
	 */
	NVME_IOCTL_E_LOCK_NO_2ND_NS,
	/* A blocking wait for a lock was interrupted by a signal. */
	NVME_IOCTL_E_LOCK_WAIT_SIGNAL,
	/*
	 * The lock is already held and the caller asked us not to block on
	 * it, so it could not be acquired.
	 */
	NVME_IOCTL_E_LOCK_WOULD_BLOCK,
	/*
	 * The minor is already blocking on another lock operation, so this
	 * lock operation could not proceed.
	 */
	NVME_IOCTL_E_LOCK_PENDING,
	/*
	 * The requested lock cannot be unlocked because it is not held. The
	 * minor may not hold it at all or may still be blocking to acquire it.
	 */
	NVME_IOCTL_E_LOCK_NOT_HELD,
	/*
	 * The requested lock cannot be unlocked because the namespace named in
	 * the request is not the one that is currently locked.
	 */
	NVME_IOCTL_E_LOCK_WRONG_NS,
	/*
	 * The request cannot proceed because a namespace is attached to
	 * blkdev. This blocks a format operation, a vendor unique command that
	 * indicated it would impact all namespaces, etc.
	 */
	NVME_IOCTL_E_NS_BLKDEV_ATTACH,
	/*
	 * The blkdev address somehow would have overflowed our internal
	 * buffer.
	 */
	NVME_IOCTL_E_BD_ADDR_OVER
} nvme_ioctl_errno_t;
461 
462 /*
463  * This structure is embedded as the first item of every ioctl. It is also used
464  * directly for the attach (NVME_IOC_ATTACH) and detach (NVME_IOC_DETACH)
465  * ioctls.
466  */
typedef struct {
	/*
	 * Namespace ID that the ioctl should target, for ioctls that support
	 * one. Zero means "whatever is currently open", be that the controller
	 * or a namespace. When a namespace is open, only zero or that
	 * namespace's own ID is valid.
	 */
	uint32_t nioc_nsid;
	/*
	 * Error reporting. Every ioctl stores an nvme_ioctl_errno_t value in
	 * nioc_drv_err; any value other than NVME_IOCTL_E_OK means that
	 * something failed. Some errors supply supplemental information in the
	 * sct and sc members: NVME_IOCTL_E_CTRL_ERROR, for example, returns the
	 * controller's raw error values there for inspection, and other errors
	 * may define their own well-defined use of them.
	 */
	uint32_t nioc_drv_err;
	uint32_t nioc_ctrl_sct;
	uint32_t nioc_ctrl_sc;
} nvme_ioctl_common_t;
490 
491 /*
492  * NVMe Identify Command (NVME_IOC_IDENTIFY).
493  */
494 typedef struct {
495 	nvme_ioctl_common_t nid_common;
496 	uint32_t nid_cns;
497 	uint32_t nid_ctrlid;
498 	uintptr_t nid_data;
499 } nvme_ioctl_identify_t;
500 
501 /*
502  * The following constants describe the maximum values that may be used in
503  * various identify requests.
504  */
/* Largest controller ID and namespace ID values. */
#define	NVME_IDENTIFY_MAX_CTRLID	0xffff
#define	NVME_IDENTIFY_MAX_NSID		0xffffffff
/*
 * Largest CNS values. The _1v1 and _1v2 suffixes presumably refer to the
 * NVMe 1.1 and 1.2 limits, with the unsuffixed value being the baseline.
 */
#define	NVME_IDENTIFY_MAX_CNS_1v2	0xff
#define	NVME_IDENTIFY_MAX_CNS_1v1	0x3
#define	NVME_IDENTIFY_MAX_CNS		0x1
510 
511 /*
512  * Get a specific feature (NVME_IOC_GET_FEATURE).
513  */
514 typedef struct {
515 	nvme_ioctl_common_t nigf_common;
516 	uint32_t nigf_fid;
517 	uint32_t nigf_sel;
518 	uint32_t nigf_cdw11;
519 	uintptr_t nigf_data;
520 	uint64_t nigf_len;
521 	uint32_t nigf_cdw0;
522 } nvme_ioctl_get_feature_t;
523 
524 /*
525  * Feature maximums.
526  */
#define	NVME_FEAT_MAX_FID	0xff	/* largest feature ID */
#define	NVME_FEAT_MAX_SEL	0x3	/* largest select value */
529 
530 /*
531  * Get a specific log page (NVME_IOC_GET_LOGPAGE). By default, unused fields
532  * should be left at zero.  the input data length is specified by nigl_len, in
533  * bytes. The NVMe specification does not provide a way for a controller to
534  * write less bytes than requested for a log page. It is undefined behavior if a
535  * log page read requests more data than is supported. If this is successful,
536  * nigl_len bytes will be copied out.
537  */
538 typedef struct {
539 	nvme_ioctl_common_t nigl_common;
540 	uint32_t nigl_csi;
541 	uint32_t nigl_lid;
542 	uint32_t nigl_lsp;
543 	uint32_t nigl_lsi;
544 	uint32_t nigl_rae;
545 	uint64_t nigl_len;
546 	uint64_t nigl_offset;
547 	uintptr_t nigl_data;
548 } nvme_ioctl_get_logpage_t;
549 
/*
 * The following constants describe the maximum values for the fields that are
 * used in a log page request. All values are inclusive. Some of them change
 * with the specification version: the unsuffixed maximum is the lowest common
 * value, and larger, version-specific values are listed alongside it. While
 * these values are the command set maximums, the device driver may support
 * smaller maximums (e.g. for size).
 */
#define	NVME_LOG_MAX_LID	0xff
#define	NVME_LOG_MAX_LSP	0x0f
#define	NVME_LOG_MAX_LSP_2v0	0x7f
#define	NVME_LOG_MAX_LSI	0xffff
#define	NVME_LOG_MAX_UUID	0x7f
#define	NVME_LOG_MAX_CSI	0xff
#define	NVME_LOG_MAX_RAE	0x1
#define	NVME_LOG_MAX_OFFSET	UINT64_MAX
565 
566 /*
567  * These maximum size values are inclusive like the others. The fields are 12
568  * and 32-bits wide respectively, but are zero based. That is accounted for by
569  * the shifts below.
570  */
#define	NVME_LOG_MAX_SIZE	(4ULL * (1ULL << 12))
#define	NVME_LOG_MAX_SIZE_1v2	(4ULL * (1ULL << 32))
573 
574 /*
575  * Inject a vendor-specific admin command (NVME_IOC_PASSTHRU).
576  */
577 typedef struct {
578 	nvme_ioctl_common_t npc_common;	/* NSID and status */
579 	uint32_t npc_opcode;	/* Command opcode. */
580 	uint32_t npc_timeout;	/* Command timeout, in seconds. */
581 	uint32_t npc_flags;	/* Flags for the command. */
582 	uint32_t npc_impact;	/* Impact information */
583 	uint32_t npc_cdw0;	/* Command-specific result DWord 0 */
584 	uint32_t npc_cdw12;	/* Command-specific DWord 12 */
585 	uint32_t npc_cdw13;	/* Command-specific DWord 13 */
586 	uint32_t npc_cdw14;	/* Command-specific DWord 14 */
587 	uint32_t npc_cdw15;	/* Command-specific DWord 15 */
588 	uint64_t npc_buflen;	/* Size of npc_buf. */
589 	uintptr_t npc_buf;	/* I/O source or destination */
590 } nvme_ioctl_passthru_t;
591 
592 /*
593  * Constants for the passthru admin commands. Because the timeout is a kernel
594  * property, we don't include that here.
595  */
/* Inclusive bounds of the admin opcodes accepted for passthru. */
#define	NVME_PASSTHRU_MIN_ADMIN_OPC	0xc0
#define	NVME_PASSTHRU_MAX_ADMIN_OPC	0xff

/* Direction flags for NVMe passthru commands. */
#define	NVME_PASSTHRU_READ	0x1 /* data is read from the device */
#define	NVME_PASSTHRU_WRITE	0x2 /* data is written to the device */
602 
/*
 * Impact information for NVMe passthru commands. The current impact flags are
 * defined as follows:
 *
 * NVME_IMPACT_NS	One or all of the namespaces may be changed by the
 *			command. All namespaces are rescanned after the
 *			command completes and our state is updated as a
 *			result. However, for the command to continue, none of
 *			those namespaces may be attached to blkdev.
 */
#define	NVME_IMPACT_NS		0x01
614 
615 
616 /*
617  * Firmware download (NVME_IOC_FIRMWARE_DOWNLOAD).
618  */
619 typedef struct {
620 	nvme_ioctl_common_t fwl_common;
621 	uintptr_t fwl_buf;
622 	uint64_t fwl_len;
623 	uint64_t fwl_off;
624 } nvme_ioctl_fw_load_t;
625 
626 /*
627  * Firmware commit (NVME_IOC_FIRMWARE_COMMIT). This was previously called
628  * firmware activate in earlier specification revisions.
629  */
630 typedef struct {
631 	nvme_ioctl_common_t fwc_common;
632 	uint32_t fwc_slot;
633 	uint32_t fwc_action;
634 } nvme_ioctl_fw_commit_t;
635 
636 /*
637  * Format NVM command (NVME_IOC_FORMAT)
638  */
639 typedef struct {
640 	nvme_ioctl_common_t nif_common;
641 	uint32_t nif_lbaf;
642 	uint32_t nif_ses;
643 } nvme_ioctl_format_t;
644 
/* The kind of entity that a lock or unlock request operates on. */
typedef enum {
	NVME_LOCK_E_CTRL	= 1,	/* the controller */
	NVME_LOCK_E_NS		= 2	/* a namespace */
} nvme_lock_ent_t;
649 
/* The level at which a lock is requested. */
typedef enum {
	NVME_LOCK_L_READ	= 1,	/* read lock */
	NVME_LOCK_L_WRITE	= 2	/* write lock */
} nvme_lock_level_t;
654 
/* Flags that modify a lock request; each flag occupies its own bit. */
typedef enum {
	NVME_LOCK_F_DONT_BLOCK	= 0x1	/* do not block on a held lock */
} nvme_lock_flags_t;
658 
659 /*
660  * Lock structure (NVME_IOC_LOCK).
661  */
662 typedef struct {
663 	nvme_ioctl_common_t nil_common;
664 	nvme_lock_ent_t nil_ent;
665 	nvme_lock_level_t nil_level;
666 	nvme_lock_flags_t nil_flags;
667 } nvme_ioctl_lock_t;
668 
669 /*
670  * Unlock structure (NVME_IOC_UNLOCK).
671  */
672 typedef struct {
673 	nvme_ioctl_common_t niu_common;
674 	nvme_lock_ent_t niu_ent;
675 } nvme_ioctl_unlock_t;
676 
677 /*
678  * 32-bit ioctl structures. These must be packed to be 4 bytes to get the proper
679  * ILP32 sizing.
680  */
#if defined(_KERNEL) && defined(_SYSCALL32)
#pragma pack(4)
/*
 * Each structure below has the same members, in the same order, as the native
 * structure of the same name without the "32" suffix. The one difference is
 * that the pointer member is a uintptr32_t rather than a uintptr_t.
 */

/* ILP32 form of nvme_ioctl_identify_t. */
typedef struct {
	nvme_ioctl_common_t nid_common;
	uint32_t nid_cns;
	uint32_t nid_ctrlid;
	uintptr32_t nid_data;		/* 32-bit pointer */
} nvme_ioctl_identify32_t;

/* ILP32 form of nvme_ioctl_get_feature_t. */
typedef struct {
	nvme_ioctl_common_t nigf_common;
	uint32_t nigf_fid;
	uint32_t nigf_sel;
	uint32_t nigf_cdw11;
	uintptr32_t nigf_data;		/* 32-bit pointer */
	uint64_t nigf_len;
	uint32_t nigf_cdw0;
} nvme_ioctl_get_feature32_t;

/* ILP32 form of nvme_ioctl_get_logpage_t. */
typedef struct {
	nvme_ioctl_common_t nigl_common;
	uint32_t nigl_csi;
	uint32_t nigl_lid;
	uint32_t nigl_lsp;
	uint32_t nigl_lsi;
	uint32_t nigl_rae;
	uint64_t nigl_len;
	uint64_t nigl_offset;
	uintptr32_t nigl_data;		/* 32-bit pointer */
} nvme_ioctl_get_logpage32_t;

/* ILP32 form of nvme_ioctl_passthru_t. */
typedef struct {
	nvme_ioctl_common_t npc_common;	/* target NSID and error status */
	uint32_t npc_opcode;	/* opcode of the command */
	uint32_t npc_timeout;	/* timeout for the command, in seconds */
	uint32_t npc_flags;	/* command flags */
	uint32_t npc_impact;	/* impact information */
	uint32_t npc_cdw0;	/* result: command-specific DWord 0 */
	uint32_t npc_cdw12;	/* command-specific DWord 12 */
	uint32_t npc_cdw13;	/* command-specific DWord 13 */
	uint32_t npc_cdw14;	/* command-specific DWord 14 */
	uint32_t npc_cdw15;	/* command-specific DWord 15 */
	uint64_t npc_buflen;	/* size of the buffer at npc_buf */
	uintptr32_t npc_buf;	/* 32-bit pointer: I/O source or destination */
} nvme_ioctl_passthru32_t;

/* ILP32 form of nvme_ioctl_fw_load_t. */
typedef struct {
	nvme_ioctl_common_t fwl_common;
	uintptr32_t fwl_buf;		/* 32-bit pointer */
	uint64_t fwl_len;
	uint64_t fwl_off;
} nvme_ioctl_fw_load32_t;
#pragma pack()	/* pack(4) */
#endif	/* _KERNEL && _SYSCALL32 */
735 
736 /*
737  * NVMe capabilities. This is a set of fields that come from the controller's
738  * PCIe register space.
739  */
typedef struct {
	uint32_t cap_mpsmax;		/* largest memory page size (MPSMAX) */
	uint32_t cap_mpsmin;		/* smallest memory page size (MPSMIN) */
} nvme_capabilities_t;
744 
745 /*
746  * NVMe version
747  */
typedef struct {
	uint16_t v_minor;		/* minor version; note it comes first */
	uint16_t v_major;		/* major version */
} nvme_version_t;
752 
/*
 * Version comparisons. v points to something with v_major and v_minor members
 * (e.g. an nvme_version_t). ATLEAST is true when *v is maj.min or newer, and
 * HIGHER is true only when *v is strictly newer than maj.min. The major
 * numbers decide the result unless they are equal, in which case the minor
 * numbers do. Arguments may be evaluated more than once.
 */
#define	NVME_VERSION_ATLEAST(v, maj, min) \
	((v)->v_major != (maj) ? \
	(v)->v_major > (maj) : (v)->v_minor >= (min))

#define	NVME_VERSION_HIGHER(v, maj, min) \
	((v)->v_major != (maj) ? \
	(v)->v_major > (maj) : (v)->v_minor > (min))
760 
761 /*
762  * NVMe Namespace related constants. The maximum NSID is determined by the
763  * identify controller data structure.
764  */
#define	NVME_NSID_MIN	1		/* smallest namespace ID */
#define	NVME_NSID_BCAST	0xffffffff	/* the broadcast namespace ID */
767 
768 #pragma pack(1)
769 
/* A 128-bit quantity, stored as two 64-bit halves with the low half first. */
typedef struct {
	uint64_t lo;			/* low half */
	uint64_t hi;			/* high half */
} nvme_uint128_t;
774 
775 /*
776  * NVMe Identify data structures
777  */
778 
#define	NVME_IDENTIFY_BUFSIZE	4096	/* size of an Identify buffer, bytes */
780 
/*
 * NVMe Identify parameters (cdw10). Each value selects which data an identify
 * request returns.
 */
#define	NVME_IDENTIFY_NSID		0x0	/* Identify Namespace */
#define	NVME_IDENTIFY_CTRL		0x1	/* Identify Controller */
#define	NVME_IDENTIFY_NSID_LIST		0x2	/* Active Namespace List */
#define	NVME_IDENTIFY_NSID_DESC		0x3	/* Namespace ID Descriptors */

#define	NVME_IDENTIFY_NSID_ALLOC_LIST	0x10	/* Allocated NSID List */
#define	NVME_IDENTIFY_NSID_ALLOC	0x11	/* Identify Allocated NSID */
#define	NVME_IDENTIFY_NSID_CTRL_LIST	0x12	/* Controllers on an NSID */
#define	NVME_IDENTIFY_CTRL_LIST		0x13	/* Controller List */
#define	NVME_IDENTIFY_PRIMARY_CAPS	0x14	/* Primary Controller Caps */
792 
793 
794 /* NVMe Queue Entry Size bitfield */
typedef struct {
	uint8_t qes_min:4;		/* smallest entry size */
	uint8_t qes_max:4;		/* largest entry size */
} nvme_idctl_qes_t;
799 
800 /* NVMe Power State Descriptor */
/*
 * The member widths below add up to 32 bytes. Parenthesized numbers name the
 * NVMe version that a field is annotated with.
 */
typedef struct {
	/* Maximum power and state flags */
	uint16_t psd_mp;		/* Maximum Power */
	uint8_t psd_rsvd1;
	uint8_t psd_mps:1;		/* Max Power Scale (1.1) */
	uint8_t psd_nops:1;		/* Non-Operational State (1.1) */
	uint8_t psd_rsvd2:6;
	/* Latencies */
	uint32_t psd_enlat;		/* Entry Latency */
	uint32_t psd_exlat;		/* Exit Latency */
	/* Relative performance; each value has five bits plus three reserved */
	uint8_t psd_rrt:5;		/* Relative Read Throughput */
	uint8_t psd_rsvd3:3;
	uint8_t psd_rrl:5;		/* Relative Read Latency */
	uint8_t psd_rsvd4:3;
	uint8_t psd_rwt:5;		/* Relative Write Throughput */
	uint8_t	psd_rsvd5:3;
	uint8_t psd_rwl:5;		/* Relative Write Latency */
	uint8_t psd_rsvd6:3;
	/* Idle power */
	uint16_t psd_idlp;		/* Idle Power (1.2) */
	uint8_t psd_rsvd7:6;
	uint8_t psd_ips:2;		/* Idle Power Scale (1.2) */
	uint8_t psd_rsvd8;
	/* Active power */
	uint16_t psd_actp;		/* Active Power (1.2) */
	uint8_t psd_apw:3;		/* Active Power Workload (1.2) */
	uint8_t psd_rsvd9:3;
	uint8_t psd_aps:2;		/* Active Power Scale */
	uint8_t psd_rsvd10[9];
} nvme_idctl_psd_t;
827 
/* Sizes, in bytes, of fixed-width Identify Controller string fields. */
#define	NVME_SERIAL_SZ	20	/* serial number */
#define	NVME_MODEL_SZ	40	/* model number */
#define	NVME_FWVER_SZ	8	/* firmware revision */
831 
832 /* NVMe Identify Controller Data Structure */
833 typedef struct {
834 	/* Controller Capabilities & Features */
835 	uint16_t id_vid;		/* PCI vendor ID */
836 	uint16_t id_ssvid;		/* PCI subsystem vendor ID */
837 	char id_serial[NVME_SERIAL_SZ];	/* Serial Number */
838 	char id_model[NVME_MODEL_SZ];	/* Model Number */
839 	char id_fwrev[NVME_FWVER_SZ];	/* Firmware Revision */
840 	uint8_t id_rab;			/* Recommended Arbitration Burst */
841 	uint8_t id_oui[3];		/* vendor IEEE OUI */
842 	struct {			/* Multi-Interface Capabilities */
843 		uint8_t m_multi_pci:1;	/* HW has multiple PCIe interfaces */
844 		uint8_t m_multi_ctrl:1; /* HW has multiple controllers (1.1) */
845 		uint8_t m_sr_iov:1;	/* Controller is SR-IOV virt fn (1.1) */
846 		uint8_t m_anar_sup:1;	/* ANA Reporting Supported (1.4) */
847 		uint8_t m_rsvd:4;
848 	} id_mic;
849 	uint8_t	id_mdts;		/* Maximum Data Transfer Size */
850 	uint16_t id_cntlid;		/* Unique Controller Identifier (1.1) */
851 	/* Added in NVMe 1.2 */
852 	uint32_t id_ver;		/* Version (1.2) */
853 	uint32_t id_rtd3r;		/* RTD3 Resume Latency (1.2) */
854 	uint32_t id_rtd3e;		/* RTD3 Entry Latency (1.2) */
855 	struct {
856 		uint32_t oaes_rsvd0:8;
857 		uint32_t oaes_nsan:1;	/* Namespace Attribute Notices (1.2) */
858 		uint32_t oaes_fwact:1;	/* Firmware Activation Notices (1.2) */
859 		uint32_t oaes_rsvd1:1;
860 		uint32_t oaes_ansacn:1;	/* Asymmetric NS Access Change (1.4) */
861 		uint32_t oaes_plat:1;	/* Predictable Lat Event Agg. (1.4) */
862 		uint32_t oaes_lbasi:1;	/* LBA Status Information (1.4) */
863 		uint32_t oaes_egeal:1;	/* Endurance Group Event Agg. (1.4) */
864 		uint32_t oaes_rsvd2:17;
865 	} id_oaes;
866 	struct {
867 		uint32_t ctrat_hid:1;	/* 128-bit Host Identifier (1.2)  */
868 		uint32_t ctrat_nops:1;	/* Non-Operational Power State (1.3) */
869 		uint32_t ctrat_nvmset:1; /* NVMe Sets (1.4) */
870 		uint32_t ctrat_rrl:1;	/* Read Recovery Levels (1.4) */
871 		uint32_t ctrat_engrp:1; /* Endurance Groups (1.4) */
872 		uint32_t ctrat_plm:1;	/* Predictable Latency Mode (1.4) */
873 		uint32_t ctrat_tbkas:1;	/* Traffic Based Keep Alive (1.4) */
874 		uint32_t ctrat_nsg:1;	/* Namespace Granularity (1.4) */
875 		uint32_t ctrat_sqass:1;	/* SQ Associations (1.4) */
876 		uint32_t ctrat_uuid:1;	/* UUID List (1.4) */
877 		uint32_t ctrat_rsvd:22;
878 	} id_ctratt;
879 	uint16_t id_rrls;		/* Read Recovery Levels (1.4) */
880 	uint8_t id_rsvd_cc[111-102];
881 	uint8_t id_cntrltype;		/* Controller Type (1.4) */
882 	uint8_t id_frguid[16];		/* FRU GUID (1.3) */
883 	uint16_t id_crdt1;		/* Command Retry Delay Time 1 (1.4) */
884 	uint16_t id_crdt2;		/* Command Retry Delay Time 2 (1.4) */
885 	uint16_t id_crdt3;		/* Command Retry Delay Time 3 (1.4) */
886 	uint8_t id_rsvd2_cc[240 - 134];
887 	uint8_t id_rsvd_nvmemi[253 - 240];
888 	/* NVMe-MI region */
889 	struct {			/* NVMe Subsystem Report */
890 		uint8_t nvmsr_nvmesd:1;	/* NVMe Storage Device */
891 		uint8_t nvmsr_nvmee:1;	/* NVMe Enclosure */
892 		uint8_t nvmsr_rsvd:6;
893 	} id_nvmsr;
894 	struct {			/* VPD Write Cycle Information */
895 		uint8_t vwci_crem:7;	/* Write Cycles Remaining */
896 		uint8_t vwci_valid:1;	/* Write Cycles Remaining Valid */
897 	} id_vpdwc;
898 	struct {			/* Management Endpoint Capabilities */
899 		uint8_t mec_smbusme:1;	/* SMBus Port Management Endpoint */
900 		uint8_t mec_pcieme:1;	/* PCIe Port Management Endpoint */
901 		uint8_t mec_rsvd:6;
902 	} id_mec;
903 
904 	/* Admin Command Set Attributes */
905 	struct {			/* Optional Admin Command Support */
906 		uint16_t oa_security:1;	/* Security Send & Receive */
907 		uint16_t oa_format:1;	/* Format NVM */
908 		uint16_t oa_firmware:1;	/* Firmware Activate & Download */
909 		uint16_t oa_nsmgmt:1;	/* Namespace Management (1.2) */
910 		uint16_t oa_selftest:1;	/* Self Test (1.3) */
911 		uint16_t oa_direct:1;	/* Directives (1.3) */
912 		uint16_t oa_nvmemi:1;	/* MI-Send/Recv (1.3) */
913 		uint16_t oa_virtmgmt:1;	/* Virtualization Management (1.3) */
914 		uint16_t oa_doorbell:1;	/* Doorbell Buffer Config (1.3) */
915 		uint16_t oa_lbastat:1;	/* LBA Status (1.4) */
916 		uint16_t oa_rsvd:6;
917 	} id_oacs;
918 	uint8_t	id_acl;			/* Abort Command Limit */
919 	uint8_t id_aerl;		/* Asynchronous Event Request Limit */
920 	struct {			/* Firmware Updates */
921 		uint8_t fw_readonly:1;	/* Slot 1 is Read-Only */
922 		uint8_t	fw_nslot:3;	/* number of firmware slots */
923 		uint8_t fw_norst:1;	/* Activate w/o reset (1.2) */
924 		uint8_t fw_rsvd:3;
925 	} id_frmw;
926 	struct {			/* Log Page Attributes */
927 		uint8_t lp_smart:1;	/* SMART/Health information per NS */
928 		uint8_t lp_cmdeff:1;	/* Command Effects (1.2) */
929 		uint8_t lp_extsup:1;	/* Extended Get Log Page (1.2) */
930 		uint8_t lp_telemetry:1;	/* Telemetry Log Pages (1.3) */
931 		uint8_t lp_persist:1;	/* Persistent Log Page (1.4) */
932 		uint8_t lp_rsvd:3;
933 	} id_lpa;
934 	uint8_t id_elpe;		/* Error Log Page Entries */
935 	uint8_t	id_npss;		/* Number of Power States */
936 	struct {			/* Admin Vendor Specific Command Conf */
937 		uint8_t av_spec:1;	/* use format from spec */
938 		uint8_t av_rsvd:7;
939 	} id_avscc;
940 	struct {			/* Autonomous Power State Trans (1.1) */
941 		uint8_t ap_sup:1;	/* APST supported (1.1) */
942 		uint8_t ap_rsvd:7;
943 	} id_apsta;
944 	uint16_t ap_wctemp;		/* Warning Composite Temp. (1.2) */
945 	uint16_t ap_cctemp;		/* Critical Composite Temp. (1.2) */
946 	uint16_t ap_mtfa;		/* Maximum Firmware Activation (1.2) */
947 	uint32_t ap_hmpre;		/* Host Memory Buf Pref Size (1.2) */
948 	uint32_t ap_hmmin;		/* Host Memory Buf Min Size (1.2) */
949 	nvme_uint128_t ap_tnvmcap;	/* Total NVM Capacity in Bytes (1.2) */
950 	nvme_uint128_t ap_unvmcap;	/* Unallocated NVM Capacity (1.2) */
951 	struct {			/* Replay Protected Mem. Block (1.2) */
952 		uint32_t rpmbs_units:3;	/* Number of targets */
953 		uint32_t rpmbs_auth:3;	/* Auth method */
954 		uint32_t rpmbs_rsvd:10;
955 		uint32_t rpmbs_tot:8;	/* Total size in 128KB */
956 		uint32_t rpmbs_acc:8;	/* Access size in 512B */
957 	} ap_rpmbs;
958 	/* Added in NVMe 1.3 */
959 	uint16_t ap_edstt;		/* Ext. Device Self-test time (1.3) */
960 	struct {			/* Device Self-test Options */
961 		uint8_t dsto_sub:1;	/* Subsystem level self-test (1.3) */
962 		uint8_t dsto_rsvd:7;
963 	} ap_dsto;
964 	uint8_t ap_fwug;		/* Firmware Update Granularity (1.3) */
965 	uint16_t ap_kas;		/* Keep Alive Support (1.2) */
966 	struct {			/* Host Thermal Management (1.3) */
967 		uint16_t hctma_hctm:1;	/* Host Controlled (1.3) */
968 		uint16_t hctma_rsvd:15;
969 	} ap_hctma;
970 	uint16_t ap_mntmt;		/* Minimum Thermal Temperature (1.3) */
971 	uint16_t ap_mxtmt;		/* Maximum Thermal Temperature (1.3) */
972 	struct {			/* Sanitize Caps */
973 		uint32_t san_ces:1;	/* Crypto Erase Support (1.3) */
974 		uint32_t san_bes:1;	/* Block Erase Support (1.3) */
975 		uint32_t san_ows:1;	/* Overwite Support (1.3) */
976 		uint32_t san_rsvd:26;
977 		uint32_t san_ndi:1;	/* No-deallocate Inhibited (1.4) */
978 		uint32_t san_nodmmas:2;	/* No-Deallocate Modifies Media (1.4) */
979 	} ap_sanitize;
980 	uint32_t ap_hmminds;		/* Host Mem Buf Min Desc Entry (1.4) */
981 	uint16_t ap_hmmaxd;		/* How Mem Max Desc Entries (1.4) */
982 	uint16_t ap_nsetidmax;		/* Max NVMe set identifier (1.4) */
983 	uint16_t ap_engidmax;		/* Max Endurance Group ID (1.4) */
984 	uint8_t ap_anatt;		/* ANA Transition Time (1.4) */
985 	struct {			/* Asymmetric Namespace Access Caps */
986 		uint8_t anacap_opt:1;	/* Optimized State (1.4) */
987 		uint8_t anacap_unopt:1;	/* Un-optimized State (1.4) */
988 		uint8_t anacap_inacc:1;	/* Inaccessible State (1.4) */
989 		uint8_t anacap_ploss:1;	/* Persistent Loss (1.4) */
990 		uint8_t anacap_chg:1;	/* Change State (1.4 ) */
991 		uint8_t anacap_rsvd:1;
992 		uint8_t anacap_grpns:1;	/* ID Changes with NS Attach (1.4) */
993 		uint8_t anacap_grpid:1;	/* Supports Group ID (1.4) */
994 	} ap_anacap;
995 	uint32_t ap_anagrpmax;		/* ANA Group ID Max (1.4) */
996 	uint32_t ap_nanagrpid;		/* Number of ANA Group IDs (1.4) */
997 	uint32_t ap_pels;		/* Persistent Event Log Size (1.4) */
998 	uint8_t id_rsvd_ac[512 - 356];
999 
1000 	/* NVM Command Set Attributes */
1001 	nvme_idctl_qes_t id_sqes;	/* Submission Queue Entry Size */
1002 	nvme_idctl_qes_t id_cqes;	/* Completion Queue Entry Size */
1003 	uint16_t id_maxcmd;		/* Max Outstanding Commands (1.3) */
1004 	uint32_t id_nn;			/* Number of Namespaces */
1005 	struct {			/* Optional NVM Command Support */
1006 		uint16_t on_compare:1;	/* Compare */
1007 		uint16_t on_wr_unc:1;	/* Write Uncorrectable */
1008 		uint16_t on_dset_mgmt:1; /* Dataset Management */
1009 		uint16_t on_wr_zero:1;	/* Write Zeros (1.1) */
1010 		uint16_t on_save:1;	/* Save/Select in Get/Set Feat (1.1) */
1011 		uint16_t on_reserve:1;	/* Reservations (1.1) */
1012 		uint16_t on_ts:1;	/* Timestamp (1.3) */
1013 		uint16_t on_verify:1;	/* Verify (1.4) */
1014 		uint16_t on_rsvd:8;
1015 	} id_oncs;
1016 	struct {			/* Fused Operation Support */
1017 		uint16_t f_cmp_wr:1;	/* Compare and Write */
1018 		uint16_t f_rsvd:15;
1019 	} id_fuses;
1020 	struct {			/* Format NVM Attributes */
1021 		uint8_t fn_format:1;	/* Format applies to all NS */
1022 		uint8_t fn_sec_erase:1;	/* Secure Erase applies to all NS */
1023 		uint8_t fn_crypt_erase:1; /* Cryptographic Erase supported */
1024 		uint8_t fn_rsvd:5;
1025 	} id_fna;
1026 	struct {			/* Volatile Write Cache */
1027 		uint8_t vwc_present:1;	/* Volatile Write Cache present */
1028 		uint8_t vwc_nsflush:2;	/* Flush with NS ffffffff (1.4) */
1029 		uint8_t rsvd:5;
1030 	} id_vwc;
1031 	uint16_t id_awun;		/* Atomic Write Unit Normal */
1032 	uint16_t id_awupf;		/* Atomic Write Unit Power Fail */
1033 	struct {			/* NVM Vendor Specific Command Conf */
1034 		uint8_t nv_spec:1;	/* use format from spec */
1035 		uint8_t nv_rsvd:7;
1036 	} id_nvscc;
1037 	struct {			/* Namespace Write Protection Caps */
1038 		uint8_t nwpc_base:1;	/* Base support (1.4) */
1039 		uint8_t nwpc_wpupc:1;	/* Write prot until power cycle (1.4) */
1040 		uint8_t nwpc_permwp:1;	/* Permanent write prot (1.4) */
1041 		uint8_t nwpc_rsvd:5;
1042 	} id_nwpc;
1043 	uint16_t id_acwu;		/* Atomic Compare & Write Unit (1.1) */
1044 	uint16_t id_rsvd_nc_3;
1045 	struct {			/* SGL Support (1.1) */
1046 		uint16_t sgl_sup:2;	/* SGL Supported in NVM cmds (1.3) */
1047 		uint16_t sgl_keyed:1;	/* Keyed SGL Support (1.2) */
1048 		uint16_t sgl_rsvd1:13;
1049 		uint16_t sgl_bucket:1;	/* SGL Bit Bucket supported (1.1) */
1050 		uint16_t sgl_balign:1;	/* SGL Byte Aligned (1.2) */
1051 		uint16_t sgl_sglgtd:1;	/* SGL Length Longer than Data (1.2) */
1052 		uint16_t sgl_mptr:1;	/* SGL MPTR w/ SGL (1.2) */
1053 		uint16_t sgl_offset:1;	/* SGL Address is offset (1.2) */
1054 		uint16_t sgl_tport:1;	/* Transport SGL Data Block (1.4) */
1055 		uint16_t sgl_rsvd2:10;
1056 	} id_sgls;
1057 	uint32_t id_mnan;		/* Maximum Number of Allowed NSes */
1058 	uint8_t id_rsvd_nc_4[768 - 544];
1059 
1060 	/* I/O Command Set Attributes */
1061 	uint8_t id_subnqn[1024 - 768];	/* Subsystem Qualified Name (1.2.1+) */
1062 	uint8_t id_rsvd_ioc[1792 - 1024];
1063 	uint8_t id_nvmof[2048 - 1792];	/* NVMe over Fabrics */
1064 
1065 	/* Power State Descriptors */
1066 	nvme_idctl_psd_t id_psd[32];
1067 
1068 	/* Vendor Specific */
1069 	uint8_t id_vs[1024];
1070 } nvme_identify_ctrl_t;
1071 
1072 /*
1073  * NVMe Controller Types
 *
 * Values for the id_cntrltype byte ("Controller Type (1.4)") of
 * nvme_identify_ctrl_t.
1074  */
1075 #define	NVME_CNTRLTYPE_RSVD	0
1076 #define	NVME_CNTRLTYPE_IO	1
1077 #define	NVME_CNTRLTYPE_DISC	2
1078 #define	NVME_CNTRLTYPE_ADMIN	3
1079 
1080 /*
1081  * RPMBS Authentication Types
 *
 * Value for the 3-bit ap_rpmbs.rpmbs_auth ("Auth method") field of
 * nvme_identify_ctrl_t. Only this one method is defined here.
1082  */
1083 #define	NVME_RPMBS_AUTH_HMAC_SHA256	0
1084 
1085 /*
1086  * NODMMAS Values
 *
 * Values for the 2-bit ap_sanitize.san_nodmmas field ("No-Deallocate
 * Modifies Media (1.4)") of nvme_identify_ctrl_t. The value 0x03 has no
 * name here.
1087  */
1088 #define	NVME_NODMMAS_UNDEF	0x00
1089 #define	NVME_NODMMAS_NOMOD	0x01
1090 #define	NVME_NODMMAS_DOMOD	0x02
1091 
1092 /*
1093  * VWC NSID flushes
 *
 * Values for the 2-bit id_vwc.vwc_nsflush field ("Flush with NS ffffffff
 * (1.4)") of nvme_identify_ctrl_t. Note that the value 0x01 has no name
 * here; the defined values are 0, 2 and 3.
1094  */
1095 #define	NVME_VWCNS_UNKNOWN	0x00
1096 #define	NVME_VWCNS_UNSUP	0x02
1097 #define	NVME_VWCNS_SUP		0x03
1098 
1099 /*
1100  * SGL Support Values
 *
 * Values for the 2-bit id_sgls.sgl_sup field ("SGL Supported in NVM cmds
 * (1.3)") of nvme_identify_ctrl_t. The value 0x03 has no name here.
1101  */
1102 #define	NVME_SGL_UNSUP		0x00
1103 #define	NVME_SGL_SUP_UNALIGN	0x01
1104 #define	NVME_SGL_SUP_ALIGN	0x02
1105 
1106 /*
 * NVMe Identify Namespace LBA Format
 *
 * One LBA format descriptor: a 16-bit metadata size, an 8-bit LBA data size
 * and one byte of bit-fields (32 bits in total). nvme_identify_nsid_t embeds
 * an array of NVME_MAX_LBAF of these as id_lbaf.
 */
1107 typedef struct {
1108 	uint16_t lbaf_ms;		/* Metadata Size */
1109 	uint8_t lbaf_lbads;		/* LBA Data Size */
1110 	uint8_t lbaf_rp:2;		/* Relative Performance */
1111 	uint8_t lbaf_rsvd1:6;
1112 } nvme_idns_lbaf_t;
1113 
/* Number of LBA format descriptors carried in nvme_identify_nsid_t.id_lbaf. */
1114 #define	NVME_MAX_LBAF	16
1115 
1116 /*
 * NVMe Identify Namespace Data Structure
 *
 * The reserved arrays below are sized as "next byte offset - current byte
 * offset" (e.g. id_rsvd1[92 - 74]), so member order and widths must not be
 * changed. The trailing id_vs[4096 - 384] brings the total to 4096 bytes,
 * assuming the structure is packed (a "#pragma pack()" annotated "pack(1)"
 * closes the packed region further down in this file).
 *
 * Version numbers in parentheses name the NVMe revision that introduced the
 * field.
 */
1117 typedef struct {
1118 	uint64_t id_nsize;		/* Namespace Size */
1119 	uint64_t id_ncap;		/* Namespace Capacity */
1120 	uint64_t id_nuse;		/* Namespace Utilization */
1121 	struct {			/* Namespace Features */
1122 		uint8_t f_thin:1;	/* Thin Provisioning */
1123 		uint8_t f_nsabp:1;	/* Namespace atomics (1.2) */
1124 		uint8_t f_dae:1;	/* Deallocated errors supported (1.2) */
1125 		uint8_t f_uidreuse:1;	/* GUID reuse impossible (1.3) */
1126 		uint8_t f_optperf:1;	/* Namespace I/O opt (1.4) */
1127 		uint8_t f_rsvd:3;
1128 	} id_nsfeat;
1129 	uint8_t id_nlbaf;		/* Number of LBA formats */
1130 	struct {			/* Formatted LBA size */
1131 		uint8_t lba_format:4;	/* LBA format */
1132 		uint8_t lba_extlba:1;	/* extended LBA (includes metadata) */
1133 		uint8_t lba_rsvd:3;
1134 	} id_flbas;
1135 	struct {			/* Metadata Capabilities */
1136 		uint8_t mc_extlba:1;	/* extended LBA transfers */
1137 		uint8_t mc_separate:1;	/* separate metadata transfers */
1138 		uint8_t mc_rsvd:6;
1139 	} id_mc;
1140 	struct {			/* Data Protection Capabilities */
1141 		uint8_t dp_type1:1;	/* Protection Information Type 1 */
1142 		uint8_t dp_type2:1;	/* Protection Information Type 2 */
1143 		uint8_t dp_type3:1;	/* Protection Information Type 3 */
1144 		uint8_t dp_first:1;	/* first 8 bytes of metadata */
1145 		uint8_t dp_last:1;	/* last 8 bytes of metadata */
1146 		uint8_t dp_rsvd:3;
1147 	} id_dpc;
	/*
	 * id_dps reuses the member names dp_first and dp_rsvd from id_dpc;
	 * they are distinct members of distinct anonymous structs.
	 */
1148 	struct {			/* Data Protection Settings */
1149 		uint8_t dp_pinfo:3;	/* Protection Information enabled */
1150 		uint8_t dp_first:1;	/* first 8 bytes of metadata */
1151 		uint8_t dp_rsvd:4;
1152 	} id_dps;
1153 	struct {			/* NS Multi-Path/Sharing Cap (1.1) */
1154 		uint8_t nm_shared:1;	/* NS is shared (1.1) */
1155 		uint8_t nm_rsvd:7;
1156 	} id_nmic;
1157 	struct {			/* Reservation Capabilities (1.1) */
1158 		uint8_t rc_persist:1;	/* Persist Through Power Loss (1.1) */
1159 		uint8_t rc_wr_excl:1;	/* Write Exclusive (1.1) */
1160 		uint8_t rc_excl:1;	/* Exclusive Access (1.1) */
1161 		uint8_t rc_wr_excl_r:1;	/* Wr Excl - Registrants Only (1.1) */
1162 		uint8_t rc_excl_r:1;	/* Excl Acc - Registrants Only (1.1) */
1163 		uint8_t rc_wr_excl_a:1;	/* Wr Excl - All Registrants (1.1) */
1164 		uint8_t rc_excl_a:1;	/* Excl Acc - All Registrants (1.1) */
1165 		uint8_t rc_ign_ekey:1;	/* Ignore Existing Key (1.3) */
1166 	} id_rescap;
1167 	struct {			/* Format Progress Indicator (1.2) */
1168 		uint8_t fpi_remp:7;	/* Percent NVM Format Remaining (1.2) */
1169 		uint8_t fpi_sup:1;	/* Supported (1.2) */
1170 	} id_fpi;
1171 	uint8_t id_dfleat;		/* Deallocate Log. Block (1.3) */
1172 	uint16_t id_nawun;		/* Atomic Write Unit Normal (1.2) */
1173 	uint16_t id_nawupf;		/* Atomic Write Unit Power Fail (1.2) */
1174 	uint16_t id_nacwu;		/* Atomic Compare & Write Unit (1.2) */
1175 	uint16_t id_nabsn;		/* Atomic Boundary Size Normal (1.2) */
1176 	uint16_t id_nbao;		/* Atomic Boundary Offset (1.2) */
1177 	uint16_t id_nabspf;		/* Atomic Boundary Size Fail (1.2) */
1178 	uint16_t id_noiob;		/* Optimal I/O Boundary (1.3) */
1179 	nvme_uint128_t id_nvmcap;	/* NVM Capacity */
1180 	uint16_t id_npwg;		/* NS Pref. Write Gran. (1.4) */
1181 	uint16_t id_npwa;		/* NS Pref. Write Align. (1.4) */
1182 	uint16_t id_npdg;		/* NS Pref. Deallocate Gran. (1.4) */
1183 	uint16_t id_npda;		/* NS Pref. Deallocate Align. (1.4) */
1184 	uint16_t id_nows;		/* NS. Optimal Write Size (1.4) */
1185 	uint8_t id_rsvd1[92 - 74];
1186 	uint32_t id_anagrpid;		/* ANA Group Identifier (1.4) */
1187 	uint8_t id_rsvd2[99 - 96];
1188 	struct {
1189 		uint8_t nsa_wprot:1;	/* Write Protected (1.4) */
1190 		uint8_t nsa_rsvd:7;
1191 	} id_nsattr;
1192 	uint16_t id_nvmsetid;		/* NVM Set Identifier (1.4) */
1193 	uint16_t id_endgid;		/* Endurance Group Identifier (1.4) */
1194 	uint8_t id_nguid[16];		/* Namespace GUID (1.2) */
1195 	uint8_t id_eui64[8];		/* IEEE Extended Unique Id (1.1) */
1196 	nvme_idns_lbaf_t id_lbaf[NVME_MAX_LBAF];	/* LBA Formats */
1197 
1198 	uint8_t id_rsvd3[384 - 192];
1199 
1200 	uint8_t id_vs[4096 - 384];	/* Vendor Specific */
1201 } nvme_identify_nsid_t;
1202 
1203 /*
 * NVMe Identify Namespace ID List
 *
 * A buffer of NVME_IDENTIFY_BUFSIZE bytes viewed as an array of 32-bit
 * namespace IDs.
 */
1204 typedef struct {
1205 					/* Ordered list of Namespace IDs */
1206 	uint32_t nl_nsid[NVME_IDENTIFY_BUFSIZE / sizeof (uint32_t)];
1207 } nvme_identify_nsid_list_t;
1208 
1209 /*
 * NVMe Identify Controller ID List
 *
 * A 16-bit entry count followed by 16-bit controller identifiers. The array
 * has one slot fewer than fits in NVME_IDENTIFY_BUFSIZE because cl_nid takes
 * the first 16-bit slot, so the structure as a whole spans the buffer.
 */
1210 typedef struct {
1211 	uint16_t	cl_nid;		/* Number of controller entries */
1212 					/* unique controller identifiers */
1213 	uint16_t	cl_ctlid[NVME_IDENTIFY_BUFSIZE / sizeof (uint16_t) - 1];
1214 } nvme_identify_ctrl_list_t;
1215 
1216 /*
 * NVMe Identify Namespace Descriptor
 *
 * A 4-byte header followed by a variable-length identifier; nd_nid is a
 * flexible array member, so sizeof () covers the header only. nd_nidt takes
 * one of the NVME_NSID_DESC_* type values below and nd_nidl gives the
 * identifier length; the NVME_NSID_DESC_LEN_* constants are the lengths that
 * go with each type.
 */
1217 typedef struct {
1218 	uint8_t nd_nidt;		/* Namespace Identifier Type */
1219 	uint8_t nd_nidl;		/* Namespace Identifier Length */
1220 	uint8_t nd_resv[2];
1221 	uint8_t nd_nid[];		/* Namespace Identifier */
1222 } nvme_identify_nsid_desc_t;
1223 
/* Namespace Identifier Type (nd_nidt) values and their valid range. */
1224 #define	NVME_NSID_DESC_EUI64	1
1225 #define	NVME_NSID_DESC_NGUID	2
1226 #define	NVME_NSID_DESC_NUUID	3
1227 #define	NVME_NSID_DESC_MIN	NVME_NSID_DESC_EUI64
1228 #define	NVME_NSID_DESC_MAX	NVME_NSID_DESC_NUUID
1229 
/*
 * Identifier lengths in bytes, per type. The EUI64 and NGUID lengths match
 * the sizes of id_eui64[8] and id_nguid[16] in nvme_identify_nsid_t.
 *
 * NOTE(review): UUID_LEN is not defined in this header. This file includes
 * <sys/uuid.h> only when _KERNEL is not defined, so kernel users of
 * NVME_NSID_DESC_LEN_NUUID presumably need to include it themselves - verify.
 */
1230 #define	NVME_NSID_DESC_LEN_EUI64	8
1231 #define	NVME_NSID_DESC_LEN_NGUID	16
1232 #define	NVME_NSID_DESC_LEN_NUUID	UUID_LEN
1233 
1234 /*
 * NVMe Identify Primary Controller Capabilities
 *
 * Reserved arrays are sized by byte offset ("next offset - current offset"),
 * so member order and widths must be kept as they are; nipc_rsvd2[4096 - 80]
 * pads the structure out to 4096 bytes (assuming it is packed). The members
 * at offsets 32-47 describe "VQ" resources and those at 64-79 the
 * corresponding "VI" resources.
 */
1235 typedef struct {
1236 	uint16_t	nipc_cntlid;	/* Controller ID */
1237 	uint16_t	nipc_portid;	/* Port Identifier */
1238 	uint8_t		nipc_crt;	/* Controller Resource Types */
1239 	uint8_t		nipc_rsvd0[32 - 5];
1240 	uint32_t	nipc_vqfrt;	/* VQ Resources Flexible Total */
1241 	uint32_t	nipc_vqrfa;	/* VQ Resources Flexible Assigned */
1242 	uint16_t	nipc_vqrfap;	/* VQ Resources to Primary */
1243 	uint16_t	nipc_vqprt;	/* VQ Resources Private Total */
1244 	uint16_t	nipc_vqfrsm;	/* VQ Resources Secondary Max */
1245 	uint16_t	nipc_vqgran;	/* VQ Flexible Resource Gran */
1246 	uint8_t		nipc_rvsd1[64 - 48];
1247 	uint32_t	nipc_vifrt;	/* VI Flexible total */
1248 	uint32_t	nipc_virfa;	/* VI Flexible Assigned */
1249 	uint16_t	nipc_virfap;	/* VI Flexible Allocated to Primary */
1250 	uint16_t	nipc_viprt;	/* VI Resources Private Total */
1251 	uint16_t	nipc_vifrsm;	/* VI Resources Secondary Max */
1252 	uint16_t	nipc_vigran;	/* VI Flexible Granularity */
1253 	uint8_t		nipc_rsvd2[4096 - 80];
1254 } nvme_identify_primary_caps_t;
1255 
1256 /*
1257  * NVMe completion queue entry status field
 *
 * Sixteen bits of bit-fields (1 + 8 + 3 + 2 + 1 + 1). The NVME_CQE_SCT_* and
 * NVME_CQE_SC_* constants later in this file name values for sf_sct and
 * sf_sc respectively. This type is also embedded in nvme_error_log_entry_t
 * as el_sf.
1258  */
1259 typedef struct {
1260 	uint16_t sf_p:1;		/* Phase Tag */
1261 	uint16_t sf_sc:8;		/* Status Code */
1262 	uint16_t sf_sct:3;		/* Status Code Type */
1263 	uint16_t sf_rsvd2:2;
1264 	uint16_t sf_m:1;		/* More */
1265 	uint16_t sf_dnr:1;		/* Do Not Retry */
1266 } nvme_cqe_sf_t;
1267 
1268 
1269 /*
1270  * NVMe Get Log Page
 *
 * Log page identifiers. Version numbers in parentheses name the NVMe
 * revision associated with the page. NVME_LOGPAGE_VEND_MIN and
 * NVME_LOGPAGE_VEND_MAX bound a separate identifier range (0xc0 - 0xff),
 * presumably the vendor-specific pages.
1271  */
1272 #define	NVME_LOGPAGE_SUP	0x00	/* Supported Logs (2.0) */
1273 #define	NVME_LOGPAGE_ERROR	0x01	/* Error Information */
1274 #define	NVME_LOGPAGE_HEALTH	0x02	/* SMART/Health Information */
1275 #define	NVME_LOGPAGE_FWSLOT	0x03	/* Firmware Slot Information */
1276 #define	NVME_LOGPAGE_NSCHANGE	0x04	/* Changed namespace (1.2) */
1277 #define	NVME_LOGPAGE_CMDSUP	0x05	/* Cmds. Supported and Effects (1.3) */
1278 #define	NVME_LOGPAGE_SELFTEST	0x06	/* Device self-test (1.3) */
1279 #define	NVME_LOGPAGE_TELMHOST	0x07	/* Telemetry Host-Initiated */
1280 #define	NVME_LOGPAGE_TELMCTRL	0x08	/* Telemetry Controller-Initiated */
1281 #define	NVME_LOGPAGE_ENDGRP	0x09	/* Endurance Group Information (1.4) */
1282 #define	NVME_LOGPAGE_PLATSET	0x0a	/* Predictable Lat. per NVM Set (1.4) */
1283 #define	NVME_LOGPAGE_PLATAGG	0x0b	/* Predictable Lat. Event Agg (1.4) */
1284 #define	NVME_LOGPAGE_ASYMNS	0x0c	/* Asymmetric Namespace Access (1.4) */
1285 #define	NVME_LOGPAGE_PEVLOG	0x0d	/* Persistent Event Log (1.4) */
1286 #define	NVME_LOGPAGE_LBASTS	0x0e	/* LBA Status Information (1.4) */
1287 #define	NVME_LOGPAGE_ENDAGG	0x0f	/* Endurance Group Event Agg. (1.4) */
1288 
1289 #define	NVME_LOGPAGE_VEND_MIN	0xc0
1290 #define	NVME_LOGPAGE_VEND_MAX	0xff
1291 
/*
 * One entry of the Error Information log page (NVME_LOGPAGE_ERROR).
 *
 * el_rsvd2[64 - 29] pads each entry to 64 bytes (assuming the structure is
 * packed). el_byte and el_bit together give the location of a parameter
 * error as a byte offset plus a 3-bit bit offset.
 */
1292 typedef struct {
1293 	uint64_t el_count;		/* Error Count */
1294 	uint16_t el_sqid;		/* Submission Queue ID */
1295 	uint16_t el_cid;		/* Command ID */
1296 	nvme_cqe_sf_t el_sf;		/* Status Field */
1297 	uint8_t	el_byte;		/* Parameter Error Location byte */
1298 	uint8_t	el_bit:3;		/* Parameter Error Location bit */
1299 	uint8_t el_rsvd1:5;
1300 	uint64_t el_lba;		/* Logical Block Address */
1301 	uint32_t el_nsid;		/* Namespace ID */
1302 	uint8_t	el_vendor;		/* Vendor Specific Information avail */
1303 	uint8_t el_rsvd2[64 - 29];
1304 } nvme_error_log_entry_t;
1305 
/*
 * SMART/Health Information log page (NVME_LOGPAGE_HEALTH).
 *
 * Reserved arrays are sized by byte offset; hl_rsvd2[512 - 232] pads the
 * structure to 512 bytes (assuming it is packed). hl_temp is an unaligned
 * 16-bit member at byte offset 1, which is only correct under byte packing.
 * The five hl_crit_warn bits appear in the same order as the first five bits
 * of nvme_async_event_conf_t.
 */
1306 typedef struct {
1307 	struct {			/* Critical Warning */
1308 		uint8_t cw_avail:1;	/* available space too low */
1309 		uint8_t cw_temp:1;	/* temperature too high */
1310 		uint8_t cw_reliab:1;	/* degraded reliability */
1311 		uint8_t cw_readonly:1;	/* media is read-only */
1312 		uint8_t cw_volatile:1;	/* volatile memory backup failed */
1313 		uint8_t cw_rsvd:3;
1314 	} hl_crit_warn;
1315 	uint16_t hl_temp;		/* Temperature */
1316 	uint8_t hl_avail_spare;		/* Available Spare */
1317 	uint8_t hl_avail_spare_thr;	/* Available Spare Threshold */
1318 	uint8_t hl_used;		/* Percentage Used */
1319 	uint8_t hl_rsvd1[32 - 6];
1320 	nvme_uint128_t hl_data_read;	/* Data Units Read */
1321 	nvme_uint128_t hl_data_write;	/* Data Units Written */
1322 	nvme_uint128_t hl_host_read;	/* Host Read Commands */
1323 	nvme_uint128_t hl_host_write;	/* Host Write Commands */
1324 	nvme_uint128_t hl_ctrl_busy;	/* Controller Busy Time */
1325 	nvme_uint128_t hl_power_cycles;	/* Power Cycles */
1326 	nvme_uint128_t hl_power_on_hours; /* Power On Hours */
1327 	nvme_uint128_t hl_unsafe_shutdn; /* Unsafe Shutdowns */
1328 	nvme_uint128_t hl_media_errors;	/* Media Errors */
1329 	nvme_uint128_t hl_errors_logged; /* Number of errors logged */
1330 	/* Added in NVMe 1.2 */
1331 	uint32_t hl_warn_temp_time;	/* Warning Composite Temp Time */
1332 	uint32_t hl_crit_temp_time;	/* Critical Composite Temp Time */
1333 	uint16_t hl_temp_sensor_1;	/* Temperature Sensor 1 */
1334 	uint16_t hl_temp_sensor_2;	/* Temperature Sensor 2 */
1335 	uint16_t hl_temp_sensor_3;	/* Temperature Sensor 3 */
1336 	uint16_t hl_temp_sensor_4;	/* Temperature Sensor 4 */
1337 	uint16_t hl_temp_sensor_5;	/* Temperature Sensor 5 */
1338 	uint16_t hl_temp_sensor_6;	/* Temperature Sensor 6 */
1339 	uint16_t hl_temp_sensor_7;	/* Temperature Sensor 7 */
1340 	uint16_t hl_temp_sensor_8;	/* Temperature Sensor 8 */
1341 	/* Added in NVMe 1.3 */
1342 	uint32_t hl_tmtemp_1_tc;	/* Thermal Mgmt Temp 1 Transition # */
1343 	uint32_t hl_tmtemp_2_tc;	/* Thermal Mgmt Temp 2 Transition # */
1344 	uint32_t hl_tmtemp_1_time;	/* Time in Thermal Mgmt Temp 1 */
1345 	uint32_t hl_tmtemp_2_time;	/* Time in Thermal Mgmt Temp 2 */
1346 	uint8_t hl_rsvd2[512 - 232];
1347 } nvme_health_log_t;
1348 
1349 /*
1350  * The NVMe spec allows for up to seven firmware slots.
1351  */
1352 #define	NVME_MAX_FWSLOTS	7
1353 
/*
 * Firmware Slot Information log page (NVME_LOGPAGE_FWSLOT).
 *
 * The first byte holds the 3-bit active and next-active slot numbers, each
 * followed by a reserved bit. fw_frs holds one NVME_FWVER_SZ-byte revision
 * string per slot. fw_rsvd4[512 - 64] pads the page to 512 bytes, which
 * implies the members before it occupy 64 bytes.
 */
1354 typedef struct {
1355 	/* Active Firmware Slot */
1356 	uint8_t fw_afi:3;
1357 	uint8_t fw_rsvd1:1;
1358 	/* Next Active Firmware Slot */
1359 	uint8_t fw_next:3;
1360 	uint8_t fw_rsvd2:1;
1361 	uint8_t fw_rsvd3[7];
1362 	/* Firmware Revision / Slot */
1363 	char fw_frs[NVME_MAX_FWSLOTS][NVME_FWVER_SZ];
1364 	uint8_t fw_rsvd4[512 - 64];
1365 } nvme_fwslot_log_t;
1366 
1367 /*
1368  * The NVMe spec specifies that the changed namespace list contains up to
1369  * 1024 entries.
1370  */
1371 #define	NVME_NSCHANGE_LIST_SIZE	1024
1372 
/*
 * Changed namespace list (NVME_LOGPAGE_NSCHANGE): NVME_NSCHANGE_LIST_SIZE
 * 32-bit entries, i.e. 4096 bytes.
 */
1373 typedef struct {
1374 	uint32_t	nscl_ns[NVME_NSCHANGE_LIST_SIZE];
1375 } nvme_nschange_list_t;
1376 
1377 /*
1378  * NVMe Format NVM
 *
 * The NVME_FRMT_SES_* constants are values for the 3-bit fm_ses ("Secure
 * Erase Settings") field of nvme_format_nvm_t; NVME_FRMT_MAX_SES is the
 * largest of them. NVME_FRMT_MAX_LBAF is the largest value that fits in the
 * 4-bit fm_lbaf ("LBA Format") field.
1379  */
1380 #define	NVME_FRMT_SES_NONE	0
1381 #define	NVME_FRMT_SES_USER	1
1382 #define	NVME_FRMT_SES_CRYPTO	2
1383 #define	NVME_FRMT_MAX_SES	2
1384 
1385 #define	NVME_FRMT_MAX_LBAF	15
1386 
1387 typedef union {
1388 	struct {
1389 		uint32_t fm_lbaf:4;		/* LBA Format */
1390 		uint32_t fm_ms:1;		/* Metadata Settings */
1391 		uint32_t fm_pi:3;		/* Protection Information */
1392 		uint32_t fm_pil:1;		/* Prot. Information Location */
1393 		uint32_t fm_ses:3;		/* Secure Erase Settings */
1394 		uint32_t fm_resvd:20;
1395 	} b;
1396 	uint32_t r;
1397 } nvme_format_nvm_t;
1398 
1399 
1400 /*
1401  * NVMe Get / Set Features
 *
 * Feature identifiers. They fit the 8-bit gt_fid field of
 * nvme_get_features_dw10_t. Most identifiers have a matching union or
 * structure type defined further down in this file.
1402  */
1403 #define	NVME_FEAT_ARBITRATION	0x1	/* Command Arbitration */
1404 #define	NVME_FEAT_POWER_MGMT	0x2	/* Power Management */
1405 #define	NVME_FEAT_LBA_RANGE	0x3	/* LBA Range Type */
1406 #define	NVME_FEAT_TEMPERATURE	0x4	/* Temperature Threshold */
1407 #define	NVME_FEAT_ERROR		0x5	/* Error Recovery */
1408 #define	NVME_FEAT_WRITE_CACHE	0x6	/* Volatile Write Cache */
1409 #define	NVME_FEAT_NQUEUES	0x7	/* Number of Queues */
1410 #define	NVME_FEAT_INTR_COAL	0x8	/* Interrupt Coalescing */
1411 #define	NVME_FEAT_INTR_VECT	0x9	/* Interrupt Vector Configuration */
1412 #define	NVME_FEAT_WRITE_ATOM	0xa	/* Write Atomicity */
1413 #define	NVME_FEAT_ASYNC_EVENT	0xb	/* Asynchronous Event Configuration */
1414 #define	NVME_FEAT_AUTO_PST	0xc	/* Autonomous Power State Transition */
1415 					/* (1.1) */
1416 
1417 #define	NVME_FEAT_PROGRESS	0x80	/* Software Progress Marker */
1418 
1419 /*
1420  * This enumeration represents the capabilities in the Get Features select / Set
1421  * Features save options. This was introduced in NVMe 1.1 and the values below
1422  * match the specification. An optional feature in the identify controller data
1423  * structure is set to indicate that this is supported (id_oncs.on_save).
 *
 * Only the first enumerator has an explicit value; the rest follow on
 * implicitly as 1, 2 and 3, so their order must not be changed. All four fit
 * the 3-bit gt_sel field of nvme_get_features_dw10_t.
1424  */
1425 typedef enum {
1426 	NVME_FEATURE_SEL_CURRENT	= 0,
1427 	NVME_FEATURE_SEL_DEFAULT,
1428 	NVME_FEATURE_SEL_SAVED,
1429 	NVME_FEATURE_SEL_SUPPORTED
1430 } nvme_feature_sel_t;
1431 
1432 typedef union {
1433 	struct {
1434 		uint32_t gt_fid:8;	/* Feature ID */
1435 		uint32_t gt_sel:3;	/* Select */
1436 		uint32_t gt_rsvd:21;
1437 	} b;
1438 	uint32_t r;
1439 } nvme_get_features_dw10_t;
1440 
1441 /*
 * Arbitration Feature (NVME_FEAT_ARBITRATION)
 *
 * Four bytes: a byte holding the 3-bit arbitration burst plus reserved bits,
 * then one byte per priority weight. Member r is the same 32 bits as one raw
 * value.
 */
1442 typedef union {
1443 	struct {
1444 		uint8_t arb_ab:3;	/* Arbitration Burst */
1445 		uint8_t arb_rsvd:5;
1446 		uint8_t arb_lpw;	/* Low Priority Weight */
1447 		uint8_t arb_mpw;	/* Medium Priority Weight */
1448 		uint8_t arb_hpw;	/* High Priority Weight */
1449 	} b;
1450 	uint32_t r;
1451 } nvme_arbitration_t;
1452 
1453 /*
 * Power Management Feature (NVME_FEAT_POWER_MGMT)
 *
 * A 5-bit power state number (0 - 31) in the low bits of a 32-bit value;
 * member r is the same 32 bits as one raw value.
 */
1454 typedef union {
1455 	struct {
1456 		uint32_t pm_ps:5;	/* Power State */
1457 		uint32_t pm_rsvd:27;
1458 	} b;
1459 	uint32_t r;
1460 } nvme_power_mgmt_t;
1461 
1462 /*
 * LBA Range Type Feature (NVME_FEAT_LBA_RANGE)
 *
 * A 6-bit range count in the low bits of a 32-bit value; member r is the
 * same 32 bits as one raw value. The ranges themselves are described by
 * nvme_lba_range_t.
 */
1463 typedef union {
1464 	struct {
1465 		uint32_t lr_num:6;	/* Number of LBA ranges */
1466 		uint32_t lr_rsvd:26;
1467 	} b;
1468 	uint32_t r;
1469 } nvme_lba_range_type_t;
1470 
1471 typedef struct {
1472 	uint8_t lr_type;		/* Type */
1473 	struct {			/* Attributes */
1474 		uint8_t lr_write:1;	/* may be overwritten */
1475 		uint8_t lr_hidden:1;	/* hidden from OS/EFI/BIOS */
1476 		uint8_t lr_rsvd1:6;
1477 	} lr_attr;
1478 	uint8_t lr_rsvd2[14];
1479 	uint64_t lr_slba;		/* Starting LBA */
1480 	uint64_t lr_nlb;		/* Number of Logical Blocks */
1481 	uint8_t lr_guid[16];		/* Unique Identifier */
1482 	uint8_t lr_rsvd3[16];
1483 } nvme_lba_range_t;
1484 
1485 #define	NVME_LBA_RANGE_BUFSIZE	4096
1486 
1487 /*
 * Temperature Threshold Feature (NVME_FEAT_TEMPERATURE)
 *
 * A 16-bit threshold followed by a 4-bit sensor select, a 2-bit threshold
 * type and reserved bits (32 bits in total); member r is the same 32 bits as
 * one raw value.
 */
1488 typedef union {
1489 	struct {
1490 		uint16_t tt_tmpth;	/* Temperature Threshold */
1491 		uint16_t tt_tmpsel:4;	/* Temperature Select */
1492 		uint16_t tt_thsel:2;	/* Temperature Type */
1493 		uint16_t tt_resv:10;
1494 	} b;
1495 	uint32_t r;
1496 } nvme_temp_threshold_t;
1497 
/*
 * NVME_TEMP_THRESH_MAX_SENSOR matches the eight hl_temp_sensor_N members of
 * nvme_health_log_t. NVME_TEMP_THRESH_ALL is the all-ones value of the 4-bit
 * tt_tmpsel field. NVME_TEMP_THRESH_OVER and NVME_TEMP_THRESH_UNDER are
 * presumably values for tt_thsel - confirm against the users of this header.
 */
1498 #define	NVME_TEMP_THRESH_MAX_SENSOR	8
1499 #define	NVME_TEMP_THRESH_ALL	0xf
1500 #define	NVME_TEMP_THRESH_OVER	0x00
1501 #define	NVME_TEMP_THRESH_UNDER	0x01
1502 
1503 /*
 * Error Recovery Feature (NVME_FEAT_ERROR)
 *
 * A 16-bit time limit in the low half and 16 reserved bits; member r is the
 * same 32 bits as one raw value.
 */
1504 typedef union {
1505 	struct {
1506 		uint16_t er_tler;	/* Time-Limited Error Recovery */
1507 		uint16_t er_rsvd;
1508 	} b;
1509 	uint32_t r;
1510 } nvme_error_recovery_t;
1511 
1512 /*
 * Volatile Write Cache Feature (NVME_FEAT_WRITE_CACHE)
 *
 * A single enable bit in bit 0 of a 32-bit value; member r is the same
 * 32 bits as one raw value.
 */
1513 typedef union {
1514 	struct {
1515 		uint32_t wc_wce:1;	/* Volatile Write Cache Enable */
1516 		uint32_t wc_rsvd:31;
1517 	} b;
1518 	uint32_t r;
1519 } nvme_write_cache_t;
1520 
1521 /*
 * Number of Queues Feature (NVME_FEAT_NQUEUES)
 *
 * Submission queue count in the low 16 bits, completion queue count in the
 * high 16 bits; member r is the same 32 bits as one raw value.
 */
1522 typedef union {
1523 	struct {
1524 		uint16_t nq_nsq;	/* Number of Submission Queues */
1525 		uint16_t nq_ncq;	/* Number of Completion Queues */
1526 	} b;
1527 	uint32_t r;
1528 } nvme_nqueues_t;
1529 
1530 /*
 * Interrupt Coalescing Feature (NVME_FEAT_INTR_COAL)
 *
 * One byte of aggregation threshold, one byte of aggregation time and
 * 16 reserved bits; member r is the same 32 bits as one raw value.
 */
1531 typedef union {
1532 	struct {
1533 		uint8_t ic_thr;		/* Aggregation Threshold */
1534 		uint8_t ic_time;	/* Aggregation Time */
1535 		uint16_t ic_rsvd;
1536 	} b;
1537 	uint32_t r;
1538 } nvme_intr_coal_t;
1539 
1540 /*
 * Interrupt Vector Configuration Feature (NVME_FEAT_INTR_VECT)
 *
 * A 16-bit interrupt vector number followed by a coalescing-disable bit and
 * 15 reserved bits; member r is the same 32 bits as one raw value.
 */
1541 typedef union {
1542 	struct {
1543 		uint16_t iv_iv;		/* Interrupt Vector */
1544 		uint16_t iv_cd:1;	/* Coalescing Disable */
1545 		uint16_t iv_rsvd:15;
1546 	} b;
1547 	uint32_t r;
1548 } nvme_intr_vect_t;
1549 
1550 /*
 * Write Atomicity Feature (NVME_FEAT_WRITE_ATOM)
 *
 * A single "Disable Normal" bit in bit 0 of a 32-bit value; member r is the
 * same 32 bits as one raw value.
 */
1551 typedef union {
1552 	struct {
1553 		uint32_t wa_dn:1;	/* Disable Normal */
1554 		uint32_t wa_rsvd:31;
1555 	} b;
1556 	uint32_t r;
1557 } nvme_write_atomicity_t;
1558 
1559 /*
 * Asynchronous Event Configuration Feature (NVME_FEAT_ASYNC_EVENT)
 *
 * Two bytes of single-bit flags followed by two reserved bytes; member r is
 * the same 32 bits as one raw value. The first five flags are in the same
 * order as the hl_crit_warn bits of nvme_health_log_t. The second byte
 * follows the order of bits 8 - 14 of id_oaes in nvme_identify_ctrl_t, with
 * aec_telln in the position that id_oaes marks reserved.
 */
1560 typedef union {
1561 	struct {
1562 		uint8_t aec_avail:1;	/* Available space too low */
1563 		uint8_t aec_temp:1;	/* Temperature too high */
1564 		uint8_t aec_reliab:1;	/* Degraded reliability */
1565 		uint8_t aec_readonly:1;	/* Media is read-only */
1566 		uint8_t aec_volatile:1;	/* Volatile memory backup failed */
1567 		uint8_t aec_rsvd1:3;
1568 		uint8_t aec_nsan:1;	/* Namespace attribute notices (1.2) */
1569 		uint8_t aec_fwact:1;	/* Firmware activation notices (1.2) */
1570 		uint8_t aec_telln:1;	/* Telemetry log notices (1.3) */
1571 		uint8_t aec_ansacn:1;	/* Asymm. NS access change (1.4) */
1572 		uint8_t aec_plat:1;	/* Predictable latency ev. agg. (1.4) */
1573 		uint8_t aec_lbasi:1;	/* LBA status information (1.4) */
1574 		uint8_t aec_egeal:1;	/* Endurance group ev. agg. (1.4) */
1575 		uint8_t aec_rsvd2:1;
1576 		uint8_t aec_rsvd3[2];
1577 	} b;
1578 	uint32_t r;
1579 } nvme_async_event_conf_t;
1580 
1581 /*
 * Autonomous Power State Transition Feature (1.1) (NVME_FEAT_AUTO_PST)
 *
 * A single enable bit in bit 0 of a 32-bit value; member r is the same
 * 32 bits as one raw value. The per-power-state table entries are described
 * by nvme_auto_power_state_t.
 */
1582 typedef union {
1583 	struct {
1584 		uint32_t apst_apste:1;	/* APST enabled */
1585 		uint32_t apst_rsvd:31;
1586 	} b;
1587 	uint32_t r;
1588 } nvme_auto_power_state_trans_t;
1589 
1590 typedef struct {
1591 	uint32_t apst_rsvd1:3;
1592 	uint32_t apst_itps:5;	/* Idle Transition Power State */
1593 	uint32_t apst_itpt:24;	/* Idle Time Prior to Transition */
1594 	uint32_t apst_rsvd2;
1595 } nvme_auto_power_state_t;
1596 
1597 #define	NVME_AUTO_PST_BUFSIZE	256
1598 
1599 /*
 * Software Progress Marker Feature (NVME_FEAT_PROGRESS)
 *
 * An 8-bit load count followed by three reserved bytes; member r is the same
 * 32 bits as one raw value.
 */
1600 typedef union {
1601 	struct {
1602 		uint8_t spm_pbslc;	/* Pre-Boot Software Load Count */
1603 		uint8_t spm_rsvd[3];
1604 	} b;
1605 	uint32_t r;
1606 } nvme_software_progress_marker_t;
1607 
1608 /*
1609  * Firmware Commit - Command Dword 10
 *
 * Commit actions: values for the 3-bit fc_action field of
 * nvme_firmware_commit_dw10_t.
1610  */
1611 #define	NVME_FWC_SAVE		0x0	/* Save image only */
1612 #define	NVME_FWC_SAVE_ACTIVATE	0x1	/* Save and activate at next reset */
1613 #define	NVME_FWC_ACTIVATE	0x2	/* Activate slot at next reset */
1614 #define	NVME_FWC_ACTIVATE_IMMED	0x3	/* Activate slot immediately */
1615 
1616 /*
1617  * Firmware slot number is only 3 bits, and zero is not allowed.
1618  * Valid range is 1 to 7.
 *
 * The upper bound equals NVME_MAX_FWSLOTS. Both constants carry a U suffix,
 * so comparisons against them are done in unsigned arithmetic.
1619  */
1620 #define	NVME_FW_SLOT_MIN	1U	/* lowest allowable slot number ... */
1621 #define	NVME_FW_SLOT_MAX	7U	/* ... and highest */
1622 
1623 /*
1624  * Some constants to make verification of DWORD variables and arguments easier.
1625  * A DWORD is 4 bytes.
 *
 * NVME_DWORD_SHIFT converts between bytes and DWORDs by shifting,
 * NVME_DWORD_SIZE is the size in bytes (4), and NVME_DWORD_MASK (3) selects
 * the low bits that must be zero in a DWORD-aligned value.
1626  */
1627 #define	NVME_DWORD_SHIFT	2
1628 #define	NVME_DWORD_SIZE		(1 << NVME_DWORD_SHIFT)
1629 #define	NVME_DWORD_MASK		(NVME_DWORD_SIZE - 1)
1630 
1631 /*
1632  * The maximum offset a firmware image can be loaded at is the number of
1633  * DWORDS in a 32 bit field. Expressed in bytes it is:
 *
 * UINT32_MAX is widened to u_longlong_t before the shift so that the byte
 * value does not wrap at 32 bits.
1634  */
1635 #define	NVME_FW_OFFSETB_MAX	((u_longlong_t)UINT32_MAX << NVME_DWORD_SHIFT)
1636 
1637 typedef union {
1638 	struct {
1639 		uint32_t fc_slot:3;	/* Firmware slot */
1640 		uint32_t fc_action:3;	/* Commit action */
1641 		uint32_t fc_rsvd:26;
1642 	} b;
1643 	uint32_t r;
1644 } nvme_firmware_commit_dw10_t;
1645 
1646 #pragma pack() /* pack(1) */
1647 
1648 /* NVMe completion status code type */
1649 #define	NVME_CQE_SCT_GENERIC	0	/* Generic Command Status */
1650 #define	NVME_CQE_SCT_SPECIFIC	1	/* Command Specific Status */
1651 #define	NVME_CQE_SCT_INTEGRITY	2	/* Media and Data Integrity Errors */
1652 #define	NVME_CQE_SCT_VENDOR	7	/* Vendor Specific */
1653 
1654 /*
1655  * Status code ranges
 *
 * Three contiguous ranges that together cover every value of the 8-bit
 * sf_sc field of nvme_cqe_sf_t (0x00 - 0xff). The NVME_CQE_SC_GEN_NVM_*
 * codes defined below (0x80 and up) fall in the CSI range.
1656  */
1657 #define	NVME_CQE_SC_GEN_MIN		0x00
1658 #define	NVME_CQE_SC_GEN_MAX		0x7f
1659 #define	NVME_CQE_SC_CSI_MIN		0x80
1660 #define	NVME_CQE_SC_CSI_MAX		0xbf
1661 #define	NVME_CQE_SC_VEND_MIN		0xc0
1662 #define	NVME_CQE_SC_VEND_MAX		0xff
1663 
1664 /*
 * NVMe completion status code (generic)
 *
 * Values for sf_sc of nvme_cqe_sf_t. All of them lie within
 * NVME_CQE_SC_GEN_MIN - NVME_CQE_SC_GEN_MAX.
 */
1665 #define	NVME_CQE_SC_GEN_SUCCESS		0x0	/* Successful Completion */
1666 #define	NVME_CQE_SC_GEN_INV_OPC		0x1	/* Invalid Command Opcode */
1667 #define	NVME_CQE_SC_GEN_INV_FLD		0x2	/* Invalid Field in Command */
1668 #define	NVME_CQE_SC_GEN_ID_CNFL		0x3	/* Command ID Conflict */
1669 #define	NVME_CQE_SC_GEN_DATA_XFR_ERR	0x4	/* Data Transfer Error */
1670 #define	NVME_CQE_SC_GEN_ABORT_PWRLOSS	0x5	/* Cmds Aborted / Pwr Loss */
1671 #define	NVME_CQE_SC_GEN_INTERNAL_ERR	0x6	/* Internal Error */
1672 #define	NVME_CQE_SC_GEN_ABORT_REQUEST	0x7	/* Command Abort Requested */
1673 #define	NVME_CQE_SC_GEN_ABORT_SQ_DEL	0x8	/* Cmd Aborted / SQ deletion */
1674 #define	NVME_CQE_SC_GEN_ABORT_FUSE_FAIL	0x9	/* Cmd Aborted / Failed Fused */
1675 #define	NVME_CQE_SC_GEN_ABORT_FUSE_MISS	0xa	/* Cmd Aborted / Missing Fusd */
1676 #define	NVME_CQE_SC_GEN_INV_NS		0xb	/* Inval Namespace or Format */
1677 #define	NVME_CQE_SC_GEN_CMD_SEQ_ERR	0xc	/* Command Sequence Error */
1678 #define	NVME_CQE_SC_GEN_INV_SGL_LAST	0xd	/* Inval SGL Last Seg Desc */
1679 #define	NVME_CQE_SC_GEN_INV_SGL_NUM	0xe	/* Inval Number of SGL Desc */
1680 #define	NVME_CQE_SC_GEN_INV_DSGL_LEN	0xf	/* Data SGL Length Invalid */
1681 #define	NVME_CQE_SC_GEN_INV_MSGL_LEN	0x10	/* Metadata SGL Length Inval */
1682 #define	NVME_CQE_SC_GEN_INV_SGL_DESC	0x11	/* SGL Descriptor Type Inval */
1683 #define	NVME_CQE_SC_GEN_INV_USE_CMB	0x12	/* Inval use of Ctrl Mem Buf */
1684 #define	NVME_CQE_SC_GEN_INV_PRP_OFF	0x13	/* PRP Offset Invalid */
1685 #define	NVME_CQE_SC_GEN_AWU_EXCEEDED	0x14	/* Atomic Write Unit Exceeded */
1686 
/* NVMe completion status codes: generic status of NVM commands */
#define	NVME_CQE_SC_GEN_NVM_LBA_RANGE	0x80	/* LBA Out of Range */
#define	NVME_CQE_SC_GEN_NVM_CAP_EXC	0x81	/* Capacity Exceeded */
#define	NVME_CQE_SC_GEN_NVM_NS_NOTRDY	0x82	/* Namespace Not Ready */
#define	NVME_CQE_SC_GEN_NVM_RSV_CNFLCT	0x83	/* Reservation Conflict */
#define	NVME_CQE_SC_GEN_NVM_FORMATTING	0x84	/* Format In Progress (1.2) */
1693 
/*
 * NVMe completion status codes: command specific status. Note that no name
 * is assigned to the value 0x04 in this list.
 */
#define	NVME_CQE_SC_SPC_INV_CQ		0x00	/* Completion Queue Invalid */
#define	NVME_CQE_SC_SPC_INV_QID		0x01	/* Invalid Queue Identifier */
#define	NVME_CQE_SC_SPC_MAX_QSZ_EXC	0x02	/* Max Queue Size Exceeded */
#define	NVME_CQE_SC_SPC_ABRT_CMD_EXC	0x03	/* Abort Cmd Limit Exceeded */
#define	NVME_CQE_SC_SPC_ASYNC_EVREQ_EXC	0x05	/* Async Event Request Limit */
#define	NVME_CQE_SC_SPC_INV_FW_SLOT	0x06	/* Invalid Firmware Slot */
#define	NVME_CQE_SC_SPC_INV_FW_IMG	0x07	/* Invalid Firmware Image */
#define	NVME_CQE_SC_SPC_INV_INT_VECT	0x08	/* Invalid Interrupt Vector */
#define	NVME_CQE_SC_SPC_INV_LOG_PAGE	0x09	/* Invalid Log Page */
#define	NVME_CQE_SC_SPC_INV_FORMAT	0x0a	/* Invalid Format */
#define	NVME_CQE_SC_SPC_FW_RESET	0x0b	/* FW Application Reset Reqd */
#define	NVME_CQE_SC_SPC_INV_Q_DEL	0x0c	/* Invalid Queue Deletion */
#define	NVME_CQE_SC_SPC_FEAT_SAVE	0x0d	/* Feature Id Not Saveable */
#define	NVME_CQE_SC_SPC_FEAT_CHG	0x0e	/* Feature Not Changeable */
#define	NVME_CQE_SC_SPC_FEAT_NS_SPEC	0x0f	/* Feature Not Namespace Spec */
#define	NVME_CQE_SC_SPC_FW_NSSR		0x10	/* FW Application NSSR Reqd */
#define	NVME_CQE_SC_SPC_FW_NEXT_RESET	0x11	/* FW Application Next Reset */
#define	NVME_CQE_SC_SPC_FW_MTFA		0x12	/* FW Application Exceed MTFA */
#define	NVME_CQE_SC_SPC_FW_PROHIBITED	0x13	/* FW Application Prohibited */
#define	NVME_CQE_SC_SPC_FW_OVERLAP	0x14	/* Overlapping FW Ranges */
1715 
/* NVMe completion status codes: NVM command specific status */
#define	NVME_CQE_SC_SPC_NVM_CNFL_ATTR	0x80	/* Conflicting Attributes */
#define	NVME_CQE_SC_SPC_NVM_INV_PROT	0x81	/* Invalid Protection */
#define	NVME_CQE_SC_SPC_NVM_READONLY	0x82	/* Write to Read-Only Range */
1720 
/* NVMe completion status codes: data and metadata integrity errors */
#define	NVME_CQE_SC_INT_NVM_WRITE	0x80	/* Write Fault */
#define	NVME_CQE_SC_INT_NVM_READ	0x81	/* Unrecovered Read Error */
#define	NVME_CQE_SC_INT_NVM_GUARD	0x82	/* Guard Check Error */
#define	NVME_CQE_SC_INT_NVM_APPL_TAG	0x83	/* Application Tag Check Err */
#define	NVME_CQE_SC_INT_NVM_REF_TAG	0x84	/* Reference Tag Check Err */
#define	NVME_CQE_SC_INT_NVM_COMPARE	0x85	/* Compare Failure */
#define	NVME_CQE_SC_INT_NVM_ACCESS	0x86	/* Access Denied */
1729 
1730 /*
1731  * Controller information (NVME_IOC_CTRL_INFO). This is a consolidation of misc.
1732  * information that we want to know about a controller.
1733  */
1734 typedef struct {
1735 	nvme_ioctl_common_t nci_common;
1736 	nvme_identify_ctrl_t nci_ctrl_id;
1737 	nvme_identify_nsid_t nci_common_ns;
1738 	nvme_version_t nci_vers;
1739 	nvme_capabilities_t nci_caps;
1740 	uint32_t nci_nintrs;
1741 } nvme_ioctl_ctrl_info_t;
1742 
/*
 * NVMe namespace state flags.
 *
 * These values are defined solely by the driver. Some of them mirror the
 * namespace states described in section 6.1 of the NVMe specification r1.3,
 * while others exist only in this driver's implementation. They are reported
 * in the nni_state member of nvme_ioctl_ns_info_t, which is used with the
 * NVME_IOC_NS_INFO ioctl.
 *
 * The flags have the following meanings:
 * - ALLOCATED: the namespace exists in the controller, as per the NVMe spec.
 * - ACTIVE: the namespace exists and is attached to this controller, as per
 *   the NVMe spec. An ACTIVE namespace is always ALLOCATED as well. Do not
 *   confuse this with the ATTACHED state.
 * - ATTACHED: the driver has attached a blkdev(4D) instance to the namespace.
 *   Userspace can change this state with the NVME_IOC_ATTACH and
 *   NVME_IOC_DETACH ioctls. Only a namespace that is not IGNORED can be
 *   ATTACHED.
 * - IGNORED: the driver ignores the namespace and never attaches a blkdev(4D)
 *   to it. A namespace is IGNORED when it is not ACTIVE, or when it is ACTIVE
 *   but has certain properties that the driver cannot handle.
 */
typedef enum {
	NVME_NS_STATE_ALLOCATED	= 0x1,
	NVME_NS_STATE_ACTIVE	= 0x2,
	NVME_NS_STATE_ATTACHED	= 0x4,
	NVME_NS_STATE_IGNORED	= 0x8
} nvme_ns_state_t;
1769 
/*
 * The maximum length of the blkdev address of an NVMe namespace. The address
 * is only valid in the structure when the NVME_NS_STATE_ATTACHED flag is set;
 * otherwise the entry will be all zeros. It is useful when you need to
 * determine which blkdev instance in libdevinfo corresponds to the device.
 */
#define	NVME_BLKDEV_NAMELEN	128
1778 
1779 /*
1780  * Namespace Information (NVME_IOC_NS_INFO).
1781  */
1782 typedef struct {
1783 	nvme_ioctl_common_t nni_common;
1784 	nvme_ns_state_t	nni_state;
1785 	char nni_addr[NVME_BLKDEV_NAMELEN];
1786 	nvme_identify_nsid_t nni_id;
1787 } nvme_ioctl_ns_info_t;
1788 
/*
 * NVMe Command Set Identifiers (CSI). These were added in NVMe 2.0, but in
 * every place where one had to be specified, the default value of 0
 * indicates the traditional NVM command set.
 */
typedef enum {
	NVME_CSI_NVM	= 0,	/* Traditional NVM command set */
	NVME_CSI_KV	= 1,	/* Key Value command set */
	NVME_CSI_ZNS	= 2	/* Zoned Namespace command set */
} nvme_csi_t;
1799 
1800 #ifdef __cplusplus
1801 }
1802 #endif
1803 
1804 #endif /* _SYS_NVME_H */
1805