/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (C) 2012-2013 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef __NVME_H__
#define __NVME_H__

#ifdef _KERNEL
#include <sys/types.h>
#endif

#include <sys/param.h>

/* ioctl request codes; all use the 'n' group. */
#define	NVME_PASSTHROUGH_CMD		_IOWR('n', 0, struct nvme_pt_command)
#define	NVME_RESET_CONTROLLER		_IO('n', 1)

#define	NVME_IO_TEST			_IOWR('n', 100, struct nvme_io_test)
#define	NVME_BIO_TEST			_IOWR('n', 101, struct nvme_io_test)

/*
 * Macros to deal with NVME revisions, as defined by the VS register.
 * The major number lives in bits 31:16 and the minor number in bits 15:8.
 */
#define	NVME_REV(x, y)			(((x) << 16) | ((y) << 8))
#define	NVME_MAJOR(r)			(((r) >> 16) & 0xffff)
#define	NVME_MINOR(r)			(((r) >> 8) & 0xff)

/*
 * Use to mark a command to apply to all namespaces, or to retrieve global
 *  log pages.
 */
#define	NVME_GLOBAL_NAMESPACE_TAG	((uint32_t)0xFFFFFFFF)

/* Cap nvme to 1MB transfers; the driver explodes with larger sizes. */
#define	NVME_MAX_XFER_SIZE		(MAXPHYS < (1<<20) ? MAXPHYS : (1<<20))

/* Controller capabilities register, first 32-bit word. */
union cap_lo_register {
	uint32_t	raw;
	struct {
		/** maximum queue entries supported */
		uint32_t mqes		: 16;

		/** contiguous queues required */
		uint32_t cqr		: 1;

		/** arbitration mechanism supported */
		uint32_t ams		: 2;

		uint32_t reserved1	: 5;

		/** timeout */
		uint32_t to		: 8;
	} bits __packed;
} __packed;

_Static_assert(sizeof(union cap_lo_register) == 4, "bad size for cap_lo_register");

/* Controller capabilities register, second 32-bit word. */
union cap_hi_register {
	uint32_t	raw;
	struct {
		/** doorbell stride */
		uint32_t dstrd		: 4;

		uint32_t reserved3	: 1;

		/** command sets supported */
		uint32_t css_nvm	: 1;

		uint32_t css_reserved	: 3;
		uint32_t reserved2	: 7;

		/** memory page size minimum */
		uint32_t mpsmin		: 4;

		/** memory page size maximum */
		uint32_t mpsmax		: 4;

		uint32_t reserved1	: 8;
	} bits __packed;
} __packed;

_Static_assert(sizeof(union cap_hi_register) == 4, "bad size for cap_hi_register");

/* Controller configuration register. */
union cc_register {
	uint32_t	raw;
	struct {
		/** enable */
		uint32_t en		: 1;

		uint32_t reserved1	: 3;

		/** i/o command set selected */
		uint32_t css		: 3;

		/** memory page size */
		uint32_t mps		: 4;

		/** arbitration mechanism selected */
		uint32_t ams		: 3;

		/** shutdown notification */
		uint32_t shn		: 2;

		/** i/o submission queue entry size */
		uint32_t iosqes		: 4;

		/** i/o completion queue entry size */
		uint32_t iocqes		: 4;

		uint32_t reserved2	: 8;
	} bits __packed;
} __packed;

_Static_assert(sizeof(union cc_register) == 4, "bad size for cc_register");

/* Values for the shn (shutdown notification) field of cc_register. */
enum shn_value {
	NVME_SHN_NORMAL		= 0x1,
	NVME_SHN_ABRUPT		= 0x2,
};

/* Controller status register. */
union csts_register {
	uint32_t	raw;
	struct {
		/** ready */
		uint32_t rdy		: 1;

		/** controller fatal status */
		uint32_t cfs		: 1;

		/** shutdown status */
		uint32_t shst		: 2;

		uint32_t reserved1	: 28;
	} bits __packed;
} __packed;

_Static_assert(sizeof(union csts_register) == 4, "bad size for csts_register");

/* Values for the shst (shutdown status) field of csts_register. */
enum shst_value {
	NVME_SHST_NORMAL	= 0x0,
	NVME_SHST_OCCURRING	= 0x1,
	NVME_SHST_COMPLETE	= 0x2,
};

/* Admin queue attributes register. */
union aqa_register {
	uint32_t	raw;
	struct {
		/** admin submission queue size */
		uint32_t asqs		: 12;

		uint32_t reserved1	: 4;

		/** admin completion queue size */
		uint32_t acqs		: 12;

		uint32_t reserved2	: 4;
	} bits __packed;
} __packed;

_Static_assert(sizeof(union aqa_register) == 4, "bad size for aqa_register");

/*
 * Controller register file.  reserved3 pads the fixed registers out to
 *  offset 0x1000, where the first doorbell pair starts.
 */
struct nvme_registers
{
	/** controller capabilities */
	union cap_lo_register	cap_lo;
	union cap_hi_register	cap_hi;

	uint32_t		vs;	/* version */
	uint32_t		intms;	/* interrupt mask set */
	uint32_t		intmc;	/* interrupt mask clear */

	/** controller configuration */
	union cc_register	cc;

	uint32_t		reserved1;

	/** controller status */
	union csts_register	csts;

	uint32_t		reserved2;

	/** admin queue attributes */
	union aqa_register	aqa;

	uint64_t		asq;	/* admin submission queue base addr */
	uint64_t		acq;	/* admin completion queue base addr */
	uint32_t		reserved3[0x3f2];

	struct {
	    uint32_t		sq_tdbl; /* submission queue tail doorbell */
	    uint32_t		cq_hdbl; /* completion queue head doorbell */
	} doorbell[1] __packed;
} __packed;

_Static_assert(sizeof(struct nvme_registers) == 0x1008, "bad size for nvme_registers");

/* Submission queue entry: 16 dwords. */
struct nvme_command
{
	/* dword 0 */
	uint16_t opc	:  8;	/* opcode */
	uint16_t fuse	:  2;	/* fused operation */
	uint16_t rsvd1	:  6;
	uint16_t cid;		/* command identifier */

	/* dword 1 */
	uint32_t nsid;		/* namespace identifier */

	/* dword 2-3 */
	uint32_t rsvd2;
	uint32_t rsvd3;

	/* dword 4-5 */
	uint64_t mptr;		/* metadata pointer */

	/* dword 6-7 */
	uint64_t prp1;		/* prp entry 1 */

	/* dword 8-9 */
	uint64_t prp2;		/* prp entry 2 */

	/* dword 10-15 */
	uint32_t cdw10;		/* command-specific */
	uint32_t cdw11;		/* command-specific */
	uint32_t cdw12;		/* command-specific */
	uint32_t cdw13;		/* command-specific */
	uint32_t cdw14;		/* command-specific */
	uint32_t cdw15;		/* command-specific */
} __packed;

_Static_assert(sizeof(struct nvme_command) == 16 * 4, "bad size for nvme_command");

/* Status field of a completion queue entry (16 bits, phase tag included). */
struct nvme_status {

	uint16_t p	:  1;	/* phase tag */
	uint16_t sc	:  8;	/* status code */
	uint16_t sct	:  3;	/* status code type */
	uint16_t rsvd2	:  2;
	uint16_t m	:  1;	/* more */
	uint16_t dnr	:  1;	/* do not retry */
} __packed;

_Static_assert(sizeof(struct nvme_status) == 2, "bad size for nvme_status");

/* Completion queue entry: 4 dwords. */
struct nvme_completion {

	/* dword 0 */
	uint32_t		cdw0;	/* command-specific */

	/* dword 1 */
	uint32_t		rsvd1;

	/* dword 2 */
	uint16_t		sqhd;	/* submission queue head pointer */
	uint16_t		sqid;	/* submission queue identifier */

	/* dword 3 */
	uint16_t		cid;	/* command identifier */
	struct nvme_status	status;
} __packed;

_Static_assert(sizeof(struct nvme_completion) == 4 * 4, "bad size for nvme_completion");

/* One range descriptor for a dataset management command. */
struct nvme_dsm_range {

	uint32_t attributes;
	uint32_t length;
	uint64_t starting_lba;
} __packed;

_Static_assert(sizeof(struct nvme_dsm_range) == 16, "bad size for nvme_dsm_range");

/* status code types */
enum nvme_status_code_type {
	NVME_SCT_GENERIC		= 0x0,
	NVME_SCT_COMMAND_SPECIFIC	= 0x1,
	NVME_SCT_MEDIA_ERROR		= 0x2,
	/* 0x3-0x6 - reserved */
	NVME_SCT_VENDOR_SPECIFIC	= 0x7,
};

/* generic command status codes */
enum nvme_generic_command_status_code {
	NVME_SC_SUCCESS				= 0x00,
	NVME_SC_INVALID_OPCODE			= 0x01,
	NVME_SC_INVALID_FIELD			= 0x02,
	NVME_SC_COMMAND_ID_CONFLICT		= 0x03,
	NVME_SC_DATA_TRANSFER_ERROR		= 0x04,
	NVME_SC_ABORTED_POWER_LOSS		= 0x05,
	NVME_SC_INTERNAL_DEVICE_ERROR		= 0x06,
	NVME_SC_ABORTED_BY_REQUEST		= 0x07,
	NVME_SC_ABORTED_SQ_DELETION		= 0x08,
	NVME_SC_ABORTED_FAILED_FUSED		= 0x09,
	NVME_SC_ABORTED_MISSING_FUSED		= 0x0a,
	NVME_SC_INVALID_NAMESPACE_OR_FORMAT	= 0x0b,
	NVME_SC_COMMAND_SEQUENCE_ERROR		= 0x0c,

	NVME_SC_LBA_OUT_OF_RANGE		= 0x80,
	NVME_SC_CAPACITY_EXCEEDED		= 0x81,
	NVME_SC_NAMESPACE_NOT_READY		= 0x82,
};

/* command specific status codes */
enum nvme_command_specific_status_code {
	NVME_SC_COMPLETION_QUEUE_INVALID	= 0x00,
	NVME_SC_INVALID_QUEUE_IDENTIFIER	= 0x01,
	NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED	= 0x02,
	NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED	= 0x03,
	/* 0x04 - reserved */
	NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED = 0x05,
	NVME_SC_INVALID_FIRMWARE_SLOT		= 0x06,
	NVME_SC_INVALID_FIRMWARE_IMAGE		= 0x07,
	NVME_SC_INVALID_INTERRUPT_VECTOR	= 0x08,
	NVME_SC_INVALID_LOG_PAGE		= 0x09,
	NVME_SC_INVALID_FORMAT			= 0x0a,
	NVME_SC_FIRMWARE_REQUIRES_RESET		= 0x0b,

	NVME_SC_CONFLICTING_ATTRIBUTES		= 0x80,
	NVME_SC_INVALID_PROTECTION_INFO		= 0x81,
	NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE	= 0x82,
};

/* media error status codes */
enum nvme_media_error_status_code {
	NVME_SC_WRITE_FAULTS			= 0x80,
	NVME_SC_UNRECOVERED_READ_ERROR		= 0x81,
	NVME_SC_GUARD_CHECK_ERROR		= 0x82,
	NVME_SC_APPLICATION_TAG_CHECK_ERROR	= 0x83,
	NVME_SC_REFERENCE_TAG_CHECK_ERROR	= 0x84,
	NVME_SC_COMPARE_FAILURE			= 0x85,
	NVME_SC_ACCESS_DENIED			= 0x86,
};

/* admin opcodes */
enum nvme_admin_opcode {
	NVME_OPC_DELETE_IO_SQ			= 0x00,
	NVME_OPC_CREATE_IO_SQ			= 0x01,
	NVME_OPC_GET_LOG_PAGE			= 0x02,
	/* 0x03 - reserved */
	NVME_OPC_DELETE_IO_CQ			= 0x04,
	NVME_OPC_CREATE_IO_CQ			= 0x05,
	NVME_OPC_IDENTIFY			= 0x06,
	/* 0x07 - reserved */
	NVME_OPC_ABORT				= 0x08,
	NVME_OPC_SET_FEATURES			= 0x09,
	NVME_OPC_GET_FEATURES			= 0x0a,
	/* 0x0b - reserved */
	NVME_OPC_ASYNC_EVENT_REQUEST		= 0x0c,
	NVME_OPC_NAMESPACE_MANAGEMENT		= 0x0d,
	/* 0x0e-0x0f - reserved */
	NVME_OPC_FIRMWARE_ACTIVATE		= 0x10,
	NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD	= 0x11,
	NVME_OPC_NAMESPACE_ATTACHMENT		= 0x15,

	NVME_OPC_FORMAT_NVM			= 0x80,
	NVME_OPC_SECURITY_SEND			= 0x81,
	NVME_OPC_SECURITY_RECEIVE		= 0x82,
};

/* nvme nvm opcodes */
enum nvme_nvm_opcode {
	NVME_OPC_FLUSH				= 0x00,
	NVME_OPC_WRITE				= 0x01,
	NVME_OPC_READ				= 0x02,
	/* 0x03 - reserved */
	NVME_OPC_WRITE_UNCORRECTABLE		= 0x04,
	NVME_OPC_COMPARE			= 0x05,
	/* 0x06-0x07 - reserved */
	NVME_OPC_DATASET_MANAGEMENT		= 0x09,
};

/* feature identifiers */
enum nvme_feature {
	/* 0x00 - reserved */
	NVME_FEAT_ARBITRATION			= 0x01,
	NVME_FEAT_POWER_MANAGEMENT		= 0x02,
	NVME_FEAT_LBA_RANGE_TYPE		= 0x03,
	NVME_FEAT_TEMPERATURE_THRESHOLD		= 0x04,
	NVME_FEAT_ERROR_RECOVERY		= 0x05,
	NVME_FEAT_VOLATILE_WRITE_CACHE		= 0x06,
	NVME_FEAT_NUMBER_OF_QUEUES		= 0x07,
	NVME_FEAT_INTERRUPT_COALESCING		= 0x08,
	NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION = 0x09,
	NVME_FEAT_WRITE_ATOMICITY		= 0x0A,
	NVME_FEAT_ASYNC_EVENT_CONFIGURATION	= 0x0B,
	NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION = 0x0C,
	NVME_FEAT_HOST_MEMORY_BUFFER		= 0x0D,
	NVME_FEAT_TIMESTAMP			= 0x0E,
	NVME_FEAT_KEEP_ALIVE_TIMER		= 0x0F,
	NVME_FEAT_HOST_CONTROLLED_THERMAL_MGMT	= 0x10,
	NVME_FEAT_NON_OP_POWER_STATE_CONFIG	= 0x11,
	/* 0x12-0x77 - reserved */
	/* 0x78-0x7f - NVMe Management Interface */
	NVME_FEAT_SOFTWARE_PROGRESS_MARKER	= 0x80,
	/* 0x81-0xBF - command set specific (reserved) */
	/* 0xC0-0xFF - vendor specific */
};

/* dataset management attribute bits */
enum nvme_dsm_attribute {
	NVME_DSM_ATTR_INTEGRAL_READ		= 0x1,
	NVME_DSM_ATTR_INTEGRAL_WRITE		= 0x2,
	NVME_DSM_ATTR_DEALLOCATE		= 0x4,
};

/* firmware activate actions */
enum nvme_activate_action {
	NVME_AA_REPLACE_NO_ACTIVATE		= 0x0,
	NVME_AA_REPLACE_ACTIVATE		= 0x1,
	NVME_AA_ACTIVATE			= 0x2,
};

/* Power state descriptor: 32 bytes each. */
struct nvme_power_state {
	/** Maximum Power */
	uint16_t	mp;			/* Maximum Power */
	uint8_t		ps_rsvd1;
	uint8_t		mps      : 1;		/* Max Power Scale */
	uint8_t		nops     : 1;		/* Non-Operational State */
	uint8_t		ps_rsvd2 : 6;
	uint32_t	enlat;			/* Entry Latency */
	uint32_t	exlat;			/* Exit Latency */
	uint8_t		rrt      : 5;		/* Relative Read Throughput */
	uint8_t		ps_rsvd3 : 3;
	uint8_t		rrl      : 5;		/* Relative Read Latency */
	uint8_t		ps_rsvd4 : 3;
	uint8_t		rwt      : 5;		/* Relative Write Throughput */
	uint8_t		ps_rsvd5 : 3;
	uint8_t		rwl      : 5;		/* Relative Write Latency */
	uint8_t		ps_rsvd6 : 3;
	uint16_t	idlp;			/* Idle Power */
	uint8_t		ps_rsvd7 : 6;
	uint8_t		ips      : 2;		/* Idle Power Scale */
	uint8_t		ps_rsvd8;
	uint16_t	actp;			/* Active Power */
	uint8_t		apw      : 3;		/* Active Power Workload */
	uint8_t		ps_rsvd9 : 3;
	uint8_t		aps      : 2;		/* Active Power Scale */
	uint8_t		ps_rsvd10[9];
} __packed;

_Static_assert(sizeof(struct nvme_power_state) == 32, "bad size for nvme_power_state");

#define	NVME_SERIAL_NUMBER_LENGTH	20
#define	NVME_MODEL_NUMBER_LENGTH	40
#define	NVME_FIRMWARE_REVISION_LENGTH	8

/* Controller data structure: 4096 bytes. */
struct nvme_controller_data {

	/* bytes 0-255: controller capabilities and features */

	/** pci vendor id */
	uint16_t		vid;

	/** pci subsystem vendor id */
	uint16_t		ssvid;

	/** serial number */
	uint8_t			sn[NVME_SERIAL_NUMBER_LENGTH];

	/** model number */
	uint8_t			mn[NVME_MODEL_NUMBER_LENGTH];

	/** firmware revision */
	uint8_t			fr[NVME_FIRMWARE_REVISION_LENGTH];

	/** recommended arbitration burst */
	uint8_t			rab;

	/** ieee oui identifier */
	uint8_t			ieee[3];

	/** multi-interface capabilities */
	uint8_t			mic;

	/** maximum data transfer size */
	uint8_t			mdts;

	/** Controller ID */
	uint16_t		ctrlr_id;

	/** Version */
	uint32_t		ver;

	/** RTD3 Resume Latency */
	uint32_t		rtd3r;

	/** RTD3 Enter Latency */
	uint32_t		rtd3e;

	/** Optional Asynchronous Events Supported */
	uint32_t		oaes;	/* bitfield really */

	/** Controller Attributes */
	uint32_t		ctratt;	/* bitfield really */

	uint8_t			reserved1[12];

	/** FRU Globally Unique Identifier */
	uint8_t			fguid[16];

	uint8_t			reserved2[128];

	/* bytes 256-511: admin command set attributes */

	/** optional admin command support */
	struct {
		/* supports security send/receive commands */
		uint16_t	security  : 1;

		/* supports format nvm command */
		uint16_t	format    : 1;

		/* supports firmware activate/download commands */
		uint16_t	firmware  : 1;

		/* supports namespace management commands */
		uint16_t	nsmgmt	  : 1;

		uint16_t	oacs_rsvd : 12;
	} __packed oacs;

	/** abort command limit */
	uint8_t			acl;

	/** asynchronous event request limit */
	uint8_t			aerl;

	/** firmware updates */
	struct {
		/* first slot is read-only */
		uint8_t		slot1_ro  : 1;

		/* number of firmware slots */
		uint8_t		num_slots : 3;

		uint8_t		frmw_rsvd : 4;
	} __packed frmw;

	/** log page attributes */
	struct {
		/* per namespace smart/health log page */
		uint8_t		ns_smart : 1;

		uint8_t		lpa_rsvd : 7;
	} __packed lpa;

	/** error log page entries */
	uint8_t			elpe;

	/** number of power states supported */
	uint8_t			npss;

	/** admin vendor specific command configuration */
	struct {
		/* admin vendor specific commands use spec format */
		uint8_t		spec_format : 1;

		uint8_t		avscc_rsvd  : 7;
	} __packed avscc;

	/** Autonomous Power State Transition Attributes */
	struct {
		/* Autonomous Power State Transitions supported */
		uint8_t		apst_supp : 1;

		uint8_t		apsta_rsvd : 7;
	} __packed apsta;

	/** Warning Composite Temperature Threshold */
	uint16_t		wctemp;

	/** Critical Composite Temperature Threshold */
	uint16_t		cctemp;

	/** Maximum Time for Firmware Activation */
	uint16_t		mtfa;

	/** Host Memory Buffer Preferred Size */
	uint32_t		hmpre;

	/** Host Memory Buffer Minimum Size */
	uint32_t		hmmin;

	/** Name space capabilities  */
	struct {
		/* if nsmgmt, report tnvmcap and unvmcap */
		uint8_t    tnvmcap[16];
		uint8_t    unvmcap[16];
	} __packed untncap;

	/** Replay Protected Memory Block Support */
	uint32_t		rpmbs; /* Really a bitfield */

	/** Extended Device Self-test Time */
	uint16_t		edstt;

	/** Device Self-test Options */
	uint8_t			dsto; /* Really a bitfield */

	/** Firmware Update Granularity */
	uint8_t			fwug;

	/** Keep Alive Support */
	uint16_t		kas;

	/** Host Controlled Thermal Management Attributes */
	uint16_t		hctma; /* Really a bitfield */

	/** Minimum Thermal Management Temperature */
	uint16_t		mntmt;

	/** Maximum Thermal Management Temperature */
	uint16_t		mxtmt;

	/** Sanitize Capabilities */
	uint32_t		sanicap; /* Really a bitfield */

	uint8_t			reserved3[180];
	/* bytes 512-703: nvm command set attributes */

	/** submission queue entry size */
	struct {
		uint8_t		min : 4;
		uint8_t		max : 4;
	} __packed sqes;

	/** completion queue entry size */
	struct {
		uint8_t		min : 4;
		uint8_t		max : 4;
	} __packed cqes;

	/** Maximum Outstanding Commands */
	uint16_t		maxcmd;

	/** number of namespaces */
	uint32_t		nn;

	/** optional nvm command support */
	struct {
		uint16_t	compare : 1;
		uint16_t	write_unc : 1;
		uint16_t	dsm: 1;
		uint16_t	reserved: 13;
	} __packed oncs;

	/** fused operation support */
	uint16_t		fuses;

	/** format nvm attributes */
	uint8_t			fna;

	/** volatile write cache */
	struct {
		uint8_t		present : 1;
		uint8_t		reserved : 7;
	} __packed vwc;

	/* TODO: flesh out remaining nvm command set attributes */
	uint8_t			reserved5[178];

	/* bytes 704-2047: i/o command set attributes */
	uint8_t			reserved6[1344];

	/* bytes 2048-3071: power state descriptors */
	struct nvme_power_state power_state[32];

	/* bytes 3072-4095: vendor specific */
	uint8_t			vs[1024];
} __packed __aligned(4);

_Static_assert(sizeof(struct nvme_controller_data) == 4096, "bad size for nvme_controller_data");

/* Namespace data structure: 4096 bytes. */
struct nvme_namespace_data {

	/** namespace size */
	uint64_t		nsze;

	/** namespace capacity */
	uint64_t		ncap;

	/** namespace utilization */
	uint64_t		nuse;

	/** namespace features */
	struct {
		/** thin provisioning */
		uint8_t		thin_prov : 1;
		uint8_t		reserved1 : 7;
	} __packed nsfeat;

	/** number of lba formats */
	uint8_t			nlbaf;

	/** formatted lba size */
	struct {
		uint8_t		format    : 4;
		uint8_t		extended  : 1;
		uint8_t		reserved2 : 3;
	} __packed flbas;

	/** metadata capabilities */
	struct {
		/* metadata can be transferred as part of data prp list */
		uint8_t		extended  : 1;

		/* metadata can be transferred with separate metadata pointer */
		uint8_t		pointer   : 1;

		uint8_t		reserved3 : 6;
	} __packed mc;

	/** end-to-end data protection capabilities */
	struct {
		/* protection information type 1 */
		uint8_t		pit1     : 1;

		/* protection information type 2 */
		uint8_t		pit2     : 1;

		/* protection information type 3 */
		uint8_t		pit3     : 1;

		/* first eight bytes of metadata */
		uint8_t		md_start : 1;

		/* last eight bytes of metadata */
		uint8_t		md_end   : 1;
	} __packed dpc;

	/** end-to-end data protection type settings */
	struct {
		/* protection information type */
		uint8_t		pit       : 3;

		/* 1 == protection info transferred at start of metadata */
		/* 0 == protection info transferred at end of metadata */
		uint8_t		md_start  : 1;

		uint8_t		reserved4 : 4;
	} __packed dps;

	uint8_t			reserved5[98];

	/** lba format support */
	struct {
		/** metadata size */
		uint32_t	ms	  : 16;

		/** lba data size */
		uint32_t	lbads	  : 8;

		/** relative performance */
		uint32_t	rp	  : 2;

		uint32_t	reserved6 : 6;
	} __packed lbaf[16];

	uint8_t			reserved6[192];

	uint8_t			vendor_specific[3712];
} __packed __aligned(4);

_Static_assert(sizeof(struct nvme_namespace_data) == 4096, "bad size for nvme_namespace_data");

/* log page identifiers */
enum nvme_log_page {

	/* 0x00 - reserved */
	NVME_LOG_ERROR			= 0x01,
	NVME_LOG_HEALTH_INFORMATION	= 0x02,
	NVME_LOG_FIRMWARE_SLOT		= 0x03,
	NVME_LOG_CHANGED_NAMESPACE	= 0x04,
	NVME_LOG_COMMAND_EFFECT		= 0x05,
	/* 0x06-0x7F - reserved */
	/* 0x80-0xBF - I/O command set specific */
	NVME_LOG_RES_NOTIFICATION	= 0x80,
	/* 0xC0-0xFF - vendor specific */

	/*
	 * The following are Intel Specific log pages, but they seem
	 * to be widely implemented.
	 */
	INTEL_LOG_READ_LAT_LOG		= 0xc1,
	INTEL_LOG_WRITE_LAT_LOG		= 0xc2,
	INTEL_LOG_TEMP_STATS		= 0xc5,
	INTEL_LOG_ADD_SMART		= 0xca,
	INTEL_LOG_DRIVE_MKT_NAME	= 0xdd,

	/*
	 * HGST log page, with lots of sub pages.  Note that it has the
	 * same value as INTEL_LOG_READ_LAT_LOG above.
	 */
	HGST_INFO_LOG			= 0xc1,
};

/* One entry of the error log page: 64 bytes. */
struct nvme_error_information_entry {

	uint64_t		error_count;
	uint16_t		sqid;
	uint16_t		cid;
	struct nvme_status	status;
	uint16_t		error_location;
	uint64_t		lba;
	uint32_t		nsid;
	uint8_t			vendor_specific;
	uint8_t			reserved[35];
} __packed __aligned(4);

_Static_assert(sizeof(struct nvme_error_information_entry) == 64, "bad size for nvme_error_information_entry");

/* Critical warning byte of the health information page. */
union nvme_critical_warning_state {

	uint8_t		raw;

	struct {
		uint8_t	available_spare		: 1;
		uint8_t	temperature		: 1;
		uint8_t	device_reliability	: 1;
		uint8_t	read_only		: 1;
		uint8_t	volatile_memory_backup	: 1;
		uint8_t	reserved		: 3;
	} __packed bits;
} __packed;

_Static_assert(sizeof(union nvme_critical_warning_state) == 1, "bad size for nvme_critical_warning_state");

/* Health information log page: 512 bytes. */
struct nvme_health_information_page {

	union nvme_critical_warning_state	critical_warning;

	uint16_t		temperature;
	uint8_t			available_spare;
	uint8_t			available_spare_threshold;
	uint8_t			percentage_used;

	uint8_t			reserved[26];

	/*
	 * Note that the following are 128-bit values, but are
	 *  defined as an array of 2 64-bit values.
	 */
	/* Data Units Read is always in 512-byte units. */
	uint64_t		data_units_read[2];
	/* Data Units Written is always in 512-byte units. */
	uint64_t		data_units_written[2];
	/* For NVM command set, this includes Compare commands. */
	uint64_t		host_read_commands[2];
	uint64_t		host_write_commands[2];
	/* Controller Busy Time is reported in minutes. */
	uint64_t		controller_busy_time[2];
	uint64_t		power_cycles[2];
	uint64_t		power_on_hours[2];
	uint64_t		unsafe_shutdowns[2];
	uint64_t		media_errors[2];
	uint64_t		num_error_info_log_entries[2];
	uint32_t		warning_temp_time;
	uint32_t		error_temp_time;
	uint16_t		temp_sensor[8];

	uint8_t			reserved2[296];
} __packed __aligned(4);

_Static_assert(sizeof(struct nvme_health_information_page) == 512, "bad size for nvme_health_information_page");

/* Firmware slot log page: 512 bytes. */
struct nvme_firmware_page {

	struct {
		uint8_t	slot		: 3; /* slot for current FW */
		uint8_t	reserved	: 5;
	} __packed afi;

	uint8_t			reserved[7];
	uint64_t		revision[7]; /* revisions for 7 slots */
	uint8_t			reserved2[448];
} __packed __aligned(4);

_Static_assert(sizeof(struct nvme_firmware_page) == 512, "bad size for nvme_firmware_page");

/* Payload of the INTEL_LOG_TEMP_STATS log page: thirteen 64-bit values. */
struct intel_log_temp_stats
{
	uint64_t	current;
	uint64_t	overtemp_flag_last;
	uint64_t	overtemp_flag_life;
	uint64_t	max_temp;
	uint64_t	min_temp;
	uint64_t	_rsvd[5];
	uint64_t	max_oper_temp;
	uint64_t	min_oper_temp;
	uint64_t	est_offset;
} __packed __aligned(4);

_Static_assert(sizeof(struct intel_log_temp_stats) == 13 * 8, "bad size for intel_log_temp_stats");

#define	NVME_TEST_MAX_THREADS	128

/* Argument structure for the NVME_IO_TEST and NVME_BIO_TEST ioctls. */
struct nvme_io_test {

	enum nvme_nvm_opcode	opc;
	uint32_t		size;
	uint32_t		time;	/* in seconds */
	uint32_t		num_threads;
	uint32_t		flags;
	uint64_t		io_completed[NVME_TEST_MAX_THREADS];
};

enum nvme_io_test_flags {

	/*
	 * Specifies whether dev_refthread/dev_relthread should be
	 *  called during NVME_BIO_TEST.  Ignored for other test
	 *  types.
	 */
	NVME_TEST_FLAG_REFTHREAD =	0x1,
};

/* Argument structure for the NVME_PASSTHROUGH_CMD ioctl. */
struct nvme_pt_command {

	/*
	 * cmd is used to specify a passthrough command to a controller or
	 *  namespace.
	 *
	 * The following fields from cmd may be specified by the caller:
	 *	* opc  (opcode)
	 *	* nsid (namespace id) - for admin commands only
	 *	* cdw10-cdw15
	 *
	 * Remaining fields must be set to 0 by the caller.
	 */
	struct nvme_command	cmd;

	/*
	 * cpl returns completion status for the passthrough command
	 *  specified by cmd.
	 *
	 * The following fields will be filled out by the driver, for
	 *  consumption by the caller:
	 *	* cdw0
	 *	* status (except for phase)
	 *
	 * Remaining fields will be set to 0 by the driver.
	 */
	struct nvme_completion	cpl;

	/* buf is the data buffer associated with this passthrough command. */
	void *			buf;

	/*
	 * len is the length of the data buffer associated with this
	 *  passthrough command.
	 */
	uint32_t		len;

	/*
	 * is_read = 1 if the passthrough command will read data into the
	 *  supplied buffer from the controller.
	 *
	 * is_read = 0 if the passthrough command will write data from the
	 *  supplied buffer to the controller.
	 */
	uint32_t		is_read;

	/*
	 * driver_lock is used by the driver only.  It must be set to 0
	 *  by the caller.
	 */
	struct mtx *		driver_lock;
};

/*
 * Evaluates to non-zero when the completion pointed to by cpl carries a
 *  non-zero status code or status code type.  cpl is evaluated twice.
 */
#define nvme_completion_is_error(cpl)					\
	((cpl)->status.sc != 0 || (cpl)->status.sct != 0)

void	nvme_strvis(uint8_t *dst, const uint8_t *src, int dstlen, int srclen);

#ifdef _KERNEL

struct bio;

struct nvme_namespace;
struct nvme_controller;
struct nvme_consumer;

typedef void (*nvme_cb_fn_t)(void *, const struct nvme_completion *);

typedef void *(*nvme_cons_ns_fn_t)(struct nvme_namespace *, void *);
typedef void *(*nvme_cons_ctrlr_fn_t)(struct nvme_controller *);
typedef void (*nvme_cons_async_fn_t)(void *, const struct nvme_completion *,
				     uint32_t, void *, uint32_t);
typedef void (*nvme_cons_fail_fn_t)(void *);

enum nvme_namespace_flags {
	NVME_NS_DEALLOCATE_SUPPORTED	= 0x1,
	NVME_NS_FLUSH_SUPPORTED		= 0x2,
};

int	nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
				   struct nvme_pt_command *pt,
				   uint32_t nsid, int is_user_buffer,
				   int is_admin_cmd);

/* Admin functions */
void	nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr,
				   uint8_t feature, uint32_t cdw11,
				   void *payload, uint32_t payload_size,
				   nvme_cb_fn_t cb_fn, void *cb_arg);
void	nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr,
				   uint8_t feature, uint32_t cdw11,
				   void *payload, uint32_t payload_size,
				   nvme_cb_fn_t cb_fn, void *cb_arg);
void	nvme_ctrlr_cmd_get_log_page(struct nvme_controller *ctrlr,
				    uint8_t log_page, uint32_t nsid,
				    void *payload, uint32_t payload_size,
				    nvme_cb_fn_t cb_fn, void *cb_arg);

/* NVM I/O functions */
int	nvme_ns_cmd_write(struct nvme_namespace *ns, void *payload,
			  uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
			  void *cb_arg);
int	nvme_ns_cmd_write_bio(struct nvme_namespace *ns, struct bio *bp,
			      nvme_cb_fn_t cb_fn, void *cb_arg);
int	nvme_ns_cmd_read(struct nvme_namespace *ns, void *payload,
			 uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
			 void *cb_arg);
int	nvme_ns_cmd_read_bio(struct nvme_namespace *ns, struct bio *bp,
			      nvme_cb_fn_t cb_fn, void *cb_arg);
int	nvme_ns_cmd_deallocate(struct nvme_namespace *ns, void *payload,
			       uint8_t num_ranges, nvme_cb_fn_t cb_fn,
			       void *cb_arg);
int	nvme_ns_cmd_flush(struct nvme_namespace *ns, nvme_cb_fn_t cb_fn,
			  void *cb_arg);
int	nvme_ns_dump(struct nvme_namespace *ns, void *virt, off_t offset,
		     size_t len);

/* Registration functions */
struct nvme_consumer *	nvme_register_consumer(nvme_cons_ns_fn_t    ns_fn,
					       nvme_cons_ctrlr_fn_t ctrlr_fn,
					       nvme_cons_async_fn_t async_fn,
					       nvme_cons_fail_fn_t  fail_fn);
void		nvme_unregister_consumer(struct nvme_consumer *consumer);

/* Controller helper functions */
device_t	nvme_ctrlr_get_device(struct nvme_controller *ctrlr);
const struct nvme_controller_data *
		nvme_ctrlr_get_data(struct nvme_controller *ctrlr);

/* Namespace helper functions */
uint32_t	nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns);
uint32_t	nvme_ns_get_sector_size(struct nvme_namespace *ns);
uint64_t	nvme_ns_get_num_sectors(struct nvme_namespace *ns);
uint64_t	nvme_ns_get_size(struct nvme_namespace *ns);
uint32_t	nvme_ns_get_flags(struct nvme_namespace *ns);
const char *	nvme_ns_get_serial_number(struct nvme_namespace *ns);
const char *	nvme_ns_get_model_number(struct nvme_namespace *ns);
const struct nvme_namespace_data *
		nvme_ns_get_data(struct nvme_namespace *ns);
uint32_t	nvme_ns_get_stripesize(struct nvme_namespace *ns);

int	nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
			    nvme_cb_fn_t cb_fn);

/*
 * Command building helper functions -- shared with CAM
 * These functions assume allocator zeros out cmd structure
 * CAM's xpt_get_ccb and the request allocator for nvme both
 * do zero'd allocations.
 */

/* Build a flush command: only opc and nsid are written. */
static inline
void	nvme_ns_flush_cmd(struct nvme_command *cmd, uint32_t nsid)
{

	cmd->opc = NVME_OPC_FLUSH;
	cmd->nsid = nsid;
}

/*
 * Build a read/write style command with opcode rwcmd.  The low and high
 * halves of lba go to cdw10 and cdw11.  cdw12 receives count - 1, so a
 * count of 0 wraps to 0xffffffff; callers must pass count >= 1.
 */
static inline
void	nvme_ns_rw_cmd(struct nvme_command *cmd, uint32_t rwcmd, uint32_t nsid,
    uint64_t lba, uint32_t count)
{
	cmd->opc = rwcmd;
	cmd->nsid = nsid;
	cmd->cdw10 = lba & 0xffffffffu;
	cmd->cdw11 = lba >> 32;
	cmd->cdw12 = count-1;
}

/* Build a write command; see nvme_ns_rw_cmd for the field layout. */
static inline
void	nvme_ns_write_cmd(struct nvme_command *cmd, uint32_t nsid,
    uint64_t lba, uint32_t count)
{
	nvme_ns_rw_cmd(cmd, NVME_OPC_WRITE, nsid, lba, count);
}

/* Build a read command; see nvme_ns_rw_cmd for the field layout. */
static inline
void	nvme_ns_read_cmd(struct nvme_command *cmd, uint32_t nsid,
    uint64_t lba, uint32_t count)
{
	nvme_ns_rw_cmd(cmd, NVME_OPC_READ, nsid, lba, count);
}

/*
 * Build a dataset management command with the deallocate attribute.
 * cdw10 receives num_ranges - 1, so num_ranges of 0 wraps to 0xffffffff;
 * callers must pass num_ranges >= 1.
 */
static inline
void	nvme_ns_trim_cmd(struct nvme_command *cmd, uint32_t nsid,
    uint32_t num_ranges)
{
	cmd->opc = NVME_OPC_DATASET_MANAGEMENT;
	cmd->nsid = nsid;
	cmd->cdw10 = num_ranges - 1;
	cmd->cdw11 = NVME_DSM_ATTR_DEALLOCATE;
}

extern int nvme_use_nvd;

#endif /* _KERNEL */

#endif /* __NVME_H__ */