14c87aefeSPatrick Mooney /*- 24c87aefeSPatrick Mooney * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 34c87aefeSPatrick Mooney * 44c87aefeSPatrick Mooney * Copyright (c) 2017 Shunsuke Mie 54c87aefeSPatrick Mooney * Copyright (c) 2018 Leon Dang 6*6960cd89SAndy Fiddaman * Copyright (c) 2020 Chuck Tuffli 74c87aefeSPatrick Mooney * 884659b24SMichael Zeller * Function crc16 Copyright (c) 2017, Fedor Uporov 984659b24SMichael Zeller * Obtained from function ext2_crc16() in sys/fs/ext2fs/ext2_csum.c 1084659b24SMichael Zeller * 114c87aefeSPatrick Mooney * Redistribution and use in source and binary forms, with or without 124c87aefeSPatrick Mooney * modification, are permitted provided that the following conditions 134c87aefeSPatrick Mooney * are met: 144c87aefeSPatrick Mooney * 1. Redistributions of source code must retain the above copyright 154c87aefeSPatrick Mooney * notice, this list of conditions and the following disclaimer. 164c87aefeSPatrick Mooney * 2. Redistributions in binary form must reproduce the above copyright 174c87aefeSPatrick Mooney * notice, this list of conditions and the following disclaimer in the 184c87aefeSPatrick Mooney * documentation and/or other materials provided with the distribution. 194c87aefeSPatrick Mooney * 204c87aefeSPatrick Mooney * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 214c87aefeSPatrick Mooney * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 224c87aefeSPatrick Mooney * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 234c87aefeSPatrick Mooney * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 244c87aefeSPatrick Mooney * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 254c87aefeSPatrick Mooney * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 264c87aefeSPatrick Mooney * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 274c87aefeSPatrick Mooney * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 284c87aefeSPatrick Mooney * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 294c87aefeSPatrick Mooney * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 304c87aefeSPatrick Mooney * SUCH DAMAGE. 314c87aefeSPatrick Mooney */ 324c87aefeSPatrick Mooney 334c87aefeSPatrick Mooney /* 344c87aefeSPatrick Mooney * bhyve PCIe-NVMe device emulation. 354c87aefeSPatrick Mooney * 364c87aefeSPatrick Mooney * options: 37*6960cd89SAndy Fiddaman * -s <n>,nvme,devpath,maxq=#,qsz=#,ioslots=#,sectsz=#,ser=A-Z,eui64=#,dsm=<opt> 384c87aefeSPatrick Mooney * 394c87aefeSPatrick Mooney * accepted devpath: 404c87aefeSPatrick Mooney * /dev/blockdev 414c87aefeSPatrick Mooney * /path/to/image 424c87aefeSPatrick Mooney * ram=size_in_MiB 434c87aefeSPatrick Mooney * 444c87aefeSPatrick Mooney * maxq = max number of queues 454c87aefeSPatrick Mooney * qsz = max elements in each queue 464c87aefeSPatrick Mooney * ioslots = max number of concurrent io requests 474c87aefeSPatrick Mooney * sectsz = sector size (defaults to blockif sector size) 484c87aefeSPatrick Mooney * ser = serial number (20-chars max) 4984659b24SMichael Zeller * eui64 = IEEE Extended Unique Identifier (8 byte value) 50*6960cd89SAndy Fiddaman * dsm = DataSet Management support. 
Option is one of auto, enable,disable 514c87aefeSPatrick Mooney * 524c87aefeSPatrick Mooney */ 534c87aefeSPatrick Mooney 544c87aefeSPatrick Mooney /* TODO: 554c87aefeSPatrick Mooney - create async event for smart and log 564c87aefeSPatrick Mooney - intr coalesce 574c87aefeSPatrick Mooney */ 584c87aefeSPatrick Mooney 594c87aefeSPatrick Mooney #include <sys/cdefs.h> 604c87aefeSPatrick Mooney __FBSDID("$FreeBSD$"); 614c87aefeSPatrick Mooney 62*6960cd89SAndy Fiddaman #include <sys/errno.h> 634c87aefeSPatrick Mooney #include <sys/types.h> 6484659b24SMichael Zeller #include <net/ieee_oui.h> 6584659b24SMichael Zeller #ifndef __FreeBSD__ 6684659b24SMichael Zeller #include <endian.h> 6784659b24SMichael Zeller #endif 684c87aefeSPatrick Mooney 694c87aefeSPatrick Mooney #include <assert.h> 704c87aefeSPatrick Mooney #include <pthread.h> 714c87aefeSPatrick Mooney #include <semaphore.h> 724c87aefeSPatrick Mooney #include <stdbool.h> 734c87aefeSPatrick Mooney #include <stddef.h> 744c87aefeSPatrick Mooney #include <stdint.h> 754c87aefeSPatrick Mooney #include <stdio.h> 764c87aefeSPatrick Mooney #include <stdlib.h> 774c87aefeSPatrick Mooney #include <string.h> 784c87aefeSPatrick Mooney 794c87aefeSPatrick Mooney #include <machine/atomic.h> 804c87aefeSPatrick Mooney #include <machine/vmm.h> 814c87aefeSPatrick Mooney #include <vmmapi.h> 824c87aefeSPatrick Mooney 834c87aefeSPatrick Mooney #include <dev/nvme/nvme.h> 844c87aefeSPatrick Mooney 854c87aefeSPatrick Mooney #include "bhyverun.h" 864c87aefeSPatrick Mooney #include "block_if.h" 87154972afSPatrick Mooney #include "debug.h" 884c87aefeSPatrick Mooney #include "pci_emul.h" 894c87aefeSPatrick Mooney 904c87aefeSPatrick Mooney 914c87aefeSPatrick Mooney static int nvme_debug = 0; 92*6960cd89SAndy Fiddaman #define DPRINTF(fmt, args...) if (nvme_debug) PRINTLN(fmt, ##args) 93*6960cd89SAndy Fiddaman #define WPRINTF(fmt, args...) 
PRINTLN(fmt, ##args) 944c87aefeSPatrick Mooney 954c87aefeSPatrick Mooney /* defaults; can be overridden */ 964c87aefeSPatrick Mooney #define NVME_MSIX_BAR 4 974c87aefeSPatrick Mooney 984c87aefeSPatrick Mooney #define NVME_IOSLOTS 8 994c87aefeSPatrick Mooney 1004c87aefeSPatrick Mooney /* The NVMe spec defines bits 13:4 in BAR0 as reserved */ 1014c87aefeSPatrick Mooney #define NVME_MMIO_SPACE_MIN (1 << 14) 1024c87aefeSPatrick Mooney 1034c87aefeSPatrick Mooney #define NVME_QUEUES 16 1044c87aefeSPatrick Mooney #define NVME_MAX_QENTRIES 2048 105*6960cd89SAndy Fiddaman /* Memory Page size Minimum reported in CAP register */ 106*6960cd89SAndy Fiddaman #define NVME_MPSMIN 0 107*6960cd89SAndy Fiddaman /* MPSMIN converted to bytes */ 108*6960cd89SAndy Fiddaman #define NVME_MPSMIN_BYTES (1 << (12 + NVME_MPSMIN)) 1094c87aefeSPatrick Mooney 1104c87aefeSPatrick Mooney #define NVME_PRP2_ITEMS (PAGE_SIZE/sizeof(uint64_t)) 111*6960cd89SAndy Fiddaman #define NVME_MDTS 9 112*6960cd89SAndy Fiddaman /* Note the + 1 allows for the initial descriptor to not be page aligned */ 113*6960cd89SAndy Fiddaman #define NVME_MAX_IOVEC ((1 << NVME_MDTS) + 1) 114*6960cd89SAndy Fiddaman #define NVME_MAX_DATA_SIZE ((1 << NVME_MDTS) * NVME_MPSMIN_BYTES) 1154c87aefeSPatrick Mooney 11684659b24SMichael Zeller /* This is a synthetic status code to indicate there is no status */ 11784659b24SMichael Zeller #define NVME_NO_STATUS 0xffff 11884659b24SMichael Zeller #define NVME_COMPLETION_VALID(c) ((c).status != NVME_NO_STATUS) 11984659b24SMichael Zeller 1204c87aefeSPatrick Mooney /* helpers */ 1214c87aefeSPatrick Mooney 1224c87aefeSPatrick Mooney /* Convert a zero-based value into a one-based value */ 1234c87aefeSPatrick Mooney #define ONE_BASED(zero) ((zero) + 1) 1244c87aefeSPatrick Mooney /* Convert a one-based value into a zero-based value */ 1254c87aefeSPatrick Mooney #define ZERO_BASED(one) ((one) - 1) 1264c87aefeSPatrick Mooney 1274c87aefeSPatrick Mooney /* Encode number of SQ's and CQ's for Set/Get 
Features */ 1284c87aefeSPatrick Mooney #define NVME_FEATURE_NUM_QUEUES(sc) \ 1294c87aefeSPatrick Mooney (ZERO_BASED((sc)->num_squeues) & 0xffff) | \ 1304c87aefeSPatrick Mooney (ZERO_BASED((sc)->num_cqueues) & 0xffff) << 16; 1314c87aefeSPatrick Mooney 1324c87aefeSPatrick Mooney #define NVME_DOORBELL_OFFSET offsetof(struct nvme_registers, doorbell) 1334c87aefeSPatrick Mooney 1344c87aefeSPatrick Mooney enum nvme_controller_register_offsets { 1354c87aefeSPatrick Mooney NVME_CR_CAP_LOW = 0x00, 1364c87aefeSPatrick Mooney NVME_CR_CAP_HI = 0x04, 1374c87aefeSPatrick Mooney NVME_CR_VS = 0x08, 1384c87aefeSPatrick Mooney NVME_CR_INTMS = 0x0c, 1394c87aefeSPatrick Mooney NVME_CR_INTMC = 0x10, 1404c87aefeSPatrick Mooney NVME_CR_CC = 0x14, 1414c87aefeSPatrick Mooney NVME_CR_CSTS = 0x1c, 1424c87aefeSPatrick Mooney NVME_CR_NSSR = 0x20, 1434c87aefeSPatrick Mooney NVME_CR_AQA = 0x24, 1444c87aefeSPatrick Mooney NVME_CR_ASQ_LOW = 0x28, 1454c87aefeSPatrick Mooney NVME_CR_ASQ_HI = 0x2c, 1464c87aefeSPatrick Mooney NVME_CR_ACQ_LOW = 0x30, 1474c87aefeSPatrick Mooney NVME_CR_ACQ_HI = 0x34, 1484c87aefeSPatrick Mooney }; 1494c87aefeSPatrick Mooney 1504c87aefeSPatrick Mooney enum nvme_cmd_cdw11 { 1514c87aefeSPatrick Mooney NVME_CMD_CDW11_PC = 0x0001, 1524c87aefeSPatrick Mooney NVME_CMD_CDW11_IEN = 0x0002, 1534c87aefeSPatrick Mooney NVME_CMD_CDW11_IV = 0xFFFF0000, 1544c87aefeSPatrick Mooney }; 1554c87aefeSPatrick Mooney 156154972afSPatrick Mooney enum nvme_copy_dir { 157154972afSPatrick Mooney NVME_COPY_TO_PRP, 158154972afSPatrick Mooney NVME_COPY_FROM_PRP, 159154972afSPatrick Mooney }; 160154972afSPatrick Mooney 1614c87aefeSPatrick Mooney #define NVME_CQ_INTEN 0x01 1624c87aefeSPatrick Mooney #define NVME_CQ_INTCOAL 0x02 1634c87aefeSPatrick Mooney 1644c87aefeSPatrick Mooney struct nvme_completion_queue { 1654c87aefeSPatrick Mooney struct nvme_completion *qbase; 166*6960cd89SAndy Fiddaman pthread_mutex_t mtx; 1674c87aefeSPatrick Mooney uint32_t size; 1684c87aefeSPatrick Mooney uint16_t tail; /* 
nvme progress */ 1694c87aefeSPatrick Mooney uint16_t head; /* guest progress */ 1704c87aefeSPatrick Mooney uint16_t intr_vec; 1714c87aefeSPatrick Mooney uint32_t intr_en; 1724c87aefeSPatrick Mooney }; 1734c87aefeSPatrick Mooney 1744c87aefeSPatrick Mooney struct nvme_submission_queue { 1754c87aefeSPatrick Mooney struct nvme_command *qbase; 176*6960cd89SAndy Fiddaman pthread_mutex_t mtx; 1774c87aefeSPatrick Mooney uint32_t size; 1784c87aefeSPatrick Mooney uint16_t head; /* nvme progress */ 1794c87aefeSPatrick Mooney uint16_t tail; /* guest progress */ 1804c87aefeSPatrick Mooney uint16_t cqid; /* completion queue id */ 1814c87aefeSPatrick Mooney int qpriority; 1824c87aefeSPatrick Mooney }; 1834c87aefeSPatrick Mooney 1844c87aefeSPatrick Mooney enum nvme_storage_type { 1854c87aefeSPatrick Mooney NVME_STOR_BLOCKIF = 0, 1864c87aefeSPatrick Mooney NVME_STOR_RAM = 1, 1874c87aefeSPatrick Mooney }; 1884c87aefeSPatrick Mooney 1894c87aefeSPatrick Mooney struct pci_nvme_blockstore { 1904c87aefeSPatrick Mooney enum nvme_storage_type type; 1914c87aefeSPatrick Mooney void *ctx; 1924c87aefeSPatrick Mooney uint64_t size; 1934c87aefeSPatrick Mooney uint32_t sectsz; 1944c87aefeSPatrick Mooney uint32_t sectsz_bits; 19584659b24SMichael Zeller uint64_t eui64; 196154972afSPatrick Mooney uint32_t deallocate:1; 1974c87aefeSPatrick Mooney }; 1984c87aefeSPatrick Mooney 199*6960cd89SAndy Fiddaman /* 200*6960cd89SAndy Fiddaman * Calculate the number of additional page descriptors for guest IO requests 201*6960cd89SAndy Fiddaman * based on the advertised Max Data Transfer (MDTS) and given the number of 202*6960cd89SAndy Fiddaman * default iovec's in a struct blockif_req. 203*6960cd89SAndy Fiddaman * 204*6960cd89SAndy Fiddaman * Note the + 1 allows for the initial descriptor to not be page aligned. 205*6960cd89SAndy Fiddaman */ 206*6960cd89SAndy Fiddaman #define MDTS_PAD_SIZE \ 207*6960cd89SAndy Fiddaman NVME_MAX_IOVEC > BLOCKIF_IOV_MAX ? 
\ 208*6960cd89SAndy Fiddaman NVME_MAX_IOVEC - BLOCKIF_IOV_MAX : \ 209*6960cd89SAndy Fiddaman 0 210*6960cd89SAndy Fiddaman 2114c87aefeSPatrick Mooney struct pci_nvme_ioreq { 2124c87aefeSPatrick Mooney struct pci_nvme_softc *sc; 213154972afSPatrick Mooney STAILQ_ENTRY(pci_nvme_ioreq) link; 2144c87aefeSPatrick Mooney struct nvme_submission_queue *nvme_sq; 2154c87aefeSPatrick Mooney uint16_t sqid; 2164c87aefeSPatrick Mooney 2174c87aefeSPatrick Mooney /* command information */ 2184c87aefeSPatrick Mooney uint16_t opc; 2194c87aefeSPatrick Mooney uint16_t cid; 2204c87aefeSPatrick Mooney uint32_t nsid; 2214c87aefeSPatrick Mooney 2224c87aefeSPatrick Mooney uint64_t prev_gpaddr; 2234c87aefeSPatrick Mooney size_t prev_size; 224*6960cd89SAndy Fiddaman size_t bytes; 2254c87aefeSPatrick Mooney 2264c87aefeSPatrick Mooney struct blockif_req io_req; 2274c87aefeSPatrick Mooney 228*6960cd89SAndy Fiddaman struct iovec iovpadding[MDTS_PAD_SIZE]; 2294c87aefeSPatrick Mooney }; 2304c87aefeSPatrick Mooney 231154972afSPatrick Mooney enum nvme_dsm_type { 232154972afSPatrick Mooney /* Dataset Management bit in ONCS reflects backing storage capability */ 233154972afSPatrick Mooney NVME_DATASET_MANAGEMENT_AUTO, 234154972afSPatrick Mooney /* Unconditionally set Dataset Management bit in ONCS */ 235154972afSPatrick Mooney NVME_DATASET_MANAGEMENT_ENABLE, 236154972afSPatrick Mooney /* Unconditionally clear Dataset Management bit in ONCS */ 237154972afSPatrick Mooney NVME_DATASET_MANAGEMENT_DISABLE, 238154972afSPatrick Mooney }; 239154972afSPatrick Mooney 240*6960cd89SAndy Fiddaman struct pci_nvme_softc; 241*6960cd89SAndy Fiddaman struct nvme_feature_obj; 242*6960cd89SAndy Fiddaman 243*6960cd89SAndy Fiddaman typedef void (*nvme_feature_cb)(struct pci_nvme_softc *, 244*6960cd89SAndy Fiddaman struct nvme_feature_obj *, 245*6960cd89SAndy Fiddaman struct nvme_command *, 246*6960cd89SAndy Fiddaman struct nvme_completion *); 247*6960cd89SAndy Fiddaman 248*6960cd89SAndy Fiddaman struct nvme_feature_obj { 
249*6960cd89SAndy Fiddaman uint32_t cdw11; 250*6960cd89SAndy Fiddaman nvme_feature_cb set; 251*6960cd89SAndy Fiddaman nvme_feature_cb get; 252*6960cd89SAndy Fiddaman bool namespace_specific; 253*6960cd89SAndy Fiddaman }; 254*6960cd89SAndy Fiddaman 255*6960cd89SAndy Fiddaman #define NVME_FID_MAX (NVME_FEAT_ENDURANCE_GROUP_EVENT_CONFIGURATION + 1) 256*6960cd89SAndy Fiddaman 257*6960cd89SAndy Fiddaman struct pci_nvme_aer { 258*6960cd89SAndy Fiddaman STAILQ_ENTRY(pci_nvme_aer) link; 259*6960cd89SAndy Fiddaman uint16_t cid; /* Command ID of the submitted AER */ 260*6960cd89SAndy Fiddaman }; 261*6960cd89SAndy Fiddaman 2624c87aefeSPatrick Mooney struct pci_nvme_softc { 2634c87aefeSPatrick Mooney struct pci_devinst *nsc_pi; 2644c87aefeSPatrick Mooney 2654c87aefeSPatrick Mooney pthread_mutex_t mtx; 2664c87aefeSPatrick Mooney 2674c87aefeSPatrick Mooney struct nvme_registers regs; 2684c87aefeSPatrick Mooney 2694c87aefeSPatrick Mooney struct nvme_namespace_data nsdata; 2704c87aefeSPatrick Mooney struct nvme_controller_data ctrldata; 2714c87aefeSPatrick Mooney struct nvme_error_information_entry err_log; 2724c87aefeSPatrick Mooney struct nvme_health_information_page health_log; 2734c87aefeSPatrick Mooney struct nvme_firmware_page fw_log; 2744c87aefeSPatrick Mooney 2754c87aefeSPatrick Mooney struct pci_nvme_blockstore nvstore; 2764c87aefeSPatrick Mooney 2774c87aefeSPatrick Mooney uint16_t max_qentries; /* max entries per queue */ 2784c87aefeSPatrick Mooney uint32_t max_queues; /* max number of IO SQ's or CQ's */ 2794c87aefeSPatrick Mooney uint32_t num_cqueues; 2804c87aefeSPatrick Mooney uint32_t num_squeues; 281*6960cd89SAndy Fiddaman bool num_q_is_set; /* Has host set Number of Queues */ 2824c87aefeSPatrick Mooney 2834c87aefeSPatrick Mooney struct pci_nvme_ioreq *ioreqs; 284154972afSPatrick Mooney STAILQ_HEAD(, pci_nvme_ioreq) ioreqs_free; /* free list of ioreqs */ 2854c87aefeSPatrick Mooney uint32_t pending_ios; 2864c87aefeSPatrick Mooney uint32_t ioslots; 2874c87aefeSPatrick 
Mooney sem_t iosemlock; 2884c87aefeSPatrick Mooney 2894c87aefeSPatrick Mooney /* 2904c87aefeSPatrick Mooney * Memory mapped Submission and Completion queues 2914c87aefeSPatrick Mooney * Each array includes both Admin and IO queues 2924c87aefeSPatrick Mooney */ 2934c87aefeSPatrick Mooney struct nvme_completion_queue *compl_queues; 2944c87aefeSPatrick Mooney struct nvme_submission_queue *submit_queues; 2954c87aefeSPatrick Mooney 296*6960cd89SAndy Fiddaman struct nvme_feature_obj feat[NVME_FID_MAX]; 297154972afSPatrick Mooney 298154972afSPatrick Mooney enum nvme_dsm_type dataset_management; 299*6960cd89SAndy Fiddaman 300*6960cd89SAndy Fiddaman /* Accounting for SMART data */ 301*6960cd89SAndy Fiddaman __uint128_t read_data_units; 302*6960cd89SAndy Fiddaman __uint128_t write_data_units; 303*6960cd89SAndy Fiddaman __uint128_t read_commands; 304*6960cd89SAndy Fiddaman __uint128_t write_commands; 305*6960cd89SAndy Fiddaman uint32_t read_dunits_remainder; 306*6960cd89SAndy Fiddaman uint32_t write_dunits_remainder; 307*6960cd89SAndy Fiddaman 308*6960cd89SAndy Fiddaman STAILQ_HEAD(, pci_nvme_aer) aer_list; 309*6960cd89SAndy Fiddaman uint32_t aer_count; 3104c87aefeSPatrick Mooney }; 3114c87aefeSPatrick Mooney 3124c87aefeSPatrick Mooney 313*6960cd89SAndy Fiddaman static struct pci_nvme_ioreq *pci_nvme_get_ioreq(struct pci_nvme_softc *); 314*6960cd89SAndy Fiddaman static void pci_nvme_release_ioreq(struct pci_nvme_softc *, struct pci_nvme_ioreq *); 315*6960cd89SAndy Fiddaman static void pci_nvme_io_done(struct blockif_req *, int); 3164c87aefeSPatrick Mooney 3174c87aefeSPatrick Mooney /* Controller Configuration utils */ 3184c87aefeSPatrick Mooney #define NVME_CC_GET_EN(cc) \ 3194c87aefeSPatrick Mooney ((cc) >> NVME_CC_REG_EN_SHIFT & NVME_CC_REG_EN_MASK) 3204c87aefeSPatrick Mooney #define NVME_CC_GET_CSS(cc) \ 3214c87aefeSPatrick Mooney ((cc) >> NVME_CC_REG_CSS_SHIFT & NVME_CC_REG_CSS_MASK) 3224c87aefeSPatrick Mooney #define NVME_CC_GET_SHN(cc) \ 3234c87aefeSPatrick Mooney ((cc) 
>> NVME_CC_REG_SHN_SHIFT & NVME_CC_REG_SHN_MASK) 3244c87aefeSPatrick Mooney #define NVME_CC_GET_IOSQES(cc) \ 3254c87aefeSPatrick Mooney ((cc) >> NVME_CC_REG_IOSQES_SHIFT & NVME_CC_REG_IOSQES_MASK) 3264c87aefeSPatrick Mooney #define NVME_CC_GET_IOCQES(cc) \ 3274c87aefeSPatrick Mooney ((cc) >> NVME_CC_REG_IOCQES_SHIFT & NVME_CC_REG_IOCQES_MASK) 3284c87aefeSPatrick Mooney 3294c87aefeSPatrick Mooney #define NVME_CC_WRITE_MASK \ 3304c87aefeSPatrick Mooney ((NVME_CC_REG_EN_MASK << NVME_CC_REG_EN_SHIFT) | \ 3314c87aefeSPatrick Mooney (NVME_CC_REG_IOSQES_MASK << NVME_CC_REG_IOSQES_SHIFT) | \ 3324c87aefeSPatrick Mooney (NVME_CC_REG_IOCQES_MASK << NVME_CC_REG_IOCQES_SHIFT)) 3334c87aefeSPatrick Mooney 3344c87aefeSPatrick Mooney #define NVME_CC_NEN_WRITE_MASK \ 3354c87aefeSPatrick Mooney ((NVME_CC_REG_CSS_MASK << NVME_CC_REG_CSS_SHIFT) | \ 3364c87aefeSPatrick Mooney (NVME_CC_REG_MPS_MASK << NVME_CC_REG_MPS_SHIFT) | \ 3374c87aefeSPatrick Mooney (NVME_CC_REG_AMS_MASK << NVME_CC_REG_AMS_SHIFT)) 3384c87aefeSPatrick Mooney 3394c87aefeSPatrick Mooney /* Controller Status utils */ 3404c87aefeSPatrick Mooney #define NVME_CSTS_GET_RDY(sts) \ 3414c87aefeSPatrick Mooney ((sts) >> NVME_CSTS_REG_RDY_SHIFT & NVME_CSTS_REG_RDY_MASK) 3424c87aefeSPatrick Mooney 3434c87aefeSPatrick Mooney #define NVME_CSTS_RDY (1 << NVME_CSTS_REG_RDY_SHIFT) 3444c87aefeSPatrick Mooney 3454c87aefeSPatrick Mooney /* Completion Queue status word utils */ 3464c87aefeSPatrick Mooney #define NVME_STATUS_P (1 << NVME_STATUS_P_SHIFT) 3474c87aefeSPatrick Mooney #define NVME_STATUS_MASK \ 3484c87aefeSPatrick Mooney ((NVME_STATUS_SCT_MASK << NVME_STATUS_SCT_SHIFT) |\ 3494c87aefeSPatrick Mooney (NVME_STATUS_SC_MASK << NVME_STATUS_SC_SHIFT)) 3504c87aefeSPatrick Mooney 351154972afSPatrick Mooney #define NVME_ONCS_DSM (NVME_CTRLR_DATA_ONCS_DSM_MASK << \ 352154972afSPatrick Mooney NVME_CTRLR_DATA_ONCS_DSM_SHIFT) 353154972afSPatrick Mooney 354*6960cd89SAndy Fiddaman static void nvme_feature_invalid_cb(struct pci_nvme_softc *, 
355*6960cd89SAndy Fiddaman struct nvme_feature_obj *, 356*6960cd89SAndy Fiddaman struct nvme_command *, 357*6960cd89SAndy Fiddaman struct nvme_completion *); 358*6960cd89SAndy Fiddaman static void nvme_feature_num_queues(struct pci_nvme_softc *, 359*6960cd89SAndy Fiddaman struct nvme_feature_obj *, 360*6960cd89SAndy Fiddaman struct nvme_command *, 361*6960cd89SAndy Fiddaman struct nvme_completion *); 362*6960cd89SAndy Fiddaman static void nvme_feature_iv_config(struct pci_nvme_softc *, 363*6960cd89SAndy Fiddaman struct nvme_feature_obj *, 364*6960cd89SAndy Fiddaman struct nvme_command *, 365*6960cd89SAndy Fiddaman struct nvme_completion *); 366*6960cd89SAndy Fiddaman 3674c87aefeSPatrick Mooney static __inline void 3684c87aefeSPatrick Mooney cpywithpad(char *dst, size_t dst_size, const char *src, char pad) 3694c87aefeSPatrick Mooney { 3704c87aefeSPatrick Mooney size_t len; 3714c87aefeSPatrick Mooney 3724c87aefeSPatrick Mooney len = strnlen(src, dst_size); 3734c87aefeSPatrick Mooney memset(dst, pad, dst_size); 3744c87aefeSPatrick Mooney memcpy(dst, src, len); 3754c87aefeSPatrick Mooney } 3764c87aefeSPatrick Mooney 3774c87aefeSPatrick Mooney static __inline void 3784c87aefeSPatrick Mooney pci_nvme_status_tc(uint16_t *status, uint16_t type, uint16_t code) 3794c87aefeSPatrick Mooney { 3804c87aefeSPatrick Mooney 3814c87aefeSPatrick Mooney *status &= ~NVME_STATUS_MASK; 3824c87aefeSPatrick Mooney *status |= (type & NVME_STATUS_SCT_MASK) << NVME_STATUS_SCT_SHIFT | 3834c87aefeSPatrick Mooney (code & NVME_STATUS_SC_MASK) << NVME_STATUS_SC_SHIFT; 3844c87aefeSPatrick Mooney } 3854c87aefeSPatrick Mooney 3864c87aefeSPatrick Mooney static __inline void 3874c87aefeSPatrick Mooney pci_nvme_status_genc(uint16_t *status, uint16_t code) 3884c87aefeSPatrick Mooney { 3894c87aefeSPatrick Mooney 3904c87aefeSPatrick Mooney pci_nvme_status_tc(status, NVME_SCT_GENERIC, code); 3914c87aefeSPatrick Mooney } 3924c87aefeSPatrick Mooney 393*6960cd89SAndy Fiddaman /* 394*6960cd89SAndy Fiddaman * 
Initialize the requested number or IO Submission and Completion Queues. 395*6960cd89SAndy Fiddaman * Admin queues are allocated implicitly. 396*6960cd89SAndy Fiddaman */ 397*6960cd89SAndy Fiddaman static void 398*6960cd89SAndy Fiddaman pci_nvme_init_queues(struct pci_nvme_softc *sc, uint32_t nsq, uint32_t ncq) 3994c87aefeSPatrick Mooney { 400*6960cd89SAndy Fiddaman uint32_t i; 4014c87aefeSPatrick Mooney 402*6960cd89SAndy Fiddaman /* 403*6960cd89SAndy Fiddaman * Allocate and initialize the Submission Queues 404*6960cd89SAndy Fiddaman */ 405*6960cd89SAndy Fiddaman if (nsq > NVME_QUEUES) { 406*6960cd89SAndy Fiddaman WPRINTF("%s: clamping number of SQ from %u to %u", 407*6960cd89SAndy Fiddaman __func__, nsq, NVME_QUEUES); 408*6960cd89SAndy Fiddaman nsq = NVME_QUEUES; 409*6960cd89SAndy Fiddaman } 410*6960cd89SAndy Fiddaman 411*6960cd89SAndy Fiddaman sc->num_squeues = nsq; 412*6960cd89SAndy Fiddaman 413*6960cd89SAndy Fiddaman sc->submit_queues = calloc(sc->num_squeues + 1, 414*6960cd89SAndy Fiddaman sizeof(struct nvme_submission_queue)); 415*6960cd89SAndy Fiddaman if (sc->submit_queues == NULL) { 416*6960cd89SAndy Fiddaman WPRINTF("%s: SQ allocation failed", __func__); 417*6960cd89SAndy Fiddaman sc->num_squeues = 0; 418*6960cd89SAndy Fiddaman } else { 419*6960cd89SAndy Fiddaman struct nvme_submission_queue *sq = sc->submit_queues; 420*6960cd89SAndy Fiddaman 421*6960cd89SAndy Fiddaman for (i = 0; i < sc->num_squeues; i++) 422*6960cd89SAndy Fiddaman pthread_mutex_init(&sq[i].mtx, NULL); 423*6960cd89SAndy Fiddaman } 424*6960cd89SAndy Fiddaman 425*6960cd89SAndy Fiddaman /* 426*6960cd89SAndy Fiddaman * Allocate and initialize the Completion Queues 427*6960cd89SAndy Fiddaman */ 428*6960cd89SAndy Fiddaman if (ncq > NVME_QUEUES) { 429*6960cd89SAndy Fiddaman WPRINTF("%s: clamping number of CQ from %u to %u", 430*6960cd89SAndy Fiddaman __func__, ncq, NVME_QUEUES); 431*6960cd89SAndy Fiddaman ncq = NVME_QUEUES; 432*6960cd89SAndy Fiddaman } 433*6960cd89SAndy Fiddaman 
434*6960cd89SAndy Fiddaman sc->num_cqueues = ncq; 435*6960cd89SAndy Fiddaman 436*6960cd89SAndy Fiddaman sc->compl_queues = calloc(sc->num_cqueues + 1, 437*6960cd89SAndy Fiddaman sizeof(struct nvme_completion_queue)); 438*6960cd89SAndy Fiddaman if (sc->compl_queues == NULL) { 439*6960cd89SAndy Fiddaman WPRINTF("%s: CQ allocation failed", __func__); 440*6960cd89SAndy Fiddaman sc->num_cqueues = 0; 441*6960cd89SAndy Fiddaman } else { 442*6960cd89SAndy Fiddaman struct nvme_completion_queue *cq = sc->compl_queues; 443*6960cd89SAndy Fiddaman 444*6960cd89SAndy Fiddaman for (i = 0; i < sc->num_cqueues; i++) 445*6960cd89SAndy Fiddaman pthread_mutex_init(&cq[i].mtx, NULL); 446*6960cd89SAndy Fiddaman } 4474c87aefeSPatrick Mooney } 4484c87aefeSPatrick Mooney 4494c87aefeSPatrick Mooney static void 4504c87aefeSPatrick Mooney pci_nvme_init_ctrldata(struct pci_nvme_softc *sc) 4514c87aefeSPatrick Mooney { 4524c87aefeSPatrick Mooney struct nvme_controller_data *cd = &sc->ctrldata; 4534c87aefeSPatrick Mooney 4544c87aefeSPatrick Mooney cd->vid = 0xFB5D; 4554c87aefeSPatrick Mooney cd->ssvid = 0x0000; 4564c87aefeSPatrick Mooney 4574c87aefeSPatrick Mooney cpywithpad((char *)cd->mn, sizeof(cd->mn), "bhyve-NVMe", ' '); 4584c87aefeSPatrick Mooney cpywithpad((char *)cd->fr, sizeof(cd->fr), "1.0", ' '); 4594c87aefeSPatrick Mooney 4604c87aefeSPatrick Mooney /* Num of submission commands that we can handle at a time (2^rab) */ 4614c87aefeSPatrick Mooney cd->rab = 4; 4624c87aefeSPatrick Mooney 4634c87aefeSPatrick Mooney /* FreeBSD OUI */ 4644c87aefeSPatrick Mooney cd->ieee[0] = 0x58; 4654c87aefeSPatrick Mooney cd->ieee[1] = 0x9c; 4664c87aefeSPatrick Mooney cd->ieee[2] = 0xfc; 4674c87aefeSPatrick Mooney 4684c87aefeSPatrick Mooney cd->mic = 0; 4694c87aefeSPatrick Mooney 470*6960cd89SAndy Fiddaman cd->mdts = NVME_MDTS; /* max data transfer size (2^mdts * CAP.MPSMIN) */ 4714c87aefeSPatrick Mooney 4724c87aefeSPatrick Mooney cd->ver = 0x00010300; 4734c87aefeSPatrick Mooney 4744c87aefeSPatrick Mooney 
cd->oacs = 1 << NVME_CTRLR_DATA_OACS_FORMAT_SHIFT; 4754c87aefeSPatrick Mooney cd->acl = 2; 4764c87aefeSPatrick Mooney cd->aerl = 4; 4774c87aefeSPatrick Mooney 478*6960cd89SAndy Fiddaman /* Advertise 1, Read-only firmware slot */ 479*6960cd89SAndy Fiddaman cd->frmw = NVME_CTRLR_DATA_FRMW_SLOT1_RO_MASK | 480*6960cd89SAndy Fiddaman (1 << NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT); 4814c87aefeSPatrick Mooney cd->lpa = 0; /* TODO: support some simple things like SMART */ 4824c87aefeSPatrick Mooney cd->elpe = 0; /* max error log page entries */ 4834c87aefeSPatrick Mooney cd->npss = 1; /* number of power states support */ 4844c87aefeSPatrick Mooney 4854c87aefeSPatrick Mooney /* Warning Composite Temperature Threshold */ 4864c87aefeSPatrick Mooney cd->wctemp = 0x0157; 4874c87aefeSPatrick Mooney 4884c87aefeSPatrick Mooney cd->sqes = (6 << NVME_CTRLR_DATA_SQES_MAX_SHIFT) | 4894c87aefeSPatrick Mooney (6 << NVME_CTRLR_DATA_SQES_MIN_SHIFT); 4904c87aefeSPatrick Mooney cd->cqes = (4 << NVME_CTRLR_DATA_CQES_MAX_SHIFT) | 4914c87aefeSPatrick Mooney (4 << NVME_CTRLR_DATA_CQES_MIN_SHIFT); 4924c87aefeSPatrick Mooney cd->nn = 1; /* number of namespaces */ 4934c87aefeSPatrick Mooney 494154972afSPatrick Mooney cd->oncs = 0; 495154972afSPatrick Mooney switch (sc->dataset_management) { 496154972afSPatrick Mooney case NVME_DATASET_MANAGEMENT_AUTO: 497154972afSPatrick Mooney if (sc->nvstore.deallocate) 498154972afSPatrick Mooney cd->oncs |= NVME_ONCS_DSM; 499154972afSPatrick Mooney break; 500154972afSPatrick Mooney case NVME_DATASET_MANAGEMENT_ENABLE: 501154972afSPatrick Mooney cd->oncs |= NVME_ONCS_DSM; 502154972afSPatrick Mooney break; 503154972afSPatrick Mooney default: 504154972afSPatrick Mooney break; 505154972afSPatrick Mooney } 506154972afSPatrick Mooney 5074c87aefeSPatrick Mooney cd->fna = 0x03; 5084c87aefeSPatrick Mooney 5094c87aefeSPatrick Mooney cd->power_state[0].mp = 10; 5104c87aefeSPatrick Mooney } 5114c87aefeSPatrick Mooney 51284659b24SMichael Zeller /* 51384659b24SMichael Zeller * 
Calculate the CRC-16 of the given buffer 51484659b24SMichael Zeller * See copyright attribution at top of file 51584659b24SMichael Zeller */ 51684659b24SMichael Zeller static uint16_t 51784659b24SMichael Zeller crc16(uint16_t crc, const void *buffer, unsigned int len) 5184c87aefeSPatrick Mooney { 51984659b24SMichael Zeller const unsigned char *cp = buffer; 52084659b24SMichael Zeller /* CRC table for the CRC-16. The poly is 0x8005 (x16 + x15 + x2 + 1). */ 52184659b24SMichael Zeller static uint16_t const crc16_table[256] = { 52284659b24SMichael Zeller 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, 52384659b24SMichael Zeller 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, 52484659b24SMichael Zeller 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, 52584659b24SMichael Zeller 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, 52684659b24SMichael Zeller 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, 52784659b24SMichael Zeller 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, 52884659b24SMichael Zeller 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, 52984659b24SMichael Zeller 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, 53084659b24SMichael Zeller 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, 53184659b24SMichael Zeller 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, 53284659b24SMichael Zeller 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, 53384659b24SMichael Zeller 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, 53484659b24SMichael Zeller 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, 53584659b24SMichael Zeller 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, 53684659b24SMichael Zeller 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, 53784659b24SMichael Zeller 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, 
53884659b24SMichael Zeller 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, 53984659b24SMichael Zeller 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, 54084659b24SMichael Zeller 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, 54184659b24SMichael Zeller 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, 54284659b24SMichael Zeller 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, 54384659b24SMichael Zeller 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, 54484659b24SMichael Zeller 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, 54584659b24SMichael Zeller 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, 54684659b24SMichael Zeller 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, 54784659b24SMichael Zeller 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, 54884659b24SMichael Zeller 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, 54984659b24SMichael Zeller 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, 55084659b24SMichael Zeller 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, 55184659b24SMichael Zeller 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, 55284659b24SMichael Zeller 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, 55384659b24SMichael Zeller 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 55484659b24SMichael Zeller }; 5554c87aefeSPatrick Mooney 55684659b24SMichael Zeller while (len--) 55784659b24SMichael Zeller crc = (((crc >> 8) & 0xffU) ^ 55884659b24SMichael Zeller crc16_table[(crc ^ *cp++) & 0xffU]) & 0x0000ffffU; 55984659b24SMichael Zeller return crc; 56084659b24SMichael Zeller } 56184659b24SMichael Zeller 56284659b24SMichael Zeller static void 56384659b24SMichael Zeller pci_nvme_init_nsdata(struct pci_nvme_softc *sc, 56484659b24SMichael Zeller struct nvme_namespace_data *nd, uint32_t nsid, 565154972afSPatrick Mooney 
struct pci_nvme_blockstore *nvstore) 56684659b24SMichael Zeller { 5674c87aefeSPatrick Mooney 568154972afSPatrick Mooney /* Get capacity and block size information from backing store */ 569154972afSPatrick Mooney nd->nsze = nvstore->size / nvstore->sectsz; 5704c87aefeSPatrick Mooney nd->ncap = nd->nsze; 5714c87aefeSPatrick Mooney nd->nuse = nd->nsze; 5724c87aefeSPatrick Mooney 573154972afSPatrick Mooney if (nvstore->type == NVME_STOR_BLOCKIF) 574154972afSPatrick Mooney nvstore->deallocate = blockif_candelete(nvstore->ctx); 575154972afSPatrick Mooney 5764c87aefeSPatrick Mooney nd->nlbaf = 0; /* NLBAF is a 0's based value (i.e. 1 LBA Format) */ 57784659b24SMichael Zeller nd->flbas = 0; 57884659b24SMichael Zeller 57984659b24SMichael Zeller /* Create an EUI-64 if user did not provide one */ 580154972afSPatrick Mooney if (nvstore->eui64 == 0) { 58184659b24SMichael Zeller char *data = NULL; 582154972afSPatrick Mooney uint64_t eui64 = nvstore->eui64; 58384659b24SMichael Zeller 58484659b24SMichael Zeller asprintf(&data, "%s%u%u%u", vmname, sc->nsc_pi->pi_bus, 58584659b24SMichael Zeller sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func); 58684659b24SMichael Zeller 58784659b24SMichael Zeller if (data != NULL) { 58884659b24SMichael Zeller eui64 = OUI_FREEBSD_NVME_LOW | crc16(0, data, strlen(data)); 58984659b24SMichael Zeller free(data); 59084659b24SMichael Zeller } 591154972afSPatrick Mooney nvstore->eui64 = (eui64 << 16) | (nsid & 0xffff); 59284659b24SMichael Zeller } 593154972afSPatrick Mooney be64enc(nd->eui64, nvstore->eui64); 59484659b24SMichael Zeller 5954c87aefeSPatrick Mooney /* LBA data-sz = 2^lbads */ 596154972afSPatrick Mooney nd->lbaf[0] = nvstore->sectsz_bits << NVME_NS_DATA_LBAF_LBADS_SHIFT; 5974c87aefeSPatrick Mooney } 5984c87aefeSPatrick Mooney 5994c87aefeSPatrick Mooney static void 6004c87aefeSPatrick Mooney pci_nvme_init_logpages(struct pci_nvme_softc *sc) 6014c87aefeSPatrick Mooney { 6024c87aefeSPatrick Mooney 6034c87aefeSPatrick Mooney memset(&sc->err_log, 0, 
sizeof(sc->err_log)); 6044c87aefeSPatrick Mooney memset(&sc->health_log, 0, sizeof(sc->health_log)); 6054c87aefeSPatrick Mooney memset(&sc->fw_log, 0, sizeof(sc->fw_log)); 606*6960cd89SAndy Fiddaman 607*6960cd89SAndy Fiddaman /* Set read/write remainder to round up according to spec */ 608*6960cd89SAndy Fiddaman sc->read_dunits_remainder = 999; 609*6960cd89SAndy Fiddaman sc->write_dunits_remainder = 999; 610*6960cd89SAndy Fiddaman 611*6960cd89SAndy Fiddaman /* Set nominal Health values checked by implementations */ 612*6960cd89SAndy Fiddaman sc->health_log.temperature = 310; 613*6960cd89SAndy Fiddaman sc->health_log.available_spare = 100; 614*6960cd89SAndy Fiddaman sc->health_log.available_spare_threshold = 10; 6154c87aefeSPatrick Mooney } 6164c87aefeSPatrick Mooney 6174c87aefeSPatrick Mooney static void 618*6960cd89SAndy Fiddaman pci_nvme_init_features(struct pci_nvme_softc *sc) 619*6960cd89SAndy Fiddaman { 620*6960cd89SAndy Fiddaman 621*6960cd89SAndy Fiddaman sc->feat[0].set = nvme_feature_invalid_cb; 622*6960cd89SAndy Fiddaman sc->feat[0].get = nvme_feature_invalid_cb; 623*6960cd89SAndy Fiddaman 624*6960cd89SAndy Fiddaman sc->feat[NVME_FEAT_LBA_RANGE_TYPE].namespace_specific = true; 625*6960cd89SAndy Fiddaman sc->feat[NVME_FEAT_ERROR_RECOVERY].namespace_specific = true; 626*6960cd89SAndy Fiddaman sc->feat[NVME_FEAT_NUMBER_OF_QUEUES].set = nvme_feature_num_queues; 627*6960cd89SAndy Fiddaman sc->feat[NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION].set = 628*6960cd89SAndy Fiddaman nvme_feature_iv_config; 629*6960cd89SAndy Fiddaman sc->feat[NVME_FEAT_PREDICTABLE_LATENCY_MODE_CONFIG].get = 630*6960cd89SAndy Fiddaman nvme_feature_invalid_cb; 631*6960cd89SAndy Fiddaman sc->feat[NVME_FEAT_PREDICTABLE_LATENCY_MODE_WINDOW].get = 632*6960cd89SAndy Fiddaman nvme_feature_invalid_cb; 633*6960cd89SAndy Fiddaman } 634*6960cd89SAndy Fiddaman 635*6960cd89SAndy Fiddaman static void 636*6960cd89SAndy Fiddaman pci_nvme_aer_init(struct pci_nvme_softc *sc) 637*6960cd89SAndy Fiddaman { 
638*6960cd89SAndy Fiddaman 639*6960cd89SAndy Fiddaman STAILQ_INIT(&sc->aer_list); 640*6960cd89SAndy Fiddaman sc->aer_count = 0; 641*6960cd89SAndy Fiddaman } 642*6960cd89SAndy Fiddaman 643*6960cd89SAndy Fiddaman static void 644*6960cd89SAndy Fiddaman pci_nvme_aer_destroy(struct pci_nvme_softc *sc) 645*6960cd89SAndy Fiddaman { 646*6960cd89SAndy Fiddaman struct pci_nvme_aer *aer = NULL; 647*6960cd89SAndy Fiddaman 648*6960cd89SAndy Fiddaman while (!STAILQ_EMPTY(&sc->aer_list)) { 649*6960cd89SAndy Fiddaman aer = STAILQ_FIRST(&sc->aer_list); 650*6960cd89SAndy Fiddaman STAILQ_REMOVE_HEAD(&sc->aer_list, link); 651*6960cd89SAndy Fiddaman free(aer); 652*6960cd89SAndy Fiddaman } 653*6960cd89SAndy Fiddaman 654*6960cd89SAndy Fiddaman pci_nvme_aer_init(sc); 655*6960cd89SAndy Fiddaman } 656*6960cd89SAndy Fiddaman 657*6960cd89SAndy Fiddaman #ifdef __FreeBSD__ 658*6960cd89SAndy Fiddaman static bool 659*6960cd89SAndy Fiddaman pci_nvme_aer_available(struct pci_nvme_softc *sc) 660*6960cd89SAndy Fiddaman { 661*6960cd89SAndy Fiddaman 662*6960cd89SAndy Fiddaman return (!STAILQ_EMPTY(&sc->aer_list)); 663*6960cd89SAndy Fiddaman } 664*6960cd89SAndy Fiddaman #else 665*6960cd89SAndy Fiddaman /* This is kept behind an ifdef while it's unused to appease the compiler. 
*/ 666*6960cd89SAndy Fiddaman #endif 667*6960cd89SAndy Fiddaman 668*6960cd89SAndy Fiddaman static bool 669*6960cd89SAndy Fiddaman pci_nvme_aer_limit_reached(struct pci_nvme_softc *sc) 670*6960cd89SAndy Fiddaman { 671*6960cd89SAndy Fiddaman struct nvme_controller_data *cd = &sc->ctrldata; 672*6960cd89SAndy Fiddaman 673*6960cd89SAndy Fiddaman /* AERL is a zero based value while aer_count is one's based */ 674*6960cd89SAndy Fiddaman return (sc->aer_count == (cd->aerl + 1)); 675*6960cd89SAndy Fiddaman } 676*6960cd89SAndy Fiddaman 677*6960cd89SAndy Fiddaman /* 678*6960cd89SAndy Fiddaman * Add an Async Event Request 679*6960cd89SAndy Fiddaman * 680*6960cd89SAndy Fiddaman * Stores an AER to be returned later if the Controller needs to notify the 681*6960cd89SAndy Fiddaman * host of an event. 682*6960cd89SAndy Fiddaman * Note that while the NVMe spec doesn't require Controllers to return AER's 683*6960cd89SAndy Fiddaman * in order, this implementation does preserve the order. 684*6960cd89SAndy Fiddaman */ 685*6960cd89SAndy Fiddaman static int 686*6960cd89SAndy Fiddaman pci_nvme_aer_add(struct pci_nvme_softc *sc, uint16_t cid) 687*6960cd89SAndy Fiddaman { 688*6960cd89SAndy Fiddaman struct pci_nvme_aer *aer = NULL; 689*6960cd89SAndy Fiddaman 690*6960cd89SAndy Fiddaman if (pci_nvme_aer_limit_reached(sc)) 691*6960cd89SAndy Fiddaman return (-1); 692*6960cd89SAndy Fiddaman 693*6960cd89SAndy Fiddaman aer = calloc(1, sizeof(struct pci_nvme_aer)); 694*6960cd89SAndy Fiddaman if (aer == NULL) 695*6960cd89SAndy Fiddaman return (-1); 696*6960cd89SAndy Fiddaman 697*6960cd89SAndy Fiddaman sc->aer_count++; 698*6960cd89SAndy Fiddaman 699*6960cd89SAndy Fiddaman /* Save the Command ID for use in the completion message */ 700*6960cd89SAndy Fiddaman aer->cid = cid; 701*6960cd89SAndy Fiddaman STAILQ_INSERT_TAIL(&sc->aer_list, aer, link); 702*6960cd89SAndy Fiddaman 703*6960cd89SAndy Fiddaman return (0); 704*6960cd89SAndy Fiddaman } 705*6960cd89SAndy Fiddaman 706*6960cd89SAndy Fiddaman /* 
707*6960cd89SAndy Fiddaman * Get an Async Event Request structure 708*6960cd89SAndy Fiddaman * 709*6960cd89SAndy Fiddaman * Returns a pointer to an AER previously submitted by the host or NULL if 710*6960cd89SAndy Fiddaman * no AER's exist. Caller is responsible for freeing the returned struct. 711*6960cd89SAndy Fiddaman */ 712*6960cd89SAndy Fiddaman #ifdef __FreeBSD__ 713*6960cd89SAndy Fiddaman static struct pci_nvme_aer * 714*6960cd89SAndy Fiddaman pci_nvme_aer_get(struct pci_nvme_softc *sc) 715*6960cd89SAndy Fiddaman { 716*6960cd89SAndy Fiddaman struct pci_nvme_aer *aer = NULL; 717*6960cd89SAndy Fiddaman 718*6960cd89SAndy Fiddaman aer = STAILQ_FIRST(&sc->aer_list); 719*6960cd89SAndy Fiddaman if (aer != NULL) { 720*6960cd89SAndy Fiddaman STAILQ_REMOVE_HEAD(&sc->aer_list, link); 721*6960cd89SAndy Fiddaman sc->aer_count--; 722*6960cd89SAndy Fiddaman } 723*6960cd89SAndy Fiddaman 724*6960cd89SAndy Fiddaman return (aer); 725*6960cd89SAndy Fiddaman } 726*6960cd89SAndy Fiddaman #else 727*6960cd89SAndy Fiddaman /* This is kept behind an ifdef while it's unused to appease the compiler. 
*/ 728*6960cd89SAndy Fiddaman #endif 729*6960cd89SAndy Fiddaman 730*6960cd89SAndy Fiddaman static void 7314c87aefeSPatrick Mooney pci_nvme_reset_locked(struct pci_nvme_softc *sc) 7324c87aefeSPatrick Mooney { 733*6960cd89SAndy Fiddaman uint32_t i; 734*6960cd89SAndy Fiddaman 735*6960cd89SAndy Fiddaman DPRINTF("%s", __func__); 7364c87aefeSPatrick Mooney 7374c87aefeSPatrick Mooney sc->regs.cap_lo = (ZERO_BASED(sc->max_qentries) & NVME_CAP_LO_REG_MQES_MASK) | 7384c87aefeSPatrick Mooney (1 << NVME_CAP_LO_REG_CQR_SHIFT) | 7394c87aefeSPatrick Mooney (60 << NVME_CAP_LO_REG_TO_SHIFT); 7404c87aefeSPatrick Mooney 7414c87aefeSPatrick Mooney sc->regs.cap_hi = 1 << NVME_CAP_HI_REG_CSS_NVM_SHIFT; 7424c87aefeSPatrick Mooney 7434c87aefeSPatrick Mooney sc->regs.vs = 0x00010300; /* NVMe v1.3 */ 7444c87aefeSPatrick Mooney 7454c87aefeSPatrick Mooney sc->regs.cc = 0; 7464c87aefeSPatrick Mooney sc->regs.csts = 0; 7474c87aefeSPatrick Mooney 748*6960cd89SAndy Fiddaman assert(sc->submit_queues != NULL); 749*6960cd89SAndy Fiddaman 750*6960cd89SAndy Fiddaman for (i = 0; i < sc->num_squeues + 1; i++) { 7514c87aefeSPatrick Mooney sc->submit_queues[i].qbase = NULL; 7524c87aefeSPatrick Mooney sc->submit_queues[i].size = 0; 7534c87aefeSPatrick Mooney sc->submit_queues[i].cqid = 0; 7544c87aefeSPatrick Mooney sc->submit_queues[i].tail = 0; 7554c87aefeSPatrick Mooney sc->submit_queues[i].head = 0; 7564c87aefeSPatrick Mooney } 7574c87aefeSPatrick Mooney 758*6960cd89SAndy Fiddaman assert(sc->compl_queues != NULL); 759*6960cd89SAndy Fiddaman 760*6960cd89SAndy Fiddaman for (i = 0; i < sc->num_cqueues + 1; i++) { 7614c87aefeSPatrick Mooney sc->compl_queues[i].qbase = NULL; 7624c87aefeSPatrick Mooney sc->compl_queues[i].size = 0; 7634c87aefeSPatrick Mooney sc->compl_queues[i].tail = 0; 7644c87aefeSPatrick Mooney sc->compl_queues[i].head = 0; 7654c87aefeSPatrick Mooney } 7664c87aefeSPatrick Mooney 767*6960cd89SAndy Fiddaman sc->num_q_is_set = false; 768*6960cd89SAndy Fiddaman 769*6960cd89SAndy Fiddaman 
pci_nvme_aer_destroy(sc); 7704c87aefeSPatrick Mooney } 7714c87aefeSPatrick Mooney 7724c87aefeSPatrick Mooney static void 7734c87aefeSPatrick Mooney pci_nvme_reset(struct pci_nvme_softc *sc) 7744c87aefeSPatrick Mooney { 7754c87aefeSPatrick Mooney pthread_mutex_lock(&sc->mtx); 7764c87aefeSPatrick Mooney pci_nvme_reset_locked(sc); 7774c87aefeSPatrick Mooney pthread_mutex_unlock(&sc->mtx); 7784c87aefeSPatrick Mooney } 7794c87aefeSPatrick Mooney 7804c87aefeSPatrick Mooney static void 7814c87aefeSPatrick Mooney pci_nvme_init_controller(struct vmctx *ctx, struct pci_nvme_softc *sc) 7824c87aefeSPatrick Mooney { 7834c87aefeSPatrick Mooney uint16_t acqs, asqs; 7844c87aefeSPatrick Mooney 785*6960cd89SAndy Fiddaman DPRINTF("%s", __func__); 7864c87aefeSPatrick Mooney 7874c87aefeSPatrick Mooney asqs = (sc->regs.aqa & NVME_AQA_REG_ASQS_MASK) + 1; 7884c87aefeSPatrick Mooney sc->submit_queues[0].size = asqs; 7894c87aefeSPatrick Mooney sc->submit_queues[0].qbase = vm_map_gpa(ctx, sc->regs.asq, 7904c87aefeSPatrick Mooney sizeof(struct nvme_command) * asqs); 7914c87aefeSPatrick Mooney 792*6960cd89SAndy Fiddaman DPRINTF("%s mapping Admin-SQ guest 0x%lx, host: %p", 793*6960cd89SAndy Fiddaman __func__, sc->regs.asq, sc->submit_queues[0].qbase); 7944c87aefeSPatrick Mooney 7954c87aefeSPatrick Mooney acqs = ((sc->regs.aqa >> NVME_AQA_REG_ACQS_SHIFT) & 7964c87aefeSPatrick Mooney NVME_AQA_REG_ACQS_MASK) + 1; 7974c87aefeSPatrick Mooney sc->compl_queues[0].size = acqs; 7984c87aefeSPatrick Mooney sc->compl_queues[0].qbase = vm_map_gpa(ctx, sc->regs.acq, 7994c87aefeSPatrick Mooney sizeof(struct nvme_completion) * acqs); 800*6960cd89SAndy Fiddaman sc->compl_queues[0].intr_en = NVME_CQ_INTEN; 801*6960cd89SAndy Fiddaman 802*6960cd89SAndy Fiddaman DPRINTF("%s mapping Admin-CQ guest 0x%lx, host: %p", 803*6960cd89SAndy Fiddaman __func__, sc->regs.acq, sc->compl_queues[0].qbase); 8044c87aefeSPatrick Mooney } 8054c87aefeSPatrick Mooney 8064c87aefeSPatrick Mooney static int 807154972afSPatrick Mooney 
nvme_prp_memcpy(struct vmctx *ctx, uint64_t prp1, uint64_t prp2, uint8_t *b, 808154972afSPatrick Mooney size_t len, enum nvme_copy_dir dir) 8094c87aefeSPatrick Mooney { 810154972afSPatrick Mooney uint8_t *p; 8114c87aefeSPatrick Mooney size_t bytes; 8124c87aefeSPatrick Mooney 8134c87aefeSPatrick Mooney if (len > (8 * 1024)) { 8144c87aefeSPatrick Mooney return (-1); 8154c87aefeSPatrick Mooney } 8164c87aefeSPatrick Mooney 8174c87aefeSPatrick Mooney /* Copy from the start of prp1 to the end of the physical page */ 8184c87aefeSPatrick Mooney bytes = PAGE_SIZE - (prp1 & PAGE_MASK); 8194c87aefeSPatrick Mooney bytes = MIN(bytes, len); 8204c87aefeSPatrick Mooney 821154972afSPatrick Mooney p = vm_map_gpa(ctx, prp1, bytes); 822154972afSPatrick Mooney if (p == NULL) { 8234c87aefeSPatrick Mooney return (-1); 8244c87aefeSPatrick Mooney } 8254c87aefeSPatrick Mooney 826154972afSPatrick Mooney if (dir == NVME_COPY_TO_PRP) 827154972afSPatrick Mooney memcpy(p, b, bytes); 828154972afSPatrick Mooney else 829154972afSPatrick Mooney memcpy(b, p, bytes); 8304c87aefeSPatrick Mooney 831154972afSPatrick Mooney b += bytes; 8324c87aefeSPatrick Mooney 8334c87aefeSPatrick Mooney len -= bytes; 8344c87aefeSPatrick Mooney if (len == 0) { 8354c87aefeSPatrick Mooney return (0); 8364c87aefeSPatrick Mooney } 8374c87aefeSPatrick Mooney 8384c87aefeSPatrick Mooney len = MIN(len, PAGE_SIZE); 8394c87aefeSPatrick Mooney 840154972afSPatrick Mooney p = vm_map_gpa(ctx, prp2, len); 841154972afSPatrick Mooney if (p == NULL) { 8424c87aefeSPatrick Mooney return (-1); 8434c87aefeSPatrick Mooney } 8444c87aefeSPatrick Mooney 845154972afSPatrick Mooney if (dir == NVME_COPY_TO_PRP) 846154972afSPatrick Mooney memcpy(p, b, len); 847154972afSPatrick Mooney else 848154972afSPatrick Mooney memcpy(b, p, len); 8494c87aefeSPatrick Mooney 8504c87aefeSPatrick Mooney return (0); 8514c87aefeSPatrick Mooney } 8524c87aefeSPatrick Mooney 853*6960cd89SAndy Fiddaman /* 854*6960cd89SAndy Fiddaman * Write a Completion Queue Entry update 
855*6960cd89SAndy Fiddaman * 856*6960cd89SAndy Fiddaman * Write the completion and update the doorbell value 857*6960cd89SAndy Fiddaman */ 858*6960cd89SAndy Fiddaman static void 859*6960cd89SAndy Fiddaman pci_nvme_cq_update(struct pci_nvme_softc *sc, 860*6960cd89SAndy Fiddaman struct nvme_completion_queue *cq, 861*6960cd89SAndy Fiddaman uint32_t cdw0, 862*6960cd89SAndy Fiddaman uint16_t cid, 863*6960cd89SAndy Fiddaman uint16_t sqid, 864*6960cd89SAndy Fiddaman uint16_t status) 865*6960cd89SAndy Fiddaman { 866*6960cd89SAndy Fiddaman struct nvme_submission_queue *sq = &sc->submit_queues[sqid]; 867*6960cd89SAndy Fiddaman struct nvme_completion *cqe; 868*6960cd89SAndy Fiddaman 869*6960cd89SAndy Fiddaman assert(cq->qbase != NULL); 870*6960cd89SAndy Fiddaman 871*6960cd89SAndy Fiddaman pthread_mutex_lock(&cq->mtx); 872*6960cd89SAndy Fiddaman 873*6960cd89SAndy Fiddaman cqe = &cq->qbase[cq->tail]; 874*6960cd89SAndy Fiddaman 875*6960cd89SAndy Fiddaman /* Flip the phase bit */ 876*6960cd89SAndy Fiddaman status |= (cqe->status ^ NVME_STATUS_P) & NVME_STATUS_P_MASK; 877*6960cd89SAndy Fiddaman 878*6960cd89SAndy Fiddaman cqe->cdw0 = cdw0; 879*6960cd89SAndy Fiddaman cqe->sqhd = sq->head; 880*6960cd89SAndy Fiddaman cqe->sqid = sqid; 881*6960cd89SAndy Fiddaman cqe->cid = cid; 882*6960cd89SAndy Fiddaman cqe->status = status; 883*6960cd89SAndy Fiddaman 884*6960cd89SAndy Fiddaman cq->tail++; 885*6960cd89SAndy Fiddaman if (cq->tail >= cq->size) { 886*6960cd89SAndy Fiddaman cq->tail = 0; 887*6960cd89SAndy Fiddaman } 888*6960cd89SAndy Fiddaman 889*6960cd89SAndy Fiddaman pthread_mutex_unlock(&cq->mtx); 890*6960cd89SAndy Fiddaman } 891*6960cd89SAndy Fiddaman 8924c87aefeSPatrick Mooney static int 8934c87aefeSPatrick Mooney nvme_opc_delete_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, 8944c87aefeSPatrick Mooney struct nvme_completion* compl) 8954c87aefeSPatrick Mooney { 8964c87aefeSPatrick Mooney uint16_t qid = command->cdw10 & 0xffff; 8974c87aefeSPatrick Mooney 
898*6960cd89SAndy Fiddaman DPRINTF("%s DELETE_IO_SQ %u", __func__, qid); 899*6960cd89SAndy Fiddaman if (qid == 0 || qid > sc->num_squeues || 900*6960cd89SAndy Fiddaman (sc->submit_queues[qid].qbase == NULL)) { 901*6960cd89SAndy Fiddaman WPRINTF("%s NOT PERMITTED queue id %u / num_squeues %u", 902*6960cd89SAndy Fiddaman __func__, qid, sc->num_squeues); 9034c87aefeSPatrick Mooney pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, 9044c87aefeSPatrick Mooney NVME_SC_INVALID_QUEUE_IDENTIFIER); 9054c87aefeSPatrick Mooney return (1); 9064c87aefeSPatrick Mooney } 9074c87aefeSPatrick Mooney 9084c87aefeSPatrick Mooney sc->submit_queues[qid].qbase = NULL; 909*6960cd89SAndy Fiddaman sc->submit_queues[qid].cqid = 0; 9104c87aefeSPatrick Mooney pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 9114c87aefeSPatrick Mooney return (1); 9124c87aefeSPatrick Mooney } 9134c87aefeSPatrick Mooney 9144c87aefeSPatrick Mooney static int 9154c87aefeSPatrick Mooney nvme_opc_create_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, 9164c87aefeSPatrick Mooney struct nvme_completion* compl) 9174c87aefeSPatrick Mooney { 9184c87aefeSPatrick Mooney if (command->cdw11 & NVME_CMD_CDW11_PC) { 9194c87aefeSPatrick Mooney uint16_t qid = command->cdw10 & 0xffff; 9204c87aefeSPatrick Mooney struct nvme_submission_queue *nsq; 9214c87aefeSPatrick Mooney 922*6960cd89SAndy Fiddaman if ((qid == 0) || (qid > sc->num_squeues) || 923*6960cd89SAndy Fiddaman (sc->submit_queues[qid].qbase != NULL)) { 924*6960cd89SAndy Fiddaman WPRINTF("%s queue index %u > num_squeues %u", 925*6960cd89SAndy Fiddaman __func__, qid, sc->num_squeues); 9264c87aefeSPatrick Mooney pci_nvme_status_tc(&compl->status, 9274c87aefeSPatrick Mooney NVME_SCT_COMMAND_SPECIFIC, 9284c87aefeSPatrick Mooney NVME_SC_INVALID_QUEUE_IDENTIFIER); 9294c87aefeSPatrick Mooney return (1); 9304c87aefeSPatrick Mooney } 9314c87aefeSPatrick Mooney 9324c87aefeSPatrick Mooney nsq = &sc->submit_queues[qid]; 9334c87aefeSPatrick Mooney nsq->size 
= ONE_BASED((command->cdw10 >> 16) & 0xffff); 934*6960cd89SAndy Fiddaman DPRINTF("%s size=%u (max=%u)", __func__, nsq->size, sc->max_qentries); 935*6960cd89SAndy Fiddaman if ((nsq->size < 2) || (nsq->size > sc->max_qentries)) { 936*6960cd89SAndy Fiddaman /* 937*6960cd89SAndy Fiddaman * Queues must specify at least two entries 938*6960cd89SAndy Fiddaman * NOTE: "MAXIMUM QUEUE SIZE EXCEEDED" was renamed to 939*6960cd89SAndy Fiddaman * "INVALID QUEUE SIZE" in the NVM Express 1.3 Spec 940*6960cd89SAndy Fiddaman */ 941*6960cd89SAndy Fiddaman pci_nvme_status_tc(&compl->status, 942*6960cd89SAndy Fiddaman NVME_SCT_COMMAND_SPECIFIC, 943*6960cd89SAndy Fiddaman NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED); 944*6960cd89SAndy Fiddaman return (1); 945*6960cd89SAndy Fiddaman } 946*6960cd89SAndy Fiddaman nsq->head = nsq->tail = 0; 947*6960cd89SAndy Fiddaman 948*6960cd89SAndy Fiddaman nsq->cqid = (command->cdw11 >> 16) & 0xffff; 949*6960cd89SAndy Fiddaman if ((nsq->cqid == 0) || (nsq->cqid > sc->num_cqueues)) { 950*6960cd89SAndy Fiddaman pci_nvme_status_tc(&compl->status, 951*6960cd89SAndy Fiddaman NVME_SCT_COMMAND_SPECIFIC, 952*6960cd89SAndy Fiddaman NVME_SC_INVALID_QUEUE_IDENTIFIER); 953*6960cd89SAndy Fiddaman return (1); 954*6960cd89SAndy Fiddaman } 955*6960cd89SAndy Fiddaman 956*6960cd89SAndy Fiddaman if (sc->compl_queues[nsq->cqid].qbase == NULL) { 957*6960cd89SAndy Fiddaman pci_nvme_status_tc(&compl->status, 958*6960cd89SAndy Fiddaman NVME_SCT_COMMAND_SPECIFIC, 959*6960cd89SAndy Fiddaman NVME_SC_COMPLETION_QUEUE_INVALID); 960*6960cd89SAndy Fiddaman return (1); 961*6960cd89SAndy Fiddaman } 962*6960cd89SAndy Fiddaman 963*6960cd89SAndy Fiddaman nsq->qpriority = (command->cdw11 >> 1) & 0x03; 9644c87aefeSPatrick Mooney 9654c87aefeSPatrick Mooney nsq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, 9664c87aefeSPatrick Mooney sizeof(struct nvme_command) * (size_t)nsq->size); 9674c87aefeSPatrick Mooney 968*6960cd89SAndy Fiddaman DPRINTF("%s sq %u size %u gaddr %p cqid %u", __func__, 
969*6960cd89SAndy Fiddaman qid, nsq->size, nsq->qbase, nsq->cqid); 9704c87aefeSPatrick Mooney 9714c87aefeSPatrick Mooney pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 9724c87aefeSPatrick Mooney 973*6960cd89SAndy Fiddaman DPRINTF("%s completed creating IOSQ qid %u", 974*6960cd89SAndy Fiddaman __func__, qid); 9754c87aefeSPatrick Mooney } else { 9764c87aefeSPatrick Mooney /* 9774c87aefeSPatrick Mooney * Guest sent non-cont submission queue request. 9784c87aefeSPatrick Mooney * This setting is unsupported by this emulation. 9794c87aefeSPatrick Mooney */ 980*6960cd89SAndy Fiddaman WPRINTF("%s unsupported non-contig (list-based) " 981*6960cd89SAndy Fiddaman "create i/o submission queue", __func__); 9824c87aefeSPatrick Mooney 9834c87aefeSPatrick Mooney pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); 9844c87aefeSPatrick Mooney } 9854c87aefeSPatrick Mooney return (1); 9864c87aefeSPatrick Mooney } 9874c87aefeSPatrick Mooney 9884c87aefeSPatrick Mooney static int 9894c87aefeSPatrick Mooney nvme_opc_delete_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, 9904c87aefeSPatrick Mooney struct nvme_completion* compl) 9914c87aefeSPatrick Mooney { 9924c87aefeSPatrick Mooney uint16_t qid = command->cdw10 & 0xffff; 993*6960cd89SAndy Fiddaman uint16_t sqid; 9944c87aefeSPatrick Mooney 995*6960cd89SAndy Fiddaman DPRINTF("%s DELETE_IO_CQ %u", __func__, qid); 996*6960cd89SAndy Fiddaman if (qid == 0 || qid > sc->num_cqueues || 997*6960cd89SAndy Fiddaman (sc->compl_queues[qid].qbase == NULL)) { 998*6960cd89SAndy Fiddaman WPRINTF("%s queue index %u / num_cqueues %u", 999*6960cd89SAndy Fiddaman __func__, qid, sc->num_cqueues); 10004c87aefeSPatrick Mooney pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, 10014c87aefeSPatrick Mooney NVME_SC_INVALID_QUEUE_IDENTIFIER); 10024c87aefeSPatrick Mooney return (1); 10034c87aefeSPatrick Mooney } 10044c87aefeSPatrick Mooney 1005*6960cd89SAndy Fiddaman /* Deleting an Active CQ is an error */ 1006*6960cd89SAndy 
Fiddaman for (sqid = 1; sqid < sc->num_squeues + 1; sqid++) 1007*6960cd89SAndy Fiddaman if (sc->submit_queues[sqid].cqid == qid) { 1008*6960cd89SAndy Fiddaman pci_nvme_status_tc(&compl->status, 1009*6960cd89SAndy Fiddaman NVME_SCT_COMMAND_SPECIFIC, 1010*6960cd89SAndy Fiddaman NVME_SC_INVALID_QUEUE_DELETION); 1011*6960cd89SAndy Fiddaman return (1); 1012*6960cd89SAndy Fiddaman } 1013*6960cd89SAndy Fiddaman 10144c87aefeSPatrick Mooney sc->compl_queues[qid].qbase = NULL; 10154c87aefeSPatrick Mooney pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 10164c87aefeSPatrick Mooney return (1); 10174c87aefeSPatrick Mooney } 10184c87aefeSPatrick Mooney 10194c87aefeSPatrick Mooney static int 10204c87aefeSPatrick Mooney nvme_opc_create_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, 10214c87aefeSPatrick Mooney struct nvme_completion* compl) 10224c87aefeSPatrick Mooney { 10234c87aefeSPatrick Mooney struct nvme_completion_queue *ncq; 1024*6960cd89SAndy Fiddaman uint16_t qid = command->cdw10 & 0xffff; 10254c87aefeSPatrick Mooney 1026*6960cd89SAndy Fiddaman /* Only support Physically Contiguous queues */ 1027*6960cd89SAndy Fiddaman if ((command->cdw11 & NVME_CMD_CDW11_PC) == 0) { 1028*6960cd89SAndy Fiddaman WPRINTF("%s unsupported non-contig (list-based) " 1029*6960cd89SAndy Fiddaman "create i/o completion queue", 1030*6960cd89SAndy Fiddaman __func__); 1031*6960cd89SAndy Fiddaman 1032*6960cd89SAndy Fiddaman pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); 1033*6960cd89SAndy Fiddaman return (1); 1034*6960cd89SAndy Fiddaman } 1035*6960cd89SAndy Fiddaman 1036*6960cd89SAndy Fiddaman if ((qid == 0) || (qid > sc->num_cqueues) || 1037*6960cd89SAndy Fiddaman (sc->compl_queues[qid].qbase != NULL)) { 1038*6960cd89SAndy Fiddaman WPRINTF("%s queue index %u > num_cqueues %u", 1039*6960cd89SAndy Fiddaman __func__, qid, sc->num_cqueues); 10404c87aefeSPatrick Mooney pci_nvme_status_tc(&compl->status, 10414c87aefeSPatrick Mooney NVME_SCT_COMMAND_SPECIFIC, 
10424c87aefeSPatrick Mooney NVME_SC_INVALID_QUEUE_IDENTIFIER); 10434c87aefeSPatrick Mooney return (1); 10444c87aefeSPatrick Mooney } 10454c87aefeSPatrick Mooney 10464c87aefeSPatrick Mooney ncq = &sc->compl_queues[qid]; 10474c87aefeSPatrick Mooney ncq->intr_en = (command->cdw11 & NVME_CMD_CDW11_IEN) >> 1; 10484c87aefeSPatrick Mooney ncq->intr_vec = (command->cdw11 >> 16) & 0xffff; 1049*6960cd89SAndy Fiddaman if (ncq->intr_vec > (sc->max_queues + 1)) { 1050*6960cd89SAndy Fiddaman pci_nvme_status_tc(&compl->status, 1051*6960cd89SAndy Fiddaman NVME_SCT_COMMAND_SPECIFIC, 1052*6960cd89SAndy Fiddaman NVME_SC_INVALID_INTERRUPT_VECTOR); 1053*6960cd89SAndy Fiddaman return (1); 1054*6960cd89SAndy Fiddaman } 10554c87aefeSPatrick Mooney 1056*6960cd89SAndy Fiddaman ncq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff); 1057*6960cd89SAndy Fiddaman if ((ncq->size < 2) || (ncq->size > sc->max_qentries)) { 1058*6960cd89SAndy Fiddaman /* 1059*6960cd89SAndy Fiddaman * Queues must specify at least two entries 1060*6960cd89SAndy Fiddaman * NOTE: "MAXIMUM QUEUE SIZE EXCEEDED" was renamed to 1061*6960cd89SAndy Fiddaman * "INVALID QUEUE SIZE" in the NVM Express 1.3 Spec 1062*6960cd89SAndy Fiddaman */ 1063*6960cd89SAndy Fiddaman pci_nvme_status_tc(&compl->status, 1064*6960cd89SAndy Fiddaman NVME_SCT_COMMAND_SPECIFIC, 1065*6960cd89SAndy Fiddaman NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED); 1066*6960cd89SAndy Fiddaman return (1); 1067*6960cd89SAndy Fiddaman } 1068*6960cd89SAndy Fiddaman ncq->head = ncq->tail = 0; 10694c87aefeSPatrick Mooney ncq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, 10704c87aefeSPatrick Mooney command->prp1, 10714c87aefeSPatrick Mooney sizeof(struct nvme_command) * (size_t)ncq->size); 10724c87aefeSPatrick Mooney 10734c87aefeSPatrick Mooney pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 10744c87aefeSPatrick Mooney 10754c87aefeSPatrick Mooney 10764c87aefeSPatrick Mooney return (1); 10774c87aefeSPatrick Mooney } 10784c87aefeSPatrick Mooney 10794c87aefeSPatrick Mooney static 
int 10804c87aefeSPatrick Mooney nvme_opc_get_log_page(struct pci_nvme_softc* sc, struct nvme_command* command, 10814c87aefeSPatrick Mooney struct nvme_completion* compl) 10824c87aefeSPatrick Mooney { 1083*6960cd89SAndy Fiddaman uint32_t logsize = 0; 10844c87aefeSPatrick Mooney uint8_t logpage = command->cdw10 & 0xFF; 10854c87aefeSPatrick Mooney 1086*6960cd89SAndy Fiddaman DPRINTF("%s log page %u len %u", __func__, logpage, logsize); 10874c87aefeSPatrick Mooney 10884c87aefeSPatrick Mooney pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 10894c87aefeSPatrick Mooney 1090*6960cd89SAndy Fiddaman /* 1091*6960cd89SAndy Fiddaman * Command specifies the number of dwords to return in fields NUMDU 1092*6960cd89SAndy Fiddaman * and NUMDL. This is a zero-based value. 1093*6960cd89SAndy Fiddaman */ 1094*6960cd89SAndy Fiddaman logsize = ((command->cdw11 << 16) | (command->cdw10 >> 16)) + 1; 1095*6960cd89SAndy Fiddaman logsize *= sizeof(uint32_t); 1096*6960cd89SAndy Fiddaman 10974c87aefeSPatrick Mooney switch (logpage) { 10984c87aefeSPatrick Mooney case NVME_LOG_ERROR: 10994c87aefeSPatrick Mooney nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, 1100*6960cd89SAndy Fiddaman command->prp2, (uint8_t *)&sc->err_log, 1101*6960cd89SAndy Fiddaman MIN(logsize, sizeof(sc->err_log)), 1102154972afSPatrick Mooney NVME_COPY_TO_PRP); 11034c87aefeSPatrick Mooney break; 11044c87aefeSPatrick Mooney case NVME_LOG_HEALTH_INFORMATION: 1105*6960cd89SAndy Fiddaman pthread_mutex_lock(&sc->mtx); 1106*6960cd89SAndy Fiddaman memcpy(&sc->health_log.data_units_read, &sc->read_data_units, 1107*6960cd89SAndy Fiddaman sizeof(sc->health_log.data_units_read)); 1108*6960cd89SAndy Fiddaman memcpy(&sc->health_log.data_units_written, &sc->write_data_units, 1109*6960cd89SAndy Fiddaman sizeof(sc->health_log.data_units_written)); 1110*6960cd89SAndy Fiddaman memcpy(&sc->health_log.host_read_commands, &sc->read_commands, 1111*6960cd89SAndy Fiddaman sizeof(sc->health_log.host_read_commands)); 1112*6960cd89SAndy 
Fiddaman memcpy(&sc->health_log.host_write_commands, &sc->write_commands, 1113*6960cd89SAndy Fiddaman sizeof(sc->health_log.host_write_commands)); 1114*6960cd89SAndy Fiddaman pthread_mutex_unlock(&sc->mtx); 1115*6960cd89SAndy Fiddaman 11164c87aefeSPatrick Mooney nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, 1117*6960cd89SAndy Fiddaman command->prp2, (uint8_t *)&sc->health_log, 1118*6960cd89SAndy Fiddaman MIN(logsize, sizeof(sc->health_log)), 1119154972afSPatrick Mooney NVME_COPY_TO_PRP); 11204c87aefeSPatrick Mooney break; 11214c87aefeSPatrick Mooney case NVME_LOG_FIRMWARE_SLOT: 11224c87aefeSPatrick Mooney nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, 1123*6960cd89SAndy Fiddaman command->prp2, (uint8_t *)&sc->fw_log, 1124*6960cd89SAndy Fiddaman MIN(logsize, sizeof(sc->fw_log)), 1125154972afSPatrick Mooney NVME_COPY_TO_PRP); 11264c87aefeSPatrick Mooney break; 11274c87aefeSPatrick Mooney default: 1128*6960cd89SAndy Fiddaman DPRINTF("%s get log page %x command not supported", 1129*6960cd89SAndy Fiddaman __func__, logpage); 11304c87aefeSPatrick Mooney 11314c87aefeSPatrick Mooney pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, 11324c87aefeSPatrick Mooney NVME_SC_INVALID_LOG_PAGE); 11334c87aefeSPatrick Mooney } 11344c87aefeSPatrick Mooney 11354c87aefeSPatrick Mooney return (1); 11364c87aefeSPatrick Mooney } 11374c87aefeSPatrick Mooney 11384c87aefeSPatrick Mooney static int 11394c87aefeSPatrick Mooney nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command, 11404c87aefeSPatrick Mooney struct nvme_completion* compl) 11414c87aefeSPatrick Mooney { 11424c87aefeSPatrick Mooney void *dest; 1143*6960cd89SAndy Fiddaman uint16_t status = 0; 11444c87aefeSPatrick Mooney 1145*6960cd89SAndy Fiddaman DPRINTF("%s identify 0x%x nsid 0x%x", __func__, 1146*6960cd89SAndy Fiddaman command->cdw10 & 0xFF, command->nsid); 1147*6960cd89SAndy Fiddaman 1148*6960cd89SAndy Fiddaman pci_nvme_status_genc(&status, NVME_SC_SUCCESS); 11494c87aefeSPatrick 
Mooney 11504c87aefeSPatrick Mooney switch (command->cdw10 & 0xFF) { 11514c87aefeSPatrick Mooney case 0x00: /* return Identify Namespace data structure */ 11524c87aefeSPatrick Mooney nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, 1153154972afSPatrick Mooney command->prp2, (uint8_t *)&sc->nsdata, sizeof(sc->nsdata), 1154154972afSPatrick Mooney NVME_COPY_TO_PRP); 11554c87aefeSPatrick Mooney break; 11564c87aefeSPatrick Mooney case 0x01: /* return Identify Controller data structure */ 11574c87aefeSPatrick Mooney nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, 11584c87aefeSPatrick Mooney command->prp2, (uint8_t *)&sc->ctrldata, 1159154972afSPatrick Mooney sizeof(sc->ctrldata), 1160154972afSPatrick Mooney NVME_COPY_TO_PRP); 11614c87aefeSPatrick Mooney break; 11624c87aefeSPatrick Mooney case 0x02: /* list of 1024 active NSIDs > CDW1.NSID */ 11634c87aefeSPatrick Mooney dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, 11644c87aefeSPatrick Mooney sizeof(uint32_t) * 1024); 1165*6960cd89SAndy Fiddaman /* All unused entries shall be zero */ 1166*6960cd89SAndy Fiddaman bzero(dest, sizeof(uint32_t) * 1024); 11674c87aefeSPatrick Mooney ((uint32_t *)dest)[0] = 1; 11684c87aefeSPatrick Mooney break; 11694c87aefeSPatrick Mooney case 0x03: /* list of NSID structures in CDW1.NSID, 4096 bytes */ 1170*6960cd89SAndy Fiddaman if (command->nsid != 1) { 1171*6960cd89SAndy Fiddaman pci_nvme_status_genc(&status, 1172*6960cd89SAndy Fiddaman NVME_SC_INVALID_NAMESPACE_OR_FORMAT); 1173*6960cd89SAndy Fiddaman break; 1174*6960cd89SAndy Fiddaman } 1175*6960cd89SAndy Fiddaman dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, 1176*6960cd89SAndy Fiddaman sizeof(uint32_t) * 1024); 1177*6960cd89SAndy Fiddaman /* All bytes after the descriptor shall be zero */ 1178*6960cd89SAndy Fiddaman bzero(dest, sizeof(uint32_t) * 1024); 1179*6960cd89SAndy Fiddaman 1180*6960cd89SAndy Fiddaman /* Return NIDT=1 (i.e. 
EUI64) descriptor */ 1181*6960cd89SAndy Fiddaman ((uint8_t *)dest)[0] = 1; 1182*6960cd89SAndy Fiddaman ((uint8_t *)dest)[1] = sizeof(uint64_t); 1183*6960cd89SAndy Fiddaman bcopy(sc->nsdata.eui64, ((uint8_t *)dest) + 4, sizeof(uint64_t)); 1184*6960cd89SAndy Fiddaman break; 11854c87aefeSPatrick Mooney default: 1186*6960cd89SAndy Fiddaman DPRINTF("%s unsupported identify command requested 0x%x", 1187*6960cd89SAndy Fiddaman __func__, command->cdw10 & 0xFF); 1188*6960cd89SAndy Fiddaman pci_nvme_status_genc(&status, NVME_SC_INVALID_FIELD); 1189*6960cd89SAndy Fiddaman break; 1190*6960cd89SAndy Fiddaman } 1191*6960cd89SAndy Fiddaman 1192*6960cd89SAndy Fiddaman compl->status = status; 1193*6960cd89SAndy Fiddaman return (1); 1194*6960cd89SAndy Fiddaman } 1195*6960cd89SAndy Fiddaman 1196*6960cd89SAndy Fiddaman static const char * 1197*6960cd89SAndy Fiddaman nvme_fid_to_name(uint8_t fid) 1198*6960cd89SAndy Fiddaman { 1199*6960cd89SAndy Fiddaman const char *name; 1200*6960cd89SAndy Fiddaman 1201*6960cd89SAndy Fiddaman switch (fid) { 1202*6960cd89SAndy Fiddaman case NVME_FEAT_ARBITRATION: 1203*6960cd89SAndy Fiddaman name = "Arbitration"; 1204*6960cd89SAndy Fiddaman break; 1205*6960cd89SAndy Fiddaman case NVME_FEAT_POWER_MANAGEMENT: 1206*6960cd89SAndy Fiddaman name = "Power Management"; 1207*6960cd89SAndy Fiddaman break; 1208*6960cd89SAndy Fiddaman case NVME_FEAT_LBA_RANGE_TYPE: 1209*6960cd89SAndy Fiddaman name = "LBA Range Type"; 1210*6960cd89SAndy Fiddaman break; 1211*6960cd89SAndy Fiddaman case NVME_FEAT_TEMPERATURE_THRESHOLD: 1212*6960cd89SAndy Fiddaman name = "Temperature Threshold"; 1213*6960cd89SAndy Fiddaman break; 1214*6960cd89SAndy Fiddaman case NVME_FEAT_ERROR_RECOVERY: 1215*6960cd89SAndy Fiddaman name = "Error Recovery"; 1216*6960cd89SAndy Fiddaman break; 1217*6960cd89SAndy Fiddaman case NVME_FEAT_VOLATILE_WRITE_CACHE: 1218*6960cd89SAndy Fiddaman name = "Volatile Write Cache"; 1219*6960cd89SAndy Fiddaman break; 1220*6960cd89SAndy Fiddaman case 
NVME_FEAT_NUMBER_OF_QUEUES: 1221*6960cd89SAndy Fiddaman name = "Number of Queues"; 1222*6960cd89SAndy Fiddaman break; 1223*6960cd89SAndy Fiddaman case NVME_FEAT_INTERRUPT_COALESCING: 1224*6960cd89SAndy Fiddaman name = "Interrupt Coalescing"; 1225*6960cd89SAndy Fiddaman break; 1226*6960cd89SAndy Fiddaman case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION: 1227*6960cd89SAndy Fiddaman name = "Interrupt Vector Configuration"; 1228*6960cd89SAndy Fiddaman break; 1229*6960cd89SAndy Fiddaman case NVME_FEAT_WRITE_ATOMICITY: 1230*6960cd89SAndy Fiddaman name = "Write Atomicity Normal"; 1231*6960cd89SAndy Fiddaman break; 1232*6960cd89SAndy Fiddaman case NVME_FEAT_ASYNC_EVENT_CONFIGURATION: 1233*6960cd89SAndy Fiddaman name = "Asynchronous Event Configuration"; 1234*6960cd89SAndy Fiddaman break; 1235*6960cd89SAndy Fiddaman case NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION: 1236*6960cd89SAndy Fiddaman name = "Autonomous Power State Transition"; 1237*6960cd89SAndy Fiddaman break; 1238*6960cd89SAndy Fiddaman case NVME_FEAT_HOST_MEMORY_BUFFER: 1239*6960cd89SAndy Fiddaman name = "Host Memory Buffer"; 1240*6960cd89SAndy Fiddaman break; 1241*6960cd89SAndy Fiddaman case NVME_FEAT_TIMESTAMP: 1242*6960cd89SAndy Fiddaman name = "Timestamp"; 1243*6960cd89SAndy Fiddaman break; 1244*6960cd89SAndy Fiddaman case NVME_FEAT_KEEP_ALIVE_TIMER: 1245*6960cd89SAndy Fiddaman name = "Keep Alive Timer"; 1246*6960cd89SAndy Fiddaman break; 1247*6960cd89SAndy Fiddaman case NVME_FEAT_HOST_CONTROLLED_THERMAL_MGMT: 1248*6960cd89SAndy Fiddaman name = "Host Controlled Thermal Management"; 1249*6960cd89SAndy Fiddaman break; 1250*6960cd89SAndy Fiddaman case NVME_FEAT_NON_OP_POWER_STATE_CONFIG: 1251*6960cd89SAndy Fiddaman name = "Non-Operation Power State Config"; 1252*6960cd89SAndy Fiddaman break; 1253*6960cd89SAndy Fiddaman case NVME_FEAT_READ_RECOVERY_LEVEL_CONFIG: 1254*6960cd89SAndy Fiddaman name = "Read Recovery Level Config"; 1255*6960cd89SAndy Fiddaman break; 1256*6960cd89SAndy Fiddaman case 
NVME_FEAT_PREDICTABLE_LATENCY_MODE_CONFIG: 1257*6960cd89SAndy Fiddaman name = "Predictable Latency Mode Config"; 1258*6960cd89SAndy Fiddaman break; 1259*6960cd89SAndy Fiddaman case NVME_FEAT_PREDICTABLE_LATENCY_MODE_WINDOW: 1260*6960cd89SAndy Fiddaman name = "Predictable Latency Mode Window"; 1261*6960cd89SAndy Fiddaman break; 1262*6960cd89SAndy Fiddaman case NVME_FEAT_LBA_STATUS_INFORMATION_ATTRIBUTES: 1263*6960cd89SAndy Fiddaman name = "LBA Status Information Report Interval"; 1264*6960cd89SAndy Fiddaman break; 1265*6960cd89SAndy Fiddaman case NVME_FEAT_HOST_BEHAVIOR_SUPPORT: 1266*6960cd89SAndy Fiddaman name = "Host Behavior Support"; 1267*6960cd89SAndy Fiddaman break; 1268*6960cd89SAndy Fiddaman case NVME_FEAT_SANITIZE_CONFIG: 1269*6960cd89SAndy Fiddaman name = "Sanitize Config"; 1270*6960cd89SAndy Fiddaman break; 1271*6960cd89SAndy Fiddaman case NVME_FEAT_ENDURANCE_GROUP_EVENT_CONFIGURATION: 1272*6960cd89SAndy Fiddaman name = "Endurance Group Event Configuration"; 1273*6960cd89SAndy Fiddaman break; 1274*6960cd89SAndy Fiddaman case NVME_FEAT_SOFTWARE_PROGRESS_MARKER: 1275*6960cd89SAndy Fiddaman name = "Software Progress Marker"; 1276*6960cd89SAndy Fiddaman break; 1277*6960cd89SAndy Fiddaman case NVME_FEAT_HOST_IDENTIFIER: 1278*6960cd89SAndy Fiddaman name = "Host Identifier"; 1279*6960cd89SAndy Fiddaman break; 1280*6960cd89SAndy Fiddaman case NVME_FEAT_RESERVATION_NOTIFICATION_MASK: 1281*6960cd89SAndy Fiddaman name = "Reservation Notification Mask"; 1282*6960cd89SAndy Fiddaman break; 1283*6960cd89SAndy Fiddaman case NVME_FEAT_RESERVATION_PERSISTENCE: 1284*6960cd89SAndy Fiddaman name = "Reservation Persistence"; 1285*6960cd89SAndy Fiddaman break; 1286*6960cd89SAndy Fiddaman case NVME_FEAT_NAMESPACE_WRITE_PROTECTION_CONFIG: 1287*6960cd89SAndy Fiddaman name = "Namespace Write Protection Config"; 1288*6960cd89SAndy Fiddaman break; 1289*6960cd89SAndy Fiddaman default: 1290*6960cd89SAndy Fiddaman name = "Unknown"; 1291*6960cd89SAndy Fiddaman break; 1292*6960cd89SAndy 
Fiddaman } 1293*6960cd89SAndy Fiddaman 1294*6960cd89SAndy Fiddaman return (name); 1295*6960cd89SAndy Fiddaman } 1296*6960cd89SAndy Fiddaman 1297*6960cd89SAndy Fiddaman static void 1298*6960cd89SAndy Fiddaman nvme_feature_invalid_cb(struct pci_nvme_softc *sc, 1299*6960cd89SAndy Fiddaman struct nvme_feature_obj *feat, 1300*6960cd89SAndy Fiddaman struct nvme_command *command, 1301*6960cd89SAndy Fiddaman struct nvme_completion *compl) 1302*6960cd89SAndy Fiddaman { 1303*6960cd89SAndy Fiddaman 13044c87aefeSPatrick Mooney pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); 13054c87aefeSPatrick Mooney } 13064c87aefeSPatrick Mooney 1307*6960cd89SAndy Fiddaman static void 1308*6960cd89SAndy Fiddaman nvme_feature_iv_config(struct pci_nvme_softc *sc, 1309*6960cd89SAndy Fiddaman struct nvme_feature_obj *feat, 1310*6960cd89SAndy Fiddaman struct nvme_command *command, 1311*6960cd89SAndy Fiddaman struct nvme_completion *compl) 1312*6960cd89SAndy Fiddaman { 1313*6960cd89SAndy Fiddaman uint32_t i; 1314*6960cd89SAndy Fiddaman uint32_t cdw11 = command->cdw11; 1315*6960cd89SAndy Fiddaman uint16_t iv; 1316*6960cd89SAndy Fiddaman bool cd; 1317*6960cd89SAndy Fiddaman 1318*6960cd89SAndy Fiddaman pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); 1319*6960cd89SAndy Fiddaman 1320*6960cd89SAndy Fiddaman iv = cdw11 & 0xffff; 1321*6960cd89SAndy Fiddaman cd = cdw11 & (1 << 16); 1322*6960cd89SAndy Fiddaman 1323*6960cd89SAndy Fiddaman if (iv > (sc->max_queues + 1)) { 1324*6960cd89SAndy Fiddaman return; 1325*6960cd89SAndy Fiddaman } 1326*6960cd89SAndy Fiddaman 1327*6960cd89SAndy Fiddaman /* No Interrupt Coalescing (i.e. 
not Coalescing Disable) for Admin Q */ 1328*6960cd89SAndy Fiddaman if ((iv == 0) && !cd) 1329*6960cd89SAndy Fiddaman return; 1330*6960cd89SAndy Fiddaman 1331*6960cd89SAndy Fiddaman /* Requested Interrupt Vector must be used by a CQ */ 1332*6960cd89SAndy Fiddaman for (i = 0; i < sc->num_cqueues + 1; i++) { 1333*6960cd89SAndy Fiddaman if (sc->compl_queues[i].intr_vec == iv) { 13344c87aefeSPatrick Mooney pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 1335*6960cd89SAndy Fiddaman } 13364c87aefeSPatrick Mooney } 13374c87aefeSPatrick Mooney 1338*6960cd89SAndy Fiddaman } 1339*6960cd89SAndy Fiddaman 1340*6960cd89SAndy Fiddaman static void 1341*6960cd89SAndy Fiddaman nvme_feature_num_queues(struct pci_nvme_softc *sc, 1342*6960cd89SAndy Fiddaman struct nvme_feature_obj *feat, 1343*6960cd89SAndy Fiddaman struct nvme_command *command, 13444c87aefeSPatrick Mooney struct nvme_completion *compl) 13454c87aefeSPatrick Mooney { 13464c87aefeSPatrick Mooney uint16_t nqr; /* Number of Queues Requested */ 13474c87aefeSPatrick Mooney 1348*6960cd89SAndy Fiddaman if (sc->num_q_is_set) { 1349*6960cd89SAndy Fiddaman WPRINTF("%s: Number of Queues already set", __func__); 1350*6960cd89SAndy Fiddaman pci_nvme_status_genc(&compl->status, 1351*6960cd89SAndy Fiddaman NVME_SC_COMMAND_SEQUENCE_ERROR); 1352*6960cd89SAndy Fiddaman return; 1353*6960cd89SAndy Fiddaman } 1354*6960cd89SAndy Fiddaman 13554c87aefeSPatrick Mooney nqr = command->cdw11 & 0xFFFF; 13564c87aefeSPatrick Mooney if (nqr == 0xffff) { 1357*6960cd89SAndy Fiddaman WPRINTF("%s: Illegal NSQR value %#x", __func__, nqr); 13584c87aefeSPatrick Mooney pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); 1359*6960cd89SAndy Fiddaman return; 13604c87aefeSPatrick Mooney } 13614c87aefeSPatrick Mooney 13624c87aefeSPatrick Mooney sc->num_squeues = ONE_BASED(nqr); 13634c87aefeSPatrick Mooney if (sc->num_squeues > sc->max_queues) { 1364*6960cd89SAndy Fiddaman DPRINTF("NSQR=%u is greater than max %u", sc->num_squeues, 1365*6960cd89SAndy 
Fiddaman sc->max_queues); 13664c87aefeSPatrick Mooney sc->num_squeues = sc->max_queues; 13674c87aefeSPatrick Mooney } 13684c87aefeSPatrick Mooney 13694c87aefeSPatrick Mooney nqr = (command->cdw11 >> 16) & 0xFFFF; 13704c87aefeSPatrick Mooney if (nqr == 0xffff) { 1371*6960cd89SAndy Fiddaman WPRINTF("%s: Illegal NCQR value %#x", __func__, nqr); 13724c87aefeSPatrick Mooney pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); 1373*6960cd89SAndy Fiddaman return; 13744c87aefeSPatrick Mooney } 13754c87aefeSPatrick Mooney 13764c87aefeSPatrick Mooney sc->num_cqueues = ONE_BASED(nqr); 13774c87aefeSPatrick Mooney if (sc->num_cqueues > sc->max_queues) { 1378*6960cd89SAndy Fiddaman DPRINTF("NCQR=%u is greater than max %u", sc->num_cqueues, 1379*6960cd89SAndy Fiddaman sc->max_queues); 13804c87aefeSPatrick Mooney sc->num_cqueues = sc->max_queues; 13814c87aefeSPatrick Mooney } 13824c87aefeSPatrick Mooney 1383*6960cd89SAndy Fiddaman /* Patch the command value which will be saved on callback's return */ 1384*6960cd89SAndy Fiddaman command->cdw11 = NVME_FEATURE_NUM_QUEUES(sc); 13854c87aefeSPatrick Mooney compl->cdw0 = NVME_FEATURE_NUM_QUEUES(sc); 13864c87aefeSPatrick Mooney 1387*6960cd89SAndy Fiddaman sc->num_q_is_set = true; 13884c87aefeSPatrick Mooney } 13894c87aefeSPatrick Mooney 13904c87aefeSPatrick Mooney static int 13914c87aefeSPatrick Mooney nvme_opc_set_features(struct pci_nvme_softc *sc, struct nvme_command *command, 13924c87aefeSPatrick Mooney struct nvme_completion *compl) 13934c87aefeSPatrick Mooney { 1394*6960cd89SAndy Fiddaman struct nvme_feature_obj *feat; 1395*6960cd89SAndy Fiddaman uint32_t nsid = command->nsid; 1396*6960cd89SAndy Fiddaman uint8_t fid = command->cdw10 & 0xFF; 13974c87aefeSPatrick Mooney 1398*6960cd89SAndy Fiddaman DPRINTF("%s: Feature ID 0x%x (%s)", __func__, fid, nvme_fid_to_name(fid)); 13994c87aefeSPatrick Mooney 1400*6960cd89SAndy Fiddaman if (fid >= NVME_FID_MAX) { 1401*6960cd89SAndy Fiddaman DPRINTF("%s invalid feature 0x%x", __func__, 
fid); 14024c87aefeSPatrick Mooney pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); 14034c87aefeSPatrick Mooney return (1); 14044c87aefeSPatrick Mooney } 1405*6960cd89SAndy Fiddaman feat = &sc->feat[fid]; 14064c87aefeSPatrick Mooney 1407*6960cd89SAndy Fiddaman if (!feat->namespace_specific && 1408*6960cd89SAndy Fiddaman !((nsid == 0) || (nsid == NVME_GLOBAL_NAMESPACE_TAG))) { 1409*6960cd89SAndy Fiddaman pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, 1410*6960cd89SAndy Fiddaman NVME_SC_FEATURE_NOT_NS_SPECIFIC); 14114c87aefeSPatrick Mooney return (1); 14124c87aefeSPatrick Mooney } 14134c87aefeSPatrick Mooney 1414*6960cd89SAndy Fiddaman compl->cdw0 = 0; 1415*6960cd89SAndy Fiddaman pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 1416*6960cd89SAndy Fiddaman 1417*6960cd89SAndy Fiddaman if (feat->set) 1418*6960cd89SAndy Fiddaman feat->set(sc, feat, command, compl); 1419*6960cd89SAndy Fiddaman 1420*6960cd89SAndy Fiddaman if (compl->status == NVME_SC_SUCCESS) 1421*6960cd89SAndy Fiddaman feat->cdw11 = command->cdw11; 1422*6960cd89SAndy Fiddaman 1423*6960cd89SAndy Fiddaman return (0); 1424*6960cd89SAndy Fiddaman } 1425*6960cd89SAndy Fiddaman 14264c87aefeSPatrick Mooney static int 14274c87aefeSPatrick Mooney nvme_opc_get_features(struct pci_nvme_softc* sc, struct nvme_command* command, 14284c87aefeSPatrick Mooney struct nvme_completion* compl) 14294c87aefeSPatrick Mooney { 1430*6960cd89SAndy Fiddaman struct nvme_feature_obj *feat; 1431*6960cd89SAndy Fiddaman uint8_t fid = command->cdw10 & 0xFF; 14324c87aefeSPatrick Mooney 1433*6960cd89SAndy Fiddaman DPRINTF("%s: Feature ID 0x%x (%s)", __func__, fid, nvme_fid_to_name(fid)); 14344c87aefeSPatrick Mooney 1435*6960cd89SAndy Fiddaman if (fid >= NVME_FID_MAX) { 1436*6960cd89SAndy Fiddaman DPRINTF("%s invalid feature 0x%x", __func__, fid); 14374c87aefeSPatrick Mooney pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); 14384c87aefeSPatrick Mooney return (1); 14394c87aefeSPatrick Mooney } 
14404c87aefeSPatrick Mooney 1441*6960cd89SAndy Fiddaman compl->cdw0 = 0; 14424c87aefeSPatrick Mooney pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 1443*6960cd89SAndy Fiddaman 1444*6960cd89SAndy Fiddaman feat = &sc->feat[fid]; 1445*6960cd89SAndy Fiddaman if (feat->get) { 1446*6960cd89SAndy Fiddaman feat->get(sc, feat, command, compl); 1447*6960cd89SAndy Fiddaman } 1448*6960cd89SAndy Fiddaman 1449*6960cd89SAndy Fiddaman if (compl->status == NVME_SC_SUCCESS) { 1450*6960cd89SAndy Fiddaman compl->cdw0 = feat->cdw11; 1451*6960cd89SAndy Fiddaman } 1452*6960cd89SAndy Fiddaman 1453*6960cd89SAndy Fiddaman return (0); 1454*6960cd89SAndy Fiddaman } 1455*6960cd89SAndy Fiddaman 1456*6960cd89SAndy Fiddaman static int 1457*6960cd89SAndy Fiddaman nvme_opc_format_nvm(struct pci_nvme_softc* sc, struct nvme_command* command, 1458*6960cd89SAndy Fiddaman struct nvme_completion* compl) 1459*6960cd89SAndy Fiddaman { 1460*6960cd89SAndy Fiddaman uint8_t ses, lbaf, pi; 1461*6960cd89SAndy Fiddaman 1462*6960cd89SAndy Fiddaman /* Only supports Secure Erase Setting - User Data Erase */ 1463*6960cd89SAndy Fiddaman ses = (command->cdw10 >> 9) & 0x7; 1464*6960cd89SAndy Fiddaman if (ses > 0x1) { 1465*6960cd89SAndy Fiddaman pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); 1466*6960cd89SAndy Fiddaman return (1); 1467*6960cd89SAndy Fiddaman } 1468*6960cd89SAndy Fiddaman 1469*6960cd89SAndy Fiddaman /* Only supports a single LBA Format */ 1470*6960cd89SAndy Fiddaman lbaf = command->cdw10 & 0xf; 1471*6960cd89SAndy Fiddaman if (lbaf != 0) { 1472*6960cd89SAndy Fiddaman pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, 1473*6960cd89SAndy Fiddaman NVME_SC_INVALID_FORMAT); 1474*6960cd89SAndy Fiddaman return (1); 1475*6960cd89SAndy Fiddaman } 1476*6960cd89SAndy Fiddaman 1477*6960cd89SAndy Fiddaman /* Doesn't support Protection Infomation */ 1478*6960cd89SAndy Fiddaman pi = (command->cdw10 >> 5) & 0x7; 1479*6960cd89SAndy Fiddaman if (pi != 0) { 1480*6960cd89SAndy Fiddaman 
pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); 1481*6960cd89SAndy Fiddaman return (1); 1482*6960cd89SAndy Fiddaman } 1483*6960cd89SAndy Fiddaman 1484*6960cd89SAndy Fiddaman if (sc->nvstore.type == NVME_STOR_RAM) { 1485*6960cd89SAndy Fiddaman if (sc->nvstore.ctx) 1486*6960cd89SAndy Fiddaman free(sc->nvstore.ctx); 1487*6960cd89SAndy Fiddaman sc->nvstore.ctx = calloc(1, sc->nvstore.size); 1488*6960cd89SAndy Fiddaman pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 1489*6960cd89SAndy Fiddaman } else { 1490*6960cd89SAndy Fiddaman struct pci_nvme_ioreq *req; 1491*6960cd89SAndy Fiddaman int err; 1492*6960cd89SAndy Fiddaman 1493*6960cd89SAndy Fiddaman req = pci_nvme_get_ioreq(sc); 1494*6960cd89SAndy Fiddaman if (req == NULL) { 1495*6960cd89SAndy Fiddaman pci_nvme_status_genc(&compl->status, 1496*6960cd89SAndy Fiddaman NVME_SC_INTERNAL_DEVICE_ERROR); 1497*6960cd89SAndy Fiddaman WPRINTF("%s: unable to allocate IO req", __func__); 1498*6960cd89SAndy Fiddaman return (1); 1499*6960cd89SAndy Fiddaman } 1500*6960cd89SAndy Fiddaman req->nvme_sq = &sc->submit_queues[0]; 1501*6960cd89SAndy Fiddaman req->sqid = 0; 1502*6960cd89SAndy Fiddaman req->opc = command->opc; 1503*6960cd89SAndy Fiddaman req->cid = command->cid; 1504*6960cd89SAndy Fiddaman req->nsid = command->nsid; 1505*6960cd89SAndy Fiddaman 1506*6960cd89SAndy Fiddaman req->io_req.br_offset = 0; 1507*6960cd89SAndy Fiddaman req->io_req.br_resid = sc->nvstore.size; 1508*6960cd89SAndy Fiddaman req->io_req.br_callback = pci_nvme_io_done; 1509*6960cd89SAndy Fiddaman 1510*6960cd89SAndy Fiddaman err = blockif_delete(sc->nvstore.ctx, &req->io_req); 1511*6960cd89SAndy Fiddaman if (err) { 1512*6960cd89SAndy Fiddaman pci_nvme_status_genc(&compl->status, 1513*6960cd89SAndy Fiddaman NVME_SC_INTERNAL_DEVICE_ERROR); 1514*6960cd89SAndy Fiddaman pci_nvme_release_ioreq(sc, req); 1515*6960cd89SAndy Fiddaman } 1516*6960cd89SAndy Fiddaman } 1517*6960cd89SAndy Fiddaman 15184c87aefeSPatrick Mooney return (1); 
15194c87aefeSPatrick Mooney } 15204c87aefeSPatrick Mooney 15214c87aefeSPatrick Mooney static int 15224c87aefeSPatrick Mooney nvme_opc_abort(struct pci_nvme_softc* sc, struct nvme_command* command, 15234c87aefeSPatrick Mooney struct nvme_completion* compl) 15244c87aefeSPatrick Mooney { 1525*6960cd89SAndy Fiddaman DPRINTF("%s submission queue %u, command ID 0x%x", __func__, 1526*6960cd89SAndy Fiddaman command->cdw10 & 0xFFFF, (command->cdw10 >> 16) & 0xFFFF); 15274c87aefeSPatrick Mooney 15284c87aefeSPatrick Mooney /* TODO: search for the command ID and abort it */ 15294c87aefeSPatrick Mooney 15304c87aefeSPatrick Mooney compl->cdw0 = 1; 15314c87aefeSPatrick Mooney pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 15324c87aefeSPatrick Mooney return (1); 15334c87aefeSPatrick Mooney } 15344c87aefeSPatrick Mooney 15354c87aefeSPatrick Mooney static int 15364c87aefeSPatrick Mooney nvme_opc_async_event_req(struct pci_nvme_softc* sc, 15374c87aefeSPatrick Mooney struct nvme_command* command, struct nvme_completion* compl) 15384c87aefeSPatrick Mooney { 1539*6960cd89SAndy Fiddaman DPRINTF("%s async event request 0x%x", __func__, command->cdw11); 1540*6960cd89SAndy Fiddaman 1541*6960cd89SAndy Fiddaman /* Don't exceed the Async Event Request Limit (AERL). 
*/ 1542*6960cd89SAndy Fiddaman if (pci_nvme_aer_limit_reached(sc)) { 1543*6960cd89SAndy Fiddaman pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, 1544*6960cd89SAndy Fiddaman NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED); 1545*6960cd89SAndy Fiddaman return (1); 1546*6960cd89SAndy Fiddaman } 1547*6960cd89SAndy Fiddaman 1548*6960cd89SAndy Fiddaman if (pci_nvme_aer_add(sc, command->cid)) { 1549*6960cd89SAndy Fiddaman pci_nvme_status_tc(&compl->status, NVME_SCT_GENERIC, 1550*6960cd89SAndy Fiddaman NVME_SC_INTERNAL_DEVICE_ERROR); 1551*6960cd89SAndy Fiddaman return (1); 1552*6960cd89SAndy Fiddaman } 15534c87aefeSPatrick Mooney 15544c87aefeSPatrick Mooney /* 1555*6960cd89SAndy Fiddaman * Raise events when they happen based on the Set Features cmd. 15564c87aefeSPatrick Mooney * These events happen async, so only set completion successful if 15574c87aefeSPatrick Mooney * there is an event reflective of the request to get event. 15584c87aefeSPatrick Mooney */ 1559*6960cd89SAndy Fiddaman compl->status = NVME_NO_STATUS; 1560*6960cd89SAndy Fiddaman 15614c87aefeSPatrick Mooney return (0); 15624c87aefeSPatrick Mooney } 15634c87aefeSPatrick Mooney 15644c87aefeSPatrick Mooney static void 15654c87aefeSPatrick Mooney pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, uint64_t value) 15664c87aefeSPatrick Mooney { 15674c87aefeSPatrick Mooney struct nvme_completion compl; 15684c87aefeSPatrick Mooney struct nvme_command *cmd; 15694c87aefeSPatrick Mooney struct nvme_submission_queue *sq; 15704c87aefeSPatrick Mooney struct nvme_completion_queue *cq; 15714c87aefeSPatrick Mooney uint16_t sqhead; 15724c87aefeSPatrick Mooney 1573*6960cd89SAndy Fiddaman DPRINTF("%s index %u", __func__, (uint32_t)value); 15744c87aefeSPatrick Mooney 15754c87aefeSPatrick Mooney sq = &sc->submit_queues[0]; 1576154972afSPatrick Mooney cq = &sc->compl_queues[0]; 15774c87aefeSPatrick Mooney 1578*6960cd89SAndy Fiddaman pthread_mutex_lock(&sq->mtx); 15794c87aefeSPatrick Mooney 1580*6960cd89SAndy Fiddaman 
sqhead = sq->head; 1581*6960cd89SAndy Fiddaman DPRINTF("sqhead %u, tail %u", sqhead, sq->tail); 15824c87aefeSPatrick Mooney 15834c87aefeSPatrick Mooney while (sqhead != atomic_load_acq_short(&sq->tail)) { 15844c87aefeSPatrick Mooney cmd = &(sq->qbase)[sqhead]; 158584659b24SMichael Zeller compl.cdw0 = 0; 15864c87aefeSPatrick Mooney compl.status = 0; 15874c87aefeSPatrick Mooney 15884c87aefeSPatrick Mooney switch (cmd->opc) { 15894c87aefeSPatrick Mooney case NVME_OPC_DELETE_IO_SQ: 1590*6960cd89SAndy Fiddaman DPRINTF("%s command DELETE_IO_SQ", __func__); 1591154972afSPatrick Mooney nvme_opc_delete_io_sq(sc, cmd, &compl); 15924c87aefeSPatrick Mooney break; 15934c87aefeSPatrick Mooney case NVME_OPC_CREATE_IO_SQ: 1594*6960cd89SAndy Fiddaman DPRINTF("%s command CREATE_IO_SQ", __func__); 1595154972afSPatrick Mooney nvme_opc_create_io_sq(sc, cmd, &compl); 15964c87aefeSPatrick Mooney break; 15974c87aefeSPatrick Mooney case NVME_OPC_DELETE_IO_CQ: 1598*6960cd89SAndy Fiddaman DPRINTF("%s command DELETE_IO_CQ", __func__); 1599154972afSPatrick Mooney nvme_opc_delete_io_cq(sc, cmd, &compl); 16004c87aefeSPatrick Mooney break; 16014c87aefeSPatrick Mooney case NVME_OPC_CREATE_IO_CQ: 1602*6960cd89SAndy Fiddaman DPRINTF("%s command CREATE_IO_CQ", __func__); 1603154972afSPatrick Mooney nvme_opc_create_io_cq(sc, cmd, &compl); 16044c87aefeSPatrick Mooney break; 16054c87aefeSPatrick Mooney case NVME_OPC_GET_LOG_PAGE: 1606*6960cd89SAndy Fiddaman DPRINTF("%s command GET_LOG_PAGE", __func__); 1607154972afSPatrick Mooney nvme_opc_get_log_page(sc, cmd, &compl); 16084c87aefeSPatrick Mooney break; 16094c87aefeSPatrick Mooney case NVME_OPC_IDENTIFY: 1610*6960cd89SAndy Fiddaman DPRINTF("%s command IDENTIFY", __func__); 1611154972afSPatrick Mooney nvme_opc_identify(sc, cmd, &compl); 16124c87aefeSPatrick Mooney break; 16134c87aefeSPatrick Mooney case NVME_OPC_ABORT: 1614*6960cd89SAndy Fiddaman DPRINTF("%s command ABORT", __func__); 1615154972afSPatrick Mooney nvme_opc_abort(sc, cmd, &compl); 
16164c87aefeSPatrick Mooney break; 16174c87aefeSPatrick Mooney case NVME_OPC_SET_FEATURES: 1618*6960cd89SAndy Fiddaman DPRINTF("%s command SET_FEATURES", __func__); 1619154972afSPatrick Mooney nvme_opc_set_features(sc, cmd, &compl); 16204c87aefeSPatrick Mooney break; 16214c87aefeSPatrick Mooney case NVME_OPC_GET_FEATURES: 1622*6960cd89SAndy Fiddaman DPRINTF("%s command GET_FEATURES", __func__); 1623154972afSPatrick Mooney nvme_opc_get_features(sc, cmd, &compl); 16244c87aefeSPatrick Mooney break; 1625*6960cd89SAndy Fiddaman case NVME_OPC_FIRMWARE_ACTIVATE: 1626*6960cd89SAndy Fiddaman DPRINTF("%s command FIRMWARE_ACTIVATE", __func__); 1627*6960cd89SAndy Fiddaman pci_nvme_status_tc(&compl.status, 1628*6960cd89SAndy Fiddaman NVME_SCT_COMMAND_SPECIFIC, 1629*6960cd89SAndy Fiddaman NVME_SC_INVALID_FIRMWARE_SLOT); 1630*6960cd89SAndy Fiddaman break; 16314c87aefeSPatrick Mooney case NVME_OPC_ASYNC_EVENT_REQUEST: 1632*6960cd89SAndy Fiddaman DPRINTF("%s command ASYNC_EVENT_REQ", __func__); 1633154972afSPatrick Mooney nvme_opc_async_event_req(sc, cmd, &compl); 1634*6960cd89SAndy Fiddaman break; 1635*6960cd89SAndy Fiddaman case NVME_OPC_FORMAT_NVM: 1636*6960cd89SAndy Fiddaman DPRINTF("%s command FORMAT_NVM", __func__); 1637*6960cd89SAndy Fiddaman if ((sc->ctrldata.oacs & 1638*6960cd89SAndy Fiddaman (1 << NVME_CTRLR_DATA_OACS_FORMAT_SHIFT)) == 0) { 1639*6960cd89SAndy Fiddaman pci_nvme_status_genc(&compl.status, NVME_SC_INVALID_OPCODE); 1640*6960cd89SAndy Fiddaman } 164184659b24SMichael Zeller compl.status = NVME_NO_STATUS; 1642*6960cd89SAndy Fiddaman nvme_opc_format_nvm(sc, cmd, &compl); 16434c87aefeSPatrick Mooney break; 16444c87aefeSPatrick Mooney default: 1645*6960cd89SAndy Fiddaman DPRINTF("0x%x command is not implemented", 1646*6960cd89SAndy Fiddaman cmd->opc); 164784659b24SMichael Zeller pci_nvme_status_genc(&compl.status, NVME_SC_INVALID_OPCODE); 16484c87aefeSPatrick Mooney } 1649154972afSPatrick Mooney sqhead = (sqhead + 1) % sq->size; 16504c87aefeSPatrick Mooney 
165184659b24SMichael Zeller if (NVME_COMPLETION_VALID(compl)) { 1652*6960cd89SAndy Fiddaman pci_nvme_cq_update(sc, &sc->compl_queues[0], 1653*6960cd89SAndy Fiddaman compl.cdw0, 1654*6960cd89SAndy Fiddaman cmd->cid, 1655*6960cd89SAndy Fiddaman 0, /* SQID */ 1656*6960cd89SAndy Fiddaman compl.status); 16574c87aefeSPatrick Mooney } 16584c87aefeSPatrick Mooney } 16594c87aefeSPatrick Mooney 1660*6960cd89SAndy Fiddaman DPRINTF("setting sqhead %u", sqhead); 1661*6960cd89SAndy Fiddaman sq->head = sqhead; 16624c87aefeSPatrick Mooney 1663154972afSPatrick Mooney if (cq->head != cq->tail) 16644c87aefeSPatrick Mooney pci_generate_msix(sc->nsc_pi, 0); 16654c87aefeSPatrick Mooney 1666*6960cd89SAndy Fiddaman pthread_mutex_unlock(&sq->mtx); 1667*6960cd89SAndy Fiddaman } 1668*6960cd89SAndy Fiddaman 1669*6960cd89SAndy Fiddaman /* 1670*6960cd89SAndy Fiddaman * Update the Write and Read statistics reported in SMART data 1671*6960cd89SAndy Fiddaman * 1672*6960cd89SAndy Fiddaman * NVMe defines "data unit" as thousand's of 512 byte blocks and is rounded up. 1673*6960cd89SAndy Fiddaman * E.g. 1 data unit is 1 - 1,000 512 byte blocks. 3 data units are 2,001 - 3,000 1674*6960cd89SAndy Fiddaman * 512 byte blocks. Rounding up is acheived by initializing the remainder to 999. 
1675*6960cd89SAndy Fiddaman */ 1676*6960cd89SAndy Fiddaman static void 1677*6960cd89SAndy Fiddaman pci_nvme_stats_write_read_update(struct pci_nvme_softc *sc, uint8_t opc, 1678*6960cd89SAndy Fiddaman size_t bytes, uint16_t status) 1679*6960cd89SAndy Fiddaman { 1680*6960cd89SAndy Fiddaman 1681*6960cd89SAndy Fiddaman pthread_mutex_lock(&sc->mtx); 1682*6960cd89SAndy Fiddaman switch (opc) { 1683*6960cd89SAndy Fiddaman case NVME_OPC_WRITE: 1684*6960cd89SAndy Fiddaman sc->write_commands++; 1685*6960cd89SAndy Fiddaman if (status != NVME_SC_SUCCESS) 1686*6960cd89SAndy Fiddaman break; 1687*6960cd89SAndy Fiddaman sc->write_dunits_remainder += (bytes / 512); 1688*6960cd89SAndy Fiddaman while (sc->write_dunits_remainder >= 1000) { 1689*6960cd89SAndy Fiddaman sc->write_data_units++; 1690*6960cd89SAndy Fiddaman sc->write_dunits_remainder -= 1000; 1691*6960cd89SAndy Fiddaman } 1692*6960cd89SAndy Fiddaman break; 1693*6960cd89SAndy Fiddaman case NVME_OPC_READ: 1694*6960cd89SAndy Fiddaman sc->read_commands++; 1695*6960cd89SAndy Fiddaman if (status != NVME_SC_SUCCESS) 1696*6960cd89SAndy Fiddaman break; 1697*6960cd89SAndy Fiddaman sc->read_dunits_remainder += (bytes / 512); 1698*6960cd89SAndy Fiddaman while (sc->read_dunits_remainder >= 1000) { 1699*6960cd89SAndy Fiddaman sc->read_data_units++; 1700*6960cd89SAndy Fiddaman sc->read_dunits_remainder -= 1000; 1701*6960cd89SAndy Fiddaman } 1702*6960cd89SAndy Fiddaman break; 1703*6960cd89SAndy Fiddaman default: 1704*6960cd89SAndy Fiddaman DPRINTF("%s: Invalid OPC 0x%02x for stats", __func__, opc); 1705*6960cd89SAndy Fiddaman break; 1706*6960cd89SAndy Fiddaman } 1707*6960cd89SAndy Fiddaman pthread_mutex_unlock(&sc->mtx); 1708*6960cd89SAndy Fiddaman } 1709*6960cd89SAndy Fiddaman 1710*6960cd89SAndy Fiddaman /* 1711*6960cd89SAndy Fiddaman * Check if the combination of Starting LBA (slba) and Number of Logical 1712*6960cd89SAndy Fiddaman * Blocks (nlb) exceeds the range of the underlying storage. 
1713*6960cd89SAndy Fiddaman * 1714*6960cd89SAndy Fiddaman * Because NVMe specifies the SLBA in blocks as a uint64_t and blockif stores 1715*6960cd89SAndy Fiddaman * the capacity in bytes as a uint64_t, care must be taken to avoid integer 1716*6960cd89SAndy Fiddaman * overflow. 1717*6960cd89SAndy Fiddaman */ 1718*6960cd89SAndy Fiddaman static bool 1719*6960cd89SAndy Fiddaman pci_nvme_out_of_range(struct pci_nvme_blockstore *nvstore, uint64_t slba, 1720*6960cd89SAndy Fiddaman uint32_t nlb) 1721*6960cd89SAndy Fiddaman { 1722*6960cd89SAndy Fiddaman size_t offset, bytes; 1723*6960cd89SAndy Fiddaman 1724*6960cd89SAndy Fiddaman /* Overflow check of multiplying Starting LBA by the sector size */ 1725*6960cd89SAndy Fiddaman if (slba >> (64 - nvstore->sectsz_bits)) 1726*6960cd89SAndy Fiddaman return (true); 1727*6960cd89SAndy Fiddaman 1728*6960cd89SAndy Fiddaman offset = slba << nvstore->sectsz_bits; 1729*6960cd89SAndy Fiddaman bytes = nlb << nvstore->sectsz_bits; 1730*6960cd89SAndy Fiddaman 1731*6960cd89SAndy Fiddaman /* Overflow check of Number of Logical Blocks */ 1732*6960cd89SAndy Fiddaman if ((nvstore->size - offset) < bytes) 1733*6960cd89SAndy Fiddaman return (true); 1734*6960cd89SAndy Fiddaman 1735*6960cd89SAndy Fiddaman return (false); 17364c87aefeSPatrick Mooney } 17374c87aefeSPatrick Mooney 17384c87aefeSPatrick Mooney static int 17394c87aefeSPatrick Mooney pci_nvme_append_iov_req(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req, 17404c87aefeSPatrick Mooney uint64_t gpaddr, size_t size, int do_write, uint64_t lba) 17414c87aefeSPatrick Mooney { 17424c87aefeSPatrick Mooney int iovidx; 17434c87aefeSPatrick Mooney 1744*6960cd89SAndy Fiddaman if (req == NULL) 1745*6960cd89SAndy Fiddaman return (-1); 1746*6960cd89SAndy Fiddaman 1747*6960cd89SAndy Fiddaman if (req->io_req.br_iovcnt == NVME_MAX_IOVEC) { 1748*6960cd89SAndy Fiddaman return (-1); 1749*6960cd89SAndy Fiddaman } 1750*6960cd89SAndy Fiddaman 17514c87aefeSPatrick Mooney /* concatenate contig block-iovs to 
minimize number of iovs */ 17524c87aefeSPatrick Mooney if ((req->prev_gpaddr + req->prev_size) == gpaddr) { 17534c87aefeSPatrick Mooney iovidx = req->io_req.br_iovcnt - 1; 17544c87aefeSPatrick Mooney 17554c87aefeSPatrick Mooney req->io_req.br_iov[iovidx].iov_base = 17564c87aefeSPatrick Mooney paddr_guest2host(req->sc->nsc_pi->pi_vmctx, 17574c87aefeSPatrick Mooney req->prev_gpaddr, size); 17584c87aefeSPatrick Mooney 17594c87aefeSPatrick Mooney req->prev_size += size; 17604c87aefeSPatrick Mooney req->io_req.br_resid += size; 17614c87aefeSPatrick Mooney 17624c87aefeSPatrick Mooney req->io_req.br_iov[iovidx].iov_len = req->prev_size; 17634c87aefeSPatrick Mooney } else { 17644c87aefeSPatrick Mooney iovidx = req->io_req.br_iovcnt; 17654c87aefeSPatrick Mooney if (iovidx == 0) { 17664c87aefeSPatrick Mooney req->io_req.br_offset = lba; 17674c87aefeSPatrick Mooney req->io_req.br_resid = 0; 17684c87aefeSPatrick Mooney req->io_req.br_param = req; 17694c87aefeSPatrick Mooney } 17704c87aefeSPatrick Mooney 17714c87aefeSPatrick Mooney req->io_req.br_iov[iovidx].iov_base = 17724c87aefeSPatrick Mooney paddr_guest2host(req->sc->nsc_pi->pi_vmctx, 17734c87aefeSPatrick Mooney gpaddr, size); 17744c87aefeSPatrick Mooney 17754c87aefeSPatrick Mooney req->io_req.br_iov[iovidx].iov_len = size; 17764c87aefeSPatrick Mooney 17774c87aefeSPatrick Mooney req->prev_gpaddr = gpaddr; 17784c87aefeSPatrick Mooney req->prev_size = size; 17794c87aefeSPatrick Mooney req->io_req.br_resid += size; 17804c87aefeSPatrick Mooney 17814c87aefeSPatrick Mooney req->io_req.br_iovcnt++; 17824c87aefeSPatrick Mooney } 17834c87aefeSPatrick Mooney 17844c87aefeSPatrick Mooney return (0); 17854c87aefeSPatrick Mooney } 17864c87aefeSPatrick Mooney 17874c87aefeSPatrick Mooney static void 17884c87aefeSPatrick Mooney pci_nvme_set_completion(struct pci_nvme_softc *sc, 17894c87aefeSPatrick Mooney struct nvme_submission_queue *sq, int sqid, uint16_t cid, 1790*6960cd89SAndy Fiddaman uint32_t cdw0, uint16_t status) 
17914c87aefeSPatrick Mooney { 17924c87aefeSPatrick Mooney struct nvme_completion_queue *cq = &sc->compl_queues[sq->cqid]; 17934c87aefeSPatrick Mooney 1794*6960cd89SAndy Fiddaman DPRINTF("%s sqid %d cqid %u cid %u status: 0x%x 0x%x", 17954c87aefeSPatrick Mooney __func__, sqid, sq->cqid, cid, NVME_STATUS_GET_SCT(status), 1796*6960cd89SAndy Fiddaman NVME_STATUS_GET_SC(status)); 17974c87aefeSPatrick Mooney 1798*6960cd89SAndy Fiddaman pci_nvme_cq_update(sc, cq, 1799*6960cd89SAndy Fiddaman 0, /* CDW0 */ 1800*6960cd89SAndy Fiddaman cid, 1801*6960cd89SAndy Fiddaman sqid, 1802*6960cd89SAndy Fiddaman status); 18034c87aefeSPatrick Mooney 1804154972afSPatrick Mooney if (cq->head != cq->tail) { 1805154972afSPatrick Mooney if (cq->intr_en & NVME_CQ_INTEN) { 18064c87aefeSPatrick Mooney pci_generate_msix(sc->nsc_pi, cq->intr_vec); 1807154972afSPatrick Mooney } else { 1808*6960cd89SAndy Fiddaman DPRINTF("%s: CQ%u interrupt disabled", 1809*6960cd89SAndy Fiddaman __func__, sq->cqid); 1810154972afSPatrick Mooney } 1811154972afSPatrick Mooney } 18124c87aefeSPatrick Mooney } 18134c87aefeSPatrick Mooney 18144c87aefeSPatrick Mooney static void 18154c87aefeSPatrick Mooney pci_nvme_release_ioreq(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req) 18164c87aefeSPatrick Mooney { 18174c87aefeSPatrick Mooney req->sc = NULL; 18184c87aefeSPatrick Mooney req->nvme_sq = NULL; 18194c87aefeSPatrick Mooney req->sqid = 0; 18204c87aefeSPatrick Mooney 18214c87aefeSPatrick Mooney pthread_mutex_lock(&sc->mtx); 18224c87aefeSPatrick Mooney 1823154972afSPatrick Mooney STAILQ_INSERT_TAIL(&sc->ioreqs_free, req, link); 18244c87aefeSPatrick Mooney sc->pending_ios--; 18254c87aefeSPatrick Mooney 18264c87aefeSPatrick Mooney /* when no more IO pending, can set to ready if device reset/enabled */ 18274c87aefeSPatrick Mooney if (sc->pending_ios == 0 && 18284c87aefeSPatrick Mooney NVME_CC_GET_EN(sc->regs.cc) && !(NVME_CSTS_GET_RDY(sc->regs.csts))) 18294c87aefeSPatrick Mooney sc->regs.csts |= NVME_CSTS_RDY; 
18304c87aefeSPatrick Mooney 18314c87aefeSPatrick Mooney pthread_mutex_unlock(&sc->mtx); 18324c87aefeSPatrick Mooney 18334c87aefeSPatrick Mooney sem_post(&sc->iosemlock); 18344c87aefeSPatrick Mooney } 18354c87aefeSPatrick Mooney 18364c87aefeSPatrick Mooney static struct pci_nvme_ioreq * 18374c87aefeSPatrick Mooney pci_nvme_get_ioreq(struct pci_nvme_softc *sc) 18384c87aefeSPatrick Mooney { 18394c87aefeSPatrick Mooney struct pci_nvme_ioreq *req = NULL;; 18404c87aefeSPatrick Mooney 18414c87aefeSPatrick Mooney sem_wait(&sc->iosemlock); 18424c87aefeSPatrick Mooney pthread_mutex_lock(&sc->mtx); 18434c87aefeSPatrick Mooney 1844154972afSPatrick Mooney req = STAILQ_FIRST(&sc->ioreqs_free); 18454c87aefeSPatrick Mooney assert(req != NULL); 1846154972afSPatrick Mooney STAILQ_REMOVE_HEAD(&sc->ioreqs_free, link); 18474c87aefeSPatrick Mooney 18484c87aefeSPatrick Mooney req->sc = sc; 18494c87aefeSPatrick Mooney 18504c87aefeSPatrick Mooney sc->pending_ios++; 18514c87aefeSPatrick Mooney 18524c87aefeSPatrick Mooney pthread_mutex_unlock(&sc->mtx); 18534c87aefeSPatrick Mooney 18544c87aefeSPatrick Mooney req->io_req.br_iovcnt = 0; 18554c87aefeSPatrick Mooney req->io_req.br_offset = 0; 18564c87aefeSPatrick Mooney req->io_req.br_resid = 0; 18574c87aefeSPatrick Mooney req->io_req.br_param = req; 18584c87aefeSPatrick Mooney req->prev_gpaddr = 0; 18594c87aefeSPatrick Mooney req->prev_size = 0; 18604c87aefeSPatrick Mooney 18614c87aefeSPatrick Mooney return req; 18624c87aefeSPatrick Mooney } 18634c87aefeSPatrick Mooney 18644c87aefeSPatrick Mooney static void 18654c87aefeSPatrick Mooney pci_nvme_io_done(struct blockif_req *br, int err) 18664c87aefeSPatrick Mooney { 18674c87aefeSPatrick Mooney struct pci_nvme_ioreq *req = br->br_param; 18684c87aefeSPatrick Mooney struct nvme_submission_queue *sq = req->nvme_sq; 18694c87aefeSPatrick Mooney uint16_t code, status = 0; 18704c87aefeSPatrick Mooney 1871*6960cd89SAndy Fiddaman DPRINTF("%s error %d %s", __func__, err, strerror(err)); 18724c87aefeSPatrick 
Mooney 18734c87aefeSPatrick Mooney /* TODO return correct error */ 18744c87aefeSPatrick Mooney code = err ? NVME_SC_DATA_TRANSFER_ERROR : NVME_SC_SUCCESS; 18754c87aefeSPatrick Mooney pci_nvme_status_genc(&status, code); 18764c87aefeSPatrick Mooney 1877*6960cd89SAndy Fiddaman pci_nvme_set_completion(req->sc, sq, req->sqid, req->cid, 0, status); 1878*6960cd89SAndy Fiddaman pci_nvme_stats_write_read_update(req->sc, req->opc, 1879*6960cd89SAndy Fiddaman req->bytes, status); 18804c87aefeSPatrick Mooney pci_nvme_release_ioreq(req->sc, req); 18814c87aefeSPatrick Mooney } 18824c87aefeSPatrick Mooney 1883*6960cd89SAndy Fiddaman /* 1884*6960cd89SAndy Fiddaman * Implements the Flush command. The specification states: 1885*6960cd89SAndy Fiddaman * If a volatile write cache is not present, Flush commands complete 1886*6960cd89SAndy Fiddaman * successfully and have no effect 1887*6960cd89SAndy Fiddaman * in the description of the Volatile Write Cache (VWC) field of the Identify 1888*6960cd89SAndy Fiddaman * Controller data. Therefore, set status to Success if the command is 1889*6960cd89SAndy Fiddaman * not supported (i.e. RAM or as indicated by the blockif). 
1890*6960cd89SAndy Fiddaman */ 1891*6960cd89SAndy Fiddaman static bool 1892*6960cd89SAndy Fiddaman nvme_opc_flush(struct pci_nvme_softc *sc, 1893*6960cd89SAndy Fiddaman struct nvme_command *cmd, 1894*6960cd89SAndy Fiddaman struct pci_nvme_blockstore *nvstore, 1895*6960cd89SAndy Fiddaman struct pci_nvme_ioreq *req, 1896*6960cd89SAndy Fiddaman uint16_t *status) 18974c87aefeSPatrick Mooney { 1898*6960cd89SAndy Fiddaman bool pending = false; 18994c87aefeSPatrick Mooney 1900*6960cd89SAndy Fiddaman if (nvstore->type == NVME_STOR_RAM) { 1901*6960cd89SAndy Fiddaman pci_nvme_status_genc(status, NVME_SC_SUCCESS); 1902*6960cd89SAndy Fiddaman } else { 1903*6960cd89SAndy Fiddaman int err; 19044c87aefeSPatrick Mooney 1905*6960cd89SAndy Fiddaman req->io_req.br_callback = pci_nvme_io_done; 1906*6960cd89SAndy Fiddaman 1907*6960cd89SAndy Fiddaman err = blockif_flush(nvstore->ctx, &req->io_req); 1908*6960cd89SAndy Fiddaman switch (err) { 1909*6960cd89SAndy Fiddaman case 0: 1910*6960cd89SAndy Fiddaman pending = true; 1911*6960cd89SAndy Fiddaman break; 1912*6960cd89SAndy Fiddaman case EOPNOTSUPP: 1913*6960cd89SAndy Fiddaman pci_nvme_status_genc(status, NVME_SC_SUCCESS); 1914*6960cd89SAndy Fiddaman break; 1915*6960cd89SAndy Fiddaman default: 1916*6960cd89SAndy Fiddaman pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); 1917*6960cd89SAndy Fiddaman } 1918*6960cd89SAndy Fiddaman } 1919*6960cd89SAndy Fiddaman 1920*6960cd89SAndy Fiddaman return (pending); 1921*6960cd89SAndy Fiddaman } 1922*6960cd89SAndy Fiddaman 1923*6960cd89SAndy Fiddaman static uint16_t 1924*6960cd89SAndy Fiddaman nvme_write_read_ram(struct pci_nvme_softc *sc, 1925*6960cd89SAndy Fiddaman struct pci_nvme_blockstore *nvstore, 1926*6960cd89SAndy Fiddaman uint64_t prp1, uint64_t prp2, 1927*6960cd89SAndy Fiddaman size_t offset, uint64_t bytes, 1928*6960cd89SAndy Fiddaman bool is_write) 1929*6960cd89SAndy Fiddaman { 1930*6960cd89SAndy Fiddaman uint8_t *buf = nvstore->ctx; 1931*6960cd89SAndy Fiddaman enum nvme_copy_dir 
dir; 1932*6960cd89SAndy Fiddaman uint16_t status = 0; 1933*6960cd89SAndy Fiddaman 1934*6960cd89SAndy Fiddaman if (is_write) 1935*6960cd89SAndy Fiddaman dir = NVME_COPY_TO_PRP; 1936*6960cd89SAndy Fiddaman else 1937*6960cd89SAndy Fiddaman dir = NVME_COPY_FROM_PRP; 1938*6960cd89SAndy Fiddaman 1939*6960cd89SAndy Fiddaman if (nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, prp1, prp2, 1940*6960cd89SAndy Fiddaman buf + offset, bytes, dir)) 1941*6960cd89SAndy Fiddaman pci_nvme_status_genc(&status, 1942*6960cd89SAndy Fiddaman NVME_SC_DATA_TRANSFER_ERROR); 1943*6960cd89SAndy Fiddaman else 1944*6960cd89SAndy Fiddaman pci_nvme_status_genc(&status, NVME_SC_SUCCESS); 1945*6960cd89SAndy Fiddaman 1946*6960cd89SAndy Fiddaman return (status); 1947*6960cd89SAndy Fiddaman } 1948*6960cd89SAndy Fiddaman 1949*6960cd89SAndy Fiddaman static uint16_t 1950*6960cd89SAndy Fiddaman nvme_write_read_blockif(struct pci_nvme_softc *sc, 1951*6960cd89SAndy Fiddaman struct pci_nvme_blockstore *nvstore, 1952*6960cd89SAndy Fiddaman struct pci_nvme_ioreq *req, 1953*6960cd89SAndy Fiddaman uint64_t prp1, uint64_t prp2, 1954*6960cd89SAndy Fiddaman size_t offset, uint64_t bytes, 1955*6960cd89SAndy Fiddaman bool is_write) 1956*6960cd89SAndy Fiddaman { 1957*6960cd89SAndy Fiddaman uint64_t size; 1958*6960cd89SAndy Fiddaman int err; 1959*6960cd89SAndy Fiddaman uint16_t status = NVME_NO_STATUS; 1960*6960cd89SAndy Fiddaman 1961*6960cd89SAndy Fiddaman size = MIN(PAGE_SIZE - (prp1 % PAGE_SIZE), bytes); 1962*6960cd89SAndy Fiddaman if (pci_nvme_append_iov_req(sc, req, prp1, 1963*6960cd89SAndy Fiddaman size, is_write, offset)) { 1964*6960cd89SAndy Fiddaman pci_nvme_status_genc(&status, 1965*6960cd89SAndy Fiddaman NVME_SC_DATA_TRANSFER_ERROR); 1966*6960cd89SAndy Fiddaman goto out; 1967*6960cd89SAndy Fiddaman } 1968*6960cd89SAndy Fiddaman 1969*6960cd89SAndy Fiddaman offset += size; 1970*6960cd89SAndy Fiddaman bytes -= size; 1971*6960cd89SAndy Fiddaman 1972*6960cd89SAndy Fiddaman if (bytes == 0) { 1973*6960cd89SAndy Fiddaman ; 
1974*6960cd89SAndy Fiddaman } else if (bytes <= PAGE_SIZE) { 1975*6960cd89SAndy Fiddaman size = bytes; 1976*6960cd89SAndy Fiddaman if (pci_nvme_append_iov_req(sc, req, prp2, 1977*6960cd89SAndy Fiddaman size, is_write, offset)) { 1978*6960cd89SAndy Fiddaman pci_nvme_status_genc(&status, 1979*6960cd89SAndy Fiddaman NVME_SC_DATA_TRANSFER_ERROR); 1980*6960cd89SAndy Fiddaman goto out; 1981*6960cd89SAndy Fiddaman } 1982*6960cd89SAndy Fiddaman } else { 1983*6960cd89SAndy Fiddaman void *vmctx = sc->nsc_pi->pi_vmctx; 1984*6960cd89SAndy Fiddaman uint64_t *prp_list = &prp2; 1985*6960cd89SAndy Fiddaman uint64_t *last = prp_list; 1986*6960cd89SAndy Fiddaman 1987*6960cd89SAndy Fiddaman /* PRP2 is pointer to a physical region page list */ 1988*6960cd89SAndy Fiddaman while (bytes) { 1989*6960cd89SAndy Fiddaman /* Last entry in list points to the next list */ 1990*6960cd89SAndy Fiddaman if (prp_list == last) { 1991*6960cd89SAndy Fiddaman uint64_t prp = *prp_list; 1992*6960cd89SAndy Fiddaman 1993*6960cd89SAndy Fiddaman prp_list = paddr_guest2host(vmctx, prp, 1994*6960cd89SAndy Fiddaman PAGE_SIZE - (prp % PAGE_SIZE)); 1995*6960cd89SAndy Fiddaman last = prp_list + (NVME_PRP2_ITEMS - 1); 1996*6960cd89SAndy Fiddaman } 1997*6960cd89SAndy Fiddaman 1998*6960cd89SAndy Fiddaman size = MIN(bytes, PAGE_SIZE); 1999*6960cd89SAndy Fiddaman 2000*6960cd89SAndy Fiddaman if (pci_nvme_append_iov_req(sc, req, *prp_list, 2001*6960cd89SAndy Fiddaman size, is_write, offset)) { 2002*6960cd89SAndy Fiddaman pci_nvme_status_genc(&status, 2003*6960cd89SAndy Fiddaman NVME_SC_DATA_TRANSFER_ERROR); 2004*6960cd89SAndy Fiddaman goto out; 2005*6960cd89SAndy Fiddaman } 2006*6960cd89SAndy Fiddaman 2007*6960cd89SAndy Fiddaman offset += size; 2008*6960cd89SAndy Fiddaman bytes -= size; 2009*6960cd89SAndy Fiddaman 2010*6960cd89SAndy Fiddaman prp_list++; 2011*6960cd89SAndy Fiddaman } 2012*6960cd89SAndy Fiddaman } 2013*6960cd89SAndy Fiddaman req->io_req.br_callback = pci_nvme_io_done; 2014*6960cd89SAndy Fiddaman if 
(is_write) 2015*6960cd89SAndy Fiddaman err = blockif_write(nvstore->ctx, &req->io_req); 2016*6960cd89SAndy Fiddaman else 2017*6960cd89SAndy Fiddaman err = blockif_read(nvstore->ctx, &req->io_req); 2018*6960cd89SAndy Fiddaman 2019*6960cd89SAndy Fiddaman if (err) 2020*6960cd89SAndy Fiddaman pci_nvme_status_genc(&status, NVME_SC_DATA_TRANSFER_ERROR); 2021*6960cd89SAndy Fiddaman out: 2022*6960cd89SAndy Fiddaman return (status); 2023*6960cd89SAndy Fiddaman } 2024*6960cd89SAndy Fiddaman 2025*6960cd89SAndy Fiddaman static bool 2026*6960cd89SAndy Fiddaman nvme_opc_write_read(struct pci_nvme_softc *sc, 2027*6960cd89SAndy Fiddaman struct nvme_command *cmd, 2028*6960cd89SAndy Fiddaman struct pci_nvme_blockstore *nvstore, 2029*6960cd89SAndy Fiddaman struct pci_nvme_ioreq *req, 2030*6960cd89SAndy Fiddaman uint16_t *status) 2031*6960cd89SAndy Fiddaman { 2032*6960cd89SAndy Fiddaman uint64_t lba, nblocks, bytes = 0; 2033*6960cd89SAndy Fiddaman size_t offset; 2034*6960cd89SAndy Fiddaman bool is_write = cmd->opc == NVME_OPC_WRITE; 2035*6960cd89SAndy Fiddaman bool pending = false; 2036*6960cd89SAndy Fiddaman 2037*6960cd89SAndy Fiddaman lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10; 2038*6960cd89SAndy Fiddaman nblocks = (cmd->cdw12 & 0xFFFF) + 1; 2039*6960cd89SAndy Fiddaman if (pci_nvme_out_of_range(nvstore, lba, nblocks)) { 2040*6960cd89SAndy Fiddaman WPRINTF("%s command would exceed LBA range", __func__); 2041*6960cd89SAndy Fiddaman pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE); 2042*6960cd89SAndy Fiddaman goto out; 2043*6960cd89SAndy Fiddaman } 2044*6960cd89SAndy Fiddaman 2045*6960cd89SAndy Fiddaman bytes = nblocks << nvstore->sectsz_bits; 2046*6960cd89SAndy Fiddaman if (bytes > NVME_MAX_DATA_SIZE) { 2047*6960cd89SAndy Fiddaman WPRINTF("%s command would exceed MDTS", __func__); 2048*6960cd89SAndy Fiddaman pci_nvme_status_genc(status, NVME_SC_INVALID_FIELD); 2049*6960cd89SAndy Fiddaman goto out; 2050*6960cd89SAndy Fiddaman } 2051*6960cd89SAndy Fiddaman 2052*6960cd89SAndy 
Fiddaman offset = lba << nvstore->sectsz_bits; 2053*6960cd89SAndy Fiddaman 2054*6960cd89SAndy Fiddaman req->bytes = bytes; 2055*6960cd89SAndy Fiddaman req->io_req.br_offset = lba; 2056*6960cd89SAndy Fiddaman 2057*6960cd89SAndy Fiddaman /* PRP bits 1:0 must be zero */ 2058*6960cd89SAndy Fiddaman cmd->prp1 &= ~0x3UL; 2059*6960cd89SAndy Fiddaman cmd->prp2 &= ~0x3UL; 2060*6960cd89SAndy Fiddaman 2061*6960cd89SAndy Fiddaman if (nvstore->type == NVME_STOR_RAM) { 2062*6960cd89SAndy Fiddaman *status = nvme_write_read_ram(sc, nvstore, cmd->prp1, 2063*6960cd89SAndy Fiddaman cmd->prp2, offset, bytes, is_write); 2064*6960cd89SAndy Fiddaman } else { 2065*6960cd89SAndy Fiddaman *status = nvme_write_read_blockif(sc, nvstore, req, 2066*6960cd89SAndy Fiddaman cmd->prp1, cmd->prp2, offset, bytes, is_write); 2067*6960cd89SAndy Fiddaman 2068*6960cd89SAndy Fiddaman if (*status == NVME_NO_STATUS) 2069*6960cd89SAndy Fiddaman pending = true; 2070*6960cd89SAndy Fiddaman } 2071*6960cd89SAndy Fiddaman out: 2072*6960cd89SAndy Fiddaman if (!pending) 2073*6960cd89SAndy Fiddaman pci_nvme_stats_write_read_update(sc, cmd->opc, bytes, *status); 2074*6960cd89SAndy Fiddaman 2075*6960cd89SAndy Fiddaman return (pending); 20764c87aefeSPatrick Mooney } 20774c87aefeSPatrick Mooney 2078154972afSPatrick Mooney static void 2079154972afSPatrick Mooney pci_nvme_dealloc_sm(struct blockif_req *br, int err) 2080154972afSPatrick Mooney { 2081154972afSPatrick Mooney struct pci_nvme_ioreq *req = br->br_param; 2082154972afSPatrick Mooney struct pci_nvme_softc *sc = req->sc; 2083154972afSPatrick Mooney bool done = true; 2084154972afSPatrick Mooney #ifdef __FreeBSD__ 2085154972afSPatrick Mooney uint16_t status; 2086154972afSPatrick Mooney #else 2087154972afSPatrick Mooney uint16_t status = 0; 2088154972afSPatrick Mooney #endif 2089154972afSPatrick Mooney 2090154972afSPatrick Mooney if (err) { 2091154972afSPatrick Mooney pci_nvme_status_genc(&status, NVME_SC_INTERNAL_DEVICE_ERROR); 2092154972afSPatrick Mooney } else if 
((req->prev_gpaddr + 1) == (req->prev_size)) { 2093154972afSPatrick Mooney pci_nvme_status_genc(&status, NVME_SC_SUCCESS); 2094154972afSPatrick Mooney } else { 2095154972afSPatrick Mooney struct iovec *iov = req->io_req.br_iov; 2096154972afSPatrick Mooney 2097154972afSPatrick Mooney req->prev_gpaddr++; 2098154972afSPatrick Mooney iov += req->prev_gpaddr; 2099154972afSPatrick Mooney 2100154972afSPatrick Mooney /* The iov_* values already include the sector size */ 2101154972afSPatrick Mooney req->io_req.br_offset = (off_t)iov->iov_base; 2102154972afSPatrick Mooney req->io_req.br_resid = iov->iov_len; 2103154972afSPatrick Mooney if (blockif_delete(sc->nvstore.ctx, &req->io_req)) { 2104154972afSPatrick Mooney pci_nvme_status_genc(&status, 2105154972afSPatrick Mooney NVME_SC_INTERNAL_DEVICE_ERROR); 2106154972afSPatrick Mooney } else 2107154972afSPatrick Mooney done = false; 2108154972afSPatrick Mooney } 2109154972afSPatrick Mooney 2110154972afSPatrick Mooney if (done) { 2111154972afSPatrick Mooney pci_nvme_set_completion(sc, req->nvme_sq, req->sqid, 2112*6960cd89SAndy Fiddaman req->cid, 0, status); 2113154972afSPatrick Mooney pci_nvme_release_ioreq(sc, req); 2114154972afSPatrick Mooney } 2115154972afSPatrick Mooney } 2116154972afSPatrick Mooney 2117*6960cd89SAndy Fiddaman static bool 2118154972afSPatrick Mooney nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc, 2119154972afSPatrick Mooney struct nvme_command *cmd, 2120154972afSPatrick Mooney struct pci_nvme_blockstore *nvstore, 2121154972afSPatrick Mooney struct pci_nvme_ioreq *req, 2122154972afSPatrick Mooney uint16_t *status) 2123154972afSPatrick Mooney { 2124*6960cd89SAndy Fiddaman struct nvme_dsm_range *range = NULL; 2125*6960cd89SAndy Fiddaman uint32_t nr, r, non_zero, dr; 2126*6960cd89SAndy Fiddaman int err; 2127*6960cd89SAndy Fiddaman bool pending = false; 2128154972afSPatrick Mooney 2129154972afSPatrick Mooney if ((sc->ctrldata.oncs & NVME_ONCS_DSM) == 0) { 2130154972afSPatrick Mooney pci_nvme_status_genc(status, 
NVME_SC_INVALID_OPCODE); 2131154972afSPatrick Mooney goto out; 2132154972afSPatrick Mooney } 2133154972afSPatrick Mooney 2134*6960cd89SAndy Fiddaman nr = cmd->cdw10 & 0xff; 2135154972afSPatrick Mooney 2136154972afSPatrick Mooney /* copy locally because a range entry could straddle PRPs */ 2137154972afSPatrick Mooney range = calloc(1, NVME_MAX_DSM_TRIM); 2138154972afSPatrick Mooney if (range == NULL) { 2139154972afSPatrick Mooney pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); 2140154972afSPatrick Mooney goto out; 2141154972afSPatrick Mooney } 2142154972afSPatrick Mooney nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2, 2143154972afSPatrick Mooney (uint8_t *)range, NVME_MAX_DSM_TRIM, NVME_COPY_FROM_PRP); 2144154972afSPatrick Mooney 2145*6960cd89SAndy Fiddaman /* Check for invalid ranges and the number of non-zero lengths */ 2146*6960cd89SAndy Fiddaman non_zero = 0; 2147*6960cd89SAndy Fiddaman for (r = 0; r <= nr; r++) { 2148*6960cd89SAndy Fiddaman if (pci_nvme_out_of_range(nvstore, 2149*6960cd89SAndy Fiddaman range[r].starting_lba, range[r].length)) { 2150*6960cd89SAndy Fiddaman pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE); 2151*6960cd89SAndy Fiddaman goto out; 2152*6960cd89SAndy Fiddaman } 2153*6960cd89SAndy Fiddaman if (range[r].length != 0) 2154*6960cd89SAndy Fiddaman non_zero++; 2155*6960cd89SAndy Fiddaman } 2156*6960cd89SAndy Fiddaman 2157*6960cd89SAndy Fiddaman if (cmd->cdw11 & NVME_DSM_ATTR_DEALLOCATE) { 2158*6960cd89SAndy Fiddaman size_t offset, bytes; 2159*6960cd89SAndy Fiddaman int sectsz_bits = sc->nvstore.sectsz_bits; 2160*6960cd89SAndy Fiddaman 2161*6960cd89SAndy Fiddaman /* 2162*6960cd89SAndy Fiddaman * DSM calls are advisory only, and compliant controllers 2163*6960cd89SAndy Fiddaman * may choose to take no actions (i.e. return Success). 
2164*6960cd89SAndy Fiddaman */ 2165*6960cd89SAndy Fiddaman if (!nvstore->deallocate) { 2166*6960cd89SAndy Fiddaman pci_nvme_status_genc(status, NVME_SC_SUCCESS); 2167*6960cd89SAndy Fiddaman goto out; 2168*6960cd89SAndy Fiddaman } 2169*6960cd89SAndy Fiddaman 2170*6960cd89SAndy Fiddaman /* If all ranges have a zero length, return Success */ 2171*6960cd89SAndy Fiddaman if (non_zero == 0) { 2172*6960cd89SAndy Fiddaman pci_nvme_status_genc(status, NVME_SC_SUCCESS); 2173*6960cd89SAndy Fiddaman goto out; 2174*6960cd89SAndy Fiddaman } 2175*6960cd89SAndy Fiddaman 2176*6960cd89SAndy Fiddaman if (req == NULL) { 2177*6960cd89SAndy Fiddaman pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); 2178*6960cd89SAndy Fiddaman goto out; 2179*6960cd89SAndy Fiddaman } 2180*6960cd89SAndy Fiddaman 2181*6960cd89SAndy Fiddaman offset = range[0].starting_lba << sectsz_bits; 2182*6960cd89SAndy Fiddaman bytes = range[0].length << sectsz_bits; 2183*6960cd89SAndy Fiddaman 2184154972afSPatrick Mooney /* 2185154972afSPatrick Mooney * If the request is for more than a single range, store 2186154972afSPatrick Mooney * the ranges in the br_iov. Optimize for the common case 2187154972afSPatrick Mooney * of a single range. 
2188154972afSPatrick Mooney * 2189154972afSPatrick Mooney * Note that NVMe Number of Ranges is a zero based value 2190154972afSPatrick Mooney */ 2191154972afSPatrick Mooney req->io_req.br_iovcnt = 0; 2192*6960cd89SAndy Fiddaman req->io_req.br_offset = offset; 2193*6960cd89SAndy Fiddaman req->io_req.br_resid = bytes; 2194154972afSPatrick Mooney 2195154972afSPatrick Mooney if (nr == 0) { 2196154972afSPatrick Mooney req->io_req.br_callback = pci_nvme_io_done; 2197154972afSPatrick Mooney } else { 2198154972afSPatrick Mooney struct iovec *iov = req->io_req.br_iov; 2199154972afSPatrick Mooney 2200*6960cd89SAndy Fiddaman for (r = 0, dr = 0; r <= nr; r++) { 2201*6960cd89SAndy Fiddaman offset = range[r].starting_lba << sectsz_bits; 2202*6960cd89SAndy Fiddaman bytes = range[r].length << sectsz_bits; 2203*6960cd89SAndy Fiddaman if (bytes == 0) 2204*6960cd89SAndy Fiddaman continue; 2205*6960cd89SAndy Fiddaman 2206*6960cd89SAndy Fiddaman if ((nvstore->size - offset) < bytes) { 2207*6960cd89SAndy Fiddaman pci_nvme_status_genc(status, 2208*6960cd89SAndy Fiddaman NVME_SC_LBA_OUT_OF_RANGE); 2209*6960cd89SAndy Fiddaman goto out; 2210*6960cd89SAndy Fiddaman } 2211*6960cd89SAndy Fiddaman iov[dr].iov_base = (void *)offset; 2212*6960cd89SAndy Fiddaman iov[dr].iov_len = bytes; 2213*6960cd89SAndy Fiddaman dr++; 2214154972afSPatrick Mooney } 2215154972afSPatrick Mooney req->io_req.br_callback = pci_nvme_dealloc_sm; 2216154972afSPatrick Mooney 2217154972afSPatrick Mooney /* 2218154972afSPatrick Mooney * Use prev_gpaddr to track the current entry and 2219154972afSPatrick Mooney * prev_size to track the number of entries 2220154972afSPatrick Mooney */ 2221154972afSPatrick Mooney req->prev_gpaddr = 0; 2222*6960cd89SAndy Fiddaman req->prev_size = dr; 2223154972afSPatrick Mooney } 2224154972afSPatrick Mooney 2225154972afSPatrick Mooney err = blockif_delete(nvstore->ctx, &req->io_req); 2226154972afSPatrick Mooney if (err) 2227154972afSPatrick Mooney pci_nvme_status_genc(status, 
NVME_SC_INTERNAL_DEVICE_ERROR); 2228*6960cd89SAndy Fiddaman else 2229*6960cd89SAndy Fiddaman pending = true; 2230154972afSPatrick Mooney } 2231154972afSPatrick Mooney out: 2232*6960cd89SAndy Fiddaman free(range); 2233*6960cd89SAndy Fiddaman return (pending); 2234154972afSPatrick Mooney } 22354c87aefeSPatrick Mooney 22364c87aefeSPatrick Mooney static void 22374c87aefeSPatrick Mooney pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx) 22384c87aefeSPatrick Mooney { 22394c87aefeSPatrick Mooney struct nvme_submission_queue *sq; 22404c87aefeSPatrick Mooney uint16_t status = 0; 22414c87aefeSPatrick Mooney uint16_t sqhead; 22424c87aefeSPatrick Mooney 22434c87aefeSPatrick Mooney /* handle all submissions up to sq->tail index */ 22444c87aefeSPatrick Mooney sq = &sc->submit_queues[idx]; 22454c87aefeSPatrick Mooney 2246*6960cd89SAndy Fiddaman pthread_mutex_lock(&sq->mtx); 22474c87aefeSPatrick Mooney 2248*6960cd89SAndy Fiddaman sqhead = sq->head; 2249*6960cd89SAndy Fiddaman DPRINTF("nvme_handle_io qid %u head %u tail %u cmdlist %p", 2250*6960cd89SAndy Fiddaman idx, sqhead, sq->tail, sq->qbase); 22514c87aefeSPatrick Mooney 22524c87aefeSPatrick Mooney while (sqhead != atomic_load_acq_short(&sq->tail)) { 22534c87aefeSPatrick Mooney struct nvme_command *cmd; 2254*6960cd89SAndy Fiddaman struct pci_nvme_ioreq *req; 2255*6960cd89SAndy Fiddaman uint32_t nsid; 2256*6960cd89SAndy Fiddaman bool pending; 22574c87aefeSPatrick Mooney 2258*6960cd89SAndy Fiddaman pending = false; 2259*6960cd89SAndy Fiddaman req = NULL; 2260*6960cd89SAndy Fiddaman status = 0; 22614c87aefeSPatrick Mooney 22624c87aefeSPatrick Mooney cmd = &sq->qbase[sqhead]; 22634c87aefeSPatrick Mooney sqhead = (sqhead + 1) % sq->size; 22644c87aefeSPatrick Mooney 2265*6960cd89SAndy Fiddaman nsid = le32toh(cmd->nsid); 2266*6960cd89SAndy Fiddaman if ((nsid == 0) || (nsid > sc->ctrldata.nn)) { 2267*6960cd89SAndy Fiddaman pci_nvme_status_genc(&status, 2268*6960cd89SAndy Fiddaman NVME_SC_INVALID_NAMESPACE_OR_FORMAT); 
2269*6960cd89SAndy Fiddaman status |= 2270*6960cd89SAndy Fiddaman NVME_STATUS_DNR_MASK << NVME_STATUS_DNR_SHIFT; 2271*6960cd89SAndy Fiddaman goto complete; 22724c87aefeSPatrick Mooney } 22734c87aefeSPatrick Mooney 22744c87aefeSPatrick Mooney req = pci_nvme_get_ioreq(sc); 2275*6960cd89SAndy Fiddaman if (req == NULL) { 2276*6960cd89SAndy Fiddaman pci_nvme_status_genc(&status, 2277*6960cd89SAndy Fiddaman NVME_SC_INTERNAL_DEVICE_ERROR); 2278*6960cd89SAndy Fiddaman WPRINTF("%s: unable to allocate IO req", __func__); 2279*6960cd89SAndy Fiddaman goto complete; 2280*6960cd89SAndy Fiddaman } 22814c87aefeSPatrick Mooney req->nvme_sq = sq; 22824c87aefeSPatrick Mooney req->sqid = idx; 22834c87aefeSPatrick Mooney req->opc = cmd->opc; 22844c87aefeSPatrick Mooney req->cid = cmd->cid; 22854c87aefeSPatrick Mooney req->nsid = cmd->nsid; 22864c87aefeSPatrick Mooney 22874c87aefeSPatrick Mooney switch (cmd->opc) { 2288*6960cd89SAndy Fiddaman case NVME_OPC_FLUSH: 2289*6960cd89SAndy Fiddaman pending = nvme_opc_flush(sc, cmd, &sc->nvstore, 2290*6960cd89SAndy Fiddaman req, &status); 22914c87aefeSPatrick Mooney break; 22924c87aefeSPatrick Mooney case NVME_OPC_WRITE: 2293*6960cd89SAndy Fiddaman case NVME_OPC_READ: 2294*6960cd89SAndy Fiddaman pending = nvme_opc_write_read(sc, cmd, &sc->nvstore, 2295*6960cd89SAndy Fiddaman req, &status); 2296*6960cd89SAndy Fiddaman break; 2297*6960cd89SAndy Fiddaman case NVME_OPC_WRITE_ZEROES: 2298*6960cd89SAndy Fiddaman /* TODO: write zeroes 2299*6960cd89SAndy Fiddaman WPRINTF("%s write zeroes lba 0x%lx blocks %u", 2300*6960cd89SAndy Fiddaman __func__, lba, cmd->cdw12 & 0xFFFF); */ 2301*6960cd89SAndy Fiddaman pci_nvme_status_genc(&status, NVME_SC_SUCCESS); 2302*6960cd89SAndy Fiddaman break; 2303*6960cd89SAndy Fiddaman case NVME_OPC_DATASET_MANAGEMENT: 2304*6960cd89SAndy Fiddaman pending = nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore, 2305*6960cd89SAndy Fiddaman req, &status); 23064c87aefeSPatrick Mooney break; 23074c87aefeSPatrick Mooney default: 
2308*6960cd89SAndy Fiddaman WPRINTF("%s unhandled io command 0x%x", 2309*6960cd89SAndy Fiddaman __func__, cmd->opc); 2310*6960cd89SAndy Fiddaman pci_nvme_status_genc(&status, NVME_SC_INVALID_OPCODE); 23114c87aefeSPatrick Mooney } 2312*6960cd89SAndy Fiddaman complete: 2313*6960cd89SAndy Fiddaman if (!pending) { 23144c87aefeSPatrick Mooney pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0, 2315*6960cd89SAndy Fiddaman status); 2316*6960cd89SAndy Fiddaman if (req != NULL) 23174c87aefeSPatrick Mooney pci_nvme_release_ioreq(sc, req); 23184c87aefeSPatrick Mooney } 23194c87aefeSPatrick Mooney } 23204c87aefeSPatrick Mooney 2321*6960cd89SAndy Fiddaman sq->head = sqhead; 2322*6960cd89SAndy Fiddaman 2323*6960cd89SAndy Fiddaman pthread_mutex_unlock(&sq->mtx); 23244c87aefeSPatrick Mooney }

/*
 * Handle a guest write to a queue doorbell register.
 *
 * ctx   - VM context; not used by this function
 * sc    - controller soft state
 * idx   - queue index decoded from the doorbell offset; index 0 is routed
 *         to the admin command handler
 * is_sq - non-zero for a submission queue (tail) doorbell, zero for a
 *         completion queue (head) doorbell
 * value - value written by the guest; only the low 16 bits are stored
 *
 * Out-of-range queue indices are logged and otherwise ignored.
 */
static void
pci_nvme_handle_doorbell(struct vmctx *ctx, struct pci_nvme_softc *sc,
    uint64_t idx, int is_sq, uint64_t value)
{
	DPRINTF("nvme doorbell %lu, %s, val 0x%lx",
	    idx, is_sq ? "SQ" : "CQ", value & 0xFFFF);

	if (is_sq) {
		/*
		 * NOTE(review): the bound is inclusive, which assumes
		 * submit_queues[] holds num_squeues + 1 entries (admin queue
		 * plus I/O queues) -- confirm against the queue allocation.
		 */
		if (idx > sc->num_squeues) {
			WPRINTF("%s queue index %lu overflow from "
			    "guest (max %u)",
			    __func__, idx, sc->num_squeues);
			return;
		}

		atomic_store_short(&sc->submit_queues[idx].tail,
		    (uint16_t)value);

		/*
		 * The index was validated above, so no further range check is
		 * needed before dispatching.
		 */
		if (idx == 0) {
			pci_nvme_handle_admin_cmd(sc, value);
		} else {
			/* submission queue; handle new entries in SQ */
			pci_nvme_handle_io_cmd(sc, (uint16_t)idx);
		}
	} else {
		if (idx > sc->num_cqueues) {
			WPRINTF("%s queue index %lu overflow from "
			    "guest (max %u)",
			    __func__, idx, sc->num_cqueues);
			return;
		}

		atomic_store_short(&sc->compl_queues[idx].head,
		    (uint16_t)value);
	}
}

/*
 * Debug helper: log the symbolic name of the BAR 0 register at `offset`.
 *
 * func    - name of the calling function, included in the message
 * offset  - register offset within BAR 0
 * iswrite - non-zero if the access is a write, zero for a read
 */
static void
pci_nvme_bar0_reg_dumps(const char *func, uint64_t offset, int iswrite)
{
	const char *s = iswrite ? "WRITE" : "READ";

	switch (offset) {
	case NVME_CR_CAP_LOW:
		DPRINTF("%s %s NVME_CR_CAP_LOW", func, s);
		break;
	case NVME_CR_CAP_HI:
		DPRINTF("%s %s NVME_CR_CAP_HI", func, s);
		break;
	case NVME_CR_VS:
		DPRINTF("%s %s NVME_CR_VS", func, s);
		break;
	case NVME_CR_INTMS:
		DPRINTF("%s %s NVME_CR_INTMS", func, s);
		break;
	case NVME_CR_INTMC:
		DPRINTF("%s %s NVME_CR_INTMC", func, s);
		break;
	case NVME_CR_CC:
		DPRINTF("%s %s NVME_CR_CC", func, s);
		break;
	case NVME_CR_CSTS:
		DPRINTF("%s %s NVME_CR_CSTS", func, s);
		break;
	case NVME_CR_NSSR:
		DPRINTF("%s %s NVME_CR_NSSR", func, s);
		break;
	case NVME_CR_AQA:
		DPRINTF("%s %s NVME_CR_AQA", func, s);
		break;
	case NVME_CR_ASQ_LOW:
		DPRINTF("%s %s NVME_CR_ASQ_LOW", func, s);
		break;
	case NVME_CR_ASQ_HI:
		DPRINTF("%s %s NVME_CR_ASQ_HI", func, s);
		break;
	case NVME_CR_ACQ_LOW:
		DPRINTF("%s %s NVME_CR_ACQ_LOW", func, s);
		break;
	case NVME_CR_ACQ_HI:
		DPRINTF("%s %s NVME_CR_ACQ_HI", func, s);
		break;
	default:
		DPRINTF("unknown nvme bar-0 offset 0x%lx", offset);
		break;
	}
}

/*
 * Handle a guest write to BAR 0.
 *
 * Offsets at or above NVME_DOORBELL_OFFSET are doorbells.  Each queue owns
 * 8 bytes of doorbell space: the first 4 bytes are the submission queue
 * doorbell and the last 4 the completion queue doorbell.  Doorbell writes
 * past the last queue are logged and dropped.
 *
 * Offsets below NVME_DOORBELL_OFFSET are controller registers.  Only 4-byte
 * register writes are accepted; other sizes are logged and dropped.
 * Register updates are made with sc->mtx held.
 */
static void
pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc *sc,
    uint64_t offset, int size, uint64_t value)
{
	uint32_t ccreg;

	if (offset >= NVME_DOORBELL_OFFSET) {
		uint64_t belloffset = offset - NVME_DOORBELL_OFFSET;
		uint64_t idx = belloffset / 8; /* door bell size = 2*int */
		int is_sq = (belloffset % 8) < 4;

		if (belloffset > ((sc->max_queues+1) * 8 - 4)) {
			WPRINTF("guest attempted an overflow write offset "
			    "0x%lx, val 0x%lx in %s",
			    offset, value, __func__);
			return;
		}

		pci_nvme_handle_doorbell(ctx, sc, idx, is_sq, value);
		return;
	}

	DPRINTF("nvme-write offset 0x%lx, size %d, value 0x%lx",
	    offset, size, value);

	if (size != 4) {
		WPRINTF("guest wrote invalid size %d (offset 0x%lx, "
		    "val 0x%lx) to bar0 in %s",
		    size, offset, value, __func__);
		/* TODO: shutdown device */
		return;
	}

	pci_nvme_bar0_reg_dumps(__func__, offset, 1);

	pthread_mutex_lock(&sc->mtx);

	switch (offset) {
	case NVME_CR_CAP_LOW:
	case NVME_CR_CAP_HI:
		/* readonly */
		break;
	case NVME_CR_VS:
		/* readonly */
		break;
	case NVME_CR_INTMS:
		/* MSI-X, so ignore */
		break;
	case NVME_CR_INTMC:
		/* MSI-X, so ignore */
		break;
	case NVME_CR_CC:
		ccreg = (uint32_t)value;

		DPRINTF("%s NVME_CR_CC en %x css %x shn %x iosqes %u "
		    "iocqes %u",
		    __func__,
		    NVME_CC_GET_EN(ccreg), NVME_CC_GET_CSS(ccreg),
		    NVME_CC_GET_SHN(ccreg), NVME_CC_GET_IOSQES(ccreg),
		    NVME_CC_GET_IOCQES(ccreg));

		if (NVME_CC_GET_SHN(ccreg)) {
			/* perform shutdown - flush out data to backend */
			sc->regs.csts &= ~(NVME_CSTS_REG_SHST_MASK <<
			    NVME_CSTS_REG_SHST_SHIFT);
			sc->regs.csts |= NVME_SHST_COMPLETE <<
			    NVME_CSTS_REG_SHST_SHIFT;
		}
		if (NVME_CC_GET_EN(ccreg) != NVME_CC_GET_EN(sc->regs.cc)) {
			if (NVME_CC_GET_EN(ccreg) == 0) {
				/* transition 1->0 causes controller reset */
				pci_nvme_reset_locked(sc);
			} else {
				/* transition 0->1 initializes the controller */
				pci_nvme_init_controller(ctx, sc);
			}
		}

		/* Insert the iocqes, iosqes and en bits from the write */
		sc->regs.cc &= ~NVME_CC_WRITE_MASK;
		sc->regs.cc |= ccreg & NVME_CC_WRITE_MASK;
		if (NVME_CC_GET_EN(ccreg) == 0) {
			/* Insert the ams, mps and css bit fields */
			sc->regs.cc &= ~NVME_CC_NEN_WRITE_MASK;
			sc->regs.cc |= ccreg & NVME_CC_NEN_WRITE_MASK;
			sc->regs.csts &= ~NVME_CSTS_RDY;
		} else if (sc->pending_ios == 0) {
			sc->regs.csts |= NVME_CSTS_RDY;
		}
		break;
	case NVME_CR_CSTS:
		break;
	case NVME_CR_NSSR:
		/* ignore writes; don't support subsystem reset */
		break;
	case NVME_CR_AQA:
		sc->regs.aqa = (uint32_t)value;
		break;
	case NVME_CR_ASQ_LOW:
		/* replace the low dword, keeping the low 12 bits clear */
		sc->regs.asq = (sc->regs.asq & (0xFFFFFFFF00000000)) |
		    (0xFFFFF000 & value);
		break;
	case NVME_CR_ASQ_HI:
		sc->regs.asq = (sc->regs.asq & (0x00000000FFFFFFFF)) |
		    (value << 32);
		break;
	case NVME_CR_ACQ_LOW:
		/* replace the low dword, keeping the low 12 bits clear */
		sc->regs.acq = (sc->regs.acq & (0xFFFFFFFF00000000)) |
		    (0xFFFFF000 & value);
		break;
	case NVME_CR_ACQ_HI:
		sc->regs.acq = (sc->regs.acq & (0x00000000FFFFFFFF)) |
		    (value << 32);
		break;
	default:
		DPRINTF("%s unknown offset 0x%lx, value 0x%lx size %d",
		    __func__, offset, value, size);
		break;
	}
	pthread_mutex_unlock(&sc->mtx);
}

/*
 * BAR write entry point (pe_barwrite).  Writes to the MSI-X table or PBA
 * BAR are forwarded to pci_emul_msix_twrite(); BAR 0 writes go to
 * pci_nvme_write_bar_0(); writes to any other BAR are only logged.
 */
static void
pci_nvme_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
    int baridx, uint64_t offset, int size, uint64_t value)
{
	struct pci_nvme_softc *sc = pi->pi_arg;

	if (baridx == pci_msix_table_bar(pi) ||
	    baridx == pci_msix_pba_bar(pi)) {
		DPRINTF("nvme-write baridx %d, msix: off 0x%lx, size %d, "
		    " value 0x%lx", baridx, offset, size, value);

		pci_emul_msix_twrite(pi, offset, size, value);
		return;
	}

	switch (baridx) {
	case 0:
		pci_nvme_write_bar_0(ctx, sc, offset, size, value);
		break;

	default:
		DPRINTF("%s unknown baridx %d, val 0x%lx",
		    __func__, baridx, value);
		break;
	}
}

/*
 * Handle a guest read from BAR 0.
 *
 * Offsets below NVME_DOORBELL_OFFSET are served by copying `size` bytes out
 * of sc->regs with sc->mtx held.  Reads at or above the doorbell offset are
 * logged and return 0.  The result is masked to the access width for 1, 2
 * and 4 byte reads.
 */
static uint64_t
pci_nvme_read_bar_0(struct pci_nvme_softc *sc, uint64_t offset, int size)
{
	/*
	 * Start from zero: memcpy() below fills only `size` bytes, and the
	 * remaining bytes must not be left indeterminate.
	 */
	uint64_t value = 0;

	pci_nvme_bar0_reg_dumps(__func__, offset, 0);

	if (offset < NVME_DOORBELL_OFFSET) {
		void *p = &(sc->regs);

		pthread_mutex_lock(&sc->mtx);
		memcpy(&value, (void *)((uintptr_t)p + offset), size);
		pthread_mutex_unlock(&sc->mtx);
	} else {
		WPRINTF("pci_nvme: read invalid offset %ld", offset);
	}

	switch (size) {
	case 1:
		value &= 0xFF;
		break;
	case 2:
		value &= 0xFFFF;
		break;
	case 4:
		value &= 0xFFFFFFFF;
		break;
	}

	DPRINTF("   nvme-read offset 0x%lx, size %d -> value 0x%x",
	    offset, size, (uint32_t)value);

	return (value);
}

/*
 * BAR read entry point (pe_barread).  Reads of the MSI-X table or PBA BAR
 * are forwarded to pci_emul_msix_tread(); BAR 0 reads go to
 * pci_nvme_read_bar_0(); reads of any other BAR are logged and return 0.
 */
static uint64_t
pci_nvme_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
    uint64_t offset, int size)
{
	struct pci_nvme_softc *sc = pi->pi_arg;

	if (baridx == pci_msix_table_bar(pi) ||
	    baridx == pci_msix_pba_bar(pi)) {
		DPRINTF("nvme-read bar: %d, msix: regoff 0x%lx, size %d",
		    baridx, offset, size);

		return pci_emul_msix_tread(pi, offset, size);
	}

	switch (baridx) {
	case 0:
		return pci_nvme_read_bar_0(sc, offset, size);

	default:
		DPRINTF("unknown bar %d, 0x%lx", baridx, offset);
		break;
	}

	return (0);
}

/*
 * Return non-zero if `name` is an option keyword that is only meaningful
 * when written as "name=value".
 */
static int
pci_nvme_opt_needs_value(const char *name)
{
	static const char *const keywords[] = {
		"maxq", "qsz", "ioslots", "sectsz", "ser", "ram", "eui64",
		"dsm"
	};
	unsigned int i;

	for (i = 0; i < sizeof (keywords) / sizeof (keywords[0]); i++) {
		if (strcmp(keywords[i], name) == 0)
			return (1);
	}

	return (0);
}

/*
 * Parse the comma-separated device option string into `sc`.
 *
 * Defaults are applied first, then each "name[=value]" token is processed.
 * A first token that is not a recognised keyword is treated as the path of
 * the backing store and opened with blockif_open().
 *
 * Returns 0 on success and -1 on any error: missing option string,
 * allocation failure, keyword without a value, unknown option, backing
 * store missing or of zero size, or non-positive qsz/ioslots.
 */
static int
pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *opts)
{
	char bident[sizeof("XX:X:X")];
	char *uopt, *xopts, *config;
	uint32_t sectsz;
	int optidx;

	sc->max_queues = NVME_QUEUES;
	sc->max_qentries = NVME_MAX_QENTRIES;
	sc->ioslots = NVME_IOSLOTS;
	sc->num_squeues = sc->max_queues;
	sc->num_cqueues = sc->max_queues;
	sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO;
	sectsz = 0;

	snprintf(sc->ctrldata.sn, sizeof(sc->ctrldata.sn),
	    "NVME-%d-%d", sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func);

	/* Without an option string there can be no backing store. */
	if (opts == NULL) {
		EPRINTLN("backing store not specified");
		return (-1);
	}

	uopt = strdup(opts);
	if (uopt == NULL) {
		perror("Unable to copy option string");
		return (-1);
	}

	optidx = 0;
	for (xopts = strtok(uopt, ",");
	    xopts != NULL;
	    xopts = strtok(NULL, ",")) {

		/* Split "name=value"; config is NULL when there is no '='. */
		config = strchr(xopts, '=');
		if (config != NULL) {
			*config++ = '\0';
		} else if (pci_nvme_opt_needs_value(xopts)) {
			/* Every keyword branch below dereferences config. */
			EPRINTLN("Invalid option %s", xopts);
			free(uopt);
			return (-1);
		}

		if (!strcmp("maxq", xopts)) {
			sc->max_queues = atoi(config);
		} else if (!strcmp("qsz", xopts)) {
			sc->max_qentries = atoi(config);
		} else if (!strcmp("ioslots", xopts)) {
			sc->ioslots = atoi(config);
		} else if (!strcmp("sectsz", xopts)) {
			sectsz = atoi(config);
		} else if (!strcmp("ser", xopts)) {
			/*
			 * This field indicates the Product Serial Number in
			 * 7-bit ASCII, unused bytes should be space characters.
			 * Ref: NVMe v1.3c.
			 */
			cpywithpad((char *)sc->ctrldata.sn,
			    sizeof(sc->ctrldata.sn), config, ' ');
		} else if (!strcmp("ram", xopts)) {
			/* value is the RAM disk size in MiB */
			uint64_t sz = strtoull(config, NULL, 10);

			sc->nvstore.type = NVME_STOR_RAM;
			sc->nvstore.size = sz * 1024 * 1024;
			sc->nvstore.ctx = calloc(1, sc->nvstore.size);
			if (sc->nvstore.ctx == NULL) {
				perror("Unable to allocate RAM");
				free(uopt);
				return (-1);
			}
			sc->nvstore.sectsz = 4096;
			sc->nvstore.sectsz_bits = 12;
		} else if (!strcmp("eui64", xopts)) {
			sc->nvstore.eui64 = htobe64(strtoull(config, NULL, 0));
		} else if (!strcmp("dsm", xopts)) {
			/* Unrecognised values leave the setting unchanged. */
			if (!strcmp("auto", config))
				sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO;
			else if (!strcmp("enable", config))
				sc->dataset_management = NVME_DATASET_MANAGEMENT_ENABLE;
			else if (!strcmp("disable", config))
				sc->dataset_management = NVME_DATASET_MANAGEMENT_DISABLE;
		} else if (optidx == 0) {
			/* First token and not a keyword: backing store path. */
			snprintf(bident, sizeof(bident), "%d:%d",
			    sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func);
			sc->nvstore.ctx = blockif_open(xopts, bident);
			if (sc->nvstore.ctx == NULL) {
				perror("Could not open backing file");
				free(uopt);
				return (-1);
			}
			sc->nvstore.type = NVME_STOR_BLOCKIF;
			sc->nvstore.size = blockif_size(sc->nvstore.ctx);
		} else {
			EPRINTLN("Invalid option %s", xopts);
			free(uopt);
			return (-1);
		}

		optidx++;
	}
	free(uopt);

	if (sc->nvstore.ctx == NULL || sc->nvstore.size == 0) {
		EPRINTLN("backing store not specified");
		return (-1);
	}

	/*
	 * An explicit, supported sectsz wins; otherwise a blockif-backed
	 * store uses the sector size reported by blockif.
	 */
	if (sectsz == 512 || sectsz == 4096 || sectsz == 8192)
		sc->nvstore.sectsz = sectsz;
	else if (sc->nvstore.type != NVME_STOR_RAM)
		sc->nvstore.sectsz = blockif_sectsz(sc->nvstore.ctx);

	/* Smallest shift, starting at 9, with (1 << shift) >= sectsz. */
	sc->nvstore.sectsz_bits = 9;
	while ((1 << sc->nvstore.sectsz_bits) < sc->nvstore.sectsz)
		sc->nvstore.sectsz_bits++;

	if (sc->max_queues <= 0 || sc->max_queues > NVME_QUEUES)
		sc->max_queues = NVME_QUEUES;

	if (sc->max_qentries <= 0) {
		EPRINTLN("Invalid qsz option");
		return (-1);
	}
	if (sc->ioslots <= 0) {
		EPRINTLN("Invalid ioslots option");
		return (-1);
	}

	return (0);
}

/*
 * Device model init entry point (pe_init).
 *
 * Allocates the controller soft state, parses `opts`, builds the free list
 * of I/O request slots, programs the PCI config space identity, allocates
 * BAR 0 and the MSI-X and PCI Express capabilities, then initializes the
 * controller data structures and resets the controller.
 *
 * Returns 0 on success and non-zero on failure.
 */
static int
pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	struct pci_nvme_softc *sc;
	uint32_t pci_membar_sz;
	int error;

	sc = calloc(1, sizeof(struct pci_nvme_softc));
	if (sc == NULL) {
		perror("Unable to allocate NVMe soft state");
		return (-1);
	}
	pi->pi_arg = sc;
	sc->nsc_pi = pi;

	error = pci_nvme_parse_opts(sc, opts);
	if (error < 0)
		goto done;
	error = 0;

	STAILQ_INIT(&sc->ioreqs_free);
	sc->ioreqs = calloc(sc->ioslots, sizeof(struct pci_nvme_ioreq));
	if (sc->ioreqs == NULL) {
		perror("Unable to allocate I/O request slots");
		error = -1;
		goto done;
	}
	for (int i = 0; i < sc->ioslots; i++) {
		STAILQ_INSERT_TAIL(&sc->ioreqs_free, &sc->ioreqs[i], link);
	}

	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0A0A);
	pci_set_cfgdata16(pi, PCIR_VENDOR, 0xFB5D);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_NVM);
	pci_set_cfgdata8(pi, PCIR_PROGIF,
	    PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0);

	/*
	 * Allocate size of NVMe registers + doorbell space for all queues.
	 *
	 * The specification requires a minimum memory I/O window size of 16K.
	 * The Windows driver will refuse to start a device with a smaller
	 * window.
	 */
	pci_membar_sz = sizeof(struct nvme_registers) +
	    2 * sizeof(uint32_t) * (sc->max_queues + 1);
	pci_membar_sz = MAX(pci_membar_sz, NVME_MMIO_SPACE_MIN);

	DPRINTF("nvme membar size: %u", pci_membar_sz);

	error = pci_emul_alloc_bar(pi, 0, PCIBAR_MEM64, pci_membar_sz);
	if (error) {
		WPRINTF("%s pci alloc mem bar failed", __func__);
		goto done;
	}

	error = pci_emul_add_msixcap(pi, sc->max_queues + 1, NVME_MSIX_BAR);
	if (error) {
		WPRINTF("%s pci add msixcap failed", __func__);
		goto done;
	}

	error = pci_emul_add_pciecap(pi, PCIEM_TYPE_ROOT_INT_EP);
	if (error) {
		WPRINTF("%s pci add Express capability failed", __func__);
		goto done;
	}

	pthread_mutex_init(&sc->mtx, NULL);
	sem_init(&sc->iosemlock, 0, sc->ioslots);

	pci_nvme_init_queues(sc, sc->max_queues, sc->max_queues);
	/*
	 * Controller data depends on Namespace data so initialize Namespace
	 * data first.
	 */
	pci_nvme_init_nsdata(sc, &sc->nsdata, 1, &sc->nvstore);
	pci_nvme_init_ctrldata(sc);
	pci_nvme_init_logpages(sc);
	pci_nvme_init_features(sc);

	pci_nvme_aer_init(sc);

	pci_nvme_reset(sc);

	pci_lintr_request(pi);

done:
	return (error);
}


/* Device emulation descriptor registered for the "nvme" device type. */
struct pci_devemu pci_de_nvme = {
	.pe_emu =	"nvme",
	.pe_init =	pci_nvme_init,
	.pe_barwrite =	pci_nvme_write,
	.pe_barread =	pci_nvme_read
};
PCI_EMUL_SET(pci_de_nvme);