xref: /illumos-gate/usr/src/cmd/bhyve/pci_nvme.c (revision d7b72f7b52f902da47cc7210a9121f4caabbcb9c)
14c87aefeSPatrick Mooney /*-
24c87aefeSPatrick Mooney  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
34c87aefeSPatrick Mooney  *
44c87aefeSPatrick Mooney  * Copyright (c) 2017 Shunsuke Mie
54c87aefeSPatrick Mooney  * Copyright (c) 2018 Leon Dang
66960cd89SAndy Fiddaman  * Copyright (c) 2020 Chuck Tuffli
74c87aefeSPatrick Mooney  *
884659b24SMichael Zeller  * Function crc16 Copyright (c) 2017, Fedor Uporov
984659b24SMichael Zeller  *     Obtained from function ext2_crc16() in sys/fs/ext2fs/ext2_csum.c
1084659b24SMichael Zeller  *
114c87aefeSPatrick Mooney  * Redistribution and use in source and binary forms, with or without
124c87aefeSPatrick Mooney  * modification, are permitted provided that the following conditions
134c87aefeSPatrick Mooney  * are met:
144c87aefeSPatrick Mooney  * 1. Redistributions of source code must retain the above copyright
154c87aefeSPatrick Mooney  *    notice, this list of conditions and the following disclaimer.
164c87aefeSPatrick Mooney  * 2. Redistributions in binary form must reproduce the above copyright
174c87aefeSPatrick Mooney  *    notice, this list of conditions and the following disclaimer in the
184c87aefeSPatrick Mooney  *    documentation and/or other materials provided with the distribution.
194c87aefeSPatrick Mooney  *
204c87aefeSPatrick Mooney  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
214c87aefeSPatrick Mooney  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
224c87aefeSPatrick Mooney  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
234c87aefeSPatrick Mooney  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
244c87aefeSPatrick Mooney  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
254c87aefeSPatrick Mooney  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
264c87aefeSPatrick Mooney  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
274c87aefeSPatrick Mooney  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
284c87aefeSPatrick Mooney  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
294c87aefeSPatrick Mooney  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
304c87aefeSPatrick Mooney  * SUCH DAMAGE.
314c87aefeSPatrick Mooney  */
324c87aefeSPatrick Mooney 
/*
 * bhyve PCIe-NVMe device emulation.
 *
 * options:
 *  -s <n>,nvme,devpath,maxq=#,qsz=#,ioslots=#,sectsz=#,ser=A-Z,eui64=#,dsm=<opt>
 *
 *  accepted devpath:
 *    /dev/blockdev
 *    /path/to/image
 *    ram=size_in_MiB
 *
 *  maxq    = max number of queues
 *  qsz     = max elements in each queue
 *  ioslots = max number of concurrent io requests
 *  sectsz  = sector size (defaults to blockif sector size)
 *  ser     = serial number (20-chars max)
 *  eui64   = IEEE Extended Unique Identifier (8 byte value)
 *  dsm     = DataSet Management support. Option is one of auto, enable, disable
 *
 */
534c87aefeSPatrick Mooney 
544c87aefeSPatrick Mooney /* TODO:
554c87aefeSPatrick Mooney     - create async event for smart and log
564c87aefeSPatrick Mooney     - intr coalesce
574c87aefeSPatrick Mooney  */
584c87aefeSPatrick Mooney 
594c87aefeSPatrick Mooney #include <sys/cdefs.h>
604c87aefeSPatrick Mooney __FBSDID("$FreeBSD$");
614c87aefeSPatrick Mooney 
626960cd89SAndy Fiddaman #include <sys/errno.h>
634c87aefeSPatrick Mooney #include <sys/types.h>
6484659b24SMichael Zeller #include <net/ieee_oui.h>
6584659b24SMichael Zeller #ifndef __FreeBSD__
6684659b24SMichael Zeller #include <endian.h>
6784659b24SMichael Zeller #endif
684c87aefeSPatrick Mooney 
694c87aefeSPatrick Mooney #include <assert.h>
704c87aefeSPatrick Mooney #include <pthread.h>
716dc98349SAndy Fiddaman #include <pthread_np.h>
724c87aefeSPatrick Mooney #include <semaphore.h>
734c87aefeSPatrick Mooney #include <stdbool.h>
744c87aefeSPatrick Mooney #include <stddef.h>
754c87aefeSPatrick Mooney #include <stdint.h>
764c87aefeSPatrick Mooney #include <stdio.h>
774c87aefeSPatrick Mooney #include <stdlib.h>
784c87aefeSPatrick Mooney #include <string.h>
794c87aefeSPatrick Mooney 
804c87aefeSPatrick Mooney #include <machine/atomic.h>
814c87aefeSPatrick Mooney #include <machine/vmm.h>
824c87aefeSPatrick Mooney #include <vmmapi.h>
834c87aefeSPatrick Mooney 
844c87aefeSPatrick Mooney #include <dev/nvme/nvme.h>
854c87aefeSPatrick Mooney 
864c87aefeSPatrick Mooney #include "bhyverun.h"
874c87aefeSPatrick Mooney #include "block_if.h"
882b948146SAndy Fiddaman #include "config.h"
89154972afSPatrick Mooney #include "debug.h"
904c87aefeSPatrick Mooney #include "pci_emul.h"
914c87aefeSPatrick Mooney 
924c87aefeSPatrick Mooney 
/* Debug tracing switch: DPRINTF emits output only while this is non-zero. */
static int nvme_debug = 0;

/*
 * DPRINTF: forward to PRINTLN only when nvme_debug is set.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement; a bare "if" here would capture the "else" of an enclosing
 * if/else written around a DPRINTF call.
 */
#define	DPRINTF(fmt, args...) \
	do { \
		if (nvme_debug) \
			PRINTLN(fmt, ##args); \
	} while (0)
/* WPRINTF: unconditional warning output via PRINTLN. */
#define	WPRINTF(fmt, args...) PRINTLN(fmt, ##args)
964c87aefeSPatrick Mooney 
/* defaults; can be overridden */
#define	NVME_MSIX_BAR		4

/* Default for the "ioslots" option (max number of concurrent io requests) */
#define	NVME_IOSLOTS		8

/* The NVMe spec defines bits 13:4 in BAR0 as reserved */
#define	NVME_MMIO_SPACE_MIN	(1 << 14)

/* Upper bound on IO queues; pci_nvme_init_queues() clamps requests to this */
#define	NVME_QUEUES		16
#define	NVME_MAX_QENTRIES	2048
/* Memory Page size Minimum reported in CAP register */
#define	NVME_MPSMIN		0
/* MPSMIN converted to bytes */
#define	NVME_MPSMIN_BYTES	(1 << (12 + NVME_MPSMIN))

/* Number of 64-bit entries that fit in a single page */
#define	NVME_PRP2_ITEMS		(PAGE_SIZE/sizeof(uint64_t))
/* Max Data Transfer Size exponent: limit is 2^NVME_MDTS units of MPSMIN */
#define	NVME_MDTS		9
/* Note the + 1 allows for the initial descriptor to not be page aligned */
#define	NVME_MAX_IOVEC		((1 << NVME_MDTS) + 1)
/* Largest single transfer in bytes implied by NVME_MDTS and MPSMIN */
#define	NVME_MAX_DATA_SIZE	((1 << NVME_MDTS) * NVME_MPSMIN_BYTES)

/* This is a synthetic status code to indicate there is no status */
#define	NVME_NO_STATUS		0xffff
/* True when completion `c` carries a real status (not NVME_NO_STATUS) */
#define	NVME_COMPLETION_VALID(c)	((c).status != NVME_NO_STATUS)

/* Reported temperature in Kelvin (i.e. room temperature) */
#define	NVME_TEMPERATURE	296
124*d7b72f7bSAndy Fiddaman 
/* helpers */

/* Convert a zero-based value into a one-based value */
#define ONE_BASED(zero)		((zero) + 1)
/* Convert a one-based value into a zero-based value */
#define ZERO_BASED(one)		((one)  - 1)

/*
 * Encode number of SQ's and CQ's for Set/Get Features.
 *
 * The zero-based SQ count occupies bits 15:0 and the zero-based CQ count
 * bits 31:16. The expansion is a fully parenthesized expression with no
 * trailing semicolon, so it can be used inside larger expressions as well
 * as on the right-hand side of an assignment statement.
 */
#define NVME_FEATURE_NUM_QUEUES(sc) \
	((ZERO_BASED((sc)->num_squeues) & 0xffff) | \
	 ((ZERO_BASED((sc)->num_cqueues) & 0xffff) << 16))
1364c87aefeSPatrick Mooney 
/* Byte offset of the `doorbell` member within struct nvme_registers */
#define	NVME_DOORBELL_OFFSET	offsetof(struct nvme_registers, doorbell)

/*
 * Byte offsets of the controller registers.
 * Register names in the comments follow the NVMe specification's
 * controller register map.
 */
enum nvme_controller_register_offsets {
	NVME_CR_CAP_LOW = 0x00,	/* Controller Capabilities, low dword */
	NVME_CR_CAP_HI  = 0x04,	/* Controller Capabilities, high dword */
	NVME_CR_VS      = 0x08,	/* Version */
	NVME_CR_INTMS   = 0x0c,	/* Interrupt Mask Set */
	NVME_CR_INTMC   = 0x10,	/* Interrupt Mask Clear */
	NVME_CR_CC      = 0x14,	/* Controller Configuration */
	NVME_CR_CSTS    = 0x1c,	/* Controller Status */
	NVME_CR_NSSR    = 0x20,	/* NVM Subsystem Reset */
	NVME_CR_AQA     = 0x24,	/* Admin Queue Attributes */
	NVME_CR_ASQ_LOW = 0x28,	/* Admin SQ base address, low dword */
	NVME_CR_ASQ_HI  = 0x2c,	/* Admin SQ base address, high dword */
	NVME_CR_ACQ_LOW = 0x30,	/* Admin CQ base address, low dword */
	NVME_CR_ACQ_HI  = 0x34,	/* Admin CQ base address, high dword */
};
1544c87aefeSPatrick Mooney 
/*
 * Bit masks for command dword 11.
 * NOTE(review): the layout (bit 0, bit 1, bits 31:16) matches the NVMe
 * Create I/O Completion Queue command (PC, IEN, IV) -- confirm at the use
 * sites. NVME_CMD_CDW11_IV does not fit in a signed int, so this enum
 * relies on the compiler accepting wider enumerator values.
 */
enum nvme_cmd_cdw11 {
	NVME_CMD_CDW11_PC  = 0x0001,
	NVME_CMD_CDW11_IEN = 0x0002,
	NVME_CMD_CDW11_IV  = 0xFFFF0000,
};
1604c87aefeSPatrick Mooney 
/* Direction selector for a PRP copy: towards the PRP side or away from it */
enum nvme_copy_dir {
	NVME_COPY_TO_PRP,
	NVME_COPY_FROM_PRP,
};
165154972afSPatrick Mooney 
/*
 * Completion Queue interrupt flag bits.
 * NOTE(review): presumably stored in nvme_completion_queue.intr_en --
 * confirm at the use sites.
 */
#define	NVME_CQ_INTEN	0x01
#define	NVME_CQ_INTCOAL	0x02
1684c87aefeSPatrick Mooney 
/* Per-Completion-Queue state */
struct nvme_completion_queue {
	struct nvme_completion *qbase;	/* base of the completion entries */
	pthread_mutex_t	mtx;	/* initialized in pci_nvme_init_queues() */
	uint32_t	size;
	uint16_t	tail; /* nvme progress */
	uint16_t	head; /* guest progress */
	uint16_t	intr_vec;
	uint32_t	intr_en;
};
1784c87aefeSPatrick Mooney 
/* Per-Submission-Queue state */
struct nvme_submission_queue {
	struct nvme_command *qbase;	/* base of the submission entries */
	pthread_mutex_t	mtx;	/* initialized in pci_nvme_init_queues() */
	uint32_t	size;
	uint16_t	head; /* nvme progress */
	uint16_t	tail; /* guest progress */
	uint16_t	cqid; /* completion queue id */
	int		qpriority;
};
1884c87aefeSPatrick Mooney 
/* Kind of backing store behind the namespace (see "accepted devpath") */
enum nvme_storage_type {
	NVME_STOR_BLOCKIF = 0,
	NVME_STOR_RAM = 1,
};
1934c87aefeSPatrick Mooney 
1944c87aefeSPatrick Mooney struct pci_nvme_blockstore {
1954c87aefeSPatrick Mooney 	enum nvme_storage_type type;
1964c87aefeSPatrick Mooney 	void		*ctx;
1974c87aefeSPatrick Mooney 	uint64_t	size;
1984c87aefeSPatrick Mooney 	uint32_t	sectsz;
1994c87aefeSPatrick Mooney 	uint32_t	sectsz_bits;
20084659b24SMichael Zeller 	uint64_t	eui64;
201154972afSPatrick Mooney 	uint32_t	deallocate:1;
2024c87aefeSPatrick Mooney };
2034c87aefeSPatrick Mooney 
/*
 * Calculate the number of additional page descriptors for guest IO requests
 * based on the advertised Max Data Transfer (MDTS) and given the number of
 * default iovec's in a struct blockif_req.
 *
 * Evaluates to NVME_MAX_IOVEC - BLOCKIF_IOV_MAX when that is positive and
 * to 0 otherwise.
 */
#define MDTS_PAD_SIZE \
	(NVME_MAX_IOVEC > BLOCKIF_IOV_MAX ? \
	    NVME_MAX_IOVEC - BLOCKIF_IOV_MAX : \
	    0)
2136960cd89SAndy Fiddaman 
2144c87aefeSPatrick Mooney struct pci_nvme_ioreq {
2154c87aefeSPatrick Mooney 	struct pci_nvme_softc *sc;
216154972afSPatrick Mooney 	STAILQ_ENTRY(pci_nvme_ioreq) link;
2174c87aefeSPatrick Mooney 	struct nvme_submission_queue *nvme_sq;
2184c87aefeSPatrick Mooney 	uint16_t	sqid;
2194c87aefeSPatrick Mooney 
2204c87aefeSPatrick Mooney 	/* command information */
2214c87aefeSPatrick Mooney 	uint16_t	opc;
2224c87aefeSPatrick Mooney 	uint16_t	cid;
2234c87aefeSPatrick Mooney 	uint32_t	nsid;
2244c87aefeSPatrick Mooney 
2254c87aefeSPatrick Mooney 	uint64_t	prev_gpaddr;
2264c87aefeSPatrick Mooney 	size_t		prev_size;
2276960cd89SAndy Fiddaman 	size_t		bytes;
2284c87aefeSPatrick Mooney 
2294c87aefeSPatrick Mooney 	struct blockif_req io_req;
2304c87aefeSPatrick Mooney 
2316960cd89SAndy Fiddaman 	struct iovec	iovpadding[MDTS_PAD_SIZE];
2324c87aefeSPatrick Mooney };
2334c87aefeSPatrick Mooney 
/* Policy for the "dsm" option, applied in pci_nvme_init_ctrldata() */
enum nvme_dsm_type {
	/* Dataset Management bit in ONCS reflects backing storage capability */
	NVME_DATASET_MANAGEMENT_AUTO,
	/* Unconditionally set Dataset Management bit in ONCS */
	NVME_DATASET_MANAGEMENT_ENABLE,
	/* Unconditionally clear Dataset Management bit in ONCS */
	NVME_DATASET_MANAGEMENT_DISABLE,
};
242154972afSPatrick Mooney 
struct pci_nvme_softc;
struct nvme_feature_obj;
struct nvme_command;
struct nvme_completion;

/*
 * Handler signature shared by the Set Features and Get Features callbacks:
 * receives the controller state, the feature's table entry, the guest
 * command and the completion to fill in.
 */
typedef void (*nvme_feature_cb)(struct pci_nvme_softc *,
    struct nvme_feature_obj *,
    struct nvme_command *,
    struct nvme_completion *);

/* Per-feature state: one entry per Feature Identifier in the softc table */
struct nvme_feature_obj {
	uint32_t	cdw11;	/* presumably the stored feature value -- confirm */
	nvme_feature_cb	set;
	nvme_feature_cb	get;
	bool namespace_specific;
};

/* Number of entries needed to index the feature table by Feature ID */
#define NVME_FID_MAX		(NVME_FEAT_ENDURANCE_GROUP_EVENT_CONFIGURATION + 1)
2596960cd89SAndy Fiddaman 
/*
 * Asynchronous Event types. The values are sparse (3..5 are unused here);
 * PCI_NVME_AE_TYPE_MAX sizes the per-type `aen` array in the softc.
 */
typedef enum {
	PCI_NVME_AE_TYPE_ERROR = 0,
	PCI_NVME_AE_TYPE_SMART,
	PCI_NVME_AE_TYPE_NOTICE,
	PCI_NVME_AE_TYPE_IO_CMD = 6,
	PCI_NVME_AE_TYPE_VENDOR = 7,
	PCI_NVME_AE_TYPE_MAX		/* Must be last */
} pci_nvme_async_type;
2686dc98349SAndy Fiddaman 
/* Asynchronous Event Requests */
struct pci_nvme_aer {
	STAILQ_ENTRY(pci_nvme_aer) link;	/* linkage on the softc aer_list */
	uint16_t	cid;	/* Command ID of the submitted AER */
};
2746960cd89SAndy Fiddaman 
/* Asynchronous Event Information - Notice */
typedef enum {
	PCI_NVME_AEI_NOTICE_NS_ATTR_CHANGED = 0,
	PCI_NVME_AEI_NOTICE_FW_ACTIVATION,
	PCI_NVME_AEI_NOTICE_TELEMETRY_CHANGE,
	PCI_NVME_AEI_NOTICE_ANA_CHANGE,
	PCI_NVME_AEI_NOTICE_PREDICT_LATENCY_CHANGE,
	PCI_NVME_AEI_NOTICE_LBA_STATUS_ALERT,
	PCI_NVME_AEI_NOTICE_ENDURANCE_GROUP_CHANGE,
	PCI_NVME_AEI_NOTICE_MAX,
} pci_nvme_async_event_info_notice;

/* Bit position of the first Notice event within the event mask */
#define PCI_NVME_AEI_NOTICE_SHIFT		8
/*
 * Mask bit for a Notice event: bit (event + PCI_NVME_AEI_NOTICE_SHIFT).
 * The argument is parenthesized so that any expression may be passed.
 */
#define PCI_NVME_AEI_NOTICE_MASK(event)	\
	(1 << ((event) + PCI_NVME_AEI_NOTICE_SHIFT))
2896dc98349SAndy Fiddaman 
2906dc98349SAndy Fiddaman /* Asynchronous Event Notifications */
2916dc98349SAndy Fiddaman struct pci_nvme_aen {
2926dc98349SAndy Fiddaman 	pci_nvme_async_type atype;
2936dc98349SAndy Fiddaman 	uint32_t	event_data;
2946dc98349SAndy Fiddaman 	bool		posted;
2956dc98349SAndy Fiddaman };
2966dc98349SAndy Fiddaman 
/*
 * By default, enable all Asynchronous Event Notifications:
 *     SMART / Health Critical Warnings	(bits 4:0)
 *     Namespace Attribute Notices	(bit 8)
 */
#define PCI_NVME_AEN_DEFAULT_MASK	0x11f
303*d7b72f7bSAndy Fiddaman 
/* Controller type values; pci_nvme_init_ctrldata() reports the IO type */
typedef enum {
	NVME_CNTRLTYPE_IO = 1,
	NVME_CNTRLTYPE_DISCOVERY = 2,
	NVME_CNTRLTYPE_ADMIN = 3,
} pci_nvme_cntrl_type;
309*d7b72f7bSAndy Fiddaman 
3104c87aefeSPatrick Mooney struct pci_nvme_softc {
3114c87aefeSPatrick Mooney 	struct pci_devinst *nsc_pi;
3124c87aefeSPatrick Mooney 
3134c87aefeSPatrick Mooney 	pthread_mutex_t	mtx;
3144c87aefeSPatrick Mooney 
3154c87aefeSPatrick Mooney 	struct nvme_registers regs;
3164c87aefeSPatrick Mooney 
3174c87aefeSPatrick Mooney 	struct nvme_namespace_data  nsdata;
3184c87aefeSPatrick Mooney 	struct nvme_controller_data ctrldata;
3194c87aefeSPatrick Mooney 	struct nvme_error_information_entry err_log;
3204c87aefeSPatrick Mooney 	struct nvme_health_information_page health_log;
3214c87aefeSPatrick Mooney 	struct nvme_firmware_page fw_log;
3226dc98349SAndy Fiddaman 	struct nvme_ns_list ns_log;
3234c87aefeSPatrick Mooney 
3244c87aefeSPatrick Mooney 	struct pci_nvme_blockstore nvstore;
3254c87aefeSPatrick Mooney 
3264c87aefeSPatrick Mooney 	uint16_t	max_qentries;	/* max entries per queue */
3274c87aefeSPatrick Mooney 	uint32_t	max_queues;	/* max number of IO SQ's or CQ's */
3284c87aefeSPatrick Mooney 	uint32_t	num_cqueues;
3294c87aefeSPatrick Mooney 	uint32_t	num_squeues;
3306960cd89SAndy Fiddaman 	bool		num_q_is_set; /* Has host set Number of Queues */
3314c87aefeSPatrick Mooney 
3324c87aefeSPatrick Mooney 	struct pci_nvme_ioreq *ioreqs;
333154972afSPatrick Mooney 	STAILQ_HEAD(, pci_nvme_ioreq) ioreqs_free; /* free list of ioreqs */
3344c87aefeSPatrick Mooney 	uint32_t	pending_ios;
3354c87aefeSPatrick Mooney 	uint32_t	ioslots;
3364c87aefeSPatrick Mooney 	sem_t		iosemlock;
3374c87aefeSPatrick Mooney 
3384c87aefeSPatrick Mooney 	/*
3394c87aefeSPatrick Mooney 	 * Memory mapped Submission and Completion queues
3404c87aefeSPatrick Mooney 	 * Each array includes both Admin and IO queues
3414c87aefeSPatrick Mooney 	 */
3424c87aefeSPatrick Mooney 	struct nvme_completion_queue *compl_queues;
3434c87aefeSPatrick Mooney 	struct nvme_submission_queue *submit_queues;
3444c87aefeSPatrick Mooney 
3456960cd89SAndy Fiddaman 	struct nvme_feature_obj feat[NVME_FID_MAX];
346154972afSPatrick Mooney 
347154972afSPatrick Mooney 	enum nvme_dsm_type dataset_management;
3486960cd89SAndy Fiddaman 
3496960cd89SAndy Fiddaman 	/* Accounting for SMART data */
3506960cd89SAndy Fiddaman 	__uint128_t	read_data_units;
3516960cd89SAndy Fiddaman 	__uint128_t	write_data_units;
3526960cd89SAndy Fiddaman 	__uint128_t	read_commands;
3536960cd89SAndy Fiddaman 	__uint128_t	write_commands;
3546960cd89SAndy Fiddaman 	uint32_t	read_dunits_remainder;
3556960cd89SAndy Fiddaman 	uint32_t	write_dunits_remainder;
3566960cd89SAndy Fiddaman 
3576960cd89SAndy Fiddaman 	STAILQ_HEAD(, pci_nvme_aer) aer_list;
3586dc98349SAndy Fiddaman 	pthread_mutex_t	aer_mtx;
3596960cd89SAndy Fiddaman 	uint32_t	aer_count;
3606dc98349SAndy Fiddaman 	struct pci_nvme_aen aen[PCI_NVME_AE_TYPE_MAX];
3616dc98349SAndy Fiddaman 	pthread_t	aen_tid;
3626dc98349SAndy Fiddaman 	pthread_mutex_t	aen_mtx;
3636dc98349SAndy Fiddaman 	pthread_cond_t	aen_cond;
3644c87aefeSPatrick Mooney };
3654c87aefeSPatrick Mooney 
3664c87aefeSPatrick Mooney 
/* Forward declarations for functions defined later in this file */
static void pci_nvme_cq_update(struct pci_nvme_softc *sc,
    struct nvme_completion_queue *cq,
    uint32_t cdw0,
    uint16_t cid,
    uint16_t sqid,
    uint16_t status);
static struct pci_nvme_ioreq *pci_nvme_get_ioreq(struct pci_nvme_softc *sc);
static void pci_nvme_release_ioreq(struct pci_nvme_softc *sc,
    struct pci_nvme_ioreq *req);
static void pci_nvme_io_done(struct blockif_req *br, int err);
3764c87aefeSPatrick Mooney 
/*
 * Controller Configuration utils
 *
 * Each getter shifts the field down to bit 0 and then applies the field's
 * (unshifted) mask.
 */
#define	NVME_CC_GET_EN(cc) \
	(((cc) >> NVME_CC_REG_EN_SHIFT) & NVME_CC_REG_EN_MASK)
#define	NVME_CC_GET_CSS(cc) \
	(((cc) >> NVME_CC_REG_CSS_SHIFT) & NVME_CC_REG_CSS_MASK)
#define	NVME_CC_GET_SHN(cc) \
	(((cc) >> NVME_CC_REG_SHN_SHIFT) & NVME_CC_REG_SHN_MASK)
#define	NVME_CC_GET_IOSQES(cc) \
	(((cc) >> NVME_CC_REG_IOSQES_SHIFT) & NVME_CC_REG_IOSQES_MASK)
#define	NVME_CC_GET_IOCQES(cc) \
	(((cc) >> NVME_CC_REG_IOCQES_SHIFT) & NVME_CC_REG_IOCQES_MASK)

/* In-place mask covering the EN, IOSQES and IOCQES fields of CC */
#define	NVME_CC_WRITE_MASK \
	((NVME_CC_REG_EN_MASK << NVME_CC_REG_EN_SHIFT) | \
	 (NVME_CC_REG_IOSQES_MASK << NVME_CC_REG_IOSQES_SHIFT) | \
	 (NVME_CC_REG_IOCQES_MASK << NVME_CC_REG_IOCQES_SHIFT))

/*
 * In-place mask covering the CSS, MPS and AMS fields of CC.
 * NOTE(review): "NEN" presumably means "writable while not enabled" --
 * confirm at the use site.
 */
#define	NVME_CC_NEN_WRITE_MASK \
	((NVME_CC_REG_CSS_MASK << NVME_CC_REG_CSS_SHIFT) | \
	 (NVME_CC_REG_MPS_MASK << NVME_CC_REG_MPS_SHIFT) | \
	 (NVME_CC_REG_AMS_MASK << NVME_CC_REG_AMS_SHIFT))

/* Controller Status utils */
#define	NVME_CSTS_GET_RDY(sts) \
	(((sts) >> NVME_CSTS_REG_RDY_SHIFT) & NVME_CSTS_REG_RDY_MASK)

/* CSTS.RDY as an in-place bit */
#define	NVME_CSTS_RDY	(1 << NVME_CSTS_REG_RDY_SHIFT)

/* Completion Queue status word utils */
#define	NVME_STATUS_P	(1 << NVME_STATUS_P_SHIFT)
/* In-place mask of the status code type (SCT) and status code (SC) fields */
#define	NVME_STATUS_MASK \
	((NVME_STATUS_SCT_MASK << NVME_STATUS_SCT_SHIFT) |\
	 (NVME_STATUS_SC_MASK << NVME_STATUS_SC_SHIFT))

/* In-place Dataset Management bit of the ONCS field */
#define NVME_ONCS_DSM	(NVME_CTRLR_DATA_ONCS_DSM_MASK << \
	NVME_CTRLR_DATA_ONCS_DSM_SHIFT)
413154972afSPatrick Mooney 
/*
 * Feature handlers; all share the nvme_feature_cb signature so they can be
 * stored in the set/get members of struct nvme_feature_obj.
 */
static void nvme_feature_invalid_cb(struct pci_nvme_softc *sc,
    struct nvme_feature_obj *feat,
    struct nvme_command *command,
    struct nvme_completion *compl);
static void nvme_feature_temperature(struct pci_nvme_softc *sc,
    struct nvme_feature_obj *feat,
    struct nvme_command *command,
    struct nvme_completion *compl);
static void nvme_feature_num_queues(struct pci_nvme_softc *sc,
    struct nvme_feature_obj *feat,
    struct nvme_command *command,
    struct nvme_completion *compl);
static void nvme_feature_iv_config(struct pci_nvme_softc *sc,
    struct nvme_feature_obj *feat,
    struct nvme_command *command,
    struct nvme_completion *compl);
static void nvme_feature_async_event(struct pci_nvme_softc *sc,
    struct nvme_feature_obj *feat,
    struct nvme_command *command,
    struct nvme_completion *compl);

/* Thread entry point for Asynchronous Event Notification processing */
static void *aen_thr(void *arg);
4366dc98349SAndy Fiddaman 
/*
 * Fill a fixed-width, non-NUL-terminated field.
 *
 * Sets all dst_size bytes of dst to `pad`, then overwrites the leading bytes
 * with src. At most dst_size bytes of src are examined or copied; a longer
 * src is truncated and no terminating NUL is written to dst.
 */
static __inline void
cpywithpad(char *dst, size_t dst_size, const char *src, char pad)
{
	const char *nul;
	size_t copy_len;

	/* Length of src bounded by dst_size (what strnlen would report) */
	nul = memchr(src, '\0', dst_size);
	copy_len = (nul != NULL) ? (size_t)(nul - src) : dst_size;

	memset(dst, pad, dst_size);
	memcpy(dst, src, copy_len);
}
4464c87aefeSPatrick Mooney 
4474c87aefeSPatrick Mooney static __inline void
4484c87aefeSPatrick Mooney pci_nvme_status_tc(uint16_t *status, uint16_t type, uint16_t code)
4494c87aefeSPatrick Mooney {
4504c87aefeSPatrick Mooney 
4514c87aefeSPatrick Mooney 	*status &= ~NVME_STATUS_MASK;
4524c87aefeSPatrick Mooney 	*status |= (type & NVME_STATUS_SCT_MASK) << NVME_STATUS_SCT_SHIFT |
4534c87aefeSPatrick Mooney 		(code & NVME_STATUS_SC_MASK) << NVME_STATUS_SC_SHIFT;
4544c87aefeSPatrick Mooney }
4554c87aefeSPatrick Mooney 
4564c87aefeSPatrick Mooney static __inline void
4574c87aefeSPatrick Mooney pci_nvme_status_genc(uint16_t *status, uint16_t code)
4584c87aefeSPatrick Mooney {
4594c87aefeSPatrick Mooney 
4604c87aefeSPatrick Mooney 	pci_nvme_status_tc(status, NVME_SCT_GENERIC, code);
4614c87aefeSPatrick Mooney }
4624c87aefeSPatrick Mooney 
4636960cd89SAndy Fiddaman /*
4646960cd89SAndy Fiddaman  * Initialize the requested number or IO Submission and Completion Queues.
4656960cd89SAndy Fiddaman  * Admin queues are allocated implicitly.
4666960cd89SAndy Fiddaman  */
4676960cd89SAndy Fiddaman static void
4686960cd89SAndy Fiddaman pci_nvme_init_queues(struct pci_nvme_softc *sc, uint32_t nsq, uint32_t ncq)
4694c87aefeSPatrick Mooney {
4706960cd89SAndy Fiddaman 	uint32_t i;
4714c87aefeSPatrick Mooney 
4726960cd89SAndy Fiddaman 	/*
4736960cd89SAndy Fiddaman 	 * Allocate and initialize the Submission Queues
4746960cd89SAndy Fiddaman 	 */
4756960cd89SAndy Fiddaman 	if (nsq > NVME_QUEUES) {
4766960cd89SAndy Fiddaman 		WPRINTF("%s: clamping number of SQ from %u to %u",
4776960cd89SAndy Fiddaman 					__func__, nsq, NVME_QUEUES);
4786960cd89SAndy Fiddaman 		nsq = NVME_QUEUES;
4796960cd89SAndy Fiddaman 	}
4806960cd89SAndy Fiddaman 
4816960cd89SAndy Fiddaman 	sc->num_squeues = nsq;
4826960cd89SAndy Fiddaman 
4836960cd89SAndy Fiddaman 	sc->submit_queues = calloc(sc->num_squeues + 1,
4846960cd89SAndy Fiddaman 				sizeof(struct nvme_submission_queue));
4856960cd89SAndy Fiddaman 	if (sc->submit_queues == NULL) {
4866960cd89SAndy Fiddaman 		WPRINTF("%s: SQ allocation failed", __func__);
4876960cd89SAndy Fiddaman 		sc->num_squeues = 0;
4886960cd89SAndy Fiddaman 	} else {
4896960cd89SAndy Fiddaman 		struct nvme_submission_queue *sq = sc->submit_queues;
4906960cd89SAndy Fiddaman 
4917bb0eb34SAndy Fiddaman 		for (i = 0; i < sc->num_squeues + 1; i++)
4927bb0eb34SAndy Fiddaman 			pthread_mutex_init(&sq[i].mtx, NULL);
4936960cd89SAndy Fiddaman 	}
4946960cd89SAndy Fiddaman 
4956960cd89SAndy Fiddaman 	/*
4966960cd89SAndy Fiddaman 	 * Allocate and initialize the Completion Queues
4976960cd89SAndy Fiddaman 	 */
4986960cd89SAndy Fiddaman 	if (ncq > NVME_QUEUES) {
4996960cd89SAndy Fiddaman 		WPRINTF("%s: clamping number of CQ from %u to %u",
5006960cd89SAndy Fiddaman 					__func__, ncq, NVME_QUEUES);
5016960cd89SAndy Fiddaman 		ncq = NVME_QUEUES;
5026960cd89SAndy Fiddaman 	}
5036960cd89SAndy Fiddaman 
5046960cd89SAndy Fiddaman 	sc->num_cqueues = ncq;
5056960cd89SAndy Fiddaman 
5066960cd89SAndy Fiddaman 	sc->compl_queues = calloc(sc->num_cqueues + 1,
5076960cd89SAndy Fiddaman 				sizeof(struct nvme_completion_queue));
5086960cd89SAndy Fiddaman 	if (sc->compl_queues == NULL) {
5096960cd89SAndy Fiddaman 		WPRINTF("%s: CQ allocation failed", __func__);
5106960cd89SAndy Fiddaman 		sc->num_cqueues = 0;
5116960cd89SAndy Fiddaman 	} else {
5126960cd89SAndy Fiddaman 		struct nvme_completion_queue *cq = sc->compl_queues;
5136960cd89SAndy Fiddaman 
5147bb0eb34SAndy Fiddaman 		for (i = 0; i < sc->num_cqueues + 1; i++)
5157bb0eb34SAndy Fiddaman 			pthread_mutex_init(&cq[i].mtx, NULL);
5166960cd89SAndy Fiddaman 	}
5174c87aefeSPatrick Mooney }
5184c87aefeSPatrick Mooney 
5194c87aefeSPatrick Mooney static void
5204c87aefeSPatrick Mooney pci_nvme_init_ctrldata(struct pci_nvme_softc *sc)
5214c87aefeSPatrick Mooney {
5224c87aefeSPatrick Mooney 	struct nvme_controller_data *cd = &sc->ctrldata;
5234c87aefeSPatrick Mooney 
5244c87aefeSPatrick Mooney 	cd->vid = 0xFB5D;
5254c87aefeSPatrick Mooney 	cd->ssvid = 0x0000;
5264c87aefeSPatrick Mooney 
5274c87aefeSPatrick Mooney 	cpywithpad((char *)cd->mn, sizeof(cd->mn), "bhyve-NVMe", ' ');
5284c87aefeSPatrick Mooney 	cpywithpad((char *)cd->fr, sizeof(cd->fr), "1.0", ' ');
5294c87aefeSPatrick Mooney 
5304c87aefeSPatrick Mooney 	/* Num of submission commands that we can handle at a time (2^rab) */
5314c87aefeSPatrick Mooney 	cd->rab   = 4;
5324c87aefeSPatrick Mooney 
5334c87aefeSPatrick Mooney 	/* FreeBSD OUI */
5344c87aefeSPatrick Mooney 	cd->ieee[0] = 0x58;
5354c87aefeSPatrick Mooney 	cd->ieee[1] = 0x9c;
5364c87aefeSPatrick Mooney 	cd->ieee[2] = 0xfc;
5374c87aefeSPatrick Mooney 
5384c87aefeSPatrick Mooney 	cd->mic = 0;
5394c87aefeSPatrick Mooney 
5406960cd89SAndy Fiddaman 	cd->mdts = NVME_MDTS;	/* max data transfer size (2^mdts * CAP.MPSMIN) */
5414c87aefeSPatrick Mooney 
542*d7b72f7bSAndy Fiddaman 	cd->ver = NVME_REV(1,4);
5434c87aefeSPatrick Mooney 
544*d7b72f7bSAndy Fiddaman 	cd->cntrltype = NVME_CNTRLTYPE_IO;
5454c87aefeSPatrick Mooney 	cd->oacs = 1 << NVME_CTRLR_DATA_OACS_FORMAT_SHIFT;
546*d7b72f7bSAndy Fiddaman 	cd->oaes = NVMEB(NVME_CTRLR_DATA_OAES_NS_ATTR);
5474c87aefeSPatrick Mooney 	cd->acl = 2;
5484c87aefeSPatrick Mooney 	cd->aerl = 4;
5494c87aefeSPatrick Mooney 
5506960cd89SAndy Fiddaman 	/* Advertise 1, Read-only firmware slot */
5516960cd89SAndy Fiddaman 	cd->frmw = NVME_CTRLR_DATA_FRMW_SLOT1_RO_MASK |
5526960cd89SAndy Fiddaman 	    (1 << NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT);
5534c87aefeSPatrick Mooney 	cd->lpa = 0;	/* TODO: support some simple things like SMART */
5544c87aefeSPatrick Mooney 	cd->elpe = 0;	/* max error log page entries */
5554c87aefeSPatrick Mooney 	cd->npss = 1;	/* number of power states support */
5564c87aefeSPatrick Mooney 
5574c87aefeSPatrick Mooney 	/* Warning Composite Temperature Threshold */
5584c87aefeSPatrick Mooney 	cd->wctemp = 0x0157;
559*d7b72f7bSAndy Fiddaman 	cd->cctemp = 0x0157;
5604c87aefeSPatrick Mooney 
5614c87aefeSPatrick Mooney 	cd->sqes = (6 << NVME_CTRLR_DATA_SQES_MAX_SHIFT) |
5624c87aefeSPatrick Mooney 	    (6 << NVME_CTRLR_DATA_SQES_MIN_SHIFT);
5634c87aefeSPatrick Mooney 	cd->cqes = (4 << NVME_CTRLR_DATA_CQES_MAX_SHIFT) |
5644c87aefeSPatrick Mooney 	    (4 << NVME_CTRLR_DATA_CQES_MIN_SHIFT);
5654c87aefeSPatrick Mooney 	cd->nn = 1;	/* number of namespaces */
5664c87aefeSPatrick Mooney 
567154972afSPatrick Mooney 	cd->oncs = 0;
568154972afSPatrick Mooney 	switch (sc->dataset_management) {
569154972afSPatrick Mooney 	case NVME_DATASET_MANAGEMENT_AUTO:
570154972afSPatrick Mooney 		if (sc->nvstore.deallocate)
571154972afSPatrick Mooney 			cd->oncs |= NVME_ONCS_DSM;
572154972afSPatrick Mooney 		break;
573154972afSPatrick Mooney 	case NVME_DATASET_MANAGEMENT_ENABLE:
574154972afSPatrick Mooney 		cd->oncs |= NVME_ONCS_DSM;
575154972afSPatrick Mooney 		break;
576154972afSPatrick Mooney 	default:
577154972afSPatrick Mooney 		break;
578154972afSPatrick Mooney 	}
579154972afSPatrick Mooney 
580*d7b72f7bSAndy Fiddaman 	cd->fna = NVME_CTRLR_DATA_FNA_FORMAT_ALL_MASK <<
581*d7b72f7bSAndy Fiddaman 	    NVME_CTRLR_DATA_FNA_FORMAT_ALL_SHIFT;
582*d7b72f7bSAndy Fiddaman 
583*d7b72f7bSAndy Fiddaman 	cd->vwc = NVME_CTRLR_DATA_VWC_ALL_NO << NVME_CTRLR_DATA_VWC_ALL_SHIFT;
5844c87aefeSPatrick Mooney 
5854c87aefeSPatrick Mooney 	cd->power_state[0].mp = 10;
5864c87aefeSPatrick Mooney }
5874c87aefeSPatrick Mooney 
/*
 * Fold `len` bytes starting at `buffer` into the running CRC-16 value `crc`
 * and return the updated checksum. Callers in this file start from crc == 0.
 * See copyright attribution at top of file
 */
static uint16_t
crc16(uint16_t crc, const void *buffer, unsigned int len)
{
	/* CRC table for the CRC-16. The poly is 0x8005 (x16 + x15 + x2 + 1). */
	static uint16_t const crc16_table[256] = {
		0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241,
		0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440,
		0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40,
		0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841,
		0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40,
		0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41,
		0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641,
		0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040,
		0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240,
		0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441,
		0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41,
		0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840,
		0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41,
		0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40,
		0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640,
		0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041,
		0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240,
		0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441,
		0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41,
		0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840,
		0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41,
		0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40,
		0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640,
		0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041,
		0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241,
		0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440,
		0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40,
		0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841,
		0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40,
		0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41,
		0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641,
		0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040
	};
	const unsigned char *bytes = buffer;
	unsigned int pos;

	for (pos = 0; pos < len; pos++) {
		/* Low byte of the running CRC mixed with the input selects the entry */
		uint8_t slot = (uint8_t)(crc ^ bytes[pos]);

		crc = (uint16_t)((crc >> 8) ^ crc16_table[slot]);
	}

	return (crc);
}
63784659b24SMichael Zeller 
63884659b24SMichael Zeller static void
6396dc98349SAndy Fiddaman pci_nvme_init_nsdata_size(struct pci_nvme_blockstore *nvstore,
6406dc98349SAndy Fiddaman     struct nvme_namespace_data *nd)
64184659b24SMichael Zeller {
6424c87aefeSPatrick Mooney 
643154972afSPatrick Mooney 	/* Get capacity and block size information from backing store */
644154972afSPatrick Mooney 	nd->nsze = nvstore->size / nvstore->sectsz;
6454c87aefeSPatrick Mooney 	nd->ncap = nd->nsze;
6464c87aefeSPatrick Mooney 	nd->nuse = nd->nsze;
6476dc98349SAndy Fiddaman }
6486dc98349SAndy Fiddaman 
6496dc98349SAndy Fiddaman static void
6506dc98349SAndy Fiddaman pci_nvme_init_nsdata(struct pci_nvme_softc *sc,
6516dc98349SAndy Fiddaman     struct nvme_namespace_data *nd, uint32_t nsid,
6526dc98349SAndy Fiddaman     struct pci_nvme_blockstore *nvstore)
6536dc98349SAndy Fiddaman {
6546dc98349SAndy Fiddaman 
6556dc98349SAndy Fiddaman 	pci_nvme_init_nsdata_size(nvstore, nd);
6564c87aefeSPatrick Mooney 
657154972afSPatrick Mooney 	if (nvstore->type == NVME_STOR_BLOCKIF)
658154972afSPatrick Mooney 		nvstore->deallocate = blockif_candelete(nvstore->ctx);
659154972afSPatrick Mooney 
6604c87aefeSPatrick Mooney 	nd->nlbaf = 0; /* NLBAF is a 0's based value (i.e. 1 LBA Format) */
66184659b24SMichael Zeller 	nd->flbas = 0;
66284659b24SMichael Zeller 
66384659b24SMichael Zeller 	/* Create an EUI-64 if user did not provide one */
664154972afSPatrick Mooney 	if (nvstore->eui64 == 0) {
66584659b24SMichael Zeller 		char *data = NULL;
666154972afSPatrick Mooney 		uint64_t eui64 = nvstore->eui64;
66784659b24SMichael Zeller 
6682b948146SAndy Fiddaman 		asprintf(&data, "%s%u%u%u", get_config_value("name"),
6692b948146SAndy Fiddaman 		    sc->nsc_pi->pi_bus, sc->nsc_pi->pi_slot,
6702b948146SAndy Fiddaman 		    sc->nsc_pi->pi_func);
67184659b24SMichael Zeller 
67284659b24SMichael Zeller 		if (data != NULL) {
67384659b24SMichael Zeller 			eui64 = OUI_FREEBSD_NVME_LOW | crc16(0, data, strlen(data));
67484659b24SMichael Zeller 			free(data);
67584659b24SMichael Zeller 		}
676154972afSPatrick Mooney 		nvstore->eui64 = (eui64 << 16) | (nsid & 0xffff);
67784659b24SMichael Zeller 	}
678154972afSPatrick Mooney 	be64enc(nd->eui64, nvstore->eui64);
67984659b24SMichael Zeller 
6804c87aefeSPatrick Mooney 	/* LBA data-sz = 2^lbads */
681154972afSPatrick Mooney 	nd->lbaf[0] = nvstore->sectsz_bits << NVME_NS_DATA_LBAF_LBADS_SHIFT;
6824c87aefeSPatrick Mooney }
6834c87aefeSPatrick Mooney 
6844c87aefeSPatrick Mooney static void
6854c87aefeSPatrick Mooney pci_nvme_init_logpages(struct pci_nvme_softc *sc)
6864c87aefeSPatrick Mooney {
6874c87aefeSPatrick Mooney 
6884c87aefeSPatrick Mooney 	memset(&sc->err_log, 0, sizeof(sc->err_log));
6894c87aefeSPatrick Mooney 	memset(&sc->health_log, 0, sizeof(sc->health_log));
6904c87aefeSPatrick Mooney 	memset(&sc->fw_log, 0, sizeof(sc->fw_log));
6916dc98349SAndy Fiddaman 	memset(&sc->ns_log, 0, sizeof(sc->ns_log));
6926960cd89SAndy Fiddaman 
6936960cd89SAndy Fiddaman 	/* Set read/write remainder to round up according to spec */
6946960cd89SAndy Fiddaman 	sc->read_dunits_remainder = 999;
6956960cd89SAndy Fiddaman 	sc->write_dunits_remainder = 999;
6966960cd89SAndy Fiddaman 
6976960cd89SAndy Fiddaman 	/* Set nominal Health values checked by implementations */
698*d7b72f7bSAndy Fiddaman 	sc->health_log.temperature = NVME_TEMPERATURE;
6996960cd89SAndy Fiddaman 	sc->health_log.available_spare = 100;
7006960cd89SAndy Fiddaman 	sc->health_log.available_spare_threshold = 10;
7014c87aefeSPatrick Mooney }
7024c87aefeSPatrick Mooney 
7034c87aefeSPatrick Mooney static void
7046960cd89SAndy Fiddaman pci_nvme_init_features(struct pci_nvme_softc *sc)
7056960cd89SAndy Fiddaman {
706*d7b72f7bSAndy Fiddaman 	enum nvme_feature	fid;
7076960cd89SAndy Fiddaman 
708*d7b72f7bSAndy Fiddaman 	for (fid = 0; fid < NVME_FID_MAX; fid++) {
709*d7b72f7bSAndy Fiddaman 		switch (fid) {
710*d7b72f7bSAndy Fiddaman 		case NVME_FEAT_ARBITRATION:
711*d7b72f7bSAndy Fiddaman 		case NVME_FEAT_POWER_MANAGEMENT:
712*d7b72f7bSAndy Fiddaman 		case NVME_FEAT_INTERRUPT_COALESCING: //XXX
713*d7b72f7bSAndy Fiddaman 		case NVME_FEAT_WRITE_ATOMICITY:
714*d7b72f7bSAndy Fiddaman 			/* Mandatory but no special handling required */
715*d7b72f7bSAndy Fiddaman 		//XXX hang - case NVME_FEAT_PREDICTABLE_LATENCY_MODE_CONFIG:
716*d7b72f7bSAndy Fiddaman 		//XXX hang - case NVME_FEAT_HOST_BEHAVIOR_SUPPORT:
717*d7b72f7bSAndy Fiddaman 		//		  this returns a data buffer
718*d7b72f7bSAndy Fiddaman 			break;
719*d7b72f7bSAndy Fiddaman 		case NVME_FEAT_TEMPERATURE_THRESHOLD:
720*d7b72f7bSAndy Fiddaman 			sc->feat[fid].set = nvme_feature_temperature;
721*d7b72f7bSAndy Fiddaman 			break;
722*d7b72f7bSAndy Fiddaman 		case NVME_FEAT_ERROR_RECOVERY:
723*d7b72f7bSAndy Fiddaman 			sc->feat[fid].namespace_specific = true;
724*d7b72f7bSAndy Fiddaman 			break;
725*d7b72f7bSAndy Fiddaman 		case NVME_FEAT_NUMBER_OF_QUEUES:
726*d7b72f7bSAndy Fiddaman 			sc->feat[fid].set = nvme_feature_num_queues;
727*d7b72f7bSAndy Fiddaman 			break;
728*d7b72f7bSAndy Fiddaman 		case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION:
729*d7b72f7bSAndy Fiddaman 			sc->feat[fid].set = nvme_feature_iv_config;
730*d7b72f7bSAndy Fiddaman 			break;
731*d7b72f7bSAndy Fiddaman 		case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
732*d7b72f7bSAndy Fiddaman 			sc->feat[fid].set = nvme_feature_async_event;
7336dc98349SAndy Fiddaman 			/* Enable all AENs by default */
734*d7b72f7bSAndy Fiddaman 			sc->feat[fid].cdw11 = PCI_NVME_AEN_DEFAULT_MASK;
735*d7b72f7bSAndy Fiddaman 			break;
736*d7b72f7bSAndy Fiddaman 		default:
737*d7b72f7bSAndy Fiddaman 			sc->feat[fid].set = nvme_feature_invalid_cb;
738*d7b72f7bSAndy Fiddaman 			sc->feat[fid].get = nvme_feature_invalid_cb;
739*d7b72f7bSAndy Fiddaman 		}
740*d7b72f7bSAndy Fiddaman 	}
7416960cd89SAndy Fiddaman }
7426960cd89SAndy Fiddaman 
7436960cd89SAndy Fiddaman static void
7446dc98349SAndy Fiddaman pci_nvme_aer_reset(struct pci_nvme_softc *sc)
7456960cd89SAndy Fiddaman {
7466960cd89SAndy Fiddaman 
7476960cd89SAndy Fiddaman 	STAILQ_INIT(&sc->aer_list);
7486960cd89SAndy Fiddaman 	sc->aer_count = 0;
7496960cd89SAndy Fiddaman }
7506960cd89SAndy Fiddaman 
7516960cd89SAndy Fiddaman static void
7526dc98349SAndy Fiddaman pci_nvme_aer_init(struct pci_nvme_softc *sc)
7536dc98349SAndy Fiddaman {
7546dc98349SAndy Fiddaman 
7556dc98349SAndy Fiddaman 	pthread_mutex_init(&sc->aer_mtx, NULL);
7566dc98349SAndy Fiddaman 	pci_nvme_aer_reset(sc);
7576dc98349SAndy Fiddaman }
7586dc98349SAndy Fiddaman 
7596dc98349SAndy Fiddaman static void
7606960cd89SAndy Fiddaman pci_nvme_aer_destroy(struct pci_nvme_softc *sc)
7616960cd89SAndy Fiddaman {
7626960cd89SAndy Fiddaman 	struct pci_nvme_aer *aer = NULL;
7636960cd89SAndy Fiddaman 
7646dc98349SAndy Fiddaman 	pthread_mutex_lock(&sc->aer_mtx);
7656960cd89SAndy Fiddaman 	while (!STAILQ_EMPTY(&sc->aer_list)) {
7666960cd89SAndy Fiddaman 		aer = STAILQ_FIRST(&sc->aer_list);
7676960cd89SAndy Fiddaman 		STAILQ_REMOVE_HEAD(&sc->aer_list, link);
7686960cd89SAndy Fiddaman 		free(aer);
7696960cd89SAndy Fiddaman 	}
7706dc98349SAndy Fiddaman 	pthread_mutex_unlock(&sc->aer_mtx);
7716960cd89SAndy Fiddaman 
7726dc98349SAndy Fiddaman 	pci_nvme_aer_reset(sc);
7736960cd89SAndy Fiddaman }
7746960cd89SAndy Fiddaman 
7756960cd89SAndy Fiddaman static bool
7766960cd89SAndy Fiddaman pci_nvme_aer_available(struct pci_nvme_softc *sc)
7776960cd89SAndy Fiddaman {
7786960cd89SAndy Fiddaman 
7796dc98349SAndy Fiddaman 	return (sc->aer_count != 0);
7806960cd89SAndy Fiddaman }
7816960cd89SAndy Fiddaman 
7826960cd89SAndy Fiddaman static bool
7836960cd89SAndy Fiddaman pci_nvme_aer_limit_reached(struct pci_nvme_softc *sc)
7846960cd89SAndy Fiddaman {
7856960cd89SAndy Fiddaman 	struct nvme_controller_data *cd = &sc->ctrldata;
7866960cd89SAndy Fiddaman 
7876960cd89SAndy Fiddaman 	/* AERL is a zero based value while aer_count is one's based */
7886960cd89SAndy Fiddaman 	return (sc->aer_count == (cd->aerl + 1));
7896960cd89SAndy Fiddaman }
7906960cd89SAndy Fiddaman 
7916960cd89SAndy Fiddaman /*
7926960cd89SAndy Fiddaman  * Add an Async Event Request
7936960cd89SAndy Fiddaman  *
7946960cd89SAndy Fiddaman  * Stores an AER to be returned later if the Controller needs to notify the
7956960cd89SAndy Fiddaman  * host of an event.
7966960cd89SAndy Fiddaman  * Note that while the NVMe spec doesn't require Controllers to return AER's
7976960cd89SAndy Fiddaman  * in order, this implementation does preserve the order.
7986960cd89SAndy Fiddaman  */
7996960cd89SAndy Fiddaman static int
8006960cd89SAndy Fiddaman pci_nvme_aer_add(struct pci_nvme_softc *sc, uint16_t cid)
8016960cd89SAndy Fiddaman {
8026960cd89SAndy Fiddaman 	struct pci_nvme_aer *aer = NULL;
8036960cd89SAndy Fiddaman 
8046960cd89SAndy Fiddaman 	aer = calloc(1, sizeof(struct pci_nvme_aer));
8056960cd89SAndy Fiddaman 	if (aer == NULL)
8066960cd89SAndy Fiddaman 		return (-1);
8076960cd89SAndy Fiddaman 
8086960cd89SAndy Fiddaman 	/* Save the Command ID for use in the completion message */
8096960cd89SAndy Fiddaman 	aer->cid = cid;
8106dc98349SAndy Fiddaman 
8116dc98349SAndy Fiddaman 	pthread_mutex_lock(&sc->aer_mtx);
8126dc98349SAndy Fiddaman 	sc->aer_count++;
8136960cd89SAndy Fiddaman 	STAILQ_INSERT_TAIL(&sc->aer_list, aer, link);
8146dc98349SAndy Fiddaman 	pthread_mutex_unlock(&sc->aer_mtx);
8156960cd89SAndy Fiddaman 
8166960cd89SAndy Fiddaman 	return (0);
8176960cd89SAndy Fiddaman }
8186960cd89SAndy Fiddaman 
8196960cd89SAndy Fiddaman /*
8206960cd89SAndy Fiddaman  * Get an Async Event Request structure
8216960cd89SAndy Fiddaman  *
8226960cd89SAndy Fiddaman  * Returns a pointer to an AER previously submitted by the host or NULL if
8236960cd89SAndy Fiddaman  * no AER's exist. Caller is responsible for freeing the returned struct.
8246960cd89SAndy Fiddaman  */
8256960cd89SAndy Fiddaman static struct pci_nvme_aer *
8266960cd89SAndy Fiddaman pci_nvme_aer_get(struct pci_nvme_softc *sc)
8276960cd89SAndy Fiddaman {
8286960cd89SAndy Fiddaman 	struct pci_nvme_aer *aer = NULL;
8296960cd89SAndy Fiddaman 
8306dc98349SAndy Fiddaman 	pthread_mutex_lock(&sc->aer_mtx);
8316960cd89SAndy Fiddaman 	aer = STAILQ_FIRST(&sc->aer_list);
8326960cd89SAndy Fiddaman 	if (aer != NULL) {
8336960cd89SAndy Fiddaman 		STAILQ_REMOVE_HEAD(&sc->aer_list, link);
8346960cd89SAndy Fiddaman 		sc->aer_count--;
8356960cd89SAndy Fiddaman 	}
8366dc98349SAndy Fiddaman 	pthread_mutex_unlock(&sc->aer_mtx);
8376960cd89SAndy Fiddaman 
8386960cd89SAndy Fiddaman 	return (aer);
8396960cd89SAndy Fiddaman }
8406dc98349SAndy Fiddaman 
8416dc98349SAndy Fiddaman static void
8426dc98349SAndy Fiddaman pci_nvme_aen_reset(struct pci_nvme_softc *sc)
8436dc98349SAndy Fiddaman {
8446dc98349SAndy Fiddaman 	uint32_t	atype;
8456dc98349SAndy Fiddaman 
8466dc98349SAndy Fiddaman 	memset(sc->aen, 0, PCI_NVME_AE_TYPE_MAX * sizeof(struct pci_nvme_aen));
8476dc98349SAndy Fiddaman 
8486dc98349SAndy Fiddaman 	for (atype = 0; atype < PCI_NVME_AE_TYPE_MAX; atype++) {
8496dc98349SAndy Fiddaman 		sc->aen[atype].atype = atype;
8506dc98349SAndy Fiddaman 	}
8516dc98349SAndy Fiddaman }
8526dc98349SAndy Fiddaman 
8536dc98349SAndy Fiddaman static void
8546dc98349SAndy Fiddaman pci_nvme_aen_init(struct pci_nvme_softc *sc)
8556dc98349SAndy Fiddaman {
8566dc98349SAndy Fiddaman 	char nstr[80];
8576dc98349SAndy Fiddaman 
8586dc98349SAndy Fiddaman 	pci_nvme_aen_reset(sc);
8596dc98349SAndy Fiddaman 
8606dc98349SAndy Fiddaman 	pthread_mutex_init(&sc->aen_mtx, NULL);
8616dc98349SAndy Fiddaman 	pthread_create(&sc->aen_tid, NULL, aen_thr, sc);
8626dc98349SAndy Fiddaman 	snprintf(nstr, sizeof(nstr), "nvme-aen-%d:%d", sc->nsc_pi->pi_slot,
8636dc98349SAndy Fiddaman 	    sc->nsc_pi->pi_func);
8646dc98349SAndy Fiddaman 	pthread_set_name_np(sc->aen_tid, nstr);
8656dc98349SAndy Fiddaman }
8666dc98349SAndy Fiddaman 
/*
 * Discard any pending Asynchronous Event Notifications by resetting every
 * AEN slot. The AEN thread and its mutex are left untouched.
 */
static void
pci_nvme_aen_destroy(struct pci_nvme_softc *sc)
{
	pci_nvme_aen_reset(sc);
}
8736dc98349SAndy Fiddaman 
8746dc98349SAndy Fiddaman /* Notify the AEN thread of pending work */
8756dc98349SAndy Fiddaman static void
8766dc98349SAndy Fiddaman pci_nvme_aen_notify(struct pci_nvme_softc *sc)
8776dc98349SAndy Fiddaman {
8786dc98349SAndy Fiddaman 
8796dc98349SAndy Fiddaman 	pthread_cond_signal(&sc->aen_cond);
8806dc98349SAndy Fiddaman }
8816dc98349SAndy Fiddaman 
8826dc98349SAndy Fiddaman /*
8836dc98349SAndy Fiddaman  * Post an Asynchronous Event Notification
8846dc98349SAndy Fiddaman  */
8856dc98349SAndy Fiddaman static int32_t
8866dc98349SAndy Fiddaman pci_nvme_aen_post(struct pci_nvme_softc *sc, pci_nvme_async_type atype,
8876dc98349SAndy Fiddaman 		uint32_t event_data)
8886dc98349SAndy Fiddaman {
8896dc98349SAndy Fiddaman 	struct pci_nvme_aen *aen;
8906dc98349SAndy Fiddaman 
8916dc98349SAndy Fiddaman 	if (atype >= PCI_NVME_AE_TYPE_MAX) {
8926dc98349SAndy Fiddaman 		return(EINVAL);
8936dc98349SAndy Fiddaman 	}
8946dc98349SAndy Fiddaman 
8956dc98349SAndy Fiddaman 	pthread_mutex_lock(&sc->aen_mtx);
8966dc98349SAndy Fiddaman 	aen = &sc->aen[atype];
8976dc98349SAndy Fiddaman 
8986dc98349SAndy Fiddaman 	/* Has the controller already posted an event of this type? */
8996dc98349SAndy Fiddaman 	if (aen->posted) {
9006dc98349SAndy Fiddaman 		pthread_mutex_unlock(&sc->aen_mtx);
9016dc98349SAndy Fiddaman 		return(EALREADY);
9026dc98349SAndy Fiddaman 	}
9036dc98349SAndy Fiddaman 
9046dc98349SAndy Fiddaman 	aen->event_data = event_data;
9056dc98349SAndy Fiddaman 	aen->posted = true;
9066dc98349SAndy Fiddaman 	pthread_mutex_unlock(&sc->aen_mtx);
9076dc98349SAndy Fiddaman 
9086dc98349SAndy Fiddaman 	pci_nvme_aen_notify(sc);
9096dc98349SAndy Fiddaman 
9106dc98349SAndy Fiddaman 	return(0);
9116dc98349SAndy Fiddaman }
9126dc98349SAndy Fiddaman 
9136dc98349SAndy Fiddaman static void
9146dc98349SAndy Fiddaman pci_nvme_aen_process(struct pci_nvme_softc *sc)
9156dc98349SAndy Fiddaman {
9166dc98349SAndy Fiddaman 	struct pci_nvme_aer *aer;
9176dc98349SAndy Fiddaman 	struct pci_nvme_aen *aen;
9186dc98349SAndy Fiddaman 	pci_nvme_async_type atype;
9196dc98349SAndy Fiddaman 	uint32_t mask;
9206dc98349SAndy Fiddaman 	uint16_t status;
9216dc98349SAndy Fiddaman 	uint8_t lid;
9226dc98349SAndy Fiddaman 
9236dc98349SAndy Fiddaman #ifndef __FreeBSD__
9246dc98349SAndy Fiddaman 	lid = 0;
9256960cd89SAndy Fiddaman #endif
9266960cd89SAndy Fiddaman 
9276dc98349SAndy Fiddaman 	assert(pthread_mutex_isowned_np(&sc->aen_mtx));
9286dc98349SAndy Fiddaman 	for (atype = 0; atype < PCI_NVME_AE_TYPE_MAX; atype++) {
9296dc98349SAndy Fiddaman 		aen = &sc->aen[atype];
9306dc98349SAndy Fiddaman 		/* Previous iterations may have depleted the available AER's */
9316dc98349SAndy Fiddaman 		if (!pci_nvme_aer_available(sc)) {
9326dc98349SAndy Fiddaman 			DPRINTF("%s: no AER", __func__);
9336dc98349SAndy Fiddaman 			break;
9346dc98349SAndy Fiddaman 		}
9356dc98349SAndy Fiddaman 
9366dc98349SAndy Fiddaman 		if (!aen->posted) {
9376dc98349SAndy Fiddaman 			DPRINTF("%s: no AEN posted for atype=%#x", __func__, atype);
9386dc98349SAndy Fiddaman 			continue;
9396dc98349SAndy Fiddaman 		}
9406dc98349SAndy Fiddaman 
9416dc98349SAndy Fiddaman 		status = NVME_SC_SUCCESS;
9426dc98349SAndy Fiddaman 
9436dc98349SAndy Fiddaman 		/* Is the event masked? */
9446dc98349SAndy Fiddaman 		mask =
9456dc98349SAndy Fiddaman 		    sc->feat[NVME_FEAT_ASYNC_EVENT_CONFIGURATION].cdw11;
9466dc98349SAndy Fiddaman 
9476dc98349SAndy Fiddaman 		DPRINTF("%s: atype=%#x mask=%#x event_data=%#x", __func__, atype, mask, aen->event_data);
9486dc98349SAndy Fiddaman 		switch (atype) {
9496dc98349SAndy Fiddaman 		case PCI_NVME_AE_TYPE_ERROR:
9506dc98349SAndy Fiddaman 			lid = NVME_LOG_ERROR;
9516dc98349SAndy Fiddaman 			break;
9526dc98349SAndy Fiddaman 		case PCI_NVME_AE_TYPE_SMART:
9536dc98349SAndy Fiddaman 			mask &= 0xff;
9546dc98349SAndy Fiddaman 			if ((mask & aen->event_data) == 0)
9556dc98349SAndy Fiddaman 				continue;
9566dc98349SAndy Fiddaman 			lid = NVME_LOG_HEALTH_INFORMATION;
9576dc98349SAndy Fiddaman 			break;
9586dc98349SAndy Fiddaman 		case PCI_NVME_AE_TYPE_NOTICE:
959*d7b72f7bSAndy Fiddaman 			if (aen->event_data >= PCI_NVME_AEI_NOTICE_MAX) {
9606dc98349SAndy Fiddaman 				EPRINTLN("%s unknown AEN notice type %u",
9616dc98349SAndy Fiddaman 				    __func__, aen->event_data);
9626dc98349SAndy Fiddaman 				status = NVME_SC_INTERNAL_DEVICE_ERROR;
9636dc98349SAndy Fiddaman 				break;
9646dc98349SAndy Fiddaman 			}
965*d7b72f7bSAndy Fiddaman 			if ((PCI_NVME_AEI_NOTICE_MASK(aen->event_data) & mask) == 0)
9666dc98349SAndy Fiddaman 				continue;
9676dc98349SAndy Fiddaman 			switch (aen->event_data) {
968*d7b72f7bSAndy Fiddaman 			case PCI_NVME_AEI_NOTICE_NS_ATTR_CHANGED:
9696dc98349SAndy Fiddaman 				lid = NVME_LOG_CHANGED_NAMESPACE;
9706dc98349SAndy Fiddaman 				break;
971*d7b72f7bSAndy Fiddaman 			case PCI_NVME_AEI_NOTICE_FW_ACTIVATION:
9726dc98349SAndy Fiddaman 				lid = NVME_LOG_FIRMWARE_SLOT;
9736dc98349SAndy Fiddaman 				break;
974*d7b72f7bSAndy Fiddaman 			case PCI_NVME_AEI_NOTICE_TELEMETRY_CHANGE:
9756dc98349SAndy Fiddaman 				lid = NVME_LOG_TELEMETRY_CONTROLLER_INITIATED;
9766dc98349SAndy Fiddaman 				break;
977*d7b72f7bSAndy Fiddaman 			case PCI_NVME_AEI_NOTICE_ANA_CHANGE:
978*d7b72f7bSAndy Fiddaman 				lid = NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS;
9796dc98349SAndy Fiddaman 				break;
980*d7b72f7bSAndy Fiddaman 			case PCI_NVME_AEI_NOTICE_PREDICT_LATENCY_CHANGE:
9816dc98349SAndy Fiddaman 				lid = NVME_LOG_PREDICTABLE_LATENCY_EVENT_AGGREGATE;
9826dc98349SAndy Fiddaman 				break;
983*d7b72f7bSAndy Fiddaman 			case PCI_NVME_AEI_NOTICE_LBA_STATUS_ALERT:
9846dc98349SAndy Fiddaman 				lid = NVME_LOG_LBA_STATUS_INFORMATION;
9856dc98349SAndy Fiddaman 				break;
986*d7b72f7bSAndy Fiddaman 			case PCI_NVME_AEI_NOTICE_ENDURANCE_GROUP_CHANGE:
9876dc98349SAndy Fiddaman 				lid = NVME_LOG_ENDURANCE_GROUP_EVENT_AGGREGATE;
9886dc98349SAndy Fiddaman 				break;
9896dc98349SAndy Fiddaman 			default:
9906dc98349SAndy Fiddaman 				lid = 0;
9916dc98349SAndy Fiddaman 			}
9926dc98349SAndy Fiddaman 			break;
9936dc98349SAndy Fiddaman 		default:
9946dc98349SAndy Fiddaman 			/* bad type?!? */
9956dc98349SAndy Fiddaman 			EPRINTLN("%s unknown AEN type %u", __func__, atype);
9966dc98349SAndy Fiddaman 			status = NVME_SC_INTERNAL_DEVICE_ERROR;
9976dc98349SAndy Fiddaman 			break;
9986dc98349SAndy Fiddaman 		}
9996dc98349SAndy Fiddaman 
10006dc98349SAndy Fiddaman 		aer = pci_nvme_aer_get(sc);
10016dc98349SAndy Fiddaman 		assert(aer != NULL);
10026dc98349SAndy Fiddaman 
10036dc98349SAndy Fiddaman 		DPRINTF("%s: CID=%#x CDW0=%#x", __func__, aer->cid, (lid << 16) | (aen->event_data << 8) | atype);
10046dc98349SAndy Fiddaman 		pci_nvme_cq_update(sc, &sc->compl_queues[0],
10056dc98349SAndy Fiddaman 		    (lid << 16) | (aen->event_data << 8) | atype, /* cdw0 */
10066dc98349SAndy Fiddaman 		    aer->cid,
10076dc98349SAndy Fiddaman 		    0,		/* SQID */
10086dc98349SAndy Fiddaman 		    status);
10096dc98349SAndy Fiddaman 
10106dc98349SAndy Fiddaman 		aen->event_data = 0;
10116dc98349SAndy Fiddaman 		aen->posted = false;
10126dc98349SAndy Fiddaman 
10136dc98349SAndy Fiddaman 		pci_generate_msix(sc->nsc_pi, 0);
10146dc98349SAndy Fiddaman 	}
10156dc98349SAndy Fiddaman }
10166dc98349SAndy Fiddaman 
10176dc98349SAndy Fiddaman static void *
10186dc98349SAndy Fiddaman aen_thr(void *arg)
10196dc98349SAndy Fiddaman {
10206dc98349SAndy Fiddaman 	struct pci_nvme_softc *sc;
10216dc98349SAndy Fiddaman 
10226dc98349SAndy Fiddaman 	sc = arg;
10236dc98349SAndy Fiddaman 
10246dc98349SAndy Fiddaman 	pthread_mutex_lock(&sc->aen_mtx);
10256dc98349SAndy Fiddaman 	for (;;) {
10266dc98349SAndy Fiddaman 		pci_nvme_aen_process(sc);
10276dc98349SAndy Fiddaman 		pthread_cond_wait(&sc->aen_cond, &sc->aen_mtx);
10286dc98349SAndy Fiddaman 	}
10296dc98349SAndy Fiddaman #ifdef __FreeBSD__
10306dc98349SAndy Fiddaman 	pthread_mutex_unlock(&sc->aen_mtx);
10316dc98349SAndy Fiddaman 
10326dc98349SAndy Fiddaman 	pthread_exit(NULL);
10336dc98349SAndy Fiddaman #endif
10346dc98349SAndy Fiddaman 	return (NULL);
10356dc98349SAndy Fiddaman }
10366dc98349SAndy Fiddaman 
10376960cd89SAndy Fiddaman static void
10384c87aefeSPatrick Mooney pci_nvme_reset_locked(struct pci_nvme_softc *sc)
10394c87aefeSPatrick Mooney {
10406960cd89SAndy Fiddaman 	uint32_t i;
10416960cd89SAndy Fiddaman 
10426960cd89SAndy Fiddaman 	DPRINTF("%s", __func__);
10434c87aefeSPatrick Mooney 
10444c87aefeSPatrick Mooney 	sc->regs.cap_lo = (ZERO_BASED(sc->max_qentries) & NVME_CAP_LO_REG_MQES_MASK) |
10454c87aefeSPatrick Mooney 	    (1 << NVME_CAP_LO_REG_CQR_SHIFT) |
10464c87aefeSPatrick Mooney 	    (60 << NVME_CAP_LO_REG_TO_SHIFT);
10474c87aefeSPatrick Mooney 
10484c87aefeSPatrick Mooney 	sc->regs.cap_hi = 1 << NVME_CAP_HI_REG_CSS_NVM_SHIFT;
10494c87aefeSPatrick Mooney 
1050*d7b72f7bSAndy Fiddaman 	sc->regs.vs = NVME_REV(1,4);	/* NVMe v1.4 */
10514c87aefeSPatrick Mooney 
10524c87aefeSPatrick Mooney 	sc->regs.cc = 0;
10534c87aefeSPatrick Mooney 
10546960cd89SAndy Fiddaman 	assert(sc->submit_queues != NULL);
10556960cd89SAndy Fiddaman 
10566960cd89SAndy Fiddaman 	for (i = 0; i < sc->num_squeues + 1; i++) {
10574c87aefeSPatrick Mooney 		sc->submit_queues[i].qbase = NULL;
10584c87aefeSPatrick Mooney 		sc->submit_queues[i].size = 0;
10594c87aefeSPatrick Mooney 		sc->submit_queues[i].cqid = 0;
10604c87aefeSPatrick Mooney 		sc->submit_queues[i].tail = 0;
10614c87aefeSPatrick Mooney 		sc->submit_queues[i].head = 0;
10624c87aefeSPatrick Mooney 	}
10634c87aefeSPatrick Mooney 
10646960cd89SAndy Fiddaman 	assert(sc->compl_queues != NULL);
10656960cd89SAndy Fiddaman 
10666960cd89SAndy Fiddaman 	for (i = 0; i < sc->num_cqueues + 1; i++) {
10674c87aefeSPatrick Mooney 		sc->compl_queues[i].qbase = NULL;
10684c87aefeSPatrick Mooney 		sc->compl_queues[i].size = 0;
10694c87aefeSPatrick Mooney 		sc->compl_queues[i].tail = 0;
10704c87aefeSPatrick Mooney 		sc->compl_queues[i].head = 0;
10714c87aefeSPatrick Mooney 	}
10724c87aefeSPatrick Mooney 
10736960cd89SAndy Fiddaman 	sc->num_q_is_set = false;
10746960cd89SAndy Fiddaman 
10756960cd89SAndy Fiddaman 	pci_nvme_aer_destroy(sc);
10766dc98349SAndy Fiddaman 	pci_nvme_aen_destroy(sc);
1077*d7b72f7bSAndy Fiddaman 
1078*d7b72f7bSAndy Fiddaman 	/*
1079*d7b72f7bSAndy Fiddaman 	 * Clear CSTS.RDY last to prevent the host from enabling Controller
1080*d7b72f7bSAndy Fiddaman 	 * before cleanup completes
1081*d7b72f7bSAndy Fiddaman 	 */
1082*d7b72f7bSAndy Fiddaman 	sc->regs.csts = 0;
10834c87aefeSPatrick Mooney }
10844c87aefeSPatrick Mooney 
10854c87aefeSPatrick Mooney static void
10864c87aefeSPatrick Mooney pci_nvme_reset(struct pci_nvme_softc *sc)
10874c87aefeSPatrick Mooney {
10884c87aefeSPatrick Mooney 	pthread_mutex_lock(&sc->mtx);
10894c87aefeSPatrick Mooney 	pci_nvme_reset_locked(sc);
10904c87aefeSPatrick Mooney 	pthread_mutex_unlock(&sc->mtx);
10914c87aefeSPatrick Mooney }
10924c87aefeSPatrick Mooney 
10934c87aefeSPatrick Mooney static void
10944c87aefeSPatrick Mooney pci_nvme_init_controller(struct vmctx *ctx, struct pci_nvme_softc *sc)
10954c87aefeSPatrick Mooney {
10964c87aefeSPatrick Mooney 	uint16_t acqs, asqs;
10974c87aefeSPatrick Mooney 
10986960cd89SAndy Fiddaman 	DPRINTF("%s", __func__);
10994c87aefeSPatrick Mooney 
11004c87aefeSPatrick Mooney 	asqs = (sc->regs.aqa & NVME_AQA_REG_ASQS_MASK) + 1;
11014c87aefeSPatrick Mooney 	sc->submit_queues[0].size = asqs;
11024c87aefeSPatrick Mooney 	sc->submit_queues[0].qbase = vm_map_gpa(ctx, sc->regs.asq,
11034c87aefeSPatrick Mooney 	            sizeof(struct nvme_command) * asqs);
11044c87aefeSPatrick Mooney 
11056960cd89SAndy Fiddaman 	DPRINTF("%s mapping Admin-SQ guest 0x%lx, host: %p",
11066960cd89SAndy Fiddaman 	        __func__, sc->regs.asq, sc->submit_queues[0].qbase);
11074c87aefeSPatrick Mooney 
11084c87aefeSPatrick Mooney 	acqs = ((sc->regs.aqa >> NVME_AQA_REG_ACQS_SHIFT) &
11094c87aefeSPatrick Mooney 	    NVME_AQA_REG_ACQS_MASK) + 1;
11104c87aefeSPatrick Mooney 	sc->compl_queues[0].size = acqs;
11114c87aefeSPatrick Mooney 	sc->compl_queues[0].qbase = vm_map_gpa(ctx, sc->regs.acq,
11124c87aefeSPatrick Mooney 	         sizeof(struct nvme_completion) * acqs);
11136960cd89SAndy Fiddaman 	sc->compl_queues[0].intr_en = NVME_CQ_INTEN;
11146960cd89SAndy Fiddaman 
11156960cd89SAndy Fiddaman 	DPRINTF("%s mapping Admin-CQ guest 0x%lx, host: %p",
11166960cd89SAndy Fiddaman 	        __func__, sc->regs.acq, sc->compl_queues[0].qbase);
11174c87aefeSPatrick Mooney }
11184c87aefeSPatrick Mooney 
11194c87aefeSPatrick Mooney static int
1120154972afSPatrick Mooney nvme_prp_memcpy(struct vmctx *ctx, uint64_t prp1, uint64_t prp2, uint8_t *b,
1121154972afSPatrick Mooney 	size_t len, enum nvme_copy_dir dir)
11224c87aefeSPatrick Mooney {
1123154972afSPatrick Mooney 	uint8_t *p;
11244c87aefeSPatrick Mooney 	size_t bytes;
11254c87aefeSPatrick Mooney 
11264c87aefeSPatrick Mooney 	if (len > (8 * 1024)) {
11274c87aefeSPatrick Mooney 		return (-1);
11284c87aefeSPatrick Mooney 	}
11294c87aefeSPatrick Mooney 
11304c87aefeSPatrick Mooney 	/* Copy from the start of prp1 to the end of the physical page */
11314c87aefeSPatrick Mooney 	bytes = PAGE_SIZE - (prp1 & PAGE_MASK);
11324c87aefeSPatrick Mooney 	bytes = MIN(bytes, len);
11334c87aefeSPatrick Mooney 
1134154972afSPatrick Mooney 	p = vm_map_gpa(ctx, prp1, bytes);
1135154972afSPatrick Mooney 	if (p == NULL) {
11364c87aefeSPatrick Mooney 		return (-1);
11374c87aefeSPatrick Mooney 	}
11384c87aefeSPatrick Mooney 
1139154972afSPatrick Mooney 	if (dir == NVME_COPY_TO_PRP)
1140154972afSPatrick Mooney 		memcpy(p, b, bytes);
1141154972afSPatrick Mooney 	else
1142154972afSPatrick Mooney 		memcpy(b, p, bytes);
11434c87aefeSPatrick Mooney 
1144154972afSPatrick Mooney 	b += bytes;
11454c87aefeSPatrick Mooney 
11464c87aefeSPatrick Mooney 	len -= bytes;
11474c87aefeSPatrick Mooney 	if (len == 0) {
11484c87aefeSPatrick Mooney 		return (0);
11494c87aefeSPatrick Mooney 	}
11504c87aefeSPatrick Mooney 
11514c87aefeSPatrick Mooney 	len = MIN(len, PAGE_SIZE);
11524c87aefeSPatrick Mooney 
1153154972afSPatrick Mooney 	p = vm_map_gpa(ctx, prp2, len);
1154154972afSPatrick Mooney 	if (p == NULL) {
11554c87aefeSPatrick Mooney 		return (-1);
11564c87aefeSPatrick Mooney 	}
11574c87aefeSPatrick Mooney 
1158154972afSPatrick Mooney 	if (dir == NVME_COPY_TO_PRP)
1159154972afSPatrick Mooney 		memcpy(p, b, len);
1160154972afSPatrick Mooney 	else
1161154972afSPatrick Mooney 		memcpy(b, p, len);
11624c87aefeSPatrick Mooney 
11634c87aefeSPatrick Mooney 	return (0);
11644c87aefeSPatrick Mooney }
11654c87aefeSPatrick Mooney 
11666960cd89SAndy Fiddaman /*
11676960cd89SAndy Fiddaman  * Write a Completion Queue Entry update
11686960cd89SAndy Fiddaman  *
11696960cd89SAndy Fiddaman  * Write the completion and update the doorbell value
11706960cd89SAndy Fiddaman  */
11716960cd89SAndy Fiddaman static void
11726960cd89SAndy Fiddaman pci_nvme_cq_update(struct pci_nvme_softc *sc,
11736960cd89SAndy Fiddaman 		struct nvme_completion_queue *cq,
11746960cd89SAndy Fiddaman 		uint32_t cdw0,
11756960cd89SAndy Fiddaman 		uint16_t cid,
11766960cd89SAndy Fiddaman 		uint16_t sqid,
11776960cd89SAndy Fiddaman 		uint16_t status)
11786960cd89SAndy Fiddaman {
11796960cd89SAndy Fiddaman 	struct nvme_submission_queue *sq = &sc->submit_queues[sqid];
11806960cd89SAndy Fiddaman 	struct nvme_completion *cqe;
11816960cd89SAndy Fiddaman 
11826960cd89SAndy Fiddaman 	assert(cq->qbase != NULL);
11836960cd89SAndy Fiddaman 
11846960cd89SAndy Fiddaman 	pthread_mutex_lock(&cq->mtx);
11856960cd89SAndy Fiddaman 
11866960cd89SAndy Fiddaman 	cqe = &cq->qbase[cq->tail];
11876960cd89SAndy Fiddaman 
11886960cd89SAndy Fiddaman 	/* Flip the phase bit */
11896960cd89SAndy Fiddaman 	status |= (cqe->status ^ NVME_STATUS_P) & NVME_STATUS_P_MASK;
11906960cd89SAndy Fiddaman 
11916960cd89SAndy Fiddaman 	cqe->cdw0 = cdw0;
11926960cd89SAndy Fiddaman 	cqe->sqhd = sq->head;
11936960cd89SAndy Fiddaman 	cqe->sqid = sqid;
11946960cd89SAndy Fiddaman 	cqe->cid = cid;
11956960cd89SAndy Fiddaman 	cqe->status = status;
11966960cd89SAndy Fiddaman 
11976960cd89SAndy Fiddaman 	cq->tail++;
11986960cd89SAndy Fiddaman 	if (cq->tail >= cq->size) {
11996960cd89SAndy Fiddaman 		cq->tail = 0;
12006960cd89SAndy Fiddaman 	}
12016960cd89SAndy Fiddaman 
12026960cd89SAndy Fiddaman 	pthread_mutex_unlock(&cq->mtx);
12036960cd89SAndy Fiddaman }
12046960cd89SAndy Fiddaman 
12054c87aefeSPatrick Mooney static int
12064c87aefeSPatrick Mooney nvme_opc_delete_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command,
12074c87aefeSPatrick Mooney 	struct nvme_completion* compl)
12084c87aefeSPatrick Mooney {
12094c87aefeSPatrick Mooney 	uint16_t qid = command->cdw10 & 0xffff;
12104c87aefeSPatrick Mooney 
12116960cd89SAndy Fiddaman 	DPRINTF("%s DELETE_IO_SQ %u", __func__, qid);
12126960cd89SAndy Fiddaman 	if (qid == 0 || qid > sc->num_squeues ||
12136960cd89SAndy Fiddaman 	    (sc->submit_queues[qid].qbase == NULL)) {
12146960cd89SAndy Fiddaman 		WPRINTF("%s NOT PERMITTED queue id %u / num_squeues %u",
12156960cd89SAndy Fiddaman 		        __func__, qid, sc->num_squeues);
12164c87aefeSPatrick Mooney 		pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
12174c87aefeSPatrick Mooney 		    NVME_SC_INVALID_QUEUE_IDENTIFIER);
12184c87aefeSPatrick Mooney 		return (1);
12194c87aefeSPatrick Mooney 	}
12204c87aefeSPatrick Mooney 
12214c87aefeSPatrick Mooney 	sc->submit_queues[qid].qbase = NULL;
12226960cd89SAndy Fiddaman 	sc->submit_queues[qid].cqid = 0;
12234c87aefeSPatrick Mooney 	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
12244c87aefeSPatrick Mooney 	return (1);
12254c87aefeSPatrick Mooney }
12264c87aefeSPatrick Mooney 
12274c87aefeSPatrick Mooney static int
12284c87aefeSPatrick Mooney nvme_opc_create_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command,
12294c87aefeSPatrick Mooney 	struct nvme_completion* compl)
12304c87aefeSPatrick Mooney {
12314c87aefeSPatrick Mooney 	if (command->cdw11 & NVME_CMD_CDW11_PC) {
12324c87aefeSPatrick Mooney 		uint16_t qid = command->cdw10 & 0xffff;
12334c87aefeSPatrick Mooney 		struct nvme_submission_queue *nsq;
12344c87aefeSPatrick Mooney 
12356960cd89SAndy Fiddaman 		if ((qid == 0) || (qid > sc->num_squeues) ||
12366960cd89SAndy Fiddaman 		    (sc->submit_queues[qid].qbase != NULL)) {
12376960cd89SAndy Fiddaman 			WPRINTF("%s queue index %u > num_squeues %u",
12386960cd89SAndy Fiddaman 			        __func__, qid, sc->num_squeues);
12394c87aefeSPatrick Mooney 			pci_nvme_status_tc(&compl->status,
12404c87aefeSPatrick Mooney 			    NVME_SCT_COMMAND_SPECIFIC,
12414c87aefeSPatrick Mooney 			    NVME_SC_INVALID_QUEUE_IDENTIFIER);
12424c87aefeSPatrick Mooney 			return (1);
12434c87aefeSPatrick Mooney 		}
12444c87aefeSPatrick Mooney 
12454c87aefeSPatrick Mooney 		nsq = &sc->submit_queues[qid];
12464c87aefeSPatrick Mooney 		nsq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff);
12476960cd89SAndy Fiddaman 		DPRINTF("%s size=%u (max=%u)", __func__, nsq->size, sc->max_qentries);
12486960cd89SAndy Fiddaman 		if ((nsq->size < 2) || (nsq->size > sc->max_qentries)) {
12496960cd89SAndy Fiddaman 			/*
12506960cd89SAndy Fiddaman 			 * Queues must specify at least two entries
12516960cd89SAndy Fiddaman 			 * NOTE: "MAXIMUM QUEUE SIZE EXCEEDED" was renamed to
12526960cd89SAndy Fiddaman 			 * "INVALID QUEUE SIZE" in the NVM Express 1.3 Spec
12536960cd89SAndy Fiddaman 			 */
12546960cd89SAndy Fiddaman 			pci_nvme_status_tc(&compl->status,
12556960cd89SAndy Fiddaman 			    NVME_SCT_COMMAND_SPECIFIC,
12566960cd89SAndy Fiddaman 			    NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED);
12576960cd89SAndy Fiddaman 			return (1);
12586960cd89SAndy Fiddaman 		}
12596960cd89SAndy Fiddaman 		nsq->head = nsq->tail = 0;
12606960cd89SAndy Fiddaman 
12616960cd89SAndy Fiddaman 		nsq->cqid = (command->cdw11 >> 16) & 0xffff;
12626960cd89SAndy Fiddaman 		if ((nsq->cqid == 0) || (nsq->cqid > sc->num_cqueues)) {
12636960cd89SAndy Fiddaman 			pci_nvme_status_tc(&compl->status,
12646960cd89SAndy Fiddaman 			    NVME_SCT_COMMAND_SPECIFIC,
12656960cd89SAndy Fiddaman 			    NVME_SC_INVALID_QUEUE_IDENTIFIER);
12666960cd89SAndy Fiddaman 			return (1);
12676960cd89SAndy Fiddaman 		}
12686960cd89SAndy Fiddaman 
12696960cd89SAndy Fiddaman 		if (sc->compl_queues[nsq->cqid].qbase == NULL) {
12706960cd89SAndy Fiddaman 			pci_nvme_status_tc(&compl->status,
12716960cd89SAndy Fiddaman 			    NVME_SCT_COMMAND_SPECIFIC,
12726960cd89SAndy Fiddaman 			    NVME_SC_COMPLETION_QUEUE_INVALID);
12736960cd89SAndy Fiddaman 			return (1);
12746960cd89SAndy Fiddaman 		}
12756960cd89SAndy Fiddaman 
12766960cd89SAndy Fiddaman 		nsq->qpriority = (command->cdw11 >> 1) & 0x03;
12774c87aefeSPatrick Mooney 
12784c87aefeSPatrick Mooney 		nsq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1,
12794c87aefeSPatrick Mooney 		              sizeof(struct nvme_command) * (size_t)nsq->size);
12804c87aefeSPatrick Mooney 
12816960cd89SAndy Fiddaman 		DPRINTF("%s sq %u size %u gaddr %p cqid %u", __func__,
12826960cd89SAndy Fiddaman 		        qid, nsq->size, nsq->qbase, nsq->cqid);
12834c87aefeSPatrick Mooney 
12844c87aefeSPatrick Mooney 		pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
12854c87aefeSPatrick Mooney 
12866960cd89SAndy Fiddaman 		DPRINTF("%s completed creating IOSQ qid %u",
12876960cd89SAndy Fiddaman 		         __func__, qid);
12884c87aefeSPatrick Mooney 	} else {
12894c87aefeSPatrick Mooney 		/*
12904c87aefeSPatrick Mooney 		 * Guest sent non-cont submission queue request.
12914c87aefeSPatrick Mooney 		 * This setting is unsupported by this emulation.
12924c87aefeSPatrick Mooney 		 */
12936960cd89SAndy Fiddaman 		WPRINTF("%s unsupported non-contig (list-based) "
12946960cd89SAndy Fiddaman 		         "create i/o submission queue", __func__);
12954c87aefeSPatrick Mooney 
12964c87aefeSPatrick Mooney 		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
12974c87aefeSPatrick Mooney 	}
12984c87aefeSPatrick Mooney 	return (1);
12994c87aefeSPatrick Mooney }
13004c87aefeSPatrick Mooney 
13014c87aefeSPatrick Mooney static int
13024c87aefeSPatrick Mooney nvme_opc_delete_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command,
13034c87aefeSPatrick Mooney 	struct nvme_completion* compl)
13044c87aefeSPatrick Mooney {
13054c87aefeSPatrick Mooney 	uint16_t qid = command->cdw10 & 0xffff;
13066960cd89SAndy Fiddaman 	uint16_t sqid;
13074c87aefeSPatrick Mooney 
13086960cd89SAndy Fiddaman 	DPRINTF("%s DELETE_IO_CQ %u", __func__, qid);
13096960cd89SAndy Fiddaman 	if (qid == 0 || qid > sc->num_cqueues ||
13106960cd89SAndy Fiddaman 	    (sc->compl_queues[qid].qbase == NULL)) {
13116960cd89SAndy Fiddaman 		WPRINTF("%s queue index %u / num_cqueues %u",
13126960cd89SAndy Fiddaman 		        __func__, qid, sc->num_cqueues);
13134c87aefeSPatrick Mooney 		pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
13144c87aefeSPatrick Mooney 		    NVME_SC_INVALID_QUEUE_IDENTIFIER);
13154c87aefeSPatrick Mooney 		return (1);
13164c87aefeSPatrick Mooney 	}
13174c87aefeSPatrick Mooney 
13186960cd89SAndy Fiddaman 	/* Deleting an Active CQ is an error */
13196960cd89SAndy Fiddaman 	for (sqid = 1; sqid < sc->num_squeues + 1; sqid++)
13206960cd89SAndy Fiddaman 		if (sc->submit_queues[sqid].cqid == qid) {
13216960cd89SAndy Fiddaman 			pci_nvme_status_tc(&compl->status,
13226960cd89SAndy Fiddaman 			    NVME_SCT_COMMAND_SPECIFIC,
13236960cd89SAndy Fiddaman 			    NVME_SC_INVALID_QUEUE_DELETION);
13246960cd89SAndy Fiddaman 			return (1);
13256960cd89SAndy Fiddaman 		}
13266960cd89SAndy Fiddaman 
13274c87aefeSPatrick Mooney 	sc->compl_queues[qid].qbase = NULL;
13284c87aefeSPatrick Mooney 	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
13294c87aefeSPatrick Mooney 	return (1);
13304c87aefeSPatrick Mooney }
13314c87aefeSPatrick Mooney 
13324c87aefeSPatrick Mooney static int
13334c87aefeSPatrick Mooney nvme_opc_create_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command,
13344c87aefeSPatrick Mooney 	struct nvme_completion* compl)
13354c87aefeSPatrick Mooney {
13364c87aefeSPatrick Mooney 	struct nvme_completion_queue *ncq;
13376960cd89SAndy Fiddaman 	uint16_t qid = command->cdw10 & 0xffff;
13384c87aefeSPatrick Mooney 
13396960cd89SAndy Fiddaman 	/* Only support Physically Contiguous queues */
13406960cd89SAndy Fiddaman 	if ((command->cdw11 & NVME_CMD_CDW11_PC) == 0) {
13416960cd89SAndy Fiddaman 		WPRINTF("%s unsupported non-contig (list-based) "
13426960cd89SAndy Fiddaman 		         "create i/o completion queue",
13436960cd89SAndy Fiddaman 		         __func__);
13446960cd89SAndy Fiddaman 
13456960cd89SAndy Fiddaman 		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
13466960cd89SAndy Fiddaman 		return (1);
13476960cd89SAndy Fiddaman 	}
13486960cd89SAndy Fiddaman 
13496960cd89SAndy Fiddaman 	if ((qid == 0) || (qid > sc->num_cqueues) ||
13506960cd89SAndy Fiddaman 	    (sc->compl_queues[qid].qbase != NULL)) {
13516960cd89SAndy Fiddaman 		WPRINTF("%s queue index %u > num_cqueues %u",
13526960cd89SAndy Fiddaman 			__func__, qid, sc->num_cqueues);
13534c87aefeSPatrick Mooney 		pci_nvme_status_tc(&compl->status,
13544c87aefeSPatrick Mooney 		    NVME_SCT_COMMAND_SPECIFIC,
13554c87aefeSPatrick Mooney 		    NVME_SC_INVALID_QUEUE_IDENTIFIER);
13564c87aefeSPatrick Mooney 		return (1);
13574c87aefeSPatrick Mooney  	}
13584c87aefeSPatrick Mooney 
13594c87aefeSPatrick Mooney 	ncq = &sc->compl_queues[qid];
13604c87aefeSPatrick Mooney 	ncq->intr_en = (command->cdw11 & NVME_CMD_CDW11_IEN) >> 1;
13614c87aefeSPatrick Mooney 	ncq->intr_vec = (command->cdw11 >> 16) & 0xffff;
13626960cd89SAndy Fiddaman 	if (ncq->intr_vec > (sc->max_queues + 1)) {
13636960cd89SAndy Fiddaman 		pci_nvme_status_tc(&compl->status,
13646960cd89SAndy Fiddaman 		    NVME_SCT_COMMAND_SPECIFIC,
13656960cd89SAndy Fiddaman 		    NVME_SC_INVALID_INTERRUPT_VECTOR);
13666960cd89SAndy Fiddaman 		return (1);
13676960cd89SAndy Fiddaman 	}
13684c87aefeSPatrick Mooney 
13696960cd89SAndy Fiddaman 	ncq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff);
13706960cd89SAndy Fiddaman 	if ((ncq->size < 2) || (ncq->size > sc->max_qentries))  {
13716960cd89SAndy Fiddaman 		/*
13726960cd89SAndy Fiddaman 		 * Queues must specify at least two entries
13736960cd89SAndy Fiddaman 		 * NOTE: "MAXIMUM QUEUE SIZE EXCEEDED" was renamed to
13746960cd89SAndy Fiddaman 		 * "INVALID QUEUE SIZE" in the NVM Express 1.3 Spec
13756960cd89SAndy Fiddaman 		 */
13766960cd89SAndy Fiddaman 		pci_nvme_status_tc(&compl->status,
13776960cd89SAndy Fiddaman 		    NVME_SCT_COMMAND_SPECIFIC,
13786960cd89SAndy Fiddaman 		    NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED);
13796960cd89SAndy Fiddaman 		return (1);
13806960cd89SAndy Fiddaman 	}
13816960cd89SAndy Fiddaman 	ncq->head = ncq->tail = 0;
13824c87aefeSPatrick Mooney 	ncq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx,
13834c87aefeSPatrick Mooney 		     command->prp1,
13844c87aefeSPatrick Mooney 		     sizeof(struct nvme_command) * (size_t)ncq->size);
13854c87aefeSPatrick Mooney 
13864c87aefeSPatrick Mooney 	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
13874c87aefeSPatrick Mooney 
13884c87aefeSPatrick Mooney 
13894c87aefeSPatrick Mooney 	return (1);
13904c87aefeSPatrick Mooney }
13914c87aefeSPatrick Mooney 
13924c87aefeSPatrick Mooney static int
13934c87aefeSPatrick Mooney nvme_opc_get_log_page(struct pci_nvme_softc* sc, struct nvme_command* command,
13944c87aefeSPatrick Mooney 	struct nvme_completion* compl)
13954c87aefeSPatrick Mooney {
1396*d7b72f7bSAndy Fiddaman 	uint64_t logoff;
13972b948146SAndy Fiddaman 	uint32_t logsize;
13984c87aefeSPatrick Mooney 	uint8_t logpage = command->cdw10 & 0xFF;
13994c87aefeSPatrick Mooney 
14002b948146SAndy Fiddaman #ifndef __FreeBSD__
14012b948146SAndy Fiddaman 	logsize = 0;
14022b948146SAndy Fiddaman #endif
14032b948146SAndy Fiddaman 
14046960cd89SAndy Fiddaman 	DPRINTF("%s log page %u len %u", __func__, logpage, logsize);
14054c87aefeSPatrick Mooney 
14064c87aefeSPatrick Mooney 	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
14074c87aefeSPatrick Mooney 
14086960cd89SAndy Fiddaman 	/*
14096960cd89SAndy Fiddaman 	 * Command specifies the number of dwords to return in fields NUMDU
14106960cd89SAndy Fiddaman 	 * and NUMDL. This is a zero-based value.
14116960cd89SAndy Fiddaman 	 */
14126960cd89SAndy Fiddaman 	logsize = ((command->cdw11 << 16) | (command->cdw10 >> 16)) + 1;
14136960cd89SAndy Fiddaman 	logsize *= sizeof(uint32_t);
1414*d7b72f7bSAndy Fiddaman 	logoff  = ((uint64_t)(command->cdw13) << 32) | command->cdw12;
14156960cd89SAndy Fiddaman 
14164c87aefeSPatrick Mooney 	switch (logpage) {
14174c87aefeSPatrick Mooney 	case NVME_LOG_ERROR:
1418*d7b72f7bSAndy Fiddaman 		if (logoff >= sizeof(sc->err_log)) {
1419*d7b72f7bSAndy Fiddaman 			pci_nvme_status_genc(&compl->status,
1420*d7b72f7bSAndy Fiddaman 			    NVME_SC_INVALID_FIELD);
1421*d7b72f7bSAndy Fiddaman 			break;
1422*d7b72f7bSAndy Fiddaman 		}
1423*d7b72f7bSAndy Fiddaman 
14244c87aefeSPatrick Mooney 		nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
1425*d7b72f7bSAndy Fiddaman 		    command->prp2, (uint8_t *)&sc->err_log + logoff,
1426*d7b72f7bSAndy Fiddaman 		    MIN(logsize - logoff, sizeof(sc->err_log)),
1427154972afSPatrick Mooney 		    NVME_COPY_TO_PRP);
14284c87aefeSPatrick Mooney 		break;
14294c87aefeSPatrick Mooney 	case NVME_LOG_HEALTH_INFORMATION:
1430*d7b72f7bSAndy Fiddaman 		if (logoff >= sizeof(sc->health_log)) {
1431*d7b72f7bSAndy Fiddaman 			pci_nvme_status_genc(&compl->status,
1432*d7b72f7bSAndy Fiddaman 			    NVME_SC_INVALID_FIELD);
1433*d7b72f7bSAndy Fiddaman 			break;
1434*d7b72f7bSAndy Fiddaman 		}
1435*d7b72f7bSAndy Fiddaman 
14366960cd89SAndy Fiddaman 		pthread_mutex_lock(&sc->mtx);
14376960cd89SAndy Fiddaman 		memcpy(&sc->health_log.data_units_read, &sc->read_data_units,
14386960cd89SAndy Fiddaman 		    sizeof(sc->health_log.data_units_read));
14396960cd89SAndy Fiddaman 		memcpy(&sc->health_log.data_units_written, &sc->write_data_units,
14406960cd89SAndy Fiddaman 		    sizeof(sc->health_log.data_units_written));
14416960cd89SAndy Fiddaman 		memcpy(&sc->health_log.host_read_commands, &sc->read_commands,
14426960cd89SAndy Fiddaman 		    sizeof(sc->health_log.host_read_commands));
14436960cd89SAndy Fiddaman 		memcpy(&sc->health_log.host_write_commands, &sc->write_commands,
14446960cd89SAndy Fiddaman 		    sizeof(sc->health_log.host_write_commands));
14456960cd89SAndy Fiddaman 		pthread_mutex_unlock(&sc->mtx);
14466960cd89SAndy Fiddaman 
14474c87aefeSPatrick Mooney 		nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
1448*d7b72f7bSAndy Fiddaman 		    command->prp2, (uint8_t *)&sc->health_log + logoff,
1449*d7b72f7bSAndy Fiddaman 		    MIN(logsize - logoff, sizeof(sc->health_log)),
1450154972afSPatrick Mooney 		    NVME_COPY_TO_PRP);
14514c87aefeSPatrick Mooney 		break;
14524c87aefeSPatrick Mooney 	case NVME_LOG_FIRMWARE_SLOT:
1453*d7b72f7bSAndy Fiddaman 		if (logoff >= sizeof(sc->fw_log)) {
1454*d7b72f7bSAndy Fiddaman 			pci_nvme_status_genc(&compl->status,
1455*d7b72f7bSAndy Fiddaman 			    NVME_SC_INVALID_FIELD);
1456*d7b72f7bSAndy Fiddaman 			break;
1457*d7b72f7bSAndy Fiddaman 		}
1458*d7b72f7bSAndy Fiddaman 
14594c87aefeSPatrick Mooney 		nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
1460*d7b72f7bSAndy Fiddaman 		    command->prp2, (uint8_t *)&sc->fw_log + logoff,
1461*d7b72f7bSAndy Fiddaman 		    MIN(logsize - logoff, sizeof(sc->fw_log)),
1462154972afSPatrick Mooney 		    NVME_COPY_TO_PRP);
14634c87aefeSPatrick Mooney 		break;
14646dc98349SAndy Fiddaman 	case NVME_LOG_CHANGED_NAMESPACE:
1465*d7b72f7bSAndy Fiddaman 		if (logoff >= sizeof(sc->ns_log)) {
1466*d7b72f7bSAndy Fiddaman 			pci_nvme_status_genc(&compl->status,
1467*d7b72f7bSAndy Fiddaman 			    NVME_SC_INVALID_FIELD);
1468*d7b72f7bSAndy Fiddaman 			break;
1469*d7b72f7bSAndy Fiddaman 		}
1470*d7b72f7bSAndy Fiddaman 
14716dc98349SAndy Fiddaman 		nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
1472*d7b72f7bSAndy Fiddaman 		    command->prp2, (uint8_t *)&sc->ns_log + logoff,
1473*d7b72f7bSAndy Fiddaman 		    MIN(logsize - logoff, sizeof(sc->ns_log)),
14746dc98349SAndy Fiddaman 		    NVME_COPY_TO_PRP);
14756dc98349SAndy Fiddaman 		memset(&sc->ns_log, 0, sizeof(sc->ns_log));
14766dc98349SAndy Fiddaman 		break;
14774c87aefeSPatrick Mooney 	default:
14786960cd89SAndy Fiddaman 		DPRINTF("%s get log page %x command not supported",
14796960cd89SAndy Fiddaman 		        __func__, logpage);
14804c87aefeSPatrick Mooney 
14814c87aefeSPatrick Mooney 		pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
14824c87aefeSPatrick Mooney 		    NVME_SC_INVALID_LOG_PAGE);
14834c87aefeSPatrick Mooney 	}
14844c87aefeSPatrick Mooney 
14854c87aefeSPatrick Mooney 	return (1);
14864c87aefeSPatrick Mooney }
14874c87aefeSPatrick Mooney 
14884c87aefeSPatrick Mooney static int
14894c87aefeSPatrick Mooney nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command,
14904c87aefeSPatrick Mooney 	struct nvme_completion* compl)
14914c87aefeSPatrick Mooney {
14924c87aefeSPatrick Mooney 	void *dest;
14932b948146SAndy Fiddaman 	uint16_t status;
14942b948146SAndy Fiddaman 
14952b948146SAndy Fiddaman #ifndef __FreeBSD__
14962b948146SAndy Fiddaman 	status = 0;
14972b948146SAndy Fiddaman #endif
14984c87aefeSPatrick Mooney 
14996960cd89SAndy Fiddaman 	DPRINTF("%s identify 0x%x nsid 0x%x", __func__,
15006960cd89SAndy Fiddaman 	        command->cdw10 & 0xFF, command->nsid);
15016960cd89SAndy Fiddaman 
15026960cd89SAndy Fiddaman 	pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
15034c87aefeSPatrick Mooney 
15044c87aefeSPatrick Mooney 	switch (command->cdw10 & 0xFF) {
15054c87aefeSPatrick Mooney 	case 0x00: /* return Identify Namespace data structure */
1506*d7b72f7bSAndy Fiddaman 		/* Global NS only valid with NS Management */
1507*d7b72f7bSAndy Fiddaman 		if (command->nsid == NVME_GLOBAL_NAMESPACE_TAG) {
1508*d7b72f7bSAndy Fiddaman 			pci_nvme_status_genc(&status,
1509*d7b72f7bSAndy Fiddaman 			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
1510*d7b72f7bSAndy Fiddaman 			break;
1511*d7b72f7bSAndy Fiddaman 		}
15124c87aefeSPatrick Mooney 		nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
1513154972afSPatrick Mooney 		    command->prp2, (uint8_t *)&sc->nsdata, sizeof(sc->nsdata),
1514154972afSPatrick Mooney 		    NVME_COPY_TO_PRP);
15154c87aefeSPatrick Mooney 		break;
15164c87aefeSPatrick Mooney 	case 0x01: /* return Identify Controller data structure */
15174c87aefeSPatrick Mooney 		nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
15184c87aefeSPatrick Mooney 		    command->prp2, (uint8_t *)&sc->ctrldata,
1519154972afSPatrick Mooney 		    sizeof(sc->ctrldata),
1520154972afSPatrick Mooney 		    NVME_COPY_TO_PRP);
15214c87aefeSPatrick Mooney 		break;
15224c87aefeSPatrick Mooney 	case 0x02: /* list of 1024 active NSIDs > CDW1.NSID */
15234c87aefeSPatrick Mooney 		dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1,
15244c87aefeSPatrick Mooney 		                  sizeof(uint32_t) * 1024);
15256960cd89SAndy Fiddaman 		/* All unused entries shall be zero */
15266960cd89SAndy Fiddaman 		bzero(dest, sizeof(uint32_t) * 1024);
15274c87aefeSPatrick Mooney 		((uint32_t *)dest)[0] = 1;
15284c87aefeSPatrick Mooney 		break;
15294c87aefeSPatrick Mooney 	case 0x03: /* list of NSID structures in CDW1.NSID, 4096 bytes */
15306960cd89SAndy Fiddaman 		if (command->nsid != 1) {
15316960cd89SAndy Fiddaman 			pci_nvme_status_genc(&status,
15326960cd89SAndy Fiddaman 			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
15336960cd89SAndy Fiddaman 			break;
15346960cd89SAndy Fiddaman 		}
15356960cd89SAndy Fiddaman 		dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1,
15366960cd89SAndy Fiddaman 		                  sizeof(uint32_t) * 1024);
15376960cd89SAndy Fiddaman 		/* All bytes after the descriptor shall be zero */
15386960cd89SAndy Fiddaman 		bzero(dest, sizeof(uint32_t) * 1024);
15396960cd89SAndy Fiddaman 
15406960cd89SAndy Fiddaman 		/* Return NIDT=1 (i.e. EUI64) descriptor */
15416960cd89SAndy Fiddaman 		((uint8_t *)dest)[0] = 1;
15426960cd89SAndy Fiddaman 		((uint8_t *)dest)[1] = sizeof(uint64_t);
15436960cd89SAndy Fiddaman 		bcopy(sc->nsdata.eui64, ((uint8_t *)dest) + 4, sizeof(uint64_t));
15446960cd89SAndy Fiddaman 		break;
15454c87aefeSPatrick Mooney 	default:
15466960cd89SAndy Fiddaman 		DPRINTF("%s unsupported identify command requested 0x%x",
15476960cd89SAndy Fiddaman 		         __func__, command->cdw10 & 0xFF);
15486960cd89SAndy Fiddaman 		pci_nvme_status_genc(&status, NVME_SC_INVALID_FIELD);
15496960cd89SAndy Fiddaman 		break;
15506960cd89SAndy Fiddaman 	}
15516960cd89SAndy Fiddaman 
15526960cd89SAndy Fiddaman 	compl->status = status;
15536960cd89SAndy Fiddaman 	return (1);
15546960cd89SAndy Fiddaman }
15556960cd89SAndy Fiddaman 
15566960cd89SAndy Fiddaman static const char *
15576960cd89SAndy Fiddaman nvme_fid_to_name(uint8_t fid)
15586960cd89SAndy Fiddaman {
15596960cd89SAndy Fiddaman 	const char *name;
15606960cd89SAndy Fiddaman 
15616960cd89SAndy Fiddaman 	switch (fid) {
15626960cd89SAndy Fiddaman 	case NVME_FEAT_ARBITRATION:
15636960cd89SAndy Fiddaman 		name = "Arbitration";
15646960cd89SAndy Fiddaman 		break;
15656960cd89SAndy Fiddaman 	case NVME_FEAT_POWER_MANAGEMENT:
15666960cd89SAndy Fiddaman 		name = "Power Management";
15676960cd89SAndy Fiddaman 		break;
15686960cd89SAndy Fiddaman 	case NVME_FEAT_LBA_RANGE_TYPE:
15696960cd89SAndy Fiddaman 		name = "LBA Range Type";
15706960cd89SAndy Fiddaman 		break;
15716960cd89SAndy Fiddaman 	case NVME_FEAT_TEMPERATURE_THRESHOLD:
15726960cd89SAndy Fiddaman 		name = "Temperature Threshold";
15736960cd89SAndy Fiddaman 		break;
15746960cd89SAndy Fiddaman 	case NVME_FEAT_ERROR_RECOVERY:
15756960cd89SAndy Fiddaman 		name = "Error Recovery";
15766960cd89SAndy Fiddaman 		break;
15776960cd89SAndy Fiddaman 	case NVME_FEAT_VOLATILE_WRITE_CACHE:
15786960cd89SAndy Fiddaman 		name = "Volatile Write Cache";
15796960cd89SAndy Fiddaman 		break;
15806960cd89SAndy Fiddaman 	case NVME_FEAT_NUMBER_OF_QUEUES:
15816960cd89SAndy Fiddaman 		name = "Number of Queues";
15826960cd89SAndy Fiddaman 		break;
15836960cd89SAndy Fiddaman 	case NVME_FEAT_INTERRUPT_COALESCING:
15846960cd89SAndy Fiddaman 		name = "Interrupt Coalescing";
15856960cd89SAndy Fiddaman 		break;
15866960cd89SAndy Fiddaman 	case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION:
15876960cd89SAndy Fiddaman 		name = "Interrupt Vector Configuration";
15886960cd89SAndy Fiddaman 		break;
15896960cd89SAndy Fiddaman 	case NVME_FEAT_WRITE_ATOMICITY:
15906960cd89SAndy Fiddaman 		name = "Write Atomicity Normal";
15916960cd89SAndy Fiddaman 		break;
15926960cd89SAndy Fiddaman 	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
15936960cd89SAndy Fiddaman 		name = "Asynchronous Event Configuration";
15946960cd89SAndy Fiddaman 		break;
15956960cd89SAndy Fiddaman 	case NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION:
15966960cd89SAndy Fiddaman 		name = "Autonomous Power State Transition";
15976960cd89SAndy Fiddaman 		break;
15986960cd89SAndy Fiddaman 	case NVME_FEAT_HOST_MEMORY_BUFFER:
15996960cd89SAndy Fiddaman 		name = "Host Memory Buffer";
16006960cd89SAndy Fiddaman 		break;
16016960cd89SAndy Fiddaman 	case NVME_FEAT_TIMESTAMP:
16026960cd89SAndy Fiddaman 		name = "Timestamp";
16036960cd89SAndy Fiddaman 		break;
16046960cd89SAndy Fiddaman 	case NVME_FEAT_KEEP_ALIVE_TIMER:
16056960cd89SAndy Fiddaman 		name = "Keep Alive Timer";
16066960cd89SAndy Fiddaman 		break;
16076960cd89SAndy Fiddaman 	case NVME_FEAT_HOST_CONTROLLED_THERMAL_MGMT:
16086960cd89SAndy Fiddaman 		name = "Host Controlled Thermal Management";
16096960cd89SAndy Fiddaman 		break;
16106960cd89SAndy Fiddaman 	case NVME_FEAT_NON_OP_POWER_STATE_CONFIG:
16116960cd89SAndy Fiddaman 		name = "Non-Operation Power State Config";
16126960cd89SAndy Fiddaman 		break;
16136960cd89SAndy Fiddaman 	case NVME_FEAT_READ_RECOVERY_LEVEL_CONFIG:
16146960cd89SAndy Fiddaman 		name = "Read Recovery Level Config";
16156960cd89SAndy Fiddaman 		break;
16166960cd89SAndy Fiddaman 	case NVME_FEAT_PREDICTABLE_LATENCY_MODE_CONFIG:
16176960cd89SAndy Fiddaman 		name = "Predictable Latency Mode Config";
16186960cd89SAndy Fiddaman 		break;
16196960cd89SAndy Fiddaman 	case NVME_FEAT_PREDICTABLE_LATENCY_MODE_WINDOW:
16206960cd89SAndy Fiddaman 		name = "Predictable Latency Mode Window";
16216960cd89SAndy Fiddaman 		break;
16226960cd89SAndy Fiddaman 	case NVME_FEAT_LBA_STATUS_INFORMATION_ATTRIBUTES:
16236960cd89SAndy Fiddaman 		name = "LBA Status Information Report Interval";
16246960cd89SAndy Fiddaman 		break;
16256960cd89SAndy Fiddaman 	case NVME_FEAT_HOST_BEHAVIOR_SUPPORT:
16266960cd89SAndy Fiddaman 		name = "Host Behavior Support";
16276960cd89SAndy Fiddaman 		break;
16286960cd89SAndy Fiddaman 	case NVME_FEAT_SANITIZE_CONFIG:
16296960cd89SAndy Fiddaman 		name = "Sanitize Config";
16306960cd89SAndy Fiddaman 		break;
16316960cd89SAndy Fiddaman 	case NVME_FEAT_ENDURANCE_GROUP_EVENT_CONFIGURATION:
16326960cd89SAndy Fiddaman 		name = "Endurance Group Event Configuration";
16336960cd89SAndy Fiddaman 		break;
16346960cd89SAndy Fiddaman 	case NVME_FEAT_SOFTWARE_PROGRESS_MARKER:
16356960cd89SAndy Fiddaman 		name = "Software Progress Marker";
16366960cd89SAndy Fiddaman 		break;
16376960cd89SAndy Fiddaman 	case NVME_FEAT_HOST_IDENTIFIER:
16386960cd89SAndy Fiddaman 		name = "Host Identifier";
16396960cd89SAndy Fiddaman 		break;
16406960cd89SAndy Fiddaman 	case NVME_FEAT_RESERVATION_NOTIFICATION_MASK:
16416960cd89SAndy Fiddaman 		name = "Reservation Notification Mask";
16426960cd89SAndy Fiddaman 		break;
16436960cd89SAndy Fiddaman 	case NVME_FEAT_RESERVATION_PERSISTENCE:
16446960cd89SAndy Fiddaman 		name = "Reservation Persistence";
16456960cd89SAndy Fiddaman 		break;
16466960cd89SAndy Fiddaman 	case NVME_FEAT_NAMESPACE_WRITE_PROTECTION_CONFIG:
16476960cd89SAndy Fiddaman 		name = "Namespace Write Protection Config";
16486960cd89SAndy Fiddaman 		break;
16496960cd89SAndy Fiddaman 	default:
16506960cd89SAndy Fiddaman 		name = "Unknown";
16516960cd89SAndy Fiddaman 		break;
16526960cd89SAndy Fiddaman 	}
16536960cd89SAndy Fiddaman 
16546960cd89SAndy Fiddaman 	return (name);
16556960cd89SAndy Fiddaman }
16566960cd89SAndy Fiddaman 
16576960cd89SAndy Fiddaman static void
16586960cd89SAndy Fiddaman nvme_feature_invalid_cb(struct pci_nvme_softc *sc,
16596960cd89SAndy Fiddaman     struct nvme_feature_obj *feat,
16606960cd89SAndy Fiddaman     struct nvme_command *command,
16616960cd89SAndy Fiddaman     struct nvme_completion *compl)
16626960cd89SAndy Fiddaman {
16636960cd89SAndy Fiddaman 
16644c87aefeSPatrick Mooney 	pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
16654c87aefeSPatrick Mooney }
16664c87aefeSPatrick Mooney 
16676960cd89SAndy Fiddaman static void
16686960cd89SAndy Fiddaman nvme_feature_iv_config(struct pci_nvme_softc *sc,
16696960cd89SAndy Fiddaman     struct nvme_feature_obj *feat,
16706960cd89SAndy Fiddaman     struct nvme_command *command,
16716960cd89SAndy Fiddaman     struct nvme_completion *compl)
16726960cd89SAndy Fiddaman {
16736960cd89SAndy Fiddaman 	uint32_t i;
16746960cd89SAndy Fiddaman 	uint32_t cdw11 = command->cdw11;
16756960cd89SAndy Fiddaman 	uint16_t iv;
16766960cd89SAndy Fiddaman 	bool cd;
16776960cd89SAndy Fiddaman 
16786960cd89SAndy Fiddaman 	pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
16796960cd89SAndy Fiddaman 
16806960cd89SAndy Fiddaman 	iv = cdw11 & 0xffff;
16816960cd89SAndy Fiddaman 	cd = cdw11 & (1 << 16);
16826960cd89SAndy Fiddaman 
16836960cd89SAndy Fiddaman 	if (iv > (sc->max_queues + 1)) {
16846960cd89SAndy Fiddaman 		return;
16856960cd89SAndy Fiddaman 	}
16866960cd89SAndy Fiddaman 
16876960cd89SAndy Fiddaman 	/* No Interrupt Coalescing (i.e. not Coalescing Disable) for Admin Q */
16886960cd89SAndy Fiddaman 	if ((iv == 0) && !cd)
16896960cd89SAndy Fiddaman 		return;
16906960cd89SAndy Fiddaman 
16916960cd89SAndy Fiddaman 	/* Requested Interrupt Vector must be used by a CQ */
16926960cd89SAndy Fiddaman 	for (i = 0; i < sc->num_cqueues + 1; i++) {
16936960cd89SAndy Fiddaman 		if (sc->compl_queues[i].intr_vec == iv) {
16944c87aefeSPatrick Mooney 			pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
16956960cd89SAndy Fiddaman 		}
16964c87aefeSPatrick Mooney 	}
1697*d7b72f7bSAndy Fiddaman }
16984c87aefeSPatrick Mooney 
1699*d7b72f7bSAndy Fiddaman #define NVME_ASYNC_EVENT_ENDURANCE_GROUP		(0x4000)
1700*d7b72f7bSAndy Fiddaman static void
1701*d7b72f7bSAndy Fiddaman nvme_feature_async_event(struct pci_nvme_softc *sc,
1702*d7b72f7bSAndy Fiddaman     struct nvme_feature_obj *feat,
1703*d7b72f7bSAndy Fiddaman     struct nvme_command *command,
1704*d7b72f7bSAndy Fiddaman     struct nvme_completion *compl)
1705*d7b72f7bSAndy Fiddaman {
1706*d7b72f7bSAndy Fiddaman 
1707*d7b72f7bSAndy Fiddaman 	if (command->cdw11 & NVME_ASYNC_EVENT_ENDURANCE_GROUP)
1708*d7b72f7bSAndy Fiddaman 		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
1709*d7b72f7bSAndy Fiddaman }
1710*d7b72f7bSAndy Fiddaman 
1711*d7b72f7bSAndy Fiddaman #define NVME_TEMP_THRESH_OVER	0
1712*d7b72f7bSAndy Fiddaman #define NVME_TEMP_THRESH_UNDER	1
1713*d7b72f7bSAndy Fiddaman static void
1714*d7b72f7bSAndy Fiddaman nvme_feature_temperature(struct pci_nvme_softc *sc,
1715*d7b72f7bSAndy Fiddaman     struct nvme_feature_obj *feat,
1716*d7b72f7bSAndy Fiddaman     struct nvme_command *command,
1717*d7b72f7bSAndy Fiddaman     struct nvme_completion *compl)
1718*d7b72f7bSAndy Fiddaman {
1719*d7b72f7bSAndy Fiddaman 	uint16_t	tmpth;	/* Temperature Threshold */
1720*d7b72f7bSAndy Fiddaman 	uint8_t		tmpsel; /* Threshold Temperature Select */
1721*d7b72f7bSAndy Fiddaman 	uint8_t		thsel;  /* Threshold Type Select */
1722*d7b72f7bSAndy Fiddaman 	bool		set_crit = false;
1723*d7b72f7bSAndy Fiddaman 
1724*d7b72f7bSAndy Fiddaman 	tmpth  = command->cdw11 & 0xffff;
1725*d7b72f7bSAndy Fiddaman 	tmpsel = (command->cdw11 >> 16) & 0xf;
1726*d7b72f7bSAndy Fiddaman 	thsel  = (command->cdw11 >> 20) & 0x3;
1727*d7b72f7bSAndy Fiddaman 
1728*d7b72f7bSAndy Fiddaman 	DPRINTF("%s: tmpth=%#x tmpsel=%#x thsel=%#x", __func__, tmpth, tmpsel, thsel);
1729*d7b72f7bSAndy Fiddaman 
1730*d7b72f7bSAndy Fiddaman 	/* Check for unsupported values */
1731*d7b72f7bSAndy Fiddaman 	if (((tmpsel != 0) && (tmpsel != 0xf)) ||
1732*d7b72f7bSAndy Fiddaman 	    (thsel > NVME_TEMP_THRESH_UNDER)) {
1733*d7b72f7bSAndy Fiddaman 		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
1734*d7b72f7bSAndy Fiddaman 		return;
1735*d7b72f7bSAndy Fiddaman 	}
1736*d7b72f7bSAndy Fiddaman 
1737*d7b72f7bSAndy Fiddaman 	if (((thsel == NVME_TEMP_THRESH_OVER)  && (NVME_TEMPERATURE >= tmpth)) ||
1738*d7b72f7bSAndy Fiddaman 	    ((thsel == NVME_TEMP_THRESH_UNDER) && (NVME_TEMPERATURE <= tmpth)))
1739*d7b72f7bSAndy Fiddaman 		set_crit = true;
1740*d7b72f7bSAndy Fiddaman 
1741*d7b72f7bSAndy Fiddaman 	pthread_mutex_lock(&sc->mtx);
1742*d7b72f7bSAndy Fiddaman 	if (set_crit)
1743*d7b72f7bSAndy Fiddaman 		sc->health_log.critical_warning |=
1744*d7b72f7bSAndy Fiddaman 		    NVME_CRIT_WARN_ST_TEMPERATURE;
1745*d7b72f7bSAndy Fiddaman 	else
1746*d7b72f7bSAndy Fiddaman 		sc->health_log.critical_warning &=
1747*d7b72f7bSAndy Fiddaman 		    ~NVME_CRIT_WARN_ST_TEMPERATURE;
1748*d7b72f7bSAndy Fiddaman 	pthread_mutex_unlock(&sc->mtx);
1749*d7b72f7bSAndy Fiddaman 
1750*d7b72f7bSAndy Fiddaman 	if (set_crit)
1751*d7b72f7bSAndy Fiddaman 		pci_nvme_aen_post(sc, PCI_NVME_AE_TYPE_SMART,
1752*d7b72f7bSAndy Fiddaman 		    sc->health_log.critical_warning);
1753*d7b72f7bSAndy Fiddaman 
1754*d7b72f7bSAndy Fiddaman 
1755*d7b72f7bSAndy Fiddaman 	DPRINTF("%s: set_crit=%c critical_warning=%#x status=%#x", __func__, set_crit ? 'T':'F', sc->health_log.critical_warning, compl->status);
17566960cd89SAndy Fiddaman }
17576960cd89SAndy Fiddaman 
17586960cd89SAndy Fiddaman static void
17596960cd89SAndy Fiddaman nvme_feature_num_queues(struct pci_nvme_softc *sc,
17606960cd89SAndy Fiddaman     struct nvme_feature_obj *feat,
17616960cd89SAndy Fiddaman     struct nvme_command *command,
17624c87aefeSPatrick Mooney     struct nvme_completion *compl)
17634c87aefeSPatrick Mooney {
17644c87aefeSPatrick Mooney 	uint16_t nqr;	/* Number of Queues Requested */
17654c87aefeSPatrick Mooney 
17666960cd89SAndy Fiddaman 	if (sc->num_q_is_set) {
17676960cd89SAndy Fiddaman 		WPRINTF("%s: Number of Queues already set", __func__);
17686960cd89SAndy Fiddaman 		pci_nvme_status_genc(&compl->status,
17696960cd89SAndy Fiddaman 		    NVME_SC_COMMAND_SEQUENCE_ERROR);
17706960cd89SAndy Fiddaman 		return;
17716960cd89SAndy Fiddaman 	}
17726960cd89SAndy Fiddaman 
17734c87aefeSPatrick Mooney 	nqr = command->cdw11 & 0xFFFF;
17744c87aefeSPatrick Mooney 	if (nqr == 0xffff) {
17756960cd89SAndy Fiddaman 		WPRINTF("%s: Illegal NSQR value %#x", __func__, nqr);
17764c87aefeSPatrick Mooney 		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
17776960cd89SAndy Fiddaman 		return;
17784c87aefeSPatrick Mooney 	}
17794c87aefeSPatrick Mooney 
17804c87aefeSPatrick Mooney 	sc->num_squeues = ONE_BASED(nqr);
17814c87aefeSPatrick Mooney 	if (sc->num_squeues > sc->max_queues) {
17826960cd89SAndy Fiddaman 		DPRINTF("NSQR=%u is greater than max %u", sc->num_squeues,
17836960cd89SAndy Fiddaman 					sc->max_queues);
17844c87aefeSPatrick Mooney 		sc->num_squeues = sc->max_queues;
17854c87aefeSPatrick Mooney 	}
17864c87aefeSPatrick Mooney 
17874c87aefeSPatrick Mooney 	nqr = (command->cdw11 >> 16) & 0xFFFF;
17884c87aefeSPatrick Mooney 	if (nqr == 0xffff) {
17896960cd89SAndy Fiddaman 		WPRINTF("%s: Illegal NCQR value %#x", __func__, nqr);
17904c87aefeSPatrick Mooney 		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
17916960cd89SAndy Fiddaman 		return;
17924c87aefeSPatrick Mooney 	}
17934c87aefeSPatrick Mooney 
17944c87aefeSPatrick Mooney 	sc->num_cqueues = ONE_BASED(nqr);
17954c87aefeSPatrick Mooney 	if (sc->num_cqueues > sc->max_queues) {
17966960cd89SAndy Fiddaman 		DPRINTF("NCQR=%u is greater than max %u", sc->num_cqueues,
17976960cd89SAndy Fiddaman 					sc->max_queues);
17984c87aefeSPatrick Mooney 		sc->num_cqueues = sc->max_queues;
17994c87aefeSPatrick Mooney 	}
18004c87aefeSPatrick Mooney 
18016960cd89SAndy Fiddaman 	/* Patch the command value which will be saved on callback's return */
18026960cd89SAndy Fiddaman 	command->cdw11 = NVME_FEATURE_NUM_QUEUES(sc);
18034c87aefeSPatrick Mooney 	compl->cdw0 = NVME_FEATURE_NUM_QUEUES(sc);
18044c87aefeSPatrick Mooney 
18056960cd89SAndy Fiddaman 	sc->num_q_is_set = true;
18064c87aefeSPatrick Mooney }
18074c87aefeSPatrick Mooney 
18084c87aefeSPatrick Mooney static int
18094c87aefeSPatrick Mooney nvme_opc_set_features(struct pci_nvme_softc *sc, struct nvme_command *command,
18104c87aefeSPatrick Mooney 	struct nvme_completion *compl)
18114c87aefeSPatrick Mooney {
18126960cd89SAndy Fiddaman 	struct nvme_feature_obj *feat;
18136960cd89SAndy Fiddaman 	uint32_t nsid = command->nsid;
18146960cd89SAndy Fiddaman 	uint8_t fid = command->cdw10 & 0xFF;
18154c87aefeSPatrick Mooney 
18166960cd89SAndy Fiddaman 	DPRINTF("%s: Feature ID 0x%x (%s)", __func__, fid, nvme_fid_to_name(fid));
18174c87aefeSPatrick Mooney 
18186960cd89SAndy Fiddaman 	if (fid >= NVME_FID_MAX) {
18196960cd89SAndy Fiddaman 		DPRINTF("%s invalid feature 0x%x", __func__, fid);
18204c87aefeSPatrick Mooney 		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
18214c87aefeSPatrick Mooney 		return (1);
18224c87aefeSPatrick Mooney 	}
18236960cd89SAndy Fiddaman 	feat = &sc->feat[fid];
18244c87aefeSPatrick Mooney 
1825*d7b72f7bSAndy Fiddaman 	if (feat->namespace_specific && (nsid == NVME_GLOBAL_NAMESPACE_TAG)) {
1826*d7b72f7bSAndy Fiddaman 		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
1827*d7b72f7bSAndy Fiddaman 		return (1);
1828*d7b72f7bSAndy Fiddaman 	}
1829*d7b72f7bSAndy Fiddaman 
18306960cd89SAndy Fiddaman 	if (!feat->namespace_specific &&
18316960cd89SAndy Fiddaman 	    !((nsid == 0) || (nsid == NVME_GLOBAL_NAMESPACE_TAG))) {
18326960cd89SAndy Fiddaman 		pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
18336960cd89SAndy Fiddaman 		    NVME_SC_FEATURE_NOT_NS_SPECIFIC);
18344c87aefeSPatrick Mooney 		return (1);
18354c87aefeSPatrick Mooney 	}
18364c87aefeSPatrick Mooney 
18376960cd89SAndy Fiddaman 	compl->cdw0 = 0;
18386960cd89SAndy Fiddaman 	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
18396960cd89SAndy Fiddaman 
18406960cd89SAndy Fiddaman 	if (feat->set)
18416960cd89SAndy Fiddaman 		feat->set(sc, feat, command, compl);
18426960cd89SAndy Fiddaman 
18436dc98349SAndy Fiddaman 	DPRINTF("%s: status=%#x cdw11=%#x", __func__, compl->status, command->cdw11);
18446dc98349SAndy Fiddaman 	if (compl->status == NVME_SC_SUCCESS) {
18456960cd89SAndy Fiddaman 		feat->cdw11 = command->cdw11;
18466dc98349SAndy Fiddaman 		if ((fid == NVME_FEAT_ASYNC_EVENT_CONFIGURATION) &&
18476dc98349SAndy Fiddaman 		    (command->cdw11 != 0))
18486dc98349SAndy Fiddaman 			pci_nvme_aen_notify(sc);
18496dc98349SAndy Fiddaman 	}
18506960cd89SAndy Fiddaman 
18516960cd89SAndy Fiddaman 	return (0);
18526960cd89SAndy Fiddaman }
18536960cd89SAndy Fiddaman 
1854*d7b72f7bSAndy Fiddaman #define NVME_FEATURES_SEL_SUPPORTED	0x3
1855*d7b72f7bSAndy Fiddaman #define NVME_FEATURES_NS_SPECIFIC	(1 << 1)
1856*d7b72f7bSAndy Fiddaman 
18574c87aefeSPatrick Mooney static int
18584c87aefeSPatrick Mooney nvme_opc_get_features(struct pci_nvme_softc* sc, struct nvme_command* command,
18594c87aefeSPatrick Mooney 	struct nvme_completion* compl)
18604c87aefeSPatrick Mooney {
18616960cd89SAndy Fiddaman 	struct nvme_feature_obj *feat;
18626960cd89SAndy Fiddaman 	uint8_t fid = command->cdw10 & 0xFF;
1863*d7b72f7bSAndy Fiddaman 	uint8_t sel = (command->cdw10 >> 8) & 0x7;
18644c87aefeSPatrick Mooney 
18656960cd89SAndy Fiddaman 	DPRINTF("%s: Feature ID 0x%x (%s)", __func__, fid, nvme_fid_to_name(fid));
18664c87aefeSPatrick Mooney 
18676960cd89SAndy Fiddaman 	if (fid >= NVME_FID_MAX) {
18686960cd89SAndy Fiddaman 		DPRINTF("%s invalid feature 0x%x", __func__, fid);
18694c87aefeSPatrick Mooney 		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
18704c87aefeSPatrick Mooney 		return (1);
18714c87aefeSPatrick Mooney 	}
18724c87aefeSPatrick Mooney 
18736960cd89SAndy Fiddaman 	compl->cdw0 = 0;
18744c87aefeSPatrick Mooney 	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
18756960cd89SAndy Fiddaman 
18766960cd89SAndy Fiddaman 	feat = &sc->feat[fid];
18776960cd89SAndy Fiddaman 	if (feat->get) {
18786960cd89SAndy Fiddaman 		feat->get(sc, feat, command, compl);
18796960cd89SAndy Fiddaman 	}
18806960cd89SAndy Fiddaman 
18816960cd89SAndy Fiddaman 	if (compl->status == NVME_SC_SUCCESS) {
1882*d7b72f7bSAndy Fiddaman 		if ((sel == NVME_FEATURES_SEL_SUPPORTED) && feat->namespace_specific)
1883*d7b72f7bSAndy Fiddaman 			compl->cdw0 = NVME_FEATURES_NS_SPECIFIC;
1884*d7b72f7bSAndy Fiddaman 		else
18856960cd89SAndy Fiddaman 			compl->cdw0 = feat->cdw11;
18866960cd89SAndy Fiddaman 	}
18876960cd89SAndy Fiddaman 
18886960cd89SAndy Fiddaman 	return (0);
18896960cd89SAndy Fiddaman }
18906960cd89SAndy Fiddaman 
18916960cd89SAndy Fiddaman static int
18926960cd89SAndy Fiddaman nvme_opc_format_nvm(struct pci_nvme_softc* sc, struct nvme_command* command,
18936960cd89SAndy Fiddaman 	struct nvme_completion* compl)
18946960cd89SAndy Fiddaman {
18956960cd89SAndy Fiddaman 	uint8_t	ses, lbaf, pi;
18966960cd89SAndy Fiddaman 
18976960cd89SAndy Fiddaman 	/* Only supports Secure Erase Setting - User Data Erase */
18986960cd89SAndy Fiddaman 	ses = (command->cdw10 >> 9) & 0x7;
18996960cd89SAndy Fiddaman 	if (ses > 0x1) {
19006960cd89SAndy Fiddaman 		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
19016960cd89SAndy Fiddaman 		return (1);
19026960cd89SAndy Fiddaman 	}
19036960cd89SAndy Fiddaman 
19046960cd89SAndy Fiddaman 	/* Only supports a single LBA Format */
19056960cd89SAndy Fiddaman 	lbaf = command->cdw10 & 0xf;
19066960cd89SAndy Fiddaman 	if (lbaf != 0) {
19076960cd89SAndy Fiddaman 		pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
19086960cd89SAndy Fiddaman 		    NVME_SC_INVALID_FORMAT);
19096960cd89SAndy Fiddaman 		return (1);
19106960cd89SAndy Fiddaman 	}
19116960cd89SAndy Fiddaman 
19126960cd89SAndy Fiddaman 	/* Doesn't support Protection Infomation */
19136960cd89SAndy Fiddaman 	pi = (command->cdw10 >> 5) & 0x7;
19146960cd89SAndy Fiddaman 	if (pi != 0) {
19156960cd89SAndy Fiddaman 		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
19166960cd89SAndy Fiddaman 		return (1);
19176960cd89SAndy Fiddaman 	}
19186960cd89SAndy Fiddaman 
19196960cd89SAndy Fiddaman 	if (sc->nvstore.type == NVME_STOR_RAM) {
19206960cd89SAndy Fiddaman 		if (sc->nvstore.ctx)
19216960cd89SAndy Fiddaman 			free(sc->nvstore.ctx);
19226960cd89SAndy Fiddaman 		sc->nvstore.ctx = calloc(1, sc->nvstore.size);
19236960cd89SAndy Fiddaman 		pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
19246960cd89SAndy Fiddaman 	} else {
19256960cd89SAndy Fiddaman 		struct pci_nvme_ioreq *req;
19266960cd89SAndy Fiddaman 		int err;
19276960cd89SAndy Fiddaman 
19286960cd89SAndy Fiddaman 		req = pci_nvme_get_ioreq(sc);
19296960cd89SAndy Fiddaman 		if (req == NULL) {
19306960cd89SAndy Fiddaman 			pci_nvme_status_genc(&compl->status,
19316960cd89SAndy Fiddaman 			    NVME_SC_INTERNAL_DEVICE_ERROR);
19326960cd89SAndy Fiddaman 			WPRINTF("%s: unable to allocate IO req", __func__);
19336960cd89SAndy Fiddaman 			return (1);
19346960cd89SAndy Fiddaman 		}
19356960cd89SAndy Fiddaman 		req->nvme_sq = &sc->submit_queues[0];
19366960cd89SAndy Fiddaman 		req->sqid = 0;
19376960cd89SAndy Fiddaman 		req->opc = command->opc;
19386960cd89SAndy Fiddaman 		req->cid = command->cid;
19396960cd89SAndy Fiddaman 		req->nsid = command->nsid;
19406960cd89SAndy Fiddaman 
19416960cd89SAndy Fiddaman 		req->io_req.br_offset = 0;
19426960cd89SAndy Fiddaman 		req->io_req.br_resid = sc->nvstore.size;
19436960cd89SAndy Fiddaman 		req->io_req.br_callback = pci_nvme_io_done;
19446960cd89SAndy Fiddaman 
19456960cd89SAndy Fiddaman 		err = blockif_delete(sc->nvstore.ctx, &req->io_req);
19466960cd89SAndy Fiddaman 		if (err) {
19476960cd89SAndy Fiddaman 			pci_nvme_status_genc(&compl->status,
19486960cd89SAndy Fiddaman 			    NVME_SC_INTERNAL_DEVICE_ERROR);
19496960cd89SAndy Fiddaman 			pci_nvme_release_ioreq(sc, req);
1950*d7b72f7bSAndy Fiddaman 		} else
1951*d7b72f7bSAndy Fiddaman 			compl->status = NVME_NO_STATUS;
19526960cd89SAndy Fiddaman 	}
19536960cd89SAndy Fiddaman 
19544c87aefeSPatrick Mooney 	return (1);
19554c87aefeSPatrick Mooney }
19564c87aefeSPatrick Mooney 
19574c87aefeSPatrick Mooney static int
19584c87aefeSPatrick Mooney nvme_opc_abort(struct pci_nvme_softc* sc, struct nvme_command* command,
19594c87aefeSPatrick Mooney 	struct nvme_completion* compl)
19604c87aefeSPatrick Mooney {
19616960cd89SAndy Fiddaman 	DPRINTF("%s submission queue %u, command ID 0x%x", __func__,
19626960cd89SAndy Fiddaman 	        command->cdw10 & 0xFFFF, (command->cdw10 >> 16) & 0xFFFF);
19634c87aefeSPatrick Mooney 
19644c87aefeSPatrick Mooney 	/* TODO: search for the command ID and abort it */
19654c87aefeSPatrick Mooney 
19664c87aefeSPatrick Mooney 	compl->cdw0 = 1;
19674c87aefeSPatrick Mooney 	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
19684c87aefeSPatrick Mooney 	return (1);
19694c87aefeSPatrick Mooney }
19704c87aefeSPatrick Mooney 
19714c87aefeSPatrick Mooney static int
19724c87aefeSPatrick Mooney nvme_opc_async_event_req(struct pci_nvme_softc* sc,
19734c87aefeSPatrick Mooney 	struct nvme_command* command, struct nvme_completion* compl)
19744c87aefeSPatrick Mooney {
19756dc98349SAndy Fiddaman 	DPRINTF("%s async event request count=%u aerl=%u cid=%#x", __func__,
19766dc98349SAndy Fiddaman 	    sc->aer_count, sc->ctrldata.aerl, command->cid);
19776960cd89SAndy Fiddaman 
19786960cd89SAndy Fiddaman 	/* Don't exceed the Async Event Request Limit (AERL). */
19796960cd89SAndy Fiddaman 	if (pci_nvme_aer_limit_reached(sc)) {
19806960cd89SAndy Fiddaman 		pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
19816960cd89SAndy Fiddaman 				NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED);
19826960cd89SAndy Fiddaman 		return (1);
19836960cd89SAndy Fiddaman 	}
19846960cd89SAndy Fiddaman 
19856960cd89SAndy Fiddaman 	if (pci_nvme_aer_add(sc, command->cid)) {
19866960cd89SAndy Fiddaman 		pci_nvme_status_tc(&compl->status, NVME_SCT_GENERIC,
19876960cd89SAndy Fiddaman 				NVME_SC_INTERNAL_DEVICE_ERROR);
19886960cd89SAndy Fiddaman 		return (1);
19896960cd89SAndy Fiddaman 	}
19904c87aefeSPatrick Mooney 
19914c87aefeSPatrick Mooney 	/*
19926960cd89SAndy Fiddaman 	 * Raise events when they happen based on the Set Features cmd.
19934c87aefeSPatrick Mooney 	 * These events happen async, so only set completion successful if
19944c87aefeSPatrick Mooney 	 * there is an event reflective of the request to get event.
19954c87aefeSPatrick Mooney 	 */
19966960cd89SAndy Fiddaman 	compl->status = NVME_NO_STATUS;
19976dc98349SAndy Fiddaman 	pci_nvme_aen_notify(sc);
19986960cd89SAndy Fiddaman 
19994c87aefeSPatrick Mooney 	return (0);
20004c87aefeSPatrick Mooney }
20014c87aefeSPatrick Mooney 
20024c87aefeSPatrick Mooney static void
20034c87aefeSPatrick Mooney pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, uint64_t value)
20044c87aefeSPatrick Mooney {
20054c87aefeSPatrick Mooney 	struct nvme_completion compl;
20064c87aefeSPatrick Mooney 	struct nvme_command *cmd;
20074c87aefeSPatrick Mooney 	struct nvme_submission_queue *sq;
20084c87aefeSPatrick Mooney 	struct nvme_completion_queue *cq;
20094c87aefeSPatrick Mooney 	uint16_t sqhead;
20104c87aefeSPatrick Mooney 
20116960cd89SAndy Fiddaman 	DPRINTF("%s index %u", __func__, (uint32_t)value);
20124c87aefeSPatrick Mooney 
20134c87aefeSPatrick Mooney 	sq = &sc->submit_queues[0];
2014154972afSPatrick Mooney 	cq = &sc->compl_queues[0];
20154c87aefeSPatrick Mooney 
20166960cd89SAndy Fiddaman 	pthread_mutex_lock(&sq->mtx);
20174c87aefeSPatrick Mooney 
20186960cd89SAndy Fiddaman 	sqhead = sq->head;
20196960cd89SAndy Fiddaman 	DPRINTF("sqhead %u, tail %u", sqhead, sq->tail);
20204c87aefeSPatrick Mooney 
20214c87aefeSPatrick Mooney 	while (sqhead != atomic_load_acq_short(&sq->tail)) {
20224c87aefeSPatrick Mooney 		cmd = &(sq->qbase)[sqhead];
202384659b24SMichael Zeller 		compl.cdw0 = 0;
20244c87aefeSPatrick Mooney 		compl.status = 0;
20254c87aefeSPatrick Mooney 
20264c87aefeSPatrick Mooney 		switch (cmd->opc) {
20274c87aefeSPatrick Mooney 		case NVME_OPC_DELETE_IO_SQ:
20286960cd89SAndy Fiddaman 			DPRINTF("%s command DELETE_IO_SQ", __func__);
2029154972afSPatrick Mooney 			nvme_opc_delete_io_sq(sc, cmd, &compl);
20304c87aefeSPatrick Mooney 			break;
20314c87aefeSPatrick Mooney 		case NVME_OPC_CREATE_IO_SQ:
20326960cd89SAndy Fiddaman 			DPRINTF("%s command CREATE_IO_SQ", __func__);
2033154972afSPatrick Mooney 			nvme_opc_create_io_sq(sc, cmd, &compl);
20344c87aefeSPatrick Mooney 			break;
20354c87aefeSPatrick Mooney 		case NVME_OPC_DELETE_IO_CQ:
20366960cd89SAndy Fiddaman 			DPRINTF("%s command DELETE_IO_CQ", __func__);
2037154972afSPatrick Mooney 			nvme_opc_delete_io_cq(sc, cmd, &compl);
20384c87aefeSPatrick Mooney 			break;
20394c87aefeSPatrick Mooney 		case NVME_OPC_CREATE_IO_CQ:
20406960cd89SAndy Fiddaman 			DPRINTF("%s command CREATE_IO_CQ", __func__);
2041154972afSPatrick Mooney 			nvme_opc_create_io_cq(sc, cmd, &compl);
20424c87aefeSPatrick Mooney 			break;
20434c87aefeSPatrick Mooney 		case NVME_OPC_GET_LOG_PAGE:
20446960cd89SAndy Fiddaman 			DPRINTF("%s command GET_LOG_PAGE", __func__);
2045154972afSPatrick Mooney 			nvme_opc_get_log_page(sc, cmd, &compl);
20464c87aefeSPatrick Mooney 			break;
20474c87aefeSPatrick Mooney 		case NVME_OPC_IDENTIFY:
20486960cd89SAndy Fiddaman 			DPRINTF("%s command IDENTIFY", __func__);
2049154972afSPatrick Mooney 			nvme_opc_identify(sc, cmd, &compl);
20504c87aefeSPatrick Mooney 			break;
20514c87aefeSPatrick Mooney 		case NVME_OPC_ABORT:
20526960cd89SAndy Fiddaman 			DPRINTF("%s command ABORT", __func__);
2053154972afSPatrick Mooney 			nvme_opc_abort(sc, cmd, &compl);
20544c87aefeSPatrick Mooney 			break;
20554c87aefeSPatrick Mooney 		case NVME_OPC_SET_FEATURES:
20566960cd89SAndy Fiddaman 			DPRINTF("%s command SET_FEATURES", __func__);
2057154972afSPatrick Mooney 			nvme_opc_set_features(sc, cmd, &compl);
20584c87aefeSPatrick Mooney 			break;
20594c87aefeSPatrick Mooney 		case NVME_OPC_GET_FEATURES:
20606960cd89SAndy Fiddaman 			DPRINTF("%s command GET_FEATURES", __func__);
2061154972afSPatrick Mooney 			nvme_opc_get_features(sc, cmd, &compl);
20624c87aefeSPatrick Mooney 			break;
20636960cd89SAndy Fiddaman 		case NVME_OPC_FIRMWARE_ACTIVATE:
20646960cd89SAndy Fiddaman 			DPRINTF("%s command FIRMWARE_ACTIVATE", __func__);
20656960cd89SAndy Fiddaman 			pci_nvme_status_tc(&compl.status,
20666960cd89SAndy Fiddaman 			    NVME_SCT_COMMAND_SPECIFIC,
20676960cd89SAndy Fiddaman 			    NVME_SC_INVALID_FIRMWARE_SLOT);
20686960cd89SAndy Fiddaman 			break;
20694c87aefeSPatrick Mooney 		case NVME_OPC_ASYNC_EVENT_REQUEST:
20706960cd89SAndy Fiddaman 			DPRINTF("%s command ASYNC_EVENT_REQ", __func__);
2071154972afSPatrick Mooney 			nvme_opc_async_event_req(sc, cmd, &compl);
20726960cd89SAndy Fiddaman 			break;
20736960cd89SAndy Fiddaman 		case NVME_OPC_FORMAT_NVM:
20746960cd89SAndy Fiddaman 			DPRINTF("%s command FORMAT_NVM", __func__);
20756960cd89SAndy Fiddaman 			if ((sc->ctrldata.oacs &
20766960cd89SAndy Fiddaman 			    (1 << NVME_CTRLR_DATA_OACS_FORMAT_SHIFT)) == 0) {
20776960cd89SAndy Fiddaman 				pci_nvme_status_genc(&compl.status, NVME_SC_INVALID_OPCODE);
2078*d7b72f7bSAndy Fiddaman 				break;
20796960cd89SAndy Fiddaman 			}
20806960cd89SAndy Fiddaman 			nvme_opc_format_nvm(sc, cmd, &compl);
20814c87aefeSPatrick Mooney 			break;
2082*d7b72f7bSAndy Fiddaman 		case NVME_OPC_SECURITY_SEND:
2083*d7b72f7bSAndy Fiddaman 		case NVME_OPC_SECURITY_RECEIVE:
2084*d7b72f7bSAndy Fiddaman 		case NVME_OPC_SANITIZE:
2085*d7b72f7bSAndy Fiddaman 		case NVME_OPC_GET_LBA_STATUS:
2086*d7b72f7bSAndy Fiddaman 			DPRINTF("%s command OPC=%#x (unsupported)", __func__,
2087*d7b72f7bSAndy Fiddaman 			    cmd->opc);
2088*d7b72f7bSAndy Fiddaman 			/* Valid but unsupported opcodes */
2089*d7b72f7bSAndy Fiddaman 			pci_nvme_status_genc(&compl.status, NVME_SC_INVALID_FIELD);
2090*d7b72f7bSAndy Fiddaman 			break;
20914c87aefeSPatrick Mooney 		default:
2092*d7b72f7bSAndy Fiddaman 			DPRINTF("%s command OPC=%#X (not implemented)",
2093*d7b72f7bSAndy Fiddaman 			    __func__,
20946960cd89SAndy Fiddaman 			    cmd->opc);
209584659b24SMichael Zeller 			pci_nvme_status_genc(&compl.status, NVME_SC_INVALID_OPCODE);
20964c87aefeSPatrick Mooney 		}
2097154972afSPatrick Mooney 		sqhead = (sqhead + 1) % sq->size;
20984c87aefeSPatrick Mooney 
209984659b24SMichael Zeller 		if (NVME_COMPLETION_VALID(compl)) {
21006960cd89SAndy Fiddaman 			pci_nvme_cq_update(sc, &sc->compl_queues[0],
21016960cd89SAndy Fiddaman 			    compl.cdw0,
21026960cd89SAndy Fiddaman 			    cmd->cid,
21036960cd89SAndy Fiddaman 			    0,		/* SQID */
21046960cd89SAndy Fiddaman 			    compl.status);
21054c87aefeSPatrick Mooney 		}
21064c87aefeSPatrick Mooney 	}
21074c87aefeSPatrick Mooney 
21086960cd89SAndy Fiddaman 	DPRINTF("setting sqhead %u", sqhead);
21096960cd89SAndy Fiddaman 	sq->head = sqhead;
21104c87aefeSPatrick Mooney 
2111154972afSPatrick Mooney 	if (cq->head != cq->tail)
21124c87aefeSPatrick Mooney 		pci_generate_msix(sc->nsc_pi, 0);
21134c87aefeSPatrick Mooney 
21146960cd89SAndy Fiddaman 	pthread_mutex_unlock(&sq->mtx);
21156960cd89SAndy Fiddaman }
21166960cd89SAndy Fiddaman 
21176960cd89SAndy Fiddaman /*
21186960cd89SAndy Fiddaman  * Update the Write and Read statistics reported in SMART data
21196960cd89SAndy Fiddaman  *
21206960cd89SAndy Fiddaman  * NVMe defines "data unit" as thousand's of 512 byte blocks and is rounded up.
21216960cd89SAndy Fiddaman  * E.g. 1 data unit is 1 - 1,000 512 byte blocks. 3 data units are 2,001 - 3,000
21226960cd89SAndy Fiddaman  * 512 byte blocks. Rounding up is acheived by initializing the remainder to 999.
21236960cd89SAndy Fiddaman  */
21246960cd89SAndy Fiddaman static void
21256960cd89SAndy Fiddaman pci_nvme_stats_write_read_update(struct pci_nvme_softc *sc, uint8_t opc,
21266960cd89SAndy Fiddaman     size_t bytes, uint16_t status)
21276960cd89SAndy Fiddaman {
21286960cd89SAndy Fiddaman 
21296960cd89SAndy Fiddaman 	pthread_mutex_lock(&sc->mtx);
21306960cd89SAndy Fiddaman 	switch (opc) {
21316960cd89SAndy Fiddaman 	case NVME_OPC_WRITE:
21326960cd89SAndy Fiddaman 		sc->write_commands++;
21336960cd89SAndy Fiddaman 		if (status != NVME_SC_SUCCESS)
21346960cd89SAndy Fiddaman 			break;
21356960cd89SAndy Fiddaman 		sc->write_dunits_remainder += (bytes / 512);
21366960cd89SAndy Fiddaman 		while (sc->write_dunits_remainder >= 1000) {
21376960cd89SAndy Fiddaman 			sc->write_data_units++;
21386960cd89SAndy Fiddaman 			sc->write_dunits_remainder -= 1000;
21396960cd89SAndy Fiddaman 		}
21406960cd89SAndy Fiddaman 		break;
21416960cd89SAndy Fiddaman 	case NVME_OPC_READ:
21426960cd89SAndy Fiddaman 		sc->read_commands++;
21436960cd89SAndy Fiddaman 		if (status != NVME_SC_SUCCESS)
21446960cd89SAndy Fiddaman 			break;
21456960cd89SAndy Fiddaman 		sc->read_dunits_remainder += (bytes / 512);
21466960cd89SAndy Fiddaman 		while (sc->read_dunits_remainder >= 1000) {
21476960cd89SAndy Fiddaman 			sc->read_data_units++;
21486960cd89SAndy Fiddaman 			sc->read_dunits_remainder -= 1000;
21496960cd89SAndy Fiddaman 		}
21506960cd89SAndy Fiddaman 		break;
21516960cd89SAndy Fiddaman 	default:
21526960cd89SAndy Fiddaman 		DPRINTF("%s: Invalid OPC 0x%02x for stats", __func__, opc);
21536960cd89SAndy Fiddaman 		break;
21546960cd89SAndy Fiddaman 	}
21556960cd89SAndy Fiddaman 	pthread_mutex_unlock(&sc->mtx);
21566960cd89SAndy Fiddaman }
21576960cd89SAndy Fiddaman 
21586960cd89SAndy Fiddaman /*
2159*d7b72f7bSAndy Fiddaman  * Check if the combination of Starting LBA (slba) and number of blocks
2160*d7b72f7bSAndy Fiddaman  * exceeds the range of the underlying storage.
21616960cd89SAndy Fiddaman  *
21626960cd89SAndy Fiddaman  * Because NVMe specifies the SLBA in blocks as a uint64_t and blockif stores
21636960cd89SAndy Fiddaman  * the capacity in bytes as a uint64_t, care must be taken to avoid integer
21646960cd89SAndy Fiddaman  * overflow.
21656960cd89SAndy Fiddaman  */
21666960cd89SAndy Fiddaman static bool
21676960cd89SAndy Fiddaman pci_nvme_out_of_range(struct pci_nvme_blockstore *nvstore, uint64_t slba,
2168*d7b72f7bSAndy Fiddaman     uint32_t nblocks)
21696960cd89SAndy Fiddaman {
21706960cd89SAndy Fiddaman 	size_t	offset, bytes;
21716960cd89SAndy Fiddaman 
21726960cd89SAndy Fiddaman 	/* Overflow check of multiplying Starting LBA by the sector size */
21736960cd89SAndy Fiddaman 	if (slba >> (64 - nvstore->sectsz_bits))
21746960cd89SAndy Fiddaman 		return (true);
21756960cd89SAndy Fiddaman 
21766960cd89SAndy Fiddaman 	offset = slba << nvstore->sectsz_bits;
2177*d7b72f7bSAndy Fiddaman 	bytes = nblocks << nvstore->sectsz_bits;
21786960cd89SAndy Fiddaman 
21796960cd89SAndy Fiddaman 	/* Overflow check of Number of Logical Blocks */
2180*d7b72f7bSAndy Fiddaman 	if ((nvstore->size <= offset) || ((nvstore->size - offset) < bytes))
21816960cd89SAndy Fiddaman 		return (true);
21826960cd89SAndy Fiddaman 
21836960cd89SAndy Fiddaman 	return (false);
21844c87aefeSPatrick Mooney }
21854c87aefeSPatrick Mooney 
21864c87aefeSPatrick Mooney static int
21874c87aefeSPatrick Mooney pci_nvme_append_iov_req(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req,
21884c87aefeSPatrick Mooney 	uint64_t gpaddr, size_t size, int do_write, uint64_t lba)
21894c87aefeSPatrick Mooney {
21904c87aefeSPatrick Mooney 	int iovidx;
21914c87aefeSPatrick Mooney 
21926960cd89SAndy Fiddaman 	if (req == NULL)
21936960cd89SAndy Fiddaman 		return (-1);
21946960cd89SAndy Fiddaman 
21956960cd89SAndy Fiddaman 	if (req->io_req.br_iovcnt == NVME_MAX_IOVEC) {
21966960cd89SAndy Fiddaman 		return (-1);
21976960cd89SAndy Fiddaman 	}
21986960cd89SAndy Fiddaman 
21994c87aefeSPatrick Mooney 	/* concatenate contig block-iovs to minimize number of iovs */
22004c87aefeSPatrick Mooney 	if ((req->prev_gpaddr + req->prev_size) == gpaddr) {
22014c87aefeSPatrick Mooney 		iovidx = req->io_req.br_iovcnt - 1;
22024c87aefeSPatrick Mooney 
22034c87aefeSPatrick Mooney 		req->io_req.br_iov[iovidx].iov_base =
22044c87aefeSPatrick Mooney 		    paddr_guest2host(req->sc->nsc_pi->pi_vmctx,
22054c87aefeSPatrick Mooney 				     req->prev_gpaddr, size);
22064c87aefeSPatrick Mooney 
22074c87aefeSPatrick Mooney 		req->prev_size += size;
22084c87aefeSPatrick Mooney 		req->io_req.br_resid += size;
22094c87aefeSPatrick Mooney 
22104c87aefeSPatrick Mooney 		req->io_req.br_iov[iovidx].iov_len = req->prev_size;
22114c87aefeSPatrick Mooney 	} else {
22124c87aefeSPatrick Mooney 		iovidx = req->io_req.br_iovcnt;
22134c87aefeSPatrick Mooney 		if (iovidx == 0) {
22144c87aefeSPatrick Mooney 			req->io_req.br_offset = lba;
22154c87aefeSPatrick Mooney 			req->io_req.br_resid = 0;
22164c87aefeSPatrick Mooney 			req->io_req.br_param = req;
22174c87aefeSPatrick Mooney 		}
22184c87aefeSPatrick Mooney 
22194c87aefeSPatrick Mooney 		req->io_req.br_iov[iovidx].iov_base =
22204c87aefeSPatrick Mooney 		    paddr_guest2host(req->sc->nsc_pi->pi_vmctx,
22214c87aefeSPatrick Mooney 				     gpaddr, size);
22224c87aefeSPatrick Mooney 
22234c87aefeSPatrick Mooney 		req->io_req.br_iov[iovidx].iov_len = size;
22244c87aefeSPatrick Mooney 
22254c87aefeSPatrick Mooney 		req->prev_gpaddr = gpaddr;
22264c87aefeSPatrick Mooney 		req->prev_size = size;
22274c87aefeSPatrick Mooney 		req->io_req.br_resid += size;
22284c87aefeSPatrick Mooney 
22294c87aefeSPatrick Mooney 		req->io_req.br_iovcnt++;
22304c87aefeSPatrick Mooney 	}
22314c87aefeSPatrick Mooney 
22324c87aefeSPatrick Mooney 	return (0);
22334c87aefeSPatrick Mooney }
22344c87aefeSPatrick Mooney 
22354c87aefeSPatrick Mooney static void
22364c87aefeSPatrick Mooney pci_nvme_set_completion(struct pci_nvme_softc *sc,
22374c87aefeSPatrick Mooney 	struct nvme_submission_queue *sq, int sqid, uint16_t cid,
22386960cd89SAndy Fiddaman 	uint32_t cdw0, uint16_t status)
22394c87aefeSPatrick Mooney {
22404c87aefeSPatrick Mooney 	struct nvme_completion_queue *cq = &sc->compl_queues[sq->cqid];
22414c87aefeSPatrick Mooney 
22426960cd89SAndy Fiddaman 	DPRINTF("%s sqid %d cqid %u cid %u status: 0x%x 0x%x",
22434c87aefeSPatrick Mooney 		 __func__, sqid, sq->cqid, cid, NVME_STATUS_GET_SCT(status),
22446960cd89SAndy Fiddaman 		 NVME_STATUS_GET_SC(status));
22454c87aefeSPatrick Mooney 
22466960cd89SAndy Fiddaman 	pci_nvme_cq_update(sc, cq,
22476960cd89SAndy Fiddaman 	    0,		/* CDW0 */
22486960cd89SAndy Fiddaman 	    cid,
22496960cd89SAndy Fiddaman 	    sqid,
22506960cd89SAndy Fiddaman 	    status);
22514c87aefeSPatrick Mooney 
2252154972afSPatrick Mooney 	if (cq->head != cq->tail) {
2253154972afSPatrick Mooney 		if (cq->intr_en & NVME_CQ_INTEN) {
22544c87aefeSPatrick Mooney 			pci_generate_msix(sc->nsc_pi, cq->intr_vec);
2255154972afSPatrick Mooney 		} else {
22566960cd89SAndy Fiddaman 			DPRINTF("%s: CQ%u interrupt disabled",
22576960cd89SAndy Fiddaman 						__func__, sq->cqid);
2258154972afSPatrick Mooney 		}
2259154972afSPatrick Mooney 	}
22604c87aefeSPatrick Mooney }
22614c87aefeSPatrick Mooney 
22624c87aefeSPatrick Mooney static void
22634c87aefeSPatrick Mooney pci_nvme_release_ioreq(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req)
22644c87aefeSPatrick Mooney {
22654c87aefeSPatrick Mooney 	req->sc = NULL;
22664c87aefeSPatrick Mooney 	req->nvme_sq = NULL;
22674c87aefeSPatrick Mooney 	req->sqid = 0;
22684c87aefeSPatrick Mooney 
22694c87aefeSPatrick Mooney 	pthread_mutex_lock(&sc->mtx);
22704c87aefeSPatrick Mooney 
2271154972afSPatrick Mooney 	STAILQ_INSERT_TAIL(&sc->ioreqs_free, req, link);
22724c87aefeSPatrick Mooney 	sc->pending_ios--;
22734c87aefeSPatrick Mooney 
22744c87aefeSPatrick Mooney 	/* when no more IO pending, can set to ready if device reset/enabled */
22754c87aefeSPatrick Mooney 	if (sc->pending_ios == 0 &&
22764c87aefeSPatrick Mooney 	    NVME_CC_GET_EN(sc->regs.cc) && !(NVME_CSTS_GET_RDY(sc->regs.csts)))
22774c87aefeSPatrick Mooney 		sc->regs.csts |= NVME_CSTS_RDY;
22784c87aefeSPatrick Mooney 
22794c87aefeSPatrick Mooney 	pthread_mutex_unlock(&sc->mtx);
22804c87aefeSPatrick Mooney 
22814c87aefeSPatrick Mooney 	sem_post(&sc->iosemlock);
22824c87aefeSPatrick Mooney }
22834c87aefeSPatrick Mooney 
22844c87aefeSPatrick Mooney static struct pci_nvme_ioreq *
22854c87aefeSPatrick Mooney pci_nvme_get_ioreq(struct pci_nvme_softc *sc)
22864c87aefeSPatrick Mooney {
2287b0de25cbSAndy Fiddaman 	struct pci_nvme_ioreq *req = NULL;
22884c87aefeSPatrick Mooney 
22894c87aefeSPatrick Mooney 	sem_wait(&sc->iosemlock);
22904c87aefeSPatrick Mooney 	pthread_mutex_lock(&sc->mtx);
22914c87aefeSPatrick Mooney 
2292154972afSPatrick Mooney 	req = STAILQ_FIRST(&sc->ioreqs_free);
22934c87aefeSPatrick Mooney 	assert(req != NULL);
2294154972afSPatrick Mooney 	STAILQ_REMOVE_HEAD(&sc->ioreqs_free, link);
22954c87aefeSPatrick Mooney 
22964c87aefeSPatrick Mooney 	req->sc = sc;
22974c87aefeSPatrick Mooney 
22984c87aefeSPatrick Mooney 	sc->pending_ios++;
22994c87aefeSPatrick Mooney 
23004c87aefeSPatrick Mooney 	pthread_mutex_unlock(&sc->mtx);
23014c87aefeSPatrick Mooney 
23024c87aefeSPatrick Mooney 	req->io_req.br_iovcnt = 0;
23034c87aefeSPatrick Mooney 	req->io_req.br_offset = 0;
23044c87aefeSPatrick Mooney 	req->io_req.br_resid = 0;
23054c87aefeSPatrick Mooney 	req->io_req.br_param = req;
23064c87aefeSPatrick Mooney 	req->prev_gpaddr = 0;
23074c87aefeSPatrick Mooney 	req->prev_size = 0;
23084c87aefeSPatrick Mooney 
23094c87aefeSPatrick Mooney 	return req;
23104c87aefeSPatrick Mooney }
23114c87aefeSPatrick Mooney 
23124c87aefeSPatrick Mooney static void
23134c87aefeSPatrick Mooney pci_nvme_io_done(struct blockif_req *br, int err)
23144c87aefeSPatrick Mooney {
23154c87aefeSPatrick Mooney 	struct pci_nvme_ioreq *req = br->br_param;
23164c87aefeSPatrick Mooney 	struct nvme_submission_queue *sq = req->nvme_sq;
23172b948146SAndy Fiddaman 	uint16_t code, status;
23182b948146SAndy Fiddaman 
23192b948146SAndy Fiddaman #ifndef __FreeBSD__
23202b948146SAndy Fiddaman 	status = 0;
23212b948146SAndy Fiddaman #endif
23224c87aefeSPatrick Mooney 
23236960cd89SAndy Fiddaman 	DPRINTF("%s error %d %s", __func__, err, strerror(err));
23244c87aefeSPatrick Mooney 
23254c87aefeSPatrick Mooney 	/* TODO return correct error */
23264c87aefeSPatrick Mooney 	code = err ? NVME_SC_DATA_TRANSFER_ERROR : NVME_SC_SUCCESS;
23274c87aefeSPatrick Mooney 	pci_nvme_status_genc(&status, code);
23284c87aefeSPatrick Mooney 
23296960cd89SAndy Fiddaman 	pci_nvme_set_completion(req->sc, sq, req->sqid, req->cid, 0, status);
23306960cd89SAndy Fiddaman 	pci_nvme_stats_write_read_update(req->sc, req->opc,
23316960cd89SAndy Fiddaman 	    req->bytes, status);
23324c87aefeSPatrick Mooney 	pci_nvme_release_ioreq(req->sc, req);
23334c87aefeSPatrick Mooney }
23344c87aefeSPatrick Mooney 
23356960cd89SAndy Fiddaman /*
23366960cd89SAndy Fiddaman  * Implements the Flush command. The specification states:
23376960cd89SAndy Fiddaman  *    If a volatile write cache is not present, Flush commands complete
23386960cd89SAndy Fiddaman  *    successfully and have no effect
23396960cd89SAndy Fiddaman  * in the description of the Volatile Write Cache (VWC) field of the Identify
23406960cd89SAndy Fiddaman  * Controller data. Therefore, set status to Success if the command is
23416960cd89SAndy Fiddaman  * not supported (i.e. RAM or as indicated by the blockif).
23426960cd89SAndy Fiddaman  */
23436960cd89SAndy Fiddaman static bool
23446960cd89SAndy Fiddaman nvme_opc_flush(struct pci_nvme_softc *sc,
23456960cd89SAndy Fiddaman     struct nvme_command *cmd,
23466960cd89SAndy Fiddaman     struct pci_nvme_blockstore *nvstore,
23476960cd89SAndy Fiddaman     struct pci_nvme_ioreq *req,
23486960cd89SAndy Fiddaman     uint16_t *status)
23494c87aefeSPatrick Mooney {
23506960cd89SAndy Fiddaman 	bool pending = false;
23514c87aefeSPatrick Mooney 
23526960cd89SAndy Fiddaman 	if (nvstore->type == NVME_STOR_RAM) {
23536960cd89SAndy Fiddaman 		pci_nvme_status_genc(status, NVME_SC_SUCCESS);
23546960cd89SAndy Fiddaman 	} else {
23556960cd89SAndy Fiddaman 		int err;
23564c87aefeSPatrick Mooney 
23576960cd89SAndy Fiddaman 		req->io_req.br_callback = pci_nvme_io_done;
23586960cd89SAndy Fiddaman 
23596960cd89SAndy Fiddaman 		err = blockif_flush(nvstore->ctx, &req->io_req);
23606960cd89SAndy Fiddaman 		switch (err) {
23616960cd89SAndy Fiddaman 		case 0:
23626960cd89SAndy Fiddaman 			pending = true;
23636960cd89SAndy Fiddaman 			break;
23646960cd89SAndy Fiddaman 		case EOPNOTSUPP:
23656960cd89SAndy Fiddaman 			pci_nvme_status_genc(status, NVME_SC_SUCCESS);
23666960cd89SAndy Fiddaman 			break;
23676960cd89SAndy Fiddaman 		default:
23686960cd89SAndy Fiddaman 			pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
23696960cd89SAndy Fiddaman 		}
23706960cd89SAndy Fiddaman 	}
23716960cd89SAndy Fiddaman 
23726960cd89SAndy Fiddaman 	return (pending);
23736960cd89SAndy Fiddaman }
23746960cd89SAndy Fiddaman 
23756960cd89SAndy Fiddaman static uint16_t
23766960cd89SAndy Fiddaman nvme_write_read_ram(struct pci_nvme_softc *sc,
23776960cd89SAndy Fiddaman     struct pci_nvme_blockstore *nvstore,
23786960cd89SAndy Fiddaman     uint64_t prp1, uint64_t prp2,
23796960cd89SAndy Fiddaman     size_t offset, uint64_t bytes,
23806960cd89SAndy Fiddaman     bool is_write)
23816960cd89SAndy Fiddaman {
23826960cd89SAndy Fiddaman 	uint8_t *buf = nvstore->ctx;
23836960cd89SAndy Fiddaman 	enum nvme_copy_dir dir;
23842b948146SAndy Fiddaman 	uint16_t status;
23852b948146SAndy Fiddaman 
23862b948146SAndy Fiddaman #ifndef __FreeBSD__
23872b948146SAndy Fiddaman 	status = 0;
23882b948146SAndy Fiddaman #endif
23896960cd89SAndy Fiddaman 
23906960cd89SAndy Fiddaman 	if (is_write)
23916960cd89SAndy Fiddaman 		dir = NVME_COPY_TO_PRP;
23926960cd89SAndy Fiddaman 	else
23936960cd89SAndy Fiddaman 		dir = NVME_COPY_FROM_PRP;
23946960cd89SAndy Fiddaman 
23956960cd89SAndy Fiddaman 	if (nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, prp1, prp2,
23966960cd89SAndy Fiddaman 	    buf + offset, bytes, dir))
23976960cd89SAndy Fiddaman 		pci_nvme_status_genc(&status,
23986960cd89SAndy Fiddaman 		    NVME_SC_DATA_TRANSFER_ERROR);
23996960cd89SAndy Fiddaman 	else
24006960cd89SAndy Fiddaman 		pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
24016960cd89SAndy Fiddaman 
24026960cd89SAndy Fiddaman 	return (status);
24036960cd89SAndy Fiddaman }
24046960cd89SAndy Fiddaman 
24056960cd89SAndy Fiddaman static uint16_t
24066960cd89SAndy Fiddaman nvme_write_read_blockif(struct pci_nvme_softc *sc,
24076960cd89SAndy Fiddaman     struct pci_nvme_blockstore *nvstore,
24086960cd89SAndy Fiddaman     struct pci_nvme_ioreq *req,
24096960cd89SAndy Fiddaman     uint64_t prp1, uint64_t prp2,
24106960cd89SAndy Fiddaman     size_t offset, uint64_t bytes,
24116960cd89SAndy Fiddaman     bool is_write)
24126960cd89SAndy Fiddaman {
24136960cd89SAndy Fiddaman 	uint64_t size;
24146960cd89SAndy Fiddaman 	int err;
24156960cd89SAndy Fiddaman 	uint16_t status = NVME_NO_STATUS;
24166960cd89SAndy Fiddaman 
24176960cd89SAndy Fiddaman 	size = MIN(PAGE_SIZE - (prp1 % PAGE_SIZE), bytes);
24186960cd89SAndy Fiddaman 	if (pci_nvme_append_iov_req(sc, req, prp1,
24196960cd89SAndy Fiddaman 	    size, is_write, offset)) {
24206960cd89SAndy Fiddaman 		pci_nvme_status_genc(&status,
24216960cd89SAndy Fiddaman 		    NVME_SC_DATA_TRANSFER_ERROR);
24226960cd89SAndy Fiddaman 		goto out;
24236960cd89SAndy Fiddaman 	}
24246960cd89SAndy Fiddaman 
24256960cd89SAndy Fiddaman 	offset += size;
24266960cd89SAndy Fiddaman 	bytes  -= size;
24276960cd89SAndy Fiddaman 
24286960cd89SAndy Fiddaman 	if (bytes == 0) {
24296960cd89SAndy Fiddaman 		;
24306960cd89SAndy Fiddaman 	} else if (bytes <= PAGE_SIZE) {
24316960cd89SAndy Fiddaman 		size = bytes;
24326960cd89SAndy Fiddaman 		if (pci_nvme_append_iov_req(sc, req, prp2,
24336960cd89SAndy Fiddaman 		    size, is_write, offset)) {
24346960cd89SAndy Fiddaman 			pci_nvme_status_genc(&status,
24356960cd89SAndy Fiddaman 			    NVME_SC_DATA_TRANSFER_ERROR);
24366960cd89SAndy Fiddaman 			goto out;
24376960cd89SAndy Fiddaman 		}
24386960cd89SAndy Fiddaman 	} else {
24396960cd89SAndy Fiddaman 		void *vmctx = sc->nsc_pi->pi_vmctx;
24406960cd89SAndy Fiddaman 		uint64_t *prp_list = &prp2;
24416960cd89SAndy Fiddaman 		uint64_t *last = prp_list;
24426960cd89SAndy Fiddaman 
24436960cd89SAndy Fiddaman 		/* PRP2 is pointer to a physical region page list */
24446960cd89SAndy Fiddaman 		while (bytes) {
24456960cd89SAndy Fiddaman 			/* Last entry in list points to the next list */
2446b0de25cbSAndy Fiddaman 			if ((prp_list == last) && (bytes > PAGE_SIZE)) {
24476960cd89SAndy Fiddaman 				uint64_t prp = *prp_list;
24486960cd89SAndy Fiddaman 
24496960cd89SAndy Fiddaman 				prp_list = paddr_guest2host(vmctx, prp,
24506960cd89SAndy Fiddaman 				    PAGE_SIZE - (prp % PAGE_SIZE));
24516960cd89SAndy Fiddaman 				last = prp_list + (NVME_PRP2_ITEMS - 1);
24526960cd89SAndy Fiddaman 			}
24536960cd89SAndy Fiddaman 
24546960cd89SAndy Fiddaman 			size = MIN(bytes, PAGE_SIZE);
24556960cd89SAndy Fiddaman 
24566960cd89SAndy Fiddaman 			if (pci_nvme_append_iov_req(sc, req, *prp_list,
24576960cd89SAndy Fiddaman 			    size, is_write, offset)) {
24586960cd89SAndy Fiddaman 				pci_nvme_status_genc(&status,
24596960cd89SAndy Fiddaman 				    NVME_SC_DATA_TRANSFER_ERROR);
24606960cd89SAndy Fiddaman 				goto out;
24616960cd89SAndy Fiddaman 			}
24626960cd89SAndy Fiddaman 
24636960cd89SAndy Fiddaman 			offset += size;
24646960cd89SAndy Fiddaman 			bytes  -= size;
24656960cd89SAndy Fiddaman 
24666960cd89SAndy Fiddaman 			prp_list++;
24676960cd89SAndy Fiddaman 		}
24686960cd89SAndy Fiddaman 	}
24696960cd89SAndy Fiddaman 	req->io_req.br_callback = pci_nvme_io_done;
24706960cd89SAndy Fiddaman 	if (is_write)
24716960cd89SAndy Fiddaman 		err = blockif_write(nvstore->ctx, &req->io_req);
24726960cd89SAndy Fiddaman 	else
24736960cd89SAndy Fiddaman 		err = blockif_read(nvstore->ctx, &req->io_req);
24746960cd89SAndy Fiddaman 
24756960cd89SAndy Fiddaman 	if (err)
24766960cd89SAndy Fiddaman 		pci_nvme_status_genc(&status, NVME_SC_DATA_TRANSFER_ERROR);
24776960cd89SAndy Fiddaman out:
24786960cd89SAndy Fiddaman 	return (status);
24796960cd89SAndy Fiddaman }
24806960cd89SAndy Fiddaman 
24816960cd89SAndy Fiddaman static bool
24826960cd89SAndy Fiddaman nvme_opc_write_read(struct pci_nvme_softc *sc,
24836960cd89SAndy Fiddaman     struct nvme_command *cmd,
24846960cd89SAndy Fiddaman     struct pci_nvme_blockstore *nvstore,
24856960cd89SAndy Fiddaman     struct pci_nvme_ioreq *req,
24866960cd89SAndy Fiddaman     uint16_t *status)
24876960cd89SAndy Fiddaman {
24882b948146SAndy Fiddaman 	uint64_t lba, nblocks, bytes;
24896960cd89SAndy Fiddaman 	size_t offset;
24906960cd89SAndy Fiddaman 	bool is_write = cmd->opc == NVME_OPC_WRITE;
24916960cd89SAndy Fiddaman 	bool pending = false;
24926960cd89SAndy Fiddaman 
24932b948146SAndy Fiddaman #ifndef __FreeBSD__
24942b948146SAndy Fiddaman 	bytes = 0;
24952b948146SAndy Fiddaman #endif
24962b948146SAndy Fiddaman 
24976960cd89SAndy Fiddaman 	lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10;
24986960cd89SAndy Fiddaman 	nblocks = (cmd->cdw12 & 0xFFFF) + 1;
24996dc98349SAndy Fiddaman 
25006960cd89SAndy Fiddaman 	if (pci_nvme_out_of_range(nvstore, lba, nblocks)) {
2501*d7b72f7bSAndy Fiddaman 		WPRINTF("%s command would exceed LBA range(slba=%#lx nblocks=%#lx)",
2502*d7b72f7bSAndy Fiddaman 		    __func__, lba, nblocks);
25036960cd89SAndy Fiddaman 		pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE);
25046960cd89SAndy Fiddaman 		goto out;
25056960cd89SAndy Fiddaman 	}
25066960cd89SAndy Fiddaman 
25076960cd89SAndy Fiddaman 	bytes  = nblocks << nvstore->sectsz_bits;
25086960cd89SAndy Fiddaman 	if (bytes > NVME_MAX_DATA_SIZE) {
25096960cd89SAndy Fiddaman 		WPRINTF("%s command would exceed MDTS", __func__);
25106960cd89SAndy Fiddaman 		pci_nvme_status_genc(status, NVME_SC_INVALID_FIELD);
25116960cd89SAndy Fiddaman 		goto out;
25126960cd89SAndy Fiddaman 	}
25136960cd89SAndy Fiddaman 
25146960cd89SAndy Fiddaman 	offset = lba << nvstore->sectsz_bits;
25156960cd89SAndy Fiddaman 
25166960cd89SAndy Fiddaman 	req->bytes = bytes;
25176960cd89SAndy Fiddaman 	req->io_req.br_offset = lba;
25186960cd89SAndy Fiddaman 
25196960cd89SAndy Fiddaman 	/* PRP bits 1:0 must be zero */
25206960cd89SAndy Fiddaman 	cmd->prp1 &= ~0x3UL;
25216960cd89SAndy Fiddaman 	cmd->prp2 &= ~0x3UL;
25226960cd89SAndy Fiddaman 
25236960cd89SAndy Fiddaman 	if (nvstore->type == NVME_STOR_RAM) {
25246960cd89SAndy Fiddaman 		*status = nvme_write_read_ram(sc, nvstore, cmd->prp1,
25256960cd89SAndy Fiddaman 		    cmd->prp2, offset, bytes, is_write);
25266960cd89SAndy Fiddaman 	} else {
25276960cd89SAndy Fiddaman 		*status = nvme_write_read_blockif(sc, nvstore, req,
25286960cd89SAndy Fiddaman 		    cmd->prp1, cmd->prp2, offset, bytes, is_write);
25296960cd89SAndy Fiddaman 
25306960cd89SAndy Fiddaman 		if (*status == NVME_NO_STATUS)
25316960cd89SAndy Fiddaman 			pending = true;
25326960cd89SAndy Fiddaman 	}
25336960cd89SAndy Fiddaman out:
25346960cd89SAndy Fiddaman 	if (!pending)
25356960cd89SAndy Fiddaman 		pci_nvme_stats_write_read_update(sc, cmd->opc, bytes, *status);
25366960cd89SAndy Fiddaman 
25376960cd89SAndy Fiddaman 	return (pending);
25384c87aefeSPatrick Mooney }
25394c87aefeSPatrick Mooney 
2540154972afSPatrick Mooney static void
2541154972afSPatrick Mooney pci_nvme_dealloc_sm(struct blockif_req *br, int err)
2542154972afSPatrick Mooney {
2543154972afSPatrick Mooney 	struct pci_nvme_ioreq *req = br->br_param;
2544154972afSPatrick Mooney 	struct pci_nvme_softc *sc = req->sc;
2545154972afSPatrick Mooney 	bool done = true;
2546154972afSPatrick Mooney 	uint16_t status;
25472b948146SAndy Fiddaman 
25482b948146SAndy Fiddaman #ifndef __FreeBSD__
25492b948146SAndy Fiddaman 	status = 0;
2550154972afSPatrick Mooney #endif
2551154972afSPatrick Mooney 
2552154972afSPatrick Mooney 	if (err) {
2553154972afSPatrick Mooney 		pci_nvme_status_genc(&status, NVME_SC_INTERNAL_DEVICE_ERROR);
2554154972afSPatrick Mooney 	} else if ((req->prev_gpaddr + 1) == (req->prev_size)) {
2555154972afSPatrick Mooney 		pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
2556154972afSPatrick Mooney 	} else {
2557154972afSPatrick Mooney 		struct iovec *iov = req->io_req.br_iov;
2558154972afSPatrick Mooney 
2559154972afSPatrick Mooney 		req->prev_gpaddr++;
2560154972afSPatrick Mooney 		iov += req->prev_gpaddr;
2561154972afSPatrick Mooney 
2562154972afSPatrick Mooney 		/* The iov_* values already include the sector size */
2563154972afSPatrick Mooney 		req->io_req.br_offset = (off_t)iov->iov_base;
2564154972afSPatrick Mooney 		req->io_req.br_resid = iov->iov_len;
2565154972afSPatrick Mooney 		if (blockif_delete(sc->nvstore.ctx, &req->io_req)) {
2566154972afSPatrick Mooney 			pci_nvme_status_genc(&status,
2567154972afSPatrick Mooney 			    NVME_SC_INTERNAL_DEVICE_ERROR);
2568154972afSPatrick Mooney 		} else
2569154972afSPatrick Mooney 			done = false;
2570154972afSPatrick Mooney 	}
2571154972afSPatrick Mooney 
2572154972afSPatrick Mooney 	if (done) {
2573154972afSPatrick Mooney 		pci_nvme_set_completion(sc, req->nvme_sq, req->sqid,
25746960cd89SAndy Fiddaman 		    req->cid, 0, status);
2575154972afSPatrick Mooney 		pci_nvme_release_ioreq(sc, req);
2576154972afSPatrick Mooney 	}
2577154972afSPatrick Mooney }
2578154972afSPatrick Mooney 
25796960cd89SAndy Fiddaman static bool
2580154972afSPatrick Mooney nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc,
2581154972afSPatrick Mooney     struct nvme_command *cmd,
2582154972afSPatrick Mooney     struct pci_nvme_blockstore *nvstore,
2583154972afSPatrick Mooney     struct pci_nvme_ioreq *req,
2584154972afSPatrick Mooney     uint16_t *status)
2585154972afSPatrick Mooney {
25862b948146SAndy Fiddaman 	struct nvme_dsm_range *range;
25876960cd89SAndy Fiddaman 	uint32_t nr, r, non_zero, dr;
25886960cd89SAndy Fiddaman 	int err;
25896960cd89SAndy Fiddaman 	bool pending = false;
2590154972afSPatrick Mooney 
25912b948146SAndy Fiddaman #ifndef __FreeBSD__
25922b948146SAndy Fiddaman 	range = NULL;
25932b948146SAndy Fiddaman #endif
25942b948146SAndy Fiddaman 
2595154972afSPatrick Mooney 	if ((sc->ctrldata.oncs & NVME_ONCS_DSM) == 0) {
2596154972afSPatrick Mooney 		pci_nvme_status_genc(status, NVME_SC_INVALID_OPCODE);
2597154972afSPatrick Mooney 		goto out;
2598154972afSPatrick Mooney 	}
2599154972afSPatrick Mooney 
26006960cd89SAndy Fiddaman 	nr = cmd->cdw10 & 0xff;
2601154972afSPatrick Mooney 
2602154972afSPatrick Mooney 	/* copy locally because a range entry could straddle PRPs */
2603154972afSPatrick Mooney 	range = calloc(1, NVME_MAX_DSM_TRIM);
2604154972afSPatrick Mooney 	if (range == NULL) {
2605154972afSPatrick Mooney 		pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
2606154972afSPatrick Mooney 		goto out;
2607154972afSPatrick Mooney 	}
2608154972afSPatrick Mooney 	nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2,
2609154972afSPatrick Mooney 	    (uint8_t *)range, NVME_MAX_DSM_TRIM, NVME_COPY_FROM_PRP);
2610154972afSPatrick Mooney 
26116960cd89SAndy Fiddaman 	/* Check for invalid ranges and the number of non-zero lengths */
26126960cd89SAndy Fiddaman 	non_zero = 0;
26136960cd89SAndy Fiddaman 	for (r = 0; r <= nr; r++) {
26146960cd89SAndy Fiddaman 		if (pci_nvme_out_of_range(nvstore,
26156960cd89SAndy Fiddaman 		    range[r].starting_lba, range[r].length)) {
26166960cd89SAndy Fiddaman 			pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE);
26176960cd89SAndy Fiddaman 			goto out;
26186960cd89SAndy Fiddaman 		}
26196960cd89SAndy Fiddaman 		if (range[r].length != 0)
26206960cd89SAndy Fiddaman 			non_zero++;
26216960cd89SAndy Fiddaman 	}
26226960cd89SAndy Fiddaman 
26236960cd89SAndy Fiddaman 	if (cmd->cdw11 & NVME_DSM_ATTR_DEALLOCATE) {
26246960cd89SAndy Fiddaman 		size_t offset, bytes;
26256960cd89SAndy Fiddaman 		int sectsz_bits = sc->nvstore.sectsz_bits;
26266960cd89SAndy Fiddaman 
26276960cd89SAndy Fiddaman 		/*
26286960cd89SAndy Fiddaman 		 * DSM calls are advisory only, and compliant controllers
26296960cd89SAndy Fiddaman 		 * may choose to take no actions (i.e. return Success).
26306960cd89SAndy Fiddaman 		 */
26316960cd89SAndy Fiddaman 		if (!nvstore->deallocate) {
26326960cd89SAndy Fiddaman 			pci_nvme_status_genc(status, NVME_SC_SUCCESS);
26336960cd89SAndy Fiddaman 			goto out;
26346960cd89SAndy Fiddaman 		}
26356960cd89SAndy Fiddaman 
26366960cd89SAndy Fiddaman 		/* If all ranges have a zero length, return Success */
26376960cd89SAndy Fiddaman 		if (non_zero == 0) {
26386960cd89SAndy Fiddaman 			pci_nvme_status_genc(status, NVME_SC_SUCCESS);
26396960cd89SAndy Fiddaman 			goto out;
26406960cd89SAndy Fiddaman 		}
26416960cd89SAndy Fiddaman 
26426960cd89SAndy Fiddaman 		if (req == NULL) {
26436960cd89SAndy Fiddaman 			pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
26446960cd89SAndy Fiddaman 			goto out;
26456960cd89SAndy Fiddaman 		}
26466960cd89SAndy Fiddaman 
26476960cd89SAndy Fiddaman 		offset = range[0].starting_lba << sectsz_bits;
26486960cd89SAndy Fiddaman 		bytes = range[0].length << sectsz_bits;
26496960cd89SAndy Fiddaman 
2650154972afSPatrick Mooney 		/*
2651154972afSPatrick Mooney 		 * If the request is for more than a single range, store
2652154972afSPatrick Mooney 		 * the ranges in the br_iov. Optimize for the common case
2653154972afSPatrick Mooney 		 * of a single range.
2654154972afSPatrick Mooney 		 *
2655154972afSPatrick Mooney 		 * Note that NVMe Number of Ranges is a zero based value
2656154972afSPatrick Mooney 		 */
2657154972afSPatrick Mooney 		req->io_req.br_iovcnt = 0;
26586960cd89SAndy Fiddaman 		req->io_req.br_offset = offset;
26596960cd89SAndy Fiddaman 		req->io_req.br_resid = bytes;
2660154972afSPatrick Mooney 
2661154972afSPatrick Mooney 		if (nr == 0) {
2662154972afSPatrick Mooney 			req->io_req.br_callback = pci_nvme_io_done;
2663154972afSPatrick Mooney 		} else {
2664154972afSPatrick Mooney 			struct iovec *iov = req->io_req.br_iov;
2665154972afSPatrick Mooney 
26666960cd89SAndy Fiddaman 			for (r = 0, dr = 0; r <= nr; r++) {
26676960cd89SAndy Fiddaman 				offset = range[r].starting_lba << sectsz_bits;
26686960cd89SAndy Fiddaman 				bytes = range[r].length << sectsz_bits;
26696960cd89SAndy Fiddaman 				if (bytes == 0)
26706960cd89SAndy Fiddaman 					continue;
26716960cd89SAndy Fiddaman 
26726960cd89SAndy Fiddaman 				if ((nvstore->size - offset) < bytes) {
26736960cd89SAndy Fiddaman 					pci_nvme_status_genc(status,
26746960cd89SAndy Fiddaman 					    NVME_SC_LBA_OUT_OF_RANGE);
26756960cd89SAndy Fiddaman 					goto out;
26766960cd89SAndy Fiddaman 				}
26776960cd89SAndy Fiddaman 				iov[dr].iov_base = (void *)offset;
26786960cd89SAndy Fiddaman 				iov[dr].iov_len = bytes;
26796960cd89SAndy Fiddaman 				dr++;
2680154972afSPatrick Mooney 			}
2681154972afSPatrick Mooney 			req->io_req.br_callback = pci_nvme_dealloc_sm;
2682154972afSPatrick Mooney 
2683154972afSPatrick Mooney 			/*
2684154972afSPatrick Mooney 			 * Use prev_gpaddr to track the current entry and
2685154972afSPatrick Mooney 			 * prev_size to track the number of entries
2686154972afSPatrick Mooney 			 */
2687154972afSPatrick Mooney 			req->prev_gpaddr = 0;
26886960cd89SAndy Fiddaman 			req->prev_size = dr;
2689154972afSPatrick Mooney 		}
2690154972afSPatrick Mooney 
2691154972afSPatrick Mooney 		err = blockif_delete(nvstore->ctx, &req->io_req);
2692154972afSPatrick Mooney 		if (err)
2693154972afSPatrick Mooney 			pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
26946960cd89SAndy Fiddaman 		else
26956960cd89SAndy Fiddaman 			pending = true;
2696154972afSPatrick Mooney 	}
2697154972afSPatrick Mooney out:
26986960cd89SAndy Fiddaman 	free(range);
26996960cd89SAndy Fiddaman 	return (pending);
2700154972afSPatrick Mooney }
27014c87aefeSPatrick Mooney 
27024c87aefeSPatrick Mooney static void
27034c87aefeSPatrick Mooney pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx)
27044c87aefeSPatrick Mooney {
27054c87aefeSPatrick Mooney 	struct nvme_submission_queue *sq;
27062b948146SAndy Fiddaman 	uint16_t status;
27074c87aefeSPatrick Mooney 	uint16_t sqhead;
27084c87aefeSPatrick Mooney 
27092b948146SAndy Fiddaman #ifndef __FreeBSD__
27102b948146SAndy Fiddaman 	status = 0;
27112b948146SAndy Fiddaman #endif
27122b948146SAndy Fiddaman 
27134c87aefeSPatrick Mooney 	/* handle all submissions up to sq->tail index */
27144c87aefeSPatrick Mooney 	sq = &sc->submit_queues[idx];
27154c87aefeSPatrick Mooney 
27166960cd89SAndy Fiddaman 	pthread_mutex_lock(&sq->mtx);
27174c87aefeSPatrick Mooney 
27186960cd89SAndy Fiddaman 	sqhead = sq->head;
27196960cd89SAndy Fiddaman 	DPRINTF("nvme_handle_io qid %u head %u tail %u cmdlist %p",
27206960cd89SAndy Fiddaman 	         idx, sqhead, sq->tail, sq->qbase);
27214c87aefeSPatrick Mooney 
27224c87aefeSPatrick Mooney 	while (sqhead != atomic_load_acq_short(&sq->tail)) {
27234c87aefeSPatrick Mooney 		struct nvme_command *cmd;
27246960cd89SAndy Fiddaman 		struct pci_nvme_ioreq *req;
27256960cd89SAndy Fiddaman 		uint32_t nsid;
27266960cd89SAndy Fiddaman 		bool pending;
27274c87aefeSPatrick Mooney 
27286960cd89SAndy Fiddaman 		pending = false;
27296960cd89SAndy Fiddaman 		req = NULL;
27306960cd89SAndy Fiddaman 		status = 0;
27314c87aefeSPatrick Mooney 
27324c87aefeSPatrick Mooney 		cmd = &sq->qbase[sqhead];
27334c87aefeSPatrick Mooney 		sqhead = (sqhead + 1) % sq->size;
27344c87aefeSPatrick Mooney 
27356960cd89SAndy Fiddaman 		nsid = le32toh(cmd->nsid);
27366960cd89SAndy Fiddaman 		if ((nsid == 0) || (nsid > sc->ctrldata.nn)) {
27376960cd89SAndy Fiddaman 			pci_nvme_status_genc(&status,
27386960cd89SAndy Fiddaman 			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
27396960cd89SAndy Fiddaman 			status |=
27406960cd89SAndy Fiddaman 			    NVME_STATUS_DNR_MASK << NVME_STATUS_DNR_SHIFT;
27416960cd89SAndy Fiddaman 			goto complete;
27424c87aefeSPatrick Mooney  		}
27434c87aefeSPatrick Mooney 
27444c87aefeSPatrick Mooney 		req = pci_nvme_get_ioreq(sc);
27456960cd89SAndy Fiddaman 		if (req == NULL) {
27466960cd89SAndy Fiddaman 			pci_nvme_status_genc(&status,
27476960cd89SAndy Fiddaman 			    NVME_SC_INTERNAL_DEVICE_ERROR);
27486960cd89SAndy Fiddaman 			WPRINTF("%s: unable to allocate IO req", __func__);
27496960cd89SAndy Fiddaman 			goto complete;
27506960cd89SAndy Fiddaman 		}
27514c87aefeSPatrick Mooney 		req->nvme_sq = sq;
27524c87aefeSPatrick Mooney 		req->sqid = idx;
27534c87aefeSPatrick Mooney 		req->opc = cmd->opc;
27544c87aefeSPatrick Mooney 		req->cid = cmd->cid;
27554c87aefeSPatrick Mooney 		req->nsid = cmd->nsid;
27564c87aefeSPatrick Mooney 
27574c87aefeSPatrick Mooney 		switch (cmd->opc) {
27586960cd89SAndy Fiddaman 		case NVME_OPC_FLUSH:
27596960cd89SAndy Fiddaman 			pending = nvme_opc_flush(sc, cmd, &sc->nvstore,
27606960cd89SAndy Fiddaman 			    req, &status);
27614c87aefeSPatrick Mooney  			break;
27624c87aefeSPatrick Mooney 		case NVME_OPC_WRITE:
27636960cd89SAndy Fiddaman 		case NVME_OPC_READ:
27646960cd89SAndy Fiddaman 			pending = nvme_opc_write_read(sc, cmd, &sc->nvstore,
27656960cd89SAndy Fiddaman 			    req, &status);
27666960cd89SAndy Fiddaman 			break;
27676960cd89SAndy Fiddaman 		case NVME_OPC_WRITE_ZEROES:
27686960cd89SAndy Fiddaman 			/* TODO: write zeroes
27696960cd89SAndy Fiddaman 			WPRINTF("%s write zeroes lba 0x%lx blocks %u",
27706960cd89SAndy Fiddaman 			        __func__, lba, cmd->cdw12 & 0xFFFF); */
27716960cd89SAndy Fiddaman 			pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
27726960cd89SAndy Fiddaman 			break;
27736960cd89SAndy Fiddaman 		case NVME_OPC_DATASET_MANAGEMENT:
27746960cd89SAndy Fiddaman  			pending = nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore,
27756960cd89SAndy Fiddaman 			    req, &status);
27764c87aefeSPatrick Mooney 			break;
27774c87aefeSPatrick Mooney  		default:
27786960cd89SAndy Fiddaman  			WPRINTF("%s unhandled io command 0x%x",
27796960cd89SAndy Fiddaman 			    __func__, cmd->opc);
27806960cd89SAndy Fiddaman 			pci_nvme_status_genc(&status, NVME_SC_INVALID_OPCODE);
27814c87aefeSPatrick Mooney 		}
27826960cd89SAndy Fiddaman complete:
27836960cd89SAndy Fiddaman 		if (!pending) {
27844c87aefeSPatrick Mooney 			pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
27856960cd89SAndy Fiddaman 			    status);
27866960cd89SAndy Fiddaman 			if (req != NULL)
27874c87aefeSPatrick Mooney 				pci_nvme_release_ioreq(sc, req);
27884c87aefeSPatrick Mooney 		}
27894c87aefeSPatrick Mooney 	}
27904c87aefeSPatrick Mooney 
27916960cd89SAndy Fiddaman 	sq->head = sqhead;
27926960cd89SAndy Fiddaman 
27936960cd89SAndy Fiddaman 	pthread_mutex_unlock(&sq->mtx);
27944c87aefeSPatrick Mooney }
27954c87aefeSPatrick Mooney 
27964c87aefeSPatrick Mooney static void
27974c87aefeSPatrick Mooney pci_nvme_handle_doorbell(struct vmctx *ctx, struct pci_nvme_softc* sc,
27984c87aefeSPatrick Mooney 	uint64_t idx, int is_sq, uint64_t value)
27994c87aefeSPatrick Mooney {
28006960cd89SAndy Fiddaman 	DPRINTF("nvme doorbell %lu, %s, val 0x%lx",
28016960cd89SAndy Fiddaman 	        idx, is_sq ? "SQ" : "CQ", value & 0xFFFF);
28024c87aefeSPatrick Mooney 
28034c87aefeSPatrick Mooney 	if (is_sq) {
28046960cd89SAndy Fiddaman 		if (idx > sc->num_squeues) {
28056960cd89SAndy Fiddaman 			WPRINTF("%s queue index %lu overflow from "
28066960cd89SAndy Fiddaman 			         "guest (max %u)",
28076960cd89SAndy Fiddaman 			         __func__, idx, sc->num_squeues);
28086960cd89SAndy Fiddaman 			return;
28096960cd89SAndy Fiddaman 		}
28106960cd89SAndy Fiddaman 
28114c87aefeSPatrick Mooney 		atomic_store_short(&sc->submit_queues[idx].tail,
28124c87aefeSPatrick Mooney 		                   (uint16_t)value);
28134c87aefeSPatrick Mooney 
28144c87aefeSPatrick Mooney 		if (idx == 0) {
28154c87aefeSPatrick Mooney 			pci_nvme_handle_admin_cmd(sc, value);
28164c87aefeSPatrick Mooney 		} else {
28174c87aefeSPatrick Mooney 			/* submission queue; handle new entries in SQ */
28184c87aefeSPatrick Mooney 			if (idx > sc->num_squeues) {
28196960cd89SAndy Fiddaman 				WPRINTF("%s SQ index %lu overflow from "
2820154972afSPatrick Mooney 				         "guest (max %u)",
28216960cd89SAndy Fiddaman 				         __func__, idx, sc->num_squeues);
28224c87aefeSPatrick Mooney 				return;
28234c87aefeSPatrick Mooney 			}
28244c87aefeSPatrick Mooney 			pci_nvme_handle_io_cmd(sc, (uint16_t)idx);
28254c87aefeSPatrick Mooney 		}
28264c87aefeSPatrick Mooney 	} else {
28274c87aefeSPatrick Mooney 		if (idx > sc->num_cqueues) {
28286960cd89SAndy Fiddaman 			WPRINTF("%s queue index %lu overflow from "
2829154972afSPatrick Mooney 			         "guest (max %u)",
28306960cd89SAndy Fiddaman 			         __func__, idx, sc->num_cqueues);
28314c87aefeSPatrick Mooney 			return;
28324c87aefeSPatrick Mooney 		}
28334c87aefeSPatrick Mooney 
28346960cd89SAndy Fiddaman 		atomic_store_short(&sc->compl_queues[idx].head,
28356960cd89SAndy Fiddaman 				(uint16_t)value);
28364c87aefeSPatrick Mooney 	}
28374c87aefeSPatrick Mooney }
28384c87aefeSPatrick Mooney 
28394c87aefeSPatrick Mooney static void
28404c87aefeSPatrick Mooney pci_nvme_bar0_reg_dumps(const char *func, uint64_t offset, int iswrite)
28414c87aefeSPatrick Mooney {
28424c87aefeSPatrick Mooney 	const char *s = iswrite ? "WRITE" : "READ";
28434c87aefeSPatrick Mooney 
28444c87aefeSPatrick Mooney 	switch (offset) {
28454c87aefeSPatrick Mooney 	case NVME_CR_CAP_LOW:
28466960cd89SAndy Fiddaman 		DPRINTF("%s %s NVME_CR_CAP_LOW", func, s);
28474c87aefeSPatrick Mooney 		break;
28484c87aefeSPatrick Mooney 	case NVME_CR_CAP_HI:
28496960cd89SAndy Fiddaman 		DPRINTF("%s %s NVME_CR_CAP_HI", func, s);
28504c87aefeSPatrick Mooney 		break;
28514c87aefeSPatrick Mooney 	case NVME_CR_VS:
28526960cd89SAndy Fiddaman 		DPRINTF("%s %s NVME_CR_VS", func, s);
28534c87aefeSPatrick Mooney 		break;
28544c87aefeSPatrick Mooney 	case NVME_CR_INTMS:
28556960cd89SAndy Fiddaman 		DPRINTF("%s %s NVME_CR_INTMS", func, s);
28564c87aefeSPatrick Mooney 		break;
28574c87aefeSPatrick Mooney 	case NVME_CR_INTMC:
28586960cd89SAndy Fiddaman 		DPRINTF("%s %s NVME_CR_INTMC", func, s);
28594c87aefeSPatrick Mooney 		break;
28604c87aefeSPatrick Mooney 	case NVME_CR_CC:
28616960cd89SAndy Fiddaman 		DPRINTF("%s %s NVME_CR_CC", func, s);
28624c87aefeSPatrick Mooney 		break;
28634c87aefeSPatrick Mooney 	case NVME_CR_CSTS:
28646960cd89SAndy Fiddaman 		DPRINTF("%s %s NVME_CR_CSTS", func, s);
28654c87aefeSPatrick Mooney 		break;
28664c87aefeSPatrick Mooney 	case NVME_CR_NSSR:
28676960cd89SAndy Fiddaman 		DPRINTF("%s %s NVME_CR_NSSR", func, s);
28684c87aefeSPatrick Mooney 		break;
28694c87aefeSPatrick Mooney 	case NVME_CR_AQA:
28706960cd89SAndy Fiddaman 		DPRINTF("%s %s NVME_CR_AQA", func, s);
28714c87aefeSPatrick Mooney 		break;
28724c87aefeSPatrick Mooney 	case NVME_CR_ASQ_LOW:
28736960cd89SAndy Fiddaman 		DPRINTF("%s %s NVME_CR_ASQ_LOW", func, s);
28744c87aefeSPatrick Mooney 		break;
28754c87aefeSPatrick Mooney 	case NVME_CR_ASQ_HI:
28766960cd89SAndy Fiddaman 		DPRINTF("%s %s NVME_CR_ASQ_HI", func, s);
28774c87aefeSPatrick Mooney 		break;
28784c87aefeSPatrick Mooney 	case NVME_CR_ACQ_LOW:
28796960cd89SAndy Fiddaman 		DPRINTF("%s %s NVME_CR_ACQ_LOW", func, s);
28804c87aefeSPatrick Mooney 		break;
28814c87aefeSPatrick Mooney 	case NVME_CR_ACQ_HI:
28826960cd89SAndy Fiddaman 		DPRINTF("%s %s NVME_CR_ACQ_HI", func, s);
28834c87aefeSPatrick Mooney 		break;
28844c87aefeSPatrick Mooney 	default:
28856960cd89SAndy Fiddaman 		DPRINTF("unknown nvme bar-0 offset 0x%lx", offset);
28864c87aefeSPatrick Mooney 	}
28874c87aefeSPatrick Mooney 
28884c87aefeSPatrick Mooney }
28894c87aefeSPatrick Mooney 
28904c87aefeSPatrick Mooney static void
28914c87aefeSPatrick Mooney pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc* sc,
28924c87aefeSPatrick Mooney 	uint64_t offset, int size, uint64_t value)
28934c87aefeSPatrick Mooney {
28944c87aefeSPatrick Mooney 	uint32_t ccreg;
28954c87aefeSPatrick Mooney 
28964c87aefeSPatrick Mooney 	if (offset >= NVME_DOORBELL_OFFSET) {
28974c87aefeSPatrick Mooney 		uint64_t belloffset = offset - NVME_DOORBELL_OFFSET;
28984c87aefeSPatrick Mooney 		uint64_t idx = belloffset / 8; /* door bell size = 2*int */
28994c87aefeSPatrick Mooney 		int is_sq = (belloffset % 8) < 4;
29004c87aefeSPatrick Mooney 
29014c87aefeSPatrick Mooney 		if (belloffset > ((sc->max_queues+1) * 8 - 4)) {
29026960cd89SAndy Fiddaman 			WPRINTF("guest attempted an overflow write offset "
29034c87aefeSPatrick Mooney 			         "0x%lx, val 0x%lx in %s",
29046960cd89SAndy Fiddaman 			         offset, value, __func__);
29054c87aefeSPatrick Mooney 			return;
29064c87aefeSPatrick Mooney 		}
29074c87aefeSPatrick Mooney 
29084c87aefeSPatrick Mooney 		pci_nvme_handle_doorbell(ctx, sc, idx, is_sq, value);
29094c87aefeSPatrick Mooney 		return;
29104c87aefeSPatrick Mooney 	}
29114c87aefeSPatrick Mooney 
29126960cd89SAndy Fiddaman 	DPRINTF("nvme-write offset 0x%lx, size %d, value 0x%lx",
29136960cd89SAndy Fiddaman 	        offset, size, value);
29144c87aefeSPatrick Mooney 
29154c87aefeSPatrick Mooney 	if (size != 4) {
29166960cd89SAndy Fiddaman 		WPRINTF("guest wrote invalid size %d (offset 0x%lx, "
29174c87aefeSPatrick Mooney 		         "val 0x%lx) to bar0 in %s",
29186960cd89SAndy Fiddaman 		         size, offset, value, __func__);
29194c87aefeSPatrick Mooney 		/* TODO: shutdown device */
29204c87aefeSPatrick Mooney 		return;
29214c87aefeSPatrick Mooney 	}
29224c87aefeSPatrick Mooney 
29234c87aefeSPatrick Mooney 	pci_nvme_bar0_reg_dumps(__func__, offset, 1);
29244c87aefeSPatrick Mooney 
29254c87aefeSPatrick Mooney 	pthread_mutex_lock(&sc->mtx);
29264c87aefeSPatrick Mooney 
29274c87aefeSPatrick Mooney 	switch (offset) {
29284c87aefeSPatrick Mooney 	case NVME_CR_CAP_LOW:
29294c87aefeSPatrick Mooney 	case NVME_CR_CAP_HI:
29304c87aefeSPatrick Mooney 		/* readonly */
29314c87aefeSPatrick Mooney 		break;
29324c87aefeSPatrick Mooney 	case NVME_CR_VS:
29334c87aefeSPatrick Mooney 		/* readonly */
29344c87aefeSPatrick Mooney 		break;
29354c87aefeSPatrick Mooney 	case NVME_CR_INTMS:
29364c87aefeSPatrick Mooney 		/* MSI-X, so ignore */
29374c87aefeSPatrick Mooney 		break;
29384c87aefeSPatrick Mooney 	case NVME_CR_INTMC:
29394c87aefeSPatrick Mooney 		/* MSI-X, so ignore */
29404c87aefeSPatrick Mooney 		break;
29414c87aefeSPatrick Mooney 	case NVME_CR_CC:
29424c87aefeSPatrick Mooney 		ccreg = (uint32_t)value;
29434c87aefeSPatrick Mooney 
29446960cd89SAndy Fiddaman 		DPRINTF("%s NVME_CR_CC en %x css %x shn %x iosqes %u "
2945154972afSPatrick Mooney 		         "iocqes %u",
29464c87aefeSPatrick Mooney 		        __func__,
29474c87aefeSPatrick Mooney 			 NVME_CC_GET_EN(ccreg), NVME_CC_GET_CSS(ccreg),
29484c87aefeSPatrick Mooney 			 NVME_CC_GET_SHN(ccreg), NVME_CC_GET_IOSQES(ccreg),
29496960cd89SAndy Fiddaman 			 NVME_CC_GET_IOCQES(ccreg));
29504c87aefeSPatrick Mooney 
29514c87aefeSPatrick Mooney 		if (NVME_CC_GET_SHN(ccreg)) {
29524c87aefeSPatrick Mooney 			/* perform shutdown - flush out data to backend */
29534c87aefeSPatrick Mooney 			sc->regs.csts &= ~(NVME_CSTS_REG_SHST_MASK <<
29544c87aefeSPatrick Mooney 			    NVME_CSTS_REG_SHST_SHIFT);
29554c87aefeSPatrick Mooney 			sc->regs.csts |= NVME_SHST_COMPLETE <<
29564c87aefeSPatrick Mooney 			    NVME_CSTS_REG_SHST_SHIFT;
29574c87aefeSPatrick Mooney 		}
29584c87aefeSPatrick Mooney 		if (NVME_CC_GET_EN(ccreg) != NVME_CC_GET_EN(sc->regs.cc)) {
29594c87aefeSPatrick Mooney 			if (NVME_CC_GET_EN(ccreg) == 0)
29604c87aefeSPatrick Mooney 				/* transition 1-> causes controller reset */
29614c87aefeSPatrick Mooney 				pci_nvme_reset_locked(sc);
29624c87aefeSPatrick Mooney 			else
29634c87aefeSPatrick Mooney 				pci_nvme_init_controller(ctx, sc);
29644c87aefeSPatrick Mooney 		}
29654c87aefeSPatrick Mooney 
29664c87aefeSPatrick Mooney 		/* Insert the iocqes, iosqes and en bits from the write */
29674c87aefeSPatrick Mooney 		sc->regs.cc &= ~NVME_CC_WRITE_MASK;
29684c87aefeSPatrick Mooney 		sc->regs.cc |= ccreg & NVME_CC_WRITE_MASK;
29694c87aefeSPatrick Mooney 		if (NVME_CC_GET_EN(ccreg) == 0) {
29704c87aefeSPatrick Mooney 			/* Insert the ams, mps and css bit fields */
29714c87aefeSPatrick Mooney 			sc->regs.cc &= ~NVME_CC_NEN_WRITE_MASK;
29724c87aefeSPatrick Mooney 			sc->regs.cc |= ccreg & NVME_CC_NEN_WRITE_MASK;
29734c87aefeSPatrick Mooney 			sc->regs.csts &= ~NVME_CSTS_RDY;
29744c87aefeSPatrick Mooney 		} else if (sc->pending_ios == 0) {
29754c87aefeSPatrick Mooney 			sc->regs.csts |= NVME_CSTS_RDY;
29764c87aefeSPatrick Mooney 		}
29774c87aefeSPatrick Mooney 		break;
29784c87aefeSPatrick Mooney 	case NVME_CR_CSTS:
29794c87aefeSPatrick Mooney 		break;
29804c87aefeSPatrick Mooney 	case NVME_CR_NSSR:
29814c87aefeSPatrick Mooney 		/* ignore writes; don't support subsystem reset */
29824c87aefeSPatrick Mooney 		break;
29834c87aefeSPatrick Mooney 	case NVME_CR_AQA:
29844c87aefeSPatrick Mooney 		sc->regs.aqa = (uint32_t)value;
29854c87aefeSPatrick Mooney 		break;
29864c87aefeSPatrick Mooney 	case NVME_CR_ASQ_LOW:
29874c87aefeSPatrick Mooney 		sc->regs.asq = (sc->regs.asq & (0xFFFFFFFF00000000)) |
29884c87aefeSPatrick Mooney 		               (0xFFFFF000 & value);
29894c87aefeSPatrick Mooney 		break;
29904c87aefeSPatrick Mooney 	case NVME_CR_ASQ_HI:
29914c87aefeSPatrick Mooney 		sc->regs.asq = (sc->regs.asq & (0x00000000FFFFFFFF)) |
29924c87aefeSPatrick Mooney 		               (value << 32);
29934c87aefeSPatrick Mooney 		break;
29944c87aefeSPatrick Mooney 	case NVME_CR_ACQ_LOW:
29954c87aefeSPatrick Mooney 		sc->regs.acq = (sc->regs.acq & (0xFFFFFFFF00000000)) |
29964c87aefeSPatrick Mooney 		               (0xFFFFF000 & value);
29974c87aefeSPatrick Mooney 		break;
29984c87aefeSPatrick Mooney 	case NVME_CR_ACQ_HI:
29994c87aefeSPatrick Mooney 		sc->regs.acq = (sc->regs.acq & (0x00000000FFFFFFFF)) |
30004c87aefeSPatrick Mooney 		               (value << 32);
30014c87aefeSPatrick Mooney 		break;
30024c87aefeSPatrick Mooney 	default:
30036960cd89SAndy Fiddaman 		DPRINTF("%s unknown offset 0x%lx, value 0x%lx size %d",
30046960cd89SAndy Fiddaman 		         __func__, offset, value, size);
30054c87aefeSPatrick Mooney 	}
30064c87aefeSPatrick Mooney 	pthread_mutex_unlock(&sc->mtx);
30074c87aefeSPatrick Mooney }
30084c87aefeSPatrick Mooney 
30094c87aefeSPatrick Mooney static void
30104c87aefeSPatrick Mooney pci_nvme_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
30114c87aefeSPatrick Mooney                 int baridx, uint64_t offset, int size, uint64_t value)
30124c87aefeSPatrick Mooney {
30134c87aefeSPatrick Mooney 	struct pci_nvme_softc* sc = pi->pi_arg;
30144c87aefeSPatrick Mooney 
30154c87aefeSPatrick Mooney 	if (baridx == pci_msix_table_bar(pi) ||
30164c87aefeSPatrick Mooney 	    baridx == pci_msix_pba_bar(pi)) {
30176960cd89SAndy Fiddaman 		DPRINTF("nvme-write baridx %d, msix: off 0x%lx, size %d, "
30186960cd89SAndy Fiddaman 		         " value 0x%lx", baridx, offset, size, value);
30194c87aefeSPatrick Mooney 
30204c87aefeSPatrick Mooney 		pci_emul_msix_twrite(pi, offset, size, value);
30214c87aefeSPatrick Mooney 		return;
30224c87aefeSPatrick Mooney 	}
30234c87aefeSPatrick Mooney 
30244c87aefeSPatrick Mooney 	switch (baridx) {
30254c87aefeSPatrick Mooney 	case 0:
30264c87aefeSPatrick Mooney 		pci_nvme_write_bar_0(ctx, sc, offset, size, value);
30274c87aefeSPatrick Mooney 		break;
30284c87aefeSPatrick Mooney 
30294c87aefeSPatrick Mooney 	default:
30306960cd89SAndy Fiddaman 		DPRINTF("%s unknown baridx %d, val 0x%lx",
30316960cd89SAndy Fiddaman 		         __func__, baridx, value);
30324c87aefeSPatrick Mooney 	}
30334c87aefeSPatrick Mooney }
30344c87aefeSPatrick Mooney 
30354c87aefeSPatrick Mooney static uint64_t pci_nvme_read_bar_0(struct pci_nvme_softc* sc,
30364c87aefeSPatrick Mooney 	uint64_t offset, int size)
30374c87aefeSPatrick Mooney {
30384c87aefeSPatrick Mooney 	uint64_t value;
30394c87aefeSPatrick Mooney 
30404c87aefeSPatrick Mooney 	pci_nvme_bar0_reg_dumps(__func__, offset, 0);
30414c87aefeSPatrick Mooney 
30424c87aefeSPatrick Mooney 	if (offset < NVME_DOORBELL_OFFSET) {
30434c87aefeSPatrick Mooney 		void *p = &(sc->regs);
30444c87aefeSPatrick Mooney 		pthread_mutex_lock(&sc->mtx);
30454c87aefeSPatrick Mooney 		memcpy(&value, (void *)((uintptr_t)p + offset), size);
30464c87aefeSPatrick Mooney 		pthread_mutex_unlock(&sc->mtx);
30474c87aefeSPatrick Mooney 	} else {
30484c87aefeSPatrick Mooney 		value = 0;
30496960cd89SAndy Fiddaman                 WPRINTF("pci_nvme: read invalid offset %ld", offset);
30504c87aefeSPatrick Mooney 	}
30514c87aefeSPatrick Mooney 
30524c87aefeSPatrick Mooney 	switch (size) {
30534c87aefeSPatrick Mooney 	case 1:
30544c87aefeSPatrick Mooney 		value &= 0xFF;
30554c87aefeSPatrick Mooney 		break;
30564c87aefeSPatrick Mooney 	case 2:
30574c87aefeSPatrick Mooney 		value &= 0xFFFF;
30584c87aefeSPatrick Mooney 		break;
30594c87aefeSPatrick Mooney 	case 4:
30604c87aefeSPatrick Mooney 		value &= 0xFFFFFFFF;
30614c87aefeSPatrick Mooney 		break;
30624c87aefeSPatrick Mooney 	}
30634c87aefeSPatrick Mooney 
30646960cd89SAndy Fiddaman 	DPRINTF("   nvme-read offset 0x%lx, size %d -> value 0x%x",
30656960cd89SAndy Fiddaman 	         offset, size, (uint32_t)value);
30664c87aefeSPatrick Mooney 
30674c87aefeSPatrick Mooney 	return (value);
30684c87aefeSPatrick Mooney }
30694c87aefeSPatrick Mooney 
30704c87aefeSPatrick Mooney 
30714c87aefeSPatrick Mooney 
30724c87aefeSPatrick Mooney static uint64_t
30734c87aefeSPatrick Mooney pci_nvme_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
30744c87aefeSPatrick Mooney     uint64_t offset, int size)
30754c87aefeSPatrick Mooney {
30764c87aefeSPatrick Mooney 	struct pci_nvme_softc* sc = pi->pi_arg;
30774c87aefeSPatrick Mooney 
30784c87aefeSPatrick Mooney 	if (baridx == pci_msix_table_bar(pi) ||
30794c87aefeSPatrick Mooney 	    baridx == pci_msix_pba_bar(pi)) {
30806960cd89SAndy Fiddaman 		DPRINTF("nvme-read bar: %d, msix: regoff 0x%lx, size %d",
30816960cd89SAndy Fiddaman 		        baridx, offset, size);
30824c87aefeSPatrick Mooney 
30834c87aefeSPatrick Mooney 		return pci_emul_msix_tread(pi, offset, size);
30844c87aefeSPatrick Mooney 	}
30854c87aefeSPatrick Mooney 
30864c87aefeSPatrick Mooney 	switch (baridx) {
30874c87aefeSPatrick Mooney 	case 0:
30884c87aefeSPatrick Mooney        		return pci_nvme_read_bar_0(sc, offset, size);
30894c87aefeSPatrick Mooney 
30904c87aefeSPatrick Mooney 	default:
30916960cd89SAndy Fiddaman 		DPRINTF("unknown bar %d, 0x%lx", baridx, offset);
30924c87aefeSPatrick Mooney 	}
30934c87aefeSPatrick Mooney 
30944c87aefeSPatrick Mooney 	return (0);
30954c87aefeSPatrick Mooney }
30964c87aefeSPatrick Mooney 
30974c87aefeSPatrick Mooney static int
30982b948146SAndy Fiddaman pci_nvme_parse_config(struct pci_nvme_softc *sc, nvlist_t *nvl)
30994c87aefeSPatrick Mooney {
31004c87aefeSPatrick Mooney 	char bident[sizeof("XX:X:X")];
31012b948146SAndy Fiddaman 	const char *value;
31024c87aefeSPatrick Mooney 	uint32_t sectsz;
31034c87aefeSPatrick Mooney 
31044c87aefeSPatrick Mooney 	sc->max_queues = NVME_QUEUES;
31054c87aefeSPatrick Mooney 	sc->max_qentries = NVME_MAX_QENTRIES;
31064c87aefeSPatrick Mooney 	sc->ioslots = NVME_IOSLOTS;
31074c87aefeSPatrick Mooney 	sc->num_squeues = sc->max_queues;
31084c87aefeSPatrick Mooney 	sc->num_cqueues = sc->max_queues;
3109154972afSPatrick Mooney 	sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO;
31104c87aefeSPatrick Mooney 	sectsz = 0;
31114c87aefeSPatrick Mooney 	snprintf(sc->ctrldata.sn, sizeof(sc->ctrldata.sn),
31124c87aefeSPatrick Mooney 	         "NVME-%d-%d", sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func);
31134c87aefeSPatrick Mooney 
31142b948146SAndy Fiddaman 	value = get_config_value_node(nvl, "maxq");
31152b948146SAndy Fiddaman 	if (value != NULL)
31162b948146SAndy Fiddaman 		sc->max_queues = atoi(value);
31172b948146SAndy Fiddaman 	value = get_config_value_node(nvl, "qsz");
31182b948146SAndy Fiddaman 	if (value != NULL) {
31192b948146SAndy Fiddaman 		sc->max_qentries = atoi(value);
31202b948146SAndy Fiddaman 		if (sc->max_qentries <= 0) {
31212b948146SAndy Fiddaman 			EPRINTLN("nvme: Invalid qsz option %d",
31222b948146SAndy Fiddaman 			    sc->max_qentries);
31232b948146SAndy Fiddaman 			return (-1);
31242b948146SAndy Fiddaman 		}
31252b948146SAndy Fiddaman 	}
31262b948146SAndy Fiddaman 	value = get_config_value_node(nvl, "ioslots");
31272b948146SAndy Fiddaman 	if (value != NULL) {
31282b948146SAndy Fiddaman 		sc->ioslots = atoi(value);
31292b948146SAndy Fiddaman 		if (sc->ioslots <= 0) {
31302b948146SAndy Fiddaman 			EPRINTLN("Invalid ioslots option %d", sc->ioslots);
31312b948146SAndy Fiddaman 			return (-1);
31322b948146SAndy Fiddaman 		}
31332b948146SAndy Fiddaman 	}
31342b948146SAndy Fiddaman 	value = get_config_value_node(nvl, "sectsz");
31352b948146SAndy Fiddaman 	if (value != NULL)
31362b948146SAndy Fiddaman 		sectsz = atoi(value);
31372b948146SAndy Fiddaman 	value = get_config_value_node(nvl, "ser");
31382b948146SAndy Fiddaman 	if (value != NULL) {
31394c87aefeSPatrick Mooney 		/*
31404c87aefeSPatrick Mooney 		 * This field indicates the Product Serial Number in
31414c87aefeSPatrick Mooney 		 * 7-bit ASCII, unused bytes should be space characters.
31424c87aefeSPatrick Mooney 		 * Ref: NVMe v1.3c.
31434c87aefeSPatrick Mooney 		 */
31444c87aefeSPatrick Mooney 		cpywithpad((char *)sc->ctrldata.sn,
31452b948146SAndy Fiddaman 		    sizeof(sc->ctrldata.sn), value, ' ');
31462b948146SAndy Fiddaman 	}
31472b948146SAndy Fiddaman 	value = get_config_value_node(nvl, "eui64");
31482b948146SAndy Fiddaman 	if (value != NULL)
31492b948146SAndy Fiddaman 		sc->nvstore.eui64 = htobe64(strtoull(value, NULL, 0));
31502b948146SAndy Fiddaman 	value = get_config_value_node(nvl, "dsm");
31512b948146SAndy Fiddaman 	if (value != NULL) {
31522b948146SAndy Fiddaman 		if (strcmp(value, "auto") == 0)
31532b948146SAndy Fiddaman 			sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO;
31542b948146SAndy Fiddaman 		else if (strcmp(value, "enable") == 0)
31552b948146SAndy Fiddaman 			sc->dataset_management = NVME_DATASET_MANAGEMENT_ENABLE;
31562b948146SAndy Fiddaman 		else if (strcmp(value, "disable") == 0)
31572b948146SAndy Fiddaman 			sc->dataset_management = NVME_DATASET_MANAGEMENT_DISABLE;
31582b948146SAndy Fiddaman 	}
31592b948146SAndy Fiddaman 
31602b948146SAndy Fiddaman 	value = get_config_value_node(nvl, "ram");
31612b948146SAndy Fiddaman 	if (value != NULL) {
31622b948146SAndy Fiddaman 		uint64_t sz = strtoull(value, NULL, 10);
31634c87aefeSPatrick Mooney 
31644c87aefeSPatrick Mooney 		sc->nvstore.type = NVME_STOR_RAM;
31654c87aefeSPatrick Mooney 		sc->nvstore.size = sz * 1024 * 1024;
31664c87aefeSPatrick Mooney 		sc->nvstore.ctx = calloc(1, sc->nvstore.size);
31674c87aefeSPatrick Mooney 		sc->nvstore.sectsz = 4096;
31684c87aefeSPatrick Mooney 		sc->nvstore.sectsz_bits = 12;
31694c87aefeSPatrick Mooney 		if (sc->nvstore.ctx == NULL) {
31702b948146SAndy Fiddaman 			EPRINTLN("nvme: Unable to allocate RAM");
31714c87aefeSPatrick Mooney 			return (-1);
31724c87aefeSPatrick Mooney 		}
31732b948146SAndy Fiddaman 	} else {
31744c87aefeSPatrick Mooney 		snprintf(bident, sizeof(bident), "%d:%d",
31754c87aefeSPatrick Mooney 		    sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func);
31762b948146SAndy Fiddaman 		sc->nvstore.ctx = blockif_open(nvl, bident);
31774c87aefeSPatrick Mooney 		if (sc->nvstore.ctx == NULL) {
31782b948146SAndy Fiddaman 			EPRINTLN("nvme: Could not open backing file: %s",
31792b948146SAndy Fiddaman 			    strerror(errno));
31804c87aefeSPatrick Mooney 			return (-1);
31814c87aefeSPatrick Mooney 		}
31824c87aefeSPatrick Mooney 		sc->nvstore.type = NVME_STOR_BLOCKIF;
31834c87aefeSPatrick Mooney 		sc->nvstore.size = blockif_size(sc->nvstore.ctx);
31844c87aefeSPatrick Mooney 	}
31854c87aefeSPatrick Mooney 
31864c87aefeSPatrick Mooney 	if (sectsz == 512 || sectsz == 4096 || sectsz == 8192)
31874c87aefeSPatrick Mooney 		sc->nvstore.sectsz = sectsz;
31884c87aefeSPatrick Mooney 	else if (sc->nvstore.type != NVME_STOR_RAM)
31894c87aefeSPatrick Mooney 		sc->nvstore.sectsz = blockif_sectsz(sc->nvstore.ctx);
31904c87aefeSPatrick Mooney 	for (sc->nvstore.sectsz_bits = 9;
31914c87aefeSPatrick Mooney 	     (1 << sc->nvstore.sectsz_bits) < sc->nvstore.sectsz;
31924c87aefeSPatrick Mooney 	     sc->nvstore.sectsz_bits++);
31934c87aefeSPatrick Mooney 
31944c87aefeSPatrick Mooney 	if (sc->max_queues <= 0 || sc->max_queues > NVME_QUEUES)
31954c87aefeSPatrick Mooney 		sc->max_queues = NVME_QUEUES;
31964c87aefeSPatrick Mooney 
31974c87aefeSPatrick Mooney 	return (0);
31984c87aefeSPatrick Mooney }
31994c87aefeSPatrick Mooney 
32006dc98349SAndy Fiddaman static void
32016dc98349SAndy Fiddaman pci_nvme_resized(struct blockif_ctxt *bctxt, void *arg, size_t new_size)
32026dc98349SAndy Fiddaman {
32036dc98349SAndy Fiddaman 	struct pci_nvme_softc *sc;
32046dc98349SAndy Fiddaman 	struct pci_nvme_blockstore *nvstore;
32056dc98349SAndy Fiddaman 	struct nvme_namespace_data *nd;
32066dc98349SAndy Fiddaman 
32076dc98349SAndy Fiddaman 	sc = arg;
32086dc98349SAndy Fiddaman 	nvstore = &sc->nvstore;
32096dc98349SAndy Fiddaman 	nd = &sc->nsdata;
32106dc98349SAndy Fiddaman 
32116dc98349SAndy Fiddaman 	nvstore->size = new_size;
32126dc98349SAndy Fiddaman 	pci_nvme_init_nsdata_size(nvstore, nd);
32136dc98349SAndy Fiddaman 
32146dc98349SAndy Fiddaman 	/* Add changed NSID to list */
32156dc98349SAndy Fiddaman 	sc->ns_log.ns[0] = 1;
32166dc98349SAndy Fiddaman 	sc->ns_log.ns[1] = 0;
32176dc98349SAndy Fiddaman 
32186dc98349SAndy Fiddaman 	pci_nvme_aen_post(sc, PCI_NVME_AE_TYPE_NOTICE,
3219*d7b72f7bSAndy Fiddaman 	    PCI_NVME_AEI_NOTICE_NS_ATTR_CHANGED);
32206dc98349SAndy Fiddaman }
32216dc98349SAndy Fiddaman 
32224c87aefeSPatrick Mooney static int
32232b948146SAndy Fiddaman pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
32244c87aefeSPatrick Mooney {
32254c87aefeSPatrick Mooney 	struct pci_nvme_softc *sc;
32264c87aefeSPatrick Mooney 	uint32_t pci_membar_sz;
32274c87aefeSPatrick Mooney 	int	error;
32284c87aefeSPatrick Mooney 
32294c87aefeSPatrick Mooney 	error = 0;
32304c87aefeSPatrick Mooney 
32314c87aefeSPatrick Mooney 	sc = calloc(1, sizeof(struct pci_nvme_softc));
32324c87aefeSPatrick Mooney 	pi->pi_arg = sc;
32334c87aefeSPatrick Mooney 	sc->nsc_pi = pi;
32344c87aefeSPatrick Mooney 
32352b948146SAndy Fiddaman 	error = pci_nvme_parse_config(sc, nvl);
32364c87aefeSPatrick Mooney 	if (error < 0)
32374c87aefeSPatrick Mooney 		goto done;
32384c87aefeSPatrick Mooney 	else
32394c87aefeSPatrick Mooney 		error = 0;
32404c87aefeSPatrick Mooney 
3241154972afSPatrick Mooney 	STAILQ_INIT(&sc->ioreqs_free);
32424c87aefeSPatrick Mooney 	sc->ioreqs = calloc(sc->ioslots, sizeof(struct pci_nvme_ioreq));
32434c87aefeSPatrick Mooney 	for (int i = 0; i < sc->ioslots; i++) {
3244154972afSPatrick Mooney 		STAILQ_INSERT_TAIL(&sc->ioreqs_free, &sc->ioreqs[i], link);
32454c87aefeSPatrick Mooney 	}
32464c87aefeSPatrick Mooney 
32474c87aefeSPatrick Mooney 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0A0A);
32484c87aefeSPatrick Mooney 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0xFB5D);
32494c87aefeSPatrick Mooney 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
32504c87aefeSPatrick Mooney 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_NVM);
32514c87aefeSPatrick Mooney 	pci_set_cfgdata8(pi, PCIR_PROGIF,
32524c87aefeSPatrick Mooney 	                 PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0);
32534c87aefeSPatrick Mooney 
32544c87aefeSPatrick Mooney 	/*
32554c87aefeSPatrick Mooney 	 * Allocate size of NVMe registers + doorbell space for all queues.
32564c87aefeSPatrick Mooney 	 *
32574c87aefeSPatrick Mooney 	 * The specification requires a minimum memory I/O window size of 16K.
32584c87aefeSPatrick Mooney 	 * The Windows driver will refuse to start a device with a smaller
32594c87aefeSPatrick Mooney 	 * window.
32604c87aefeSPatrick Mooney 	 */
32614c87aefeSPatrick Mooney 	pci_membar_sz = sizeof(struct nvme_registers) +
32624c87aefeSPatrick Mooney 	    2 * sizeof(uint32_t) * (sc->max_queues + 1);
32634c87aefeSPatrick Mooney 	pci_membar_sz = MAX(pci_membar_sz, NVME_MMIO_SPACE_MIN);
32644c87aefeSPatrick Mooney 
32656960cd89SAndy Fiddaman 	DPRINTF("nvme membar size: %u", pci_membar_sz);
32664c87aefeSPatrick Mooney 
32674c87aefeSPatrick Mooney 	error = pci_emul_alloc_bar(pi, 0, PCIBAR_MEM64, pci_membar_sz);
32684c87aefeSPatrick Mooney 	if (error) {
32696960cd89SAndy Fiddaman 		WPRINTF("%s pci alloc mem bar failed", __func__);
32704c87aefeSPatrick Mooney 		goto done;
32714c87aefeSPatrick Mooney 	}
32724c87aefeSPatrick Mooney 
32734c87aefeSPatrick Mooney 	error = pci_emul_add_msixcap(pi, sc->max_queues + 1, NVME_MSIX_BAR);
32744c87aefeSPatrick Mooney 	if (error) {
32756960cd89SAndy Fiddaman 		WPRINTF("%s pci add msixcap failed", __func__);
32764c87aefeSPatrick Mooney 		goto done;
32774c87aefeSPatrick Mooney 	}
32784c87aefeSPatrick Mooney 
327984659b24SMichael Zeller 	error = pci_emul_add_pciecap(pi, PCIEM_TYPE_ROOT_INT_EP);
328084659b24SMichael Zeller 	if (error) {
32816960cd89SAndy Fiddaman 		WPRINTF("%s pci add Express capability failed", __func__);
328284659b24SMichael Zeller 		goto done;
328384659b24SMichael Zeller 	}
328484659b24SMichael Zeller 
32854c87aefeSPatrick Mooney 	pthread_mutex_init(&sc->mtx, NULL);
32864c87aefeSPatrick Mooney 	sem_init(&sc->iosemlock, 0, sc->ioslots);
32876dc98349SAndy Fiddaman 	blockif_register_resize_callback(sc->nvstore.ctx, pci_nvme_resized, sc);
32884c87aefeSPatrick Mooney 
32896960cd89SAndy Fiddaman 	pci_nvme_init_queues(sc, sc->max_queues, sc->max_queues);
3290154972afSPatrick Mooney 	/*
3291154972afSPatrick Mooney 	 * Controller data depends on Namespace data so initialize Namespace
3292154972afSPatrick Mooney 	 * data first.
3293154972afSPatrick Mooney 	 */
3294154972afSPatrick Mooney 	pci_nvme_init_nsdata(sc, &sc->nsdata, 1, &sc->nvstore);
32954c87aefeSPatrick Mooney 	pci_nvme_init_ctrldata(sc);
32964c87aefeSPatrick Mooney 	pci_nvme_init_logpages(sc);
32976960cd89SAndy Fiddaman 	pci_nvme_init_features(sc);
32986960cd89SAndy Fiddaman 
32996960cd89SAndy Fiddaman 	pci_nvme_aer_init(sc);
33006dc98349SAndy Fiddaman 	pci_nvme_aen_init(sc);
33016960cd89SAndy Fiddaman 
33026960cd89SAndy Fiddaman 	pci_nvme_reset(sc);
33034c87aefeSPatrick Mooney 
33044c87aefeSPatrick Mooney 	pci_lintr_request(pi);
33054c87aefeSPatrick Mooney 
33064c87aefeSPatrick Mooney done:
33074c87aefeSPatrick Mooney 	return (error);
33084c87aefeSPatrick Mooney }
33094c87aefeSPatrick Mooney 
331076e6cd87SAndy Fiddaman static int
3311b0de25cbSAndy Fiddaman pci_nvme_legacy_config(nvlist_t *nvl, const char *opts)
331276e6cd87SAndy Fiddaman {
3313b0de25cbSAndy Fiddaman 	char *cp, *ram;
331476e6cd87SAndy Fiddaman 
3315b0de25cbSAndy Fiddaman 	if (opts == NULL)
3316b0de25cbSAndy Fiddaman 		return (0);
331776e6cd87SAndy Fiddaman 
3318b0de25cbSAndy Fiddaman 	if (strncmp(opts, "ram=", 4) == 0) {
3319b0de25cbSAndy Fiddaman 		cp = strchr(opts, ',');
3320b0de25cbSAndy Fiddaman 		if (cp == NULL) {
3321b0de25cbSAndy Fiddaman 			set_config_value_node(nvl, "ram", opts + 4);
3322b0de25cbSAndy Fiddaman 			return (0);
332376e6cd87SAndy Fiddaman 		}
3324b0de25cbSAndy Fiddaman 		ram = strndup(opts + 4, cp - opts - 4);
3325b0de25cbSAndy Fiddaman 		set_config_value_node(nvl, "ram", ram);
3326b0de25cbSAndy Fiddaman 		free(ram);
3327b0de25cbSAndy Fiddaman 		return (pci_parse_legacy_config(nvl, cp + 1));
3328b0de25cbSAndy Fiddaman 	} else
3329b0de25cbSAndy Fiddaman 		return (blockif_legacy_config(nvl, opts));
3330b0de25cbSAndy Fiddaman }
33314c87aefeSPatrick Mooney 
33324c87aefeSPatrick Mooney struct pci_devemu pci_de_nvme = {
33334c87aefeSPatrick Mooney 	.pe_emu =	"nvme",
33344c87aefeSPatrick Mooney 	.pe_init =	pci_nvme_init,
3335b0de25cbSAndy Fiddaman 	.pe_legacy_config = pci_nvme_legacy_config,
33364c87aefeSPatrick Mooney 	.pe_barwrite =	pci_nvme_write,
33374c87aefeSPatrick Mooney 	.pe_barread =	pci_nvme_read
33384c87aefeSPatrick Mooney };
33394c87aefeSPatrick Mooney PCI_EMUL_SET(pci_de_nvme);
3340