xref: /freebsd/sys/cam/nvme/nvme_da.c (revision d38677d23c2545b2d98c93ef13a6c9af9c2d28cc)
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015 Netflix, Inc
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer,
 *    without modification, immediately at the beginning of the file.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Derived from ata_da.c:
 * Copyright (c) 2009 Alexander Motin <mav@FreeBSD.org>
 */
31baabaca3SWarner Losh 
32baabaca3SWarner Losh #include <sys/cdefs.h>
33baabaca3SWarner Losh __FBSDID("$FreeBSD$");
34baabaca3SWarner Losh 
35baabaca3SWarner Losh #include <sys/param.h>
36baabaca3SWarner Losh 
37baabaca3SWarner Losh #ifdef _KERNEL
38baabaca3SWarner Losh #include <sys/systm.h>
39baabaca3SWarner Losh #include <sys/kernel.h>
40baabaca3SWarner Losh #include <sys/bio.h>
41baabaca3SWarner Losh #include <sys/sysctl.h>
42baabaca3SWarner Losh #include <sys/taskqueue.h>
43baabaca3SWarner Losh #include <sys/lock.h>
44baabaca3SWarner Losh #include <sys/mutex.h>
45baabaca3SWarner Losh #include <sys/conf.h>
46baabaca3SWarner Losh #include <sys/devicestat.h>
47baabaca3SWarner Losh #include <sys/eventhandler.h>
48baabaca3SWarner Losh #include <sys/malloc.h>
49baabaca3SWarner Losh #include <sys/cons.h>
50baabaca3SWarner Losh #include <sys/proc.h>
51baabaca3SWarner Losh #include <sys/reboot.h>
52baabaca3SWarner Losh #include <geom/geom_disk.h>
53baabaca3SWarner Losh #endif /* _KERNEL */
54baabaca3SWarner Losh 
55baabaca3SWarner Losh #ifndef _KERNEL
56baabaca3SWarner Losh #include <stdio.h>
57baabaca3SWarner Losh #include <string.h>
58baabaca3SWarner Losh #endif /* _KERNEL */
59baabaca3SWarner Losh 
60baabaca3SWarner Losh #include <cam/cam.h>
61baabaca3SWarner Losh #include <cam/cam_ccb.h>
62baabaca3SWarner Losh #include <cam/cam_periph.h>
63baabaca3SWarner Losh #include <cam/cam_xpt_periph.h>
64baabaca3SWarner Losh #include <cam/cam_sim.h>
65baabaca3SWarner Losh #include <cam/cam_iosched.h>
66baabaca3SWarner Losh 
67baabaca3SWarner Losh #include <cam/nvme/nvme_all.h>
68baabaca3SWarner Losh 
/*
 * Periph state, stored in nda_softc.state.  NDA_STATE_NORMAL is the only
 * state defined; ndaschedule() refuses to schedule work in any other state.
 */
typedef enum {
	NDA_STATE_NORMAL
} nda_state;
72baabaca3SWarner Losh 
/*
 * Per-device status bits, stored in nda_softc.flags.
 */
typedef enum {
	NDA_FLAG_OPEN		= 0x0001,	/* set in ndaopen(), cleared in ndaclose() */
	NDA_FLAG_DIRTY		= 0x0002,	/* ndaclose() flushes while this is set */
	NDA_FLAG_SCTX_INIT	= 0x0004,	/* sysctl_ctx was initialized and must be freed */
} nda_flags;
78baabaca3SWarner Losh 
/*
 * Device quirk bits, stored in nda_softc.quirks.
 * NOTE(review): NDA_Q_4K presumably marks a 4K-sector quirk; nothing in the
 * visible code tests it, so confirm before relying on that meaning.
 */
typedef enum {
	NDA_Q_4K   = 0x01,
	NDA_Q_NONE = 0x00,
} nda_quirks;
83baabaca3SWarner Losh 
/*
 * Bit-name string for the quirk mask: a leading \020 byte followed by one
 * \001-prefixed label.  This looks like the kernel printf(9) "%b" layout
 * (output base, then bit-number/name pairs) -- TODO confirm against the
 * place it is printed, which is outside this part of the file.
 */
#define NDA_Q_BIT_STRING	\
	"\020"			\
	"\001Bit 0"
87baabaca3SWarner Losh 
/*
 * Tag kept in a CCB's ccb_state field recording what the CCB was issued
 * for.  NDA_CCB_TYPE_MASK selects the type bits from that field.
 */
typedef enum {
	NDA_CCB_BUFFER_IO	= 0x01,
	NDA_CCB_DUMP            = 0x02,
	NDA_CCB_TRIM            = 0x03,
	NDA_CCB_TYPE_MASK	= 0x0F,
} nda_ccb_state;
94baabaca3SWarner Losh 
/*
 * Offsets into our private area for storing information.
 *
 * ccb_bp and ccb_trim alias the same private pointer slot; which one is
 * meaningful depends on the type recorded in ccb_state.
 */
#define ccb_state	ccb_h.ppriv_field0
#define ccb_bp		ccb_h.ppriv_ptr1	/* For NDA_CCB_BUFFER_IO */
#define ccb_trim	ccb_h.ppriv_ptr1	/* For NDA_CCB_TRIM */
99baabaca3SWarner Losh 
100baabaca3SWarner Losh struct nda_softc {
101baabaca3SWarner Losh 	struct   cam_iosched_softc *cam_iosched;
102baabaca3SWarner Losh 	int			outstanding_cmds;	/* Number of active commands */
103baabaca3SWarner Losh 	int			refcount;		/* Active xpt_action() calls */
104baabaca3SWarner Losh 	nda_state		state;
105baabaca3SWarner Losh 	nda_flags		flags;
106baabaca3SWarner Losh 	nda_quirks		quirks;
107baabaca3SWarner Losh 	int			unmappedio;
108807e94b2SWarner Losh 	quad_t			deletes;
109807e94b2SWarner Losh 	quad_t			dsm_req;
110baabaca3SWarner Losh 	uint32_t		nsid;			/* Namespace ID for this nda device */
111baabaca3SWarner Losh 	struct disk		*disk;
112baabaca3SWarner Losh 	struct task		sysctl_task;
113baabaca3SWarner Losh 	struct sysctl_ctx_list	sysctl_ctx;
114baabaca3SWarner Losh 	struct sysctl_oid	*sysctl_tree;
115*d38677d2SWarner Losh #ifdef CAM_TEST_FAILURE
116*d38677d2SWarner Losh 	int			force_read_error;
117*d38677d2SWarner Losh 	int			force_write_error;
118*d38677d2SWarner Losh 	int			periodic_read_error;
119*d38677d2SWarner Losh 	int			periodic_read_count;
120*d38677d2SWarner Losh #endif
121baabaca3SWarner Losh #ifdef CAM_IO_STATS
122baabaca3SWarner Losh 	struct sysctl_ctx_list	sysctl_stats_ctx;
123baabaca3SWarner Losh 	struct sysctl_oid	*sysctl_stats_tree;
124baabaca3SWarner Losh 	u_int			timeouts;
125baabaca3SWarner Losh 	u_int			errors;
126baabaca3SWarner Losh 	u_int			invalidations;
127baabaca3SWarner Losh #endif
128baabaca3SWarner Losh };
129baabaca3SWarner Losh 
130807e94b2SWarner Losh struct nda_trim_request {
131807e94b2SWarner Losh 	union {
132807e94b2SWarner Losh 		struct nvme_dsm_range dsm;
133807e94b2SWarner Losh 		uint8_t		data[NVME_MAX_DSM_TRIM];
134807e94b2SWarner Losh 	};
135807e94b2SWarner Losh 	TAILQ_HEAD(, bio) bps;
136807e94b2SWarner Losh };
137807e94b2SWarner Losh 
138baabaca3SWarner Losh /* Need quirk table */
139baabaca3SWarner Losh 
140baabaca3SWarner Losh static	disk_strategy_t	ndastrategy;
141baabaca3SWarner Losh static	dumper_t	ndadump;
142baabaca3SWarner Losh static	periph_init_t	ndainit;
143baabaca3SWarner Losh static	void		ndaasync(void *callback_arg, u_int32_t code,
144baabaca3SWarner Losh 				struct cam_path *path, void *arg);
145baabaca3SWarner Losh static	void		ndasysctlinit(void *context, int pending);
146baabaca3SWarner Losh static	periph_ctor_t	ndaregister;
147baabaca3SWarner Losh static	periph_dtor_t	ndacleanup;
148baabaca3SWarner Losh static	periph_start_t	ndastart;
149baabaca3SWarner Losh static	periph_oninv_t	ndaoninvalidate;
150baabaca3SWarner Losh static	void		ndadone(struct cam_periph *periph,
151baabaca3SWarner Losh 			       union ccb *done_ccb);
152baabaca3SWarner Losh static  int		ndaerror(union ccb *ccb, u_int32_t cam_flags,
153baabaca3SWarner Losh 				u_int32_t sense_flags);
154baabaca3SWarner Losh static void		ndashutdown(void *arg, int howto);
155baabaca3SWarner Losh static void		ndasuspend(void *arg);
156baabaca3SWarner Losh 
/*
 * Compile-time defaults.  Each may be overridden by defining the macro
 * before this point (e.g. from the build configuration).
 */
#ifndef	NDA_DEFAULT_SEND_ORDERED
#define	NDA_DEFAULT_SEND_ORDERED	1
#endif
#ifndef NDA_DEFAULT_TIMEOUT
#define NDA_DEFAULT_TIMEOUT 30	/* Timeout in seconds */
#endif
#ifndef	NDA_DEFAULT_RETRY
#define	NDA_DEFAULT_RETRY	4
#endif
#ifndef NDA_MAX_TRIM_ENTRIES
#define NDA_MAX_TRIM_ENTRIES 256	/* Number of DSM trims to use, max 256 */
#endif

/* Run-time copies of the defaults above. */
//static int nda_retry_count = NDA_DEFAULT_RETRY;
static int nda_send_ordered = NDA_DEFAULT_SEND_ORDERED;
static int nda_default_timeout = NDA_DEFAULT_TIMEOUT;	/* seconds; the nda_nvme_*() helpers multiply by 1000 */
static int nda_max_trim_entries = NDA_MAX_TRIM_ENTRIES;

/*
 * All NVMe media is non-rotational, so all nvme device instances
 * share this to implement the sysctl.
 */
static int nda_rotating_media = 0;
180baabaca3SWarner Losh 
181baabaca3SWarner Losh static SYSCTL_NODE(_kern_cam, OID_AUTO, nda, CTLFLAG_RD, 0,
182baabaca3SWarner Losh             "CAM Direct Access Disk driver");
183baabaca3SWarner Losh 
184baabaca3SWarner Losh static struct periph_driver ndadriver =
185baabaca3SWarner Losh {
186baabaca3SWarner Losh 	ndainit, "nda",
187baabaca3SWarner Losh 	TAILQ_HEAD_INITIALIZER(ndadriver.units), /* generation */ 0
188baabaca3SWarner Losh };
189baabaca3SWarner Losh 
190baabaca3SWarner Losh PERIPHDRIVER_DECLARE(nda, ndadriver);
191baabaca3SWarner Losh 
192baabaca3SWarner Losh static MALLOC_DEFINE(M_NVMEDA, "nvme_da", "nvme_da buffers");
193baabaca3SWarner Losh 
194baabaca3SWarner Losh /*
195baabaca3SWarner Losh  * nice wrappers. Maybe these belong in nvme_all.c instead of
196baabaca3SWarner Losh  * here, but this is the only place that uses these. Should
197baabaca3SWarner Losh  * we ever grow another NVME periph, we should move them
198baabaca3SWarner Losh  * all there wholesale.
199baabaca3SWarner Losh  */
200baabaca3SWarner Losh 
201baabaca3SWarner Losh static void
202baabaca3SWarner Losh nda_nvme_flush(struct nda_softc *softc, struct ccb_nvmeio *nvmeio)
203baabaca3SWarner Losh {
204baabaca3SWarner Losh 	cam_fill_nvmeio(nvmeio,
205baabaca3SWarner Losh 	    0,			/* retries */
206baabaca3SWarner Losh 	    ndadone,		/* cbfcnp */
207baabaca3SWarner Losh 	    CAM_DIR_NONE,	/* flags */
208baabaca3SWarner Losh 	    NULL,		/* data_ptr */
209baabaca3SWarner Losh 	    0,			/* dxfer_len */
210717bff5dSWarner Losh 	    nda_default_timeout * 1000); /* timeout 30s */
211baabaca3SWarner Losh 	nvme_ns_flush_cmd(&nvmeio->cmd, softc->nsid);
212baabaca3SWarner Losh }
213baabaca3SWarner Losh 
214baabaca3SWarner Losh static void
215baabaca3SWarner Losh nda_nvme_trim(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
216baabaca3SWarner Losh     void *payload, uint32_t num_ranges)
217baabaca3SWarner Losh {
218baabaca3SWarner Losh 	cam_fill_nvmeio(nvmeio,
219baabaca3SWarner Losh 	    0,			/* retries */
220baabaca3SWarner Losh 	    ndadone,		/* cbfcnp */
221baabaca3SWarner Losh 	    CAM_DIR_OUT,	/* flags */
222baabaca3SWarner Losh 	    payload,		/* data_ptr */
223baabaca3SWarner Losh 	    num_ranges * sizeof(struct nvme_dsm_range), /* dxfer_len */
224717bff5dSWarner Losh 	    nda_default_timeout * 1000); /* timeout 30s */
225baabaca3SWarner Losh 	nvme_ns_trim_cmd(&nvmeio->cmd, softc->nsid, num_ranges);
226baabaca3SWarner Losh }
227baabaca3SWarner Losh 
228baabaca3SWarner Losh static void
229baabaca3SWarner Losh nda_nvme_write(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
230baabaca3SWarner Losh     void *payload, uint64_t lba, uint32_t len, uint32_t count)
231baabaca3SWarner Losh {
232baabaca3SWarner Losh 	cam_fill_nvmeio(nvmeio,
233baabaca3SWarner Losh 	    0,			/* retries */
234baabaca3SWarner Losh 	    ndadone,		/* cbfcnp */
235baabaca3SWarner Losh 	    CAM_DIR_OUT,	/* flags */
236baabaca3SWarner Losh 	    payload,		/* data_ptr */
237baabaca3SWarner Losh 	    len,		/* dxfer_len */
238717bff5dSWarner Losh 	    nda_default_timeout * 1000); /* timeout 30s */
239baabaca3SWarner Losh 	nvme_ns_write_cmd(&nvmeio->cmd, softc->nsid, lba, count);
240baabaca3SWarner Losh }
241baabaca3SWarner Losh 
242baabaca3SWarner Losh static void
243baabaca3SWarner Losh nda_nvme_rw_bio(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
244baabaca3SWarner Losh     struct bio *bp, uint32_t rwcmd)
245baabaca3SWarner Losh {
246baabaca3SWarner Losh 	int flags = rwcmd == NVME_OPC_READ ? CAM_DIR_IN : CAM_DIR_OUT;
247baabaca3SWarner Losh 	void *payload;
248baabaca3SWarner Losh 	uint64_t lba;
249baabaca3SWarner Losh 	uint32_t count;
250baabaca3SWarner Losh 
251baabaca3SWarner Losh 	if (bp->bio_flags & BIO_UNMAPPED) {
252baabaca3SWarner Losh 		flags |= CAM_DATA_BIO;
253baabaca3SWarner Losh 		payload = bp;
254baabaca3SWarner Losh 	} else {
255baabaca3SWarner Losh 		payload = bp->bio_data;
256baabaca3SWarner Losh 	}
257baabaca3SWarner Losh 
258baabaca3SWarner Losh 	lba = bp->bio_pblkno;
259baabaca3SWarner Losh 	count = bp->bio_bcount / softc->disk->d_sectorsize;
260baabaca3SWarner Losh 
261baabaca3SWarner Losh 	cam_fill_nvmeio(nvmeio,
262baabaca3SWarner Losh 	    0,			/* retries */
263baabaca3SWarner Losh 	    ndadone,		/* cbfcnp */
264baabaca3SWarner Losh 	    flags,		/* flags */
265baabaca3SWarner Losh 	    payload,		/* data_ptr */
266baabaca3SWarner Losh 	    bp->bio_bcount,	/* dxfer_len */
267717bff5dSWarner Losh 	    nda_default_timeout * 1000); /* timeout 30s */
268baabaca3SWarner Losh 	nvme_ns_rw_cmd(&nvmeio->cmd, rwcmd, softc->nsid, lba, count);
269baabaca3SWarner Losh }
270baabaca3SWarner Losh 
271baabaca3SWarner Losh static int
272baabaca3SWarner Losh ndaopen(struct disk *dp)
273baabaca3SWarner Losh {
274baabaca3SWarner Losh 	struct cam_periph *periph;
275baabaca3SWarner Losh 	struct nda_softc *softc;
276baabaca3SWarner Losh 	int error;
277baabaca3SWarner Losh 
278baabaca3SWarner Losh 	periph = (struct cam_periph *)dp->d_drv1;
27999e7a4adSScott Long 	if (cam_periph_acquire(periph) != 0) {
280baabaca3SWarner Losh 		return(ENXIO);
281baabaca3SWarner Losh 	}
282baabaca3SWarner Losh 
283baabaca3SWarner Losh 	cam_periph_lock(periph);
284baabaca3SWarner Losh 	if ((error = cam_periph_hold(periph, PRIBIO|PCATCH)) != 0) {
285baabaca3SWarner Losh 		cam_periph_unlock(periph);
286baabaca3SWarner Losh 		cam_periph_release(periph);
287baabaca3SWarner Losh 		return (error);
288baabaca3SWarner Losh 	}
289baabaca3SWarner Losh 
290baabaca3SWarner Losh 	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH,
291baabaca3SWarner Losh 	    ("ndaopen\n"));
292baabaca3SWarner Losh 
293baabaca3SWarner Losh 	softc = (struct nda_softc *)periph->softc;
294baabaca3SWarner Losh 	softc->flags |= NDA_FLAG_OPEN;
295baabaca3SWarner Losh 
296baabaca3SWarner Losh 	cam_periph_unhold(periph);
297baabaca3SWarner Losh 	cam_periph_unlock(periph);
298baabaca3SWarner Losh 	return (0);
299baabaca3SWarner Losh }
300baabaca3SWarner Losh 
301baabaca3SWarner Losh static int
302baabaca3SWarner Losh ndaclose(struct disk *dp)
303baabaca3SWarner Losh {
304baabaca3SWarner Losh 	struct	cam_periph *periph;
305baabaca3SWarner Losh 	struct	nda_softc *softc;
306baabaca3SWarner Losh 	union ccb *ccb;
307baabaca3SWarner Losh 	int error;
308baabaca3SWarner Losh 
309baabaca3SWarner Losh 	periph = (struct cam_periph *)dp->d_drv1;
310baabaca3SWarner Losh 	softc = (struct nda_softc *)periph->softc;
311baabaca3SWarner Losh 	cam_periph_lock(periph);
312baabaca3SWarner Losh 
313baabaca3SWarner Losh 	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH,
314baabaca3SWarner Losh 	    ("ndaclose\n"));
315baabaca3SWarner Losh 
316baabaca3SWarner Losh 	if ((softc->flags & NDA_FLAG_DIRTY) != 0 &&
317baabaca3SWarner Losh 	    (periph->flags & CAM_PERIPH_INVALID) == 0 &&
318baabaca3SWarner Losh 	    cam_periph_hold(periph, PRIBIO) == 0) {
319baabaca3SWarner Losh 
320baabaca3SWarner Losh 		ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL);
321baabaca3SWarner Losh 		nda_nvme_flush(softc, &ccb->nvmeio);
322baabaca3SWarner Losh 		error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0,
323baabaca3SWarner Losh 		    /*sense_flags*/0, softc->disk->d_devstat);
324baabaca3SWarner Losh 
325baabaca3SWarner Losh 		if (error != 0)
326baabaca3SWarner Losh 			xpt_print(periph->path, "Synchronize cache failed\n");
327baabaca3SWarner Losh 		else
328baabaca3SWarner Losh 			softc->flags &= ~NDA_FLAG_DIRTY;
329baabaca3SWarner Losh 		xpt_release_ccb(ccb);
330baabaca3SWarner Losh 		cam_periph_unhold(periph);
331baabaca3SWarner Losh 	}
332baabaca3SWarner Losh 
333baabaca3SWarner Losh 	softc->flags &= ~NDA_FLAG_OPEN;
334baabaca3SWarner Losh 
335baabaca3SWarner Losh 	while (softc->refcount != 0)
336baabaca3SWarner Losh 		cam_periph_sleep(periph, &softc->refcount, PRIBIO, "ndaclose", 1);
337baabaca3SWarner Losh 	cam_periph_unlock(periph);
338baabaca3SWarner Losh 	cam_periph_release(periph);
339baabaca3SWarner Losh 	return (0);
340baabaca3SWarner Losh }
341baabaca3SWarner Losh 
342baabaca3SWarner Losh static void
343baabaca3SWarner Losh ndaschedule(struct cam_periph *periph)
344baabaca3SWarner Losh {
345baabaca3SWarner Losh 	struct nda_softc *softc = (struct nda_softc *)periph->softc;
346baabaca3SWarner Losh 
347baabaca3SWarner Losh 	if (softc->state != NDA_STATE_NORMAL)
348baabaca3SWarner Losh 		return;
349baabaca3SWarner Losh 
350baabaca3SWarner Losh 	cam_iosched_schedule(softc->cam_iosched, periph);
351baabaca3SWarner Losh }
352baabaca3SWarner Losh 
353baabaca3SWarner Losh /*
354baabaca3SWarner Losh  * Actually translate the requested transfer into one the physical driver
355baabaca3SWarner Losh  * can understand.  The transfer is described by a buf and will include
356baabaca3SWarner Losh  * only one physical transfer.
357baabaca3SWarner Losh  */
358baabaca3SWarner Losh static void
359baabaca3SWarner Losh ndastrategy(struct bio *bp)
360baabaca3SWarner Losh {
361baabaca3SWarner Losh 	struct cam_periph *periph;
362baabaca3SWarner Losh 	struct nda_softc *softc;
363baabaca3SWarner Losh 
364baabaca3SWarner Losh 	periph = (struct cam_periph *)bp->bio_disk->d_drv1;
365baabaca3SWarner Losh 	softc = (struct nda_softc *)periph->softc;
366baabaca3SWarner Losh 
367baabaca3SWarner Losh 	cam_periph_lock(periph);
368baabaca3SWarner Losh 
369baabaca3SWarner Losh 	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastrategy(%p)\n", bp));
370baabaca3SWarner Losh 
371baabaca3SWarner Losh 	/*
372baabaca3SWarner Losh 	 * If the device has been made invalid, error out
373baabaca3SWarner Losh 	 */
374baabaca3SWarner Losh 	if ((periph->flags & CAM_PERIPH_INVALID) != 0) {
375baabaca3SWarner Losh 		cam_periph_unlock(periph);
376baabaca3SWarner Losh 		biofinish(bp, NULL, ENXIO);
377baabaca3SWarner Losh 		return;
378baabaca3SWarner Losh 	}
379baabaca3SWarner Losh 
380807e94b2SWarner Losh 	if (bp->bio_cmd == BIO_DELETE)
381807e94b2SWarner Losh 		softc->deletes++;
382807e94b2SWarner Losh 
383baabaca3SWarner Losh 	/*
384baabaca3SWarner Losh 	 * Place it in the queue of disk activities for this disk
385baabaca3SWarner Losh 	 */
386baabaca3SWarner Losh 	cam_iosched_queue_work(softc->cam_iosched, bp);
387baabaca3SWarner Losh 
388baabaca3SWarner Losh 	/*
389baabaca3SWarner Losh 	 * Schedule ourselves for performing the work.
390baabaca3SWarner Losh 	 */
391baabaca3SWarner Losh 	ndaschedule(periph);
392baabaca3SWarner Losh 	cam_periph_unlock(periph);
393baabaca3SWarner Losh 
394baabaca3SWarner Losh 	return;
395baabaca3SWarner Losh }
396baabaca3SWarner Losh 
397baabaca3SWarner Losh static int
398baabaca3SWarner Losh ndadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length)
399baabaca3SWarner Losh {
400baabaca3SWarner Losh 	struct	    cam_periph *periph;
401baabaca3SWarner Losh 	struct	    nda_softc *softc;
402baabaca3SWarner Losh 	u_int	    secsize;
403c4231018SWarner Losh 	struct ccb_nvmeio nvmeio;
404baabaca3SWarner Losh 	struct	    disk *dp;
405baabaca3SWarner Losh 	uint64_t    lba;
406baabaca3SWarner Losh 	uint32_t    count;
407baabaca3SWarner Losh 	int	    error = 0;
408baabaca3SWarner Losh 
409baabaca3SWarner Losh 	dp = arg;
410baabaca3SWarner Losh 	periph = dp->d_drv1;
411baabaca3SWarner Losh 	softc = (struct nda_softc *)periph->softc;
412baabaca3SWarner Losh 	secsize = softc->disk->d_sectorsize;
413baabaca3SWarner Losh 	lba = offset / secsize;
414baabaca3SWarner Losh 	count = length / secsize;
415baabaca3SWarner Losh 
416bff0b56cSScott Long 	if ((periph->flags & CAM_PERIPH_INVALID) != 0)
417baabaca3SWarner Losh 		return (ENXIO);
418baabaca3SWarner Losh 
419fa271a5dSWarner Losh 	/* xpt_get_ccb returns a zero'd allocation for the ccb, mimic that here */
420fa271a5dSWarner Losh 	memset(&nvmeio, 0, sizeof(nvmeio));
421baabaca3SWarner Losh 	if (length > 0) {
422c4231018SWarner Losh 		xpt_setup_ccb(&nvmeio.ccb_h, periph->path, CAM_PRIORITY_NORMAL);
423807e94b2SWarner Losh 		nvmeio.ccb_state = NDA_CCB_DUMP;
424c4231018SWarner Losh 		nda_nvme_write(softc, &nvmeio, virtual, lba, length, count);
4252b31251aSWarner Losh 		error = cam_periph_runccb((union ccb *)&nvmeio, cam_periph_error,
4262b31251aSWarner Losh 		    0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
427baabaca3SWarner Losh 		if (error != 0)
4282b31251aSWarner Losh 			printf("Aborting dump due to I/O error %d.\n", error);
4292b31251aSWarner Losh 
430baabaca3SWarner Losh 		return (error);
431baabaca3SWarner Losh 	}
432baabaca3SWarner Losh 
433baabaca3SWarner Losh 	/* Flush */
434c4231018SWarner Losh 	xpt_setup_ccb(&nvmeio.ccb_h, periph->path, CAM_PRIORITY_NORMAL);
435baabaca3SWarner Losh 
436807e94b2SWarner Losh 	nvmeio.ccb_state = NDA_CCB_DUMP;
437c4231018SWarner Losh 	nda_nvme_flush(softc, &nvmeio);
4382b31251aSWarner Losh 	error = cam_periph_runccb((union ccb *)&nvmeio, cam_periph_error,
4392b31251aSWarner Losh 	    0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
440baabaca3SWarner Losh 	if (error != 0)
441baabaca3SWarner Losh 		xpt_print(periph->path, "flush cmd failed\n");
442baabaca3SWarner Losh 	return (error);
443baabaca3SWarner Losh }
444baabaca3SWarner Losh 
445baabaca3SWarner Losh static void
446baabaca3SWarner Losh ndainit(void)
447baabaca3SWarner Losh {
448baabaca3SWarner Losh 	cam_status status;
449baabaca3SWarner Losh 
450baabaca3SWarner Losh 	/*
451baabaca3SWarner Losh 	 * Install a global async callback.  This callback will
452baabaca3SWarner Losh 	 * receive async callbacks like "new device found".
453baabaca3SWarner Losh 	 */
454baabaca3SWarner Losh 	status = xpt_register_async(AC_FOUND_DEVICE, ndaasync, NULL, NULL);
455baabaca3SWarner Losh 
456baabaca3SWarner Losh 	if (status != CAM_REQ_CMP) {
457baabaca3SWarner Losh 		printf("nda: Failed to attach master async callback "
458baabaca3SWarner Losh 		       "due to status 0x%x!\n", status);
459baabaca3SWarner Losh 	} else if (nda_send_ordered) {
460baabaca3SWarner Losh 
461baabaca3SWarner Losh 		/* Register our event handlers */
462baabaca3SWarner Losh 		if ((EVENTHANDLER_REGISTER(power_suspend, ndasuspend,
463baabaca3SWarner Losh 					   NULL, EVENTHANDLER_PRI_LAST)) == NULL)
464baabaca3SWarner Losh 		    printf("ndainit: power event registration failed!\n");
465baabaca3SWarner Losh 		if ((EVENTHANDLER_REGISTER(shutdown_post_sync, ndashutdown,
466baabaca3SWarner Losh 					   NULL, SHUTDOWN_PRI_DEFAULT)) == NULL)
467baabaca3SWarner Losh 		    printf("ndainit: shutdown event registration failed!\n");
468baabaca3SWarner Losh 	}
469baabaca3SWarner Losh }
470baabaca3SWarner Losh 
471baabaca3SWarner Losh /*
472baabaca3SWarner Losh  * Callback from GEOM, called when it has finished cleaning up its
473baabaca3SWarner Losh  * resources.
474baabaca3SWarner Losh  */
475baabaca3SWarner Losh static void
476baabaca3SWarner Losh ndadiskgonecb(struct disk *dp)
477baabaca3SWarner Losh {
478baabaca3SWarner Losh 	struct cam_periph *periph;
479baabaca3SWarner Losh 
480baabaca3SWarner Losh 	periph = (struct cam_periph *)dp->d_drv1;
481baabaca3SWarner Losh 
482baabaca3SWarner Losh 	cam_periph_release(periph);
483baabaca3SWarner Losh }
484baabaca3SWarner Losh 
485baabaca3SWarner Losh static void
486baabaca3SWarner Losh ndaoninvalidate(struct cam_periph *periph)
487baabaca3SWarner Losh {
488baabaca3SWarner Losh 	struct nda_softc *softc;
489baabaca3SWarner Losh 
490baabaca3SWarner Losh 	softc = (struct nda_softc *)periph->softc;
491baabaca3SWarner Losh 
492baabaca3SWarner Losh 	/*
493baabaca3SWarner Losh 	 * De-register any async callbacks.
494baabaca3SWarner Losh 	 */
495baabaca3SWarner Losh 	xpt_register_async(0, ndaasync, periph, periph->path);
496baabaca3SWarner Losh #ifdef CAM_IO_STATS
497baabaca3SWarner Losh 	softc->invalidations++;
498baabaca3SWarner Losh #endif
499baabaca3SWarner Losh 
500baabaca3SWarner Losh 	/*
501baabaca3SWarner Losh 	 * Return all queued I/O with ENXIO.
502baabaca3SWarner Losh 	 * XXX Handle any transactions queued to the card
503baabaca3SWarner Losh 	 *     with XPT_ABORT_CCB.
504baabaca3SWarner Losh 	 */
505baabaca3SWarner Losh 	cam_iosched_flush(softc->cam_iosched, NULL, ENXIO);
506baabaca3SWarner Losh 
507baabaca3SWarner Losh 	disk_gone(softc->disk);
508baabaca3SWarner Losh }
509baabaca3SWarner Losh 
510baabaca3SWarner Losh static void
511baabaca3SWarner Losh ndacleanup(struct cam_periph *periph)
512baabaca3SWarner Losh {
513baabaca3SWarner Losh 	struct nda_softc *softc;
514baabaca3SWarner Losh 
515baabaca3SWarner Losh 	softc = (struct nda_softc *)periph->softc;
516baabaca3SWarner Losh 
517baabaca3SWarner Losh 	cam_periph_unlock(periph);
518baabaca3SWarner Losh 
519baabaca3SWarner Losh 	cam_iosched_fini(softc->cam_iosched);
520baabaca3SWarner Losh 
521baabaca3SWarner Losh 	/*
522baabaca3SWarner Losh 	 * If we can't free the sysctl tree, oh well...
523baabaca3SWarner Losh 	 */
524baabaca3SWarner Losh 	if ((softc->flags & NDA_FLAG_SCTX_INIT) != 0) {
525baabaca3SWarner Losh #ifdef CAM_IO_STATS
526baabaca3SWarner Losh 		if (sysctl_ctx_free(&softc->sysctl_stats_ctx) != 0)
527baabaca3SWarner Losh 			xpt_print(periph->path,
528baabaca3SWarner Losh 			    "can't remove sysctl stats context\n");
529baabaca3SWarner Losh #endif
530baabaca3SWarner Losh 		if (sysctl_ctx_free(&softc->sysctl_ctx) != 0)
531baabaca3SWarner Losh 			xpt_print(periph->path,
532baabaca3SWarner Losh 			    "can't remove sysctl context\n");
533baabaca3SWarner Losh 	}
534baabaca3SWarner Losh 
535baabaca3SWarner Losh 	disk_destroy(softc->disk);
536baabaca3SWarner Losh 	free(softc, M_DEVBUF);
537baabaca3SWarner Losh 	cam_periph_lock(periph);
538baabaca3SWarner Losh }
539baabaca3SWarner Losh 
540baabaca3SWarner Losh static void
541baabaca3SWarner Losh ndaasync(void *callback_arg, u_int32_t code,
542baabaca3SWarner Losh 	struct cam_path *path, void *arg)
543baabaca3SWarner Losh {
544baabaca3SWarner Losh 	struct cam_periph *periph;
545baabaca3SWarner Losh 
546baabaca3SWarner Losh 	periph = (struct cam_periph *)callback_arg;
547baabaca3SWarner Losh 	switch (code) {
548baabaca3SWarner Losh 	case AC_FOUND_DEVICE:
549baabaca3SWarner Losh 	{
550baabaca3SWarner Losh 		struct ccb_getdev *cgd;
551baabaca3SWarner Losh 		cam_status status;
552baabaca3SWarner Losh 
553baabaca3SWarner Losh 		cgd = (struct ccb_getdev *)arg;
554baabaca3SWarner Losh 		if (cgd == NULL)
555baabaca3SWarner Losh 			break;
556baabaca3SWarner Losh 
557baabaca3SWarner Losh 		if (cgd->protocol != PROTO_NVME)
558baabaca3SWarner Losh 			break;
559baabaca3SWarner Losh 
560baabaca3SWarner Losh 		/*
561baabaca3SWarner Losh 		 * Allocate a peripheral instance for
562baabaca3SWarner Losh 		 * this device and start the probe
563baabaca3SWarner Losh 		 * process.
564baabaca3SWarner Losh 		 */
565baabaca3SWarner Losh 		status = cam_periph_alloc(ndaregister, ndaoninvalidate,
566baabaca3SWarner Losh 					  ndacleanup, ndastart,
567baabaca3SWarner Losh 					  "nda", CAM_PERIPH_BIO,
568baabaca3SWarner Losh 					  path, ndaasync,
569baabaca3SWarner Losh 					  AC_FOUND_DEVICE, cgd);
570baabaca3SWarner Losh 
571baabaca3SWarner Losh 		if (status != CAM_REQ_CMP
572baabaca3SWarner Losh 		 && status != CAM_REQ_INPROG)
573baabaca3SWarner Losh 			printf("ndaasync: Unable to attach to new device "
574baabaca3SWarner Losh 				"due to status 0x%x\n", status);
575baabaca3SWarner Losh 		break;
576baabaca3SWarner Losh 	}
577baabaca3SWarner Losh 	case AC_ADVINFO_CHANGED:
578baabaca3SWarner Losh 	{
579baabaca3SWarner Losh 		uintptr_t buftype;
580baabaca3SWarner Losh 
581baabaca3SWarner Losh 		buftype = (uintptr_t)arg;
582baabaca3SWarner Losh 		if (buftype == CDAI_TYPE_PHYS_PATH) {
583baabaca3SWarner Losh 			struct nda_softc *softc;
584baabaca3SWarner Losh 
585baabaca3SWarner Losh 			softc = periph->softc;
586baabaca3SWarner Losh 			disk_attr_changed(softc->disk, "GEOM::physpath",
587baabaca3SWarner Losh 					  M_NOWAIT);
588baabaca3SWarner Losh 		}
589baabaca3SWarner Losh 		break;
590baabaca3SWarner Losh 	}
591baabaca3SWarner Losh 	case AC_LOST_DEVICE:
592baabaca3SWarner Losh 	default:
593baabaca3SWarner Losh 		cam_periph_async(periph, code, path, arg);
594baabaca3SWarner Losh 		break;
595baabaca3SWarner Losh 	}
596baabaca3SWarner Losh }
597baabaca3SWarner Losh 
598baabaca3SWarner Losh static void
599baabaca3SWarner Losh ndasysctlinit(void *context, int pending)
600baabaca3SWarner Losh {
601baabaca3SWarner Losh 	struct cam_periph *periph;
602baabaca3SWarner Losh 	struct nda_softc *softc;
6034d470952SAlexander Motin 	char tmpstr[32], tmpstr2[16];
604baabaca3SWarner Losh 
605baabaca3SWarner Losh 	periph = (struct cam_periph *)context;
606baabaca3SWarner Losh 
607baabaca3SWarner Losh 	/* periph was held for us when this task was enqueued */
608baabaca3SWarner Losh 	if ((periph->flags & CAM_PERIPH_INVALID) != 0) {
609baabaca3SWarner Losh 		cam_periph_release(periph);
610baabaca3SWarner Losh 		return;
611baabaca3SWarner Losh 	}
612baabaca3SWarner Losh 
613baabaca3SWarner Losh 	softc = (struct nda_softc *)periph->softc;
614baabaca3SWarner Losh 	snprintf(tmpstr, sizeof(tmpstr), "CAM NDA unit %d", periph->unit_number);
615baabaca3SWarner Losh 	snprintf(tmpstr2, sizeof(tmpstr2), "%d", periph->unit_number);
616baabaca3SWarner Losh 
617baabaca3SWarner Losh 	sysctl_ctx_init(&softc->sysctl_ctx);
618baabaca3SWarner Losh 	softc->flags |= NDA_FLAG_SCTX_INIT;
6194c484fd2SEd Schouten 	softc->sysctl_tree = SYSCTL_ADD_NODE_WITH_LABEL(&softc->sysctl_ctx,
620baabaca3SWarner Losh 		SYSCTL_STATIC_CHILDREN(_kern_cam_nda), OID_AUTO, tmpstr2,
6214c484fd2SEd Schouten 		CTLFLAG_RD, 0, tmpstr, "device_index");
622baabaca3SWarner Losh 	if (softc->sysctl_tree == NULL) {
623baabaca3SWarner Losh 		printf("ndasysctlinit: unable to allocate sysctl tree\n");
624baabaca3SWarner Losh 		cam_periph_release(periph);
625baabaca3SWarner Losh 		return;
626baabaca3SWarner Losh 	}
627baabaca3SWarner Losh 
628baabaca3SWarner Losh 	SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
629baabaca3SWarner Losh 		OID_AUTO, "unmapped_io", CTLFLAG_RD | CTLFLAG_MPSAFE,
630baabaca3SWarner Losh 		&softc->unmappedio, 0, "Unmapped I/O leaf");
631baabaca3SWarner Losh 
632807e94b2SWarner Losh 	SYSCTL_ADD_QUAD(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
633807e94b2SWarner Losh 		OID_AUTO, "deletes", CTLFLAG_RD | CTLFLAG_MPSAFE,
634807e94b2SWarner Losh 		&softc->deletes, "Number of BIO_DELETE requests");
635807e94b2SWarner Losh 
636807e94b2SWarner Losh 	SYSCTL_ADD_QUAD(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
637807e94b2SWarner Losh 		OID_AUTO, "dsm_req", CTLFLAG_RD | CTLFLAG_MPSAFE,
638807e94b2SWarner Losh 		&softc->dsm_req, "Number of DSM requests sent to SIM");
639807e94b2SWarner Losh 
640baabaca3SWarner Losh 	SYSCTL_ADD_INT(&softc->sysctl_ctx,
641baabaca3SWarner Losh 		       SYSCTL_CHILDREN(softc->sysctl_tree),
642baabaca3SWarner Losh 		       OID_AUTO,
643baabaca3SWarner Losh 		       "rotating",
644baabaca3SWarner Losh 		       CTLFLAG_RD | CTLFLAG_MPSAFE,
645baabaca3SWarner Losh 		       &nda_rotating_media,
646baabaca3SWarner Losh 		       0,
647baabaca3SWarner Losh 		       "Rotating media");
648baabaca3SWarner Losh 
649baabaca3SWarner Losh #ifdef CAM_IO_STATS
650baabaca3SWarner Losh 	softc->sysctl_stats_tree = SYSCTL_ADD_NODE(&softc->sysctl_stats_ctx,
651baabaca3SWarner Losh 		SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "stats",
652baabaca3SWarner Losh 		CTLFLAG_RD, 0, "Statistics");
653baabaca3SWarner Losh 	if (softc->sysctl_stats_tree == NULL) {
654baabaca3SWarner Losh 		printf("ndasysctlinit: unable to allocate sysctl tree for stats\n");
655baabaca3SWarner Losh 		cam_periph_release(periph);
656baabaca3SWarner Losh 		return;
657baabaca3SWarner Losh 	}
658baabaca3SWarner Losh 	SYSCTL_ADD_INT(&softc->sysctl_stats_ctx,
659baabaca3SWarner Losh 		SYSCTL_CHILDREN(softc->sysctl_stats_tree),
660baabaca3SWarner Losh 		OID_AUTO, "timeouts", CTLFLAG_RD | CTLFLAG_MPSAFE,
661baabaca3SWarner Losh 		&softc->timeouts, 0,
662baabaca3SWarner Losh 		"Device timeouts reported by the SIM");
663baabaca3SWarner Losh 	SYSCTL_ADD_INT(&softc->sysctl_stats_ctx,
664baabaca3SWarner Losh 		SYSCTL_CHILDREN(softc->sysctl_stats_tree),
665baabaca3SWarner Losh 		OID_AUTO, "errors", CTLFLAG_RD | CTLFLAG_MPSAFE,
666baabaca3SWarner Losh 		&softc->errors, 0,
667baabaca3SWarner Losh 		"Transport errors reported by the SIM.");
668baabaca3SWarner Losh 	SYSCTL_ADD_INT(&softc->sysctl_stats_ctx,
669baabaca3SWarner Losh 		SYSCTL_CHILDREN(softc->sysctl_stats_tree),
670baabaca3SWarner Losh 		OID_AUTO, "pack_invalidations", CTLFLAG_RD | CTLFLAG_MPSAFE,
671baabaca3SWarner Losh 		&softc->invalidations, 0,
672baabaca3SWarner Losh 		"Device pack invalidations.");
673baabaca3SWarner Losh #endif
674baabaca3SWarner Losh 
675*d38677d2SWarner Losh #ifdef CAM_TEST_FAILURE
676*d38677d2SWarner Losh 	SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
677*d38677d2SWarner Losh 		OID_AUTO, "invalidate", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE,
678*d38677d2SWarner Losh 		periph, 0, cam_periph_invalidate_sysctl, "I",
679*d38677d2SWarner Losh 		"Write 1 to invalidate the drive immediately");
680*d38677d2SWarner Losh #endif
681*d38677d2SWarner Losh 
682baabaca3SWarner Losh 	cam_iosched_sysctl_init(softc->cam_iosched, &softc->sysctl_ctx,
683baabaca3SWarner Losh 	    softc->sysctl_tree);
684baabaca3SWarner Losh 
685baabaca3SWarner Losh 	cam_periph_release(periph);
686baabaca3SWarner Losh }
687baabaca3SWarner Losh 
688baabaca3SWarner Losh static int
689baabaca3SWarner Losh ndagetattr(struct bio *bp)
690baabaca3SWarner Losh {
691baabaca3SWarner Losh 	int ret;
692baabaca3SWarner Losh 	struct cam_periph *periph;
693baabaca3SWarner Losh 
694baabaca3SWarner Losh 	periph = (struct cam_periph *)bp->bio_disk->d_drv1;
695baabaca3SWarner Losh 	cam_periph_lock(periph);
696baabaca3SWarner Losh 	ret = xpt_getattr(bp->bio_data, bp->bio_length, bp->bio_attribute,
697baabaca3SWarner Losh 	    periph->path);
698baabaca3SWarner Losh 	cam_periph_unlock(periph);
699baabaca3SWarner Losh 	if (ret == 0)
700baabaca3SWarner Losh 		bp->bio_completed = bp->bio_length;
701baabaca3SWarner Losh 	return ret;
702baabaca3SWarner Losh }
703baabaca3SWarner Losh 
704baabaca3SWarner Losh static cam_status
705baabaca3SWarner Losh ndaregister(struct cam_periph *periph, void *arg)
706baabaca3SWarner Losh {
707baabaca3SWarner Losh 	struct nda_softc *softc;
708baabaca3SWarner Losh 	struct disk *disk;
709baabaca3SWarner Losh 	struct ccb_pathinq cpi;
710baabaca3SWarner Losh 	const struct nvme_namespace_data *nsd;
711baabaca3SWarner Losh 	const struct nvme_controller_data *cd;
712baabaca3SWarner Losh 	char   announce_buf[80];
7130d787e9bSWojciech Macek 	uint8_t flbas_fmt, lbads, vwc_present;
714baabaca3SWarner Losh 	u_int maxio;
715baabaca3SWarner Losh 	int quirks;
716baabaca3SWarner Losh 
7179f8ed7e4SWarner Losh 	nsd = nvme_get_identify_ns(periph);
7189f8ed7e4SWarner Losh 	cd = nvme_get_identify_cntrl(periph);
719baabaca3SWarner Losh 
720baabaca3SWarner Losh 	softc = (struct nda_softc *)malloc(sizeof(*softc), M_DEVBUF,
721baabaca3SWarner Losh 	    M_NOWAIT | M_ZERO);
722baabaca3SWarner Losh 
723baabaca3SWarner Losh 	if (softc == NULL) {
724baabaca3SWarner Losh 		printf("ndaregister: Unable to probe new device. "
725baabaca3SWarner Losh 		    "Unable to allocate softc\n");
726baabaca3SWarner Losh 		return(CAM_REQ_CMP_ERR);
727baabaca3SWarner Losh 	}
728baabaca3SWarner Losh 
7290028abe6SWarner Losh 	if (cam_iosched_init(&softc->cam_iosched, periph) != 0) {
730baabaca3SWarner Losh 		printf("ndaregister: Unable to probe new device. "
731baabaca3SWarner Losh 		       "Unable to allocate iosched memory\n");
7322e1fccf2SConrad Meyer 		free(softc, M_DEVBUF);
733baabaca3SWarner Losh 		return(CAM_REQ_CMP_ERR);
734baabaca3SWarner Losh 	}
735baabaca3SWarner Losh 
736baabaca3SWarner Losh 	/* ident_data parsing */
737baabaca3SWarner Losh 
738baabaca3SWarner Losh 	periph->softc = softc;
739baabaca3SWarner Losh 
740baabaca3SWarner Losh 	softc->quirks = NDA_Q_NONE;
741baabaca3SWarner Losh 
742762a7f4fSWarner Losh 	xpt_path_inq(&cpi, periph->path);
743baabaca3SWarner Losh 
744baabaca3SWarner Losh 	TASK_INIT(&softc->sysctl_task, 0, ndasysctlinit, periph);
745baabaca3SWarner Losh 
746baabaca3SWarner Losh 	/*
747baabaca3SWarner Losh 	 * The name space ID is the lun, save it for later I/O
748baabaca3SWarner Losh 	 */
7491d6e8110SWarner Losh 	softc->nsid = (uint32_t)xpt_path_lun_id(periph->path);
750baabaca3SWarner Losh 
751baabaca3SWarner Losh 	/*
752baabaca3SWarner Losh 	 * Register this media as a disk
753baabaca3SWarner Losh 	 */
754baabaca3SWarner Losh 	(void)cam_periph_hold(periph, PRIBIO);
755baabaca3SWarner Losh 	cam_periph_unlock(periph);
756baabaca3SWarner Losh 	snprintf(announce_buf, sizeof(announce_buf),
757baabaca3SWarner Losh 	    "kern.cam.nda.%d.quirks", periph->unit_number);
758baabaca3SWarner Losh 	quirks = softc->quirks;
759baabaca3SWarner Losh 	TUNABLE_INT_FETCH(announce_buf, &quirks);
760baabaca3SWarner Losh 	softc->quirks = quirks;
761baabaca3SWarner Losh 	cam_iosched_set_sort_queue(softc->cam_iosched, 0);
762baabaca3SWarner Losh 	softc->disk = disk = disk_alloc();
763baabaca3SWarner Losh 	strlcpy(softc->disk->d_descr, cd->mn,
764baabaca3SWarner Losh 	    MIN(sizeof(softc->disk->d_descr), sizeof(cd->mn)));
765baabaca3SWarner Losh 	strlcpy(softc->disk->d_ident, cd->sn,
766baabaca3SWarner Losh 	    MIN(sizeof(softc->disk->d_ident), sizeof(cd->sn)));
76717160457SAlexander Motin 	disk->d_rotation_rate = DISK_RR_NON_ROTATING;
768baabaca3SWarner Losh 	disk->d_open = ndaopen;
769baabaca3SWarner Losh 	disk->d_close = ndaclose;
770baabaca3SWarner Losh 	disk->d_strategy = ndastrategy;
771baabaca3SWarner Losh 	disk->d_getattr = ndagetattr;
772baabaca3SWarner Losh 	disk->d_dump = ndadump;
773baabaca3SWarner Losh 	disk->d_gone = ndadiskgonecb;
774baabaca3SWarner Losh 	disk->d_name = "nda";
775baabaca3SWarner Losh 	disk->d_drv1 = periph;
776baabaca3SWarner Losh 	disk->d_unit = periph->unit_number;
777baabaca3SWarner Losh 	maxio = cpi.maxio;		/* Honor max I/O size of SIM */
778baabaca3SWarner Losh 	if (maxio == 0)
779baabaca3SWarner Losh 		maxio = DFLTPHYS;	/* traditional default */
780baabaca3SWarner Losh 	else if (maxio > MAXPHYS)
781baabaca3SWarner Losh 		maxio = MAXPHYS;	/* for safety */
782baabaca3SWarner Losh 	disk->d_maxsize = maxio;
7830d787e9bSWojciech Macek 	flbas_fmt = (nsd->flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) &
7840d787e9bSWojciech Macek 		NVME_NS_DATA_FLBAS_FORMAT_MASK;
7850d787e9bSWojciech Macek 	lbads = (nsd->lbaf[flbas_fmt] >> NVME_NS_DATA_LBAF_LBADS_SHIFT) &
7860d787e9bSWojciech Macek 		NVME_NS_DATA_LBAF_LBADS_MASK;
7870d787e9bSWojciech Macek 	disk->d_sectorsize = 1 << lbads;
788baabaca3SWarner Losh 	disk->d_mediasize = (off_t)(disk->d_sectorsize * nsd->nsze);
789baabaca3SWarner Losh 	disk->d_delmaxsize = disk->d_mediasize;
790baabaca3SWarner Losh 	disk->d_flags = DISKFLAG_DIRECT_COMPLETION;
791baabaca3SWarner Losh //	if (cd->oncs.dsm) // XXX broken?
792baabaca3SWarner Losh 		disk->d_flags |= DISKFLAG_CANDELETE;
7930d787e9bSWojciech Macek 	vwc_present = (cd->vwc >> NVME_CTRLR_DATA_VWC_PRESENT_SHIFT) &
7940d787e9bSWojciech Macek 		NVME_CTRLR_DATA_VWC_PRESENT_MASK;
7950d787e9bSWojciech Macek 	if (vwc_present)
796baabaca3SWarner Losh 		disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
797baabaca3SWarner Losh 	if ((cpi.hba_misc & PIM_UNMAPPED) != 0) {
798baabaca3SWarner Losh 		disk->d_flags |= DISKFLAG_UNMAPPED_BIO;
799baabaca3SWarner Losh 		softc->unmappedio = 1;
800baabaca3SWarner Losh 	}
801baabaca3SWarner Losh 	/*
802baabaca3SWarner Losh 	 * d_ident and d_descr are both far bigger than the length of either
803baabaca3SWarner Losh 	 *  the serial or model number strings.
804baabaca3SWarner Losh 	 */
805baabaca3SWarner Losh 	nvme_strvis(disk->d_descr, cd->mn,
806baabaca3SWarner Losh 	    sizeof(disk->d_descr), NVME_MODEL_NUMBER_LENGTH);
807baabaca3SWarner Losh 	nvme_strvis(disk->d_ident, cd->sn,
808baabaca3SWarner Losh 	    sizeof(disk->d_ident), NVME_SERIAL_NUMBER_LENGTH);
809baabaca3SWarner Losh 	disk->d_hba_vendor = cpi.hba_vendor;
810baabaca3SWarner Losh 	disk->d_hba_device = cpi.hba_device;
811baabaca3SWarner Losh 	disk->d_hba_subvendor = cpi.hba_subvendor;
812baabaca3SWarner Losh 	disk->d_hba_subdevice = cpi.hba_subdevice;
813baabaca3SWarner Losh 	disk->d_stripesize = disk->d_sectorsize;
814baabaca3SWarner Losh 	disk->d_stripeoffset = 0;
815baabaca3SWarner Losh 	disk->d_devstat = devstat_new_entry(periph->periph_name,
816baabaca3SWarner Losh 	    periph->unit_number, disk->d_sectorsize,
817baabaca3SWarner Losh 	    DEVSTAT_ALL_SUPPORTED,
818baabaca3SWarner Losh 	    DEVSTAT_TYPE_DIRECT | XPORT_DEVSTAT_TYPE(cpi.transport),
819baabaca3SWarner Losh 	    DEVSTAT_PRIORITY_DISK);
820d45e1674SWarner Losh 	/*
821d45e1674SWarner Losh 	 * Add alias for older nvd drives to ease transition.
822d45e1674SWarner Losh 	 */
823712ad719SWarner Losh 	/* disk_add_alias(disk, "nvd"); Have reports of this causing problems */
824baabaca3SWarner Losh 
825baabaca3SWarner Losh 	/*
826baabaca3SWarner Losh 	 * Acquire a reference to the periph before we register with GEOM.
827baabaca3SWarner Losh 	 * We'll release this reference once GEOM calls us back (via
828baabaca3SWarner Losh 	 * ndadiskgonecb()) telling us that our provider has been freed.
829baabaca3SWarner Losh 	 */
83099e7a4adSScott Long 	if (cam_periph_acquire(periph) != 0) {
831baabaca3SWarner Losh 		xpt_print(periph->path, "%s: lost periph during "
832baabaca3SWarner Losh 			  "registration!\n", __func__);
833baabaca3SWarner Losh 		cam_periph_lock(periph);
834baabaca3SWarner Losh 		return (CAM_REQ_CMP_ERR);
835baabaca3SWarner Losh 	}
836baabaca3SWarner Losh 	disk_create(softc->disk, DISK_VERSION);
837baabaca3SWarner Losh 	cam_periph_lock(periph);
838baabaca3SWarner Losh 	cam_periph_unhold(periph);
839baabaca3SWarner Losh 
840baabaca3SWarner Losh 	snprintf(announce_buf, sizeof(announce_buf),
841baabaca3SWarner Losh 		"%juMB (%ju %u byte sectors)",
842baabaca3SWarner Losh 	    (uintmax_t)((uintmax_t)disk->d_mediasize / (1024*1024)),
843baabaca3SWarner Losh 		(uintmax_t)disk->d_mediasize / disk->d_sectorsize,
844baabaca3SWarner Losh 		disk->d_sectorsize);
845baabaca3SWarner Losh 	xpt_announce_periph(periph, announce_buf);
846baabaca3SWarner Losh 	xpt_announce_quirks(periph, softc->quirks, NDA_Q_BIT_STRING);
847baabaca3SWarner Losh 
848baabaca3SWarner Losh 	/*
849baabaca3SWarner Losh 	 * Create our sysctl variables, now that we know
850baabaca3SWarner Losh 	 * we have successfully attached.
851baabaca3SWarner Losh 	 */
85299e7a4adSScott Long 	if (cam_periph_acquire(periph) == 0)
853baabaca3SWarner Losh 		taskqueue_enqueue(taskqueue_thread, &softc->sysctl_task);
854baabaca3SWarner Losh 
855baabaca3SWarner Losh 	/*
856baabaca3SWarner Losh 	 * Register for device going away and info about the drive
857baabaca3SWarner Losh 	 * changing (though with NVMe, it can't)
858baabaca3SWarner Losh 	 */
859baabaca3SWarner Losh 	xpt_register_async(AC_LOST_DEVICE | AC_ADVINFO_CHANGED,
860baabaca3SWarner Losh 	    ndaasync, periph, periph->path);
861baabaca3SWarner Losh 
862baabaca3SWarner Losh 	softc->state = NDA_STATE_NORMAL;
863baabaca3SWarner Losh 	return(CAM_REQ_CMP);
864baabaca3SWarner Losh }
865baabaca3SWarner Losh 
866baabaca3SWarner Losh static void
867baabaca3SWarner Losh ndastart(struct cam_periph *periph, union ccb *start_ccb)
868baabaca3SWarner Losh {
869baabaca3SWarner Losh 	struct nda_softc *softc = (struct nda_softc *)periph->softc;
870baabaca3SWarner Losh 	struct ccb_nvmeio *nvmeio = &start_ccb->nvmeio;
871baabaca3SWarner Losh 
872baabaca3SWarner Losh 	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart\n"));
873baabaca3SWarner Losh 
874baabaca3SWarner Losh 	switch (softc->state) {
875baabaca3SWarner Losh 	case NDA_STATE_NORMAL:
876baabaca3SWarner Losh 	{
877baabaca3SWarner Losh 		struct bio *bp;
878baabaca3SWarner Losh 
879baabaca3SWarner Losh 		bp = cam_iosched_next_bio(softc->cam_iosched);
880baabaca3SWarner Losh 		CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart: bio %p\n", bp));
881baabaca3SWarner Losh 		if (bp == NULL) {
882baabaca3SWarner Losh 			xpt_release_ccb(start_ccb);
883baabaca3SWarner Losh 			break;
884baabaca3SWarner Losh 		}
885baabaca3SWarner Losh 
886baabaca3SWarner Losh 		switch (bp->bio_cmd) {
887baabaca3SWarner Losh 		case BIO_WRITE:
888baabaca3SWarner Losh 			softc->flags |= NDA_FLAG_DIRTY;
889baabaca3SWarner Losh 			/* FALLTHROUGH */
890baabaca3SWarner Losh 		case BIO_READ:
891baabaca3SWarner Losh 		{
892*d38677d2SWarner Losh #ifdef CAM_TEST_FAILURE
893baabaca3SWarner Losh 			int fail = 0;
894baabaca3SWarner Losh 
895baabaca3SWarner Losh 			/*
896baabaca3SWarner Losh 			 * Support the failure ioctls.  If the command is a
897baabaca3SWarner Losh 			 * read, and there are pending forced read errors, or
898baabaca3SWarner Losh 			 * if a write and pending write errors, then fail this
899baabaca3SWarner Losh 			 * operation with EIO.  This is useful for testing
900baabaca3SWarner Losh 			 * purposes.  Also, support having every Nth read fail.
901baabaca3SWarner Losh 			 *
902baabaca3SWarner Losh 			 * This is a rather blunt tool.
903baabaca3SWarner Losh 			 */
904baabaca3SWarner Losh 			if (bp->bio_cmd == BIO_READ) {
905baabaca3SWarner Losh 				if (softc->force_read_error) {
906baabaca3SWarner Losh 					softc->force_read_error--;
907baabaca3SWarner Losh 					fail = 1;
908baabaca3SWarner Losh 				}
909baabaca3SWarner Losh 				if (softc->periodic_read_error > 0) {
910baabaca3SWarner Losh 					if (++softc->periodic_read_count >=
911baabaca3SWarner Losh 					    softc->periodic_read_error) {
912baabaca3SWarner Losh 						softc->periodic_read_count = 0;
913baabaca3SWarner Losh 						fail = 1;
914baabaca3SWarner Losh 					}
915baabaca3SWarner Losh 				}
916baabaca3SWarner Losh 			} else {
917baabaca3SWarner Losh 				if (softc->force_write_error) {
918baabaca3SWarner Losh 					softc->force_write_error--;
919baabaca3SWarner Losh 					fail = 1;
920baabaca3SWarner Losh 				}
921baabaca3SWarner Losh 			}
922baabaca3SWarner Losh 			if (fail) {
923baabaca3SWarner Losh 				biofinish(bp, NULL, EIO);
924baabaca3SWarner Losh 				xpt_release_ccb(start_ccb);
925baabaca3SWarner Losh 				ndaschedule(periph);
926baabaca3SWarner Losh 				return;
927baabaca3SWarner Losh 			}
928baabaca3SWarner Losh #endif
929baabaca3SWarner Losh 			KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 ||
930baabaca3SWarner Losh 			    round_page(bp->bio_bcount + bp->bio_ma_offset) /
931baabaca3SWarner Losh 			    PAGE_SIZE == bp->bio_ma_n,
932baabaca3SWarner Losh 			    ("Short bio %p", bp));
933baabaca3SWarner Losh 			nda_nvme_rw_bio(softc, &start_ccb->nvmeio, bp, bp->bio_cmd == BIO_READ ?
934baabaca3SWarner Losh 			    NVME_OPC_READ : NVME_OPC_WRITE);
935baabaca3SWarner Losh 			break;
936baabaca3SWarner Losh 		}
937baabaca3SWarner Losh 		case BIO_DELETE:
938baabaca3SWarner Losh 		{
939807e94b2SWarner Losh 			struct nvme_dsm_range *dsm_range, *dsm_end;
940807e94b2SWarner Losh 			struct nda_trim_request *trim;
941807e94b2SWarner Losh 			struct bio *bp1;
942807e94b2SWarner Losh 			int ents;
943baabaca3SWarner Losh 
944807e94b2SWarner Losh 			trim = malloc(sizeof(*trim), M_NVMEDA, M_ZERO | M_NOWAIT);
945807e94b2SWarner Losh 			if (trim == NULL) {
94660b7691dSWarner Losh 				biofinish(bp, NULL, ENOMEM);
94760b7691dSWarner Losh 				xpt_release_ccb(start_ccb);
94860b7691dSWarner Losh 				ndaschedule(periph);
94960b7691dSWarner Losh 				return;
95060b7691dSWarner Losh 			}
951807e94b2SWarner Losh 			TAILQ_INIT(&trim->bps);
952807e94b2SWarner Losh 			bp1 = bp;
953807e94b2SWarner Losh 			ents = sizeof(trim->data) / sizeof(struct nvme_dsm_range);
954807e94b2SWarner Losh 			ents = min(ents, nda_max_trim_entries);
955807e94b2SWarner Losh 			dsm_range = &trim->dsm;
956807e94b2SWarner Losh 			dsm_end = dsm_range + ents;
957807e94b2SWarner Losh 			do {
958807e94b2SWarner Losh 				TAILQ_INSERT_TAIL(&trim->bps, bp1, bio_queue);
959baabaca3SWarner Losh 				dsm_range->length =
960807e94b2SWarner Losh 				    htole32(bp1->bio_bcount / softc->disk->d_sectorsize);
961baabaca3SWarner Losh 				dsm_range->starting_lba =
962807e94b2SWarner Losh 				    htole32(bp1->bio_offset / softc->disk->d_sectorsize);
963807e94b2SWarner Losh 				dsm_range++;
964807e94b2SWarner Losh 				if (dsm_range >= dsm_end)
965807e94b2SWarner Losh 					break;
966807e94b2SWarner Losh 				bp1 = cam_iosched_next_trim(softc->cam_iosched);
967807e94b2SWarner Losh 				/* XXX -- Could collapse adjacent ranges, but we don't for now */
968807e94b2SWarner Losh 				/* XXX -- Could limit based on total payload size */
969807e94b2SWarner Losh 			} while (bp1 != NULL);
970807e94b2SWarner Losh 			start_ccb->ccb_trim = trim;
971807e94b2SWarner Losh 			softc->dsm_req++;
972807e94b2SWarner Losh 			nda_nvme_trim(softc, &start_ccb->nvmeio, &trim->dsm,
973807e94b2SWarner Losh 			    dsm_range - &trim->dsm);
974807e94b2SWarner Losh 			start_ccb->ccb_state = NDA_CCB_TRIM;
975851063e1SWarner Losh 			/*
976851063e1SWarner Losh 			 * Note: We can have multiple TRIMs in flight, so we don't call
977851063e1SWarner Losh 			 * cam_iosched_submit_trim(softc->cam_iosched);
978851063e1SWarner Losh 			 * since that forces the I/O scheduler to only schedule one at a time.
979851063e1SWarner Losh 			 * On NVMe drives, this is a performance disaster.
980851063e1SWarner Losh 			 */
981baabaca3SWarner Losh 			goto out;
982baabaca3SWarner Losh 		}
983baabaca3SWarner Losh 		case BIO_FLUSH:
984baabaca3SWarner Losh 			nda_nvme_flush(softc, nvmeio);
985baabaca3SWarner Losh 			break;
986baabaca3SWarner Losh 		}
987807e94b2SWarner Losh 		start_ccb->ccb_state = NDA_CCB_BUFFER_IO;
988807e94b2SWarner Losh 		start_ccb->ccb_bp = bp;
989baabaca3SWarner Losh out:
990807e94b2SWarner Losh 		start_ccb->ccb_h.flags |= CAM_UNLOCKED;
991baabaca3SWarner Losh 		softc->outstanding_cmds++;
992baabaca3SWarner Losh 		softc->refcount++;
993baabaca3SWarner Losh 		cam_periph_unlock(periph);
994baabaca3SWarner Losh 		xpt_action(start_ccb);
995baabaca3SWarner Losh 		cam_periph_lock(periph);
996baabaca3SWarner Losh 		softc->refcount--;
997baabaca3SWarner Losh 
998baabaca3SWarner Losh 		/* May have more work to do, so ensure we stay scheduled */
999baabaca3SWarner Losh 		ndaschedule(periph);
1000baabaca3SWarner Losh 		break;
1001baabaca3SWarner Losh 		}
1002baabaca3SWarner Losh 	}
1003baabaca3SWarner Losh }
1004baabaca3SWarner Losh 
1005baabaca3SWarner Losh static void
1006baabaca3SWarner Losh ndadone(struct cam_periph *periph, union ccb *done_ccb)
1007baabaca3SWarner Losh {
1008baabaca3SWarner Losh 	struct nda_softc *softc;
1009baabaca3SWarner Losh 	struct ccb_nvmeio *nvmeio = &done_ccb->nvmeio;
1010baabaca3SWarner Losh 	struct cam_path *path;
1011baabaca3SWarner Losh 	int state;
1012baabaca3SWarner Losh 
1013baabaca3SWarner Losh 	softc = (struct nda_softc *)periph->softc;
1014baabaca3SWarner Losh 	path = done_ccb->ccb_h.path;
1015baabaca3SWarner Losh 
1016baabaca3SWarner Losh 	CAM_DEBUG(path, CAM_DEBUG_TRACE, ("ndadone\n"));
1017baabaca3SWarner Losh 
1018807e94b2SWarner Losh 	state = nvmeio->ccb_state & NDA_CCB_TYPE_MASK;
1019baabaca3SWarner Losh 	switch (state) {
1020baabaca3SWarner Losh 	case NDA_CCB_BUFFER_IO:
1021baabaca3SWarner Losh 	case NDA_CCB_TRIM:
1022baabaca3SWarner Losh 	{
1023baabaca3SWarner Losh 		int error;
1024baabaca3SWarner Losh 
1025baabaca3SWarner Losh 		cam_periph_lock(periph);
1026baabaca3SWarner Losh 		if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1027baabaca3SWarner Losh 			error = ndaerror(done_ccb, 0, 0);
1028baabaca3SWarner Losh 			if (error == ERESTART) {
1029baabaca3SWarner Losh 				/* A retry was scheduled, so just return. */
1030baabaca3SWarner Losh 				cam_periph_unlock(periph);
1031baabaca3SWarner Losh 				return;
1032baabaca3SWarner Losh 			}
1033baabaca3SWarner Losh 			if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0)
1034baabaca3SWarner Losh 				cam_release_devq(path,
1035baabaca3SWarner Losh 						 /*relsim_flags*/0,
1036baabaca3SWarner Losh 						 /*reduction*/0,
1037baabaca3SWarner Losh 						 /*timeout*/0,
1038baabaca3SWarner Losh 						 /*getcount_only*/0);
1039baabaca3SWarner Losh 		} else {
1040baabaca3SWarner Losh 			if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0)
1041baabaca3SWarner Losh 				panic("REQ_CMP with QFRZN");
1042baabaca3SWarner Losh 			error = 0;
1043baabaca3SWarner Losh 		}
1044807e94b2SWarner Losh 		if (state == NDA_CCB_BUFFER_IO) {
1045807e94b2SWarner Losh 			struct bio *bp;
1046807e94b2SWarner Losh 
1047807e94b2SWarner Losh 			bp = (struct bio *)done_ccb->ccb_bp;
1048baabaca3SWarner Losh 			bp->bio_error = error;
1049baabaca3SWarner Losh 			if (error != 0) {
1050baabaca3SWarner Losh 				bp->bio_resid = bp->bio_bcount;
1051baabaca3SWarner Losh 				bp->bio_flags |= BIO_ERROR;
1052baabaca3SWarner Losh 			} else {
1053baabaca3SWarner Losh 				bp->bio_resid = 0;
1054baabaca3SWarner Losh 			}
1055baabaca3SWarner Losh 			softc->outstanding_cmds--;
1056baabaca3SWarner Losh 
10579754579bSWarner Losh 			/*
10589754579bSWarner Losh 			 * We need to call cam_iosched before we call biodone so that we
10599754579bSWarner Losh 			 * don't measure any activity that happens in the completion
10609754579bSWarner Losh 			 * routine, which in the case of sendfile can be quite
10619754579bSWarner Losh 			 * extensive.
10629754579bSWarner Losh 			 */
1063baabaca3SWarner Losh 			cam_iosched_bio_complete(softc->cam_iosched, bp, done_ccb);
1064baabaca3SWarner Losh 			xpt_release_ccb(done_ccb);
1065807e94b2SWarner Losh 			ndaschedule(periph);
1066807e94b2SWarner Losh 			cam_periph_unlock(periph);
1067807e94b2SWarner Losh 			biodone(bp);
1068807e94b2SWarner Losh 		} else { /* state == NDA_CCB_TRIM */
1069807e94b2SWarner Losh 			struct nda_trim_request *trim;
1070807e94b2SWarner Losh 			struct bio *bp1, *bp2;
10714d87e271SWarner Losh 			TAILQ_HEAD(, bio) queue;
1072baabaca3SWarner Losh 
1073807e94b2SWarner Losh 			trim = nvmeio->ccb_trim;
1074baabaca3SWarner Losh 			TAILQ_INIT(&queue);
1075807e94b2SWarner Losh 			TAILQ_CONCAT(&queue, &trim->bps, bio_queue);
1076807e94b2SWarner Losh 			free(trim, M_NVMEDA);
1077807e94b2SWarner Losh 
1078851063e1SWarner Losh 			/*
1079851063e1SWarner Losh 			 * Since we can have multiple trims in flight, we don't
1080851063e1SWarner Losh 			 * need to call this here.
1081851063e1SWarner Losh 			 * cam_iosched_trim_done(softc->cam_iosched);
1082851063e1SWarner Losh 			 */
1083807e94b2SWarner Losh 			/*
1084807e94b2SWarner Losh 			 * The the I/O scheduler that we're finishing the I/O
1085807e94b2SWarner Losh 			 * so we can keep book. The first one we pass in the CCB
1086807e94b2SWarner Losh 			 * which has the timing information. The rest we pass in NULL
1087807e94b2SWarner Losh 			 * so we can keep proper counts.
1088807e94b2SWarner Losh 			 */
1089807e94b2SWarner Losh 			bp1 = TAILQ_FIRST(&queue);
1090807e94b2SWarner Losh 			cam_iosched_bio_complete(softc->cam_iosched, bp1, done_ccb);
1091807e94b2SWarner Losh 			xpt_release_ccb(done_ccb);
1092baabaca3SWarner Losh 			ndaschedule(periph);
1093baabaca3SWarner Losh 			cam_periph_unlock(periph);
1094807e94b2SWarner Losh 			while ((bp2 = TAILQ_FIRST(&queue)) != NULL) {
1095807e94b2SWarner Losh 				TAILQ_REMOVE(&queue, bp2, bio_queue);
1096807e94b2SWarner Losh 				bp2->bio_error = error;
1097baabaca3SWarner Losh 				if (error != 0) {
1098807e94b2SWarner Losh 					bp2->bio_flags |= BIO_ERROR;
1099807e94b2SWarner Losh 					bp2->bio_resid = bp1->bio_bcount;
1100baabaca3SWarner Losh 				} else
1101807e94b2SWarner Losh 					bp2->bio_resid = 0;
1102807e94b2SWarner Losh 				if (bp1 != bp2)
1103807e94b2SWarner Losh 					cam_iosched_bio_complete(softc->cam_iosched, bp2, NULL);
1104807e94b2SWarner Losh 				biodone(bp2);
1105baabaca3SWarner Losh 			}
1106baabaca3SWarner Losh 		}
1107baabaca3SWarner Losh 		return;
1108baabaca3SWarner Losh 	}
1109baabaca3SWarner Losh 	case NDA_CCB_DUMP:
1110baabaca3SWarner Losh 		/* No-op.  We're polling */
1111baabaca3SWarner Losh 		return;
1112baabaca3SWarner Losh 	default:
1113baabaca3SWarner Losh 		break;
1114baabaca3SWarner Losh 	}
1115baabaca3SWarner Losh 	xpt_release_ccb(done_ccb);
1116baabaca3SWarner Losh }
1117baabaca3SWarner Losh 
1118baabaca3SWarner Losh static int
1119baabaca3SWarner Losh ndaerror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags)
1120baabaca3SWarner Losh {
1121baabaca3SWarner Losh 	struct nda_softc *softc;
1122baabaca3SWarner Losh 	struct cam_periph *periph;
1123baabaca3SWarner Losh 
1124baabaca3SWarner Losh 	periph = xpt_path_periph(ccb->ccb_h.path);
1125baabaca3SWarner Losh 	softc = (struct nda_softc *)periph->softc;
1126baabaca3SWarner Losh 
1127baabaca3SWarner Losh 	switch (ccb->ccb_h.status & CAM_STATUS_MASK) {
1128baabaca3SWarner Losh 	case CAM_CMD_TIMEOUT:
1129baabaca3SWarner Losh #ifdef CAM_IO_STATS
1130baabaca3SWarner Losh 		softc->timeouts++;
1131baabaca3SWarner Losh #endif
1132baabaca3SWarner Losh 		break;
1133baabaca3SWarner Losh 	case CAM_REQ_ABORTED:
1134baabaca3SWarner Losh 	case CAM_REQ_CMP_ERR:
1135baabaca3SWarner Losh 	case CAM_REQ_TERMIO:
1136baabaca3SWarner Losh 	case CAM_UNREC_HBA_ERROR:
1137baabaca3SWarner Losh 	case CAM_DATA_RUN_ERR:
1138baabaca3SWarner Losh 	case CAM_ATA_STATUS_ERROR:
1139baabaca3SWarner Losh #ifdef CAM_IO_STATS
1140baabaca3SWarner Losh 		softc->errors++;
1141baabaca3SWarner Losh #endif
1142baabaca3SWarner Losh 		break;
1143baabaca3SWarner Losh 	default:
1144baabaca3SWarner Losh 		break;
1145baabaca3SWarner Losh 	}
1146baabaca3SWarner Losh 
1147553484aeSWarner Losh 	return(cam_periph_error(ccb, cam_flags, sense_flags));
1148baabaca3SWarner Losh }
1149baabaca3SWarner Losh 
1150baabaca3SWarner Losh /*
1151baabaca3SWarner Losh  * Step through all NDA peripheral drivers, and if the device is still open,
1152baabaca3SWarner Losh  * sync the disk cache to physical media.
1153baabaca3SWarner Losh  */
1154baabaca3SWarner Losh static void
1155baabaca3SWarner Losh ndaflush(void)
1156baabaca3SWarner Losh {
1157baabaca3SWarner Losh 	struct cam_periph *periph;
1158baabaca3SWarner Losh 	struct nda_softc *softc;
1159baabaca3SWarner Losh 	union ccb *ccb;
1160baabaca3SWarner Losh 	int error;
1161baabaca3SWarner Losh 
1162baabaca3SWarner Losh 	CAM_PERIPH_FOREACH(periph, &ndadriver) {
1163baabaca3SWarner Losh 		softc = (struct nda_softc *)periph->softc;
11649d602e4eSWarner Losh 
1165baabaca3SWarner Losh 		if (SCHEDULER_STOPPED()) {
11669d602e4eSWarner Losh 			/*
11679d602e4eSWarner Losh 			 * If we paniced with the lock held or the periph is not
11689d602e4eSWarner Losh 			 * open, do not recurse.  Otherwise, call ndadump since
11699d602e4eSWarner Losh 			 * that avoids the sleeping cam_periph_getccb does if no
11709d602e4eSWarner Losh 			 * CCBs are available.
11719d602e4eSWarner Losh 			 */
1172baabaca3SWarner Losh 			if (!cam_periph_owned(periph) &&
1173baabaca3SWarner Losh 			    (softc->flags & NDA_FLAG_OPEN)) {
1174baabaca3SWarner Losh 				ndadump(softc->disk, NULL, 0, 0, 0);
1175baabaca3SWarner Losh 			}
1176baabaca3SWarner Losh 			continue;
1177baabaca3SWarner Losh 		}
11789d602e4eSWarner Losh 
1179baabaca3SWarner Losh 		/*
11809d602e4eSWarner Losh 		 * We only sync the cache if the drive is still open
1181baabaca3SWarner Losh 		 */
11829d602e4eSWarner Losh 		cam_periph_lock(periph);
1183baabaca3SWarner Losh 		if ((softc->flags & NDA_FLAG_OPEN) == 0) {
1184baabaca3SWarner Losh 			cam_periph_unlock(periph);
1185baabaca3SWarner Losh 			continue;
1186baabaca3SWarner Losh 		}
1187baabaca3SWarner Losh 
1188baabaca3SWarner Losh 		ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL);
1189baabaca3SWarner Losh 		nda_nvme_flush(softc, &ccb->nvmeio);
1190baabaca3SWarner Losh 		error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0,
1191baabaca3SWarner Losh 		    /*sense_flags*/ SF_NO_RECOVERY | SF_NO_RETRY,
1192baabaca3SWarner Losh 		    softc->disk->d_devstat);
1193baabaca3SWarner Losh 		if (error != 0)
1194baabaca3SWarner Losh 			xpt_print(periph->path, "Synchronize cache failed\n");
1195baabaca3SWarner Losh 		xpt_release_ccb(ccb);
1196baabaca3SWarner Losh 		cam_periph_unlock(periph);
1197baabaca3SWarner Losh 	}
1198baabaca3SWarner Losh }
1199baabaca3SWarner Losh 
/*
 * Shutdown hook: flush every open nda device.  Neither argument is used.
 * NOTE(review): presumably registered as a shutdown event handler --
 * confirm against the driver's init code.
 */
static void
ndashutdown(void *arg, int howto)
{

	ndaflush();
}
1206baabaca3SWarner Losh 
/*
 * Suspend hook: flush every open nda device.  The argument is not used.
 * NOTE(review): presumably registered as a power-suspend event handler --
 * confirm against the driver's init code.
 */
static void
ndasuspend(void *arg)
{

	ndaflush();
}
1213