xref: /freebsd/sys/cam/scsi/scsi_pass.c (revision 6bfca4dcab07dad45a805879d954876b353c0810)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 1997, 1998, 2000 Justin T. Gibbs.
5  * Copyright (c) 1997, 1998, 1999 Kenneth D. Merry.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions, and the following disclaimer,
13  *    without modification, immediately at the beginning of the file.
14  * 2. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
21  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
33 #include <sys/conf.h>
34 #include <sys/types.h>
35 #include <sys/bio.h>
36 #include <sys/bus.h>
37 #include <sys/devicestat.h>
38 #include <sys/errno.h>
39 #include <sys/fcntl.h>
40 #include <sys/malloc.h>
41 #include <sys/proc.h>
42 #include <sys/poll.h>
43 #include <sys/selinfo.h>
44 #include <sys/sdt.h>
45 #include <sys/sysent.h>
46 #include <sys/taskqueue.h>
47 #include <vm/uma.h>
48 #include <vm/vm.h>
49 #include <vm/vm_extern.h>
50 
51 #include <machine/bus.h>
52 
53 #include <cam/cam.h>
54 #include <cam/cam_ccb.h>
55 #include <cam/cam_periph.h>
56 #include <cam/cam_queue.h>
57 #include <cam/cam_xpt.h>
58 #include <cam/cam_xpt_periph.h>
59 #include <cam/cam_debug.h>
60 #include <cam/cam_compat.h>
61 #include <cam/cam_xpt_periph.h>
62 
63 #include <cam/scsi/scsi_all.h>
64 #include <cam/scsi/scsi_pass.h>
65 
/*
 * State bits stored in pass_softc.flags.  Each flag occupies one bit so
 * that several may be set at once.
 */
typedef enum {
	PASS_FLAG_OPEN			= 1 << 0,
	PASS_FLAG_LOCKED		= 1 << 1,
	/* Set by passoninvalidate(); passopen() refuses new opens. */
	PASS_FLAG_INVALID		= 1 << 2,
	/* Set once pass_add_physpath() has run. */
	PASS_FLAG_INITIAL_PHYSPATH	= 1 << 3,
	/* Creation of the pass(4) UMA zone has been started. */
	PASS_FLAG_ZONE_INPROG		= 1 << 4,
	/* The pass(4) UMA zone is valid. */
	PASS_FLAG_ZONE_VALID		= 1 << 5,
	/* The path inquiry reported PIM_UNMAPPED. */
	PASS_FLAG_UNMAPPED_CAPABLE	= 1 << 6,
	/* A periph reference is held on behalf of the abandoned queue. */
	PASS_FLAG_ABANDONED_REF_SET	= 1 << 7
} pass_flags;
76 
/*
 * Driver state machine values kept in pass_softc.state.  Only the normal
 * operating state exists.
 */
typedef enum {
	PASS_STATE_NORMAL = 0
} pass_state;
80 
/*
 * Tag stored in a CCB's ccb_type private field so that the completion
 * handler can tell how the CCB was issued.
 */
typedef enum {
	PASS_CCB_BUFFER_IO = 0,
	PASS_CCB_QUEUED_IO = 1	/* issued from the incoming queue by passstart() */
} pass_ccb_types;
85 
/*
 * Driver-local names for two peripheral-private fields of the CCB header.
 * passstart() stores a pass_ccb_types value in ccb_type and a pointer to
 * the owning pass_io_req in ccb_ioreq; passdone() reads them back.
 */
#define ccb_type	ppriv_field0
#define ccb_ioreq	ppriv_ptr1
88 
/*
 * The maximum number of memory segments we preallocate.  This sizes the
 * user_segs[] and kern_segs[] arrays embedded in struct pass_io_req.
 */
#define	PASS_MAX_SEGS	16
93 
/*
 * Per-request state bits stored in pass_io_req.flags.
 */
typedef enum {
	PASS_IO_NONE		= 0,
	PASS_IO_USER_SEG_MALLOC	= 1 << 0,
	PASS_IO_KERN_SEG_MALLOC	= 1 << 1,
	/* The request was moved to the abandoned queue; nobody will fetch it. */
	PASS_IO_ABANDONED	= 1 << 2
} pass_io_flags;
100 
/*
 * Bookkeeping for one queued (asynchronous) pass-through request.  A
 * request lives on exactly one of the softc queues (incoming, active,
 * abandoned or done) via its links member.
 */
struct pass_io_req {
	/*
	 * Kernel copy of the CCB.  passstart() merges it into the CCB that
	 * is actually sent; passdone() copies the completed CCB back here.
	 */
	union ccb			 ccb;
	/* CCB handed to passstart() for this request. */
	union ccb			*alloced_ccb;
	/* NOTE(review): presumably the user-space address of the CCB -- confirm. */
	union ccb			*user_ccb_ptr;
	camq_entry			 user_periph_links;
	ccb_ppriv_area			 user_periph_priv;
	struct cam_periph_map_info	 mapinfo;
	/* PASS_IO_* state bits. */
	pass_io_flags			 flags;
	ccb_flags			 data_flags;
	/* Preallocated segment lists, PASS_MAX_SEGS entries each. */
	int				 num_user_segs;
	bus_dma_segment_t		 user_segs[PASS_MAX_SEGS];
	int				 num_kern_segs;
	bus_dma_segment_t		 kern_segs[PASS_MAX_SEGS];
	bus_dma_segment_t		*user_segptr;
	bus_dma_segment_t		*kern_segptr;
	/* Per-buffer arrays, at most CAM_PERIPH_MAXMAPS entries. */
	int				 num_bufs;
	uint32_t			 dirs[CAM_PERIPH_MAXMAPS];
	uint32_t			 lengths[CAM_PERIPH_MAXMAPS];
	uint8_t				*user_bufs[CAM_PERIPH_MAXMAPS];
	uint8_t				*kern_bufs[CAM_PERIPH_MAXMAPS];
	/* Taken in passstart(); passed to devstat when the I/O completes. */
	struct bintime			 start_time;
	/* Linkage on one of the pass_softc request queues. */
	TAILQ_ENTRY(pass_io_req)	 links;
};
124 
/*
 * Per-device driver state, allocated in passregister() and released in
 * passcleanup().
 */
struct pass_softc {
	pass_state		  state;
	/* PASS_FLAG_* state bits. */
	pass_flags		  flags;
	/* Device type chosen in passregister() from the getdev protocol. */
	uint8_t		  pd_type;
	/* Outstanding opens; each one holds a periph reference. */
	int			  open_count;
	/* Maximum I/O size: cpi.maxio, capped at maxphys, or DFLTPHYS if 0. */
	u_int		 	  maxio;
	struct devstat		 *device_stats;
	/* devfs node created in passregister(). */
	struct cdev		 *dev;
	/* Physical-path alias maintained by pass_add_physpath(). */
	struct cdev		 *alias_dev;
	struct task		  add_physpath_task;
	struct task		  shutdown_kqueue_task;
	/* poll/kqueue notification state, signalled when I/O completes. */
	struct selinfo		  read_select;
	/* Requests waiting for passstart() to issue them. */
	TAILQ_HEAD(, pass_io_req) incoming_queue;
	/* Requests that have been issued and not yet completed. */
	TAILQ_HEAD(, pass_io_req) active_queue;
	/* Issued requests whose result nobody will collect. */
	TAILQ_HEAD(, pass_io_req) abandoned_queue;
	/* Completed requests waiting to be collected. */
	TAILQ_HEAD(, pass_io_req) done_queue;
	/* Back pointer to the owning peripheral. */
	struct cam_periph	 *periph;
	/* "<name><unit>" and "<name><unit>IO", formatted in passregister(). */
	char			  zone_name[12];
	char			  io_zone_name[12];
	/* UMA zones; may still be NULL when passcleanup() runs. */
	uma_zone_t		  pass_zone;
	uma_zone_t		  pass_io_zone;
	/* Initialized to maxphys in passregister(). */
	size_t			  io_zone_size;
};
148 
/* Character device entry points (see pass_cdevsw) and kqueue filter hooks. */
static	d_open_t	passopen;
static	d_close_t	passclose;
static	d_ioctl_t	passioctl;
static	d_ioctl_t	passdoioctl;
static	d_poll_t	passpoll;
static	d_kqfilter_t	passkqfilter;
static	void		passreadfiltdetach(struct knote *kn);
static	int		passreadfilt(struct knote *kn, long hint);

/* CAM peripheral driver callbacks and deferred tasks. */
static	periph_init_t	passinit;
static	periph_ctor_t	passregister;
static	periph_oninv_t	passoninvalidate;
static	periph_dtor_t	passcleanup;
static	periph_start_t	passstart;
static	void		pass_shutdown_kqueue(void *context, int pending);
static	void		pass_add_physpath(void *context, int pending);
static	void		passasync(void *callback_arg, uint32_t code,
				  struct cam_path *path, void *arg);
static	void		passdone(struct cam_periph *periph,
				 union ccb *done_ccb);
/* Internal helpers for request memory handling and CCB submission. */
static	int		passcreatezone(struct cam_periph *periph);
static	void		passiocleanup(struct pass_softc *softc,
				      struct pass_io_req *io_req);
static	int		passcopysglist(struct cam_periph *periph,
				       struct pass_io_req *io_req,
				       ccb_flags direction);
static	int		passmemsetup(struct cam_periph *periph,
				     struct pass_io_req *io_req);
static	int		passmemdone(struct cam_periph *periph,
				    struct pass_io_req *io_req);
static	int		passerror(union ccb *ccb, uint32_t cam_flags,
				  uint32_t sense_flags);
static 	int		passsendccb(struct cam_periph *periph, union ccb *ccb,
				    union ccb *inccb);
static	void		passflags(union ccb *ccb, uint32_t *cam_flags,
				  uint32_t *sense_flags);
185 
/*
 * Peripheral driver description: passinit() is the initialization routine,
 * "pass" is the driver name, followed by the (initially empty) unit list
 * and a zero generation count.
 */
static struct periph_driver passdriver =
{
	passinit, "pass",
	TAILQ_HEAD_INITIALIZER(passdriver.units), /* generation */ 0
};

/* Declare the driver to the CAM peripheral framework. */
PERIPHDRIVER_DECLARE(pass, passdriver);
193 
/*
 * Character device switch for the /dev/pass* nodes created in
 * passregister().
 * NOTE(review): D_TRACKCLOSE is set and passclose() decrements open_count
 * on every call, which presumably relies on each close being reported --
 * confirm against the devfs documentation.
 */
static struct cdevsw pass_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_TRACKCLOSE,
	.d_open =	passopen,
	.d_close =	passclose,
	.d_ioctl =	passioctl,
	.d_poll = 	passpoll,
	.d_kqfilter = 	passkqfilter,
	.d_name =	"pass",
};
204 
/*
 * kqueue filter operations for the read filter: the filter is attached to
 * a file descriptor (f_isfd) and uses passreadfiltdetach() and
 * passreadfilt() as its detach and event callbacks.
 */
static struct filterops passread_filtops = {
	.f_isfd	=	1,
	.f_detach =	passreadfiltdetach,
	.f_event =	passreadfilt
};
210 
/* malloc(9) type for this driver's pass-through buffers. */
static MALLOC_DEFINE(M_SCSIPASS, "scsi_pass", "scsi passthrough buffers");
212 
213 static void
214 passinit(void)
215 {
216 	cam_status status;
217 
218 	/*
219 	 * Install a global async callback.  This callback will
220 	 * receive async callbacks like "new device found".
221 	 */
222 	status = xpt_register_async(AC_FOUND_DEVICE, passasync, NULL, NULL);
223 
224 	if (status != CAM_REQ_CMP) {
225 		printf("pass: Failed to attach master async callback "
226 		       "due to status 0x%x!\n", status);
227 	}
228 
229 }
230 
231 static void
232 passrejectios(struct cam_periph *periph)
233 {
234 	struct pass_io_req *io_req, *io_req2;
235 	struct pass_softc *softc;
236 
237 	softc = (struct pass_softc *)periph->softc;
238 
239 	/*
240 	 * The user can no longer get status for I/O on the done queue, so
241 	 * clean up all outstanding I/O on the done queue.
242 	 */
243 	TAILQ_FOREACH_SAFE(io_req, &softc->done_queue, links, io_req2) {
244 		TAILQ_REMOVE(&softc->done_queue, io_req, links);
245 		passiocleanup(softc, io_req);
246 		uma_zfree(softc->pass_zone, io_req);
247 	}
248 
249 	/*
250 	 * The underlying device is gone, so we can't issue these I/Os.
251 	 * The devfs node has been shut down, so we can't return status to
252 	 * the user.  Free any I/O left on the incoming queue.
253 	 */
254 	TAILQ_FOREACH_SAFE(io_req, &softc->incoming_queue, links, io_req2) {
255 		TAILQ_REMOVE(&softc->incoming_queue, io_req, links);
256 		passiocleanup(softc, io_req);
257 		uma_zfree(softc->pass_zone, io_req);
258 	}
259 
260 	/*
261 	 * Normally we would put I/Os on the abandoned queue and acquire a
262 	 * reference when we saw the final close.  But, the device went
263 	 * away and devfs may have moved everything off to deadfs by the
264 	 * time the I/O done callback is called; as a result, we won't see
265 	 * any more closes.  So, if we have any active I/Os, we need to put
266 	 * them on the abandoned queue.  When the abandoned queue is empty,
267 	 * we'll release the remaining reference (see below) to the peripheral.
268 	 */
269 	TAILQ_FOREACH_SAFE(io_req, &softc->active_queue, links, io_req2) {
270 		TAILQ_REMOVE(&softc->active_queue, io_req, links);
271 		io_req->flags |= PASS_IO_ABANDONED;
272 		TAILQ_INSERT_TAIL(&softc->abandoned_queue, io_req, links);
273 	}
274 
275 	/*
276 	 * If we put any I/O on the abandoned queue, acquire a reference.
277 	 */
278 	if ((!TAILQ_EMPTY(&softc->abandoned_queue))
279 	 && ((softc->flags & PASS_FLAG_ABANDONED_REF_SET) == 0)) {
280 		cam_periph_doacquire(periph);
281 		softc->flags |= PASS_FLAG_ABANDONED_REF_SET;
282 	}
283 }
284 
285 static void
286 passdevgonecb(void *arg)
287 {
288 	struct cam_periph *periph;
289 	struct mtx *mtx;
290 	struct pass_softc *softc;
291 	int i;
292 
293 	periph = (struct cam_periph *)arg;
294 	mtx = cam_periph_mtx(periph);
295 	mtx_lock(mtx);
296 
297 	softc = (struct pass_softc *)periph->softc;
298 	KASSERT(softc->open_count >= 0, ("Negative open count %d",
299 		softc->open_count));
300 
301 	/*
302 	 * When we get this callback, we will get no more close calls from
303 	 * devfs.  So if we have any dangling opens, we need to release the
304 	 * reference held for that particular context.
305 	 */
306 	for (i = 0; i < softc->open_count; i++)
307 		cam_periph_release_locked(periph);
308 
309 	softc->open_count = 0;
310 
311 	/*
312 	 * Release the reference held for the device node, it is gone now.
313 	 * Accordingly, inform all queued I/Os of their fate.
314 	 */
315 	cam_periph_release_locked(periph);
316 	passrejectios(periph);
317 
318 	/*
319 	 * We reference the SIM lock directly here, instead of using
320 	 * cam_periph_unlock().  The reason is that the final call to
321 	 * cam_periph_release_locked() above could result in the periph
322 	 * getting freed.  If that is the case, dereferencing the periph
323 	 * with a cam_periph_unlock() call would cause a page fault.
324 	 */
325 	mtx_unlock(mtx);
326 
327 	/*
328 	 * We have to remove our kqueue context from a thread because it
329 	 * may sleep.  It would be nice if we could get a callback from
330 	 * kqueue when it is done cleaning up resources.
331 	 */
332 	taskqueue_enqueue(taskqueue_thread, &softc->shutdown_kqueue_task);
333 }
334 
335 static void
336 passoninvalidate(struct cam_periph *periph)
337 {
338 	struct pass_softc *softc;
339 
340 	softc = (struct pass_softc *)periph->softc;
341 
342 	/*
343 	 * De-register any async callbacks.
344 	 */
345 	xpt_register_async(0, passasync, periph, periph->path);
346 
347 	softc->flags |= PASS_FLAG_INVALID;
348 
349 	/*
350 	 * Tell devfs this device has gone away, and ask for a callback
351 	 * when it has cleaned up its state.
352 	 */
353 	destroy_dev_sched_cb(softc->dev, passdevgonecb, periph);
354 }
355 
356 static void
357 passcleanup(struct cam_periph *periph)
358 {
359 	struct pass_softc *softc;
360 
361 	softc = (struct pass_softc *)periph->softc;
362 
363 	cam_periph_assert(periph, MA_OWNED);
364 	KASSERT(TAILQ_EMPTY(&softc->active_queue),
365 		("%s called when there are commands on the active queue!\n",
366 		__func__));
367 	KASSERT(TAILQ_EMPTY(&softc->abandoned_queue),
368 		("%s called when there are commands on the abandoned queue!\n",
369 		__func__));
370 	KASSERT(TAILQ_EMPTY(&softc->incoming_queue),
371 		("%s called when there are commands on the incoming queue!\n",
372 		__func__));
373 	KASSERT(TAILQ_EMPTY(&softc->done_queue),
374 		("%s called when there are commands on the done queue!\n",
375 		__func__));
376 
377 	devstat_remove_entry(softc->device_stats);
378 
379 	cam_periph_unlock(periph);
380 
381 	/*
382 	 * We call taskqueue_drain() for the physpath task to make sure it
383 	 * is complete.  We drop the lock because this can potentially
384 	 * sleep.  XXX KDM that is bad.  Need a way to get a callback when
385 	 * a taskqueue is drained.
386 	 *
387  	 * Note that we don't drain the kqueue shutdown task queue.  This
388 	 * is because we hold a reference on the periph for kqueue, and
389 	 * release that reference from the kqueue shutdown task queue.  So
390 	 * we cannot come into this routine unless we've released that
391 	 * reference.  Also, because that could be the last reference, we
392 	 * could be called from the cam_periph_release() call in
393 	 * pass_shutdown_kqueue().  In that case, the taskqueue_drain()
394 	 * would deadlock.  It would be preferable if we had a way to
395 	 * get a callback when a taskqueue is done.
396 	 */
397 	taskqueue_drain(taskqueue_thread, &softc->add_physpath_task);
398 
399 	/*
400 	 * It should be safe to destroy the zones from here, because all
401 	 * of the references to this peripheral have been freed, and all
402 	 * I/O has been terminated and freed.  We check the zones for NULL
403 	 * because they may not have been allocated yet if the device went
404 	 * away before any asynchronous I/O has been issued.
405 	 */
406 	if (softc->pass_zone != NULL)
407 		uma_zdestroy(softc->pass_zone);
408 	if (softc->pass_io_zone != NULL)
409 		uma_zdestroy(softc->pass_io_zone);
410 
411 	cam_periph_lock(periph);
412 
413 	free(softc, M_DEVBUF);
414 }
415 
416 static void
417 pass_shutdown_kqueue(void *context, int pending)
418 {
419 	struct cam_periph *periph;
420 	struct pass_softc *softc;
421 
422 	periph = context;
423 	softc = periph->softc;
424 
425 	knlist_clear(&softc->read_select.si_note, /*is_locked*/ 0);
426 	knlist_destroy(&softc->read_select.si_note);
427 
428 	/*
429 	 * Release the reference we held for kqueue.
430 	 */
431 	cam_periph_release(periph);
432 }
433 
434 static void
435 pass_add_physpath(void *context, int pending)
436 {
437 	struct cam_periph *periph;
438 	struct pass_softc *softc;
439 	struct mtx *mtx;
440 	char *physpath;
441 
442 	/*
443 	 * If we have one, create a devfs alias for our
444 	 * physical path.
445 	 */
446 	periph = context;
447 	softc = periph->softc;
448 	physpath = malloc(MAXPATHLEN, M_DEVBUF, M_WAITOK);
449 	mtx = cam_periph_mtx(periph);
450 	mtx_lock(mtx);
451 
452 	if (periph->flags & CAM_PERIPH_INVALID)
453 		goto out;
454 
455 	if (xpt_getattr(physpath, MAXPATHLEN,
456 			"GEOM::physpath", periph->path) == 0
457 	 && strlen(physpath) != 0) {
458 		mtx_unlock(mtx);
459 		make_dev_physpath_alias(MAKEDEV_WAITOK | MAKEDEV_CHECKNAME,
460 				&softc->alias_dev, softc->dev,
461 				softc->alias_dev, physpath);
462 		mtx_lock(mtx);
463 	}
464 
465 out:
466 	/*
467 	 * Now that we've made our alias, we no longer have to have a
468 	 * reference to the device.
469 	 */
470 	if ((softc->flags & PASS_FLAG_INITIAL_PHYSPATH) == 0)
471 		softc->flags |= PASS_FLAG_INITIAL_PHYSPATH;
472 
473 	/*
474 	 * We always acquire a reference to the periph before queueing this
475 	 * task queue function, so it won't go away before we run.
476 	 */
477 	while (pending-- > 0)
478 		cam_periph_release_locked(periph);
479 	mtx_unlock(mtx);
480 
481 	free(physpath, M_DEVBUF);
482 }
483 
484 static void
485 passasync(void *callback_arg, uint32_t code,
486 	  struct cam_path *path, void *arg)
487 {
488 	struct cam_periph *periph;
489 
490 	periph = (struct cam_periph *)callback_arg;
491 
492 	switch (code) {
493 	case AC_FOUND_DEVICE:
494 	{
495 		struct ccb_getdev *cgd;
496 		cam_status status;
497 
498 		cgd = (struct ccb_getdev *)arg;
499 		if (cgd == NULL)
500 			break;
501 
502 		/*
503 		 * Allocate a peripheral instance for
504 		 * this device and start the probe
505 		 * process.
506 		 */
507 		status = cam_periph_alloc(passregister, passoninvalidate,
508 					  passcleanup, passstart, "pass",
509 					  CAM_PERIPH_BIO, path,
510 					  passasync, AC_FOUND_DEVICE, cgd);
511 
512 		if (status != CAM_REQ_CMP
513 		 && status != CAM_REQ_INPROG) {
514 			const struct cam_status_entry *entry;
515 
516 			entry = cam_fetch_status_entry(status);
517 
518 			printf("passasync: Unable to attach new device "
519 			       "due to status %#x: %s\n", status, entry ?
520 			       entry->status_text : "Unknown");
521 		}
522 
523 		break;
524 	}
525 	case AC_ADVINFO_CHANGED:
526 	{
527 		uintptr_t buftype;
528 
529 		buftype = (uintptr_t)arg;
530 		if (buftype == CDAI_TYPE_PHYS_PATH) {
531 			struct pass_softc *softc;
532 
533 			softc = (struct pass_softc *)periph->softc;
534 			/*
535 			 * Acquire a reference to the periph before we
536 			 * start the taskqueue, so that we don't run into
537 			 * a situation where the periph goes away before
538 			 * the task queue has a chance to run.
539 			 */
540 			if (cam_periph_acquire(periph) != 0)
541 				break;
542 
543 			taskqueue_enqueue(taskqueue_thread,
544 					  &softc->add_physpath_task);
545 		}
546 		break;
547 	}
548 	default:
549 		cam_periph_async(periph, code, path, arg);
550 		break;
551 	}
552 }
553 
554 static cam_status
555 passregister(struct cam_periph *periph, void *arg)
556 {
557 	struct pass_softc *softc;
558 	struct ccb_getdev *cgd;
559 	struct ccb_pathinq cpi;
560 	struct make_dev_args args;
561 	int error, no_tags;
562 
563 	cgd = (struct ccb_getdev *)arg;
564 	if (cgd == NULL) {
565 		printf("%s: no getdev CCB, can't register device\n", __func__);
566 		return(CAM_REQ_CMP_ERR);
567 	}
568 
569 	softc = (struct pass_softc *)malloc(sizeof(*softc),
570 					    M_DEVBUF, M_NOWAIT);
571 
572 	if (softc == NULL) {
573 		printf("%s: Unable to probe new device. "
574 		       "Unable to allocate softc\n", __func__);
575 		return(CAM_REQ_CMP_ERR);
576 	}
577 
578 	bzero(softc, sizeof(*softc));
579 	softc->state = PASS_STATE_NORMAL;
580 	if (cgd->protocol == PROTO_SCSI || cgd->protocol == PROTO_ATAPI)
581 		softc->pd_type = SID_TYPE(&cgd->inq_data);
582 	else if (cgd->protocol == PROTO_SATAPM)
583 		softc->pd_type = T_ENCLOSURE;
584 	else
585 		softc->pd_type = T_DIRECT;
586 
587 	periph->softc = softc;
588 	softc->periph = periph;
589 	TAILQ_INIT(&softc->incoming_queue);
590 	TAILQ_INIT(&softc->active_queue);
591 	TAILQ_INIT(&softc->abandoned_queue);
592 	TAILQ_INIT(&softc->done_queue);
593 	snprintf(softc->zone_name, sizeof(softc->zone_name), "%s%d",
594 		 periph->periph_name, periph->unit_number);
595 	snprintf(softc->io_zone_name, sizeof(softc->io_zone_name), "%s%dIO",
596 		 periph->periph_name, periph->unit_number);
597 	softc->io_zone_size = maxphys;
598 	knlist_init_mtx(&softc->read_select.si_note, cam_periph_mtx(periph));
599 
600 	xpt_path_inq(&cpi, periph->path);
601 
602 	if (cpi.maxio == 0)
603 		softc->maxio = DFLTPHYS;	/* traditional default */
604 	else if (cpi.maxio > maxphys)
605 		softc->maxio = maxphys;		/* for safety */
606 	else
607 		softc->maxio = cpi.maxio;	/* real value */
608 
609 	if (cpi.hba_misc & PIM_UNMAPPED)
610 		softc->flags |= PASS_FLAG_UNMAPPED_CAPABLE;
611 
612 	/*
613 	 * We pass in 0 for a blocksize, since we don't
614 	 * know what the blocksize of this device is, if
615 	 * it even has a blocksize.
616 	 */
617 	cam_periph_unlock(periph);
618 	no_tags = (cgd->inq_data.flags & SID_CmdQue) == 0;
619 	softc->device_stats = devstat_new_entry("pass",
620 			  periph->unit_number, 0,
621 			  DEVSTAT_NO_BLOCKSIZE
622 			  | (no_tags ? DEVSTAT_NO_ORDERED_TAGS : 0),
623 			  softc->pd_type |
624 			  XPORT_DEVSTAT_TYPE(cpi.transport) |
625 			  DEVSTAT_TYPE_PASS,
626 			  DEVSTAT_PRIORITY_PASS);
627 
628 	/*
629 	 * Initialize the taskqueue handler for shutting down kqueue.
630 	 */
631 	TASK_INIT(&softc->shutdown_kqueue_task, /*priority*/ 0,
632 		  pass_shutdown_kqueue, periph);
633 
634 	/*
635 	 * Acquire a reference to the periph that we can release once we've
636 	 * cleaned up the kqueue.
637 	 */
638 	if (cam_periph_acquire(periph) != 0) {
639 		xpt_print(periph->path, "%s: lost periph during "
640 			  "registration!\n", __func__);
641 		cam_periph_lock(periph);
642 		return (CAM_REQ_CMP_ERR);
643 	}
644 
645 	/*
646 	 * Acquire a reference to the periph before we create the devfs
647 	 * instance for it.  We'll release this reference once the devfs
648 	 * instance has been freed.
649 	 */
650 	if (cam_periph_acquire(periph) != 0) {
651 		xpt_print(periph->path, "%s: lost periph during "
652 			  "registration!\n", __func__);
653 		cam_periph_lock(periph);
654 		return (CAM_REQ_CMP_ERR);
655 	}
656 
657 	/* Register the device */
658 	make_dev_args_init(&args);
659 	args.mda_devsw = &pass_cdevsw;
660 	args.mda_unit = periph->unit_number;
661 	args.mda_uid = UID_ROOT;
662 	args.mda_gid = GID_OPERATOR;
663 	args.mda_mode = 0600;
664 	args.mda_si_drv1 = periph;
665 	args.mda_flags = MAKEDEV_NOWAIT;
666 	error = make_dev_s(&args, &softc->dev, "%s%d", periph->periph_name,
667 	    periph->unit_number);
668 	if (error != 0) {
669 		cam_periph_lock(periph);
670 		cam_periph_release_locked(periph);
671 		return (CAM_REQ_CMP_ERR);
672 	}
673 
674 	/*
675 	 * Hold a reference to the periph before we create the physical
676 	 * path alias so it can't go away.
677 	 */
678 	if (cam_periph_acquire(periph) != 0) {
679 		xpt_print(periph->path, "%s: lost periph during "
680 			  "registration!\n", __func__);
681 		cam_periph_lock(periph);
682 		return (CAM_REQ_CMP_ERR);
683 	}
684 
685 	cam_periph_lock(periph);
686 
687 	TASK_INIT(&softc->add_physpath_task, /*priority*/0,
688 		  pass_add_physpath, periph);
689 
690 	/*
691 	 * See if physical path information is already available.
692 	 */
693 	taskqueue_enqueue(taskqueue_thread, &softc->add_physpath_task);
694 
695 	/*
696 	 * Add an async callback so that we get notified if
697 	 * this device goes away or its physical path
698 	 * (stored in the advanced info data of the EDT) has
699 	 * changed.
700 	 */
701 	xpt_register_async(AC_LOST_DEVICE | AC_ADVINFO_CHANGED,
702 			   passasync, periph, periph->path);
703 
704 	if (bootverbose)
705 		xpt_announce_periph(periph, NULL);
706 
707 	return(CAM_REQ_CMP);
708 }
709 
710 static int
711 passopen(struct cdev *dev, int flags, int fmt, struct thread *td)
712 {
713 	struct cam_periph *periph;
714 	struct pass_softc *softc;
715 	int error;
716 
717 	periph = (struct cam_periph *)dev->si_drv1;
718 	if (cam_periph_acquire(periph) != 0)
719 		return (ENXIO);
720 
721 	cam_periph_lock(periph);
722 
723 	softc = (struct pass_softc *)periph->softc;
724 
725 	if (softc->flags & PASS_FLAG_INVALID) {
726 		cam_periph_release_locked(periph);
727 		cam_periph_unlock(periph);
728 		return(ENXIO);
729 	}
730 
731 	/*
732 	 * Don't allow access when we're running at a high securelevel.
733 	 */
734 	error = securelevel_gt(td->td_ucred, 1);
735 	if (error) {
736 		cam_periph_release_locked(periph);
737 		cam_periph_unlock(periph);
738 		return(error);
739 	}
740 
741 	/*
742 	 * Only allow read-write access.
743 	 */
744 	if (((flags & FWRITE) == 0) || ((flags & FREAD) == 0)) {
745 		cam_periph_release_locked(periph);
746 		cam_periph_unlock(periph);
747 		return(EPERM);
748 	}
749 
750 	/*
751 	 * We don't allow nonblocking access.
752 	 */
753 	if ((flags & O_NONBLOCK) != 0) {
754 		xpt_print(periph->path, "can't do nonblocking access\n");
755 		cam_periph_release_locked(periph);
756 		cam_periph_unlock(periph);
757 		return(EINVAL);
758 	}
759 
760 	softc->open_count++;
761 
762 	cam_periph_unlock(periph);
763 
764 	return (error);
765 }
766 
767 static int
768 passclose(struct cdev *dev, int flag, int fmt, struct thread *td)
769 {
770 	struct 	cam_periph *periph;
771 	struct  pass_softc *softc;
772 	struct mtx *mtx;
773 
774 	periph = (struct cam_periph *)dev->si_drv1;
775 	mtx = cam_periph_mtx(periph);
776 	mtx_lock(mtx);
777 
778 	softc = periph->softc;
779 	softc->open_count--;
780 
781 	if (softc->open_count == 0) {
782 		struct pass_io_req *io_req, *io_req2;
783 
784 		TAILQ_FOREACH_SAFE(io_req, &softc->done_queue, links, io_req2) {
785 			TAILQ_REMOVE(&softc->done_queue, io_req, links);
786 			passiocleanup(softc, io_req);
787 			uma_zfree(softc->pass_zone, io_req);
788 		}
789 
790 		TAILQ_FOREACH_SAFE(io_req, &softc->incoming_queue, links,
791 				   io_req2) {
792 			TAILQ_REMOVE(&softc->incoming_queue, io_req, links);
793 			passiocleanup(softc, io_req);
794 			uma_zfree(softc->pass_zone, io_req);
795 		}
796 
797 		/*
798 		 * If there are any active I/Os, we need to forcibly acquire a
799 		 * reference to the peripheral so that we don't go away
800 		 * before they complete.  We'll release the reference when
801 		 * the abandoned queue is empty.
802 		 */
803 		io_req = TAILQ_FIRST(&softc->active_queue);
804 		if ((io_req != NULL)
805 		 && (softc->flags & PASS_FLAG_ABANDONED_REF_SET) == 0) {
806 			cam_periph_doacquire(periph);
807 			softc->flags |= PASS_FLAG_ABANDONED_REF_SET;
808 		}
809 
810 		/*
811 		 * Since the I/O in the active queue is not under our
812 		 * control, just set a flag so that we can clean it up when
813 		 * it completes and put it on the abandoned queue.  This
814 		 * will prevent our sending spurious completions in the
815 		 * event that the device is opened again before these I/Os
816 		 * complete.
817 		 */
818 		TAILQ_FOREACH_SAFE(io_req, &softc->active_queue, links,
819 				   io_req2) {
820 			TAILQ_REMOVE(&softc->active_queue, io_req, links);
821 			io_req->flags |= PASS_IO_ABANDONED;
822 			TAILQ_INSERT_TAIL(&softc->abandoned_queue, io_req,
823 					  links);
824 		}
825 	}
826 
827 	cam_periph_release_locked(periph);
828 
829 	/*
830 	 * We reference the lock directly here, instead of using
831 	 * cam_periph_unlock().  The reason is that the call to
832 	 * cam_periph_release_locked() above could result in the periph
833 	 * getting freed.  If that is the case, dereferencing the periph
834 	 * with a cam_periph_unlock() call would cause a page fault.
835 	 *
836 	 * cam_periph_release() avoids this problem using the same method,
837 	 * but we're manually acquiring and dropping the lock here to
838 	 * protect the open count and avoid another lock acquisition and
839 	 * release.
840 	 */
841 	mtx_unlock(mtx);
842 
843 	return (0);
844 }
845 
846 static void
847 passstart(struct cam_periph *periph, union ccb *start_ccb)
848 {
849 	struct pass_softc *softc;
850 
851 	softc = (struct pass_softc *)periph->softc;
852 
853 	switch (softc->state) {
854 	case PASS_STATE_NORMAL: {
855 		struct pass_io_req *io_req;
856 
857 		/*
858 		 * Check for any queued I/O requests that require an
859 		 * allocated slot.
860 		 */
861 		io_req = TAILQ_FIRST(&softc->incoming_queue);
862 		if (io_req == NULL) {
863 			xpt_release_ccb(start_ccb);
864 			break;
865 		}
866 		TAILQ_REMOVE(&softc->incoming_queue, io_req, links);
867 		TAILQ_INSERT_TAIL(&softc->active_queue, io_req, links);
868 		/*
869 		 * Merge the user's CCB into the allocated CCB.
870 		 */
871 		xpt_merge_ccb(start_ccb, &io_req->ccb);
872 		start_ccb->ccb_h.ccb_type = PASS_CCB_QUEUED_IO;
873 		start_ccb->ccb_h.ccb_ioreq = io_req;
874 		start_ccb->ccb_h.cbfcnp = passdone;
875 		io_req->alloced_ccb = start_ccb;
876 		binuptime(&io_req->start_time);
877 		devstat_start_transaction(softc->device_stats,
878 					  &io_req->start_time);
879 
880 		xpt_action(start_ccb);
881 
882 		/*
883 		 * If we have any more I/O waiting, schedule ourselves again.
884 		 */
885 		if (!TAILQ_EMPTY(&softc->incoming_queue))
886 			xpt_schedule(periph, CAM_PRIORITY_NORMAL);
887 		break;
888 	}
889 	default:
890 		break;
891 	}
892 }
893 
894 static void
895 passdone(struct cam_periph *periph, union ccb *done_ccb)
896 {
897 	struct pass_softc *softc;
898 	struct ccb_scsiio *csio;
899 
900 	softc = (struct pass_softc *)periph->softc;
901 
902 	cam_periph_assert(periph, MA_OWNED);
903 
904 	csio = &done_ccb->csio;
905 	switch (csio->ccb_h.ccb_type) {
906 	case PASS_CCB_QUEUED_IO: {
907 		struct pass_io_req *io_req;
908 
909 		io_req = done_ccb->ccb_h.ccb_ioreq;
910 #if 0
911 		xpt_print(periph->path, "%s: called for user CCB %p\n",
912 			  __func__, io_req->user_ccb_ptr);
913 #endif
914 		if (((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) &&
915 		    ((io_req->flags & PASS_IO_ABANDONED) == 0)) {
916 			int error;
917 			uint32_t cam_flags, sense_flags;
918 
919 			passflags(done_ccb, &cam_flags, &sense_flags);
920 			error = passerror(done_ccb, cam_flags, sense_flags);
921 
922 			if (error == ERESTART) {
923 				KASSERT(((sense_flags & SF_NO_RETRY) == 0),
924 				    ("passerror returned ERESTART with no retry requested\n"));
925 				return;
926 			}
927 		}
928 
929 		/*
930 		 * Copy the allocated CCB contents back to the malloced CCB
931 		 * so we can give status back to the user when he requests it.
932 		 */
933 		bcopy(done_ccb, &io_req->ccb, sizeof(*done_ccb));
934 
935 		/*
936 		 * Log data/transaction completion with devstat(9).
937 		 */
938 		switch (done_ccb->ccb_h.func_code) {
939 		case XPT_SCSI_IO:
940 			devstat_end_transaction(softc->device_stats,
941 			    done_ccb->csio.dxfer_len - done_ccb->csio.resid,
942 			    done_ccb->csio.tag_action & 0x3,
943 			    ((done_ccb->ccb_h.flags & CAM_DIR_MASK) ==
944 			    CAM_DIR_NONE) ? DEVSTAT_NO_DATA :
945 			    (done_ccb->ccb_h.flags & CAM_DIR_OUT) ?
946 			    DEVSTAT_WRITE : DEVSTAT_READ, NULL,
947 			    &io_req->start_time);
948 			break;
949 		case XPT_ATA_IO:
950 			devstat_end_transaction(softc->device_stats,
951 			    done_ccb->ataio.dxfer_len - done_ccb->ataio.resid,
952 			    0, /* Not used in ATA */
953 			    ((done_ccb->ccb_h.flags & CAM_DIR_MASK) ==
954 			    CAM_DIR_NONE) ? DEVSTAT_NO_DATA :
955 			    (done_ccb->ccb_h.flags & CAM_DIR_OUT) ?
956 			    DEVSTAT_WRITE : DEVSTAT_READ, NULL,
957 			    &io_req->start_time);
958 			break;
959 		case XPT_SMP_IO:
960 			/*
961 			 * XXX KDM this isn't quite right, but there isn't
962 			 * currently an easy way to represent a bidirectional
963 			 * transfer in devstat.  The only way to do it
964 			 * and have the byte counts come out right would
965 			 * mean that we would have to record two
966 			 * transactions, one for the request and one for the
967 			 * response.  For now, so that we report something,
968 			 * just treat the entire thing as a read.
969 			 */
970 			devstat_end_transaction(softc->device_stats,
971 			    done_ccb->smpio.smp_request_len +
972 			    done_ccb->smpio.smp_response_len,
973 			    DEVSTAT_TAG_SIMPLE, DEVSTAT_READ, NULL,
974 			    &io_req->start_time);
975 			break;
976 		default:
977 			devstat_end_transaction(softc->device_stats, 0,
978 			    DEVSTAT_TAG_NONE, DEVSTAT_NO_DATA, NULL,
979 			    &io_req->start_time);
980 			break;
981 		}
982 
983 		/*
984 		 * In the normal case, take the completed I/O off of the
985 		 * active queue and put it on the done queue.  Notitfy the
986 		 * user that we have a completed I/O.
987 		 */
988 		if ((io_req->flags & PASS_IO_ABANDONED) == 0) {
989 			TAILQ_REMOVE(&softc->active_queue, io_req, links);
990 			TAILQ_INSERT_TAIL(&softc->done_queue, io_req, links);
991 			selwakeuppri(&softc->read_select, PRIBIO);
992 			KNOTE_LOCKED(&softc->read_select.si_note, 0);
993 		} else {
994 			/*
995 			 * In the case of an abandoned I/O (final close
996 			 * without fetching the I/O), take it off of the
997 			 * abandoned queue and free it.
998 			 */
999 			TAILQ_REMOVE(&softc->abandoned_queue, io_req, links);
1000 			passiocleanup(softc, io_req);
1001 			uma_zfree(softc->pass_zone, io_req);
1002 
1003 			/*
1004 			 * Release the done_ccb here, since we may wind up
1005 			 * freeing the peripheral when we decrement the
1006 			 * reference count below.
1007 			 */
1008 			xpt_release_ccb(done_ccb);
1009 
1010 			/*
1011 			 * If the abandoned queue is empty, we can release
1012 			 * our reference to the periph since we won't have
1013 			 * any more completions coming.
1014 			 */
1015 			if ((TAILQ_EMPTY(&softc->abandoned_queue))
1016 			 && (softc->flags & PASS_FLAG_ABANDONED_REF_SET)) {
1017 				softc->flags &= ~PASS_FLAG_ABANDONED_REF_SET;
1018 				cam_periph_release_locked(periph);
1019 			}
1020 
1021 			/*
1022 			 * We have already released the CCB, so we can
1023 			 * return.
1024 			 */
1025 			return;
1026 		}
1027 		break;
1028 	}
1029 	}
1030 	xpt_release_ccb(done_ccb);
1031 }
1032 
1033 static int
1034 passcreatezone(struct cam_periph *periph)
1035 {
1036 	struct pass_softc *softc;
1037 	int error;
1038 
1039 	error = 0;
1040 	softc = (struct pass_softc *)periph->softc;
1041 
1042 	cam_periph_assert(periph, MA_OWNED);
1043 	KASSERT(((softc->flags & PASS_FLAG_ZONE_VALID) == 0),
1044 		("%s called when the pass(4) zone is valid!\n", __func__));
1045 	KASSERT((softc->pass_zone == NULL),
1046 		("%s called when the pass(4) zone is allocated!\n", __func__));
1047 
1048 	if ((softc->flags & PASS_FLAG_ZONE_INPROG) == 0) {
1049 		/*
1050 		 * We're the first context through, so we need to create
1051 		 * the pass(4) UMA zone for I/O requests.
1052 		 */
1053 		softc->flags |= PASS_FLAG_ZONE_INPROG;
1054 
1055 		/*
1056 		 * uma_zcreate() does a blocking (M_WAITOK) allocation,
1057 		 * so we cannot hold a mutex while we call it.
1058 		 */
1059 		cam_periph_unlock(periph);
1060 
1061 		softc->pass_zone = uma_zcreate(softc->zone_name,
1062 		    sizeof(struct pass_io_req), NULL, NULL, NULL, NULL,
1063 		    /*align*/ 0, /*flags*/ 0);
1064 
1065 		softc->pass_io_zone = uma_zcreate(softc->io_zone_name,
1066 		    softc->io_zone_size, NULL, NULL, NULL, NULL,
1067 		    /*align*/ 0, /*flags*/ 0);
1068 
1069 		cam_periph_lock(periph);
1070 
1071 		if ((softc->pass_zone == NULL)
1072 		 || (softc->pass_io_zone == NULL)) {
1073 			if (softc->pass_zone == NULL)
1074 				xpt_print(periph->path, "unable to allocate "
1075 				    "IO Req UMA zone\n");
1076 			else
1077 				xpt_print(periph->path, "unable to allocate "
1078 				    "IO UMA zone\n");
1079 			softc->flags &= ~PASS_FLAG_ZONE_INPROG;
1080 			goto bailout;
1081 		}
1082 
1083 		/*
1084 		 * Set the flags appropriately and notify any other waiters.
1085 		 */
1086 		softc->flags &= ~PASS_FLAG_ZONE_INPROG;
1087 		softc->flags |= PASS_FLAG_ZONE_VALID;
1088 		wakeup(&softc->pass_zone);
1089 	} else {
1090 		/*
1091 		 * In this case, the UMA zone has not yet been created, but
1092 		 * another context is in the process of creating it.  We
1093 		 * need to sleep until the creation is either done or has
1094 		 * failed.
1095 		 */
1096 		while ((softc->flags & PASS_FLAG_ZONE_INPROG)
1097 		    && ((softc->flags & PASS_FLAG_ZONE_VALID) == 0)) {
1098 			error = msleep(&softc->pass_zone,
1099 				       cam_periph_mtx(periph), PRIBIO,
1100 				       "paszon", 0);
1101 			if (error != 0)
1102 				goto bailout;
1103 		}
1104 		/*
1105 		 * If the zone creation failed, no luck for the user.
1106 		 */
1107 		if ((softc->flags & PASS_FLAG_ZONE_VALID) == 0){
1108 			error = ENOMEM;
1109 			goto bailout;
1110 		}
1111 	}
1112 bailout:
1113 	return (error);
1114 }
1115 
1116 static void
1117 passiocleanup(struct pass_softc *softc, struct pass_io_req *io_req)
1118 {
1119 	union ccb *ccb;
1120 	uint8_t **data_ptrs[CAM_PERIPH_MAXMAPS];
1121 	int i, numbufs;
1122 
1123 	ccb = &io_req->ccb;
1124 
1125 	switch (ccb->ccb_h.func_code) {
1126 	case XPT_DEV_MATCH:
1127 		numbufs = min(io_req->num_bufs, 2);
1128 
1129 		if (numbufs == 1) {
1130 			data_ptrs[0] = (uint8_t **)&ccb->cdm.matches;
1131 		} else {
1132 			data_ptrs[0] = (uint8_t **)&ccb->cdm.patterns;
1133 			data_ptrs[1] = (uint8_t **)&ccb->cdm.matches;
1134 		}
1135 		break;
1136 	case XPT_SCSI_IO:
1137 	case XPT_CONT_TARGET_IO:
1138 		data_ptrs[0] = &ccb->csio.data_ptr;
1139 		numbufs = min(io_req->num_bufs, 1);
1140 		break;
1141 	case XPT_ATA_IO:
1142 		data_ptrs[0] = &ccb->ataio.data_ptr;
1143 		numbufs = min(io_req->num_bufs, 1);
1144 		break;
1145 	case XPT_SMP_IO:
1146 		numbufs = min(io_req->num_bufs, 2);
1147 		data_ptrs[0] = &ccb->smpio.smp_request;
1148 		data_ptrs[1] = &ccb->smpio.smp_response;
1149 		break;
1150 	case XPT_DEV_ADVINFO:
1151 		numbufs = min(io_req->num_bufs, 1);
1152 		data_ptrs[0] = (uint8_t **)&ccb->cdai.buf;
1153 		break;
1154 	case XPT_NVME_IO:
1155 	case XPT_NVME_ADMIN:
1156 		data_ptrs[0] = &ccb->nvmeio.data_ptr;
1157 		numbufs = min(io_req->num_bufs, 1);
1158 		break;
1159 	default:
1160 		/* allow ourselves to be swapped once again */
1161 		return;
1162 		break; /* NOTREACHED */
1163 	}
1164 
1165 	if (io_req->flags & PASS_IO_USER_SEG_MALLOC) {
1166 		free(io_req->user_segptr, M_SCSIPASS);
1167 		io_req->user_segptr = NULL;
1168 	}
1169 
1170 	/*
1171 	 * We only want to free memory we malloced.
1172 	 */
1173 	if (io_req->data_flags == CAM_DATA_VADDR) {
1174 		for (i = 0; i < io_req->num_bufs; i++) {
1175 			if (io_req->kern_bufs[i] == NULL)
1176 				continue;
1177 
1178 			free(io_req->kern_bufs[i], M_SCSIPASS);
1179 			io_req->kern_bufs[i] = NULL;
1180 		}
1181 	} else if (io_req->data_flags == CAM_DATA_SG) {
1182 		for (i = 0; i < io_req->num_kern_segs; i++) {
1183 			if ((uint8_t *)(uintptr_t)
1184 			    io_req->kern_segptr[i].ds_addr == NULL)
1185 				continue;
1186 
1187 			uma_zfree(softc->pass_io_zone, (uint8_t *)(uintptr_t)
1188 			    io_req->kern_segptr[i].ds_addr);
1189 			io_req->kern_segptr[i].ds_addr = 0;
1190 		}
1191 	}
1192 
1193 	if (io_req->flags & PASS_IO_KERN_SEG_MALLOC) {
1194 		free(io_req->kern_segptr, M_SCSIPASS);
1195 		io_req->kern_segptr = NULL;
1196 	}
1197 
1198 	if (io_req->data_flags != CAM_DATA_PADDR) {
1199 		for (i = 0; i < numbufs; i++) {
1200 			/*
1201 			 * Restore the user's buffer pointers to their
1202 			 * previous values.
1203 			 */
1204 			if (io_req->user_bufs[i] != NULL)
1205 				*data_ptrs[i] = io_req->user_bufs[i];
1206 		}
1207 	}
1208 
1209 }
1210 
1211 static int
1212 passcopysglist(struct cam_periph *periph, struct pass_io_req *io_req,
1213 	       ccb_flags direction)
1214 {
1215 	bus_size_t kern_watermark, user_watermark, len_to_copy;
1216 	bus_dma_segment_t *user_sglist, *kern_sglist;
1217 	int i, j, error;
1218 
1219 	error = 0;
1220 	kern_watermark = 0;
1221 	user_watermark = 0;
1222 	len_to_copy = 0;
1223 	user_sglist = io_req->user_segptr;
1224 	kern_sglist = io_req->kern_segptr;
1225 
1226 	for (i = 0, j = 0; i < io_req->num_user_segs &&
1227 	     j < io_req->num_kern_segs;) {
1228 		uint8_t *user_ptr, *kern_ptr;
1229 
1230 		len_to_copy = min(user_sglist[i].ds_len -user_watermark,
1231 		    kern_sglist[j].ds_len - kern_watermark);
1232 
1233 		user_ptr = (uint8_t *)(uintptr_t)user_sglist[i].ds_addr;
1234 		user_ptr = user_ptr + user_watermark;
1235 		kern_ptr = (uint8_t *)(uintptr_t)kern_sglist[j].ds_addr;
1236 		kern_ptr = kern_ptr + kern_watermark;
1237 
1238 		user_watermark += len_to_copy;
1239 		kern_watermark += len_to_copy;
1240 
1241 		if (direction == CAM_DIR_IN) {
1242 			error = copyout(kern_ptr, user_ptr, len_to_copy);
1243 			if (error != 0) {
1244 				xpt_print(periph->path, "%s: copyout of %u "
1245 					  "bytes from %p to %p failed with "
1246 					  "error %d\n", __func__, len_to_copy,
1247 					  kern_ptr, user_ptr, error);
1248 				goto bailout;
1249 			}
1250 		} else {
1251 			error = copyin(user_ptr, kern_ptr, len_to_copy);
1252 			if (error != 0) {
1253 				xpt_print(periph->path, "%s: copyin of %u "
1254 					  "bytes from %p to %p failed with "
1255 					  "error %d\n", __func__, len_to_copy,
1256 					  user_ptr, kern_ptr, error);
1257 				goto bailout;
1258 			}
1259 		}
1260 
1261 		if (user_sglist[i].ds_len == user_watermark) {
1262 			i++;
1263 			user_watermark = 0;
1264 		}
1265 
1266 		if (kern_sglist[j].ds_len == kern_watermark) {
1267 			j++;
1268 			kern_watermark = 0;
1269 		}
1270 	}
1271 
1272 bailout:
1273 
1274 	return (error);
1275 }
1276 
1277 static int
1278 passmemsetup(struct cam_periph *periph, struct pass_io_req *io_req)
1279 {
1280 	union ccb *ccb;
1281 	struct pass_softc *softc;
1282 	int numbufs, i;
1283 	uint8_t **data_ptrs[CAM_PERIPH_MAXMAPS];
1284 	uint32_t lengths[CAM_PERIPH_MAXMAPS];
1285 	uint32_t dirs[CAM_PERIPH_MAXMAPS];
1286 	uint32_t num_segs;
1287 	uint16_t *seg_cnt_ptr;
1288 	size_t maxmap;
1289 	int error;
1290 
1291 	cam_periph_assert(periph, MA_NOTOWNED);
1292 
1293 	softc = periph->softc;
1294 
1295 	error = 0;
1296 	ccb = &io_req->ccb;
1297 	maxmap = 0;
1298 	num_segs = 0;
1299 	seg_cnt_ptr = NULL;
1300 
1301 	switch(ccb->ccb_h.func_code) {
1302 	case XPT_DEV_MATCH:
1303 		if (ccb->cdm.match_buf_len == 0) {
1304 			printf("%s: invalid match buffer length 0\n", __func__);
1305 			return(EINVAL);
1306 		}
1307 		if (ccb->cdm.pattern_buf_len > 0) {
1308 			data_ptrs[0] = (uint8_t **)&ccb->cdm.patterns;
1309 			lengths[0] = ccb->cdm.pattern_buf_len;
1310 			dirs[0] = CAM_DIR_OUT;
1311 			data_ptrs[1] = (uint8_t **)&ccb->cdm.matches;
1312 			lengths[1] = ccb->cdm.match_buf_len;
1313 			dirs[1] = CAM_DIR_IN;
1314 			numbufs = 2;
1315 		} else {
1316 			data_ptrs[0] = (uint8_t **)&ccb->cdm.matches;
1317 			lengths[0] = ccb->cdm.match_buf_len;
1318 			dirs[0] = CAM_DIR_IN;
1319 			numbufs = 1;
1320 		}
1321 		io_req->data_flags = CAM_DATA_VADDR;
1322 		break;
1323 	case XPT_SCSI_IO:
1324 	case XPT_CONT_TARGET_IO:
1325 		if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_NONE)
1326 			return(0);
1327 
1328 		/*
1329 		 * The user shouldn't be able to supply a bio.
1330 		 */
1331 		if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO)
1332 			return (EINVAL);
1333 
1334 		io_req->data_flags = ccb->ccb_h.flags & CAM_DATA_MASK;
1335 
1336 		data_ptrs[0] = &ccb->csio.data_ptr;
1337 		lengths[0] = ccb->csio.dxfer_len;
1338 		dirs[0] = ccb->ccb_h.flags & CAM_DIR_MASK;
1339 		num_segs = ccb->csio.sglist_cnt;
1340 		seg_cnt_ptr = &ccb->csio.sglist_cnt;
1341 		numbufs = 1;
1342 		maxmap = softc->maxio;
1343 		break;
1344 	case XPT_ATA_IO:
1345 		if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_NONE)
1346 			return(0);
1347 
1348 		/*
1349 		 * We only support a single virtual address for ATA I/O.
1350 		 */
1351 		if ((ccb->ccb_h.flags & CAM_DATA_MASK) != CAM_DATA_VADDR)
1352 			return (EINVAL);
1353 
1354 		io_req->data_flags = CAM_DATA_VADDR;
1355 
1356 		data_ptrs[0] = &ccb->ataio.data_ptr;
1357 		lengths[0] = ccb->ataio.dxfer_len;
1358 		dirs[0] = ccb->ccb_h.flags & CAM_DIR_MASK;
1359 		numbufs = 1;
1360 		maxmap = softc->maxio;
1361 		break;
1362 	case XPT_SMP_IO:
1363 		io_req->data_flags = CAM_DATA_VADDR;
1364 
1365 		data_ptrs[0] = &ccb->smpio.smp_request;
1366 		lengths[0] = ccb->smpio.smp_request_len;
1367 		dirs[0] = CAM_DIR_OUT;
1368 		data_ptrs[1] = &ccb->smpio.smp_response;
1369 		lengths[1] = ccb->smpio.smp_response_len;
1370 		dirs[1] = CAM_DIR_IN;
1371 		numbufs = 2;
1372 		maxmap = softc->maxio;
1373 		break;
1374 	case XPT_DEV_ADVINFO:
1375 		if (ccb->cdai.bufsiz == 0)
1376 			return (0);
1377 
1378 		io_req->data_flags = CAM_DATA_VADDR;
1379 
1380 		data_ptrs[0] = (uint8_t **)&ccb->cdai.buf;
1381 		lengths[0] = ccb->cdai.bufsiz;
1382 		dirs[0] = CAM_DIR_IN;
1383 		numbufs = 1;
1384 		break;
1385 	case XPT_NVME_ADMIN:
1386 	case XPT_NVME_IO:
1387 		if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_NONE)
1388 			return (0);
1389 
1390 		io_req->data_flags = ccb->ccb_h.flags & CAM_DATA_MASK;
1391 
1392 		data_ptrs[0] = &ccb->nvmeio.data_ptr;
1393 		lengths[0] = ccb->nvmeio.dxfer_len;
1394 		dirs[0] = ccb->ccb_h.flags & CAM_DIR_MASK;
1395 		num_segs = ccb->nvmeio.sglist_cnt;
1396 		seg_cnt_ptr = &ccb->nvmeio.sglist_cnt;
1397 		numbufs = 1;
1398 		maxmap = softc->maxio;
1399 		break;
1400 	default:
1401 		return(EINVAL);
1402 		break; /* NOTREACHED */
1403 	}
1404 
1405 	io_req->num_bufs = numbufs;
1406 
1407 	/*
1408 	 * If there is a maximum, check to make sure that the user's
1409 	 * request fits within the limit.  In general, we should only have
1410 	 * a maximum length for requests that go to hardware.  Otherwise it
1411 	 * is whatever we're able to malloc.
1412 	 */
1413 	for (i = 0; i < numbufs; i++) {
1414 		io_req->user_bufs[i] = *data_ptrs[i];
1415 		io_req->dirs[i] = dirs[i];
1416 		io_req->lengths[i] = lengths[i];
1417 
1418 		if (maxmap == 0)
1419 			continue;
1420 
1421 		if (lengths[i] <= maxmap)
1422 			continue;
1423 
1424 		xpt_print(periph->path, "%s: data length %u > max allowed %u "
1425 			  "bytes\n", __func__, lengths[i], maxmap);
1426 		error = EINVAL;
1427 		goto bailout;
1428 	}
1429 
1430 	switch (io_req->data_flags) {
1431 	case CAM_DATA_VADDR:
1432 		/* Map or copy the buffer into kernel address space */
1433 		for (i = 0; i < numbufs; i++) {
1434 			uint8_t *tmp_buf;
1435 
1436 			/*
1437 			 * If for some reason no length is specified, we
1438 			 * don't need to allocate anything.
1439 			 */
1440 			if (io_req->lengths[i] == 0)
1441 				continue;
1442 
1443 			tmp_buf = malloc(lengths[i], M_SCSIPASS,
1444 					 M_WAITOK | M_ZERO);
1445 			io_req->kern_bufs[i] = tmp_buf;
1446 			*data_ptrs[i] = tmp_buf;
1447 
1448 #if 0
1449 			xpt_print(periph->path, "%s: malloced %p len %u, user "
1450 				  "buffer %p, operation: %s\n", __func__,
1451 				  tmp_buf, lengths[i], io_req->user_bufs[i],
1452 				  (dirs[i] == CAM_DIR_IN) ? "read" : "write");
1453 #endif
1454 			/*
1455 			 * We only need to copy in if the user is writing.
1456 			 */
1457 			if (dirs[i] != CAM_DIR_OUT)
1458 				continue;
1459 
1460 			error = copyin(io_req->user_bufs[i],
1461 				       io_req->kern_bufs[i], lengths[i]);
1462 			if (error != 0) {
1463 				xpt_print(periph->path, "%s: copy of user "
1464 					  "buffer from %p to %p failed with "
1465 					  "error %d\n", __func__,
1466 					  io_req->user_bufs[i],
1467 					  io_req->kern_bufs[i], error);
1468 				goto bailout;
1469 			}
1470 		}
1471 		break;
1472 	case CAM_DATA_PADDR:
1473 		/* Pass down the pointer as-is */
1474 		break;
1475 	case CAM_DATA_SG: {
1476 		size_t sg_length, size_to_go, alloc_size;
1477 		uint32_t num_segs_needed;
1478 
1479 		/*
1480 		 * Copy the user S/G list in, and then copy in the
1481 		 * individual segments.
1482 		 */
1483 		/*
1484 		 * We shouldn't see this, but check just in case.
1485 		 */
1486 		if (numbufs != 1) {
1487 			xpt_print(periph->path, "%s: cannot currently handle "
1488 				  "more than one S/G list per CCB\n", __func__);
1489 			error = EINVAL;
1490 			goto bailout;
1491 		}
1492 
1493 		/*
1494 		 * We have to have at least one segment.
1495 		 */
1496 		if (num_segs == 0) {
1497 			xpt_print(periph->path, "%s: CAM_DATA_SG flag set, "
1498 				  "but sglist_cnt=0!\n", __func__);
1499 			error = EINVAL;
1500 			goto bailout;
1501 		}
1502 
1503 		/*
1504 		 * Make sure the user specified the total length and didn't
1505 		 * just leave it to us to decode the S/G list.
1506 		 */
1507 		if (lengths[0] == 0) {
1508 			xpt_print(periph->path, "%s: no dxfer_len specified, "
1509 				  "but CAM_DATA_SG flag is set!\n", __func__);
1510 			error = EINVAL;
1511 			goto bailout;
1512 		}
1513 
1514 		/*
1515 		 * We allocate buffers in io_zone_size increments for an
1516 		 * S/G list.  This will generally be maxphys.
1517 		 */
1518 		if (lengths[0] <= softc->io_zone_size)
1519 			num_segs_needed = 1;
1520 		else {
1521 			num_segs_needed = lengths[0] / softc->io_zone_size;
1522 			if ((lengths[0] % softc->io_zone_size) != 0)
1523 				num_segs_needed++;
1524 		}
1525 
1526 		/* Figure out the size of the S/G list */
1527 		sg_length = num_segs * sizeof(bus_dma_segment_t);
1528 		io_req->num_user_segs = num_segs;
1529 		io_req->num_kern_segs = num_segs_needed;
1530 
1531 		/* Save the user's S/G list pointer for later restoration */
1532 		io_req->user_bufs[0] = *data_ptrs[0];
1533 
1534 		/*
1535 		 * If we have enough segments allocated by default to handle
1536 		 * the length of the user's S/G list,
1537 		 */
1538 		if (num_segs > PASS_MAX_SEGS) {
1539 			io_req->user_segptr = malloc(sizeof(bus_dma_segment_t) *
1540 			    num_segs, M_SCSIPASS, M_WAITOK | M_ZERO);
1541 			io_req->flags |= PASS_IO_USER_SEG_MALLOC;
1542 		} else
1543 			io_req->user_segptr = io_req->user_segs;
1544 
1545 		error = copyin(*data_ptrs[0], io_req->user_segptr, sg_length);
1546 		if (error != 0) {
1547 			xpt_print(periph->path, "%s: copy of user S/G list "
1548 				  "from %p to %p failed with error %d\n",
1549 				  __func__, *data_ptrs[0], io_req->user_segptr,
1550 				  error);
1551 			goto bailout;
1552 		}
1553 
1554 		if (num_segs_needed > PASS_MAX_SEGS) {
1555 			io_req->kern_segptr = malloc(sizeof(bus_dma_segment_t) *
1556 			    num_segs_needed, M_SCSIPASS, M_WAITOK | M_ZERO);
1557 			io_req->flags |= PASS_IO_KERN_SEG_MALLOC;
1558 		} else {
1559 			io_req->kern_segptr = io_req->kern_segs;
1560 		}
1561 
1562 		/*
1563 		 * Allocate the kernel S/G list.
1564 		 */
1565 		for (size_to_go = lengths[0], i = 0;
1566 		     size_to_go > 0 && i < num_segs_needed;
1567 		     i++, size_to_go -= alloc_size) {
1568 			uint8_t *kern_ptr;
1569 
1570 			alloc_size = min(size_to_go, softc->io_zone_size);
1571 			kern_ptr = uma_zalloc(softc->pass_io_zone, M_WAITOK);
1572 			io_req->kern_segptr[i].ds_addr =
1573 			    (bus_addr_t)(uintptr_t)kern_ptr;
1574 			io_req->kern_segptr[i].ds_len = alloc_size;
1575 		}
1576 		if (size_to_go > 0) {
1577 			printf("%s: size_to_go = %zu, software error!\n",
1578 			       __func__, size_to_go);
1579 			error = EINVAL;
1580 			goto bailout;
1581 		}
1582 
1583 		*data_ptrs[0] = (uint8_t *)io_req->kern_segptr;
1584 		*seg_cnt_ptr = io_req->num_kern_segs;
1585 
1586 		/*
1587 		 * We only need to copy data here if the user is writing.
1588 		 */
1589 		if (dirs[0] == CAM_DIR_OUT)
1590 			error = passcopysglist(periph, io_req, dirs[0]);
1591 		break;
1592 	}
1593 	case CAM_DATA_SG_PADDR: {
1594 		size_t sg_length;
1595 
1596 		/*
1597 		 * We shouldn't see this, but check just in case.
1598 		 */
1599 		if (numbufs != 1) {
1600 			printf("%s: cannot currently handle more than one "
1601 			       "S/G list per CCB\n", __func__);
1602 			error = EINVAL;
1603 			goto bailout;
1604 		}
1605 
1606 		/*
1607 		 * We have to have at least one segment.
1608 		 */
1609 		if (num_segs == 0) {
1610 			xpt_print(periph->path, "%s: CAM_DATA_SG_PADDR flag "
1611 				  "set, but sglist_cnt=0!\n", __func__);
1612 			error = EINVAL;
1613 			goto bailout;
1614 		}
1615 
1616 		/*
1617 		 * Make sure the user specified the total length and didn't
1618 		 * just leave it to us to decode the S/G list.
1619 		 */
1620 		if (lengths[0] == 0) {
1621 			xpt_print(periph->path, "%s: no dxfer_len specified, "
1622 				  "but CAM_DATA_SG flag is set!\n", __func__);
1623 			error = EINVAL;
1624 			goto bailout;
1625 		}
1626 
1627 		/* Figure out the size of the S/G list */
1628 		sg_length = num_segs * sizeof(bus_dma_segment_t);
1629 		io_req->num_user_segs = num_segs;
1630 		io_req->num_kern_segs = io_req->num_user_segs;
1631 
1632 		/* Save the user's S/G list pointer for later restoration */
1633 		io_req->user_bufs[0] = *data_ptrs[0];
1634 
1635 		if (num_segs > PASS_MAX_SEGS) {
1636 			io_req->user_segptr = malloc(sizeof(bus_dma_segment_t) *
1637 			    num_segs, M_SCSIPASS, M_WAITOK | M_ZERO);
1638 			io_req->flags |= PASS_IO_USER_SEG_MALLOC;
1639 		} else
1640 			io_req->user_segptr = io_req->user_segs;
1641 
1642 		io_req->kern_segptr = io_req->user_segptr;
1643 
1644 		error = copyin(*data_ptrs[0], io_req->user_segptr, sg_length);
1645 		if (error != 0) {
1646 			xpt_print(periph->path, "%s: copy of user S/G list "
1647 				  "from %p to %p failed with error %d\n",
1648 				  __func__, *data_ptrs[0], io_req->user_segptr,
1649 				  error);
1650 			goto bailout;
1651 		}
1652 		break;
1653 	}
1654 	default:
1655 	case CAM_DATA_BIO:
1656 		/*
1657 		 * A user shouldn't be attaching a bio to the CCB.  It
1658 		 * isn't a user-accessible structure.
1659 		 */
1660 		error = EINVAL;
1661 		break;
1662 	}
1663 
1664 bailout:
1665 	if (error != 0)
1666 		passiocleanup(softc, io_req);
1667 
1668 	return (error);
1669 }
1670 
1671 static int
1672 passmemdone(struct cam_periph *periph, struct pass_io_req *io_req)
1673 {
1674 	struct pass_softc *softc;
1675 	int error;
1676 	int i;
1677 
1678 	error = 0;
1679 	softc = (struct pass_softc *)periph->softc;
1680 
1681 	switch (io_req->data_flags) {
1682 	case CAM_DATA_VADDR:
1683 		/*
1684 		 * Copy back to the user buffer if this was a read.
1685 		 */
1686 		for (i = 0; i < io_req->num_bufs; i++) {
1687 			if (io_req->dirs[i] != CAM_DIR_IN)
1688 				continue;
1689 
1690 			error = copyout(io_req->kern_bufs[i],
1691 			    io_req->user_bufs[i], io_req->lengths[i]);
1692 			if (error != 0) {
1693 				xpt_print(periph->path, "Unable to copy %u "
1694 					  "bytes from %p to user address %p\n",
1695 					  io_req->lengths[i],
1696 					  io_req->kern_bufs[i],
1697 					  io_req->user_bufs[i]);
1698 				goto bailout;
1699 			}
1700 		}
1701 		break;
1702 	case CAM_DATA_PADDR:
1703 		/* Do nothing.  The pointer is a physical address already */
1704 		break;
1705 	case CAM_DATA_SG:
1706 		/*
1707 		 * Copy back to the user buffer if this was a read.
1708 		 * Restore the user's S/G list buffer pointer.
1709 		 */
1710 		if (io_req->dirs[0] == CAM_DIR_IN)
1711 			error = passcopysglist(periph, io_req, io_req->dirs[0]);
1712 		break;
1713 	case CAM_DATA_SG_PADDR:
1714 		/*
1715 		 * Restore the user's S/G list buffer pointer.  No need to
1716 		 * copy.
1717 		 */
1718 		break;
1719 	default:
1720 	case CAM_DATA_BIO:
1721 		error = EINVAL;
1722 		break;
1723 	}
1724 
1725 bailout:
1726 	/*
1727 	 * Reset the user's pointers to their original values and free
1728 	 * allocated memory.
1729 	 */
1730 	passiocleanup(softc, io_req);
1731 
1732 	return (error);
1733 }
1734 
1735 static int
1736 passioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td)
1737 {
1738 	int error;
1739 
1740 	if ((error = passdoioctl(dev, cmd, addr, flag, td)) == ENOTTY) {
1741 		error = cam_compat_ioctl(dev, cmd, addr, flag, td, passdoioctl);
1742 	}
1743 	return (error);
1744 }
1745 
1746 static int
1747 passdoioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td)
1748 {
1749 	struct	cam_periph *periph;
1750 	struct	pass_softc *softc;
1751 	int	error;
1752 	uint32_t priority;
1753 
1754 	periph = (struct cam_periph *)dev->si_drv1;
1755 	cam_periph_lock(periph);
1756 	softc = (struct pass_softc *)periph->softc;
1757 
1758 	error = 0;
1759 
1760 	switch (cmd) {
1761 	case CAMIOCOMMAND:
1762 	{
1763 		union ccb *inccb;
1764 		union ccb *ccb;
1765 		int ccb_malloced;
1766 
1767 		inccb = (union ccb *)addr;
1768 #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
1769 		if (inccb->ccb_h.func_code == XPT_SCSI_IO)
1770 			inccb->csio.bio = NULL;
1771 #endif
1772 
1773 		if (inccb->ccb_h.flags & CAM_UNLOCKED) {
1774 			error = EINVAL;
1775 			break;
1776 		}
1777 
1778 		/*
1779 		 * Some CCB types, like scan bus and scan lun can only go
1780 		 * through the transport layer device.
1781 		 */
1782 		if (inccb->ccb_h.func_code & XPT_FC_XPT_ONLY) {
1783 			xpt_print(periph->path, "CCB function code %#x is "
1784 			    "restricted to the XPT device\n",
1785 			    inccb->ccb_h.func_code);
1786 			error = ENODEV;
1787 			break;
1788 		}
1789 
1790 		/* Compatibility for RL/priority-unaware code. */
1791 		priority = inccb->ccb_h.pinfo.priority;
1792 		if (priority <= CAM_PRIORITY_OOB)
1793 		    priority += CAM_PRIORITY_OOB + 1;
1794 
1795 		/*
1796 		 * Non-immediate CCBs need a CCB from the per-device pool
1797 		 * of CCBs, which is scheduled by the transport layer.
1798 		 * Immediate CCBs and user-supplied CCBs should just be
1799 		 * malloced.
1800 		 */
1801 		if ((inccb->ccb_h.func_code & XPT_FC_QUEUED)
1802 		 && ((inccb->ccb_h.func_code & XPT_FC_USER_CCB) == 0)) {
1803 			ccb = cam_periph_getccb(periph, priority);
1804 			ccb_malloced = 0;
1805 		} else {
1806 			ccb = xpt_alloc_ccb_nowait();
1807 
1808 			if (ccb != NULL)
1809 				xpt_setup_ccb(&ccb->ccb_h, periph->path,
1810 					      priority);
1811 			ccb_malloced = 1;
1812 		}
1813 
1814 		if (ccb == NULL) {
1815 			xpt_print(periph->path, "unable to allocate CCB\n");
1816 			error = ENOMEM;
1817 			break;
1818 		}
1819 
1820 		error = passsendccb(periph, ccb, inccb);
1821 
1822 		if (ccb_malloced)
1823 			xpt_free_ccb(ccb);
1824 		else
1825 			xpt_release_ccb(ccb);
1826 
1827 		break;
1828 	}
1829 	case CAMIOQUEUE:
1830 	{
1831 		struct pass_io_req *io_req;
1832 		union ccb **user_ccb, *ccb;
1833 		xpt_opcode fc;
1834 
1835 #ifdef COMPAT_FREEBSD32
1836 		if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
1837 			error = ENOTTY;
1838 			goto bailout;
1839 		}
1840 #endif
1841 		if ((softc->flags & PASS_FLAG_ZONE_VALID) == 0) {
1842 			error = passcreatezone(periph);
1843 			if (error != 0)
1844 				goto bailout;
1845 		}
1846 
1847 		/*
1848 		 * We're going to do a blocking allocation for this I/O
1849 		 * request, so we have to drop the lock.
1850 		 */
1851 		cam_periph_unlock(periph);
1852 
1853 		io_req = uma_zalloc(softc->pass_zone, M_WAITOK | M_ZERO);
1854 		ccb = &io_req->ccb;
1855 		user_ccb = (union ccb **)addr;
1856 
1857 		/*
1858 		 * Unlike the CAMIOCOMMAND ioctl above, we only have a
1859 		 * pointer to the user's CCB, so we have to copy the whole
1860 		 * thing in to a buffer we have allocated (above) instead
1861 		 * of allowing the ioctl code to malloc a buffer and copy
1862 		 * it in.
1863 		 *
1864 		 * This is an advantage for this asynchronous interface,
1865 		 * since we don't want the memory to get freed while the
1866 		 * CCB is outstanding.
1867 		 */
1868 #if 0
1869 		xpt_print(periph->path, "Copying user CCB %p to "
1870 			  "kernel address %p\n", *user_ccb, ccb);
1871 #endif
1872 		error = copyin(*user_ccb, ccb, sizeof(*ccb));
1873 		if (error != 0) {
1874 			xpt_print(periph->path, "Copy of user CCB %p to "
1875 				  "kernel address %p failed with error %d\n",
1876 				  *user_ccb, ccb, error);
1877 			goto camioqueue_error;
1878 		}
1879 #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
1880 		if (ccb->ccb_h.func_code == XPT_SCSI_IO)
1881 			ccb->csio.bio = NULL;
1882 #endif
1883 
1884 		if (ccb->ccb_h.flags & CAM_UNLOCKED) {
1885 			error = EINVAL;
1886 			goto camioqueue_error;
1887 		}
1888 
1889 		if (ccb->ccb_h.flags & CAM_CDB_POINTER) {
1890 			if (ccb->csio.cdb_len > IOCDBLEN) {
1891 				error = EINVAL;
1892 				goto camioqueue_error;
1893 			}
1894 			error = copyin(ccb->csio.cdb_io.cdb_ptr,
1895 			    ccb->csio.cdb_io.cdb_bytes, ccb->csio.cdb_len);
1896 			if (error != 0)
1897 				goto camioqueue_error;
1898 			ccb->ccb_h.flags &= ~CAM_CDB_POINTER;
1899 		}
1900 
1901 		/*
1902 		 * Some CCB types, like scan bus and scan lun can only go
1903 		 * through the transport layer device.
1904 		 */
1905 		if (ccb->ccb_h.func_code & XPT_FC_XPT_ONLY) {
1906 			xpt_print(periph->path, "CCB function code %#x is "
1907 			    "restricted to the XPT device\n",
1908 			    ccb->ccb_h.func_code);
1909 			error = ENODEV;
1910 			goto camioqueue_error;
1911 		}
1912 
1913 		/*
1914 		 * Save the user's CCB pointer as well as his linked list
1915 		 * pointers and peripheral private area so that we can
1916 		 * restore these later.
1917 		 */
1918 		io_req->user_ccb_ptr = *user_ccb;
1919 		io_req->user_periph_links = ccb->ccb_h.periph_links;
1920 		io_req->user_periph_priv = ccb->ccb_h.periph_priv;
1921 
1922 		/*
1923 		 * Now that we've saved the user's values, we can set our
1924 		 * own peripheral private entry.
1925 		 */
1926 		ccb->ccb_h.ccb_ioreq = io_req;
1927 
1928 		/* Compatibility for RL/priority-unaware code. */
1929 		priority = ccb->ccb_h.pinfo.priority;
1930 		if (priority <= CAM_PRIORITY_OOB)
1931 		    priority += CAM_PRIORITY_OOB + 1;
1932 
1933 		/*
1934 		 * Setup fields in the CCB like the path and the priority.
1935 		 * The path in particular cannot be done in userland, since
1936 		 * it is a pointer to a kernel data structure.
1937 		 */
1938 		xpt_setup_ccb_flags(&ccb->ccb_h, periph->path, priority,
1939 				    ccb->ccb_h.flags);
1940 
1941 		/*
1942 		 * Setup our done routine.  There is no way for the user to
1943 		 * have a valid pointer here.
1944 		 */
1945 		ccb->ccb_h.cbfcnp = passdone;
1946 
1947 		fc = ccb->ccb_h.func_code;
1948 		/*
1949 		 * If this function code has memory that can be mapped in
1950 		 * or out, we need to call passmemsetup().
1951 		 */
1952 		if ((fc == XPT_SCSI_IO) || (fc == XPT_ATA_IO)
1953 		 || (fc == XPT_SMP_IO) || (fc == XPT_DEV_MATCH)
1954 		 || (fc == XPT_DEV_ADVINFO)
1955 		 || (fc == XPT_NVME_ADMIN) || (fc == XPT_NVME_IO)) {
1956 			error = passmemsetup(periph, io_req);
1957 			if (error != 0)
1958 				goto camioqueue_error;
1959 		} else
1960 			io_req->mapinfo.num_bufs_used = 0;
1961 
1962 		cam_periph_lock(periph);
1963 
1964 		/*
1965 		 * Everything goes on the incoming queue initially.
1966 		 */
1967 		TAILQ_INSERT_TAIL(&softc->incoming_queue, io_req, links);
1968 
1969 		/*
1970 		 * If the CCB is queued, and is not a user CCB, then
1971 		 * we need to allocate a slot for it.  Call xpt_schedule()
1972 		 * so that our start routine will get called when a CCB is
1973 		 * available.
1974 		 */
1975 		if ((fc & XPT_FC_QUEUED)
1976 		 && ((fc & XPT_FC_USER_CCB) == 0)) {
1977 			xpt_schedule(periph, priority);
1978 			break;
1979 		}
1980 
1981 		/*
1982 		 * At this point, the CCB in question is either an
1983 		 * immediate CCB (like XPT_DEV_ADVINFO) or it is a user CCB
1984 		 * and therefore should be malloced, not allocated via a slot.
1985 		 * Remove the CCB from the incoming queue and add it to the
1986 		 * active queue.
1987 		 */
1988 		TAILQ_REMOVE(&softc->incoming_queue, io_req, links);
1989 		TAILQ_INSERT_TAIL(&softc->active_queue, io_req, links);
1990 
1991 		xpt_action(ccb);
1992 
1993 		/*
1994 		 * If this is not a queued CCB (i.e. it is an immediate CCB),
1995 		 * then it is already done.  We need to put it on the done
1996 		 * queue for the user to fetch.
1997 		 */
1998 		if ((fc & XPT_FC_QUEUED) == 0) {
1999 			TAILQ_REMOVE(&softc->active_queue, io_req, links);
2000 			TAILQ_INSERT_TAIL(&softc->done_queue, io_req, links);
2001 		}
2002 		break;
2003 
2004 camioqueue_error:
2005 		uma_zfree(softc->pass_zone, io_req);
2006 		cam_periph_lock(periph);
2007 		break;
2008 	}
2009 	case CAMIOGET:
2010 	{
2011 		union ccb **user_ccb;
2012 		struct pass_io_req *io_req;
2013 		int old_error;
2014 
2015 #ifdef COMPAT_FREEBSD32
2016 		if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
2017 			error = ENOTTY;
2018 			goto bailout;
2019 		}
2020 #endif
2021 		user_ccb = (union ccb **)addr;
2022 		old_error = 0;
2023 
2024 		io_req = TAILQ_FIRST(&softc->done_queue);
2025 		if (io_req == NULL) {
2026 			error = ENOENT;
2027 			break;
2028 		}
2029 
2030 		/*
2031 		 * Remove the I/O from the done queue.
2032 		 */
2033 		TAILQ_REMOVE(&softc->done_queue, io_req, links);
2034 
2035 		/*
2036 		 * We have to drop the lock during the copyout because the
2037 		 * copyout can result in VM faults that require sleeping.
2038 		 */
2039 		cam_periph_unlock(periph);
2040 
2041 		/*
2042 		 * Do any needed copies (e.g. for reads) and revert the
2043 		 * pointers in the CCB back to the user's pointers.
2044 		 */
2045 		error = passmemdone(periph, io_req);
2046 
2047 		old_error = error;
2048 
2049 		io_req->ccb.ccb_h.periph_links = io_req->user_periph_links;
2050 		io_req->ccb.ccb_h.periph_priv = io_req->user_periph_priv;
2051 
2052 #if 0
2053 		xpt_print(periph->path, "Copying to user CCB %p from "
2054 			  "kernel address %p\n", *user_ccb, &io_req->ccb);
2055 #endif
2056 
2057 		error = copyout(&io_req->ccb, *user_ccb, sizeof(union ccb));
2058 		if (error != 0) {
2059 			xpt_print(periph->path, "Copy to user CCB %p from "
2060 				  "kernel address %p failed with error %d\n",
2061 				  *user_ccb, &io_req->ccb, error);
2062 		}
2063 
2064 		/*
2065 		 * Prefer the first error we got back, and make sure we
2066 		 * don't overwrite bad status with good.
2067 		 */
2068 		if (old_error != 0)
2069 			error = old_error;
2070 
2071 		cam_periph_lock(periph);
2072 
2073 		/*
2074 		 * At this point, if there was an error, we could potentially
2075 		 * re-queue the I/O and try again.  But why?  The error
2076 		 * would almost certainly happen again.  We might as well
2077 		 * not leak memory.
2078 		 */
2079 		uma_zfree(softc->pass_zone, io_req);
2080 		break;
2081 	}
2082 	default:
2083 		error = cam_periph_ioctl(periph, cmd, addr, passerror);
2084 		break;
2085 	}
2086 
2087 bailout:
2088 	cam_periph_unlock(periph);
2089 
2090 	return(error);
2091 }
2092 
2093 static int
2094 passpoll(struct cdev *dev, int poll_events, struct thread *td)
2095 {
2096 	struct cam_periph *periph;
2097 	struct pass_softc *softc;
2098 	int revents;
2099 
2100 	periph = (struct cam_periph *)dev->si_drv1;
2101 	softc = (struct pass_softc *)periph->softc;
2102 
2103 	revents = poll_events & (POLLOUT | POLLWRNORM);
2104 	if ((poll_events & (POLLIN | POLLRDNORM)) != 0) {
2105 		cam_periph_lock(periph);
2106 
2107 		if (!TAILQ_EMPTY(&softc->done_queue)) {
2108 			revents |= poll_events & (POLLIN | POLLRDNORM);
2109 		}
2110 		cam_periph_unlock(periph);
2111 		if (revents == 0)
2112 			selrecord(td, &softc->read_select);
2113 	}
2114 
2115 	return (revents);
2116 }
2117 
2118 static int
2119 passkqfilter(struct cdev *dev, struct knote *kn)
2120 {
2121 	struct cam_periph *periph;
2122 	struct pass_softc *softc;
2123 
2124 	periph = (struct cam_periph *)dev->si_drv1;
2125 	softc = (struct pass_softc *)periph->softc;
2126 
2127 	kn->kn_hook = (caddr_t)periph;
2128 	kn->kn_fop = &passread_filtops;
2129 	knlist_add(&softc->read_select.si_note, kn, 0);
2130 
2131 	return (0);
2132 }
2133 
2134 static void
2135 passreadfiltdetach(struct knote *kn)
2136 {
2137 	struct cam_periph *periph;
2138 	struct pass_softc *softc;
2139 
2140 	periph = (struct cam_periph *)kn->kn_hook;
2141 	softc = (struct pass_softc *)periph->softc;
2142 
2143 	knlist_remove(&softc->read_select.si_note, kn, 0);
2144 }
2145 
2146 static int
2147 passreadfilt(struct knote *kn, long hint)
2148 {
2149 	struct cam_periph *periph;
2150 	struct pass_softc *softc;
2151 	int retval;
2152 
2153 	periph = (struct cam_periph *)kn->kn_hook;
2154 	softc = (struct pass_softc *)periph->softc;
2155 
2156 	cam_periph_assert(periph, MA_OWNED);
2157 
2158 	if (TAILQ_EMPTY(&softc->done_queue))
2159 		retval = 0;
2160 	else
2161 		retval = 1;
2162 
2163 	return (retval);
2164 }
2165 
2166 /*
2167  * Generally, "ccb" should be the CCB supplied by the kernel.  "inccb"
2168  * should be the CCB that is copied in from the user.
2169  */
2170 static int
2171 passsendccb(struct cam_periph *periph, union ccb *ccb, union ccb *inccb)
2172 {
2173 	struct pass_softc *softc;
2174 	struct cam_periph_map_info mapinfo;
2175 	uint8_t *cmd;
2176 	xpt_opcode fc;
2177 	int error;
2178 
2179 	softc = (struct pass_softc *)periph->softc;
2180 
2181 	/*
2182 	 * There are some fields in the CCB header that need to be
2183 	 * preserved, the rest we get from the user.
2184 	 */
2185 	xpt_merge_ccb(ccb, inccb);
2186 
2187 	if (ccb->ccb_h.flags & CAM_CDB_POINTER) {
2188 		cmd = __builtin_alloca(ccb->csio.cdb_len);
2189 		error = copyin(ccb->csio.cdb_io.cdb_ptr, cmd, ccb->csio.cdb_len);
2190 		if (error)
2191 			return (error);
2192 		ccb->csio.cdb_io.cdb_ptr = cmd;
2193 	}
2194 
2195 	/*
2196 	 * Let cam_periph_mapmem do a sanity check on the data pointer format.
2197 	 * Even if no data transfer is needed, it's a cheap check and it
2198 	 * simplifies the code.
2199 	 */
2200 	fc = ccb->ccb_h.func_code;
2201 	if ((fc == XPT_SCSI_IO) || (fc == XPT_ATA_IO) || (fc == XPT_SMP_IO)
2202             || (fc == XPT_DEV_MATCH) || (fc == XPT_DEV_ADVINFO) || (fc == XPT_MMC_IO)
2203             || (fc == XPT_NVME_ADMIN) || (fc == XPT_NVME_IO)) {
2204 		bzero(&mapinfo, sizeof(mapinfo));
2205 
2206 		/*
2207 		 * cam_periph_mapmem calls into proc and vm functions that can
2208 		 * sleep as well as trigger I/O, so we can't hold the lock.
2209 		 * Dropping it here is reasonably safe.
2210 		 */
2211 		cam_periph_unlock(periph);
2212 		error = cam_periph_mapmem(ccb, &mapinfo, softc->maxio);
2213 		cam_periph_lock(periph);
2214 
2215 		/*
2216 		 * cam_periph_mapmem returned an error, we can't continue.
2217 		 * Return the error to the user.
2218 		 */
2219 		if (error)
2220 			return(error);
2221 	} else
2222 		/* Ensure that the unmap call later on is a no-op. */
2223 		mapinfo.num_bufs_used = 0;
2224 
2225 	/*
2226 	 * If the user wants us to perform any error recovery, then honor
2227 	 * that request.  Otherwise, it's up to the user to perform any
2228 	 * error recovery.
2229 	 */
2230 	{
2231 		uint32_t cam_flags, sense_flags;
2232 
2233 		passflags(ccb, &cam_flags, &sense_flags);
2234 		cam_periph_runccb(ccb,  passerror, cam_flags,
2235 		    sense_flags, softc->device_stats);
2236 	}
2237 
2238 	cam_periph_unlock(periph);
2239 	cam_periph_unmapmem(ccb, &mapinfo);
2240 	cam_periph_lock(periph);
2241 
2242 	ccb->ccb_h.cbfcnp = NULL;
2243 	ccb->ccb_h.periph_priv = inccb->ccb_h.periph_priv;
2244 	bcopy(ccb, inccb, sizeof(union ccb));
2245 
2246 	return(0);
2247 }
2248 
2249 /*
2250  * Set the cam_flags and sense_flags based on whether or not the request wants
2251  * error recovery. In order to log errors via devctl, we need to do at least
2252  * minimal recovery. We do this by not retrying unit attention (we let the
2253  * requester do it, or not, if appropriate) and specifically asking for no
2254  * recovery, like we do during device probing.
2255  */
2256 static void
2257 passflags(union ccb *ccb, uint32_t *cam_flags, uint32_t *sense_flags)
2258 {
2259 	if ((ccb->ccb_h.flags & CAM_PASS_ERR_RECOVER) != 0) {
2260 		*cam_flags = CAM_RETRY_SELTO;
2261 		*sense_flags = SF_RETRY_UA | SF_NO_PRINT;
2262 	} else {
2263 		*cam_flags = 0;
2264 		*sense_flags = SF_NO_RETRY | SF_NO_RECOVERY | SF_NO_PRINT;
2265 	}
2266 }
2267 
/*
 * Error callback used by this driver: passes the CCB and both flag words
 * straight to cam_periph_error() and returns its result unchanged.
 */
static int
passerror(union ccb *ccb, uint32_t cam_flags, uint32_t sense_flags)
{
	int error;

	error = cam_periph_error(ccb, cam_flags, sense_flags);
	return (error);
}
2274