xref: /freebsd/usr.sbin/camdd/camdd.c (revision 9f23cbd6cae82fd77edfad7173432fa8dccd0a95)
1 /*-
2  * Copyright (c) 1997-2007 Kenneth D. Merry
3  * Copyright (c) 2013, 2014, 2015 Spectra Logic Corporation
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions, and the following disclaimer,
11  *    without modification.
12  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
13  *    substantially similar to the "NO WARRANTY" disclaimer below
14  *    ("Disclaimer") and any redistribution must be conditioned upon
15  *    including a substantially similar Disclaimer requirement for further
16  *    binary redistribution.
17  *
18  * NO WARRANTY
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
22  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
28  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGES.
30  *
31  * Authors: Ken Merry           (Spectra Logic Corporation)
32  */
33 
34 /*
35  * This is eventually intended to be:
36  * - A basic data transfer/copy utility
37  * - A simple benchmark utility
38  * - An example of how to use the asynchronous pass(4) driver interface.
39  */
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42 
43 #include <sys/ioctl.h>
44 #include <sys/stdint.h>
45 #include <sys/types.h>
46 #include <sys/endian.h>
47 #include <sys/param.h>
48 #include <sys/sbuf.h>
49 #include <sys/stat.h>
50 #include <sys/event.h>
51 #include <sys/time.h>
52 #include <sys/uio.h>
53 #include <vm/vm.h>
54 #include <sys/bus.h>
55 #include <sys/bus_dma.h>
56 #include <sys/mtio.h>
57 #include <sys/conf.h>
58 #include <sys/disk.h>
59 
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <semaphore.h>
63 #include <string.h>
64 #include <unistd.h>
65 #include <inttypes.h>
66 #include <limits.h>
67 #include <fcntl.h>
68 #include <ctype.h>
69 #include <err.h>
70 #include <libutil.h>
71 #include <pthread.h>
72 #include <assert.h>
73 #include <bsdxml.h>
74 
75 #include <cam/cam.h>
76 #include <cam/cam_debug.h>
77 #include <cam/cam_ccb.h>
78 #include <cam/scsi/scsi_all.h>
79 #include <cam/scsi/scsi_da.h>
80 #include <cam/scsi/scsi_pass.h>
81 #include <cam/scsi/scsi_message.h>
82 #include <cam/scsi/smp_all.h>
83 #include <cam/nvme/nvme_all.h>
84 #include <camlib.h>
85 #include <mtlib.h>
86 #include <zlib.h>
87 
/*
 * Top-level command selector.
 *
 * NOTE(review): despite the "mask" name, CAMDD_CMD_READ (0x3) has the same
 * bits as CAMDD_CMD_HELP | CAMDD_CMD_WRITE, so these values cannot be OR'd
 * together as independent flags.  Presumably they are only ever compared
 * for equality -- confirm against the users of this type.
 */
typedef enum {
	CAMDD_CMD_NONE		= 0x00000000,
	CAMDD_CMD_HELP		= 0x00000001,
	CAMDD_CMD_WRITE		= 0x00000002,
	CAMDD_CMD_READ		= 0x00000003
} camdd_cmdmask;
94 
/*
 * Option flags.  Each enumerator is a single, distinct bit, so values can
 * be OR'd together.  This is the type of the "arglist" parameter taken by
 * the pass(4) probe routines and camdd_rw().
 */
typedef enum {
	CAMDD_ARG_NONE		= 0x00000000,
	CAMDD_ARG_VERBOSE	= 0x00000001,
	CAMDD_ARG_ERR_RECOVER	= 0x00000080,
} camdd_argmask;
100 
/*
 * Backend type of a device.  camdd_free_dev() switches on this value to
 * decide which member of union camdd_dev_spec to tear down:
 * CAMDD_DEV_PASS selects the "pass" member, CAMDD_DEV_FILE the "file"
 * member.
 */
typedef enum {
	CAMDD_DEV_NONE		= 0x00,
	CAMDD_DEV_PASS		= 0x01,
	CAMDD_DEV_FILE		= 0x02
} camdd_dev_type;
106 
/*
 * Options describing one device of a transfer.  Presumably filled in by
 * camdd_parse_io_opts() -- confirm; consumed by camdd_probe_file() and
 * camdd_probe_pass().
 */
struct camdd_io_opts {
	camdd_dev_type	dev_type;	/* backend type of the device */
	char		*dev_name;	/* copied into the device's name buffers by camdd_probe_file() */
	uint64_t	blocksize;	/* 0 makes camdd_probe_file() use CAMDD_FILE_DEFAULT_BLOCK */
	uint64_t	queue_depth;	/* file devices warn on anything other than 0 or 1 */
	uint64_t	offset;		/* presumably the starting byte offset -- TODO confirm */
	int		min_cmd_size;
	int		write_dev;	/* presumably non-zero for the device being written -- TODO confirm */
	uint64_t	debug;
};
117 
/*
 * Kind of a camdd_buf.  A CAMDD_BUF_DATA buffer owns backing store (see
 * camdd_alloc_buf()) and uses the "data" member of union camdd_buf_types;
 * a CAMDD_BUF_INDIRECT buffer uses the "indirect" member and points into
 * another buffer.
 */
typedef enum {
	CAMDD_BUF_NONE,
	CAMDD_BUF_DATA,
	CAMDD_BUF_INDIRECT
} camdd_buf_type;
123 
/*
 * Type-specific part of a CAMDD_BUF_INDIRECT buffer: a window onto part of
 * another buffer's data.  No storage is allocated for it by
 * camdd_alloc_buf().
 */
struct camdd_buf_indirect {
	/*
	 * Pointer to the source buffer.
	 */
	struct camdd_buf *src_buf;

	/*
	 * Offset into the source buffer, in bytes.
	 */
	uint64_t	  offset;
	/*
	 * Pointer to the starting point in the source buffer.  This is the
	 * address camdd_buf_sg_create() places in the scatter/gather list.
	 */
	uint8_t		 *start_ptr;

	/*
	 * Length of this chunk in bytes.
	 */
	size_t		  len;
};
144 
/*
 * Type-specific part of a CAMDD_BUF_DATA buffer: the backing store plus
 * the bookkeeping and scatter/gather state used to issue I/O for it.
 */
struct camdd_buf_data {
	/*
	 * Buffer allocated when we allocate this camdd_buf.  This should
	 * be the size of the blocksize for this device.
	 */
	uint8_t			*buf;

	/*
	 * The amount of backing store allocated in buf.  Generally this
	 * will be the blocksize of the device.
	 */
	uint32_t		 alloc_len;

	/*
	 * The amount of data that was put into the buffer (on reads) or
	 * the amount of data we have put onto the src_list so far (on
	 * writes).
	 */
	uint32_t		 fill_len;

	/*
	 * The amount of data that was not transferred.
	 */
	uint32_t		 resid;

	/*
	 * Starting byte offset on the reader.
	 */
	uint64_t		 src_start_offset;

	/*
	 * CCB used for pass(4) device targets.
	 */
	union ccb		 ccb;

	/*
	 * Number of scatter/gather segments.  Counts the entries in segs
	 * or iovec, including the extra padding entry when extra_buf is
	 * set.
	 */
	int			 sg_count;

	/*
	 * Set if we had to tack on an extra buffer to round the transfer
	 * up to a sector size.  When set, the last scatter/gather entry
	 * points at separately allocated memory that
	 * camdd_release_buf() frees.
	 */
	int			 extra_buf;

	/*
	 * Scatter/gather list used generally when we're the writer for a
	 * pass(4) device.
	 */
	bus_dma_segment_t	*segs;

	/*
	 * Scatter/gather list used generally when we're the writer for a
	 * file or block device.
	 */
	struct iovec		*iovec;
};
203 
/*
 * Type-specific payload of a camdd_buf.  Which member is valid is given by
 * the buffer's buf_type field.
 */
union camdd_buf_types {
	struct camdd_buf_indirect	indirect;
	struct camdd_buf_data		data;
};
208 
/*
 * Completion status recorded in a camdd_buf.  This is also the return type
 * of camdd_ccb_status(), which maps a CCB to one of these values.
 */
typedef enum {
	CAMDD_STATUS_NONE,
	CAMDD_STATUS_OK,
	CAMDD_STATUS_SHORT_IO,
	CAMDD_STATUS_EOF,
	CAMDD_STATUS_ERROR
} camdd_buf_status;
216 
/*
 * A single I/O buffer.  Buffers are obtained with camdd_get_buf(), which
 * recycles one from the owning device's free list or allocates a new one,
 * and are returned to that free list with camdd_release_buf().
 */
struct camdd_buf {
	/* Selects the valid member of buf_type_spec. */
	camdd_buf_type		 buf_type;
	union camdd_buf_types	 buf_type_spec;

	camdd_buf_status	 status;

	/* NOTE(review): presumably the starting LBA and byte length of the I/O -- confirm. */
	uint64_t		 lba;
	size_t			 len;

	/*
	 * A reference count of how many indirect buffers point to this
	 * buffer.
	 */
	int			 refcount;

	/*
	 * A link back to our parent device.
	 */
	struct camdd_dev	*dev;
	STAILQ_ENTRY(camdd_buf)  links;
	STAILQ_ENTRY(camdd_buf)  work_links;

	/*
	 * A count of the buffers on the src_list.
	 */
	int			 src_count;

	/*
	 * List of buffers from our partner thread that are the components
	 * of this buffer for the I/O.  Uses src_links.
	 */
	STAILQ_HEAD(,camdd_buf)	 src_list;
	STAILQ_ENTRY(camdd_buf)  src_links;
};
251 
/*
 * NOTE(review): presumably the number of real device types in
 * camdd_dev_type (PASS and FILE, excluding NONE) -- confirm at the use
 * sites.
 */
#define	NUM_DEV_TYPES	2
253 
/*
 * Device-specific state for a CAMDD_DEV_PASS device.
 */
struct camdd_dev_pass {
	int			 scsi_dev_type;
	int			 protocol;
	/* CAM device handle; closed by camdd_free_dev() when non-NULL. */
	struct cam_device	*dev;
	/* NOTE(review): presumably the last addressable sector and the sector size in bytes -- confirm. */
	uint64_t		 max_sector;
	uint32_t		 block_len;
	uint32_t		 cpi_maxio;
};
262 
/*
 * What kind of object a CAMDD_DEV_FILE device refers to.
 * camdd_probe_file() classifies the descriptor with fstat(): regular
 * files become CAMDD_FILE_REG, FIFOs CAMDD_FILE_PIPE, and character
 * devices are further split into TAPE/DISK/MEM/TTY using the FIODTYPE
 * ioctl.  CAMDD_FILE_STD is presumably used for stdin/stdout -- confirm.
 */
typedef enum {
	CAMDD_FILE_NONE,
	CAMDD_FILE_REG,
	CAMDD_FILE_STD,
	CAMDD_FILE_PIPE,
	CAMDD_FILE_DISK,
	CAMDD_FILE_TAPE,
	CAMDD_FILE_TTY,
	CAMDD_FILE_MEM
} camdd_file_type;
273 
/*
 * Capability flags for a file device.  camdd_probe_file() sets
 * CAMDD_FF_CAN_SEEK for regular files.
 */
typedef enum {
	CAMDD_FF_NONE 		= 0x00,
	CAMDD_FF_CAN_SEEK	= 0x01
} camdd_file_flags;
278 
/*
 * Device-specific state for a CAMDD_DEV_FILE device.
 */
struct camdd_dev_file {
	/* Open descriptor; camdd_free_dev() closes it unless it is -1. */
	int			 fd;
	/* fstat() result, filled in by camdd_probe_file() for anything but stdin/stdout. */
	struct stat		 sb;
	/* Copy of the device name given in the I/O options. */
	char			 filename[MAXPATHLEN + 1];
	camdd_file_type		 file_type;
	camdd_file_flags	 file_flags;
	/* Scratch buffer; released with free() by camdd_free_dev(). */
	uint8_t			*tmp_buf;
};
287 
/*
 * Device-specific state for a block device.
 *
 * NOTE(review): camdd_dev_type has no matching enumerator and
 * camdd_free_dev() has no case that touches this member -- confirm whether
 * it is still in use.
 */
struct camdd_dev_block {
	int			 fd;
	uint64_t		 size_bytes;
	uint32_t		 block_len;
};
293 
/*
 * Backend-specific part of a camdd_dev.  The device's dev_type field
 * tells which member is valid.
 */
union camdd_dev_spec {
	struct camdd_dev_pass	pass;
	struct camdd_dev_file	file;
	struct camdd_dev_block	block;
};
299 
/*
 * Run-time state flags for a device.  Each enumerator is a distinct bit,
 * so several may be set at once.
 *
 * NOTE(review): the names suggest tracking of end-of-file on this device
 * and on its peer, and whether an EOF notification has been queued/sent --
 * confirm the exact meaning at the places that set them.
 */
typedef enum {
	CAMDD_DEV_FLAG_NONE		= 0x00,
	CAMDD_DEV_FLAG_EOF		= 0x01,
	CAMDD_DEV_FLAG_PEER_EOF		= 0x02,
	CAMDD_DEV_FLAG_ACTIVE		= 0x04,
	CAMDD_DEV_FLAG_EOF_SENT		= 0x08,
	CAMDD_DEV_FLAG_EOF_QUEUED	= 0x10
} camdd_dev_flags;
308 
/*
 * State for one device taking part in a transfer.  Instances are created
 * by camdd_alloc_dev(), which zeroes the structure, initializes every
 * queue head, the mutex, the condition variable and the kqueue, and are
 * destroyed by camdd_free_dev().
 */
struct camdd_dev {
	/* Selects the valid member of dev_spec. */
	camdd_dev_type		 dev_type;
	union camdd_dev_spec	 dev_spec;
	camdd_dev_flags		 flags;
	char			 device_name[MAXPATHLEN+1];
	/* Size of the data buffers camdd_alloc_buf() allocates for this device. */
	uint32_t		 blocksize;
	/* camdd_probe_file() sets this to 1 for files (byte addressable). */
	uint32_t		 sector_size;
	uint64_t		 max_sector;
	uint64_t		 sector_io_limit;
	int			 min_cmd_size;
	int			 write_dev;
	/* retry_count and io_timeout are copied from the camdd_alloc_dev() arguments. */
	int			 retry_count;
	int			 io_timeout;
	int			 debug;
	uint64_t		 start_offset_bytes;
	uint64_t		 next_io_pos_bytes;
	uint64_t		 next_peer_pos_bytes;
	uint64_t		 next_completion_pos_bytes;
	uint64_t		 peer_bytes_queued;
	uint64_t		 bytes_transferred;
	uint32_t		 target_queue_depth;
	uint32_t		 cur_active_io;
	uint8_t			*extra_buf;
	uint32_t		 extra_buf_len;
	/* The device on the other side of the transfer. */
	struct camdd_dev	*peer_dev;
	pthread_mutex_t		 mutex;
	pthread_cond_t		 cond;
	/*
	 * kqueue descriptor.  camdd_alloc_dev() registers user events keyed
	 * on the addresses of work_queue and peer_done_queue, plus SIGINFO
	 * and SIGINT signal events, on it.
	 */
	int			 kq;

	/*
	 * Backend entry points.  camdd_probe_file() installs
	 * camdd_file_run() as run and leaves fetch NULL.
	 */
	int			 (*run)(struct camdd_dev *dev);
	int			 (*fetch)(struct camdd_dev *dev);

	/*
	 * Buffers that are available for I/O.  Uses links.
	 */
	STAILQ_HEAD(,camdd_buf)	 free_queue;

	/*
	 * Free indirect buffers.  These are used for breaking a large
	 * buffer into multiple pieces.
	 */
	STAILQ_HEAD(,camdd_buf)	 free_indirect_queue;

	/*
	 * Buffers that have been queued to the kernel.  Uses links.
	 */
	STAILQ_HEAD(,camdd_buf)	 active_queue;

	/*
	 * Will generally contain one of our buffers that is waiting for enough
	 * I/O from our partner thread to be able to execute.  This will
	 * generally happen when our per-I/O-size is larger than the
	 * partner thread's per-I/O-size.  Uses links.
	 */
	STAILQ_HEAD(,camdd_buf)	 pending_queue;

	/*
	 * Number of buffers on the pending queue
	 */
	int			 num_pending_queue;

	/*
	 * Buffers that are filled and ready to execute.  This is used when
	 * our partner (reader) thread sends us blocks that are larger than
	 * our blocksize, and so we have to split them into multiple pieces.
	 */
	STAILQ_HEAD(,camdd_buf)	 run_queue;

	/*
	 * Number of buffers on the run queue.
	 */
	int			 num_run_queue;

	/*
	 * NOTE(review): presumably holds buffers that arrived out of order
	 * until they can be processed in sequence -- confirm at the use
	 * sites.
	 */
	STAILQ_HEAD(,camdd_buf)	 reorder_queue;

	/*
	 * Number of buffers on the reorder queue.
	 */
	int			 num_reorder_queue;

	/*
	 * Buffers that have been queued to us by our partner thread
	 * (generally the reader thread) to be written out.  Uses
	 * work_links.
	 */
	STAILQ_HEAD(,camdd_buf)	 work_queue;

	/*
	 * Buffers that have been completed by our partner thread.  Uses
	 * work_links.
	 */
	STAILQ_HEAD(,camdd_buf)	 peer_done_queue;

	/*
	 * Number of buffers on the peer done queue.
	 */
	uint32_t		 num_peer_done_queue;

	/*
	 * A list of buffers that we have queued to our peer thread.  Uses
	 * links.
	 */
	STAILQ_HEAD(,camdd_buf)	 peer_work_queue;

	/*
	 * Number of buffers on the peer work queue.
	 */
	uint32_t		 num_peer_work_queue;
};
415 
/*
 * Process-wide state.
 *
 * The three flags are declared volatile sig_atomic_t: that is the only
 * object type the C standard guarantees may be written from an
 * asynchronous signal handler (this file declares camdd_sig_handler()),
 * and volatile keeps the compiler from caching a stale value in a loop
 * that polls them.
 *
 * NOTE(review): presumably need_exit requests shutdown, error_exit records
 * that the shutdown is due to an error, and need_status requests a
 * progress report -- confirm at the use sites.
 */
static sem_t camdd_sem;
static volatile sig_atomic_t need_exit = 0;
static volatile sig_atomic_t error_exit = 0;
static volatile sig_atomic_t need_status = 0;
420 
#ifndef min
/*
 * Smaller of two values.  Both the arguments and the whole expansion are
 * parenthesized so the macro can be used inside larger expressions
 * (e.g. "min(a, b) + 1" or "2 * min(a, b)") without the conditional
 * operator swallowing the surrounding operators.
 *
 * Each argument is still evaluated twice; do not pass expressions with
 * side effects.
 */
#define	min(a, b) (((a) < (b)) ? (a) : (b))
#endif
424 
425 
/*
 * Generically useful offsets into the peripheral private area.  These are
 * member-access shorthands meant to be applied to a CCB header:
 * entries[0] and entries[1] of periph_priv, viewed either as a pointer
 * (ptr) or as an integer (field).
 */
#define ppriv_ptr0 periph_priv.entries[0].ptr
#define ppriv_ptr1 periph_priv.entries[1].ptr
#define ppriv_field0 periph_priv.entries[0].field
#define ppriv_field1 periph_priv.entries[1].field

/*
 * Alias for the first private pointer slot.
 * NOTE(review): presumably used to remember the camdd_buf that owns a CCB
 * -- confirm at the use sites.
 */
#define	ccb_buf	ppriv_ptr0
433 
/*
 * Defaults and limits.
 *
 * CAMDD_FILE_DEFAULT_BLOCK is the block size camdd_probe_file() uses when
 * none was requested, and CAMDD_FILE_DEFAULT_DEPTH is the queue depth it
 * always assigns to file devices.  The CAMDD_PASS_* values are the
 * corresponding settings for pass(4) devices.
 *
 * CAMDD_PASS_RW_TIMEOUT is parenthesized so that it behaves as a single
 * value inside larger expressions (division, casts, unary operators).
 * NOTE(review): presumably 60 seconds expressed in milliseconds -- confirm
 * against the CAM timeout units at the use sites.
 */
#define	CAMDD_FILE_DEFAULT_BLOCK	524288
#define	CAMDD_FILE_DEFAULT_DEPTH	1
#define	CAMDD_PASS_MAX_BLOCK		1048576
#define	CAMDD_PASS_DEFAULT_DEPTH	6
#define	CAMDD_PASS_RW_TIMEOUT		(60 * 1000)
439 
440 static int parse_btl(char *tstr, int *bus, int *target, int *lun);
441 void camdd_free_dev(struct camdd_dev *dev);
442 struct camdd_dev *camdd_alloc_dev(camdd_dev_type dev_type,
443 				  struct kevent *new_ke, int num_ke,
444 				  int retry_count, int timeout);
445 static struct camdd_buf *camdd_alloc_buf(struct camdd_dev *dev,
446 					 camdd_buf_type buf_type);
447 void camdd_release_buf(struct camdd_buf *buf);
448 struct camdd_buf *camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type);
449 int camdd_buf_sg_create(struct camdd_buf *buf, int iovec,
450 			uint32_t sector_size, uint32_t *num_sectors_used,
451 			int *double_buf_needed);
452 uint32_t camdd_buf_get_len(struct camdd_buf *buf);
453 void camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf);
454 int camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
455 		     uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran);
456 int camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
457          camdd_argmask arglist, int probe_retry_count,
458          int probe_timeout, uint64_t *maxsector, uint32_t *block_len);
459 int camdd_probe_pass_nvme(struct cam_device *cam_dev, union ccb *ccb,
460          camdd_argmask arglist, int probe_retry_count,
461          int probe_timeout, uint64_t *maxsector, uint32_t *block_len);
462 struct camdd_dev *camdd_probe_file(int fd, struct camdd_io_opts *io_opts,
463 				   int retry_count, int timeout);
464 struct camdd_dev *camdd_probe_pass(struct cam_device *cam_dev,
465 				   struct camdd_io_opts *io_opts,
466 				   camdd_argmask arglist, int probe_retry_count,
467 				   int probe_timeout, int io_retry_count,
468 				   int io_timeout);
469 void nvme_read_write(struct ccb_nvmeio *nvmeio, uint32_t retries,
470 		void (*cbfcnp)(struct cam_periph *, union ccb *),
471 		uint32_t nsid, int readop, uint64_t lba,
472 		uint32_t block_count, uint8_t *data_ptr, uint32_t dxfer_len,
473 		uint32_t timeout);
474 void *camdd_file_worker(void *arg);
475 camdd_buf_status camdd_ccb_status(union ccb *ccb, int protocol);
476 int camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd);
477 int camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf);
478 int camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf);
479 void camdd_peer_done(struct camdd_buf *buf);
480 void camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
481 			int *error_count);
482 int camdd_pass_fetch(struct camdd_dev *dev);
483 int camdd_file_run(struct camdd_dev *dev);
484 int camdd_pass_run(struct camdd_dev *dev);
485 int camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len);
486 int camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf);
487 void camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
488 		     uint32_t *peer_depth, uint32_t *our_bytes,
489 		     uint32_t *peer_bytes);
490 void *camdd_worker(void *arg);
491 void camdd_sig_handler(int sig);
492 void camdd_print_status(struct camdd_dev *camdd_dev,
493 			struct camdd_dev *other_dev,
494 			struct timespec *start_time);
495 int camdd_rw(struct camdd_io_opts *io_opts, camdd_argmask arglist,
496 	     int num_io_opts, uint64_t max_io, int retry_count, int timeout);
497 int camdd_parse_io_opts(char *args, int is_write,
498 			struct camdd_io_opts *io_opts);
499 void usage(void);
500 
/*
 * Parse out a bus, or a bus, target and lun in the following
 * format:
 * bus
 * bus:target
 * bus:target:lun
 *
 * Leading whitespace is skipped.  The string is tokenized in place with
 * strtok(), so tstr is modified (':' separators are overwritten with NUL)
 * and the function is not reentrant.  Each component is converted with
 * strtol() using base 0, so decimal, octal ("0...") and hexadecimal
 * ("0x...") spellings are accepted; conversion errors are not detected.
 *
 * Only the outputs for components that were present are written; the
 * others are left untouched.
 *
 * Returns the number of parsed components (1-3), or 0 if there were none.
 */
static int
parse_btl(char *tstr, int *bus, int *target, int *lun)
{
	char *tmpstr;
	int convs = 0;

	/*
	 * The cast matters: handing isspace() a negative plain-char value is
	 * undefined behavior.  isspace('\0') is false, so the loop also
	 * stops at the end of the string without a separate check.
	 */
	while (isspace((unsigned char)*tstr))
		tstr++;

	tmpstr = strtok(tstr, ":");
	if ((tmpstr == NULL) || (*tmpstr == '\0'))
		return (convs);
	*bus = strtol(tmpstr, NULL, 0);
	convs++;

	tmpstr = strtok(NULL, ":");
	if ((tmpstr == NULL) || (*tmpstr == '\0'))
		return (convs);
	*target = strtol(tmpstr, NULL, 0);
	convs++;

	tmpstr = strtok(NULL, ":");
	if ((tmpstr == NULL) || (*tmpstr == '\0'))
		return (convs);
	*lun = strtol(tmpstr, NULL, 0);
	convs++;

	return (convs);
}
537 
538 /*
539  * XXX KDM clean up and free all of the buffers on the queue!
540  */
541 void
542 camdd_free_dev(struct camdd_dev *dev)
543 {
544 	if (dev == NULL)
545 		return;
546 
547 	switch (dev->dev_type) {
548 	case CAMDD_DEV_FILE: {
549 		struct camdd_dev_file *file_dev = &dev->dev_spec.file;
550 
551 		if (file_dev->fd != -1)
552 			close(file_dev->fd);
553 		free(file_dev->tmp_buf);
554 		break;
555 	}
556 	case CAMDD_DEV_PASS: {
557 		struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
558 
559 		if (pass_dev->dev != NULL)
560 			cam_close_device(pass_dev->dev);
561 		break;
562 	}
563 	default:
564 		break;
565 	}
566 
567 	free(dev);
568 }
569 
570 struct camdd_dev *
571 camdd_alloc_dev(camdd_dev_type dev_type, struct kevent *new_ke, int num_ke,
572 		int retry_count, int timeout)
573 {
574 	struct camdd_dev *dev = NULL;
575 	struct kevent *ke;
576 	size_t ke_size;
577 	int retval = 0;
578 
579 	dev = calloc(1, sizeof(*dev));
580 	if (dev == NULL) {
581 		warn("%s: unable to malloc %zu bytes", __func__, sizeof(*dev));
582 		goto bailout;
583 	}
584 
585 	dev->dev_type = dev_type;
586 	dev->io_timeout = timeout;
587 	dev->retry_count = retry_count;
588 	STAILQ_INIT(&dev->free_queue);
589 	STAILQ_INIT(&dev->free_indirect_queue);
590 	STAILQ_INIT(&dev->active_queue);
591 	STAILQ_INIT(&dev->pending_queue);
592 	STAILQ_INIT(&dev->run_queue);
593 	STAILQ_INIT(&dev->reorder_queue);
594 	STAILQ_INIT(&dev->work_queue);
595 	STAILQ_INIT(&dev->peer_done_queue);
596 	STAILQ_INIT(&dev->peer_work_queue);
597 	retval = pthread_mutex_init(&dev->mutex, NULL);
598 	if (retval != 0) {
599 		warnc(retval, "%s: failed to initialize mutex", __func__);
600 		goto bailout;
601 	}
602 
603 	retval = pthread_cond_init(&dev->cond, NULL);
604 	if (retval != 0) {
605 		warnc(retval, "%s: failed to initialize condition variable",
606 		      __func__);
607 		goto bailout;
608 	}
609 
610 	dev->kq = kqueue();
611 	if (dev->kq == -1) {
612 		warn("%s: Unable to create kqueue", __func__);
613 		goto bailout;
614 	}
615 
616 	ke_size = sizeof(struct kevent) * (num_ke + 4);
617 	ke = calloc(1, ke_size);
618 	if (ke == NULL) {
619 		warn("%s: unable to malloc %zu bytes", __func__, ke_size);
620 		goto bailout;
621 	}
622 	if (num_ke > 0)
623 		bcopy(new_ke, ke, num_ke * sizeof(struct kevent));
624 
625 	EV_SET(&ke[num_ke++], (uintptr_t)&dev->work_queue, EVFILT_USER,
626 	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
627 	EV_SET(&ke[num_ke++], (uintptr_t)&dev->peer_done_queue, EVFILT_USER,
628 	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
629 	EV_SET(&ke[num_ke++], SIGINFO, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
630 	EV_SET(&ke[num_ke++], SIGINT, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
631 
632 	retval = kevent(dev->kq, ke, num_ke, NULL, 0, NULL);
633 	if (retval == -1) {
634 		warn("%s: Unable to register kevents", __func__);
635 		goto bailout;
636 	}
637 
638 
639 	return (dev);
640 
641 bailout:
642 	free(dev);
643 
644 	return (NULL);
645 }
646 
647 static struct camdd_buf *
648 camdd_alloc_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
649 {
650 	struct camdd_buf *buf = NULL;
651 	uint8_t *data_ptr = NULL;
652 
653 	/*
654 	 * We only need to allocate data space for data buffers.
655 	 */
656 	switch (buf_type) {
657 	case CAMDD_BUF_DATA:
658 		data_ptr = malloc(dev->blocksize);
659 		if (data_ptr == NULL) {
660 			warn("unable to allocate %u bytes", dev->blocksize);
661 			goto bailout_error;
662 		}
663 		break;
664 	default:
665 		break;
666 	}
667 
668 	buf = calloc(1, sizeof(*buf));
669 	if (buf == NULL) {
670 		warn("unable to allocate %zu bytes", sizeof(*buf));
671 		goto bailout_error;
672 	}
673 
674 	buf->buf_type = buf_type;
675 	buf->dev = dev;
676 	switch (buf_type) {
677 	case CAMDD_BUF_DATA: {
678 		struct camdd_buf_data *data;
679 
680 		data = &buf->buf_type_spec.data;
681 
682 		data->alloc_len = dev->blocksize;
683 		data->buf = data_ptr;
684 		break;
685 	}
686 	case CAMDD_BUF_INDIRECT:
687 		break;
688 	default:
689 		break;
690 	}
691 	STAILQ_INIT(&buf->src_list);
692 
693 	return (buf);
694 
695 bailout_error:
696 	free(data_ptr);
697 
698 	return (NULL);
699 }
700 
701 void
702 camdd_release_buf(struct camdd_buf *buf)
703 {
704 	struct camdd_dev *dev;
705 
706 	dev = buf->dev;
707 
708 	switch (buf->buf_type) {
709 	case CAMDD_BUF_DATA: {
710 		struct camdd_buf_data *data;
711 
712 		data = &buf->buf_type_spec.data;
713 
714 		if (data->segs != NULL) {
715 			if (data->extra_buf != 0) {
716 				void *extra_buf;
717 
718 				extra_buf = (void *)
719 				    data->segs[data->sg_count - 1].ds_addr;
720 				free(extra_buf);
721 				data->extra_buf = 0;
722 			}
723 			free(data->segs);
724 			data->segs = NULL;
725 			data->sg_count = 0;
726 		} else if (data->iovec != NULL) {
727 			if (data->extra_buf != 0) {
728 				free(data->iovec[data->sg_count - 1].iov_base);
729 				data->extra_buf = 0;
730 			}
731 			free(data->iovec);
732 			data->iovec = NULL;
733 			data->sg_count = 0;
734 		}
735 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
736 		break;
737 	}
738 	case CAMDD_BUF_INDIRECT:
739 		STAILQ_INSERT_TAIL(&dev->free_indirect_queue, buf, links);
740 		break;
741 	default:
742 		err(1, "%s: Invalid buffer type %d for released buffer",
743 		    __func__, buf->buf_type);
744 		break;
745 	}
746 }
747 
748 struct camdd_buf *
749 camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
750 {
751 	struct camdd_buf *buf = NULL;
752 
753 	switch (buf_type) {
754 	case CAMDD_BUF_DATA:
755 		buf = STAILQ_FIRST(&dev->free_queue);
756 		if (buf != NULL) {
757 			struct camdd_buf_data *data;
758 			uint8_t *data_ptr;
759 			uint32_t alloc_len;
760 
761 			STAILQ_REMOVE_HEAD(&dev->free_queue, links);
762 			data = &buf->buf_type_spec.data;
763 			data_ptr = data->buf;
764 			alloc_len = data->alloc_len;
765 			bzero(buf, sizeof(*buf));
766 			data->buf = data_ptr;
767 			data->alloc_len = alloc_len;
768 		}
769 		break;
770 	case CAMDD_BUF_INDIRECT:
771 		buf = STAILQ_FIRST(&dev->free_indirect_queue);
772 		if (buf != NULL) {
773 			STAILQ_REMOVE_HEAD(&dev->free_indirect_queue, links);
774 
775 			bzero(buf, sizeof(*buf));
776 		}
777 		break;
778 	default:
779 		warnx("Unknown buffer type %d requested", buf_type);
780 		break;
781 	}
782 
783 
784 	if (buf == NULL)
785 		return (camdd_alloc_buf(dev, buf_type));
786 	else {
787 		STAILQ_INIT(&buf->src_list);
788 		buf->dev = dev;
789 		buf->buf_type = buf_type;
790 
791 		return (buf);
792 	}
793 }
794 
795 int
796 camdd_buf_sg_create(struct camdd_buf *buf, int iovec, uint32_t sector_size,
797 		    uint32_t *num_sectors_used, int *double_buf_needed)
798 {
799 	struct camdd_buf *tmp_buf;
800 	struct camdd_buf_data *data;
801 	uint8_t *extra_buf = NULL;
802 	size_t extra_buf_len = 0;
803 	int extra_buf_attached = 0;
804 	int i, retval = 0;
805 
806 	data = &buf->buf_type_spec.data;
807 
808 	data->sg_count = buf->src_count;
809 	/*
810 	 * Compose a scatter/gather list from all of the buffers in the list.
811 	 * If the length of the buffer isn't a multiple of the sector size,
812 	 * we'll have to add an extra buffer.  This should only happen
813 	 * at the end of a transfer.
814 	 */
815 	if ((data->fill_len % sector_size) != 0) {
816 		extra_buf_len = sector_size - (data->fill_len % sector_size);
817 		extra_buf = calloc(extra_buf_len, 1);
818 		if (extra_buf == NULL) {
819 			warn("%s: unable to allocate %zu bytes for extra "
820 			    "buffer space", __func__, extra_buf_len);
821 			retval = 1;
822 			goto bailout;
823 		}
824 		data->extra_buf = 1;
825 		data->sg_count++;
826 	}
827 	if (iovec == 0) {
828 		data->segs = calloc(data->sg_count, sizeof(bus_dma_segment_t));
829 		if (data->segs == NULL) {
830 			warn("%s: unable to allocate %zu bytes for S/G list",
831 			    __func__, sizeof(bus_dma_segment_t) *
832 			    data->sg_count);
833 			retval = 1;
834 			goto bailout;
835 		}
836 
837 	} else {
838 		data->iovec = calloc(data->sg_count, sizeof(struct iovec));
839 		if (data->iovec == NULL) {
840 			warn("%s: unable to allocate %zu bytes for S/G list",
841 			    __func__, sizeof(struct iovec) * data->sg_count);
842 			retval = 1;
843 			goto bailout;
844 		}
845 	}
846 
847 	for (i = 0, tmp_buf = STAILQ_FIRST(&buf->src_list);
848 	     i < buf->src_count && tmp_buf != NULL; i++,
849 	     tmp_buf = STAILQ_NEXT(tmp_buf, src_links)) {
850 
851 		if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
852 			struct camdd_buf_data *tmp_data;
853 
854 			tmp_data = &tmp_buf->buf_type_spec.data;
855 			if (iovec == 0) {
856 				data->segs[i].ds_addr =
857 				    (bus_addr_t) tmp_data->buf;
858 				data->segs[i].ds_len = tmp_data->fill_len -
859 				    tmp_data->resid;
860 			} else {
861 				data->iovec[i].iov_base = tmp_data->buf;
862 				data->iovec[i].iov_len = tmp_data->fill_len -
863 				    tmp_data->resid;
864 			}
865 			if (((tmp_data->fill_len - tmp_data->resid) %
866 			     sector_size) != 0)
867 				*double_buf_needed = 1;
868 		} else {
869 			struct camdd_buf_indirect *tmp_ind;
870 
871 			tmp_ind = &tmp_buf->buf_type_spec.indirect;
872 			if (iovec == 0) {
873 				data->segs[i].ds_addr =
874 				    (bus_addr_t)tmp_ind->start_ptr;
875 				data->segs[i].ds_len = tmp_ind->len;
876 			} else {
877 				data->iovec[i].iov_base = tmp_ind->start_ptr;
878 				data->iovec[i].iov_len = tmp_ind->len;
879 			}
880 			if ((tmp_ind->len % sector_size) != 0)
881 				*double_buf_needed = 1;
882 		}
883 	}
884 
885 	if (extra_buf != NULL) {
886 		if (iovec == 0) {
887 			data->segs[i].ds_addr = (bus_addr_t)extra_buf;
888 			data->segs[i].ds_len = extra_buf_len;
889 		} else {
890 			data->iovec[i].iov_base = extra_buf;
891 			data->iovec[i].iov_len = extra_buf_len;
892 		}
893 		extra_buf_attached = 1;
894 		i++;
895 	}
896 	if ((tmp_buf != NULL) || (i != data->sg_count)) {
897 		warnx("buffer source count does not match "
898 		      "number of buffers in list!");
899 		retval = 1;
900 		goto bailout;
901 	}
902 
903 bailout:
904 	if (retval == 0) {
905 		*num_sectors_used = (data->fill_len + extra_buf_len) /
906 		    sector_size;
907 	} else if (extra_buf_attached == 0) {
908 		/*
909 		 * If extra_buf isn't attached yet, we need to free it
910 		 * to avoid leaking.
911 		 */
912 		free(extra_buf);
913 		data->extra_buf = 0;
914 		data->sg_count--;
915 	}
916 	return (retval);
917 }
918 
919 uint32_t
920 camdd_buf_get_len(struct camdd_buf *buf)
921 {
922 	uint32_t len = 0;
923 
924 	if (buf->buf_type != CAMDD_BUF_DATA) {
925 		struct camdd_buf_indirect *indirect;
926 
927 		indirect = &buf->buf_type_spec.indirect;
928 		len = indirect->len;
929 	} else {
930 		struct camdd_buf_data *data;
931 
932 		data = &buf->buf_type_spec.data;
933 		len = data->fill_len;
934 	}
935 
936 	return (len);
937 }
938 
939 void
940 camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf)
941 {
942 	struct camdd_buf_data *data;
943 
944 	assert(buf->buf_type == CAMDD_BUF_DATA);
945 
946 	data = &buf->buf_type_spec.data;
947 
948 	STAILQ_INSERT_TAIL(&buf->src_list, child_buf, src_links);
949 	buf->src_count++;
950 
951 	data->fill_len += camdd_buf_get_len(child_buf);
952 }
953 
/*
 * Indices into req_status_items[].  The enumerator order must match the
 * order of the initializers of that table, because camdd_probe_tape()
 * uses these values to pick entries out of it.
 */
typedef enum {
	CAMDD_TS_MAX_BLK,
	CAMDD_TS_MIN_BLK,
	CAMDD_TS_BLK_GRAN,
	CAMDD_TS_EFF_IOSIZE
} camdd_status_item_index;
960 
/*
 * Tape status items that camdd_probe_tape() requires, indexed by
 * camdd_status_item_index.  "name" is the key looked up with
 * mt_status_entry_find(); "entry" receives the lookup result and starts
 * out NULL.
 */
static struct camdd_status_items {
	const char *name;
	struct mt_status_entry *entry;
} req_status_items[] = {
	{ "max_blk", NULL },
	{ "min_blk", NULL },
	{ "blk_gran", NULL },
	{ "max_effective_iosize", NULL }
};
970 
971 int
972 camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
973 		 uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran)
974 {
975 	struct mt_status_data status_data;
976 	char *xml_str = NULL;
977 	unsigned int i;
978 	int retval = 0;
979 
980 	retval = mt_get_xml_str(fd, MTIOCEXTGET, &xml_str);
981 	if (retval != 0)
982 		err(1, "Couldn't get XML string from %s", filename);
983 
984 	retval = mt_get_status(xml_str, &status_data);
985 	if (retval != XML_STATUS_OK) {
986 		warn("couldn't get status for %s", filename);
987 		retval = 1;
988 		goto bailout;
989 	} else
990 		retval = 0;
991 
992 	if (status_data.error != 0) {
993 		warnx("%s", status_data.error_str);
994 		retval = 1;
995 		goto bailout;
996 	}
997 
998 	for (i = 0; i < nitems(req_status_items); i++) {
999                 char *name;
1000 
1001 		name = __DECONST(char *, req_status_items[i].name);
1002 		req_status_items[i].entry = mt_status_entry_find(&status_data,
1003 		    name);
1004 		if (req_status_items[i].entry == NULL) {
1005 			errx(1, "Cannot find status entry %s",
1006 			    req_status_items[i].name);
1007 		}
1008 	}
1009 
1010 	*max_iosize = req_status_items[CAMDD_TS_EFF_IOSIZE].entry->value_unsigned;
1011 	*max_blk= req_status_items[CAMDD_TS_MAX_BLK].entry->value_unsigned;
1012 	*min_blk= req_status_items[CAMDD_TS_MIN_BLK].entry->value_unsigned;
1013 	*blk_gran = req_status_items[CAMDD_TS_BLK_GRAN].entry->value_unsigned;
1014 bailout:
1015 
1016 	free(xml_str);
1017 	mt_status_free(&status_data);
1018 
1019 	return (retval);
1020 }
1021 
1022 struct camdd_dev *
1023 camdd_probe_file(int fd, struct camdd_io_opts *io_opts, int retry_count,
1024     int timeout)
1025 {
1026 	struct camdd_dev *dev = NULL;
1027 	struct camdd_dev_file *file_dev;
1028 	uint64_t blocksize = io_opts->blocksize;
1029 
1030 	dev = camdd_alloc_dev(CAMDD_DEV_FILE, NULL, 0, retry_count, timeout);
1031 	if (dev == NULL)
1032 		goto bailout;
1033 
1034 	file_dev = &dev->dev_spec.file;
1035 	file_dev->fd = fd;
1036 	strlcpy(file_dev->filename, io_opts->dev_name,
1037 	    sizeof(file_dev->filename));
1038 	strlcpy(dev->device_name, io_opts->dev_name, sizeof(dev->device_name));
1039 	if (blocksize == 0)
1040 		dev->blocksize = CAMDD_FILE_DEFAULT_BLOCK;
1041 	else
1042 		dev->blocksize = blocksize;
1043 
1044 	if ((io_opts->queue_depth != 0)
1045 	 && (io_opts->queue_depth != 1)) {
1046 		warnx("Queue depth %ju for %s ignored, only 1 outstanding "
1047 		    "command supported", (uintmax_t)io_opts->queue_depth,
1048 		    io_opts->dev_name);
1049 	}
1050 	dev->target_queue_depth = CAMDD_FILE_DEFAULT_DEPTH;
1051 	dev->run = camdd_file_run;
1052 	dev->fetch = NULL;
1053 
1054 	/*
1055 	 * We can effectively access files on byte boundaries.  We'll reset
1056 	 * this for devices like disks that can be accessed on sector
1057 	 * boundaries.
1058 	 */
1059 	dev->sector_size = 1;
1060 
1061 	if ((fd != STDIN_FILENO)
1062 	 && (fd != STDOUT_FILENO)) {
1063 		int retval;
1064 
1065 		retval = fstat(fd, &file_dev->sb);
1066 		if (retval != 0) {
1067 			warn("Cannot stat %s", dev->device_name);
1068 			goto bailout_error;
1069 		}
1070 		if (S_ISREG(file_dev->sb.st_mode)) {
1071 			file_dev->file_type = CAMDD_FILE_REG;
1072 		} else if (S_ISCHR(file_dev->sb.st_mode)) {
1073 			int type;
1074 
1075 			if (ioctl(fd, FIODTYPE, &type) == -1)
1076 				err(1, "FIODTYPE ioctl failed on %s",
1077 				    dev->device_name);
1078 			else {
1079 				if (type & D_TAPE)
1080 					file_dev->file_type = CAMDD_FILE_TAPE;
1081 				else if (type & D_DISK)
1082 					file_dev->file_type = CAMDD_FILE_DISK;
1083 				else if (type & D_MEM)
1084 					file_dev->file_type = CAMDD_FILE_MEM;
1085 				else if (type & D_TTY)
1086 					file_dev->file_type = CAMDD_FILE_TTY;
1087 			}
1088 		} else if (S_ISDIR(file_dev->sb.st_mode)) {
1089 			errx(1, "cannot operate on directory %s",
1090 			    dev->device_name);
1091 		} else if (S_ISFIFO(file_dev->sb.st_mode)) {
1092 			file_dev->file_type = CAMDD_FILE_PIPE;
1093 		} else
1094 			errx(1, "Cannot determine file type for %s",
1095 			    dev->device_name);
1096 
1097 		switch (file_dev->file_type) {
1098 		case CAMDD_FILE_REG:
1099 			if (file_dev->sb.st_size != 0)
1100 				dev->max_sector = file_dev->sb.st_size - 1;
1101 			else
1102 				dev->max_sector = 0;
1103 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1104 			break;
1105 		case CAMDD_FILE_TAPE: {
1106 			uint64_t max_iosize, max_blk, min_blk, blk_gran;
1107 			/*
1108 			 * Check block limits and maximum effective iosize.
1109 			 * Make sure the blocksize is within the block
1110 			 * limits (and a multiple of the minimum blocksize)
1111 			 * and that the blocksize is <= maximum effective
1112 			 * iosize.
1113 			 */
1114 			retval = camdd_probe_tape(fd, dev->device_name,
1115 			    &max_iosize, &max_blk, &min_blk, &blk_gran);
1116 			if (retval != 0)
1117 				errx(1, "Unable to probe tape %s",
1118 				    dev->device_name);
1119 
1120 			/*
1121 			 * The blocksize needs to be <= the maximum
1122 			 * effective I/O size of the tape device.  Note
1123 			 * that this also takes into account the maximum
1124 			 * blocksize reported by READ BLOCK LIMITS.
1125 			 */
1126 			if (dev->blocksize > max_iosize) {
1127 				warnx("Blocksize %u too big for %s, limiting "
1128 				    "to %ju", dev->blocksize, dev->device_name,
1129 				    max_iosize);
1130 				dev->blocksize = max_iosize;
1131 			}
1132 
1133 			/*
1134 			 * The blocksize needs to be at least min_blk;
1135 			 */
1136 			if (dev->blocksize < min_blk) {
1137 				warnx("Blocksize %u too small for %s, "
1138 				    "increasing to %ju", dev->blocksize,
1139 				    dev->device_name, min_blk);
1140 				dev->blocksize = min_blk;
1141 			}
1142 
1143 			/*
1144 			 * And the blocksize needs to be a multiple of
1145 			 * the block granularity.
1146 			 */
1147 			if ((blk_gran != 0)
1148 			 && (dev->blocksize % (1 << blk_gran))) {
1149 				warnx("Blocksize %u for %s not a multiple of "
1150 				    "%d, adjusting to %d", dev->blocksize,
1151 				    dev->device_name, (1 << blk_gran),
1152 				    dev->blocksize & ~((1 << blk_gran) - 1));
1153 				dev->blocksize &= ~((1 << blk_gran) - 1);
1154 			}
1155 
1156 			if (dev->blocksize == 0) {
1157 				errx(1, "Unable to derive valid blocksize for "
1158 				    "%s", dev->device_name);
1159 			}
1160 
1161 			/*
1162 			 * For tape drives, set the sector size to the
1163 			 * blocksize so that we make sure not to write
1164 			 * less than the blocksize out to the drive.
1165 			 */
1166 			dev->sector_size = dev->blocksize;
1167 			break;
1168 		}
1169 		case CAMDD_FILE_DISK: {
1170 			off_t media_size;
1171 			unsigned int sector_size;
1172 
1173 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1174 
1175 			if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) == -1) {
1176 				err(1, "DIOCGSECTORSIZE ioctl failed on %s",
1177 				    dev->device_name);
1178 			}
1179 
1180 			if (sector_size == 0) {
1181 				errx(1, "DIOCGSECTORSIZE ioctl returned "
1182 				    "invalid sector size %u for %s",
1183 				    sector_size, dev->device_name);
1184 			}
1185 
1186 			if (ioctl(fd, DIOCGMEDIASIZE, &media_size) == -1) {
1187 				err(1, "DIOCGMEDIASIZE ioctl failed on %s",
1188 				    dev->device_name);
1189 			}
1190 
1191 			if (media_size == 0) {
1192 				errx(1, "DIOCGMEDIASIZE ioctl returned "
1193 				    "invalid media size %ju for %s",
1194 				    (uintmax_t)media_size, dev->device_name);
1195 			}
1196 
1197 			if (dev->blocksize % sector_size) {
1198 				errx(1, "%s blocksize %u not a multiple of "
1199 				    "sector size %u", dev->device_name,
1200 				    dev->blocksize, sector_size);
1201 			}
1202 
1203 			dev->sector_size = sector_size;
1204 			dev->max_sector = (media_size / sector_size) - 1;
1205 			break;
1206 		}
1207 		case CAMDD_FILE_MEM:
1208 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1209 			break;
1210 		default:
1211 			break;
1212 		}
1213 	}
1214 
1215 	if ((io_opts->offset != 0)
1216 	 && ((file_dev->file_flags & CAMDD_FF_CAN_SEEK) == 0)) {
1217 		warnx("Offset %ju specified for %s, but we cannot seek on %s",
1218 		    io_opts->offset, io_opts->dev_name, io_opts->dev_name);
1219 		goto bailout_error;
1220 	}
1221 #if 0
1222 	else if ((io_opts->offset != 0)
1223 		&& ((io_opts->offset % dev->sector_size) != 0)) {
1224 		warnx("Offset %ju for %s is not a multiple of the "
1225 		      "sector size %u", io_opts->offset,
1226 		      io_opts->dev_name, dev->sector_size);
1227 		goto bailout_error;
1228 	} else {
1229 		dev->start_offset_bytes = io_opts->offset;
1230 	}
1231 #endif
1232 
1233 bailout:
1234 	return (dev);
1235 
1236 bailout_error:
1237 	camdd_free_dev(dev);
1238 	return (NULL);
1239 }
1240 
1241 /*
1242  * Get a get device CCB for the specified device.
1243  */
1244 int
1245 camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd)
1246 {
1247         union ccb *ccb;
1248 	int retval = 0;
1249 
1250 	ccb = cam_getccb(device);
1251 
1252 	if (ccb == NULL) {
1253 		warnx("%s: couldn't allocate CCB", __func__);
1254 		return -1;
1255 	}
1256 
1257 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cgd);
1258 
1259 	ccb->ccb_h.func_code = XPT_GDEV_TYPE;
1260 
1261 	if (cam_send_ccb(device, ccb) < 0) {
1262 		warn("%s: error sending Get Device Information CCB", __func__);
1263 			cam_error_print(device, ccb, CAM_ESF_ALL,
1264 					CAM_EPF_ALL, stderr);
1265 		retval = -1;
1266 		goto bailout;
1267 	}
1268 
1269 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1270 			cam_error_print(device, ccb, CAM_ESF_ALL,
1271 					CAM_EPF_ALL, stderr);
1272 		retval = -1;
1273 		goto bailout;
1274 	}
1275 
1276 	bcopy(&ccb->cgd, cgd, sizeof(struct ccb_getdev));
1277 
1278 bailout:
1279 	cam_freeccb(ccb);
1280 
1281 	return retval;
1282 }
1283 
1284 int
1285 camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
1286 		 camdd_argmask arglist, int probe_retry_count,
1287 		 int probe_timeout, uint64_t *maxsector, uint32_t *block_len)
1288 {
1289 	struct scsi_read_capacity_data rcap;
1290 	struct scsi_read_capacity_data_long rcaplong;
1291 	int retval = -1;
1292 
1293 	if (ccb == NULL) {
1294 		warnx("%s: error passed ccb is NULL", __func__);
1295 		goto bailout;
1296 	}
1297 
1298 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
1299 
1300 	scsi_read_capacity(&ccb->csio,
1301 			   /*retries*/ probe_retry_count,
1302 			   /*cbfcnp*/ NULL,
1303 			   /*tag_action*/ MSG_SIMPLE_Q_TAG,
1304 			   &rcap,
1305 			   SSD_FULL_SIZE,
1306 			   /*timeout*/ probe_timeout ? probe_timeout : 5000);
1307 
1308 	/* Disable freezing the device queue */
1309 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1310 
1311 	if (arglist & CAMDD_ARG_ERR_RECOVER)
1312 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1313 
1314 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1315 		warn("error sending READ CAPACITY command");
1316 
1317 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1318 				CAM_EPF_ALL, stderr);
1319 
1320 		goto bailout;
1321 	}
1322 
1323 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1324 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1325 		goto bailout;
1326 	}
1327 
1328 	*maxsector = scsi_4btoul(rcap.addr);
1329 	*block_len = scsi_4btoul(rcap.length);
1330 
1331 	/*
1332 	 * A last block of 2^32-1 means that the true capacity is over 2TB,
1333 	 * and we need to issue the long READ CAPACITY to get the real
1334 	 * capacity.  Otherwise, we're all set.
1335 	 */
1336 	if (*maxsector != 0xffffffff) {
1337 		retval = 0;
1338 		goto bailout;
1339 	}
1340 
1341 	scsi_read_capacity_16(&ccb->csio,
1342 			      /*retries*/ probe_retry_count,
1343 			      /*cbfcnp*/ NULL,
1344 			      /*tag_action*/ MSG_SIMPLE_Q_TAG,
1345 			      /*lba*/ 0,
1346 			      /*reladdr*/ 0,
1347 			      /*pmi*/ 0,
1348 			      (uint8_t *)&rcaplong,
1349 			      sizeof(rcaplong),
1350 			      /*sense_len*/ SSD_FULL_SIZE,
1351 			      /*timeout*/ probe_timeout ? probe_timeout : 5000);
1352 
1353 	/* Disable freezing the device queue */
1354 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1355 
1356 	if (arglist & CAMDD_ARG_ERR_RECOVER)
1357 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1358 
1359 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1360 		warn("error sending READ CAPACITY (16) command");
1361 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1362 				CAM_EPF_ALL, stderr);
1363 		goto bailout;
1364 	}
1365 
1366 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1367 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1368 		goto bailout;
1369 	}
1370 
1371 	*maxsector = scsi_8btou64(rcaplong.addr);
1372 	*block_len = scsi_4btoul(rcaplong.length);
1373 
1374 	retval = 0;
1375 
1376 bailout:
1377 	return retval;
1378 }
1379 
1380 int
1381 camdd_probe_pass_nvme(struct cam_device *cam_dev, union ccb *ccb,
1382 		 camdd_argmask arglist, int probe_retry_count,
1383 		 int probe_timeout, uint64_t *maxsector, uint32_t *block_len)
1384 {
1385 	struct nvme_command *nc = NULL;
1386 	struct nvme_namespace_data nsdata;
1387 	uint32_t nsid = cam_dev->target_lun & UINT32_MAX;
1388 	uint8_t format = 0, lbads = 0;
1389 	int retval = -1;
1390 
1391 	if (ccb == NULL) {
1392 		warnx("%s: error passed ccb is NULL", __func__);
1393 		goto bailout;
1394 	}
1395 
1396 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->nvmeio);
1397 
1398 	/* Send Identify Namespace to get block size and capacity */
1399 	nc = &ccb->nvmeio.cmd;
1400 	nc->opc = NVME_OPC_IDENTIFY;
1401 
1402 	nc->nsid = nsid;
1403 	nc->cdw10 = 0; /* Identify Namespace is CNS = 0 */
1404 
1405 	cam_fill_nvmeadmin(&ccb->nvmeio,
1406 			/*retries*/ probe_retry_count,
1407 			/*cbfcnp*/ NULL,
1408 			CAM_DIR_IN,
1409 			(uint8_t *)&nsdata,
1410 			sizeof(nsdata),
1411 			probe_timeout);
1412 
1413 	/* Disable freezing the device queue */
1414 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1415 
1416 	if (arglist & CAMDD_ARG_ERR_RECOVER)
1417 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1418 
1419 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1420 		warn("error sending Identify Namespace command");
1421 
1422 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1423 				CAM_EPF_ALL, stderr);
1424 
1425 		goto bailout;
1426 	}
1427 
1428 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1429 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1430 		goto bailout;
1431 	}
1432 
1433 	*maxsector = nsdata.nsze;
1434 	/* The LBA Data Size (LBADS) is reported as a power of 2 */
1435 	format = nsdata.flbas & NVME_NS_DATA_FLBAS_FORMAT_MASK;
1436 	lbads = (nsdata.lbaf[format] >> NVME_NS_DATA_LBAF_LBADS_SHIFT) &
1437 	    NVME_NS_DATA_LBAF_LBADS_MASK;
1438 	*block_len = 1 << lbads;
1439 
1440 	retval = 0;
1441 
1442 bailout:
1443 	return retval;
1444 }
1445 
1446 /*
1447  * Need to implement this.  Do a basic probe:
1448  * - Check the inquiry data, make sure we're talking to a device that we
1449  *   can reasonably expect to talk to -- direct, RBC, CD, WORM.
1450  * - Send a test unit ready, make sure the device is available.
1451  * - Get the capacity and block size.
1452  */
1453 struct camdd_dev *
1454 camdd_probe_pass(struct cam_device *cam_dev, struct camdd_io_opts *io_opts,
1455 		 camdd_argmask arglist, int probe_retry_count,
1456 		 int probe_timeout, int io_retry_count, int io_timeout)
1457 {
1458 	union ccb *ccb;
1459 	uint64_t maxsector = 0;
1460 	uint32_t cpi_maxio, max_iosize, pass_numblocks;
1461 	uint32_t block_len = 0;
1462 	struct camdd_dev *dev = NULL;
1463 	struct camdd_dev_pass *pass_dev;
1464 	struct kevent ke;
1465 	struct ccb_getdev cgd;
1466 	int retval;
1467 	int scsi_dev_type = T_NODEVICE;
1468 
1469 	if ((retval = camdd_get_cgd(cam_dev, &cgd)) != 0) {
1470 		warnx("%s: error retrieving CGD", __func__);
1471 		return NULL;
1472 	}
1473 
1474 	ccb = cam_getccb(cam_dev);
1475 
1476 	if (ccb == NULL) {
1477 		warnx("%s: error allocating ccb", __func__);
1478 		goto bailout;
1479 	}
1480 
1481 	switch (cgd.protocol) {
1482 	case PROTO_SCSI:
1483 		scsi_dev_type = SID_TYPE(&cam_dev->inq_data);
1484 
1485 		/*
1486 		 * For devices that support READ CAPACITY, we'll attempt to get the
1487 		 * capacity.  Otherwise, we really don't support tape or other
1488 		 * devices via SCSI passthrough, so just return an error in that case.
1489 		 */
1490 		switch (scsi_dev_type) {
1491 		case T_DIRECT:
1492 		case T_WORM:
1493 		case T_CDROM:
1494 		case T_OPTICAL:
1495 		case T_RBC:
1496 		case T_ZBC_HM:
1497 			break;
1498 		default:
1499 			errx(1, "Unsupported SCSI device type %d", scsi_dev_type);
1500 			break; /*NOTREACHED*/
1501 		}
1502 
1503 		if ((retval = camdd_probe_pass_scsi(cam_dev, ccb, probe_retry_count,
1504 						arglist, probe_timeout, &maxsector,
1505 						&block_len))) {
1506 			goto bailout;
1507 		}
1508 		break;
1509 	case PROTO_NVME:
1510 		if ((retval = camdd_probe_pass_nvme(cam_dev, ccb, probe_retry_count,
1511 						arglist, probe_timeout, &maxsector,
1512 						&block_len))) {
1513 			goto bailout;
1514 		}
1515 		break;
1516 	default:
1517 		errx(1, "Unsupported PROTO type %d", cgd.protocol);
1518 		break; /*NOTREACHED*/
1519 	}
1520 
1521 	if (block_len == 0) {
1522 		warnx("Sector size for %s%u is 0, cannot continue",
1523 		    cam_dev->device_name, cam_dev->dev_unit_num);
1524 		goto bailout_error;
1525 	}
1526 
1527 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cpi);
1528 
1529 	ccb->ccb_h.func_code = XPT_PATH_INQ;
1530 	ccb->ccb_h.flags = CAM_DIR_NONE;
1531 	ccb->ccb_h.retry_count = 1;
1532 
1533 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1534 		warn("error sending XPT_PATH_INQ CCB");
1535 
1536 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1537 				CAM_EPF_ALL, stderr);
1538 		goto bailout;
1539 	}
1540 
1541 	EV_SET(&ke, cam_dev->fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1542 
1543 	dev = camdd_alloc_dev(CAMDD_DEV_PASS, &ke, 1, io_retry_count,
1544 			      io_timeout);
1545 	if (dev == NULL)
1546 		goto bailout;
1547 
1548 	pass_dev = &dev->dev_spec.pass;
1549 	pass_dev->scsi_dev_type = scsi_dev_type;
1550 	pass_dev->protocol = cgd.protocol;
1551 	pass_dev->dev = cam_dev;
1552 	pass_dev->max_sector = maxsector;
1553 	pass_dev->block_len = block_len;
1554 	pass_dev->cpi_maxio = ccb->cpi.maxio;
1555 	snprintf(dev->device_name, sizeof(dev->device_name), "%s%u",
1556 		 pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
1557 	dev->sector_size = block_len;
1558 	dev->max_sector = maxsector;
1559 
1560 
1561 	/*
1562 	 * Determine the optimal blocksize to use for this device.
1563 	 */
1564 
1565 	/*
1566 	 * If the controller has not specified a maximum I/O size,
1567 	 * just go with 128K as a somewhat conservative value.
1568 	 */
1569 	if (pass_dev->cpi_maxio == 0)
1570 		cpi_maxio = 131072;
1571 	else
1572 		cpi_maxio = pass_dev->cpi_maxio;
1573 
1574 	/*
1575 	 * If the controller has a large maximum I/O size, limit it
1576 	 * to something smaller so that the kernel doesn't have trouble
1577 	 * allocating buffers to copy data in and out for us.
1578 	 * XXX KDM this is until we have unmapped I/O support in the kernel.
1579 	 */
1580 	max_iosize = min(cpi_maxio, CAMDD_PASS_MAX_BLOCK);
1581 
1582 	/*
1583 	 * If we weren't able to get a block size for some reason,
1584 	 * default to 512 bytes.
1585 	 */
1586 	block_len = pass_dev->block_len;
1587 	if (block_len == 0)
1588 		block_len = 512;
1589 
1590 	/*
1591 	 * Figure out how many blocksize chunks will fit in the
1592 	 * maximum I/O size.
1593 	 */
1594 	pass_numblocks = max_iosize / block_len;
1595 
1596 	/*
1597 	 * And finally, multiple the number of blocks by the LBA
1598 	 * length to get our maximum block size;
1599 	 */
1600 	dev->blocksize = pass_numblocks * block_len;
1601 
1602 	if (io_opts->blocksize != 0) {
1603 		if ((io_opts->blocksize % dev->sector_size) != 0) {
1604 			warnx("Blocksize %ju for %s is not a multiple of "
1605 			      "sector size %u", (uintmax_t)io_opts->blocksize,
1606 			      dev->device_name, dev->sector_size);
1607 			goto bailout_error;
1608 		}
1609 		dev->blocksize = io_opts->blocksize;
1610 	}
1611 	dev->target_queue_depth = CAMDD_PASS_DEFAULT_DEPTH;
1612 	if (io_opts->queue_depth != 0)
1613 		dev->target_queue_depth = io_opts->queue_depth;
1614 
1615 	if (io_opts->offset != 0) {
1616 		if (io_opts->offset > (dev->max_sector * dev->sector_size)) {
1617 			warnx("Offset %ju is past the end of device %s",
1618 			    io_opts->offset, dev->device_name);
1619 			goto bailout_error;
1620 		}
1621 #if 0
1622 		else if ((io_opts->offset % dev->sector_size) != 0) {
1623 			warnx("Offset %ju for %s is not a multiple of the "
1624 			      "sector size %u", io_opts->offset,
1625 			      dev->device_name, dev->sector_size);
1626 			goto bailout_error;
1627 		}
1628 		dev->start_offset_bytes = io_opts->offset;
1629 #endif
1630 	}
1631 
1632 	dev->min_cmd_size = io_opts->min_cmd_size;
1633 
1634 	dev->run = camdd_pass_run;
1635 	dev->fetch = camdd_pass_fetch;
1636 
1637 bailout:
1638 	cam_freeccb(ccb);
1639 
1640 	return (dev);
1641 
1642 bailout_error:
1643 	cam_freeccb(ccb);
1644 
1645 	camdd_free_dev(dev);
1646 
1647 	return (NULL);
1648 }
1649 
1650 void
1651 nvme_read_write(struct ccb_nvmeio *nvmeio, uint32_t retries,
1652 		void (*cbfcnp)(struct cam_periph *, union ccb *),
1653 		uint32_t nsid, int readop, uint64_t lba,
1654 		uint32_t block_count, uint8_t *data_ptr, uint32_t dxfer_len,
1655 		uint32_t timeout)
1656 {
1657 	struct nvme_command *nc = &nvmeio->cmd;
1658 
1659 	nc->opc = readop ? NVME_OPC_READ : NVME_OPC_WRITE;
1660 
1661 	nc->nsid = nsid;
1662 
1663 	nc->cdw10 = lba & UINT32_MAX;
1664 	nc->cdw11 = lba >> 32;
1665 
1666 	/* NLB (bits 15:0) is a zero based value */
1667 	nc->cdw12 = (block_count - 1) & UINT16_MAX;
1668 
1669 	cam_fill_nvmeio(nvmeio,
1670 			retries,
1671 			cbfcnp,
1672 			readop ? CAM_DIR_IN : CAM_DIR_OUT,
1673 			data_ptr,
1674 			dxfer_len,
1675 			timeout);
1676 }
1677 
1678 void *
1679 camdd_worker(void *arg)
1680 {
1681 	struct camdd_dev *dev = arg;
1682 	struct camdd_buf *buf;
1683 	struct timespec ts, *kq_ts;
1684 
1685 	ts.tv_sec = 0;
1686 	ts.tv_nsec = 0;
1687 
1688 	pthread_mutex_lock(&dev->mutex);
1689 
1690 	dev->flags |= CAMDD_DEV_FLAG_ACTIVE;
1691 
1692 	for (;;) {
1693 		struct kevent ke;
1694 		int retval = 0;
1695 
1696 		/*
1697 		 * XXX KDM check the reorder queue depth?
1698 		 */
1699 		if (dev->write_dev == 0) {
1700 			uint32_t our_depth, peer_depth, peer_bytes, our_bytes;
1701 			uint32_t target_depth = dev->target_queue_depth;
1702 			uint32_t peer_target_depth =
1703 			    dev->peer_dev->target_queue_depth;
1704 			uint32_t peer_blocksize = dev->peer_dev->blocksize;
1705 
1706 			camdd_get_depth(dev, &our_depth, &peer_depth,
1707 					&our_bytes, &peer_bytes);
1708 
1709 #if 0
1710 			while (((our_depth < target_depth)
1711 			     && (peer_depth < peer_target_depth))
1712 			    || ((peer_bytes + our_bytes) <
1713 				 (peer_blocksize * 2))) {
1714 #endif
1715 			while (((our_depth + peer_depth) <
1716 			        (target_depth + peer_target_depth))
1717 			    || ((peer_bytes + our_bytes) <
1718 				(peer_blocksize * 3))) {
1719 
1720 				retval = camdd_queue(dev, NULL);
1721 				if (retval == 1)
1722 					break;
1723 				else if (retval != 0) {
1724 					error_exit = 1;
1725 					goto bailout;
1726 				}
1727 
1728 				camdd_get_depth(dev, &our_depth, &peer_depth,
1729 						&our_bytes, &peer_bytes);
1730 			}
1731 		}
1732 		/*
1733 		 * See if we have any I/O that is ready to execute.
1734 		 */
1735 		buf = STAILQ_FIRST(&dev->run_queue);
1736 		if (buf != NULL) {
1737 			while (dev->target_queue_depth > dev->cur_active_io) {
1738 				retval = dev->run(dev);
1739 				if (retval == -1) {
1740 					dev->flags |= CAMDD_DEV_FLAG_EOF;
1741 					error_exit = 1;
1742 					break;
1743 				} else if (retval != 0) {
1744 					break;
1745 				}
1746 			}
1747 		}
1748 
1749 		/*
1750 		 * We've reached EOF, or our partner has reached EOF.
1751 		 */
1752 		if ((dev->flags & CAMDD_DEV_FLAG_EOF)
1753 		 || (dev->flags & CAMDD_DEV_FLAG_PEER_EOF)) {
1754 			if (dev->write_dev != 0) {
1755 			 	if ((STAILQ_EMPTY(&dev->work_queue))
1756 				 && (dev->num_run_queue == 0)
1757 				 && (dev->cur_active_io == 0)) {
1758 					goto bailout;
1759 				}
1760 			} else {
1761 				/*
1762 				 * If we're the reader, and the writer
1763 				 * got EOF, he is already done.  If we got
1764 				 * the EOF, then we need to wait until
1765 				 * everything is flushed out for the writer.
1766 				 */
1767 				if (dev->flags & CAMDD_DEV_FLAG_PEER_EOF) {
1768 					goto bailout;
1769 				} else if ((dev->num_peer_work_queue == 0)
1770 					&& (dev->num_peer_done_queue == 0)
1771 					&& (dev->cur_active_io == 0)
1772 					&& (dev->num_run_queue == 0)) {
1773 					goto bailout;
1774 				}
1775 			}
1776 			/*
1777 			 * XXX KDM need to do something about the pending
1778 			 * queue and cleanup resources.
1779 			 */
1780 		}
1781 
1782 		if ((dev->write_dev == 0)
1783 		 && (dev->cur_active_io == 0)
1784 		 && (dev->peer_bytes_queued < dev->peer_dev->blocksize))
1785 			kq_ts = &ts;
1786 		else
1787 			kq_ts = NULL;
1788 
1789 		/*
1790 		 * Run kevent to see if there are events to process.
1791 		 */
1792 		pthread_mutex_unlock(&dev->mutex);
1793 		retval = kevent(dev->kq, NULL, 0, &ke, 1, kq_ts);
1794 		pthread_mutex_lock(&dev->mutex);
1795 		if (retval == -1) {
1796 			warn("%s: error returned from kevent",__func__);
1797 			goto bailout;
1798 		} else if (retval != 0) {
1799 			switch (ke.filter) {
1800 			case EVFILT_READ:
1801 				if (dev->fetch != NULL) {
1802 					retval = dev->fetch(dev);
1803 					if (retval == -1) {
1804 						error_exit = 1;
1805 						goto bailout;
1806 					}
1807 				}
1808 				break;
1809 			case EVFILT_SIGNAL:
1810 				/*
1811 				 * We register for this so we don't get
1812 				 * an error as a result of a SIGINFO or a
1813 				 * SIGINT.  It will actually get handled
1814 				 * by the signal handler.  If we get a
1815 				 * SIGINT, bail out without printing an
1816 				 * error message.  Any other signals
1817 				 * will result in the error message above.
1818 				 */
1819 				if (ke.ident == SIGINT)
1820 					goto bailout;
1821 				break;
1822 			case EVFILT_USER:
1823 				retval = 0;
1824 				/*
1825 				 * Check to see if the other thread has
1826 				 * queued any I/O for us to do.  (In this
1827 				 * case we're the writer.)
1828 				 */
1829 				for (buf = STAILQ_FIRST(&dev->work_queue);
1830 				     buf != NULL;
1831 				     buf = STAILQ_FIRST(&dev->work_queue)) {
1832 					STAILQ_REMOVE_HEAD(&dev->work_queue,
1833 							   work_links);
1834 					retval = camdd_queue(dev, buf);
1835 					/*
1836 					 * We keep going unless we get an
1837 					 * actual error.  If we get EOF, we
1838 					 * still want to remove the buffers
1839 					 * from the queue and send the back
1840 					 * to the reader thread.
1841 					 */
1842 					if (retval == -1) {
1843 						error_exit = 1;
1844 						goto bailout;
1845 					} else
1846 						retval = 0;
1847 				}
1848 
1849 				/*
1850 				 * Next check to see if the other thread has
1851 				 * queued any completed buffers back to us.
1852 				 * (In this case we're the reader.)
1853 				 */
1854 				for (buf = STAILQ_FIRST(&dev->peer_done_queue);
1855 				     buf != NULL;
1856 				     buf = STAILQ_FIRST(&dev->peer_done_queue)){
1857 					STAILQ_REMOVE_HEAD(
1858 					    &dev->peer_done_queue, work_links);
1859 					dev->num_peer_done_queue--;
1860 					camdd_peer_done(buf);
1861 				}
1862 				break;
1863 			default:
1864 				warnx("%s: unknown kevent filter %d",
1865 				      __func__, ke.filter);
1866 				break;
1867 			}
1868 		}
1869 	}
1870 
1871 bailout:
1872 
1873 	dev->flags &= ~CAMDD_DEV_FLAG_ACTIVE;
1874 
1875 	/* XXX KDM cleanup resources here? */
1876 
1877 	pthread_mutex_unlock(&dev->mutex);
1878 
1879 	need_exit = 1;
1880 	sem_post(&camdd_sem);
1881 
1882 	return (NULL);
1883 }
1884 
1885 /*
1886  * Simplistic translation of CCB status to our local status.
1887  */
1888 camdd_buf_status
1889 camdd_ccb_status(union ccb *ccb, int protocol)
1890 {
1891 	camdd_buf_status status = CAMDD_STATUS_NONE;
1892 	cam_status ccb_status;
1893 
1894 	ccb_status = ccb->ccb_h.status & CAM_STATUS_MASK;
1895 
1896 	switch (protocol) {
1897 	case PROTO_SCSI:
1898 		switch (ccb_status) {
1899 		case CAM_REQ_CMP: {
1900 			if (ccb->csio.resid == 0) {
1901 				status = CAMDD_STATUS_OK;
1902 			} else if (ccb->csio.dxfer_len > ccb->csio.resid) {
1903 				status = CAMDD_STATUS_SHORT_IO;
1904 			} else {
1905 				status = CAMDD_STATUS_EOF;
1906 			}
1907 			break;
1908 		}
1909 		case CAM_SCSI_STATUS_ERROR: {
1910 			switch (ccb->csio.scsi_status) {
1911 			case SCSI_STATUS_OK:
1912 			case SCSI_STATUS_COND_MET:
1913 			case SCSI_STATUS_INTERMED:
1914 			case SCSI_STATUS_INTERMED_COND_MET:
1915 				status = CAMDD_STATUS_OK;
1916 				break;
1917 			case SCSI_STATUS_CMD_TERMINATED:
1918 			case SCSI_STATUS_CHECK_COND:
1919 			case SCSI_STATUS_QUEUE_FULL:
1920 			case SCSI_STATUS_BUSY:
1921 			case SCSI_STATUS_RESERV_CONFLICT:
1922 			default:
1923 				status = CAMDD_STATUS_ERROR;
1924 				break;
1925 			}
1926 			break;
1927 		}
1928 		default:
1929 			status = CAMDD_STATUS_ERROR;
1930 			break;
1931 		}
1932 		break;
1933 	case PROTO_NVME:
1934 		switch (ccb_status) {
1935 		case CAM_REQ_CMP:
1936 			status = CAMDD_STATUS_OK;
1937 			break;
1938 		default:
1939 			status = CAMDD_STATUS_ERROR;
1940 			break;
1941 		}
1942 		break;
1943 	default:
1944 		status = CAMDD_STATUS_ERROR;
1945 		break;
1946 	}
1947 
1948 	return (status);
1949 }
1950 
1951 /*
1952  * Queue a buffer to our peer's work thread for writing.
1953  *
1954  * Returns 0 for success, -1 for failure, 1 if the other thread exited.
1955  */
1956 int
1957 camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf)
1958 {
1959 	struct kevent ke;
1960 	STAILQ_HEAD(, camdd_buf) local_queue;
1961 	struct camdd_buf *buf1, *buf2;
1962 	struct camdd_buf_data *data = NULL;
1963 	uint64_t peer_bytes_queued = 0;
1964 	int active = 1;
1965 	int retval = 0;
1966 
1967 	STAILQ_INIT(&local_queue);
1968 
1969 	/*
1970 	 * Since we're the reader, we need to queue our I/O to the writer
1971 	 * in sequential order in order to make sure it gets written out
1972 	 * in sequential order.
1973 	 *
1974 	 * Check the next expected I/O starting offset.  If this doesn't
1975 	 * match, put it on the reorder queue.
1976 	 */
1977 	if ((buf->lba * dev->sector_size) != dev->next_completion_pos_bytes) {
1978 
1979 		/*
1980 		 * If there is nothing on the queue, there is no sorting
1981 		 * needed.
1982 		 */
1983 		if (STAILQ_EMPTY(&dev->reorder_queue)) {
1984 			STAILQ_INSERT_TAIL(&dev->reorder_queue, buf, links);
1985 			dev->num_reorder_queue++;
1986 			goto bailout;
1987 		}
1988 
1989 		/*
1990 		 * Sort in ascending order by starting LBA.  There should
1991 		 * be no identical LBAs.
1992 		 */
1993 		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
1994 		     buf1 = buf2) {
1995 			buf2 = STAILQ_NEXT(buf1, links);
1996 			if (buf->lba < buf1->lba) {
1997 				/*
1998 				 * If we're less than the first one, then
1999 				 * we insert at the head of the list
2000 				 * because this has to be the first element
2001 				 * on the list.
2002 				 */
2003 				STAILQ_INSERT_HEAD(&dev->reorder_queue,
2004 						   buf, links);
2005 				dev->num_reorder_queue++;
2006 				break;
2007 			} else if (buf->lba > buf1->lba) {
2008 				if (buf2 == NULL) {
2009 					STAILQ_INSERT_TAIL(&dev->reorder_queue,
2010 					    buf, links);
2011 					dev->num_reorder_queue++;
2012 					break;
2013 				} else if (buf->lba < buf2->lba) {
2014 					STAILQ_INSERT_AFTER(&dev->reorder_queue,
2015 					    buf1, buf, links);
2016 					dev->num_reorder_queue++;
2017 					break;
2018 				}
2019 			} else {
2020 				errx(1, "Found buffers with duplicate LBA %ju!",
2021 				     buf->lba);
2022 			}
2023 		}
2024 		goto bailout;
2025 	} else {
2026 
2027 		/*
2028 		 * We're the next expected I/O completion, so put ourselves
2029 		 * on the local queue to be sent to the writer.  We use
2030 		 * work_links here so that we can queue this to the
2031 		 * peer_work_queue before taking the buffer off of the
2032 		 * local_queue.
2033 		 */
2034 		dev->next_completion_pos_bytes += buf->len;
2035 		STAILQ_INSERT_TAIL(&local_queue, buf, work_links);
2036 
2037 		/*
2038 		 * Go through the reorder queue looking for more sequential
2039 		 * I/O and add it to the local queue.
2040 		 */
2041 		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
2042 		     buf1 = STAILQ_FIRST(&dev->reorder_queue)) {
2043 			/*
2044 			 * As soon as we see an I/O that is out of sequence,
2045 			 * we're done.
2046 			 */
2047 			if ((buf1->lba * dev->sector_size) !=
2048 			     dev->next_completion_pos_bytes)
2049 				break;
2050 
2051 			STAILQ_REMOVE_HEAD(&dev->reorder_queue, links);
2052 			dev->num_reorder_queue--;
2053 			STAILQ_INSERT_TAIL(&local_queue, buf1, work_links);
2054 			dev->next_completion_pos_bytes += buf1->len;
2055 		}
2056 	}
2057 
2058 	/*
2059 	 * Setup the event to let the other thread know that it has work
2060 	 * pending.
2061 	 */
2062 	EV_SET(&ke, (uintptr_t)&dev->peer_dev->work_queue, EVFILT_USER, 0,
2063 	       NOTE_TRIGGER, 0, NULL);
2064 
2065 	/*
2066 	 * Put this on our shadow queue so that we know what we've queued
2067 	 * to the other thread.
2068 	 */
2069 	STAILQ_FOREACH_SAFE(buf1, &local_queue, work_links, buf2) {
2070 		if (buf1->buf_type != CAMDD_BUF_DATA) {
2071 			errx(1, "%s: should have a data buffer, not an "
2072 			    "indirect buffer", __func__);
2073 		}
2074 		data = &buf1->buf_type_spec.data;
2075 
2076 		/*
2077 		 * We only need to send one EOF to the writer, and don't
2078 		 * need to continue sending EOFs after that.
2079 		 */
2080 		if (buf1->status == CAMDD_STATUS_EOF) {
2081 			if (dev->flags & CAMDD_DEV_FLAG_EOF_SENT) {
2082 				STAILQ_REMOVE(&local_queue, buf1, camdd_buf,
2083 				    work_links);
2084 				camdd_release_buf(buf1);
2085 				retval = 1;
2086 				continue;
2087 			}
2088 			dev->flags |= CAMDD_DEV_FLAG_EOF_SENT;
2089 		}
2090 
2091 
2092 		STAILQ_INSERT_TAIL(&dev->peer_work_queue, buf1, links);
2093 		peer_bytes_queued += (data->fill_len - data->resid);
2094 		dev->peer_bytes_queued += (data->fill_len - data->resid);
2095 		dev->num_peer_work_queue++;
2096 	}
2097 
2098 	if (STAILQ_FIRST(&local_queue) == NULL)
2099 		goto bailout;
2100 
2101 	/*
2102 	 * Drop our mutex and pick up the other thread's mutex.  We need to
2103 	 * do this to avoid deadlocks.
2104 	 */
2105 	pthread_mutex_unlock(&dev->mutex);
2106 	pthread_mutex_lock(&dev->peer_dev->mutex);
2107 
2108 	if (dev->peer_dev->flags & CAMDD_DEV_FLAG_ACTIVE) {
2109 		/*
2110 		 * Put the buffers on the other thread's incoming work queue.
2111 		 */
2112 		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2113 		     buf1 = STAILQ_FIRST(&local_queue)) {
2114 			STAILQ_REMOVE_HEAD(&local_queue, work_links);
2115 			STAILQ_INSERT_TAIL(&dev->peer_dev->work_queue, buf1,
2116 					   work_links);
2117 		}
2118 		/*
2119 		 * Send an event to the other thread's kqueue to let it know
2120 		 * that there is something on the work queue.
2121 		 */
2122 		retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2123 		if (retval == -1)
2124 			warn("%s: unable to add peer work_queue kevent",
2125 			     __func__);
2126 		else
2127 			retval = 0;
2128 	} else
2129 		active = 0;
2130 
2131 	pthread_mutex_unlock(&dev->peer_dev->mutex);
2132 	pthread_mutex_lock(&dev->mutex);
2133 
2134 	/*
2135 	 * If the other side isn't active, run through the queue and
2136 	 * release all of the buffers.
2137 	 */
2138 	if (active == 0) {
2139 		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2140 		     buf1 = STAILQ_FIRST(&local_queue)) {
2141 			STAILQ_REMOVE_HEAD(&local_queue, work_links);
2142 			STAILQ_REMOVE(&dev->peer_work_queue, buf1, camdd_buf,
2143 				      links);
2144 			dev->num_peer_work_queue--;
2145 			camdd_release_buf(buf1);
2146 		}
2147 		dev->peer_bytes_queued -= peer_bytes_queued;
2148 		retval = 1;
2149 	}
2150 
2151 bailout:
2152 	return (retval);
2153 }
2154 
2155 /*
2156  * Return a buffer to the reader thread when we have completed writing it.
2157  */
2158 int
2159 camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf)
2160 {
2161 	struct kevent ke;
2162 	int retval = 0;
2163 
2164 	/*
2165 	 * Setup the event to let the other thread know that we have
2166 	 * completed a buffer.
2167 	 */
2168 	EV_SET(&ke, (uintptr_t)&dev->peer_dev->peer_done_queue, EVFILT_USER, 0,
2169 	       NOTE_TRIGGER, 0, NULL);
2170 
2171 	/*
2172 	 * Drop our lock and acquire the other thread's lock before
2173 	 * manipulating
2174 	 */
2175 	pthread_mutex_unlock(&dev->mutex);
2176 	pthread_mutex_lock(&dev->peer_dev->mutex);
2177 
2178 	/*
2179 	 * Put the buffer on the reader thread's peer done queue now that
2180 	 * we have completed it.
2181 	 */
2182 	STAILQ_INSERT_TAIL(&dev->peer_dev->peer_done_queue, peer_buf,
2183 			   work_links);
2184 	dev->peer_dev->num_peer_done_queue++;
2185 
2186 	/*
2187 	 * Send an event to the peer thread to let it know that we've added
2188 	 * something to its peer done queue.
2189 	 */
2190 	retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2191 	if (retval == -1)
2192 		warn("%s: unable to add peer_done_queue kevent", __func__);
2193 	else
2194 		retval = 0;
2195 
2196 	/*
2197 	 * Drop the other thread's lock and reacquire ours.
2198 	 */
2199 	pthread_mutex_unlock(&dev->peer_dev->mutex);
2200 	pthread_mutex_lock(&dev->mutex);
2201 
2202 	return (retval);
2203 }
2204 
2205 /*
2206  * Free a buffer that was written out by the writer thread and returned to
2207  * the reader thread.
2208  */
2209 void
2210 camdd_peer_done(struct camdd_buf *buf)
2211 {
2212 	struct camdd_dev *dev;
2213 	struct camdd_buf_data *data;
2214 
2215 	dev = buf->dev;
2216 	if (buf->buf_type != CAMDD_BUF_DATA) {
2217 		errx(1, "%s: should have a data buffer, not an "
2218 		    "indirect buffer", __func__);
2219 	}
2220 
2221 	data = &buf->buf_type_spec.data;
2222 
2223 	STAILQ_REMOVE(&dev->peer_work_queue, buf, camdd_buf, links);
2224 	dev->num_peer_work_queue--;
2225 	dev->peer_bytes_queued -= (data->fill_len - data->resid);
2226 
2227 	if (buf->status == CAMDD_STATUS_EOF)
2228 		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2229 
2230 	STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2231 }
2232 
2233 /*
2234  * Assumes caller holds the lock for this device.
2235  */
2236 void
2237 camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
2238 		   int *error_count)
2239 {
2240 	int retval = 0;
2241 
2242 	/*
2243 	 * If we're the reader, we need to send the completed I/O
2244 	 * to the writer.  If we're the writer, we need to just
2245 	 * free up resources, or let the reader know if we've
2246 	 * encountered an error.
2247 	 */
2248 	if (dev->write_dev == 0) {
2249 		retval = camdd_queue_peer_buf(dev, buf);
2250 		if (retval != 0)
2251 			(*error_count)++;
2252 	} else {
2253 		struct camdd_buf *tmp_buf, *next_buf;
2254 
2255 		STAILQ_FOREACH_SAFE(tmp_buf, &buf->src_list, src_links,
2256 				    next_buf) {
2257 			struct camdd_buf *src_buf;
2258 			struct camdd_buf_indirect *indirect;
2259 
2260 			STAILQ_REMOVE(&buf->src_list, tmp_buf,
2261 				      camdd_buf, src_links);
2262 
2263 			tmp_buf->status = buf->status;
2264 
2265 			if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
2266 				camdd_complete_peer_buf(dev, tmp_buf);
2267 				continue;
2268 			}
2269 
2270 			indirect = &tmp_buf->buf_type_spec.indirect;
2271 			src_buf = indirect->src_buf;
2272 			src_buf->refcount--;
2273 			/*
2274 			 * XXX KDM we probably need to account for
2275 			 * exactly how many bytes we were able to
2276 			 * write.  Allocate the residual to the
2277 			 * first N buffers?  Or just track the
2278 			 * number of bytes written?  Right now the reader
2279 			 * doesn't do anything with a residual.
2280 			 */
2281 			src_buf->status = buf->status;
2282 			if (src_buf->refcount <= 0)
2283 				camdd_complete_peer_buf(dev, src_buf);
2284 			STAILQ_INSERT_TAIL(&dev->free_indirect_queue,
2285 					   tmp_buf, links);
2286 		}
2287 
2288 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2289 	}
2290 }
2291 
2292 /*
2293  * Fetch all completed commands from the pass(4) device.
2294  *
2295  * Returns the number of commands received, or -1 if any of the commands
2296  * completed with an error.  Returns 0 if no commands are available.
2297  */
2298 int
2299 camdd_pass_fetch(struct camdd_dev *dev)
2300 {
2301 	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2302 	union ccb ccb;
2303 	int retval = 0, num_fetched = 0, error_count = 0;
2304 
2305 	pthread_mutex_unlock(&dev->mutex);
2306 	/*
2307 	 * XXX KDM we don't distinguish between EFAULT and ENOENT.
2308 	 */
2309 	while ((retval = ioctl(pass_dev->dev->fd, CAMIOGET, &ccb)) != -1) {
2310 		struct camdd_buf *buf;
2311 		struct camdd_buf_data *data;
2312 		cam_status ccb_status;
2313 		union ccb *buf_ccb;
2314 
2315 		buf = ccb.ccb_h.ccb_buf;
2316 		data = &buf->buf_type_spec.data;
2317 		buf_ccb = &data->ccb;
2318 
2319 		num_fetched++;
2320 
2321 		/*
2322 		 * Copy the CCB back out so we get status, sense data, etc.
2323 		 */
2324 		bcopy(&ccb, buf_ccb, sizeof(ccb));
2325 
2326 		pthread_mutex_lock(&dev->mutex);
2327 
2328 		/*
2329 		 * We're now done, so take this off the active queue.
2330 		 */
2331 		STAILQ_REMOVE(&dev->active_queue, buf, camdd_buf, links);
2332 		dev->cur_active_io--;
2333 
2334 		ccb_status = ccb.ccb_h.status & CAM_STATUS_MASK;
2335 		if (ccb_status != CAM_REQ_CMP) {
2336 			cam_error_print(pass_dev->dev, &ccb, CAM_ESF_ALL,
2337 					CAM_EPF_ALL, stderr);
2338 		}
2339 
2340 		switch (pass_dev->protocol) {
2341 		case PROTO_SCSI:
2342 			data->resid = ccb.csio.resid;
2343 			dev->bytes_transferred += (ccb.csio.dxfer_len - ccb.csio.resid);
2344 			break;
2345 		case PROTO_NVME:
2346 			data->resid = 0;
2347 			dev->bytes_transferred += ccb.nvmeio.dxfer_len;
2348 			break;
2349 		default:
2350 			return -1;
2351 			break;
2352 		}
2353 
2354 		if (buf->status == CAMDD_STATUS_NONE)
2355 			buf->status = camdd_ccb_status(&ccb, pass_dev->protocol);
2356 		if (buf->status == CAMDD_STATUS_ERROR)
2357 			error_count++;
2358 		else if (buf->status == CAMDD_STATUS_EOF) {
2359 			/*
2360 			 * Once we queue this buffer to our partner thread,
2361 			 * he will know that we've hit EOF.
2362 			 */
2363 			dev->flags |= CAMDD_DEV_FLAG_EOF;
2364 		}
2365 
2366 		camdd_complete_buf(dev, buf, &error_count);
2367 
2368 		/*
2369 		 * Unlock in preparation for the ioctl call.
2370 		 */
2371 		pthread_mutex_unlock(&dev->mutex);
2372 	}
2373 
2374 	pthread_mutex_lock(&dev->mutex);
2375 
2376 	if (error_count > 0)
2377 		return (-1);
2378 	else
2379 		return (num_fetched);
2380 }
2381 
2382 /*
2383  * Returns -1 for error, 0 for success/continue, and 1 for resource
2384  * shortage/stop processing.
2385  */
2386 int
2387 camdd_file_run(struct camdd_dev *dev)
2388 {
2389 	struct camdd_dev_file *file_dev = &dev->dev_spec.file;
2390 	struct camdd_buf_data *data;
2391 	struct camdd_buf *buf;
2392 	off_t io_offset;
2393 	int retval = 0, write_dev = dev->write_dev;
2394 	int error_count = 0, no_resources = 0, double_buf_needed = 0;
2395 	uint32_t num_sectors = 0, db_len = 0;
2396 
2397 	buf = STAILQ_FIRST(&dev->run_queue);
2398 	if (buf == NULL) {
2399 		no_resources = 1;
2400 		goto bailout;
2401 	} else if ((dev->write_dev == 0)
2402 		&& (dev->flags & (CAMDD_DEV_FLAG_EOF |
2403 				  CAMDD_DEV_FLAG_EOF_SENT))) {
2404 		STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2405 		dev->num_run_queue--;
2406 		buf->status = CAMDD_STATUS_EOF;
2407 		error_count++;
2408 		goto bailout;
2409 	}
2410 
2411 	/*
2412 	 * If we're writing, we need to go through the source buffer list
2413 	 * and create an S/G list.
2414 	 */
2415 	if (write_dev != 0) {
2416 		retval = camdd_buf_sg_create(buf, /*iovec*/ 1,
2417 		    dev->sector_size, &num_sectors, &double_buf_needed);
2418 		if (retval != 0) {
2419 			no_resources = 1;
2420 			goto bailout;
2421 		}
2422 	}
2423 
2424 	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2425 	dev->num_run_queue--;
2426 
2427 	data = &buf->buf_type_spec.data;
2428 
2429 	/*
2430 	 * pread(2) and pwrite(2) offsets are byte offsets.
2431 	 */
2432 	io_offset = buf->lba * dev->sector_size;
2433 
2434 	/*
2435 	 * Unlock the mutex while we read or write.
2436 	 */
2437 	pthread_mutex_unlock(&dev->mutex);
2438 
2439 	/*
2440 	 * Note that we don't need to double buffer if we're the reader
2441 	 * because in that case, we have allocated a single buffer of
2442 	 * sufficient size to do the read.  This copy is necessary on
2443 	 * writes because if one of the components of the S/G list is not
2444 	 * a sector size multiple, the kernel will reject the write.  This
2445 	 * is unfortunate but not surprising.  So this will make sure that
2446 	 * we're using a single buffer that is a multiple of the sector size.
2447 	 */
2448 	if ((double_buf_needed != 0)
2449 	 && (data->sg_count > 1)
2450 	 && (write_dev != 0)) {
2451 		uint32_t cur_offset;
2452 		int i;
2453 
2454 		if (file_dev->tmp_buf == NULL)
2455 			file_dev->tmp_buf = calloc(dev->blocksize, 1);
2456 		if (file_dev->tmp_buf == NULL) {
2457 			buf->status = CAMDD_STATUS_ERROR;
2458 			error_count++;
2459 			pthread_mutex_lock(&dev->mutex);
2460 			goto bailout;
2461 		}
2462 		for (i = 0, cur_offset = 0; i < data->sg_count; i++) {
2463 			bcopy(data->iovec[i].iov_base,
2464 			    &file_dev->tmp_buf[cur_offset],
2465 			    data->iovec[i].iov_len);
2466 			cur_offset += data->iovec[i].iov_len;
2467 		}
2468 		db_len = cur_offset;
2469 	}
2470 
2471 	if (file_dev->file_flags & CAMDD_FF_CAN_SEEK) {
2472 		if (write_dev == 0) {
2473 			/*
2474 			 * XXX KDM is there any way we would need a S/G
2475 			 * list here?
2476 			 */
2477 			retval = pread(file_dev->fd, data->buf,
2478 			    buf->len, io_offset);
2479 		} else {
2480 			if (double_buf_needed != 0) {
2481 				retval = pwrite(file_dev->fd, file_dev->tmp_buf,
2482 				    db_len, io_offset);
2483 			} else if (data->sg_count == 0) {
2484 				retval = pwrite(file_dev->fd, data->buf,
2485 				    data->fill_len, io_offset);
2486 			} else {
2487 				retval = pwritev(file_dev->fd, data->iovec,
2488 				    data->sg_count, io_offset);
2489 			}
2490 		}
2491 	} else {
2492 		if (write_dev == 0) {
2493 			/*
2494 			 * XXX KDM is there any way we would need a S/G
2495 			 * list here?
2496 			 */
2497 			retval = read(file_dev->fd, data->buf, buf->len);
2498 		} else {
2499 			if (double_buf_needed != 0) {
2500 				retval = write(file_dev->fd, file_dev->tmp_buf,
2501 				    db_len);
2502 			} else if (data->sg_count == 0) {
2503 				retval = write(file_dev->fd, data->buf,
2504 				    data->fill_len);
2505 			} else {
2506 				retval = writev(file_dev->fd, data->iovec,
2507 				    data->sg_count);
2508 			}
2509 		}
2510 	}
2511 
2512 	/* We're done, re-acquire the lock */
2513 	pthread_mutex_lock(&dev->mutex);
2514 
2515 	if (retval >= (ssize_t)data->fill_len) {
2516 		/*
2517 		 * If the bytes transferred is more than the request size,
2518 		 * that indicates an overrun, which should only happen at
2519 		 * the end of a transfer if we have to round up to a sector
2520 		 * boundary.
2521 		 */
2522 		if (buf->status == CAMDD_STATUS_NONE)
2523 			buf->status = CAMDD_STATUS_OK;
2524 		data->resid = 0;
2525 		dev->bytes_transferred += retval;
2526 	} else if (retval == -1) {
2527 		warn("Error %s %s", (write_dev) ? "writing to" :
2528 		    "reading from", file_dev->filename);
2529 
2530 		buf->status = CAMDD_STATUS_ERROR;
2531 		data->resid = data->fill_len;
2532 		error_count++;
2533 
2534 		if (dev->debug == 0)
2535 			goto bailout;
2536 
2537 		if ((double_buf_needed != 0)
2538 		 && (write_dev != 0)) {
2539 			fprintf(stderr, "%s: fd %d, DB buf %p, len %u lba %ju "
2540 			    "offset %ju\n", __func__, file_dev->fd,
2541 			    file_dev->tmp_buf, db_len, (uintmax_t)buf->lba,
2542 			    (uintmax_t)io_offset);
2543 		} else if (data->sg_count == 0) {
2544 			fprintf(stderr, "%s: fd %d, buf %p, len %u, lba %ju "
2545 			    "offset %ju\n", __func__, file_dev->fd, data->buf,
2546 			    data->fill_len, (uintmax_t)buf->lba,
2547 			    (uintmax_t)io_offset);
2548 		} else {
2549 			int i;
2550 
2551 			fprintf(stderr, "%s: fd %d, len %u, lba %ju "
2552 			    "offset %ju\n", __func__, file_dev->fd,
2553 			    data->fill_len, (uintmax_t)buf->lba,
2554 			    (uintmax_t)io_offset);
2555 
2556 			for (i = 0; i < data->sg_count; i++) {
2557 				fprintf(stderr, "index %d ptr %p len %zu\n",
2558 				    i, data->iovec[i].iov_base,
2559 				    data->iovec[i].iov_len);
2560 			}
2561 		}
2562 	} else if (retval == 0) {
2563 		buf->status = CAMDD_STATUS_EOF;
2564 		if (dev->debug != 0)
2565 			printf("%s: got EOF from %s!\n", __func__,
2566 			    file_dev->filename);
2567 		data->resid = data->fill_len;
2568 		error_count++;
2569 	} else if (retval < (ssize_t)data->fill_len) {
2570 		if (buf->status == CAMDD_STATUS_NONE)
2571 			buf->status = CAMDD_STATUS_SHORT_IO;
2572 		data->resid = data->fill_len - retval;
2573 		dev->bytes_transferred += retval;
2574 	}
2575 
2576 bailout:
2577 	if (buf != NULL) {
2578 		if (buf->status == CAMDD_STATUS_EOF) {
2579 			struct camdd_buf *buf2;
2580 			dev->flags |= CAMDD_DEV_FLAG_EOF;
2581 			STAILQ_FOREACH(buf2, &dev->run_queue, links)
2582 				buf2->status = CAMDD_STATUS_EOF;
2583 		}
2584 
2585 		camdd_complete_buf(dev, buf, &error_count);
2586 	}
2587 
2588 	if (error_count != 0)
2589 		return (-1);
2590 	else if (no_resources != 0)
2591 		return (1);
2592 	else
2593 		return (0);
2594 }
2595 
2596 /*
2597  * Execute one command from the run queue.  Returns 0 for success, 1 for
2598  * stop processing, and -1 for error.
2599  */
2600 int
2601 camdd_pass_run(struct camdd_dev *dev)
2602 {
2603 	struct camdd_buf *buf = NULL;
2604 	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2605 	struct camdd_buf_data *data;
2606 	uint32_t num_blocks, sectors_used = 0;
2607 	union ccb *ccb;
2608 	int retval = 0, is_write = dev->write_dev;
2609 	int double_buf_needed = 0;
2610 
2611 	buf = STAILQ_FIRST(&dev->run_queue);
2612 	if (buf == NULL) {
2613 		retval = 1;
2614 		goto bailout;
2615 	}
2616 
2617 	/*
2618 	 * If we're writing, we need to go through the source buffer list
2619 	 * and create an S/G list.
2620 	 */
2621 	if (is_write != 0) {
2622 		retval = camdd_buf_sg_create(buf, /*iovec*/ 0,dev->sector_size,
2623 		    &sectors_used, &double_buf_needed);
2624 		if (retval != 0) {
2625 			retval = -1;
2626 			goto bailout;
2627 		}
2628 	}
2629 
2630 	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2631 	dev->num_run_queue--;
2632 
2633 	data = &buf->buf_type_spec.data;
2634 
2635 	/*
2636 	 * In almost every case the number of blocks should be the device
2637 	 * block size.  The exception may be at the end of an I/O stream
2638 	 * for a partial block or at the end of a device.
2639 	 */
2640 	if (is_write != 0)
2641 		num_blocks = sectors_used;
2642 	else
2643 		num_blocks = data->fill_len / pass_dev->block_len;
2644 
2645 	ccb = &data->ccb;
2646 
2647 	switch (pass_dev->protocol) {
2648 	case PROTO_SCSI:
2649 		CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
2650 
2651 		scsi_read_write(&ccb->csio,
2652 				/*retries*/ dev->retry_count,
2653 				/*cbfcnp*/ NULL,
2654 				/*tag_action*/ MSG_SIMPLE_Q_TAG,
2655 				/*readop*/ (dev->write_dev == 0) ? SCSI_RW_READ :
2656 					   SCSI_RW_WRITE,
2657 				/*byte2*/ 0,
2658 				/*minimum_cmd_size*/ dev->min_cmd_size,
2659 				/*lba*/ buf->lba,
2660 				/*block_count*/ num_blocks,
2661 				/*data_ptr*/ (data->sg_count != 0) ?
2662 					     (uint8_t *)data->segs : data->buf,
2663 				/*dxfer_len*/ (num_blocks * pass_dev->block_len),
2664 				/*sense_len*/ SSD_FULL_SIZE,
2665 				/*timeout*/ dev->io_timeout);
2666 
2667 		if (data->sg_count != 0) {
2668 			ccb->csio.sglist_cnt = data->sg_count;
2669 		}
2670 		break;
2671 	case PROTO_NVME:
2672 		CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->nvmeio);
2673 
2674 		nvme_read_write(&ccb->nvmeio,
2675 				/*retries*/ dev->retry_count,
2676 				/*cbfcnp*/ NULL,
2677 				/*nsid*/ pass_dev->dev->target_lun & UINT32_MAX,
2678 				/*readop*/ dev->write_dev == 0,
2679 				/*lba*/ buf->lba,
2680 				/*block_count*/ num_blocks,
2681 				/*data_ptr*/ (data->sg_count != 0) ?
2682 					     (uint8_t *)data->segs : data->buf,
2683 				/*dxfer_len*/ (num_blocks * pass_dev->block_len),
2684 				/*timeout*/ dev->io_timeout);
2685 
2686 		ccb->nvmeio.sglist_cnt = data->sg_count;
2687 		break;
2688 	default:
2689 		retval = -1;
2690 		goto bailout;
2691 	}
2692 
2693 	/* Disable freezing the device queue */
2694 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
2695 
2696 	if (dev->retry_count != 0)
2697 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
2698 
2699 	if (data->sg_count != 0) {
2700 		ccb->ccb_h.flags |= CAM_DATA_SG;
2701 	}
2702 
2703 	/*
2704 	 * Store a pointer to the buffer in the CCB.  The kernel will
2705 	 * restore this when we get it back, and we'll use it to identify
2706 	 * the buffer this CCB came from.
2707 	 */
2708 	ccb->ccb_h.ccb_buf = buf;
2709 
2710 	/*
2711 	 * Unlock our mutex in preparation for issuing the ioctl.
2712 	 */
2713 	pthread_mutex_unlock(&dev->mutex);
2714 	/*
2715 	 * Queue the CCB to the pass(4) driver.
2716 	 */
2717 	if (ioctl(pass_dev->dev->fd, CAMIOQUEUE, ccb) == -1) {
2718 		pthread_mutex_lock(&dev->mutex);
2719 
2720 		warn("%s: error sending CAMIOQUEUE ioctl to %s%u", __func__,
2721 		     pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
2722 		warn("%s: CCB address is %p", __func__, ccb);
2723 		retval = -1;
2724 
2725 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2726 	} else {
2727 		pthread_mutex_lock(&dev->mutex);
2728 
2729 		dev->cur_active_io++;
2730 		STAILQ_INSERT_TAIL(&dev->active_queue, buf, links);
2731 	}
2732 
2733 bailout:
2734 	return (retval);
2735 }
2736 
2737 int
2738 camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len)
2739 {
2740 	uint32_t num_blocks;
2741 	int retval = 0;
2742 
2743 	*lba = dev->next_io_pos_bytes / dev->sector_size;
2744 	*len = dev->blocksize;
2745 	num_blocks = *len / dev->sector_size;
2746 
2747 	/*
2748 	 * If max_sector is 0, then we have no set limit.  This can happen
2749 	 * if we're writing to a file in a filesystem, or reading from
2750 	 * something like /dev/zero.
2751 	 */
2752 	if ((dev->max_sector != 0)
2753 	 || (dev->sector_io_limit != 0)) {
2754 		uint64_t max_sector;
2755 
2756 		if ((dev->max_sector != 0)
2757 		 && (dev->sector_io_limit != 0))
2758 			max_sector = min(dev->sector_io_limit, dev->max_sector);
2759 		else if (dev->max_sector != 0)
2760 			max_sector = dev->max_sector;
2761 		else
2762 			max_sector = dev->sector_io_limit;
2763 
2764 
2765 		/*
2766 		 * Check to see whether we're starting off past the end of
2767 		 * the device.  If so, we need to just send an EOF
2768 		 * notification to the writer.
2769 		 */
2770 		if (*lba > max_sector) {
2771 			*len = 0;
2772 			retval = 1;
2773 		} else if (((*lba + num_blocks) > max_sector + 1)
2774 			|| ((*lba + num_blocks) < *lba)) {
2775 			/*
2776 			 * If we get here (but pass the first check), we
2777 			 * can trim the request length down to go to the
2778 			 * end of the device.
2779 			 */
2780 			num_blocks = (max_sector + 1) - *lba;
2781 			*len = num_blocks * dev->sector_size;
2782 			retval = 1;
2783 		}
2784 	}
2785 
2786 	dev->next_io_pos_bytes += *len;
2787 
2788 	return (retval);
2789 }
2790 
2791 /*
2792  * Returns 0 for success, 1 for EOF detected, and -1 for failure.
2793  */
2794 int
2795 camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf)
2796 {
2797 	struct camdd_buf *buf = NULL;
2798 	struct camdd_buf_data *data;
2799 	size_t new_len;
2800 	struct camdd_buf_data *rb_data;
2801 	int is_write = dev->write_dev;
2802 	int eof_flush_needed = 0;
2803 	int retval = 0;
2804 
2805 	/*
2806 	 * If we've gotten EOF or our partner has, we should not continue
2807 	 * queueing I/O.  If we're a writer, though, we should continue
2808 	 * to write any buffers that don't have EOF status.
2809 	 */
2810 	if ((dev->flags & CAMDD_DEV_FLAG_EOF)
2811 	 || ((dev->flags & CAMDD_DEV_FLAG_PEER_EOF)
2812 	  && (is_write == 0))) {
2813 		/*
2814 		 * Tell the worker thread that we have seen EOF.
2815 		 */
2816 		retval = 1;
2817 
2818 		/*
2819 		 * If we're the writer, send the buffer back with EOF status.
2820 		 */
2821 		if (is_write) {
2822 			read_buf->status = CAMDD_STATUS_EOF;
2823 
2824 			camdd_complete_peer_buf(dev, read_buf);
2825 		}
2826 		goto bailout;
2827 	}
2828 
2829 	if (is_write == 0) {
2830 		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2831 		if (buf == NULL) {
2832 			retval = -1;
2833 			goto bailout;
2834 		}
2835 		data = &buf->buf_type_spec.data;
2836 
2837 		retval = camdd_get_next_lba_len(dev, &buf->lba, &buf->len);
2838 		if (retval != 0) {
2839 			buf->status = CAMDD_STATUS_EOF;
2840 
2841 		 	if ((buf->len == 0)
2842 			 && ((dev->flags & (CAMDD_DEV_FLAG_EOF_SENT |
2843 			     CAMDD_DEV_FLAG_EOF_QUEUED)) != 0)) {
2844 				camdd_release_buf(buf);
2845 				goto bailout;
2846 			}
2847 			dev->flags |= CAMDD_DEV_FLAG_EOF_QUEUED;
2848 		}
2849 
2850 		data->fill_len = buf->len;
2851 		data->src_start_offset = buf->lba * dev->sector_size;
2852 
2853 		/*
2854 		 * Put this on the run queue.
2855 		 */
2856 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2857 		dev->num_run_queue++;
2858 
2859 		/* We're done. */
2860 		goto bailout;
2861 	}
2862 
2863 	/*
2864 	 * Check for new EOF status from the reader.
2865 	 */
2866 	if ((read_buf->status == CAMDD_STATUS_EOF)
2867 	 || (read_buf->status == CAMDD_STATUS_ERROR)) {
2868 		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2869 		if ((STAILQ_FIRST(&dev->pending_queue) == NULL)
2870 		 && (read_buf->len == 0)) {
2871 			camdd_complete_peer_buf(dev, read_buf);
2872 			retval = 1;
2873 			goto bailout;
2874 		} else
2875 			eof_flush_needed = 1;
2876 	}
2877 
2878 	/*
2879 	 * See if we have a buffer we're composing with pieces from our
2880 	 * partner thread.
2881 	 */
2882 	buf = STAILQ_FIRST(&dev->pending_queue);
2883 	if (buf == NULL) {
2884 		uint64_t lba;
2885 		ssize_t len;
2886 
2887 		retval = camdd_get_next_lba_len(dev, &lba, &len);
2888 		if (retval != 0) {
2889 			read_buf->status = CAMDD_STATUS_EOF;
2890 
2891 			if (len == 0) {
2892 				dev->flags |= CAMDD_DEV_FLAG_EOF;
2893 				camdd_complete_peer_buf(dev, read_buf);
2894 				goto bailout;
2895 			}
2896 		}
2897 
2898 		/*
2899 		 * If we don't have a pending buffer, we need to grab a new
2900 		 * one from the free list or allocate another one.
2901 		 */
2902 		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2903 		if (buf == NULL) {
2904 			retval = 1;
2905 			goto bailout;
2906 		}
2907 
2908 		buf->lba = lba;
2909 		buf->len = len;
2910 
2911 		STAILQ_INSERT_TAIL(&dev->pending_queue, buf, links);
2912 		dev->num_pending_queue++;
2913 	}
2914 
2915 	data = &buf->buf_type_spec.data;
2916 
2917 	rb_data = &read_buf->buf_type_spec.data;
2918 
2919 	if ((rb_data->src_start_offset != dev->next_peer_pos_bytes)
2920 	 && (dev->debug != 0)) {
2921 		printf("%s: WARNING: reader offset %#jx != expected offset "
2922 		    "%#jx\n", __func__, (uintmax_t)rb_data->src_start_offset,
2923 		    (uintmax_t)dev->next_peer_pos_bytes);
2924 	}
2925 	dev->next_peer_pos_bytes = rb_data->src_start_offset +
2926 	    (rb_data->fill_len - rb_data->resid);
2927 
2928 	new_len = (rb_data->fill_len - rb_data->resid) + data->fill_len;
2929 	if (new_len < buf->len) {
2930 		/*
2931 		 * There are three cases here:
2932 		 * 1. We need more data to fill up a block, so we put
2933 		 *    this I/O on the queue and wait for more I/O.
2934 		 * 2. We have a pending buffer in the queue that is
2935 		 *    smaller than our blocksize, but we got an EOF.  So we
2936 		 *    need to go ahead and flush the write out.
2937 		 * 3. We got an error.
2938 		 */
2939 
2940 		/*
2941 		 * Increment our fill length.
2942 		 */
2943 		data->fill_len += (rb_data->fill_len - rb_data->resid);
2944 
2945 		/*
2946 		 * Add the new read buffer to the list for writing.
2947 		 */
2948 		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
2949 
2950 		/* Increment the count */
2951 		buf->src_count++;
2952 
2953 		if (eof_flush_needed == 0) {
2954 			/*
2955 			 * We need to exit, because we don't have enough
2956 			 * data yet.
2957 			 */
2958 			goto bailout;
2959 		} else {
2960 			/*
2961 			 * Take the buffer off of the pending queue.
2962 			 */
2963 			STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
2964 				      links);
2965 			dev->num_pending_queue--;
2966 
2967 			/*
2968 			 * If we need an EOF flush, but there is no data
2969 			 * to flush, go ahead and return this buffer.
2970 			 */
2971 			if (data->fill_len == 0) {
2972 				camdd_complete_buf(dev, buf, /*error_count*/0);
2973 				retval = 1;
2974 				goto bailout;
2975 			}
2976 
2977 			/*
2978 			 * Put this on the next queue for execution.
2979 			 */
2980 			STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2981 			dev->num_run_queue++;
2982 		}
2983 	} else if (new_len == buf->len) {
2984 		/*
2985 		 * We have enough data to completey fill one block,
2986 		 * so we're ready to issue the I/O.
2987 		 */
2988 
2989 		/*
2990 		 * Take the buffer off of the pending queue.
2991 		 */
2992 		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf, links);
2993 		dev->num_pending_queue--;
2994 
2995 		/*
2996 		 * Add the new read buffer to the list for writing.
2997 		 */
2998 		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
2999 
3000 		/* Increment the count */
3001 		buf->src_count++;
3002 
3003 		/*
3004 		 * Increment our fill length.
3005 		 */
3006 		data->fill_len += (rb_data->fill_len - rb_data->resid);
3007 
3008 		/*
3009 		 * Put this on the next queue for execution.
3010 		 */
3011 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
3012 		dev->num_run_queue++;
3013 	} else {
3014 		struct camdd_buf *idb;
3015 		struct camdd_buf_indirect *indirect;
3016 		uint32_t len_to_go, cur_offset;
3017 
3018 
3019 		idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
3020 		if (idb == NULL) {
3021 			retval = 1;
3022 			goto bailout;
3023 		}
3024 		indirect = &idb->buf_type_spec.indirect;
3025 		indirect->src_buf = read_buf;
3026 		read_buf->refcount++;
3027 		indirect->offset = 0;
3028 		indirect->start_ptr = rb_data->buf;
3029 		/*
3030 		 * We've already established that there is more
3031 		 * data in read_buf than we have room for in our
3032 		 * current write request.  So this particular chunk
3033 		 * of the request should just be the remainder
3034 		 * needed to fill up a block.
3035 		 */
3036 		indirect->len = buf->len - (data->fill_len - data->resid);
3037 
3038 		camdd_buf_add_child(buf, idb);
3039 
3040 		/*
3041 		 * This buffer is ready to execute, so we can take
3042 		 * it off the pending queue and put it on the run
3043 		 * queue.
3044 		 */
3045 		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
3046 			      links);
3047 		dev->num_pending_queue--;
3048 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
3049 		dev->num_run_queue++;
3050 
3051 		cur_offset = indirect->offset + indirect->len;
3052 
3053 		/*
3054 		 * The resulting I/O would be too large to fit in
3055 		 * one block.  We need to split this I/O into
3056 		 * multiple pieces.  Allocate as many buffers as needed.
3057 		 */
3058 		for (len_to_go = rb_data->fill_len - rb_data->resid -
3059 		     indirect->len; len_to_go > 0;) {
3060 			struct camdd_buf *new_buf;
3061 			struct camdd_buf_data *new_data;
3062 			uint64_t lba;
3063 			ssize_t len;
3064 
3065 			retval = camdd_get_next_lba_len(dev, &lba, &len);
3066 			if ((retval != 0)
3067 			 && (len == 0)) {
3068 				/*
3069 				 * The device has already been marked
3070 				 * as EOF, and there is no space left.
3071 				 */
3072 				goto bailout;
3073 			}
3074 
3075 			new_buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
3076 			if (new_buf == NULL) {
3077 				retval = 1;
3078 				goto bailout;
3079 			}
3080 
3081 			new_buf->lba = lba;
3082 			new_buf->len = len;
3083 
3084 			idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
3085 			if (idb == NULL) {
3086 				retval = 1;
3087 				goto bailout;
3088 			}
3089 
3090 			indirect = &idb->buf_type_spec.indirect;
3091 
3092 			indirect->src_buf = read_buf;
3093 			read_buf->refcount++;
3094 			indirect->offset = cur_offset;
3095 			indirect->start_ptr = rb_data->buf + cur_offset;
3096 			indirect->len = min(len_to_go, new_buf->len);
3097 #if 0
3098 			if (((indirect->len % dev->sector_size) != 0)
3099 			 || ((indirect->offset % dev->sector_size) != 0)) {
3100 				warnx("offset %ju len %ju not aligned with "
3101 				    "sector size %u", indirect->offset,
3102 				    (uintmax_t)indirect->len, dev->sector_size);
3103 			}
3104 #endif
3105 			cur_offset += indirect->len;
3106 			len_to_go -= indirect->len;
3107 
3108 			camdd_buf_add_child(new_buf, idb);
3109 
3110 			new_data = &new_buf->buf_type_spec.data;
3111 
3112 			if ((new_data->fill_len == new_buf->len)
3113 			 || (eof_flush_needed != 0)) {
3114 				STAILQ_INSERT_TAIL(&dev->run_queue,
3115 						   new_buf, links);
3116 				dev->num_run_queue++;
3117 			} else if (new_data->fill_len < buf->len) {
3118 				STAILQ_INSERT_TAIL(&dev->pending_queue,
3119 					   	new_buf, links);
3120 				dev->num_pending_queue++;
3121 			} else {
3122 				warnx("%s: too much data in new "
3123 				      "buffer!", __func__);
3124 				retval = 1;
3125 				goto bailout;
3126 			}
3127 		}
3128 	}
3129 
3130 bailout:
3131 	return (retval);
3132 }
3133 
3134 void
3135 camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
3136 		uint32_t *peer_depth, uint32_t *our_bytes, uint32_t *peer_bytes)
3137 {
3138 	*our_depth = dev->cur_active_io + dev->num_run_queue;
3139 	if (dev->num_peer_work_queue >
3140 	    dev->num_peer_done_queue)
3141 		*peer_depth = dev->num_peer_work_queue -
3142 			      dev->num_peer_done_queue;
3143 	else
3144 		*peer_depth = 0;
3145 	*our_bytes = *our_depth * dev->blocksize;
3146 	*peer_bytes = dev->peer_bytes_queued;
3147 }
3148 
3149 void
3150 camdd_sig_handler(int sig)
3151 {
3152 	if (sig == SIGINFO)
3153 		need_status = 1;
3154 	else {
3155 		need_exit = 1;
3156 		error_exit = 1;
3157 	}
3158 
3159 	sem_post(&camdd_sem);
3160 }
3161 
3162 void
3163 camdd_print_status(struct camdd_dev *camdd_dev, struct camdd_dev *other_dev,
3164 		   struct timespec *start_time)
3165 {
3166 	struct timespec done_time;
3167 	uint64_t total_ns;
3168 	long double mb_sec, total_sec;
3169 	int error = 0;
3170 
3171 	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &done_time);
3172 	if (error != 0) {
3173 		warn("Unable to get done time");
3174 		return;
3175 	}
3176 
3177 	timespecsub(&done_time, start_time, &done_time);
3178 
3179 	total_ns = done_time.tv_nsec + (done_time.tv_sec * 1000000000);
3180 	total_sec = total_ns;
3181 	total_sec /= 1000000000;
3182 
3183 	fprintf(stderr, "%ju bytes %s %s\n%ju bytes %s %s\n"
3184 		"%.4Lf seconds elapsed\n",
3185 		(uintmax_t)camdd_dev->bytes_transferred,
3186 		(camdd_dev->write_dev == 0) ?  "read from" : "written to",
3187 		camdd_dev->device_name,
3188 		(uintmax_t)other_dev->bytes_transferred,
3189 		(other_dev->write_dev == 0) ? "read from" : "written to",
3190 		other_dev->device_name, total_sec);
3191 
3192 	mb_sec = min(other_dev->bytes_transferred,camdd_dev->bytes_transferred);
3193 	mb_sec /= 1024 * 1024;
3194 	mb_sec *= 1000000000;
3195 	mb_sec /= total_ns;
3196 	fprintf(stderr, "%.2Lf MB/sec\n", mb_sec);
3197 }
3198 
3199 int
3200 camdd_rw(struct camdd_io_opts *io_opts, camdd_argmask arglist, int num_io_opts,
3201 	 uint64_t max_io, int retry_count, int timeout)
3202 {
3203 	struct cam_device *new_cam_dev = NULL;
3204 	struct camdd_dev *devs[2];
3205 	struct timespec start_time;
3206 	pthread_t threads[2];
3207 	int unit = 0;
3208 	int error = 0;
3209 	int i;
3210 
3211 	bzero(devs, sizeof(devs));
3212 
3213 	if (num_io_opts != 2) {
3214 		warnx("Must have one input and one output path");
3215 		error = 1;
3216 		goto bailout;
3217 	}
3218 
3219 	for (i = 0; i < num_io_opts; i++) {
3220 		switch (io_opts[i].dev_type) {
3221 		case CAMDD_DEV_PASS: {
3222 			if (isdigit(io_opts[i].dev_name[0])) {
3223 				int bus = 0, target = 0, lun = 0;
3224 				int rv;
3225 
3226 				/* device specified as bus:target[:lun] */
3227 				rv = parse_btl(io_opts[i].dev_name, &bus,
3228 				    &target, &lun);
3229 				if (rv < 2) {
3230 					warnx("numeric device specification "
3231 					     "must be either bus:target, or "
3232 					     "bus:target:lun");
3233 					error = 1;
3234 					goto bailout;
3235 				}
3236 				/* default to 0 if lun was not specified */
3237 				if (rv == 2) {
3238 					lun = 0;
3239 				}
3240 				new_cam_dev = cam_open_btl(bus, target, lun,
3241 				    O_RDWR, NULL);
3242 			} else {
3243 				char name[30];
3244 
3245 				if (cam_get_device(io_opts[i].dev_name, name,
3246 						   sizeof name, &unit) == -1) {
3247 					warnx("%s", cam_errbuf);
3248 					error = 1;
3249 					goto bailout;
3250 				}
3251 				new_cam_dev = cam_open_spec_device(name, unit,
3252 				    O_RDWR, NULL);
3253 			}
3254 
3255 			if (new_cam_dev == NULL) {
3256 				warnx("%s", cam_errbuf);
3257 				error = 1;
3258 				goto bailout;
3259 			}
3260 
3261 			devs[i] = camdd_probe_pass(new_cam_dev,
3262 			    /*io_opts*/ &io_opts[i],
3263 			    arglist,
3264 			    /*probe_retry_count*/ 3,
3265 			    /*probe_timeout*/ 5000,
3266 			    /*io_retry_count*/ retry_count,
3267 			    /*io_timeout*/ timeout);
3268 			if (devs[i] == NULL) {
3269 				warn("Unable to probe device %s%u",
3270 				     new_cam_dev->device_name,
3271 				     new_cam_dev->dev_unit_num);
3272 				error = 1;
3273 				goto bailout;
3274 			}
3275 			break;
3276 		}
3277 		case CAMDD_DEV_FILE: {
3278 			int fd = -1;
3279 
3280 			if (io_opts[i].dev_name[0] == '-') {
3281 				if (io_opts[i].write_dev != 0)
3282 					fd = STDOUT_FILENO;
3283 				else
3284 					fd = STDIN_FILENO;
3285 			} else {
3286 				if (io_opts[i].write_dev != 0) {
3287 					fd = open(io_opts[i].dev_name,
3288 					    O_RDWR | O_CREAT, S_IWUSR |S_IRUSR);
3289 				} else {
3290 					fd = open(io_opts[i].dev_name,
3291 					    O_RDONLY);
3292 				}
3293 			}
3294 			if (fd == -1) {
3295 				warn("error opening file %s",
3296 				    io_opts[i].dev_name);
3297 				error = 1;
3298 				goto bailout;
3299 			}
3300 
3301 			devs[i] = camdd_probe_file(fd, &io_opts[i],
3302 			    retry_count, timeout);
3303 			if (devs[i] == NULL) {
3304 				error = 1;
3305 				goto bailout;
3306 			}
3307 
3308 			break;
3309 		}
3310 		default:
3311 			warnx("Unknown device type %d (%s)",
3312 			    io_opts[i].dev_type, io_opts[i].dev_name);
3313 			error = 1;
3314 			goto bailout;
3315 			break; /*NOTREACHED */
3316 		}
3317 
3318 		devs[i]->write_dev = io_opts[i].write_dev;
3319 
3320 		devs[i]->start_offset_bytes = io_opts[i].offset;
3321 
3322 		if (max_io != 0) {
3323 			devs[i]->sector_io_limit =
3324 			    (devs[i]->start_offset_bytes /
3325 			    devs[i]->sector_size) +
3326 			    (max_io / devs[i]->sector_size) - 1;
3327 		}
3328 
3329 		devs[i]->next_io_pos_bytes = devs[i]->start_offset_bytes;
3330 		devs[i]->next_completion_pos_bytes =devs[i]->start_offset_bytes;
3331 	}
3332 
3333 	devs[0]->peer_dev = devs[1];
3334 	devs[1]->peer_dev = devs[0];
3335 	devs[0]->next_peer_pos_bytes = devs[0]->peer_dev->next_io_pos_bytes;
3336 	devs[1]->next_peer_pos_bytes = devs[1]->peer_dev->next_io_pos_bytes;
3337 
3338 	sem_init(&camdd_sem, /*pshared*/ 0, 0);
3339 
3340 	signal(SIGINFO, camdd_sig_handler);
3341 	signal(SIGINT, camdd_sig_handler);
3342 
3343 	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &start_time);
3344 	if (error != 0) {
3345 		warn("Unable to get start time");
3346 		goto bailout;
3347 	}
3348 
3349 	for (i = 0; i < num_io_opts; i++) {
3350 		error = pthread_create(&threads[i], NULL, camdd_worker,
3351 				       (void *)devs[i]);
3352 		if (error != 0) {
3353 			warnc(error, "pthread_create() failed");
3354 			goto bailout;
3355 		}
3356 	}
3357 
3358 	for (;;) {
3359 		if ((sem_wait(&camdd_sem) == -1)
3360 		 || (need_exit != 0)) {
3361 			struct kevent ke;
3362 
3363 			for (i = 0; i < num_io_opts; i++) {
3364 				EV_SET(&ke, (uintptr_t)&devs[i]->work_queue,
3365 				    EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
3366 
3367 				devs[i]->flags |= CAMDD_DEV_FLAG_EOF;
3368 
3369 				error = kevent(devs[i]->kq, &ke, 1, NULL, 0,
3370 						NULL);
3371 				if (error == -1)
3372 					warn("%s: unable to wake up thread",
3373 					    __func__);
3374 				error = 0;
3375 			}
3376 			break;
3377 		} else if (need_status != 0) {
3378 			camdd_print_status(devs[0], devs[1], &start_time);
3379 			need_status = 0;
3380 		}
3381 	}
3382 	for (i = 0; i < num_io_opts; i++) {
3383 		pthread_join(threads[i], NULL);
3384 	}
3385 
3386 	camdd_print_status(devs[0], devs[1], &start_time);
3387 
3388 bailout:
3389 
3390 	for (i = 0; i < num_io_opts; i++)
3391 		camdd_free_dev(devs[i]);
3392 
3393 	return (error + error_exit);
3394 }
3395 
/*
 * Write the command-line synopsis and option descriptions to stderr.
 */
void
usage(void)
{
	static const char usage_text[] =
"usage:  camdd <-i|-o pass=pass0,bs=1M,offset=1M,depth=4>\n"
"              <-i|-o file=/tmp/file,bs=512K,offset=1M>\n"
"              <-i|-o file=/dev/da0,bs=512K,offset=1M>\n"
"              <-i|-o file=/dev/nsa0,bs=512K>\n"
"              [-C retry_count][-E][-m max_io_amt][-t timeout_secs][-v][-h]\n"
"Option description\n"
"-i <arg=val>  Specify input device/file and parameters\n"
"-o <arg=val>  Specify output device/file and parameters\n"
"Input and Output parameters\n"
"pass=name     Specify a pass(4) device like pass0 or /dev/pass0\n"
"file=name     Specify a file or device, /tmp/foo, /dev/da0, /dev/null\n"
"              or - for stdin/stdout\n"
"bs=blocksize  Specify blocksize in bytes, or using K, M, G, etc. suffix\n"
"offset=len    Specify starting offset in bytes or using K, M, G suffix\n"
"              NOTE: offset cannot be specified on tapes, pipes, stdin/out\n"
"depth=N       Specify a numeric queue depth.  This only applies to pass(4)\n"
"mcs=N         Specify a minimum cmd size for pass(4) read/write commands\n"
"Optional arguments\n"
"-C retry_cnt  Specify a retry count for pass(4) devices\n"
"-E            Enable CAM error recovery for pass(4) devices\n"
"-m max_io     Specify the maximum amount to be transferred in bytes or\n"
"              using K, G, M, etc. suffixes\n"
"-t timeout    Specify the I/O timeout to use with pass(4) devices\n"
"-v            Enable verbose error recovery\n"
"-h            Print this message\n";

	/* The help text is plain data, so print it through "%s". */
	fprintf(stderr, "%s", usage_text);
}
3426 
3427 
3428 int
3429 camdd_parse_io_opts(char *args, int is_write, struct camdd_io_opts *io_opts)
3430 {
3431 	char *tmpstr, *tmpstr2;
3432 	char *orig_tmpstr = NULL;
3433 	int retval = 0;
3434 
3435 	io_opts->write_dev = is_write;
3436 
3437 	tmpstr = strdup(args);
3438 	if (tmpstr == NULL) {
3439 		warn("strdup failed");
3440 		retval = 1;
3441 		goto bailout;
3442 	}
3443 	orig_tmpstr = tmpstr;
3444 	while ((tmpstr2 = strsep(&tmpstr, ",")) != NULL) {
3445 		char *name, *value;
3446 
3447 		/*
3448 		 * If the user creates an empty parameter by putting in two
3449 		 * commas, skip over it and look for the next field.
3450 		 */
3451 		if (*tmpstr2 == '\0')
3452 			continue;
3453 
3454 		name = strsep(&tmpstr2, "=");
3455 		if (*name == '\0') {
3456 			warnx("Got empty I/O parameter name");
3457 			retval = 1;
3458 			goto bailout;
3459 		}
3460 		value = strsep(&tmpstr2, "=");
3461 		if ((value == NULL)
3462 		 || (*value == '\0')) {
3463 			warnx("Empty I/O parameter value for %s", name);
3464 			retval = 1;
3465 			goto bailout;
3466 		}
3467 		if (strncasecmp(name, "file", 4) == 0) {
3468 			io_opts->dev_type = CAMDD_DEV_FILE;
3469 			io_opts->dev_name = strdup(value);
3470 			if (io_opts->dev_name == NULL) {
3471 				warn("Error allocating memory");
3472 				retval = 1;
3473 				goto bailout;
3474 			}
3475 		} else if (strncasecmp(name, "pass", 4) == 0) {
3476 			io_opts->dev_type = CAMDD_DEV_PASS;
3477 			io_opts->dev_name = strdup(value);
3478 			if (io_opts->dev_name == NULL) {
3479 				warn("Error allocating memory");
3480 				retval = 1;
3481 				goto bailout;
3482 			}
3483 		} else if ((strncasecmp(name, "bs", 2) == 0)
3484 			|| (strncasecmp(name, "blocksize", 9) == 0)) {
3485 			retval = expand_number(value, &io_opts->blocksize);
3486 			if (retval == -1) {
3487 				warn("expand_number(3) failed on %s=%s", name,
3488 				    value);
3489 				retval = 1;
3490 				goto bailout;
3491 			}
3492 		} else if (strncasecmp(name, "depth", 5) == 0) {
3493 			char *endptr;
3494 
3495 			io_opts->queue_depth = strtoull(value, &endptr, 0);
3496 			if (*endptr != '\0') {
3497 				warnx("invalid queue depth %s", value);
3498 				retval = 1;
3499 				goto bailout;
3500 			}
3501 		} else if (strncasecmp(name, "mcs", 3) == 0) {
3502 			char *endptr;
3503 
3504 			io_opts->min_cmd_size = strtol(value, &endptr, 0);
3505 			if ((*endptr != '\0')
3506 			 || ((io_opts->min_cmd_size > 16)
3507 			  || (io_opts->min_cmd_size < 0))) {
3508 				warnx("invalid minimum cmd size %s", value);
3509 				retval = 1;
3510 				goto bailout;
3511 			}
3512 		} else if (strncasecmp(name, "offset", 6) == 0) {
3513 			retval = expand_number(value, &io_opts->offset);
3514 			if (retval == -1) {
3515 				warn("expand_number(3) failed on %s=%s", name,
3516 				    value);
3517 				retval = 1;
3518 				goto bailout;
3519 			}
3520 		} else if (strncasecmp(name, "debug", 5) == 0) {
3521 			char *endptr;
3522 
3523 			io_opts->debug = strtoull(value, &endptr, 0);
3524 			if (*endptr != '\0') {
3525 				warnx("invalid debug level %s", value);
3526 				retval = 1;
3527 				goto bailout;
3528 			}
3529 		} else {
3530 			warnx("Unrecognized parameter %s=%s", name, value);
3531 		}
3532 	}
3533 bailout:
3534 	free(orig_tmpstr);
3535 
3536 	return (retval);
3537 }
3538 
3539 int
3540 main(int argc, char **argv)
3541 {
3542 	int c;
3543 	camdd_argmask arglist = CAMDD_ARG_NONE;
3544 	int timeout = 0, retry_count = 1;
3545 	int error = 0;
3546 	uint64_t max_io = 0;
3547 	struct camdd_io_opts *opt_list = NULL;
3548 
3549 	if (argc == 1) {
3550 		usage();
3551 		exit(1);
3552 	}
3553 
3554 	opt_list = calloc(2, sizeof(struct camdd_io_opts));
3555 	if (opt_list == NULL) {
3556 		warn("Unable to allocate option list");
3557 		error = 1;
3558 		goto bailout;
3559 	}
3560 
3561 	while ((c = getopt(argc, argv, "C:Ehi:m:o:t:v")) != -1){
3562 		switch (c) {
3563 		case 'C':
3564 			retry_count = strtol(optarg, NULL, 0);
3565 			if (retry_count < 0)
3566 				errx(1, "retry count %d is < 0",
3567 				     retry_count);
3568 			break;
3569 		case 'E':
3570 			arglist |= CAMDD_ARG_ERR_RECOVER;
3571 			break;
3572 		case 'i':
3573 		case 'o':
3574 			if (((c == 'i')
3575 			  && (opt_list[0].dev_type != CAMDD_DEV_NONE))
3576 			 || ((c == 'o')
3577 			  && (opt_list[1].dev_type != CAMDD_DEV_NONE))) {
3578 				errx(1, "Only one input and output path "
3579 				    "allowed");
3580 			}
3581 			error = camdd_parse_io_opts(optarg, (c == 'o') ? 1 : 0,
3582 			    (c == 'o') ? &opt_list[1] : &opt_list[0]);
3583 			if (error != 0)
3584 				goto bailout;
3585 			break;
3586 		case 'm':
3587 			error = expand_number(optarg, &max_io);
3588 			if (error == -1) {
3589 				warn("invalid maximum I/O amount %s", optarg);
3590 				error = 1;
3591 				goto bailout;
3592 			}
3593 			break;
3594 		case 't':
3595 			timeout = strtol(optarg, NULL, 0);
3596 			if (timeout < 0)
3597 				errx(1, "invalid timeout %d", timeout);
3598 			/* Convert the timeout from seconds to ms */
3599 			timeout *= 1000;
3600 			break;
3601 		case 'v':
3602 			arglist |= CAMDD_ARG_VERBOSE;
3603 			break;
3604 		case 'h':
3605 		default:
3606 			usage();
3607 			exit(1);
3608 			break; /*NOTREACHED*/
3609 		}
3610 	}
3611 
3612 	if ((opt_list[0].dev_type == CAMDD_DEV_NONE)
3613 	 || (opt_list[1].dev_type == CAMDD_DEV_NONE))
3614 		errx(1, "Must specify both -i and -o");
3615 
3616 	/*
3617 	 * Set the timeout if the user hasn't specified one.
3618 	 */
3619 	if (timeout == 0)
3620 		timeout = CAMDD_PASS_RW_TIMEOUT;
3621 
3622 	error = camdd_rw(opt_list, arglist, 2, max_io, retry_count, timeout);
3623 
3624 bailout:
3625 	free(opt_list);
3626 
3627 	exit(error);
3628 }
3629