xref: /freebsd/usr.sbin/camdd/camdd.c (revision 6132212808e8dccedc9e5d85fea4390c2f38059a)
1 /*-
2  * Copyright (c) 1997-2007 Kenneth D. Merry
3  * Copyright (c) 2013, 2014, 2015 Spectra Logic Corporation
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions, and the following disclaimer,
11  *    without modification.
12  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
13  *    substantially similar to the "NO WARRANTY" disclaimer below
14  *    ("Disclaimer") and any redistribution must be conditioned upon
15  *    including a substantially similar Disclaimer requirement for further
16  *    binary redistribution.
17  *
18  * NO WARRANTY
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
22  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
28  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGES.
30  *
31  * Authors: Ken Merry           (Spectra Logic Corporation)
32  */
33 
34 /*
35  * This is eventually intended to be:
36  * - A basic data transfer/copy utility
37  * - A simple benchmark utility
38  * - An example of how to use the asynchronous pass(4) driver interface.
39  */
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42 
43 #include <sys/ioctl.h>
44 #include <sys/stdint.h>
45 #include <sys/types.h>
46 #include <sys/endian.h>
47 #include <sys/param.h>
48 #include <sys/sbuf.h>
49 #include <sys/stat.h>
50 #include <sys/event.h>
51 #include <sys/time.h>
52 #include <sys/uio.h>
53 #include <vm/vm.h>
54 #include <sys/bus.h>
55 #include <sys/bus_dma.h>
56 #include <sys/mtio.h>
57 #include <sys/conf.h>
58 #include <sys/disk.h>
59 
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <semaphore.h>
63 #include <string.h>
64 #include <unistd.h>
65 #include <inttypes.h>
66 #include <limits.h>
67 #include <fcntl.h>
68 #include <ctype.h>
69 #include <err.h>
70 #include <libutil.h>
71 #include <pthread.h>
72 #include <assert.h>
73 #include <bsdxml.h>
74 
75 #include <cam/cam.h>
76 #include <cam/cam_debug.h>
77 #include <cam/cam_ccb.h>
78 #include <cam/scsi/scsi_all.h>
79 #include <cam/scsi/scsi_da.h>
80 #include <cam/scsi/scsi_pass.h>
81 #include <cam/scsi/scsi_message.h>
82 #include <cam/scsi/smp_all.h>
83 #include <cam/nvme/nvme_all.h>
84 #include <camlib.h>
85 #include <mtlib.h>
86 #include <zlib.h>
87 
/*
 * Command identifiers.  Despite the "mask" in the type name, the values
 * are sequential rather than independent bits (CAMDD_CMD_READ is
 * numerically CAMDD_CMD_HELP | CAMDD_CMD_WRITE).
 */
typedef enum {
	CAMDD_CMD_NONE	= 0x0,
	CAMDD_CMD_HELP	= 0x1,
	CAMDD_CMD_WRITE	= 0x2,
	CAMDD_CMD_READ	= 0x3
} camdd_cmdmask;
94 
/*
 * Bit flags, one per argument.  parse_btl() ORs the BUS/TARGET/LUN bits
 * into its argument mask as each component is parsed.
 */
typedef enum {
	CAMDD_ARG_NONE		= 0,
	CAMDD_ARG_VERBOSE	= 1 << 0,
	CAMDD_ARG_DEVICE	= 1 << 1,
	CAMDD_ARG_BUS		= 1 << 2,
	CAMDD_ARG_TARGET	= 1 << 3,
	CAMDD_ARG_LUN		= 1 << 4,
	CAMDD_ARG_UNIT		= 1 << 5,
	CAMDD_ARG_TIMEOUT	= 1 << 6,
	CAMDD_ARG_ERR_RECOVER	= 1 << 7,
	CAMDD_ARG_RETRIES	= 1 << 8
} camdd_argmask;
107 
/*
 * Kind of device backing a struct camdd_dev; selects which member of
 * union camdd_dev_spec is in use.
 */
typedef enum {
	CAMDD_DEV_NONE	= 0,
	CAMDD_DEV_PASS	= 1,
	CAMDD_DEV_FILE	= 2
} camdd_dev_type;
113 
/*
 * Per-device I/O options handed to the probe routines.
 *
 * camdd_probe_file() treats a blocksize of 0 as "use the default" and
 * ignores any queue_depth other than 0 or 1 (with a warning).
 */
struct camdd_io_opts {
	camdd_dev_type	dev_type;
	char		*dev_name;
	uint64_t	blocksize;
	uint64_t	queue_depth;
	uint64_t	offset;
	int		min_cmd_size;
	int		write_dev;
	uint64_t	debug;
};
124 
/*
 * Discriminator for union camdd_buf_types: a data buffer owns backing
 * store, an indirect buffer points into another buffer.
 */
typedef enum {
	CAMDD_BUF_NONE		= 0,
	CAMDD_BUF_DATA		= 1,
	CAMDD_BUF_INDIRECT	= 2
} camdd_buf_type;
130 
/*
 * Type-specific state for a CAMDD_BUF_INDIRECT buffer: a window
 * (start_ptr, len) onto part of another buffer.
 */
struct camdd_buf_indirect {
	/*
	 * Pointer to the source buffer.
	 */
	struct camdd_buf *src_buf;

	/*
	 * Offset into the source buffer, in bytes.
	 */
	uint64_t	  offset;
	/*
	 * Pointer to the starting point in the source buffer.
	 */
	uint8_t		 *start_ptr;

	/*
	 * Length of this chunk in bytes.
	 */
	size_t		  len;
};
151 
/*
 * Type-specific state for a CAMDD_BUF_DATA buffer: the backing store,
 * fill/residual accounting, and the scatter/gather list built by
 * camdd_buf_sg_create() and torn down by camdd_release_buf().
 */
struct camdd_buf_data {
	/*
	 * Buffer allocated when we allocate this camdd_buf.  This should
	 * be the size of the blocksize for this device.
	 */
	uint8_t			*buf;

	/*
	 * The amount of backing store allocated in buf.  Generally this
	 * will be the blocksize of the device.
	 */
	uint32_t		 alloc_len;

	/*
	 * The amount of data that was put into the buffer (on reads) or
	 * the amount of data we have put onto the src_list so far (on
	 * writes).
	 */
	uint32_t		 fill_len;

	/*
	 * The amount of data that was not transferred.
	 */
	uint32_t		 resid;

	/*
	 * Starting byte offset on the reader.
	 */
	uint64_t		 src_start_offset;

	/*
	 * CCB used for pass(4) device targets.
	 */
	union ccb		 ccb;

	/*
	 * Number of scatter/gather segments.
	 */
	int			 sg_count;

	/*
	 * Set if we had to tack on an extra buffer to round the transfer
	 * up to a sector size.  When set, the last S/G entry points at
	 * that separately allocated padding buffer.
	 */
	int			 extra_buf;

	/*
	 * Scatter/gather list used generally when we're the writer for a
	 * pass(4) device.
	 */
	bus_dma_segment_t	*segs;

	/*
	 * Scatter/gather list used generally when we're the writer for a
	 * file or block device;
	 */
	struct iovec		*iovec;
};
210 
/*
 * Type-specific portion of a camdd_buf; the active member is chosen by
 * the buffer's buf_type.
 */
union camdd_buf_types {
	struct camdd_buf_indirect	indirect;
	struct camdd_buf_data		data;
};
215 
/*
 * Completion status recorded in a camdd_buf.
 */
typedef enum {
	CAMDD_STATUS_NONE	= 0,
	CAMDD_STATUS_OK		= 1,
	CAMDD_STATUS_SHORT_IO	= 2,
	CAMDD_STATUS_EOF	= 3,
	CAMDD_STATUS_ERROR	= 4
} camdd_buf_status;
223 
/*
 * A single I/O buffer.  buf_type selects the active member of
 * buf_type_spec.  The three STAILQ_ENTRY members let one buffer sit on a
 * device queue (links), a work/done queue (work_links) and another
 * buffer's src_list (src_links) independently.
 */
struct camdd_buf {
	camdd_buf_type		 buf_type;
	union camdd_buf_types	 buf_type_spec;

	camdd_buf_status	 status;

	uint64_t		 lba;
	size_t			 len;

	/*
	 * A reference count of how many indirect buffers point to this
	 * buffer.
	 */
	int			 refcount;

	/*
	 * A link back to our parent device.
	 */
	struct camdd_dev	*dev;
	STAILQ_ENTRY(camdd_buf)  links;
	STAILQ_ENTRY(camdd_buf)  work_links;

	/*
	 * A count of the buffers on the src_list.
	 */
	int			 src_count;

	/*
	 * List of buffers from our partner thread that are the components
	 * of this buffer for the I/O.  Uses src_links.
	 */
	STAILQ_HEAD(,camdd_buf)	 src_list;
	STAILQ_ENTRY(camdd_buf)  src_links;
};
258 
/* Number of usable device types (CAMDD_DEV_PASS and CAMDD_DEV_FILE). */
#define	NUM_DEV_TYPES	2
260 
/*
 * Device-specific state for a CAMDD_DEV_PASS device.  The cam_device
 * handle is closed with cam_close_device() in camdd_free_dev().
 */
struct camdd_dev_pass {
	int			 scsi_dev_type;
	int			 protocol;
	struct cam_device	*dev;
	uint64_t		 max_sector;
	uint32_t		 block_len;
	uint32_t		 cpi_maxio;
};
269 
/*
 * Classification of the object behind a file descriptor, as determined
 * by camdd_probe_file() from fstat() and the FIODTYPE ioctl.
 */
typedef enum {
	CAMDD_FILE_NONE	= 0,
	CAMDD_FILE_REG	= 1,
	CAMDD_FILE_STD	= 2,
	CAMDD_FILE_PIPE	= 3,
	CAMDD_FILE_DISK	= 4,
	CAMDD_FILE_TAPE	= 5,
	CAMDD_FILE_TTY	= 6,
	CAMDD_FILE_MEM	= 7
} camdd_file_type;
280 
/*
 * Capability bits for a file-backed device.  camdd_probe_file() sets
 * CAMDD_FF_CAN_SEEK for regular files.
 */
typedef enum {
	CAMDD_FF_NONE		= 0,
	CAMDD_FF_CAN_SEEK	= 1 << 0
} camdd_file_flags;
285 
/*
 * Device-specific state for a CAMDD_DEV_FILE device.  camdd_free_dev()
 * closes fd (unless it is -1) and frees tmp_buf.
 */
struct camdd_dev_file {
	int			 fd;
	struct stat		 sb;
	char			 filename[MAXPATHLEN + 1];
	camdd_file_type		 file_type;
	camdd_file_flags	 file_flags;
	uint8_t			*tmp_buf;
};
294 
/*
 * Device-specific state for a block device.
 * NOTE(review): no camdd_dev_type value visible here selects this
 * member -- confirm whether it is still used.
 */
struct camdd_dev_block {
	int			 fd;
	uint64_t		 size_bytes;
	uint32_t		 block_len;
};
300 
/*
 * Device-type-specific portion of a camdd_dev; the active member is
 * chosen by the device's dev_type.
 */
union camdd_dev_spec {
	struct camdd_dev_pass	pass;
	struct camdd_dev_file	file;
	struct camdd_dev_block	block;
};
306 
/*
 * Per-device state bits stored in camdd_dev.flags.
 */
typedef enum {
	CAMDD_DEV_FLAG_NONE		= 0,
	CAMDD_DEV_FLAG_EOF		= 1 << 0,
	CAMDD_DEV_FLAG_PEER_EOF		= 1 << 1,
	CAMDD_DEV_FLAG_ACTIVE		= 1 << 2,
	CAMDD_DEV_FLAG_EOF_SENT		= 1 << 3,
	CAMDD_DEV_FLAG_EOF_QUEUED	= 1 << 4
} camdd_dev_flags;
315 
/*
 * State for one device taking part in a transfer.
 *
 * Holds the type-specific device state (dev_spec, selected by dev_type),
 * I/O geometry and position tracking, a pointer to the peer device, the
 * mutex/condition variable/kqueue created by camdd_alloc_dev(), the
 * run/fetch method pointers, and the buffer queues used to exchange work
 * with the peer.
 */
struct camdd_dev {
	camdd_dev_type		 dev_type;
	union camdd_dev_spec	 dev_spec;
	camdd_dev_flags		 flags;
	char			 device_name[MAXPATHLEN+1];
	uint32_t		 blocksize;
	uint32_t		 sector_size;
	uint64_t		 max_sector;
	uint64_t		 sector_io_limit;
	int			 min_cmd_size;
	int			 write_dev;
	int			 retry_count;
	int			 io_timeout;
	int			 debug;
	uint64_t		 start_offset_bytes;
	uint64_t		 next_io_pos_bytes;
	uint64_t		 next_peer_pos_bytes;
	uint64_t		 next_completion_pos_bytes;
	uint64_t		 peer_bytes_queued;
	uint64_t		 bytes_transferred;
	uint32_t		 target_queue_depth;
	uint32_t		 cur_active_io;
	uint8_t			*extra_buf;
	uint32_t		 extra_buf_len;
	struct camdd_dev	*peer_dev;
	pthread_mutex_t		 mutex;
	pthread_cond_t		 cond;
	int			 kq;

	/*
	 * Device-type-specific methods.  camdd_probe_file() sets run to
	 * camdd_file_run and leaves fetch NULL.
	 */
	int			 (*run)(struct camdd_dev *dev);
	int			 (*fetch)(struct camdd_dev *dev);

	/*
	 * Buffers that are available for I/O.  Uses links.
	 */
	STAILQ_HEAD(,camdd_buf)	 free_queue;

	/*
	 * Free indirect buffers.  These are used for breaking a large
	 * buffer into multiple pieces.
	 */
	STAILQ_HEAD(,camdd_buf)	 free_indirect_queue;

	/*
	 * Buffers that have been queued to the kernel.  Uses links.
	 */
	STAILQ_HEAD(,camdd_buf)	 active_queue;

	/*
	 * Will generally contain one of our buffers that is waiting for enough
	 * I/O from our partner thread to be able to execute.  This will
	 * generally happen when our per-I/O-size is larger than the
	 * partner thread's per-I/O-size.  Uses links.
	 */
	STAILQ_HEAD(,camdd_buf)	 pending_queue;

	/*
	 * Number of buffers on the pending queue
	 */
	int			 num_pending_queue;

	/*
	 * Buffers that are filled and ready to execute.  This is used when
	 * our partner (reader) thread sends us blocks that are larger than
	 * our blocksize, and so we have to split them into multiple pieces.
	 */
	STAILQ_HEAD(,camdd_buf)	 run_queue;

	/*
	 * Number of buffers on the run queue.
	 */
	int			 num_run_queue;

	/*
	 * NOTE(review): presumably holds buffers that arrived out of
	 * order until they can be processed in sequence -- confirm
	 * against the code that uses it.
	 */
	STAILQ_HEAD(,camdd_buf)	 reorder_queue;

	/*
	 * Number of buffers on the reorder queue.
	 */
	int			 num_reorder_queue;

	/*
	 * Buffers that have been queued to us by our partner thread
	 * (generally the reader thread) to be written out.  Uses
	 * work_links.
	 */
	STAILQ_HEAD(,camdd_buf)	 work_queue;

	/*
	 * Buffers that have been completed by our partner thread.  Uses
	 * work_links.
	 */
	STAILQ_HEAD(,camdd_buf)	 peer_done_queue;

	/*
	 * Number of buffers on the peer done queue.
	 */
	uint32_t		 num_peer_done_queue;

	/*
	 * A list of buffers that we have queued to our peer thread.  Uses
	 * links.
	 */
	STAILQ_HEAD(,camdd_buf)	 peer_work_queue;

	/*
	 * Number of buffers on the peer work queue.
	 */
	uint32_t		 num_peer_work_queue;
};
422 
/*
 * Process-wide state.
 *
 * The three flags are declared volatile sig_atomic_t, the only object
 * type the C standard guarantees can be safely written from a signal
 * handler and then observed by the interrupted code.
 */
static sem_t camdd_sem;
static volatile sig_atomic_t need_exit = 0;
static volatile sig_atomic_t error_exit = 0;
static volatile sig_atomic_t need_status = 0;
427 
/*
 * Smaller of two values.  Every argument and the whole expansion are
 * parenthesized so the macro composes correctly inside larger
 * expressions.  Note that each argument is evaluated more than once, so
 * do not pass expressions with side effects.
 */
#ifndef min
#define	min(a, b) (((a) < (b)) ? (a) : (b))
#endif
431 
432 
/*
 * Generically useful offsets into the peripheral private area.
 * Each name expands to a member path (periph_priv.entries[N].ptr or
 * .field) that is appended to an expression naming the enclosing
 * structure.
 */
#define ppriv_ptr0 periph_priv.entries[0].ptr
#define ppriv_ptr1 periph_priv.entries[1].ptr
#define ppriv_field0 periph_priv.entries[0].field
#define ppriv_field1 periph_priv.entries[1].field

/*
 * ccb_buf aliases the first private pointer slot.
 * NOTE(review): presumably used to store the owning camdd_buf pointer
 * in a CCB -- confirm against the users of this macro.
 */
#define	ccb_buf	ppriv_ptr0
440 
/*
 * Default and maximum I/O parameters.  camdd_probe_file() uses the FILE
 * values for block size and queue depth.  The timeout expression is
 * parenthesized so that it behaves as a single value wherever the macro
 * is expanded (e.g. as a divisor).
 */
#define	CAMDD_FILE_DEFAULT_BLOCK	524288
#define	CAMDD_FILE_DEFAULT_DEPTH	1
#define	CAMDD_PASS_MAX_BLOCK		1048576
#define	CAMDD_PASS_DEFAULT_DEPTH	6
#define	CAMDD_PASS_RW_TIMEOUT		(60 * 1000)
446 
/*
 * Forward declarations for the functions defined in this file.
 */
static int parse_btl(char *tstr, int *bus, int *target, int *lun,
		     camdd_argmask *arglst);
void camdd_free_dev(struct camdd_dev *dev);
struct camdd_dev *camdd_alloc_dev(camdd_dev_type dev_type,
				  struct kevent *new_ke, int num_ke,
				  int retry_count, int timeout);
static struct camdd_buf *camdd_alloc_buf(struct camdd_dev *dev,
					 camdd_buf_type buf_type);
void camdd_release_buf(struct camdd_buf *buf);
struct camdd_buf *camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type);
int camdd_buf_sg_create(struct camdd_buf *buf, int iovec,
			uint32_t sector_size, uint32_t *num_sectors_used,
			int *double_buf_needed);
uint32_t camdd_buf_get_len(struct camdd_buf *buf);
void camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf);
int camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
		     uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran);
int camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
         camdd_argmask arglist, int probe_retry_count,
         int probe_timeout, uint64_t *maxsector, uint32_t *block_len);
int camdd_probe_pass_nvme(struct cam_device *cam_dev, union ccb *ccb,
         camdd_argmask arglist, int probe_retry_count,
         int probe_timeout, uint64_t *maxsector, uint32_t *block_len);
struct camdd_dev *camdd_probe_file(int fd, struct camdd_io_opts *io_opts,
				   int retry_count, int timeout);
struct camdd_dev *camdd_probe_pass(struct cam_device *cam_dev,
				   struct camdd_io_opts *io_opts,
				   camdd_argmask arglist, int probe_retry_count,
				   int probe_timeout, int io_retry_count,
				   int io_timeout);
void nvme_read_write(struct ccb_nvmeio *nvmeio, uint32_t retries,
		void (*cbfcnp)(struct cam_periph *, union ccb *),
		uint32_t nsid, int readop, uint64_t lba,
		uint32_t block_count, uint8_t *data_ptr, uint32_t dxfer_len,
		uint32_t timeout);
void *camdd_file_worker(void *arg);
camdd_buf_status camdd_ccb_status(union ccb *ccb, int protocol);
int camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd);
int camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf);
int camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf);
void camdd_peer_done(struct camdd_buf *buf);
void camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
			int *error_count);
int camdd_pass_fetch(struct camdd_dev *dev);
int camdd_file_run(struct camdd_dev *dev);
int camdd_pass_run(struct camdd_dev *dev);
int camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len);
int camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf);
void camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
		     uint32_t *peer_depth, uint32_t *our_bytes,
		     uint32_t *peer_bytes);
void *camdd_worker(void *arg);
void camdd_sig_handler(int sig);
void camdd_print_status(struct camdd_dev *camdd_dev,
			struct camdd_dev *other_dev,
			struct timespec *start_time);
int camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts,
	     uint64_t max_io, int retry_count, int timeout);
int camdd_parse_io_opts(char *args, int is_write,
			struct camdd_io_opts *io_opts);
void usage(void);
508 
509 /*
510  * Parse out a bus, or a bus, target and lun in the following
511  * format:
512  * bus
513  * bus:target
514  * bus:target:lun
515  *
516  * Returns the number of parsed components, or 0.
517  */
518 static int
519 parse_btl(char *tstr, int *bus, int *target, int *lun, camdd_argmask *arglst)
520 {
521 	char *tmpstr;
522 	int convs = 0;
523 
524 	while (isspace(*tstr) && (*tstr != '\0'))
525 		tstr++;
526 
527 	tmpstr = (char *)strtok(tstr, ":");
528 	if ((tmpstr != NULL) && (*tmpstr != '\0')) {
529 		*bus = strtol(tmpstr, NULL, 0);
530 		*arglst |= CAMDD_ARG_BUS;
531 		convs++;
532 		tmpstr = (char *)strtok(NULL, ":");
533 		if ((tmpstr != NULL) && (*tmpstr != '\0')) {
534 			*target = strtol(tmpstr, NULL, 0);
535 			*arglst |= CAMDD_ARG_TARGET;
536 			convs++;
537 			tmpstr = (char *)strtok(NULL, ":");
538 			if ((tmpstr != NULL) && (*tmpstr != '\0')) {
539 				*lun = strtol(tmpstr, NULL, 0);
540 				*arglst |= CAMDD_ARG_LUN;
541 				convs++;
542 			}
543 		}
544 	}
545 
546 	return convs;
547 }
548 
549 /*
550  * XXX KDM clean up and free all of the buffers on the queue!
551  */
552 void
553 camdd_free_dev(struct camdd_dev *dev)
554 {
555 	if (dev == NULL)
556 		return;
557 
558 	switch (dev->dev_type) {
559 	case CAMDD_DEV_FILE: {
560 		struct camdd_dev_file *file_dev = &dev->dev_spec.file;
561 
562 		if (file_dev->fd != -1)
563 			close(file_dev->fd);
564 		free(file_dev->tmp_buf);
565 		break;
566 	}
567 	case CAMDD_DEV_PASS: {
568 		struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
569 
570 		if (pass_dev->dev != NULL)
571 			cam_close_device(pass_dev->dev);
572 		break;
573 	}
574 	default:
575 		break;
576 	}
577 
578 	free(dev);
579 }
580 
581 struct camdd_dev *
582 camdd_alloc_dev(camdd_dev_type dev_type, struct kevent *new_ke, int num_ke,
583 		int retry_count, int timeout)
584 {
585 	struct camdd_dev *dev = NULL;
586 	struct kevent *ke;
587 	size_t ke_size;
588 	int retval = 0;
589 
590 	dev = calloc(1, sizeof(*dev));
591 	if (dev == NULL) {
592 		warn("%s: unable to malloc %zu bytes", __func__, sizeof(*dev));
593 		goto bailout;
594 	}
595 
596 	dev->dev_type = dev_type;
597 	dev->io_timeout = timeout;
598 	dev->retry_count = retry_count;
599 	STAILQ_INIT(&dev->free_queue);
600 	STAILQ_INIT(&dev->free_indirect_queue);
601 	STAILQ_INIT(&dev->active_queue);
602 	STAILQ_INIT(&dev->pending_queue);
603 	STAILQ_INIT(&dev->run_queue);
604 	STAILQ_INIT(&dev->reorder_queue);
605 	STAILQ_INIT(&dev->work_queue);
606 	STAILQ_INIT(&dev->peer_done_queue);
607 	STAILQ_INIT(&dev->peer_work_queue);
608 	retval = pthread_mutex_init(&dev->mutex, NULL);
609 	if (retval != 0) {
610 		warnc(retval, "%s: failed to initialize mutex", __func__);
611 		goto bailout;
612 	}
613 
614 	retval = pthread_cond_init(&dev->cond, NULL);
615 	if (retval != 0) {
616 		warnc(retval, "%s: failed to initialize condition variable",
617 		      __func__);
618 		goto bailout;
619 	}
620 
621 	dev->kq = kqueue();
622 	if (dev->kq == -1) {
623 		warn("%s: Unable to create kqueue", __func__);
624 		goto bailout;
625 	}
626 
627 	ke_size = sizeof(struct kevent) * (num_ke + 4);
628 	ke = calloc(1, ke_size);
629 	if (ke == NULL) {
630 		warn("%s: unable to malloc %zu bytes", __func__, ke_size);
631 		goto bailout;
632 	}
633 	if (num_ke > 0)
634 		bcopy(new_ke, ke, num_ke * sizeof(struct kevent));
635 
636 	EV_SET(&ke[num_ke++], (uintptr_t)&dev->work_queue, EVFILT_USER,
637 	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
638 	EV_SET(&ke[num_ke++], (uintptr_t)&dev->peer_done_queue, EVFILT_USER,
639 	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
640 	EV_SET(&ke[num_ke++], SIGINFO, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
641 	EV_SET(&ke[num_ke++], SIGINT, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
642 
643 	retval = kevent(dev->kq, ke, num_ke, NULL, 0, NULL);
644 	if (retval == -1) {
645 		warn("%s: Unable to register kevents", __func__);
646 		goto bailout;
647 	}
648 
649 
650 	return (dev);
651 
652 bailout:
653 	free(dev);
654 
655 	return (NULL);
656 }
657 
658 static struct camdd_buf *
659 camdd_alloc_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
660 {
661 	struct camdd_buf *buf = NULL;
662 	uint8_t *data_ptr = NULL;
663 
664 	/*
665 	 * We only need to allocate data space for data buffers.
666 	 */
667 	switch (buf_type) {
668 	case CAMDD_BUF_DATA:
669 		data_ptr = malloc(dev->blocksize);
670 		if (data_ptr == NULL) {
671 			warn("unable to allocate %u bytes", dev->blocksize);
672 			goto bailout_error;
673 		}
674 		break;
675 	default:
676 		break;
677 	}
678 
679 	buf = calloc(1, sizeof(*buf));
680 	if (buf == NULL) {
681 		warn("unable to allocate %zu bytes", sizeof(*buf));
682 		goto bailout_error;
683 	}
684 
685 	buf->buf_type = buf_type;
686 	buf->dev = dev;
687 	switch (buf_type) {
688 	case CAMDD_BUF_DATA: {
689 		struct camdd_buf_data *data;
690 
691 		data = &buf->buf_type_spec.data;
692 
693 		data->alloc_len = dev->blocksize;
694 		data->buf = data_ptr;
695 		break;
696 	}
697 	case CAMDD_BUF_INDIRECT:
698 		break;
699 	default:
700 		break;
701 	}
702 	STAILQ_INIT(&buf->src_list);
703 
704 	return (buf);
705 
706 bailout_error:
707 	free(data_ptr);
708 
709 	return (NULL);
710 }
711 
712 void
713 camdd_release_buf(struct camdd_buf *buf)
714 {
715 	struct camdd_dev *dev;
716 
717 	dev = buf->dev;
718 
719 	switch (buf->buf_type) {
720 	case CAMDD_BUF_DATA: {
721 		struct camdd_buf_data *data;
722 
723 		data = &buf->buf_type_spec.data;
724 
725 		if (data->segs != NULL) {
726 			if (data->extra_buf != 0) {
727 				void *extra_buf;
728 
729 				extra_buf = (void *)
730 				    data->segs[data->sg_count - 1].ds_addr;
731 				free(extra_buf);
732 				data->extra_buf = 0;
733 			}
734 			free(data->segs);
735 			data->segs = NULL;
736 			data->sg_count = 0;
737 		} else if (data->iovec != NULL) {
738 			if (data->extra_buf != 0) {
739 				free(data->iovec[data->sg_count - 1].iov_base);
740 				data->extra_buf = 0;
741 			}
742 			free(data->iovec);
743 			data->iovec = NULL;
744 			data->sg_count = 0;
745 		}
746 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
747 		break;
748 	}
749 	case CAMDD_BUF_INDIRECT:
750 		STAILQ_INSERT_TAIL(&dev->free_indirect_queue, buf, links);
751 		break;
752 	default:
753 		err(1, "%s: Invalid buffer type %d for released buffer",
754 		    __func__, buf->buf_type);
755 		break;
756 	}
757 }
758 
759 struct camdd_buf *
760 camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
761 {
762 	struct camdd_buf *buf = NULL;
763 
764 	switch (buf_type) {
765 	case CAMDD_BUF_DATA:
766 		buf = STAILQ_FIRST(&dev->free_queue);
767 		if (buf != NULL) {
768 			struct camdd_buf_data *data;
769 			uint8_t *data_ptr;
770 			uint32_t alloc_len;
771 
772 			STAILQ_REMOVE_HEAD(&dev->free_queue, links);
773 			data = &buf->buf_type_spec.data;
774 			data_ptr = data->buf;
775 			alloc_len = data->alloc_len;
776 			bzero(buf, sizeof(*buf));
777 			data->buf = data_ptr;
778 			data->alloc_len = alloc_len;
779 		}
780 		break;
781 	case CAMDD_BUF_INDIRECT:
782 		buf = STAILQ_FIRST(&dev->free_indirect_queue);
783 		if (buf != NULL) {
784 			STAILQ_REMOVE_HEAD(&dev->free_indirect_queue, links);
785 
786 			bzero(buf, sizeof(*buf));
787 		}
788 		break;
789 	default:
790 		warnx("Unknown buffer type %d requested", buf_type);
791 		break;
792 	}
793 
794 
795 	if (buf == NULL)
796 		return (camdd_alloc_buf(dev, buf_type));
797 	else {
798 		STAILQ_INIT(&buf->src_list);
799 		buf->dev = dev;
800 		buf->buf_type = buf_type;
801 
802 		return (buf);
803 	}
804 }
805 
806 int
807 camdd_buf_sg_create(struct camdd_buf *buf, int iovec, uint32_t sector_size,
808 		    uint32_t *num_sectors_used, int *double_buf_needed)
809 {
810 	struct camdd_buf *tmp_buf;
811 	struct camdd_buf_data *data;
812 	uint8_t *extra_buf = NULL;
813 	size_t extra_buf_len = 0;
814 	int extra_buf_attached = 0;
815 	int i, retval = 0;
816 
817 	data = &buf->buf_type_spec.data;
818 
819 	data->sg_count = buf->src_count;
820 	/*
821 	 * Compose a scatter/gather list from all of the buffers in the list.
822 	 * If the length of the buffer isn't a multiple of the sector size,
823 	 * we'll have to add an extra buffer.  This should only happen
824 	 * at the end of a transfer.
825 	 */
826 	if ((data->fill_len % sector_size) != 0) {
827 		extra_buf_len = sector_size - (data->fill_len % sector_size);
828 		extra_buf = calloc(extra_buf_len, 1);
829 		if (extra_buf == NULL) {
830 			warn("%s: unable to allocate %zu bytes for extra "
831 			    "buffer space", __func__, extra_buf_len);
832 			retval = 1;
833 			goto bailout;
834 		}
835 		data->extra_buf = 1;
836 		data->sg_count++;
837 	}
838 	if (iovec == 0) {
839 		data->segs = calloc(data->sg_count, sizeof(bus_dma_segment_t));
840 		if (data->segs == NULL) {
841 			warn("%s: unable to allocate %zu bytes for S/G list",
842 			    __func__, sizeof(bus_dma_segment_t) *
843 			    data->sg_count);
844 			retval = 1;
845 			goto bailout;
846 		}
847 
848 	} else {
849 		data->iovec = calloc(data->sg_count, sizeof(struct iovec));
850 		if (data->iovec == NULL) {
851 			warn("%s: unable to allocate %zu bytes for S/G list",
852 			    __func__, sizeof(struct iovec) * data->sg_count);
853 			retval = 1;
854 			goto bailout;
855 		}
856 	}
857 
858 	for (i = 0, tmp_buf = STAILQ_FIRST(&buf->src_list);
859 	     i < buf->src_count && tmp_buf != NULL; i++,
860 	     tmp_buf = STAILQ_NEXT(tmp_buf, src_links)) {
861 
862 		if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
863 			struct camdd_buf_data *tmp_data;
864 
865 			tmp_data = &tmp_buf->buf_type_spec.data;
866 			if (iovec == 0) {
867 				data->segs[i].ds_addr =
868 				    (bus_addr_t) tmp_data->buf;
869 				data->segs[i].ds_len = tmp_data->fill_len -
870 				    tmp_data->resid;
871 			} else {
872 				data->iovec[i].iov_base = tmp_data->buf;
873 				data->iovec[i].iov_len = tmp_data->fill_len -
874 				    tmp_data->resid;
875 			}
876 			if (((tmp_data->fill_len - tmp_data->resid) %
877 			     sector_size) != 0)
878 				*double_buf_needed = 1;
879 		} else {
880 			struct camdd_buf_indirect *tmp_ind;
881 
882 			tmp_ind = &tmp_buf->buf_type_spec.indirect;
883 			if (iovec == 0) {
884 				data->segs[i].ds_addr =
885 				    (bus_addr_t)tmp_ind->start_ptr;
886 				data->segs[i].ds_len = tmp_ind->len;
887 			} else {
888 				data->iovec[i].iov_base = tmp_ind->start_ptr;
889 				data->iovec[i].iov_len = tmp_ind->len;
890 			}
891 			if ((tmp_ind->len % sector_size) != 0)
892 				*double_buf_needed = 1;
893 		}
894 	}
895 
896 	if (extra_buf != NULL) {
897 		if (iovec == 0) {
898 			data->segs[i].ds_addr = (bus_addr_t)extra_buf;
899 			data->segs[i].ds_len = extra_buf_len;
900 		} else {
901 			data->iovec[i].iov_base = extra_buf;
902 			data->iovec[i].iov_len = extra_buf_len;
903 		}
904 		extra_buf_attached = 1;
905 		i++;
906 	}
907 	if ((tmp_buf != NULL) || (i != data->sg_count)) {
908 		warnx("buffer source count does not match "
909 		      "number of buffers in list!");
910 		retval = 1;
911 		goto bailout;
912 	}
913 
914 bailout:
915 	if (retval == 0) {
916 		*num_sectors_used = (data->fill_len + extra_buf_len) /
917 		    sector_size;
918 	} else if (extra_buf_attached == 0) {
919 		/*
920 		 * If extra_buf isn't attached yet, we need to free it
921 		 * to avoid leaking.
922 		 */
923 		free(extra_buf);
924 		data->extra_buf = 0;
925 		data->sg_count--;
926 	}
927 	return (retval);
928 }
929 
930 uint32_t
931 camdd_buf_get_len(struct camdd_buf *buf)
932 {
933 	uint32_t len = 0;
934 
935 	if (buf->buf_type != CAMDD_BUF_DATA) {
936 		struct camdd_buf_indirect *indirect;
937 
938 		indirect = &buf->buf_type_spec.indirect;
939 		len = indirect->len;
940 	} else {
941 		struct camdd_buf_data *data;
942 
943 		data = &buf->buf_type_spec.data;
944 		len = data->fill_len;
945 	}
946 
947 	return (len);
948 }
949 
950 void
951 camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf)
952 {
953 	struct camdd_buf_data *data;
954 
955 	assert(buf->buf_type == CAMDD_BUF_DATA);
956 
957 	data = &buf->buf_type_spec.data;
958 
959 	STAILQ_INSERT_TAIL(&buf->src_list, child_buf, src_links);
960 	buf->src_count++;
961 
962 	data->fill_len += camdd_buf_get_len(child_buf);
963 }
964 
/*
 * Indices into req_status_items[]; the order here must match the order
 * of the entries in that table.
 */
typedef enum {
	CAMDD_TS_MAX_BLK	= 0,
	CAMDD_TS_MIN_BLK	= 1,
	CAMDD_TS_BLK_GRAN	= 2,
	CAMDD_TS_EFF_IOSIZE	= 3
} camdd_status_item_index;
971 
/*
 * Tape status entries looked up by camdd_probe_tape().  The entry
 * pointers are filled in by that function; the table is indexed by
 * camdd_status_item_index, so the order of the rows matters.
 */
static struct camdd_status_items {
	const char *name;
	struct mt_status_entry *entry;
} req_status_items[] = {
	{ .name = "max_blk",			.entry = NULL },
	{ .name = "min_blk",			.entry = NULL },
	{ .name = "blk_gran",			.entry = NULL },
	{ .name = "max_effective_iosize",	.entry = NULL }
};
981 
982 int
983 camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
984 		 uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran)
985 {
986 	struct mt_status_data status_data;
987 	char *xml_str = NULL;
988 	unsigned int i;
989 	int retval = 0;
990 
991 	retval = mt_get_xml_str(fd, MTIOCEXTGET, &xml_str);
992 	if (retval != 0)
993 		err(1, "Couldn't get XML string from %s", filename);
994 
995 	retval = mt_get_status(xml_str, &status_data);
996 	if (retval != XML_STATUS_OK) {
997 		warn("couldn't get status for %s", filename);
998 		retval = 1;
999 		goto bailout;
1000 	} else
1001 		retval = 0;
1002 
1003 	if (status_data.error != 0) {
1004 		warnx("%s", status_data.error_str);
1005 		retval = 1;
1006 		goto bailout;
1007 	}
1008 
1009 	for (i = 0; i < nitems(req_status_items); i++) {
1010                 char *name;
1011 
1012 		name = __DECONST(char *, req_status_items[i].name);
1013 		req_status_items[i].entry = mt_status_entry_find(&status_data,
1014 		    name);
1015 		if (req_status_items[i].entry == NULL) {
1016 			errx(1, "Cannot find status entry %s",
1017 			    req_status_items[i].name);
1018 		}
1019 	}
1020 
1021 	*max_iosize = req_status_items[CAMDD_TS_EFF_IOSIZE].entry->value_unsigned;
1022 	*max_blk= req_status_items[CAMDD_TS_MAX_BLK].entry->value_unsigned;
1023 	*min_blk= req_status_items[CAMDD_TS_MIN_BLK].entry->value_unsigned;
1024 	*blk_gran = req_status_items[CAMDD_TS_BLK_GRAN].entry->value_unsigned;
1025 bailout:
1026 
1027 	free(xml_str);
1028 	mt_status_free(&status_data);
1029 
1030 	return (retval);
1031 }
1032 
1033 struct camdd_dev *
1034 camdd_probe_file(int fd, struct camdd_io_opts *io_opts, int retry_count,
1035     int timeout)
1036 {
1037 	struct camdd_dev *dev = NULL;
1038 	struct camdd_dev_file *file_dev;
1039 	uint64_t blocksize = io_opts->blocksize;
1040 
1041 	dev = camdd_alloc_dev(CAMDD_DEV_FILE, NULL, 0, retry_count, timeout);
1042 	if (dev == NULL)
1043 		goto bailout;
1044 
1045 	file_dev = &dev->dev_spec.file;
1046 	file_dev->fd = fd;
1047 	strlcpy(file_dev->filename, io_opts->dev_name,
1048 	    sizeof(file_dev->filename));
1049 	strlcpy(dev->device_name, io_opts->dev_name, sizeof(dev->device_name));
1050 	if (blocksize == 0)
1051 		dev->blocksize = CAMDD_FILE_DEFAULT_BLOCK;
1052 	else
1053 		dev->blocksize = blocksize;
1054 
1055 	if ((io_opts->queue_depth != 0)
1056 	 && (io_opts->queue_depth != 1)) {
1057 		warnx("Queue depth %ju for %s ignored, only 1 outstanding "
1058 		    "command supported", (uintmax_t)io_opts->queue_depth,
1059 		    io_opts->dev_name);
1060 	}
1061 	dev->target_queue_depth = CAMDD_FILE_DEFAULT_DEPTH;
1062 	dev->run = camdd_file_run;
1063 	dev->fetch = NULL;
1064 
1065 	/*
1066 	 * We can effectively access files on byte boundaries.  We'll reset
1067 	 * this for devices like disks that can be accessed on sector
1068 	 * boundaries.
1069 	 */
1070 	dev->sector_size = 1;
1071 
1072 	if ((fd != STDIN_FILENO)
1073 	 && (fd != STDOUT_FILENO)) {
1074 		int retval;
1075 
1076 		retval = fstat(fd, &file_dev->sb);
1077 		if (retval != 0) {
1078 			warn("Cannot stat %s", dev->device_name);
1079 			goto bailout_error;
1080 		}
1081 		if (S_ISREG(file_dev->sb.st_mode)) {
1082 			file_dev->file_type = CAMDD_FILE_REG;
1083 		} else if (S_ISCHR(file_dev->sb.st_mode)) {
1084 			int type;
1085 
1086 			if (ioctl(fd, FIODTYPE, &type) == -1)
1087 				err(1, "FIODTYPE ioctl failed on %s",
1088 				    dev->device_name);
1089 			else {
1090 				if (type & D_TAPE)
1091 					file_dev->file_type = CAMDD_FILE_TAPE;
1092 				else if (type & D_DISK)
1093 					file_dev->file_type = CAMDD_FILE_DISK;
1094 				else if (type & D_MEM)
1095 					file_dev->file_type = CAMDD_FILE_MEM;
1096 				else if (type & D_TTY)
1097 					file_dev->file_type = CAMDD_FILE_TTY;
1098 			}
1099 		} else if (S_ISDIR(file_dev->sb.st_mode)) {
1100 			errx(1, "cannot operate on directory %s",
1101 			    dev->device_name);
1102 		} else if (S_ISFIFO(file_dev->sb.st_mode)) {
1103 			file_dev->file_type = CAMDD_FILE_PIPE;
1104 		} else
1105 			errx(1, "Cannot determine file type for %s",
1106 			    dev->device_name);
1107 
1108 		switch (file_dev->file_type) {
1109 		case CAMDD_FILE_REG:
1110 			if (file_dev->sb.st_size != 0)
1111 				dev->max_sector = file_dev->sb.st_size - 1;
1112 			else
1113 				dev->max_sector = 0;
1114 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1115 			break;
1116 		case CAMDD_FILE_TAPE: {
1117 			uint64_t max_iosize, max_blk, min_blk, blk_gran;
1118 			/*
1119 			 * Check block limits and maximum effective iosize.
1120 			 * Make sure the blocksize is within the block
1121 			 * limits (and a multiple of the minimum blocksize)
1122 			 * and that the blocksize is <= maximum effective
1123 			 * iosize.
1124 			 */
1125 			retval = camdd_probe_tape(fd, dev->device_name,
1126 			    &max_iosize, &max_blk, &min_blk, &blk_gran);
1127 			if (retval != 0)
1128 				errx(1, "Unable to probe tape %s",
1129 				    dev->device_name);
1130 
1131 			/*
1132 			 * The blocksize needs to be <= the maximum
1133 			 * effective I/O size of the tape device.  Note
1134 			 * that this also takes into account the maximum
1135 			 * blocksize reported by READ BLOCK LIMITS.
1136 			 */
1137 			if (dev->blocksize > max_iosize) {
1138 				warnx("Blocksize %u too big for %s, limiting "
1139 				    "to %ju", dev->blocksize, dev->device_name,
1140 				    max_iosize);
1141 				dev->blocksize = max_iosize;
1142 			}
1143 
1144 			/*
1145 			 * The blocksize needs to be at least min_blk;
1146 			 */
1147 			if (dev->blocksize < min_blk) {
1148 				warnx("Blocksize %u too small for %s, "
1149 				    "increasing to %ju", dev->blocksize,
1150 				    dev->device_name, min_blk);
1151 				dev->blocksize = min_blk;
1152 			}
1153 
1154 			/*
1155 			 * And the blocksize needs to be a multiple of
1156 			 * the block granularity.
1157 			 */
1158 			if ((blk_gran != 0)
1159 			 && (dev->blocksize % (1 << blk_gran))) {
1160 				warnx("Blocksize %u for %s not a multiple of "
1161 				    "%d, adjusting to %d", dev->blocksize,
1162 				    dev->device_name, (1 << blk_gran),
1163 				    dev->blocksize & ~((1 << blk_gran) - 1));
1164 				dev->blocksize &= ~((1 << blk_gran) - 1);
1165 			}
1166 
1167 			if (dev->blocksize == 0) {
1168 				errx(1, "Unable to derive valid blocksize for "
1169 				    "%s", dev->device_name);
1170 			}
1171 
1172 			/*
1173 			 * For tape drives, set the sector size to the
1174 			 * blocksize so that we make sure not to write
1175 			 * less than the blocksize out to the drive.
1176 			 */
1177 			dev->sector_size = dev->blocksize;
1178 			break;
1179 		}
1180 		case CAMDD_FILE_DISK: {
1181 			off_t media_size;
1182 			unsigned int sector_size;
1183 
1184 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1185 
1186 			if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) == -1) {
1187 				err(1, "DIOCGSECTORSIZE ioctl failed on %s",
1188 				    dev->device_name);
1189 			}
1190 
1191 			if (sector_size == 0) {
1192 				errx(1, "DIOCGSECTORSIZE ioctl returned "
1193 				    "invalid sector size %u for %s",
1194 				    sector_size, dev->device_name);
1195 			}
1196 
1197 			if (ioctl(fd, DIOCGMEDIASIZE, &media_size) == -1) {
1198 				err(1, "DIOCGMEDIASIZE ioctl failed on %s",
1199 				    dev->device_name);
1200 			}
1201 
1202 			if (media_size == 0) {
1203 				errx(1, "DIOCGMEDIASIZE ioctl returned "
1204 				    "invalid media size %ju for %s",
1205 				    (uintmax_t)media_size, dev->device_name);
1206 			}
1207 
1208 			if (dev->blocksize % sector_size) {
1209 				errx(1, "%s blocksize %u not a multiple of "
1210 				    "sector size %u", dev->device_name,
1211 				    dev->blocksize, sector_size);
1212 			}
1213 
1214 			dev->sector_size = sector_size;
1215 			dev->max_sector = (media_size / sector_size) - 1;
1216 			break;
1217 		}
1218 		case CAMDD_FILE_MEM:
1219 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1220 			break;
1221 		default:
1222 			break;
1223 		}
1224 	}
1225 
1226 	if ((io_opts->offset != 0)
1227 	 && ((file_dev->file_flags & CAMDD_FF_CAN_SEEK) == 0)) {
1228 		warnx("Offset %ju specified for %s, but we cannot seek on %s",
1229 		    io_opts->offset, io_opts->dev_name, io_opts->dev_name);
1230 		goto bailout_error;
1231 	}
1232 #if 0
1233 	else if ((io_opts->offset != 0)
1234 		&& ((io_opts->offset % dev->sector_size) != 0)) {
1235 		warnx("Offset %ju for %s is not a multiple of the "
1236 		      "sector size %u", io_opts->offset,
1237 		      io_opts->dev_name, dev->sector_size);
1238 		goto bailout_error;
1239 	} else {
1240 		dev->start_offset_bytes = io_opts->offset;
1241 	}
1242 #endif
1243 
1244 bailout:
1245 	return (dev);
1246 
1247 bailout_error:
1248 	camdd_free_dev(dev);
1249 	return (NULL);
1250 }
1251 
1252 /*
1253  * Get a get device CCB for the specified device.
1254  */
1255 int
1256 camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd)
1257 {
1258         union ccb *ccb;
1259 	int retval = 0;
1260 
1261 	ccb = cam_getccb(device);
1262 
1263 	if (ccb == NULL) {
1264 		warnx("%s: couldn't allocate CCB", __func__);
1265 		return -1;
1266 	}
1267 
1268 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cgd);
1269 
1270 	ccb->ccb_h.func_code = XPT_GDEV_TYPE;
1271 
1272 	if (cam_send_ccb(device, ccb) < 0) {
1273 		warn("%s: error sending Get Device Information CCB", __func__);
1274 			cam_error_print(device, ccb, CAM_ESF_ALL,
1275 					CAM_EPF_ALL, stderr);
1276 		retval = -1;
1277 		goto bailout;
1278 	}
1279 
1280 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1281 			cam_error_print(device, ccb, CAM_ESF_ALL,
1282 					CAM_EPF_ALL, stderr);
1283 		retval = -1;
1284 		goto bailout;
1285 	}
1286 
1287 	bcopy(&ccb->cgd, cgd, sizeof(struct ccb_getdev));
1288 
1289 bailout:
1290 	cam_freeccb(ccb);
1291 
1292 	return retval;
1293 }
1294 
1295 int
1296 camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
1297 		 camdd_argmask arglist, int probe_retry_count,
1298 		 int probe_timeout, uint64_t *maxsector, uint32_t *block_len)
1299 {
1300 	struct scsi_read_capacity_data rcap;
1301 	struct scsi_read_capacity_data_long rcaplong;
1302 	int retval = -1;
1303 
1304 	if (ccb == NULL) {
1305 		warnx("%s: error passed ccb is NULL", __func__);
1306 		goto bailout;
1307 	}
1308 
1309 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
1310 
1311 	scsi_read_capacity(&ccb->csio,
1312 			   /*retries*/ probe_retry_count,
1313 			   /*cbfcnp*/ NULL,
1314 			   /*tag_action*/ MSG_SIMPLE_Q_TAG,
1315 			   &rcap,
1316 			   SSD_FULL_SIZE,
1317 			   /*timeout*/ probe_timeout ? probe_timeout : 5000);
1318 
1319 	/* Disable freezing the device queue */
1320 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1321 
1322 	if (arglist & CAMDD_ARG_ERR_RECOVER)
1323 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1324 
1325 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1326 		warn("error sending READ CAPACITY command");
1327 
1328 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1329 				CAM_EPF_ALL, stderr);
1330 
1331 		goto bailout;
1332 	}
1333 
1334 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1335 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1336 		goto bailout;
1337 	}
1338 
1339 	*maxsector = scsi_4btoul(rcap.addr);
1340 	*block_len = scsi_4btoul(rcap.length);
1341 
1342 	/*
1343 	 * A last block of 2^32-1 means that the true capacity is over 2TB,
1344 	 * and we need to issue the long READ CAPACITY to get the real
1345 	 * capacity.  Otherwise, we're all set.
1346 	 */
1347 	if (*maxsector != 0xffffffff) {
1348 		retval = 0;
1349 		goto bailout;
1350 	}
1351 
1352 	scsi_read_capacity_16(&ccb->csio,
1353 			      /*retries*/ probe_retry_count,
1354 			      /*cbfcnp*/ NULL,
1355 			      /*tag_action*/ MSG_SIMPLE_Q_TAG,
1356 			      /*lba*/ 0,
1357 			      /*reladdr*/ 0,
1358 			      /*pmi*/ 0,
1359 			      (uint8_t *)&rcaplong,
1360 			      sizeof(rcaplong),
1361 			      /*sense_len*/ SSD_FULL_SIZE,
1362 			      /*timeout*/ probe_timeout ? probe_timeout : 5000);
1363 
1364 	/* Disable freezing the device queue */
1365 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1366 
1367 	if (arglist & CAMDD_ARG_ERR_RECOVER)
1368 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1369 
1370 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1371 		warn("error sending READ CAPACITY (16) command");
1372 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1373 				CAM_EPF_ALL, stderr);
1374 		goto bailout;
1375 	}
1376 
1377 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1378 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1379 		goto bailout;
1380 	}
1381 
1382 	*maxsector = scsi_8btou64(rcaplong.addr);
1383 	*block_len = scsi_4btoul(rcaplong.length);
1384 
1385 	retval = 0;
1386 
1387 bailout:
1388 	return retval;
1389 }
1390 
1391 int
1392 camdd_probe_pass_nvme(struct cam_device *cam_dev, union ccb *ccb,
1393 		 camdd_argmask arglist, int probe_retry_count,
1394 		 int probe_timeout, uint64_t *maxsector, uint32_t *block_len)
1395 {
1396 	struct nvme_command *nc = NULL;
1397 	struct nvme_namespace_data nsdata;
1398 	uint32_t nsid = cam_dev->target_lun & UINT32_MAX;
1399 	uint8_t format = 0, lbads = 0;
1400 	int retval = -1;
1401 
1402 	if (ccb == NULL) {
1403 		warnx("%s: error passed ccb is NULL", __func__);
1404 		goto bailout;
1405 	}
1406 
1407 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->nvmeio);
1408 
1409 	/* Send Identify Namespace to get block size and capacity */
1410 	nc = &ccb->nvmeio.cmd;
1411 	nc->opc = NVME_OPC_IDENTIFY;
1412 
1413 	nc->nsid = nsid;
1414 	nc->cdw10 = 0; /* Identify Namespace is CNS = 0 */
1415 
1416 	cam_fill_nvmeadmin(&ccb->nvmeio,
1417 			/*retries*/ probe_retry_count,
1418 			/*cbfcnp*/ NULL,
1419 			CAM_DIR_IN,
1420 			(uint8_t *)&nsdata,
1421 			sizeof(nsdata),
1422 			probe_timeout);
1423 
1424 	/* Disable freezing the device queue */
1425 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1426 
1427 	if (arglist & CAMDD_ARG_ERR_RECOVER)
1428 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1429 
1430 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1431 		warn("error sending Identify Namespace command");
1432 
1433 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1434 				CAM_EPF_ALL, stderr);
1435 
1436 		goto bailout;
1437 	}
1438 
1439 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1440 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1441 		goto bailout;
1442 	}
1443 
1444 	*maxsector = nsdata.nsze;
1445 	/* The LBA Data Size (LBADS) is reported as a power of 2 */
1446 	format = nsdata.flbas & NVME_NS_DATA_FLBAS_FORMAT_MASK;
1447 	lbads = (nsdata.lbaf[format] >> NVME_NS_DATA_LBAF_LBADS_SHIFT) &
1448 	    NVME_NS_DATA_LBAF_LBADS_MASK;
1449 	*block_len = 1 << lbads;
1450 
1451 	retval = 0;
1452 
1453 bailout:
1454 	return retval;
1455 }
1456 
1457 /*
1458  * Need to implement this.  Do a basic probe:
1459  * - Check the inquiry data, make sure we're talking to a device that we
1460  *   can reasonably expect to talk to -- direct, RBC, CD, WORM.
1461  * - Send a test unit ready, make sure the device is available.
1462  * - Get the capacity and block size.
1463  */
1464 struct camdd_dev *
1465 camdd_probe_pass(struct cam_device *cam_dev, struct camdd_io_opts *io_opts,
1466 		 camdd_argmask arglist, int probe_retry_count,
1467 		 int probe_timeout, int io_retry_count, int io_timeout)
1468 {
1469 	union ccb *ccb;
1470 	uint64_t maxsector = 0;
1471 	uint32_t cpi_maxio, max_iosize, pass_numblocks;
1472 	uint32_t block_len = 0;
1473 	struct camdd_dev *dev = NULL;
1474 	struct camdd_dev_pass *pass_dev;
1475 	struct kevent ke;
1476 	struct ccb_getdev cgd;
1477 	int retval;
1478 	int scsi_dev_type = T_NODEVICE;
1479 
1480 	if ((retval = camdd_get_cgd(cam_dev, &cgd)) != 0) {
1481 		warnx("%s: error retrieving CGD", __func__);
1482 		return NULL;
1483 	}
1484 
1485 	ccb = cam_getccb(cam_dev);
1486 
1487 	if (ccb == NULL) {
1488 		warnx("%s: error allocating ccb", __func__);
1489 		goto bailout;
1490 	}
1491 
1492 	switch (cgd.protocol) {
1493 	case PROTO_SCSI:
1494 		scsi_dev_type = SID_TYPE(&cam_dev->inq_data);
1495 
1496 		/*
1497 		 * For devices that support READ CAPACITY, we'll attempt to get the
1498 		 * capacity.  Otherwise, we really don't support tape or other
1499 		 * devices via SCSI passthrough, so just return an error in that case.
1500 		 */
1501 		switch (scsi_dev_type) {
1502 		case T_DIRECT:
1503 		case T_WORM:
1504 		case T_CDROM:
1505 		case T_OPTICAL:
1506 		case T_RBC:
1507 		case T_ZBC_HM:
1508 			break;
1509 		default:
1510 			errx(1, "Unsupported SCSI device type %d", scsi_dev_type);
1511 			break; /*NOTREACHED*/
1512 		}
1513 
1514 		if ((retval = camdd_probe_pass_scsi(cam_dev, ccb, probe_retry_count,
1515 						arglist, probe_timeout, &maxsector,
1516 						&block_len))) {
1517 			goto bailout;
1518 		}
1519 		break;
1520 	case PROTO_NVME:
1521 		if ((retval = camdd_probe_pass_nvme(cam_dev, ccb, probe_retry_count,
1522 						arglist, probe_timeout, &maxsector,
1523 						&block_len))) {
1524 			goto bailout;
1525 		}
1526 		break;
1527 	default:
1528 		errx(1, "Unsupported PROTO type %d", cgd.protocol);
1529 		break; /*NOTREACHED*/
1530 	}
1531 
1532 	if (block_len == 0) {
1533 		warnx("Sector size for %s%u is 0, cannot continue",
1534 		    cam_dev->device_name, cam_dev->dev_unit_num);
1535 		goto bailout_error;
1536 	}
1537 
1538 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cpi);
1539 
1540 	ccb->ccb_h.func_code = XPT_PATH_INQ;
1541 	ccb->ccb_h.flags = CAM_DIR_NONE;
1542 	ccb->ccb_h.retry_count = 1;
1543 
1544 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1545 		warn("error sending XPT_PATH_INQ CCB");
1546 
1547 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1548 				CAM_EPF_ALL, stderr);
1549 		goto bailout;
1550 	}
1551 
1552 	EV_SET(&ke, cam_dev->fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1553 
1554 	dev = camdd_alloc_dev(CAMDD_DEV_PASS, &ke, 1, io_retry_count,
1555 			      io_timeout);
1556 	if (dev == NULL)
1557 		goto bailout;
1558 
1559 	pass_dev = &dev->dev_spec.pass;
1560 	pass_dev->scsi_dev_type = scsi_dev_type;
1561 	pass_dev->protocol = cgd.protocol;
1562 	pass_dev->dev = cam_dev;
1563 	pass_dev->max_sector = maxsector;
1564 	pass_dev->block_len = block_len;
1565 	pass_dev->cpi_maxio = ccb->cpi.maxio;
1566 	snprintf(dev->device_name, sizeof(dev->device_name), "%s%u",
1567 		 pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
1568 	dev->sector_size = block_len;
1569 	dev->max_sector = maxsector;
1570 
1571 
1572 	/*
1573 	 * Determine the optimal blocksize to use for this device.
1574 	 */
1575 
1576 	/*
1577 	 * If the controller has not specified a maximum I/O size,
1578 	 * just go with 128K as a somewhat conservative value.
1579 	 */
1580 	if (pass_dev->cpi_maxio == 0)
1581 		cpi_maxio = 131072;
1582 	else
1583 		cpi_maxio = pass_dev->cpi_maxio;
1584 
1585 	/*
1586 	 * If the controller has a large maximum I/O size, limit it
1587 	 * to something smaller so that the kernel doesn't have trouble
1588 	 * allocating buffers to copy data in and out for us.
1589 	 * XXX KDM this is until we have unmapped I/O support in the kernel.
1590 	 */
1591 	max_iosize = min(cpi_maxio, CAMDD_PASS_MAX_BLOCK);
1592 
1593 	/*
1594 	 * If we weren't able to get a block size for some reason,
1595 	 * default to 512 bytes.
1596 	 */
1597 	block_len = pass_dev->block_len;
1598 	if (block_len == 0)
1599 		block_len = 512;
1600 
1601 	/*
1602 	 * Figure out how many blocksize chunks will fit in the
1603 	 * maximum I/O size.
1604 	 */
1605 	pass_numblocks = max_iosize / block_len;
1606 
1607 	/*
1608 	 * And finally, multiple the number of blocks by the LBA
1609 	 * length to get our maximum block size;
1610 	 */
1611 	dev->blocksize = pass_numblocks * block_len;
1612 
1613 	if (io_opts->blocksize != 0) {
1614 		if ((io_opts->blocksize % dev->sector_size) != 0) {
1615 			warnx("Blocksize %ju for %s is not a multiple of "
1616 			      "sector size %u", (uintmax_t)io_opts->blocksize,
1617 			      dev->device_name, dev->sector_size);
1618 			goto bailout_error;
1619 		}
1620 		dev->blocksize = io_opts->blocksize;
1621 	}
1622 	dev->target_queue_depth = CAMDD_PASS_DEFAULT_DEPTH;
1623 	if (io_opts->queue_depth != 0)
1624 		dev->target_queue_depth = io_opts->queue_depth;
1625 
1626 	if (io_opts->offset != 0) {
1627 		if (io_opts->offset > (dev->max_sector * dev->sector_size)) {
1628 			warnx("Offset %ju is past the end of device %s",
1629 			    io_opts->offset, dev->device_name);
1630 			goto bailout_error;
1631 		}
1632 #if 0
1633 		else if ((io_opts->offset % dev->sector_size) != 0) {
1634 			warnx("Offset %ju for %s is not a multiple of the "
1635 			      "sector size %u", io_opts->offset,
1636 			      dev->device_name, dev->sector_size);
1637 			goto bailout_error;
1638 		}
1639 		dev->start_offset_bytes = io_opts->offset;
1640 #endif
1641 	}
1642 
1643 	dev->min_cmd_size = io_opts->min_cmd_size;
1644 
1645 	dev->run = camdd_pass_run;
1646 	dev->fetch = camdd_pass_fetch;
1647 
1648 bailout:
1649 	cam_freeccb(ccb);
1650 
1651 	return (dev);
1652 
1653 bailout_error:
1654 	cam_freeccb(ccb);
1655 
1656 	camdd_free_dev(dev);
1657 
1658 	return (NULL);
1659 }
1660 
1661 void
1662 nvme_read_write(struct ccb_nvmeio *nvmeio, uint32_t retries,
1663 		void (*cbfcnp)(struct cam_periph *, union ccb *),
1664 		uint32_t nsid, int readop, uint64_t lba,
1665 		uint32_t block_count, uint8_t *data_ptr, uint32_t dxfer_len,
1666 		uint32_t timeout)
1667 {
1668 	struct nvme_command *nc = &nvmeio->cmd;
1669 
1670 	nc->opc = readop ? NVME_OPC_READ : NVME_OPC_WRITE;
1671 
1672 	nc->nsid = nsid;
1673 
1674 	nc->cdw10 = lba & UINT32_MAX;
1675 	nc->cdw11 = lba >> 32;
1676 
1677 	/* NLB (bits 15:0) is a zero based value */
1678 	nc->cdw12 = (block_count - 1) & UINT16_MAX;
1679 
1680 	cam_fill_nvmeio(nvmeio,
1681 			retries,
1682 			cbfcnp,
1683 			readop ? CAM_DIR_IN : CAM_DIR_OUT,
1684 			data_ptr,
1685 			dxfer_len,
1686 			timeout);
1687 }
1688 
1689 void *
1690 camdd_worker(void *arg)
1691 {
1692 	struct camdd_dev *dev = arg;
1693 	struct camdd_buf *buf;
1694 	struct timespec ts, *kq_ts;
1695 
1696 	ts.tv_sec = 0;
1697 	ts.tv_nsec = 0;
1698 
1699 	pthread_mutex_lock(&dev->mutex);
1700 
1701 	dev->flags |= CAMDD_DEV_FLAG_ACTIVE;
1702 
1703 	for (;;) {
1704 		struct kevent ke;
1705 		int retval = 0;
1706 
1707 		/*
1708 		 * XXX KDM check the reorder queue depth?
1709 		 */
1710 		if (dev->write_dev == 0) {
1711 			uint32_t our_depth, peer_depth, peer_bytes, our_bytes;
1712 			uint32_t target_depth = dev->target_queue_depth;
1713 			uint32_t peer_target_depth =
1714 			    dev->peer_dev->target_queue_depth;
1715 			uint32_t peer_blocksize = dev->peer_dev->blocksize;
1716 
1717 			camdd_get_depth(dev, &our_depth, &peer_depth,
1718 					&our_bytes, &peer_bytes);
1719 
1720 #if 0
1721 			while (((our_depth < target_depth)
1722 			     && (peer_depth < peer_target_depth))
1723 			    || ((peer_bytes + our_bytes) <
1724 				 (peer_blocksize * 2))) {
1725 #endif
1726 			while (((our_depth + peer_depth) <
1727 			        (target_depth + peer_target_depth))
1728 			    || ((peer_bytes + our_bytes) <
1729 				(peer_blocksize * 3))) {
1730 
1731 				retval = camdd_queue(dev, NULL);
1732 				if (retval == 1)
1733 					break;
1734 				else if (retval != 0) {
1735 					error_exit = 1;
1736 					goto bailout;
1737 				}
1738 
1739 				camdd_get_depth(dev, &our_depth, &peer_depth,
1740 						&our_bytes, &peer_bytes);
1741 			}
1742 		}
1743 		/*
1744 		 * See if we have any I/O that is ready to execute.
1745 		 */
1746 		buf = STAILQ_FIRST(&dev->run_queue);
1747 		if (buf != NULL) {
1748 			while (dev->target_queue_depth > dev->cur_active_io) {
1749 				retval = dev->run(dev);
1750 				if (retval == -1) {
1751 					dev->flags |= CAMDD_DEV_FLAG_EOF;
1752 					error_exit = 1;
1753 					break;
1754 				} else if (retval != 0) {
1755 					break;
1756 				}
1757 			}
1758 		}
1759 
1760 		/*
1761 		 * We've reached EOF, or our partner has reached EOF.
1762 		 */
1763 		if ((dev->flags & CAMDD_DEV_FLAG_EOF)
1764 		 || (dev->flags & CAMDD_DEV_FLAG_PEER_EOF)) {
1765 			if (dev->write_dev != 0) {
1766 			 	if ((STAILQ_EMPTY(&dev->work_queue))
1767 				 && (dev->num_run_queue == 0)
1768 				 && (dev->cur_active_io == 0)) {
1769 					goto bailout;
1770 				}
1771 			} else {
1772 				/*
1773 				 * If we're the reader, and the writer
1774 				 * got EOF, he is already done.  If we got
1775 				 * the EOF, then we need to wait until
1776 				 * everything is flushed out for the writer.
1777 				 */
1778 				if (dev->flags & CAMDD_DEV_FLAG_PEER_EOF) {
1779 					goto bailout;
1780 				} else if ((dev->num_peer_work_queue == 0)
1781 					&& (dev->num_peer_done_queue == 0)
1782 					&& (dev->cur_active_io == 0)
1783 					&& (dev->num_run_queue == 0)) {
1784 					goto bailout;
1785 				}
1786 			}
1787 			/*
1788 			 * XXX KDM need to do something about the pending
1789 			 * queue and cleanup resources.
1790 			 */
1791 		}
1792 
1793 		if ((dev->write_dev == 0)
1794 		 && (dev->cur_active_io == 0)
1795 		 && (dev->peer_bytes_queued < dev->peer_dev->blocksize))
1796 			kq_ts = &ts;
1797 		else
1798 			kq_ts = NULL;
1799 
1800 		/*
1801 		 * Run kevent to see if there are events to process.
1802 		 */
1803 		pthread_mutex_unlock(&dev->mutex);
1804 		retval = kevent(dev->kq, NULL, 0, &ke, 1, kq_ts);
1805 		pthread_mutex_lock(&dev->mutex);
1806 		if (retval == -1) {
1807 			warn("%s: error returned from kevent",__func__);
1808 			goto bailout;
1809 		} else if (retval != 0) {
1810 			switch (ke.filter) {
1811 			case EVFILT_READ:
1812 				if (dev->fetch != NULL) {
1813 					retval = dev->fetch(dev);
1814 					if (retval == -1) {
1815 						error_exit = 1;
1816 						goto bailout;
1817 					}
1818 				}
1819 				break;
1820 			case EVFILT_SIGNAL:
1821 				/*
1822 				 * We register for this so we don't get
1823 				 * an error as a result of a SIGINFO or a
1824 				 * SIGINT.  It will actually get handled
1825 				 * by the signal handler.  If we get a
1826 				 * SIGINT, bail out without printing an
1827 				 * error message.  Any other signals
1828 				 * will result in the error message above.
1829 				 */
1830 				if (ke.ident == SIGINT)
1831 					goto bailout;
1832 				break;
1833 			case EVFILT_USER:
1834 				retval = 0;
1835 				/*
1836 				 * Check to see if the other thread has
1837 				 * queued any I/O for us to do.  (In this
1838 				 * case we're the writer.)
1839 				 */
1840 				for (buf = STAILQ_FIRST(&dev->work_queue);
1841 				     buf != NULL;
1842 				     buf = STAILQ_FIRST(&dev->work_queue)) {
1843 					STAILQ_REMOVE_HEAD(&dev->work_queue,
1844 							   work_links);
1845 					retval = camdd_queue(dev, buf);
1846 					/*
1847 					 * We keep going unless we get an
1848 					 * actual error.  If we get EOF, we
1849 					 * still want to remove the buffers
1850 					 * from the queue and send the back
1851 					 * to the reader thread.
1852 					 */
1853 					if (retval == -1) {
1854 						error_exit = 1;
1855 						goto bailout;
1856 					} else
1857 						retval = 0;
1858 				}
1859 
1860 				/*
1861 				 * Next check to see if the other thread has
1862 				 * queued any completed buffers back to us.
1863 				 * (In this case we're the reader.)
1864 				 */
1865 				for (buf = STAILQ_FIRST(&dev->peer_done_queue);
1866 				     buf != NULL;
1867 				     buf = STAILQ_FIRST(&dev->peer_done_queue)){
1868 					STAILQ_REMOVE_HEAD(
1869 					    &dev->peer_done_queue, work_links);
1870 					dev->num_peer_done_queue--;
1871 					camdd_peer_done(buf);
1872 				}
1873 				break;
1874 			default:
1875 				warnx("%s: unknown kevent filter %d",
1876 				      __func__, ke.filter);
1877 				break;
1878 			}
1879 		}
1880 	}
1881 
1882 bailout:
1883 
1884 	dev->flags &= ~CAMDD_DEV_FLAG_ACTIVE;
1885 
1886 	/* XXX KDM cleanup resources here? */
1887 
1888 	pthread_mutex_unlock(&dev->mutex);
1889 
1890 	need_exit = 1;
1891 	sem_post(&camdd_sem);
1892 
1893 	return (NULL);
1894 }
1895 
1896 /*
1897  * Simplistic translation of CCB status to our local status.
1898  */
1899 camdd_buf_status
1900 camdd_ccb_status(union ccb *ccb, int protocol)
1901 {
1902 	camdd_buf_status status = CAMDD_STATUS_NONE;
1903 	cam_status ccb_status;
1904 
1905 	ccb_status = ccb->ccb_h.status & CAM_STATUS_MASK;
1906 
1907 	switch (protocol) {
1908 	case PROTO_SCSI:
1909 		switch (ccb_status) {
1910 		case CAM_REQ_CMP: {
1911 			if (ccb->csio.resid == 0) {
1912 				status = CAMDD_STATUS_OK;
1913 			} else if (ccb->csio.dxfer_len > ccb->csio.resid) {
1914 				status = CAMDD_STATUS_SHORT_IO;
1915 			} else {
1916 				status = CAMDD_STATUS_EOF;
1917 			}
1918 			break;
1919 		}
1920 		case CAM_SCSI_STATUS_ERROR: {
1921 			switch (ccb->csio.scsi_status) {
1922 			case SCSI_STATUS_OK:
1923 			case SCSI_STATUS_COND_MET:
1924 			case SCSI_STATUS_INTERMED:
1925 			case SCSI_STATUS_INTERMED_COND_MET:
1926 				status = CAMDD_STATUS_OK;
1927 				break;
1928 			case SCSI_STATUS_CMD_TERMINATED:
1929 			case SCSI_STATUS_CHECK_COND:
1930 			case SCSI_STATUS_QUEUE_FULL:
1931 			case SCSI_STATUS_BUSY:
1932 			case SCSI_STATUS_RESERV_CONFLICT:
1933 			default:
1934 				status = CAMDD_STATUS_ERROR;
1935 				break;
1936 			}
1937 			break;
1938 		}
1939 		default:
1940 			status = CAMDD_STATUS_ERROR;
1941 			break;
1942 		}
1943 		break;
1944 	case PROTO_NVME:
1945 		switch (ccb_status) {
1946 		case CAM_REQ_CMP:
1947 			status = CAMDD_STATUS_OK;
1948 			break;
1949 		default:
1950 			status = CAMDD_STATUS_ERROR;
1951 			break;
1952 		}
1953 		break;
1954 	default:
1955 		status = CAMDD_STATUS_ERROR;
1956 		break;
1957 	}
1958 
1959 	return (status);
1960 }
1961 
1962 /*
1963  * Queue a buffer to our peer's work thread for writing.
1964  *
1965  * Returns 0 for success, -1 for failure, 1 if the other thread exited.
1966  */
1967 int
1968 camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf)
1969 {
1970 	struct kevent ke;
1971 	STAILQ_HEAD(, camdd_buf) local_queue;
1972 	struct camdd_buf *buf1, *buf2;
1973 	struct camdd_buf_data *data = NULL;
1974 	uint64_t peer_bytes_queued = 0;
1975 	int active = 1;
1976 	int retval = 0;
1977 
1978 	STAILQ_INIT(&local_queue);
1979 
1980 	/*
1981 	 * Since we're the reader, we need to queue our I/O to the writer
1982 	 * in sequential order in order to make sure it gets written out
1983 	 * in sequential order.
1984 	 *
1985 	 * Check the next expected I/O starting offset.  If this doesn't
1986 	 * match, put it on the reorder queue.
1987 	 */
1988 	if ((buf->lba * dev->sector_size) != dev->next_completion_pos_bytes) {
1989 
1990 		/*
1991 		 * If there is nothing on the queue, there is no sorting
1992 		 * needed.
1993 		 */
1994 		if (STAILQ_EMPTY(&dev->reorder_queue)) {
1995 			STAILQ_INSERT_TAIL(&dev->reorder_queue, buf, links);
1996 			dev->num_reorder_queue++;
1997 			goto bailout;
1998 		}
1999 
2000 		/*
2001 		 * Sort in ascending order by starting LBA.  There should
2002 		 * be no identical LBAs.
2003 		 */
2004 		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
2005 		     buf1 = buf2) {
2006 			buf2 = STAILQ_NEXT(buf1, links);
2007 			if (buf->lba < buf1->lba) {
2008 				/*
2009 				 * If we're less than the first one, then
2010 				 * we insert at the head of the list
2011 				 * because this has to be the first element
2012 				 * on the list.
2013 				 */
2014 				STAILQ_INSERT_HEAD(&dev->reorder_queue,
2015 						   buf, links);
2016 				dev->num_reorder_queue++;
2017 				break;
2018 			} else if (buf->lba > buf1->lba) {
2019 				if (buf2 == NULL) {
2020 					STAILQ_INSERT_TAIL(&dev->reorder_queue,
2021 					    buf, links);
2022 					dev->num_reorder_queue++;
2023 					break;
2024 				} else if (buf->lba < buf2->lba) {
2025 					STAILQ_INSERT_AFTER(&dev->reorder_queue,
2026 					    buf1, buf, links);
2027 					dev->num_reorder_queue++;
2028 					break;
2029 				}
2030 			} else {
2031 				errx(1, "Found buffers with duplicate LBA %ju!",
2032 				     buf->lba);
2033 			}
2034 		}
2035 		goto bailout;
2036 	} else {
2037 
2038 		/*
2039 		 * We're the next expected I/O completion, so put ourselves
2040 		 * on the local queue to be sent to the writer.  We use
2041 		 * work_links here so that we can queue this to the
2042 		 * peer_work_queue before taking the buffer off of the
2043 		 * local_queue.
2044 		 */
2045 		dev->next_completion_pos_bytes += buf->len;
2046 		STAILQ_INSERT_TAIL(&local_queue, buf, work_links);
2047 
2048 		/*
2049 		 * Go through the reorder queue looking for more sequential
2050 		 * I/O and add it to the local queue.
2051 		 */
2052 		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
2053 		     buf1 = STAILQ_FIRST(&dev->reorder_queue)) {
2054 			/*
2055 			 * As soon as we see an I/O that is out of sequence,
2056 			 * we're done.
2057 			 */
2058 			if ((buf1->lba * dev->sector_size) !=
2059 			     dev->next_completion_pos_bytes)
2060 				break;
2061 
2062 			STAILQ_REMOVE_HEAD(&dev->reorder_queue, links);
2063 			dev->num_reorder_queue--;
2064 			STAILQ_INSERT_TAIL(&local_queue, buf1, work_links);
2065 			dev->next_completion_pos_bytes += buf1->len;
2066 		}
2067 	}
2068 
2069 	/*
2070 	 * Setup the event to let the other thread know that it has work
2071 	 * pending.
2072 	 */
2073 	EV_SET(&ke, (uintptr_t)&dev->peer_dev->work_queue, EVFILT_USER, 0,
2074 	       NOTE_TRIGGER, 0, NULL);
2075 
2076 	/*
2077 	 * Put this on our shadow queue so that we know what we've queued
2078 	 * to the other thread.
2079 	 */
2080 	STAILQ_FOREACH_SAFE(buf1, &local_queue, work_links, buf2) {
2081 		if (buf1->buf_type != CAMDD_BUF_DATA) {
2082 			errx(1, "%s: should have a data buffer, not an "
2083 			    "indirect buffer", __func__);
2084 		}
2085 		data = &buf1->buf_type_spec.data;
2086 
2087 		/*
2088 		 * We only need to send one EOF to the writer, and don't
2089 		 * need to continue sending EOFs after that.
2090 		 */
2091 		if (buf1->status == CAMDD_STATUS_EOF) {
2092 			if (dev->flags & CAMDD_DEV_FLAG_EOF_SENT) {
2093 				STAILQ_REMOVE(&local_queue, buf1, camdd_buf,
2094 				    work_links);
2095 				camdd_release_buf(buf1);
2096 				retval = 1;
2097 				continue;
2098 			}
2099 			dev->flags |= CAMDD_DEV_FLAG_EOF_SENT;
2100 		}
2101 
2102 
2103 		STAILQ_INSERT_TAIL(&dev->peer_work_queue, buf1, links);
2104 		peer_bytes_queued += (data->fill_len - data->resid);
2105 		dev->peer_bytes_queued += (data->fill_len - data->resid);
2106 		dev->num_peer_work_queue++;
2107 	}
2108 
2109 	if (STAILQ_FIRST(&local_queue) == NULL)
2110 		goto bailout;
2111 
2112 	/*
2113 	 * Drop our mutex and pick up the other thread's mutex.  We need to
2114 	 * do this to avoid deadlocks.
2115 	 */
2116 	pthread_mutex_unlock(&dev->mutex);
2117 	pthread_mutex_lock(&dev->peer_dev->mutex);
2118 
2119 	if (dev->peer_dev->flags & CAMDD_DEV_FLAG_ACTIVE) {
2120 		/*
2121 		 * Put the buffers on the other thread's incoming work queue.
2122 		 */
2123 		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2124 		     buf1 = STAILQ_FIRST(&local_queue)) {
2125 			STAILQ_REMOVE_HEAD(&local_queue, work_links);
2126 			STAILQ_INSERT_TAIL(&dev->peer_dev->work_queue, buf1,
2127 					   work_links);
2128 		}
2129 		/*
2130 		 * Send an event to the other thread's kqueue to let it know
2131 		 * that there is something on the work queue.
2132 		 */
2133 		retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2134 		if (retval == -1)
2135 			warn("%s: unable to add peer work_queue kevent",
2136 			     __func__);
2137 		else
2138 			retval = 0;
2139 	} else
2140 		active = 0;
2141 
2142 	pthread_mutex_unlock(&dev->peer_dev->mutex);
2143 	pthread_mutex_lock(&dev->mutex);
2144 
2145 	/*
2146 	 * If the other side isn't active, run through the queue and
2147 	 * release all of the buffers.
2148 	 */
2149 	if (active == 0) {
2150 		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2151 		     buf1 = STAILQ_FIRST(&local_queue)) {
2152 			STAILQ_REMOVE_HEAD(&local_queue, work_links);
2153 			STAILQ_REMOVE(&dev->peer_work_queue, buf1, camdd_buf,
2154 				      links);
2155 			dev->num_peer_work_queue--;
2156 			camdd_release_buf(buf1);
2157 		}
2158 		dev->peer_bytes_queued -= peer_bytes_queued;
2159 		retval = 1;
2160 	}
2161 
2162 bailout:
2163 	return (retval);
2164 }
2165 
2166 /*
2167  * Return a buffer to the reader thread when we have completed writing it.
2168  */
2169 int
2170 camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf)
2171 {
2172 	struct kevent ke;
2173 	int retval = 0;
2174 
2175 	/*
2176 	 * Setup the event to let the other thread know that we have
2177 	 * completed a buffer.
2178 	 */
2179 	EV_SET(&ke, (uintptr_t)&dev->peer_dev->peer_done_queue, EVFILT_USER, 0,
2180 	       NOTE_TRIGGER, 0, NULL);
2181 
2182 	/*
2183 	 * Drop our lock and acquire the other thread's lock before
2184 	 * manipulating
2185 	 */
2186 	pthread_mutex_unlock(&dev->mutex);
2187 	pthread_mutex_lock(&dev->peer_dev->mutex);
2188 
2189 	/*
2190 	 * Put the buffer on the reader thread's peer done queue now that
2191 	 * we have completed it.
2192 	 */
2193 	STAILQ_INSERT_TAIL(&dev->peer_dev->peer_done_queue, peer_buf,
2194 			   work_links);
2195 	dev->peer_dev->num_peer_done_queue++;
2196 
2197 	/*
2198 	 * Send an event to the peer thread to let it know that we've added
2199 	 * something to its peer done queue.
2200 	 */
2201 	retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2202 	if (retval == -1)
2203 		warn("%s: unable to add peer_done_queue kevent", __func__);
2204 	else
2205 		retval = 0;
2206 
2207 	/*
2208 	 * Drop the other thread's lock and reacquire ours.
2209 	 */
2210 	pthread_mutex_unlock(&dev->peer_dev->mutex);
2211 	pthread_mutex_lock(&dev->mutex);
2212 
2213 	return (retval);
2214 }
2215 
2216 /*
2217  * Free a buffer that was written out by the writer thread and returned to
2218  * the reader thread.
2219  */
2220 void
2221 camdd_peer_done(struct camdd_buf *buf)
2222 {
2223 	struct camdd_dev *dev;
2224 	struct camdd_buf_data *data;
2225 
2226 	dev = buf->dev;
2227 	if (buf->buf_type != CAMDD_BUF_DATA) {
2228 		errx(1, "%s: should have a data buffer, not an "
2229 		    "indirect buffer", __func__);
2230 	}
2231 
2232 	data = &buf->buf_type_spec.data;
2233 
2234 	STAILQ_REMOVE(&dev->peer_work_queue, buf, camdd_buf, links);
2235 	dev->num_peer_work_queue--;
2236 	dev->peer_bytes_queued -= (data->fill_len - data->resid);
2237 
2238 	if (buf->status == CAMDD_STATUS_EOF)
2239 		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2240 
2241 	STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2242 }
2243 
2244 /*
2245  * Assumes caller holds the lock for this device.
2246  */
2247 void
2248 camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
2249 		   int *error_count)
2250 {
2251 	int retval = 0;
2252 
2253 	/*
2254 	 * If we're the reader, we need to send the completed I/O
2255 	 * to the writer.  If we're the writer, we need to just
2256 	 * free up resources, or let the reader know if we've
2257 	 * encountered an error.
2258 	 */
2259 	if (dev->write_dev == 0) {
2260 		retval = camdd_queue_peer_buf(dev, buf);
2261 		if (retval != 0)
2262 			(*error_count)++;
2263 	} else {
2264 		struct camdd_buf *tmp_buf, *next_buf;
2265 
2266 		STAILQ_FOREACH_SAFE(tmp_buf, &buf->src_list, src_links,
2267 				    next_buf) {
2268 			struct camdd_buf *src_buf;
2269 			struct camdd_buf_indirect *indirect;
2270 
2271 			STAILQ_REMOVE(&buf->src_list, tmp_buf,
2272 				      camdd_buf, src_links);
2273 
2274 			tmp_buf->status = buf->status;
2275 
2276 			if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
2277 				camdd_complete_peer_buf(dev, tmp_buf);
2278 				continue;
2279 			}
2280 
2281 			indirect = &tmp_buf->buf_type_spec.indirect;
2282 			src_buf = indirect->src_buf;
2283 			src_buf->refcount--;
2284 			/*
2285 			 * XXX KDM we probably need to account for
2286 			 * exactly how many bytes we were able to
2287 			 * write.  Allocate the residual to the
2288 			 * first N buffers?  Or just track the
2289 			 * number of bytes written?  Right now the reader
2290 			 * doesn't do anything with a residual.
2291 			 */
2292 			src_buf->status = buf->status;
2293 			if (src_buf->refcount <= 0)
2294 				camdd_complete_peer_buf(dev, src_buf);
2295 			STAILQ_INSERT_TAIL(&dev->free_indirect_queue,
2296 					   tmp_buf, links);
2297 		}
2298 
2299 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2300 	}
2301 }
2302 
2303 /*
2304  * Fetch all completed commands from the pass(4) device.
2305  *
2306  * Returns the number of commands received, or -1 if any of the commands
2307  * completed with an error.  Returns 0 if no commands are available.
2308  */
2309 int
2310 camdd_pass_fetch(struct camdd_dev *dev)
2311 {
2312 	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2313 	union ccb ccb;
2314 	int retval = 0, num_fetched = 0, error_count = 0;
2315 
2316 	pthread_mutex_unlock(&dev->mutex);
2317 	/*
2318 	 * XXX KDM we don't distinguish between EFAULT and ENOENT.
2319 	 */
2320 	while ((retval = ioctl(pass_dev->dev->fd, CAMIOGET, &ccb)) != -1) {
2321 		struct camdd_buf *buf;
2322 		struct camdd_buf_data *data;
2323 		cam_status ccb_status;
2324 		union ccb *buf_ccb;
2325 
2326 		buf = ccb.ccb_h.ccb_buf;
2327 		data = &buf->buf_type_spec.data;
2328 		buf_ccb = &data->ccb;
2329 
2330 		num_fetched++;
2331 
2332 		/*
2333 		 * Copy the CCB back out so we get status, sense data, etc.
2334 		 */
2335 		bcopy(&ccb, buf_ccb, sizeof(ccb));
2336 
2337 		pthread_mutex_lock(&dev->mutex);
2338 
2339 		/*
2340 		 * We're now done, so take this off the active queue.
2341 		 */
2342 		STAILQ_REMOVE(&dev->active_queue, buf, camdd_buf, links);
2343 		dev->cur_active_io--;
2344 
2345 		ccb_status = ccb.ccb_h.status & CAM_STATUS_MASK;
2346 		if (ccb_status != CAM_REQ_CMP) {
2347 			cam_error_print(pass_dev->dev, &ccb, CAM_ESF_ALL,
2348 					CAM_EPF_ALL, stderr);
2349 		}
2350 
2351 		switch (pass_dev->protocol) {
2352 		case PROTO_SCSI:
2353 			data->resid = ccb.csio.resid;
2354 			dev->bytes_transferred += (ccb.csio.dxfer_len - ccb.csio.resid);
2355 			break;
2356 		case PROTO_NVME:
2357 			data->resid = 0;
2358 			dev->bytes_transferred += ccb.nvmeio.dxfer_len;
2359 			break;
2360 		default:
2361 			return -1;
2362 			break;
2363 		}
2364 
2365 		if (buf->status == CAMDD_STATUS_NONE)
2366 			buf->status = camdd_ccb_status(&ccb, pass_dev->protocol);
2367 		if (buf->status == CAMDD_STATUS_ERROR)
2368 			error_count++;
2369 		else if (buf->status == CAMDD_STATUS_EOF) {
2370 			/*
2371 			 * Once we queue this buffer to our partner thread,
2372 			 * he will know that we've hit EOF.
2373 			 */
2374 			dev->flags |= CAMDD_DEV_FLAG_EOF;
2375 		}
2376 
2377 		camdd_complete_buf(dev, buf, &error_count);
2378 
2379 		/*
2380 		 * Unlock in preparation for the ioctl call.
2381 		 */
2382 		pthread_mutex_unlock(&dev->mutex);
2383 	}
2384 
2385 	pthread_mutex_lock(&dev->mutex);
2386 
2387 	if (error_count > 0)
2388 		return (-1);
2389 	else
2390 		return (num_fetched);
2391 }
2392 
2393 /*
2394  * Returns -1 for error, 0 for success/continue, and 1 for resource
2395  * shortage/stop processing.
2396  */
2397 int
2398 camdd_file_run(struct camdd_dev *dev)
2399 {
2400 	struct camdd_dev_file *file_dev = &dev->dev_spec.file;
2401 	struct camdd_buf_data *data;
2402 	struct camdd_buf *buf;
2403 	off_t io_offset;
2404 	int retval = 0, write_dev = dev->write_dev;
2405 	int error_count = 0, no_resources = 0, double_buf_needed = 0;
2406 	uint32_t num_sectors = 0, db_len = 0;
2407 
2408 	buf = STAILQ_FIRST(&dev->run_queue);
2409 	if (buf == NULL) {
2410 		no_resources = 1;
2411 		goto bailout;
2412 	} else if ((dev->write_dev == 0)
2413 		&& (dev->flags & (CAMDD_DEV_FLAG_EOF |
2414 				  CAMDD_DEV_FLAG_EOF_SENT))) {
2415 		STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2416 		dev->num_run_queue--;
2417 		buf->status = CAMDD_STATUS_EOF;
2418 		error_count++;
2419 		goto bailout;
2420 	}
2421 
2422 	/*
2423 	 * If we're writing, we need to go through the source buffer list
2424 	 * and create an S/G list.
2425 	 */
2426 	if (write_dev != 0) {
2427 		retval = camdd_buf_sg_create(buf, /*iovec*/ 1,
2428 		    dev->sector_size, &num_sectors, &double_buf_needed);
2429 		if (retval != 0) {
2430 			no_resources = 1;
2431 			goto bailout;
2432 		}
2433 	}
2434 
2435 	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2436 	dev->num_run_queue--;
2437 
2438 	data = &buf->buf_type_spec.data;
2439 
2440 	/*
2441 	 * pread(2) and pwrite(2) offsets are byte offsets.
2442 	 */
2443 	io_offset = buf->lba * dev->sector_size;
2444 
2445 	/*
2446 	 * Unlock the mutex while we read or write.
2447 	 */
2448 	pthread_mutex_unlock(&dev->mutex);
2449 
2450 	/*
2451 	 * Note that we don't need to double buffer if we're the reader
2452 	 * because in that case, we have allocated a single buffer of
2453 	 * sufficient size to do the read.  This copy is necessary on
2454 	 * writes because if one of the components of the S/G list is not
2455 	 * a sector size multiple, the kernel will reject the write.  This
2456 	 * is unfortunate but not surprising.  So this will make sure that
2457 	 * we're using a single buffer that is a multiple of the sector size.
2458 	 */
2459 	if ((double_buf_needed != 0)
2460 	 && (data->sg_count > 1)
2461 	 && (write_dev != 0)) {
2462 		uint32_t cur_offset;
2463 		int i;
2464 
2465 		if (file_dev->tmp_buf == NULL)
2466 			file_dev->tmp_buf = calloc(dev->blocksize, 1);
2467 		if (file_dev->tmp_buf == NULL) {
2468 			buf->status = CAMDD_STATUS_ERROR;
2469 			error_count++;
2470 			pthread_mutex_lock(&dev->mutex);
2471 			goto bailout;
2472 		}
2473 		for (i = 0, cur_offset = 0; i < data->sg_count; i++) {
2474 			bcopy(data->iovec[i].iov_base,
2475 			    &file_dev->tmp_buf[cur_offset],
2476 			    data->iovec[i].iov_len);
2477 			cur_offset += data->iovec[i].iov_len;
2478 		}
2479 		db_len = cur_offset;
2480 	}
2481 
2482 	if (file_dev->file_flags & CAMDD_FF_CAN_SEEK) {
2483 		if (write_dev == 0) {
2484 			/*
2485 			 * XXX KDM is there any way we would need a S/G
2486 			 * list here?
2487 			 */
2488 			retval = pread(file_dev->fd, data->buf,
2489 			    buf->len, io_offset);
2490 		} else {
2491 			if (double_buf_needed != 0) {
2492 				retval = pwrite(file_dev->fd, file_dev->tmp_buf,
2493 				    db_len, io_offset);
2494 			} else if (data->sg_count == 0) {
2495 				retval = pwrite(file_dev->fd, data->buf,
2496 				    data->fill_len, io_offset);
2497 			} else {
2498 				retval = pwritev(file_dev->fd, data->iovec,
2499 				    data->sg_count, io_offset);
2500 			}
2501 		}
2502 	} else {
2503 		if (write_dev == 0) {
2504 			/*
2505 			 * XXX KDM is there any way we would need a S/G
2506 			 * list here?
2507 			 */
2508 			retval = read(file_dev->fd, data->buf, buf->len);
2509 		} else {
2510 			if (double_buf_needed != 0) {
2511 				retval = write(file_dev->fd, file_dev->tmp_buf,
2512 				    db_len);
2513 			} else if (data->sg_count == 0) {
2514 				retval = write(file_dev->fd, data->buf,
2515 				    data->fill_len);
2516 			} else {
2517 				retval = writev(file_dev->fd, data->iovec,
2518 				    data->sg_count);
2519 			}
2520 		}
2521 	}
2522 
2523 	/* We're done, re-acquire the lock */
2524 	pthread_mutex_lock(&dev->mutex);
2525 
2526 	if (retval >= (ssize_t)data->fill_len) {
2527 		/*
2528 		 * If the bytes transferred is more than the request size,
2529 		 * that indicates an overrun, which should only happen at
2530 		 * the end of a transfer if we have to round up to a sector
2531 		 * boundary.
2532 		 */
2533 		if (buf->status == CAMDD_STATUS_NONE)
2534 			buf->status = CAMDD_STATUS_OK;
2535 		data->resid = 0;
2536 		dev->bytes_transferred += retval;
2537 	} else if (retval == -1) {
2538 		warn("Error %s %s", (write_dev) ? "writing to" :
2539 		    "reading from", file_dev->filename);
2540 
2541 		buf->status = CAMDD_STATUS_ERROR;
2542 		data->resid = data->fill_len;
2543 		error_count++;
2544 
2545 		if (dev->debug == 0)
2546 			goto bailout;
2547 
2548 		if ((double_buf_needed != 0)
2549 		 && (write_dev != 0)) {
2550 			fprintf(stderr, "%s: fd %d, DB buf %p, len %u lba %ju "
2551 			    "offset %ju\n", __func__, file_dev->fd,
2552 			    file_dev->tmp_buf, db_len, (uintmax_t)buf->lba,
2553 			    (uintmax_t)io_offset);
2554 		} else if (data->sg_count == 0) {
2555 			fprintf(stderr, "%s: fd %d, buf %p, len %u, lba %ju "
2556 			    "offset %ju\n", __func__, file_dev->fd, data->buf,
2557 			    data->fill_len, (uintmax_t)buf->lba,
2558 			    (uintmax_t)io_offset);
2559 		} else {
2560 			int i;
2561 
2562 			fprintf(stderr, "%s: fd %d, len %u, lba %ju "
2563 			    "offset %ju\n", __func__, file_dev->fd,
2564 			    data->fill_len, (uintmax_t)buf->lba,
2565 			    (uintmax_t)io_offset);
2566 
2567 			for (i = 0; i < data->sg_count; i++) {
2568 				fprintf(stderr, "index %d ptr %p len %zu\n",
2569 				    i, data->iovec[i].iov_base,
2570 				    data->iovec[i].iov_len);
2571 			}
2572 		}
2573 	} else if (retval == 0) {
2574 		buf->status = CAMDD_STATUS_EOF;
2575 		if (dev->debug != 0)
2576 			printf("%s: got EOF from %s!\n", __func__,
2577 			    file_dev->filename);
2578 		data->resid = data->fill_len;
2579 		error_count++;
2580 	} else if (retval < (ssize_t)data->fill_len) {
2581 		if (buf->status == CAMDD_STATUS_NONE)
2582 			buf->status = CAMDD_STATUS_SHORT_IO;
2583 		data->resid = data->fill_len - retval;
2584 		dev->bytes_transferred += retval;
2585 	}
2586 
2587 bailout:
2588 	if (buf != NULL) {
2589 		if (buf->status == CAMDD_STATUS_EOF) {
2590 			struct camdd_buf *buf2;
2591 			dev->flags |= CAMDD_DEV_FLAG_EOF;
2592 			STAILQ_FOREACH(buf2, &dev->run_queue, links)
2593 				buf2->status = CAMDD_STATUS_EOF;
2594 		}
2595 
2596 		camdd_complete_buf(dev, buf, &error_count);
2597 	}
2598 
2599 	if (error_count != 0)
2600 		return (-1);
2601 	else if (no_resources != 0)
2602 		return (1);
2603 	else
2604 		return (0);
2605 }
2606 
2607 /*
2608  * Execute one command from the run queue.  Returns 0 for success, 1 for
2609  * stop processing, and -1 for error.
2610  */
2611 int
2612 camdd_pass_run(struct camdd_dev *dev)
2613 {
2614 	struct camdd_buf *buf = NULL;
2615 	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2616 	struct camdd_buf_data *data;
2617 	uint32_t num_blocks, sectors_used = 0;
2618 	union ccb *ccb;
2619 	int retval = 0, is_write = dev->write_dev;
2620 	int double_buf_needed = 0;
2621 
2622 	buf = STAILQ_FIRST(&dev->run_queue);
2623 	if (buf == NULL) {
2624 		retval = 1;
2625 		goto bailout;
2626 	}
2627 
2628 	/*
2629 	 * If we're writing, we need to go through the source buffer list
2630 	 * and create an S/G list.
2631 	 */
2632 	if (is_write != 0) {
2633 		retval = camdd_buf_sg_create(buf, /*iovec*/ 0,dev->sector_size,
2634 		    &sectors_used, &double_buf_needed);
2635 		if (retval != 0) {
2636 			retval = -1;
2637 			goto bailout;
2638 		}
2639 	}
2640 
2641 	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2642 	dev->num_run_queue--;
2643 
2644 	data = &buf->buf_type_spec.data;
2645 
2646 	/*
2647 	 * In almost every case the number of blocks should be the device
2648 	 * block size.  The exception may be at the end of an I/O stream
2649 	 * for a partial block or at the end of a device.
2650 	 */
2651 	if (is_write != 0)
2652 		num_blocks = sectors_used;
2653 	else
2654 		num_blocks = data->fill_len / pass_dev->block_len;
2655 
2656 	ccb = &data->ccb;
2657 
2658 	switch (pass_dev->protocol) {
2659 	case PROTO_SCSI:
2660 		CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
2661 
2662 		scsi_read_write(&ccb->csio,
2663 				/*retries*/ dev->retry_count,
2664 				/*cbfcnp*/ NULL,
2665 				/*tag_action*/ MSG_SIMPLE_Q_TAG,
2666 				/*readop*/ (dev->write_dev == 0) ? SCSI_RW_READ :
2667 					   SCSI_RW_WRITE,
2668 				/*byte2*/ 0,
2669 				/*minimum_cmd_size*/ dev->min_cmd_size,
2670 				/*lba*/ buf->lba,
2671 				/*block_count*/ num_blocks,
2672 				/*data_ptr*/ (data->sg_count != 0) ?
2673 					     (uint8_t *)data->segs : data->buf,
2674 				/*dxfer_len*/ (num_blocks * pass_dev->block_len),
2675 				/*sense_len*/ SSD_FULL_SIZE,
2676 				/*timeout*/ dev->io_timeout);
2677 
2678 		if (data->sg_count != 0) {
2679 			ccb->csio.sglist_cnt = data->sg_count;
2680 		}
2681 		break;
2682 	case PROTO_NVME:
2683 		CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->nvmeio);
2684 
2685 		nvme_read_write(&ccb->nvmeio,
2686 				/*retries*/ dev->retry_count,
2687 				/*cbfcnp*/ NULL,
2688 				/*nsid*/ pass_dev->dev->target_lun & UINT32_MAX,
2689 				/*readop*/ dev->write_dev == 0,
2690 				/*lba*/ buf->lba,
2691 				/*block_count*/ num_blocks,
2692 				/*data_ptr*/ (data->sg_count != 0) ?
2693 					     (uint8_t *)data->segs : data->buf,
2694 				/*dxfer_len*/ (num_blocks * pass_dev->block_len),
2695 				/*timeout*/ dev->io_timeout);
2696 
2697 		ccb->nvmeio.sglist_cnt = data->sg_count;
2698 		break;
2699 	default:
2700 		retval = -1;
2701 		goto bailout;
2702 	}
2703 
2704 	/* Disable freezing the device queue */
2705 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
2706 
2707 	if (dev->retry_count != 0)
2708 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
2709 
2710 	if (data->sg_count != 0) {
2711 		ccb->ccb_h.flags |= CAM_DATA_SG;
2712 	}
2713 
2714 	/*
2715 	 * Store a pointer to the buffer in the CCB.  The kernel will
2716 	 * restore this when we get it back, and we'll use it to identify
2717 	 * the buffer this CCB came from.
2718 	 */
2719 	ccb->ccb_h.ccb_buf = buf;
2720 
2721 	/*
2722 	 * Unlock our mutex in preparation for issuing the ioctl.
2723 	 */
2724 	pthread_mutex_unlock(&dev->mutex);
2725 	/*
2726 	 * Queue the CCB to the pass(4) driver.
2727 	 */
2728 	if (ioctl(pass_dev->dev->fd, CAMIOQUEUE, ccb) == -1) {
2729 		pthread_mutex_lock(&dev->mutex);
2730 
2731 		warn("%s: error sending CAMIOQUEUE ioctl to %s%u", __func__,
2732 		     pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
2733 		warn("%s: CCB address is %p", __func__, ccb);
2734 		retval = -1;
2735 
2736 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2737 	} else {
2738 		pthread_mutex_lock(&dev->mutex);
2739 
2740 		dev->cur_active_io++;
2741 		STAILQ_INSERT_TAIL(&dev->active_queue, buf, links);
2742 	}
2743 
2744 bailout:
2745 	return (retval);
2746 }
2747 
2748 int
2749 camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len)
2750 {
2751 	struct camdd_dev_pass *pass_dev;
2752 	uint32_t num_blocks;
2753 	int retval = 0;
2754 
2755 	pass_dev = &dev->dev_spec.pass;
2756 
2757 	*lba = dev->next_io_pos_bytes / dev->sector_size;
2758 	*len = dev->blocksize;
2759 	num_blocks = *len / dev->sector_size;
2760 
2761 	/*
2762 	 * If max_sector is 0, then we have no set limit.  This can happen
2763 	 * if we're writing to a file in a filesystem, or reading from
2764 	 * something like /dev/zero.
2765 	 */
2766 	if ((dev->max_sector != 0)
2767 	 || (dev->sector_io_limit != 0)) {
2768 		uint64_t max_sector;
2769 
2770 		if ((dev->max_sector != 0)
2771 		 && (dev->sector_io_limit != 0))
2772 			max_sector = min(dev->sector_io_limit, dev->max_sector);
2773 		else if (dev->max_sector != 0)
2774 			max_sector = dev->max_sector;
2775 		else
2776 			max_sector = dev->sector_io_limit;
2777 
2778 
2779 		/*
2780 		 * Check to see whether we're starting off past the end of
2781 		 * the device.  If so, we need to just send an EOF
2782 		 * notification to the writer.
2783 		 */
2784 		if (*lba > max_sector) {
2785 			*len = 0;
2786 			retval = 1;
2787 		} else if (((*lba + num_blocks) > max_sector + 1)
2788 			|| ((*lba + num_blocks) < *lba)) {
2789 			/*
2790 			 * If we get here (but pass the first check), we
2791 			 * can trim the request length down to go to the
2792 			 * end of the device.
2793 			 */
2794 			num_blocks = (max_sector + 1) - *lba;
2795 			*len = num_blocks * dev->sector_size;
2796 			retval = 1;
2797 		}
2798 	}
2799 
2800 	dev->next_io_pos_bytes += *len;
2801 
2802 	return (retval);
2803 }
2804 
2805 /*
2806  * Returns 0 for success, 1 for EOF detected, and -1 for failure.
2807  */
2808 int
2809 camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf)
2810 {
2811 	struct camdd_buf *buf = NULL;
2812 	struct camdd_buf_data *data;
2813 	struct camdd_dev_pass *pass_dev;
2814 	size_t new_len;
2815 	struct camdd_buf_data *rb_data;
2816 	int is_write = dev->write_dev;
2817 	int eof_flush_needed = 0;
2818 	int retval = 0;
2819 	int error;
2820 
2821 	pass_dev = &dev->dev_spec.pass;
2822 
2823 	/*
2824 	 * If we've gotten EOF or our partner has, we should not continue
2825 	 * queueing I/O.  If we're a writer, though, we should continue
2826 	 * to write any buffers that don't have EOF status.
2827 	 */
2828 	if ((dev->flags & CAMDD_DEV_FLAG_EOF)
2829 	 || ((dev->flags & CAMDD_DEV_FLAG_PEER_EOF)
2830 	  && (is_write == 0))) {
2831 		/*
2832 		 * Tell the worker thread that we have seen EOF.
2833 		 */
2834 		retval = 1;
2835 
2836 		/*
2837 		 * If we're the writer, send the buffer back with EOF status.
2838 		 */
2839 		if (is_write) {
2840 			read_buf->status = CAMDD_STATUS_EOF;
2841 
2842 			error = camdd_complete_peer_buf(dev, read_buf);
2843 		}
2844 		goto bailout;
2845 	}
2846 
2847 	if (is_write == 0) {
2848 		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2849 		if (buf == NULL) {
2850 			retval = -1;
2851 			goto bailout;
2852 		}
2853 		data = &buf->buf_type_spec.data;
2854 
2855 		retval = camdd_get_next_lba_len(dev, &buf->lba, &buf->len);
2856 		if (retval != 0) {
2857 			buf->status = CAMDD_STATUS_EOF;
2858 
2859 		 	if ((buf->len == 0)
2860 			 && ((dev->flags & (CAMDD_DEV_FLAG_EOF_SENT |
2861 			     CAMDD_DEV_FLAG_EOF_QUEUED)) != 0)) {
2862 				camdd_release_buf(buf);
2863 				goto bailout;
2864 			}
2865 			dev->flags |= CAMDD_DEV_FLAG_EOF_QUEUED;
2866 		}
2867 
2868 		data->fill_len = buf->len;
2869 		data->src_start_offset = buf->lba * dev->sector_size;
2870 
2871 		/*
2872 		 * Put this on the run queue.
2873 		 */
2874 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2875 		dev->num_run_queue++;
2876 
2877 		/* We're done. */
2878 		goto bailout;
2879 	}
2880 
2881 	/*
2882 	 * Check for new EOF status from the reader.
2883 	 */
2884 	if ((read_buf->status == CAMDD_STATUS_EOF)
2885 	 || (read_buf->status == CAMDD_STATUS_ERROR)) {
2886 		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2887 		if ((STAILQ_FIRST(&dev->pending_queue) == NULL)
2888 		 && (read_buf->len == 0)) {
2889 			camdd_complete_peer_buf(dev, read_buf);
2890 			retval = 1;
2891 			goto bailout;
2892 		} else
2893 			eof_flush_needed = 1;
2894 	}
2895 
2896 	/*
2897 	 * See if we have a buffer we're composing with pieces from our
2898 	 * partner thread.
2899 	 */
2900 	buf = STAILQ_FIRST(&dev->pending_queue);
2901 	if (buf == NULL) {
2902 		uint64_t lba;
2903 		ssize_t len;
2904 
2905 		retval = camdd_get_next_lba_len(dev, &lba, &len);
2906 		if (retval != 0) {
2907 			read_buf->status = CAMDD_STATUS_EOF;
2908 
2909 			if (len == 0) {
2910 				dev->flags |= CAMDD_DEV_FLAG_EOF;
2911 				error = camdd_complete_peer_buf(dev, read_buf);
2912 				goto bailout;
2913 			}
2914 		}
2915 
2916 		/*
2917 		 * If we don't have a pending buffer, we need to grab a new
2918 		 * one from the free list or allocate another one.
2919 		 */
2920 		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2921 		if (buf == NULL) {
2922 			retval = 1;
2923 			goto bailout;
2924 		}
2925 
2926 		buf->lba = lba;
2927 		buf->len = len;
2928 
2929 		STAILQ_INSERT_TAIL(&dev->pending_queue, buf, links);
2930 		dev->num_pending_queue++;
2931 	}
2932 
2933 	data = &buf->buf_type_spec.data;
2934 
2935 	rb_data = &read_buf->buf_type_spec.data;
2936 
2937 	if ((rb_data->src_start_offset != dev->next_peer_pos_bytes)
2938 	 && (dev->debug != 0)) {
2939 		printf("%s: WARNING: reader offset %#jx != expected offset "
2940 		    "%#jx\n", __func__, (uintmax_t)rb_data->src_start_offset,
2941 		    (uintmax_t)dev->next_peer_pos_bytes);
2942 	}
2943 	dev->next_peer_pos_bytes = rb_data->src_start_offset +
2944 	    (rb_data->fill_len - rb_data->resid);
2945 
2946 	new_len = (rb_data->fill_len - rb_data->resid) + data->fill_len;
2947 	if (new_len < buf->len) {
2948 		/*
2949 		 * There are three cases here:
2950 		 * 1. We need more data to fill up a block, so we put
2951 		 *    this I/O on the queue and wait for more I/O.
2952 		 * 2. We have a pending buffer in the queue that is
2953 		 *    smaller than our blocksize, but we got an EOF.  So we
2954 		 *    need to go ahead and flush the write out.
2955 		 * 3. We got an error.
2956 		 */
2957 
2958 		/*
2959 		 * Increment our fill length.
2960 		 */
2961 		data->fill_len += (rb_data->fill_len - rb_data->resid);
2962 
2963 		/*
2964 		 * Add the new read buffer to the list for writing.
2965 		 */
2966 		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
2967 
2968 		/* Increment the count */
2969 		buf->src_count++;
2970 
2971 		if (eof_flush_needed == 0) {
2972 			/*
2973 			 * We need to exit, because we don't have enough
2974 			 * data yet.
2975 			 */
2976 			goto bailout;
2977 		} else {
2978 			/*
2979 			 * Take the buffer off of the pending queue.
2980 			 */
2981 			STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
2982 				      links);
2983 			dev->num_pending_queue--;
2984 
2985 			/*
2986 			 * If we need an EOF flush, but there is no data
2987 			 * to flush, go ahead and return this buffer.
2988 			 */
2989 			if (data->fill_len == 0) {
2990 				camdd_complete_buf(dev, buf, /*error_count*/0);
2991 				retval = 1;
2992 				goto bailout;
2993 			}
2994 
2995 			/*
2996 			 * Put this on the next queue for execution.
2997 			 */
2998 			STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2999 			dev->num_run_queue++;
3000 		}
3001 	} else if (new_len == buf->len) {
3002 		/*
3003 		 * We have enough data to completey fill one block,
3004 		 * so we're ready to issue the I/O.
3005 		 */
3006 
3007 		/*
3008 		 * Take the buffer off of the pending queue.
3009 		 */
3010 		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf, links);
3011 		dev->num_pending_queue--;
3012 
3013 		/*
3014 		 * Add the new read buffer to the list for writing.
3015 		 */
3016 		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
3017 
3018 		/* Increment the count */
3019 		buf->src_count++;
3020 
3021 		/*
3022 		 * Increment our fill length.
3023 		 */
3024 		data->fill_len += (rb_data->fill_len - rb_data->resid);
3025 
3026 		/*
3027 		 * Put this on the next queue for execution.
3028 		 */
3029 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
3030 		dev->num_run_queue++;
3031 	} else {
3032 		struct camdd_buf *idb;
3033 		struct camdd_buf_indirect *indirect;
3034 		uint32_t len_to_go, cur_offset;
3035 
3036 
3037 		idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
3038 		if (idb == NULL) {
3039 			retval = 1;
3040 			goto bailout;
3041 		}
3042 		indirect = &idb->buf_type_spec.indirect;
3043 		indirect->src_buf = read_buf;
3044 		read_buf->refcount++;
3045 		indirect->offset = 0;
3046 		indirect->start_ptr = rb_data->buf;
3047 		/*
3048 		 * We've already established that there is more
3049 		 * data in read_buf than we have room for in our
3050 		 * current write request.  So this particular chunk
3051 		 * of the request should just be the remainder
3052 		 * needed to fill up a block.
3053 		 */
3054 		indirect->len = buf->len - (data->fill_len - data->resid);
3055 
3056 		camdd_buf_add_child(buf, idb);
3057 
3058 		/*
3059 		 * This buffer is ready to execute, so we can take
3060 		 * it off the pending queue and put it on the run
3061 		 * queue.
3062 		 */
3063 		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
3064 			      links);
3065 		dev->num_pending_queue--;
3066 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
3067 		dev->num_run_queue++;
3068 
3069 		cur_offset = indirect->offset + indirect->len;
3070 
3071 		/*
3072 		 * The resulting I/O would be too large to fit in
3073 		 * one block.  We need to split this I/O into
3074 		 * multiple pieces.  Allocate as many buffers as needed.
3075 		 */
3076 		for (len_to_go = rb_data->fill_len - rb_data->resid -
3077 		     indirect->len; len_to_go > 0;) {
3078 			struct camdd_buf *new_buf;
3079 			struct camdd_buf_data *new_data;
3080 			uint64_t lba;
3081 			ssize_t len;
3082 
3083 			retval = camdd_get_next_lba_len(dev, &lba, &len);
3084 			if ((retval != 0)
3085 			 && (len == 0)) {
3086 				/*
3087 				 * The device has already been marked
3088 				 * as EOF, and there is no space left.
3089 				 */
3090 				goto bailout;
3091 			}
3092 
3093 			new_buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
3094 			if (new_buf == NULL) {
3095 				retval = 1;
3096 				goto bailout;
3097 			}
3098 
3099 			new_buf->lba = lba;
3100 			new_buf->len = len;
3101 
3102 			idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
3103 			if (idb == NULL) {
3104 				retval = 1;
3105 				goto bailout;
3106 			}
3107 
3108 			indirect = &idb->buf_type_spec.indirect;
3109 
3110 			indirect->src_buf = read_buf;
3111 			read_buf->refcount++;
3112 			indirect->offset = cur_offset;
3113 			indirect->start_ptr = rb_data->buf + cur_offset;
3114 			indirect->len = min(len_to_go, new_buf->len);
3115 #if 0
3116 			if (((indirect->len % dev->sector_size) != 0)
3117 			 || ((indirect->offset % dev->sector_size) != 0)) {
3118 				warnx("offset %ju len %ju not aligned with "
3119 				    "sector size %u", indirect->offset,
3120 				    (uintmax_t)indirect->len, dev->sector_size);
3121 			}
3122 #endif
3123 			cur_offset += indirect->len;
3124 			len_to_go -= indirect->len;
3125 
3126 			camdd_buf_add_child(new_buf, idb);
3127 
3128 			new_data = &new_buf->buf_type_spec.data;
3129 
3130 			if ((new_data->fill_len == new_buf->len)
3131 			 || (eof_flush_needed != 0)) {
3132 				STAILQ_INSERT_TAIL(&dev->run_queue,
3133 						   new_buf, links);
3134 				dev->num_run_queue++;
3135 			} else if (new_data->fill_len < buf->len) {
3136 				STAILQ_INSERT_TAIL(&dev->pending_queue,
3137 					   	new_buf, links);
3138 				dev->num_pending_queue++;
3139 			} else {
3140 				warnx("%s: too much data in new "
3141 				      "buffer!", __func__);
3142 				retval = 1;
3143 				goto bailout;
3144 			}
3145 		}
3146 	}
3147 
3148 bailout:
3149 	return (retval);
3150 }
3151 
3152 void
3153 camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
3154 		uint32_t *peer_depth, uint32_t *our_bytes, uint32_t *peer_bytes)
3155 {
3156 	*our_depth = dev->cur_active_io + dev->num_run_queue;
3157 	if (dev->num_peer_work_queue >
3158 	    dev->num_peer_done_queue)
3159 		*peer_depth = dev->num_peer_work_queue -
3160 			      dev->num_peer_done_queue;
3161 	else
3162 		*peer_depth = 0;
3163 	*our_bytes = *our_depth * dev->blocksize;
3164 	*peer_bytes = dev->peer_bytes_queued;
3165 }
3166 
3167 void
3168 camdd_sig_handler(int sig)
3169 {
3170 	if (sig == SIGINFO)
3171 		need_status = 1;
3172 	else {
3173 		need_exit = 1;
3174 		error_exit = 1;
3175 	}
3176 
3177 	sem_post(&camdd_sem);
3178 }
3179 
3180 void
3181 camdd_print_status(struct camdd_dev *camdd_dev, struct camdd_dev *other_dev,
3182 		   struct timespec *start_time)
3183 {
3184 	struct timespec done_time;
3185 	uint64_t total_ns;
3186 	long double mb_sec, total_sec;
3187 	int error = 0;
3188 
3189 	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &done_time);
3190 	if (error != 0) {
3191 		warn("Unable to get done time");
3192 		return;
3193 	}
3194 
3195 	timespecsub(&done_time, start_time, &done_time);
3196 
3197 	total_ns = done_time.tv_nsec + (done_time.tv_sec * 1000000000);
3198 	total_sec = total_ns;
3199 	total_sec /= 1000000000;
3200 
3201 	fprintf(stderr, "%ju bytes %s %s\n%ju bytes %s %s\n"
3202 		"%.4Lf seconds elapsed\n",
3203 		(uintmax_t)camdd_dev->bytes_transferred,
3204 		(camdd_dev->write_dev == 0) ?  "read from" : "written to",
3205 		camdd_dev->device_name,
3206 		(uintmax_t)other_dev->bytes_transferred,
3207 		(other_dev->write_dev == 0) ? "read from" : "written to",
3208 		other_dev->device_name, total_sec);
3209 
3210 	mb_sec = min(other_dev->bytes_transferred,camdd_dev->bytes_transferred);
3211 	mb_sec /= 1024 * 1024;
3212 	mb_sec *= 1000000000;
3213 	mb_sec /= total_ns;
3214 	fprintf(stderr, "%.2Lf MB/sec\n", mb_sec);
3215 }
3216 
3217 int
3218 camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts, uint64_t max_io,
3219 	 int retry_count, int timeout)
3220 {
3221 	struct cam_device *new_cam_dev = NULL;
3222 	struct camdd_dev *devs[2];
3223 	struct timespec start_time;
3224 	pthread_t threads[2];
3225 	int unit = 0;
3226 	int error = 0;
3227 	int i;
3228 
3229 	bzero(devs, sizeof(devs));
3230 
3231 	if (num_io_opts != 2) {
3232 		warnx("Must have one input and one output path");
3233 		error = 1;
3234 		goto bailout;
3235 	}
3236 
3237 	for (i = 0; i < num_io_opts; i++) {
3238 		switch (io_opts[i].dev_type) {
3239 		case CAMDD_DEV_PASS: {
3240 			if (isdigit(io_opts[i].dev_name[0])) {
3241 				camdd_argmask new_arglist = CAMDD_ARG_NONE;
3242 				int bus = 0, target = 0, lun = 0;
3243 				int rv;
3244 
3245 				/* device specified as bus:target[:lun] */
3246 				rv = parse_btl(io_opts[i].dev_name, &bus,
3247 				    &target, &lun, &new_arglist);
3248 				if (rv < 2) {
3249 					warnx("numeric device specification "
3250 					     "must be either bus:target, or "
3251 					     "bus:target:lun");
3252 					error = 1;
3253 					goto bailout;
3254 				}
3255 				/* default to 0 if lun was not specified */
3256 				if ((new_arglist & CAMDD_ARG_LUN) == 0) {
3257 					lun = 0;
3258 					new_arglist |= CAMDD_ARG_LUN;
3259 				}
3260 				new_cam_dev = cam_open_btl(bus, target, lun,
3261 				    O_RDWR, NULL);
3262 			} else {
3263 				char name[30];
3264 
3265 				if (cam_get_device(io_opts[i].dev_name, name,
3266 						   sizeof name, &unit) == -1) {
3267 					warnx("%s", cam_errbuf);
3268 					error = 1;
3269 					goto bailout;
3270 				}
3271 				new_cam_dev = cam_open_spec_device(name, unit,
3272 				    O_RDWR, NULL);
3273 			}
3274 
3275 			if (new_cam_dev == NULL) {
3276 				warnx("%s", cam_errbuf);
3277 				error = 1;
3278 				goto bailout;
3279 			}
3280 
3281 			devs[i] = camdd_probe_pass(new_cam_dev,
3282 			    /*io_opts*/ &io_opts[i],
3283 			    CAMDD_ARG_ERR_RECOVER,
3284 			    /*probe_retry_count*/ 3,
3285 			    /*probe_timeout*/ 5000,
3286 			    /*io_retry_count*/ retry_count,
3287 			    /*io_timeout*/ timeout);
3288 			if (devs[i] == NULL) {
3289 				warn("Unable to probe device %s%u",
3290 				     new_cam_dev->device_name,
3291 				     new_cam_dev->dev_unit_num);
3292 				error = 1;
3293 				goto bailout;
3294 			}
3295 			break;
3296 		}
3297 		case CAMDD_DEV_FILE: {
3298 			int fd = -1;
3299 
3300 			if (io_opts[i].dev_name[0] == '-') {
3301 				if (io_opts[i].write_dev != 0)
3302 					fd = STDOUT_FILENO;
3303 				else
3304 					fd = STDIN_FILENO;
3305 			} else {
3306 				if (io_opts[i].write_dev != 0) {
3307 					fd = open(io_opts[i].dev_name,
3308 					    O_RDWR | O_CREAT, S_IWUSR |S_IRUSR);
3309 				} else {
3310 					fd = open(io_opts[i].dev_name,
3311 					    O_RDONLY);
3312 				}
3313 			}
3314 			if (fd == -1) {
3315 				warn("error opening file %s",
3316 				    io_opts[i].dev_name);
3317 				error = 1;
3318 				goto bailout;
3319 			}
3320 
3321 			devs[i] = camdd_probe_file(fd, &io_opts[i],
3322 			    retry_count, timeout);
3323 			if (devs[i] == NULL) {
3324 				error = 1;
3325 				goto bailout;
3326 			}
3327 
3328 			break;
3329 		}
3330 		default:
3331 			warnx("Unknown device type %d (%s)",
3332 			    io_opts[i].dev_type, io_opts[i].dev_name);
3333 			error = 1;
3334 			goto bailout;
3335 			break; /*NOTREACHED */
3336 		}
3337 
3338 		devs[i]->write_dev = io_opts[i].write_dev;
3339 
3340 		devs[i]->start_offset_bytes = io_opts[i].offset;
3341 
3342 		if (max_io != 0) {
3343 			devs[i]->sector_io_limit =
3344 			    (devs[i]->start_offset_bytes /
3345 			    devs[i]->sector_size) +
3346 			    (max_io / devs[i]->sector_size) - 1;
3347 		}
3348 
3349 		devs[i]->next_io_pos_bytes = devs[i]->start_offset_bytes;
3350 		devs[i]->next_completion_pos_bytes =devs[i]->start_offset_bytes;
3351 	}
3352 
3353 	devs[0]->peer_dev = devs[1];
3354 	devs[1]->peer_dev = devs[0];
3355 	devs[0]->next_peer_pos_bytes = devs[0]->peer_dev->next_io_pos_bytes;
3356 	devs[1]->next_peer_pos_bytes = devs[1]->peer_dev->next_io_pos_bytes;
3357 
3358 	sem_init(&camdd_sem, /*pshared*/ 0, 0);
3359 
3360 	signal(SIGINFO, camdd_sig_handler);
3361 	signal(SIGINT, camdd_sig_handler);
3362 
3363 	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &start_time);
3364 	if (error != 0) {
3365 		warn("Unable to get start time");
3366 		goto bailout;
3367 	}
3368 
3369 	for (i = 0; i < num_io_opts; i++) {
3370 		error = pthread_create(&threads[i], NULL, camdd_worker,
3371 				       (void *)devs[i]);
3372 		if (error != 0) {
3373 			warnc(error, "pthread_create() failed");
3374 			goto bailout;
3375 		}
3376 	}
3377 
3378 	for (;;) {
3379 		if ((sem_wait(&camdd_sem) == -1)
3380 		 || (need_exit != 0)) {
3381 			struct kevent ke;
3382 
3383 			for (i = 0; i < num_io_opts; i++) {
3384 				EV_SET(&ke, (uintptr_t)&devs[i]->work_queue,
3385 				    EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
3386 
3387 				devs[i]->flags |= CAMDD_DEV_FLAG_EOF;
3388 
3389 				error = kevent(devs[i]->kq, &ke, 1, NULL, 0,
3390 						NULL);
3391 				if (error == -1)
3392 					warn("%s: unable to wake up thread",
3393 					    __func__);
3394 				error = 0;
3395 			}
3396 			break;
3397 		} else if (need_status != 0) {
3398 			camdd_print_status(devs[0], devs[1], &start_time);
3399 			need_status = 0;
3400 		}
3401 	}
3402 	for (i = 0; i < num_io_opts; i++) {
3403 		pthread_join(threads[i], NULL);
3404 	}
3405 
3406 	camdd_print_status(devs[0], devs[1], &start_time);
3407 
3408 bailout:
3409 
3410 	for (i = 0; i < num_io_opts; i++)
3411 		camdd_free_dev(devs[i]);
3412 
3413 	return (error + error_exit);
3414 }
3415 
/*
 * Write the command line synopsis and option descriptions to stderr.
 */
void
usage(void)
{
	static const char usage_text[] =
"usage:  camdd <-i|-o pass=pass0,bs=1M,offset=1M,depth=4>\n"
"              <-i|-o file=/tmp/file,bs=512K,offset=1M>\n"
"              <-i|-o file=/dev/da0,bs=512K,offset=1M>\n"
"              <-i|-o file=/dev/nsa0,bs=512K>\n"
"              [-C retry_count][-E][-m max_io_amt][-t timeout_secs][-v][-h]\n"
"Option description\n"
"-i <arg=val>  Specify input device/file and parameters\n"
"-o <arg=val>  Specify output device/file and parameters\n"
"Input and Output parameters\n"
"pass=name     Specify a pass(4) device like pass0 or /dev/pass0\n"
"file=name     Specify a file or device, /tmp/foo, /dev/da0, /dev/null\n"
"              or - for stdin/stdout\n"
"bs=blocksize  Specify blocksize in bytes, or using K, M, G, etc. suffix\n"
"offset=len    Specify starting offset in bytes or using K, M, G suffix\n"
"              NOTE: offset cannot be specified on tapes, pipes, stdin/out\n"
"depth=N       Specify a numeric queue depth.  This only applies to pass(4)\n"
"mcs=N         Specify a minimum cmd size for pass(4) read/write commands\n"
"Optional arguments\n"
"-C retry_cnt  Specify a retry count for pass(4) devices\n"
"-E            Enable CAM error recovery for pass(4) devices\n"
"-m max_io     Specify the maximum amount to be transferred in bytes or\n"
"              using K, G, M, etc. suffixes\n"
"-t timeout    Specify the I/O timeout to use with pass(4) devices\n"
"-v            Enable verbose error recovery\n"
"-h            Print this message\n";

	/* The text contains no conversion specifiers, so emit it verbatim. */
	fputs(usage_text, stderr);
}
3446 
3447 
3448 int
3449 camdd_parse_io_opts(char *args, int is_write, struct camdd_io_opts *io_opts)
3450 {
3451 	char *tmpstr, *tmpstr2;
3452 	char *orig_tmpstr = NULL;
3453 	int retval = 0;
3454 
3455 	io_opts->write_dev = is_write;
3456 
3457 	tmpstr = strdup(args);
3458 	if (tmpstr == NULL) {
3459 		warn("strdup failed");
3460 		retval = 1;
3461 		goto bailout;
3462 	}
3463 	orig_tmpstr = tmpstr;
3464 	while ((tmpstr2 = strsep(&tmpstr, ",")) != NULL) {
3465 		char *name, *value;
3466 
3467 		/*
3468 		 * If the user creates an empty parameter by putting in two
3469 		 * commas, skip over it and look for the next field.
3470 		 */
3471 		if (*tmpstr2 == '\0')
3472 			continue;
3473 
3474 		name = strsep(&tmpstr2, "=");
3475 		if (*name == '\0') {
3476 			warnx("Got empty I/O parameter name");
3477 			retval = 1;
3478 			goto bailout;
3479 		}
3480 		value = strsep(&tmpstr2, "=");
3481 		if ((value == NULL)
3482 		 || (*value == '\0')) {
3483 			warnx("Empty I/O parameter value for %s", name);
3484 			retval = 1;
3485 			goto bailout;
3486 		}
3487 		if (strncasecmp(name, "file", 4) == 0) {
3488 			io_opts->dev_type = CAMDD_DEV_FILE;
3489 			io_opts->dev_name = strdup(value);
3490 			if (io_opts->dev_name == NULL) {
3491 				warn("Error allocating memory");
3492 				retval = 1;
3493 				goto bailout;
3494 			}
3495 		} else if (strncasecmp(name, "pass", 4) == 0) {
3496 			io_opts->dev_type = CAMDD_DEV_PASS;
3497 			io_opts->dev_name = strdup(value);
3498 			if (io_opts->dev_name == NULL) {
3499 				warn("Error allocating memory");
3500 				retval = 1;
3501 				goto bailout;
3502 			}
3503 		} else if ((strncasecmp(name, "bs", 2) == 0)
3504 			|| (strncasecmp(name, "blocksize", 9) == 0)) {
3505 			retval = expand_number(value, &io_opts->blocksize);
3506 			if (retval == -1) {
3507 				warn("expand_number(3) failed on %s=%s", name,
3508 				    value);
3509 				retval = 1;
3510 				goto bailout;
3511 			}
3512 		} else if (strncasecmp(name, "depth", 5) == 0) {
3513 			char *endptr;
3514 
3515 			io_opts->queue_depth = strtoull(value, &endptr, 0);
3516 			if (*endptr != '\0') {
3517 				warnx("invalid queue depth %s", value);
3518 				retval = 1;
3519 				goto bailout;
3520 			}
3521 		} else if (strncasecmp(name, "mcs", 3) == 0) {
3522 			char *endptr;
3523 
3524 			io_opts->min_cmd_size = strtol(value, &endptr, 0);
3525 			if ((*endptr != '\0')
3526 			 || ((io_opts->min_cmd_size > 16)
3527 			  || (io_opts->min_cmd_size < 0))) {
3528 				warnx("invalid minimum cmd size %s", value);
3529 				retval = 1;
3530 				goto bailout;
3531 			}
3532 		} else if (strncasecmp(name, "offset", 6) == 0) {
3533 			retval = expand_number(value, &io_opts->offset);
3534 			if (retval == -1) {
3535 				warn("expand_number(3) failed on %s=%s", name,
3536 				    value);
3537 				retval = 1;
3538 				goto bailout;
3539 			}
3540 		} else if (strncasecmp(name, "debug", 5) == 0) {
3541 			char *endptr;
3542 
3543 			io_opts->debug = strtoull(value, &endptr, 0);
3544 			if (*endptr != '\0') {
3545 				warnx("invalid debug level %s", value);
3546 				retval = 1;
3547 				goto bailout;
3548 			}
3549 		} else {
3550 			warnx("Unrecognized parameter %s=%s", name, value);
3551 		}
3552 	}
3553 bailout:
3554 	free(orig_tmpstr);
3555 
3556 	return (retval);
3557 }
3558 
3559 int
3560 main(int argc, char **argv)
3561 {
3562 	int c;
3563 	camdd_argmask arglist = CAMDD_ARG_NONE;
3564 	int timeout = 0, retry_count = 1;
3565 	int error = 0;
3566 	uint64_t max_io = 0;
3567 	struct camdd_io_opts *opt_list = NULL;
3568 
3569 	if (argc == 1) {
3570 		usage();
3571 		exit(1);
3572 	}
3573 
3574 	opt_list = calloc(2, sizeof(struct camdd_io_opts));
3575 	if (opt_list == NULL) {
3576 		warn("Unable to allocate option list");
3577 		error = 1;
3578 		goto bailout;
3579 	}
3580 
3581 	while ((c = getopt(argc, argv, "C:Ehi:m:o:t:v")) != -1){
3582 		switch (c) {
3583 		case 'C':
3584 			retry_count = strtol(optarg, NULL, 0);
3585 			if (retry_count < 0)
3586 				errx(1, "retry count %d is < 0",
3587 				     retry_count);
3588 			arglist |= CAMDD_ARG_RETRIES;
3589 			break;
3590 		case 'E':
3591 			arglist |= CAMDD_ARG_ERR_RECOVER;
3592 			break;
3593 		case 'i':
3594 		case 'o':
3595 			if (((c == 'i')
3596 			  && (opt_list[0].dev_type != CAMDD_DEV_NONE))
3597 			 || ((c == 'o')
3598 			  && (opt_list[1].dev_type != CAMDD_DEV_NONE))) {
3599 				errx(1, "Only one input and output path "
3600 				    "allowed");
3601 			}
3602 			error = camdd_parse_io_opts(optarg, (c == 'o') ? 1 : 0,
3603 			    (c == 'o') ? &opt_list[1] : &opt_list[0]);
3604 			if (error != 0)
3605 				goto bailout;
3606 			break;
3607 		case 'm':
3608 			error = expand_number(optarg, &max_io);
3609 			if (error == -1) {
3610 				warn("invalid maximum I/O amount %s", optarg);
3611 				error = 1;
3612 				goto bailout;
3613 			}
3614 			break;
3615 		case 't':
3616 			timeout = strtol(optarg, NULL, 0);
3617 			if (timeout < 0)
3618 				errx(1, "invalid timeout %d", timeout);
3619 			/* Convert the timeout from seconds to ms */
3620 			timeout *= 1000;
3621 			arglist |= CAMDD_ARG_TIMEOUT;
3622 			break;
3623 		case 'v':
3624 			arglist |= CAMDD_ARG_VERBOSE;
3625 			break;
3626 		case 'h':
3627 		default:
3628 			usage();
3629 			exit(1);
3630 			break; /*NOTREACHED*/
3631 		}
3632 	}
3633 
3634 	if ((opt_list[0].dev_type == CAMDD_DEV_NONE)
3635 	 || (opt_list[1].dev_type == CAMDD_DEV_NONE))
3636 		errx(1, "Must specify both -i and -o");
3637 
3638 	/*
3639 	 * Set the timeout if the user hasn't specified one.
3640 	 */
3641 	if (timeout == 0)
3642 		timeout = CAMDD_PASS_RW_TIMEOUT;
3643 
3644 	error = camdd_rw(opt_list, 2, max_io, retry_count, timeout);
3645 
3646 bailout:
3647 	free(opt_list);
3648 
3649 	exit(error);
3650 }
3651