xref: /freebsd/usr.sbin/camdd/camdd.c (revision 3806950135d2c8633ec0764e8807eacc87cf3e10)
1 /*-
2  * Copyright (c) 1997-2007 Kenneth D. Merry
3  * Copyright (c) 2013, 2014, 2015 Spectra Logic Corporation
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions, and the following disclaimer,
11  *    without modification.
12  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
13  *    substantially similar to the "NO WARRANTY" disclaimer below
14  *    ("Disclaimer") and any redistribution must be conditioned upon
15  *    including a substantially similar Disclaimer requirement for further
16  *    binary redistribution.
17  *
18  * NO WARRANTY
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
22  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
28  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGES.
30  *
31  * Authors: Ken Merry           (Spectra Logic Corporation)
32  */
33 
34 /*
35  * This is eventually intended to be:
36  * - A basic data transfer/copy utility
37  * - A simple benchmark utility
38  * - An example of how to use the asynchronous pass(4) driver interface.
39  */
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42 
43 #include <sys/ioctl.h>
44 #include <sys/stdint.h>
45 #include <sys/types.h>
46 #include <sys/endian.h>
47 #include <sys/param.h>
48 #include <sys/sbuf.h>
49 #include <sys/stat.h>
50 #include <sys/event.h>
51 #include <sys/time.h>
52 #include <sys/uio.h>
53 #include <vm/vm.h>
54 #include <machine/bus.h>
55 #include <sys/bus.h>
56 #include <sys/bus_dma.h>
57 #include <sys/mtio.h>
58 #include <sys/conf.h>
59 #include <sys/disk.h>
60 
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <semaphore.h>
64 #include <string.h>
65 #include <unistd.h>
66 #include <inttypes.h>
67 #include <limits.h>
68 #include <fcntl.h>
69 #include <ctype.h>
70 #include <err.h>
71 #include <libutil.h>
72 #include <pthread.h>
73 #include <assert.h>
74 #include <bsdxml.h>
75 
76 #include <cam/cam.h>
77 #include <cam/cam_debug.h>
78 #include <cam/cam_ccb.h>
79 #include <cam/scsi/scsi_all.h>
80 #include <cam/scsi/scsi_da.h>
81 #include <cam/scsi/scsi_pass.h>
82 #include <cam/scsi/scsi_message.h>
83 #include <cam/scsi/smp_all.h>
84 #include <camlib.h>
85 #include <mtlib.h>
86 #include <zlib.h>
87 
/*
 * Command codes.  Despite the "mask" in the type name these are not
 * disjoint bits: CAMDD_CMD_READ (3) has the same value as
 * CAMDD_CMD_HELP | CAMDD_CMD_WRITE.
 */
typedef enum {
	CAMDD_CMD_NONE		= 0x0,
	CAMDD_CMD_HELP		= 0x1,
	CAMDD_CMD_WRITE		= 0x2,
	CAMDD_CMD_READ		= 0x3
} camdd_cmdmask;
94 
/*
 * Bit flags recording which arguments have been seen.  Each flag occupies
 * its own bit so that they can be OR'ed together (parse_btl() does this
 * for the BUS/TARGET/LUN bits).
 */
typedef enum {
	CAMDD_ARG_NONE		= 0,
	CAMDD_ARG_VERBOSE	= 1 << 0,
	CAMDD_ARG_DEVICE	= 1 << 1,
	CAMDD_ARG_BUS		= 1 << 2,
	CAMDD_ARG_TARGET	= 1 << 3,
	CAMDD_ARG_LUN		= 1 << 4,
	CAMDD_ARG_UNIT		= 1 << 5,
	CAMDD_ARG_TIMEOUT	= 1 << 6,
	CAMDD_ARG_ERR_RECOVER	= 1 << 7,
	CAMDD_ARG_RETRIES	= 1 << 8
} camdd_argmask;
107 
/*
 * Kind of device behind a camdd_dev.  Selects which member of
 * union camdd_dev_spec is in use (see camdd_free_dev()).
 */
typedef enum {
	CAMDD_DEV_NONE		= 0,
	CAMDD_DEV_PASS		= 1,
	CAMDD_DEV_FILE		= 2
} camdd_dev_type;
113 
114 struct camdd_io_opts {
115 	camdd_dev_type	dev_type;
116 	char		*dev_name;
117 	uint64_t	blocksize;
118 	uint64_t	queue_depth;
119 	uint64_t	offset;
120 	int		min_cmd_size;
121 	int		write_dev;
122 	uint64_t	debug;
123 };
124 
/*
 * Discriminator for union camdd_buf_types: a data buffer owns backing
 * store, an indirect buffer points into another buffer.
 */
typedef enum {
	CAMDD_BUF_NONE		= 0,
	CAMDD_BUF_DATA		= 1,
	CAMDD_BUF_INDIRECT	= 2
} camdd_buf_type;
130 
/*
 * Type-specific part of a CAMDD_BUF_INDIRECT buffer: a window onto a
 * region of some other buffer.
 */
struct camdd_buf_indirect {
	/* The buffer whose data this one refers to. */
	struct camdd_buf *src_buf;

	/* Byte offset of the window within the source buffer. */
	uint64_t	  offset;

	/* Address of the first byte of the window in the source buffer. */
	uint8_t		 *start_ptr;

	/* Size of the window in bytes. */
	size_t		  len;
};
151 
152 struct camdd_buf_data {
153 	/*
154 	 * Buffer allocated when we allocate this camdd_buf.  This should
155 	 * be the size of the blocksize for this device.
156 	 */
157 	uint8_t			*buf;
158 
159 	/*
160 	 * The amount of backing store allocated in buf.  Generally this
161 	 * will be the blocksize of the device.
162 	 */
163 	uint32_t		 alloc_len;
164 
165 	/*
166 	 * The amount of data that was put into the buffer (on reads) or
167 	 * the amount of data we have put onto the src_list so far (on
168 	 * writes).
169 	 */
170 	uint32_t		 fill_len;
171 
172 	/*
173 	 * The amount of data that was not transferred.
174 	 */
175 	uint32_t		 resid;
176 
177 	/*
178 	 * Starting byte offset on the reader.
179 	 */
180 	uint64_t		 src_start_offset;
181 
182 	/*
183 	 * CCB used for pass(4) device targets.
184 	 */
185 	union ccb		 ccb;
186 
187 	/*
188 	 * Number of scatter/gather segments.
189 	 */
190 	int			 sg_count;
191 
192 	/*
193 	 * Set if we had to tack on an extra buffer to round the transfer
194 	 * up to a sector size.
195 	 */
196 	int			 extra_buf;
197 
198 	/*
199 	 * Scatter/gather list used generally when we're the writer for a
200 	 * pass(4) device.
201 	 */
202 	bus_dma_segment_t	*segs;
203 
204 	/*
205 	 * Scatter/gather list used generally when we're the writer for a
206 	 * file or block device;
207 	 */
208 	struct iovec		*iovec;
209 };
210 
211 union camdd_buf_types {
212 	struct camdd_buf_indirect	indirect;
213 	struct camdd_buf_data		data;
214 };
215 
/*
 * Completion status recorded in a camdd_buf.
 */
typedef enum {
	CAMDD_STATUS_NONE	= 0,
	CAMDD_STATUS_OK		= 1,
	CAMDD_STATUS_SHORT_IO	= 2,
	CAMDD_STATUS_EOF	= 3,
	CAMDD_STATUS_ERROR	= 4
} camdd_buf_status;
223 
224 struct camdd_buf {
225 	camdd_buf_type		 buf_type;
226 	union camdd_buf_types	 buf_type_spec;
227 
228 	camdd_buf_status	 status;
229 
230 	uint64_t		 lba;
231 	size_t			 len;
232 
233 	/*
234 	 * A reference count of how many indirect buffers point to this
235 	 * buffer.
236 	 */
237 	int			 refcount;
238 
239 	/*
240 	 * A link back to our parent device.
241 	 */
242 	struct camdd_dev	*dev;
243 	STAILQ_ENTRY(camdd_buf)  links;
244 	STAILQ_ENTRY(camdd_buf)  work_links;
245 
246 	/*
247 	 * A count of the buffers on the src_list.
248 	 */
249 	int			 src_count;
250 
251 	/*
252 	 * List of buffers from our partner thread that are the components
253 	 * of this buffer for the I/O.  Uses src_links.
254 	 */
255 	STAILQ_HEAD(,camdd_buf)	 src_list;
256 	STAILQ_ENTRY(camdd_buf)  src_links;
257 };
258 
/*
 * NOTE(review): presumably the count of real device types
 * (CAMDD_DEV_PASS and CAMDD_DEV_FILE) -- confirm against the users.
 */
#define	NUM_DEV_TYPES	2
260 
/*
 * State specific to a CAMDD_DEV_PASS device.
 */
struct camdd_dev_pass {
	int			 scsi_dev_type;
	int			 protocol;
	/* CAM handle; closed with cam_close_device() in camdd_free_dev(). */
	struct cam_device	*dev;
	uint64_t		 max_sector;
	uint32_t		 block_len;
	uint32_t		 cpi_maxio;
};
269 
/*
 * What kind of object a file device turned out to be (set while probing
 * the file descriptor).
 */
typedef enum {
	CAMDD_FILE_NONE	= 0,
	CAMDD_FILE_REG	= 1,
	CAMDD_FILE_STD	= 2,
	CAMDD_FILE_PIPE	= 3,
	CAMDD_FILE_DISK	= 4,
	CAMDD_FILE_TAPE	= 5,
	CAMDD_FILE_TTY	= 6,
	CAMDD_FILE_MEM	= 7
} camdd_file_type;
280 
/*
 * Capability flags for a file device.
 */
typedef enum {
	CAMDD_FF_NONE		= 0,
	CAMDD_FF_CAN_SEEK	= 1 << 0
} camdd_file_flags;
285 
286 struct camdd_dev_file {
287 	int			 fd;
288 	struct stat		 sb;
289 	char			 filename[MAXPATHLEN + 1];
290 	camdd_file_type		 file_type;
291 	camdd_file_flags	 file_flags;
292 	uint8_t			*tmp_buf;
293 };
294 
/*
 * State specific to a block device.
 */
struct camdd_dev_block {
	int			 fd;		/* descriptor */
	uint64_t		 size_bytes;	/* size, in bytes */
	uint32_t		 block_len;	/* block length */
};
300 
301 union camdd_dev_spec {
302 	struct camdd_dev_pass	pass;
303 	struct camdd_dev_file	file;
304 	struct camdd_dev_block	block;
305 };
306 
/*
 * Per-device state bits; each flag has its own bit.
 */
typedef enum {
	CAMDD_DEV_FLAG_NONE		= 0,
	CAMDD_DEV_FLAG_EOF		= 1 << 0,
	CAMDD_DEV_FLAG_PEER_EOF		= 1 << 1,
	CAMDD_DEV_FLAG_ACTIVE		= 1 << 2,
	CAMDD_DEV_FLAG_EOF_SENT		= 1 << 3,
	CAMDD_DEV_FLAG_EOF_QUEUED	= 1 << 4
} camdd_dev_flags;
315 
316 struct camdd_dev {
317 	camdd_dev_type		 dev_type;
318 	union camdd_dev_spec	 dev_spec;
319 	camdd_dev_flags		 flags;
320 	char			 device_name[MAXPATHLEN+1];
321 	uint32_t		 blocksize;
322 	uint32_t		 sector_size;
323 	uint64_t		 max_sector;
324 	uint64_t		 sector_io_limit;
325 	int			 min_cmd_size;
326 	int			 write_dev;
327 	int			 retry_count;
328 	int			 io_timeout;
329 	int			 debug;
330 	uint64_t		 start_offset_bytes;
331 	uint64_t		 next_io_pos_bytes;
332 	uint64_t		 next_peer_pos_bytes;
333 	uint64_t		 next_completion_pos_bytes;
334 	uint64_t		 peer_bytes_queued;
335 	uint64_t		 bytes_transferred;
336 	uint32_t		 target_queue_depth;
337 	uint32_t		 cur_active_io;
338 	uint8_t			*extra_buf;
339 	uint32_t		 extra_buf_len;
340 	struct camdd_dev	*peer_dev;
341 	pthread_mutex_t		 mutex;
342 	pthread_cond_t		 cond;
343 	int			 kq;
344 
345 	int			 (*run)(struct camdd_dev *dev);
346 	int			 (*fetch)(struct camdd_dev *dev);
347 
348 	/*
349 	 * Buffers that are available for I/O.  Uses links.
350 	 */
351 	STAILQ_HEAD(,camdd_buf)	 free_queue;
352 
353 	/*
354 	 * Free indirect buffers.  These are used for breaking a large
355 	 * buffer into multiple pieces.
356 	 */
357 	STAILQ_HEAD(,camdd_buf)	 free_indirect_queue;
358 
359 	/*
360 	 * Buffers that have been queued to the kernel.  Uses links.
361 	 */
362 	STAILQ_HEAD(,camdd_buf)	 active_queue;
363 
364 	/*
365 	 * Will generally contain one of our buffers that is waiting for enough
366 	 * I/O from our partner thread to be able to execute.  This will
367 	 * generally happen when our per-I/O-size is larger than the
368 	 * partner thread's per-I/O-size.  Uses links.
369 	 */
370 	STAILQ_HEAD(,camdd_buf)	 pending_queue;
371 
372 	/*
373 	 * Number of buffers on the pending queue
374 	 */
375 	int			 num_pending_queue;
376 
377 	/*
378 	 * Buffers that are filled and ready to execute.  This is used when
379 	 * our partner (reader) thread sends us blocks that are larger than
380 	 * our blocksize, and so we have to split them into multiple pieces.
381 	 */
382 	STAILQ_HEAD(,camdd_buf)	 run_queue;
383 
384 	/*
385 	 * Number of buffers on the run queue.
386 	 */
387 	int			 num_run_queue;
388 
389 	STAILQ_HEAD(,camdd_buf)	 reorder_queue;
390 
391 	int			 num_reorder_queue;
392 
393 	/*
394 	 * Buffers that have been queued to us by our partner thread
395 	 * (generally the reader thread) to be written out.  Uses
396 	 * work_links.
397 	 */
398 	STAILQ_HEAD(,camdd_buf)	 work_queue;
399 
400 	/*
401 	 * Buffers that have been completed by our partner thread.  Uses
402 	 * work_links.
403 	 */
404 	STAILQ_HEAD(,camdd_buf)	 peer_done_queue;
405 
406 	/*
407 	 * Number of buffers on the peer done queue.
408 	 */
409 	uint32_t		 num_peer_done_queue;
410 
411 	/*
412 	 * A list of buffers that we have queued to our peer thread.  Uses
413 	 * links.
414 	 */
415 	STAILQ_HEAD(,camdd_buf)	 peer_work_queue;
416 
417 	/*
418 	 * Number of buffers on the peer work queue.
419 	 */
420 	uint32_t		 num_peer_work_queue;
421 };
422 
/*
 * File-scope state.  The three flags begin life as 0 (static storage).
 * NOTE(review): presumably raised from camdd_sig_handler() and polled by
 * the worker threads -- confirm against the rest of the file.
 */
static sem_t		camdd_sem;
static sig_atomic_t	need_exit;
static sig_atomic_t	error_exit;
static sig_atomic_t	need_status;
427 
/*
 * Smaller of two values.  Both arguments and the whole expansion are
 * parenthesized so that the macro composes correctly inside larger
 * expressions (e.g. "2 * min(a, b)" or "min(a, b) + c").  As with any
 * such macro, each argument may be evaluated more than once, so do not
 * pass expressions with side effects.
 */
#ifndef min
#define	min(a, b) (((a) < (b)) ? (a) : (b))
#endif
431 
/*
 * XXX KDM private copy of timespecsub().  The usual definition lives in
 * sys/time.h but is only enabled for the kernel, and enabling it for
 * userland breaks the build of libnetbsd.
 *
 * Computes *vvp -= *uvp in place, borrowing one second when the
 * nanosecond field would otherwise go negative.
 */
#ifndef timespecsub
#define	timespecsub(vvp, uvp)						\
	do {								\
		(vvp)->tv_sec -= (uvp)->tv_sec;				\
		(vvp)->tv_nsec -= (uvp)->tv_nsec;			\
		if ((vvp)->tv_nsec < 0) {				\
			(vvp)->tv_sec -= 1;				\
			(vvp)->tv_nsec += 1000000000;			\
		}							\
	} while (0)
#endif
448 
449 
/*
 * Shorthand for the two slots of a CCB's peripheral private area; each
 * slot can be accessed either as a pointer or as a field.
 */
#define	ppriv_ptr0	periph_priv.entries[0].ptr
#define	ppriv_ptr1	periph_priv.entries[1].ptr
#define	ppriv_field0	periph_priv.entries[0].field
#define	ppriv_field1	periph_priv.entries[1].field

/* The first private pointer slot is used under the name ccb_buf. */
#define	ccb_buf		ppriv_ptr0
457 
/*
 * Defaults and limits.  CAMDD_FILE_DEFAULT_BLOCK and
 * CAMDD_FILE_DEFAULT_DEPTH are what camdd_probe_file() applies to file
 * devices.  CAMDD_PASS_RW_TIMEOUT is parenthesized so that it expands as
 * a single value inside larger expressions (e.g. as a divisor).
 * NOTE(review): the timeout is presumably in milliseconds -- confirm.
 */
#define	CAMDD_FILE_DEFAULT_BLOCK	524288
#define	CAMDD_FILE_DEFAULT_DEPTH	1
#define	CAMDD_PASS_MAX_BLOCK		1048576
#define	CAMDD_PASS_DEFAULT_DEPTH	6
#define	CAMDD_PASS_RW_TIMEOUT		(60 * 1000)
463 
464 static int parse_btl(char *tstr, int *bus, int *target, int *lun,
465 		     camdd_argmask *arglst);
466 void camdd_free_dev(struct camdd_dev *dev);
467 struct camdd_dev *camdd_alloc_dev(camdd_dev_type dev_type,
468 				  struct kevent *new_ke, int num_ke,
469 				  int retry_count, int timeout);
470 static struct camdd_buf *camdd_alloc_buf(struct camdd_dev *dev,
471 					 camdd_buf_type buf_type);
472 void camdd_release_buf(struct camdd_buf *buf);
473 struct camdd_buf *camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type);
474 int camdd_buf_sg_create(struct camdd_buf *buf, int iovec,
475 			uint32_t sector_size, uint32_t *num_sectors_used,
476 			int *double_buf_needed);
477 uint32_t camdd_buf_get_len(struct camdd_buf *buf);
478 void camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf);
479 int camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
480 		     uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran);
481 int camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
482          camdd_argmask arglist, int probe_retry_count,
483          int probe_timeout, uint64_t *maxsector, uint32_t *block_len);
484 struct camdd_dev *camdd_probe_file(int fd, struct camdd_io_opts *io_opts,
485 				   int retry_count, int timeout);
486 struct camdd_dev *camdd_probe_pass(struct cam_device *cam_dev,
487 				   struct camdd_io_opts *io_opts,
488 				   camdd_argmask arglist, int probe_retry_count,
489 				   int probe_timeout, int io_retry_count,
490 				   int io_timeout);
491 void *camdd_file_worker(void *arg);
492 camdd_buf_status camdd_ccb_status(union ccb *ccb, int protocol);
493 int camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd);
494 int camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf);
495 int camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf);
496 void camdd_peer_done(struct camdd_buf *buf);
497 void camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
498 			int *error_count);
499 int camdd_pass_fetch(struct camdd_dev *dev);
500 int camdd_file_run(struct camdd_dev *dev);
501 int camdd_pass_run(struct camdd_dev *dev);
502 int camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len);
503 int camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf);
504 void camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
505 		     uint32_t *peer_depth, uint32_t *our_bytes,
506 		     uint32_t *peer_bytes);
507 void *camdd_worker(void *arg);
508 void camdd_sig_handler(int sig);
509 void camdd_print_status(struct camdd_dev *camdd_dev,
510 			struct camdd_dev *other_dev,
511 			struct timespec *start_time);
512 int camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts,
513 	     uint64_t max_io, int retry_count, int timeout);
514 int camdd_parse_io_opts(char *args, int is_write,
515 			struct camdd_io_opts *io_opts);
516 void usage(void);
517 
518 /*
519  * Parse out a bus, or a bus, target and lun in the following
520  * format:
521  * bus
522  * bus:target
523  * bus:target:lun
524  *
525  * Returns the number of parsed components, or 0.
526  */
527 static int
528 parse_btl(char *tstr, int *bus, int *target, int *lun, camdd_argmask *arglst)
529 {
530 	char *tmpstr;
531 	int convs = 0;
532 
533 	while (isspace(*tstr) && (*tstr != '\0'))
534 		tstr++;
535 
536 	tmpstr = (char *)strtok(tstr, ":");
537 	if ((tmpstr != NULL) && (*tmpstr != '\0')) {
538 		*bus = strtol(tmpstr, NULL, 0);
539 		*arglst |= CAMDD_ARG_BUS;
540 		convs++;
541 		tmpstr = (char *)strtok(NULL, ":");
542 		if ((tmpstr != NULL) && (*tmpstr != '\0')) {
543 			*target = strtol(tmpstr, NULL, 0);
544 			*arglst |= CAMDD_ARG_TARGET;
545 			convs++;
546 			tmpstr = (char *)strtok(NULL, ":");
547 			if ((tmpstr != NULL) && (*tmpstr != '\0')) {
548 				*lun = strtol(tmpstr, NULL, 0);
549 				*arglst |= CAMDD_ARG_LUN;
550 				convs++;
551 			}
552 		}
553 	}
554 
555 	return convs;
556 }
557 
558 /*
559  * XXX KDM clean up and free all of the buffers on the queue!
560  */
561 void
562 camdd_free_dev(struct camdd_dev *dev)
563 {
564 	if (dev == NULL)
565 		return;
566 
567 	switch (dev->dev_type) {
568 	case CAMDD_DEV_FILE: {
569 		struct camdd_dev_file *file_dev = &dev->dev_spec.file;
570 
571 		if (file_dev->fd != -1)
572 			close(file_dev->fd);
573 		free(file_dev->tmp_buf);
574 		break;
575 	}
576 	case CAMDD_DEV_PASS: {
577 		struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
578 
579 		if (pass_dev->dev != NULL)
580 			cam_close_device(pass_dev->dev);
581 		break;
582 	}
583 	default:
584 		break;
585 	}
586 
587 	free(dev);
588 }
589 
590 struct camdd_dev *
591 camdd_alloc_dev(camdd_dev_type dev_type, struct kevent *new_ke, int num_ke,
592 		int retry_count, int timeout)
593 {
594 	struct camdd_dev *dev = NULL;
595 	struct kevent *ke;
596 	size_t ke_size;
597 	int retval = 0;
598 
599 	dev = malloc(sizeof(*dev));
600 	if (dev == NULL) {
601 		warn("%s: unable to malloc %zu bytes", __func__, sizeof(*dev));
602 		goto bailout;
603 	}
604 
605 	bzero(dev, sizeof(*dev));
606 
607 	dev->dev_type = dev_type;
608 	dev->io_timeout = timeout;
609 	dev->retry_count = retry_count;
610 	STAILQ_INIT(&dev->free_queue);
611 	STAILQ_INIT(&dev->free_indirect_queue);
612 	STAILQ_INIT(&dev->active_queue);
613 	STAILQ_INIT(&dev->pending_queue);
614 	STAILQ_INIT(&dev->run_queue);
615 	STAILQ_INIT(&dev->reorder_queue);
616 	STAILQ_INIT(&dev->work_queue);
617 	STAILQ_INIT(&dev->peer_done_queue);
618 	STAILQ_INIT(&dev->peer_work_queue);
619 	retval = pthread_mutex_init(&dev->mutex, NULL);
620 	if (retval != 0) {
621 		warnc(retval, "%s: failed to initialize mutex", __func__);
622 		goto bailout;
623 	}
624 
625 	retval = pthread_cond_init(&dev->cond, NULL);
626 	if (retval != 0) {
627 		warnc(retval, "%s: failed to initialize condition variable",
628 		      __func__);
629 		goto bailout;
630 	}
631 
632 	dev->kq = kqueue();
633 	if (dev->kq == -1) {
634 		warn("%s: Unable to create kqueue", __func__);
635 		goto bailout;
636 	}
637 
638 	ke_size = sizeof(struct kevent) * (num_ke + 4);
639 	ke = malloc(ke_size);
640 	if (ke == NULL) {
641 		warn("%s: unable to malloc %zu bytes", __func__, ke_size);
642 		goto bailout;
643 	}
644 	bzero(ke, ke_size);
645 	if (num_ke > 0)
646 		bcopy(new_ke, ke, num_ke * sizeof(struct kevent));
647 
648 	EV_SET(&ke[num_ke++], (uintptr_t)&dev->work_queue, EVFILT_USER,
649 	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
650 	EV_SET(&ke[num_ke++], (uintptr_t)&dev->peer_done_queue, EVFILT_USER,
651 	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
652 	EV_SET(&ke[num_ke++], SIGINFO, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
653 	EV_SET(&ke[num_ke++], SIGINT, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
654 
655 	retval = kevent(dev->kq, ke, num_ke, NULL, 0, NULL);
656 	if (retval == -1) {
657 		warn("%s: Unable to register kevents", __func__);
658 		goto bailout;
659 	}
660 
661 
662 	return (dev);
663 
664 bailout:
665 	free(dev);
666 
667 	return (NULL);
668 }
669 
670 static struct camdd_buf *
671 camdd_alloc_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
672 {
673 	struct camdd_buf *buf = NULL;
674 	uint8_t *data_ptr = NULL;
675 
676 	/*
677 	 * We only need to allocate data space for data buffers.
678 	 */
679 	switch (buf_type) {
680 	case CAMDD_BUF_DATA:
681 		data_ptr = malloc(dev->blocksize);
682 		if (data_ptr == NULL) {
683 			warn("unable to allocate %u bytes", dev->blocksize);
684 			goto bailout_error;
685 		}
686 		break;
687 	default:
688 		break;
689 	}
690 
691 	buf = malloc(sizeof(*buf));
692 	if (buf == NULL) {
693 		warn("unable to allocate %zu bytes", sizeof(*buf));
694 		goto bailout_error;
695 	}
696 
697 	bzero(buf, sizeof(*buf));
698 	buf->buf_type = buf_type;
699 	buf->dev = dev;
700 	switch (buf_type) {
701 	case CAMDD_BUF_DATA: {
702 		struct camdd_buf_data *data;
703 
704 		data = &buf->buf_type_spec.data;
705 
706 		data->alloc_len = dev->blocksize;
707 		data->buf = data_ptr;
708 		break;
709 	}
710 	case CAMDD_BUF_INDIRECT:
711 		break;
712 	default:
713 		break;
714 	}
715 	STAILQ_INIT(&buf->src_list);
716 
717 	return (buf);
718 
719 bailout_error:
720 	free(data_ptr);
721 
722 	return (NULL);
723 }
724 
725 void
726 camdd_release_buf(struct camdd_buf *buf)
727 {
728 	struct camdd_dev *dev;
729 
730 	dev = buf->dev;
731 
732 	switch (buf->buf_type) {
733 	case CAMDD_BUF_DATA: {
734 		struct camdd_buf_data *data;
735 
736 		data = &buf->buf_type_spec.data;
737 
738 		if (data->segs != NULL) {
739 			if (data->extra_buf != 0) {
740 				void *extra_buf;
741 
742 				extra_buf = (void *)
743 				    data->segs[data->sg_count - 1].ds_addr;
744 				free(extra_buf);
745 				data->extra_buf = 0;
746 			}
747 			free(data->segs);
748 			data->segs = NULL;
749 			data->sg_count = 0;
750 		} else if (data->iovec != NULL) {
751 			if (data->extra_buf != 0) {
752 				free(data->iovec[data->sg_count - 1].iov_base);
753 				data->extra_buf = 0;
754 			}
755 			free(data->iovec);
756 			data->iovec = NULL;
757 			data->sg_count = 0;
758 		}
759 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
760 		break;
761 	}
762 	case CAMDD_BUF_INDIRECT:
763 		STAILQ_INSERT_TAIL(&dev->free_indirect_queue, buf, links);
764 		break;
765 	default:
766 		err(1, "%s: Invalid buffer type %d for released buffer",
767 		    __func__, buf->buf_type);
768 		break;
769 	}
770 }
771 
772 struct camdd_buf *
773 camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
774 {
775 	struct camdd_buf *buf = NULL;
776 
777 	switch (buf_type) {
778 	case CAMDD_BUF_DATA:
779 		buf = STAILQ_FIRST(&dev->free_queue);
780 		if (buf != NULL) {
781 			struct camdd_buf_data *data;
782 			uint8_t *data_ptr;
783 			uint32_t alloc_len;
784 
785 			STAILQ_REMOVE_HEAD(&dev->free_queue, links);
786 			data = &buf->buf_type_spec.data;
787 			data_ptr = data->buf;
788 			alloc_len = data->alloc_len;
789 			bzero(buf, sizeof(*buf));
790 			data->buf = data_ptr;
791 			data->alloc_len = alloc_len;
792 		}
793 		break;
794 	case CAMDD_BUF_INDIRECT:
795 		buf = STAILQ_FIRST(&dev->free_indirect_queue);
796 		if (buf != NULL) {
797 			STAILQ_REMOVE_HEAD(&dev->free_indirect_queue, links);
798 
799 			bzero(buf, sizeof(*buf));
800 		}
801 		break;
802 	default:
803 		warnx("Unknown buffer type %d requested", buf_type);
804 		break;
805 	}
806 
807 
808 	if (buf == NULL)
809 		return (camdd_alloc_buf(dev, buf_type));
810 	else {
811 		STAILQ_INIT(&buf->src_list);
812 		buf->dev = dev;
813 		buf->buf_type = buf_type;
814 
815 		return (buf);
816 	}
817 }
818 
819 int
820 camdd_buf_sg_create(struct camdd_buf *buf, int iovec, uint32_t sector_size,
821 		    uint32_t *num_sectors_used, int *double_buf_needed)
822 {
823 	struct camdd_buf *tmp_buf;
824 	struct camdd_buf_data *data;
825 	uint8_t *extra_buf = NULL;
826 	size_t extra_buf_len = 0;
827 	int extra_buf_attached = 0;
828 	int i, retval = 0;
829 
830 	data = &buf->buf_type_spec.data;
831 
832 	data->sg_count = buf->src_count;
833 	/*
834 	 * Compose a scatter/gather list from all of the buffers in the list.
835 	 * If the length of the buffer isn't a multiple of the sector size,
836 	 * we'll have to add an extra buffer.  This should only happen
837 	 * at the end of a transfer.
838 	 */
839 	if ((data->fill_len % sector_size) != 0) {
840 		extra_buf_len = sector_size - (data->fill_len % sector_size);
841 		extra_buf = calloc(extra_buf_len, 1);
842 		if (extra_buf == NULL) {
843 			warn("%s: unable to allocate %zu bytes for extra "
844 			    "buffer space", __func__, extra_buf_len);
845 			retval = 1;
846 			goto bailout;
847 		}
848 		data->extra_buf = 1;
849 		data->sg_count++;
850 	}
851 	if (iovec == 0) {
852 		data->segs = calloc(data->sg_count, sizeof(bus_dma_segment_t));
853 		if (data->segs == NULL) {
854 			warn("%s: unable to allocate %zu bytes for S/G list",
855 			    __func__, sizeof(bus_dma_segment_t) *
856 			    data->sg_count);
857 			retval = 1;
858 			goto bailout;
859 		}
860 
861 	} else {
862 		data->iovec = calloc(data->sg_count, sizeof(struct iovec));
863 		if (data->iovec == NULL) {
864 			warn("%s: unable to allocate %zu bytes for S/G list",
865 			    __func__, sizeof(struct iovec) * data->sg_count);
866 			retval = 1;
867 			goto bailout;
868 		}
869 	}
870 
871 	for (i = 0, tmp_buf = STAILQ_FIRST(&buf->src_list);
872 	     i < buf->src_count && tmp_buf != NULL; i++,
873 	     tmp_buf = STAILQ_NEXT(tmp_buf, src_links)) {
874 
875 		if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
876 			struct camdd_buf_data *tmp_data;
877 
878 			tmp_data = &tmp_buf->buf_type_spec.data;
879 			if (iovec == 0) {
880 				data->segs[i].ds_addr =
881 				    (bus_addr_t) tmp_data->buf;
882 				data->segs[i].ds_len = tmp_data->fill_len -
883 				    tmp_data->resid;
884 			} else {
885 				data->iovec[i].iov_base = tmp_data->buf;
886 				data->iovec[i].iov_len = tmp_data->fill_len -
887 				    tmp_data->resid;
888 			}
889 			if (((tmp_data->fill_len - tmp_data->resid) %
890 			     sector_size) != 0)
891 				*double_buf_needed = 1;
892 		} else {
893 			struct camdd_buf_indirect *tmp_ind;
894 
895 			tmp_ind = &tmp_buf->buf_type_spec.indirect;
896 			if (iovec == 0) {
897 				data->segs[i].ds_addr =
898 				    (bus_addr_t)tmp_ind->start_ptr;
899 				data->segs[i].ds_len = tmp_ind->len;
900 			} else {
901 				data->iovec[i].iov_base = tmp_ind->start_ptr;
902 				data->iovec[i].iov_len = tmp_ind->len;
903 			}
904 			if ((tmp_ind->len % sector_size) != 0)
905 				*double_buf_needed = 1;
906 		}
907 	}
908 
909 	if (extra_buf != NULL) {
910 		if (iovec == 0) {
911 			data->segs[i].ds_addr = (bus_addr_t)extra_buf;
912 			data->segs[i].ds_len = extra_buf_len;
913 		} else {
914 			data->iovec[i].iov_base = extra_buf;
915 			data->iovec[i].iov_len = extra_buf_len;
916 		}
917 		extra_buf_attached = 1;
918 		i++;
919 	}
920 	if ((tmp_buf != NULL) || (i != data->sg_count)) {
921 		warnx("buffer source count does not match "
922 		      "number of buffers in list!");
923 		retval = 1;
924 		goto bailout;
925 	}
926 
927 bailout:
928 	if (retval == 0) {
929 		*num_sectors_used = (data->fill_len + extra_buf_len) /
930 		    sector_size;
931 	} else if (extra_buf_attached == 0) {
932 		/*
933 		 * If extra_buf isn't attached yet, we need to free it
934 		 * to avoid leaking.
935 		 */
936 		free(extra_buf);
937 		data->extra_buf = 0;
938 		data->sg_count--;
939 	}
940 	return (retval);
941 }
942 
943 uint32_t
944 camdd_buf_get_len(struct camdd_buf *buf)
945 {
946 	uint32_t len = 0;
947 
948 	if (buf->buf_type != CAMDD_BUF_DATA) {
949 		struct camdd_buf_indirect *indirect;
950 
951 		indirect = &buf->buf_type_spec.indirect;
952 		len = indirect->len;
953 	} else {
954 		struct camdd_buf_data *data;
955 
956 		data = &buf->buf_type_spec.data;
957 		len = data->fill_len;
958 	}
959 
960 	return (len);
961 }
962 
963 void
964 camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf)
965 {
966 	struct camdd_buf_data *data;
967 
968 	assert(buf->buf_type == CAMDD_BUF_DATA);
969 
970 	data = &buf->buf_type_spec.data;
971 
972 	STAILQ_INSERT_TAIL(&buf->src_list, child_buf, src_links);
973 	buf->src_count++;
974 
975 	data->fill_len += camdd_buf_get_len(child_buf);
976 }
977 
/*
 * Indices into req_status_items[]; the enumerators and the table entries
 * must stay in step.
 */
typedef enum {
	CAMDD_TS_MAX_BLK	= 0,
	CAMDD_TS_MIN_BLK	= 1,
	CAMDD_TS_BLK_GRAN	= 2,
	CAMDD_TS_EFF_IOSIZE	= 3
} camdd_status_item_index;

/*
 * Tape status entries looked up by camdd_probe_tape(): the name to search
 * for, and a slot that receives the entry once it has been found.
 */
static struct camdd_status_items {
	const char *name;
	struct mt_status_entry *entry;
} req_status_items[] = {
	[CAMDD_TS_MAX_BLK]	= { .name = "max_blk", .entry = NULL },
	[CAMDD_TS_MIN_BLK]	= { .name = "min_blk", .entry = NULL },
	[CAMDD_TS_BLK_GRAN]	= { .name = "blk_gran", .entry = NULL },
	[CAMDD_TS_EFF_IOSIZE]	= { .name = "max_effective_iosize",
				    .entry = NULL }
};
994 
995 int
996 camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
997 		 uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran)
998 {
999 	struct mt_status_data status_data;
1000 	char *xml_str = NULL;
1001 	unsigned int i;
1002 	int retval = 0;
1003 
1004 	retval = mt_get_xml_str(fd, MTIOCEXTGET, &xml_str);
1005 	if (retval != 0)
1006 		err(1, "Couldn't get XML string from %s", filename);
1007 
1008 	retval = mt_get_status(xml_str, &status_data);
1009 	if (retval != XML_STATUS_OK) {
1010 		warn("couldn't get status for %s", filename);
1011 		retval = 1;
1012 		goto bailout;
1013 	} else
1014 		retval = 0;
1015 
1016 	if (status_data.error != 0) {
1017 		warnx("%s", status_data.error_str);
1018 		retval = 1;
1019 		goto bailout;
1020 	}
1021 
1022 	for (i = 0; i < nitems(req_status_items); i++) {
1023                 char *name;
1024 
1025 		name = __DECONST(char *, req_status_items[i].name);
1026 		req_status_items[i].entry = mt_status_entry_find(&status_data,
1027 		    name);
1028 		if (req_status_items[i].entry == NULL) {
1029 			errx(1, "Cannot find status entry %s",
1030 			    req_status_items[i].name);
1031 		}
1032 	}
1033 
1034 	*max_iosize = req_status_items[CAMDD_TS_EFF_IOSIZE].entry->value_unsigned;
1035 	*max_blk= req_status_items[CAMDD_TS_MAX_BLK].entry->value_unsigned;
1036 	*min_blk= req_status_items[CAMDD_TS_MIN_BLK].entry->value_unsigned;
1037 	*blk_gran = req_status_items[CAMDD_TS_BLK_GRAN].entry->value_unsigned;
1038 bailout:
1039 
1040 	free(xml_str);
1041 	mt_status_free(&status_data);
1042 
1043 	return (retval);
1044 }
1045 
1046 struct camdd_dev *
1047 camdd_probe_file(int fd, struct camdd_io_opts *io_opts, int retry_count,
1048     int timeout)
1049 {
1050 	struct camdd_dev *dev = NULL;
1051 	struct camdd_dev_file *file_dev;
1052 	uint64_t blocksize = io_opts->blocksize;
1053 
1054 	dev = camdd_alloc_dev(CAMDD_DEV_FILE, NULL, 0, retry_count, timeout);
1055 	if (dev == NULL)
1056 		goto bailout;
1057 
1058 	file_dev = &dev->dev_spec.file;
1059 	file_dev->fd = fd;
1060 	strlcpy(file_dev->filename, io_opts->dev_name,
1061 	    sizeof(file_dev->filename));
1062 	strlcpy(dev->device_name, io_opts->dev_name, sizeof(dev->device_name));
1063 	if (blocksize == 0)
1064 		dev->blocksize = CAMDD_FILE_DEFAULT_BLOCK;
1065 	else
1066 		dev->blocksize = blocksize;
1067 
1068 	if ((io_opts->queue_depth != 0)
1069 	 && (io_opts->queue_depth != 1)) {
1070 		warnx("Queue depth %ju for %s ignored, only 1 outstanding "
1071 		    "command supported", (uintmax_t)io_opts->queue_depth,
1072 		    io_opts->dev_name);
1073 	}
1074 	dev->target_queue_depth = CAMDD_FILE_DEFAULT_DEPTH;
1075 	dev->run = camdd_file_run;
1076 	dev->fetch = NULL;
1077 
1078 	/*
1079 	 * We can effectively access files on byte boundaries.  We'll reset
1080 	 * this for devices like disks that can be accessed on sector
1081 	 * boundaries.
1082 	 */
1083 	dev->sector_size = 1;
1084 
1085 	if ((fd != STDIN_FILENO)
1086 	 && (fd != STDOUT_FILENO)) {
1087 		int retval;
1088 
1089 		retval = fstat(fd, &file_dev->sb);
1090 		if (retval != 0) {
1091 			warn("Cannot stat %s", dev->device_name);
1092 			goto bailout_error;
1093 		}
1094 		if (S_ISREG(file_dev->sb.st_mode)) {
1095 			file_dev->file_type = CAMDD_FILE_REG;
1096 		} else if (S_ISCHR(file_dev->sb.st_mode)) {
1097 			int type;
1098 
1099 			if (ioctl(fd, FIODTYPE, &type) == -1)
1100 				err(1, "FIODTYPE ioctl failed on %s",
1101 				    dev->device_name);
1102 			else {
1103 				if (type & D_TAPE)
1104 					file_dev->file_type = CAMDD_FILE_TAPE;
1105 				else if (type & D_DISK)
1106 					file_dev->file_type = CAMDD_FILE_DISK;
1107 				else if (type & D_MEM)
1108 					file_dev->file_type = CAMDD_FILE_MEM;
1109 				else if (type & D_TTY)
1110 					file_dev->file_type = CAMDD_FILE_TTY;
1111 			}
1112 		} else if (S_ISDIR(file_dev->sb.st_mode)) {
1113 			errx(1, "cannot operate on directory %s",
1114 			    dev->device_name);
1115 		} else if (S_ISFIFO(file_dev->sb.st_mode)) {
1116 			file_dev->file_type = CAMDD_FILE_PIPE;
1117 		} else
1118 			errx(1, "Cannot determine file type for %s",
1119 			    dev->device_name);
1120 
1121 		switch (file_dev->file_type) {
1122 		case CAMDD_FILE_REG:
1123 			if (file_dev->sb.st_size != 0)
1124 				dev->max_sector = file_dev->sb.st_size - 1;
1125 			else
1126 				dev->max_sector = 0;
1127 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1128 			break;
1129 		case CAMDD_FILE_TAPE: {
1130 			uint64_t max_iosize, max_blk, min_blk, blk_gran;
1131 			/*
1132 			 * Check block limits and maximum effective iosize.
1133 			 * Make sure the blocksize is within the block
1134 			 * limits (and a multiple of the minimum blocksize)
1135 			 * and that the blocksize is <= maximum effective
1136 			 * iosize.
1137 			 */
1138 			retval = camdd_probe_tape(fd, dev->device_name,
1139 			    &max_iosize, &max_blk, &min_blk, &blk_gran);
1140 			if (retval != 0)
1141 				errx(1, "Unable to probe tape %s",
1142 				    dev->device_name);
1143 
1144 			/*
1145 			 * The blocksize needs to be <= the maximum
1146 			 * effective I/O size of the tape device.  Note
1147 			 * that this also takes into account the maximum
1148 			 * blocksize reported by READ BLOCK LIMITS.
1149 			 */
1150 			if (dev->blocksize > max_iosize) {
1151 				warnx("Blocksize %u too big for %s, limiting "
1152 				    "to %ju", dev->blocksize, dev->device_name,
1153 				    max_iosize);
1154 				dev->blocksize = max_iosize;
1155 			}
1156 
1157 			/*
1158 			 * The blocksize needs to be at least min_blk;
1159 			 */
1160 			if (dev->blocksize < min_blk) {
1161 				warnx("Blocksize %u too small for %s, "
1162 				    "increasing to %ju", dev->blocksize,
1163 				    dev->device_name, min_blk);
1164 				dev->blocksize = min_blk;
1165 			}
1166 
1167 			/*
1168 			 * And the blocksize needs to be a multiple of
1169 			 * the block granularity.
1170 			 */
1171 			if ((blk_gran != 0)
1172 			 && (dev->blocksize % (1 << blk_gran))) {
1173 				warnx("Blocksize %u for %s not a multiple of "
1174 				    "%d, adjusting to %d", dev->blocksize,
1175 				    dev->device_name, (1 << blk_gran),
1176 				    dev->blocksize & ~((1 << blk_gran) - 1));
1177 				dev->blocksize &= ~((1 << blk_gran) - 1);
1178 			}
1179 
1180 			if (dev->blocksize == 0) {
1181 				errx(1, "Unable to derive valid blocksize for "
1182 				    "%s", dev->device_name);
1183 			}
1184 
1185 			/*
1186 			 * For tape drives, set the sector size to the
1187 			 * blocksize so that we make sure not to write
1188 			 * less than the blocksize out to the drive.
1189 			 */
1190 			dev->sector_size = dev->blocksize;
1191 			break;
1192 		}
1193 		case CAMDD_FILE_DISK: {
1194 			off_t media_size;
1195 			unsigned int sector_size;
1196 
1197 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1198 
1199 			if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) == -1) {
1200 				err(1, "DIOCGSECTORSIZE ioctl failed on %s",
1201 				    dev->device_name);
1202 			}
1203 
1204 			if (sector_size == 0) {
1205 				errx(1, "DIOCGSECTORSIZE ioctl returned "
1206 				    "invalid sector size %u for %s",
1207 				    sector_size, dev->device_name);
1208 			}
1209 
1210 			if (ioctl(fd, DIOCGMEDIASIZE, &media_size) == -1) {
1211 				err(1, "DIOCGMEDIASIZE ioctl failed on %s",
1212 				    dev->device_name);
1213 			}
1214 
1215 			if (media_size == 0) {
1216 				errx(1, "DIOCGMEDIASIZE ioctl returned "
1217 				    "invalid media size %ju for %s",
1218 				    (uintmax_t)media_size, dev->device_name);
1219 			}
1220 
1221 			if (dev->blocksize % sector_size) {
1222 				errx(1, "%s blocksize %u not a multiple of "
1223 				    "sector size %u", dev->device_name,
1224 				    dev->blocksize, sector_size);
1225 			}
1226 
1227 			dev->sector_size = sector_size;
1228 			dev->max_sector = (media_size / sector_size) - 1;
1229 			break;
1230 		}
1231 		case CAMDD_FILE_MEM:
1232 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1233 			break;
1234 		default:
1235 			break;
1236 		}
1237 	}
1238 
1239 	if ((io_opts->offset != 0)
1240 	 && ((file_dev->file_flags & CAMDD_FF_CAN_SEEK) == 0)) {
1241 		warnx("Offset %ju specified for %s, but we cannot seek on %s",
1242 		    io_opts->offset, io_opts->dev_name, io_opts->dev_name);
1243 		goto bailout_error;
1244 	}
1245 #if 0
1246 	else if ((io_opts->offset != 0)
1247 		&& ((io_opts->offset % dev->sector_size) != 0)) {
1248 		warnx("Offset %ju for %s is not a multiple of the "
1249 		      "sector size %u", io_opts->offset,
1250 		      io_opts->dev_name, dev->sector_size);
1251 		goto bailout_error;
1252 	} else {
1253 		dev->start_offset_bytes = io_opts->offset;
1254 	}
1255 #endif
1256 
1257 bailout:
1258 	return (dev);
1259 
1260 bailout_error:
1261 	camdd_free_dev(dev);
1262 	return (NULL);
1263 }
1264 
1265 /*
1266  * Get a get device CCB for the specified device.
1267  */
1268 int
1269 camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd)
1270 {
1271         union ccb *ccb;
1272 	int retval = 0;
1273 
1274 	ccb = cam_getccb(device);
1275 
1276 	if (ccb == NULL) {
1277 		warnx("%s: couldn't allocate CCB", __func__);
1278 		return -1;
1279 	}
1280 
1281 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cgd);
1282 
1283 	ccb->ccb_h.func_code = XPT_GDEV_TYPE;
1284 
1285 	if (cam_send_ccb(device, ccb) < 0) {
1286 		warn("%s: error sending Get Device Information CCB", __func__);
1287 			cam_error_print(device, ccb, CAM_ESF_ALL,
1288 					CAM_EPF_ALL, stderr);
1289 		retval = -1;
1290 		goto bailout;
1291 	}
1292 
1293 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1294 			cam_error_print(device, ccb, CAM_ESF_ALL,
1295 					CAM_EPF_ALL, stderr);
1296 		retval = -1;
1297 		goto bailout;
1298 	}
1299 
1300 	bcopy(&ccb->cgd, cgd, sizeof(struct ccb_getdev));
1301 
1302 bailout:
1303 	cam_freeccb(ccb);
1304 
1305 	return retval;
1306 }
1307 
1308 int
1309 camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
1310 		 camdd_argmask arglist, int probe_retry_count,
1311 		 int probe_timeout, uint64_t *maxsector, uint32_t *block_len)
1312 {
1313 	struct scsi_read_capacity_data rcap;
1314 	struct scsi_read_capacity_data_long rcaplong;
1315 	int retval = -1;
1316 
1317 	if (ccb == NULL) {
1318 		warnx("%s: error passed ccb is NULL", __func__);
1319 		goto bailout;
1320 	}
1321 
1322 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
1323 
1324 	scsi_read_capacity(&ccb->csio,
1325 			   /*retries*/ probe_retry_count,
1326 			   /*cbfcnp*/ NULL,
1327 			   /*tag_action*/ MSG_SIMPLE_Q_TAG,
1328 			   &rcap,
1329 			   SSD_FULL_SIZE,
1330 			   /*timeout*/ probe_timeout ? probe_timeout : 5000);
1331 
1332 	/* Disable freezing the device queue */
1333 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1334 
1335 	if (arglist & CAMDD_ARG_ERR_RECOVER)
1336 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1337 
1338 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1339 		warn("error sending READ CAPACITY command");
1340 
1341 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1342 				CAM_EPF_ALL, stderr);
1343 
1344 		goto bailout;
1345 	}
1346 
1347 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1348 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1349 		goto bailout;
1350 	}
1351 
1352 	*maxsector = scsi_4btoul(rcap.addr);
1353 	*block_len = scsi_4btoul(rcap.length);
1354 
1355 	/*
1356 	 * A last block of 2^32-1 means that the true capacity is over 2TB,
1357 	 * and we need to issue the long READ CAPACITY to get the real
1358 	 * capacity.  Otherwise, we're all set.
1359 	 */
1360 	if (*maxsector != 0xffffffff) {
1361 		retval = 0;
1362 		goto bailout;
1363 	}
1364 
1365 	scsi_read_capacity_16(&ccb->csio,
1366 			      /*retries*/ probe_retry_count,
1367 			      /*cbfcnp*/ NULL,
1368 			      /*tag_action*/ MSG_SIMPLE_Q_TAG,
1369 			      /*lba*/ 0,
1370 			      /*reladdr*/ 0,
1371 			      /*pmi*/ 0,
1372 			      (uint8_t *)&rcaplong,
1373 			      sizeof(rcaplong),
1374 			      /*sense_len*/ SSD_FULL_SIZE,
1375 			      /*timeout*/ probe_timeout ? probe_timeout : 5000);
1376 
1377 	/* Disable freezing the device queue */
1378 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1379 
1380 	if (arglist & CAMDD_ARG_ERR_RECOVER)
1381 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1382 
1383 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1384 		warn("error sending READ CAPACITY (16) command");
1385 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1386 				CAM_EPF_ALL, stderr);
1387 		goto bailout;
1388 	}
1389 
1390 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1391 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1392 		goto bailout;
1393 	}
1394 
1395 	*maxsector = scsi_8btou64(rcaplong.addr);
1396 	*block_len = scsi_4btoul(rcaplong.length);
1397 
1398 	retval = 0;
1399 
1400 bailout:
1401 	return retval;
1402 }
1403 
1404 /*
1405  * Need to implement this.  Do a basic probe:
1406  * - Check the inquiry data, make sure we're talking to a device that we
1407  *   can reasonably expect to talk to -- direct, RBC, CD, WORM.
1408  * - Send a test unit ready, make sure the device is available.
1409  * - Get the capacity and block size.
1410  */
1411 struct camdd_dev *
1412 camdd_probe_pass(struct cam_device *cam_dev, struct camdd_io_opts *io_opts,
1413 		 camdd_argmask arglist, int probe_retry_count,
1414 		 int probe_timeout, int io_retry_count, int io_timeout)
1415 {
1416 	union ccb *ccb;
1417 	uint64_t maxsector = 0;
1418 	uint32_t cpi_maxio, max_iosize, pass_numblocks;
1419 	uint32_t block_len = 0;
1420 	struct camdd_dev *dev = NULL;
1421 	struct camdd_dev_pass *pass_dev;
1422 	struct kevent ke;
1423 	struct ccb_getdev cgd;
1424 	int retval;
1425 	int scsi_dev_type;
1426 
1427 	if ((retval = camdd_get_cgd(cam_dev, &cgd)) != 0) {
1428 		warnx("%s: error retrieving CGD", __func__);
1429 		return NULL;
1430 	}
1431 
1432 	ccb = cam_getccb(cam_dev);
1433 
1434 	if (ccb == NULL) {
1435 		warnx("%s: error allocating ccb", __func__);
1436 		goto bailout;
1437 	}
1438 
1439 	switch (cgd.protocol) {
1440 	case PROTO_SCSI:
1441 		scsi_dev_type = SID_TYPE(&cam_dev->inq_data);
1442 
1443 		/*
1444 		 * For devices that support READ CAPACITY, we'll attempt to get the
1445 		 * capacity.  Otherwise, we really don't support tape or other
1446 		 * devices via SCSI passthrough, so just return an error in that case.
1447 		 */
1448 		switch (scsi_dev_type) {
1449 		case T_DIRECT:
1450 		case T_WORM:
1451 		case T_CDROM:
1452 		case T_OPTICAL:
1453 		case T_RBC:
1454 		case T_ZBC_HM:
1455 			break;
1456 		default:
1457 			errx(1, "Unsupported SCSI device type %d", scsi_dev_type);
1458 			break; /*NOTREACHED*/
1459 		}
1460 
1461 		if ((retval = camdd_probe_pass_scsi(cam_dev, ccb, probe_retry_count,
1462 						arglist, probe_timeout, &maxsector,
1463 						&block_len))) {
1464 			goto bailout;
1465 		}
1466 		break;
1467 	default:
1468 		errx(1, "Unsupported PROTO type %d", cgd.protocol);
1469 		break; /*NOTREACHED*/
1470 	}
1471 
1472 	if (block_len == 0) {
1473 		warnx("Sector size for %s%u is 0, cannot continue",
1474 		    cam_dev->device_name, cam_dev->dev_unit_num);
1475 		goto bailout_error;
1476 	}
1477 
1478 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cpi);
1479 
1480 	ccb->ccb_h.func_code = XPT_PATH_INQ;
1481 	ccb->ccb_h.flags = CAM_DIR_NONE;
1482 	ccb->ccb_h.retry_count = 1;
1483 
1484 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1485 		warn("error sending XPT_PATH_INQ CCB");
1486 
1487 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1488 				CAM_EPF_ALL, stderr);
1489 		goto bailout;
1490 	}
1491 
1492 	EV_SET(&ke, cam_dev->fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1493 
1494 	dev = camdd_alloc_dev(CAMDD_DEV_PASS, &ke, 1, io_retry_count,
1495 			      io_timeout);
1496 	if (dev == NULL)
1497 		goto bailout;
1498 
1499 	pass_dev = &dev->dev_spec.pass;
1500 	pass_dev->scsi_dev_type = scsi_dev_type;
1501 	pass_dev->protocol = cgd.protocol;
1502 	pass_dev->dev = cam_dev;
1503 	pass_dev->max_sector = maxsector;
1504 	pass_dev->block_len = block_len;
1505 	pass_dev->cpi_maxio = ccb->cpi.maxio;
1506 	snprintf(dev->device_name, sizeof(dev->device_name), "%s%u",
1507 		 pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
1508 	dev->sector_size = block_len;
1509 	dev->max_sector = maxsector;
1510 
1511 
1512 	/*
1513 	 * Determine the optimal blocksize to use for this device.
1514 	 */
1515 
1516 	/*
1517 	 * If the controller has not specified a maximum I/O size,
1518 	 * just go with 128K as a somewhat conservative value.
1519 	 */
1520 	if (pass_dev->cpi_maxio == 0)
1521 		cpi_maxio = 131072;
1522 	else
1523 		cpi_maxio = pass_dev->cpi_maxio;
1524 
1525 	/*
1526 	 * If the controller has a large maximum I/O size, limit it
1527 	 * to something smaller so that the kernel doesn't have trouble
1528 	 * allocating buffers to copy data in and out for us.
1529 	 * XXX KDM this is until we have unmapped I/O support in the kernel.
1530 	 */
1531 	max_iosize = min(cpi_maxio, CAMDD_PASS_MAX_BLOCK);
1532 
1533 	/*
1534 	 * If we weren't able to get a block size for some reason,
1535 	 * default to 512 bytes.
1536 	 */
1537 	block_len = pass_dev->block_len;
1538 	if (block_len == 0)
1539 		block_len = 512;
1540 
1541 	/*
1542 	 * Figure out how many blocksize chunks will fit in the
1543 	 * maximum I/O size.
1544 	 */
1545 	pass_numblocks = max_iosize / block_len;
1546 
1547 	/*
1548 	 * And finally, multiple the number of blocks by the LBA
1549 	 * length to get our maximum block size;
1550 	 */
1551 	dev->blocksize = pass_numblocks * block_len;
1552 
1553 	if (io_opts->blocksize != 0) {
1554 		if ((io_opts->blocksize % dev->sector_size) != 0) {
1555 			warnx("Blocksize %ju for %s is not a multiple of "
1556 			      "sector size %u", (uintmax_t)io_opts->blocksize,
1557 			      dev->device_name, dev->sector_size);
1558 			goto bailout_error;
1559 		}
1560 		dev->blocksize = io_opts->blocksize;
1561 	}
1562 	dev->target_queue_depth = CAMDD_PASS_DEFAULT_DEPTH;
1563 	if (io_opts->queue_depth != 0)
1564 		dev->target_queue_depth = io_opts->queue_depth;
1565 
1566 	if (io_opts->offset != 0) {
1567 		if (io_opts->offset > (dev->max_sector * dev->sector_size)) {
1568 			warnx("Offset %ju is past the end of device %s",
1569 			    io_opts->offset, dev->device_name);
1570 			goto bailout_error;
1571 		}
1572 #if 0
1573 		else if ((io_opts->offset % dev->sector_size) != 0) {
1574 			warnx("Offset %ju for %s is not a multiple of the "
1575 			      "sector size %u", io_opts->offset,
1576 			      dev->device_name, dev->sector_size);
1577 			goto bailout_error;
1578 		}
1579 		dev->start_offset_bytes = io_opts->offset;
1580 #endif
1581 	}
1582 
1583 	dev->min_cmd_size = io_opts->min_cmd_size;
1584 
1585 	dev->run = camdd_pass_run;
1586 	dev->fetch = camdd_pass_fetch;
1587 
1588 bailout:
1589 	cam_freeccb(ccb);
1590 
1591 	return (dev);
1592 
1593 bailout_error:
1594 	cam_freeccb(ccb);
1595 
1596 	camdd_free_dev(dev);
1597 
1598 	return (NULL);
1599 }
1600 
1601 void *
1602 camdd_worker(void *arg)
1603 {
1604 	struct camdd_dev *dev = arg;
1605 	struct camdd_buf *buf;
1606 	struct timespec ts, *kq_ts;
1607 
1608 	ts.tv_sec = 0;
1609 	ts.tv_nsec = 0;
1610 
1611 	pthread_mutex_lock(&dev->mutex);
1612 
1613 	dev->flags |= CAMDD_DEV_FLAG_ACTIVE;
1614 
1615 	for (;;) {
1616 		struct kevent ke;
1617 		int retval = 0;
1618 
1619 		/*
1620 		 * XXX KDM check the reorder queue depth?
1621 		 */
1622 		if (dev->write_dev == 0) {
1623 			uint32_t our_depth, peer_depth, peer_bytes, our_bytes;
1624 			uint32_t target_depth = dev->target_queue_depth;
1625 			uint32_t peer_target_depth =
1626 			    dev->peer_dev->target_queue_depth;
1627 			uint32_t peer_blocksize = dev->peer_dev->blocksize;
1628 
1629 			camdd_get_depth(dev, &our_depth, &peer_depth,
1630 					&our_bytes, &peer_bytes);
1631 
1632 #if 0
1633 			while (((our_depth < target_depth)
1634 			     && (peer_depth < peer_target_depth))
1635 			    || ((peer_bytes + our_bytes) <
1636 				 (peer_blocksize * 2))) {
1637 #endif
1638 			while (((our_depth + peer_depth) <
1639 			        (target_depth + peer_target_depth))
1640 			    || ((peer_bytes + our_bytes) <
1641 				(peer_blocksize * 3))) {
1642 
1643 				retval = camdd_queue(dev, NULL);
1644 				if (retval == 1)
1645 					break;
1646 				else if (retval != 0) {
1647 					error_exit = 1;
1648 					goto bailout;
1649 				}
1650 
1651 				camdd_get_depth(dev, &our_depth, &peer_depth,
1652 						&our_bytes, &peer_bytes);
1653 			}
1654 		}
1655 		/*
1656 		 * See if we have any I/O that is ready to execute.
1657 		 */
1658 		buf = STAILQ_FIRST(&dev->run_queue);
1659 		if (buf != NULL) {
1660 			while (dev->target_queue_depth > dev->cur_active_io) {
1661 				retval = dev->run(dev);
1662 				if (retval == -1) {
1663 					dev->flags |= CAMDD_DEV_FLAG_EOF;
1664 					error_exit = 1;
1665 					break;
1666 				} else if (retval != 0) {
1667 					break;
1668 				}
1669 			}
1670 		}
1671 
1672 		/*
1673 		 * We've reached EOF, or our partner has reached EOF.
1674 		 */
1675 		if ((dev->flags & CAMDD_DEV_FLAG_EOF)
1676 		 || (dev->flags & CAMDD_DEV_FLAG_PEER_EOF)) {
1677 			if (dev->write_dev != 0) {
1678 			 	if ((STAILQ_EMPTY(&dev->work_queue))
1679 				 && (dev->num_run_queue == 0)
1680 				 && (dev->cur_active_io == 0)) {
1681 					goto bailout;
1682 				}
1683 			} else {
1684 				/*
1685 				 * If we're the reader, and the writer
1686 				 * got EOF, he is already done.  If we got
1687 				 * the EOF, then we need to wait until
1688 				 * everything is flushed out for the writer.
1689 				 */
1690 				if (dev->flags & CAMDD_DEV_FLAG_PEER_EOF) {
1691 					goto bailout;
1692 				} else if ((dev->num_peer_work_queue == 0)
1693 					&& (dev->num_peer_done_queue == 0)
1694 					&& (dev->cur_active_io == 0)
1695 					&& (dev->num_run_queue == 0)) {
1696 					goto bailout;
1697 				}
1698 			}
1699 			/*
1700 			 * XXX KDM need to do something about the pending
1701 			 * queue and cleanup resources.
1702 			 */
1703 		}
1704 
1705 		if ((dev->write_dev == 0)
1706 		 && (dev->cur_active_io == 0)
1707 		 && (dev->peer_bytes_queued < dev->peer_dev->blocksize))
1708 			kq_ts = &ts;
1709 		else
1710 			kq_ts = NULL;
1711 
1712 		/*
1713 		 * Run kevent to see if there are events to process.
1714 		 */
1715 		pthread_mutex_unlock(&dev->mutex);
1716 		retval = kevent(dev->kq, NULL, 0, &ke, 1, kq_ts);
1717 		pthread_mutex_lock(&dev->mutex);
1718 		if (retval == -1) {
1719 			warn("%s: error returned from kevent",__func__);
1720 			goto bailout;
1721 		} else if (retval != 0) {
1722 			switch (ke.filter) {
1723 			case EVFILT_READ:
1724 				if (dev->fetch != NULL) {
1725 					retval = dev->fetch(dev);
1726 					if (retval == -1) {
1727 						error_exit = 1;
1728 						goto bailout;
1729 					}
1730 				}
1731 				break;
1732 			case EVFILT_SIGNAL:
1733 				/*
1734 				 * We register for this so we don't get
1735 				 * an error as a result of a SIGINFO or a
1736 				 * SIGINT.  It will actually get handled
1737 				 * by the signal handler.  If we get a
1738 				 * SIGINT, bail out without printing an
1739 				 * error message.  Any other signals
1740 				 * will result in the error message above.
1741 				 */
1742 				if (ke.ident == SIGINT)
1743 					goto bailout;
1744 				break;
1745 			case EVFILT_USER:
1746 				retval = 0;
1747 				/*
1748 				 * Check to see if the other thread has
1749 				 * queued any I/O for us to do.  (In this
1750 				 * case we're the writer.)
1751 				 */
1752 				for (buf = STAILQ_FIRST(&dev->work_queue);
1753 				     buf != NULL;
1754 				     buf = STAILQ_FIRST(&dev->work_queue)) {
1755 					STAILQ_REMOVE_HEAD(&dev->work_queue,
1756 							   work_links);
1757 					retval = camdd_queue(dev, buf);
1758 					/*
1759 					 * We keep going unless we get an
1760 					 * actual error.  If we get EOF, we
1761 					 * still want to remove the buffers
1762 					 * from the queue and send the back
1763 					 * to the reader thread.
1764 					 */
1765 					if (retval == -1) {
1766 						error_exit = 1;
1767 						goto bailout;
1768 					} else
1769 						retval = 0;
1770 				}
1771 
1772 				/*
1773 				 * Next check to see if the other thread has
1774 				 * queued any completed buffers back to us.
1775 				 * (In this case we're the reader.)
1776 				 */
1777 				for (buf = STAILQ_FIRST(&dev->peer_done_queue);
1778 				     buf != NULL;
1779 				     buf = STAILQ_FIRST(&dev->peer_done_queue)){
1780 					STAILQ_REMOVE_HEAD(
1781 					    &dev->peer_done_queue, work_links);
1782 					dev->num_peer_done_queue--;
1783 					camdd_peer_done(buf);
1784 				}
1785 				break;
1786 			default:
1787 				warnx("%s: unknown kevent filter %d",
1788 				      __func__, ke.filter);
1789 				break;
1790 			}
1791 		}
1792 	}
1793 
1794 bailout:
1795 
1796 	dev->flags &= ~CAMDD_DEV_FLAG_ACTIVE;
1797 
1798 	/* XXX KDM cleanup resources here? */
1799 
1800 	pthread_mutex_unlock(&dev->mutex);
1801 
1802 	need_exit = 1;
1803 	sem_post(&camdd_sem);
1804 
1805 	return (NULL);
1806 }
1807 
1808 /*
1809  * Simplistic translation of CCB status to our local status.
1810  */
1811 camdd_buf_status
1812 camdd_ccb_status(union ccb *ccb, int protocol)
1813 {
1814 	camdd_buf_status status = CAMDD_STATUS_NONE;
1815 	cam_status ccb_status;
1816 
1817 	ccb_status = ccb->ccb_h.status & CAM_STATUS_MASK;
1818 
1819 	switch (protocol) {
1820 	case PROTO_SCSI:
1821 		switch (ccb_status) {
1822 		case CAM_REQ_CMP: {
1823 			if (ccb->csio.resid == 0) {
1824 				status = CAMDD_STATUS_OK;
1825 			} else if (ccb->csio.dxfer_len > ccb->csio.resid) {
1826 				status = CAMDD_STATUS_SHORT_IO;
1827 			} else {
1828 				status = CAMDD_STATUS_EOF;
1829 			}
1830 			break;
1831 		}
1832 		case CAM_SCSI_STATUS_ERROR: {
1833 			switch (ccb->csio.scsi_status) {
1834 			case SCSI_STATUS_OK:
1835 			case SCSI_STATUS_COND_MET:
1836 			case SCSI_STATUS_INTERMED:
1837 			case SCSI_STATUS_INTERMED_COND_MET:
1838 				status = CAMDD_STATUS_OK;
1839 				break;
1840 			case SCSI_STATUS_CMD_TERMINATED:
1841 			case SCSI_STATUS_CHECK_COND:
1842 			case SCSI_STATUS_QUEUE_FULL:
1843 			case SCSI_STATUS_BUSY:
1844 			case SCSI_STATUS_RESERV_CONFLICT:
1845 			default:
1846 				status = CAMDD_STATUS_ERROR;
1847 				break;
1848 			}
1849 			break;
1850 		}
1851 		default:
1852 			status = CAMDD_STATUS_ERROR;
1853 			break;
1854 		}
1855 		break;
1856 	default:
1857 		status = CAMDD_STATUS_ERROR;
1858 		break;
1859 	}
1860 
1861 	return (status);
1862 }
1863 
1864 /*
1865  * Queue a buffer to our peer's work thread for writing.
1866  *
1867  * Returns 0 for success, -1 for failure, 1 if the other thread exited.
1868  */
1869 int
1870 camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf)
1871 {
1872 	struct kevent ke;
1873 	STAILQ_HEAD(, camdd_buf) local_queue;
1874 	struct camdd_buf *buf1, *buf2;
1875 	struct camdd_buf_data *data = NULL;
1876 	uint64_t peer_bytes_queued = 0;
1877 	int active = 1;
1878 	int retval = 0;
1879 
1880 	STAILQ_INIT(&local_queue);
1881 
1882 	/*
1883 	 * Since we're the reader, we need to queue our I/O to the writer
1884 	 * in sequential order in order to make sure it gets written out
1885 	 * in sequential order.
1886 	 *
1887 	 * Check the next expected I/O starting offset.  If this doesn't
1888 	 * match, put it on the reorder queue.
1889 	 */
1890 	if ((buf->lba * dev->sector_size) != dev->next_completion_pos_bytes) {
1891 
1892 		/*
1893 		 * If there is nothing on the queue, there is no sorting
1894 		 * needed.
1895 		 */
1896 		if (STAILQ_EMPTY(&dev->reorder_queue)) {
1897 			STAILQ_INSERT_TAIL(&dev->reorder_queue, buf, links);
1898 			dev->num_reorder_queue++;
1899 			goto bailout;
1900 		}
1901 
1902 		/*
1903 		 * Sort in ascending order by starting LBA.  There should
1904 		 * be no identical LBAs.
1905 		 */
1906 		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
1907 		     buf1 = buf2) {
1908 			buf2 = STAILQ_NEXT(buf1, links);
1909 			if (buf->lba < buf1->lba) {
1910 				/*
1911 				 * If we're less than the first one, then
1912 				 * we insert at the head of the list
1913 				 * because this has to be the first element
1914 				 * on the list.
1915 				 */
1916 				STAILQ_INSERT_HEAD(&dev->reorder_queue,
1917 						   buf, links);
1918 				dev->num_reorder_queue++;
1919 				break;
1920 			} else if (buf->lba > buf1->lba) {
1921 				if (buf2 == NULL) {
1922 					STAILQ_INSERT_TAIL(&dev->reorder_queue,
1923 					    buf, links);
1924 					dev->num_reorder_queue++;
1925 					break;
1926 				} else if (buf->lba < buf2->lba) {
1927 					STAILQ_INSERT_AFTER(&dev->reorder_queue,
1928 					    buf1, buf, links);
1929 					dev->num_reorder_queue++;
1930 					break;
1931 				}
1932 			} else {
1933 				errx(1, "Found buffers with duplicate LBA %ju!",
1934 				     buf->lba);
1935 			}
1936 		}
1937 		goto bailout;
1938 	} else {
1939 
1940 		/*
1941 		 * We're the next expected I/O completion, so put ourselves
1942 		 * on the local queue to be sent to the writer.  We use
1943 		 * work_links here so that we can queue this to the
1944 		 * peer_work_queue before taking the buffer off of the
1945 		 * local_queue.
1946 		 */
1947 		dev->next_completion_pos_bytes += buf->len;
1948 		STAILQ_INSERT_TAIL(&local_queue, buf, work_links);
1949 
1950 		/*
1951 		 * Go through the reorder queue looking for more sequential
1952 		 * I/O and add it to the local queue.
1953 		 */
1954 		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
1955 		     buf1 = STAILQ_FIRST(&dev->reorder_queue)) {
1956 			/*
1957 			 * As soon as we see an I/O that is out of sequence,
1958 			 * we're done.
1959 			 */
1960 			if ((buf1->lba * dev->sector_size) !=
1961 			     dev->next_completion_pos_bytes)
1962 				break;
1963 
1964 			STAILQ_REMOVE_HEAD(&dev->reorder_queue, links);
1965 			dev->num_reorder_queue--;
1966 			STAILQ_INSERT_TAIL(&local_queue, buf1, work_links);
1967 			dev->next_completion_pos_bytes += buf1->len;
1968 		}
1969 	}
1970 
1971 	/*
1972 	 * Setup the event to let the other thread know that it has work
1973 	 * pending.
1974 	 */
1975 	EV_SET(&ke, (uintptr_t)&dev->peer_dev->work_queue, EVFILT_USER, 0,
1976 	       NOTE_TRIGGER, 0, NULL);
1977 
1978 	/*
1979 	 * Put this on our shadow queue so that we know what we've queued
1980 	 * to the other thread.
1981 	 */
1982 	STAILQ_FOREACH_SAFE(buf1, &local_queue, work_links, buf2) {
1983 		if (buf1->buf_type != CAMDD_BUF_DATA) {
1984 			errx(1, "%s: should have a data buffer, not an "
1985 			    "indirect buffer", __func__);
1986 		}
1987 		data = &buf1->buf_type_spec.data;
1988 
1989 		/*
1990 		 * We only need to send one EOF to the writer, and don't
1991 		 * need to continue sending EOFs after that.
1992 		 */
1993 		if (buf1->status == CAMDD_STATUS_EOF) {
1994 			if (dev->flags & CAMDD_DEV_FLAG_EOF_SENT) {
1995 				STAILQ_REMOVE(&local_queue, buf1, camdd_buf,
1996 				    work_links);
1997 				camdd_release_buf(buf1);
1998 				retval = 1;
1999 				continue;
2000 			}
2001 			dev->flags |= CAMDD_DEV_FLAG_EOF_SENT;
2002 		}
2003 
2004 
2005 		STAILQ_INSERT_TAIL(&dev->peer_work_queue, buf1, links);
2006 		peer_bytes_queued += (data->fill_len - data->resid);
2007 		dev->peer_bytes_queued += (data->fill_len - data->resid);
2008 		dev->num_peer_work_queue++;
2009 	}
2010 
2011 	if (STAILQ_FIRST(&local_queue) == NULL)
2012 		goto bailout;
2013 
2014 	/*
2015 	 * Drop our mutex and pick up the other thread's mutex.  We need to
2016 	 * do this to avoid deadlocks.
2017 	 */
2018 	pthread_mutex_unlock(&dev->mutex);
2019 	pthread_mutex_lock(&dev->peer_dev->mutex);
2020 
2021 	if (dev->peer_dev->flags & CAMDD_DEV_FLAG_ACTIVE) {
2022 		/*
2023 		 * Put the buffers on the other thread's incoming work queue.
2024 		 */
2025 		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2026 		     buf1 = STAILQ_FIRST(&local_queue)) {
2027 			STAILQ_REMOVE_HEAD(&local_queue, work_links);
2028 			STAILQ_INSERT_TAIL(&dev->peer_dev->work_queue, buf1,
2029 					   work_links);
2030 		}
2031 		/*
2032 		 * Send an event to the other thread's kqueue to let it know
2033 		 * that there is something on the work queue.
2034 		 */
2035 		retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2036 		if (retval == -1)
2037 			warn("%s: unable to add peer work_queue kevent",
2038 			     __func__);
2039 		else
2040 			retval = 0;
2041 	} else
2042 		active = 0;
2043 
2044 	pthread_mutex_unlock(&dev->peer_dev->mutex);
2045 	pthread_mutex_lock(&dev->mutex);
2046 
2047 	/*
2048 	 * If the other side isn't active, run through the queue and
2049 	 * release all of the buffers.
2050 	 */
2051 	if (active == 0) {
2052 		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2053 		     buf1 = STAILQ_FIRST(&local_queue)) {
2054 			STAILQ_REMOVE_HEAD(&local_queue, work_links);
2055 			STAILQ_REMOVE(&dev->peer_work_queue, buf1, camdd_buf,
2056 				      links);
2057 			dev->num_peer_work_queue--;
2058 			camdd_release_buf(buf1);
2059 		}
2060 		dev->peer_bytes_queued -= peer_bytes_queued;
2061 		retval = 1;
2062 	}
2063 
2064 bailout:
2065 	return (retval);
2066 }
2067 
2068 /*
2069  * Return a buffer to the reader thread when we have completed writing it.
2070  */
2071 int
2072 camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf)
2073 {
2074 	struct kevent ke;
2075 	int retval = 0;
2076 
2077 	/*
2078 	 * Setup the event to let the other thread know that we have
2079 	 * completed a buffer.
2080 	 */
2081 	EV_SET(&ke, (uintptr_t)&dev->peer_dev->peer_done_queue, EVFILT_USER, 0,
2082 	       NOTE_TRIGGER, 0, NULL);
2083 
2084 	/*
2085 	 * Drop our lock and acquire the other thread's lock before
2086 	 * manipulating
2087 	 */
2088 	pthread_mutex_unlock(&dev->mutex);
2089 	pthread_mutex_lock(&dev->peer_dev->mutex);
2090 
2091 	/*
2092 	 * Put the buffer on the reader thread's peer done queue now that
2093 	 * we have completed it.
2094 	 */
2095 	STAILQ_INSERT_TAIL(&dev->peer_dev->peer_done_queue, peer_buf,
2096 			   work_links);
2097 	dev->peer_dev->num_peer_done_queue++;
2098 
2099 	/*
2100 	 * Send an event to the peer thread to let it know that we've added
2101 	 * something to its peer done queue.
2102 	 */
2103 	retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2104 	if (retval == -1)
2105 		warn("%s: unable to add peer_done_queue kevent", __func__);
2106 	else
2107 		retval = 0;
2108 
2109 	/*
2110 	 * Drop the other thread's lock and reacquire ours.
2111 	 */
2112 	pthread_mutex_unlock(&dev->peer_dev->mutex);
2113 	pthread_mutex_lock(&dev->mutex);
2114 
2115 	return (retval);
2116 }
2117 
2118 /*
2119  * Free a buffer that was written out by the writer thread and returned to
2120  * the reader thread.
2121  */
2122 void
2123 camdd_peer_done(struct camdd_buf *buf)
2124 {
2125 	struct camdd_dev *dev;
2126 	struct camdd_buf_data *data;
2127 
2128 	dev = buf->dev;
2129 	if (buf->buf_type != CAMDD_BUF_DATA) {
2130 		errx(1, "%s: should have a data buffer, not an "
2131 		    "indirect buffer", __func__);
2132 	}
2133 
2134 	data = &buf->buf_type_spec.data;
2135 
2136 	STAILQ_REMOVE(&dev->peer_work_queue, buf, camdd_buf, links);
2137 	dev->num_peer_work_queue--;
2138 	dev->peer_bytes_queued -= (data->fill_len - data->resid);
2139 
2140 	if (buf->status == CAMDD_STATUS_EOF)
2141 		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2142 
2143 	STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2144 }
2145 
2146 /*
2147  * Assumes caller holds the lock for this device.
2148  */
2149 void
2150 camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
2151 		   int *error_count)
2152 {
2153 	int retval = 0;
2154 
2155 	/*
2156 	 * If we're the reader, we need to send the completed I/O
2157 	 * to the writer.  If we're the writer, we need to just
2158 	 * free up resources, or let the reader know if we've
2159 	 * encountered an error.
2160 	 */
2161 	if (dev->write_dev == 0) {
2162 		retval = camdd_queue_peer_buf(dev, buf);
2163 		if (retval != 0)
2164 			(*error_count)++;
2165 	} else {
2166 		struct camdd_buf *tmp_buf, *next_buf;
2167 
2168 		STAILQ_FOREACH_SAFE(tmp_buf, &buf->src_list, src_links,
2169 				    next_buf) {
2170 			struct camdd_buf *src_buf;
2171 			struct camdd_buf_indirect *indirect;
2172 
2173 			STAILQ_REMOVE(&buf->src_list, tmp_buf,
2174 				      camdd_buf, src_links);
2175 
2176 			tmp_buf->status = buf->status;
2177 
2178 			if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
2179 				camdd_complete_peer_buf(dev, tmp_buf);
2180 				continue;
2181 			}
2182 
2183 			indirect = &tmp_buf->buf_type_spec.indirect;
2184 			src_buf = indirect->src_buf;
2185 			src_buf->refcount--;
2186 			/*
2187 			 * XXX KDM we probably need to account for
2188 			 * exactly how many bytes we were able to
2189 			 * write.  Allocate the residual to the
2190 			 * first N buffers?  Or just track the
2191 			 * number of bytes written?  Right now the reader
2192 			 * doesn't do anything with a residual.
2193 			 */
2194 			src_buf->status = buf->status;
2195 			if (src_buf->refcount <= 0)
2196 				camdd_complete_peer_buf(dev, src_buf);
2197 			STAILQ_INSERT_TAIL(&dev->free_indirect_queue,
2198 					   tmp_buf, links);
2199 		}
2200 
2201 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2202 	}
2203 }
2204 
2205 /*
2206  * Fetch all completed commands from the pass(4) device.
2207  *
2208  * Returns the number of commands received, or -1 if any of the commands
2209  * completed with an error.  Returns 0 if no commands are available.
2210  */
2211 int
2212 camdd_pass_fetch(struct camdd_dev *dev)
2213 {
2214 	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2215 	union ccb ccb;
2216 	int retval = 0, num_fetched = 0, error_count = 0;
2217 
2218 	pthread_mutex_unlock(&dev->mutex);
2219 	/*
2220 	 * XXX KDM we don't distinguish between EFAULT and ENOENT.
2221 	 */
2222 	while ((retval = ioctl(pass_dev->dev->fd, CAMIOGET, &ccb)) != -1) {
2223 		struct camdd_buf *buf;
2224 		struct camdd_buf_data *data;
2225 		cam_status ccb_status;
2226 		union ccb *buf_ccb;
2227 
2228 		buf = ccb.ccb_h.ccb_buf;
2229 		data = &buf->buf_type_spec.data;
2230 		buf_ccb = &data->ccb;
2231 
2232 		num_fetched++;
2233 
2234 		/*
2235 		 * Copy the CCB back out so we get status, sense data, etc.
2236 		 */
2237 		bcopy(&ccb, buf_ccb, sizeof(ccb));
2238 
2239 		pthread_mutex_lock(&dev->mutex);
2240 
2241 		/*
2242 		 * We're now done, so take this off the active queue.
2243 		 */
2244 		STAILQ_REMOVE(&dev->active_queue, buf, camdd_buf, links);
2245 		dev->cur_active_io--;
2246 
2247 		ccb_status = ccb.ccb_h.status & CAM_STATUS_MASK;
2248 		if (ccb_status != CAM_REQ_CMP) {
2249 			cam_error_print(pass_dev->dev, &ccb, CAM_ESF_ALL,
2250 					CAM_EPF_ALL, stderr);
2251 		}
2252 
2253 		switch (pass_dev->protocol) {
2254 		case PROTO_SCSI:
2255 			data->resid = ccb.csio.resid;
2256 			dev->bytes_transferred += (ccb.csio.dxfer_len - ccb.csio.resid);
2257 			break;
2258 		default:
2259 			return -1;
2260 			break;
2261 		}
2262 
2263 		if (buf->status == CAMDD_STATUS_NONE)
2264 			buf->status = camdd_ccb_status(&ccb, pass_dev->protocol);
2265 		if (buf->status == CAMDD_STATUS_ERROR)
2266 			error_count++;
2267 		else if (buf->status == CAMDD_STATUS_EOF) {
2268 			/*
2269 			 * Once we queue this buffer to our partner thread,
2270 			 * he will know that we've hit EOF.
2271 			 */
2272 			dev->flags |= CAMDD_DEV_FLAG_EOF;
2273 		}
2274 
2275 		camdd_complete_buf(dev, buf, &error_count);
2276 
2277 		/*
2278 		 * Unlock in preparation for the ioctl call.
2279 		 */
2280 		pthread_mutex_unlock(&dev->mutex);
2281 	}
2282 
2283 	pthread_mutex_lock(&dev->mutex);
2284 
2285 	if (error_count > 0)
2286 		return (-1);
2287 	else
2288 		return (num_fetched);
2289 }
2290 
/*
 * Execute the buffer at the head of the run queue against a file
 * descriptor, synchronously.
 *
 * Seekable files (CAMDD_FF_CAN_SEEK) use pread(2)/pwrite(2)/pwritev(2) at
 * the byte offset derived from the buffer's LBA; otherwise read(2)/
 * write(2)/writev(2) are used.  dev->mutex is dropped around the I/O
 * system call and re-acquired afterwards.  Whatever the outcome, a buffer
 * that was taken from the queue is passed to camdd_complete_buf() before
 * returning.
 *
 * Returns -1 for error, 0 for success/continue, and 1 for resource
 * shortage/stop processing.
 */
int
camdd_file_run(struct camdd_dev *dev)
{
	struct camdd_dev_file *file_dev = &dev->dev_spec.file;
	struct camdd_buf_data *data;
	struct camdd_buf *buf;
	off_t io_offset;
	int retval = 0, write_dev = dev->write_dev;
	int error_count = 0, no_resources = 0, double_buf_needed = 0;
	uint32_t num_sectors = 0, db_len = 0;

	buf = STAILQ_FIRST(&dev->run_queue);
	if (buf == NULL) {
		/* Nothing queued: report "stop processing" to the caller. */
		no_resources = 1;
		goto bailout;
	} else if ((dev->write_dev == 0)
		&& (dev->flags & (CAMDD_DEV_FLAG_EOF |
				  CAMDD_DEV_FLAG_EOF_SENT))) {
		/*
		 * The reader has already seen or sent EOF: do no I/O, mark
		 * this buffer EOF and complete it.  Bumping error_count
		 * makes this path return -1.
		 */
		STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
		dev->num_run_queue--;
		buf->status = CAMDD_STATUS_EOF;
		error_count++;
		goto bailout;
	}

	/*
	 * If we're writing, we need to go through the source buffer list
	 * and create an S/G list.
	 */
	if (write_dev != 0) {
		retval = camdd_buf_sg_create(buf, /*iovec*/ 1,
		    dev->sector_size, &num_sectors, &double_buf_needed);
		if (retval != 0) {
			/*
			 * NOTE(review): buf is still linked on run_queue at
			 * this point, yet the bailout path below hands any
			 * non-NULL buf to camdd_complete_buf() -- confirm
			 * that this is intended.
			 */
			no_resources = 1;
			goto bailout;
		}
	}

	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
	dev->num_run_queue--;

	data = &buf->buf_type_spec.data;

	/*
	 * pread(2) and pwrite(2) offsets are byte offsets.
	 */
	io_offset = buf->lba * dev->sector_size;

	/*
	 * Unlock the mutex while we read or write.
	 */
	pthread_mutex_unlock(&dev->mutex);

	/*
	 * Note that we don't need to double buffer if we're the reader
	 * because in that case, we have allocated a single buffer of
	 * sufficient size to do the read.  This copy is necessary on
	 * writes because if one of the components of the S/G list is not
	 * a sector size multiple, the kernel will reject the write.  This
	 * is unfortunate but not surprising.  So this will make sure that
	 * we're using a single buffer that is a multiple of the sector size.
	 */
	if ((double_buf_needed != 0)
	 && (data->sg_count > 1)
	 && (write_dev != 0)) {
		uint32_t cur_offset;
		int i;

		/* The bounce buffer is allocated once, on first use. */
		if (file_dev->tmp_buf == NULL)
			file_dev->tmp_buf = calloc(dev->blocksize, 1);
		if (file_dev->tmp_buf == NULL) {
			buf->status = CAMDD_STATUS_ERROR;
			error_count++;
			/* bailout expects the mutex to be held again. */
			pthread_mutex_lock(&dev->mutex);
			goto bailout;
		}
		/* Flatten the S/G segments into the bounce buffer. */
		for (i = 0, cur_offset = 0; i < data->sg_count; i++) {
			bcopy(data->iovec[i].iov_base,
			    &file_dev->tmp_buf[cur_offset],
			    data->iovec[i].iov_len);
			cur_offset += data->iovec[i].iov_len;
		}
		db_len = cur_offset;
	}

	/*
	 * NOTE(review): in the write branches below, tmp_buf/db_len are
	 * used whenever double_buf_needed is set, but they are only filled
	 * in above when sg_count > 1 -- confirm that double_buf_needed
	 * implies more than one segment.
	 */
	if (file_dev->file_flags & CAMDD_FF_CAN_SEEK) {
		if (write_dev == 0) {
			/*
			 * XXX KDM is there any way we would need a S/G
			 * list here?
			 */
			retval = pread(file_dev->fd, data->buf,
			    buf->len, io_offset);
		} else {
			if (double_buf_needed != 0) {
				retval = pwrite(file_dev->fd, file_dev->tmp_buf,
				    db_len, io_offset);
			} else if (data->sg_count == 0) {
				retval = pwrite(file_dev->fd, data->buf,
				    data->fill_len, io_offset);
			} else {
				retval = pwritev(file_dev->fd, data->iovec,
				    data->sg_count, io_offset);
			}
		}
	} else {
		if (write_dev == 0) {
			/*
			 * XXX KDM is there any way we would need a S/G
			 * list here?
			 */
			retval = read(file_dev->fd, data->buf, buf->len);
		} else {
			if (double_buf_needed != 0) {
				retval = write(file_dev->fd, file_dev->tmp_buf,
				    db_len);
			} else if (data->sg_count == 0) {
				retval = write(file_dev->fd, data->buf,
				    data->fill_len);
			} else {
				retval = writev(file_dev->fd, data->iovec,
				    data->sg_count);
			}
		}
	}

	/* We're done, re-acquire the lock */
	pthread_mutex_lock(&dev->mutex);

	/*
	 * Classify the result: full transfer, system call error, EOF
	 * (zero bytes), or short transfer.
	 */
	if (retval >= (ssize_t)data->fill_len) {
		/*
		 * If the bytes transferred is more than the request size,
		 * that indicates an overrun, which should only happen at
		 * the end of a transfer if we have to round up to a sector
		 * boundary.
		 */
		if (buf->status == CAMDD_STATUS_NONE)
			buf->status = CAMDD_STATUS_OK;
		data->resid = 0;
		dev->bytes_transferred += retval;
	} else if (retval == -1) {
		warn("Error %s %s", (write_dev) ? "writing to" :
		    "reading from", file_dev->filename);

		buf->status = CAMDD_STATUS_ERROR;
		data->resid = data->fill_len;
		error_count++;

		/* The remainder of this branch is debug-only detail. */
		if (dev->debug == 0)
			goto bailout;

		if ((double_buf_needed != 0)
		 && (write_dev != 0)) {
			fprintf(stderr, "%s: fd %d, DB buf %p, len %u lba %ju "
			    "offset %ju\n", __func__, file_dev->fd,
			    file_dev->tmp_buf, db_len, (uintmax_t)buf->lba,
			    (uintmax_t)io_offset);
		} else if (data->sg_count == 0) {
			fprintf(stderr, "%s: fd %d, buf %p, len %u, lba %ju "
			    "offset %ju\n", __func__, file_dev->fd, data->buf,
			    data->fill_len, (uintmax_t)buf->lba,
			    (uintmax_t)io_offset);
		} else {
			int i;

			fprintf(stderr, "%s: fd %d, len %u, lba %ju "
			    "offset %ju\n", __func__, file_dev->fd,
			    data->fill_len, (uintmax_t)buf->lba,
			    (uintmax_t)io_offset);

			for (i = 0; i < data->sg_count; i++) {
				fprintf(stderr, "index %d ptr %p len %zu\n",
				    i, data->iovec[i].iov_base,
				    data->iovec[i].iov_len);
			}
		}
	} else if (retval == 0) {
		/* Zero bytes moved is treated as EOF and counted as an error. */
		buf->status = CAMDD_STATUS_EOF;
		if (dev->debug != 0)
			printf("%s: got EOF from %s!\n", __func__,
			    file_dev->filename);
		data->resid = data->fill_len;
		error_count++;
	} else if (retval < (ssize_t)data->fill_len) {
		/* Short transfer: record what is left over as the residual. */
		if (buf->status == CAMDD_STATUS_NONE)
			buf->status = CAMDD_STATUS_SHORT_IO;
		data->resid = data->fill_len - retval;
		dev->bytes_transferred += retval;
	}

bailout:
	if (buf != NULL) {
		if (buf->status == CAMDD_STATUS_EOF) {
			struct camdd_buf *buf2;
			/*
			 * Flag the device as EOF and mark everything still
			 * waiting on the run queue as EOF too.
			 */
			dev->flags |= CAMDD_DEV_FLAG_EOF;
			STAILQ_FOREACH(buf2, &dev->run_queue, links)
				buf2->status = CAMDD_STATUS_EOF;
		}

		camdd_complete_buf(dev, buf, &error_count);
	}

	/* An error takes precedence over a resource shortage. */
	if (error_count != 0)
		return (-1);
	else if (no_resources != 0)
		return (1);
	else
		return (0);
}
2504 
2505 /*
2506  * Execute one command from the run queue.  Returns 0 for success, 1 for
2507  * stop processing, and -1 for error.
2508  */
2509 int
2510 camdd_pass_run(struct camdd_dev *dev)
2511 {
2512 	struct camdd_buf *buf = NULL;
2513 	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2514 	struct camdd_buf_data *data;
2515 	uint32_t num_blocks, sectors_used = 0;
2516 	union ccb *ccb;
2517 	int retval = 0, is_write = dev->write_dev;
2518 	int double_buf_needed = 0;
2519 
2520 	buf = STAILQ_FIRST(&dev->run_queue);
2521 	if (buf == NULL) {
2522 		retval = 1;
2523 		goto bailout;
2524 	}
2525 
2526 	/*
2527 	 * If we're writing, we need to go through the source buffer list
2528 	 * and create an S/G list.
2529 	 */
2530 	if (is_write != 0) {
2531 		retval = camdd_buf_sg_create(buf, /*iovec*/ 0,dev->sector_size,
2532 		    &sectors_used, &double_buf_needed);
2533 		if (retval != 0) {
2534 			retval = -1;
2535 			goto bailout;
2536 		}
2537 	}
2538 
2539 	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2540 	dev->num_run_queue--;
2541 
2542 	data = &buf->buf_type_spec.data;
2543 
2544 	/*
2545 	 * In almost every case the number of blocks should be the device
2546 	 * block size.  The exception may be at the end of an I/O stream
2547 	 * for a partial block or at the end of a device.
2548 	 */
2549 	if (is_write != 0)
2550 		num_blocks = sectors_used;
2551 	else
2552 		num_blocks = data->fill_len / pass_dev->block_len;
2553 
2554 	ccb = &data->ccb;
2555 
2556 	switch (pass_dev->protocol) {
2557 	case PROTO_SCSI:
2558 		CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
2559 
2560 		scsi_read_write(&ccb->csio,
2561 				/*retries*/ dev->retry_count,
2562 				/*cbfcnp*/ NULL,
2563 				/*tag_action*/ MSG_SIMPLE_Q_TAG,
2564 				/*readop*/ (dev->write_dev == 0) ? SCSI_RW_READ :
2565 					   SCSI_RW_WRITE,
2566 				/*byte2*/ 0,
2567 				/*minimum_cmd_size*/ dev->min_cmd_size,
2568 				/*lba*/ buf->lba,
2569 				/*block_count*/ num_blocks,
2570 				/*data_ptr*/ (data->sg_count != 0) ?
2571 					     (uint8_t *)data->segs : data->buf,
2572 				/*dxfer_len*/ (num_blocks * pass_dev->block_len),
2573 				/*sense_len*/ SSD_FULL_SIZE,
2574 				/*timeout*/ dev->io_timeout);
2575 
2576 		if (data->sg_count != 0) {
2577 			ccb->csio.sglist_cnt = data->sg_count;
2578 		}
2579 		break;
2580 	default:
2581 		retval = -1;
2582 		goto bailout;
2583 	}
2584 
2585 	/* Disable freezing the device queue */
2586 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
2587 
2588 	if (dev->retry_count != 0)
2589 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
2590 
2591 	if (data->sg_count != 0) {
2592 		ccb->ccb_h.flags |= CAM_DATA_SG;
2593 	}
2594 
2595 	/*
2596 	 * Store a pointer to the buffer in the CCB.  The kernel will
2597 	 * restore this when we get it back, and we'll use it to identify
2598 	 * the buffer this CCB came from.
2599 	 */
2600 	ccb->ccb_h.ccb_buf = buf;
2601 
2602 	/*
2603 	 * Unlock our mutex in preparation for issuing the ioctl.
2604 	 */
2605 	pthread_mutex_unlock(&dev->mutex);
2606 	/*
2607 	 * Queue the CCB to the pass(4) driver.
2608 	 */
2609 	if (ioctl(pass_dev->dev->fd, CAMIOQUEUE, ccb) == -1) {
2610 		pthread_mutex_lock(&dev->mutex);
2611 
2612 		warn("%s: error sending CAMIOQUEUE ioctl to %s%u", __func__,
2613 		     pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
2614 		warn("%s: CCB address is %p", __func__, ccb);
2615 		retval = -1;
2616 
2617 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2618 	} else {
2619 		pthread_mutex_lock(&dev->mutex);
2620 
2621 		dev->cur_active_io++;
2622 		STAILQ_INSERT_TAIL(&dev->active_queue, buf, links);
2623 	}
2624 
2625 bailout:
2626 	return (retval);
2627 }
2628 
2629 int
2630 camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len)
2631 {
2632 	struct camdd_dev_pass *pass_dev;
2633 	uint32_t num_blocks;
2634 	int retval = 0;
2635 
2636 	pass_dev = &dev->dev_spec.pass;
2637 
2638 	*lba = dev->next_io_pos_bytes / dev->sector_size;
2639 	*len = dev->blocksize;
2640 	num_blocks = *len / dev->sector_size;
2641 
2642 	/*
2643 	 * If max_sector is 0, then we have no set limit.  This can happen
2644 	 * if we're writing to a file in a filesystem, or reading from
2645 	 * something like /dev/zero.
2646 	 */
2647 	if ((dev->max_sector != 0)
2648 	 || (dev->sector_io_limit != 0)) {
2649 		uint64_t max_sector;
2650 
2651 		if ((dev->max_sector != 0)
2652 		 && (dev->sector_io_limit != 0))
2653 			max_sector = min(dev->sector_io_limit, dev->max_sector);
2654 		else if (dev->max_sector != 0)
2655 			max_sector = dev->max_sector;
2656 		else
2657 			max_sector = dev->sector_io_limit;
2658 
2659 
2660 		/*
2661 		 * Check to see whether we're starting off past the end of
2662 		 * the device.  If so, we need to just send an EOF
2663 		 * notification to the writer.
2664 		 */
2665 		if (*lba > max_sector) {
2666 			*len = 0;
2667 			retval = 1;
2668 		} else if (((*lba + num_blocks) > max_sector + 1)
2669 			|| ((*lba + num_blocks) < *lba)) {
2670 			/*
2671 			 * If we get here (but pass the first check), we
2672 			 * can trim the request length down to go to the
2673 			 * end of the device.
2674 			 */
2675 			num_blocks = (max_sector + 1) - *lba;
2676 			*len = num_blocks * dev->sector_size;
2677 			retval = 1;
2678 		}
2679 	}
2680 
2681 	dev->next_io_pos_bytes += *len;
2682 
2683 	return (retval);
2684 }
2685 
/*
 * Queue work on this device.
 *
 * Reader (dev->write_dev == 0): read_buf is not used.  A data buffer is
 * obtained, given the next LBA/length from camdd_get_next_lba_len() and
 * appended to the run queue.
 *
 * Writer: read_buf is a buffer that came from the reader.  Its valid
 * bytes (fill_len - resid) are attached to the write buffer currently
 * being assembled on the pending queue (one is started if there is
 * none).  Depending on whether the combined length is less than, equal
 * to, or greater than the write buffer's length, the write buffer stays
 * pending, is moved to the run queue, or read_buf is split across
 * several write buffers by means of indirect buffers.
 *
 * Returns 0 for success, 1 for EOF detected, and -1 for failure.  Note
 * that on the writer side 1 is also returned when camdd_get_buf() cannot
 * supply a buffer.
 */
int
camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf)
{
	struct camdd_buf *buf = NULL;
	struct camdd_buf_data *data;
	struct camdd_dev_pass *pass_dev;
	size_t new_len;
	struct camdd_buf_data *rb_data;
	int is_write = dev->write_dev;
	int eof_flush_needed = 0;
	int retval = 0;
	/* Assigned from camdd_complete_peer_buf() below but never examined. */
	int error;

	pass_dev = &dev->dev_spec.pass;

	/*
	 * If we've gotten EOF or our partner has, we should not continue
	 * queueing I/O.  If we're a writer, though, we should continue
	 * to write any buffers that don't have EOF status.
	 */
	if ((dev->flags & CAMDD_DEV_FLAG_EOF)
	 || ((dev->flags & CAMDD_DEV_FLAG_PEER_EOF)
	  && (is_write == 0))) {
		/*
		 * Tell the worker thread that we have seen EOF.
		 */
		retval = 1;

		/*
		 * If we're the writer, send the buffer back with EOF status.
		 */
		if (is_write) {
			read_buf->status = CAMDD_STATUS_EOF;

			error = camdd_complete_peer_buf(dev, read_buf);
		}
		goto bailout;
	}

	/*
	 * Reader path: set up one read of the next region and queue it.
	 */
	if (is_write == 0) {
		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
		if (buf == NULL) {
			retval = -1;
			goto bailout;
		}
		data = &buf->buf_type_spec.data;

		retval = camdd_get_next_lba_len(dev, &buf->lba, &buf->len);
		if (retval != 0) {
			buf->status = CAMDD_STATUS_EOF;

			/*
			 * A zero-length EOF buffer is only worth queueing
			 * once; if one was already queued or sent, give
			 * this one back.
			 */
		 	if ((buf->len == 0)
			 && ((dev->flags & (CAMDD_DEV_FLAG_EOF_SENT |
			     CAMDD_DEV_FLAG_EOF_QUEUED)) != 0)) {
				camdd_release_buf(buf);
				goto bailout;
			}
			dev->flags |= CAMDD_DEV_FLAG_EOF_QUEUED;
		}

		data->fill_len = buf->len;
		data->src_start_offset = buf->lba * dev->sector_size;

		/*
		 * Put this on the run queue.
		 */
		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
		dev->num_run_queue++;

		/* We're done. */
		goto bailout;
	}

	/*
	 * Everything from here on is the writer path.
	 */

	/*
	 * Check for new EOF status from the reader.
	 */
	if ((read_buf->status == CAMDD_STATUS_EOF)
	 || (read_buf->status == CAMDD_STATUS_ERROR)) {
		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
		/*
		 * With nothing pending and no data in this buffer there is
		 * nothing to flush; just hand the buffer back.
		 */
		if ((STAILQ_FIRST(&dev->pending_queue) == NULL)
		 && (read_buf->len == 0)) {
			camdd_complete_peer_buf(dev, read_buf);
			retval = 1;
			goto bailout;
		} else
			eof_flush_needed = 1;
	}

	/*
	 * See if we have a buffer we're composing with pieces from our
	 * partner thread.
	 */
	buf = STAILQ_FIRST(&dev->pending_queue);
	if (buf == NULL) {
		uint64_t lba;
		ssize_t len;

		retval = camdd_get_next_lba_len(dev, &lba, &len);
		if (retval != 0) {
			read_buf->status = CAMDD_STATUS_EOF;

			/* No room left on our side: we are at EOF ourselves. */
			if (len == 0) {
				dev->flags |= CAMDD_DEV_FLAG_EOF;
				error = camdd_complete_peer_buf(dev, read_buf);
				goto bailout;
			}
		}

		/*
		 * If we don't have a pending buffer, we need to grab a new
		 * one from the free list or allocate another one.
		 */
		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
		if (buf == NULL) {
			retval = 1;
			goto bailout;
		}

		buf->lba = lba;
		buf->len = len;

		STAILQ_INSERT_TAIL(&dev->pending_queue, buf, links);
		dev->num_pending_queue++;
	}

	data = &buf->buf_type_spec.data;

	rb_data = &read_buf->buf_type_spec.data;

	/*
	 * In debug mode, warn when the reader's data does not start where
	 * the previous piece ended.
	 */
	if ((rb_data->src_start_offset != dev->next_peer_pos_bytes)
	 && (dev->debug != 0)) {
		printf("%s: WARNING: reader offset %#jx != expected offset "
		    "%#jx\n", __func__, (uintmax_t)rb_data->src_start_offset,
		    (uintmax_t)dev->next_peer_pos_bytes);
	}
	dev->next_peer_pos_bytes = rb_data->src_start_offset +
	    (rb_data->fill_len - rb_data->resid);

	/*
	 * new_len is what the write buffer would hold once the valid bytes
	 * of read_buf are added to what it already has.
	 */
	new_len = (rb_data->fill_len - rb_data->resid) + data->fill_len;
	if (new_len < buf->len) {
		/*
		 * There are three cases here:
		 * 1. We need more data to fill up a block, so we put
		 *    this I/O on the queue and wait for more I/O.
		 * 2. We have a pending buffer in the queue that is
		 *    smaller than our blocksize, but we got an EOF.  So we
		 *    need to go ahead and flush the write out.
		 * 3. We got an error.
		 */

		/*
		 * Increment our fill length.
		 */
		data->fill_len += (rb_data->fill_len - rb_data->resid);

		/*
		 * Add the new read buffer to the list for writing.
		 */
		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);

		/* Increment the count */
		buf->src_count++;

		if (eof_flush_needed == 0) {
			/*
			 * We need to exit, because we don't have enough
			 * data yet.
			 */
			goto bailout;
		} else {
			/*
			 * Take the buffer off of the pending queue.
			 */
			STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
				      links);
			dev->num_pending_queue--;

			/*
			 * If we need an EOF flush, but there is no data
			 * to flush, go ahead and return this buffer.
			 */
			if (data->fill_len == 0) {
				camdd_complete_buf(dev, buf, /*error_count*/0);
				retval = 1;
				goto bailout;
			}

			/*
			 * Put this on the next queue for execution.
			 */
			STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
			dev->num_run_queue++;
		}
	} else if (new_len == buf->len) {
		/*
		 * We have enough data to completely fill one block,
		 * so we're ready to issue the I/O.
		 */

		/*
		 * Take the buffer off of the pending queue.
		 */
		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf, links);
		dev->num_pending_queue--;

		/*
		 * Add the new read buffer to the list for writing.
		 */
		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);

		/* Increment the count */
		buf->src_count++;

		/*
		 * Increment our fill length.
		 */
		data->fill_len += (rb_data->fill_len - rb_data->resid);

		/*
		 * Put this on the next queue for execution.
		 */
		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
		dev->num_run_queue++;
	} else {
		/*
		 * read_buf holds more than the current write buffer can
		 * take.  Indirect buffers, each referencing a slice of
		 * read_buf, are used to spread it over this write buffer
		 * and as many further ones as are needed.
		 */
		struct camdd_buf *idb;
		struct camdd_buf_indirect *indirect;
		uint32_t len_to_go, cur_offset;


		idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
		if (idb == NULL) {
			retval = 1;
			goto bailout;
		}
		indirect = &idb->buf_type_spec.indirect;
		indirect->src_buf = read_buf;
		/* Each indirect buffer holds a reference on read_buf. */
		read_buf->refcount++;
		indirect->offset = 0;
		indirect->start_ptr = rb_data->buf;
		/*
		 * We've already established that there is more
		 * data in read_buf than we have room for in our
		 * current write request.  So this particular chunk
		 * of the request should just be the remainder
		 * needed to fill up a block.
		 */
		indirect->len = buf->len - (data->fill_len - data->resid);

		camdd_buf_add_child(buf, idb);

		/*
		 * This buffer is ready to execute, so we can take
		 * it off the pending queue and put it on the run
		 * queue.
		 */
		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
			      links);
		dev->num_pending_queue--;
		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
		dev->num_run_queue++;

		/* Offset within read_buf of the first byte not yet placed. */
		cur_offset = indirect->offset + indirect->len;

		/*
		 * The resulting I/O would be too large to fit in
		 * one block.  We need to split this I/O into
		 * multiple pieces.  Allocate as many buffers as needed.
		 */
		for (len_to_go = rb_data->fill_len - rb_data->resid -
		     indirect->len; len_to_go > 0;) {
			struct camdd_buf *new_buf;
			struct camdd_buf_data *new_data;
			uint64_t lba;
			ssize_t len;

			retval = camdd_get_next_lba_len(dev, &lba, &len);
			if ((retval != 0)
			 && (len == 0)) {
				/*
				 * The device has already been marked
				 * as EOF, and there is no space left.
				 */
				goto bailout;
			}

			new_buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
			if (new_buf == NULL) {
				retval = 1;
				goto bailout;
			}

			new_buf->lba = lba;
			new_buf->len = len;

			/*
			 * NOTE(review): if this allocation fails, new_buf
			 * is not released or queued anywhere before the
			 * jump to bailout -- confirm whether that matters.
			 */
			idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
			if (idb == NULL) {
				retval = 1;
				goto bailout;
			}

			indirect = &idb->buf_type_spec.indirect;

			indirect->src_buf = read_buf;
			read_buf->refcount++;
			indirect->offset = cur_offset;
			indirect->start_ptr = rb_data->buf + cur_offset;
			/* Take as much as is left, up to one write buffer. */
			indirect->len = min(len_to_go, new_buf->len);
#if 0
			if (((indirect->len % dev->sector_size) != 0)
			 || ((indirect->offset % dev->sector_size) != 0)) {
				warnx("offset %ju len %ju not aligned with "
				    "sector size %u", indirect->offset,
				    (uintmax_t)indirect->len, dev->sector_size);
			}
#endif
			cur_offset += indirect->len;
			len_to_go -= indirect->len;

			camdd_buf_add_child(new_buf, idb);

			new_data = &new_buf->buf_type_spec.data;

			/*
			 * A full buffer, or any buffer when flushing for
			 * EOF, is ready to run; a partial one waits on the
			 * pending queue for more data.
			 */
			if ((new_data->fill_len == new_buf->len)
			 || (eof_flush_needed != 0)) {
				STAILQ_INSERT_TAIL(&dev->run_queue,
						   new_buf, links);
				dev->num_run_queue++;
			} else if (new_data->fill_len < buf->len) {
				STAILQ_INSERT_TAIL(&dev->pending_queue,
					   	new_buf, links);
				dev->num_pending_queue++;
			} else {
				warnx("%s: too much data in new "
				      "buffer!", __func__);
				retval = 1;
				goto bailout;
			}
		}
	}

bailout:
	return (retval);
}
3032 
3033 void
3034 camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
3035 		uint32_t *peer_depth, uint32_t *our_bytes, uint32_t *peer_bytes)
3036 {
3037 	*our_depth = dev->cur_active_io + dev->num_run_queue;
3038 	if (dev->num_peer_work_queue >
3039 	    dev->num_peer_done_queue)
3040 		*peer_depth = dev->num_peer_work_queue -
3041 			      dev->num_peer_done_queue;
3042 	else
3043 		*peer_depth = 0;
3044 	*our_bytes = *our_depth * dev->blocksize;
3045 	*peer_bytes = dev->peer_bytes_queued;
3046 }
3047 
3048 void
3049 camdd_sig_handler(int sig)
3050 {
3051 	if (sig == SIGINFO)
3052 		need_status = 1;
3053 	else {
3054 		need_exit = 1;
3055 		error_exit = 1;
3056 	}
3057 
3058 	sem_post(&camdd_sem);
3059 }
3060 
3061 void
3062 camdd_print_status(struct camdd_dev *camdd_dev, struct camdd_dev *other_dev,
3063 		   struct timespec *start_time)
3064 {
3065 	struct timespec done_time;
3066 	uint64_t total_ns;
3067 	long double mb_sec, total_sec;
3068 	int error = 0;
3069 
3070 	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &done_time);
3071 	if (error != 0) {
3072 		warn("Unable to get done time");
3073 		return;
3074 	}
3075 
3076 	timespecsub(&done_time, start_time);
3077 
3078 	total_ns = done_time.tv_nsec + (done_time.tv_sec * 1000000000);
3079 	total_sec = total_ns;
3080 	total_sec /= 1000000000;
3081 
3082 	fprintf(stderr, "%ju bytes %s %s\n%ju bytes %s %s\n"
3083 		"%.4Lf seconds elapsed\n",
3084 		(uintmax_t)camdd_dev->bytes_transferred,
3085 		(camdd_dev->write_dev == 0) ?  "read from" : "written to",
3086 		camdd_dev->device_name,
3087 		(uintmax_t)other_dev->bytes_transferred,
3088 		(other_dev->write_dev == 0) ? "read from" : "written to",
3089 		other_dev->device_name, total_sec);
3090 
3091 	mb_sec = min(other_dev->bytes_transferred,camdd_dev->bytes_transferred);
3092 	mb_sec /= 1024 * 1024;
3093 	mb_sec *= 1000000000;
3094 	mb_sec /= total_ns;
3095 	fprintf(stderr, "%.2Lf MB/sec\n", mb_sec);
3096 }
3097 
3098 int
3099 camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts, uint64_t max_io,
3100 	 int retry_count, int timeout)
3101 {
3102 	struct cam_device *new_cam_dev = NULL;
3103 	struct camdd_dev *devs[2];
3104 	struct timespec start_time;
3105 	pthread_t threads[2];
3106 	int unit = 0;
3107 	int error = 0;
3108 	int i;
3109 
3110 	if (num_io_opts != 2) {
3111 		warnx("Must have one input and one output path");
3112 		error = 1;
3113 		goto bailout;
3114 	}
3115 
3116 	bzero(devs, sizeof(devs));
3117 
3118 	for (i = 0; i < num_io_opts; i++) {
3119 		switch (io_opts[i].dev_type) {
3120 		case CAMDD_DEV_PASS: {
3121 			if (isdigit(io_opts[i].dev_name[0])) {
3122 				camdd_argmask new_arglist = CAMDD_ARG_NONE;
3123 				int bus = 0, target = 0, lun = 0;
3124 				int rv;
3125 
3126 				/* device specified as bus:target[:lun] */
3127 				rv = parse_btl(io_opts[i].dev_name, &bus,
3128 				    &target, &lun, &new_arglist);
3129 				if (rv < 2) {
3130 					warnx("numeric device specification "
3131 					     "must be either bus:target, or "
3132 					     "bus:target:lun");
3133 					error = 1;
3134 					goto bailout;
3135 				}
3136 				/* default to 0 if lun was not specified */
3137 				if ((new_arglist & CAMDD_ARG_LUN) == 0) {
3138 					lun = 0;
3139 					new_arglist |= CAMDD_ARG_LUN;
3140 				}
3141 				new_cam_dev = cam_open_btl(bus, target, lun,
3142 				    O_RDWR, NULL);
3143 			} else {
3144 				char name[30];
3145 
3146 				if (cam_get_device(io_opts[i].dev_name, name,
3147 						   sizeof name, &unit) == -1) {
3148 					warnx("%s", cam_errbuf);
3149 					error = 1;
3150 					goto bailout;
3151 				}
3152 				new_cam_dev = cam_open_spec_device(name, unit,
3153 				    O_RDWR, NULL);
3154 			}
3155 
3156 			if (new_cam_dev == NULL) {
3157 				warnx("%s", cam_errbuf);
3158 				error = 1;
3159 				goto bailout;
3160 			}
3161 
3162 			devs[i] = camdd_probe_pass(new_cam_dev,
3163 			    /*io_opts*/ &io_opts[i],
3164 			    CAMDD_ARG_ERR_RECOVER,
3165 			    /*probe_retry_count*/ 3,
3166 			    /*probe_timeout*/ 5000,
3167 			    /*io_retry_count*/ retry_count,
3168 			    /*io_timeout*/ timeout);
3169 			if (devs[i] == NULL) {
3170 				warn("Unable to probe device %s%u",
3171 				     new_cam_dev->device_name,
3172 				     new_cam_dev->dev_unit_num);
3173 				error = 1;
3174 				goto bailout;
3175 			}
3176 			break;
3177 		}
3178 		case CAMDD_DEV_FILE: {
3179 			int fd = -1;
3180 
3181 			if (io_opts[i].dev_name[0] == '-') {
3182 				if (io_opts[i].write_dev != 0)
3183 					fd = STDOUT_FILENO;
3184 				else
3185 					fd = STDIN_FILENO;
3186 			} else {
3187 				if (io_opts[i].write_dev != 0) {
3188 					fd = open(io_opts[i].dev_name,
3189 					    O_RDWR | O_CREAT, S_IWUSR |S_IRUSR);
3190 				} else {
3191 					fd = open(io_opts[i].dev_name,
3192 					    O_RDONLY);
3193 				}
3194 			}
3195 			if (fd == -1) {
3196 				warn("error opening file %s",
3197 				    io_opts[i].dev_name);
3198 				error = 1;
3199 				goto bailout;
3200 			}
3201 
3202 			devs[i] = camdd_probe_file(fd, &io_opts[i],
3203 			    retry_count, timeout);
3204 			if (devs[i] == NULL) {
3205 				error = 1;
3206 				goto bailout;
3207 			}
3208 
3209 			break;
3210 		}
3211 		default:
3212 			warnx("Unknown device type %d (%s)",
3213 			    io_opts[i].dev_type, io_opts[i].dev_name);
3214 			error = 1;
3215 			goto bailout;
3216 			break; /*NOTREACHED */
3217 		}
3218 
3219 		devs[i]->write_dev = io_opts[i].write_dev;
3220 
3221 		devs[i]->start_offset_bytes = io_opts[i].offset;
3222 
3223 		if (max_io != 0) {
3224 			devs[i]->sector_io_limit =
3225 			    (devs[i]->start_offset_bytes /
3226 			    devs[i]->sector_size) +
3227 			    (max_io / devs[i]->sector_size) - 1;
3228 		}
3229 
3230 		devs[i]->next_io_pos_bytes = devs[i]->start_offset_bytes;
3231 		devs[i]->next_completion_pos_bytes =devs[i]->start_offset_bytes;
3232 	}
3233 
3234 	devs[0]->peer_dev = devs[1];
3235 	devs[1]->peer_dev = devs[0];
3236 	devs[0]->next_peer_pos_bytes = devs[0]->peer_dev->next_io_pos_bytes;
3237 	devs[1]->next_peer_pos_bytes = devs[1]->peer_dev->next_io_pos_bytes;
3238 
3239 	sem_init(&camdd_sem, /*pshared*/ 0, 0);
3240 
3241 	signal(SIGINFO, camdd_sig_handler);
3242 	signal(SIGINT, camdd_sig_handler);
3243 
3244 	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &start_time);
3245 	if (error != 0) {
3246 		warn("Unable to get start time");
3247 		goto bailout;
3248 	}
3249 
3250 	for (i = 0; i < num_io_opts; i++) {
3251 		error = pthread_create(&threads[i], NULL, camdd_worker,
3252 				       (void *)devs[i]);
3253 		if (error != 0) {
3254 			warnc(error, "pthread_create() failed");
3255 			goto bailout;
3256 		}
3257 	}
3258 
3259 	for (;;) {
3260 		if ((sem_wait(&camdd_sem) == -1)
3261 		 || (need_exit != 0)) {
3262 			struct kevent ke;
3263 
3264 			for (i = 0; i < num_io_opts; i++) {
3265 				EV_SET(&ke, (uintptr_t)&devs[i]->work_queue,
3266 				    EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
3267 
3268 				devs[i]->flags |= CAMDD_DEV_FLAG_EOF;
3269 
3270 				error = kevent(devs[i]->kq, &ke, 1, NULL, 0,
3271 						NULL);
3272 				if (error == -1)
3273 					warn("%s: unable to wake up thread",
3274 					    __func__);
3275 				error = 0;
3276 			}
3277 			break;
3278 		} else if (need_status != 0) {
3279 			camdd_print_status(devs[0], devs[1], &start_time);
3280 			need_status = 0;
3281 		}
3282 	}
3283 	for (i = 0; i < num_io_opts; i++) {
3284 		pthread_join(threads[i], NULL);
3285 	}
3286 
3287 	camdd_print_status(devs[0], devs[1], &start_time);
3288 
3289 bailout:
3290 
3291 	for (i = 0; i < num_io_opts; i++)
3292 		camdd_free_dev(devs[i]);
3293 
3294 	return (error + error_exit);
3295 }
3296 
/*
 * Write the camdd command synopsis, followed by a description of each
 * command line option and each I/O parameter, to standard error.
 */
void
usage(void)
{
	/* The help text has no conversions, so it is emitted verbatim. */
	static const char usage_text[] =
"usage:  camdd <-i|-o pass=pass0,bs=1M,offset=1M,depth=4>\n"
"              <-i|-o file=/tmp/file,bs=512K,offset=1M>\n"
"              <-i|-o file=/dev/da0,bs=512K,offset=1M>\n"
"              <-i|-o file=/dev/nsa0,bs=512K>\n"
"              [-C retry_count][-E][-m max_io_amt][-t timeout_secs][-v][-h]\n"
"Option description\n"
"-i <arg=val>  Specify input device/file and parameters\n"
"-o <arg=val>  Specify output device/file and parameters\n"
"Input and Output parameters\n"
"pass=name     Specify a pass(4) device like pass0 or /dev/pass0\n"
"file=name     Specify a file or device, /tmp/foo, /dev/da0, /dev/null\n"
"              or - for stdin/stdout\n"
"bs=blocksize  Specify blocksize in bytes, or using K, M, G, etc. suffix\n"
"offset=len    Specify starting offset in bytes or using K, M, G suffix\n"
"              NOTE: offset cannot be specified on tapes, pipes, stdin/out\n"
"depth=N       Specify a numeric queue depth.  This only applies to pass(4)\n"
"mcs=N         Specify a minimum cmd size for pass(4) read/write commands\n"
"Optional arguments\n"
"-C retry_cnt  Specify a retry count for pass(4) devices\n"
"-E            Enable CAM error recovery for pass(4) devices\n"
"-m max_io     Specify the maximum amount to be transferred in bytes or\n"
"              using K, G, M, etc. suffixes\n"
"-t timeout    Specify the I/O timeout to use with pass(4) devices\n"
"-v            Enable verbose error recovery\n"
"-h            Print this message\n";

	fputs(usage_text, stderr);
}
3327 
3328 
3329 int
3330 camdd_parse_io_opts(char *args, int is_write, struct camdd_io_opts *io_opts)
3331 {
3332 	char *tmpstr, *tmpstr2;
3333 	char *orig_tmpstr = NULL;
3334 	int retval = 0;
3335 
3336 	io_opts->write_dev = is_write;
3337 
3338 	tmpstr = strdup(args);
3339 	if (tmpstr == NULL) {
3340 		warn("strdup failed");
3341 		retval = 1;
3342 		goto bailout;
3343 	}
3344 	orig_tmpstr = tmpstr;
3345 	while ((tmpstr2 = strsep(&tmpstr, ",")) != NULL) {
3346 		char *name, *value;
3347 
3348 		/*
3349 		 * If the user creates an empty parameter by putting in two
3350 		 * commas, skip over it and look for the next field.
3351 		 */
3352 		if (*tmpstr2 == '\0')
3353 			continue;
3354 
3355 		name = strsep(&tmpstr2, "=");
3356 		if (*name == '\0') {
3357 			warnx("Got empty I/O parameter name");
3358 			retval = 1;
3359 			goto bailout;
3360 		}
3361 		value = strsep(&tmpstr2, "=");
3362 		if ((value == NULL)
3363 		 || (*value == '\0')) {
3364 			warnx("Empty I/O parameter value for %s", name);
3365 			retval = 1;
3366 			goto bailout;
3367 		}
3368 		if (strncasecmp(name, "file", 4) == 0) {
3369 			io_opts->dev_type = CAMDD_DEV_FILE;
3370 			io_opts->dev_name = strdup(value);
3371 			if (io_opts->dev_name == NULL) {
3372 				warn("Error allocating memory");
3373 				retval = 1;
3374 				goto bailout;
3375 			}
3376 		} else if (strncasecmp(name, "pass", 4) == 0) {
3377 			io_opts->dev_type = CAMDD_DEV_PASS;
3378 			io_opts->dev_name = strdup(value);
3379 			if (io_opts->dev_name == NULL) {
3380 				warn("Error allocating memory");
3381 				retval = 1;
3382 				goto bailout;
3383 			}
3384 		} else if ((strncasecmp(name, "bs", 2) == 0)
3385 			|| (strncasecmp(name, "blocksize", 9) == 0)) {
3386 			retval = expand_number(value, &io_opts->blocksize);
3387 			if (retval == -1) {
3388 				warn("expand_number(3) failed on %s=%s", name,
3389 				    value);
3390 				retval = 1;
3391 				goto bailout;
3392 			}
3393 		} else if (strncasecmp(name, "depth", 5) == 0) {
3394 			char *endptr;
3395 
3396 			io_opts->queue_depth = strtoull(value, &endptr, 0);
3397 			if (*endptr != '\0') {
3398 				warnx("invalid queue depth %s", value);
3399 				retval = 1;
3400 				goto bailout;
3401 			}
3402 		} else if (strncasecmp(name, "mcs", 3) == 0) {
3403 			char *endptr;
3404 
3405 			io_opts->min_cmd_size = strtol(value, &endptr, 0);
3406 			if ((*endptr != '\0')
3407 			 || ((io_opts->min_cmd_size > 16)
3408 			  || (io_opts->min_cmd_size < 0))) {
3409 				warnx("invalid minimum cmd size %s", value);
3410 				retval = 1;
3411 				goto bailout;
3412 			}
3413 		} else if (strncasecmp(name, "offset", 6) == 0) {
3414 			retval = expand_number(value, &io_opts->offset);
3415 			if (retval == -1) {
3416 				warn("expand_number(3) failed on %s=%s", name,
3417 				    value);
3418 				retval = 1;
3419 				goto bailout;
3420 			}
3421 		} else if (strncasecmp(name, "debug", 5) == 0) {
3422 			char *endptr;
3423 
3424 			io_opts->debug = strtoull(value, &endptr, 0);
3425 			if (*endptr != '\0') {
3426 				warnx("invalid debug level %s", value);
3427 				retval = 1;
3428 				goto bailout;
3429 			}
3430 		} else {
3431 			warnx("Unrecognized parameter %s=%s", name, value);
3432 		}
3433 	}
3434 bailout:
3435 	free(orig_tmpstr);
3436 
3437 	return (retval);
3438 }
3439 
3440 int
3441 main(int argc, char **argv)
3442 {
3443 	int c;
3444 	camdd_argmask arglist = CAMDD_ARG_NONE;
3445 	int timeout = 0, retry_count = 1;
3446 	int error = 0;
3447 	uint64_t max_io = 0;
3448 	struct camdd_io_opts *opt_list = NULL;
3449 
3450 	if (argc == 1) {
3451 		usage();
3452 		exit(1);
3453 	}
3454 
3455 	opt_list = calloc(2, sizeof(struct camdd_io_opts));
3456 	if (opt_list == NULL) {
3457 		warn("Unable to allocate option list");
3458 		error = 1;
3459 		goto bailout;
3460 	}
3461 
3462 	while ((c = getopt(argc, argv, "C:Ehi:m:o:t:v")) != -1){
3463 		switch (c) {
3464 		case 'C':
3465 			retry_count = strtol(optarg, NULL, 0);
3466 			if (retry_count < 0)
3467 				errx(1, "retry count %d is < 0",
3468 				     retry_count);
3469 			arglist |= CAMDD_ARG_RETRIES;
3470 			break;
3471 		case 'E':
3472 			arglist |= CAMDD_ARG_ERR_RECOVER;
3473 			break;
3474 		case 'i':
3475 		case 'o':
3476 			if (((c == 'i')
3477 			  && (opt_list[0].dev_type != CAMDD_DEV_NONE))
3478 			 || ((c == 'o')
3479 			  && (opt_list[1].dev_type != CAMDD_DEV_NONE))) {
3480 				errx(1, "Only one input and output path "
3481 				    "allowed");
3482 			}
3483 			error = camdd_parse_io_opts(optarg, (c == 'o') ? 1 : 0,
3484 			    (c == 'o') ? &opt_list[1] : &opt_list[0]);
3485 			if (error != 0)
3486 				goto bailout;
3487 			break;
3488 		case 'm':
3489 			error = expand_number(optarg, &max_io);
3490 			if (error == -1) {
3491 				warn("invalid maximum I/O amount %s", optarg);
3492 				error = 1;
3493 				goto bailout;
3494 			}
3495 			break;
3496 		case 't':
3497 			timeout = strtol(optarg, NULL, 0);
3498 			if (timeout < 0)
3499 				errx(1, "invalid timeout %d", timeout);
3500 			/* Convert the timeout from seconds to ms */
3501 			timeout *= 1000;
3502 			arglist |= CAMDD_ARG_TIMEOUT;
3503 			break;
3504 		case 'v':
3505 			arglist |= CAMDD_ARG_VERBOSE;
3506 			break;
3507 		case 'h':
3508 		default:
3509 			usage();
3510 			exit(1);
3511 			break; /*NOTREACHED*/
3512 		}
3513 	}
3514 
3515 	if ((opt_list[0].dev_type == CAMDD_DEV_NONE)
3516 	 || (opt_list[1].dev_type == CAMDD_DEV_NONE))
3517 		errx(1, "Must specify both -i and -o");
3518 
3519 	/*
3520 	 * Set the timeout if the user hasn't specified one.
3521 	 */
3522 	if (timeout == 0)
3523 		timeout = CAMDD_PASS_RW_TIMEOUT;
3524 
3525 	error = camdd_rw(opt_list, 2, max_io, retry_count, timeout);
3526 
3527 bailout:
3528 	free(opt_list);
3529 
3530 	exit(error);
3531 }
3532