xref: /freebsd/usr.sbin/camdd/camdd.c (revision 67ca7330cf34a789afbbff9ae7e4cdc4a4917ae3)
1 /*-
2  * Copyright (c) 1997-2007 Kenneth D. Merry
3  * Copyright (c) 2013, 2014, 2015 Spectra Logic Corporation
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions, and the following disclaimer,
11  *    without modification.
12  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
13  *    substantially similar to the "NO WARRANTY" disclaimer below
14  *    ("Disclaimer") and any redistribution must be conditioned upon
15  *    including a substantially similar Disclaimer requirement for further
16  *    binary redistribution.
17  *
18  * NO WARRANTY
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
22  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
28  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGES.
30  *
31  * Authors: Ken Merry           (Spectra Logic Corporation)
32  */
33 
34 /*
35  * This is eventually intended to be:
36  * - A basic data transfer/copy utility
37  * - A simple benchmark utility
38  * - An example of how to use the asynchronous pass(4) driver interface.
39  */
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42 
43 #include <sys/ioctl.h>
44 #include <sys/stdint.h>
45 #include <sys/types.h>
46 #include <sys/endian.h>
47 #include <sys/param.h>
48 #include <sys/sbuf.h>
49 #include <sys/stat.h>
50 #include <sys/event.h>
51 #include <sys/time.h>
52 #include <sys/uio.h>
53 #include <vm/vm.h>
54 #include <sys/bus.h>
55 #include <sys/bus_dma.h>
56 #include <sys/mtio.h>
57 #include <sys/conf.h>
58 #include <sys/disk.h>
59 
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <semaphore.h>
63 #include <string.h>
64 #include <unistd.h>
65 #include <inttypes.h>
66 #include <limits.h>
67 #include <fcntl.h>
68 #include <ctype.h>
69 #include <err.h>
70 #include <libutil.h>
71 #include <pthread.h>
72 #include <assert.h>
73 #include <bsdxml.h>
74 
75 #include <cam/cam.h>
76 #include <cam/cam_debug.h>
77 #include <cam/cam_ccb.h>
78 #include <cam/scsi/scsi_all.h>
79 #include <cam/scsi/scsi_da.h>
80 #include <cam/scsi/scsi_pass.h>
81 #include <cam/scsi/scsi_message.h>
82 #include <cam/scsi/smp_all.h>
83 #include <camlib.h>
84 #include <mtlib.h>
85 #include <zlib.h>
86 
/*
 * Top-level command selector.
 *
 * NOTE(review): despite the "mask" in the type name these values are not
 * independent bits -- CAMDD_CMD_READ (3) has the same value as
 * CAMDD_CMD_HELP | CAMDD_CMD_WRITE.  Confirm how callers compare them
 * before treating them as OR-able flags.
 */
typedef enum {
	CAMDD_CMD_NONE	= 0,
	CAMDD_CMD_HELP	= 1,
	CAMDD_CMD_WRITE	= 2,
	CAMDD_CMD_READ	= 3
} camdd_cmdmask;
93 
/*
 * Argument flags.  Every value other than CAMDD_ARG_NONE is a distinct bit,
 * so several of them can be OR'd together into a single camdd_argmask (as
 * parse_btl() does for the bus/target/lun bits).
 */
typedef enum {
	CAMDD_ARG_NONE		= 0,
	CAMDD_ARG_VERBOSE	= 1 << 0,
	CAMDD_ARG_DEVICE	= 1 << 1,
	CAMDD_ARG_BUS		= 1 << 2,
	CAMDD_ARG_TARGET	= 1 << 3,
	CAMDD_ARG_LUN		= 1 << 4,
	CAMDD_ARG_UNIT		= 1 << 5,
	CAMDD_ARG_TIMEOUT	= 1 << 6,
	CAMDD_ARG_ERR_RECOVER	= 1 << 7,
	CAMDD_ARG_RETRIES	= 1 << 8
} camdd_argmask;
106 
/*
 * The kind of endpoint a camdd_dev describes.  It selects which member of
 * union camdd_dev_spec is in use.
 */
typedef enum {
	CAMDD_DEV_NONE	= 0,
	CAMDD_DEV_PASS	= 1,	/* CAM device, closed with cam_close_device() */
	CAMDD_DEV_FILE	= 2	/* anything accessed through a file descriptor */
} camdd_dev_type;
112 
113 struct camdd_io_opts {
114 	camdd_dev_type	dev_type;
115 	char		*dev_name;
116 	uint64_t	blocksize;
117 	uint64_t	queue_depth;
118 	uint64_t	offset;
119 	int		min_cmd_size;
120 	int		write_dev;
121 	uint64_t	debug;
122 };
123 
/*
 * Discriminator for union camdd_buf_types inside a struct camdd_buf.
 */
typedef enum {
	CAMDD_BUF_NONE		= 0,
	CAMDD_BUF_DATA		= 1,	/* owns its own backing store */
	CAMDD_BUF_INDIRECT	= 2	/* refers to part of another buffer */
} camdd_buf_type;
129 
/*
 * Type-specific part of an indirect buffer: a window onto a piece of some
 * other (source) buffer rather than storage of its own.
 */
struct camdd_buf_indirect {
	/* The source buffer this chunk refers to. */
	struct camdd_buf *src_buf;

	/* Byte offset of this chunk within the source buffer. */
	uint64_t	  offset;

	/* Address in the source buffer where this chunk begins. */
	uint8_t		 *start_ptr;

	/* Size of this chunk, in bytes. */
	size_t		  len;
};
150 
151 struct camdd_buf_data {
152 	/*
153 	 * Buffer allocated when we allocate this camdd_buf.  This should
154 	 * be the size of the blocksize for this device.
155 	 */
156 	uint8_t			*buf;
157 
158 	/*
159 	 * The amount of backing store allocated in buf.  Generally this
160 	 * will be the blocksize of the device.
161 	 */
162 	uint32_t		 alloc_len;
163 
164 	/*
165 	 * The amount of data that was put into the buffer (on reads) or
166 	 * the amount of data we have put onto the src_list so far (on
167 	 * writes).
168 	 */
169 	uint32_t		 fill_len;
170 
171 	/*
172 	 * The amount of data that was not transferred.
173 	 */
174 	uint32_t		 resid;
175 
176 	/*
177 	 * Starting byte offset on the reader.
178 	 */
179 	uint64_t		 src_start_offset;
180 
181 	/*
182 	 * CCB used for pass(4) device targets.
183 	 */
184 	union ccb		 ccb;
185 
186 	/*
187 	 * Number of scatter/gather segments.
188 	 */
189 	int			 sg_count;
190 
191 	/*
192 	 * Set if we had to tack on an extra buffer to round the transfer
193 	 * up to a sector size.
194 	 */
195 	int			 extra_buf;
196 
197 	/*
198 	 * Scatter/gather list used generally when we're the writer for a
199 	 * pass(4) device.
200 	 */
201 	bus_dma_segment_t	*segs;
202 
203 	/*
204 	 * Scatter/gather list used generally when we're the writer for a
205 	 * file or block device;
206 	 */
207 	struct iovec		*iovec;
208 };
209 
210 union camdd_buf_types {
211 	struct camdd_buf_indirect	indirect;
212 	struct camdd_buf_data		data;
213 };
214 
/*
 * Outcome recorded for a buffer's I/O.
 */
typedef enum {
	CAMDD_STATUS_NONE	= 0,
	CAMDD_STATUS_OK		= 1,
	CAMDD_STATUS_SHORT_IO	= 2,
	CAMDD_STATUS_EOF	= 3,
	CAMDD_STATUS_ERROR	= 4
} camdd_buf_status;
222 
223 struct camdd_buf {
224 	camdd_buf_type		 buf_type;
225 	union camdd_buf_types	 buf_type_spec;
226 
227 	camdd_buf_status	 status;
228 
229 	uint64_t		 lba;
230 	size_t			 len;
231 
232 	/*
233 	 * A reference count of how many indirect buffers point to this
234 	 * buffer.
235 	 */
236 	int			 refcount;
237 
238 	/*
239 	 * A link back to our parent device.
240 	 */
241 	struct camdd_dev	*dev;
242 	STAILQ_ENTRY(camdd_buf)  links;
243 	STAILQ_ENTRY(camdd_buf)  work_links;
244 
245 	/*
246 	 * A count of the buffers on the src_list.
247 	 */
248 	int			 src_count;
249 
250 	/*
251 	 * List of buffers from our partner thread that are the components
252 	 * of this buffer for the I/O.  Uses src_links.
253 	 */
254 	STAILQ_HEAD(,camdd_buf)	 src_list;
255 	STAILQ_ENTRY(camdd_buf)  src_links;
256 };
257 
/* NOTE(review): presumably the count of usable camdd_dev_type values. */
#define	NUM_DEV_TYPES	2

/*
 * Device-specific state for a CAMDD_DEV_PASS device.
 */
struct camdd_dev_pass {
	int			 scsi_dev_type;
	int			 protocol;
	/* CAM handle; released with cam_close_device() on teardown. */
	struct cam_device	*dev;
	uint64_t		 max_sector;
	uint32_t		 block_len;
	uint32_t		 cpi_maxio;
};
268 
/*
 * Classification of what a file descriptor refers to, as determined when
 * a file device is probed (fstat() mode bits, plus the FIODTYPE ioctl for
 * character devices).
 */
typedef enum {
	CAMDD_FILE_NONE	= 0,
	CAMDD_FILE_REG	= 1,	/* regular file */
	CAMDD_FILE_STD	= 2,
	CAMDD_FILE_PIPE	= 3,	/* FIFO */
	CAMDD_FILE_DISK	= 4,	/* character device reporting D_DISK */
	CAMDD_FILE_TAPE	= 5,	/* character device reporting D_TAPE */
	CAMDD_FILE_TTY	= 6,	/* character device reporting D_TTY */
	CAMDD_FILE_MEM	= 7	/* character device reporting D_MEM */
} camdd_file_type;
279 
/*
 * Capability bits for a file device.
 */
typedef enum {
	CAMDD_FF_NONE		= 0,
	CAMDD_FF_CAN_SEEK	= 1 << 0	/* set for regular files */
} camdd_file_flags;
284 
285 struct camdd_dev_file {
286 	int			 fd;
287 	struct stat		 sb;
288 	char			 filename[MAXPATHLEN + 1];
289 	camdd_file_type		 file_type;
290 	camdd_file_flags	 file_flags;
291 	uint8_t			*tmp_buf;
292 };
293 
/*
 * Device-specific state for a block device accessed through a descriptor.
 */
struct camdd_dev_block {
	int			 fd;		/* open descriptor */
	uint64_t		 size_bytes;	/* device size, in bytes */
	uint32_t		 block_len;	/* block length */
};
299 
300 union camdd_dev_spec {
301 	struct camdd_dev_pass	pass;
302 	struct camdd_dev_file	file;
303 	struct camdd_dev_block	block;
304 };
305 
/*
 * Per-device state bits; each flag is a distinct bit so they can be
 * combined in camdd_dev.flags.
 */
typedef enum {
	CAMDD_DEV_FLAG_NONE		= 0,
	CAMDD_DEV_FLAG_EOF		= 1 << 0,
	CAMDD_DEV_FLAG_PEER_EOF		= 1 << 1,
	CAMDD_DEV_FLAG_ACTIVE		= 1 << 2,
	CAMDD_DEV_FLAG_EOF_SENT		= 1 << 3,
	CAMDD_DEV_FLAG_EOF_QUEUED	= 1 << 4
} camdd_dev_flags;
314 
315 struct camdd_dev {
316 	camdd_dev_type		 dev_type;
317 	union camdd_dev_spec	 dev_spec;
318 	camdd_dev_flags		 flags;
319 	char			 device_name[MAXPATHLEN+1];
320 	uint32_t		 blocksize;
321 	uint32_t		 sector_size;
322 	uint64_t		 max_sector;
323 	uint64_t		 sector_io_limit;
324 	int			 min_cmd_size;
325 	int			 write_dev;
326 	int			 retry_count;
327 	int			 io_timeout;
328 	int			 debug;
329 	uint64_t		 start_offset_bytes;
330 	uint64_t		 next_io_pos_bytes;
331 	uint64_t		 next_peer_pos_bytes;
332 	uint64_t		 next_completion_pos_bytes;
333 	uint64_t		 peer_bytes_queued;
334 	uint64_t		 bytes_transferred;
335 	uint32_t		 target_queue_depth;
336 	uint32_t		 cur_active_io;
337 	uint8_t			*extra_buf;
338 	uint32_t		 extra_buf_len;
339 	struct camdd_dev	*peer_dev;
340 	pthread_mutex_t		 mutex;
341 	pthread_cond_t		 cond;
342 	int			 kq;
343 
344 	int			 (*run)(struct camdd_dev *dev);
345 	int			 (*fetch)(struct camdd_dev *dev);
346 
347 	/*
348 	 * Buffers that are available for I/O.  Uses links.
349 	 */
350 	STAILQ_HEAD(,camdd_buf)	 free_queue;
351 
352 	/*
353 	 * Free indirect buffers.  These are used for breaking a large
354 	 * buffer into multiple pieces.
355 	 */
356 	STAILQ_HEAD(,camdd_buf)	 free_indirect_queue;
357 
358 	/*
359 	 * Buffers that have been queued to the kernel.  Uses links.
360 	 */
361 	STAILQ_HEAD(,camdd_buf)	 active_queue;
362 
363 	/*
364 	 * Will generally contain one of our buffers that is waiting for enough
365 	 * I/O from our partner thread to be able to execute.  This will
366 	 * generally happen when our per-I/O-size is larger than the
367 	 * partner thread's per-I/O-size.  Uses links.
368 	 */
369 	STAILQ_HEAD(,camdd_buf)	 pending_queue;
370 
371 	/*
372 	 * Number of buffers on the pending queue
373 	 */
374 	int			 num_pending_queue;
375 
376 	/*
377 	 * Buffers that are filled and ready to execute.  This is used when
378 	 * our partner (reader) thread sends us blocks that are larger than
379 	 * our blocksize, and so we have to split them into multiple pieces.
380 	 */
381 	STAILQ_HEAD(,camdd_buf)	 run_queue;
382 
383 	/*
384 	 * Number of buffers on the run queue.
385 	 */
386 	int			 num_run_queue;
387 
388 	STAILQ_HEAD(,camdd_buf)	 reorder_queue;
389 
390 	int			 num_reorder_queue;
391 
392 	/*
393 	 * Buffers that have been queued to us by our partner thread
394 	 * (generally the reader thread) to be written out.  Uses
395 	 * work_links.
396 	 */
397 	STAILQ_HEAD(,camdd_buf)	 work_queue;
398 
399 	/*
400 	 * Buffers that have been completed by our partner thread.  Uses
401 	 * work_links.
402 	 */
403 	STAILQ_HEAD(,camdd_buf)	 peer_done_queue;
404 
405 	/*
406 	 * Number of buffers on the peer done queue.
407 	 */
408 	uint32_t		 num_peer_done_queue;
409 
410 	/*
411 	 * A list of buffers that we have queued to our peer thread.  Uses
412 	 * links.
413 	 */
414 	STAILQ_HEAD(,camdd_buf)	 peer_work_queue;
415 
416 	/*
417 	 * Number of buffers on the peer work queue.
418 	 */
419 	uint32_t		 num_peer_work_queue;
420 };
421 
/* Semaphore shared by the whole program. */
static sem_t camdd_sem;

/*
 * Asynchronously updated status flags.  They are declared volatile as well
 * as sig_atomic_t: that combination is what C requires for an object that
 * may be written from a signal handler, and it also keeps the compiler from
 * caching a value that another context may change underneath a polling loop.
 */
static volatile sig_atomic_t need_exit = 0;
static volatile sig_atomic_t error_exit = 0;
static volatile sig_atomic_t need_status = 0;
426 
/*
 * Smaller of two values.  Both the arguments and the whole expansion are
 * parenthesized so that the macro behaves as a single operand inside a
 * larger expression, e.g. 2 * min(3, 4).  Each argument may be evaluated
 * twice, so do not pass expressions with side effects.
 */
#ifndef min
#define	min(a, b) (((a) < (b)) ? (a) : (b))
#endif
430 
431 
/*
 * Shorthand for the two slots of a CCB header's peripheral private area.
 * Each slot can be accessed either as a pointer or as a field.
 */
/* slot 0 */
#define ppriv_ptr0 periph_priv.entries[0].ptr
#define ppriv_field0 periph_priv.entries[0].field
/* slot 1 */
#define ppriv_ptr1 periph_priv.entries[1].ptr
#define ppriv_field1 periph_priv.entries[1].field

/* Slot 0's pointer is used under the name ccb_buf. */
#define	ccb_buf	ppriv_ptr0
439 
/*
 * Built-in defaults.
 *
 * CAMDD_FILE_DEFAULT_BLOCK is the I/O size used for a file when the user
 * gives no blocksize, and CAMDD_FILE_DEFAULT_DEPTH is the queue depth
 * always used for files.
 * NOTE(review): the PASS values are presumably the pass(4) counterparts,
 * with the timeout in milliseconds -- confirm against their users.
 *
 * The timeout expression is parenthesized so that the macro expands to a
 * single operand; without that, x / CAMDD_PASS_RW_TIMEOUT would parse as
 * (x / 60) * 1000.
 */
#define	CAMDD_FILE_DEFAULT_BLOCK	524288
#define	CAMDD_FILE_DEFAULT_DEPTH	1
#define	CAMDD_PASS_MAX_BLOCK		1048576
#define	CAMDD_PASS_DEFAULT_DEPTH	6
#define	CAMDD_PASS_RW_TIMEOUT		(60 * 1000)
445 
446 static int parse_btl(char *tstr, int *bus, int *target, int *lun,
447 		     camdd_argmask *arglst);
448 void camdd_free_dev(struct camdd_dev *dev);
449 struct camdd_dev *camdd_alloc_dev(camdd_dev_type dev_type,
450 				  struct kevent *new_ke, int num_ke,
451 				  int retry_count, int timeout);
452 static struct camdd_buf *camdd_alloc_buf(struct camdd_dev *dev,
453 					 camdd_buf_type buf_type);
454 void camdd_release_buf(struct camdd_buf *buf);
455 struct camdd_buf *camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type);
456 int camdd_buf_sg_create(struct camdd_buf *buf, int iovec,
457 			uint32_t sector_size, uint32_t *num_sectors_used,
458 			int *double_buf_needed);
459 uint32_t camdd_buf_get_len(struct camdd_buf *buf);
460 void camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf);
461 int camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
462 		     uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran);
463 int camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
464          camdd_argmask arglist, int probe_retry_count,
465          int probe_timeout, uint64_t *maxsector, uint32_t *block_len);
466 struct camdd_dev *camdd_probe_file(int fd, struct camdd_io_opts *io_opts,
467 				   int retry_count, int timeout);
468 struct camdd_dev *camdd_probe_pass(struct cam_device *cam_dev,
469 				   struct camdd_io_opts *io_opts,
470 				   camdd_argmask arglist, int probe_retry_count,
471 				   int probe_timeout, int io_retry_count,
472 				   int io_timeout);
473 void *camdd_file_worker(void *arg);
474 camdd_buf_status camdd_ccb_status(union ccb *ccb, int protocol);
475 int camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd);
476 int camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf);
477 int camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf);
478 void camdd_peer_done(struct camdd_buf *buf);
479 void camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
480 			int *error_count);
481 int camdd_pass_fetch(struct camdd_dev *dev);
482 int camdd_file_run(struct camdd_dev *dev);
483 int camdd_pass_run(struct camdd_dev *dev);
484 int camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len);
485 int camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf);
486 void camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
487 		     uint32_t *peer_depth, uint32_t *our_bytes,
488 		     uint32_t *peer_bytes);
489 void *camdd_worker(void *arg);
490 void camdd_sig_handler(int sig);
491 void camdd_print_status(struct camdd_dev *camdd_dev,
492 			struct camdd_dev *other_dev,
493 			struct timespec *start_time);
494 int camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts,
495 	     uint64_t max_io, int retry_count, int timeout);
496 int camdd_parse_io_opts(char *args, int is_write,
497 			struct camdd_io_opts *io_opts);
498 void usage(void);
499 
500 /*
501  * Parse out a bus, or a bus, target and lun in the following
502  * format:
503  * bus
504  * bus:target
505  * bus:target:lun
506  *
507  * Returns the number of parsed components, or 0.
508  */
509 static int
510 parse_btl(char *tstr, int *bus, int *target, int *lun, camdd_argmask *arglst)
511 {
512 	char *tmpstr;
513 	int convs = 0;
514 
515 	while (isspace(*tstr) && (*tstr != '\0'))
516 		tstr++;
517 
518 	tmpstr = (char *)strtok(tstr, ":");
519 	if ((tmpstr != NULL) && (*tmpstr != '\0')) {
520 		*bus = strtol(tmpstr, NULL, 0);
521 		*arglst |= CAMDD_ARG_BUS;
522 		convs++;
523 		tmpstr = (char *)strtok(NULL, ":");
524 		if ((tmpstr != NULL) && (*tmpstr != '\0')) {
525 			*target = strtol(tmpstr, NULL, 0);
526 			*arglst |= CAMDD_ARG_TARGET;
527 			convs++;
528 			tmpstr = (char *)strtok(NULL, ":");
529 			if ((tmpstr != NULL) && (*tmpstr != '\0')) {
530 				*lun = strtol(tmpstr, NULL, 0);
531 				*arglst |= CAMDD_ARG_LUN;
532 				convs++;
533 			}
534 		}
535 	}
536 
537 	return convs;
538 }
539 
540 /*
541  * XXX KDM clean up and free all of the buffers on the queue!
542  */
543 void
544 camdd_free_dev(struct camdd_dev *dev)
545 {
546 	if (dev == NULL)
547 		return;
548 
549 	switch (dev->dev_type) {
550 	case CAMDD_DEV_FILE: {
551 		struct camdd_dev_file *file_dev = &dev->dev_spec.file;
552 
553 		if (file_dev->fd != -1)
554 			close(file_dev->fd);
555 		free(file_dev->tmp_buf);
556 		break;
557 	}
558 	case CAMDD_DEV_PASS: {
559 		struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
560 
561 		if (pass_dev->dev != NULL)
562 			cam_close_device(pass_dev->dev);
563 		break;
564 	}
565 	default:
566 		break;
567 	}
568 
569 	free(dev);
570 }
571 
572 struct camdd_dev *
573 camdd_alloc_dev(camdd_dev_type dev_type, struct kevent *new_ke, int num_ke,
574 		int retry_count, int timeout)
575 {
576 	struct camdd_dev *dev = NULL;
577 	struct kevent *ke;
578 	size_t ke_size;
579 	int retval = 0;
580 
581 	dev = calloc(1, sizeof(*dev));
582 	if (dev == NULL) {
583 		warn("%s: unable to malloc %zu bytes", __func__, sizeof(*dev));
584 		goto bailout;
585 	}
586 
587 	dev->dev_type = dev_type;
588 	dev->io_timeout = timeout;
589 	dev->retry_count = retry_count;
590 	STAILQ_INIT(&dev->free_queue);
591 	STAILQ_INIT(&dev->free_indirect_queue);
592 	STAILQ_INIT(&dev->active_queue);
593 	STAILQ_INIT(&dev->pending_queue);
594 	STAILQ_INIT(&dev->run_queue);
595 	STAILQ_INIT(&dev->reorder_queue);
596 	STAILQ_INIT(&dev->work_queue);
597 	STAILQ_INIT(&dev->peer_done_queue);
598 	STAILQ_INIT(&dev->peer_work_queue);
599 	retval = pthread_mutex_init(&dev->mutex, NULL);
600 	if (retval != 0) {
601 		warnc(retval, "%s: failed to initialize mutex", __func__);
602 		goto bailout;
603 	}
604 
605 	retval = pthread_cond_init(&dev->cond, NULL);
606 	if (retval != 0) {
607 		warnc(retval, "%s: failed to initialize condition variable",
608 		      __func__);
609 		goto bailout;
610 	}
611 
612 	dev->kq = kqueue();
613 	if (dev->kq == -1) {
614 		warn("%s: Unable to create kqueue", __func__);
615 		goto bailout;
616 	}
617 
618 	ke_size = sizeof(struct kevent) * (num_ke + 4);
619 	ke = calloc(1, ke_size);
620 	if (ke == NULL) {
621 		warn("%s: unable to malloc %zu bytes", __func__, ke_size);
622 		goto bailout;
623 	}
624 	if (num_ke > 0)
625 		bcopy(new_ke, ke, num_ke * sizeof(struct kevent));
626 
627 	EV_SET(&ke[num_ke++], (uintptr_t)&dev->work_queue, EVFILT_USER,
628 	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
629 	EV_SET(&ke[num_ke++], (uintptr_t)&dev->peer_done_queue, EVFILT_USER,
630 	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
631 	EV_SET(&ke[num_ke++], SIGINFO, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
632 	EV_SET(&ke[num_ke++], SIGINT, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
633 
634 	retval = kevent(dev->kq, ke, num_ke, NULL, 0, NULL);
635 	if (retval == -1) {
636 		warn("%s: Unable to register kevents", __func__);
637 		goto bailout;
638 	}
639 
640 
641 	return (dev);
642 
643 bailout:
644 	free(dev);
645 
646 	return (NULL);
647 }
648 
649 static struct camdd_buf *
650 camdd_alloc_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
651 {
652 	struct camdd_buf *buf = NULL;
653 	uint8_t *data_ptr = NULL;
654 
655 	/*
656 	 * We only need to allocate data space for data buffers.
657 	 */
658 	switch (buf_type) {
659 	case CAMDD_BUF_DATA:
660 		data_ptr = malloc(dev->blocksize);
661 		if (data_ptr == NULL) {
662 			warn("unable to allocate %u bytes", dev->blocksize);
663 			goto bailout_error;
664 		}
665 		break;
666 	default:
667 		break;
668 	}
669 
670 	buf = calloc(1, sizeof(*buf));
671 	if (buf == NULL) {
672 		warn("unable to allocate %zu bytes", sizeof(*buf));
673 		goto bailout_error;
674 	}
675 
676 	buf->buf_type = buf_type;
677 	buf->dev = dev;
678 	switch (buf_type) {
679 	case CAMDD_BUF_DATA: {
680 		struct camdd_buf_data *data;
681 
682 		data = &buf->buf_type_spec.data;
683 
684 		data->alloc_len = dev->blocksize;
685 		data->buf = data_ptr;
686 		break;
687 	}
688 	case CAMDD_BUF_INDIRECT:
689 		break;
690 	default:
691 		break;
692 	}
693 	STAILQ_INIT(&buf->src_list);
694 
695 	return (buf);
696 
697 bailout_error:
698 	free(data_ptr);
699 
700 	return (NULL);
701 }
702 
703 void
704 camdd_release_buf(struct camdd_buf *buf)
705 {
706 	struct camdd_dev *dev;
707 
708 	dev = buf->dev;
709 
710 	switch (buf->buf_type) {
711 	case CAMDD_BUF_DATA: {
712 		struct camdd_buf_data *data;
713 
714 		data = &buf->buf_type_spec.data;
715 
716 		if (data->segs != NULL) {
717 			if (data->extra_buf != 0) {
718 				void *extra_buf;
719 
720 				extra_buf = (void *)
721 				    data->segs[data->sg_count - 1].ds_addr;
722 				free(extra_buf);
723 				data->extra_buf = 0;
724 			}
725 			free(data->segs);
726 			data->segs = NULL;
727 			data->sg_count = 0;
728 		} else if (data->iovec != NULL) {
729 			if (data->extra_buf != 0) {
730 				free(data->iovec[data->sg_count - 1].iov_base);
731 				data->extra_buf = 0;
732 			}
733 			free(data->iovec);
734 			data->iovec = NULL;
735 			data->sg_count = 0;
736 		}
737 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
738 		break;
739 	}
740 	case CAMDD_BUF_INDIRECT:
741 		STAILQ_INSERT_TAIL(&dev->free_indirect_queue, buf, links);
742 		break;
743 	default:
744 		err(1, "%s: Invalid buffer type %d for released buffer",
745 		    __func__, buf->buf_type);
746 		break;
747 	}
748 }
749 
750 struct camdd_buf *
751 camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
752 {
753 	struct camdd_buf *buf = NULL;
754 
755 	switch (buf_type) {
756 	case CAMDD_BUF_DATA:
757 		buf = STAILQ_FIRST(&dev->free_queue);
758 		if (buf != NULL) {
759 			struct camdd_buf_data *data;
760 			uint8_t *data_ptr;
761 			uint32_t alloc_len;
762 
763 			STAILQ_REMOVE_HEAD(&dev->free_queue, links);
764 			data = &buf->buf_type_spec.data;
765 			data_ptr = data->buf;
766 			alloc_len = data->alloc_len;
767 			bzero(buf, sizeof(*buf));
768 			data->buf = data_ptr;
769 			data->alloc_len = alloc_len;
770 		}
771 		break;
772 	case CAMDD_BUF_INDIRECT:
773 		buf = STAILQ_FIRST(&dev->free_indirect_queue);
774 		if (buf != NULL) {
775 			STAILQ_REMOVE_HEAD(&dev->free_indirect_queue, links);
776 
777 			bzero(buf, sizeof(*buf));
778 		}
779 		break;
780 	default:
781 		warnx("Unknown buffer type %d requested", buf_type);
782 		break;
783 	}
784 
785 
786 	if (buf == NULL)
787 		return (camdd_alloc_buf(dev, buf_type));
788 	else {
789 		STAILQ_INIT(&buf->src_list);
790 		buf->dev = dev;
791 		buf->buf_type = buf_type;
792 
793 		return (buf);
794 	}
795 }
796 
797 int
798 camdd_buf_sg_create(struct camdd_buf *buf, int iovec, uint32_t sector_size,
799 		    uint32_t *num_sectors_used, int *double_buf_needed)
800 {
801 	struct camdd_buf *tmp_buf;
802 	struct camdd_buf_data *data;
803 	uint8_t *extra_buf = NULL;
804 	size_t extra_buf_len = 0;
805 	int extra_buf_attached = 0;
806 	int i, retval = 0;
807 
808 	data = &buf->buf_type_spec.data;
809 
810 	data->sg_count = buf->src_count;
811 	/*
812 	 * Compose a scatter/gather list from all of the buffers in the list.
813 	 * If the length of the buffer isn't a multiple of the sector size,
814 	 * we'll have to add an extra buffer.  This should only happen
815 	 * at the end of a transfer.
816 	 */
817 	if ((data->fill_len % sector_size) != 0) {
818 		extra_buf_len = sector_size - (data->fill_len % sector_size);
819 		extra_buf = calloc(extra_buf_len, 1);
820 		if (extra_buf == NULL) {
821 			warn("%s: unable to allocate %zu bytes for extra "
822 			    "buffer space", __func__, extra_buf_len);
823 			retval = 1;
824 			goto bailout;
825 		}
826 		data->extra_buf = 1;
827 		data->sg_count++;
828 	}
829 	if (iovec == 0) {
830 		data->segs = calloc(data->sg_count, sizeof(bus_dma_segment_t));
831 		if (data->segs == NULL) {
832 			warn("%s: unable to allocate %zu bytes for S/G list",
833 			    __func__, sizeof(bus_dma_segment_t) *
834 			    data->sg_count);
835 			retval = 1;
836 			goto bailout;
837 		}
838 
839 	} else {
840 		data->iovec = calloc(data->sg_count, sizeof(struct iovec));
841 		if (data->iovec == NULL) {
842 			warn("%s: unable to allocate %zu bytes for S/G list",
843 			    __func__, sizeof(struct iovec) * data->sg_count);
844 			retval = 1;
845 			goto bailout;
846 		}
847 	}
848 
849 	for (i = 0, tmp_buf = STAILQ_FIRST(&buf->src_list);
850 	     i < buf->src_count && tmp_buf != NULL; i++,
851 	     tmp_buf = STAILQ_NEXT(tmp_buf, src_links)) {
852 
853 		if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
854 			struct camdd_buf_data *tmp_data;
855 
856 			tmp_data = &tmp_buf->buf_type_spec.data;
857 			if (iovec == 0) {
858 				data->segs[i].ds_addr =
859 				    (bus_addr_t) tmp_data->buf;
860 				data->segs[i].ds_len = tmp_data->fill_len -
861 				    tmp_data->resid;
862 			} else {
863 				data->iovec[i].iov_base = tmp_data->buf;
864 				data->iovec[i].iov_len = tmp_data->fill_len -
865 				    tmp_data->resid;
866 			}
867 			if (((tmp_data->fill_len - tmp_data->resid) %
868 			     sector_size) != 0)
869 				*double_buf_needed = 1;
870 		} else {
871 			struct camdd_buf_indirect *tmp_ind;
872 
873 			tmp_ind = &tmp_buf->buf_type_spec.indirect;
874 			if (iovec == 0) {
875 				data->segs[i].ds_addr =
876 				    (bus_addr_t)tmp_ind->start_ptr;
877 				data->segs[i].ds_len = tmp_ind->len;
878 			} else {
879 				data->iovec[i].iov_base = tmp_ind->start_ptr;
880 				data->iovec[i].iov_len = tmp_ind->len;
881 			}
882 			if ((tmp_ind->len % sector_size) != 0)
883 				*double_buf_needed = 1;
884 		}
885 	}
886 
887 	if (extra_buf != NULL) {
888 		if (iovec == 0) {
889 			data->segs[i].ds_addr = (bus_addr_t)extra_buf;
890 			data->segs[i].ds_len = extra_buf_len;
891 		} else {
892 			data->iovec[i].iov_base = extra_buf;
893 			data->iovec[i].iov_len = extra_buf_len;
894 		}
895 		extra_buf_attached = 1;
896 		i++;
897 	}
898 	if ((tmp_buf != NULL) || (i != data->sg_count)) {
899 		warnx("buffer source count does not match "
900 		      "number of buffers in list!");
901 		retval = 1;
902 		goto bailout;
903 	}
904 
905 bailout:
906 	if (retval == 0) {
907 		*num_sectors_used = (data->fill_len + extra_buf_len) /
908 		    sector_size;
909 	} else if (extra_buf_attached == 0) {
910 		/*
911 		 * If extra_buf isn't attached yet, we need to free it
912 		 * to avoid leaking.
913 		 */
914 		free(extra_buf);
915 		data->extra_buf = 0;
916 		data->sg_count--;
917 	}
918 	return (retval);
919 }
920 
921 uint32_t
922 camdd_buf_get_len(struct camdd_buf *buf)
923 {
924 	uint32_t len = 0;
925 
926 	if (buf->buf_type != CAMDD_BUF_DATA) {
927 		struct camdd_buf_indirect *indirect;
928 
929 		indirect = &buf->buf_type_spec.indirect;
930 		len = indirect->len;
931 	} else {
932 		struct camdd_buf_data *data;
933 
934 		data = &buf->buf_type_spec.data;
935 		len = data->fill_len;
936 	}
937 
938 	return (len);
939 }
940 
941 void
942 camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf)
943 {
944 	struct camdd_buf_data *data;
945 
946 	assert(buf->buf_type == CAMDD_BUF_DATA);
947 
948 	data = &buf->buf_type_spec.data;
949 
950 	STAILQ_INSERT_TAIL(&buf->src_list, child_buf, src_links);
951 	buf->src_count++;
952 
953 	data->fill_len += camdd_buf_get_len(child_buf);
954 }
955 
/*
 * Indices into req_status_items[] below; the two must stay in step.
 */
typedef enum {
	CAMDD_TS_MAX_BLK	= 0,
	CAMDD_TS_MIN_BLK	= 1,
	CAMDD_TS_BLK_GRAN	= 2,
	CAMDD_TS_EFF_IOSIZE	= 3
} camdd_status_item_index;

/*
 * The tape status entries that camdd_probe_tape() looks up by name.  The
 * entry pointers start out NULL and are filled in by the lookup.
 */
static struct camdd_status_items {
	const char *name;
	struct mt_status_entry *entry;
} req_status_items[] = {
	[CAMDD_TS_MAX_BLK]	= { "max_blk", NULL },
	[CAMDD_TS_MIN_BLK]	= { "min_blk", NULL },
	[CAMDD_TS_BLK_GRAN]	= { "blk_gran", NULL },
	[CAMDD_TS_EFF_IOSIZE]	= { "max_effective_iosize", NULL }
};
972 
973 int
974 camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
975 		 uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran)
976 {
977 	struct mt_status_data status_data;
978 	char *xml_str = NULL;
979 	unsigned int i;
980 	int retval = 0;
981 
982 	retval = mt_get_xml_str(fd, MTIOCEXTGET, &xml_str);
983 	if (retval != 0)
984 		err(1, "Couldn't get XML string from %s", filename);
985 
986 	retval = mt_get_status(xml_str, &status_data);
987 	if (retval != XML_STATUS_OK) {
988 		warn("couldn't get status for %s", filename);
989 		retval = 1;
990 		goto bailout;
991 	} else
992 		retval = 0;
993 
994 	if (status_data.error != 0) {
995 		warnx("%s", status_data.error_str);
996 		retval = 1;
997 		goto bailout;
998 	}
999 
1000 	for (i = 0; i < nitems(req_status_items); i++) {
1001                 char *name;
1002 
1003 		name = __DECONST(char *, req_status_items[i].name);
1004 		req_status_items[i].entry = mt_status_entry_find(&status_data,
1005 		    name);
1006 		if (req_status_items[i].entry == NULL) {
1007 			errx(1, "Cannot find status entry %s",
1008 			    req_status_items[i].name);
1009 		}
1010 	}
1011 
1012 	*max_iosize = req_status_items[CAMDD_TS_EFF_IOSIZE].entry->value_unsigned;
1013 	*max_blk= req_status_items[CAMDD_TS_MAX_BLK].entry->value_unsigned;
1014 	*min_blk= req_status_items[CAMDD_TS_MIN_BLK].entry->value_unsigned;
1015 	*blk_gran = req_status_items[CAMDD_TS_BLK_GRAN].entry->value_unsigned;
1016 bailout:
1017 
1018 	free(xml_str);
1019 	mt_status_free(&status_data);
1020 
1021 	return (retval);
1022 }
1023 
1024 struct camdd_dev *
1025 camdd_probe_file(int fd, struct camdd_io_opts *io_opts, int retry_count,
1026     int timeout)
1027 {
1028 	struct camdd_dev *dev = NULL;
1029 	struct camdd_dev_file *file_dev;
1030 	uint64_t blocksize = io_opts->blocksize;
1031 
1032 	dev = camdd_alloc_dev(CAMDD_DEV_FILE, NULL, 0, retry_count, timeout);
1033 	if (dev == NULL)
1034 		goto bailout;
1035 
1036 	file_dev = &dev->dev_spec.file;
1037 	file_dev->fd = fd;
1038 	strlcpy(file_dev->filename, io_opts->dev_name,
1039 	    sizeof(file_dev->filename));
1040 	strlcpy(dev->device_name, io_opts->dev_name, sizeof(dev->device_name));
1041 	if (blocksize == 0)
1042 		dev->blocksize = CAMDD_FILE_DEFAULT_BLOCK;
1043 	else
1044 		dev->blocksize = blocksize;
1045 
1046 	if ((io_opts->queue_depth != 0)
1047 	 && (io_opts->queue_depth != 1)) {
1048 		warnx("Queue depth %ju for %s ignored, only 1 outstanding "
1049 		    "command supported", (uintmax_t)io_opts->queue_depth,
1050 		    io_opts->dev_name);
1051 	}
1052 	dev->target_queue_depth = CAMDD_FILE_DEFAULT_DEPTH;
1053 	dev->run = camdd_file_run;
1054 	dev->fetch = NULL;
1055 
1056 	/*
1057 	 * We can effectively access files on byte boundaries.  We'll reset
1058 	 * this for devices like disks that can be accessed on sector
1059 	 * boundaries.
1060 	 */
1061 	dev->sector_size = 1;
1062 
1063 	if ((fd != STDIN_FILENO)
1064 	 && (fd != STDOUT_FILENO)) {
1065 		int retval;
1066 
1067 		retval = fstat(fd, &file_dev->sb);
1068 		if (retval != 0) {
1069 			warn("Cannot stat %s", dev->device_name);
1070 			goto bailout_error;
1071 		}
1072 		if (S_ISREG(file_dev->sb.st_mode)) {
1073 			file_dev->file_type = CAMDD_FILE_REG;
1074 		} else if (S_ISCHR(file_dev->sb.st_mode)) {
1075 			int type;
1076 
1077 			if (ioctl(fd, FIODTYPE, &type) == -1)
1078 				err(1, "FIODTYPE ioctl failed on %s",
1079 				    dev->device_name);
1080 			else {
1081 				if (type & D_TAPE)
1082 					file_dev->file_type = CAMDD_FILE_TAPE;
1083 				else if (type & D_DISK)
1084 					file_dev->file_type = CAMDD_FILE_DISK;
1085 				else if (type & D_MEM)
1086 					file_dev->file_type = CAMDD_FILE_MEM;
1087 				else if (type & D_TTY)
1088 					file_dev->file_type = CAMDD_FILE_TTY;
1089 			}
1090 		} else if (S_ISDIR(file_dev->sb.st_mode)) {
1091 			errx(1, "cannot operate on directory %s",
1092 			    dev->device_name);
1093 		} else if (S_ISFIFO(file_dev->sb.st_mode)) {
1094 			file_dev->file_type = CAMDD_FILE_PIPE;
1095 		} else
1096 			errx(1, "Cannot determine file type for %s",
1097 			    dev->device_name);
1098 
1099 		switch (file_dev->file_type) {
1100 		case CAMDD_FILE_REG:
1101 			if (file_dev->sb.st_size != 0)
1102 				dev->max_sector = file_dev->sb.st_size - 1;
1103 			else
1104 				dev->max_sector = 0;
1105 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1106 			break;
1107 		case CAMDD_FILE_TAPE: {
1108 			uint64_t max_iosize, max_blk, min_blk, blk_gran;
1109 			/*
1110 			 * Check block limits and maximum effective iosize.
1111 			 * Make sure the blocksize is within the block
1112 			 * limits (and a multiple of the minimum blocksize)
1113 			 * and that the blocksize is <= maximum effective
1114 			 * iosize.
1115 			 */
1116 			retval = camdd_probe_tape(fd, dev->device_name,
1117 			    &max_iosize, &max_blk, &min_blk, &blk_gran);
1118 			if (retval != 0)
1119 				errx(1, "Unable to probe tape %s",
1120 				    dev->device_name);
1121 
1122 			/*
1123 			 * The blocksize needs to be <= the maximum
1124 			 * effective I/O size of the tape device.  Note
1125 			 * that this also takes into account the maximum
1126 			 * blocksize reported by READ BLOCK LIMITS.
1127 			 */
1128 			if (dev->blocksize > max_iosize) {
1129 				warnx("Blocksize %u too big for %s, limiting "
1130 				    "to %ju", dev->blocksize, dev->device_name,
1131 				    max_iosize);
1132 				dev->blocksize = max_iosize;
1133 			}
1134 
1135 			/*
1136 			 * The blocksize needs to be at least min_blk;
1137 			 */
1138 			if (dev->blocksize < min_blk) {
1139 				warnx("Blocksize %u too small for %s, "
1140 				    "increasing to %ju", dev->blocksize,
1141 				    dev->device_name, min_blk);
1142 				dev->blocksize = min_blk;
1143 			}
1144 
1145 			/*
1146 			 * And the blocksize needs to be a multiple of
1147 			 * the block granularity.
1148 			 */
1149 			if ((blk_gran != 0)
1150 			 && (dev->blocksize % (1 << blk_gran))) {
1151 				warnx("Blocksize %u for %s not a multiple of "
1152 				    "%d, adjusting to %d", dev->blocksize,
1153 				    dev->device_name, (1 << blk_gran),
1154 				    dev->blocksize & ~((1 << blk_gran) - 1));
1155 				dev->blocksize &= ~((1 << blk_gran) - 1);
1156 			}
1157 
1158 			if (dev->blocksize == 0) {
1159 				errx(1, "Unable to derive valid blocksize for "
1160 				    "%s", dev->device_name);
1161 			}
1162 
1163 			/*
1164 			 * For tape drives, set the sector size to the
1165 			 * blocksize so that we make sure not to write
1166 			 * less than the blocksize out to the drive.
1167 			 */
1168 			dev->sector_size = dev->blocksize;
1169 			break;
1170 		}
1171 		case CAMDD_FILE_DISK: {
1172 			off_t media_size;
1173 			unsigned int sector_size;
1174 
1175 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1176 
1177 			if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) == -1) {
1178 				err(1, "DIOCGSECTORSIZE ioctl failed on %s",
1179 				    dev->device_name);
1180 			}
1181 
1182 			if (sector_size == 0) {
1183 				errx(1, "DIOCGSECTORSIZE ioctl returned "
1184 				    "invalid sector size %u for %s",
1185 				    sector_size, dev->device_name);
1186 			}
1187 
1188 			if (ioctl(fd, DIOCGMEDIASIZE, &media_size) == -1) {
1189 				err(1, "DIOCGMEDIASIZE ioctl failed on %s",
1190 				    dev->device_name);
1191 			}
1192 
1193 			if (media_size == 0) {
1194 				errx(1, "DIOCGMEDIASIZE ioctl returned "
1195 				    "invalid media size %ju for %s",
1196 				    (uintmax_t)media_size, dev->device_name);
1197 			}
1198 
1199 			if (dev->blocksize % sector_size) {
1200 				errx(1, "%s blocksize %u not a multiple of "
1201 				    "sector size %u", dev->device_name,
1202 				    dev->blocksize, sector_size);
1203 			}
1204 
1205 			dev->sector_size = sector_size;
1206 			dev->max_sector = (media_size / sector_size) - 1;
1207 			break;
1208 		}
1209 		case CAMDD_FILE_MEM:
1210 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1211 			break;
1212 		default:
1213 			break;
1214 		}
1215 	}
1216 
1217 	if ((io_opts->offset != 0)
1218 	 && ((file_dev->file_flags & CAMDD_FF_CAN_SEEK) == 0)) {
1219 		warnx("Offset %ju specified for %s, but we cannot seek on %s",
1220 		    io_opts->offset, io_opts->dev_name, io_opts->dev_name);
1221 		goto bailout_error;
1222 	}
1223 #if 0
1224 	else if ((io_opts->offset != 0)
1225 		&& ((io_opts->offset % dev->sector_size) != 0)) {
1226 		warnx("Offset %ju for %s is not a multiple of the "
1227 		      "sector size %u", io_opts->offset,
1228 		      io_opts->dev_name, dev->sector_size);
1229 		goto bailout_error;
1230 	} else {
1231 		dev->start_offset_bytes = io_opts->offset;
1232 	}
1233 #endif
1234 
1235 bailout:
1236 	return (dev);
1237 
1238 bailout_error:
1239 	camdd_free_dev(dev);
1240 	return (NULL);
1241 }
1242 
1243 /*
1244  * Get a get device CCB for the specified device.
1245  */
1246 int
1247 camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd)
1248 {
1249         union ccb *ccb;
1250 	int retval = 0;
1251 
1252 	ccb = cam_getccb(device);
1253 
1254 	if (ccb == NULL) {
1255 		warnx("%s: couldn't allocate CCB", __func__);
1256 		return -1;
1257 	}
1258 
1259 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cgd);
1260 
1261 	ccb->ccb_h.func_code = XPT_GDEV_TYPE;
1262 
1263 	if (cam_send_ccb(device, ccb) < 0) {
1264 		warn("%s: error sending Get Device Information CCB", __func__);
1265 			cam_error_print(device, ccb, CAM_ESF_ALL,
1266 					CAM_EPF_ALL, stderr);
1267 		retval = -1;
1268 		goto bailout;
1269 	}
1270 
1271 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1272 			cam_error_print(device, ccb, CAM_ESF_ALL,
1273 					CAM_EPF_ALL, stderr);
1274 		retval = -1;
1275 		goto bailout;
1276 	}
1277 
1278 	bcopy(&ccb->cgd, cgd, sizeof(struct ccb_getdev));
1279 
1280 bailout:
1281 	cam_freeccb(ccb);
1282 
1283 	return retval;
1284 }
1285 
1286 int
1287 camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
1288 		 camdd_argmask arglist, int probe_retry_count,
1289 		 int probe_timeout, uint64_t *maxsector, uint32_t *block_len)
1290 {
1291 	struct scsi_read_capacity_data rcap;
1292 	struct scsi_read_capacity_data_long rcaplong;
1293 	int retval = -1;
1294 
1295 	if (ccb == NULL) {
1296 		warnx("%s: error passed ccb is NULL", __func__);
1297 		goto bailout;
1298 	}
1299 
1300 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
1301 
1302 	scsi_read_capacity(&ccb->csio,
1303 			   /*retries*/ probe_retry_count,
1304 			   /*cbfcnp*/ NULL,
1305 			   /*tag_action*/ MSG_SIMPLE_Q_TAG,
1306 			   &rcap,
1307 			   SSD_FULL_SIZE,
1308 			   /*timeout*/ probe_timeout ? probe_timeout : 5000);
1309 
1310 	/* Disable freezing the device queue */
1311 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1312 
1313 	if (arglist & CAMDD_ARG_ERR_RECOVER)
1314 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1315 
1316 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1317 		warn("error sending READ CAPACITY command");
1318 
1319 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1320 				CAM_EPF_ALL, stderr);
1321 
1322 		goto bailout;
1323 	}
1324 
1325 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1326 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1327 		goto bailout;
1328 	}
1329 
1330 	*maxsector = scsi_4btoul(rcap.addr);
1331 	*block_len = scsi_4btoul(rcap.length);
1332 
1333 	/*
1334 	 * A last block of 2^32-1 means that the true capacity is over 2TB,
1335 	 * and we need to issue the long READ CAPACITY to get the real
1336 	 * capacity.  Otherwise, we're all set.
1337 	 */
1338 	if (*maxsector != 0xffffffff) {
1339 		retval = 0;
1340 		goto bailout;
1341 	}
1342 
1343 	scsi_read_capacity_16(&ccb->csio,
1344 			      /*retries*/ probe_retry_count,
1345 			      /*cbfcnp*/ NULL,
1346 			      /*tag_action*/ MSG_SIMPLE_Q_TAG,
1347 			      /*lba*/ 0,
1348 			      /*reladdr*/ 0,
1349 			      /*pmi*/ 0,
1350 			      (uint8_t *)&rcaplong,
1351 			      sizeof(rcaplong),
1352 			      /*sense_len*/ SSD_FULL_SIZE,
1353 			      /*timeout*/ probe_timeout ? probe_timeout : 5000);
1354 
1355 	/* Disable freezing the device queue */
1356 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1357 
1358 	if (arglist & CAMDD_ARG_ERR_RECOVER)
1359 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1360 
1361 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1362 		warn("error sending READ CAPACITY (16) command");
1363 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1364 				CAM_EPF_ALL, stderr);
1365 		goto bailout;
1366 	}
1367 
1368 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1369 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1370 		goto bailout;
1371 	}
1372 
1373 	*maxsector = scsi_8btou64(rcaplong.addr);
1374 	*block_len = scsi_4btoul(rcaplong.length);
1375 
1376 	retval = 0;
1377 
1378 bailout:
1379 	return retval;
1380 }
1381 
1382 /*
1383  * Need to implement this.  Do a basic probe:
1384  * - Check the inquiry data, make sure we're talking to a device that we
1385  *   can reasonably expect to talk to -- direct, RBC, CD, WORM.
1386  * - Send a test unit ready, make sure the device is available.
1387  * - Get the capacity and block size.
1388  */
1389 struct camdd_dev *
1390 camdd_probe_pass(struct cam_device *cam_dev, struct camdd_io_opts *io_opts,
1391 		 camdd_argmask arglist, int probe_retry_count,
1392 		 int probe_timeout, int io_retry_count, int io_timeout)
1393 {
1394 	union ccb *ccb;
1395 	uint64_t maxsector = 0;
1396 	uint32_t cpi_maxio, max_iosize, pass_numblocks;
1397 	uint32_t block_len = 0;
1398 	struct camdd_dev *dev = NULL;
1399 	struct camdd_dev_pass *pass_dev;
1400 	struct kevent ke;
1401 	struct ccb_getdev cgd;
1402 	int retval;
1403 	int scsi_dev_type;
1404 
1405 	if ((retval = camdd_get_cgd(cam_dev, &cgd)) != 0) {
1406 		warnx("%s: error retrieving CGD", __func__);
1407 		return NULL;
1408 	}
1409 
1410 	ccb = cam_getccb(cam_dev);
1411 
1412 	if (ccb == NULL) {
1413 		warnx("%s: error allocating ccb", __func__);
1414 		goto bailout;
1415 	}
1416 
1417 	switch (cgd.protocol) {
1418 	case PROTO_SCSI:
1419 		scsi_dev_type = SID_TYPE(&cam_dev->inq_data);
1420 
1421 		/*
1422 		 * For devices that support READ CAPACITY, we'll attempt to get the
1423 		 * capacity.  Otherwise, we really don't support tape or other
1424 		 * devices via SCSI passthrough, so just return an error in that case.
1425 		 */
1426 		switch (scsi_dev_type) {
1427 		case T_DIRECT:
1428 		case T_WORM:
1429 		case T_CDROM:
1430 		case T_OPTICAL:
1431 		case T_RBC:
1432 		case T_ZBC_HM:
1433 			break;
1434 		default:
1435 			errx(1, "Unsupported SCSI device type %d", scsi_dev_type);
1436 			break; /*NOTREACHED*/
1437 		}
1438 
1439 		if ((retval = camdd_probe_pass_scsi(cam_dev, ccb, probe_retry_count,
1440 						arglist, probe_timeout, &maxsector,
1441 						&block_len))) {
1442 			goto bailout;
1443 		}
1444 		break;
1445 	default:
1446 		errx(1, "Unsupported PROTO type %d", cgd.protocol);
1447 		break; /*NOTREACHED*/
1448 	}
1449 
1450 	if (block_len == 0) {
1451 		warnx("Sector size for %s%u is 0, cannot continue",
1452 		    cam_dev->device_name, cam_dev->dev_unit_num);
1453 		goto bailout_error;
1454 	}
1455 
1456 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cpi);
1457 
1458 	ccb->ccb_h.func_code = XPT_PATH_INQ;
1459 	ccb->ccb_h.flags = CAM_DIR_NONE;
1460 	ccb->ccb_h.retry_count = 1;
1461 
1462 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1463 		warn("error sending XPT_PATH_INQ CCB");
1464 
1465 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1466 				CAM_EPF_ALL, stderr);
1467 		goto bailout;
1468 	}
1469 
1470 	EV_SET(&ke, cam_dev->fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1471 
1472 	dev = camdd_alloc_dev(CAMDD_DEV_PASS, &ke, 1, io_retry_count,
1473 			      io_timeout);
1474 	if (dev == NULL)
1475 		goto bailout;
1476 
1477 	pass_dev = &dev->dev_spec.pass;
1478 	pass_dev->scsi_dev_type = scsi_dev_type;
1479 	pass_dev->protocol = cgd.protocol;
1480 	pass_dev->dev = cam_dev;
1481 	pass_dev->max_sector = maxsector;
1482 	pass_dev->block_len = block_len;
1483 	pass_dev->cpi_maxio = ccb->cpi.maxio;
1484 	snprintf(dev->device_name, sizeof(dev->device_name), "%s%u",
1485 		 pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
1486 	dev->sector_size = block_len;
1487 	dev->max_sector = maxsector;
1488 
1489 
1490 	/*
1491 	 * Determine the optimal blocksize to use for this device.
1492 	 */
1493 
1494 	/*
1495 	 * If the controller has not specified a maximum I/O size,
1496 	 * just go with 128K as a somewhat conservative value.
1497 	 */
1498 	if (pass_dev->cpi_maxio == 0)
1499 		cpi_maxio = 131072;
1500 	else
1501 		cpi_maxio = pass_dev->cpi_maxio;
1502 
1503 	/*
1504 	 * If the controller has a large maximum I/O size, limit it
1505 	 * to something smaller so that the kernel doesn't have trouble
1506 	 * allocating buffers to copy data in and out for us.
1507 	 * XXX KDM this is until we have unmapped I/O support in the kernel.
1508 	 */
1509 	max_iosize = min(cpi_maxio, CAMDD_PASS_MAX_BLOCK);
1510 
1511 	/*
1512 	 * If we weren't able to get a block size for some reason,
1513 	 * default to 512 bytes.
1514 	 */
1515 	block_len = pass_dev->block_len;
1516 	if (block_len == 0)
1517 		block_len = 512;
1518 
1519 	/*
1520 	 * Figure out how many blocksize chunks will fit in the
1521 	 * maximum I/O size.
1522 	 */
1523 	pass_numblocks = max_iosize / block_len;
1524 
1525 	/*
1526 	 * And finally, multiple the number of blocks by the LBA
1527 	 * length to get our maximum block size;
1528 	 */
1529 	dev->blocksize = pass_numblocks * block_len;
1530 
1531 	if (io_opts->blocksize != 0) {
1532 		if ((io_opts->blocksize % dev->sector_size) != 0) {
1533 			warnx("Blocksize %ju for %s is not a multiple of "
1534 			      "sector size %u", (uintmax_t)io_opts->blocksize,
1535 			      dev->device_name, dev->sector_size);
1536 			goto bailout_error;
1537 		}
1538 		dev->blocksize = io_opts->blocksize;
1539 	}
1540 	dev->target_queue_depth = CAMDD_PASS_DEFAULT_DEPTH;
1541 	if (io_opts->queue_depth != 0)
1542 		dev->target_queue_depth = io_opts->queue_depth;
1543 
1544 	if (io_opts->offset != 0) {
1545 		if (io_opts->offset > (dev->max_sector * dev->sector_size)) {
1546 			warnx("Offset %ju is past the end of device %s",
1547 			    io_opts->offset, dev->device_name);
1548 			goto bailout_error;
1549 		}
1550 #if 0
1551 		else if ((io_opts->offset % dev->sector_size) != 0) {
1552 			warnx("Offset %ju for %s is not a multiple of the "
1553 			      "sector size %u", io_opts->offset,
1554 			      dev->device_name, dev->sector_size);
1555 			goto bailout_error;
1556 		}
1557 		dev->start_offset_bytes = io_opts->offset;
1558 #endif
1559 	}
1560 
1561 	dev->min_cmd_size = io_opts->min_cmd_size;
1562 
1563 	dev->run = camdd_pass_run;
1564 	dev->fetch = camdd_pass_fetch;
1565 
1566 bailout:
1567 	cam_freeccb(ccb);
1568 
1569 	return (dev);
1570 
1571 bailout_error:
1572 	cam_freeccb(ccb);
1573 
1574 	camdd_free_dev(dev);
1575 
1576 	return (NULL);
1577 }
1578 
1579 void *
1580 camdd_worker(void *arg)
1581 {
1582 	struct camdd_dev *dev = arg;
1583 	struct camdd_buf *buf;
1584 	struct timespec ts, *kq_ts;
1585 
1586 	ts.tv_sec = 0;
1587 	ts.tv_nsec = 0;
1588 
1589 	pthread_mutex_lock(&dev->mutex);
1590 
1591 	dev->flags |= CAMDD_DEV_FLAG_ACTIVE;
1592 
1593 	for (;;) {
1594 		struct kevent ke;
1595 		int retval = 0;
1596 
1597 		/*
1598 		 * XXX KDM check the reorder queue depth?
1599 		 */
1600 		if (dev->write_dev == 0) {
1601 			uint32_t our_depth, peer_depth, peer_bytes, our_bytes;
1602 			uint32_t target_depth = dev->target_queue_depth;
1603 			uint32_t peer_target_depth =
1604 			    dev->peer_dev->target_queue_depth;
1605 			uint32_t peer_blocksize = dev->peer_dev->blocksize;
1606 
1607 			camdd_get_depth(dev, &our_depth, &peer_depth,
1608 					&our_bytes, &peer_bytes);
1609 
1610 #if 0
1611 			while (((our_depth < target_depth)
1612 			     && (peer_depth < peer_target_depth))
1613 			    || ((peer_bytes + our_bytes) <
1614 				 (peer_blocksize * 2))) {
1615 #endif
1616 			while (((our_depth + peer_depth) <
1617 			        (target_depth + peer_target_depth))
1618 			    || ((peer_bytes + our_bytes) <
1619 				(peer_blocksize * 3))) {
1620 
1621 				retval = camdd_queue(dev, NULL);
1622 				if (retval == 1)
1623 					break;
1624 				else if (retval != 0) {
1625 					error_exit = 1;
1626 					goto bailout;
1627 				}
1628 
1629 				camdd_get_depth(dev, &our_depth, &peer_depth,
1630 						&our_bytes, &peer_bytes);
1631 			}
1632 		}
1633 		/*
1634 		 * See if we have any I/O that is ready to execute.
1635 		 */
1636 		buf = STAILQ_FIRST(&dev->run_queue);
1637 		if (buf != NULL) {
1638 			while (dev->target_queue_depth > dev->cur_active_io) {
1639 				retval = dev->run(dev);
1640 				if (retval == -1) {
1641 					dev->flags |= CAMDD_DEV_FLAG_EOF;
1642 					error_exit = 1;
1643 					break;
1644 				} else if (retval != 0) {
1645 					break;
1646 				}
1647 			}
1648 		}
1649 
1650 		/*
1651 		 * We've reached EOF, or our partner has reached EOF.
1652 		 */
1653 		if ((dev->flags & CAMDD_DEV_FLAG_EOF)
1654 		 || (dev->flags & CAMDD_DEV_FLAG_PEER_EOF)) {
1655 			if (dev->write_dev != 0) {
1656 			 	if ((STAILQ_EMPTY(&dev->work_queue))
1657 				 && (dev->num_run_queue == 0)
1658 				 && (dev->cur_active_io == 0)) {
1659 					goto bailout;
1660 				}
1661 			} else {
1662 				/*
1663 				 * If we're the reader, and the writer
1664 				 * got EOF, he is already done.  If we got
1665 				 * the EOF, then we need to wait until
1666 				 * everything is flushed out for the writer.
1667 				 */
1668 				if (dev->flags & CAMDD_DEV_FLAG_PEER_EOF) {
1669 					goto bailout;
1670 				} else if ((dev->num_peer_work_queue == 0)
1671 					&& (dev->num_peer_done_queue == 0)
1672 					&& (dev->cur_active_io == 0)
1673 					&& (dev->num_run_queue == 0)) {
1674 					goto bailout;
1675 				}
1676 			}
1677 			/*
1678 			 * XXX KDM need to do something about the pending
1679 			 * queue and cleanup resources.
1680 			 */
1681 		}
1682 
1683 		if ((dev->write_dev == 0)
1684 		 && (dev->cur_active_io == 0)
1685 		 && (dev->peer_bytes_queued < dev->peer_dev->blocksize))
1686 			kq_ts = &ts;
1687 		else
1688 			kq_ts = NULL;
1689 
1690 		/*
1691 		 * Run kevent to see if there are events to process.
1692 		 */
1693 		pthread_mutex_unlock(&dev->mutex);
1694 		retval = kevent(dev->kq, NULL, 0, &ke, 1, kq_ts);
1695 		pthread_mutex_lock(&dev->mutex);
1696 		if (retval == -1) {
1697 			warn("%s: error returned from kevent",__func__);
1698 			goto bailout;
1699 		} else if (retval != 0) {
1700 			switch (ke.filter) {
1701 			case EVFILT_READ:
1702 				if (dev->fetch != NULL) {
1703 					retval = dev->fetch(dev);
1704 					if (retval == -1) {
1705 						error_exit = 1;
1706 						goto bailout;
1707 					}
1708 				}
1709 				break;
1710 			case EVFILT_SIGNAL:
1711 				/*
1712 				 * We register for this so we don't get
1713 				 * an error as a result of a SIGINFO or a
1714 				 * SIGINT.  It will actually get handled
1715 				 * by the signal handler.  If we get a
1716 				 * SIGINT, bail out without printing an
1717 				 * error message.  Any other signals
1718 				 * will result in the error message above.
1719 				 */
1720 				if (ke.ident == SIGINT)
1721 					goto bailout;
1722 				break;
1723 			case EVFILT_USER:
1724 				retval = 0;
1725 				/*
1726 				 * Check to see if the other thread has
1727 				 * queued any I/O for us to do.  (In this
1728 				 * case we're the writer.)
1729 				 */
1730 				for (buf = STAILQ_FIRST(&dev->work_queue);
1731 				     buf != NULL;
1732 				     buf = STAILQ_FIRST(&dev->work_queue)) {
1733 					STAILQ_REMOVE_HEAD(&dev->work_queue,
1734 							   work_links);
1735 					retval = camdd_queue(dev, buf);
1736 					/*
1737 					 * We keep going unless we get an
1738 					 * actual error.  If we get EOF, we
1739 					 * still want to remove the buffers
1740 					 * from the queue and send the back
1741 					 * to the reader thread.
1742 					 */
1743 					if (retval == -1) {
1744 						error_exit = 1;
1745 						goto bailout;
1746 					} else
1747 						retval = 0;
1748 				}
1749 
1750 				/*
1751 				 * Next check to see if the other thread has
1752 				 * queued any completed buffers back to us.
1753 				 * (In this case we're the reader.)
1754 				 */
1755 				for (buf = STAILQ_FIRST(&dev->peer_done_queue);
1756 				     buf != NULL;
1757 				     buf = STAILQ_FIRST(&dev->peer_done_queue)){
1758 					STAILQ_REMOVE_HEAD(
1759 					    &dev->peer_done_queue, work_links);
1760 					dev->num_peer_done_queue--;
1761 					camdd_peer_done(buf);
1762 				}
1763 				break;
1764 			default:
1765 				warnx("%s: unknown kevent filter %d",
1766 				      __func__, ke.filter);
1767 				break;
1768 			}
1769 		}
1770 	}
1771 
1772 bailout:
1773 
1774 	dev->flags &= ~CAMDD_DEV_FLAG_ACTIVE;
1775 
1776 	/* XXX KDM cleanup resources here? */
1777 
1778 	pthread_mutex_unlock(&dev->mutex);
1779 
1780 	need_exit = 1;
1781 	sem_post(&camdd_sem);
1782 
1783 	return (NULL);
1784 }
1785 
1786 /*
1787  * Simplistic translation of CCB status to our local status.
1788  */
1789 camdd_buf_status
1790 camdd_ccb_status(union ccb *ccb, int protocol)
1791 {
1792 	camdd_buf_status status = CAMDD_STATUS_NONE;
1793 	cam_status ccb_status;
1794 
1795 	ccb_status = ccb->ccb_h.status & CAM_STATUS_MASK;
1796 
1797 	switch (protocol) {
1798 	case PROTO_SCSI:
1799 		switch (ccb_status) {
1800 		case CAM_REQ_CMP: {
1801 			if (ccb->csio.resid == 0) {
1802 				status = CAMDD_STATUS_OK;
1803 			} else if (ccb->csio.dxfer_len > ccb->csio.resid) {
1804 				status = CAMDD_STATUS_SHORT_IO;
1805 			} else {
1806 				status = CAMDD_STATUS_EOF;
1807 			}
1808 			break;
1809 		}
1810 		case CAM_SCSI_STATUS_ERROR: {
1811 			switch (ccb->csio.scsi_status) {
1812 			case SCSI_STATUS_OK:
1813 			case SCSI_STATUS_COND_MET:
1814 			case SCSI_STATUS_INTERMED:
1815 			case SCSI_STATUS_INTERMED_COND_MET:
1816 				status = CAMDD_STATUS_OK;
1817 				break;
1818 			case SCSI_STATUS_CMD_TERMINATED:
1819 			case SCSI_STATUS_CHECK_COND:
1820 			case SCSI_STATUS_QUEUE_FULL:
1821 			case SCSI_STATUS_BUSY:
1822 			case SCSI_STATUS_RESERV_CONFLICT:
1823 			default:
1824 				status = CAMDD_STATUS_ERROR;
1825 				break;
1826 			}
1827 			break;
1828 		}
1829 		default:
1830 			status = CAMDD_STATUS_ERROR;
1831 			break;
1832 		}
1833 		break;
1834 	default:
1835 		status = CAMDD_STATUS_ERROR;
1836 		break;
1837 	}
1838 
1839 	return (status);
1840 }
1841 
1842 /*
1843  * Queue a buffer to our peer's work thread for writing.
1844  *
1845  * Returns 0 for success, -1 for failure, 1 if the other thread exited.
1846  */
1847 int
1848 camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf)
1849 {
1850 	struct kevent ke;
1851 	STAILQ_HEAD(, camdd_buf) local_queue;
1852 	struct camdd_buf *buf1, *buf2;
1853 	struct camdd_buf_data *data = NULL;
1854 	uint64_t peer_bytes_queued = 0;
1855 	int active = 1;
1856 	int retval = 0;
1857 
1858 	STAILQ_INIT(&local_queue);
1859 
1860 	/*
1861 	 * Since we're the reader, we need to queue our I/O to the writer
1862 	 * in sequential order in order to make sure it gets written out
1863 	 * in sequential order.
1864 	 *
1865 	 * Check the next expected I/O starting offset.  If this doesn't
1866 	 * match, put it on the reorder queue.
1867 	 */
1868 	if ((buf->lba * dev->sector_size) != dev->next_completion_pos_bytes) {
1869 
1870 		/*
1871 		 * If there is nothing on the queue, there is no sorting
1872 		 * needed.
1873 		 */
1874 		if (STAILQ_EMPTY(&dev->reorder_queue)) {
1875 			STAILQ_INSERT_TAIL(&dev->reorder_queue, buf, links);
1876 			dev->num_reorder_queue++;
1877 			goto bailout;
1878 		}
1879 
1880 		/*
1881 		 * Sort in ascending order by starting LBA.  There should
1882 		 * be no identical LBAs.
1883 		 */
1884 		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
1885 		     buf1 = buf2) {
1886 			buf2 = STAILQ_NEXT(buf1, links);
1887 			if (buf->lba < buf1->lba) {
1888 				/*
1889 				 * If we're less than the first one, then
1890 				 * we insert at the head of the list
1891 				 * because this has to be the first element
1892 				 * on the list.
1893 				 */
1894 				STAILQ_INSERT_HEAD(&dev->reorder_queue,
1895 						   buf, links);
1896 				dev->num_reorder_queue++;
1897 				break;
1898 			} else if (buf->lba > buf1->lba) {
1899 				if (buf2 == NULL) {
1900 					STAILQ_INSERT_TAIL(&dev->reorder_queue,
1901 					    buf, links);
1902 					dev->num_reorder_queue++;
1903 					break;
1904 				} else if (buf->lba < buf2->lba) {
1905 					STAILQ_INSERT_AFTER(&dev->reorder_queue,
1906 					    buf1, buf, links);
1907 					dev->num_reorder_queue++;
1908 					break;
1909 				}
1910 			} else {
1911 				errx(1, "Found buffers with duplicate LBA %ju!",
1912 				     buf->lba);
1913 			}
1914 		}
1915 		goto bailout;
1916 	} else {
1917 
1918 		/*
1919 		 * We're the next expected I/O completion, so put ourselves
1920 		 * on the local queue to be sent to the writer.  We use
1921 		 * work_links here so that we can queue this to the
1922 		 * peer_work_queue before taking the buffer off of the
1923 		 * local_queue.
1924 		 */
1925 		dev->next_completion_pos_bytes += buf->len;
1926 		STAILQ_INSERT_TAIL(&local_queue, buf, work_links);
1927 
1928 		/*
1929 		 * Go through the reorder queue looking for more sequential
1930 		 * I/O and add it to the local queue.
1931 		 */
1932 		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
1933 		     buf1 = STAILQ_FIRST(&dev->reorder_queue)) {
1934 			/*
1935 			 * As soon as we see an I/O that is out of sequence,
1936 			 * we're done.
1937 			 */
1938 			if ((buf1->lba * dev->sector_size) !=
1939 			     dev->next_completion_pos_bytes)
1940 				break;
1941 
1942 			STAILQ_REMOVE_HEAD(&dev->reorder_queue, links);
1943 			dev->num_reorder_queue--;
1944 			STAILQ_INSERT_TAIL(&local_queue, buf1, work_links);
1945 			dev->next_completion_pos_bytes += buf1->len;
1946 		}
1947 	}
1948 
1949 	/*
1950 	 * Setup the event to let the other thread know that it has work
1951 	 * pending.
1952 	 */
1953 	EV_SET(&ke, (uintptr_t)&dev->peer_dev->work_queue, EVFILT_USER, 0,
1954 	       NOTE_TRIGGER, 0, NULL);
1955 
1956 	/*
1957 	 * Put this on our shadow queue so that we know what we've queued
1958 	 * to the other thread.
1959 	 */
1960 	STAILQ_FOREACH_SAFE(buf1, &local_queue, work_links, buf2) {
1961 		if (buf1->buf_type != CAMDD_BUF_DATA) {
1962 			errx(1, "%s: should have a data buffer, not an "
1963 			    "indirect buffer", __func__);
1964 		}
1965 		data = &buf1->buf_type_spec.data;
1966 
1967 		/*
1968 		 * We only need to send one EOF to the writer, and don't
1969 		 * need to continue sending EOFs after that.
1970 		 */
1971 		if (buf1->status == CAMDD_STATUS_EOF) {
1972 			if (dev->flags & CAMDD_DEV_FLAG_EOF_SENT) {
1973 				STAILQ_REMOVE(&local_queue, buf1, camdd_buf,
1974 				    work_links);
1975 				camdd_release_buf(buf1);
1976 				retval = 1;
1977 				continue;
1978 			}
1979 			dev->flags |= CAMDD_DEV_FLAG_EOF_SENT;
1980 		}
1981 
1982 
1983 		STAILQ_INSERT_TAIL(&dev->peer_work_queue, buf1, links);
1984 		peer_bytes_queued += (data->fill_len - data->resid);
1985 		dev->peer_bytes_queued += (data->fill_len - data->resid);
1986 		dev->num_peer_work_queue++;
1987 	}
1988 
1989 	if (STAILQ_FIRST(&local_queue) == NULL)
1990 		goto bailout;
1991 
1992 	/*
1993 	 * Drop our mutex and pick up the other thread's mutex.  We need to
1994 	 * do this to avoid deadlocks.
1995 	 */
1996 	pthread_mutex_unlock(&dev->mutex);
1997 	pthread_mutex_lock(&dev->peer_dev->mutex);
1998 
1999 	if (dev->peer_dev->flags & CAMDD_DEV_FLAG_ACTIVE) {
2000 		/*
2001 		 * Put the buffers on the other thread's incoming work queue.
2002 		 */
2003 		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2004 		     buf1 = STAILQ_FIRST(&local_queue)) {
2005 			STAILQ_REMOVE_HEAD(&local_queue, work_links);
2006 			STAILQ_INSERT_TAIL(&dev->peer_dev->work_queue, buf1,
2007 					   work_links);
2008 		}
2009 		/*
2010 		 * Send an event to the other thread's kqueue to let it know
2011 		 * that there is something on the work queue.
2012 		 */
2013 		retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2014 		if (retval == -1)
2015 			warn("%s: unable to add peer work_queue kevent",
2016 			     __func__);
2017 		else
2018 			retval = 0;
2019 	} else
2020 		active = 0;
2021 
2022 	pthread_mutex_unlock(&dev->peer_dev->mutex);
2023 	pthread_mutex_lock(&dev->mutex);
2024 
2025 	/*
2026 	 * If the other side isn't active, run through the queue and
2027 	 * release all of the buffers.
2028 	 */
2029 	if (active == 0) {
2030 		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2031 		     buf1 = STAILQ_FIRST(&local_queue)) {
2032 			STAILQ_REMOVE_HEAD(&local_queue, work_links);
2033 			STAILQ_REMOVE(&dev->peer_work_queue, buf1, camdd_buf,
2034 				      links);
2035 			dev->num_peer_work_queue--;
2036 			camdd_release_buf(buf1);
2037 		}
2038 		dev->peer_bytes_queued -= peer_bytes_queued;
2039 		retval = 1;
2040 	}
2041 
2042 bailout:
2043 	return (retval);
2044 }
2045 
2046 /*
2047  * Return a buffer to the reader thread when we have completed writing it.
2048  */
2049 int
2050 camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf)
2051 {
2052 	struct kevent ke;
2053 	int retval = 0;
2054 
2055 	/*
2056 	 * Setup the event to let the other thread know that we have
2057 	 * completed a buffer.
2058 	 */
2059 	EV_SET(&ke, (uintptr_t)&dev->peer_dev->peer_done_queue, EVFILT_USER, 0,
2060 	       NOTE_TRIGGER, 0, NULL);
2061 
2062 	/*
2063 	 * Drop our lock and acquire the other thread's lock before
2064 	 * manipulating
2065 	 */
2066 	pthread_mutex_unlock(&dev->mutex);
2067 	pthread_mutex_lock(&dev->peer_dev->mutex);
2068 
2069 	/*
2070 	 * Put the buffer on the reader thread's peer done queue now that
2071 	 * we have completed it.
2072 	 */
2073 	STAILQ_INSERT_TAIL(&dev->peer_dev->peer_done_queue, peer_buf,
2074 			   work_links);
2075 	dev->peer_dev->num_peer_done_queue++;
2076 
2077 	/*
2078 	 * Send an event to the peer thread to let it know that we've added
2079 	 * something to its peer done queue.
2080 	 */
2081 	retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2082 	if (retval == -1)
2083 		warn("%s: unable to add peer_done_queue kevent", __func__);
2084 	else
2085 		retval = 0;
2086 
2087 	/*
2088 	 * Drop the other thread's lock and reacquire ours.
2089 	 */
2090 	pthread_mutex_unlock(&dev->peer_dev->mutex);
2091 	pthread_mutex_lock(&dev->mutex);
2092 
2093 	return (retval);
2094 }
2095 
2096 /*
2097  * Free a buffer that was written out by the writer thread and returned to
2098  * the reader thread.
2099  */
2100 void
2101 camdd_peer_done(struct camdd_buf *buf)
2102 {
2103 	struct camdd_dev *dev;
2104 	struct camdd_buf_data *data;
2105 
2106 	dev = buf->dev;
2107 	if (buf->buf_type != CAMDD_BUF_DATA) {
2108 		errx(1, "%s: should have a data buffer, not an "
2109 		    "indirect buffer", __func__);
2110 	}
2111 
2112 	data = &buf->buf_type_spec.data;
2113 
2114 	STAILQ_REMOVE(&dev->peer_work_queue, buf, camdd_buf, links);
2115 	dev->num_peer_work_queue--;
2116 	dev->peer_bytes_queued -= (data->fill_len - data->resid);
2117 
2118 	if (buf->status == CAMDD_STATUS_EOF)
2119 		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2120 
2121 	STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2122 }
2123 
2124 /*
2125  * Assumes caller holds the lock for this device.
2126  */
2127 void
2128 camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
2129 		   int *error_count)
2130 {
2131 	int retval = 0;
2132 
2133 	/*
2134 	 * If we're the reader, we need to send the completed I/O
2135 	 * to the writer.  If we're the writer, we need to just
2136 	 * free up resources, or let the reader know if we've
2137 	 * encountered an error.
2138 	 */
2139 	if (dev->write_dev == 0) {
2140 		retval = camdd_queue_peer_buf(dev, buf);
2141 		if (retval != 0)
2142 			(*error_count)++;
2143 	} else {
2144 		struct camdd_buf *tmp_buf, *next_buf;
2145 
2146 		STAILQ_FOREACH_SAFE(tmp_buf, &buf->src_list, src_links,
2147 				    next_buf) {
2148 			struct camdd_buf *src_buf;
2149 			struct camdd_buf_indirect *indirect;
2150 
2151 			STAILQ_REMOVE(&buf->src_list, tmp_buf,
2152 				      camdd_buf, src_links);
2153 
2154 			tmp_buf->status = buf->status;
2155 
2156 			if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
2157 				camdd_complete_peer_buf(dev, tmp_buf);
2158 				continue;
2159 			}
2160 
2161 			indirect = &tmp_buf->buf_type_spec.indirect;
2162 			src_buf = indirect->src_buf;
2163 			src_buf->refcount--;
2164 			/*
2165 			 * XXX KDM we probably need to account for
2166 			 * exactly how many bytes we were able to
2167 			 * write.  Allocate the residual to the
2168 			 * first N buffers?  Or just track the
2169 			 * number of bytes written?  Right now the reader
2170 			 * doesn't do anything with a residual.
2171 			 */
2172 			src_buf->status = buf->status;
2173 			if (src_buf->refcount <= 0)
2174 				camdd_complete_peer_buf(dev, src_buf);
2175 			STAILQ_INSERT_TAIL(&dev->free_indirect_queue,
2176 					   tmp_buf, links);
2177 		}
2178 
2179 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2180 	}
2181 }
2182 
2183 /*
2184  * Fetch all completed commands from the pass(4) device.
2185  *
2186  * Returns the number of commands received, or -1 if any of the commands
2187  * completed with an error.  Returns 0 if no commands are available.
2188  */
2189 int
2190 camdd_pass_fetch(struct camdd_dev *dev)
2191 {
2192 	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2193 	union ccb ccb;
2194 	int retval = 0, num_fetched = 0, error_count = 0;
2195 
2196 	pthread_mutex_unlock(&dev->mutex);
2197 	/*
2198 	 * XXX KDM we don't distinguish between EFAULT and ENOENT.
2199 	 */
2200 	while ((retval = ioctl(pass_dev->dev->fd, CAMIOGET, &ccb)) != -1) {
2201 		struct camdd_buf *buf;
2202 		struct camdd_buf_data *data;
2203 		cam_status ccb_status;
2204 		union ccb *buf_ccb;
2205 
2206 		buf = ccb.ccb_h.ccb_buf;
2207 		data = &buf->buf_type_spec.data;
2208 		buf_ccb = &data->ccb;
2209 
2210 		num_fetched++;
2211 
2212 		/*
2213 		 * Copy the CCB back out so we get status, sense data, etc.
2214 		 */
2215 		bcopy(&ccb, buf_ccb, sizeof(ccb));
2216 
2217 		pthread_mutex_lock(&dev->mutex);
2218 
2219 		/*
2220 		 * We're now done, so take this off the active queue.
2221 		 */
2222 		STAILQ_REMOVE(&dev->active_queue, buf, camdd_buf, links);
2223 		dev->cur_active_io--;
2224 
2225 		ccb_status = ccb.ccb_h.status & CAM_STATUS_MASK;
2226 		if (ccb_status != CAM_REQ_CMP) {
2227 			cam_error_print(pass_dev->dev, &ccb, CAM_ESF_ALL,
2228 					CAM_EPF_ALL, stderr);
2229 		}
2230 
2231 		switch (pass_dev->protocol) {
2232 		case PROTO_SCSI:
2233 			data->resid = ccb.csio.resid;
2234 			dev->bytes_transferred += (ccb.csio.dxfer_len - ccb.csio.resid);
2235 			break;
2236 		default:
2237 			return -1;
2238 			break;
2239 		}
2240 
2241 		if (buf->status == CAMDD_STATUS_NONE)
2242 			buf->status = camdd_ccb_status(&ccb, pass_dev->protocol);
2243 		if (buf->status == CAMDD_STATUS_ERROR)
2244 			error_count++;
2245 		else if (buf->status == CAMDD_STATUS_EOF) {
2246 			/*
2247 			 * Once we queue this buffer to our partner thread,
2248 			 * he will know that we've hit EOF.
2249 			 */
2250 			dev->flags |= CAMDD_DEV_FLAG_EOF;
2251 		}
2252 
2253 		camdd_complete_buf(dev, buf, &error_count);
2254 
2255 		/*
2256 		 * Unlock in preparation for the ioctl call.
2257 		 */
2258 		pthread_mutex_unlock(&dev->mutex);
2259 	}
2260 
2261 	pthread_mutex_lock(&dev->mutex);
2262 
2263 	if (error_count > 0)
2264 		return (-1);
2265 	else
2266 		return (num_fetched);
2267 }
2268 
2269 /*
2270  * Returns -1 for error, 0 for success/continue, and 1 for resource
2271  * shortage/stop processing.
2272  */
2273 int
2274 camdd_file_run(struct camdd_dev *dev)
2275 {
2276 	struct camdd_dev_file *file_dev = &dev->dev_spec.file;
2277 	struct camdd_buf_data *data;
2278 	struct camdd_buf *buf;
2279 	off_t io_offset;
2280 	int retval = 0, write_dev = dev->write_dev;
2281 	int error_count = 0, no_resources = 0, double_buf_needed = 0;
2282 	uint32_t num_sectors = 0, db_len = 0;
2283 
2284 	buf = STAILQ_FIRST(&dev->run_queue);
2285 	if (buf == NULL) {
2286 		no_resources = 1;
2287 		goto bailout;
2288 	} else if ((dev->write_dev == 0)
2289 		&& (dev->flags & (CAMDD_DEV_FLAG_EOF |
2290 				  CAMDD_DEV_FLAG_EOF_SENT))) {
2291 		STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2292 		dev->num_run_queue--;
2293 		buf->status = CAMDD_STATUS_EOF;
2294 		error_count++;
2295 		goto bailout;
2296 	}
2297 
2298 	/*
2299 	 * If we're writing, we need to go through the source buffer list
2300 	 * and create an S/G list.
2301 	 */
2302 	if (write_dev != 0) {
2303 		retval = camdd_buf_sg_create(buf, /*iovec*/ 1,
2304 		    dev->sector_size, &num_sectors, &double_buf_needed);
2305 		if (retval != 0) {
2306 			no_resources = 1;
2307 			goto bailout;
2308 		}
2309 	}
2310 
2311 	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2312 	dev->num_run_queue--;
2313 
2314 	data = &buf->buf_type_spec.data;
2315 
2316 	/*
2317 	 * pread(2) and pwrite(2) offsets are byte offsets.
2318 	 */
2319 	io_offset = buf->lba * dev->sector_size;
2320 
2321 	/*
2322 	 * Unlock the mutex while we read or write.
2323 	 */
2324 	pthread_mutex_unlock(&dev->mutex);
2325 
2326 	/*
2327 	 * Note that we don't need to double buffer if we're the reader
2328 	 * because in that case, we have allocated a single buffer of
2329 	 * sufficient size to do the read.  This copy is necessary on
2330 	 * writes because if one of the components of the S/G list is not
2331 	 * a sector size multiple, the kernel will reject the write.  This
2332 	 * is unfortunate but not surprising.  So this will make sure that
2333 	 * we're using a single buffer that is a multiple of the sector size.
2334 	 */
2335 	if ((double_buf_needed != 0)
2336 	 && (data->sg_count > 1)
2337 	 && (write_dev != 0)) {
2338 		uint32_t cur_offset;
2339 		int i;
2340 
2341 		if (file_dev->tmp_buf == NULL)
2342 			file_dev->tmp_buf = calloc(dev->blocksize, 1);
2343 		if (file_dev->tmp_buf == NULL) {
2344 			buf->status = CAMDD_STATUS_ERROR;
2345 			error_count++;
2346 			pthread_mutex_lock(&dev->mutex);
2347 			goto bailout;
2348 		}
2349 		for (i = 0, cur_offset = 0; i < data->sg_count; i++) {
2350 			bcopy(data->iovec[i].iov_base,
2351 			    &file_dev->tmp_buf[cur_offset],
2352 			    data->iovec[i].iov_len);
2353 			cur_offset += data->iovec[i].iov_len;
2354 		}
2355 		db_len = cur_offset;
2356 	}
2357 
2358 	if (file_dev->file_flags & CAMDD_FF_CAN_SEEK) {
2359 		if (write_dev == 0) {
2360 			/*
2361 			 * XXX KDM is there any way we would need a S/G
2362 			 * list here?
2363 			 */
2364 			retval = pread(file_dev->fd, data->buf,
2365 			    buf->len, io_offset);
2366 		} else {
2367 			if (double_buf_needed != 0) {
2368 				retval = pwrite(file_dev->fd, file_dev->tmp_buf,
2369 				    db_len, io_offset);
2370 			} else if (data->sg_count == 0) {
2371 				retval = pwrite(file_dev->fd, data->buf,
2372 				    data->fill_len, io_offset);
2373 			} else {
2374 				retval = pwritev(file_dev->fd, data->iovec,
2375 				    data->sg_count, io_offset);
2376 			}
2377 		}
2378 	} else {
2379 		if (write_dev == 0) {
2380 			/*
2381 			 * XXX KDM is there any way we would need a S/G
2382 			 * list here?
2383 			 */
2384 			retval = read(file_dev->fd, data->buf, buf->len);
2385 		} else {
2386 			if (double_buf_needed != 0) {
2387 				retval = write(file_dev->fd, file_dev->tmp_buf,
2388 				    db_len);
2389 			} else if (data->sg_count == 0) {
2390 				retval = write(file_dev->fd, data->buf,
2391 				    data->fill_len);
2392 			} else {
2393 				retval = writev(file_dev->fd, data->iovec,
2394 				    data->sg_count);
2395 			}
2396 		}
2397 	}
2398 
2399 	/* We're done, re-acquire the lock */
2400 	pthread_mutex_lock(&dev->mutex);
2401 
2402 	if (retval >= (ssize_t)data->fill_len) {
2403 		/*
2404 		 * If the bytes transferred is more than the request size,
2405 		 * that indicates an overrun, which should only happen at
2406 		 * the end of a transfer if we have to round up to a sector
2407 		 * boundary.
2408 		 */
2409 		if (buf->status == CAMDD_STATUS_NONE)
2410 			buf->status = CAMDD_STATUS_OK;
2411 		data->resid = 0;
2412 		dev->bytes_transferred += retval;
2413 	} else if (retval == -1) {
2414 		warn("Error %s %s", (write_dev) ? "writing to" :
2415 		    "reading from", file_dev->filename);
2416 
2417 		buf->status = CAMDD_STATUS_ERROR;
2418 		data->resid = data->fill_len;
2419 		error_count++;
2420 
2421 		if (dev->debug == 0)
2422 			goto bailout;
2423 
2424 		if ((double_buf_needed != 0)
2425 		 && (write_dev != 0)) {
2426 			fprintf(stderr, "%s: fd %d, DB buf %p, len %u lba %ju "
2427 			    "offset %ju\n", __func__, file_dev->fd,
2428 			    file_dev->tmp_buf, db_len, (uintmax_t)buf->lba,
2429 			    (uintmax_t)io_offset);
2430 		} else if (data->sg_count == 0) {
2431 			fprintf(stderr, "%s: fd %d, buf %p, len %u, lba %ju "
2432 			    "offset %ju\n", __func__, file_dev->fd, data->buf,
2433 			    data->fill_len, (uintmax_t)buf->lba,
2434 			    (uintmax_t)io_offset);
2435 		} else {
2436 			int i;
2437 
2438 			fprintf(stderr, "%s: fd %d, len %u, lba %ju "
2439 			    "offset %ju\n", __func__, file_dev->fd,
2440 			    data->fill_len, (uintmax_t)buf->lba,
2441 			    (uintmax_t)io_offset);
2442 
2443 			for (i = 0; i < data->sg_count; i++) {
2444 				fprintf(stderr, "index %d ptr %p len %zu\n",
2445 				    i, data->iovec[i].iov_base,
2446 				    data->iovec[i].iov_len);
2447 			}
2448 		}
2449 	} else if (retval == 0) {
2450 		buf->status = CAMDD_STATUS_EOF;
2451 		if (dev->debug != 0)
2452 			printf("%s: got EOF from %s!\n", __func__,
2453 			    file_dev->filename);
2454 		data->resid = data->fill_len;
2455 		error_count++;
2456 	} else if (retval < (ssize_t)data->fill_len) {
2457 		if (buf->status == CAMDD_STATUS_NONE)
2458 			buf->status = CAMDD_STATUS_SHORT_IO;
2459 		data->resid = data->fill_len - retval;
2460 		dev->bytes_transferred += retval;
2461 	}
2462 
2463 bailout:
2464 	if (buf != NULL) {
2465 		if (buf->status == CAMDD_STATUS_EOF) {
2466 			struct camdd_buf *buf2;
2467 			dev->flags |= CAMDD_DEV_FLAG_EOF;
2468 			STAILQ_FOREACH(buf2, &dev->run_queue, links)
2469 				buf2->status = CAMDD_STATUS_EOF;
2470 		}
2471 
2472 		camdd_complete_buf(dev, buf, &error_count);
2473 	}
2474 
2475 	if (error_count != 0)
2476 		return (-1);
2477 	else if (no_resources != 0)
2478 		return (1);
2479 	else
2480 		return (0);
2481 }
2482 
2483 /*
2484  * Execute one command from the run queue.  Returns 0 for success, 1 for
2485  * stop processing, and -1 for error.
2486  */
2487 int
2488 camdd_pass_run(struct camdd_dev *dev)
2489 {
2490 	struct camdd_buf *buf = NULL;
2491 	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2492 	struct camdd_buf_data *data;
2493 	uint32_t num_blocks, sectors_used = 0;
2494 	union ccb *ccb;
2495 	int retval = 0, is_write = dev->write_dev;
2496 	int double_buf_needed = 0;
2497 
2498 	buf = STAILQ_FIRST(&dev->run_queue);
2499 	if (buf == NULL) {
2500 		retval = 1;
2501 		goto bailout;
2502 	}
2503 
2504 	/*
2505 	 * If we're writing, we need to go through the source buffer list
2506 	 * and create an S/G list.
2507 	 */
2508 	if (is_write != 0) {
2509 		retval = camdd_buf_sg_create(buf, /*iovec*/ 0,dev->sector_size,
2510 		    &sectors_used, &double_buf_needed);
2511 		if (retval != 0) {
2512 			retval = -1;
2513 			goto bailout;
2514 		}
2515 	}
2516 
2517 	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2518 	dev->num_run_queue--;
2519 
2520 	data = &buf->buf_type_spec.data;
2521 
2522 	/*
2523 	 * In almost every case the number of blocks should be the device
2524 	 * block size.  The exception may be at the end of an I/O stream
2525 	 * for a partial block or at the end of a device.
2526 	 */
2527 	if (is_write != 0)
2528 		num_blocks = sectors_used;
2529 	else
2530 		num_blocks = data->fill_len / pass_dev->block_len;
2531 
2532 	ccb = &data->ccb;
2533 
2534 	switch (pass_dev->protocol) {
2535 	case PROTO_SCSI:
2536 		CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
2537 
2538 		scsi_read_write(&ccb->csio,
2539 				/*retries*/ dev->retry_count,
2540 				/*cbfcnp*/ NULL,
2541 				/*tag_action*/ MSG_SIMPLE_Q_TAG,
2542 				/*readop*/ (dev->write_dev == 0) ? SCSI_RW_READ :
2543 					   SCSI_RW_WRITE,
2544 				/*byte2*/ 0,
2545 				/*minimum_cmd_size*/ dev->min_cmd_size,
2546 				/*lba*/ buf->lba,
2547 				/*block_count*/ num_blocks,
2548 				/*data_ptr*/ (data->sg_count != 0) ?
2549 					     (uint8_t *)data->segs : data->buf,
2550 				/*dxfer_len*/ (num_blocks * pass_dev->block_len),
2551 				/*sense_len*/ SSD_FULL_SIZE,
2552 				/*timeout*/ dev->io_timeout);
2553 
2554 		if (data->sg_count != 0) {
2555 			ccb->csio.sglist_cnt = data->sg_count;
2556 		}
2557 		break;
2558 	default:
2559 		retval = -1;
2560 		goto bailout;
2561 	}
2562 
2563 	/* Disable freezing the device queue */
2564 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
2565 
2566 	if (dev->retry_count != 0)
2567 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
2568 
2569 	if (data->sg_count != 0) {
2570 		ccb->ccb_h.flags |= CAM_DATA_SG;
2571 	}
2572 
2573 	/*
2574 	 * Store a pointer to the buffer in the CCB.  The kernel will
2575 	 * restore this when we get it back, and we'll use it to identify
2576 	 * the buffer this CCB came from.
2577 	 */
2578 	ccb->ccb_h.ccb_buf = buf;
2579 
2580 	/*
2581 	 * Unlock our mutex in preparation for issuing the ioctl.
2582 	 */
2583 	pthread_mutex_unlock(&dev->mutex);
2584 	/*
2585 	 * Queue the CCB to the pass(4) driver.
2586 	 */
2587 	if (ioctl(pass_dev->dev->fd, CAMIOQUEUE, ccb) == -1) {
2588 		pthread_mutex_lock(&dev->mutex);
2589 
2590 		warn("%s: error sending CAMIOQUEUE ioctl to %s%u", __func__,
2591 		     pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
2592 		warn("%s: CCB address is %p", __func__, ccb);
2593 		retval = -1;
2594 
2595 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2596 	} else {
2597 		pthread_mutex_lock(&dev->mutex);
2598 
2599 		dev->cur_active_io++;
2600 		STAILQ_INSERT_TAIL(&dev->active_queue, buf, links);
2601 	}
2602 
2603 bailout:
2604 	return (retval);
2605 }
2606 
2607 int
2608 camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len)
2609 {
2610 	struct camdd_dev_pass *pass_dev;
2611 	uint32_t num_blocks;
2612 	int retval = 0;
2613 
2614 	pass_dev = &dev->dev_spec.pass;
2615 
2616 	*lba = dev->next_io_pos_bytes / dev->sector_size;
2617 	*len = dev->blocksize;
2618 	num_blocks = *len / dev->sector_size;
2619 
2620 	/*
2621 	 * If max_sector is 0, then we have no set limit.  This can happen
2622 	 * if we're writing to a file in a filesystem, or reading from
2623 	 * something like /dev/zero.
2624 	 */
2625 	if ((dev->max_sector != 0)
2626 	 || (dev->sector_io_limit != 0)) {
2627 		uint64_t max_sector;
2628 
2629 		if ((dev->max_sector != 0)
2630 		 && (dev->sector_io_limit != 0))
2631 			max_sector = min(dev->sector_io_limit, dev->max_sector);
2632 		else if (dev->max_sector != 0)
2633 			max_sector = dev->max_sector;
2634 		else
2635 			max_sector = dev->sector_io_limit;
2636 
2637 
2638 		/*
2639 		 * Check to see whether we're starting off past the end of
2640 		 * the device.  If so, we need to just send an EOF
2641 		 * notification to the writer.
2642 		 */
2643 		if (*lba > max_sector) {
2644 			*len = 0;
2645 			retval = 1;
2646 		} else if (((*lba + num_blocks) > max_sector + 1)
2647 			|| ((*lba + num_blocks) < *lba)) {
2648 			/*
2649 			 * If we get here (but pass the first check), we
2650 			 * can trim the request length down to go to the
2651 			 * end of the device.
2652 			 */
2653 			num_blocks = (max_sector + 1) - *lba;
2654 			*len = num_blocks * dev->sector_size;
2655 			retval = 1;
2656 		}
2657 	}
2658 
2659 	dev->next_io_pos_bytes += *len;
2660 
2661 	return (retval);
2662 }
2663 
2664 /*
2665  * Returns 0 for success, 1 for EOF detected, and -1 for failure.
2666  */
2667 int
2668 camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf)
2669 {
2670 	struct camdd_buf *buf = NULL;
2671 	struct camdd_buf_data *data;
2672 	struct camdd_dev_pass *pass_dev;
2673 	size_t new_len;
2674 	struct camdd_buf_data *rb_data;
2675 	int is_write = dev->write_dev;
2676 	int eof_flush_needed = 0;
2677 	int retval = 0;
2678 	int error;
2679 
2680 	pass_dev = &dev->dev_spec.pass;
2681 
2682 	/*
2683 	 * If we've gotten EOF or our partner has, we should not continue
2684 	 * queueing I/O.  If we're a writer, though, we should continue
2685 	 * to write any buffers that don't have EOF status.
2686 	 */
2687 	if ((dev->flags & CAMDD_DEV_FLAG_EOF)
2688 	 || ((dev->flags & CAMDD_DEV_FLAG_PEER_EOF)
2689 	  && (is_write == 0))) {
2690 		/*
2691 		 * Tell the worker thread that we have seen EOF.
2692 		 */
2693 		retval = 1;
2694 
2695 		/*
2696 		 * If we're the writer, send the buffer back with EOF status.
2697 		 */
2698 		if (is_write) {
2699 			read_buf->status = CAMDD_STATUS_EOF;
2700 
2701 			error = camdd_complete_peer_buf(dev, read_buf);
2702 		}
2703 		goto bailout;
2704 	}
2705 
2706 	if (is_write == 0) {
2707 		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2708 		if (buf == NULL) {
2709 			retval = -1;
2710 			goto bailout;
2711 		}
2712 		data = &buf->buf_type_spec.data;
2713 
2714 		retval = camdd_get_next_lba_len(dev, &buf->lba, &buf->len);
2715 		if (retval != 0) {
2716 			buf->status = CAMDD_STATUS_EOF;
2717 
2718 		 	if ((buf->len == 0)
2719 			 && ((dev->flags & (CAMDD_DEV_FLAG_EOF_SENT |
2720 			     CAMDD_DEV_FLAG_EOF_QUEUED)) != 0)) {
2721 				camdd_release_buf(buf);
2722 				goto bailout;
2723 			}
2724 			dev->flags |= CAMDD_DEV_FLAG_EOF_QUEUED;
2725 		}
2726 
2727 		data->fill_len = buf->len;
2728 		data->src_start_offset = buf->lba * dev->sector_size;
2729 
2730 		/*
2731 		 * Put this on the run queue.
2732 		 */
2733 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2734 		dev->num_run_queue++;
2735 
2736 		/* We're done. */
2737 		goto bailout;
2738 	}
2739 
2740 	/*
2741 	 * Check for new EOF status from the reader.
2742 	 */
2743 	if ((read_buf->status == CAMDD_STATUS_EOF)
2744 	 || (read_buf->status == CAMDD_STATUS_ERROR)) {
2745 		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2746 		if ((STAILQ_FIRST(&dev->pending_queue) == NULL)
2747 		 && (read_buf->len == 0)) {
2748 			camdd_complete_peer_buf(dev, read_buf);
2749 			retval = 1;
2750 			goto bailout;
2751 		} else
2752 			eof_flush_needed = 1;
2753 	}
2754 
2755 	/*
2756 	 * See if we have a buffer we're composing with pieces from our
2757 	 * partner thread.
2758 	 */
2759 	buf = STAILQ_FIRST(&dev->pending_queue);
2760 	if (buf == NULL) {
2761 		uint64_t lba;
2762 		ssize_t len;
2763 
2764 		retval = camdd_get_next_lba_len(dev, &lba, &len);
2765 		if (retval != 0) {
2766 			read_buf->status = CAMDD_STATUS_EOF;
2767 
2768 			if (len == 0) {
2769 				dev->flags |= CAMDD_DEV_FLAG_EOF;
2770 				error = camdd_complete_peer_buf(dev, read_buf);
2771 				goto bailout;
2772 			}
2773 		}
2774 
2775 		/*
2776 		 * If we don't have a pending buffer, we need to grab a new
2777 		 * one from the free list or allocate another one.
2778 		 */
2779 		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2780 		if (buf == NULL) {
2781 			retval = 1;
2782 			goto bailout;
2783 		}
2784 
2785 		buf->lba = lba;
2786 		buf->len = len;
2787 
2788 		STAILQ_INSERT_TAIL(&dev->pending_queue, buf, links);
2789 		dev->num_pending_queue++;
2790 	}
2791 
2792 	data = &buf->buf_type_spec.data;
2793 
2794 	rb_data = &read_buf->buf_type_spec.data;
2795 
2796 	if ((rb_data->src_start_offset != dev->next_peer_pos_bytes)
2797 	 && (dev->debug != 0)) {
2798 		printf("%s: WARNING: reader offset %#jx != expected offset "
2799 		    "%#jx\n", __func__, (uintmax_t)rb_data->src_start_offset,
2800 		    (uintmax_t)dev->next_peer_pos_bytes);
2801 	}
2802 	dev->next_peer_pos_bytes = rb_data->src_start_offset +
2803 	    (rb_data->fill_len - rb_data->resid);
2804 
2805 	new_len = (rb_data->fill_len - rb_data->resid) + data->fill_len;
2806 	if (new_len < buf->len) {
2807 		/*
2808 		 * There are three cases here:
2809 		 * 1. We need more data to fill up a block, so we put
2810 		 *    this I/O on the queue and wait for more I/O.
2811 		 * 2. We have a pending buffer in the queue that is
2812 		 *    smaller than our blocksize, but we got an EOF.  So we
2813 		 *    need to go ahead and flush the write out.
2814 		 * 3. We got an error.
2815 		 */
2816 
2817 		/*
2818 		 * Increment our fill length.
2819 		 */
2820 		data->fill_len += (rb_data->fill_len - rb_data->resid);
2821 
2822 		/*
2823 		 * Add the new read buffer to the list for writing.
2824 		 */
2825 		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
2826 
2827 		/* Increment the count */
2828 		buf->src_count++;
2829 
2830 		if (eof_flush_needed == 0) {
2831 			/*
2832 			 * We need to exit, because we don't have enough
2833 			 * data yet.
2834 			 */
2835 			goto bailout;
2836 		} else {
2837 			/*
2838 			 * Take the buffer off of the pending queue.
2839 			 */
2840 			STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
2841 				      links);
2842 			dev->num_pending_queue--;
2843 
2844 			/*
2845 			 * If we need an EOF flush, but there is no data
2846 			 * to flush, go ahead and return this buffer.
2847 			 */
2848 			if (data->fill_len == 0) {
2849 				camdd_complete_buf(dev, buf, /*error_count*/0);
2850 				retval = 1;
2851 				goto bailout;
2852 			}
2853 
2854 			/*
2855 			 * Put this on the next queue for execution.
2856 			 */
2857 			STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2858 			dev->num_run_queue++;
2859 		}
2860 	} else if (new_len == buf->len) {
2861 		/*
2862 		 * We have enough data to completey fill one block,
2863 		 * so we're ready to issue the I/O.
2864 		 */
2865 
2866 		/*
2867 		 * Take the buffer off of the pending queue.
2868 		 */
2869 		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf, links);
2870 		dev->num_pending_queue--;
2871 
2872 		/*
2873 		 * Add the new read buffer to the list for writing.
2874 		 */
2875 		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
2876 
2877 		/* Increment the count */
2878 		buf->src_count++;
2879 
2880 		/*
2881 		 * Increment our fill length.
2882 		 */
2883 		data->fill_len += (rb_data->fill_len - rb_data->resid);
2884 
2885 		/*
2886 		 * Put this on the next queue for execution.
2887 		 */
2888 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2889 		dev->num_run_queue++;
2890 	} else {
2891 		struct camdd_buf *idb;
2892 		struct camdd_buf_indirect *indirect;
2893 		uint32_t len_to_go, cur_offset;
2894 
2895 
2896 		idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
2897 		if (idb == NULL) {
2898 			retval = 1;
2899 			goto bailout;
2900 		}
2901 		indirect = &idb->buf_type_spec.indirect;
2902 		indirect->src_buf = read_buf;
2903 		read_buf->refcount++;
2904 		indirect->offset = 0;
2905 		indirect->start_ptr = rb_data->buf;
2906 		/*
2907 		 * We've already established that there is more
2908 		 * data in read_buf than we have room for in our
2909 		 * current write request.  So this particular chunk
2910 		 * of the request should just be the remainder
2911 		 * needed to fill up a block.
2912 		 */
2913 		indirect->len = buf->len - (data->fill_len - data->resid);
2914 
2915 		camdd_buf_add_child(buf, idb);
2916 
2917 		/*
2918 		 * This buffer is ready to execute, so we can take
2919 		 * it off the pending queue and put it on the run
2920 		 * queue.
2921 		 */
2922 		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
2923 			      links);
2924 		dev->num_pending_queue--;
2925 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2926 		dev->num_run_queue++;
2927 
2928 		cur_offset = indirect->offset + indirect->len;
2929 
2930 		/*
2931 		 * The resulting I/O would be too large to fit in
2932 		 * one block.  We need to split this I/O into
2933 		 * multiple pieces.  Allocate as many buffers as needed.
2934 		 */
2935 		for (len_to_go = rb_data->fill_len - rb_data->resid -
2936 		     indirect->len; len_to_go > 0;) {
2937 			struct camdd_buf *new_buf;
2938 			struct camdd_buf_data *new_data;
2939 			uint64_t lba;
2940 			ssize_t len;
2941 
2942 			retval = camdd_get_next_lba_len(dev, &lba, &len);
2943 			if ((retval != 0)
2944 			 && (len == 0)) {
2945 				/*
2946 				 * The device has already been marked
2947 				 * as EOF, and there is no space left.
2948 				 */
2949 				goto bailout;
2950 			}
2951 
2952 			new_buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2953 			if (new_buf == NULL) {
2954 				retval = 1;
2955 				goto bailout;
2956 			}
2957 
2958 			new_buf->lba = lba;
2959 			new_buf->len = len;
2960 
2961 			idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
2962 			if (idb == NULL) {
2963 				retval = 1;
2964 				goto bailout;
2965 			}
2966 
2967 			indirect = &idb->buf_type_spec.indirect;
2968 
2969 			indirect->src_buf = read_buf;
2970 			read_buf->refcount++;
2971 			indirect->offset = cur_offset;
2972 			indirect->start_ptr = rb_data->buf + cur_offset;
2973 			indirect->len = min(len_to_go, new_buf->len);
2974 #if 0
2975 			if (((indirect->len % dev->sector_size) != 0)
2976 			 || ((indirect->offset % dev->sector_size) != 0)) {
2977 				warnx("offset %ju len %ju not aligned with "
2978 				    "sector size %u", indirect->offset,
2979 				    (uintmax_t)indirect->len, dev->sector_size);
2980 			}
2981 #endif
2982 			cur_offset += indirect->len;
2983 			len_to_go -= indirect->len;
2984 
2985 			camdd_buf_add_child(new_buf, idb);
2986 
2987 			new_data = &new_buf->buf_type_spec.data;
2988 
2989 			if ((new_data->fill_len == new_buf->len)
2990 			 || (eof_flush_needed != 0)) {
2991 				STAILQ_INSERT_TAIL(&dev->run_queue,
2992 						   new_buf, links);
2993 				dev->num_run_queue++;
2994 			} else if (new_data->fill_len < buf->len) {
2995 				STAILQ_INSERT_TAIL(&dev->pending_queue,
2996 					   	new_buf, links);
2997 				dev->num_pending_queue++;
2998 			} else {
2999 				warnx("%s: too much data in new "
3000 				      "buffer!", __func__);
3001 				retval = 1;
3002 				goto bailout;
3003 			}
3004 		}
3005 	}
3006 
3007 bailout:
3008 	return (retval);
3009 }
3010 
3011 void
3012 camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
3013 		uint32_t *peer_depth, uint32_t *our_bytes, uint32_t *peer_bytes)
3014 {
3015 	*our_depth = dev->cur_active_io + dev->num_run_queue;
3016 	if (dev->num_peer_work_queue >
3017 	    dev->num_peer_done_queue)
3018 		*peer_depth = dev->num_peer_work_queue -
3019 			      dev->num_peer_done_queue;
3020 	else
3021 		*peer_depth = 0;
3022 	*our_bytes = *our_depth * dev->blocksize;
3023 	*peer_bytes = dev->peer_bytes_queued;
3024 }
3025 
3026 void
3027 camdd_sig_handler(int sig)
3028 {
3029 	if (sig == SIGINFO)
3030 		need_status = 1;
3031 	else {
3032 		need_exit = 1;
3033 		error_exit = 1;
3034 	}
3035 
3036 	sem_post(&camdd_sem);
3037 }
3038 
3039 void
3040 camdd_print_status(struct camdd_dev *camdd_dev, struct camdd_dev *other_dev,
3041 		   struct timespec *start_time)
3042 {
3043 	struct timespec done_time;
3044 	uint64_t total_ns;
3045 	long double mb_sec, total_sec;
3046 	int error = 0;
3047 
3048 	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &done_time);
3049 	if (error != 0) {
3050 		warn("Unable to get done time");
3051 		return;
3052 	}
3053 
3054 	timespecsub(&done_time, start_time, &done_time);
3055 
3056 	total_ns = done_time.tv_nsec + (done_time.tv_sec * 1000000000);
3057 	total_sec = total_ns;
3058 	total_sec /= 1000000000;
3059 
3060 	fprintf(stderr, "%ju bytes %s %s\n%ju bytes %s %s\n"
3061 		"%.4Lf seconds elapsed\n",
3062 		(uintmax_t)camdd_dev->bytes_transferred,
3063 		(camdd_dev->write_dev == 0) ?  "read from" : "written to",
3064 		camdd_dev->device_name,
3065 		(uintmax_t)other_dev->bytes_transferred,
3066 		(other_dev->write_dev == 0) ? "read from" : "written to",
3067 		other_dev->device_name, total_sec);
3068 
3069 	mb_sec = min(other_dev->bytes_transferred,camdd_dev->bytes_transferred);
3070 	mb_sec /= 1024 * 1024;
3071 	mb_sec *= 1000000000;
3072 	mb_sec /= total_ns;
3073 	fprintf(stderr, "%.2Lf MB/sec\n", mb_sec);
3074 }
3075 
3076 int
3077 camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts, uint64_t max_io,
3078 	 int retry_count, int timeout)
3079 {
3080 	struct cam_device *new_cam_dev = NULL;
3081 	struct camdd_dev *devs[2];
3082 	struct timespec start_time;
3083 	pthread_t threads[2];
3084 	int unit = 0;
3085 	int error = 0;
3086 	int i;
3087 
3088 	if (num_io_opts != 2) {
3089 		warnx("Must have one input and one output path");
3090 		error = 1;
3091 		goto bailout;
3092 	}
3093 
3094 	bzero(devs, sizeof(devs));
3095 
3096 	for (i = 0; i < num_io_opts; i++) {
3097 		switch (io_opts[i].dev_type) {
3098 		case CAMDD_DEV_PASS: {
3099 			if (isdigit(io_opts[i].dev_name[0])) {
3100 				camdd_argmask new_arglist = CAMDD_ARG_NONE;
3101 				int bus = 0, target = 0, lun = 0;
3102 				int rv;
3103 
3104 				/* device specified as bus:target[:lun] */
3105 				rv = parse_btl(io_opts[i].dev_name, &bus,
3106 				    &target, &lun, &new_arglist);
3107 				if (rv < 2) {
3108 					warnx("numeric device specification "
3109 					     "must be either bus:target, or "
3110 					     "bus:target:lun");
3111 					error = 1;
3112 					goto bailout;
3113 				}
3114 				/* default to 0 if lun was not specified */
3115 				if ((new_arglist & CAMDD_ARG_LUN) == 0) {
3116 					lun = 0;
3117 					new_arglist |= CAMDD_ARG_LUN;
3118 				}
3119 				new_cam_dev = cam_open_btl(bus, target, lun,
3120 				    O_RDWR, NULL);
3121 			} else {
3122 				char name[30];
3123 
3124 				if (cam_get_device(io_opts[i].dev_name, name,
3125 						   sizeof name, &unit) == -1) {
3126 					warnx("%s", cam_errbuf);
3127 					error = 1;
3128 					goto bailout;
3129 				}
3130 				new_cam_dev = cam_open_spec_device(name, unit,
3131 				    O_RDWR, NULL);
3132 			}
3133 
3134 			if (new_cam_dev == NULL) {
3135 				warnx("%s", cam_errbuf);
3136 				error = 1;
3137 				goto bailout;
3138 			}
3139 
3140 			devs[i] = camdd_probe_pass(new_cam_dev,
3141 			    /*io_opts*/ &io_opts[i],
3142 			    CAMDD_ARG_ERR_RECOVER,
3143 			    /*probe_retry_count*/ 3,
3144 			    /*probe_timeout*/ 5000,
3145 			    /*io_retry_count*/ retry_count,
3146 			    /*io_timeout*/ timeout);
3147 			if (devs[i] == NULL) {
3148 				warn("Unable to probe device %s%u",
3149 				     new_cam_dev->device_name,
3150 				     new_cam_dev->dev_unit_num);
3151 				error = 1;
3152 				goto bailout;
3153 			}
3154 			break;
3155 		}
3156 		case CAMDD_DEV_FILE: {
3157 			int fd = -1;
3158 
3159 			if (io_opts[i].dev_name[0] == '-') {
3160 				if (io_opts[i].write_dev != 0)
3161 					fd = STDOUT_FILENO;
3162 				else
3163 					fd = STDIN_FILENO;
3164 			} else {
3165 				if (io_opts[i].write_dev != 0) {
3166 					fd = open(io_opts[i].dev_name,
3167 					    O_RDWR | O_CREAT, S_IWUSR |S_IRUSR);
3168 				} else {
3169 					fd = open(io_opts[i].dev_name,
3170 					    O_RDONLY);
3171 				}
3172 			}
3173 			if (fd == -1) {
3174 				warn("error opening file %s",
3175 				    io_opts[i].dev_name);
3176 				error = 1;
3177 				goto bailout;
3178 			}
3179 
3180 			devs[i] = camdd_probe_file(fd, &io_opts[i],
3181 			    retry_count, timeout);
3182 			if (devs[i] == NULL) {
3183 				error = 1;
3184 				goto bailout;
3185 			}
3186 
3187 			break;
3188 		}
3189 		default:
3190 			warnx("Unknown device type %d (%s)",
3191 			    io_opts[i].dev_type, io_opts[i].dev_name);
3192 			error = 1;
3193 			goto bailout;
3194 			break; /*NOTREACHED */
3195 		}
3196 
3197 		devs[i]->write_dev = io_opts[i].write_dev;
3198 
3199 		devs[i]->start_offset_bytes = io_opts[i].offset;
3200 
3201 		if (max_io != 0) {
3202 			devs[i]->sector_io_limit =
3203 			    (devs[i]->start_offset_bytes /
3204 			    devs[i]->sector_size) +
3205 			    (max_io / devs[i]->sector_size) - 1;
3206 		}
3207 
3208 		devs[i]->next_io_pos_bytes = devs[i]->start_offset_bytes;
3209 		devs[i]->next_completion_pos_bytes =devs[i]->start_offset_bytes;
3210 	}
3211 
3212 	devs[0]->peer_dev = devs[1];
3213 	devs[1]->peer_dev = devs[0];
3214 	devs[0]->next_peer_pos_bytes = devs[0]->peer_dev->next_io_pos_bytes;
3215 	devs[1]->next_peer_pos_bytes = devs[1]->peer_dev->next_io_pos_bytes;
3216 
3217 	sem_init(&camdd_sem, /*pshared*/ 0, 0);
3218 
3219 	signal(SIGINFO, camdd_sig_handler);
3220 	signal(SIGINT, camdd_sig_handler);
3221 
3222 	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &start_time);
3223 	if (error != 0) {
3224 		warn("Unable to get start time");
3225 		goto bailout;
3226 	}
3227 
3228 	for (i = 0; i < num_io_opts; i++) {
3229 		error = pthread_create(&threads[i], NULL, camdd_worker,
3230 				       (void *)devs[i]);
3231 		if (error != 0) {
3232 			warnc(error, "pthread_create() failed");
3233 			goto bailout;
3234 		}
3235 	}
3236 
3237 	for (;;) {
3238 		if ((sem_wait(&camdd_sem) == -1)
3239 		 || (need_exit != 0)) {
3240 			struct kevent ke;
3241 
3242 			for (i = 0; i < num_io_opts; i++) {
3243 				EV_SET(&ke, (uintptr_t)&devs[i]->work_queue,
3244 				    EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
3245 
3246 				devs[i]->flags |= CAMDD_DEV_FLAG_EOF;
3247 
3248 				error = kevent(devs[i]->kq, &ke, 1, NULL, 0,
3249 						NULL);
3250 				if (error == -1)
3251 					warn("%s: unable to wake up thread",
3252 					    __func__);
3253 				error = 0;
3254 			}
3255 			break;
3256 		} else if (need_status != 0) {
3257 			camdd_print_status(devs[0], devs[1], &start_time);
3258 			need_status = 0;
3259 		}
3260 	}
3261 	for (i = 0; i < num_io_opts; i++) {
3262 		pthread_join(threads[i], NULL);
3263 	}
3264 
3265 	camdd_print_status(devs[0], devs[1], &start_time);
3266 
3267 bailout:
3268 
3269 	for (i = 0; i < num_io_opts; i++)
3270 		camdd_free_dev(devs[i]);
3271 
3272 	return (error + error_exit);
3273 }
3274 
/*
 * Write the command synopsis, followed by a description of every option
 * and I/O parameter, to stderr.  This routine only prints; it does not
 * exit, so the caller decides what happens next.
 */
void
usage(void)
{
	/* The text contains no conversions, so it is emitted verbatim. */
	static const char help_text[] =
	    "usage:  camdd <-i|-o pass=pass0,bs=1M,offset=1M,depth=4>\n"
	    "              <-i|-o file=/tmp/file,bs=512K,offset=1M>\n"
	    "              <-i|-o file=/dev/da0,bs=512K,offset=1M>\n"
	    "              <-i|-o file=/dev/nsa0,bs=512K>\n"
	    "              [-C retry_count][-E][-m max_io_amt][-t timeout_secs][-v][-h]\n"
	    "Option description\n"
	    "-i <arg=val>  Specify input device/file and parameters\n"
	    "-o <arg=val>  Specify output device/file and parameters\n"
	    "Input and Output parameters\n"
	    "pass=name     Specify a pass(4) device like pass0 or /dev/pass0\n"
	    "file=name     Specify a file or device, /tmp/foo, /dev/da0, /dev/null\n"
	    "              or - for stdin/stdout\n"
	    "bs=blocksize  Specify blocksize in bytes, or using K, M, G, etc. suffix\n"
	    "offset=len    Specify starting offset in bytes or using K, M, G suffix\n"
	    "              NOTE: offset cannot be specified on tapes, pipes, stdin/out\n"
	    "depth=N       Specify a numeric queue depth.  This only applies to pass(4)\n"
	    "mcs=N         Specify a minimum cmd size for pass(4) read/write commands\n"
	    "Optional arguments\n"
	    "-C retry_cnt  Specify a retry count for pass(4) devices\n"
	    "-E            Enable CAM error recovery for pass(4) devices\n"
	    "-m max_io     Specify the maximum amount to be transferred in bytes or\n"
	    "              using K, G, M, etc. suffixes\n"
	    "-t timeout    Specify the I/O timeout to use with pass(4) devices\n"
	    "-v            Enable verbose error recovery\n"
	    "-h            Print this message\n";

	fputs(help_text, stderr);
}
3305 
3306 
3307 int
3308 camdd_parse_io_opts(char *args, int is_write, struct camdd_io_opts *io_opts)
3309 {
3310 	char *tmpstr, *tmpstr2;
3311 	char *orig_tmpstr = NULL;
3312 	int retval = 0;
3313 
3314 	io_opts->write_dev = is_write;
3315 
3316 	tmpstr = strdup(args);
3317 	if (tmpstr == NULL) {
3318 		warn("strdup failed");
3319 		retval = 1;
3320 		goto bailout;
3321 	}
3322 	orig_tmpstr = tmpstr;
3323 	while ((tmpstr2 = strsep(&tmpstr, ",")) != NULL) {
3324 		char *name, *value;
3325 
3326 		/*
3327 		 * If the user creates an empty parameter by putting in two
3328 		 * commas, skip over it and look for the next field.
3329 		 */
3330 		if (*tmpstr2 == '\0')
3331 			continue;
3332 
3333 		name = strsep(&tmpstr2, "=");
3334 		if (*name == '\0') {
3335 			warnx("Got empty I/O parameter name");
3336 			retval = 1;
3337 			goto bailout;
3338 		}
3339 		value = strsep(&tmpstr2, "=");
3340 		if ((value == NULL)
3341 		 || (*value == '\0')) {
3342 			warnx("Empty I/O parameter value for %s", name);
3343 			retval = 1;
3344 			goto bailout;
3345 		}
3346 		if (strncasecmp(name, "file", 4) == 0) {
3347 			io_opts->dev_type = CAMDD_DEV_FILE;
3348 			io_opts->dev_name = strdup(value);
3349 			if (io_opts->dev_name == NULL) {
3350 				warn("Error allocating memory");
3351 				retval = 1;
3352 				goto bailout;
3353 			}
3354 		} else if (strncasecmp(name, "pass", 4) == 0) {
3355 			io_opts->dev_type = CAMDD_DEV_PASS;
3356 			io_opts->dev_name = strdup(value);
3357 			if (io_opts->dev_name == NULL) {
3358 				warn("Error allocating memory");
3359 				retval = 1;
3360 				goto bailout;
3361 			}
3362 		} else if ((strncasecmp(name, "bs", 2) == 0)
3363 			|| (strncasecmp(name, "blocksize", 9) == 0)) {
3364 			retval = expand_number(value, &io_opts->blocksize);
3365 			if (retval == -1) {
3366 				warn("expand_number(3) failed on %s=%s", name,
3367 				    value);
3368 				retval = 1;
3369 				goto bailout;
3370 			}
3371 		} else if (strncasecmp(name, "depth", 5) == 0) {
3372 			char *endptr;
3373 
3374 			io_opts->queue_depth = strtoull(value, &endptr, 0);
3375 			if (*endptr != '\0') {
3376 				warnx("invalid queue depth %s", value);
3377 				retval = 1;
3378 				goto bailout;
3379 			}
3380 		} else if (strncasecmp(name, "mcs", 3) == 0) {
3381 			char *endptr;
3382 
3383 			io_opts->min_cmd_size = strtol(value, &endptr, 0);
3384 			if ((*endptr != '\0')
3385 			 || ((io_opts->min_cmd_size > 16)
3386 			  || (io_opts->min_cmd_size < 0))) {
3387 				warnx("invalid minimum cmd size %s", value);
3388 				retval = 1;
3389 				goto bailout;
3390 			}
3391 		} else if (strncasecmp(name, "offset", 6) == 0) {
3392 			retval = expand_number(value, &io_opts->offset);
3393 			if (retval == -1) {
3394 				warn("expand_number(3) failed on %s=%s", name,
3395 				    value);
3396 				retval = 1;
3397 				goto bailout;
3398 			}
3399 		} else if (strncasecmp(name, "debug", 5) == 0) {
3400 			char *endptr;
3401 
3402 			io_opts->debug = strtoull(value, &endptr, 0);
3403 			if (*endptr != '\0') {
3404 				warnx("invalid debug level %s", value);
3405 				retval = 1;
3406 				goto bailout;
3407 			}
3408 		} else {
3409 			warnx("Unrecognized parameter %s=%s", name, value);
3410 		}
3411 	}
3412 bailout:
3413 	free(orig_tmpstr);
3414 
3415 	return (retval);
3416 }
3417 
3418 int
3419 main(int argc, char **argv)
3420 {
3421 	int c;
3422 	camdd_argmask arglist = CAMDD_ARG_NONE;
3423 	int timeout = 0, retry_count = 1;
3424 	int error = 0;
3425 	uint64_t max_io = 0;
3426 	struct camdd_io_opts *opt_list = NULL;
3427 
3428 	if (argc == 1) {
3429 		usage();
3430 		exit(1);
3431 	}
3432 
3433 	opt_list = calloc(2, sizeof(struct camdd_io_opts));
3434 	if (opt_list == NULL) {
3435 		warn("Unable to allocate option list");
3436 		error = 1;
3437 		goto bailout;
3438 	}
3439 
3440 	while ((c = getopt(argc, argv, "C:Ehi:m:o:t:v")) != -1){
3441 		switch (c) {
3442 		case 'C':
3443 			retry_count = strtol(optarg, NULL, 0);
3444 			if (retry_count < 0)
3445 				errx(1, "retry count %d is < 0",
3446 				     retry_count);
3447 			arglist |= CAMDD_ARG_RETRIES;
3448 			break;
3449 		case 'E':
3450 			arglist |= CAMDD_ARG_ERR_RECOVER;
3451 			break;
3452 		case 'i':
3453 		case 'o':
3454 			if (((c == 'i')
3455 			  && (opt_list[0].dev_type != CAMDD_DEV_NONE))
3456 			 || ((c == 'o')
3457 			  && (opt_list[1].dev_type != CAMDD_DEV_NONE))) {
3458 				errx(1, "Only one input and output path "
3459 				    "allowed");
3460 			}
3461 			error = camdd_parse_io_opts(optarg, (c == 'o') ? 1 : 0,
3462 			    (c == 'o') ? &opt_list[1] : &opt_list[0]);
3463 			if (error != 0)
3464 				goto bailout;
3465 			break;
3466 		case 'm':
3467 			error = expand_number(optarg, &max_io);
3468 			if (error == -1) {
3469 				warn("invalid maximum I/O amount %s", optarg);
3470 				error = 1;
3471 				goto bailout;
3472 			}
3473 			break;
3474 		case 't':
3475 			timeout = strtol(optarg, NULL, 0);
3476 			if (timeout < 0)
3477 				errx(1, "invalid timeout %d", timeout);
3478 			/* Convert the timeout from seconds to ms */
3479 			timeout *= 1000;
3480 			arglist |= CAMDD_ARG_TIMEOUT;
3481 			break;
3482 		case 'v':
3483 			arglist |= CAMDD_ARG_VERBOSE;
3484 			break;
3485 		case 'h':
3486 		default:
3487 			usage();
3488 			exit(1);
3489 			break; /*NOTREACHED*/
3490 		}
3491 	}
3492 
3493 	if ((opt_list[0].dev_type == CAMDD_DEV_NONE)
3494 	 || (opt_list[1].dev_type == CAMDD_DEV_NONE))
3495 		errx(1, "Must specify both -i and -o");
3496 
3497 	/*
3498 	 * Set the timeout if the user hasn't specified one.
3499 	 */
3500 	if (timeout == 0)
3501 		timeout = CAMDD_PASS_RW_TIMEOUT;
3502 
3503 	error = camdd_rw(opt_list, 2, max_io, retry_count, timeout);
3504 
3505 bailout:
3506 	free(opt_list);
3507 
3508 	exit(error);
3509 }
3510