xref: /freebsd/usr.sbin/camdd/camdd.c (revision 46c1105fbb6fbff6d6ccd0a18571342eb992d637)
1 /*-
2  * Copyright (c) 1997-2007 Kenneth D. Merry
3  * Copyright (c) 2013, 2014, 2015 Spectra Logic Corporation
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions, and the following disclaimer,
11  *    without modification.
12  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
13  *    substantially similar to the "NO WARRANTY" disclaimer below
14  *    ("Disclaimer") and any redistribution must be conditioned upon
15  *    including a substantially similar Disclaimer requirement for further
16  *    binary redistribution.
17  *
18  * NO WARRANTY
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
22  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
28  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGES.
30  *
31  * Authors: Ken Merry           (Spectra Logic Corporation)
32  */
33 
34 /*
35  * This is eventually intended to be:
36  * - A basic data transfer/copy utility
37  * - A simple benchmark utility
38  * - An example of how to use the asynchronous pass(4) driver interface.
39  */
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42 
43 #include <sys/ioctl.h>
44 #include <sys/stdint.h>
45 #include <sys/types.h>
46 #include <sys/endian.h>
47 #include <sys/param.h>
48 #include <sys/sbuf.h>
49 #include <sys/stat.h>
50 #include <sys/event.h>
51 #include <sys/time.h>
52 #include <sys/uio.h>
53 #include <vm/vm.h>
54 #include <machine/bus.h>
55 #include <sys/bus.h>
56 #include <sys/bus_dma.h>
57 #include <sys/mtio.h>
58 #include <sys/conf.h>
59 #include <sys/disk.h>
60 
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <semaphore.h>
64 #include <string.h>
65 #include <unistd.h>
66 #include <inttypes.h>
67 #include <limits.h>
68 #include <fcntl.h>
69 #include <ctype.h>
70 #include <err.h>
71 #include <libutil.h>
72 #include <pthread.h>
73 #include <assert.h>
74 #include <bsdxml.h>
75 
76 #include <cam/cam.h>
77 #include <cam/cam_debug.h>
78 #include <cam/cam_ccb.h>
79 #include <cam/scsi/scsi_all.h>
80 #include <cam/scsi/scsi_da.h>
81 #include <cam/scsi/scsi_pass.h>
82 #include <cam/scsi/scsi_message.h>
83 #include <cam/scsi/smp_all.h>
84 #include <camlib.h>
85 #include <mtlib.h>
86 #include <zlib.h>
87 
/*
 * Top-level command selector.  Despite the "mask" in the type name, the
 * values are consecutive integers rather than independent bits
 * (CAMDD_CMD_READ is 3, i.e. numerically HELP | WRITE).
 */
typedef enum {
	CAMDD_CMD_NONE		= 0,
	CAMDD_CMD_HELP		= 1,
	CAMDD_CMD_WRITE		= 2,
	CAMDD_CMD_READ		= 3
} camdd_cmdmask;
94 
/*
 * Bit flags recording which arguments have been seen.  Each flag occupies
 * its own bit so that they can be OR'd together; parse_btl() sets
 * CAMDD_ARG_BUS, CAMDD_ARG_TARGET and CAMDD_ARG_LUN as it parses each
 * component.
 */
typedef enum {
	CAMDD_ARG_NONE		= 0,
	CAMDD_ARG_VERBOSE	= 1 << 0,
	CAMDD_ARG_DEVICE	= 1 << 1,
	CAMDD_ARG_BUS		= 1 << 2,
	CAMDD_ARG_TARGET	= 1 << 3,
	CAMDD_ARG_LUN		= 1 << 4,
	CAMDD_ARG_UNIT		= 1 << 5,
	CAMDD_ARG_TIMEOUT	= 1 << 6,
	CAMDD_ARG_ERR_RECOVER	= 1 << 7,
	CAMDD_ARG_RETRIES	= 1 << 8
} camdd_argmask;
107 
/*
 * The kind of device a camdd_dev represents.  This selects which member
 * of union camdd_dev_spec is valid: a pass(4) device or a file.
 */
typedef enum {
	CAMDD_DEV_NONE	= 0,
	CAMDD_DEV_PASS	= 1,
	CAMDD_DEV_FILE	= 2
} camdd_dev_type;
113 
114 struct camdd_io_opts {
115 	camdd_dev_type	dev_type;
116 	char		*dev_name;
117 	uint64_t	blocksize;
118 	uint64_t	queue_depth;
119 	uint64_t	offset;
120 	int		min_cmd_size;
121 	int		write_dev;
122 	uint64_t	debug;
123 };
124 
/*
 * Buffer flavors.  A data buffer owns backing store; an indirect buffer
 * only points into some other (source) buffer.  The type selects which
 * member of union camdd_buf_types is valid.
 */
typedef enum {
	CAMDD_BUF_NONE		= 0,
	CAMDD_BUF_DATA		= 1,
	CAMDD_BUF_INDIRECT	= 2
} camdd_buf_type;
130 
/*
 * Type-specific state for a CAMDD_BUF_INDIRECT buffer: a window onto
 * part of another buffer.
 */
struct camdd_buf_indirect {
	/* The buffer this chunk refers to. */
	struct camdd_buf *src_buf;

	/* Byte offset of this chunk within the source buffer. */
	uint64_t	  offset;

	/* Address in the source buffer at which this chunk begins. */
	uint8_t		 *start_ptr;

	/* Size of this chunk, in bytes. */
	size_t		  len;
};
151 
152 struct camdd_buf_data {
153 	/*
154 	 * Buffer allocated when we allocate this camdd_buf.  This should
155 	 * be the size of the blocksize for this device.
156 	 */
157 	uint8_t			*buf;
158 
159 	/*
160 	 * The amount of backing store allocated in buf.  Generally this
161 	 * will be the blocksize of the device.
162 	 */
163 	uint32_t		 alloc_len;
164 
165 	/*
166 	 * The amount of data that was put into the buffer (on reads) or
167 	 * the amount of data we have put onto the src_list so far (on
168 	 * writes).
169 	 */
170 	uint32_t		 fill_len;
171 
172 	/*
173 	 * The amount of data that was not transferred.
174 	 */
175 	uint32_t		 resid;
176 
177 	/*
178 	 * Starting byte offset on the reader.
179 	 */
180 	uint64_t		 src_start_offset;
181 
182 	/*
183 	 * CCB used for pass(4) device targets.
184 	 */
185 	union ccb		 ccb;
186 
187 	/*
188 	 * Number of scatter/gather segments.
189 	 */
190 	int			 sg_count;
191 
192 	/*
193 	 * Set if we had to tack on an extra buffer to round the transfer
194 	 * up to a sector size.
195 	 */
196 	int			 extra_buf;
197 
198 	/*
199 	 * Scatter/gather list used generally when we're the writer for a
200 	 * pass(4) device.
201 	 */
202 	bus_dma_segment_t	*segs;
203 
204 	/*
205 	 * Scatter/gather list used generally when we're the writer for a
206 	 * file or block device;
207 	 */
208 	struct iovec		*iovec;
209 };
210 
211 union camdd_buf_types {
212 	struct camdd_buf_indirect	indirect;
213 	struct camdd_buf_data		data;
214 };
215 
/*
 * Completion status recorded for a buffer.
 */
typedef enum {
	CAMDD_STATUS_NONE	= 0,
	CAMDD_STATUS_OK		= 1,
	CAMDD_STATUS_SHORT_IO	= 2,
	CAMDD_STATUS_EOF	= 3,
	CAMDD_STATUS_ERROR	= 4
} camdd_buf_status;
223 
224 struct camdd_buf {
225 	camdd_buf_type		 buf_type;
226 	union camdd_buf_types	 buf_type_spec;
227 
228 	camdd_buf_status	 status;
229 
230 	uint64_t		 lba;
231 	size_t			 len;
232 
233 	/*
234 	 * A reference count of how many indirect buffers point to this
235 	 * buffer.
236 	 */
237 	int			 refcount;
238 
239 	/*
240 	 * A link back to our parent device.
241 	 */
242 	struct camdd_dev	*dev;
243 	STAILQ_ENTRY(camdd_buf)  links;
244 	STAILQ_ENTRY(camdd_buf)  work_links;
245 
246 	/*
247 	 * A count of the buffers on the src_list.
248 	 */
249 	int			 src_count;
250 
251 	/*
252 	 * List of buffers from our partner thread that are the components
253 	 * of this buffer for the I/O.  Uses src_links.
254 	 */
255 	STAILQ_HEAD(,camdd_buf)	 src_list;
256 	STAILQ_ENTRY(camdd_buf)  src_links;
257 };
258 
/*
 * Number of device types; presumably counts CAMDD_DEV_PASS and
 * CAMDD_DEV_FILE (CAMDD_DEV_NONE excluded) -- confirm against users.
 */
#define	NUM_DEV_TYPES	2
260 
/*
 * Device-specific state for a CAMDD_DEV_PASS (pass(4)) device.
 */
struct camdd_dev_pass {
	int			 scsi_dev_type;

	/* CAM device handle; closed with cam_close_device() on free. */
	struct cam_device	*dev;

	uint64_t		 max_sector;
	uint32_t		 block_len;
	uint32_t		 cpi_maxio;
};
268 
/*
 * What kind of object a file device's descriptor refers to.
 * camdd_probe_file() classifies the descriptor with fstat(2) and, for
 * character devices, the FIODTYPE ioctl.
 */
typedef enum {
	CAMDD_FILE_NONE	= 0,
	CAMDD_FILE_REG	= 1,	/* regular file */
	CAMDD_FILE_STD	= 2,
	CAMDD_FILE_PIPE	= 3,	/* FIFO */
	CAMDD_FILE_DISK	= 4,	/* character device, D_DISK */
	CAMDD_FILE_TAPE	= 5,	/* character device, D_TAPE */
	CAMDD_FILE_TTY	= 6,	/* character device, D_TTY */
	CAMDD_FILE_MEM	= 7	/* character device, D_MEM */
} camdd_file_type;
279 
/*
 * Capability flags for a file device.  CAMDD_FF_CAN_SEEK is set for
 * regular files by camdd_probe_file().
 */
typedef enum {
	CAMDD_FF_NONE		= 0,
	CAMDD_FF_CAN_SEEK	= 1 << 0
} camdd_file_flags;
284 
285 struct camdd_dev_file {
286 	int			 fd;
287 	struct stat		 sb;
288 	char			 filename[MAXPATHLEN + 1];
289 	camdd_file_type		 file_type;
290 	camdd_file_flags	 file_flags;
291 	uint8_t			*tmp_buf;
292 };
293 
/*
 * Device-specific state for a block device.
 */
struct camdd_dev_block {
	int			 fd;		/* open descriptor */
	uint64_t		 size_bytes;	/* device size in bytes */
	uint32_t		 block_len;	/* block length */
};
299 
300 union camdd_dev_spec {
301 	struct camdd_dev_pass	pass;
302 	struct camdd_dev_file	file;
303 	struct camdd_dev_block	block;
304 };
305 
/*
 * State bits kept in the flags field of struct camdd_dev.  Each flag has
 * its own bit so they can be combined.
 */
typedef enum {
	CAMDD_DEV_FLAG_NONE		= 0,
	CAMDD_DEV_FLAG_EOF		= 1 << 0,
	CAMDD_DEV_FLAG_PEER_EOF		= 1 << 1,
	CAMDD_DEV_FLAG_ACTIVE		= 1 << 2,
	CAMDD_DEV_FLAG_EOF_SENT		= 1 << 3,
	CAMDD_DEV_FLAG_EOF_QUEUED	= 1 << 4
} camdd_dev_flags;
314 
315 struct camdd_dev {
316 	camdd_dev_type		 dev_type;
317 	union camdd_dev_spec	 dev_spec;
318 	camdd_dev_flags		 flags;
319 	char			 device_name[MAXPATHLEN+1];
320 	uint32_t		 blocksize;
321 	uint32_t		 sector_size;
322 	uint64_t		 max_sector;
323 	uint64_t		 sector_io_limit;
324 	int			 min_cmd_size;
325 	int			 write_dev;
326 	int			 retry_count;
327 	int			 io_timeout;
328 	int			 debug;
329 	uint64_t		 start_offset_bytes;
330 	uint64_t		 next_io_pos_bytes;
331 	uint64_t		 next_peer_pos_bytes;
332 	uint64_t		 next_completion_pos_bytes;
333 	uint64_t		 peer_bytes_queued;
334 	uint64_t		 bytes_transferred;
335 	uint32_t		 target_queue_depth;
336 	uint32_t		 cur_active_io;
337 	uint8_t			*extra_buf;
338 	uint32_t		 extra_buf_len;
339 	struct camdd_dev	*peer_dev;
340 	pthread_mutex_t		 mutex;
341 	pthread_cond_t		 cond;
342 	int			 kq;
343 
344 	int			 (*run)(struct camdd_dev *dev);
345 	int			 (*fetch)(struct camdd_dev *dev);
346 
347 	/*
348 	 * Buffers that are available for I/O.  Uses links.
349 	 */
350 	STAILQ_HEAD(,camdd_buf)	 free_queue;
351 
352 	/*
353 	 * Free indirect buffers.  These are used for breaking a large
354 	 * buffer into multiple pieces.
355 	 */
356 	STAILQ_HEAD(,camdd_buf)	 free_indirect_queue;
357 
358 	/*
359 	 * Buffers that have been queued to the kernel.  Uses links.
360 	 */
361 	STAILQ_HEAD(,camdd_buf)	 active_queue;
362 
363 	/*
364 	 * Will generally contain one of our buffers that is waiting for enough
365 	 * I/O from our partner thread to be able to execute.  This will
366 	 * generally happen when our per-I/O-size is larger than the
367 	 * partner thread's per-I/O-size.  Uses links.
368 	 */
369 	STAILQ_HEAD(,camdd_buf)	 pending_queue;
370 
371 	/*
372 	 * Number of buffers on the pending queue
373 	 */
374 	int			 num_pending_queue;
375 
376 	/*
377 	 * Buffers that are filled and ready to execute.  This is used when
378 	 * our partner (reader) thread sends us blocks that are larger than
379 	 * our blocksize, and so we have to split them into multiple pieces.
380 	 */
381 	STAILQ_HEAD(,camdd_buf)	 run_queue;
382 
383 	/*
384 	 * Number of buffers on the run queue.
385 	 */
386 	int			 num_run_queue;
387 
388 	STAILQ_HEAD(,camdd_buf)	 reorder_queue;
389 
390 	int			 num_reorder_queue;
391 
392 	/*
393 	 * Buffers that have been queued to us by our partner thread
394 	 * (generally the reader thread) to be written out.  Uses
395 	 * work_links.
396 	 */
397 	STAILQ_HEAD(,camdd_buf)	 work_queue;
398 
399 	/*
400 	 * Buffers that have been completed by our partner thread.  Uses
401 	 * work_links.
402 	 */
403 	STAILQ_HEAD(,camdd_buf)	 peer_done_queue;
404 
405 	/*
406 	 * Number of buffers on the peer done queue.
407 	 */
408 	uint32_t		 num_peer_done_queue;
409 
410 	/*
411 	 * A list of buffers that we have queued to our peer thread.  Uses
412 	 * links.
413 	 */
414 	STAILQ_HEAD(,camdd_buf)	 peer_work_queue;
415 
416 	/*
417 	 * Number of buffers on the peer work queue.
418 	 */
419 	uint32_t		 num_peer_work_queue;
420 };
421 
/*
 * Program-wide state.  The integer flags have static storage duration
 * and therefore start out as zero.
 */
static sem_t	camdd_sem;
static int	need_exit;
static int	error_exit;
static int	need_status;
426 
/*
 * Evaluate to the smaller of a and b.
 *
 * Both arguments and the whole expansion are parenthesized so that the
 * macro behaves like a single primary expression: uses such as
 * "min(x, y) + 1", "2 * min(x, y)" or "min(a | b, c)" group as
 * intended.  Each argument may still be evaluated twice, so do not pass
 * expressions with side effects.
 */
#ifndef min
#define	min(a, b)	(((a) < (b)) ? (a) : (b))
#endif
430 
/*
 * XXX KDM local version of timespecsub().  The usual definition lives in
 * sys/time.h but is only visible to the kernel; exposing it to userland
 * breaks the libnetbsd build.
 *
 * Computes *vvp -= *uvp in place, borrowing one second when the
 * nanosecond field would otherwise go negative.
 */
#ifndef timespecsub
#define	timespecsub(vvp, uvp)						\
	do {								\
		(vvp)->tv_sec -= (uvp)->tv_sec;				\
		(vvp)->tv_nsec -= (uvp)->tv_nsec;			\
		if ((vvp)->tv_nsec < 0) {				\
			(vvp)->tv_sec -= 1;				\
			(vvp)->tv_nsec += 1000000000;			\
		}							\
	} while (0)
#endif
447 
448 
/* Generically useful offsets into the peripheral private area */
#define ppriv_ptr0 periph_priv.entries[0].ptr
#define ppriv_ptr1 periph_priv.entries[1].ptr
#define ppriv_field0 periph_priv.entries[0].field
#define ppriv_field1 periph_priv.entries[1].field

/* Alias for the first private pointer slot (ppriv_ptr0). */
#define	ccb_buf	ppriv_ptr0

/*
 * Defaults and limits.
 *
 * CAMDD_FILE_DEFAULT_BLOCK is the blocksize used for file devices when
 * none was requested, and CAMDD_FILE_DEFAULT_DEPTH is the queue depth
 * always used for file devices.
 *
 * CAMDD_PASS_RW_TIMEOUT is parenthesized so that it expands to a single
 * value in any expression context (e.g. "x / CAMDD_PASS_RW_TIMEOUT").
 * NOTE(review): 60 * 1000 is presumably 60 seconds in milliseconds --
 * confirm against the users of this constant.
 */
#define	CAMDD_FILE_DEFAULT_BLOCK	524288
#define	CAMDD_FILE_DEFAULT_DEPTH	1
#define	CAMDD_PASS_MAX_BLOCK		1048576
#define	CAMDD_PASS_DEFAULT_DEPTH	6
#define	CAMDD_PASS_RW_TIMEOUT		(60 * 1000)
462 
463 static int parse_btl(char *tstr, int *bus, int *target, int *lun,
464 		     camdd_argmask *arglst);
465 void camdd_free_dev(struct camdd_dev *dev);
466 struct camdd_dev *camdd_alloc_dev(camdd_dev_type dev_type,
467 				  struct kevent *new_ke, int num_ke,
468 				  int retry_count, int timeout);
469 static struct camdd_buf *camdd_alloc_buf(struct camdd_dev *dev,
470 					 camdd_buf_type buf_type);
471 void camdd_release_buf(struct camdd_buf *buf);
472 struct camdd_buf *camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type);
473 int camdd_buf_sg_create(struct camdd_buf *buf, int iovec,
474 			uint32_t sector_size, uint32_t *num_sectors_used,
475 			int *double_buf_needed);
476 uint32_t camdd_buf_get_len(struct camdd_buf *buf);
477 void camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf);
478 int camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
479 		     uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran);
480 struct camdd_dev *camdd_probe_file(int fd, struct camdd_io_opts *io_opts,
481 				   int retry_count, int timeout);
482 struct camdd_dev *camdd_probe_pass(struct cam_device *cam_dev,
483 				   struct camdd_io_opts *io_opts,
484 				   camdd_argmask arglist, int probe_retry_count,
485 				   int probe_timeout, int io_retry_count,
486 				   int io_timeout);
487 void *camdd_file_worker(void *arg);
488 camdd_buf_status camdd_ccb_status(union ccb *ccb);
489 int camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf);
490 int camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf);
491 void camdd_peer_done(struct camdd_buf *buf);
492 void camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
493 			int *error_count);
494 int camdd_pass_fetch(struct camdd_dev *dev);
495 int camdd_file_run(struct camdd_dev *dev);
496 int camdd_pass_run(struct camdd_dev *dev);
497 int camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len);
498 int camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf);
499 void camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
500 		     uint32_t *peer_depth, uint32_t *our_bytes,
501 		     uint32_t *peer_bytes);
502 void *camdd_worker(void *arg);
503 void camdd_sig_handler(int sig);
504 void camdd_print_status(struct camdd_dev *camdd_dev,
505 			struct camdd_dev *other_dev,
506 			struct timespec *start_time);
507 int camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts,
508 	     uint64_t max_io, int retry_count, int timeout);
509 int camdd_parse_io_opts(char *args, int is_write,
510 			struct camdd_io_opts *io_opts);
511 void usage(void);
512 
513 /*
514  * Parse out a bus, or a bus, target and lun in the following
515  * format:
516  * bus
517  * bus:target
518  * bus:target:lun
519  *
520  * Returns the number of parsed components, or 0.
521  */
522 static int
523 parse_btl(char *tstr, int *bus, int *target, int *lun, camdd_argmask *arglst)
524 {
525 	char *tmpstr;
526 	int convs = 0;
527 
528 	while (isspace(*tstr) && (*tstr != '\0'))
529 		tstr++;
530 
531 	tmpstr = (char *)strtok(tstr, ":");
532 	if ((tmpstr != NULL) && (*tmpstr != '\0')) {
533 		*bus = strtol(tmpstr, NULL, 0);
534 		*arglst |= CAMDD_ARG_BUS;
535 		convs++;
536 		tmpstr = (char *)strtok(NULL, ":");
537 		if ((tmpstr != NULL) && (*tmpstr != '\0')) {
538 			*target = strtol(tmpstr, NULL, 0);
539 			*arglst |= CAMDD_ARG_TARGET;
540 			convs++;
541 			tmpstr = (char *)strtok(NULL, ":");
542 			if ((tmpstr != NULL) && (*tmpstr != '\0')) {
543 				*lun = strtol(tmpstr, NULL, 0);
544 				*arglst |= CAMDD_ARG_LUN;
545 				convs++;
546 			}
547 		}
548 	}
549 
550 	return convs;
551 }
552 
553 /*
554  * XXX KDM clean up and free all of the buffers on the queue!
555  */
556 void
557 camdd_free_dev(struct camdd_dev *dev)
558 {
559 	if (dev == NULL)
560 		return;
561 
562 	switch (dev->dev_type) {
563 	case CAMDD_DEV_FILE: {
564 		struct camdd_dev_file *file_dev = &dev->dev_spec.file;
565 
566 		if (file_dev->fd != -1)
567 			close(file_dev->fd);
568 		free(file_dev->tmp_buf);
569 		break;
570 	}
571 	case CAMDD_DEV_PASS: {
572 		struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
573 
574 		if (pass_dev->dev != NULL)
575 			cam_close_device(pass_dev->dev);
576 		break;
577 	}
578 	default:
579 		break;
580 	}
581 
582 	free(dev);
583 }
584 
585 struct camdd_dev *
586 camdd_alloc_dev(camdd_dev_type dev_type, struct kevent *new_ke, int num_ke,
587 		int retry_count, int timeout)
588 {
589 	struct camdd_dev *dev = NULL;
590 	struct kevent *ke;
591 	size_t ke_size;
592 	int retval = 0;
593 
594 	dev = malloc(sizeof(*dev));
595 	if (dev == NULL) {
596 		warn("%s: unable to malloc %zu bytes", __func__, sizeof(*dev));
597 		goto bailout;
598 	}
599 
600 	bzero(dev, sizeof(*dev));
601 
602 	dev->dev_type = dev_type;
603 	dev->io_timeout = timeout;
604 	dev->retry_count = retry_count;
605 	STAILQ_INIT(&dev->free_queue);
606 	STAILQ_INIT(&dev->free_indirect_queue);
607 	STAILQ_INIT(&dev->active_queue);
608 	STAILQ_INIT(&dev->pending_queue);
609 	STAILQ_INIT(&dev->run_queue);
610 	STAILQ_INIT(&dev->reorder_queue);
611 	STAILQ_INIT(&dev->work_queue);
612 	STAILQ_INIT(&dev->peer_done_queue);
613 	STAILQ_INIT(&dev->peer_work_queue);
614 	retval = pthread_mutex_init(&dev->mutex, NULL);
615 	if (retval != 0) {
616 		warnc(retval, "%s: failed to initialize mutex", __func__);
617 		goto bailout;
618 	}
619 
620 	retval = pthread_cond_init(&dev->cond, NULL);
621 	if (retval != 0) {
622 		warnc(retval, "%s: failed to initialize condition variable",
623 		      __func__);
624 		goto bailout;
625 	}
626 
627 	dev->kq = kqueue();
628 	if (dev->kq == -1) {
629 		warn("%s: Unable to create kqueue", __func__);
630 		goto bailout;
631 	}
632 
633 	ke_size = sizeof(struct kevent) * (num_ke + 4);
634 	ke = malloc(ke_size);
635 	if (ke == NULL) {
636 		warn("%s: unable to malloc %zu bytes", __func__, ke_size);
637 		goto bailout;
638 	}
639 	bzero(ke, ke_size);
640 	if (num_ke > 0)
641 		bcopy(new_ke, ke, num_ke * sizeof(struct kevent));
642 
643 	EV_SET(&ke[num_ke++], (uintptr_t)&dev->work_queue, EVFILT_USER,
644 	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
645 	EV_SET(&ke[num_ke++], (uintptr_t)&dev->peer_done_queue, EVFILT_USER,
646 	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
647 	EV_SET(&ke[num_ke++], SIGINFO, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
648 	EV_SET(&ke[num_ke++], SIGINT, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
649 
650 	retval = kevent(dev->kq, ke, num_ke, NULL, 0, NULL);
651 	if (retval == -1) {
652 		warn("%s: Unable to register kevents", __func__);
653 		goto bailout;
654 	}
655 
656 
657 	return (dev);
658 
659 bailout:
660 	free(dev);
661 
662 	return (NULL);
663 }
664 
665 static struct camdd_buf *
666 camdd_alloc_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
667 {
668 	struct camdd_buf *buf = NULL;
669 	uint8_t *data_ptr = NULL;
670 
671 	/*
672 	 * We only need to allocate data space for data buffers.
673 	 */
674 	switch (buf_type) {
675 	case CAMDD_BUF_DATA:
676 		data_ptr = malloc(dev->blocksize);
677 		if (data_ptr == NULL) {
678 			warn("unable to allocate %u bytes", dev->blocksize);
679 			goto bailout_error;
680 		}
681 		break;
682 	default:
683 		break;
684 	}
685 
686 	buf = malloc(sizeof(*buf));
687 	if (buf == NULL) {
688 		warn("unable to allocate %zu bytes", sizeof(*buf));
689 		goto bailout_error;
690 	}
691 
692 	bzero(buf, sizeof(*buf));
693 	buf->buf_type = buf_type;
694 	buf->dev = dev;
695 	switch (buf_type) {
696 	case CAMDD_BUF_DATA: {
697 		struct camdd_buf_data *data;
698 
699 		data = &buf->buf_type_spec.data;
700 
701 		data->alloc_len = dev->blocksize;
702 		data->buf = data_ptr;
703 		break;
704 	}
705 	case CAMDD_BUF_INDIRECT:
706 		break;
707 	default:
708 		break;
709 	}
710 	STAILQ_INIT(&buf->src_list);
711 
712 	return (buf);
713 
714 bailout_error:
715 	if (data_ptr != NULL)
716 		free(data_ptr);
717 
718 	if (buf != NULL)
719 		free(buf);
720 
721 	return (NULL);
722 }
723 
724 void
725 camdd_release_buf(struct camdd_buf *buf)
726 {
727 	struct camdd_dev *dev;
728 
729 	dev = buf->dev;
730 
731 	switch (buf->buf_type) {
732 	case CAMDD_BUF_DATA: {
733 		struct camdd_buf_data *data;
734 
735 		data = &buf->buf_type_spec.data;
736 
737 		if (data->segs != NULL) {
738 			if (data->extra_buf != 0) {
739 				void *extra_buf;
740 
741 				extra_buf = (void *)
742 				    data->segs[data->sg_count - 1].ds_addr;
743 				free(extra_buf);
744 				data->extra_buf = 0;
745 			}
746 			free(data->segs);
747 			data->segs = NULL;
748 			data->sg_count = 0;
749 		} else if (data->iovec != NULL) {
750 			if (data->extra_buf != 0) {
751 				free(data->iovec[data->sg_count - 1].iov_base);
752 				data->extra_buf = 0;
753 			}
754 			free(data->iovec);
755 			data->iovec = NULL;
756 			data->sg_count = 0;
757 		}
758 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
759 		break;
760 	}
761 	case CAMDD_BUF_INDIRECT:
762 		STAILQ_INSERT_TAIL(&dev->free_indirect_queue, buf, links);
763 		break;
764 	default:
765 		err(1, "%s: Invalid buffer type %d for released buffer",
766 		    __func__, buf->buf_type);
767 		break;
768 	}
769 }
770 
771 struct camdd_buf *
772 camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
773 {
774 	struct camdd_buf *buf = NULL;
775 
776 	switch (buf_type) {
777 	case CAMDD_BUF_DATA:
778 		buf = STAILQ_FIRST(&dev->free_queue);
779 		if (buf != NULL) {
780 			struct camdd_buf_data *data;
781 			uint8_t *data_ptr;
782 			uint32_t alloc_len;
783 
784 			STAILQ_REMOVE_HEAD(&dev->free_queue, links);
785 			data = &buf->buf_type_spec.data;
786 			data_ptr = data->buf;
787 			alloc_len = data->alloc_len;
788 			bzero(buf, sizeof(*buf));
789 			data->buf = data_ptr;
790 			data->alloc_len = alloc_len;
791 		}
792 		break;
793 	case CAMDD_BUF_INDIRECT:
794 		buf = STAILQ_FIRST(&dev->free_indirect_queue);
795 		if (buf != NULL) {
796 			STAILQ_REMOVE_HEAD(&dev->free_indirect_queue, links);
797 
798 			bzero(buf, sizeof(*buf));
799 		}
800 		break;
801 	default:
802 		warnx("Unknown buffer type %d requested", buf_type);
803 		break;
804 	}
805 
806 
807 	if (buf == NULL)
808 		return (camdd_alloc_buf(dev, buf_type));
809 	else {
810 		STAILQ_INIT(&buf->src_list);
811 		buf->dev = dev;
812 		buf->buf_type = buf_type;
813 
814 		return (buf);
815 	}
816 }
817 
818 int
819 camdd_buf_sg_create(struct camdd_buf *buf, int iovec, uint32_t sector_size,
820 		    uint32_t *num_sectors_used, int *double_buf_needed)
821 {
822 	struct camdd_buf *tmp_buf;
823 	struct camdd_buf_data *data;
824 	uint8_t *extra_buf = NULL;
825 	size_t extra_buf_len = 0;
826 	int i, retval = 0;
827 
828 	data = &buf->buf_type_spec.data;
829 
830 	data->sg_count = buf->src_count;
831 	/*
832 	 * Compose a scatter/gather list from all of the buffers in the list.
833 	 * If the length of the buffer isn't a multiple of the sector size,
834 	 * we'll have to add an extra buffer.  This should only happen
835 	 * at the end of a transfer.
836 	 */
837 	if ((data->fill_len % sector_size) != 0) {
838 		extra_buf_len = sector_size - (data->fill_len % sector_size);
839 		extra_buf = calloc(extra_buf_len, 1);
840 		if (extra_buf == NULL) {
841 			warn("%s: unable to allocate %zu bytes for extra "
842 			    "buffer space", __func__, extra_buf_len);
843 			retval = 1;
844 			goto bailout;
845 		}
846 		data->extra_buf = 1;
847 		data->sg_count++;
848 	}
849 	if (iovec == 0) {
850 		data->segs = calloc(data->sg_count, sizeof(bus_dma_segment_t));
851 		if (data->segs == NULL) {
852 			warn("%s: unable to allocate %zu bytes for S/G list",
853 			    __func__, sizeof(bus_dma_segment_t) *
854 			    data->sg_count);
855 			retval = 1;
856 			goto bailout;
857 		}
858 
859 	} else {
860 		data->iovec = calloc(data->sg_count, sizeof(struct iovec));
861 		if (data->iovec == NULL) {
862 			warn("%s: unable to allocate %zu bytes for S/G list",
863 			    __func__, sizeof(struct iovec) * data->sg_count);
864 			retval = 1;
865 			goto bailout;
866 		}
867 	}
868 
869 	for (i = 0, tmp_buf = STAILQ_FIRST(&buf->src_list);
870 	     i < buf->src_count && tmp_buf != NULL; i++,
871 	     tmp_buf = STAILQ_NEXT(tmp_buf, src_links)) {
872 
873 		if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
874 			struct camdd_buf_data *tmp_data;
875 
876 			tmp_data = &tmp_buf->buf_type_spec.data;
877 			if (iovec == 0) {
878 				data->segs[i].ds_addr =
879 				    (bus_addr_t) tmp_data->buf;
880 				data->segs[i].ds_len = tmp_data->fill_len -
881 				    tmp_data->resid;
882 			} else {
883 				data->iovec[i].iov_base = tmp_data->buf;
884 				data->iovec[i].iov_len = tmp_data->fill_len -
885 				    tmp_data->resid;
886 			}
887 			if (((tmp_data->fill_len - tmp_data->resid) %
888 			     sector_size) != 0)
889 				*double_buf_needed = 1;
890 		} else {
891 			struct camdd_buf_indirect *tmp_ind;
892 
893 			tmp_ind = &tmp_buf->buf_type_spec.indirect;
894 			if (iovec == 0) {
895 				data->segs[i].ds_addr =
896 				    (bus_addr_t)tmp_ind->start_ptr;
897 				data->segs[i].ds_len = tmp_ind->len;
898 			} else {
899 				data->iovec[i].iov_base = tmp_ind->start_ptr;
900 				data->iovec[i].iov_len = tmp_ind->len;
901 			}
902 			if ((tmp_ind->len % sector_size) != 0)
903 				*double_buf_needed = 1;
904 		}
905 	}
906 
907 	if (extra_buf != NULL) {
908 		if (iovec == 0) {
909 			data->segs[i].ds_addr = (bus_addr_t)extra_buf;
910 			data->segs[i].ds_len = extra_buf_len;
911 		} else {
912 			data->iovec[i].iov_base = extra_buf;
913 			data->iovec[i].iov_len = extra_buf_len;
914 		}
915 		i++;
916 	}
917 	if ((tmp_buf != NULL) || (i != data->sg_count)) {
918 		warnx("buffer source count does not match "
919 		      "number of buffers in list!");
920 		retval = 1;
921 		goto bailout;
922 	}
923 
924 bailout:
925 	if (retval == 0) {
926 		*num_sectors_used = (data->fill_len + extra_buf_len) /
927 		    sector_size;
928 	}
929 	return (retval);
930 }
931 
932 uint32_t
933 camdd_buf_get_len(struct camdd_buf *buf)
934 {
935 	uint32_t len = 0;
936 
937 	if (buf->buf_type != CAMDD_BUF_DATA) {
938 		struct camdd_buf_indirect *indirect;
939 
940 		indirect = &buf->buf_type_spec.indirect;
941 		len = indirect->len;
942 	} else {
943 		struct camdd_buf_data *data;
944 
945 		data = &buf->buf_type_spec.data;
946 		len = data->fill_len;
947 	}
948 
949 	return (len);
950 }
951 
952 void
953 camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf)
954 {
955 	struct camdd_buf_data *data;
956 
957 	assert(buf->buf_type == CAMDD_BUF_DATA);
958 
959 	data = &buf->buf_type_spec.data;
960 
961 	STAILQ_INSERT_TAIL(&buf->src_list, child_buf, src_links);
962 	buf->src_count++;
963 
964 	data->fill_len += camdd_buf_get_len(child_buf);
965 }
966 
/*
 * Indices into req_status_items[]; the order here must match the order
 * of that table's entries.
 */
typedef enum {
	CAMDD_TS_MAX_BLK	= 0,
	CAMDD_TS_MIN_BLK	= 1,
	CAMDD_TS_BLK_GRAN	= 2,
	CAMDD_TS_EFF_IOSIZE	= 3
} camdd_status_item_index;
973 
/*
 * Tape status entries looked up by camdd_probe_tape().  Each element
 * pairs the entry's name with a pointer that is filled in once the entry
 * has been found.  Elements are indexed by camdd_status_item_index, so
 * the order matters.
 */
static struct camdd_status_items {
	const char *name;
	struct mt_status_entry *entry;
} req_status_items[] = {
	{ .name = "max_blk",			.entry = NULL },
	{ .name = "min_blk",			.entry = NULL },
	{ .name = "blk_gran",			.entry = NULL },
	{ .name = "max_effective_iosize",	.entry = NULL }
};
983 
984 int
985 camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
986 		 uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran)
987 {
988 	struct mt_status_data status_data;
989 	char *xml_str = NULL;
990 	unsigned int i;
991 	int retval = 0;
992 
993 	retval = mt_get_xml_str(fd, MTIOCEXTGET, &xml_str);
994 	if (retval != 0)
995 		err(1, "Couldn't get XML string from %s", filename);
996 
997 	retval = mt_get_status(xml_str, &status_data);
998 	if (retval != XML_STATUS_OK) {
999 		warn("couldn't get status for %s", filename);
1000 		retval = 1;
1001 		goto bailout;
1002 	} else
1003 		retval = 0;
1004 
1005 	if (status_data.error != 0) {
1006 		warnx("%s", status_data.error_str);
1007 		retval = 1;
1008 		goto bailout;
1009 	}
1010 
1011 	for (i = 0; i < sizeof(req_status_items) /
1012 	     sizeof(req_status_items[0]); i++) {
1013                 char *name;
1014 
1015 		name = __DECONST(char *, req_status_items[i].name);
1016 		req_status_items[i].entry = mt_status_entry_find(&status_data,
1017 		    name);
1018 		if (req_status_items[i].entry == NULL) {
1019 			errx(1, "Cannot find status entry %s",
1020 			    req_status_items[i].name);
1021 		}
1022 	}
1023 
1024 	*max_iosize = req_status_items[CAMDD_TS_EFF_IOSIZE].entry->value_unsigned;
1025 	*max_blk= req_status_items[CAMDD_TS_MAX_BLK].entry->value_unsigned;
1026 	*min_blk= req_status_items[CAMDD_TS_MIN_BLK].entry->value_unsigned;
1027 	*blk_gran = req_status_items[CAMDD_TS_BLK_GRAN].entry->value_unsigned;
1028 bailout:
1029 
1030 	free(xml_str);
1031 	mt_status_free(&status_data);
1032 
1033 	return (retval);
1034 }
1035 
1036 struct camdd_dev *
1037 camdd_probe_file(int fd, struct camdd_io_opts *io_opts, int retry_count,
1038     int timeout)
1039 {
1040 	struct camdd_dev *dev = NULL;
1041 	struct camdd_dev_file *file_dev;
1042 	uint64_t blocksize = io_opts->blocksize;
1043 
1044 	dev = camdd_alloc_dev(CAMDD_DEV_FILE, NULL, 0, retry_count, timeout);
1045 	if (dev == NULL)
1046 		goto bailout;
1047 
1048 	file_dev = &dev->dev_spec.file;
1049 	file_dev->fd = fd;
1050 	strlcpy(file_dev->filename, io_opts->dev_name,
1051 	    sizeof(file_dev->filename));
1052 	strlcpy(dev->device_name, io_opts->dev_name, sizeof(dev->device_name));
1053 	if (blocksize == 0)
1054 		dev->blocksize = CAMDD_FILE_DEFAULT_BLOCK;
1055 	else
1056 		dev->blocksize = blocksize;
1057 
1058 	if ((io_opts->queue_depth != 0)
1059 	 && (io_opts->queue_depth != 1)) {
1060 		warnx("Queue depth %ju for %s ignored, only 1 outstanding "
1061 		    "command supported", (uintmax_t)io_opts->queue_depth,
1062 		    io_opts->dev_name);
1063 	}
1064 	dev->target_queue_depth = CAMDD_FILE_DEFAULT_DEPTH;
1065 	dev->run = camdd_file_run;
1066 	dev->fetch = NULL;
1067 
1068 	/*
1069 	 * We can effectively access files on byte boundaries.  We'll reset
1070 	 * this for devices like disks that can be accessed on sector
1071 	 * boundaries.
1072 	 */
1073 	dev->sector_size = 1;
1074 
1075 	if ((fd != STDIN_FILENO)
1076 	 && (fd != STDOUT_FILENO)) {
1077 		int retval;
1078 
1079 		retval = fstat(fd, &file_dev->sb);
1080 		if (retval != 0) {
1081 			warn("Cannot stat %s", dev->device_name);
1082 			goto bailout_error;
1083 		}
1084 		if (S_ISREG(file_dev->sb.st_mode)) {
1085 			file_dev->file_type = CAMDD_FILE_REG;
1086 		} else if (S_ISCHR(file_dev->sb.st_mode)) {
1087 			int type;
1088 
1089 			if (ioctl(fd, FIODTYPE, &type) == -1)
1090 				err(1, "FIODTYPE ioctl failed on %s",
1091 				    dev->device_name);
1092 			else {
1093 				if (type & D_TAPE)
1094 					file_dev->file_type = CAMDD_FILE_TAPE;
1095 				else if (type & D_DISK)
1096 					file_dev->file_type = CAMDD_FILE_DISK;
1097 				else if (type & D_MEM)
1098 					file_dev->file_type = CAMDD_FILE_MEM;
1099 				else if (type & D_TTY)
1100 					file_dev->file_type = CAMDD_FILE_TTY;
1101 			}
1102 		} else if (S_ISDIR(file_dev->sb.st_mode)) {
1103 			errx(1, "cannot operate on directory %s",
1104 			    dev->device_name);
1105 		} else if (S_ISFIFO(file_dev->sb.st_mode)) {
1106 			file_dev->file_type = CAMDD_FILE_PIPE;
1107 		} else
1108 			errx(1, "Cannot determine file type for %s",
1109 			    dev->device_name);
1110 
1111 		switch (file_dev->file_type) {
1112 		case CAMDD_FILE_REG:
1113 			if (file_dev->sb.st_size != 0)
1114 				dev->max_sector = file_dev->sb.st_size - 1;
1115 			else
1116 				dev->max_sector = 0;
1117 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1118 			break;
1119 		case CAMDD_FILE_TAPE: {
1120 			uint64_t max_iosize, max_blk, min_blk, blk_gran;
1121 			/*
1122 			 * Check block limits and maximum effective iosize.
1123 			 * Make sure the blocksize is within the block
1124 			 * limits (and a multiple of the minimum blocksize)
1125 			 * and that the blocksize is <= maximum effective
1126 			 * iosize.
1127 			 */
1128 			retval = camdd_probe_tape(fd, dev->device_name,
1129 			    &max_iosize, &max_blk, &min_blk, &blk_gran);
1130 			if (retval != 0)
1131 				errx(1, "Unable to probe tape %s",
1132 				    dev->device_name);
1133 
1134 			/*
1135 			 * The blocksize needs to be <= the maximum
1136 			 * effective I/O size of the tape device.  Note
1137 			 * that this also takes into account the maximum
1138 			 * blocksize reported by READ BLOCK LIMITS.
1139 			 */
1140 			if (dev->blocksize > max_iosize) {
1141 				warnx("Blocksize %u too big for %s, limiting "
1142 				    "to %ju", dev->blocksize, dev->device_name,
1143 				    max_iosize);
1144 				dev->blocksize = max_iosize;
1145 			}
1146 
1147 			/*
1148 			 * The blocksize needs to be at least min_blk;
1149 			 */
1150 			if (dev->blocksize < min_blk) {
1151 				warnx("Blocksize %u too small for %s, "
1152 				    "increasing to %ju", dev->blocksize,
1153 				    dev->device_name, min_blk);
1154 				dev->blocksize = min_blk;
1155 			}
1156 
1157 			/*
1158 			 * And the blocksize needs to be a multiple of
1159 			 * the block granularity.
1160 			 */
1161 			if ((blk_gran != 0)
1162 			 && (dev->blocksize % (1 << blk_gran))) {
1163 				warnx("Blocksize %u for %s not a multiple of "
1164 				    "%d, adjusting to %d", dev->blocksize,
1165 				    dev->device_name, (1 << blk_gran),
1166 				    dev->blocksize & ~((1 << blk_gran) - 1));
1167 				dev->blocksize &= ~((1 << blk_gran) - 1);
1168 			}
1169 
1170 			if (dev->blocksize == 0) {
1171 				errx(1, "Unable to derive valid blocksize for "
1172 				    "%s", dev->device_name);
1173 			}
1174 
1175 			/*
1176 			 * For tape drives, set the sector size to the
1177 			 * blocksize so that we make sure not to write
1178 			 * less than the blocksize out to the drive.
1179 			 */
1180 			dev->sector_size = dev->blocksize;
1181 			break;
1182 		}
1183 		case CAMDD_FILE_DISK: {
1184 			off_t media_size;
1185 			unsigned int sector_size;
1186 
1187 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1188 
1189 			if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) == -1) {
1190 				err(1, "DIOCGSECTORSIZE ioctl failed on %s",
1191 				    dev->device_name);
1192 			}
1193 
1194 			if (sector_size == 0) {
1195 				errx(1, "DIOCGSECTORSIZE ioctl returned "
1196 				    "invalid sector size %u for %s",
1197 				    sector_size, dev->device_name);
1198 			}
1199 
1200 			if (ioctl(fd, DIOCGMEDIASIZE, &media_size) == -1) {
1201 				err(1, "DIOCGMEDIASIZE ioctl failed on %s",
1202 				    dev->device_name);
1203 			}
1204 
1205 			if (media_size == 0) {
1206 				errx(1, "DIOCGMEDIASIZE ioctl returned "
1207 				    "invalid media size %ju for %s",
1208 				    (uintmax_t)media_size, dev->device_name);
1209 			}
1210 
1211 			if (dev->blocksize % sector_size) {
1212 				errx(1, "%s blocksize %u not a multiple of "
1213 				    "sector size %u", dev->device_name,
1214 				    dev->blocksize, sector_size);
1215 			}
1216 
1217 			dev->sector_size = sector_size;
1218 			dev->max_sector = (media_size / sector_size) - 1;
1219 			break;
1220 		}
1221 		case CAMDD_FILE_MEM:
1222 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1223 			break;
1224 		default:
1225 			break;
1226 		}
1227 	}
1228 
1229 	if ((io_opts->offset != 0)
1230 	 && ((file_dev->file_flags & CAMDD_FF_CAN_SEEK) == 0)) {
1231 		warnx("Offset %ju specified for %s, but we cannot seek on %s",
1232 		    io_opts->offset, io_opts->dev_name, io_opts->dev_name);
1233 		goto bailout_error;
1234 	}
1235 #if 0
1236 	else if ((io_opts->offset != 0)
1237 		&& ((io_opts->offset % dev->sector_size) != 0)) {
1238 		warnx("Offset %ju for %s is not a multiple of the "
1239 		      "sector size %u", io_opts->offset,
1240 		      io_opts->dev_name, dev->sector_size);
1241 		goto bailout_error;
1242 	} else {
1243 		dev->start_offset_bytes = io_opts->offset;
1244 	}
1245 #endif
1246 
1247 bailout:
1248 	return (dev);
1249 
1250 bailout_error:
1251 	camdd_free_dev(dev);
1252 	return (NULL);
1253 }
1254 
1255 /*
1256  * Need to implement this.  Do a basic probe:
1257  * - Check the inquiry data, make sure we're talking to a device that we
1258  *   can reasonably expect to talk to -- direct, RBC, CD, WORM.
1259  * - Send a test unit ready, make sure the device is available.
1260  * - Get the capacity and block size.
1261  */
1262 struct camdd_dev *
1263 camdd_probe_pass(struct cam_device *cam_dev, struct camdd_io_opts *io_opts,
1264 		 camdd_argmask arglist, int probe_retry_count,
1265 		 int probe_timeout, int io_retry_count, int io_timeout)
1266 {
1267 	union ccb *ccb;
1268 	uint64_t maxsector;
1269 	uint32_t cpi_maxio, max_iosize, pass_numblocks;
1270 	uint32_t block_len;
1271 	struct scsi_read_capacity_data rcap;
1272 	struct scsi_read_capacity_data_long rcaplong;
1273 	struct camdd_dev *dev;
1274 	struct camdd_dev_pass *pass_dev;
1275 	struct kevent ke;
1276 	int scsi_dev_type;
1277 
1278 	dev = NULL;
1279 
1280 	scsi_dev_type = SID_TYPE(&cam_dev->inq_data);
1281 	maxsector = 0;
1282 	block_len = 0;
1283 
1284 	/*
1285 	 * For devices that support READ CAPACITY, we'll attempt to get the
1286 	 * capacity.  Otherwise, we really don't support tape or other
1287 	 * devices via SCSI passthrough, so just return an error in that case.
1288 	 */
1289 	switch (scsi_dev_type) {
1290 	case T_DIRECT:
1291 	case T_WORM:
1292 	case T_CDROM:
1293 	case T_OPTICAL:
1294 	case T_RBC:
1295 		break;
1296 	default:
1297 		errx(1, "Unsupported SCSI device type %d", scsi_dev_type);
1298 		break; /*NOTREACHED*/
1299 	}
1300 
1301 	ccb = cam_getccb(cam_dev);
1302 
1303 	if (ccb == NULL) {
1304 		warnx("%s: error allocating ccb", __func__);
1305 		goto bailout;
1306 	}
1307 
1308 	bzero(&(&ccb->ccb_h)[1],
1309 	      sizeof(struct ccb_scsiio) - sizeof(struct ccb_hdr));
1310 
1311 	scsi_read_capacity(&ccb->csio,
1312 			   /*retries*/ probe_retry_count,
1313 			   /*cbfcnp*/ NULL,
1314 			   /*tag_action*/ MSG_SIMPLE_Q_TAG,
1315 			   &rcap,
1316 			   SSD_FULL_SIZE,
1317 			   /*timeout*/ probe_timeout ? probe_timeout : 5000);
1318 
1319 	/* Disable freezing the device queue */
1320 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1321 
1322 	if (arglist & CAMDD_ARG_ERR_RECOVER)
1323 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1324 
1325 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1326 		warn("error sending READ CAPACITY command");
1327 
1328 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1329 				CAM_EPF_ALL, stderr);
1330 
1331 		goto bailout;
1332 	}
1333 
1334 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1335 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1336 		goto bailout;
1337 	}
1338 
1339 	maxsector = scsi_4btoul(rcap.addr);
1340 	block_len = scsi_4btoul(rcap.length);
1341 
1342 	/*
1343 	 * A last block of 2^32-1 means that the true capacity is over 2TB,
1344 	 * and we need to issue the long READ CAPACITY to get the real
1345 	 * capacity.  Otherwise, we're all set.
1346 	 */
1347 	if (maxsector != 0xffffffff)
1348 		goto rcap_done;
1349 
1350 	scsi_read_capacity_16(&ccb->csio,
1351 			      /*retries*/ probe_retry_count,
1352 			      /*cbfcnp*/ NULL,
1353 			      /*tag_action*/ MSG_SIMPLE_Q_TAG,
1354 			      /*lba*/ 0,
1355 			      /*reladdr*/ 0,
1356 			      /*pmi*/ 0,
1357 			      (uint8_t *)&rcaplong,
1358 			      sizeof(rcaplong),
1359 			      /*sense_len*/ SSD_FULL_SIZE,
1360 			      /*timeout*/ probe_timeout ? probe_timeout : 5000);
1361 
1362 	/* Disable freezing the device queue */
1363 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1364 
1365 	if (arglist & CAMDD_ARG_ERR_RECOVER)
1366 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1367 
1368 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1369 		warn("error sending READ CAPACITY (16) command");
1370 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1371 				CAM_EPF_ALL, stderr);
1372 		goto bailout;
1373 	}
1374 
1375 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1376 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1377 		goto bailout;
1378 	}
1379 
1380 	maxsector = scsi_8btou64(rcaplong.addr);
1381 	block_len = scsi_4btoul(rcaplong.length);
1382 
1383 rcap_done:
1384 	if (block_len == 0) {
1385 		warnx("Sector size for %s%u is 0, cannot continue",
1386 		    cam_dev->device_name, cam_dev->dev_unit_num);
1387 		goto bailout_error;
1388 	}
1389 
1390 	bzero(&(&ccb->ccb_h)[1],
1391 	      sizeof(struct ccb_scsiio) - sizeof(struct ccb_hdr));
1392 
1393 	ccb->ccb_h.func_code = XPT_PATH_INQ;
1394 	ccb->ccb_h.flags = CAM_DIR_NONE;
1395 	ccb->ccb_h.retry_count = 1;
1396 
1397 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1398 		warn("error sending XPT_PATH_INQ CCB");
1399 
1400 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1401 				CAM_EPF_ALL, stderr);
1402 		goto bailout;
1403 	}
1404 
1405 	EV_SET(&ke, cam_dev->fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1406 
1407 	dev = camdd_alloc_dev(CAMDD_DEV_PASS, &ke, 1, io_retry_count,
1408 			      io_timeout);
1409 	if (dev == NULL)
1410 		goto bailout;
1411 
1412 	pass_dev = &dev->dev_spec.pass;
1413 	pass_dev->scsi_dev_type = scsi_dev_type;
1414 	pass_dev->dev = cam_dev;
1415 	pass_dev->max_sector = maxsector;
1416 	pass_dev->block_len = block_len;
1417 	pass_dev->cpi_maxio = ccb->cpi.maxio;
1418 	snprintf(dev->device_name, sizeof(dev->device_name), "%s%u",
1419 		 pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
1420 	dev->sector_size = block_len;
1421 	dev->max_sector = maxsector;
1422 
1423 
1424 	/*
1425 	 * Determine the optimal blocksize to use for this device.
1426 	 */
1427 
1428 	/*
1429 	 * If the controller has not specified a maximum I/O size,
1430 	 * just go with 128K as a somewhat conservative value.
1431 	 */
1432 	if (pass_dev->cpi_maxio == 0)
1433 		cpi_maxio = 131072;
1434 	else
1435 		cpi_maxio = pass_dev->cpi_maxio;
1436 
1437 	/*
1438 	 * If the controller has a large maximum I/O size, limit it
1439 	 * to something smaller so that the kernel doesn't have trouble
1440 	 * allocating buffers to copy data in and out for us.
1441 	 * XXX KDM this is until we have unmapped I/O support in the kernel.
1442 	 */
1443 	max_iosize = min(cpi_maxio, CAMDD_PASS_MAX_BLOCK);
1444 
1445 	/*
1446 	 * If we weren't able to get a block size for some reason,
1447 	 * default to 512 bytes.
1448 	 */
1449 	block_len = pass_dev->block_len;
1450 	if (block_len == 0)
1451 		block_len = 512;
1452 
1453 	/*
1454 	 * Figure out how many blocksize chunks will fit in the
1455 	 * maximum I/O size.
1456 	 */
1457 	pass_numblocks = max_iosize / block_len;
1458 
1459 	/*
1460 	 * And finally, multiple the number of blocks by the LBA
1461 	 * length to get our maximum block size;
1462 	 */
1463 	dev->blocksize = pass_numblocks * block_len;
1464 
1465 	if (io_opts->blocksize != 0) {
1466 		if ((io_opts->blocksize % dev->sector_size) != 0) {
1467 			warnx("Blocksize %ju for %s is not a multiple of "
1468 			      "sector size %u", (uintmax_t)io_opts->blocksize,
1469 			      dev->device_name, dev->sector_size);
1470 			goto bailout_error;
1471 		}
1472 		dev->blocksize = io_opts->blocksize;
1473 	}
1474 	dev->target_queue_depth = CAMDD_PASS_DEFAULT_DEPTH;
1475 	if (io_opts->queue_depth != 0)
1476 		dev->target_queue_depth = io_opts->queue_depth;
1477 
1478 	if (io_opts->offset != 0) {
1479 		if (io_opts->offset > (dev->max_sector * dev->sector_size)) {
1480 			warnx("Offset %ju is past the end of device %s",
1481 			    io_opts->offset, dev->device_name);
1482 			goto bailout_error;
1483 		}
1484 #if 0
1485 		else if ((io_opts->offset % dev->sector_size) != 0) {
1486 			warnx("Offset %ju for %s is not a multiple of the "
1487 			      "sector size %u", io_opts->offset,
1488 			      dev->device_name, dev->sector_size);
1489 			goto bailout_error;
1490 		}
1491 		dev->start_offset_bytes = io_opts->offset;
1492 #endif
1493 	}
1494 
1495 	dev->min_cmd_size = io_opts->min_cmd_size;
1496 
1497 	dev->run = camdd_pass_run;
1498 	dev->fetch = camdd_pass_fetch;
1499 
1500 bailout:
1501 	cam_freeccb(ccb);
1502 
1503 	return (dev);
1504 
1505 bailout_error:
1506 	cam_freeccb(ccb);
1507 
1508 	camdd_free_dev(dev);
1509 
1510 	return (NULL);
1511 }
1512 
/*
 * Per-device worker thread.  arg is the struct camdd_dev that this
 * thread services.
 *
 * The device mutex is held for the whole loop except around the kevent()
 * call.  Each pass through the loop:
 * - if we're the reader (write_dev == 0), queues more I/O with
 *   camdd_queue(dev, NULL) until the depth/byte targets are met;
 * - runs queued I/O through dev->run() while we're below the target
 *   queue depth;
 * - exits once EOF has been seen and the relevant queues have drained;
 * - waits on the device's kqueue and dispatches whatever event arrives.
 *
 * On the way out the ACTIVE flag is cleared, need_exit is set and
 * camdd_sem is posted.  Always returns NULL; errors are reported
 * through the error_exit variable.
 */
void *
camdd_worker(void *arg)
{
	struct camdd_dev *dev = arg;
	struct camdd_buf *buf;
	struct timespec ts, *kq_ts;

	/* Zero timeout, used when kevent() should only poll. */
	ts.tv_sec = 0;
	ts.tv_nsec = 0;

	pthread_mutex_lock(&dev->mutex);

	dev->flags |= CAMDD_DEV_FLAG_ACTIVE;

	for (;;) {
		struct kevent ke;
		int retval = 0;

		/*
		 * XXX KDM check the reorder queue depth?
		 */
		if (dev->write_dev == 0) {
			uint32_t our_depth, peer_depth, peer_bytes, our_bytes;
			uint32_t target_depth = dev->target_queue_depth;
			uint32_t peer_target_depth =
			    dev->peer_dev->target_queue_depth;
			uint32_t peer_blocksize = dev->peer_dev->blocksize;

			camdd_get_depth(dev, &our_depth, &peer_depth,
					&our_bytes, &peer_bytes);

#if 0
			while (((our_depth < target_depth)
			     && (peer_depth < peer_target_depth))
			    || ((peer_bytes + our_bytes) <
				 (peer_blocksize * 2))) {
#endif
			/*
			 * Keep queueing until the combined depth reaches
			 * the combined target and at least three peer
			 * blocks worth of bytes are outstanding.
			 */
			while (((our_depth + peer_depth) <
			        (target_depth + peer_target_depth))
			    || ((peer_bytes + our_bytes) <
				(peer_blocksize * 3))) {

				retval = camdd_queue(dev, NULL);
				/*
				 * 1 stops queueing for now without being
				 * treated as an error (presumably nothing
				 * more can be queued -- confirm against
				 * camdd_queue()); any other non-zero value
				 * is fatal.
				 */
				if (retval == 1)
					break;
				else if (retval != 0) {
					error_exit = 1;
					goto bailout;
				}

				camdd_get_depth(dev, &our_depth, &peer_depth,
						&our_bytes, &peer_bytes);
			}
		}
		/*
		 * See if we have any I/O that is ready to execute.
		 */
		buf = STAILQ_FIRST(&dev->run_queue);
		if (buf != NULL) {
			while (dev->target_queue_depth > dev->cur_active_io) {
				retval = dev->run(dev);
				if (retval == -1) {
					/*
					 * Flag EOF on error so that the
					 * drain checks below get a chance
					 * to terminate the thread.
					 */
					dev->flags |= CAMDD_DEV_FLAG_EOF;
					error_exit = 1;
					break;
				} else if (retval != 0) {
					break;
				}
			}
		}

		/*
		 * We've reached EOF, or our partner has reached EOF.
		 */
		if ((dev->flags & CAMDD_DEV_FLAG_EOF)
		 || (dev->flags & CAMDD_DEV_FLAG_PEER_EOF)) {
			if (dev->write_dev != 0) {
				/*
				 * Writer: done once nothing is left
				 * queued, runnable or in flight.
				 */
			 	if ((STAILQ_EMPTY(&dev->work_queue))
				 && (dev->num_run_queue == 0)
				 && (dev->cur_active_io == 0)) {
					goto bailout;
				}
			} else {
				/*
				 * If we're the reader, and the writer
				 * got EOF, he is already done.  If we got
				 * the EOF, then we need to wait until
				 * everything is flushed out for the writer.
				 */
				if (dev->flags & CAMDD_DEV_FLAG_PEER_EOF) {
					goto bailout;
				} else if ((dev->num_peer_work_queue == 0)
					&& (dev->num_peer_done_queue == 0)
					&& (dev->cur_active_io == 0)
					&& (dev->num_run_queue == 0)) {
					goto bailout;
				}
			}
			/*
			 * XXX KDM need to do something about the pending
			 * queue and cleanup resources.
			 */
		}

		/*
		 * A reader with no I/O in flight and less than one peer
		 * block queued to the writer only polls (zero timeout) so
		 * that it comes straight back around to queue more reads.
		 * Everyone else blocks until an event arrives.
		 */
		if ((dev->write_dev == 0)
		 && (dev->cur_active_io == 0)
		 && (dev->peer_bytes_queued < dev->peer_dev->blocksize))
			kq_ts = &ts;
		else
			kq_ts = NULL;

		/*
		 * Run kevent to see if there are events to process.
		 * The mutex is dropped while we wait so that the peer
		 * thread can get at our queues.
		 */
		pthread_mutex_unlock(&dev->mutex);
		retval = kevent(dev->kq, NULL, 0, &ke, 1, kq_ts);
		pthread_mutex_lock(&dev->mutex);
		if (retval == -1) {
			warn("%s: error returned from kevent",__func__);
			goto bailout;
		} else if (retval != 0) {
			switch (ke.filter) {
			case EVFILT_READ:
				/*
				 * Only devices that supply a fetch
				 * routine collect completions here.
				 */
				if (dev->fetch != NULL) {
					retval = dev->fetch(dev);
					if (retval == -1) {
						error_exit = 1;
						goto bailout;
					}
				}
				break;
			case EVFILT_SIGNAL:
				/*
				 * We register for this so we don't get
				 * an error as a result of a SIGINFO or a
				 * SIGINT.  It will actually get handled
				 * by the signal handler.  If we get a
				 * SIGINT, bail out without printing an
				 * error message.  Any other signals
				 * will result in the error message above.
				 */
				if (ke.ident == SIGINT)
					goto bailout;
				break;
			case EVFILT_USER:
				retval = 0;
				/*
				 * Check to see if the other thread has
				 * queued any I/O for us to do.  (In this
				 * case we're the writer.)
				 */
				for (buf = STAILQ_FIRST(&dev->work_queue);
				     buf != NULL;
				     buf = STAILQ_FIRST(&dev->work_queue)) {
					STAILQ_REMOVE_HEAD(&dev->work_queue,
							   work_links);
					retval = camdd_queue(dev, buf);
					/*
					 * We keep going unless we get an
					 * actual error.  If we get EOF, we
					 * still want to remove the buffers
					 * from the queue and send them back
					 * to the reader thread.
					 */
					if (retval == -1) {
						error_exit = 1;
						goto bailout;
					} else
						retval = 0;
				}

				/*
				 * Next check to see if the other thread has
				 * queued any completed buffers back to us.
				 * (In this case we're the reader.)
				 */
				for (buf = STAILQ_FIRST(&dev->peer_done_queue);
				     buf != NULL;
				     buf = STAILQ_FIRST(&dev->peer_done_queue)){
					STAILQ_REMOVE_HEAD(
					    &dev->peer_done_queue, work_links);
					dev->num_peer_done_queue--;
					camdd_peer_done(buf);
				}
				break;
			default:
				warnx("%s: unknown kevent filter %d",
				      __func__, ke.filter);
				break;
			}
		}
	}

bailout:

	/*
	 * Clearing ACTIVE tells the peer not to queue anything more to
	 * us; camdd_queue_peer_buf() checks this flag under our mutex.
	 */
	dev->flags &= ~CAMDD_DEV_FLAG_ACTIVE;

	/* XXX KDM cleanup resources here? */

	pthread_mutex_unlock(&dev->mutex);

	/* Announce that this worker has finished. */
	need_exit = 1;
	sem_post(&camdd_sem);

	return (NULL);
}
1719 
1720 /*
1721  * Simplistic translation of CCB status to our local status.
1722  */
1723 camdd_buf_status
1724 camdd_ccb_status(union ccb *ccb)
1725 {
1726 	camdd_buf_status status = CAMDD_STATUS_NONE;
1727 	cam_status ccb_status;
1728 
1729 	ccb_status = ccb->ccb_h.status & CAM_STATUS_MASK;
1730 
1731 	switch (ccb_status) {
1732 	case CAM_REQ_CMP: {
1733 		if (ccb->csio.resid == 0) {
1734 			status = CAMDD_STATUS_OK;
1735 		} else if (ccb->csio.dxfer_len > ccb->csio.resid) {
1736 			status = CAMDD_STATUS_SHORT_IO;
1737 		} else {
1738 			status = CAMDD_STATUS_EOF;
1739 		}
1740 		break;
1741 	}
1742 	case CAM_SCSI_STATUS_ERROR: {
1743 		switch (ccb->csio.scsi_status) {
1744 		case SCSI_STATUS_OK:
1745 		case SCSI_STATUS_COND_MET:
1746 		case SCSI_STATUS_INTERMED:
1747 		case SCSI_STATUS_INTERMED_COND_MET:
1748 			status = CAMDD_STATUS_OK;
1749 			break;
1750 		case SCSI_STATUS_CMD_TERMINATED:
1751 		case SCSI_STATUS_CHECK_COND:
1752 		case SCSI_STATUS_QUEUE_FULL:
1753 		case SCSI_STATUS_BUSY:
1754 		case SCSI_STATUS_RESERV_CONFLICT:
1755 		default:
1756 			status = CAMDD_STATUS_ERROR;
1757 			break;
1758 		}
1759 		break;
1760 	}
1761 	default:
1762 		status = CAMDD_STATUS_ERROR;
1763 		break;
1764 	}
1765 
1766 	return (status);
1767 }
1768 
1769 /*
1770  * Queue a buffer to our peer's work thread for writing.
1771  *
1772  * Returns 0 for success, -1 for failure, 1 if the other thread exited.
1773  */
1774 int
1775 camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf)
1776 {
1777 	struct kevent ke;
1778 	STAILQ_HEAD(, camdd_buf) local_queue;
1779 	struct camdd_buf *buf1, *buf2;
1780 	struct camdd_buf_data *data = NULL;
1781 	uint64_t peer_bytes_queued = 0;
1782 	int active = 1;
1783 	int retval = 0;
1784 
1785 	STAILQ_INIT(&local_queue);
1786 
1787 	/*
1788 	 * Since we're the reader, we need to queue our I/O to the writer
1789 	 * in sequential order in order to make sure it gets written out
1790 	 * in sequential order.
1791 	 *
1792 	 * Check the next expected I/O starting offset.  If this doesn't
1793 	 * match, put it on the reorder queue.
1794 	 */
1795 	if ((buf->lba * dev->sector_size) != dev->next_completion_pos_bytes) {
1796 
1797 		/*
1798 		 * If there is nothing on the queue, there is no sorting
1799 		 * needed.
1800 		 */
1801 		if (STAILQ_EMPTY(&dev->reorder_queue)) {
1802 			STAILQ_INSERT_TAIL(&dev->reorder_queue, buf, links);
1803 			dev->num_reorder_queue++;
1804 			goto bailout;
1805 		}
1806 
1807 		/*
1808 		 * Sort in ascending order by starting LBA.  There should
1809 		 * be no identical LBAs.
1810 		 */
1811 		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
1812 		     buf1 = buf2) {
1813 			buf2 = STAILQ_NEXT(buf1, links);
1814 			if (buf->lba < buf1->lba) {
1815 				/*
1816 				 * If we're less than the first one, then
1817 				 * we insert at the head of the list
1818 				 * because this has to be the first element
1819 				 * on the list.
1820 				 */
1821 				STAILQ_INSERT_HEAD(&dev->reorder_queue,
1822 						   buf, links);
1823 				dev->num_reorder_queue++;
1824 				break;
1825 			} else if (buf->lba > buf1->lba) {
1826 				if (buf2 == NULL) {
1827 					STAILQ_INSERT_TAIL(&dev->reorder_queue,
1828 					    buf, links);
1829 					dev->num_reorder_queue++;
1830 					break;
1831 				} else if (buf->lba < buf2->lba) {
1832 					STAILQ_INSERT_AFTER(&dev->reorder_queue,
1833 					    buf1, buf, links);
1834 					dev->num_reorder_queue++;
1835 					break;
1836 				}
1837 			} else {
1838 				errx(1, "Found buffers with duplicate LBA %ju!",
1839 				     buf->lba);
1840 			}
1841 		}
1842 		goto bailout;
1843 	} else {
1844 
1845 		/*
1846 		 * We're the next expected I/O completion, so put ourselves
1847 		 * on the local queue to be sent to the writer.  We use
1848 		 * work_links here so that we can queue this to the
1849 		 * peer_work_queue before taking the buffer off of the
1850 		 * local_queue.
1851 		 */
1852 		dev->next_completion_pos_bytes += buf->len;
1853 		STAILQ_INSERT_TAIL(&local_queue, buf, work_links);
1854 
1855 		/*
1856 		 * Go through the reorder queue looking for more sequential
1857 		 * I/O and add it to the local queue.
1858 		 */
1859 		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
1860 		     buf1 = STAILQ_FIRST(&dev->reorder_queue)) {
1861 			/*
1862 			 * As soon as we see an I/O that is out of sequence,
1863 			 * we're done.
1864 			 */
1865 			if ((buf1->lba * dev->sector_size) !=
1866 			     dev->next_completion_pos_bytes)
1867 				break;
1868 
1869 			STAILQ_REMOVE_HEAD(&dev->reorder_queue, links);
1870 			dev->num_reorder_queue--;
1871 			STAILQ_INSERT_TAIL(&local_queue, buf1, work_links);
1872 			dev->next_completion_pos_bytes += buf1->len;
1873 		}
1874 	}
1875 
1876 	/*
1877 	 * Setup the event to let the other thread know that it has work
1878 	 * pending.
1879 	 */
1880 	EV_SET(&ke, (uintptr_t)&dev->peer_dev->work_queue, EVFILT_USER, 0,
1881 	       NOTE_TRIGGER, 0, NULL);
1882 
1883 	/*
1884 	 * Put this on our shadow queue so that we know what we've queued
1885 	 * to the other thread.
1886 	 */
1887 	STAILQ_FOREACH_SAFE(buf1, &local_queue, work_links, buf2) {
1888 		if (buf1->buf_type != CAMDD_BUF_DATA) {
1889 			errx(1, "%s: should have a data buffer, not an "
1890 			    "indirect buffer", __func__);
1891 		}
1892 		data = &buf1->buf_type_spec.data;
1893 
1894 		/*
1895 		 * We only need to send one EOF to the writer, and don't
1896 		 * need to continue sending EOFs after that.
1897 		 */
1898 		if (buf1->status == CAMDD_STATUS_EOF) {
1899 			if (dev->flags & CAMDD_DEV_FLAG_EOF_SENT) {
1900 				STAILQ_REMOVE(&local_queue, buf1, camdd_buf,
1901 				    work_links);
1902 				camdd_release_buf(buf1);
1903 				retval = 1;
1904 				continue;
1905 			}
1906 			dev->flags |= CAMDD_DEV_FLAG_EOF_SENT;
1907 		}
1908 
1909 
1910 		STAILQ_INSERT_TAIL(&dev->peer_work_queue, buf1, links);
1911 		peer_bytes_queued += (data->fill_len - data->resid);
1912 		dev->peer_bytes_queued += (data->fill_len - data->resid);
1913 		dev->num_peer_work_queue++;
1914 	}
1915 
1916 	if (STAILQ_FIRST(&local_queue) == NULL)
1917 		goto bailout;
1918 
1919 	/*
1920 	 * Drop our mutex and pick up the other thread's mutex.  We need to
1921 	 * do this to avoid deadlocks.
1922 	 */
1923 	pthread_mutex_unlock(&dev->mutex);
1924 	pthread_mutex_lock(&dev->peer_dev->mutex);
1925 
1926 	if (dev->peer_dev->flags & CAMDD_DEV_FLAG_ACTIVE) {
1927 		/*
1928 		 * Put the buffers on the other thread's incoming work queue.
1929 		 */
1930 		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
1931 		     buf1 = STAILQ_FIRST(&local_queue)) {
1932 			STAILQ_REMOVE_HEAD(&local_queue, work_links);
1933 			STAILQ_INSERT_TAIL(&dev->peer_dev->work_queue, buf1,
1934 					   work_links);
1935 		}
1936 		/*
1937 		 * Send an event to the other thread's kqueue to let it know
1938 		 * that there is something on the work queue.
1939 		 */
1940 		retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
1941 		if (retval == -1)
1942 			warn("%s: unable to add peer work_queue kevent",
1943 			     __func__);
1944 		else
1945 			retval = 0;
1946 	} else
1947 		active = 0;
1948 
1949 	pthread_mutex_unlock(&dev->peer_dev->mutex);
1950 	pthread_mutex_lock(&dev->mutex);
1951 
1952 	/*
1953 	 * If the other side isn't active, run through the queue and
1954 	 * release all of the buffers.
1955 	 */
1956 	if (active == 0) {
1957 		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
1958 		     buf1 = STAILQ_FIRST(&local_queue)) {
1959 			STAILQ_REMOVE_HEAD(&local_queue, work_links);
1960 			STAILQ_REMOVE(&dev->peer_work_queue, buf1, camdd_buf,
1961 				      links);
1962 			dev->num_peer_work_queue--;
1963 			camdd_release_buf(buf1);
1964 		}
1965 		dev->peer_bytes_queued -= peer_bytes_queued;
1966 		retval = 1;
1967 	}
1968 
1969 bailout:
1970 	return (retval);
1971 }
1972 
1973 /*
1974  * Return a buffer to the reader thread when we have completed writing it.
1975  */
1976 int
1977 camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf)
1978 {
1979 	struct kevent ke;
1980 	int retval = 0;
1981 
1982 	/*
1983 	 * Setup the event to let the other thread know that we have
1984 	 * completed a buffer.
1985 	 */
1986 	EV_SET(&ke, (uintptr_t)&dev->peer_dev->peer_done_queue, EVFILT_USER, 0,
1987 	       NOTE_TRIGGER, 0, NULL);
1988 
1989 	/*
1990 	 * Drop our lock and acquire the other thread's lock before
1991 	 * manipulating
1992 	 */
1993 	pthread_mutex_unlock(&dev->mutex);
1994 	pthread_mutex_lock(&dev->peer_dev->mutex);
1995 
1996 	/*
1997 	 * Put the buffer on the reader thread's peer done queue now that
1998 	 * we have completed it.
1999 	 */
2000 	STAILQ_INSERT_TAIL(&dev->peer_dev->peer_done_queue, peer_buf,
2001 			   work_links);
2002 	dev->peer_dev->num_peer_done_queue++;
2003 
2004 	/*
2005 	 * Send an event to the peer thread to let it know that we've added
2006 	 * something to its peer done queue.
2007 	 */
2008 	retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2009 	if (retval == -1)
2010 		warn("%s: unable to add peer_done_queue kevent", __func__);
2011 	else
2012 		retval = 0;
2013 
2014 	/*
2015 	 * Drop the other thread's lock and reacquire ours.
2016 	 */
2017 	pthread_mutex_unlock(&dev->peer_dev->mutex);
2018 	pthread_mutex_lock(&dev->mutex);
2019 
2020 	return (retval);
2021 }
2022 
2023 /*
2024  * Free a buffer that was written out by the writer thread and returned to
2025  * the reader thread.
2026  */
2027 void
2028 camdd_peer_done(struct camdd_buf *buf)
2029 {
2030 	struct camdd_dev *dev;
2031 	struct camdd_buf_data *data;
2032 
2033 	dev = buf->dev;
2034 	if (buf->buf_type != CAMDD_BUF_DATA) {
2035 		errx(1, "%s: should have a data buffer, not an "
2036 		    "indirect buffer", __func__);
2037 	}
2038 
2039 	data = &buf->buf_type_spec.data;
2040 
2041 	STAILQ_REMOVE(&dev->peer_work_queue, buf, camdd_buf, links);
2042 	dev->num_peer_work_queue--;
2043 	dev->peer_bytes_queued -= (data->fill_len - data->resid);
2044 
2045 	if (buf->status == CAMDD_STATUS_EOF)
2046 		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2047 
2048 	STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2049 }
2050 
2051 /*
2052  * Assumes caller holds the lock for this device.
2053  */
2054 void
2055 camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
2056 		   int *error_count)
2057 {
2058 	int retval = 0;
2059 
2060 	/*
2061 	 * If we're the reader, we need to send the completed I/O
2062 	 * to the writer.  If we're the writer, we need to just
2063 	 * free up resources, or let the reader know if we've
2064 	 * encountered an error.
2065 	 */
2066 	if (dev->write_dev == 0) {
2067 		retval = camdd_queue_peer_buf(dev, buf);
2068 		if (retval != 0)
2069 			(*error_count)++;
2070 	} else {
2071 		struct camdd_buf *tmp_buf, *next_buf;
2072 
2073 		STAILQ_FOREACH_SAFE(tmp_buf, &buf->src_list, src_links,
2074 				    next_buf) {
2075 			struct camdd_buf *src_buf;
2076 			struct camdd_buf_indirect *indirect;
2077 
2078 			STAILQ_REMOVE(&buf->src_list, tmp_buf,
2079 				      camdd_buf, src_links);
2080 
2081 			tmp_buf->status = buf->status;
2082 
2083 			if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
2084 				camdd_complete_peer_buf(dev, tmp_buf);
2085 				continue;
2086 			}
2087 
2088 			indirect = &tmp_buf->buf_type_spec.indirect;
2089 			src_buf = indirect->src_buf;
2090 			src_buf->refcount--;
2091 			/*
2092 			 * XXX KDM we probably need to account for
2093 			 * exactly how many bytes we were able to
2094 			 * write.  Allocate the residual to the
2095 			 * first N buffers?  Or just track the
2096 			 * number of bytes written?  Right now the reader
2097 			 * doesn't do anything with a residual.
2098 			 */
2099 			src_buf->status = buf->status;
2100 			if (src_buf->refcount <= 0)
2101 				camdd_complete_peer_buf(dev, src_buf);
2102 			STAILQ_INSERT_TAIL(&dev->free_indirect_queue,
2103 					   tmp_buf, links);
2104 		}
2105 
2106 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2107 	}
2108 }
2109 
2110 /*
2111  * Fetch all completed commands from the pass(4) device.
2112  *
2113  * Returns the number of commands received, or -1 if any of the commands
2114  * completed with an error.  Returns 0 if no commands are available.
2115  */
2116 int
2117 camdd_pass_fetch(struct camdd_dev *dev)
2118 {
2119 	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2120 	union ccb ccb;
2121 	int retval = 0, num_fetched = 0, error_count = 0;
2122 
2123 	pthread_mutex_unlock(&dev->mutex);
2124 	/*
2125 	 * XXX KDM we don't distinguish between EFAULT and ENOENT.
2126 	 */
2127 	while ((retval = ioctl(pass_dev->dev->fd, CAMIOGET, &ccb)) != -1) {
2128 		struct camdd_buf *buf;
2129 		struct camdd_buf_data *data;
2130 		cam_status ccb_status;
2131 		union ccb *buf_ccb;
2132 
2133 		buf = ccb.ccb_h.ccb_buf;
2134 		data = &buf->buf_type_spec.data;
2135 		buf_ccb = &data->ccb;
2136 
2137 		num_fetched++;
2138 
2139 		/*
2140 		 * Copy the CCB back out so we get status, sense data, etc.
2141 		 */
2142 		bcopy(&ccb, buf_ccb, sizeof(ccb));
2143 
2144 		pthread_mutex_lock(&dev->mutex);
2145 
2146 		/*
2147 		 * We're now done, so take this off the active queue.
2148 		 */
2149 		STAILQ_REMOVE(&dev->active_queue, buf, camdd_buf, links);
2150 		dev->cur_active_io--;
2151 
2152 		ccb_status = ccb.ccb_h.status & CAM_STATUS_MASK;
2153 		if (ccb_status != CAM_REQ_CMP) {
2154 			cam_error_print(pass_dev->dev, &ccb, CAM_ESF_ALL,
2155 					CAM_EPF_ALL, stderr);
2156 		}
2157 
2158 		data->resid = ccb.csio.resid;
2159 		dev->bytes_transferred += (ccb.csio.dxfer_len - ccb.csio.resid);
2160 
2161 		if (buf->status == CAMDD_STATUS_NONE)
2162 			buf->status = camdd_ccb_status(&ccb);
2163 		if (buf->status == CAMDD_STATUS_ERROR)
2164 			error_count++;
2165 		else if (buf->status == CAMDD_STATUS_EOF) {
2166 			/*
2167 			 * Once we queue this buffer to our partner thread,
2168 			 * he will know that we've hit EOF.
2169 			 */
2170 			dev->flags |= CAMDD_DEV_FLAG_EOF;
2171 		}
2172 
2173 		camdd_complete_buf(dev, buf, &error_count);
2174 
2175 		/*
2176 		 * Unlock in preparation for the ioctl call.
2177 		 */
2178 		pthread_mutex_unlock(&dev->mutex);
2179 	}
2180 
2181 	pthread_mutex_lock(&dev->mutex);
2182 
2183 	if (error_count > 0)
2184 		return (-1);
2185 	else
2186 		return (num_fetched);
2187 }
2188 
2189 /*
2190  * Returns -1 for error, 0 for success/continue, and 1 for resource
2191  * shortage/stop processing.
2192  */
2193 int
2194 camdd_file_run(struct camdd_dev *dev)
2195 {
2196 	struct camdd_dev_file *file_dev = &dev->dev_spec.file;
2197 	struct camdd_buf_data *data;
2198 	struct camdd_buf *buf;
2199 	off_t io_offset;
2200 	int retval = 0, write_dev = dev->write_dev;
2201 	int error_count = 0, no_resources = 0, double_buf_needed = 0;
2202 	uint32_t num_sectors = 0, db_len = 0;
2203 
2204 	buf = STAILQ_FIRST(&dev->run_queue);
2205 	if (buf == NULL) {
2206 		no_resources = 1;
2207 		goto bailout;
2208 	} else if ((dev->write_dev == 0)
2209 		&& (dev->flags & (CAMDD_DEV_FLAG_EOF |
2210 				  CAMDD_DEV_FLAG_EOF_SENT))) {
2211 		STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2212 		dev->num_run_queue--;
2213 		buf->status = CAMDD_STATUS_EOF;
2214 		error_count++;
2215 		goto bailout;
2216 	}
2217 
2218 	/*
2219 	 * If we're writing, we need to go through the source buffer list
2220 	 * and create an S/G list.
2221 	 */
2222 	if (write_dev != 0) {
2223 		retval = camdd_buf_sg_create(buf, /*iovec*/ 1,
2224 		    dev->sector_size, &num_sectors, &double_buf_needed);
2225 		if (retval != 0) {
2226 			no_resources = 1;
2227 			goto bailout;
2228 		}
2229 	}
2230 
2231 	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2232 	dev->num_run_queue--;
2233 
2234 	data = &buf->buf_type_spec.data;
2235 
2236 	/*
2237 	 * pread(2) and pwrite(2) offsets are byte offsets.
2238 	 */
2239 	io_offset = buf->lba * dev->sector_size;
2240 
2241 	/*
2242 	 * Unlock the mutex while we read or write.
2243 	 */
2244 	pthread_mutex_unlock(&dev->mutex);
2245 
2246 	/*
2247 	 * Note that we don't need to double buffer if we're the reader
2248 	 * because in that case, we have allocated a single buffer of
2249 	 * sufficient size to do the read.  This copy is necessary on
2250 	 * writes because if one of the components of the S/G list is not
2251 	 * a sector size multiple, the kernel will reject the write.  This
2252 	 * is unfortunate but not surprising.  So this will make sure that
2253 	 * we're using a single buffer that is a multiple of the sector size.
2254 	 */
2255 	if ((double_buf_needed != 0)
2256 	 && (data->sg_count > 1)
2257 	 && (write_dev != 0)) {
2258 		uint32_t cur_offset;
2259 		int i;
2260 
2261 		if (file_dev->tmp_buf == NULL)
2262 			file_dev->tmp_buf = calloc(dev->blocksize, 1);
2263 		if (file_dev->tmp_buf == NULL) {
2264 			buf->status = CAMDD_STATUS_ERROR;
2265 			error_count++;
2266 			goto bailout;
2267 		}
2268 		for (i = 0, cur_offset = 0; i < data->sg_count; i++) {
2269 			bcopy(data->iovec[i].iov_base,
2270 			    &file_dev->tmp_buf[cur_offset],
2271 			    data->iovec[i].iov_len);
2272 			cur_offset += data->iovec[i].iov_len;
2273 		}
2274 		db_len = cur_offset;
2275 	}
2276 
2277 	if (file_dev->file_flags & CAMDD_FF_CAN_SEEK) {
2278 		if (write_dev == 0) {
2279 			/*
2280 			 * XXX KDM is there any way we would need a S/G
2281 			 * list here?
2282 			 */
2283 			retval = pread(file_dev->fd, data->buf,
2284 			    buf->len, io_offset);
2285 		} else {
2286 			if (double_buf_needed != 0) {
2287 				retval = pwrite(file_dev->fd, file_dev->tmp_buf,
2288 				    db_len, io_offset);
2289 			} else if (data->sg_count == 0) {
2290 				retval = pwrite(file_dev->fd, data->buf,
2291 				    data->fill_len, io_offset);
2292 			} else {
2293 				retval = pwritev(file_dev->fd, data->iovec,
2294 				    data->sg_count, io_offset);
2295 			}
2296 		}
2297 	} else {
2298 		if (write_dev == 0) {
2299 			/*
2300 			 * XXX KDM is there any way we would need a S/G
2301 			 * list here?
2302 			 */
2303 			retval = read(file_dev->fd, data->buf, buf->len);
2304 		} else {
2305 			if (double_buf_needed != 0) {
2306 				retval = write(file_dev->fd, file_dev->tmp_buf,
2307 				    db_len);
2308 			} else if (data->sg_count == 0) {
2309 				retval = write(file_dev->fd, data->buf,
2310 				    data->fill_len);
2311 			} else {
2312 				retval = writev(file_dev->fd, data->iovec,
2313 				    data->sg_count);
2314 			}
2315 		}
2316 	}
2317 
2318 	/* We're done, re-acquire the lock */
2319 	pthread_mutex_lock(&dev->mutex);
2320 
2321 	if (retval >= (ssize_t)data->fill_len) {
2322 		/*
2323 		 * If the bytes transferred is more than the request size,
2324 		 * that indicates an overrun, which should only happen at
2325 		 * the end of a transfer if we have to round up to a sector
2326 		 * boundary.
2327 		 */
2328 		if (buf->status == CAMDD_STATUS_NONE)
2329 			buf->status = CAMDD_STATUS_OK;
2330 		data->resid = 0;
2331 		dev->bytes_transferred += retval;
2332 	} else if (retval == -1) {
2333 		warn("Error %s %s", (write_dev) ? "writing to" :
2334 		    "reading from", file_dev->filename);
2335 
2336 		buf->status = CAMDD_STATUS_ERROR;
2337 		data->resid = data->fill_len;
2338 		error_count++;
2339 
2340 		if (dev->debug == 0)
2341 			goto bailout;
2342 
2343 		if ((double_buf_needed != 0)
2344 		 && (write_dev != 0)) {
2345 			fprintf(stderr, "%s: fd %d, DB buf %p, len %u lba %ju "
2346 			    "offset %ju\n", __func__, file_dev->fd,
2347 			    file_dev->tmp_buf, db_len, (uintmax_t)buf->lba,
2348 			    (uintmax_t)io_offset);
2349 		} else if (data->sg_count == 0) {
2350 			fprintf(stderr, "%s: fd %d, buf %p, len %u, lba %ju "
2351 			    "offset %ju\n", __func__, file_dev->fd, data->buf,
2352 			    data->fill_len, (uintmax_t)buf->lba,
2353 			    (uintmax_t)io_offset);
2354 		} else {
2355 			int i;
2356 
2357 			fprintf(stderr, "%s: fd %d, len %u, lba %ju "
2358 			    "offset %ju\n", __func__, file_dev->fd,
2359 			    data->fill_len, (uintmax_t)buf->lba,
2360 			    (uintmax_t)io_offset);
2361 
2362 			for (i = 0; i < data->sg_count; i++) {
2363 				fprintf(stderr, "index %d ptr %p len %zu\n",
2364 				    i, data->iovec[i].iov_base,
2365 				    data->iovec[i].iov_len);
2366 			}
2367 		}
2368 	} else if (retval == 0) {
2369 		buf->status = CAMDD_STATUS_EOF;
2370 		if (dev->debug != 0)
2371 			printf("%s: got EOF from %s!\n", __func__,
2372 			    file_dev->filename);
2373 		data->resid = data->fill_len;
2374 		error_count++;
2375 	} else if (retval < (ssize_t)data->fill_len) {
2376 		if (buf->status == CAMDD_STATUS_NONE)
2377 			buf->status = CAMDD_STATUS_SHORT_IO;
2378 		data->resid = data->fill_len - retval;
2379 		dev->bytes_transferred += retval;
2380 	}
2381 
2382 bailout:
2383 	if (buf != NULL) {
2384 		if (buf->status == CAMDD_STATUS_EOF) {
2385 			struct camdd_buf *buf2;
2386 			dev->flags |= CAMDD_DEV_FLAG_EOF;
2387 			STAILQ_FOREACH(buf2, &dev->run_queue, links)
2388 				buf2->status = CAMDD_STATUS_EOF;
2389 		}
2390 
2391 		camdd_complete_buf(dev, buf, &error_count);
2392 	}
2393 
2394 	if (error_count != 0)
2395 		return (-1);
2396 	else if (no_resources != 0)
2397 		return (1);
2398 	else
2399 		return (0);
2400 }
2401 
2402 /*
2403  * Execute one command from the run queue.  Returns 0 for success, 1 for
2404  * stop processing, and -1 for error.
2405  */
2406 int
2407 camdd_pass_run(struct camdd_dev *dev)
2408 {
2409 	struct camdd_buf *buf = NULL;
2410 	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2411 	struct camdd_buf_data *data;
2412 	uint32_t num_blocks, sectors_used = 0;
2413 	union ccb *ccb;
2414 	int retval = 0, is_write = dev->write_dev;
2415 	int double_buf_needed = 0;
2416 
2417 	buf = STAILQ_FIRST(&dev->run_queue);
2418 	if (buf == NULL) {
2419 		retval = 1;
2420 		goto bailout;
2421 	}
2422 
2423 	/*
2424 	 * If we're writing, we need to go through the source buffer list
2425 	 * and create an S/G list.
2426 	 */
2427 	if (is_write != 0) {
2428 		retval = camdd_buf_sg_create(buf, /*iovec*/ 0,dev->sector_size,
2429 		    &sectors_used, &double_buf_needed);
2430 		if (retval != 0) {
2431 			retval = -1;
2432 			goto bailout;
2433 		}
2434 	}
2435 
2436 	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2437 	dev->num_run_queue--;
2438 
2439 	data = &buf->buf_type_spec.data;
2440 
2441 	ccb = &data->ccb;
2442 	bzero(&(&ccb->ccb_h)[1],
2443 	      sizeof(struct ccb_scsiio) - sizeof(struct ccb_hdr));
2444 
2445 	/*
2446 	 * In almost every case the number of blocks should be the device
2447 	 * block size.  The exception may be at the end of an I/O stream
2448 	 * for a partial block or at the end of a device.
2449 	 */
2450 	if (is_write != 0)
2451 		num_blocks = sectors_used;
2452 	else
2453 		num_blocks = data->fill_len / pass_dev->block_len;
2454 
2455 	scsi_read_write(&ccb->csio,
2456 			/*retries*/ dev->retry_count,
2457 			/*cbfcnp*/ NULL,
2458 			/*tag_action*/ MSG_SIMPLE_Q_TAG,
2459 			/*readop*/ (dev->write_dev == 0) ? SCSI_RW_READ :
2460 				   SCSI_RW_WRITE,
2461 			/*byte2*/ 0,
2462 			/*minimum_cmd_size*/ dev->min_cmd_size,
2463 			/*lba*/ buf->lba,
2464 			/*block_count*/ num_blocks,
2465 			/*data_ptr*/ (data->sg_count != 0) ?
2466 				     (uint8_t *)data->segs : data->buf,
2467 			/*dxfer_len*/ (num_blocks * pass_dev->block_len),
2468 			/*sense_len*/ SSD_FULL_SIZE,
2469 			/*timeout*/ dev->io_timeout);
2470 
2471 	/* Disable freezing the device queue */
2472 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
2473 
2474 	if (dev->retry_count != 0)
2475 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
2476 
2477 	if (data->sg_count != 0) {
2478 		ccb->csio.sglist_cnt = data->sg_count;
2479 		ccb->ccb_h.flags |= CAM_DATA_SG;
2480 	}
2481 
2482 	/*
2483 	 * Store a pointer to the buffer in the CCB.  The kernel will
2484 	 * restore this when we get it back, and we'll use it to identify
2485 	 * the buffer this CCB came from.
2486 	 */
2487 	ccb->ccb_h.ccb_buf = buf;
2488 
2489 	/*
2490 	 * Unlock our mutex in preparation for issuing the ioctl.
2491 	 */
2492 	pthread_mutex_unlock(&dev->mutex);
2493 	/*
2494 	 * Queue the CCB to the pass(4) driver.
2495 	 */
2496 	if (ioctl(pass_dev->dev->fd, CAMIOQUEUE, ccb) == -1) {
2497 		pthread_mutex_lock(&dev->mutex);
2498 
2499 		warn("%s: error sending CAMIOQUEUE ioctl to %s%u", __func__,
2500 		     pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
2501 		warn("%s: CCB address is %p", __func__, ccb);
2502 		retval = -1;
2503 
2504 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2505 	} else {
2506 		pthread_mutex_lock(&dev->mutex);
2507 
2508 		dev->cur_active_io++;
2509 		STAILQ_INSERT_TAIL(&dev->active_queue, buf, links);
2510 	}
2511 
2512 bailout:
2513 	return (retval);
2514 }
2515 
2516 int
2517 camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len)
2518 {
2519 	struct camdd_dev_pass *pass_dev;
2520 	uint32_t num_blocks;
2521 	int retval = 0;
2522 
2523 	pass_dev = &dev->dev_spec.pass;
2524 
2525 	*lba = dev->next_io_pos_bytes / dev->sector_size;
2526 	*len = dev->blocksize;
2527 	num_blocks = *len / dev->sector_size;
2528 
2529 	/*
2530 	 * If max_sector is 0, then we have no set limit.  This can happen
2531 	 * if we're writing to a file in a filesystem, or reading from
2532 	 * something like /dev/zero.
2533 	 */
2534 	if ((dev->max_sector != 0)
2535 	 || (dev->sector_io_limit != 0)) {
2536 		uint64_t max_sector;
2537 
2538 		if ((dev->max_sector != 0)
2539 		 && (dev->sector_io_limit != 0))
2540 			max_sector = min(dev->sector_io_limit, dev->max_sector);
2541 		else if (dev->max_sector != 0)
2542 			max_sector = dev->max_sector;
2543 		else
2544 			max_sector = dev->sector_io_limit;
2545 
2546 
2547 		/*
2548 		 * Check to see whether we're starting off past the end of
2549 		 * the device.  If so, we need to just send an EOF
2550 		 * notification to the writer.
2551 		 */
2552 		if (*lba > max_sector) {
2553 			*len = 0;
2554 			retval = 1;
2555 		} else if (((*lba + num_blocks) > max_sector + 1)
2556 			|| ((*lba + num_blocks) < *lba)) {
2557 			/*
2558 			 * If we get here (but pass the first check), we
2559 			 * can trim the request length down to go to the
2560 			 * end of the device.
2561 			 */
2562 			num_blocks = (max_sector + 1) - *lba;
2563 			*len = num_blocks * dev->sector_size;
2564 			retval = 1;
2565 		}
2566 	}
2567 
2568 	dev->next_io_pos_bytes += *len;
2569 
2570 	return (retval);
2571 }
2572 
2573 /*
2574  * Returns 0 for success, 1 for EOF detected, and -1 for failure.
2575  */
2576 int
2577 camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf)
2578 {
2579 	struct camdd_buf *buf = NULL;
2580 	struct camdd_buf_data *data;
2581 	struct camdd_dev_pass *pass_dev;
2582 	size_t new_len;
2583 	struct camdd_buf_data *rb_data;
2584 	int is_write = dev->write_dev;
2585 	int eof_flush_needed = 0;
2586 	int retval = 0;
2587 	int error;
2588 
2589 	pass_dev = &dev->dev_spec.pass;
2590 
2591 	/*
2592 	 * If we've gotten EOF or our partner has, we should not continue
2593 	 * queueing I/O.  If we're a writer, though, we should continue
2594 	 * to write any buffers that don't have EOF status.
2595 	 */
2596 	if ((dev->flags & CAMDD_DEV_FLAG_EOF)
2597 	 || ((dev->flags & CAMDD_DEV_FLAG_PEER_EOF)
2598 	  && (is_write == 0))) {
2599 		/*
2600 		 * Tell the worker thread that we have seen EOF.
2601 		 */
2602 		retval = 1;
2603 
2604 		/*
2605 		 * If we're the writer, send the buffer back with EOF status.
2606 		 */
2607 		if (is_write) {
2608 			read_buf->status = CAMDD_STATUS_EOF;
2609 
2610 			error = camdd_complete_peer_buf(dev, read_buf);
2611 		}
2612 		goto bailout;
2613 	}
2614 
2615 	if (is_write == 0) {
2616 		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2617 		if (buf == NULL) {
2618 			retval = -1;
2619 			goto bailout;
2620 		}
2621 		data = &buf->buf_type_spec.data;
2622 
2623 		retval = camdd_get_next_lba_len(dev, &buf->lba, &buf->len);
2624 		if (retval != 0) {
2625 			buf->status = CAMDD_STATUS_EOF;
2626 
2627 		 	if ((buf->len == 0)
2628 			 && ((dev->flags & (CAMDD_DEV_FLAG_EOF_SENT |
2629 			     CAMDD_DEV_FLAG_EOF_QUEUED)) != 0)) {
2630 				camdd_release_buf(buf);
2631 				goto bailout;
2632 			}
2633 			dev->flags |= CAMDD_DEV_FLAG_EOF_QUEUED;
2634 		}
2635 
2636 		data->fill_len = buf->len;
2637 		data->src_start_offset = buf->lba * dev->sector_size;
2638 
2639 		/*
2640 		 * Put this on the run queue.
2641 		 */
2642 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2643 		dev->num_run_queue++;
2644 
2645 		/* We're done. */
2646 		goto bailout;
2647 	}
2648 
2649 	/*
2650 	 * Check for new EOF status from the reader.
2651 	 */
2652 	if ((read_buf->status == CAMDD_STATUS_EOF)
2653 	 || (read_buf->status == CAMDD_STATUS_ERROR)) {
2654 		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2655 		if ((STAILQ_FIRST(&dev->pending_queue) == NULL)
2656 		 && (read_buf->len == 0)) {
2657 			camdd_complete_peer_buf(dev, read_buf);
2658 			retval = 1;
2659 			goto bailout;
2660 		} else
2661 			eof_flush_needed = 1;
2662 	}
2663 
2664 	/*
2665 	 * See if we have a buffer we're composing with pieces from our
2666 	 * partner thread.
2667 	 */
2668 	buf = STAILQ_FIRST(&dev->pending_queue);
2669 	if (buf == NULL) {
2670 		uint64_t lba;
2671 		ssize_t len;
2672 
2673 		retval = camdd_get_next_lba_len(dev, &lba, &len);
2674 		if (retval != 0) {
2675 			read_buf->status = CAMDD_STATUS_EOF;
2676 
2677 			if (len == 0) {
2678 				dev->flags |= CAMDD_DEV_FLAG_EOF;
2679 				error = camdd_complete_peer_buf(dev, read_buf);
2680 				goto bailout;
2681 			}
2682 		}
2683 
2684 		/*
2685 		 * If we don't have a pending buffer, we need to grab a new
2686 		 * one from the free list or allocate another one.
2687 		 */
2688 		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2689 		if (buf == NULL) {
2690 			retval = 1;
2691 			goto bailout;
2692 		}
2693 
2694 		buf->lba = lba;
2695 		buf->len = len;
2696 
2697 		STAILQ_INSERT_TAIL(&dev->pending_queue, buf, links);
2698 		dev->num_pending_queue++;
2699 	}
2700 
2701 	data = &buf->buf_type_spec.data;
2702 
2703 	rb_data = &read_buf->buf_type_spec.data;
2704 
2705 	if ((rb_data->src_start_offset != dev->next_peer_pos_bytes)
2706 	 && (dev->debug != 0)) {
2707 		printf("%s: WARNING: reader offset %#jx != expected offset "
2708 		    "%#jx\n", __func__, (uintmax_t)rb_data->src_start_offset,
2709 		    (uintmax_t)dev->next_peer_pos_bytes);
2710 	}
2711 	dev->next_peer_pos_bytes = rb_data->src_start_offset +
2712 	    (rb_data->fill_len - rb_data->resid);
2713 
2714 	new_len = (rb_data->fill_len - rb_data->resid) + data->fill_len;
2715 	if (new_len < buf->len) {
2716 		/*
2717 		 * There are three cases here:
2718 		 * 1. We need more data to fill up a block, so we put
2719 		 *    this I/O on the queue and wait for more I/O.
2720 		 * 2. We have a pending buffer in the queue that is
2721 		 *    smaller than our blocksize, but we got an EOF.  So we
2722 		 *    need to go ahead and flush the write out.
2723 		 * 3. We got an error.
2724 		 */
2725 
2726 		/*
2727 		 * Increment our fill length.
2728 		 */
2729 		data->fill_len += (rb_data->fill_len - rb_data->resid);
2730 
2731 		/*
2732 		 * Add the new read buffer to the list for writing.
2733 		 */
2734 		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
2735 
2736 		/* Increment the count */
2737 		buf->src_count++;
2738 
2739 		if (eof_flush_needed == 0) {
2740 			/*
2741 			 * We need to exit, because we don't have enough
2742 			 * data yet.
2743 			 */
2744 			goto bailout;
2745 		} else {
2746 			/*
2747 			 * Take the buffer off of the pending queue.
2748 			 */
2749 			STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
2750 				      links);
2751 			dev->num_pending_queue--;
2752 
2753 			/*
2754 			 * If we need an EOF flush, but there is no data
2755 			 * to flush, go ahead and return this buffer.
2756 			 */
2757 			if (data->fill_len == 0) {
2758 				camdd_complete_buf(dev, buf, /*error_count*/0);
2759 				retval = 1;
2760 				goto bailout;
2761 			}
2762 
2763 			/*
2764 			 * Put this on the next queue for execution.
2765 			 */
2766 			STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2767 			dev->num_run_queue++;
2768 		}
2769 	} else if (new_len == buf->len) {
2770 		/*
2771 		 * We have enough data to completey fill one block,
2772 		 * so we're ready to issue the I/O.
2773 		 */
2774 
2775 		/*
2776 		 * Take the buffer off of the pending queue.
2777 		 */
2778 		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf, links);
2779 		dev->num_pending_queue--;
2780 
2781 		/*
2782 		 * Add the new read buffer to the list for writing.
2783 		 */
2784 		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
2785 
2786 		/* Increment the count */
2787 		buf->src_count++;
2788 
2789 		/*
2790 		 * Increment our fill length.
2791 		 */
2792 		data->fill_len += (rb_data->fill_len - rb_data->resid);
2793 
2794 		/*
2795 		 * Put this on the next queue for execution.
2796 		 */
2797 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2798 		dev->num_run_queue++;
2799 	} else {
2800 		struct camdd_buf *idb;
2801 		struct camdd_buf_indirect *indirect;
2802 		uint32_t len_to_go, cur_offset;
2803 
2804 
2805 		idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
2806 		if (idb == NULL) {
2807 			retval = 1;
2808 			goto bailout;
2809 		}
2810 		indirect = &idb->buf_type_spec.indirect;
2811 		indirect->src_buf = read_buf;
2812 		read_buf->refcount++;
2813 		indirect->offset = 0;
2814 		indirect->start_ptr = rb_data->buf;
2815 		/*
2816 		 * We've already established that there is more
2817 		 * data in read_buf than we have room for in our
2818 		 * current write request.  So this particular chunk
2819 		 * of the request should just be the remainder
2820 		 * needed to fill up a block.
2821 		 */
2822 		indirect->len = buf->len - (data->fill_len - data->resid);
2823 
2824 		camdd_buf_add_child(buf, idb);
2825 
2826 		/*
2827 		 * This buffer is ready to execute, so we can take
2828 		 * it off the pending queue and put it on the run
2829 		 * queue.
2830 		 */
2831 		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
2832 			      links);
2833 		dev->num_pending_queue--;
2834 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2835 		dev->num_run_queue++;
2836 
2837 		cur_offset = indirect->offset + indirect->len;
2838 
2839 		/*
2840 		 * The resulting I/O would be too large to fit in
2841 		 * one block.  We need to split this I/O into
2842 		 * multiple pieces.  Allocate as many buffers as needed.
2843 		 */
2844 		for (len_to_go = rb_data->fill_len - rb_data->resid -
2845 		     indirect->len; len_to_go > 0;) {
2846 			struct camdd_buf *new_buf;
2847 			struct camdd_buf_data *new_data;
2848 			uint64_t lba;
2849 			ssize_t len;
2850 
2851 			retval = camdd_get_next_lba_len(dev, &lba, &len);
2852 			if ((retval != 0)
2853 			 && (len == 0)) {
2854 				/*
2855 				 * The device has already been marked
2856 				 * as EOF, and there is no space left.
2857 				 */
2858 				goto bailout;
2859 			}
2860 
2861 			new_buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2862 			if (new_buf == NULL) {
2863 				retval = 1;
2864 				goto bailout;
2865 			}
2866 
2867 			new_buf->lba = lba;
2868 			new_buf->len = len;
2869 
2870 			idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
2871 			if (idb == NULL) {
2872 				retval = 1;
2873 				goto bailout;
2874 			}
2875 
2876 			indirect = &idb->buf_type_spec.indirect;
2877 
2878 			indirect->src_buf = read_buf;
2879 			read_buf->refcount++;
2880 			indirect->offset = cur_offset;
2881 			indirect->start_ptr = rb_data->buf + cur_offset;
2882 			indirect->len = min(len_to_go, new_buf->len);
2883 #if 0
2884 			if (((indirect->len % dev->sector_size) != 0)
2885 			 || ((indirect->offset % dev->sector_size) != 0)) {
2886 				warnx("offset %ju len %ju not aligned with "
2887 				    "sector size %u", indirect->offset,
2888 				    (uintmax_t)indirect->len, dev->sector_size);
2889 			}
2890 #endif
2891 			cur_offset += indirect->len;
2892 			len_to_go -= indirect->len;
2893 
2894 			camdd_buf_add_child(new_buf, idb);
2895 
2896 			new_data = &new_buf->buf_type_spec.data;
2897 
2898 			if ((new_data->fill_len == new_buf->len)
2899 			 || (eof_flush_needed != 0)) {
2900 				STAILQ_INSERT_TAIL(&dev->run_queue,
2901 						   new_buf, links);
2902 				dev->num_run_queue++;
2903 			} else if (new_data->fill_len < buf->len) {
2904 				STAILQ_INSERT_TAIL(&dev->pending_queue,
2905 					   	new_buf, links);
2906 				dev->num_pending_queue++;
2907 			} else {
2908 				warnx("%s: too much data in new "
2909 				      "buffer!", __func__);
2910 				retval = 1;
2911 				goto bailout;
2912 			}
2913 		}
2914 	}
2915 
2916 bailout:
2917 	return (retval);
2918 }
2919 
2920 void
2921 camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
2922 		uint32_t *peer_depth, uint32_t *our_bytes, uint32_t *peer_bytes)
2923 {
2924 	*our_depth = dev->cur_active_io + dev->num_run_queue;
2925 	if (dev->num_peer_work_queue >
2926 	    dev->num_peer_done_queue)
2927 		*peer_depth = dev->num_peer_work_queue -
2928 			      dev->num_peer_done_queue;
2929 	else
2930 		*peer_depth = 0;
2931 	*our_bytes = *our_depth * dev->blocksize;
2932 	*peer_bytes = dev->peer_bytes_queued;
2933 }
2934 
2935 void
2936 camdd_sig_handler(int sig)
2937 {
2938 	if (sig == SIGINFO)
2939 		need_status = 1;
2940 	else {
2941 		need_exit = 1;
2942 		error_exit = 1;
2943 	}
2944 
2945 	sem_post(&camdd_sem);
2946 }
2947 
2948 void
2949 camdd_print_status(struct camdd_dev *camdd_dev, struct camdd_dev *other_dev,
2950 		   struct timespec *start_time)
2951 {
2952 	struct timespec done_time;
2953 	uint64_t total_ns;
2954 	long double mb_sec, total_sec;
2955 	int error = 0;
2956 
2957 	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &done_time);
2958 	if (error != 0) {
2959 		warn("Unable to get done time");
2960 		return;
2961 	}
2962 
2963 	timespecsub(&done_time, start_time);
2964 
2965 	total_ns = done_time.tv_nsec + (done_time.tv_sec * 1000000000);
2966 	total_sec = total_ns;
2967 	total_sec /= 1000000000;
2968 
2969 	fprintf(stderr, "%ju bytes %s %s\n%ju bytes %s %s\n"
2970 		"%.4Lf seconds elapsed\n",
2971 		(uintmax_t)camdd_dev->bytes_transferred,
2972 		(camdd_dev->write_dev == 0) ?  "read from" : "written to",
2973 		camdd_dev->device_name,
2974 		(uintmax_t)other_dev->bytes_transferred,
2975 		(other_dev->write_dev == 0) ? "read from" : "written to",
2976 		other_dev->device_name, total_sec);
2977 
2978 	mb_sec = min(other_dev->bytes_transferred,camdd_dev->bytes_transferred);
2979 	mb_sec /= 1024 * 1024;
2980 	mb_sec *= 1000000000;
2981 	mb_sec /= total_ns;
2982 	fprintf(stderr, "%.2Lf MB/sec\n", mb_sec);
2983 }
2984 
/*
 * Top-level copy routine: open and probe the two endpoints described by
 * io_opts, start one worker thread per endpoint, and wait until a worker
 * or a signal posts camdd_sem with need_exit set.  SIGINFO prints a
 * status report along the way, and a final report is printed once both
 * workers have been joined.
 *
 * io_opts/num_io_opts - endpoint descriptions; exactly two are required.
 * max_io              - if non-zero, maximum number of bytes to transfer.
 * retry_count/timeout - passed on to the device probe routines.
 *
 * Returns 0 on success and non-zero on failure (the local error value
 * plus the global error_exit flag).
 */
int
camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts, uint64_t max_io,
	 int retry_count, int timeout)
{
	struct cam_device *new_cam_dev = NULL;
	struct camdd_dev *devs[2];
	struct timespec start_time;
	pthread_t threads[2];
	int unit = 0;
	int error = 0;
	int i;

	/*
	 * Clear the device table before anything can jump to bailout:
	 * the cleanup code there walks every slot.
	 */
	bzero(devs, sizeof(devs));

	if (num_io_opts != 2) {
		warnx("Must have one input and one output path");
		error = 1;
		goto bailout;
	}

	for (i = 0; i < num_io_opts; i++) {
		switch (io_opts[i].dev_type) {
		case CAMDD_DEV_PASS: {
			camdd_argmask new_arglist = CAMDD_ARG_NONE;
			int bus = 0, target = 0, lun = 0;
			char name[30] = "";
			int rv;

			/* <ctype.h> functions need an unsigned char value. */
			if (isdigit((unsigned char)io_opts[i].dev_name[0])) {
				/* device specified as bus:target[:lun] */
				rv = parse_btl(io_opts[i].dev_name, &bus,
				    &target, &lun, &new_arglist);
				if (rv < 2) {
					warnx("numeric device specification "
					     "must be either bus:target, or "
					     "bus:target:lun");
					error = 1;
					goto bailout;
				}
				/* default to 0 if lun was not specified */
				if ((new_arglist & CAMDD_ARG_LUN) == 0) {
					lun = 0;
					new_arglist |= CAMDD_ARG_LUN;
				}
			} else {
				if (cam_get_device(io_opts[i].dev_name, name,
						   sizeof name, &unit) == -1) {
					warnx("%s", cam_errbuf);
					error = 1;
					goto bailout;
				}
				new_arglist |= CAMDD_ARG_DEVICE |CAMDD_ARG_UNIT;
			}

			/*
			 * The parsed name is only needed for the open call
			 * below, so it is used straight from the local
			 * array instead of from a heap copy that would
			 * never be freed.
			 */
			if (new_arglist & (CAMDD_ARG_BUS | CAMDD_ARG_TARGET))
				new_cam_dev = cam_open_btl(bus, target, lun,
				    O_RDWR, NULL);
			else
				new_cam_dev = cam_open_spec_device(name, unit,
				    O_RDWR, NULL);
			if (new_cam_dev == NULL) {
				warnx("%s", cam_errbuf);
				error = 1;
				goto bailout;
			}

			devs[i] = camdd_probe_pass(new_cam_dev,
			    /*io_opts*/ &io_opts[i],
			    CAMDD_ARG_ERR_RECOVER,
			    /*probe_retry_count*/ 3,
			    /*probe_timeout*/ 5000,
			    /*io_retry_count*/ retry_count,
			    /*io_timeout*/ timeout);
			if (devs[i] == NULL) {
				warn("Unable to probe device %s%u",
				     new_cam_dev->device_name,
				     new_cam_dev->dev_unit_num);
				error = 1;
				goto bailout;
			}
			break;
		}
		case CAMDD_DEV_FILE: {
			int fd = -1;

			/* A leading '-' selects stdin or stdout. */
			if (io_opts[i].dev_name[0] == '-') {
				if (io_opts[i].write_dev != 0)
					fd = STDOUT_FILENO;
				else
					fd = STDIN_FILENO;
			} else {
				if (io_opts[i].write_dev != 0) {
					fd = open(io_opts[i].dev_name,
					    O_RDWR | O_CREAT, S_IWUSR |S_IRUSR);
				} else {
					fd = open(io_opts[i].dev_name,
					    O_RDONLY);
				}
			}
			if (fd == -1) {
				warn("error opening file %s",
				    io_opts[i].dev_name);
				error = 1;
				goto bailout;
			}

			devs[i] = camdd_probe_file(fd, &io_opts[i],
			    retry_count, timeout);
			if (devs[i] == NULL) {
				error = 1;
				goto bailout;
			}

			break;
		}
		default:
			warnx("Unknown device type %d (%s)",
			    io_opts[i].dev_type, io_opts[i].dev_name);
			error = 1;
			goto bailout;
			break; /*NOTREACHED */
		}

		devs[i]->write_dev = io_opts[i].write_dev;

		devs[i]->start_offset_bytes = io_opts[i].offset;

		/*
		 * With a transfer limit, the last sector allowed is the
		 * starting sector plus the limit in sectors, minus one.
		 */
		if (max_io != 0) {
			devs[i]->sector_io_limit =
			    (devs[i]->start_offset_bytes /
			    devs[i]->sector_size) +
			    (max_io / devs[i]->sector_size) - 1;
		}

		devs[i]->next_io_pos_bytes = devs[i]->start_offset_bytes;
		devs[i]->next_completion_pos_bytes =devs[i]->start_offset_bytes;
	}

	/* Cross-link the two devices and seed their view of the peer. */
	devs[0]->peer_dev = devs[1];
	devs[1]->peer_dev = devs[0];
	devs[0]->next_peer_pos_bytes = devs[0]->peer_dev->next_io_pos_bytes;
	devs[1]->next_peer_pos_bytes = devs[1]->peer_dev->next_io_pos_bytes;

	sem_init(&camdd_sem, /*pshared*/ 0, 0);

	signal(SIGINFO, camdd_sig_handler);
	signal(SIGINT, camdd_sig_handler);

	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &start_time);
	if (error != 0) {
		warn("Unable to get start time");
		goto bailout;
	}

	/*
	 * NOTE(review): if the second pthread_create() fails, the first
	 * worker is still running when the devices are freed at bailout.
	 * Confirm whether that needs handling.
	 */
	for (i = 0; i < num_io_opts; i++) {
		error = pthread_create(&threads[i], NULL, camdd_worker,
				       (void *)devs[i]);
		if (error != 0) {
			warnc(error, "pthread_create() failed");
			goto bailout;
		}
	}

	for (;;) {
		if ((sem_wait(&camdd_sem) == -1)
		 || (need_exit != 0)) {
			struct kevent ke;

			/*
			 * Time to stop: flag EOF on both devices and
			 * trigger the user event each worker's kqueue is
			 * keyed on so the workers wake up and notice.
			 */
			for (i = 0; i < num_io_opts; i++) {
				EV_SET(&ke, (uintptr_t)&devs[i]->work_queue,
				    EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);

				devs[i]->flags |= CAMDD_DEV_FLAG_EOF;

				error = kevent(devs[i]->kq, &ke, 1, NULL, 0,
						NULL);
				if (error == -1)
					warn("%s: unable to wake up thread",
					    __func__);
				error = 0;
			}
			break;
		} else if (need_status != 0) {
			camdd_print_status(devs[0], devs[1], &start_time);
			need_status = 0;
		}
	}
	for (i = 0; i < num_io_opts; i++) {
		pthread_join(threads[i], NULL);
	}

	camdd_print_status(devs[0], devs[1], &start_time);

bailout:

	/*
	 * Walk the table by its real size rather than by num_io_opts,
	 * which can be anything when the argument check above failed.
	 * Slots that were never filled in are still NULL.
	 */
	for (i = 0; i < (int)(sizeof(devs) / sizeof(devs[0])); i++)
		camdd_free_dev(devs[i]);

	return (error + error_exit);
}
3191 
/*
 * Write the command line synopsis and option summary to stderr.
 */
void
usage(void)
{
	static const char usage_text[] =
"usage:  camdd <-i|-o pass=pass0,bs=1M,offset=1M,depth=4>\n"
"              <-i|-o file=/tmp/file,bs=512K,offset=1M>\n"
"              <-i|-o file=/dev/da0,bs=512K,offset=1M>\n"
"              <-i|-o file=/dev/nsa0,bs=512K>\n"
"              [-C retry_count][-E][-m max_io_amt][-t timeout_secs][-v][-h]\n"
"Option description\n"
"-i <arg=val>  Specify input device/file and parameters\n"
"-o <arg=val>  Specify output device/file and parameters\n"
"Input and Output parameters\n"
"pass=name     Specify a pass(4) device like pass0 or /dev/pass0\n"
"file=name     Specify a file or device, /tmp/foo, /dev/da0, /dev/null\n"
"              or - for stdin/stdout\n"
"bs=blocksize  Specify blocksize in bytes, or using K, M, G, etc. suffix\n"
"offset=len    Specify starting offset in bytes or using K, M, G suffix\n"
"              NOTE: offset cannot be specified on tapes, pipes, stdin/out\n"
"depth=N       Specify a numeric queue depth.  This only applies to pass(4)\n"
"mcs=N         Specify a minimum cmd size for pass(4) read/write commands\n"
"Optional arguments\n"
"-C retry_cnt  Specify a retry count for pass(4) devices\n"
"-E            Enable CAM error recovery for pass(4) devices\n"
"-m max_io     Specify the maximum amount to be transferred in bytes or\n"
"              using K, G, M, etc. suffixes\n"
"-t timeout    Specify the I/O timeout to use with pass(4) devices\n"
"-v            Enable verbose error recovery\n"
"-h            Print this message\n";

	/* The text holds no conversions, so it is written out verbatim. */
	fputs(usage_text, stderr);
}
3222 
3223 
3224 int
3225 camdd_parse_io_opts(char *args, int is_write, struct camdd_io_opts *io_opts)
3226 {
3227 	char *tmpstr, *tmpstr2;
3228 	char *orig_tmpstr = NULL;
3229 	int retval = 0;
3230 
3231 	io_opts->write_dev = is_write;
3232 
3233 	tmpstr = strdup(args);
3234 	if (tmpstr == NULL) {
3235 		warn("strdup failed");
3236 		retval = 1;
3237 		goto bailout;
3238 	}
3239 	orig_tmpstr = tmpstr;
3240 	while ((tmpstr2 = strsep(&tmpstr, ",")) != NULL) {
3241 		char *name, *value;
3242 
3243 		/*
3244 		 * If the user creates an empty parameter by putting in two
3245 		 * commas, skip over it and look for the next field.
3246 		 */
3247 		if (*tmpstr2 == '\0')
3248 			continue;
3249 
3250 		name = strsep(&tmpstr2, "=");
3251 		if (*name == '\0') {
3252 			warnx("Got empty I/O parameter name");
3253 			retval = 1;
3254 			goto bailout;
3255 		}
3256 		value = strsep(&tmpstr2, "=");
3257 		if ((value == NULL)
3258 		 || (*value == '\0')) {
3259 			warnx("Empty I/O parameter value for %s", name);
3260 			retval = 1;
3261 			goto bailout;
3262 		}
3263 		if (strncasecmp(name, "file", 4) == 0) {
3264 			io_opts->dev_type = CAMDD_DEV_FILE;
3265 			io_opts->dev_name = strdup(value);
3266 			if (io_opts->dev_name == NULL) {
3267 				warn("Error allocating memory");
3268 				retval = 1;
3269 				goto bailout;
3270 			}
3271 		} else if (strncasecmp(name, "pass", 4) == 0) {
3272 			io_opts->dev_type = CAMDD_DEV_PASS;
3273 			io_opts->dev_name = strdup(value);
3274 			if (io_opts->dev_name == NULL) {
3275 				warn("Error allocating memory");
3276 				retval = 1;
3277 				goto bailout;
3278 			}
3279 		} else if ((strncasecmp(name, "bs", 2) == 0)
3280 			|| (strncasecmp(name, "blocksize", 9) == 0)) {
3281 			retval = expand_number(value, &io_opts->blocksize);
3282 			if (retval == -1) {
3283 				warn("expand_number(3) failed on %s=%s", name,
3284 				    value);
3285 				retval = 1;
3286 				goto bailout;
3287 			}
3288 		} else if (strncasecmp(name, "depth", 5) == 0) {
3289 			char *endptr;
3290 
3291 			io_opts->queue_depth = strtoull(value, &endptr, 0);
3292 			if (*endptr != '\0') {
3293 				warnx("invalid queue depth %s", value);
3294 				retval = 1;
3295 				goto bailout;
3296 			}
3297 		} else if (strncasecmp(name, "mcs", 3) == 0) {
3298 			char *endptr;
3299 
3300 			io_opts->min_cmd_size = strtol(value, &endptr, 0);
3301 			if ((*endptr != '\0')
3302 			 || ((io_opts->min_cmd_size > 16)
3303 			  || (io_opts->min_cmd_size < 0))) {
3304 				warnx("invalid minimum cmd size %s", value);
3305 				retval = 1;
3306 				goto bailout;
3307 			}
3308 		} else if (strncasecmp(name, "offset", 6) == 0) {
3309 			retval = expand_number(value, &io_opts->offset);
3310 			if (retval == -1) {
3311 				warn("expand_number(3) failed on %s=%s", name,
3312 				    value);
3313 				retval = 1;
3314 				goto bailout;
3315 			}
3316 		} else if (strncasecmp(name, "debug", 5) == 0) {
3317 			char *endptr;
3318 
3319 			io_opts->debug = strtoull(value, &endptr, 0);
3320 			if (*endptr != '\0') {
3321 				warnx("invalid debug level %s", value);
3322 				retval = 1;
3323 				goto bailout;
3324 			}
3325 		} else {
3326 			warnx("Unrecognized parameter %s=%s", name, value);
3327 		}
3328 	}
3329 bailout:
3330 	free(orig_tmpstr);
3331 
3332 	return (retval);
3333 }
3334 
3335 int
3336 main(int argc, char **argv)
3337 {
3338 	int c;
3339 	camdd_argmask arglist = CAMDD_ARG_NONE;
3340 	int timeout = 0, retry_count = 1;
3341 	int error = 0;
3342 	uint64_t max_io = 0;
3343 	struct camdd_io_opts *opt_list = NULL;
3344 
3345 	if (argc == 1) {
3346 		usage();
3347 		exit(1);
3348 	}
3349 
3350 	opt_list = calloc(2, sizeof(struct camdd_io_opts));
3351 	if (opt_list == NULL) {
3352 		warn("Unable to allocate option list");
3353 		error = 1;
3354 		goto bailout;
3355 	}
3356 
3357 	while ((c = getopt(argc, argv, "C:Ehi:m:o:t:v")) != -1){
3358 		switch (c) {
3359 		case 'C':
3360 			retry_count = strtol(optarg, NULL, 0);
3361 			if (retry_count < 0)
3362 				errx(1, "retry count %d is < 0",
3363 				     retry_count);
3364 			arglist |= CAMDD_ARG_RETRIES;
3365 			break;
3366 		case 'E':
3367 			arglist |= CAMDD_ARG_ERR_RECOVER;
3368 			break;
3369 		case 'i':
3370 		case 'o':
3371 			if (((c == 'i')
3372 			  && (opt_list[0].dev_type != CAMDD_DEV_NONE))
3373 			 || ((c == 'o')
3374 			  && (opt_list[1].dev_type != CAMDD_DEV_NONE))) {
3375 				errx(1, "Only one input and output path "
3376 				    "allowed");
3377 			}
3378 			error = camdd_parse_io_opts(optarg, (c == 'o') ? 1 : 0,
3379 			    (c == 'o') ? &opt_list[1] : &opt_list[0]);
3380 			if (error != 0)
3381 				goto bailout;
3382 			break;
3383 		case 'm':
3384 			error = expand_number(optarg, &max_io);
3385 			if (error == -1) {
3386 				warn("invalid maximum I/O amount %s", optarg);
3387 				error = 1;
3388 				goto bailout;
3389 			}
3390 			break;
3391 		case 't':
3392 			timeout = strtol(optarg, NULL, 0);
3393 			if (timeout < 0)
3394 				errx(1, "invalid timeout %d", timeout);
3395 			/* Convert the timeout from seconds to ms */
3396 			timeout *= 1000;
3397 			arglist |= CAMDD_ARG_TIMEOUT;
3398 			break;
3399 		case 'v':
3400 			arglist |= CAMDD_ARG_VERBOSE;
3401 			break;
3402 		case 'h':
3403 		default:
3404 			usage();
3405 			exit(1);
3406 			break; /*NOTREACHED*/
3407 		}
3408 	}
3409 
3410 	if ((opt_list[0].dev_type == CAMDD_DEV_NONE)
3411 	 || (opt_list[1].dev_type == CAMDD_DEV_NONE))
3412 		errx(1, "Must specify both -i and -o");
3413 
3414 	/*
3415 	 * Set the timeout if the user hasn't specified one.
3416 	 */
3417 	if (timeout == 0)
3418 		timeout = CAMDD_PASS_RW_TIMEOUT;
3419 
3420 	error = camdd_rw(opt_list, 2, max_io, retry_count, timeout);
3421 
3422 bailout:
3423 	free(opt_list);
3424 
3425 	exit(error);
3426 }
3427