xref: /freebsd/usr.sbin/camdd/camdd.c (revision c9d9315b74017b0637bcbcc2e7a5c01d7bc58bf1)
1 /*-
2  * Copyright (c) 1997-2007 Kenneth D. Merry
3  * Copyright (c) 2013, 2014, 2015 Spectra Logic Corporation
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions, and the following disclaimer,
11  *    without modification.
12  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
13  *    substantially similar to the "NO WARRANTY" disclaimer below
14  *    ("Disclaimer") and any redistribution must be conditioned upon
15  *    including a substantially similar Disclaimer requirement for further
16  *    binary redistribution.
17  *
18  * NO WARRANTY
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
22  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
28  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGES.
30  *
31  * Authors: Ken Merry           (Spectra Logic Corporation)
32  */
33 
34 /*
35  * This is eventually intended to be:
36  * - A basic data transfer/copy utility
37  * - A simple benchmark utility
38  * - An example of how to use the asynchronous pass(4) driver interface.
39  */
40 #include <sys/cdefs.h>
41 #include <sys/ioctl.h>
42 #include <sys/stdint.h>
43 #include <sys/types.h>
44 #include <sys/endian.h>
45 #include <sys/param.h>
46 #include <sys/sbuf.h>
47 #include <sys/stat.h>
48 #include <sys/event.h>
49 #include <sys/time.h>
50 #include <sys/uio.h>
51 #include <vm/vm.h>
52 #include <sys/bus.h>
53 #include <sys/bus_dma.h>
54 #include <sys/mtio.h>
55 #include <sys/conf.h>
56 #include <sys/disk.h>
57 
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <semaphore.h>
61 #include <string.h>
62 #include <unistd.h>
63 #include <inttypes.h>
64 #include <limits.h>
65 #include <fcntl.h>
66 #include <ctype.h>
67 #include <err.h>
68 #include <libutil.h>
69 #include <pthread.h>
70 #include <assert.h>
71 #include <bsdxml.h>
72 
73 #include <cam/cam.h>
74 #include <cam/cam_debug.h>
75 #include <cam/cam_ccb.h>
76 #include <cam/scsi/scsi_all.h>
77 #include <cam/scsi/scsi_da.h>
78 #include <cam/scsi/scsi_pass.h>
79 #include <cam/scsi/scsi_message.h>
80 #include <cam/scsi/smp_all.h>
81 #include <cam/nvme/nvme_all.h>
82 #include <camlib.h>
83 #include <mtlib.h>
84 #include <zlib.h>
85 
/*
 * Top-level command selector.
 *
 * Despite the "mask" in the type name the values are not independent
 * bits: CAMDD_CMD_READ (3) is numerically CAMDD_CMD_HELP | CAMDD_CMD_WRITE.
 */
typedef enum {
	CAMDD_CMD_NONE	= 0x0,
	CAMDD_CMD_HELP	= 0x1,
	CAMDD_CMD_WRITE	= 0x2,
	CAMDD_CMD_READ	= 0x3
} camdd_cmdmask;
92 
/*
 * Option flags.  Each non-zero value occupies its own bit.
 */
typedef enum {
	CAMDD_ARG_NONE		= 0x00,
	CAMDD_ARG_VERBOSE	= 0x01,
	CAMDD_ARG_ERR_RECOVER	= 0x80,
} camdd_argmask;
98 
/*
 * Kind of I/O target behind a struct camdd_dev: a pass(4) device or a
 * file descriptor.
 */
typedef enum {
	CAMDD_DEV_NONE	= 0,
	CAMDD_DEV_PASS	= 1,
	CAMDD_DEV_FILE	= 2
} camdd_dev_type;
104 
105 struct camdd_io_opts {
106 	camdd_dev_type	dev_type;
107 	char		*dev_name;
108 	uint64_t	blocksize;
109 	uint64_t	queue_depth;
110 	uint64_t	offset;
111 	int		min_cmd_size;
112 	int		write_dev;
113 	uint64_t	debug;
114 };
115 
/*
 * Flavor of a struct camdd_buf.  It selects which member of
 * union camdd_buf_types is meaningful.
 */
typedef enum {
	CAMDD_BUF_NONE		= 0,
	CAMDD_BUF_DATA		= 1,
	CAMDD_BUF_INDIRECT	= 2
} camdd_buf_type;
121 
/*
 * Type-specific part of a CAMDD_BUF_INDIRECT buffer: a window onto a
 * region of another buffer's data.
 */
struct camdd_buf_indirect {
	/* The buffer whose data this one refers to. */
	struct camdd_buf *src_buf;

	/* Byte offset of the window within the source buffer. */
	uint64_t	  offset;

	/* Address of the first byte of the window in the source buffer. */
	uint8_t		 *start_ptr;

	/* Size of the window, in bytes. */
	size_t		  len;
};
142 
143 struct camdd_buf_data {
144 	/*
145 	 * Buffer allocated when we allocate this camdd_buf.  This should
146 	 * be the size of the blocksize for this device.
147 	 */
148 	uint8_t			*buf;
149 
150 	/*
151 	 * The amount of backing store allocated in buf.  Generally this
152 	 * will be the blocksize of the device.
153 	 */
154 	uint32_t		 alloc_len;
155 
156 	/*
157 	 * The amount of data that was put into the buffer (on reads) or
158 	 * the amount of data we have put onto the src_list so far (on
159 	 * writes).
160 	 */
161 	uint32_t		 fill_len;
162 
163 	/*
164 	 * The amount of data that was not transferred.
165 	 */
166 	uint32_t		 resid;
167 
168 	/*
169 	 * Starting byte offset on the reader.
170 	 */
171 	uint64_t		 src_start_offset;
172 
173 	/*
174 	 * CCB used for pass(4) device targets.
175 	 */
176 	union ccb		 ccb;
177 
178 	/*
179 	 * Number of scatter/gather segments.
180 	 */
181 	int			 sg_count;
182 
183 	/*
184 	 * Set if we had to tack on an extra buffer to round the transfer
185 	 * up to a sector size.
186 	 */
187 	int			 extra_buf;
188 
189 	/*
190 	 * Scatter/gather list used generally when we're the writer for a
191 	 * pass(4) device.
192 	 */
193 	bus_dma_segment_t	*segs;
194 
195 	/*
196 	 * Scatter/gather list used generally when we're the writer for a
197 	 * file or block device;
198 	 */
199 	struct iovec		*iovec;
200 };
201 
202 union camdd_buf_types {
203 	struct camdd_buf_indirect	indirect;
204 	struct camdd_buf_data		data;
205 };
206 
/*
 * Outcome recorded in a buffer's status field.
 */
typedef enum {
	CAMDD_STATUS_NONE	= 0,
	CAMDD_STATUS_OK		= 1,
	CAMDD_STATUS_SHORT_IO	= 2,
	CAMDD_STATUS_EOF	= 3,
	CAMDD_STATUS_ERROR	= 4
} camdd_buf_status;
214 
215 struct camdd_buf {
216 	camdd_buf_type		 buf_type;
217 	union camdd_buf_types	 buf_type_spec;
218 
219 	camdd_buf_status	 status;
220 
221 	uint64_t		 lba;
222 	size_t			 len;
223 
224 	/*
225 	 * A reference count of how many indirect buffers point to this
226 	 * buffer.
227 	 */
228 	int			 refcount;
229 
230 	/*
231 	 * A link back to our parent device.
232 	 */
233 	struct camdd_dev	*dev;
234 	STAILQ_ENTRY(camdd_buf)  links;
235 	STAILQ_ENTRY(camdd_buf)  work_links;
236 
237 	/*
238 	 * A count of the buffers on the src_list.
239 	 */
240 	int			 src_count;
241 
242 	/*
243 	 * List of buffers from our partner thread that are the components
244 	 * of this buffer for the I/O.  Uses src_links.
245 	 */
246 	STAILQ_HEAD(,camdd_buf)	 src_list;
247 	STAILQ_ENTRY(camdd_buf)  src_links;
248 };
249 
/* Count of usable device types (CAMDD_DEV_PASS and CAMDD_DEV_FILE). */
#define	NUM_DEV_TYPES	2
251 
/*
 * State specific to a CAMDD_DEV_PASS device.
 */
struct camdd_dev_pass {
	int			 scsi_dev_type;
	int			 protocol;

	/* CAM handle; closed with cam_close_device() in camdd_free_dev(). */
	struct cam_device	*dev;

	uint64_t		 max_sector;
	uint32_t		 block_len;
	uint32_t		 cpi_maxio;
};
260 
/*
 * Classification of a file descriptor target, as determined by
 * camdd_probe_file().
 */
typedef enum {
	CAMDD_FILE_NONE	= 0,
	CAMDD_FILE_REG	= 1,
	CAMDD_FILE_STD	= 2,
	CAMDD_FILE_PIPE	= 3,
	CAMDD_FILE_DISK	= 4,
	CAMDD_FILE_TAPE	= 5,
	CAMDD_FILE_TTY	= 6,
	CAMDD_FILE_MEM	= 7
} camdd_file_type;
271 
/*
 * Capability bits for a file target.  camdd_probe_file() sets
 * CAMDD_FF_CAN_SEEK for regular files.
 */
typedef enum {
	CAMDD_FF_NONE		= 0x0,
	CAMDD_FF_CAN_SEEK	= 0x1
} camdd_file_flags;
276 
277 struct camdd_dev_file {
278 	int			 fd;
279 	struct stat		 sb;
280 	char			 filename[MAXPATHLEN + 1];
281 	camdd_file_type		 file_type;
282 	camdd_file_flags	 file_flags;
283 	uint8_t			*tmp_buf;
284 };
285 
/*
 * State for a block device target: descriptor, total size in bytes and
 * block length.
 */
struct camdd_dev_block {
	int		 fd;
	uint64_t	 size_bytes;
	uint32_t	 block_len;
};
291 
292 union camdd_dev_spec {
293 	struct camdd_dev_pass	pass;
294 	struct camdd_dev_file	file;
295 	struct camdd_dev_block	block;
296 };
297 
/*
 * Device state bits kept in camdd_dev.flags.  Every non-zero value is a
 * distinct single bit.
 */
typedef enum {
	CAMDD_DEV_FLAG_NONE		= 0,
	CAMDD_DEV_FLAG_EOF		= 1 << 0,
	CAMDD_DEV_FLAG_PEER_EOF		= 1 << 1,
	CAMDD_DEV_FLAG_ACTIVE		= 1 << 2,
	CAMDD_DEV_FLAG_EOF_SENT		= 1 << 3,
	CAMDD_DEV_FLAG_EOF_QUEUED	= 1 << 4
} camdd_dev_flags;
306 
307 struct camdd_dev {
308 	camdd_dev_type		 dev_type;
309 	union camdd_dev_spec	 dev_spec;
310 	camdd_dev_flags		 flags;
311 	char			 device_name[MAXPATHLEN+1];
312 	uint32_t		 blocksize;
313 	uint32_t		 sector_size;
314 	uint64_t		 max_sector;
315 	uint64_t		 sector_io_limit;
316 	int			 min_cmd_size;
317 	int			 write_dev;
318 	int			 retry_count;
319 	int			 io_timeout;
320 	int			 debug;
321 	uint64_t		 start_offset_bytes;
322 	uint64_t		 next_io_pos_bytes;
323 	uint64_t		 next_peer_pos_bytes;
324 	uint64_t		 next_completion_pos_bytes;
325 	uint64_t		 peer_bytes_queued;
326 	uint64_t		 bytes_transferred;
327 	uint32_t		 target_queue_depth;
328 	uint32_t		 cur_active_io;
329 	uint8_t			*extra_buf;
330 	uint32_t		 extra_buf_len;
331 	struct camdd_dev	*peer_dev;
332 	pthread_mutex_t		 mutex;
333 	pthread_cond_t		 cond;
334 	int			 kq;
335 
336 	int			 (*run)(struct camdd_dev *dev);
337 	int			 (*fetch)(struct camdd_dev *dev);
338 
339 	/*
340 	 * Buffers that are available for I/O.  Uses links.
341 	 */
342 	STAILQ_HEAD(,camdd_buf)	 free_queue;
343 
344 	/*
345 	 * Free indirect buffers.  These are used for breaking a large
346 	 * buffer into multiple pieces.
347 	 */
348 	STAILQ_HEAD(,camdd_buf)	 free_indirect_queue;
349 
350 	/*
351 	 * Buffers that have been queued to the kernel.  Uses links.
352 	 */
353 	STAILQ_HEAD(,camdd_buf)	 active_queue;
354 
355 	/*
356 	 * Will generally contain one of our buffers that is waiting for enough
357 	 * I/O from our partner thread to be able to execute.  This will
358 	 * generally happen when our per-I/O-size is larger than the
359 	 * partner thread's per-I/O-size.  Uses links.
360 	 */
361 	STAILQ_HEAD(,camdd_buf)	 pending_queue;
362 
363 	/*
364 	 * Number of buffers on the pending queue
365 	 */
366 	int			 num_pending_queue;
367 
368 	/*
369 	 * Buffers that are filled and ready to execute.  This is used when
370 	 * our partner (reader) thread sends us blocks that are larger than
371 	 * our blocksize, and so we have to split them into multiple pieces.
372 	 */
373 	STAILQ_HEAD(,camdd_buf)	 run_queue;
374 
375 	/*
376 	 * Number of buffers on the run queue.
377 	 */
378 	int			 num_run_queue;
379 
380 	STAILQ_HEAD(,camdd_buf)	 reorder_queue;
381 
382 	int			 num_reorder_queue;
383 
384 	/*
385 	 * Buffers that have been queued to us by our partner thread
386 	 * (generally the reader thread) to be written out.  Uses
387 	 * work_links.
388 	 */
389 	STAILQ_HEAD(,camdd_buf)	 work_queue;
390 
391 	/*
392 	 * Buffers that have been completed by our partner thread.  Uses
393 	 * work_links.
394 	 */
395 	STAILQ_HEAD(,camdd_buf)	 peer_done_queue;
396 
397 	/*
398 	 * Number of buffers on the peer done queue.
399 	 */
400 	uint32_t		 num_peer_done_queue;
401 
402 	/*
403 	 * A list of buffers that we have queued to our peer thread.  Uses
404 	 * links.
405 	 */
406 	STAILQ_HEAD(,camdd_buf)	 peer_work_queue;
407 
408 	/*
409 	 * Number of buffers on the peer work queue.
410 	 */
411 	uint32_t		 num_peer_work_queue;
412 };
413 
static sem_t camdd_sem;

/*
 * Process-wide request flags.  They are declared volatile so that a
 * store made asynchronously (presumably by camdd_sig_handler(); the
 * handler body is not visible here -- TODO confirm) is not cached away
 * by the compiler in code that polls them.
 */
static volatile sig_atomic_t need_exit = 0;
static volatile sig_atomic_t error_exit = 0;
static volatile sig_atomic_t need_status = 0;
418 
/*
 * Smaller of two values.  The arguments and the whole expansion are
 * parenthesized so the macro can be used inside larger expressions.
 * Each argument may still be evaluated twice, so avoid side effects.
 */
#ifndef min
#define	min(a, b) (((a) < (b)) ? (a) : (b))
#endif
422 
423 
/*
 * Shorthand for the first two slots of the peripheral private area.
 * These expand to member designators, so they are used as
 * "<object>.ppriv_ptr0" and so on.
 */
#define	ppriv_ptr0	periph_priv.entries[0].ptr
#define	ppriv_ptr1	periph_priv.entries[1].ptr
#define	ppriv_field0	periph_priv.entries[0].field
#define	ppriv_field1	periph_priv.entries[1].field

/* Slot 0's pointer is referred to as ccb_buf. */
#define	ccb_buf		ppriv_ptr0
431 
/* Defaults and limits for file and pass(4) targets. */
#define	CAMDD_FILE_DEFAULT_BLOCK	524288
#define	CAMDD_FILE_DEFAULT_DEPTH	1
#define	CAMDD_PASS_MAX_BLOCK		1048576
#define	CAMDD_PASS_DEFAULT_DEPTH	6
/*
 * 60 * 1000.  Parenthesized so the product stays intact when the macro
 * appears next to higher-precedence operators such as '/' or '%'.
 */
#define	CAMDD_PASS_RW_TIMEOUT		(60 * 1000)
437 
438 static int parse_btl(char *tstr, int *bus, int *target, int *lun);
439 void camdd_free_dev(struct camdd_dev *dev);
440 struct camdd_dev *camdd_alloc_dev(camdd_dev_type dev_type,
441 				  struct kevent *new_ke, int num_ke,
442 				  int retry_count, int timeout);
443 static struct camdd_buf *camdd_alloc_buf(struct camdd_dev *dev,
444 					 camdd_buf_type buf_type);
445 void camdd_release_buf(struct camdd_buf *buf);
446 struct camdd_buf *camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type);
447 int camdd_buf_sg_create(struct camdd_buf *buf, int iovec,
448 			uint32_t sector_size, uint32_t *num_sectors_used,
449 			int *double_buf_needed);
450 uint32_t camdd_buf_get_len(struct camdd_buf *buf);
451 void camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf);
452 int camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
453 		     uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran);
454 int camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
455          camdd_argmask arglist, int probe_retry_count,
456          int probe_timeout, uint64_t *maxsector, uint32_t *block_len);
457 int camdd_probe_pass_nvme(struct cam_device *cam_dev, union ccb *ccb,
458          camdd_argmask arglist, int probe_retry_count,
459          int probe_timeout, uint64_t *maxsector, uint32_t *block_len);
460 struct camdd_dev *camdd_probe_file(int fd, struct camdd_io_opts *io_opts,
461 				   int retry_count, int timeout);
462 struct camdd_dev *camdd_probe_pass(struct cam_device *cam_dev,
463 				   struct camdd_io_opts *io_opts,
464 				   camdd_argmask arglist, int probe_retry_count,
465 				   int probe_timeout, int io_retry_count,
466 				   int io_timeout);
467 void nvme_read_write(struct ccb_nvmeio *nvmeio, uint32_t retries,
468 		void (*cbfcnp)(struct cam_periph *, union ccb *),
469 		uint32_t nsid, int readop, uint64_t lba,
470 		uint32_t block_count, uint8_t *data_ptr, uint32_t dxfer_len,
471 		uint32_t timeout);
472 void *camdd_file_worker(void *arg);
473 camdd_buf_status camdd_ccb_status(union ccb *ccb, int protocol);
474 int camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd);
475 int camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf);
476 int camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf);
477 void camdd_peer_done(struct camdd_buf *buf);
478 void camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
479 			int *error_count);
480 int camdd_pass_fetch(struct camdd_dev *dev);
481 int camdd_file_run(struct camdd_dev *dev);
482 int camdd_pass_run(struct camdd_dev *dev);
483 int camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len);
484 int camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf);
485 void camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
486 		     uint32_t *peer_depth, uint32_t *our_bytes,
487 		     uint32_t *peer_bytes);
488 void *camdd_worker(void *arg);
489 void camdd_sig_handler(int sig);
490 void camdd_print_status(struct camdd_dev *camdd_dev,
491 			struct camdd_dev *other_dev,
492 			struct timespec *start_time);
493 int camdd_rw(struct camdd_io_opts *io_opts, camdd_argmask arglist,
494 	     int num_io_opts, uint64_t max_io, int retry_count, int timeout);
495 int camdd_parse_io_opts(char *args, int is_write,
496 			struct camdd_io_opts *io_opts);
497 void usage(void);
498 
/*
 * Parse out a bus, or a bus, target and lun in the following
 * format:
 * bus
 * bus:target
 * bus:target:lun
 *
 * Leading whitespace in tstr is skipped.  tstr is modified in place,
 * because strtok(3) overwrites the ':' separators it finds.  Each
 * component is converted with strtol(3) using base 0; the conversion
 * result is not checked for syntax or range errors.  Only the output
 * arguments for components that were actually found are written.
 *
 * Returns the number of parsed components (0 through 3).
 */
static int
parse_btl(char *tstr, int *bus, int *target, int *lun)
{
	int *fields[3];
	char *tok;
	int convs = 0;

	fields[0] = bus;
	fields[1] = target;
	fields[2] = lun;

	/*
	 * The <ctype.h> functions require an argument representable as
	 * unsigned char; a negative plain char would be undefined
	 * behavior.  isspace() is false for '\0', so this stops at the
	 * end of the string.
	 */
	while (isspace((unsigned char)*tstr))
		tstr++;

	/* strtok() never returns an empty token, so NULL is the only end. */
	tok = strtok(tstr, ":");
	while (tok != NULL) {
		*fields[convs++] = (int)strtol(tok, NULL, 0);
		if (convs == 3)
			break;
		tok = strtok(NULL, ":");
	}

	return (convs);
}
535 
536 /*
537  * XXX KDM clean up and free all of the buffers on the queue!
538  */
539 void
camdd_free_dev(struct camdd_dev * dev)540 camdd_free_dev(struct camdd_dev *dev)
541 {
542 	if (dev == NULL)
543 		return;
544 
545 	switch (dev->dev_type) {
546 	case CAMDD_DEV_FILE: {
547 		struct camdd_dev_file *file_dev = &dev->dev_spec.file;
548 
549 		if (file_dev->fd != -1)
550 			close(file_dev->fd);
551 		free(file_dev->tmp_buf);
552 		break;
553 	}
554 	case CAMDD_DEV_PASS: {
555 		struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
556 
557 		if (pass_dev->dev != NULL)
558 			cam_close_device(pass_dev->dev);
559 		break;
560 	}
561 	default:
562 		break;
563 	}
564 
565 	free(dev);
566 }
567 
568 struct camdd_dev *
camdd_alloc_dev(camdd_dev_type dev_type,struct kevent * new_ke,int num_ke,int retry_count,int timeout)569 camdd_alloc_dev(camdd_dev_type dev_type, struct kevent *new_ke, int num_ke,
570 		int retry_count, int timeout)
571 {
572 	struct camdd_dev *dev = NULL;
573 	struct kevent *ke;
574 	size_t ke_size;
575 	int retval = 0;
576 
577 	dev = calloc(1, sizeof(*dev));
578 	if (dev == NULL) {
579 		warn("%s: unable to malloc %zu bytes", __func__, sizeof(*dev));
580 		goto bailout;
581 	}
582 
583 	dev->dev_type = dev_type;
584 	dev->io_timeout = timeout;
585 	dev->retry_count = retry_count;
586 	STAILQ_INIT(&dev->free_queue);
587 	STAILQ_INIT(&dev->free_indirect_queue);
588 	STAILQ_INIT(&dev->active_queue);
589 	STAILQ_INIT(&dev->pending_queue);
590 	STAILQ_INIT(&dev->run_queue);
591 	STAILQ_INIT(&dev->reorder_queue);
592 	STAILQ_INIT(&dev->work_queue);
593 	STAILQ_INIT(&dev->peer_done_queue);
594 	STAILQ_INIT(&dev->peer_work_queue);
595 	retval = pthread_mutex_init(&dev->mutex, NULL);
596 	if (retval != 0) {
597 		warnc(retval, "%s: failed to initialize mutex", __func__);
598 		goto bailout;
599 	}
600 
601 	retval = pthread_cond_init(&dev->cond, NULL);
602 	if (retval != 0) {
603 		warnc(retval, "%s: failed to initialize condition variable",
604 		      __func__);
605 		goto bailout;
606 	}
607 
608 	dev->kq = kqueue();
609 	if (dev->kq == -1) {
610 		warn("%s: Unable to create kqueue", __func__);
611 		goto bailout;
612 	}
613 
614 	ke_size = sizeof(struct kevent) * (num_ke + 4);
615 	ke = calloc(1, ke_size);
616 	if (ke == NULL) {
617 		warn("%s: unable to malloc %zu bytes", __func__, ke_size);
618 		goto bailout;
619 	}
620 	if (num_ke > 0)
621 		bcopy(new_ke, ke, num_ke * sizeof(struct kevent));
622 
623 	EV_SET(&ke[num_ke++], (uintptr_t)&dev->work_queue, EVFILT_USER,
624 	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
625 	EV_SET(&ke[num_ke++], (uintptr_t)&dev->peer_done_queue, EVFILT_USER,
626 	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
627 	EV_SET(&ke[num_ke++], SIGINFO, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
628 	EV_SET(&ke[num_ke++], SIGINT, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
629 
630 	retval = kevent(dev->kq, ke, num_ke, NULL, 0, NULL);
631 	if (retval == -1) {
632 		warn("%s: Unable to register kevents", __func__);
633 		goto bailout;
634 	}
635 
636 
637 	return (dev);
638 
639 bailout:
640 	free(dev);
641 
642 	return (NULL);
643 }
644 
645 static struct camdd_buf *
camdd_alloc_buf(struct camdd_dev * dev,camdd_buf_type buf_type)646 camdd_alloc_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
647 {
648 	struct camdd_buf *buf = NULL;
649 	uint8_t *data_ptr = NULL;
650 
651 	/*
652 	 * We only need to allocate data space for data buffers.
653 	 */
654 	switch (buf_type) {
655 	case CAMDD_BUF_DATA:
656 		data_ptr = malloc(dev->blocksize);
657 		if (data_ptr == NULL) {
658 			warn("unable to allocate %u bytes", dev->blocksize);
659 			goto bailout_error;
660 		}
661 		break;
662 	default:
663 		break;
664 	}
665 
666 	buf = calloc(1, sizeof(*buf));
667 	if (buf == NULL) {
668 		warn("unable to allocate %zu bytes", sizeof(*buf));
669 		goto bailout_error;
670 	}
671 
672 	buf->buf_type = buf_type;
673 	buf->dev = dev;
674 	switch (buf_type) {
675 	case CAMDD_BUF_DATA: {
676 		struct camdd_buf_data *data;
677 
678 		data = &buf->buf_type_spec.data;
679 
680 		data->alloc_len = dev->blocksize;
681 		data->buf = data_ptr;
682 		break;
683 	}
684 	case CAMDD_BUF_INDIRECT:
685 		break;
686 	default:
687 		break;
688 	}
689 	STAILQ_INIT(&buf->src_list);
690 
691 	return (buf);
692 
693 bailout_error:
694 	free(data_ptr);
695 
696 	return (NULL);
697 }
698 
699 void
camdd_release_buf(struct camdd_buf * buf)700 camdd_release_buf(struct camdd_buf *buf)
701 {
702 	struct camdd_dev *dev;
703 
704 	dev = buf->dev;
705 
706 	switch (buf->buf_type) {
707 	case CAMDD_BUF_DATA: {
708 		struct camdd_buf_data *data;
709 
710 		data = &buf->buf_type_spec.data;
711 
712 		if (data->segs != NULL) {
713 			if (data->extra_buf != 0) {
714 				void *extra_buf;
715 
716 				extra_buf = (void *)
717 				    data->segs[data->sg_count - 1].ds_addr;
718 				free(extra_buf);
719 				data->extra_buf = 0;
720 			}
721 			free(data->segs);
722 			data->segs = NULL;
723 			data->sg_count = 0;
724 		} else if (data->iovec != NULL) {
725 			if (data->extra_buf != 0) {
726 				free(data->iovec[data->sg_count - 1].iov_base);
727 				data->extra_buf = 0;
728 			}
729 			free(data->iovec);
730 			data->iovec = NULL;
731 			data->sg_count = 0;
732 		}
733 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
734 		break;
735 	}
736 	case CAMDD_BUF_INDIRECT:
737 		STAILQ_INSERT_TAIL(&dev->free_indirect_queue, buf, links);
738 		break;
739 	default:
740 		err(1, "%s: Invalid buffer type %d for released buffer",
741 		    __func__, buf->buf_type);
742 		break;
743 	}
744 }
745 
746 struct camdd_buf *
camdd_get_buf(struct camdd_dev * dev,camdd_buf_type buf_type)747 camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
748 {
749 	struct camdd_buf *buf = NULL;
750 
751 	switch (buf_type) {
752 	case CAMDD_BUF_DATA:
753 		buf = STAILQ_FIRST(&dev->free_queue);
754 		if (buf != NULL) {
755 			struct camdd_buf_data *data;
756 			uint8_t *data_ptr;
757 			uint32_t alloc_len;
758 
759 			STAILQ_REMOVE_HEAD(&dev->free_queue, links);
760 			data = &buf->buf_type_spec.data;
761 			data_ptr = data->buf;
762 			alloc_len = data->alloc_len;
763 			bzero(buf, sizeof(*buf));
764 			data->buf = data_ptr;
765 			data->alloc_len = alloc_len;
766 		}
767 		break;
768 	case CAMDD_BUF_INDIRECT:
769 		buf = STAILQ_FIRST(&dev->free_indirect_queue);
770 		if (buf != NULL) {
771 			STAILQ_REMOVE_HEAD(&dev->free_indirect_queue, links);
772 
773 			bzero(buf, sizeof(*buf));
774 		}
775 		break;
776 	default:
777 		warnx("Unknown buffer type %d requested", buf_type);
778 		break;
779 	}
780 
781 
782 	if (buf == NULL)
783 		return (camdd_alloc_buf(dev, buf_type));
784 	else {
785 		STAILQ_INIT(&buf->src_list);
786 		buf->dev = dev;
787 		buf->buf_type = buf_type;
788 
789 		return (buf);
790 	}
791 }
792 
793 int
camdd_buf_sg_create(struct camdd_buf * buf,int iovec,uint32_t sector_size,uint32_t * num_sectors_used,int * double_buf_needed)794 camdd_buf_sg_create(struct camdd_buf *buf, int iovec, uint32_t sector_size,
795 		    uint32_t *num_sectors_used, int *double_buf_needed)
796 {
797 	struct camdd_buf *tmp_buf;
798 	struct camdd_buf_data *data;
799 	uint8_t *extra_buf = NULL;
800 	size_t extra_buf_len = 0;
801 	int extra_buf_attached = 0;
802 	int i, retval = 0;
803 
804 	data = &buf->buf_type_spec.data;
805 
806 	data->sg_count = buf->src_count;
807 	/*
808 	 * Compose a scatter/gather list from all of the buffers in the list.
809 	 * If the length of the buffer isn't a multiple of the sector size,
810 	 * we'll have to add an extra buffer.  This should only happen
811 	 * at the end of a transfer.
812 	 */
813 	if ((data->fill_len % sector_size) != 0) {
814 		extra_buf_len = sector_size - (data->fill_len % sector_size);
815 		extra_buf = calloc(extra_buf_len, 1);
816 		if (extra_buf == NULL) {
817 			warn("%s: unable to allocate %zu bytes for extra "
818 			    "buffer space", __func__, extra_buf_len);
819 			retval = 1;
820 			goto bailout;
821 		}
822 		data->extra_buf = 1;
823 		data->sg_count++;
824 	}
825 	if (iovec == 0) {
826 		data->segs = calloc(data->sg_count, sizeof(bus_dma_segment_t));
827 		if (data->segs == NULL) {
828 			warn("%s: unable to allocate %zu bytes for S/G list",
829 			    __func__, sizeof(bus_dma_segment_t) *
830 			    data->sg_count);
831 			retval = 1;
832 			goto bailout;
833 		}
834 
835 	} else {
836 		data->iovec = calloc(data->sg_count, sizeof(struct iovec));
837 		if (data->iovec == NULL) {
838 			warn("%s: unable to allocate %zu bytes for S/G list",
839 			    __func__, sizeof(struct iovec) * data->sg_count);
840 			retval = 1;
841 			goto bailout;
842 		}
843 	}
844 
845 	for (i = 0, tmp_buf = STAILQ_FIRST(&buf->src_list);
846 	     i < buf->src_count && tmp_buf != NULL; i++,
847 	     tmp_buf = STAILQ_NEXT(tmp_buf, src_links)) {
848 
849 		if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
850 			struct camdd_buf_data *tmp_data;
851 
852 			tmp_data = &tmp_buf->buf_type_spec.data;
853 			if (iovec == 0) {
854 				data->segs[i].ds_addr =
855 				    (bus_addr_t) tmp_data->buf;
856 				data->segs[i].ds_len = tmp_data->fill_len -
857 				    tmp_data->resid;
858 			} else {
859 				data->iovec[i].iov_base = tmp_data->buf;
860 				data->iovec[i].iov_len = tmp_data->fill_len -
861 				    tmp_data->resid;
862 			}
863 			if (((tmp_data->fill_len - tmp_data->resid) %
864 			     sector_size) != 0)
865 				*double_buf_needed = 1;
866 		} else {
867 			struct camdd_buf_indirect *tmp_ind;
868 
869 			tmp_ind = &tmp_buf->buf_type_spec.indirect;
870 			if (iovec == 0) {
871 				data->segs[i].ds_addr =
872 				    (bus_addr_t)tmp_ind->start_ptr;
873 				data->segs[i].ds_len = tmp_ind->len;
874 			} else {
875 				data->iovec[i].iov_base = tmp_ind->start_ptr;
876 				data->iovec[i].iov_len = tmp_ind->len;
877 			}
878 			if ((tmp_ind->len % sector_size) != 0)
879 				*double_buf_needed = 1;
880 		}
881 	}
882 
883 	if (extra_buf != NULL) {
884 		if (iovec == 0) {
885 			data->segs[i].ds_addr = (bus_addr_t)extra_buf;
886 			data->segs[i].ds_len = extra_buf_len;
887 		} else {
888 			data->iovec[i].iov_base = extra_buf;
889 			data->iovec[i].iov_len = extra_buf_len;
890 		}
891 		extra_buf_attached = 1;
892 		i++;
893 	}
894 	if ((tmp_buf != NULL) || (i != data->sg_count)) {
895 		warnx("buffer source count does not match "
896 		      "number of buffers in list!");
897 		retval = 1;
898 		goto bailout;
899 	}
900 
901 bailout:
902 	if (retval == 0) {
903 		*num_sectors_used = (data->fill_len + extra_buf_len) /
904 		    sector_size;
905 	} else if (extra_buf_attached == 0) {
906 		/*
907 		 * If extra_buf isn't attached yet, we need to free it
908 		 * to avoid leaking.
909 		 */
910 		free(extra_buf);
911 		data->extra_buf = 0;
912 		data->sg_count--;
913 	}
914 	return (retval);
915 }
916 
917 uint32_t
camdd_buf_get_len(struct camdd_buf * buf)918 camdd_buf_get_len(struct camdd_buf *buf)
919 {
920 	uint32_t len = 0;
921 
922 	if (buf->buf_type != CAMDD_BUF_DATA) {
923 		struct camdd_buf_indirect *indirect;
924 
925 		indirect = &buf->buf_type_spec.indirect;
926 		len = indirect->len;
927 	} else {
928 		struct camdd_buf_data *data;
929 
930 		data = &buf->buf_type_spec.data;
931 		len = data->fill_len;
932 	}
933 
934 	return (len);
935 }
936 
937 void
camdd_buf_add_child(struct camdd_buf * buf,struct camdd_buf * child_buf)938 camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf)
939 {
940 	struct camdd_buf_data *data;
941 
942 	assert(buf->buf_type == CAMDD_BUF_DATA);
943 
944 	data = &buf->buf_type_spec.data;
945 
946 	STAILQ_INSERT_TAIL(&buf->src_list, child_buf, src_links);
947 	buf->src_count++;
948 
949 	data->fill_len += camdd_buf_get_len(child_buf);
950 }
951 
/*
 * Indices into req_status_items[] below.
 */
typedef enum {
	CAMDD_TS_MAX_BLK	= 0,
	CAMDD_TS_MIN_BLK	= 1,
	CAMDD_TS_BLK_GRAN	= 2,
	CAMDD_TS_EFF_IOSIZE	= 3
} camdd_status_item_index;

/*
 * Tape status entries looked up by camdd_probe_tape(), indexed by
 * camdd_status_item_index.  The entry pointers start out NULL and are
 * filled in by camdd_probe_tape().
 */
static struct camdd_status_items {
	const char *name;
	struct mt_status_entry *entry;
} req_status_items[] = {
	[CAMDD_TS_MAX_BLK]	= { "max_blk", NULL },
	[CAMDD_TS_MIN_BLK]	= { "min_blk", NULL },
	[CAMDD_TS_BLK_GRAN]	= { "blk_gran", NULL },
	[CAMDD_TS_EFF_IOSIZE]	= { "max_effective_iosize", NULL }
};
968 
969 int
camdd_probe_tape(int fd,char * filename,uint64_t * max_iosize,uint64_t * max_blk,uint64_t * min_blk,uint64_t * blk_gran)970 camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
971 		 uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran)
972 {
973 	struct mt_status_data status_data;
974 	char *xml_str = NULL;
975 	unsigned int i;
976 	int retval = 0;
977 
978 	retval = mt_get_xml_str(fd, MTIOCEXTGET, &xml_str);
979 	if (retval != 0)
980 		err(1, "Couldn't get XML string from %s", filename);
981 
982 	retval = mt_get_status(xml_str, &status_data);
983 	if (retval != XML_STATUS_OK) {
984 		warn("couldn't get status for %s", filename);
985 		retval = 1;
986 		goto bailout;
987 	} else
988 		retval = 0;
989 
990 	if (status_data.error != 0) {
991 		warnx("%s", status_data.error_str);
992 		retval = 1;
993 		goto bailout;
994 	}
995 
996 	for (i = 0; i < nitems(req_status_items); i++) {
997                 char *name;
998 
999 		name = __DECONST(char *, req_status_items[i].name);
1000 		req_status_items[i].entry = mt_status_entry_find(&status_data,
1001 		    name);
1002 		if (req_status_items[i].entry == NULL) {
1003 			errx(1, "Cannot find status entry %s",
1004 			    req_status_items[i].name);
1005 		}
1006 	}
1007 
1008 	*max_iosize = req_status_items[CAMDD_TS_EFF_IOSIZE].entry->value_unsigned;
1009 	*max_blk= req_status_items[CAMDD_TS_MAX_BLK].entry->value_unsigned;
1010 	*min_blk= req_status_items[CAMDD_TS_MIN_BLK].entry->value_unsigned;
1011 	*blk_gran = req_status_items[CAMDD_TS_BLK_GRAN].entry->value_unsigned;
1012 bailout:
1013 
1014 	free(xml_str);
1015 	mt_status_free(&status_data);
1016 
1017 	return (retval);
1018 }
1019 
1020 struct camdd_dev *
camdd_probe_file(int fd,struct camdd_io_opts * io_opts,int retry_count,int timeout)1021 camdd_probe_file(int fd, struct camdd_io_opts *io_opts, int retry_count,
1022     int timeout)
1023 {
1024 	struct camdd_dev *dev = NULL;
1025 	struct camdd_dev_file *file_dev;
1026 	uint64_t blocksize = io_opts->blocksize;
1027 
1028 	dev = camdd_alloc_dev(CAMDD_DEV_FILE, NULL, 0, retry_count, timeout);
1029 	if (dev == NULL)
1030 		goto bailout;
1031 
1032 	file_dev = &dev->dev_spec.file;
1033 	file_dev->fd = fd;
1034 	strlcpy(file_dev->filename, io_opts->dev_name,
1035 	    sizeof(file_dev->filename));
1036 	strlcpy(dev->device_name, io_opts->dev_name, sizeof(dev->device_name));
1037 	if (blocksize == 0)
1038 		dev->blocksize = CAMDD_FILE_DEFAULT_BLOCK;
1039 	else
1040 		dev->blocksize = blocksize;
1041 
1042 	if ((io_opts->queue_depth != 0)
1043 	 && (io_opts->queue_depth != 1)) {
1044 		warnx("Queue depth %ju for %s ignored, only 1 outstanding "
1045 		    "command supported", (uintmax_t)io_opts->queue_depth,
1046 		    io_opts->dev_name);
1047 	}
1048 	dev->target_queue_depth = CAMDD_FILE_DEFAULT_DEPTH;
1049 	dev->run = camdd_file_run;
1050 	dev->fetch = NULL;
1051 
1052 	/*
1053 	 * We can effectively access files on byte boundaries.  We'll reset
1054 	 * this for devices like disks that can be accessed on sector
1055 	 * boundaries.
1056 	 */
1057 	dev->sector_size = 1;
1058 
1059 	if ((fd != STDIN_FILENO)
1060 	 && (fd != STDOUT_FILENO)) {
1061 		int retval;
1062 
1063 		retval = fstat(fd, &file_dev->sb);
1064 		if (retval != 0) {
1065 			warn("Cannot stat %s", dev->device_name);
1066 			goto bailout_error;
1067 		}
1068 		if (S_ISREG(file_dev->sb.st_mode)) {
1069 			file_dev->file_type = CAMDD_FILE_REG;
1070 		} else if (S_ISCHR(file_dev->sb.st_mode)) {
1071 			int type;
1072 
1073 			if (ioctl(fd, FIODTYPE, &type) == -1)
1074 				err(1, "FIODTYPE ioctl failed on %s",
1075 				    dev->device_name);
1076 			else {
1077 				if (type & D_TAPE)
1078 					file_dev->file_type = CAMDD_FILE_TAPE;
1079 				else if (type & D_DISK)
1080 					file_dev->file_type = CAMDD_FILE_DISK;
1081 				else if (type & D_MEM)
1082 					file_dev->file_type = CAMDD_FILE_MEM;
1083 				else if (type & D_TTY)
1084 					file_dev->file_type = CAMDD_FILE_TTY;
1085 			}
1086 		} else if (S_ISDIR(file_dev->sb.st_mode)) {
1087 			errx(1, "cannot operate on directory %s",
1088 			    dev->device_name);
1089 		} else if (S_ISFIFO(file_dev->sb.st_mode)) {
1090 			file_dev->file_type = CAMDD_FILE_PIPE;
1091 		} else
1092 			errx(1, "Cannot determine file type for %s",
1093 			    dev->device_name);
1094 
1095 		switch (file_dev->file_type) {
1096 		case CAMDD_FILE_REG:
1097 			if (file_dev->sb.st_size != 0)
1098 				dev->max_sector = file_dev->sb.st_size - 1;
1099 			else
1100 				dev->max_sector = 0;
1101 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1102 			break;
1103 		case CAMDD_FILE_TAPE: {
1104 			uint64_t max_iosize, max_blk, min_blk, blk_gran;
1105 			/*
1106 			 * Check block limits and maximum effective iosize.
1107 			 * Make sure the blocksize is within the block
1108 			 * limits (and a multiple of the minimum blocksize)
1109 			 * and that the blocksize is <= maximum effective
1110 			 * iosize.
1111 			 */
1112 			retval = camdd_probe_tape(fd, dev->device_name,
1113 			    &max_iosize, &max_blk, &min_blk, &blk_gran);
1114 			if (retval != 0)
1115 				errx(1, "Unable to probe tape %s",
1116 				    dev->device_name);
1117 
1118 			/*
1119 			 * The blocksize needs to be <= the maximum
1120 			 * effective I/O size of the tape device.  Note
1121 			 * that this also takes into account the maximum
1122 			 * blocksize reported by READ BLOCK LIMITS.
1123 			 */
1124 			if (dev->blocksize > max_iosize) {
1125 				warnx("Blocksize %u too big for %s, limiting "
1126 				    "to %ju", dev->blocksize, dev->device_name,
1127 				    max_iosize);
1128 				dev->blocksize = max_iosize;
1129 			}
1130 
1131 			/*
1132 			 * The blocksize needs to be at least min_blk;
1133 			 */
1134 			if (dev->blocksize < min_blk) {
1135 				warnx("Blocksize %u too small for %s, "
1136 				    "increasing to %ju", dev->blocksize,
1137 				    dev->device_name, min_blk);
1138 				dev->blocksize = min_blk;
1139 			}
1140 
1141 			/*
1142 			 * And the blocksize needs to be a multiple of
1143 			 * the block granularity.
1144 			 */
1145 			if ((blk_gran != 0)
1146 			 && (dev->blocksize % (1 << blk_gran))) {
1147 				warnx("Blocksize %u for %s not a multiple of "
1148 				    "%d, adjusting to %d", dev->blocksize,
1149 				    dev->device_name, (1 << blk_gran),
1150 				    dev->blocksize & ~((1 << blk_gran) - 1));
1151 				dev->blocksize &= ~((1 << blk_gran) - 1);
1152 			}
1153 
1154 			if (dev->blocksize == 0) {
1155 				errx(1, "Unable to derive valid blocksize for "
1156 				    "%s", dev->device_name);
1157 			}
1158 
1159 			/*
1160 			 * For tape drives, set the sector size to the
1161 			 * blocksize so that we make sure not to write
1162 			 * less than the blocksize out to the drive.
1163 			 */
1164 			dev->sector_size = dev->blocksize;
1165 			break;
1166 		}
1167 		case CAMDD_FILE_DISK: {
1168 			off_t media_size;
1169 			unsigned int sector_size;
1170 
1171 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1172 
1173 			if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) == -1) {
1174 				err(1, "DIOCGSECTORSIZE ioctl failed on %s",
1175 				    dev->device_name);
1176 			}
1177 
1178 			if (sector_size == 0) {
1179 				errx(1, "DIOCGSECTORSIZE ioctl returned "
1180 				    "invalid sector size %u for %s",
1181 				    sector_size, dev->device_name);
1182 			}
1183 
1184 			if (ioctl(fd, DIOCGMEDIASIZE, &media_size) == -1) {
1185 				err(1, "DIOCGMEDIASIZE ioctl failed on %s",
1186 				    dev->device_name);
1187 			}
1188 
1189 			if (media_size == 0) {
1190 				errx(1, "DIOCGMEDIASIZE ioctl returned "
1191 				    "invalid media size %ju for %s",
1192 				    (uintmax_t)media_size, dev->device_name);
1193 			}
1194 
1195 			if (dev->blocksize % sector_size) {
1196 				errx(1, "%s blocksize %u not a multiple of "
1197 				    "sector size %u", dev->device_name,
1198 				    dev->blocksize, sector_size);
1199 			}
1200 
1201 			dev->sector_size = sector_size;
1202 			dev->max_sector = (media_size / sector_size) - 1;
1203 			break;
1204 		}
1205 		case CAMDD_FILE_MEM:
1206 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1207 			break;
1208 		default:
1209 			break;
1210 		}
1211 	}
1212 
1213 	if ((io_opts->offset != 0)
1214 	 && ((file_dev->file_flags & CAMDD_FF_CAN_SEEK) == 0)) {
1215 		warnx("Offset %ju specified for %s, but we cannot seek on %s",
1216 		    io_opts->offset, io_opts->dev_name, io_opts->dev_name);
1217 		goto bailout_error;
1218 	}
1219 #if 0
1220 	else if ((io_opts->offset != 0)
1221 		&& ((io_opts->offset % dev->sector_size) != 0)) {
1222 		warnx("Offset %ju for %s is not a multiple of the "
1223 		      "sector size %u", io_opts->offset,
1224 		      io_opts->dev_name, dev->sector_size);
1225 		goto bailout_error;
1226 	} else {
1227 		dev->start_offset_bytes = io_opts->offset;
1228 	}
1229 #endif
1230 
1231 bailout:
1232 	return (dev);
1233 
1234 bailout_error:
1235 	camdd_free_dev(dev);
1236 	return (NULL);
1237 }
1238 
1239 /*
1240  * Get a get device CCB for the specified device.
1241  */
1242 int
camdd_get_cgd(struct cam_device * device,struct ccb_getdev * cgd)1243 camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd)
1244 {
1245         union ccb *ccb;
1246 	int retval = 0;
1247 
1248 	ccb = cam_getccb(device);
1249 
1250 	if (ccb == NULL) {
1251 		warnx("%s: couldn't allocate CCB", __func__);
1252 		return -1;
1253 	}
1254 
1255 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cgd);
1256 
1257 	ccb->ccb_h.func_code = XPT_GDEV_TYPE;
1258 
1259 	if (cam_send_ccb(device, ccb) < 0) {
1260 		warn("%s: error sending Get Device Information CCB", __func__);
1261 			cam_error_print(device, ccb, CAM_ESF_ALL,
1262 					CAM_EPF_ALL, stderr);
1263 		retval = -1;
1264 		goto bailout;
1265 	}
1266 
1267 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1268 			cam_error_print(device, ccb, CAM_ESF_ALL,
1269 					CAM_EPF_ALL, stderr);
1270 		retval = -1;
1271 		goto bailout;
1272 	}
1273 
1274 	bcopy(&ccb->cgd, cgd, sizeof(struct ccb_getdev));
1275 
1276 bailout:
1277 	cam_freeccb(ccb);
1278 
1279 	return retval;
1280 }
1281 
1282 int
camdd_probe_pass_scsi(struct cam_device * cam_dev,union ccb * ccb,camdd_argmask arglist,int probe_retry_count,int probe_timeout,uint64_t * maxsector,uint32_t * block_len)1283 camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
1284 		 camdd_argmask arglist, int probe_retry_count,
1285 		 int probe_timeout, uint64_t *maxsector, uint32_t *block_len)
1286 {
1287 	struct scsi_read_capacity_data rcap;
1288 	struct scsi_read_capacity_data_long rcaplong;
1289 	int retval = -1;
1290 
1291 	if (ccb == NULL) {
1292 		warnx("%s: error passed ccb is NULL", __func__);
1293 		goto bailout;
1294 	}
1295 
1296 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
1297 
1298 	scsi_read_capacity(&ccb->csio,
1299 			   /*retries*/ probe_retry_count,
1300 			   /*cbfcnp*/ NULL,
1301 			   /*tag_action*/ MSG_SIMPLE_Q_TAG,
1302 			   &rcap,
1303 			   SSD_FULL_SIZE,
1304 			   /*timeout*/ probe_timeout ? probe_timeout : 5000);
1305 
1306 	/* Disable freezing the device queue */
1307 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1308 
1309 	if (arglist & CAMDD_ARG_ERR_RECOVER)
1310 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1311 
1312 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1313 		warn("error sending READ CAPACITY command");
1314 
1315 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1316 				CAM_EPF_ALL, stderr);
1317 
1318 		goto bailout;
1319 	}
1320 
1321 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1322 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1323 		goto bailout;
1324 	}
1325 
1326 	*maxsector = scsi_4btoul(rcap.addr);
1327 	*block_len = scsi_4btoul(rcap.length);
1328 
1329 	/*
1330 	 * A last block of 2^32-1 means that the true capacity is over 2TB,
1331 	 * and we need to issue the long READ CAPACITY to get the real
1332 	 * capacity.  Otherwise, we're all set.
1333 	 */
1334 	if (*maxsector != 0xffffffff) {
1335 		retval = 0;
1336 		goto bailout;
1337 	}
1338 
1339 	scsi_read_capacity_16(&ccb->csio,
1340 			      /*retries*/ probe_retry_count,
1341 			      /*cbfcnp*/ NULL,
1342 			      /*tag_action*/ MSG_SIMPLE_Q_TAG,
1343 			      /*lba*/ 0,
1344 			      /*reladdr*/ 0,
1345 			      /*pmi*/ 0,
1346 			      (uint8_t *)&rcaplong,
1347 			      sizeof(rcaplong),
1348 			      /*sense_len*/ SSD_FULL_SIZE,
1349 			      /*timeout*/ probe_timeout ? probe_timeout : 5000);
1350 
1351 	/* Disable freezing the device queue */
1352 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1353 
1354 	if (arglist & CAMDD_ARG_ERR_RECOVER)
1355 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1356 
1357 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1358 		warn("error sending READ CAPACITY (16) command");
1359 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1360 				CAM_EPF_ALL, stderr);
1361 		goto bailout;
1362 	}
1363 
1364 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1365 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1366 		goto bailout;
1367 	}
1368 
1369 	*maxsector = scsi_8btou64(rcaplong.addr);
1370 	*block_len = scsi_4btoul(rcaplong.length);
1371 
1372 	retval = 0;
1373 
1374 bailout:
1375 	return retval;
1376 }
1377 
1378 int
camdd_probe_pass_nvme(struct cam_device * cam_dev,union ccb * ccb,camdd_argmask arglist,int probe_retry_count,int probe_timeout,uint64_t * maxsector,uint32_t * block_len)1379 camdd_probe_pass_nvme(struct cam_device *cam_dev, union ccb *ccb,
1380 		 camdd_argmask arglist, int probe_retry_count,
1381 		 int probe_timeout, uint64_t *maxsector, uint32_t *block_len)
1382 {
1383 	struct nvme_command *nc = NULL;
1384 	struct nvme_namespace_data nsdata;
1385 	uint32_t nsid = cam_dev->target_lun & UINT32_MAX;
1386 	uint8_t format = 0, lbads = 0;
1387 	int retval = -1;
1388 
1389 	if (ccb == NULL) {
1390 		warnx("%s: error passed ccb is NULL", __func__);
1391 		goto bailout;
1392 	}
1393 
1394 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->nvmeio);
1395 
1396 	/* Send Identify Namespace to get block size and capacity */
1397 	nc = &ccb->nvmeio.cmd;
1398 	nc->opc = NVME_OPC_IDENTIFY;
1399 
1400 	nc->nsid = nsid;
1401 	nc->cdw10 = 0; /* Identify Namespace is CNS = 0 */
1402 
1403 	cam_fill_nvmeadmin(&ccb->nvmeio,
1404 			/*retries*/ probe_retry_count,
1405 			/*cbfcnp*/ NULL,
1406 			CAM_DIR_IN,
1407 			(uint8_t *)&nsdata,
1408 			sizeof(nsdata),
1409 			probe_timeout);
1410 
1411 	/* Disable freezing the device queue */
1412 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1413 
1414 	if (arglist & CAMDD_ARG_ERR_RECOVER)
1415 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1416 
1417 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1418 		warn("error sending Identify Namespace command");
1419 
1420 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1421 				CAM_EPF_ALL, stderr);
1422 
1423 		goto bailout;
1424 	}
1425 
1426 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1427 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1428 		goto bailout;
1429 	}
1430 
1431 	*maxsector = nsdata.nsze;
1432 	/* The LBA Data Size (LBADS) is reported as a power of 2 */
1433 	format = NVMEV(NVME_NS_DATA_FLBAS_FORMAT, nsdata.flbas);
1434 	lbads = NVMEV(NVME_NS_DATA_LBAF_LBADS, nsdata.lbaf[format]);
1435 	*block_len = 1 << lbads;
1436 
1437 	retval = 0;
1438 
1439 bailout:
1440 	return retval;
1441 }
1442 
1443 /*
1444  * Need to implement this.  Do a basic probe:
1445  * - Check the inquiry data, make sure we're talking to a device that we
1446  *   can reasonably expect to talk to -- direct, RBC, CD, WORM.
1447  * - Send a test unit ready, make sure the device is available.
1448  * - Get the capacity and block size.
1449  */
1450 struct camdd_dev *
camdd_probe_pass(struct cam_device * cam_dev,struct camdd_io_opts * io_opts,camdd_argmask arglist,int probe_retry_count,int probe_timeout,int io_retry_count,int io_timeout)1451 camdd_probe_pass(struct cam_device *cam_dev, struct camdd_io_opts *io_opts,
1452 		 camdd_argmask arglist, int probe_retry_count,
1453 		 int probe_timeout, int io_retry_count, int io_timeout)
1454 {
1455 	union ccb *ccb;
1456 	uint64_t maxsector = 0;
1457 	uint32_t cpi_maxio, max_iosize, pass_numblocks;
1458 	uint32_t block_len = 0;
1459 	struct camdd_dev *dev = NULL;
1460 	struct camdd_dev_pass *pass_dev;
1461 	struct kevent ke;
1462 	struct ccb_getdev cgd;
1463 	int retval;
1464 	int scsi_dev_type = T_NODEVICE;
1465 
1466 	if ((retval = camdd_get_cgd(cam_dev, &cgd)) != 0) {
1467 		warnx("%s: error retrieving CGD", __func__);
1468 		return NULL;
1469 	}
1470 
1471 	ccb = cam_getccb(cam_dev);
1472 
1473 	if (ccb == NULL) {
1474 		warnx("%s: error allocating ccb", __func__);
1475 		goto bailout;
1476 	}
1477 
1478 	switch (cgd.protocol) {
1479 	case PROTO_SCSI:
1480 		scsi_dev_type = SID_TYPE(&cam_dev->inq_data);
1481 
1482 		/*
1483 		 * For devices that support READ CAPACITY, we'll attempt to get the
1484 		 * capacity.  Otherwise, we really don't support tape or other
1485 		 * devices via SCSI passthrough, so just return an error in that case.
1486 		 */
1487 		switch (scsi_dev_type) {
1488 		case T_DIRECT:
1489 		case T_WORM:
1490 		case T_CDROM:
1491 		case T_OPTICAL:
1492 		case T_RBC:
1493 		case T_ZBC_HM:
1494 			break;
1495 		default:
1496 			errx(1, "Unsupported SCSI device type %d", scsi_dev_type);
1497 			break; /*NOTREACHED*/
1498 		}
1499 
1500 		if ((retval = camdd_probe_pass_scsi(cam_dev, ccb, probe_retry_count,
1501 						arglist, probe_timeout, &maxsector,
1502 						&block_len))) {
1503 			goto bailout;
1504 		}
1505 		break;
1506 	case PROTO_NVME:
1507 		if ((retval = camdd_probe_pass_nvme(cam_dev, ccb, probe_retry_count,
1508 						arglist, probe_timeout, &maxsector,
1509 						&block_len))) {
1510 			goto bailout;
1511 		}
1512 		break;
1513 	default:
1514 		errx(1, "Unsupported PROTO type %d", cgd.protocol);
1515 		break; /*NOTREACHED*/
1516 	}
1517 
1518 	if (block_len == 0) {
1519 		warnx("Sector size for %s%u is 0, cannot continue",
1520 		    cam_dev->device_name, cam_dev->dev_unit_num);
1521 		goto bailout_error;
1522 	}
1523 
1524 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cpi);
1525 
1526 	ccb->ccb_h.func_code = XPT_PATH_INQ;
1527 	ccb->ccb_h.flags = CAM_DIR_NONE;
1528 	ccb->ccb_h.retry_count = 1;
1529 
1530 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1531 		warn("error sending XPT_PATH_INQ CCB");
1532 
1533 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1534 				CAM_EPF_ALL, stderr);
1535 		goto bailout;
1536 	}
1537 
1538 	EV_SET(&ke, cam_dev->fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1539 
1540 	dev = camdd_alloc_dev(CAMDD_DEV_PASS, &ke, 1, io_retry_count,
1541 			      io_timeout);
1542 	if (dev == NULL)
1543 		goto bailout;
1544 
1545 	pass_dev = &dev->dev_spec.pass;
1546 	pass_dev->scsi_dev_type = scsi_dev_type;
1547 	pass_dev->protocol = cgd.protocol;
1548 	pass_dev->dev = cam_dev;
1549 	pass_dev->max_sector = maxsector;
1550 	pass_dev->block_len = block_len;
1551 	pass_dev->cpi_maxio = ccb->cpi.maxio;
1552 	snprintf(dev->device_name, sizeof(dev->device_name), "%s%u",
1553 		 pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
1554 	dev->sector_size = block_len;
1555 	dev->max_sector = maxsector;
1556 
1557 
1558 	/*
1559 	 * Determine the optimal blocksize to use for this device.
1560 	 */
1561 
1562 	/*
1563 	 * If the controller has not specified a maximum I/O size,
1564 	 * just go with 128K as a somewhat conservative value.
1565 	 */
1566 	if (pass_dev->cpi_maxio == 0)
1567 		cpi_maxio = 131072;
1568 	else
1569 		cpi_maxio = pass_dev->cpi_maxio;
1570 
1571 	/*
1572 	 * If the controller has a large maximum I/O size, limit it
1573 	 * to something smaller so that the kernel doesn't have trouble
1574 	 * allocating buffers to copy data in and out for us.
1575 	 * XXX KDM this is until we have unmapped I/O support in the kernel.
1576 	 */
1577 	max_iosize = min(cpi_maxio, CAMDD_PASS_MAX_BLOCK);
1578 
1579 	/*
1580 	 * If we weren't able to get a block size for some reason,
1581 	 * default to 512 bytes.
1582 	 */
1583 	block_len = pass_dev->block_len;
1584 	if (block_len == 0)
1585 		block_len = 512;
1586 
1587 	/*
1588 	 * Figure out how many blocksize chunks will fit in the
1589 	 * maximum I/O size.
1590 	 */
1591 	pass_numblocks = max_iosize / block_len;
1592 
1593 	/*
1594 	 * And finally, multiple the number of blocks by the LBA
1595 	 * length to get our maximum block size;
1596 	 */
1597 	dev->blocksize = pass_numblocks * block_len;
1598 
1599 	if (io_opts->blocksize != 0) {
1600 		if ((io_opts->blocksize % dev->sector_size) != 0) {
1601 			warnx("Blocksize %ju for %s is not a multiple of "
1602 			      "sector size %u", (uintmax_t)io_opts->blocksize,
1603 			      dev->device_name, dev->sector_size);
1604 			goto bailout_error;
1605 		}
1606 		dev->blocksize = io_opts->blocksize;
1607 	}
1608 	dev->target_queue_depth = CAMDD_PASS_DEFAULT_DEPTH;
1609 	if (io_opts->queue_depth != 0)
1610 		dev->target_queue_depth = io_opts->queue_depth;
1611 
1612 	if (io_opts->offset != 0) {
1613 		if (io_opts->offset > (dev->max_sector * dev->sector_size)) {
1614 			warnx("Offset %ju is past the end of device %s",
1615 			    io_opts->offset, dev->device_name);
1616 			goto bailout_error;
1617 		}
1618 #if 0
1619 		else if ((io_opts->offset % dev->sector_size) != 0) {
1620 			warnx("Offset %ju for %s is not a multiple of the "
1621 			      "sector size %u", io_opts->offset,
1622 			      dev->device_name, dev->sector_size);
1623 			goto bailout_error;
1624 		}
1625 		dev->start_offset_bytes = io_opts->offset;
1626 #endif
1627 	}
1628 
1629 	dev->min_cmd_size = io_opts->min_cmd_size;
1630 
1631 	dev->run = camdd_pass_run;
1632 	dev->fetch = camdd_pass_fetch;
1633 
1634 bailout:
1635 	cam_freeccb(ccb);
1636 
1637 	return (dev);
1638 
1639 bailout_error:
1640 	cam_freeccb(ccb);
1641 
1642 	camdd_free_dev(dev);
1643 
1644 	return (NULL);
1645 }
1646 
1647 void
nvme_read_write(struct ccb_nvmeio * nvmeio,uint32_t retries,void (* cbfcnp)(struct cam_periph *,union ccb *),uint32_t nsid,int readop,uint64_t lba,uint32_t block_count,uint8_t * data_ptr,uint32_t dxfer_len,uint32_t timeout)1648 nvme_read_write(struct ccb_nvmeio *nvmeio, uint32_t retries,
1649 		void (*cbfcnp)(struct cam_periph *, union ccb *),
1650 		uint32_t nsid, int readop, uint64_t lba,
1651 		uint32_t block_count, uint8_t *data_ptr, uint32_t dxfer_len,
1652 		uint32_t timeout)
1653 {
1654 	struct nvme_command *nc = &nvmeio->cmd;
1655 
1656 	nc->opc = readop ? NVME_OPC_READ : NVME_OPC_WRITE;
1657 
1658 	nc->nsid = nsid;
1659 
1660 	nc->cdw10 = lba & UINT32_MAX;
1661 	nc->cdw11 = lba >> 32;
1662 
1663 	/* NLB (bits 15:0) is a zero based value */
1664 	nc->cdw12 = (block_count - 1) & UINT16_MAX;
1665 
1666 	cam_fill_nvmeio(nvmeio,
1667 			retries,
1668 			cbfcnp,
1669 			readop ? CAM_DIR_IN : CAM_DIR_OUT,
1670 			data_ptr,
1671 			dxfer_len,
1672 			timeout);
1673 }
1674 
1675 void *
camdd_worker(void * arg)1676 camdd_worker(void *arg)
1677 {
1678 	struct camdd_dev *dev = arg;
1679 	struct camdd_buf *buf;
1680 	struct timespec ts, *kq_ts;
1681 
1682 	ts.tv_sec = 0;
1683 	ts.tv_nsec = 0;
1684 
1685 	pthread_mutex_lock(&dev->mutex);
1686 
1687 	dev->flags |= CAMDD_DEV_FLAG_ACTIVE;
1688 
1689 	for (;;) {
1690 		struct kevent ke;
1691 		int retval = 0;
1692 
1693 		/*
1694 		 * XXX KDM check the reorder queue depth?
1695 		 */
1696 		if (dev->write_dev == 0) {
1697 			uint32_t our_depth, peer_depth, peer_bytes, our_bytes;
1698 			uint32_t target_depth = dev->target_queue_depth;
1699 			uint32_t peer_target_depth =
1700 			    dev->peer_dev->target_queue_depth;
1701 			uint32_t peer_blocksize = dev->peer_dev->blocksize;
1702 
1703 			camdd_get_depth(dev, &our_depth, &peer_depth,
1704 					&our_bytes, &peer_bytes);
1705 
1706 #if 0
1707 			while (((our_depth < target_depth)
1708 			     && (peer_depth < peer_target_depth))
1709 			    || ((peer_bytes + our_bytes) <
1710 				 (peer_blocksize * 2))) {
1711 #endif
1712 			while (((our_depth + peer_depth) <
1713 			        (target_depth + peer_target_depth))
1714 			    || ((peer_bytes + our_bytes) <
1715 				(peer_blocksize * 3))) {
1716 
1717 				retval = camdd_queue(dev, NULL);
1718 				if (retval == 1)
1719 					break;
1720 				else if (retval != 0) {
1721 					error_exit = 1;
1722 					goto bailout;
1723 				}
1724 
1725 				camdd_get_depth(dev, &our_depth, &peer_depth,
1726 						&our_bytes, &peer_bytes);
1727 			}
1728 		}
1729 		/*
1730 		 * See if we have any I/O that is ready to execute.
1731 		 */
1732 		buf = STAILQ_FIRST(&dev->run_queue);
1733 		if (buf != NULL) {
1734 			while (dev->target_queue_depth > dev->cur_active_io) {
1735 				retval = dev->run(dev);
1736 				if (retval == -1) {
1737 					dev->flags |= CAMDD_DEV_FLAG_EOF;
1738 					error_exit = 1;
1739 					break;
1740 				} else if (retval != 0) {
1741 					break;
1742 				}
1743 			}
1744 		}
1745 
1746 		/*
1747 		 * We've reached EOF, or our partner has reached EOF.
1748 		 */
1749 		if ((dev->flags & CAMDD_DEV_FLAG_EOF)
1750 		 || (dev->flags & CAMDD_DEV_FLAG_PEER_EOF)) {
1751 			if (dev->write_dev != 0) {
1752 			 	if ((STAILQ_EMPTY(&dev->work_queue))
1753 				 && (dev->num_run_queue == 0)
1754 				 && (dev->cur_active_io == 0)) {
1755 					goto bailout;
1756 				}
1757 			} else {
1758 				/*
1759 				 * If we're the reader, and the writer
1760 				 * got EOF, he is already done.  If we got
1761 				 * the EOF, then we need to wait until
1762 				 * everything is flushed out for the writer.
1763 				 */
1764 				if (dev->flags & CAMDD_DEV_FLAG_PEER_EOF) {
1765 					goto bailout;
1766 				} else if ((dev->num_peer_work_queue == 0)
1767 					&& (dev->num_peer_done_queue == 0)
1768 					&& (dev->cur_active_io == 0)
1769 					&& (dev->num_run_queue == 0)) {
1770 					goto bailout;
1771 				}
1772 			}
1773 			/*
1774 			 * XXX KDM need to do something about the pending
1775 			 * queue and cleanup resources.
1776 			 */
1777 		}
1778 
1779 		if ((dev->write_dev == 0)
1780 		 && (dev->cur_active_io == 0)
1781 		 && (dev->peer_bytes_queued < dev->peer_dev->blocksize))
1782 			kq_ts = &ts;
1783 		else
1784 			kq_ts = NULL;
1785 
1786 		/*
1787 		 * Run kevent to see if there are events to process.
1788 		 */
1789 		pthread_mutex_unlock(&dev->mutex);
1790 		retval = kevent(dev->kq, NULL, 0, &ke, 1, kq_ts);
1791 		pthread_mutex_lock(&dev->mutex);
1792 		if (retval == -1) {
1793 			warn("%s: error returned from kevent",__func__);
1794 			goto bailout;
1795 		} else if (retval != 0) {
1796 			switch (ke.filter) {
1797 			case EVFILT_READ:
1798 				if (dev->fetch != NULL) {
1799 					retval = dev->fetch(dev);
1800 					if (retval == -1) {
1801 						error_exit = 1;
1802 						goto bailout;
1803 					}
1804 				}
1805 				break;
1806 			case EVFILT_SIGNAL:
1807 				/*
1808 				 * We register for this so we don't get
1809 				 * an error as a result of a SIGINFO or a
1810 				 * SIGINT.  It will actually get handled
1811 				 * by the signal handler.  If we get a
1812 				 * SIGINT, bail out without printing an
1813 				 * error message.  Any other signals
1814 				 * will result in the error message above.
1815 				 */
1816 				if (ke.ident == SIGINT)
1817 					goto bailout;
1818 				break;
1819 			case EVFILT_USER:
1820 				retval = 0;
1821 				/*
1822 				 * Check to see if the other thread has
1823 				 * queued any I/O for us to do.  (In this
1824 				 * case we're the writer.)
1825 				 */
1826 				for (buf = STAILQ_FIRST(&dev->work_queue);
1827 				     buf != NULL;
1828 				     buf = STAILQ_FIRST(&dev->work_queue)) {
1829 					STAILQ_REMOVE_HEAD(&dev->work_queue,
1830 							   work_links);
1831 					retval = camdd_queue(dev, buf);
1832 					/*
1833 					 * We keep going unless we get an
1834 					 * actual error.  If we get EOF, we
1835 					 * still want to remove the buffers
1836 					 * from the queue and send the back
1837 					 * to the reader thread.
1838 					 */
1839 					if (retval == -1) {
1840 						error_exit = 1;
1841 						goto bailout;
1842 					} else
1843 						retval = 0;
1844 				}
1845 
1846 				/*
1847 				 * Next check to see if the other thread has
1848 				 * queued any completed buffers back to us.
1849 				 * (In this case we're the reader.)
1850 				 */
1851 				for (buf = STAILQ_FIRST(&dev->peer_done_queue);
1852 				     buf != NULL;
1853 				     buf = STAILQ_FIRST(&dev->peer_done_queue)){
1854 					STAILQ_REMOVE_HEAD(
1855 					    &dev->peer_done_queue, work_links);
1856 					dev->num_peer_done_queue--;
1857 					camdd_peer_done(buf);
1858 				}
1859 				break;
1860 			default:
1861 				warnx("%s: unknown kevent filter %d",
1862 				      __func__, ke.filter);
1863 				break;
1864 			}
1865 		}
1866 	}
1867 
1868 bailout:
1869 
1870 	dev->flags &= ~CAMDD_DEV_FLAG_ACTIVE;
1871 
1872 	/* XXX KDM cleanup resources here? */
1873 
1874 	pthread_mutex_unlock(&dev->mutex);
1875 
1876 	need_exit = 1;
1877 	sem_post(&camdd_sem);
1878 
1879 	return (NULL);
1880 }
1881 
1882 /*
1883  * Simplistic translation of CCB status to our local status.
1884  */
1885 camdd_buf_status
1886 camdd_ccb_status(union ccb *ccb, int protocol)
1887 {
1888 	camdd_buf_status status = CAMDD_STATUS_NONE;
1889 	cam_status ccb_status;
1890 
1891 	ccb_status = ccb->ccb_h.status & CAM_STATUS_MASK;
1892 
1893 	switch (protocol) {
1894 	case PROTO_SCSI:
1895 		switch (ccb_status) {
1896 		case CAM_REQ_CMP: {
1897 			if (ccb->csio.resid == 0) {
1898 				status = CAMDD_STATUS_OK;
1899 			} else if (ccb->csio.dxfer_len > ccb->csio.resid) {
1900 				status = CAMDD_STATUS_SHORT_IO;
1901 			} else {
1902 				status = CAMDD_STATUS_EOF;
1903 			}
1904 			break;
1905 		}
1906 		case CAM_SCSI_STATUS_ERROR: {
1907 			switch (ccb->csio.scsi_status) {
1908 			case SCSI_STATUS_OK:
1909 			case SCSI_STATUS_COND_MET:
1910 			case SCSI_STATUS_INTERMED:
1911 			case SCSI_STATUS_INTERMED_COND_MET:
1912 				status = CAMDD_STATUS_OK;
1913 				break;
1914 			case SCSI_STATUS_CMD_TERMINATED:
1915 			case SCSI_STATUS_CHECK_COND:
1916 			case SCSI_STATUS_QUEUE_FULL:
1917 			case SCSI_STATUS_BUSY:
1918 			case SCSI_STATUS_RESERV_CONFLICT:
1919 			default:
1920 				status = CAMDD_STATUS_ERROR;
1921 				break;
1922 			}
1923 			break;
1924 		}
1925 		default:
1926 			status = CAMDD_STATUS_ERROR;
1927 			break;
1928 		}
1929 		break;
1930 	case PROTO_NVME:
1931 		switch (ccb_status) {
1932 		case CAM_REQ_CMP:
1933 			status = CAMDD_STATUS_OK;
1934 			break;
1935 		default:
1936 			status = CAMDD_STATUS_ERROR;
1937 			break;
1938 		}
1939 		break;
1940 	default:
1941 		status = CAMDD_STATUS_ERROR;
1942 		break;
1943 	}
1944 
1945 	return (status);
1946 }
1947 
1948 /*
1949  * Queue a buffer to our peer's work thread for writing.
1950  *
1951  * Returns 0 for success, -1 for failure, 1 if the other thread exited.
1952  */
1953 int
1954 camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf)
1955 {
1956 	struct kevent ke;
1957 	STAILQ_HEAD(, camdd_buf) local_queue;
1958 	struct camdd_buf *buf1, *buf2;
1959 	struct camdd_buf_data *data = NULL;
1960 	uint64_t peer_bytes_queued = 0;
1961 	int active = 1;
1962 	int retval = 0;
1963 
1964 	STAILQ_INIT(&local_queue);
1965 
1966 	/*
1967 	 * Since we're the reader, we need to queue our I/O to the writer
1968 	 * in sequential order in order to make sure it gets written out
1969 	 * in sequential order.
1970 	 *
1971 	 * Check the next expected I/O starting offset.  If this doesn't
1972 	 * match, put it on the reorder queue.
1973 	 */
1974 	if ((buf->lba * dev->sector_size) != dev->next_completion_pos_bytes) {
1975 
1976 		/*
1977 		 * If there is nothing on the queue, there is no sorting
1978 		 * needed.
1979 		 */
1980 		if (STAILQ_EMPTY(&dev->reorder_queue)) {
1981 			STAILQ_INSERT_TAIL(&dev->reorder_queue, buf, links);
1982 			dev->num_reorder_queue++;
1983 			goto bailout;
1984 		}
1985 
1986 		/*
1987 		 * Sort in ascending order by starting LBA.  There should
1988 		 * be no identical LBAs.
1989 		 */
1990 		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
1991 		     buf1 = buf2) {
1992 			buf2 = STAILQ_NEXT(buf1, links);
1993 			if (buf->lba < buf1->lba) {
1994 				/*
1995 				 * If we're less than the first one, then
1996 				 * we insert at the head of the list
1997 				 * because this has to be the first element
1998 				 * on the list.
1999 				 */
2000 				STAILQ_INSERT_HEAD(&dev->reorder_queue,
2001 						   buf, links);
2002 				dev->num_reorder_queue++;
2003 				break;
2004 			} else if (buf->lba > buf1->lba) {
2005 				if (buf2 == NULL) {
2006 					STAILQ_INSERT_TAIL(&dev->reorder_queue,
2007 					    buf, links);
2008 					dev->num_reorder_queue++;
2009 					break;
2010 				} else if (buf->lba < buf2->lba) {
2011 					STAILQ_INSERT_AFTER(&dev->reorder_queue,
2012 					    buf1, buf, links);
2013 					dev->num_reorder_queue++;
2014 					break;
2015 				}
2016 			} else {
2017 				errx(1, "Found buffers with duplicate LBA %ju!",
2018 				     buf->lba);
2019 			}
2020 		}
2021 		goto bailout;
2022 	} else {
2023 
2024 		/*
2025 		 * We're the next expected I/O completion, so put ourselves
2026 		 * on the local queue to be sent to the writer.  We use
2027 		 * work_links here so that we can queue this to the
2028 		 * peer_work_queue before taking the buffer off of the
2029 		 * local_queue.
2030 		 */
2031 		dev->next_completion_pos_bytes += buf->len;
2032 		STAILQ_INSERT_TAIL(&local_queue, buf, work_links);
2033 
2034 		/*
2035 		 * Go through the reorder queue looking for more sequential
2036 		 * I/O and add it to the local queue.
2037 		 */
2038 		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
2039 		     buf1 = STAILQ_FIRST(&dev->reorder_queue)) {
2040 			/*
2041 			 * As soon as we see an I/O that is out of sequence,
2042 			 * we're done.
2043 			 */
2044 			if ((buf1->lba * dev->sector_size) !=
2045 			     dev->next_completion_pos_bytes)
2046 				break;
2047 
2048 			STAILQ_REMOVE_HEAD(&dev->reorder_queue, links);
2049 			dev->num_reorder_queue--;
2050 			STAILQ_INSERT_TAIL(&local_queue, buf1, work_links);
2051 			dev->next_completion_pos_bytes += buf1->len;
2052 		}
2053 	}
2054 
2055 	/*
2056 	 * Setup the event to let the other thread know that it has work
2057 	 * pending.
2058 	 */
2059 	EV_SET(&ke, (uintptr_t)&dev->peer_dev->work_queue, EVFILT_USER, 0,
2060 	       NOTE_TRIGGER, 0, NULL);
2061 
2062 	/*
2063 	 * Put this on our shadow queue so that we know what we've queued
2064 	 * to the other thread.
2065 	 */
2066 	STAILQ_FOREACH_SAFE(buf1, &local_queue, work_links, buf2) {
2067 		if (buf1->buf_type != CAMDD_BUF_DATA) {
2068 			errx(1, "%s: should have a data buffer, not an "
2069 			    "indirect buffer", __func__);
2070 		}
2071 		data = &buf1->buf_type_spec.data;
2072 
2073 		/*
2074 		 * We only need to send one EOF to the writer, and don't
2075 		 * need to continue sending EOFs after that.
2076 		 */
2077 		if (buf1->status == CAMDD_STATUS_EOF) {
2078 			if (dev->flags & CAMDD_DEV_FLAG_EOF_SENT) {
2079 				STAILQ_REMOVE(&local_queue, buf1, camdd_buf,
2080 				    work_links);
2081 				camdd_release_buf(buf1);
2082 				retval = 1;
2083 				continue;
2084 			}
2085 			dev->flags |= CAMDD_DEV_FLAG_EOF_SENT;
2086 		}
2087 
2088 
2089 		STAILQ_INSERT_TAIL(&dev->peer_work_queue, buf1, links);
2090 		peer_bytes_queued += (data->fill_len - data->resid);
2091 		dev->peer_bytes_queued += (data->fill_len - data->resid);
2092 		dev->num_peer_work_queue++;
2093 	}
2094 
2095 	if (STAILQ_FIRST(&local_queue) == NULL)
2096 		goto bailout;
2097 
2098 	/*
2099 	 * Drop our mutex and pick up the other thread's mutex.  We need to
2100 	 * do this to avoid deadlocks.
2101 	 */
2102 	pthread_mutex_unlock(&dev->mutex);
2103 	pthread_mutex_lock(&dev->peer_dev->mutex);
2104 
2105 	if (dev->peer_dev->flags & CAMDD_DEV_FLAG_ACTIVE) {
2106 		/*
2107 		 * Put the buffers on the other thread's incoming work queue.
2108 		 */
2109 		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2110 		     buf1 = STAILQ_FIRST(&local_queue)) {
2111 			STAILQ_REMOVE_HEAD(&local_queue, work_links);
2112 			STAILQ_INSERT_TAIL(&dev->peer_dev->work_queue, buf1,
2113 					   work_links);
2114 		}
2115 		/*
2116 		 * Send an event to the other thread's kqueue to let it know
2117 		 * that there is something on the work queue.
2118 		 */
2119 		retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2120 		if (retval == -1)
2121 			warn("%s: unable to add peer work_queue kevent",
2122 			     __func__);
2123 		else
2124 			retval = 0;
2125 	} else
2126 		active = 0;
2127 
2128 	pthread_mutex_unlock(&dev->peer_dev->mutex);
2129 	pthread_mutex_lock(&dev->mutex);
2130 
2131 	/*
2132 	 * If the other side isn't active, run through the queue and
2133 	 * release all of the buffers.
2134 	 */
2135 	if (active == 0) {
2136 		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2137 		     buf1 = STAILQ_FIRST(&local_queue)) {
2138 			STAILQ_REMOVE_HEAD(&local_queue, work_links);
2139 			STAILQ_REMOVE(&dev->peer_work_queue, buf1, camdd_buf,
2140 				      links);
2141 			dev->num_peer_work_queue--;
2142 			camdd_release_buf(buf1);
2143 		}
2144 		dev->peer_bytes_queued -= peer_bytes_queued;
2145 		retval = 1;
2146 	}
2147 
2148 bailout:
2149 	return (retval);
2150 }
2151 
2152 /*
2153  * Return a buffer to the reader thread when we have completed writing it.
2154  */
2155 int
2156 camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf)
2157 {
2158 	struct kevent ke;
2159 	int retval = 0;
2160 
2161 	/*
2162 	 * Setup the event to let the other thread know that we have
2163 	 * completed a buffer.
2164 	 */
2165 	EV_SET(&ke, (uintptr_t)&dev->peer_dev->peer_done_queue, EVFILT_USER, 0,
2166 	       NOTE_TRIGGER, 0, NULL);
2167 
2168 	/*
2169 	 * Drop our lock and acquire the other thread's lock before
2170 	 * manipulating
2171 	 */
2172 	pthread_mutex_unlock(&dev->mutex);
2173 	pthread_mutex_lock(&dev->peer_dev->mutex);
2174 
2175 	/*
2176 	 * Put the buffer on the reader thread's peer done queue now that
2177 	 * we have completed it.
2178 	 */
2179 	STAILQ_INSERT_TAIL(&dev->peer_dev->peer_done_queue, peer_buf,
2180 			   work_links);
2181 	dev->peer_dev->num_peer_done_queue++;
2182 
2183 	/*
2184 	 * Send an event to the peer thread to let it know that we've added
2185 	 * something to its peer done queue.
2186 	 */
2187 	retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2188 	if (retval == -1)
2189 		warn("%s: unable to add peer_done_queue kevent", __func__);
2190 	else
2191 		retval = 0;
2192 
2193 	/*
2194 	 * Drop the other thread's lock and reacquire ours.
2195 	 */
2196 	pthread_mutex_unlock(&dev->peer_dev->mutex);
2197 	pthread_mutex_lock(&dev->mutex);
2198 
2199 	return (retval);
2200 }
2201 
2202 /*
2203  * Free a buffer that was written out by the writer thread and returned to
2204  * the reader thread.
2205  */
2206 void
2207 camdd_peer_done(struct camdd_buf *buf)
2208 {
2209 	struct camdd_dev *dev;
2210 	struct camdd_buf_data *data;
2211 
2212 	dev = buf->dev;
2213 	if (buf->buf_type != CAMDD_BUF_DATA) {
2214 		errx(1, "%s: should have a data buffer, not an "
2215 		    "indirect buffer", __func__);
2216 	}
2217 
2218 	data = &buf->buf_type_spec.data;
2219 
2220 	STAILQ_REMOVE(&dev->peer_work_queue, buf, camdd_buf, links);
2221 	dev->num_peer_work_queue--;
2222 	dev->peer_bytes_queued -= (data->fill_len - data->resid);
2223 
2224 	if (buf->status == CAMDD_STATUS_EOF)
2225 		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2226 
2227 	STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2228 }
2229 
2230 /*
2231  * Assumes caller holds the lock for this device.
2232  */
2233 void
2234 camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
2235 		   int *error_count)
2236 {
2237 	int retval = 0;
2238 
2239 	/*
2240 	 * If we're the reader, we need to send the completed I/O
2241 	 * to the writer.  If we're the writer, we need to just
2242 	 * free up resources, or let the reader know if we've
2243 	 * encountered an error.
2244 	 */
2245 	if (dev->write_dev == 0) {
2246 		retval = camdd_queue_peer_buf(dev, buf);
2247 		if (retval != 0)
2248 			(*error_count)++;
2249 	} else {
2250 		struct camdd_buf *tmp_buf, *next_buf;
2251 
2252 		STAILQ_FOREACH_SAFE(tmp_buf, &buf->src_list, src_links,
2253 				    next_buf) {
2254 			struct camdd_buf *src_buf;
2255 			struct camdd_buf_indirect *indirect;
2256 
2257 			STAILQ_REMOVE(&buf->src_list, tmp_buf,
2258 				      camdd_buf, src_links);
2259 
2260 			tmp_buf->status = buf->status;
2261 
2262 			if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
2263 				camdd_complete_peer_buf(dev, tmp_buf);
2264 				continue;
2265 			}
2266 
2267 			indirect = &tmp_buf->buf_type_spec.indirect;
2268 			src_buf = indirect->src_buf;
2269 			src_buf->refcount--;
2270 			/*
2271 			 * XXX KDM we probably need to account for
2272 			 * exactly how many bytes we were able to
2273 			 * write.  Allocate the residual to the
2274 			 * first N buffers?  Or just track the
2275 			 * number of bytes written?  Right now the reader
2276 			 * doesn't do anything with a residual.
2277 			 */
2278 			src_buf->status = buf->status;
2279 			if (src_buf->refcount <= 0)
2280 				camdd_complete_peer_buf(dev, src_buf);
2281 			STAILQ_INSERT_TAIL(&dev->free_indirect_queue,
2282 					   tmp_buf, links);
2283 		}
2284 
2285 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2286 	}
2287 }
2288 
2289 /*
2290  * Fetch all completed commands from the pass(4) device.
2291  *
2292  * Returns the number of commands received, or -1 if any of the commands
2293  * completed with an error.  Returns 0 if no commands are available.
2294  */
2295 int
2296 camdd_pass_fetch(struct camdd_dev *dev)
2297 {
2298 	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2299 	union ccb ccb;
2300 	int retval = 0, num_fetched = 0, error_count = 0;
2301 
2302 	pthread_mutex_unlock(&dev->mutex);
2303 	/*
2304 	 * XXX KDM we don't distinguish between EFAULT and ENOENT.
2305 	 */
2306 	while ((retval = ioctl(pass_dev->dev->fd, CAMIOGET, &ccb)) != -1) {
2307 		struct camdd_buf *buf;
2308 		struct camdd_buf_data *data;
2309 		cam_status ccb_status;
2310 		union ccb *buf_ccb;
2311 
2312 		buf = ccb.ccb_h.ccb_buf;
2313 		data = &buf->buf_type_spec.data;
2314 		buf_ccb = &data->ccb;
2315 
2316 		num_fetched++;
2317 
2318 		/*
2319 		 * Copy the CCB back out so we get status, sense data, etc.
2320 		 */
2321 		bcopy(&ccb, buf_ccb, sizeof(ccb));
2322 
2323 		pthread_mutex_lock(&dev->mutex);
2324 
2325 		/*
2326 		 * We're now done, so take this off the active queue.
2327 		 */
2328 		STAILQ_REMOVE(&dev->active_queue, buf, camdd_buf, links);
2329 		dev->cur_active_io--;
2330 
2331 		ccb_status = ccb.ccb_h.status & CAM_STATUS_MASK;
2332 		if (ccb_status != CAM_REQ_CMP) {
2333 			cam_error_print(pass_dev->dev, &ccb, CAM_ESF_ALL,
2334 					CAM_EPF_ALL, stderr);
2335 		}
2336 
2337 		switch (pass_dev->protocol) {
2338 		case PROTO_SCSI:
2339 			data->resid = ccb.csio.resid;
2340 			dev->bytes_transferred += (ccb.csio.dxfer_len - ccb.csio.resid);
2341 			break;
2342 		case PROTO_NVME:
2343 			data->resid = 0;
2344 			dev->bytes_transferred += ccb.nvmeio.dxfer_len;
2345 			break;
2346 		default:
2347 			return -1;
2348 			break;
2349 		}
2350 
2351 		if (buf->status == CAMDD_STATUS_NONE)
2352 			buf->status = camdd_ccb_status(&ccb, pass_dev->protocol);
2353 		if (buf->status == CAMDD_STATUS_ERROR)
2354 			error_count++;
2355 		else if (buf->status == CAMDD_STATUS_EOF) {
2356 			/*
2357 			 * Once we queue this buffer to our partner thread,
2358 			 * he will know that we've hit EOF.
2359 			 */
2360 			dev->flags |= CAMDD_DEV_FLAG_EOF;
2361 		}
2362 
2363 		camdd_complete_buf(dev, buf, &error_count);
2364 
2365 		/*
2366 		 * Unlock in preparation for the ioctl call.
2367 		 */
2368 		pthread_mutex_unlock(&dev->mutex);
2369 	}
2370 
2371 	pthread_mutex_lock(&dev->mutex);
2372 
2373 	if (error_count > 0)
2374 		return (-1);
2375 	else
2376 		return (num_fetched);
2377 }
2378 
2379 /*
2380  * Returns -1 for error, 0 for success/continue, and 1 for resource
2381  * shortage/stop processing.
2382  */
2383 int
2384 camdd_file_run(struct camdd_dev *dev)
2385 {
2386 	struct camdd_dev_file *file_dev = &dev->dev_spec.file;
2387 	struct camdd_buf_data *data;
2388 	struct camdd_buf *buf;
2389 	off_t io_offset;
2390 	int retval = 0, write_dev = dev->write_dev;
2391 	int error_count = 0, no_resources = 0, double_buf_needed = 0;
2392 	uint32_t num_sectors = 0, db_len = 0;
2393 
2394 	buf = STAILQ_FIRST(&dev->run_queue);
2395 	if (buf == NULL) {
2396 		no_resources = 1;
2397 		goto bailout;
2398 	} else if ((dev->write_dev == 0)
2399 		&& (dev->flags & (CAMDD_DEV_FLAG_EOF |
2400 				  CAMDD_DEV_FLAG_EOF_SENT))) {
2401 		STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2402 		dev->num_run_queue--;
2403 		buf->status = CAMDD_STATUS_EOF;
2404 		error_count++;
2405 		goto bailout;
2406 	}
2407 
2408 	/*
2409 	 * If we're writing, we need to go through the source buffer list
2410 	 * and create an S/G list.
2411 	 */
2412 	if (write_dev != 0) {
2413 		retval = camdd_buf_sg_create(buf, /*iovec*/ 1,
2414 		    dev->sector_size, &num_sectors, &double_buf_needed);
2415 		if (retval != 0) {
2416 			no_resources = 1;
2417 			goto bailout;
2418 		}
2419 	}
2420 
2421 	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2422 	dev->num_run_queue--;
2423 
2424 	data = &buf->buf_type_spec.data;
2425 
2426 	/*
2427 	 * pread(2) and pwrite(2) offsets are byte offsets.
2428 	 */
2429 	io_offset = buf->lba * dev->sector_size;
2430 
2431 	/*
2432 	 * Unlock the mutex while we read or write.
2433 	 */
2434 	pthread_mutex_unlock(&dev->mutex);
2435 
2436 	/*
2437 	 * Note that we don't need to double buffer if we're the reader
2438 	 * because in that case, we have allocated a single buffer of
2439 	 * sufficient size to do the read.  This copy is necessary on
2440 	 * writes because if one of the components of the S/G list is not
2441 	 * a sector size multiple, the kernel will reject the write.  This
2442 	 * is unfortunate but not surprising.  So this will make sure that
2443 	 * we're using a single buffer that is a multiple of the sector size.
2444 	 */
2445 	if ((double_buf_needed != 0)
2446 	 && (data->sg_count > 1)
2447 	 && (write_dev != 0)) {
2448 		uint32_t cur_offset;
2449 		int i;
2450 
2451 		if (file_dev->tmp_buf == NULL)
2452 			file_dev->tmp_buf = calloc(dev->blocksize, 1);
2453 		if (file_dev->tmp_buf == NULL) {
2454 			buf->status = CAMDD_STATUS_ERROR;
2455 			error_count++;
2456 			pthread_mutex_lock(&dev->mutex);
2457 			goto bailout;
2458 		}
2459 		for (i = 0, cur_offset = 0; i < data->sg_count; i++) {
2460 			bcopy(data->iovec[i].iov_base,
2461 			    &file_dev->tmp_buf[cur_offset],
2462 			    data->iovec[i].iov_len);
2463 			cur_offset += data->iovec[i].iov_len;
2464 		}
2465 		db_len = cur_offset;
2466 	}
2467 
2468 	if (file_dev->file_flags & CAMDD_FF_CAN_SEEK) {
2469 		if (write_dev == 0) {
2470 			/*
2471 			 * XXX KDM is there any way we would need a S/G
2472 			 * list here?
2473 			 */
2474 			retval = pread(file_dev->fd, data->buf,
2475 			    buf->len, io_offset);
2476 		} else {
2477 			if (double_buf_needed != 0) {
2478 				retval = pwrite(file_dev->fd, file_dev->tmp_buf,
2479 				    db_len, io_offset);
2480 			} else if (data->sg_count == 0) {
2481 				retval = pwrite(file_dev->fd, data->buf,
2482 				    data->fill_len, io_offset);
2483 			} else {
2484 				retval = pwritev(file_dev->fd, data->iovec,
2485 				    data->sg_count, io_offset);
2486 			}
2487 		}
2488 	} else {
2489 		if (write_dev == 0) {
2490 			/*
2491 			 * XXX KDM is there any way we would need a S/G
2492 			 * list here?
2493 			 */
2494 			retval = read(file_dev->fd, data->buf, buf->len);
2495 		} else {
2496 			if (double_buf_needed != 0) {
2497 				retval = write(file_dev->fd, file_dev->tmp_buf,
2498 				    db_len);
2499 			} else if (data->sg_count == 0) {
2500 				retval = write(file_dev->fd, data->buf,
2501 				    data->fill_len);
2502 			} else {
2503 				retval = writev(file_dev->fd, data->iovec,
2504 				    data->sg_count);
2505 			}
2506 		}
2507 	}
2508 
2509 	/* We're done, re-acquire the lock */
2510 	pthread_mutex_lock(&dev->mutex);
2511 
2512 	if (retval >= (ssize_t)data->fill_len) {
2513 		/*
2514 		 * If the bytes transferred is more than the request size,
2515 		 * that indicates an overrun, which should only happen at
2516 		 * the end of a transfer if we have to round up to a sector
2517 		 * boundary.
2518 		 */
2519 		if (buf->status == CAMDD_STATUS_NONE)
2520 			buf->status = CAMDD_STATUS_OK;
2521 		data->resid = 0;
2522 		dev->bytes_transferred += retval;
2523 	} else if (retval == -1) {
2524 		warn("Error %s %s", (write_dev) ? "writing to" :
2525 		    "reading from", file_dev->filename);
2526 
2527 		buf->status = CAMDD_STATUS_ERROR;
2528 		data->resid = data->fill_len;
2529 		error_count++;
2530 
2531 		if (dev->debug == 0)
2532 			goto bailout;
2533 
2534 		if ((double_buf_needed != 0)
2535 		 && (write_dev != 0)) {
2536 			fprintf(stderr, "%s: fd %d, DB buf %p, len %u lba %ju "
2537 			    "offset %ju\n", __func__, file_dev->fd,
2538 			    file_dev->tmp_buf, db_len, (uintmax_t)buf->lba,
2539 			    (uintmax_t)io_offset);
2540 		} else if (data->sg_count == 0) {
2541 			fprintf(stderr, "%s: fd %d, buf %p, len %u, lba %ju "
2542 			    "offset %ju\n", __func__, file_dev->fd, data->buf,
2543 			    data->fill_len, (uintmax_t)buf->lba,
2544 			    (uintmax_t)io_offset);
2545 		} else {
2546 			int i;
2547 
2548 			fprintf(stderr, "%s: fd %d, len %u, lba %ju "
2549 			    "offset %ju\n", __func__, file_dev->fd,
2550 			    data->fill_len, (uintmax_t)buf->lba,
2551 			    (uintmax_t)io_offset);
2552 
2553 			for (i = 0; i < data->sg_count; i++) {
2554 				fprintf(stderr, "index %d ptr %p len %zu\n",
2555 				    i, data->iovec[i].iov_base,
2556 				    data->iovec[i].iov_len);
2557 			}
2558 		}
2559 	} else if (retval == 0) {
2560 		buf->status = CAMDD_STATUS_EOF;
2561 		if (dev->debug != 0)
2562 			printf("%s: got EOF from %s!\n", __func__,
2563 			    file_dev->filename);
2564 		data->resid = data->fill_len;
2565 		error_count++;
2566 	} else if (retval < (ssize_t)data->fill_len) {
2567 		if (buf->status == CAMDD_STATUS_NONE)
2568 			buf->status = CAMDD_STATUS_SHORT_IO;
2569 		data->resid = data->fill_len - retval;
2570 		dev->bytes_transferred += retval;
2571 	}
2572 
2573 bailout:
2574 	if (buf != NULL) {
2575 		if (buf->status == CAMDD_STATUS_EOF) {
2576 			struct camdd_buf *buf2;
2577 			dev->flags |= CAMDD_DEV_FLAG_EOF;
2578 			STAILQ_FOREACH(buf2, &dev->run_queue, links)
2579 				buf2->status = CAMDD_STATUS_EOF;
2580 		}
2581 
2582 		camdd_complete_buf(dev, buf, &error_count);
2583 	}
2584 
2585 	if (error_count != 0)
2586 		return (-1);
2587 	else if (no_resources != 0)
2588 		return (1);
2589 	else
2590 		return (0);
2591 }
2592 
2593 /*
2594  * Execute one command from the run queue.  Returns 0 for success, 1 for
2595  * stop processing, and -1 for error.
2596  */
2597 int
2598 camdd_pass_run(struct camdd_dev *dev)
2599 {
2600 	struct camdd_buf *buf = NULL;
2601 	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2602 	struct camdd_buf_data *data;
2603 	uint32_t num_blocks, sectors_used = 0;
2604 	union ccb *ccb;
2605 	int retval = 0, is_write = dev->write_dev;
2606 	int double_buf_needed = 0;
2607 
2608 	buf = STAILQ_FIRST(&dev->run_queue);
2609 	if (buf == NULL) {
2610 		retval = 1;
2611 		goto bailout;
2612 	}
2613 
2614 	/*
2615 	 * If we're writing, we need to go through the source buffer list
2616 	 * and create an S/G list.
2617 	 */
2618 	if (is_write != 0) {
2619 		retval = camdd_buf_sg_create(buf, /*iovec*/ 0,dev->sector_size,
2620 		    &sectors_used, &double_buf_needed);
2621 		if (retval != 0) {
2622 			retval = -1;
2623 			goto bailout;
2624 		}
2625 	}
2626 
2627 	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2628 	dev->num_run_queue--;
2629 
2630 	data = &buf->buf_type_spec.data;
2631 
2632 	/*
2633 	 * In almost every case the number of blocks should be the device
2634 	 * block size.  The exception may be at the end of an I/O stream
2635 	 * for a partial block or at the end of a device.
2636 	 */
2637 	if (is_write != 0)
2638 		num_blocks = sectors_used;
2639 	else
2640 		num_blocks = data->fill_len / pass_dev->block_len;
2641 
2642 	ccb = &data->ccb;
2643 
2644 	switch (pass_dev->protocol) {
2645 	case PROTO_SCSI:
2646 		CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
2647 
2648 		scsi_read_write(&ccb->csio,
2649 				/*retries*/ dev->retry_count,
2650 				/*cbfcnp*/ NULL,
2651 				/*tag_action*/ MSG_SIMPLE_Q_TAG,
2652 				/*readop*/ (dev->write_dev == 0) ? SCSI_RW_READ :
2653 					   SCSI_RW_WRITE,
2654 				/*byte2*/ 0,
2655 				/*minimum_cmd_size*/ dev->min_cmd_size,
2656 				/*lba*/ buf->lba,
2657 				/*block_count*/ num_blocks,
2658 				/*data_ptr*/ (data->sg_count != 0) ?
2659 					     (uint8_t *)data->segs : data->buf,
2660 				/*dxfer_len*/ (num_blocks * pass_dev->block_len),
2661 				/*sense_len*/ SSD_FULL_SIZE,
2662 				/*timeout*/ dev->io_timeout);
2663 
2664 		if (data->sg_count != 0) {
2665 			ccb->csio.sglist_cnt = data->sg_count;
2666 		}
2667 		break;
2668 	case PROTO_NVME:
2669 		CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->nvmeio);
2670 
2671 		nvme_read_write(&ccb->nvmeio,
2672 				/*retries*/ dev->retry_count,
2673 				/*cbfcnp*/ NULL,
2674 				/*nsid*/ pass_dev->dev->target_lun & UINT32_MAX,
2675 				/*readop*/ dev->write_dev == 0,
2676 				/*lba*/ buf->lba,
2677 				/*block_count*/ num_blocks,
2678 				/*data_ptr*/ (data->sg_count != 0) ?
2679 					     (uint8_t *)data->segs : data->buf,
2680 				/*dxfer_len*/ (num_blocks * pass_dev->block_len),
2681 				/*timeout*/ dev->io_timeout);
2682 
2683 		ccb->nvmeio.sglist_cnt = data->sg_count;
2684 		break;
2685 	default:
2686 		retval = -1;
2687 		goto bailout;
2688 	}
2689 
2690 	/* Disable freezing the device queue */
2691 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
2692 
2693 	if (dev->retry_count != 0)
2694 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
2695 
2696 	if (data->sg_count != 0) {
2697 		ccb->ccb_h.flags |= CAM_DATA_SG;
2698 	}
2699 
2700 	/*
2701 	 * Store a pointer to the buffer in the CCB.  The kernel will
2702 	 * restore this when we get it back, and we'll use it to identify
2703 	 * the buffer this CCB came from.
2704 	 */
2705 	ccb->ccb_h.ccb_buf = buf;
2706 
2707 	/*
2708 	 * Unlock our mutex in preparation for issuing the ioctl.
2709 	 */
2710 	pthread_mutex_unlock(&dev->mutex);
2711 	/*
2712 	 * Queue the CCB to the pass(4) driver.
2713 	 */
2714 	if (ioctl(pass_dev->dev->fd, CAMIOQUEUE, ccb) == -1) {
2715 		pthread_mutex_lock(&dev->mutex);
2716 
2717 		warn("%s: error sending CAMIOQUEUE ioctl to %s%u", __func__,
2718 		     pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
2719 		warn("%s: CCB address is %p", __func__, ccb);
2720 		retval = -1;
2721 
2722 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2723 	} else {
2724 		pthread_mutex_lock(&dev->mutex);
2725 
2726 		dev->cur_active_io++;
2727 		STAILQ_INSERT_TAIL(&dev->active_queue, buf, links);
2728 	}
2729 
2730 bailout:
2731 	return (retval);
2732 }
2733 
2734 int
2735 camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len)
2736 {
2737 	uint32_t num_blocks;
2738 	int retval = 0;
2739 
2740 	*lba = dev->next_io_pos_bytes / dev->sector_size;
2741 	*len = dev->blocksize;
2742 	num_blocks = *len / dev->sector_size;
2743 
2744 	/*
2745 	 * If max_sector is 0, then we have no set limit.  This can happen
2746 	 * if we're writing to a file in a filesystem, or reading from
2747 	 * something like /dev/zero.
2748 	 */
2749 	if ((dev->max_sector != 0)
2750 	 || (dev->sector_io_limit != 0)) {
2751 		uint64_t max_sector;
2752 
2753 		if ((dev->max_sector != 0)
2754 		 && (dev->sector_io_limit != 0))
2755 			max_sector = min(dev->sector_io_limit, dev->max_sector);
2756 		else if (dev->max_sector != 0)
2757 			max_sector = dev->max_sector;
2758 		else
2759 			max_sector = dev->sector_io_limit;
2760 
2761 
2762 		/*
2763 		 * Check to see whether we're starting off past the end of
2764 		 * the device.  If so, we need to just send an EOF
2765 		 * notification to the writer.
2766 		 */
2767 		if (*lba > max_sector) {
2768 			*len = 0;
2769 			retval = 1;
2770 		} else if (((*lba + num_blocks) > max_sector + 1)
2771 			|| ((*lba + num_blocks) < *lba)) {
2772 			/*
2773 			 * If we get here (but pass the first check), we
2774 			 * can trim the request length down to go to the
2775 			 * end of the device.
2776 			 */
2777 			num_blocks = (max_sector + 1) - *lba;
2778 			*len = num_blocks * dev->sector_size;
2779 			retval = 1;
2780 		}
2781 	}
2782 
2783 	dev->next_io_pos_bytes += *len;
2784 
2785 	return (retval);
2786 }
2787 
2788 /*
2789  * Returns 0 for success, 1 for EOF detected, and -1 for failure.
2790  */
2791 int
2792 camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf)
2793 {
2794 	struct camdd_buf *buf = NULL;
2795 	struct camdd_buf_data *data;
2796 	size_t new_len;
2797 	struct camdd_buf_data *rb_data;
2798 	int is_write = dev->write_dev;
2799 	int eof_flush_needed = 0;
2800 	int retval = 0;
2801 
2802 	/*
2803 	 * If we've gotten EOF or our partner has, we should not continue
2804 	 * queueing I/O.  If we're a writer, though, we should continue
2805 	 * to write any buffers that don't have EOF status.
2806 	 */
2807 	if ((dev->flags & CAMDD_DEV_FLAG_EOF)
2808 	 || ((dev->flags & CAMDD_DEV_FLAG_PEER_EOF)
2809 	  && (is_write == 0))) {
2810 		/*
2811 		 * Tell the worker thread that we have seen EOF.
2812 		 */
2813 		retval = 1;
2814 
2815 		/*
2816 		 * If we're the writer, send the buffer back with EOF status.
2817 		 */
2818 		if (is_write) {
2819 			read_buf->status = CAMDD_STATUS_EOF;
2820 
2821 			camdd_complete_peer_buf(dev, read_buf);
2822 		}
2823 		goto bailout;
2824 	}
2825 
2826 	if (is_write == 0) {
2827 		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2828 		if (buf == NULL) {
2829 			retval = -1;
2830 			goto bailout;
2831 		}
2832 		data = &buf->buf_type_spec.data;
2833 
2834 		retval = camdd_get_next_lba_len(dev, &buf->lba, &buf->len);
2835 		if (retval != 0) {
2836 			buf->status = CAMDD_STATUS_EOF;
2837 
2838 		 	if ((buf->len == 0)
2839 			 && ((dev->flags & (CAMDD_DEV_FLAG_EOF_SENT |
2840 			     CAMDD_DEV_FLAG_EOF_QUEUED)) != 0)) {
2841 				camdd_release_buf(buf);
2842 				goto bailout;
2843 			}
2844 			dev->flags |= CAMDD_DEV_FLAG_EOF_QUEUED;
2845 		}
2846 
2847 		data->fill_len = buf->len;
2848 		data->src_start_offset = buf->lba * dev->sector_size;
2849 
2850 		/*
2851 		 * Put this on the run queue.
2852 		 */
2853 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2854 		dev->num_run_queue++;
2855 
2856 		/* We're done. */
2857 		goto bailout;
2858 	}
2859 
2860 	/*
2861 	 * Check for new EOF status from the reader.
2862 	 */
2863 	if ((read_buf->status == CAMDD_STATUS_EOF)
2864 	 || (read_buf->status == CAMDD_STATUS_ERROR)) {
2865 		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2866 		if ((STAILQ_FIRST(&dev->pending_queue) == NULL)
2867 		 && (read_buf->len == 0)) {
2868 			camdd_complete_peer_buf(dev, read_buf);
2869 			retval = 1;
2870 			goto bailout;
2871 		} else
2872 			eof_flush_needed = 1;
2873 	}
2874 
2875 	/*
2876 	 * See if we have a buffer we're composing with pieces from our
2877 	 * partner thread.
2878 	 */
2879 	buf = STAILQ_FIRST(&dev->pending_queue);
2880 	if (buf == NULL) {
2881 		uint64_t lba;
2882 		ssize_t len;
2883 
2884 		retval = camdd_get_next_lba_len(dev, &lba, &len);
2885 		if (retval != 0) {
2886 			read_buf->status = CAMDD_STATUS_EOF;
2887 
2888 			if (len == 0) {
2889 				dev->flags |= CAMDD_DEV_FLAG_EOF;
2890 				camdd_complete_peer_buf(dev, read_buf);
2891 				goto bailout;
2892 			}
2893 		}
2894 
2895 		/*
2896 		 * If we don't have a pending buffer, we need to grab a new
2897 		 * one from the free list or allocate another one.
2898 		 */
2899 		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
2900 		if (buf == NULL) {
2901 			retval = 1;
2902 			goto bailout;
2903 		}
2904 
2905 		buf->lba = lba;
2906 		buf->len = len;
2907 
2908 		STAILQ_INSERT_TAIL(&dev->pending_queue, buf, links);
2909 		dev->num_pending_queue++;
2910 	}
2911 
2912 	data = &buf->buf_type_spec.data;
2913 
2914 	rb_data = &read_buf->buf_type_spec.data;
2915 
2916 	if ((rb_data->src_start_offset != dev->next_peer_pos_bytes)
2917 	 && (dev->debug != 0)) {
2918 		printf("%s: WARNING: reader offset %#jx != expected offset "
2919 		    "%#jx\n", __func__, (uintmax_t)rb_data->src_start_offset,
2920 		    (uintmax_t)dev->next_peer_pos_bytes);
2921 	}
2922 	dev->next_peer_pos_bytes = rb_data->src_start_offset +
2923 	    (rb_data->fill_len - rb_data->resid);
2924 
2925 	new_len = (rb_data->fill_len - rb_data->resid) + data->fill_len;
2926 	if (new_len < buf->len) {
2927 		/*
2928 		 * There are three cases here:
2929 		 * 1. We need more data to fill up a block, so we put
2930 		 *    this I/O on the queue and wait for more I/O.
2931 		 * 2. We have a pending buffer in the queue that is
2932 		 *    smaller than our blocksize, but we got an EOF.  So we
2933 		 *    need to go ahead and flush the write out.
2934 		 * 3. We got an error.
2935 		 */
2936 
2937 		/*
2938 		 * Increment our fill length.
2939 		 */
2940 		data->fill_len += (rb_data->fill_len - rb_data->resid);
2941 
2942 		/*
2943 		 * Add the new read buffer to the list for writing.
2944 		 */
2945 		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
2946 
2947 		/* Increment the count */
2948 		buf->src_count++;
2949 
2950 		if (eof_flush_needed == 0) {
2951 			/*
2952 			 * We need to exit, because we don't have enough
2953 			 * data yet.
2954 			 */
2955 			goto bailout;
2956 		} else {
2957 			/*
2958 			 * Take the buffer off of the pending queue.
2959 			 */
2960 			STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
2961 				      links);
2962 			dev->num_pending_queue--;
2963 
2964 			/*
2965 			 * If we need an EOF flush, but there is no data
2966 			 * to flush, go ahead and return this buffer.
2967 			 */
2968 			if (data->fill_len == 0) {
2969 				camdd_complete_buf(dev, buf, /*error_count*/0);
2970 				retval = 1;
2971 				goto bailout;
2972 			}
2973 
2974 			/*
2975 			 * Put this on the next queue for execution.
2976 			 */
2977 			STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
2978 			dev->num_run_queue++;
2979 		}
2980 	} else if (new_len == buf->len) {
2981 		/*
2982 		 * We have enough data to completey fill one block,
2983 		 * so we're ready to issue the I/O.
2984 		 */
2985 
2986 		/*
2987 		 * Take the buffer off of the pending queue.
2988 		 */
2989 		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf, links);
2990 		dev->num_pending_queue--;
2991 
2992 		/*
2993 		 * Add the new read buffer to the list for writing.
2994 		 */
2995 		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
2996 
2997 		/* Increment the count */
2998 		buf->src_count++;
2999 
3000 		/*
3001 		 * Increment our fill length.
3002 		 */
3003 		data->fill_len += (rb_data->fill_len - rb_data->resid);
3004 
3005 		/*
3006 		 * Put this on the next queue for execution.
3007 		 */
3008 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
3009 		dev->num_run_queue++;
3010 	} else {
3011 		struct camdd_buf *idb;
3012 		struct camdd_buf_indirect *indirect;
3013 		uint32_t len_to_go, cur_offset;
3014 
3015 
3016 		idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
3017 		if (idb == NULL) {
3018 			retval = 1;
3019 			goto bailout;
3020 		}
3021 		indirect = &idb->buf_type_spec.indirect;
3022 		indirect->src_buf = read_buf;
3023 		read_buf->refcount++;
3024 		indirect->offset = 0;
3025 		indirect->start_ptr = rb_data->buf;
3026 		/*
3027 		 * We've already established that there is more
3028 		 * data in read_buf than we have room for in our
3029 		 * current write request.  So this particular chunk
3030 		 * of the request should just be the remainder
3031 		 * needed to fill up a block.
3032 		 */
3033 		indirect->len = buf->len - (data->fill_len - data->resid);
3034 
3035 		camdd_buf_add_child(buf, idb);
3036 
3037 		/*
3038 		 * This buffer is ready to execute, so we can take
3039 		 * it off the pending queue and put it on the run
3040 		 * queue.
3041 		 */
3042 		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
3043 			      links);
3044 		dev->num_pending_queue--;
3045 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
3046 		dev->num_run_queue++;
3047 
3048 		cur_offset = indirect->offset + indirect->len;
3049 
3050 		/*
3051 		 * The resulting I/O would be too large to fit in
3052 		 * one block.  We need to split this I/O into
3053 		 * multiple pieces.  Allocate as many buffers as needed.
3054 		 */
3055 		for (len_to_go = rb_data->fill_len - rb_data->resid -
3056 		     indirect->len; len_to_go > 0;) {
3057 			struct camdd_buf *new_buf;
3058 			struct camdd_buf_data *new_data;
3059 			uint64_t lba;
3060 			ssize_t len;
3061 
3062 			retval = camdd_get_next_lba_len(dev, &lba, &len);
3063 			if ((retval != 0)
3064 			 && (len == 0)) {
3065 				/*
3066 				 * The device has already been marked
3067 				 * as EOF, and there is no space left.
3068 				 */
3069 				goto bailout;
3070 			}
3071 
3072 			new_buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
3073 			if (new_buf == NULL) {
3074 				retval = 1;
3075 				goto bailout;
3076 			}
3077 
3078 			new_buf->lba = lba;
3079 			new_buf->len = len;
3080 
3081 			idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
3082 			if (idb == NULL) {
3083 				retval = 1;
3084 				goto bailout;
3085 			}
3086 
3087 			indirect = &idb->buf_type_spec.indirect;
3088 
3089 			indirect->src_buf = read_buf;
3090 			read_buf->refcount++;
3091 			indirect->offset = cur_offset;
3092 			indirect->start_ptr = rb_data->buf + cur_offset;
3093 			indirect->len = min(len_to_go, new_buf->len);
3094 #if 0
3095 			if (((indirect->len % dev->sector_size) != 0)
3096 			 || ((indirect->offset % dev->sector_size) != 0)) {
3097 				warnx("offset %ju len %ju not aligned with "
3098 				    "sector size %u", indirect->offset,
3099 				    (uintmax_t)indirect->len, dev->sector_size);
3100 			}
3101 #endif
3102 			cur_offset += indirect->len;
3103 			len_to_go -= indirect->len;
3104 
3105 			camdd_buf_add_child(new_buf, idb);
3106 
3107 			new_data = &new_buf->buf_type_spec.data;
3108 
3109 			if ((new_data->fill_len == new_buf->len)
3110 			 || (eof_flush_needed != 0)) {
3111 				STAILQ_INSERT_TAIL(&dev->run_queue,
3112 						   new_buf, links);
3113 				dev->num_run_queue++;
3114 			} else if (new_data->fill_len < buf->len) {
3115 				STAILQ_INSERT_TAIL(&dev->pending_queue,
3116 					   	new_buf, links);
3117 				dev->num_pending_queue++;
3118 			} else {
3119 				warnx("%s: too much data in new "
3120 				      "buffer!", __func__);
3121 				retval = 1;
3122 				goto bailout;
3123 			}
3124 		}
3125 	}
3126 
3127 bailout:
3128 	return (retval);
3129 }
3130 
3131 void
3132 camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
3133 		uint32_t *peer_depth, uint32_t *our_bytes, uint32_t *peer_bytes)
3134 {
3135 	*our_depth = dev->cur_active_io + dev->num_run_queue;
3136 	if (dev->num_peer_work_queue >
3137 	    dev->num_peer_done_queue)
3138 		*peer_depth = dev->num_peer_work_queue -
3139 			      dev->num_peer_done_queue;
3140 	else
3141 		*peer_depth = 0;
3142 	*our_bytes = *our_depth * dev->blocksize;
3143 	*peer_bytes = dev->peer_bytes_queued;
3144 }
3145 
3146 void
3147 camdd_sig_handler(int sig)
3148 {
3149 	if (sig == SIGINFO)
3150 		need_status = 1;
3151 	else {
3152 		need_exit = 1;
3153 		error_exit = 1;
3154 	}
3155 
3156 	sem_post(&camdd_sem);
3157 }
3158 
3159 void
3160 camdd_print_status(struct camdd_dev *camdd_dev, struct camdd_dev *other_dev,
3161 		   struct timespec *start_time)
3162 {
3163 	struct timespec done_time;
3164 	uint64_t total_ns;
3165 	long double mb_sec, total_sec;
3166 	int error = 0;
3167 
3168 	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &done_time);
3169 	if (error != 0) {
3170 		warn("Unable to get done time");
3171 		return;
3172 	}
3173 
3174 	timespecsub(&done_time, start_time, &done_time);
3175 
3176 	total_ns = done_time.tv_nsec + (done_time.tv_sec * 1000000000);
3177 	total_sec = total_ns;
3178 	total_sec /= 1000000000;
3179 
3180 	fprintf(stderr, "%ju bytes %s %s\n%ju bytes %s %s\n"
3181 		"%.4Lf seconds elapsed\n",
3182 		(uintmax_t)camdd_dev->bytes_transferred,
3183 		(camdd_dev->write_dev == 0) ?  "read from" : "written to",
3184 		camdd_dev->device_name,
3185 		(uintmax_t)other_dev->bytes_transferred,
3186 		(other_dev->write_dev == 0) ? "read from" : "written to",
3187 		other_dev->device_name, total_sec);
3188 
3189 	mb_sec = min(other_dev->bytes_transferred,camdd_dev->bytes_transferred);
3190 	mb_sec /= 1024 * 1024;
3191 	mb_sec *= 1000000000;
3192 	mb_sec /= total_ns;
3193 	fprintf(stderr, "%.2Lf MB/sec\n", mb_sec);
3194 }
3195 
3196 int
3197 camdd_rw(struct camdd_io_opts *io_opts, camdd_argmask arglist, int num_io_opts,
3198 	 uint64_t max_io, int retry_count, int timeout)
3199 {
3200 	struct cam_device *new_cam_dev = NULL;
3201 	struct camdd_dev *devs[2];
3202 	struct timespec start_time;
3203 	pthread_t threads[2];
3204 	int unit = 0;
3205 	int error = 0;
3206 	int i;
3207 
3208 	bzero(devs, sizeof(devs));
3209 
3210 	if (num_io_opts != 2) {
3211 		warnx("Must have one input and one output path");
3212 		error = 1;
3213 		goto bailout;
3214 	}
3215 
3216 	for (i = 0; i < num_io_opts; i++) {
3217 		switch (io_opts[i].dev_type) {
3218 		case CAMDD_DEV_PASS: {
3219 			if (isdigit(io_opts[i].dev_name[0])) {
3220 				int bus = 0, target = 0, lun = 0;
3221 				int rv;
3222 
3223 				/* device specified as bus:target[:lun] */
3224 				rv = parse_btl(io_opts[i].dev_name, &bus,
3225 				    &target, &lun);
3226 				if (rv < 2) {
3227 					warnx("numeric device specification "
3228 					     "must be either bus:target, or "
3229 					     "bus:target:lun");
3230 					error = 1;
3231 					goto bailout;
3232 				}
3233 				/* default to 0 if lun was not specified */
3234 				if (rv == 2) {
3235 					lun = 0;
3236 				}
3237 				new_cam_dev = cam_open_btl(bus, target, lun,
3238 				    O_RDWR, NULL);
3239 			} else {
3240 				char name[30];
3241 
3242 				if (cam_get_device(io_opts[i].dev_name, name,
3243 						   sizeof name, &unit) == -1) {
3244 					warnx("%s", cam_errbuf);
3245 					error = 1;
3246 					goto bailout;
3247 				}
3248 				new_cam_dev = cam_open_spec_device(name, unit,
3249 				    O_RDWR, NULL);
3250 			}
3251 
3252 			if (new_cam_dev == NULL) {
3253 				warnx("%s", cam_errbuf);
3254 				error = 1;
3255 				goto bailout;
3256 			}
3257 
3258 			devs[i] = camdd_probe_pass(new_cam_dev,
3259 			    /*io_opts*/ &io_opts[i],
3260 			    arglist,
3261 			    /*probe_retry_count*/ 3,
3262 			    /*probe_timeout*/ 5000,
3263 			    /*io_retry_count*/ retry_count,
3264 			    /*io_timeout*/ timeout);
3265 			if (devs[i] == NULL) {
3266 				warn("Unable to probe device %s%u",
3267 				     new_cam_dev->device_name,
3268 				     new_cam_dev->dev_unit_num);
3269 				error = 1;
3270 				goto bailout;
3271 			}
3272 			break;
3273 		}
3274 		case CAMDD_DEV_FILE: {
3275 			int fd = -1;
3276 
3277 			if (io_opts[i].dev_name[0] == '-') {
3278 				if (io_opts[i].write_dev != 0)
3279 					fd = STDOUT_FILENO;
3280 				else
3281 					fd = STDIN_FILENO;
3282 			} else {
3283 				if (io_opts[i].write_dev != 0) {
3284 					fd = open(io_opts[i].dev_name,
3285 					    O_RDWR | O_CREAT, S_IWUSR |S_IRUSR);
3286 				} else {
3287 					fd = open(io_opts[i].dev_name,
3288 					    O_RDONLY);
3289 				}
3290 			}
3291 			if (fd == -1) {
3292 				warn("error opening file %s",
3293 				    io_opts[i].dev_name);
3294 				error = 1;
3295 				goto bailout;
3296 			}
3297 
3298 			devs[i] = camdd_probe_file(fd, &io_opts[i],
3299 			    retry_count, timeout);
3300 			if (devs[i] == NULL) {
3301 				error = 1;
3302 				goto bailout;
3303 			}
3304 
3305 			break;
3306 		}
3307 		default:
3308 			warnx("Unknown device type %d (%s)",
3309 			    io_opts[i].dev_type, io_opts[i].dev_name);
3310 			error = 1;
3311 			goto bailout;
3312 			break; /*NOTREACHED */
3313 		}
3314 
3315 		devs[i]->write_dev = io_opts[i].write_dev;
3316 
3317 		devs[i]->start_offset_bytes = io_opts[i].offset;
3318 
3319 		if (max_io != 0) {
3320 			devs[i]->sector_io_limit =
3321 			    (devs[i]->start_offset_bytes /
3322 			    devs[i]->sector_size) +
3323 			    (max_io / devs[i]->sector_size) - 1;
3324 		}
3325 
3326 		devs[i]->next_io_pos_bytes = devs[i]->start_offset_bytes;
3327 		devs[i]->next_completion_pos_bytes =devs[i]->start_offset_bytes;
3328 	}
3329 
3330 	devs[0]->peer_dev = devs[1];
3331 	devs[1]->peer_dev = devs[0];
3332 	devs[0]->next_peer_pos_bytes = devs[0]->peer_dev->next_io_pos_bytes;
3333 	devs[1]->next_peer_pos_bytes = devs[1]->peer_dev->next_io_pos_bytes;
3334 
3335 	sem_init(&camdd_sem, /*pshared*/ 0, 0);
3336 
3337 	signal(SIGINFO, camdd_sig_handler);
3338 	signal(SIGINT, camdd_sig_handler);
3339 
3340 	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &start_time);
3341 	if (error != 0) {
3342 		warn("Unable to get start time");
3343 		goto bailout;
3344 	}
3345 
3346 	for (i = 0; i < num_io_opts; i++) {
3347 		error = pthread_create(&threads[i], NULL, camdd_worker,
3348 				       (void *)devs[i]);
3349 		if (error != 0) {
3350 			warnc(error, "pthread_create() failed");
3351 			goto bailout;
3352 		}
3353 	}
3354 
3355 	for (;;) {
3356 		if ((sem_wait(&camdd_sem) == -1)
3357 		 || (need_exit != 0)) {
3358 			struct kevent ke;
3359 
3360 			for (i = 0; i < num_io_opts; i++) {
3361 				EV_SET(&ke, (uintptr_t)&devs[i]->work_queue,
3362 				    EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
3363 
3364 				devs[i]->flags |= CAMDD_DEV_FLAG_EOF;
3365 
3366 				error = kevent(devs[i]->kq, &ke, 1, NULL, 0,
3367 						NULL);
3368 				if (error == -1)
3369 					warn("%s: unable to wake up thread",
3370 					    __func__);
3371 				error = 0;
3372 			}
3373 			break;
3374 		} else if (need_status != 0) {
3375 			camdd_print_status(devs[0], devs[1], &start_time);
3376 			need_status = 0;
3377 		}
3378 	}
3379 	for (i = 0; i < num_io_opts; i++) {
3380 		pthread_join(threads[i], NULL);
3381 	}
3382 
3383 	camdd_print_status(devs[0], devs[1], &start_time);
3384 
3385 bailout:
3386 
3387 	for (i = 0; i < num_io_opts; i++)
3388 		camdd_free_dev(devs[i]);
3389 
3390 	return (error + error_exit);
3391 }
3392 
/*
 * Write the command line synopsis and option descriptions to stderr.
 */
void
usage(void)
{
	static const char usage_text[] =
"usage:  camdd <-i|-o pass=pass0,bs=1M,offset=1M,depth=4>\n"
"              <-i|-o file=/tmp/file,bs=512K,offset=1M>\n"
"              <-i|-o file=/dev/da0,bs=512K,offset=1M>\n"
"              <-i|-o file=/dev/nsa0,bs=512K>\n"
"              [-C retry_count][-E][-m max_io_amt][-t timeout_secs][-v][-h]\n"
"Option description\n"
"-i <arg=val>  Specify input device/file and parameters\n"
"-o <arg=val>  Specify output device/file and parameters\n"
"Input and Output parameters\n"
"pass=name     Specify a pass(4) device like pass0 or /dev/pass0\n"
"file=name     Specify a file or device, /tmp/foo, /dev/da0, /dev/null\n"
"              or - for stdin/stdout\n"
"bs=blocksize  Specify blocksize in bytes, or using K, M, G, etc. suffix\n"
"offset=len    Specify starting offset in bytes or using K, M, G suffix\n"
"              NOTE: offset cannot be specified on tapes, pipes, stdin/out\n"
"depth=N       Specify a numeric queue depth.  This only applies to pass(4)\n"
"mcs=N         Specify a minimum cmd size for pass(4) read/write commands\n"
"Optional arguments\n"
"-C retry_cnt  Specify a retry count for pass(4) devices\n"
"-E            Enable CAM error recovery for pass(4) devices\n"
"-m max_io     Specify the maximum amount to be transferred in bytes or\n"
"              using K, G, M, etc. suffixes\n"
"-t timeout    Specify the I/O timeout to use with pass(4) devices\n"
"-v            Enable verbose error recovery\n"
"-h            Print this message\n";

	fputs(usage_text, stderr);
}
3423 
3424 
3425 int
3426 camdd_parse_io_opts(char *args, int is_write, struct camdd_io_opts *io_opts)
3427 {
3428 	char *tmpstr, *tmpstr2;
3429 	char *orig_tmpstr = NULL;
3430 	int retval = 0;
3431 
3432 	io_opts->write_dev = is_write;
3433 
3434 	tmpstr = strdup(args);
3435 	if (tmpstr == NULL) {
3436 		warn("strdup failed");
3437 		retval = 1;
3438 		goto bailout;
3439 	}
3440 	orig_tmpstr = tmpstr;
3441 	while ((tmpstr2 = strsep(&tmpstr, ",")) != NULL) {
3442 		char *name, *value;
3443 
3444 		/*
3445 		 * If the user creates an empty parameter by putting in two
3446 		 * commas, skip over it and look for the next field.
3447 		 */
3448 		if (*tmpstr2 == '\0')
3449 			continue;
3450 
3451 		name = strsep(&tmpstr2, "=");
3452 		if (*name == '\0') {
3453 			warnx("Got empty I/O parameter name");
3454 			retval = 1;
3455 			goto bailout;
3456 		}
3457 		value = strsep(&tmpstr2, "=");
3458 		if ((value == NULL)
3459 		 || (*value == '\0')) {
3460 			warnx("Empty I/O parameter value for %s", name);
3461 			retval = 1;
3462 			goto bailout;
3463 		}
3464 		if (strncasecmp(name, "file", 4) == 0) {
3465 			io_opts->dev_type = CAMDD_DEV_FILE;
3466 			io_opts->dev_name = strdup(value);
3467 			if (io_opts->dev_name == NULL) {
3468 				warn("Error allocating memory");
3469 				retval = 1;
3470 				goto bailout;
3471 			}
3472 		} else if (strncasecmp(name, "pass", 4) == 0) {
3473 			io_opts->dev_type = CAMDD_DEV_PASS;
3474 			io_opts->dev_name = strdup(value);
3475 			if (io_opts->dev_name == NULL) {
3476 				warn("Error allocating memory");
3477 				retval = 1;
3478 				goto bailout;
3479 			}
3480 		} else if ((strncasecmp(name, "bs", 2) == 0)
3481 			|| (strncasecmp(name, "blocksize", 9) == 0)) {
3482 			retval = expand_number(value, &io_opts->blocksize);
3483 			if (retval == -1) {
3484 				warn("expand_number(3) failed on %s=%s", name,
3485 				    value);
3486 				retval = 1;
3487 				goto bailout;
3488 			}
3489 		} else if (strncasecmp(name, "depth", 5) == 0) {
3490 			char *endptr;
3491 
3492 			io_opts->queue_depth = strtoull(value, &endptr, 0);
3493 			if (*endptr != '\0') {
3494 				warnx("invalid queue depth %s", value);
3495 				retval = 1;
3496 				goto bailout;
3497 			}
3498 		} else if (strncasecmp(name, "mcs", 3) == 0) {
3499 			char *endptr;
3500 
3501 			io_opts->min_cmd_size = strtol(value, &endptr, 0);
3502 			if ((*endptr != '\0')
3503 			 || ((io_opts->min_cmd_size > 16)
3504 			  || (io_opts->min_cmd_size < 0))) {
3505 				warnx("invalid minimum cmd size %s", value);
3506 				retval = 1;
3507 				goto bailout;
3508 			}
3509 		} else if (strncasecmp(name, "offset", 6) == 0) {
3510 			retval = expand_number(value, &io_opts->offset);
3511 			if (retval == -1) {
3512 				warn("expand_number(3) failed on %s=%s", name,
3513 				    value);
3514 				retval = 1;
3515 				goto bailout;
3516 			}
3517 		} else if (strncasecmp(name, "debug", 5) == 0) {
3518 			char *endptr;
3519 
3520 			io_opts->debug = strtoull(value, &endptr, 0);
3521 			if (*endptr != '\0') {
3522 				warnx("invalid debug level %s", value);
3523 				retval = 1;
3524 				goto bailout;
3525 			}
3526 		} else {
3527 			warnx("Unrecognized parameter %s=%s", name, value);
3528 		}
3529 	}
3530 bailout:
3531 	free(orig_tmpstr);
3532 
3533 	return (retval);
3534 }
3535 
3536 int
3537 main(int argc, char **argv)
3538 {
3539 	int c;
3540 	camdd_argmask arglist = CAMDD_ARG_NONE;
3541 	int timeout = 0, retry_count = 1;
3542 	int error = 0;
3543 	uint64_t max_io = 0;
3544 	struct camdd_io_opts *opt_list = NULL;
3545 
3546 	if (argc == 1) {
3547 		usage();
3548 		exit(1);
3549 	}
3550 
3551 	opt_list = calloc(2, sizeof(struct camdd_io_opts));
3552 	if (opt_list == NULL) {
3553 		warn("Unable to allocate option list");
3554 		error = 1;
3555 		goto bailout;
3556 	}
3557 
3558 	while ((c = getopt(argc, argv, "C:Ehi:m:o:t:v")) != -1){
3559 		switch (c) {
3560 		case 'C':
3561 			retry_count = strtol(optarg, NULL, 0);
3562 			if (retry_count < 0)
3563 				errx(1, "retry count %d is < 0",
3564 				     retry_count);
3565 			break;
3566 		case 'E':
3567 			arglist |= CAMDD_ARG_ERR_RECOVER;
3568 			break;
3569 		case 'i':
3570 		case 'o':
3571 			if (((c == 'i')
3572 			  && (opt_list[0].dev_type != CAMDD_DEV_NONE))
3573 			 || ((c == 'o')
3574 			  && (opt_list[1].dev_type != CAMDD_DEV_NONE))) {
3575 				errx(1, "Only one input and output path "
3576 				    "allowed");
3577 			}
3578 			error = camdd_parse_io_opts(optarg, (c == 'o') ? 1 : 0,
3579 			    (c == 'o') ? &opt_list[1] : &opt_list[0]);
3580 			if (error != 0)
3581 				goto bailout;
3582 			break;
3583 		case 'm':
3584 			error = expand_number(optarg, &max_io);
3585 			if (error == -1) {
3586 				warn("invalid maximum I/O amount %s", optarg);
3587 				error = 1;
3588 				goto bailout;
3589 			}
3590 			break;
3591 		case 't':
3592 			timeout = strtol(optarg, NULL, 0);
3593 			if (timeout < 0)
3594 				errx(1, "invalid timeout %d", timeout);
3595 			/* Convert the timeout from seconds to ms */
3596 			timeout *= 1000;
3597 			break;
3598 		case 'v':
3599 			arglist |= CAMDD_ARG_VERBOSE;
3600 			break;
3601 		case 'h':
3602 		default:
3603 			usage();
3604 			exit(1);
3605 			break; /*NOTREACHED*/
3606 		}
3607 	}
3608 
3609 	if ((opt_list[0].dev_type == CAMDD_DEV_NONE)
3610 	 || (opt_list[1].dev_type == CAMDD_DEV_NONE))
3611 		errx(1, "Must specify both -i and -o");
3612 
3613 	/*
3614 	 * Set the timeout if the user hasn't specified one.
3615 	 */
3616 	if (timeout == 0)
3617 		timeout = CAMDD_PASS_RW_TIMEOUT;
3618 
3619 	error = camdd_rw(opt_list, arglist, 2, max_io, retry_count, timeout);
3620 
3621 bailout:
3622 	free(opt_list);
3623 
3624 	exit(error);
3625 }
3626