xref: /freebsd/sys/cam/ctl/ctl_backend_block.c (revision a2f733abcff64628b7771a47089628b7327a88bd)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2003 Silicon Graphics International Corp.
5  * Copyright (c) 2009-2011 Spectra Logic Corporation
6  * Copyright (c) 2012,2021 The FreeBSD Foundation
7  * Copyright (c) 2014-2021 Alexander Motin <mav@FreeBSD.org>
8  * All rights reserved.
9  *
10  * Portions of this software were developed by Edward Tomasz Napierala
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Portions of this software were developed by Ka Ho Ng <khng@FreeBSD.org>
14  * under sponsorship from the FreeBSD Foundation.
15  *
16  * Redistribution and use in source and binary forms, with or without
17  * modification, are permitted provided that the following conditions
18  * are met:
19  * 1. Redistributions of source code must retain the above copyright
20  *    notice, this list of conditions, and the following disclaimer,
21  *    without modification.
22  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
23  *    substantially similar to the "NO WARRANTY" disclaimer below
24  *    ("Disclaimer") and any redistribution must be conditioned upon
25  *    including a substantially similar Disclaimer requirement for further
26  *    binary redistribution.
27  *
28  * NO WARRANTY
29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
37  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
38  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39  * POSSIBILITY OF SUCH DAMAGES.
40  *
41  * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
42  */
43 /*
44  * CAM Target Layer driver backend for block devices.
45  *
46  * Author: Ken Merry <ken@FreeBSD.org>
47  */
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/types.h>
52 #include <sys/kthread.h>
53 #include <sys/bio.h>
54 #include <sys/fcntl.h>
55 #include <sys/limits.h>
56 #include <sys/lock.h>
57 #include <sys/mutex.h>
58 #include <sys/condvar.h>
59 #include <sys/malloc.h>
60 #include <sys/conf.h>
61 #include <sys/ioccom.h>
62 #include <sys/queue.h>
63 #include <sys/sbuf.h>
64 #include <sys/endian.h>
65 #include <sys/uio.h>
66 #include <sys/buf.h>
67 #include <sys/taskqueue.h>
68 #include <sys/vnode.h>
69 #include <sys/namei.h>
70 #include <sys/mount.h>
71 #include <sys/disk.h>
72 #include <sys/fcntl.h>
73 #include <sys/filedesc.h>
74 #include <sys/filio.h>
75 #include <sys/proc.h>
76 #include <sys/pcpu.h>
77 #include <sys/module.h>
78 #include <sys/sdt.h>
79 #include <sys/devicestat.h>
80 #include <sys/sysctl.h>
81 #include <sys/nv.h>
82 #include <sys/dnv.h>
83 #include <sys/sx.h>
84 #include <sys/unistd.h>
85 
86 #include <geom/geom.h>
87 
88 #include <cam/cam.h>
89 #include <cam/scsi/scsi_all.h>
90 #include <cam/scsi/scsi_da.h>
91 #include <cam/ctl/ctl_io.h>
92 #include <cam/ctl/ctl.h>
93 #include <cam/ctl/ctl_backend.h>
94 #include <cam/ctl/ctl_ioctl.h>
95 #include <cam/ctl/ctl_ha.h>
96 #include <cam/ctl/ctl_scsi_all.h>
97 #include <cam/ctl/ctl_private.h>
98 #include <cam/ctl/ctl_error.h>
99 
100 /*
101  * The idea here is to allocate enough S/G space to handle at least 1MB I/Os.
102  * On systems with small maxphys it can be 8 128KB segments.  On large systems
103  * it can be up to 8 1MB segments.  I/Os larger than that we'll split.
104  */
/* Number of scatter/gather entries carried by each backend I/O. */
#define	CTLBLK_MAX_SEGS		8
/* Half of the S/G array; compare keeps its second list at this offset. */
#define	CTLBLK_HALF_SEGS	(CTLBLK_MAX_SEGS / 2)
/* Smallest segment buffer: 128KB. */
#define	CTLBLK_MIN_SEG		(128 * 1024)
/* Largest segment buffer: maxphys clamped to [CTLBLK_MIN_SEG, 1MB]. */
#define	CTLBLK_MAX_SEG		\
    MIN(1024 * 1024, MAX(CTLBLK_MIN_SEG, maxphys))
/* Largest I/O that fits in one full S/G list. */
#define	CTLBLK_MAX_IO_SIZE	(CTLBLK_MAX_SEG * CTLBLK_MAX_SEGS)
110 
/*
 * Debug tracing.  With CTLBLK_DEBUG defined, DPRINTF() prints the message
 * prefixed with the calling function name and line number; otherwise it
 * expands to an empty statement.
 */
#ifndef CTLBLK_DEBUG
#define	DPRINTF(fmt, args...)	do {} while(0)
#else
#define	DPRINTF(fmt, args...)						\
    printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#endif
117 
/*
 * Accessors for the per-I/O private areas kept in the CTL I/O header:
 * PRIV() views the backend slot as a ctl_ptr_len_flags (its ptr field
 * carries the struct ctl_be_block_io for the request), ARGS() views the
 * LBA/length slot as a ctl_lba_len_flags.
 */
#define	PRIV(io)							\
    ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
#define	ARGS(io)							\
    ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])

/* DTrace provider under which this backend's probes are defined. */
SDT_PROVIDER_DEFINE(cbb);
124 
/* Bit flags kept in the flags member of struct ctl_be_block_lun. */
typedef enum {
	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
	CTL_BE_BLOCK_LUN_WAITING	= 0x04
} ctl_be_block_lun_flags;
129 
/* Kind of object backing a LUN (dev_type in struct ctl_be_block_lun). */
typedef enum {
	CTL_BE_BLOCK_NONE = 0,
	CTL_BE_BLOCK_DEV,
	CTL_BE_BLOCK_FILE
} ctl_be_block_type;
135 
/* State specific to file-backed LUNs. */
struct ctl_be_block_filedata {
	struct ucred	*cred;	/* credentials passed to VOP_READ/VOP_WRITE */
};
139 
140 union ctl_be_block_bedata {
141 	struct ctl_be_block_filedata file;
142 };
143 
144 struct ctl_be_block_io;
145 struct ctl_be_block_lun;
146 
147 typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
148 			       struct ctl_be_block_io *beio);
149 typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
150 				  const char *attrname);
151 
152 /*
153  * Backend LUN structure.  There is a 1:1 mapping between a block device
154  * and a backend block LUN, and between a backend block LUN and a CTL LUN.
155  */
156 struct ctl_be_block_lun {
157 	struct ctl_be_lun cbe_lun;		/* Must be first element. */
158 	struct ctl_lun_create_params params;
159 	char *dev_path;
160 	ctl_be_block_type dev_type;
161 	struct vnode *vn;
162 	union ctl_be_block_bedata backend;
163 	cbb_dispatch_t dispatch;
164 	cbb_dispatch_t lun_flush;
165 	cbb_dispatch_t unmap;
166 	cbb_dispatch_t get_lba_status;
167 	cbb_getattr_t getattr;
168 	uint64_t size_blocks;
169 	uint64_t size_bytes;
170 	struct ctl_be_block_softc *softc;
171 	struct devstat *disk_stats;
172 	ctl_be_block_lun_flags flags;
173 	SLIST_ENTRY(ctl_be_block_lun) links;
174 	struct taskqueue *io_taskqueue;
175 	struct task io_task;
176 	int num_threads;
177 	STAILQ_HEAD(, ctl_io_hdr) input_queue;
178 	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
179 	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
180 	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
181 	struct mtx_padalign io_lock;
182 	struct mtx_padalign queue_lock;
183 };
184 
185 /*
186  * Overall softc structure for the block backend module.
187  */
188 struct ctl_be_block_softc {
189 	struct sx			 modify_lock;
190 	struct mtx			 lock;
191 	int				 num_luns;
192 	SLIST_HEAD(, ctl_be_block_lun)	 lun_list;
193 	uma_zone_t			 beio_zone;
194 	uma_zone_t			 bufmin_zone;
195 	uma_zone_t			 bufmax_zone;
196 };
197 
198 static struct ctl_be_block_softc backend_block_softc;
199 
200 /*
201  * Per-I/O information.
202  */
203 struct ctl_be_block_io {
204 	union ctl_io			*io;
205 	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS];
206 	struct iovec			xiovecs[CTLBLK_MAX_SEGS];
207 	int				refcnt;
208 	int				bio_cmd;
209 	int				two_sglists;
210 	int				num_segs;
211 	int				num_bios_sent;
212 	int				num_bios_done;
213 	int				send_complete;
214 	int				first_error;
215 	uint64_t			first_error_offset;
216 	struct bintime			ds_t0;
217 	devstat_tag_type		ds_tag_type;
218 	devstat_trans_flags		ds_trans_type;
219 	uint64_t			io_len;
220 	uint64_t			io_offset;
221 	int				io_arg;
222 	struct ctl_be_block_softc	*softc;
223 	struct ctl_be_block_lun		*lun;
224 	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
225 };
226 
227 extern struct ctl_softc *control_softc;
228 
229 static int cbb_num_threads = 32;
230 SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
231 	    "CAM Target Layer Block Backend");
232 SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
233            &cbb_num_threads, 0, "Number of threads per backing file");
234 
235 static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
236 static void ctl_free_beio(struct ctl_be_block_io *beio);
237 static void ctl_complete_beio(struct ctl_be_block_io *beio);
238 static int ctl_be_block_move_done(union ctl_io *io, bool samethr);
239 static void ctl_be_block_biodone(struct bio *bio);
240 static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
241 				    struct ctl_be_block_io *beio);
242 static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
243 				       struct ctl_be_block_io *beio);
244 static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
245 				  struct ctl_be_block_io *beio);
246 static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
247 					 const char *attrname);
248 static void ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
249 				    struct ctl_be_block_io *beio);
250 static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
251 				   struct ctl_be_block_io *beio);
252 static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
253 				   struct ctl_be_block_io *beio);
254 static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
255 				      struct ctl_be_block_io *beio);
256 static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
257 					 const char *attrname);
258 static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
259 				    union ctl_io *io);
260 static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
261 				    union ctl_io *io);
262 static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
263 				  union ctl_io *io);
264 static void ctl_be_block_worker(void *context, int pending);
265 static int ctl_be_block_submit(union ctl_io *io);
266 static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
267 				   int flag, struct thread *td);
268 static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
269 				  struct ctl_lun_req *req);
270 static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
271 				 struct ctl_lun_req *req);
272 static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
273 static int ctl_be_block_open(struct ctl_be_block_lun *be_lun,
274 			     struct ctl_lun_req *req);
275 static int ctl_be_block_create(struct ctl_be_block_softc *softc,
276 			       struct ctl_lun_req *req);
277 static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
278 			   struct ctl_lun_req *req);
279 static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
280 			   struct ctl_lun_req *req);
281 static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun);
282 static int ctl_be_block_config_write(union ctl_io *io);
283 static int ctl_be_block_config_read(union ctl_io *io);
284 static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb);
285 static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname);
286 static int ctl_be_block_init(void);
287 static int ctl_be_block_shutdown(void);
288 
289 static struct ctl_backend_driver ctl_be_block_driver =
290 {
291 	.name = "block",
292 	.flags = CTL_BE_FLAG_HAS_CONFIG,
293 	.init = ctl_be_block_init,
294 	.shutdown = ctl_be_block_shutdown,
295 	.data_submit = ctl_be_block_submit,
296 	.config_read = ctl_be_block_config_read,
297 	.config_write = ctl_be_block_config_write,
298 	.ioctl = ctl_be_block_ioctl,
299 	.lun_info = ctl_be_block_lun_info,
300 	.lun_attr = ctl_be_block_lun_attr
301 };
302 
303 MALLOC_DEFINE(M_CTLBLK, "ctlblock", "Memory used for CTL block backend");
304 CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);
305 
306 static void
307 ctl_alloc_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg,
308     size_t len)
309 {
310 
311 	if (len <= CTLBLK_MIN_SEG) {
312 		sg->addr = uma_zalloc(softc->bufmin_zone, M_WAITOK);
313 	} else {
314 		KASSERT(len <= CTLBLK_MAX_SEG,
315 		    ("Too large alloc %zu > %lu", len, CTLBLK_MAX_SEG));
316 		sg->addr = uma_zalloc(softc->bufmax_zone, M_WAITOK);
317 	}
318 	sg->len = len;
319 }
320 
321 static void
322 ctl_free_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg)
323 {
324 
325 	if (sg->len <= CTLBLK_MIN_SEG) {
326 		uma_zfree(softc->bufmin_zone, sg->addr);
327 	} else {
328 		KASSERT(sg->len <= CTLBLK_MAX_SEG,
329 		    ("Too large free %zu > %lu", sg->len, CTLBLK_MAX_SEG));
330 		uma_zfree(softc->bufmax_zone, sg->addr);
331 	}
332 }
333 
334 static struct ctl_be_block_io *
335 ctl_alloc_beio(struct ctl_be_block_softc *softc)
336 {
337 	struct ctl_be_block_io *beio;
338 
339 	beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO);
340 	beio->softc = softc;
341 	beio->refcnt = 1;
342 	return (beio);
343 }
344 
345 static void
346 ctl_real_free_beio(struct ctl_be_block_io *beio)
347 {
348 	struct ctl_be_block_softc *softc = beio->softc;
349 	int i;
350 
351 	for (i = 0; i < beio->num_segs; i++) {
352 		ctl_free_seg(softc, &beio->sg_segs[i]);
353 
354 		/* For compare we had two equal S/G lists. */
355 		if (beio->two_sglists) {
356 			ctl_free_seg(softc,
357 			    &beio->sg_segs[i + CTLBLK_HALF_SEGS]);
358 		}
359 	}
360 
361 	uma_zfree(softc->beio_zone, beio);
362 }
363 
364 static void
365 ctl_refcnt_beio(void *arg, int diff)
366 {
367 	struct ctl_be_block_io *beio = arg;
368 
369 	if (atomic_fetchadd_int(&beio->refcnt, diff) + diff == 0)
370 		ctl_real_free_beio(beio);
371 }
372 
/* Drop one reference on beio; dropping the last one frees it. */
static void
ctl_free_beio(struct ctl_be_block_io *beio)
{
	ctl_refcnt_beio(beio, -1);
}
379 
380 static void
381 ctl_complete_beio(struct ctl_be_block_io *beio)
382 {
383 	union ctl_io *io = beio->io;
384 
385 	if (beio->beio_cont != NULL) {
386 		beio->beio_cont(beio);
387 	} else {
388 		ctl_free_beio(beio);
389 		ctl_data_submit_done(io);
390 	}
391 }
392 
/*
 * Compare two byte buffers.
 *
 * Returns the index of the first byte at which a and b differ, or size when
 * the first size bytes are identical (including size == 0).  Neither buffer
 * is modified, so both are taken as pointers to const.
 */
static size_t
cmp(const uint8_t *a, const uint8_t *b, size_t size)
{
	size_t i;

	for (i = 0; i < size; i++) {
		if (a[i] != b[i])
			break;
	}
	return (i);
}
404 
405 static void
406 ctl_be_block_compare(union ctl_io *io)
407 {
408 	struct ctl_be_block_io *beio;
409 	uint64_t off, res;
410 	int i;
411 	uint8_t info[8];
412 
413 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
414 	off = 0;
415 	for (i = 0; i < beio->num_segs; i++) {
416 		res = cmp(beio->sg_segs[i].addr,
417 		    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
418 		    beio->sg_segs[i].len);
419 		off += res;
420 		if (res < beio->sg_segs[i].len)
421 			break;
422 	}
423 	if (i < beio->num_segs) {
424 		scsi_u64to8b(off, info);
425 		ctl_set_sense(&io->scsiio, /*current_error*/ 1,
426 		    /*sense_key*/ SSD_KEY_MISCOMPARE,
427 		    /*asc*/ 0x1D, /*ascq*/ 0x00,
428 		    /*type*/ SSD_ELEM_INFO,
429 		    /*size*/ sizeof(info), /*data*/ &info,
430 		    /*type*/ SSD_ELEM_NONE);
431 	} else
432 		ctl_set_success(&io->scsiio);
433 }
434 
435 static int
436 ctl_be_block_move_done(union ctl_io *io, bool samethr)
437 {
438 	struct ctl_be_block_io *beio;
439 	struct ctl_be_block_lun *be_lun;
440 	struct ctl_lba_len_flags *lbalen;
441 
442 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
443 
444 	DPRINTF("entered\n");
445 	io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;
446 
447 	/*
448 	 * We set status at this point for read and compare commands.
449 	 */
450 	if ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0 &&
451 	    (io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE) {
452 		lbalen = ARGS(io);
453 		if (lbalen->flags & CTL_LLF_READ) {
454 			ctl_set_success(&io->scsiio);
455 		} else if (lbalen->flags & CTL_LLF_COMPARE) {
456 			/* We have two data blocks ready for comparison. */
457 			ctl_be_block_compare(io);
458 		}
459 	}
460 
461 	/*
462 	 * If this is a read, or a write with errors, it is done.
463 	 */
464 	if ((beio->bio_cmd == BIO_READ)
465 	 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
466 	 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
467 		ctl_complete_beio(beio);
468 		return (0);
469 	}
470 
471 	/*
472 	 * At this point, we have a write and the DMA completed successfully.
473 	 * If we were called synchronously in the original thread then just
474 	 * dispatch, otherwise we now have to queue it to the task queue to
475 	 * execute the backend I/O.  That is because we do blocking
476 	 * memory allocations, and in the file backing case, blocking I/O.
477 	 * This move done routine is generally called in the SIM's
478 	 * interrupt context, and therefore we cannot block.
479 	 */
480 	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
481 	if (samethr) {
482 		be_lun->dispatch(be_lun, beio);
483 	} else {
484 		mtx_lock(&be_lun->queue_lock);
485 		STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
486 		mtx_unlock(&be_lun->queue_lock);
487 		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
488 	}
489 	return (0);
490 }
491 
492 static void
493 ctl_be_block_biodone(struct bio *bio)
494 {
495 	struct ctl_be_block_io *beio = bio->bio_caller1;
496 	struct ctl_be_block_lun *be_lun = beio->lun;
497 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
498 	union ctl_io *io;
499 	int error;
500 
501 	io = beio->io;
502 
503 	DPRINTF("entered\n");
504 
505 	error = bio->bio_error;
506 	mtx_lock(&be_lun->io_lock);
507 	if (error != 0 &&
508 	    (beio->first_error == 0 ||
509 	     bio->bio_offset < beio->first_error_offset)) {
510 		beio->first_error = error;
511 		beio->first_error_offset = bio->bio_offset;
512 	}
513 
514 	beio->num_bios_done++;
515 
516 	/*
517 	 * XXX KDM will this cause WITNESS to complain?  Holding a lock
518 	 * during the free might cause it to complain.
519 	 */
520 	g_destroy_bio(bio);
521 
522 	/*
523 	 * If the send complete bit isn't set, or we aren't the last I/O to
524 	 * complete, then we're done.
525 	 */
526 	if ((beio->send_complete == 0)
527 	 || (beio->num_bios_done < beio->num_bios_sent)) {
528 		mtx_unlock(&be_lun->io_lock);
529 		return;
530 	}
531 
532 	/*
533 	 * At this point, we've verified that we are the last I/O to
534 	 * complete, so it's safe to drop the lock.
535 	 */
536 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
537 	    beio->ds_tag_type, beio->ds_trans_type,
538 	    /*now*/ NULL, /*then*/&beio->ds_t0);
539 	mtx_unlock(&be_lun->io_lock);
540 
541 	/*
542 	 * If there are any errors from the backing device, we fail the
543 	 * entire I/O with a medium error.
544 	 */
545 	error = beio->first_error;
546 	if (error != 0) {
547 		if (error == EOPNOTSUPP) {
548 			ctl_set_invalid_opcode(&io->scsiio);
549 		} else if (error == ENOSPC || error == EDQUOT) {
550 			ctl_set_space_alloc_fail(&io->scsiio);
551 		} else if (error == EROFS || error == EACCES) {
552 			ctl_set_hw_write_protected(&io->scsiio);
553 		} else if (beio->bio_cmd == BIO_FLUSH) {
554 			/* XXX KDM is there is a better error here? */
555 			ctl_set_internal_failure(&io->scsiio,
556 						 /*sks_valid*/ 1,
557 						 /*retry_count*/ 0xbad2);
558 		} else {
559 			ctl_set_medium_error(&io->scsiio,
560 			    beio->bio_cmd == BIO_READ);
561 		}
562 		ctl_complete_beio(beio);
563 		return;
564 	}
565 
566 	/*
567 	 * If this is a write, a flush, a delete or verify, we're all done.
568 	 * If this is a read, we can now send the data to the user.
569 	 */
570 	if ((beio->bio_cmd == BIO_WRITE)
571 	 || (beio->bio_cmd == BIO_FLUSH)
572 	 || (beio->bio_cmd == BIO_DELETE)
573 	 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
574 		ctl_set_success(&io->scsiio);
575 		ctl_complete_beio(beio);
576 	} else {
577 		if ((ARGS(io)->flags & CTL_LLF_READ) &&
578 		    beio->beio_cont == NULL) {
579 			ctl_set_success(&io->scsiio);
580 			if (cbe_lun->serseq >= CTL_LUN_SERSEQ_SOFT)
581 				ctl_serseq_done(io);
582 		}
583 		ctl_datamove(io);
584 	}
585 }
586 
587 static void
588 ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
589 			struct ctl_be_block_io *beio)
590 {
591 	union ctl_io *io = beio->io;
592 	struct mount *mountpoint;
593 	int error;
594 
595 	DPRINTF("entered\n");
596 
597 	binuptime(&beio->ds_t0);
598 	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
599 
600 	(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
601 
602 	vn_lock(be_lun->vn, vn_lktype_write(mountpoint, be_lun->vn) |
603 	    LK_RETRY);
604 	error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
605 	    curthread);
606 	VOP_UNLOCK(be_lun->vn);
607 
608 	vn_finished_write(mountpoint);
609 
610 	mtx_lock(&be_lun->io_lock);
611 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
612 	    beio->ds_tag_type, beio->ds_trans_type,
613 	    /*now*/ NULL, /*then*/&beio->ds_t0);
614 	mtx_unlock(&be_lun->io_lock);
615 
616 	if (error == 0)
617 		ctl_set_success(&io->scsiio);
618 	else {
619 		/* XXX KDM is there is a better error here? */
620 		ctl_set_internal_failure(&io->scsiio,
621 					 /*sks_valid*/ 1,
622 					 /*retry_count*/ 0xbad1);
623 	}
624 
625 	ctl_complete_beio(beio);
626 }
627 
628 SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t");
629 SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t");
630 SDT_PROBE_DEFINE1(cbb, , read, file_done,"uint64_t");
631 SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t");
632 
633 static void
634 ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
635 			   struct ctl_be_block_io *beio)
636 {
637 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
638 	struct ctl_be_block_filedata *file_data;
639 	union ctl_io *io;
640 	struct uio xuio;
641 	struct iovec *xiovec;
642 	size_t s;
643 	int error, flags, i;
644 
645 	DPRINTF("entered\n");
646 
647 	file_data = &be_lun->backend.file;
648 	io = beio->io;
649 	flags = 0;
650 	if (ARGS(io)->flags & CTL_LLF_DPO)
651 		flags |= IO_DIRECT;
652 	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
653 		flags |= IO_SYNC;
654 
655 	bzero(&xuio, sizeof(xuio));
656 	if (beio->bio_cmd == BIO_READ) {
657 		SDT_PROBE0(cbb, , read, file_start);
658 		xuio.uio_rw = UIO_READ;
659 	} else {
660 		SDT_PROBE0(cbb, , write, file_start);
661 		xuio.uio_rw = UIO_WRITE;
662 	}
663 	xuio.uio_offset = beio->io_offset;
664 	xuio.uio_resid = beio->io_len;
665 	xuio.uio_segflg = UIO_SYSSPACE;
666 	xuio.uio_iov = beio->xiovecs;
667 	xuio.uio_iovcnt = beio->num_segs;
668 	xuio.uio_td = curthread;
669 
670 	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
671 		xiovec->iov_base = beio->sg_segs[i].addr;
672 		xiovec->iov_len = beio->sg_segs[i].len;
673 	}
674 
675 	binuptime(&beio->ds_t0);
676 	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
677 
678 	if (beio->bio_cmd == BIO_READ) {
679 		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
680 
681 		if (beio->beio_cont == NULL &&
682 		    cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
683 			ctl_serseq_done(io);
684 		/*
685 		 * UFS pays attention to IO_DIRECT for reads.  If the
686 		 * DIRECTIO option is configured into the kernel, it calls
687 		 * ffs_rawread().  But that only works for single-segment
688 		 * uios with user space addresses.  In our case, with a
689 		 * kernel uio, it still reads into the buffer cache, but it
690 		 * will just try to release the buffer from the cache later
691 		 * on in ffs_read().
692 		 *
693 		 * ZFS does not pay attention to IO_DIRECT for reads.
694 		 *
695 		 * UFS does not pay attention to IO_SYNC for reads.
696 		 *
697 		 * ZFS pays attention to IO_SYNC (which translates into the
698 		 * Solaris define FRSYNC for zfs_read()) for reads.  It
699 		 * attempts to sync the file before reading.
700 		 */
701 		error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);
702 
703 		VOP_UNLOCK(be_lun->vn);
704 		SDT_PROBE0(cbb, , read, file_done);
705 		if (error == 0 && xuio.uio_resid > 0) {
706 			/*
707 			 * If we red less then requested (EOF), then
708 			 * we should clean the rest of the buffer.
709 			 */
710 			s = beio->io_len - xuio.uio_resid;
711 			for (i = 0; i < beio->num_segs; i++) {
712 				if (s >= beio->sg_segs[i].len) {
713 					s -= beio->sg_segs[i].len;
714 					continue;
715 				}
716 				bzero((uint8_t *)beio->sg_segs[i].addr + s,
717 				    beio->sg_segs[i].len - s);
718 				s = 0;
719 			}
720 		}
721 	} else {
722 		struct mount *mountpoint;
723 
724 		(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
725 		vn_lock(be_lun->vn, vn_lktype_write(mountpoint,
726 		    be_lun->vn) | LK_RETRY);
727 
728 		/*
729 		 * UFS pays attention to IO_DIRECT for writes.  The write
730 		 * is done asynchronously.  (Normally the write would just
731 		 * get put into cache.
732 		 *
733 		 * UFS pays attention to IO_SYNC for writes.  It will
734 		 * attempt to write the buffer out synchronously if that
735 		 * flag is set.
736 		 *
737 		 * ZFS does not pay attention to IO_DIRECT for writes.
738 		 *
739 		 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
740 		 * for writes.  It will flush the transaction from the
741 		 * cache before returning.
742 		 */
743 		error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
744 		VOP_UNLOCK(be_lun->vn);
745 
746 		vn_finished_write(mountpoint);
747 		SDT_PROBE0(cbb, , write, file_done);
748         }
749 
750 	mtx_lock(&be_lun->io_lock);
751 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
752 	    beio->ds_tag_type, beio->ds_trans_type,
753 	    /*now*/ NULL, /*then*/&beio->ds_t0);
754 	mtx_unlock(&be_lun->io_lock);
755 
756 	/*
757 	 * If we got an error, set the sense data to "MEDIUM ERROR" and
758 	 * return the I/O to the user.
759 	 */
760 	if (error != 0) {
761 		if (error == ENOSPC || error == EDQUOT) {
762 			ctl_set_space_alloc_fail(&io->scsiio);
763 		} else if (error == EROFS || error == EACCES) {
764 			ctl_set_hw_write_protected(&io->scsiio);
765 		} else {
766 			ctl_set_medium_error(&io->scsiio,
767 			    beio->bio_cmd == BIO_READ);
768 		}
769 		ctl_complete_beio(beio);
770 		return;
771 	}
772 
773 	/*
774 	 * If this is a write or a verify, we're all done.
775 	 * If this is a read, we can now send the data to the user.
776 	 */
777 	if ((beio->bio_cmd == BIO_WRITE) ||
778 	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
779 		ctl_set_success(&io->scsiio);
780 		ctl_complete_beio(beio);
781 	} else {
782 		if ((ARGS(io)->flags & CTL_LLF_READ) &&
783 		    beio->beio_cont == NULL) {
784 			ctl_set_success(&io->scsiio);
785 			if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
786 				ctl_serseq_done(io);
787 		}
788 		ctl_datamove(io);
789 	}
790 }
791 
792 static void
793 ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
794 			struct ctl_be_block_io *beio)
795 {
796 	union ctl_io *io = beio->io;
797 	struct ctl_lba_len_flags *lbalen = ARGS(io);
798 	struct scsi_get_lba_status_data *data;
799 	off_t roff, off;
800 	int error, status;
801 
802 	DPRINTF("entered\n");
803 
804 	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
805 	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
806 	error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
807 	    0, curthread->td_ucred, curthread);
808 	if (error == 0 && off > roff)
809 		status = 0;	/* mapped up to off */
810 	else {
811 		error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
812 		    0, curthread->td_ucred, curthread);
813 		if (error == 0 && off > roff)
814 			status = 1;	/* deallocated up to off */
815 		else {
816 			status = 0;	/* unknown up to the end */
817 			off = be_lun->size_bytes;
818 		}
819 	}
820 	VOP_UNLOCK(be_lun->vn);
821 
822 	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
823 	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
824 	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
825 	    lbalen->lba), data->descr[0].length);
826 	data->descr[0].status = status;
827 
828 	ctl_complete_beio(beio);
829 }
830 
831 static uint64_t
832 ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
833 {
834 	struct vattr		vattr;
835 	struct statfs		statfs;
836 	uint64_t		val;
837 	int			error;
838 
839 	val = UINT64_MAX;
840 	if (be_lun->vn == NULL)
841 		return (val);
842 	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
843 	if (strcmp(attrname, "blocksused") == 0) {
844 		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
845 		if (error == 0)
846 			val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
847 	}
848 	if (strcmp(attrname, "blocksavail") == 0 &&
849 	    !VN_IS_DOOMED(be_lun->vn)) {
850 		error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
851 		if (error == 0)
852 			val = statfs.f_bavail * statfs.f_bsize /
853 			    be_lun->cbe_lun.blocksize;
854 	}
855 	VOP_UNLOCK(be_lun->vn);
856 	return (val);
857 }
858 
859 static void
860 ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
861 		        struct ctl_be_block_io *beio)
862 {
863 	struct ctl_be_block_filedata *file_data;
864 	union ctl_io *io;
865 	struct ctl_ptr_len_flags *ptrlen;
866 	struct scsi_unmap_desc *buf, *end;
867 	struct mount *mp;
868 	off_t off, len;
869 	int error;
870 
871 	io = beio->io;
872 	file_data = &be_lun->backend.file;
873 	mp = NULL;
874 	error = 0;
875 
876 	binuptime(&beio->ds_t0);
877 	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
878 
879 	(void)vn_start_write(be_lun->vn, &mp, V_WAIT);
880 	vn_lock(be_lun->vn, vn_lktype_write(mp, be_lun->vn) | LK_RETRY);
881 	if (beio->io_offset == -1) {
882 		beio->io_len = 0;
883 		ptrlen = (struct ctl_ptr_len_flags *)
884 		    &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
885 		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
886 		end = buf + ptrlen->len / sizeof(*buf);
887 		for (; buf < end; buf++) {
888 			off = (off_t)scsi_8btou64(buf->lba) *
889 			    be_lun->cbe_lun.blocksize;
890 			len = (off_t)scsi_4btoul(buf->length) *
891 			    be_lun->cbe_lun.blocksize;
892 			beio->io_len += len;
893 			error = vn_deallocate(be_lun->vn, &off, &len,
894 			    0, IO_NOMACCHECK | IO_NODELOCKED, file_data->cred,
895 			    NOCRED);
896 			if (error != 0)
897 				break;
898 		}
899 	} else {
900 		/* WRITE_SAME */
901 		off = beio->io_offset;
902 		len = beio->io_len;
903 		error = vn_deallocate(be_lun->vn, &off, &len, 0,
904 		    IO_NOMACCHECK | IO_NODELOCKED, file_data->cred, NOCRED);
905 	}
906 	VOP_UNLOCK(be_lun->vn);
907 	vn_finished_write(mp);
908 
909 	mtx_lock(&be_lun->io_lock);
910 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
911 	    beio->ds_tag_type, beio->ds_trans_type,
912 	    /*now*/ NULL, /*then*/&beio->ds_t0);
913 	mtx_unlock(&be_lun->io_lock);
914 
915 	/*
916 	 * If we got an error, set the sense data to "MEDIUM ERROR" and
917 	 * return the I/O to the user.
918 	 */
919 	switch (error) {
920 	case 0:
921 		ctl_set_success(&io->scsiio);
922 		break;
923 	case ENOSPC:
924 	case EDQUOT:
925 		ctl_set_space_alloc_fail(&io->scsiio);
926 		break;
927 	case EROFS:
928 	case EACCES:
929 		ctl_set_hw_write_protected(&io->scsiio);
930 		break;
931 	default:
932 		ctl_set_medium_error(&io->scsiio, false);
933 	}
934 	ctl_complete_beio(beio);
935 }
936 
937 static void
938 ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
939 			   struct ctl_be_block_io *beio)
940 {
941 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
942 	union ctl_io *io;
943 	struct cdevsw *csw;
944 	struct cdev *dev;
945 	struct uio xuio;
946 	struct iovec *xiovec;
947 	int error, flags, i, ref;
948 
949 	DPRINTF("entered\n");
950 
951 	io = beio->io;
952 	flags = 0;
953 	if (ARGS(io)->flags & CTL_LLF_DPO)
954 		flags |= IO_DIRECT;
955 	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
956 		flags |= IO_SYNC;
957 
958 	bzero(&xuio, sizeof(xuio));
959 	if (beio->bio_cmd == BIO_READ) {
960 		SDT_PROBE0(cbb, , read, file_start);
961 		xuio.uio_rw = UIO_READ;
962 	} else {
963 		SDT_PROBE0(cbb, , write, file_start);
964 		xuio.uio_rw = UIO_WRITE;
965 	}
966 	xuio.uio_offset = beio->io_offset;
967 	xuio.uio_resid = beio->io_len;
968 	xuio.uio_segflg = UIO_SYSSPACE;
969 	xuio.uio_iov = beio->xiovecs;
970 	xuio.uio_iovcnt = beio->num_segs;
971 	xuio.uio_td = curthread;
972 
973 	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
974 		xiovec->iov_base = beio->sg_segs[i].addr;
975 		xiovec->iov_len = beio->sg_segs[i].len;
976 	}
977 
978 	binuptime(&beio->ds_t0);
979 	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
980 
981 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
982 	if (csw) {
983 		if (beio->bio_cmd == BIO_READ) {
984 			if (beio->beio_cont == NULL &&
985 			    cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
986 				ctl_serseq_done(io);
987 			error = csw->d_read(dev, &xuio, flags);
988 		} else
989 			error = csw->d_write(dev, &xuio, flags);
990 		dev_relthread(dev, ref);
991 	} else
992 		error = ENXIO;
993 
994 	if (beio->bio_cmd == BIO_READ)
995 		SDT_PROBE0(cbb, , read, file_done);
996 	else
997 		SDT_PROBE0(cbb, , write, file_done);
998 
999 	mtx_lock(&be_lun->io_lock);
1000 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
1001 	    beio->ds_tag_type, beio->ds_trans_type,
1002 	    /*now*/ NULL, /*then*/&beio->ds_t0);
1003 	mtx_unlock(&be_lun->io_lock);
1004 
1005 	/*
1006 	 * If we got an error, set the sense data to "MEDIUM ERROR" and
1007 	 * return the I/O to the user.
1008 	 */
1009 	if (error != 0) {
1010 		if (error == ENOSPC || error == EDQUOT) {
1011 			ctl_set_space_alloc_fail(&io->scsiio);
1012 		} else if (error == EROFS || error == EACCES) {
1013 			ctl_set_hw_write_protected(&io->scsiio);
1014 		} else {
1015 			ctl_set_medium_error(&io->scsiio,
1016 			    beio->bio_cmd == BIO_READ);
1017 		}
1018 		ctl_complete_beio(beio);
1019 		return;
1020 	}
1021 
1022 	/*
1023 	 * If this is a write or a verify, we're all done.
1024 	 * If this is a read, we can now send the data to the user.
1025 	 */
1026 	if ((beio->bio_cmd == BIO_WRITE) ||
1027 	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
1028 		ctl_set_success(&io->scsiio);
1029 		ctl_complete_beio(beio);
1030 	} else {
1031 		if ((ARGS(io)->flags & CTL_LLF_READ) &&
1032 		    beio->beio_cont == NULL) {
1033 			ctl_set_success(&io->scsiio);
1034 			if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
1035 				ctl_serseq_done(io);
1036 		}
1037 		ctl_datamove(io);
1038 	}
1039 }
1040 
1041 static void
1042 ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
1043 			struct ctl_be_block_io *beio)
1044 {
1045 	union ctl_io *io = beio->io;
1046 	struct cdevsw *csw;
1047 	struct cdev *dev;
1048 	struct ctl_lba_len_flags *lbalen = ARGS(io);
1049 	struct scsi_get_lba_status_data *data;
1050 	off_t roff, off;
1051 	int error, ref, status;
1052 
1053 	DPRINTF("entered\n");
1054 
1055 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1056 	if (csw == NULL) {
1057 		status = 0;	/* unknown up to the end */
1058 		off = be_lun->size_bytes;
1059 		goto done;
1060 	}
1061 	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
1062 	error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
1063 	    curthread);
1064 	if (error == 0 && off > roff)
1065 		status = 0;	/* mapped up to off */
1066 	else {
1067 		error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
1068 		    curthread);
1069 		if (error == 0 && off > roff)
1070 			status = 1;	/* deallocated up to off */
1071 		else {
1072 			status = 0;	/* unknown up to the end */
1073 			off = be_lun->size_bytes;
1074 		}
1075 	}
1076 	dev_relthread(dev, ref);
1077 
1078 done:
1079 	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
1080 	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
1081 	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
1082 	    lbalen->lba), data->descr[0].length);
1083 	data->descr[0].status = status;
1084 
1085 	ctl_complete_beio(beio);
1086 }
1087 
1088 static void
1089 ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
1090 		       struct ctl_be_block_io *beio)
1091 {
1092 	struct bio *bio;
1093 	struct cdevsw *csw;
1094 	struct cdev *dev;
1095 	int ref;
1096 
1097 	DPRINTF("entered\n");
1098 
1099 	/* This can't fail, it's a blocking allocation. */
1100 	bio = g_alloc_bio();
1101 
1102 	bio->bio_cmd	    = BIO_FLUSH;
1103 	bio->bio_offset	    = 0;
1104 	bio->bio_data	    = 0;
1105 	bio->bio_done	    = ctl_be_block_biodone;
1106 	bio->bio_caller1    = beio;
1107 	bio->bio_pblkno	    = 0;
1108 
1109 	/*
1110 	 * We don't need to acquire the LUN lock here, because we are only
1111 	 * sending one bio, and so there is no other context to synchronize
1112 	 * with.
1113 	 */
1114 	beio->num_bios_sent = 1;
1115 	beio->send_complete = 1;
1116 
1117 	binuptime(&beio->ds_t0);
1118 	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1119 
1120 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1121 	if (csw) {
1122 		bio->bio_dev = dev;
1123 		csw->d_strategy(bio);
1124 		dev_relthread(dev, ref);
1125 	} else {
1126 		bio->bio_error = ENXIO;
1127 		ctl_be_block_biodone(bio);
1128 	}
1129 }
1130 
1131 static void
1132 ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
1133 		       struct ctl_be_block_io *beio,
1134 		       uint64_t off, uint64_t len, int last)
1135 {
1136 	struct bio *bio;
1137 	uint64_t maxlen;
1138 	struct cdevsw *csw;
1139 	struct cdev *dev;
1140 	int ref;
1141 
1142 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1143 	maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
1144 	while (len > 0) {
1145 		bio = g_alloc_bio();
1146 		bio->bio_cmd	    = BIO_DELETE;
1147 		bio->bio_dev	    = dev;
1148 		bio->bio_offset	    = off;
1149 		bio->bio_length	    = MIN(len, maxlen);
1150 		bio->bio_data	    = 0;
1151 		bio->bio_done	    = ctl_be_block_biodone;
1152 		bio->bio_caller1    = beio;
1153 		bio->bio_pblkno     = off / be_lun->cbe_lun.blocksize;
1154 
1155 		off += bio->bio_length;
1156 		len -= bio->bio_length;
1157 
1158 		mtx_lock(&be_lun->io_lock);
1159 		beio->num_bios_sent++;
1160 		if (last && len == 0)
1161 			beio->send_complete = 1;
1162 		mtx_unlock(&be_lun->io_lock);
1163 
1164 		if (csw) {
1165 			csw->d_strategy(bio);
1166 		} else {
1167 			bio->bio_error = ENXIO;
1168 			ctl_be_block_biodone(bio);
1169 		}
1170 	}
1171 	if (csw)
1172 		dev_relthread(dev, ref);
1173 }
1174 
1175 static void
1176 ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
1177 		       struct ctl_be_block_io *beio)
1178 {
1179 	union ctl_io *io;
1180 	struct ctl_ptr_len_flags *ptrlen;
1181 	struct scsi_unmap_desc *buf, *end;
1182 	uint64_t len;
1183 
1184 	io = beio->io;
1185 
1186 	DPRINTF("entered\n");
1187 
1188 	binuptime(&beio->ds_t0);
1189 	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1190 
1191 	if (beio->io_offset == -1) {
1192 		beio->io_len = 0;
1193 		ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1194 		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
1195 		end = buf + ptrlen->len / sizeof(*buf);
1196 		for (; buf < end; buf++) {
1197 			len = (uint64_t)scsi_4btoul(buf->length) *
1198 			    be_lun->cbe_lun.blocksize;
1199 			beio->io_len += len;
1200 			ctl_be_block_unmap_dev_range(be_lun, beio,
1201 			    scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
1202 			    len, (end - buf < 2) ? TRUE : FALSE);
1203 		}
1204 	} else
1205 		ctl_be_block_unmap_dev_range(be_lun, beio,
1206 		    beio->io_offset, beio->io_len, TRUE);
1207 }
1208 
1209 static void
1210 ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
1211 			  struct ctl_be_block_io *beio)
1212 {
1213 	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
1214 	struct bio *bio;
1215 	struct cdevsw *csw;
1216 	struct cdev *dev;
1217 	off_t cur_offset;
1218 	int i, max_iosize, ref;
1219 
1220 	DPRINTF("entered\n");
1221 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1222 
1223 	/*
1224 	 * We have to limit our I/O size to the maximum supported by the
1225 	 * backend device.
1226 	 */
1227 	if (csw) {
1228 		max_iosize = dev->si_iosize_max;
1229 		if (max_iosize <= 0)
1230 			max_iosize = DFLTPHYS;
1231 	} else
1232 		max_iosize = maxphys;
1233 
1234 	cur_offset = beio->io_offset;
1235 	for (i = 0; i < beio->num_segs; i++) {
1236 		size_t cur_size;
1237 		uint8_t *cur_ptr;
1238 
1239 		cur_size = beio->sg_segs[i].len;
1240 		cur_ptr = beio->sg_segs[i].addr;
1241 
1242 		while (cur_size > 0) {
1243 			/* This can't fail, it's a blocking allocation. */
1244 			bio = g_alloc_bio();
1245 
1246 			KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));
1247 
1248 			bio->bio_cmd = beio->bio_cmd;
1249 			bio->bio_dev = dev;
1250 			bio->bio_caller1 = beio;
1251 			bio->bio_length = min(cur_size, max_iosize);
1252 			bio->bio_offset = cur_offset;
1253 			bio->bio_data = cur_ptr;
1254 			bio->bio_done = ctl_be_block_biodone;
1255 			bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;
1256 
1257 			cur_offset += bio->bio_length;
1258 			cur_ptr += bio->bio_length;
1259 			cur_size -= bio->bio_length;
1260 
1261 			TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
1262 			beio->num_bios_sent++;
1263 		}
1264 	}
1265 	beio->send_complete = 1;
1266 	binuptime(&beio->ds_t0);
1267 	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1268 
1269 	/*
1270 	 * Fire off all allocated requests!
1271 	 */
1272 	while ((bio = TAILQ_FIRST(&queue)) != NULL) {
1273 		TAILQ_REMOVE(&queue, bio, bio_queue);
1274 		if (csw)
1275 			csw->d_strategy(bio);
1276 		else {
1277 			bio->bio_error = ENXIO;
1278 			ctl_be_block_biodone(bio);
1279 		}
1280 	}
1281 	if (csw)
1282 		dev_relthread(dev, ref);
1283 }
1284 
1285 static uint64_t
1286 ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
1287 {
1288 	struct diocgattr_arg	arg;
1289 	struct cdevsw *csw;
1290 	struct cdev *dev;
1291 	int error, ref;
1292 
1293 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1294 	if (csw == NULL)
1295 		return (UINT64_MAX);
1296 	strlcpy(arg.name, attrname, sizeof(arg.name));
1297 	arg.len = sizeof(arg.value.off);
1298 	if (csw->d_ioctl) {
1299 		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
1300 		    curthread);
1301 	} else
1302 		error = ENODEV;
1303 	dev_relthread(dev, ref);
1304 	if (error != 0)
1305 		return (UINT64_MAX);
1306 	return (arg.value.off);
1307 }
1308 
1309 static void
1310 ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
1311 			    union ctl_io *io)
1312 {
1313 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1314 	struct ctl_be_block_io *beio;
1315 	struct ctl_lba_len_flags *lbalen;
1316 
1317 	DPRINTF("entered\n");
1318 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1319 	lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1320 
1321 	beio->io_len = lbalen->len * cbe_lun->blocksize;
1322 	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1323 	beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
1324 	beio->bio_cmd = BIO_FLUSH;
1325 	beio->ds_trans_type = DEVSTAT_NO_DATA;
1326 	DPRINTF("SYNC\n");
1327 	be_lun->lun_flush(be_lun, beio);
1328 }
1329 
1330 static void
1331 ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
1332 {
1333 	union ctl_io *io;
1334 
1335 	io = beio->io;
1336 	ctl_free_beio(beio);
1337 	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1338 	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1339 	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1340 		ctl_config_write_done(io);
1341 		return;
1342 	}
1343 
1344 	ctl_be_block_config_write(io);
1345 }
1346 
1347 static void
1348 ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
1349 			    union ctl_io *io)
1350 {
1351 	struct ctl_be_block_softc *softc = be_lun->softc;
1352 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1353 	struct ctl_be_block_io *beio;
1354 	struct ctl_lba_len_flags *lbalen;
1355 	uint64_t len_left, lba;
1356 	uint32_t pb, pbo, adj;
1357 	int i, seglen;
1358 	uint8_t *buf, *end;
1359 
1360 	DPRINTF("entered\n");
1361 
1362 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1363 	lbalen = ARGS(io);
1364 
1365 	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
1366 	    (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
1367 		ctl_free_beio(beio);
1368 		ctl_set_invalid_field(&io->scsiio,
1369 				      /*sks_valid*/ 1,
1370 				      /*command*/ 1,
1371 				      /*field*/ 1,
1372 				      /*bit_valid*/ 0,
1373 				      /*bit*/ 0);
1374 		ctl_config_write_done(io);
1375 		return;
1376 	}
1377 
1378 	if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
1379 		beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1380 		beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
1381 		beio->bio_cmd = BIO_DELETE;
1382 		beio->ds_trans_type = DEVSTAT_FREE;
1383 
1384 		be_lun->unmap(be_lun, beio);
1385 		return;
1386 	}
1387 
1388 	beio->bio_cmd = BIO_WRITE;
1389 	beio->ds_trans_type = DEVSTAT_WRITE;
1390 
1391 	DPRINTF("WRITE SAME at LBA %jx len %u\n",
1392 	       (uintmax_t)lbalen->lba, lbalen->len);
1393 
1394 	pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
1395 	if (be_lun->cbe_lun.pblockoff > 0)
1396 		pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
1397 	else
1398 		pbo = 0;
1399 	len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
1400 	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
1401 		/*
1402 		 * Setup the S/G entry for this chunk.
1403 		 */
1404 		seglen = MIN(CTLBLK_MAX_SEG, len_left);
1405 		if (pb > cbe_lun->blocksize) {
1406 			adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
1407 			    seglen - pbo) % pb;
1408 			if (seglen > adj)
1409 				seglen -= adj;
1410 			else
1411 				seglen -= seglen % cbe_lun->blocksize;
1412 		} else
1413 			seglen -= seglen % cbe_lun->blocksize;
1414 		ctl_alloc_seg(softc, &beio->sg_segs[i], seglen);
1415 
1416 		DPRINTF("segment %d addr %p len %zd\n", i,
1417 			beio->sg_segs[i].addr, beio->sg_segs[i].len);
1418 
1419 		beio->num_segs++;
1420 		len_left -= seglen;
1421 
1422 		buf = beio->sg_segs[i].addr;
1423 		end = buf + seglen;
1424 		for (; buf < end; buf += cbe_lun->blocksize) {
1425 			if (lbalen->flags & SWS_NDOB) {
1426 				memset(buf, 0, cbe_lun->blocksize);
1427 			} else {
1428 				memcpy(buf, io->scsiio.kern_data_ptr,
1429 				    cbe_lun->blocksize);
1430 			}
1431 			if (lbalen->flags & SWS_LBDATA)
1432 				scsi_ulto4b(lbalen->lba + lba, buf);
1433 			lba++;
1434 		}
1435 	}
1436 
1437 	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1438 	beio->io_len = lba * cbe_lun->blocksize;
1439 
1440 	/* We can not do all in one run. Correct and schedule rerun. */
1441 	if (len_left > 0) {
1442 		lbalen->lba += lba;
1443 		lbalen->len -= lba;
1444 		beio->beio_cont = ctl_be_block_cw_done_ws;
1445 	}
1446 
1447 	be_lun->dispatch(be_lun, beio);
1448 }
1449 
1450 static void
1451 ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
1452 			    union ctl_io *io)
1453 {
1454 	struct ctl_be_block_io *beio;
1455 	struct ctl_ptr_len_flags *ptrlen;
1456 
1457 	DPRINTF("entered\n");
1458 
1459 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1460 	ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1461 
1462 	if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
1463 		ctl_free_beio(beio);
1464 		ctl_set_invalid_field(&io->scsiio,
1465 				      /*sks_valid*/ 0,
1466 				      /*command*/ 1,
1467 				      /*field*/ 0,
1468 				      /*bit_valid*/ 0,
1469 				      /*bit*/ 0);
1470 		ctl_config_write_done(io);
1471 		return;
1472 	}
1473 
1474 	beio->io_len = 0;
1475 	beio->io_offset = -1;
1476 	beio->bio_cmd = BIO_DELETE;
1477 	beio->ds_trans_type = DEVSTAT_FREE;
1478 	DPRINTF("UNMAP\n");
1479 	be_lun->unmap(be_lun, beio);
1480 }
1481 
1482 static void
1483 ctl_be_block_cr_done(struct ctl_be_block_io *beio)
1484 {
1485 	union ctl_io *io;
1486 
1487 	io = beio->io;
1488 	ctl_free_beio(beio);
1489 	ctl_config_read_done(io);
1490 }
1491 
1492 static void
1493 ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
1494 			 union ctl_io *io)
1495 {
1496 	struct ctl_be_block_io *beio;
1497 	struct ctl_be_block_softc *softc;
1498 
1499 	DPRINTF("entered\n");
1500 
1501 	softc = be_lun->softc;
1502 	beio = ctl_alloc_beio(softc);
1503 	beio->io = io;
1504 	beio->lun = be_lun;
1505 	beio->beio_cont = ctl_be_block_cr_done;
1506 	PRIV(io)->ptr = (void *)beio;
1507 
1508 	switch (io->scsiio.cdb[0]) {
1509 	case SERVICE_ACTION_IN:		/* GET LBA STATUS */
1510 		beio->bio_cmd = -1;
1511 		beio->ds_trans_type = DEVSTAT_NO_DATA;
1512 		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1513 		beio->io_len = 0;
1514 		if (be_lun->get_lba_status)
1515 			be_lun->get_lba_status(be_lun, beio);
1516 		else
1517 			ctl_be_block_cr_done(beio);
1518 		break;
1519 	default:
1520 		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1521 		break;
1522 	}
1523 }
1524 
1525 static void
1526 ctl_be_block_cw_done(struct ctl_be_block_io *beio)
1527 {
1528 	union ctl_io *io;
1529 
1530 	io = beio->io;
1531 	ctl_free_beio(beio);
1532 	ctl_config_write_done(io);
1533 }
1534 
1535 static void
1536 ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
1537 			 union ctl_io *io)
1538 {
1539 	struct ctl_be_block_io *beio;
1540 	struct ctl_be_block_softc *softc;
1541 
1542 	DPRINTF("entered\n");
1543 
1544 	softc = be_lun->softc;
1545 	beio = ctl_alloc_beio(softc);
1546 	beio->io = io;
1547 	beio->lun = be_lun;
1548 	beio->beio_cont = ctl_be_block_cw_done;
1549 	switch (io->scsiio.tag_type) {
1550 	case CTL_TAG_ORDERED:
1551 		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1552 		break;
1553 	case CTL_TAG_HEAD_OF_QUEUE:
1554 		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1555 		break;
1556 	case CTL_TAG_UNTAGGED:
1557 	case CTL_TAG_SIMPLE:
1558 	case CTL_TAG_ACA:
1559 	default:
1560 		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1561 		break;
1562 	}
1563 	PRIV(io)->ptr = (void *)beio;
1564 
1565 	switch (io->scsiio.cdb[0]) {
1566 	case SYNCHRONIZE_CACHE:
1567 	case SYNCHRONIZE_CACHE_16:
1568 		ctl_be_block_cw_dispatch_sync(be_lun, io);
1569 		break;
1570 	case WRITE_SAME_10:
1571 	case WRITE_SAME_16:
1572 		ctl_be_block_cw_dispatch_ws(be_lun, io);
1573 		break;
1574 	case UNMAP:
1575 		ctl_be_block_cw_dispatch_unmap(be_lun, io);
1576 		break;
1577 	default:
1578 		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1579 		break;
1580 	}
1581 }
1582 
/*
 * SDT probe definitions under the "cbb" provider for the read and write
 * paths: "start" and "alloc_done".  ctl_be_block_dispatch() fires "start"
 * on entry and "alloc_done" once the S/G segments for the request have
 * been set up.
 *
 * NOTE(review): each probe is declared here with one "uint64_t" argument,
 * but the firing sites in this file use SDT_PROBE0() and pass none --
 * confirm whether an argument was intended.
 */
1583 SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t");
1584 SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t");
1585 SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t");
1586 SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t");
1587 
1588 static void
1589 ctl_be_block_next(struct ctl_be_block_io *beio)
1590 {
1591 	struct ctl_be_block_lun *be_lun;
1592 	union ctl_io *io;
1593 
1594 	io = beio->io;
1595 	be_lun = beio->lun;
1596 	ctl_free_beio(beio);
1597 	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1598 	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1599 	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1600 		ctl_data_submit_done(io);
1601 		return;
1602 	}
1603 
1604 	io->io_hdr.status &= ~CTL_STATUS_MASK;
1605 	io->io_hdr.status |= CTL_STATUS_NONE;
1606 
1607 	mtx_lock(&be_lun->queue_lock);
1608 	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1609 	mtx_unlock(&be_lun->queue_lock);
1610 	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1611 }
1612 
1613 static void
1614 ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
1615 			   union ctl_io *io)
1616 {
1617 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1618 	struct ctl_be_block_io *beio;
1619 	struct ctl_be_block_softc *softc;
1620 	struct ctl_lba_len_flags *lbalen;
1621 	struct ctl_ptr_len_flags *bptrlen;
1622 	uint64_t len_left, lbas;
1623 	int i;
1624 
1625 	softc = be_lun->softc;
1626 
1627 	DPRINTF("entered\n");
1628 
1629 	lbalen = ARGS(io);
1630 	if (lbalen->flags & CTL_LLF_WRITE) {
1631 		SDT_PROBE0(cbb, , write, start);
1632 	} else {
1633 		SDT_PROBE0(cbb, , read, start);
1634 	}
1635 
1636 	beio = ctl_alloc_beio(softc);
1637 	beio->io = io;
1638 	beio->lun = be_lun;
1639 	bptrlen = PRIV(io);
1640 	bptrlen->ptr = (void *)beio;
1641 
1642 	switch (io->scsiio.tag_type) {
1643 	case CTL_TAG_ORDERED:
1644 		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1645 		break;
1646 	case CTL_TAG_HEAD_OF_QUEUE:
1647 		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1648 		break;
1649 	case CTL_TAG_UNTAGGED:
1650 	case CTL_TAG_SIMPLE:
1651 	case CTL_TAG_ACA:
1652 	default:
1653 		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1654 		break;
1655 	}
1656 
1657 	if (lbalen->flags & CTL_LLF_WRITE) {
1658 		beio->bio_cmd = BIO_WRITE;
1659 		beio->ds_trans_type = DEVSTAT_WRITE;
1660 	} else {
1661 		beio->bio_cmd = BIO_READ;
1662 		beio->ds_trans_type = DEVSTAT_READ;
1663 	}
1664 
1665 	DPRINTF("%s at LBA %jx len %u @%ju\n",
1666 	       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
1667 	       (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
1668 	lbas = CTLBLK_MAX_IO_SIZE;
1669 	if (lbalen->flags & CTL_LLF_COMPARE) {
1670 		beio->two_sglists = 1;
1671 		lbas /= 2;
1672 	}
1673 	lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
1674 	beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
1675 	beio->io_len = lbas * cbe_lun->blocksize;
1676 	bptrlen->len += lbas;
1677 
1678 	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
1679 		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
1680 		    i, CTLBLK_MAX_SEGS));
1681 
1682 		/*
1683 		 * Setup the S/G entry for this chunk.
1684 		 */
1685 		ctl_alloc_seg(softc, &beio->sg_segs[i],
1686 		    MIN(CTLBLK_MAX_SEG, len_left));
1687 
1688 		DPRINTF("segment %d addr %p len %zd\n", i,
1689 			beio->sg_segs[i].addr, beio->sg_segs[i].len);
1690 
1691 		/* Set up second segment for compare operation. */
1692 		if (beio->two_sglists) {
1693 			ctl_alloc_seg(softc,
1694 			    &beio->sg_segs[i + CTLBLK_HALF_SEGS],
1695 			    beio->sg_segs[i].len);
1696 		}
1697 
1698 		beio->num_segs++;
1699 		len_left -= beio->sg_segs[i].len;
1700 	}
1701 	if (bptrlen->len < lbalen->len)
1702 		beio->beio_cont = ctl_be_block_next;
1703 	io->scsiio.be_move_done = ctl_be_block_move_done;
1704 	/* For compare we have separate S/G lists for read and datamove. */
1705 	if (beio->two_sglists)
1706 		io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
1707 	else
1708 		io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
1709 	io->scsiio.kern_data_len = beio->io_len;
1710 	io->scsiio.kern_sg_entries = beio->num_segs;
1711 	io->scsiio.kern_data_ref = ctl_refcnt_beio;
1712 	io->scsiio.kern_data_arg = beio;
1713 	io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
1714 
1715 	/*
1716 	 * For the read case, we need to read the data into our buffers and
1717 	 * then we can send it back to the user.  For the write case, we
1718 	 * need to get the data from the user first.
1719 	 */
1720 	if (beio->bio_cmd == BIO_READ) {
1721 		SDT_PROBE0(cbb, , read, alloc_done);
1722 		be_lun->dispatch(be_lun, beio);
1723 	} else {
1724 		SDT_PROBE0(cbb, , write, alloc_done);
1725 		ctl_datamove(io);
1726 	}
1727 }
1728 
1729 static void
1730 ctl_be_block_worker(void *context, int pending)
1731 {
1732 	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
1733 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1734 	union ctl_io *io;
1735 	struct ctl_be_block_io *beio;
1736 
1737 	DPRINTF("entered\n");
1738 	/*
1739 	 * Fetch and process I/Os from all queues.  If we detect LUN
1740 	 * CTL_LUN_FLAG_NO_MEDIA status here -- it is result of a race,
1741 	 * so make response maximally opaque to not confuse initiator.
1742 	 */
1743 	for (;;) {
1744 		mtx_lock(&be_lun->queue_lock);
1745 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
1746 		if (io != NULL) {
1747 			DPRINTF("datamove queue\n");
1748 			STAILQ_REMOVE_HEAD(&be_lun->datamove_queue, links);
1749 			mtx_unlock(&be_lun->queue_lock);
1750 			beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1751 			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1752 				ctl_set_busy(&io->scsiio);
1753 				ctl_complete_beio(beio);
1754 				continue;
1755 			}
1756 			be_lun->dispatch(be_lun, beio);
1757 			continue;
1758 		}
1759 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
1760 		if (io != NULL) {
1761 			DPRINTF("config write queue\n");
1762 			STAILQ_REMOVE_HEAD(&be_lun->config_write_queue, links);
1763 			mtx_unlock(&be_lun->queue_lock);
1764 			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1765 				ctl_set_busy(&io->scsiio);
1766 				ctl_config_write_done(io);
1767 				continue;
1768 			}
1769 			ctl_be_block_cw_dispatch(be_lun, io);
1770 			continue;
1771 		}
1772 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
1773 		if (io != NULL) {
1774 			DPRINTF("config read queue\n");
1775 			STAILQ_REMOVE_HEAD(&be_lun->config_read_queue, links);
1776 			mtx_unlock(&be_lun->queue_lock);
1777 			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1778 				ctl_set_busy(&io->scsiio);
1779 				ctl_config_read_done(io);
1780 				continue;
1781 			}
1782 			ctl_be_block_cr_dispatch(be_lun, io);
1783 			continue;
1784 		}
1785 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
1786 		if (io != NULL) {
1787 			DPRINTF("input queue\n");
1788 			STAILQ_REMOVE_HEAD(&be_lun->input_queue, links);
1789 			mtx_unlock(&be_lun->queue_lock);
1790 			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1791 				ctl_set_busy(&io->scsiio);
1792 				ctl_data_submit_done(io);
1793 				continue;
1794 			}
1795 			ctl_be_block_dispatch(be_lun, io);
1796 			continue;
1797 		}
1798 
1799 		/*
1800 		 * If we get here, there is no work left in the queues, so
1801 		 * just break out and let the task queue go to sleep.
1802 		 */
1803 		mtx_unlock(&be_lun->queue_lock);
1804 		break;
1805 	}
1806 }
1807 
1808 /*
1809  * Entry point from CTL to the backend for I/O.  We queue everything to a
1810  * work thread, so this just puts the I/O on a queue and wakes up the
1811  * thread.
1812  */
1813 static int
1814 ctl_be_block_submit(union ctl_io *io)
1815 {
1816 	struct ctl_be_block_lun *be_lun;
1817 
1818 	DPRINTF("entered\n");
1819 
1820 	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
1821 
1822 	KASSERT(io->io_hdr.io_type == CTL_IO_SCSI,
1823 	    ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type));
1824 
1825 	PRIV(io)->len = 0;
1826 
1827 	mtx_lock(&be_lun->queue_lock);
1828 	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1829 	mtx_unlock(&be_lun->queue_lock);
1830 	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1831 
1832 	return (CTL_RETVAL_COMPLETE);
1833 }
1834 
1835 static int
1836 ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1837 			int flag, struct thread *td)
1838 {
1839 	struct ctl_be_block_softc *softc = &backend_block_softc;
1840 	int error;
1841 
1842 	error = 0;
1843 	switch (cmd) {
1844 	case CTL_LUN_REQ: {
1845 		struct ctl_lun_req *lun_req;
1846 
1847 		lun_req = (struct ctl_lun_req *)addr;
1848 
1849 		switch (lun_req->reqtype) {
1850 		case CTL_LUNREQ_CREATE:
1851 			error = ctl_be_block_create(softc, lun_req);
1852 			break;
1853 		case CTL_LUNREQ_RM:
1854 			error = ctl_be_block_rm(softc, lun_req);
1855 			break;
1856 		case CTL_LUNREQ_MODIFY:
1857 			error = ctl_be_block_modify(softc, lun_req);
1858 			break;
1859 		default:
1860 			lun_req->status = CTL_LUN_ERROR;
1861 			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1862 				 "invalid LUN request type %d",
1863 				 lun_req->reqtype);
1864 			break;
1865 		}
1866 		break;
1867 	}
1868 	default:
1869 		error = ENOTTY;
1870 		break;
1871 	}
1872 
1873 	return (error);
1874 }
1875 
1876 static int
1877 ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1878 {
1879 	struct ctl_be_lun *cbe_lun;
1880 	struct ctl_be_block_filedata *file_data;
1881 	struct ctl_lun_create_params *params;
1882 	const char		     *value;
1883 	struct vattr		      vattr;
1884 	off_t			      ps, pss, po, pos, us, uss, uo, uos;
1885 	int			      error;
1886 	long			      pconf;
1887 
1888 	cbe_lun = &be_lun->cbe_lun;
1889 	file_data = &be_lun->backend.file;
1890 	params = &be_lun->params;
1891 
1892 	be_lun->dev_type = CTL_BE_BLOCK_FILE;
1893 	be_lun->dispatch = ctl_be_block_dispatch_file;
1894 	be_lun->lun_flush = ctl_be_block_flush_file;
1895 	be_lun->get_lba_status = ctl_be_block_gls_file;
1896 	be_lun->getattr = ctl_be_block_getattr_file;
1897 	be_lun->unmap = ctl_be_block_unmap_file;
1898 	cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
1899 
1900 	error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
1901 	if (error != 0) {
1902 		snprintf(req->error_str, sizeof(req->error_str),
1903 			 "error calling VOP_GETATTR() for file %s",
1904 			 be_lun->dev_path);
1905 		return (error);
1906 	}
1907 
1908 	error = VOP_PATHCONF(be_lun->vn, _PC_DEALLOC_PRESENT, &pconf);
1909 	if (error != 0) {
1910 		snprintf(req->error_str, sizeof(req->error_str),
1911 		    "error calling VOP_PATHCONF() for file %s",
1912 		    be_lun->dev_path);
1913 		return (error);
1914 	}
1915 	if (pconf == 1)
1916 		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
1917 
1918 	file_data->cred = crhold(curthread->td_ucred);
1919 	if (params->lun_size_bytes != 0)
1920 		be_lun->size_bytes = params->lun_size_bytes;
1921 	else
1922 		be_lun->size_bytes = vattr.va_size;
1923 
1924 	/*
1925 	 * For files we can use any logical block size.  Prefer 512 bytes
1926 	 * for compatibility reasons.  If file's vattr.va_blocksize
1927 	 * (preferred I/O block size) is bigger and multiple to chosen
1928 	 * logical block size -- report it as physical block size.
1929 	 */
1930 	if (params->blocksize_bytes != 0)
1931 		cbe_lun->blocksize = params->blocksize_bytes;
1932 	else if (cbe_lun->lun_type == T_CDROM)
1933 		cbe_lun->blocksize = 2048;
1934 	else
1935 		cbe_lun->blocksize = 512;
1936 	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
1937 	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
1938 	    0 : (be_lun->size_blocks - 1);
1939 
1940 	us = ps = vattr.va_blocksize;
1941 	uo = po = 0;
1942 
1943 	value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
1944 	if (value != NULL)
1945 		ctl_expand_number(value, &ps);
1946 	value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
1947 	if (value != NULL)
1948 		ctl_expand_number(value, &po);
1949 	pss = ps / cbe_lun->blocksize;
1950 	pos = po / cbe_lun->blocksize;
1951 	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
1952 	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
1953 		cbe_lun->pblockexp = fls(pss) - 1;
1954 		cbe_lun->pblockoff = (pss - pos) % pss;
1955 	}
1956 
1957 	value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
1958 	if (value != NULL)
1959 		ctl_expand_number(value, &us);
1960 	value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
1961 	if (value != NULL)
1962 		ctl_expand_number(value, &uo);
1963 	uss = us / cbe_lun->blocksize;
1964 	uos = uo / cbe_lun->blocksize;
1965 	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
1966 	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
1967 		cbe_lun->ublockexp = fls(uss) - 1;
1968 		cbe_lun->ublockoff = (uss - uos) % uss;
1969 	}
1970 
1971 	/*
1972 	 * Sanity check.  The media size has to be at least one
1973 	 * sector long.
1974 	 */
1975 	if (be_lun->size_bytes < cbe_lun->blocksize) {
1976 		error = EINVAL;
1977 		snprintf(req->error_str, sizeof(req->error_str),
1978 			 "file %s size %ju < block size %u", be_lun->dev_path,
1979 			 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
1980 	}
1981 
1982 	cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
1983 	return (error);
1984 }
1985 
/*
 * Configure be_lun for a backing vnode (be_lun->vn) that is a disk device.
 *
 * Selects the dispatch/flush/getattr/unmap handlers (zvols get their own
 * dispatch and get_lba_status handlers), queries the device through its
 * d_ioctl entry point for sector size, media size and stripe geometry, and
 * derives the LUN block size, size in bytes and blocks, maxlba, physical
 * and UNMAP block exponent/offset, atomic block and optimal transfer
 * length.  The "pblocksize", "pblockoffset", "ublocksize", "ublockoffset"
 * and "unmap" LUN options override the values obtained from the device.
 *
 * Returns 0 on success.  On failure returns ENXIO, ENODEV, EINVAL or the
 * error of a mandatory ioctl.  Every failure path taken after the device
 * thread reference was obtained releases it, and all of them except the
 * ENXIO one store a message in req->error_str.
 */
static int
ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_lun_create_params *params;
	struct cdevsw		     *csw;
	struct cdev		     *dev;
	const char		     *value;
	int			      error, atomic, maxio, ref, unmap, tmp;
	off_t			      ps, pss, po, pos, us, uss, uo, uos, otmp;

	params = &be_lun->params;

	be_lun->dev_type = CTL_BE_BLOCK_DEV;
	/* Hold a thread reference on the device until we are done with it. */
	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL)
		return (ENXIO);
	if (strcmp(csw->d_name, "zvol") == 0) {
		/*
		 * zvols use a dedicated dispatch path and get the full CTL
		 * I/O size for both the atomic and the maximum transfer.
		 */
		be_lun->dispatch = ctl_be_block_dispatch_zvol;
		be_lun->get_lba_status = ctl_be_block_gls_zvol;
		atomic = maxio = CTLBLK_MAX_IO_SIZE;
	} else {
		/*
		 * Other devices: no atomic block size is advertised, and the
		 * device's maximum I/O size is clamped to CTLBLK_MAX_SEG,
		 * falling back to DFLTPHYS when the device reports none.
		 */
		be_lun->dispatch = ctl_be_block_dispatch_dev;
		be_lun->get_lba_status = NULL;
		atomic = 0;
		maxio = dev->si_iosize_max;
		if (maxio <= 0)
			maxio = DFLTPHYS;
		if (maxio > CTLBLK_MAX_SEG)
			maxio = CTLBLK_MAX_SEG;
	}
	be_lun->lun_flush = ctl_be_block_flush_dev;
	be_lun->getattr = ctl_be_block_getattr_dev;
	be_lun->unmap = ctl_be_block_unmap_dev;

	/* Everything below relies on ioctls, so the device must provide them. */
	if (!csw->d_ioctl) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "no d_ioctl for device %s!", be_lun->dev_path);
		return (ENODEV);
	}

	/* tmp receives the backing device's sector size. */
	error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
			       curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGSECTORSIZE ioctl "
			 "on %s!", error, be_lun->dev_path);
		return (error);
	}

	/*
	 * If the user has asked for a blocksize that is greater than the
	 * backing device's blocksize, we can do it only if the blocksize
	 * the user is asking for is an even multiple of the underlying
	 * device's blocksize.
	 */
	if ((params->blocksize_bytes != 0) &&
	    (params->blocksize_bytes >= tmp)) {
		if (params->blocksize_bytes % tmp == 0) {
			cbe_lun->blocksize = params->blocksize_bytes;
		} else {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested blocksize %u is not an even "
				 "multiple of backing device blocksize %u",
				 params->blocksize_bytes, tmp);
			return (EINVAL);
		}
	} else if (params->blocksize_bytes != 0) {
		/* A requested blocksize below the device's is rejected. */
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "requested blocksize %u < backing device "
			 "blocksize %u", params->blocksize_bytes, tmp);
		return (EINVAL);
	} else if (cbe_lun->lun_type == T_CDROM)
		/* No request: CD-ROM LUNs use at least 2048-byte blocks. */
		cbe_lun->blocksize = MAX(tmp, 2048);
	else
		cbe_lun->blocksize = tmp;

	/* otmp receives the backing device's media size in bytes. */
	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
			     curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGMEDIASIZE "
			 " ioctl on %s!", error,
			 be_lun->dev_path);
		return (error);
	}

	/* A requested LUN size may be smaller than the device, never larger. */
	if (params->lun_size_bytes != 0) {
		if (params->lun_size_bytes > otmp) {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested LUN size %ju > backing device "
				 "size %ju",
				 (uintmax_t)params->lun_size_bytes,
				 (uintmax_t)otmp);
			return (EINVAL);
		}

		be_lun->size_bytes = params->lun_size_bytes;
	} else
		be_lun->size_bytes = otmp;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);

	/*
	 * Stripe size and offset are optional: a failing ioctl simply leaves
	 * the corresponding value at 0.
	 */
	error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
	    curthread);
	if (error)
		ps = po = 0;
	else {
		error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
		    FREAD, curthread);
		if (error)
			po = 0;
	}
	/* The UNMAP geometry defaults to the device's stripe geometry. */
	us = ps;
	uo = po;

	value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &po);
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	/*
	 * Only record the physical block geometry when the size is a
	 * power-of-two number of whole logical blocks and the offset is a
	 * whole number of logical blocks not exceeding the size.
	 */
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &uo);
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	/* Same validity rules as for the physical block geometry above. */
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	/* Convert the byte limits chosen above into logical blocks. */
	cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
	cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;

	/*
	 * UNMAP support: taken for granted on zvols, otherwise queried via
	 * the GEOM::candelete attribute (a failed query means "no").  The
	 * "unmap" option, when present, overrides either result.
	 */
	if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
		unmap = 1;
	} else {
		struct diocgattr_arg	arg;

		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
		arg.len = sizeof(arg.value.i);
		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
		    curthread);
		unmap = (error == 0) ? arg.value.i : 0;
	}
	value = dnvlist_get_string(cbe_lun->options, "unmap", NULL);
	if (value != NULL)
		unmap = (strcmp(value, "on") == 0);
	if (unmap)
		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
	else
		cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;

	dev_relthread(dev, ref);
	return (0);
}
2162 
2163 static int
2164 ctl_be_block_close(struct ctl_be_block_lun *be_lun)
2165 {
2166 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2167 	int flags;
2168 
2169 	if (be_lun->vn) {
2170 		flags = FREAD;
2171 		if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
2172 			flags |= FWRITE;
2173 		(void)vn_close(be_lun->vn, flags, NOCRED, curthread);
2174 		be_lun->vn = NULL;
2175 
2176 		switch (be_lun->dev_type) {
2177 		case CTL_BE_BLOCK_DEV:
2178 			break;
2179 		case CTL_BE_BLOCK_FILE:
2180 			if (be_lun->backend.file.cred != NULL) {
2181 				crfree(be_lun->backend.file.cred);
2182 				be_lun->backend.file.cred = NULL;
2183 			}
2184 			break;
2185 		case CTL_BE_BLOCK_NONE:
2186 			break;
2187 		default:
2188 			panic("Unexpected backend type %d", be_lun->dev_type);
2189 			break;
2190 		}
2191 		be_lun->dev_type = CTL_BE_BLOCK_NONE;
2192 	}
2193 	return (0);
2194 }
2195 
2196 static int
2197 ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
2198 {
2199 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2200 	struct nameidata nd;
2201 	const char	*value;
2202 	int		 error, flags;
2203 
2204 	error = 0;
2205 	if (rootvnode == NULL) {
2206 		snprintf(req->error_str, sizeof(req->error_str),
2207 			 "Root filesystem is not mounted");
2208 		return (1);
2209 	}
2210 	pwd_ensure_dirs();
2211 
2212 	value = dnvlist_get_string(cbe_lun->options, "file", NULL);
2213 	if (value == NULL) {
2214 		snprintf(req->error_str, sizeof(req->error_str),
2215 			 "no file argument specified");
2216 		return (1);
2217 	}
2218 	free(be_lun->dev_path, M_CTLBLK);
2219 	be_lun->dev_path = strdup(value, M_CTLBLK);
2220 
2221 	flags = FREAD;
2222 	value = dnvlist_get_string(cbe_lun->options, "readonly", NULL);
2223 	if (value != NULL) {
2224 		if (strcmp(value, "on") != 0)
2225 			flags |= FWRITE;
2226 	} else if (cbe_lun->lun_type == T_DIRECT)
2227 		flags |= FWRITE;
2228 
2229 again:
2230 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path);
2231 	error = vn_open(&nd, &flags, 0, NULL);
2232 	if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
2233 		flags &= ~FWRITE;
2234 		goto again;
2235 	}
2236 	if (error) {
2237 		/*
2238 		 * This is the only reasonable guess we can make as far as
2239 		 * path if the user doesn't give us a fully qualified path.
2240 		 * If they want to specify a file, they need to specify the
2241 		 * full path.
2242 		 */
2243 		if (be_lun->dev_path[0] != '/') {
2244 			char *dev_name;
2245 
2246 			asprintf(&dev_name, M_CTLBLK, "/dev/%s",
2247 				be_lun->dev_path);
2248 			free(be_lun->dev_path, M_CTLBLK);
2249 			be_lun->dev_path = dev_name;
2250 			goto again;
2251 		}
2252 		snprintf(req->error_str, sizeof(req->error_str),
2253 		    "error opening %s: %d", be_lun->dev_path, error);
2254 		return (error);
2255 	}
2256 	if (flags & FWRITE)
2257 		cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
2258 	else
2259 		cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
2260 
2261 	NDFREE_PNBUF(&nd);
2262 	be_lun->vn = nd.ni_vp;
2263 
2264 	/* We only support disks and files. */
2265 	if (vn_isdisk_error(be_lun->vn, &error)) {
2266 		error = ctl_be_block_open_dev(be_lun, req);
2267 	} else if (be_lun->vn->v_type == VREG) {
2268 		error = ctl_be_block_open_file(be_lun, req);
2269 	} else {
2270 		error = EINVAL;
2271 		snprintf(req->error_str, sizeof(req->error_str),
2272 			 "%s is not a disk or plain file", be_lun->dev_path);
2273 	}
2274 	VOP_UNLOCK(be_lun->vn);
2275 
2276 	if (error != 0)
2277 		ctl_be_block_close(be_lun);
2278 	cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2279 	if (be_lun->dispatch != ctl_be_block_dispatch_dev)
2280 		cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT;
2281 	value = dnvlist_get_string(cbe_lun->options, "serseq", NULL);
2282 	if (value != NULL && strcmp(value, "on") == 0)
2283 		cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
2284 	else if (value != NULL && strcmp(value, "read") == 0)
2285 		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
2286 	else if (value != NULL && strcmp(value, "soft") == 0)
2287 		cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT;
2288 	else if (value != NULL && strcmp(value, "off") == 0)
2289 		cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2290 	return (0);
2291 }
2292 
2293 static int
2294 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2295 {
2296 	struct ctl_be_lun *cbe_lun;
2297 	struct ctl_be_block_lun *be_lun;
2298 	struct ctl_lun_create_params *params;
2299 	char num_thread_str[16];
2300 	char tmpstr[32];
2301 	const char *value;
2302 	int retval, num_threads;
2303 	int tmp_num_threads;
2304 
2305 	params = &req->reqdata.create;
2306 	retval = 0;
2307 	req->status = CTL_LUN_OK;
2308 
2309 	be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
2310 	cbe_lun = &be_lun->cbe_lun;
2311 	be_lun->params = req->reqdata.create;
2312 	be_lun->softc = softc;
2313 	STAILQ_INIT(&be_lun->input_queue);
2314 	STAILQ_INIT(&be_lun->config_read_queue);
2315 	STAILQ_INIT(&be_lun->config_write_queue);
2316 	STAILQ_INIT(&be_lun->datamove_queue);
2317 	mtx_init(&be_lun->io_lock, "ctlblock io", NULL, MTX_DEF);
2318 	mtx_init(&be_lun->queue_lock, "ctlblock queue", NULL, MTX_DEF);
2319 	cbe_lun->options = nvlist_clone(req->args_nvl);
2320 
2321 	if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
2322 		cbe_lun->lun_type = params->device_type;
2323 	else
2324 		cbe_lun->lun_type = T_DIRECT;
2325 	be_lun->flags = 0;
2326 	cbe_lun->flags = 0;
2327 	value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
2328 	if (value != NULL) {
2329 		if (strcmp(value, "primary") == 0)
2330 			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2331 	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2332 		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2333 
2334 	if (cbe_lun->lun_type == T_DIRECT ||
2335 	    cbe_lun->lun_type == T_CDROM) {
2336 		be_lun->size_bytes = params->lun_size_bytes;
2337 		if (params->blocksize_bytes != 0)
2338 			cbe_lun->blocksize = params->blocksize_bytes;
2339 		else if (cbe_lun->lun_type == T_CDROM)
2340 			cbe_lun->blocksize = 2048;
2341 		else
2342 			cbe_lun->blocksize = 512;
2343 		be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2344 		cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2345 		    0 : (be_lun->size_blocks - 1);
2346 
2347 		if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2348 		    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2349 			retval = ctl_be_block_open(be_lun, req);
2350 			if (retval != 0) {
2351 				retval = 0;
2352 				req->status = CTL_LUN_WARNING;
2353 			}
2354 		}
2355 		num_threads = cbb_num_threads;
2356 	} else {
2357 		num_threads = 1;
2358 	}
2359 
2360 	value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL);
2361 	if (value != NULL) {
2362 		tmp_num_threads = strtol(value, NULL, 0);
2363 
2364 		/*
2365 		 * We don't let the user specify less than one
2366 		 * thread, but hope he's clueful enough not to
2367 		 * specify 1000 threads.
2368 		 */
2369 		if (tmp_num_threads < 1) {
2370 			snprintf(req->error_str, sizeof(req->error_str),
2371 				 "invalid number of threads %s",
2372 				 num_thread_str);
2373 			goto bailout_error;
2374 		}
2375 		num_threads = tmp_num_threads;
2376 	}
2377 
2378 	if (be_lun->vn == NULL)
2379 		cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2380 	/* Tell the user the blocksize we ended up using */
2381 	params->lun_size_bytes = be_lun->size_bytes;
2382 	params->blocksize_bytes = cbe_lun->blocksize;
2383 	if (params->flags & CTL_LUN_FLAG_ID_REQ) {
2384 		cbe_lun->req_lun_id = params->req_lun_id;
2385 		cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
2386 	} else
2387 		cbe_lun->req_lun_id = 0;
2388 
2389 	cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
2390 	cbe_lun->be = &ctl_be_block_driver;
2391 
2392 	if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
2393 		snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d",
2394 			 softc->num_luns);
2395 		strncpy((char *)cbe_lun->serial_num, tmpstr,
2396 			MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
2397 
2398 		/* Tell the user what we used for a serial number */
2399 		strncpy((char *)params->serial_num, tmpstr,
2400 			MIN(sizeof(params->serial_num), sizeof(tmpstr)));
2401 	} else {
2402 		strncpy((char *)cbe_lun->serial_num, params->serial_num,
2403 			MIN(sizeof(cbe_lun->serial_num),
2404 			sizeof(params->serial_num)));
2405 	}
2406 	if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
2407 		snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns);
2408 		strncpy((char *)cbe_lun->device_id, tmpstr,
2409 			MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
2410 
2411 		/* Tell the user what we used for a device ID */
2412 		strncpy((char *)params->device_id, tmpstr,
2413 			MIN(sizeof(params->device_id), sizeof(tmpstr)));
2414 	} else {
2415 		strncpy((char *)cbe_lun->device_id, params->device_id,
2416 			MIN(sizeof(cbe_lun->device_id),
2417 			    sizeof(params->device_id)));
2418 	}
2419 
2420 	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
2421 
2422 	be_lun->io_taskqueue = taskqueue_create("ctlblocktq", M_WAITOK,
2423 	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2424 
2425 	if (be_lun->io_taskqueue == NULL) {
2426 		snprintf(req->error_str, sizeof(req->error_str),
2427 			 "unable to create taskqueue");
2428 		goto bailout_error;
2429 	}
2430 
2431 	/*
2432 	 * Note that we start the same number of threads by default for
2433 	 * both the file case and the block device case.  For the file
2434 	 * case, we need multiple threads to allow concurrency, because the
2435 	 * vnode interface is designed to be a blocking interface.  For the
2436 	 * block device case, ZFS zvols at least will block the caller's
2437 	 * context in many instances, and so we need multiple threads to
2438 	 * overcome that problem.  Other block devices don't need as many
2439 	 * threads, but they shouldn't cause too many problems.
2440 	 *
2441 	 * If the user wants to just have a single thread for a block
2442 	 * device, he can specify that when the LUN is created, or change
2443 	 * the tunable/sysctl to alter the default number of threads.
2444 	 */
2445 	retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue,
2446 					 /*num threads*/num_threads,
2447 					 /*priority*/PUSER,
2448 					 /*proc*/control_softc->ctl_proc,
2449 					 /*thread name*/"block");
2450 
2451 	if (retval != 0)
2452 		goto bailout_error;
2453 
2454 	be_lun->num_threads = num_threads;
2455 
2456 	retval = ctl_add_lun(&be_lun->cbe_lun);
2457 	if (retval != 0) {
2458 		snprintf(req->error_str, sizeof(req->error_str),
2459 			 "ctl_add_lun() returned error %d, see dmesg for "
2460 			 "details", retval);
2461 		retval = 0;
2462 		goto bailout_error;
2463 	}
2464 
2465 	be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id,
2466 					       cbe_lun->blocksize,
2467 					       DEVSTAT_ALL_SUPPORTED,
2468 					       cbe_lun->lun_type
2469 					       | DEVSTAT_TYPE_IF_OTHER,
2470 					       DEVSTAT_PRIORITY_OTHER);
2471 
2472 	mtx_lock(&softc->lock);
2473 	softc->num_luns++;
2474 	SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links);
2475 	mtx_unlock(&softc->lock);
2476 
2477 	params->req_lun_id = cbe_lun->lun_id;
2478 
2479 	return (retval);
2480 
2481 bailout_error:
2482 	req->status = CTL_LUN_ERROR;
2483 
2484 	if (be_lun->io_taskqueue != NULL)
2485 		taskqueue_free(be_lun->io_taskqueue);
2486 	ctl_be_block_close(be_lun);
2487 	if (be_lun->dev_path != NULL)
2488 		free(be_lun->dev_path, M_CTLBLK);
2489 	nvlist_destroy(cbe_lun->options);
2490 	mtx_destroy(&be_lun->queue_lock);
2491 	mtx_destroy(&be_lun->io_lock);
2492 	free(be_lun, M_CTLBLK);
2493 
2494 	return (retval);
2495 }
2496 
/*
 * Handle a LUN removal request.
 *
 * Looks the LUN up by params->lun_id and unlinks it from softc->lun_list,
 * closes its backing store (after flagging the LUN as having no media and
 * draining its taskqueue), asks CTL to remove the LUN and then sleeps
 * until ctl_be_block_lun_shutdown() marks it unconfigured, at which point
 * the LUN structure is freed here.
 *
 * Returns 0 with the outcome in req->status (CTL_LUN_OK, or CTL_LUN_ERROR
 * with a message in req->error_str), except that EINTR is returned when
 * the sleep is interrupted before the LUN became unconfigured.
 */
static int
ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_rm_params *params;
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	int retval;

	params = &req->reqdata.rm;

	/* Find the LUN and take it off the list in one step. */
	sx_xlock(&softc->modify_lock);
	mtx_lock(&softc->lock);
	SLIST_FOREACH(be_lun, &softc->lun_list, links) {
		if (be_lun->cbe_lun.lun_id == params->lun_id) {
			SLIST_REMOVE(&softc->lun_list, be_lun,
			    ctl_be_block_lun, links);
			softc->num_luns--;
			break;
		}
	}
	mtx_unlock(&softc->lock);
	sx_xunlock(&softc->modify_lock);
	/* be_lun is NULL here when the loop ran off the end of the list. */
	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "LUN %u is not managed by the block backend",
			 params->lun_id);
		goto bailout_error;
	}
	cbe_lun = &be_lun->cbe_lun;

	/*
	 * Report "no media" and let queued work finish before the backing
	 * vnode is closed.
	 */
	if (be_lun->vn != NULL) {
		cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
		ctl_lun_no_media(cbe_lun);
		taskqueue_drain_all(be_lun->io_taskqueue);
		ctl_be_block_close(be_lun);
	}

	/*
	 * While WAITING is set, ctl_be_block_lun_shutdown() wakes us up
	 * instead of freeing the LUN itself.
	 */
	mtx_lock(&softc->lock);
	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
	mtx_unlock(&softc->lock);

	retval = ctl_remove_lun(cbe_lun);
	if (retval != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned from ctl_remove_lun() for "
			 "LUN %d", retval, params->lun_id);
		mtx_lock(&softc->lock);
		be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
		mtx_unlock(&softc->lock);
		goto bailout_error;
	}

	/* Wait for the shutdown callback; the sleep may be interrupted. */
	mtx_lock(&softc->lock);
	while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0);
		if (retval == EINTR)
			break;
	}
	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
	if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
		/* Shutdown has completed; we own the final free. */
		mtx_unlock(&softc->lock);
		free(be_lun, M_CTLBLK);
	} else {
		/*
		 * Interrupted: WAITING is cleared, so the shutdown callback
		 * will free the LUN when it eventually runs.
		 */
		mtx_unlock(&softc->lock);
		return (EINTR);
	}

	req->status = CTL_LUN_OK;
	return (0);

bailout_error:
	req->status = CTL_LUN_ERROR;
	return (0);
}
2571 
/*
 * Handle a LUN modification request.
 *
 * Under softc->modify_lock, finds the LUN by params->lun_id, applies a new
 * size and/or option list from the request and re-evaluates the HA primary
 * role.  If the LUN is primary or the HA mode is CTL_HA_MODE_SER_ONLY, the
 * backing store is opened (when not open yet) or re-probed; otherwise it is
 * closed.  CTL is notified about role, media and capacity changes.
 *
 * Always returns 0.  req->status is CTL_LUN_ERROR when the LUN is not
 * found, CTL_LUN_WARNING when the backing store step reported an error and
 * CTL_LUN_OK otherwise.  params->lun_size_bytes is updated with the size in
 * effect afterwards.
 */
static int
ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_modify_params *params;
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	const char *value;
	uint64_t oldsize;
	int error, wasprim;

	params = &req->reqdata.modify;

	sx_xlock(&softc->modify_lock);
	mtx_lock(&softc->lock);
	SLIST_FOREACH(be_lun, &softc->lun_list, links) {
		if (be_lun->cbe_lun.lun_id == params->lun_id)
			break;
	}
	mtx_unlock(&softc->lock);
	/* be_lun is NULL here when the loop ran off the end of the list. */
	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "LUN %u is not managed by the block backend",
			 params->lun_id);
		goto bailout_error;
	}
	cbe_lun = &be_lun->cbe_lun;

	/* A size of 0 in the request means "leave the size alone". */
	if (params->lun_size_bytes != 0)
		be_lun->params.lun_size_bytes = params->lun_size_bytes;

	/* A supplied option list replaces the old one wholesale. */
	if (req->args_nvl != NULL) {
		nvlist_destroy(cbe_lun->options);
		cbe_lun->options = nvlist_clone(req->args_nvl);
	}

	/*
	 * Recompute the primary flag from the "ha_role" option, or from
	 * CTL's active-shelf flag when the option is absent, and tell CTL
	 * if the role changed.
	 */
	wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
	value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
	if (value != NULL) {
		if (strcmp(value, "primary") == 0)
			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
		else
			cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
	else
		cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
		if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
			ctl_lun_primary(cbe_lun);
		else
			ctl_lun_secondary(cbe_lun);
	}

	/* Remembered so a capacity change can be detected below. */
	oldsize = be_lun->size_blocks;
	if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
	    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
		/*
		 * The backing store should be open: open it if it is not,
		 * otherwise rerun the type-specific setup on the vnode we
		 * already hold.
		 */
		if (be_lun->vn == NULL)
			error = ctl_be_block_open(be_lun, req);
		else if (vn_isdisk_error(be_lun->vn, &error))
			error = ctl_be_block_open_dev(be_lun, req);
		else if (be_lun->vn->v_type == VREG) {
			/* The file setup is run with the vnode locked. */
			vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
			error = ctl_be_block_open_file(be_lun, req);
			VOP_UNLOCK(be_lun->vn);
		} else
			error = EINVAL;
		/* Bring the NO_MEDIA flag in line with whether a vnode is open. */
		if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) &&
		    be_lun->vn != NULL) {
			cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_has_media(cbe_lun);
		} else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 &&
		    be_lun->vn == NULL) {
			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_no_media(cbe_lun);
		}
		cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
	} else {
		/*
		 * The backing store should not be open: report "no media",
		 * let queued work finish and close the vnode.
		 */
		if (be_lun->vn != NULL) {
			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_no_media(cbe_lun);
			taskqueue_drain_all(be_lun->io_taskqueue);
			error = ctl_be_block_close(be_lun);
		} else
			error = 0;
	}
	if (be_lun->size_blocks != oldsize)
		ctl_lun_capacity_changed(cbe_lun);

	/* Tell the user the exact size we ended up using */
	params->lun_size_bytes = be_lun->size_bytes;

	sx_xunlock(&softc->modify_lock);
	req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
	return (0);

bailout_error:
	sx_xunlock(&softc->modify_lock);
	req->status = CTL_LUN_ERROR;
	return (0);
}
2672 
/*
 * LUN shutdown callback (installed as cbe_lun->lun_shutdown by
 * ctl_be_block_create()).
 *
 * Drains and frees the LUN's taskqueue, removes its devstat entry and
 * releases its options, path string and mutexes.  The LUN is then marked
 * unconfigured; if a thread is waiting for that (CTL_BE_BLOCK_LUN_WAITING,
 * set by ctl_be_block_rm()) it is woken up and left to free the structure,
 * otherwise the structure is freed here.
 */
static void
ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun)
{
	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)cbe_lun;
	struct ctl_be_block_softc *softc = be_lun->softc;

	/* Let outstanding work finish before tearing the queue down. */
	taskqueue_drain_all(be_lun->io_taskqueue);
	taskqueue_free(be_lun->io_taskqueue);
	if (be_lun->disk_stats != NULL)
		devstat_remove_entry(be_lun->disk_stats);
	nvlist_destroy(be_lun->cbe_lun.options);
	free(be_lun->dev_path, M_CTLBLK);
	mtx_destroy(&be_lun->queue_lock);
	mtx_destroy(&be_lun->io_lock);

	/*
	 * The flag update and the WAITING test are done under softc->lock,
	 * so exactly one side ends up freeing be_lun.
	 */
	mtx_lock(&softc->lock);
	be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
	if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING)
		wakeup(be_lun);
	else
		free(be_lun, M_CTLBLK);
	mtx_unlock(&softc->lock);
}
2696 
2697 static int
2698 ctl_be_block_config_write(union ctl_io *io)
2699 {
2700 	struct ctl_be_block_lun *be_lun;
2701 	struct ctl_be_lun *cbe_lun;
2702 	int retval;
2703 
2704 	DPRINTF("entered\n");
2705 
2706 	cbe_lun = CTL_BACKEND_LUN(io);
2707 	be_lun = (struct ctl_be_block_lun *)cbe_lun;
2708 
2709 	retval = 0;
2710 	switch (io->scsiio.cdb[0]) {
2711 	case SYNCHRONIZE_CACHE:
2712 	case SYNCHRONIZE_CACHE_16:
2713 	case WRITE_SAME_10:
2714 	case WRITE_SAME_16:
2715 	case UNMAP:
2716 		/*
2717 		 * The upper level CTL code will filter out any CDBs with
2718 		 * the immediate bit set and return the proper error.
2719 		 *
2720 		 * We don't really need to worry about what LBA range the
2721 		 * user asked to be synced out.  When they issue a sync
2722 		 * cache command, we'll sync out the whole thing.
2723 		 */
2724 		mtx_lock(&be_lun->queue_lock);
2725 		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
2726 				   links);
2727 		mtx_unlock(&be_lun->queue_lock);
2728 		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
2729 		break;
2730 	case START_STOP_UNIT: {
2731 		struct scsi_start_stop_unit *cdb;
2732 		struct ctl_lun_req req;
2733 
2734 		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
2735 		if ((cdb->how & SSS_PC_MASK) != 0) {
2736 			ctl_set_success(&io->scsiio);
2737 			ctl_config_write_done(io);
2738 			break;
2739 		}
2740 		if (cdb->how & SSS_START) {
2741 			if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) {
2742 				retval = ctl_be_block_open(be_lun, &req);
2743 				cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
2744 				if (retval == 0) {
2745 					cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
2746 					ctl_lun_has_media(cbe_lun);
2747 				} else {
2748 					cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2749 					ctl_lun_no_media(cbe_lun);
2750 				}
2751 			}
2752 			ctl_start_lun(cbe_lun);
2753 		} else {
2754 			ctl_stop_lun(cbe_lun);
2755 			if (cdb->how & SSS_LOEJ) {
2756 				cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2757 				cbe_lun->flags |= CTL_LUN_FLAG_EJECTED;
2758 				ctl_lun_ejected(cbe_lun);
2759 				if (be_lun->vn != NULL)
2760 					ctl_be_block_close(be_lun);
2761 			}
2762 		}
2763 
2764 		ctl_set_success(&io->scsiio);
2765 		ctl_config_write_done(io);
2766 		break;
2767 	}
2768 	case PREVENT_ALLOW:
2769 		ctl_set_success(&io->scsiio);
2770 		ctl_config_write_done(io);
2771 		break;
2772 	default:
2773 		ctl_set_invalid_opcode(&io->scsiio);
2774 		ctl_config_write_done(io);
2775 		retval = CTL_RETVAL_COMPLETE;
2776 		break;
2777 	}
2778 
2779 	return (retval);
2780 }
2781 
2782 static int
2783 ctl_be_block_config_read(union ctl_io *io)
2784 {
2785 	struct ctl_be_block_lun *be_lun;
2786 	int retval = 0;
2787 
2788 	DPRINTF("entered\n");
2789 
2790 	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
2791 
2792 	switch (io->scsiio.cdb[0]) {
2793 	case SERVICE_ACTION_IN:
2794 		if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
2795 			mtx_lock(&be_lun->queue_lock);
2796 			STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
2797 			    &io->io_hdr, links);
2798 			mtx_unlock(&be_lun->queue_lock);
2799 			taskqueue_enqueue(be_lun->io_taskqueue,
2800 			    &be_lun->io_task);
2801 			retval = CTL_RETVAL_QUEUED;
2802 			break;
2803 		}
2804 		ctl_set_invalid_field(&io->scsiio,
2805 				      /*sks_valid*/ 1,
2806 				      /*command*/ 1,
2807 				      /*field*/ 1,
2808 				      /*bit_valid*/ 1,
2809 				      /*bit*/ 4);
2810 		ctl_config_read_done(io);
2811 		retval = CTL_RETVAL_COMPLETE;
2812 		break;
2813 	default:
2814 		ctl_set_invalid_opcode(&io->scsiio);
2815 		ctl_config_read_done(io);
2816 		retval = CTL_RETVAL_COMPLETE;
2817 		break;
2818 	}
2819 
2820 	return (retval);
2821 }
2822 
2823 static int
2824 ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb)
2825 {
2826 	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
2827 	int retval;
2828 
2829 	retval = sbuf_cat(sb, "\t<num_threads>");
2830 	if (retval != 0)
2831 		goto bailout;
2832 	retval = sbuf_printf(sb, "%d", lun->num_threads);
2833 	if (retval != 0)
2834 		goto bailout;
2835 	retval = sbuf_cat(sb, "</num_threads>\n");
2836 
2837 bailout:
2838 	return (retval);
2839 }
2840 
2841 static uint64_t
2842 ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname)
2843 {
2844 	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
2845 
2846 	if (lun->getattr == NULL)
2847 		return (UINT64_MAX);
2848 	return (lun->getattr(lun, attrname));
2849 }
2850 
2851 static int
2852 ctl_be_block_init(void)
2853 {
2854 	struct ctl_be_block_softc *softc = &backend_block_softc;
2855 
2856 	sx_init(&softc->modify_lock, "ctlblock modify");
2857 	mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
2858 	softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2859 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2860 	softc->bufmin_zone = uma_zcreate("ctlblockmin", CTLBLK_MIN_SEG,
2861 	    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2862 	if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG)
2863 		softc->bufmax_zone = uma_zcreate("ctlblockmax", CTLBLK_MAX_SEG,
2864 		    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2865 	SLIST_INIT(&softc->lun_list);
2866 	return (0);
2867 }
2868 
2869 static int
2870 ctl_be_block_shutdown(void)
2871 {
2872 	struct ctl_be_block_softc *softc = &backend_block_softc;
2873 	struct ctl_be_block_lun *lun;
2874 
2875 	mtx_lock(&softc->lock);
2876 	while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) {
2877 		SLIST_REMOVE_HEAD(&softc->lun_list, links);
2878 		softc->num_luns--;
2879 		/*
2880 		 * Drop our lock here.  Since ctl_remove_lun() can call
2881 		 * back into us, this could potentially lead to a recursive
2882 		 * lock of the same mutex, which would cause a hang.
2883 		 */
2884 		mtx_unlock(&softc->lock);
2885 		ctl_remove_lun(&lun->cbe_lun);
2886 		mtx_lock(&softc->lock);
2887 	}
2888 	mtx_unlock(&softc->lock);
2889 	uma_zdestroy(softc->bufmin_zone);
2890 	if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG)
2891 		uma_zdestroy(softc->bufmax_zone);
2892 	uma_zdestroy(softc->beio_zone);
2893 	mtx_destroy(&softc->lock);
2894 	sx_destroy(&softc->modify_lock);
2895 	return (0);
2896 }
2897