1 /*-
2  * Copyright (c) 2003 Silicon Graphics International Corp.
3  * Copyright (c) 2009-2011 Spectra Logic Corporation
4  * Copyright (c) 2012 The FreeBSD Foundation
5  * All rights reserved.
6  *
7  * Portions of this software were developed by Edward Tomasz Napierala
8  * under sponsorship from the FreeBSD Foundation.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions, and the following disclaimer,
15  *    without modification.
16  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
17  *    substantially similar to the "NO WARRANTY" disclaimer below
18  *    ("Disclaimer") and any redistribution must be conditioned upon
19  *    including a substantially similar Disclaimer requirement for further
20  *    binary redistribution.
21  *
22  * NO WARRANTY
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
32  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGES.
34  *
35  * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
36  */
37 /*
38  * CAM Target Layer driver backend for block devices.
39  *
40  * Author: Ken Merry <ken@FreeBSD.org>
41  */
42 #include <sys/cdefs.h>
43 __FBSDID("$FreeBSD$");
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/kernel.h>
48 #include <sys/types.h>
49 #include <sys/kthread.h>
50 #include <sys/bio.h>
51 #include <sys/fcntl.h>
52 #include <sys/limits.h>
53 #include <sys/lock.h>
54 #include <sys/mutex.h>
55 #include <sys/condvar.h>
56 #include <sys/malloc.h>
57 #include <sys/conf.h>
58 #include <sys/ioccom.h>
59 #include <sys/queue.h>
60 #include <sys/sbuf.h>
61 #include <sys/endian.h>
62 #include <sys/uio.h>
63 #include <sys/buf.h>
64 #include <sys/taskqueue.h>
65 #include <sys/vnode.h>
66 #include <sys/namei.h>
67 #include <sys/mount.h>
68 #include <sys/disk.h>
70 #include <sys/filedesc.h>
71 #include <sys/filio.h>
72 #include <sys/proc.h>
73 #include <sys/pcpu.h>
74 #include <sys/module.h>
75 #include <sys/sdt.h>
76 #include <sys/devicestat.h>
77 #include <sys/sysctl.h>
78 
79 #include <geom/geom.h>
80 
81 #include <cam/cam.h>
82 #include <cam/scsi/scsi_all.h>
83 #include <cam/scsi/scsi_da.h>
84 #include <cam/ctl/ctl_io.h>
85 #include <cam/ctl/ctl.h>
86 #include <cam/ctl/ctl_backend.h>
87 #include <cam/ctl/ctl_ioctl.h>
88 #include <cam/ctl/ctl_ha.h>
89 #include <cam/ctl/ctl_scsi_all.h>
90 #include <cam/ctl/ctl_private.h>
91 #include <cam/ctl/ctl_error.h>
92 
93 /*
94  * The idea here is that we'll allocate enough S/G space to hold a 1MB
95  * I/O.  If we get an I/O larger than that, we'll split it.
96  */
97 #define	CTLBLK_HALF_IO_SIZE	(512 * 1024)
98 #define	CTLBLK_MAX_IO_SIZE	(CTLBLK_HALF_IO_SIZE * 2)
99 #define	CTLBLK_MAX_SEG		MAXPHYS
100 #define	CTLBLK_HALF_SEGS	MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1)
101 #define	CTLBLK_MAX_SEGS		(CTLBLK_HALF_SEGS * 2)
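/*
 * For example, on a kernel built with the common default MAXPHYS of
 * 128 KiB (an assumption; MAXPHYS is configurable), these work out to:
 *
 *	CTLBLK_MAX_SEG   = 131072
 *	CTLBLK_HALF_SEGS = MAX(524288 / 131072, 1) = 4
 *	CTLBLK_MAX_SEGS  = 8
 *
 * i.e. eight 128 KiB segments cover one maximum-sized 1 MB I/O.
 */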
102 
103 #ifdef CTLBLK_DEBUG
104 #define DPRINTF(fmt, args...) \
105     printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
106 #else
107 #define DPRINTF(fmt, args...) do {} while(0)
108 #endif
109 
110 #define PRIV(io)	\
111     ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
112 #define ARGS(io)	\
113     ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
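/*
 * Typical accessor usage, as seen throughout this file:
 *
 *	struct ctl_be_block_io *beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
 *	struct ctl_lba_len_flags *lbalen = ARGS(io);
 */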
114 
115 SDT_PROVIDER_DEFINE(cbb);
116 
117 typedef enum {
118 	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
119 	CTL_BE_BLOCK_LUN_CONFIG_ERR	= 0x02,
120 	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
121 } ctl_be_block_lun_flags;
122 
123 typedef enum {
124 	CTL_BE_BLOCK_NONE,
125 	CTL_BE_BLOCK_DEV,
126 	CTL_BE_BLOCK_FILE
127 } ctl_be_block_type;
128 
129 struct ctl_be_block_filedata {
130 	struct ucred *cred;
131 };
132 
133 union ctl_be_block_bedata {
134 	struct ctl_be_block_filedata file;
135 };
136 
137 struct ctl_be_block_io;
138 struct ctl_be_block_lun;
139 
140 typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
141 			       struct ctl_be_block_io *beio);
142 typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
143 				  const char *attrname);
144 
145 /*
146  * Backend LUN structure.  There is a 1:1 mapping between a block device
147  * and a backend block LUN, and between a backend block LUN and a CTL LUN.
148  */
149 struct ctl_be_block_lun {
150 	struct ctl_lun_create_params params;
151 	char lunname[32];
152 	char *dev_path;
153 	ctl_be_block_type dev_type;
154 	struct vnode *vn;
155 	union ctl_be_block_bedata backend;
156 	cbb_dispatch_t dispatch;
157 	cbb_dispatch_t lun_flush;
158 	cbb_dispatch_t unmap;
159 	cbb_dispatch_t get_lba_status;
160 	cbb_getattr_t getattr;
161 	uma_zone_t lun_zone;
162 	uint64_t size_blocks;
163 	uint64_t size_bytes;
164 	struct ctl_be_block_softc *softc;
165 	struct devstat *disk_stats;
166 	ctl_be_block_lun_flags flags;
167 	STAILQ_ENTRY(ctl_be_block_lun) links;
168 	struct ctl_be_lun cbe_lun;
169 	struct taskqueue *io_taskqueue;
170 	struct task io_task;
171 	int num_threads;
172 	STAILQ_HEAD(, ctl_io_hdr) input_queue;
173 	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
174 	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
175 	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
176 	struct mtx_padalign io_lock;
177 	struct mtx_padalign queue_lock;
178 };
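/*
 * Locking note: queue_lock protects the four I/O queues above, while
 * io_lock protects the per-I/O bio counters and the devstat updates;
 * see ctl_be_block_worker() and ctl_be_block_biodone() below.
 */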
179 
180 /*
181  * Overall softc structure for the block backend module.
182  */
183 struct ctl_be_block_softc {
184 	struct mtx			 lock;
185 	int				 num_luns;
186 	STAILQ_HEAD(, ctl_be_block_lun)	 lun_list;
187 };
188 
189 static struct ctl_be_block_softc backend_block_softc;
190 
191 /*
192  * Per-I/O information.
193  */
194 struct ctl_be_block_io {
195 	union ctl_io			*io;
196 	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS];
197 	struct iovec			xiovecs[CTLBLK_MAX_SEGS];
198 	int				bio_cmd;
199 	int				num_segs;
200 	int				num_bios_sent;
201 	int				num_bios_done;
202 	int				send_complete;
203 	int				num_errors;
204 	struct bintime			ds_t0;
205 	devstat_tag_type		ds_tag_type;
206 	devstat_trans_flags		ds_trans_type;
207 	uint64_t			io_len;
208 	uint64_t			io_offset;
209 	int				io_arg;
210 	struct ctl_be_block_softc	*softc;
211 	struct ctl_be_block_lun		*lun;
212 	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
213 };
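/*
 * For COMPARE operations sg_segs[] is split in half: entries
 * [0, CTLBLK_HALF_SEGS) hold the data read from the backing store,
 * and entries [CTLBLK_HALF_SEGS, CTLBLK_MAX_SEGS) hold the data
 * received from the initiator; see ctl_be_block_compare().
 */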
214 
215 extern struct ctl_softc *control_softc;
216 
217 static int cbb_num_threads = 14;
218 SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
219 	    "CAM Target Layer Block Backend");
220 SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
221            &cbb_num_threads, 0, "Number of threads per backing file");
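/*
 * Since this is CTLFLAG_RWTUN, the thread count can be set as a loader
 * tunable or adjusted at runtime, e.g. (a usage sketch):
 *
 *	sysctl kern.cam.ctl.block.num_threads=32
 */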
222 
223 static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
224 static void ctl_free_beio(struct ctl_be_block_io *beio);
225 static void ctl_complete_beio(struct ctl_be_block_io *beio);
226 static int ctl_be_block_move_done(union ctl_io *io);
227 static void ctl_be_block_biodone(struct bio *bio);
228 static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
229 				    struct ctl_be_block_io *beio);
230 static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
231 				       struct ctl_be_block_io *beio);
232 static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
233 				  struct ctl_be_block_io *beio);
234 static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
235 					 const char *attrname);
236 static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
237 				   struct ctl_be_block_io *beio);
238 static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
239 				   struct ctl_be_block_io *beio);
240 static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
241 				      struct ctl_be_block_io *beio);
242 static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
243 					 const char *attrname);
244 static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
245 				    union ctl_io *io);
246 static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
247 				    union ctl_io *io);
248 static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
249 				  union ctl_io *io);
250 static void ctl_be_block_worker(void *context, int pending);
251 static int ctl_be_block_submit(union ctl_io *io);
252 static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
253 				   int flag, struct thread *td);
254 static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
255 				  struct ctl_lun_req *req);
256 static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
257 				 struct ctl_lun_req *req);
258 static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
259 static int ctl_be_block_open(struct ctl_be_block_softc *softc,
260 			     struct ctl_be_block_lun *be_lun,
261 			     struct ctl_lun_req *req);
262 static int ctl_be_block_create(struct ctl_be_block_softc *softc,
263 			       struct ctl_lun_req *req);
264 static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
265 			   struct ctl_lun_req *req);
266 static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
267 			   struct ctl_lun_req *req);
268 static void ctl_be_block_lun_shutdown(void *be_lun);
269 static void ctl_be_block_lun_config_status(void *be_lun,
270 					   ctl_lun_config_status status);
271 static int ctl_be_block_config_write(union ctl_io *io);
272 static int ctl_be_block_config_read(union ctl_io *io);
273 static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb);
274 static uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname);
275 int ctl_be_block_init(void);
276 
277 static struct ctl_backend_driver ctl_be_block_driver =
278 {
279 	.name = "block",
280 	.flags = CTL_BE_FLAG_HAS_CONFIG,
281 	.init = ctl_be_block_init,
282 	.data_submit = ctl_be_block_submit,
283 	.data_move_done = ctl_be_block_move_done,
284 	.config_read = ctl_be_block_config_read,
285 	.config_write = ctl_be_block_config_write,
286 	.ioctl = ctl_be_block_ioctl,
287 	.lun_info = ctl_be_block_lun_info,
288 	.lun_attr = ctl_be_block_lun_attr
289 };
290 
291 MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend");
292 CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);
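/*
 * CTL_BACKEND_DECLARE() registers ctl_be_block_driver with CTL at module
 * load; LUN create/remove/modify requests then reach this backend through
 * the ctl_be_block_ioctl() handler declared above.
 */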
293 
294 static uma_zone_t beio_zone;
295 
296 static struct ctl_be_block_io *
297 ctl_alloc_beio(struct ctl_be_block_softc *softc)
298 {
299 	struct ctl_be_block_io *beio;
300 
301 	beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO);
302 	beio->softc = softc;
303 	return (beio);
304 }
305 
306 static void
307 ctl_free_beio(struct ctl_be_block_io *beio)
308 {
309 	int duplicate_free;
310 	int i;
311 
312 	duplicate_free = 0;
313 
314 	for (i = 0; i < beio->num_segs; i++) {
315 		if (beio->sg_segs[i].addr == NULL)
316 			duplicate_free++;
317 
318 		uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr);
319 		beio->sg_segs[i].addr = NULL;
320 
321 		/* For compare we had two equal S/G lists. */
322 		if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) {
323 			uma_zfree(beio->lun->lun_zone,
324 			    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr);
325 			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL;
326 		}
327 	}
328 
329 	if (duplicate_free > 0) {
330 		printf("%s: %d duplicate frees out of %d segments\n", __func__,
331 		       duplicate_free, beio->num_segs);
332 	}
333 
334 	uma_zfree(beio_zone, beio);
335 }
336 
337 static void
338 ctl_complete_beio(struct ctl_be_block_io *beio)
339 {
340 	union ctl_io *io = beio->io;
341 
342 	if (beio->beio_cont != NULL) {
343 		beio->beio_cont(beio);
344 	} else {
345 		ctl_free_beio(beio);
346 		ctl_data_submit_done(io);
347 	}
348 }
349 
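/*
 * Return the number of leading bytes on which the two buffers agree;
 * a result equal to 'size' means they match completely.
 */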
350 static size_t
351 cmp(uint8_t *a, uint8_t *b, size_t size)
352 {
353 	size_t i;
354 
355 	for (i = 0; i < size; i++) {
356 		if (a[i] != b[i])
357 			break;
358 	}
359 	return (i);
360 }
361 
362 static void
363 ctl_be_block_compare(union ctl_io *io)
364 {
365 	struct ctl_be_block_io *beio;
366 	uint64_t off, res;
367 	int i;
368 	uint8_t info[8];
369 
370 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
371 	off = 0;
372 	for (i = 0; i < beio->num_segs; i++) {
373 		res = cmp(beio->sg_segs[i].addr,
374 		    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
375 		    beio->sg_segs[i].len);
376 		off += res;
377 		if (res < beio->sg_segs[i].len)
378 			break;
379 	}
380 	if (i < beio->num_segs) {
381 		scsi_u64to8b(off, info);
382 		ctl_set_sense(&io->scsiio, /*current_error*/ 1,
383 		    /*sense_key*/ SSD_KEY_MISCOMPARE,
384 		    /*asc*/ 0x1D, /*ascq*/ 0x00,
385 		    /*type*/ SSD_ELEM_INFO,
386 		    /*size*/ sizeof(info), /*data*/ &info,
387 		    /*type*/ SSD_ELEM_NONE);
388 	} else
389 		ctl_set_success(&io->scsiio);
390 }
391 
392 static int
393 ctl_be_block_move_done(union ctl_io *io)
394 {
395 	struct ctl_be_block_io *beio;
396 	struct ctl_be_block_lun *be_lun;
397 	struct ctl_lba_len_flags *lbalen;
398 #ifdef CTL_TIME_IO
399 	struct bintime cur_bt;
400 #endif
401 
402 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
403 	be_lun = beio->lun;
404 
405 	DPRINTF("entered\n");
406 
407 #ifdef CTL_TIME_IO
408 	getbintime(&cur_bt);
409 	bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
410 	bintime_add(&io->io_hdr.dma_bt, &cur_bt);
411 	io->io_hdr.num_dmas++;
412 #endif
413 	io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;
414 
415 	/*
416 	 * We set status at this point for read commands, and write
417 	 * commands with errors.
418 	 */
419 	if (io->io_hdr.flags & CTL_FLAG_ABORT) {
420 		;
421 	} else if ((io->io_hdr.port_status == 0) &&
422 	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
423 		lbalen = ARGS(beio->io);
424 		if (lbalen->flags & CTL_LLF_READ) {
425 			ctl_set_success(&io->scsiio);
426 		} else if (lbalen->flags & CTL_LLF_COMPARE) {
427 			/* We have two data blocks ready for comparison. */
428 			ctl_be_block_compare(io);
429 		}
430 	} else if ((io->io_hdr.port_status != 0) &&
431 	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
432 	     (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
433 		/*
434 		 * For hardware error sense keys, the sense key
435 		 * specific value is defined to be a retry count,
436 		 * but we use it to pass back an internal FETD
437 		 * error code.  XXX KDM  Hopefully the FETD is only
438 		 * using 16 bits for an error code, since that's
439 		 * all the space we have in the sks field.
440 		 */
441 		ctl_set_internal_failure(&io->scsiio,
442 					 /*sks_valid*/ 1,
443 					 /*retry_count*/
444 					 io->io_hdr.port_status);
445 	}
446 
447 	/*
448 	 * If this is a read, or a write with errors, it is done.
449 	 */
450 	if ((beio->bio_cmd == BIO_READ)
451 	 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
452 	 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
453 		ctl_complete_beio(beio);
454 		return (0);
455 	}
456 
457 	/*
458 	 * At this point, we have a write and the DMA completed
459 	 * successfully.  We now have to queue it to the task queue to
460 	 * execute the backend I/O.  That is because we do blocking
461 	 * memory allocations, and in the file backing case, blocking I/O.
462 	 * This move done routine is generally called in the SIM's
463 	 * interrupt context, and therefore we cannot block.
464 	 */
465 	mtx_lock(&be_lun->queue_lock);
466 	/*
467 	 * XXX KDM make sure that links is okay to use at this point.
468 	 * Otherwise, we either need to add another field to ctl_io_hdr,
469 	 * or deal with resource allocation here.
470 	 */
471 	STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
472 	mtx_unlock(&be_lun->queue_lock);
473 
474 	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
475 
476 	return (0);
477 }
478 
479 static void
480 ctl_be_block_biodone(struct bio *bio)
481 {
482 	struct ctl_be_block_io *beio;
483 	struct ctl_be_block_lun *be_lun;
484 	union ctl_io *io;
485 	int error;
486 
487 	beio = bio->bio_caller1;
488 	be_lun = beio->lun;
489 	io = beio->io;
490 
491 	DPRINTF("entered\n");
492 
493 	error = bio->bio_error;
494 	mtx_lock(&be_lun->io_lock);
495 	if (error != 0)
496 		beio->num_errors++;
497 
498 	beio->num_bios_done++;
499 
500 	/*
501 	 * XXX KDM will this cause WITNESS to complain?  Holding a lock
502 	 * across the free might trigger such a complaint.
503 	 */
504 	g_destroy_bio(bio);
505 
506 	/*
507 	 * If the send complete bit isn't set, or we aren't the last I/O to
508 	 * complete, then there is nothing more to do here yet.
509 	 */
510 	if ((beio->send_complete == 0)
511 	 || (beio->num_bios_done < beio->num_bios_sent)) {
512 		mtx_unlock(&be_lun->io_lock);
513 		return;
514 	}
515 
516 	/*
517 	 * At this point, we've verified that we are the last I/O to
518 	 * complete, so it's safe to drop the lock.
519 	 */
520 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
521 	    beio->ds_tag_type, beio->ds_trans_type,
522 	    /*now*/ NULL, /*then*/&beio->ds_t0);
523 	mtx_unlock(&be_lun->io_lock);
524 
525 	/*
526 	 * If there are any errors from the backing device, we fail the
527 	 * entire I/O with a medium error.
528 	 */
529 	if (beio->num_errors > 0) {
530 		if (error == EOPNOTSUPP) {
531 			ctl_set_invalid_opcode(&io->scsiio);
532 		} else if (error == ENOSPC || error == EDQUOT) {
533 			ctl_set_space_alloc_fail(&io->scsiio);
534 		} else if (error == EROFS || error == EACCES) {
535 			ctl_set_hw_write_protected(&io->scsiio);
536 		} else if (beio->bio_cmd == BIO_FLUSH) {
537 			/* XXX KDM is there a better error here? */
538 			ctl_set_internal_failure(&io->scsiio,
539 						 /*sks_valid*/ 1,
540 						 /*retry_count*/ 0xbad2);
541 		} else {
542 			ctl_set_medium_error(&io->scsiio,
543 			    beio->bio_cmd == BIO_READ);
544 		}
545 		ctl_complete_beio(beio);
546 		return;
547 	}
548 
549 	/*
550 	 * If this is a write, a flush, a delete or verify, we're all done.
551 	 * If this is a read, we can now send the data to the user.
552 	 */
553 	if ((beio->bio_cmd == BIO_WRITE)
554 	 || (beio->bio_cmd == BIO_FLUSH)
555 	 || (beio->bio_cmd == BIO_DELETE)
556 	 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
557 		ctl_set_success(&io->scsiio);
558 		ctl_complete_beio(beio);
559 	} else {
560 		if ((ARGS(io)->flags & CTL_LLF_READ) &&
561 		    beio->beio_cont == NULL) {
562 			ctl_set_success(&io->scsiio);
563 			ctl_serseq_done(io);
564 		}
565 #ifdef CTL_TIME_IO
566         	getbintime(&io->io_hdr.dma_start_bt);
567 #endif
568 		ctl_datamove(io);
569 	}
570 }
571 
572 static void
573 ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
574 			struct ctl_be_block_io *beio)
575 {
576 	union ctl_io *io = beio->io;
577 	struct mount *mountpoint;
578 	int error, lock_flags;
579 
580 	DPRINTF("entered\n");
581 
582 	binuptime(&beio->ds_t0);
583 	mtx_lock(&be_lun->io_lock);
584 	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
585 	mtx_unlock(&be_lun->io_lock);
586 
587 	(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
588 
589 	if (MNT_SHARED_WRITES(mountpoint)
590 	 || ((mountpoint == NULL)
591 	  && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
592 		lock_flags = LK_SHARED;
593 	else
594 		lock_flags = LK_EXCLUSIVE;
595 
596 	vn_lock(be_lun->vn, lock_flags | LK_RETRY);
597 
598 	error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
599 	    curthread);
600 	VOP_UNLOCK(be_lun->vn, 0);
601 
602 	vn_finished_write(mountpoint);
603 
604 	mtx_lock(&be_lun->io_lock);
605 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
606 	    beio->ds_tag_type, beio->ds_trans_type,
607 	    /*now*/ NULL, /*then*/&beio->ds_t0);
608 	mtx_unlock(&be_lun->io_lock);
609 
610 	if (error == 0)
611 		ctl_set_success(&io->scsiio);
612 	else {
613 		/* XXX KDM is there a better error here? */
614 		ctl_set_internal_failure(&io->scsiio,
615 					 /*sks_valid*/ 1,
616 					 /*retry_count*/ 0xbad1);
617 	}
618 
619 	ctl_complete_beio(beio);
620 }
621 
622 SDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t");
623 SDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t");
624 SDT_PROBE_DEFINE1(cbb, kernel, read, file_done, "uint64_t");
625 SDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t");
626 
627 static void
628 ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
629 			   struct ctl_be_block_io *beio)
630 {
631 	struct ctl_be_block_filedata *file_data;
632 	union ctl_io *io;
633 	struct uio xuio;
634 	struct iovec *xiovec;
635 	size_t s;
636 	int error, flags, i;
637 
638 	DPRINTF("entered\n");
639 
640 	file_data = &be_lun->backend.file;
641 	io = beio->io;
642 	flags = 0;
643 	if (ARGS(io)->flags & CTL_LLF_DPO)
644 		flags |= IO_DIRECT;
645 	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
646 		flags |= IO_SYNC;
647 
648 	bzero(&xuio, sizeof(xuio));
649 	if (beio->bio_cmd == BIO_READ) {
650 		SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
651 		xuio.uio_rw = UIO_READ;
652 	} else {
653 		SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
654 		xuio.uio_rw = UIO_WRITE;
655 	}
656 	xuio.uio_offset = beio->io_offset;
657 	xuio.uio_resid = beio->io_len;
658 	xuio.uio_segflg = UIO_SYSSPACE;
659 	xuio.uio_iov = beio->xiovecs;
660 	xuio.uio_iovcnt = beio->num_segs;
661 	xuio.uio_td = curthread;
662 
663 	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
664 		xiovec->iov_base = beio->sg_segs[i].addr;
665 		xiovec->iov_len = beio->sg_segs[i].len;
666 	}
667 
668 	binuptime(&beio->ds_t0);
669 	mtx_lock(&be_lun->io_lock);
670 	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
671 	mtx_unlock(&be_lun->io_lock);
672 
673 	if (beio->bio_cmd == BIO_READ) {
674 		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
675 
676 		/*
677 		 * UFS pays attention to IO_DIRECT for reads.  If the
678 		 * DIRECTIO option is configured into the kernel, it calls
679 		 * ffs_rawread().  But that only works for single-segment
680 		 * uios with user space addresses.  In our case, with a
681 		 * kernel uio, it still reads into the buffer cache, but it
682 		 * will just try to release the buffer from the cache later
683 		 * on in ffs_read().
684 		 *
685 		 * ZFS does not pay attention to IO_DIRECT for reads.
686 		 *
687 		 * UFS does not pay attention to IO_SYNC for reads.
688 		 *
689 		 * ZFS pays attention to IO_SYNC (which translates into the
690 		 * Solaris define FRSYNC for zfs_read()) for reads.  It
691 		 * attempts to sync the file before reading.
692 		 */
693 		error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);
694 
695 		VOP_UNLOCK(be_lun->vn, 0);
696 		SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
697 		if (error == 0 && xuio.uio_resid > 0) {
698 			/*
699 			 * If we read less than requested (EOF), then
700 			 * we should zero out the rest of the buffer.
701 			 */
702 			s = beio->io_len - xuio.uio_resid;
703 			for (i = 0; i < beio->num_segs; i++) {
704 				if (s >= beio->sg_segs[i].len) {
705 					s -= beio->sg_segs[i].len;
706 					continue;
707 				}
708 				bzero((uint8_t *)beio->sg_segs[i].addr + s,
709 				    beio->sg_segs[i].len - s);
710 				s = 0;
711 			}
712 		}
713 	} else {
714 		struct mount *mountpoint;
715 		int lock_flags;
716 
717 		(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
718 
719 		if (MNT_SHARED_WRITES(mountpoint)
720 		 || ((mountpoint == NULL)
721 		  && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
722 			lock_flags = LK_SHARED;
723 		else
724 			lock_flags = LK_EXCLUSIVE;
725 
726 		vn_lock(be_lun->vn, lock_flags | LK_RETRY);
727 
728 		/*
729 		 * UFS pays attention to IO_DIRECT for writes.  The write
730 		 * is done asynchronously.  (Normally the write would just
731 		 * get put into the cache.)
732 		 *
733 		 * UFS pays attention to IO_SYNC for writes.  It will
734 		 * attempt to write the buffer out synchronously if that
735 		 * flag is set.
736 		 *
737 		 * ZFS does not pay attention to IO_DIRECT for writes.
738 		 *
739 		 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
740 		 * for writes.  It will flush the transaction from the
741 		 * cache before returning.
742 		 */
743 		error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
744 		VOP_UNLOCK(be_lun->vn, 0);
745 
746 		vn_finished_write(mountpoint);
747 		SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);
748 	}
749 
750 	mtx_lock(&be_lun->io_lock);
751 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
752 	    beio->ds_tag_type, beio->ds_trans_type,
753 	    /*now*/ NULL, /*then*/&beio->ds_t0);
754 	mtx_unlock(&be_lun->io_lock);
755 
756 	/*
757 	 * If we got an error, set the sense data to "MEDIUM ERROR" and
758 	 * return the I/O to the user.
759 	 */
760 	if (error != 0) {
761 		if (error == ENOSPC || error == EDQUOT) {
762 			ctl_set_space_alloc_fail(&io->scsiio);
763 		} else if (error == EROFS || error == EACCES) {
764 			ctl_set_hw_write_protected(&io->scsiio);
765 		} else {
766 			ctl_set_medium_error(&io->scsiio,
767 			    beio->bio_cmd == BIO_READ);
768 		}
769 		ctl_complete_beio(beio);
770 		return;
771 	}
772 
773 	/*
774 	 * If this is a write or a verify, we're all done.
775 	 * If this is a read, we can now send the data to the user.
776 	 */
777 	if ((beio->bio_cmd == BIO_WRITE) ||
778 	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
779 		ctl_set_success(&io->scsiio);
780 		ctl_complete_beio(beio);
781 	} else {
782 		if ((ARGS(io)->flags & CTL_LLF_READ) &&
783 		    beio->beio_cont == NULL) {
784 			ctl_set_success(&io->scsiio);
785 			ctl_serseq_done(io);
786 		}
787 #ifdef CTL_TIME_IO
788         	getbintime(&io->io_hdr.dma_start_bt);
789 #endif
790 		ctl_datamove(io);
791 	}
792 }
793 
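/*
 * Implement GET LBA STATUS for a file-backed LUN: FIOSEEKHOLE and
 * FIOSEEKDATA report whether the range starting at the requested LBA
 * is mapped (status 0) or deallocated (status 1), and for how many
 * blocks that status holds.
 */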
794 static void
795 ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
796 			struct ctl_be_block_io *beio)
797 {
798 	union ctl_io *io = beio->io;
799 	struct ctl_lba_len_flags *lbalen = ARGS(io);
800 	struct scsi_get_lba_status_data *data;
801 	off_t roff, off;
802 	int error, status;
803 
804 	DPRINTF("entered\n");
805 
806 	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
807 	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
808 	error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
809 	    0, curthread->td_ucred, curthread);
810 	if (error == 0 && off > roff)
811 		status = 0;	/* mapped up to off */
812 	else {
813 		error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
814 		    0, curthread->td_ucred, curthread);
815 		if (error == 0 && off > roff)
816 			status = 1;	/* deallocated up to off */
817 		else {
818 			status = 0;	/* unknown up to the end */
819 			off = be_lun->size_bytes;
820 		}
821 	}
822 	VOP_UNLOCK(be_lun->vn, 0);
823 
824 	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
825 	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
826 	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
827 	    lbalen->lba), data->descr[0].length);
828 	data->descr[0].status = status;
829 
830 	ctl_complete_beio(beio);
831 }
832 
833 static uint64_t
834 ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
835 {
836 	struct vattr		vattr;
837 	struct statfs		statfs;
838 	uint64_t		val;
839 	int			error;
840 
841 	val = UINT64_MAX;
842 	if (be_lun->vn == NULL)
843 		return (val);
844 	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
845 	if (strcmp(attrname, "blocksused") == 0) {
846 		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
847 		if (error == 0)
848 			val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
849 	}
850 	if (strcmp(attrname, "blocksavail") == 0 &&
851 	    (be_lun->vn->v_iflag & VI_DOOMED) == 0) {
852 		error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
853 		if (error == 0)
854 			val = statfs.f_bavail * statfs.f_bsize /
855 			    be_lun->cbe_lun.blocksize;
856 	}
857 	VOP_UNLOCK(be_lun->vn, 0);
858 	return (val);
859 }
860 
861 static void
862 ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
863 			   struct ctl_be_block_io *beio)
864 {
865 	union ctl_io *io;
866 	struct cdevsw *csw;
867 	struct cdev *dev;
868 	struct uio xuio;
869 	struct iovec *xiovec;
870 	int error, flags, i, ref;
871 
872 	DPRINTF("entered\n");
873 
874 	io = beio->io;
875 	flags = 0;
876 	if (ARGS(io)->flags & CTL_LLF_DPO)
877 		flags |= IO_DIRECT;
878 	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
879 		flags |= IO_SYNC;
880 
881 	bzero(&xuio, sizeof(xuio));
882 	if (beio->bio_cmd == BIO_READ) {
883 		SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
884 		xuio.uio_rw = UIO_READ;
885 	} else {
886 		SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
887 		xuio.uio_rw = UIO_WRITE;
888 	}
889 	xuio.uio_offset = beio->io_offset;
890 	xuio.uio_resid = beio->io_len;
891 	xuio.uio_segflg = UIO_SYSSPACE;
892 	xuio.uio_iov = beio->xiovecs;
893 	xuio.uio_iovcnt = beio->num_segs;
894 	xuio.uio_td = curthread;
895 
896 	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
897 		xiovec->iov_base = beio->sg_segs[i].addr;
898 		xiovec->iov_len = beio->sg_segs[i].len;
899 	}
900 
901 	binuptime(&beio->ds_t0);
902 	mtx_lock(&be_lun->io_lock);
903 	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
904 	mtx_unlock(&be_lun->io_lock);
905 
906 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
907 	if (csw) {
908 		if (beio->bio_cmd == BIO_READ)
909 			error = csw->d_read(dev, &xuio, flags);
910 		else
911 			error = csw->d_write(dev, &xuio, flags);
912 		dev_relthread(dev, ref);
913 	} else
914 		error = ENXIO;
915 
916 	if (beio->bio_cmd == BIO_READ)
917 		SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
918 	else
919 		SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);
920 
921 	mtx_lock(&be_lun->io_lock);
922 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
923 	    beio->ds_tag_type, beio->ds_trans_type,
924 	    /*now*/ NULL, /*then*/&beio->ds_t0);
925 	mtx_unlock(&be_lun->io_lock);
926 
927 	/*
928 	 * If we got an error, set the sense data to "MEDIUM ERROR" and
929 	 * return the I/O to the user.
930 	 */
931 	if (error != 0) {
932 		if (error == ENOSPC || error == EDQUOT) {
933 			ctl_set_space_alloc_fail(&io->scsiio);
934 		} else if (error == EROFS || error == EACCES) {
935 			ctl_set_hw_write_protected(&io->scsiio);
936 		} else {
937 			ctl_set_medium_error(&io->scsiio,
938 			    beio->bio_cmd == BIO_READ);
939 		}
940 		ctl_complete_beio(beio);
941 		return;
942 	}
943 
944 	/*
945 	 * If this is a write or a verify, we're all done.
946 	 * If this is a read, we can now send the data to the user.
947 	 */
948 	if ((beio->bio_cmd == BIO_WRITE) ||
949 	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
950 		ctl_set_success(&io->scsiio);
951 		ctl_complete_beio(beio);
952 	} else {
953 		if ((ARGS(io)->flags & CTL_LLF_READ) &&
954 		    beio->beio_cont == NULL) {
955 			ctl_set_success(&io->scsiio);
956 			ctl_serseq_done(io);
957 		}
958 #ifdef CTL_TIME_IO
959         	getbintime(&io->io_hdr.dma_start_bt);
960 #endif
961 		ctl_datamove(io);
962 	}
963 }
964 
965 static void
966 ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
967 			struct ctl_be_block_io *beio)
968 {
969 	union ctl_io *io = beio->io;
970 	struct cdevsw *csw;
971 	struct cdev *dev;
972 	struct ctl_lba_len_flags *lbalen = ARGS(io);
973 	struct scsi_get_lba_status_data *data;
974 	off_t roff, off;
975 	int error, ref, status;
976 
977 	DPRINTF("entered\n");
978 
979 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
980 	if (csw == NULL) {
981 		status = 0;	/* unknown up to the end */
982 		off = be_lun->size_bytes;
983 		goto done;
984 	}
985 	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
986 	error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
987 	    curthread);
988 	if (error == 0 && off > roff)
989 		status = 0;	/* mapped up to off */
990 	else {
991 		error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
992 		    curthread);
993 		if (error == 0 && off > roff)
994 			status = 1;	/* deallocated up to off */
995 		else {
996 			status = 0;	/* unknown up to the end */
997 			off = be_lun->size_bytes;
998 		}
999 	}
1000 	dev_relthread(dev, ref);
1001 
1002 done:
1003 	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
1004 	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
1005 	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
1006 	    lbalen->lba), data->descr[0].length);
1007 	data->descr[0].status = status;
1008 
1009 	ctl_complete_beio(beio);
1010 }
1011 
1012 static void
1013 ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
1014 		       struct ctl_be_block_io *beio)
1015 {
1016 	struct bio *bio;
1017 	union ctl_io *io;
1018 	struct cdevsw *csw;
1019 	struct cdev *dev;
1020 	int ref;
1021 
1022 	io = beio->io;
1023 
1024 	DPRINTF("entered\n");
1025 
1026 	/* This can't fail; it's a blocking allocation. */
1027 	bio = g_alloc_bio();
1028 
1029 	bio->bio_cmd	    = BIO_FLUSH;
1030 	bio->bio_offset	    = 0;
1031 	bio->bio_data	    = 0;
1032 	bio->bio_done	    = ctl_be_block_biodone;
1033 	bio->bio_caller1    = beio;
1034 	bio->bio_pblkno	    = 0;
1035 
1036 	/*
1037 	 * We don't need to acquire the LUN lock here, because we are only
1038 	 * sending one bio, and so there is no other context to synchronize
1039 	 * with.
1040 	 */
1041 	beio->num_bios_sent = 1;
1042 	beio->send_complete = 1;
1043 
1044 	binuptime(&beio->ds_t0);
1045 	mtx_lock(&be_lun->io_lock);
1046 	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1047 	mtx_unlock(&be_lun->io_lock);
1048 
1049 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1050 	if (csw) {
1051 		bio->bio_dev = dev;
1052 		csw->d_strategy(bio);
1053 		dev_relthread(dev, ref);
1054 	} else {
1055 		bio->bio_error = ENXIO;
1056 		ctl_be_block_biodone(bio);
1057 	}
1058 }
1059 
1060 static void
1061 ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
1062 		       struct ctl_be_block_io *beio,
1063 		       uint64_t off, uint64_t len, int last)
1064 {
1065 	struct bio *bio;
1066 	uint64_t maxlen;
1067 	struct cdevsw *csw;
1068 	struct cdev *dev;
1069 	int ref;
1070 
1071 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1072 	maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
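	/*
	 * maxlen is LONG_MAX rounded down to a multiple of the block size,
	 * so that each BIO_DELETE issued below covers a block-aligned
	 * length no larger than LONG_MAX.
	 */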
1073 	while (len > 0) {
1074 		bio = g_alloc_bio();
1075 		bio->bio_cmd	    = BIO_DELETE;
1076 		bio->bio_dev	    = dev;
1077 		bio->bio_offset	    = off;
1078 		bio->bio_length	    = MIN(len, maxlen);
1079 		bio->bio_data	    = 0;
1080 		bio->bio_done	    = ctl_be_block_biodone;
1081 		bio->bio_caller1    = beio;
1082 		bio->bio_pblkno     = off / be_lun->cbe_lun.blocksize;
1083 
1084 		off += bio->bio_length;
1085 		len -= bio->bio_length;
1086 
1087 		mtx_lock(&be_lun->io_lock);
1088 		beio->num_bios_sent++;
1089 		if (last && len == 0)
1090 			beio->send_complete = 1;
1091 		mtx_unlock(&be_lun->io_lock);
1092 
1093 		if (csw) {
1094 			csw->d_strategy(bio);
1095 		} else {
1096 			bio->bio_error = ENXIO;
1097 			ctl_be_block_biodone(bio);
1098 		}
1099 	}
1100 	if (csw)
1101 		dev_relthread(dev, ref);
1102 }
1103 
1104 static void
1105 ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
1106 		       struct ctl_be_block_io *beio)
1107 {
1108 	union ctl_io *io;
1109 	struct ctl_ptr_len_flags *ptrlen;
1110 	struct scsi_unmap_desc *buf, *end;
1111 	uint64_t len;
1112 
1113 	io = beio->io;
1114 
1115 	DPRINTF("entered\n");
1116 
1117 	binuptime(&beio->ds_t0);
1118 	mtx_lock(&be_lun->io_lock);
1119 	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1120 	mtx_unlock(&be_lun->io_lock);
1121 
1122 	if (beio->io_offset == -1) {
1123 		beio->io_len = 0;
1124 		ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1125 		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
1126 		end = buf + ptrlen->len / sizeof(*buf);
1127 		for (; buf < end; buf++) {
1128 			len = (uint64_t)scsi_4btoul(buf->length) *
1129 			    be_lun->cbe_lun.blocksize;
1130 			beio->io_len += len;
1131 			ctl_be_block_unmap_dev_range(be_lun, beio,
1132 			    scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
1133 			    len, (end - buf < 2) ? TRUE : FALSE);
1134 		}
1135 	} else
1136 		ctl_be_block_unmap_dev_range(be_lun, beio,
1137 		    beio->io_offset, beio->io_len, TRUE);
1138 }
1139 
1140 static void
1141 ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
1142 			  struct ctl_be_block_io *beio)
1143 {
1144 	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
1145 	struct bio *bio;
1146 	struct cdevsw *csw;
1147 	struct cdev *dev;
1148 	off_t cur_offset;
1149 	int i, max_iosize, ref;
1150 
1151 	DPRINTF("entered\n");
1152 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1153 
1154 	/*
1155 	 * We have to limit our I/O size to the maximum supported by the
1156 	 * backend device.  Hopefully it is MAXPHYS.  If the driver doesn't
1157 	 * set it properly, use DFLTPHYS.
1158 	 */
1159 	if (csw) {
1160 		max_iosize = dev->si_iosize_max;
1161 		if (max_iosize < PAGE_SIZE)
1162 			max_iosize = DFLTPHYS;
1163 	} else
1164 		max_iosize = DFLTPHYS;
1165 
1166 	cur_offset = beio->io_offset;
1167 	for (i = 0; i < beio->num_segs; i++) {
1168 		size_t cur_size;
1169 		uint8_t *cur_ptr;
1170 
1171 		cur_size = beio->sg_segs[i].len;
1172 		cur_ptr = beio->sg_segs[i].addr;
1173 
1174 		while (cur_size > 0) {
1175 			/* This can't fail; it's a blocking allocation. */
1176 			bio = g_alloc_bio();
1177 
1178 			KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));
1179 
1180 			bio->bio_cmd = beio->bio_cmd;
1181 			bio->bio_dev = dev;
1182 			bio->bio_caller1 = beio;
1183 			bio->bio_length = min(cur_size, max_iosize);
1184 			bio->bio_offset = cur_offset;
1185 			bio->bio_data = cur_ptr;
1186 			bio->bio_done = ctl_be_block_biodone;
1187 			bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;
1188 
1189 			cur_offset += bio->bio_length;
1190 			cur_ptr += bio->bio_length;
1191 			cur_size -= bio->bio_length;
1192 
1193 			TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
1194 			beio->num_bios_sent++;
1195 		}
1196 	}
1197 	binuptime(&beio->ds_t0);
1198 	mtx_lock(&be_lun->io_lock);
1199 	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1200 	beio->send_complete = 1;
1201 	mtx_unlock(&be_lun->io_lock);
1202 
1203 	/*
1204 	 * Fire off all allocated requests!
1205 	 */
1206 	while ((bio = TAILQ_FIRST(&queue)) != NULL) {
1207 		TAILQ_REMOVE(&queue, bio, bio_queue);
1208 		if (csw)
1209 			csw->d_strategy(bio);
1210 		else {
1211 			bio->bio_error = ENXIO;
1212 			ctl_be_block_biodone(bio);
1213 		}
1214 	}
1215 	if (csw)
1216 		dev_relthread(dev, ref);
1217 }
1218 
1219 static uint64_t
1220 ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
1221 {
1222 	struct diocgattr_arg	arg;
1223 	struct cdevsw *csw;
1224 	struct cdev *dev;
1225 	int error, ref;
1226 
1227 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1228 	if (csw == NULL)
1229 		return (UINT64_MAX);
1230 	strlcpy(arg.name, attrname, sizeof(arg.name));
1231 	arg.len = sizeof(arg.value.off);
1232 	if (csw->d_ioctl) {
1233 		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
1234 		    curthread);
1235 	} else
1236 		error = ENODEV;
1237 	dev_relthread(dev, ref);
1238 	if (error != 0)
1239 		return (UINT64_MAX);
1240 	return (arg.value.off);
1241 }
1242 
1243 static void
1244 ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
1245 			    union ctl_io *io)
1246 {
1247 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1248 	struct ctl_be_block_io *beio;
1249 	struct ctl_lba_len_flags *lbalen;
1250 
1251 	DPRINTF("entered\n");
1252 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1253 	lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1254 
1255 	beio->io_len = lbalen->len * cbe_lun->blocksize;
1256 	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1257 	beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
1258 	beio->bio_cmd = BIO_FLUSH;
1259 	beio->ds_trans_type = DEVSTAT_NO_DATA;
1260 	DPRINTF("SYNC\n");
1261 	be_lun->lun_flush(be_lun, beio);
1262 }
1263 
1264 static void
1265 ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
1266 {
1267 	union ctl_io *io;
1268 
1269 	io = beio->io;
1270 	ctl_free_beio(beio);
1271 	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1272 	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1273 	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1274 		ctl_config_write_done(io);
1275 		return;
1276 	}
1277 
1278 	ctl_be_block_config_write(io);
1279 }
1280 
1281 static void
1282 ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
1283 			    union ctl_io *io)
1284 {
1285 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1286 	struct ctl_be_block_io *beio;
1287 	struct ctl_lba_len_flags *lbalen;
1288 	uint64_t len_left, lba;
1289 	uint32_t pb, pbo, adj;
1290 	int i, seglen;
1291 	uint8_t *buf, *end;
1292 
1293 	DPRINTF("entered\n");
1294 
1295 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1296 	lbalen = ARGS(beio->io);
1297 
1298 	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
1299 	    (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
1300 		ctl_free_beio(beio);
1301 		ctl_set_invalid_field(&io->scsiio,
1302 				      /*sks_valid*/ 1,
1303 				      /*command*/ 1,
1304 				      /*field*/ 1,
1305 				      /*bit_valid*/ 0,
1306 				      /*bit*/ 0);
1307 		ctl_config_write_done(io);
1308 		return;
1309 	}
1310 
1311 	if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
1312 		beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1313 		beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
1314 		beio->bio_cmd = BIO_DELETE;
1315 		beio->ds_trans_type = DEVSTAT_FREE;
1316 
1317 		be_lun->unmap(be_lun, beio);
1318 		return;
1319 	}
1320 
1321 	beio->bio_cmd = BIO_WRITE;
1322 	beio->ds_trans_type = DEVSTAT_WRITE;
1323 
1324 	DPRINTF("WRITE SAME at LBA %jx len %u\n",
1325 	       (uintmax_t)lbalen->lba, lbalen->len);
1326 
1327 	pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
1328 	if (be_lun->cbe_lun.pblockoff > 0)
1329 		pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
1330 	else
1331 		pbo = 0;
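	/*
	 * pb is the physical block size in bytes; pbo locates the
	 * physical-block boundaries when the first physical block is not
	 * aligned to LBA 0.  The chunking below trims each segment so
	 * that, where possible, it ends on a physical-block boundary.
	 */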
1332 	len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
1333 	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
1334 
1335 		/*
1336 		 * Set up the S/G entry for this chunk.
1337 		 */
1338 		seglen = MIN(CTLBLK_MAX_SEG, len_left);
1339 		if (pb > cbe_lun->blocksize) {
1340 			adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
1341 			    seglen - pbo) % pb;
1342 			if (seglen > adj)
1343 				seglen -= adj;
1344 			else
1345 				seglen -= seglen % cbe_lun->blocksize;
1346 		} else
1347 			seglen -= seglen % cbe_lun->blocksize;
1348 		beio->sg_segs[i].len = seglen;
1349 		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);
1350 
1351 		DPRINTF("segment %d addr %p len %zd\n", i,
1352 			beio->sg_segs[i].addr, beio->sg_segs[i].len);
1353 
1354 		beio->num_segs++;
1355 		len_left -= seglen;
1356 
1357 		buf = beio->sg_segs[i].addr;
1358 		end = buf + seglen;
1359 		for (; buf < end; buf += cbe_lun->blocksize) {
1360 			memcpy(buf, io->scsiio.kern_data_ptr, cbe_lun->blocksize);
1361 			if (lbalen->flags & SWS_LBDATA)
1362 				scsi_ulto4b(lbalen->lba + lba, buf);
1363 			lba++;
1364 		}
1365 	}
1366 
1367 	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1368 	beio->io_len = lba * cbe_lun->blocksize;
1369 
1370 	/* We cannot do it all in one run.  Adjust the request and schedule a rerun. */
1371 	if (len_left > 0) {
1372 		lbalen->lba += lba;
1373 		lbalen->len -= lba;
1374 		beio->beio_cont = ctl_be_block_cw_done_ws;
1375 	}
1376 
1377 	be_lun->dispatch(be_lun, beio);
1378 }
1379 
1380 static void
1381 ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
1382 			    union ctl_io *io)
1383 {
1384 	struct ctl_be_block_io *beio;
1385 	struct ctl_ptr_len_flags *ptrlen;
1386 
1387 	DPRINTF("entered\n");
1388 
1389 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1390 	ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1391 
1392 	if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
1393 		ctl_free_beio(beio);
1394 		ctl_set_invalid_field(&io->scsiio,
1395 				      /*sks_valid*/ 0,
1396 				      /*command*/ 1,
1397 				      /*field*/ 0,
1398 				      /*bit_valid*/ 0,
1399 				      /*bit*/ 0);
1400 		ctl_config_write_done(io);
1401 		return;
1402 	}
1403 
1404 	beio->io_len = 0;
1405 	beio->io_offset = -1;
1406 	beio->bio_cmd = BIO_DELETE;
1407 	beio->ds_trans_type = DEVSTAT_FREE;
1408 	DPRINTF("UNMAP\n");
1409 	be_lun->unmap(be_lun, beio);
1410 }
1411 
1412 static void
1413 ctl_be_block_cr_done(struct ctl_be_block_io *beio)
1414 {
1415 	union ctl_io *io;
1416 
1417 	io = beio->io;
1418 	ctl_free_beio(beio);
1419 	ctl_config_read_done(io);
1420 }
1421 
1422 static void
1423 ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
1424 			 union ctl_io *io)
1425 {
1426 	struct ctl_be_block_io *beio;
1427 	struct ctl_be_block_softc *softc;
1428 
1429 	DPRINTF("entered\n");
1430 
1431 	softc = be_lun->softc;
1432 	beio = ctl_alloc_beio(softc);
1433 	beio->io = io;
1434 	beio->lun = be_lun;
1435 	beio->beio_cont = ctl_be_block_cr_done;
1436 	PRIV(io)->ptr = (void *)beio;
1437 
1438 	switch (io->scsiio.cdb[0]) {
1439 	case SERVICE_ACTION_IN:		/* GET LBA STATUS */
1440 		beio->bio_cmd = -1;
1441 		beio->ds_trans_type = DEVSTAT_NO_DATA;
1442 		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1443 		beio->io_len = 0;
1444 		if (be_lun->get_lba_status)
1445 			be_lun->get_lba_status(be_lun, beio);
1446 		else
1447 			ctl_be_block_cr_done(beio);
1448 		break;
1449 	default:
1450 		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1451 		break;
1452 	}
1453 }
1454 
1455 static void
1456 ctl_be_block_cw_done(struct ctl_be_block_io *beio)
1457 {
1458 	union ctl_io *io;
1459 
1460 	io = beio->io;
1461 	ctl_free_beio(beio);
1462 	ctl_config_write_done(io);
1463 }
1464 
1465 static void
1466 ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
1467 			 union ctl_io *io)
1468 {
1469 	struct ctl_be_block_io *beio;
1470 	struct ctl_be_block_softc *softc;
1471 
1472 	DPRINTF("entered\n");
1473 
1474 	softc = be_lun->softc;
1475 	beio = ctl_alloc_beio(softc);
1476 	beio->io = io;
1477 	beio->lun = be_lun;
1478 	beio->beio_cont = ctl_be_block_cw_done;
1479 	switch (io->scsiio.tag_type) {
1480 	case CTL_TAG_ORDERED:
1481 		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1482 		break;
1483 	case CTL_TAG_HEAD_OF_QUEUE:
1484 		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1485 		break;
1486 	case CTL_TAG_UNTAGGED:
1487 	case CTL_TAG_SIMPLE:
1488 	case CTL_TAG_ACA:
1489 	default:
1490 		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1491 		break;
1492 	}
1493 	PRIV(io)->ptr = (void *)beio;
1494 
1495 	switch (io->scsiio.cdb[0]) {
1496 	case SYNCHRONIZE_CACHE:
1497 	case SYNCHRONIZE_CACHE_16:
1498 		ctl_be_block_cw_dispatch_sync(be_lun, io);
1499 		break;
1500 	case WRITE_SAME_10:
1501 	case WRITE_SAME_16:
1502 		ctl_be_block_cw_dispatch_ws(be_lun, io);
1503 		break;
1504 	case UNMAP:
1505 		ctl_be_block_cw_dispatch_unmap(be_lun, io);
1506 		break;
1507 	default:
1508 		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1509 		break;
1510 	}
1511 }
1512 
1513 SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t");
1514 SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t");
1515 SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t");
1516 SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t");
1517 
1518 static void
1519 ctl_be_block_next(struct ctl_be_block_io *beio)
1520 {
1521 	struct ctl_be_block_lun *be_lun;
1522 	union ctl_io *io;
1523 
1524 	io = beio->io;
1525 	be_lun = beio->lun;
1526 	ctl_free_beio(beio);
1527 	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1528 	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1529 	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1530 		ctl_data_submit_done(io);
1531 		return;
1532 	}
1533 
1534 	io->io_hdr.status &= ~CTL_STATUS_MASK;
1535 	io->io_hdr.status |= CTL_STATUS_NONE;
1536 
1537 	mtx_lock(&be_lun->queue_lock);
1538 	/*
1539 	 * XXX KDM make sure that links is okay to use at this point.
1540 	 * Otherwise, we either need to add another field to ctl_io_hdr,
1541 	 * or deal with resource allocation here.
1542 	 */
1543 	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1544 	mtx_unlock(&be_lun->queue_lock);
1545 
1546 	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1547 }
1548 
1549 static void
1550 ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
1551 			   union ctl_io *io)
1552 {
1553 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1554 	struct ctl_be_block_io *beio;
1555 	struct ctl_be_block_softc *softc;
1556 	struct ctl_lba_len_flags *lbalen;
1557 	struct ctl_ptr_len_flags *bptrlen;
1558 	uint64_t len_left, lbas;
1559 	int i;
1560 
1561 	softc = be_lun->softc;
1562 
1563 	DPRINTF("entered\n");
1564 
1565 	lbalen = ARGS(io);
1566 	if (lbalen->flags & CTL_LLF_WRITE) {
1567 		SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0);
1568 	} else {
1569 		SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0);
1570 	}
1571 
1572 	beio = ctl_alloc_beio(softc);
1573 	beio->io = io;
1574 	beio->lun = be_lun;
1575 	bptrlen = PRIV(io);
1576 	bptrlen->ptr = (void *)beio;
1577 
1578 	switch (io->scsiio.tag_type) {
1579 	case CTL_TAG_ORDERED:
1580 		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1581 		break;
1582 	case CTL_TAG_HEAD_OF_QUEUE:
1583 		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1584 		break;
1585 	case CTL_TAG_UNTAGGED:
1586 	case CTL_TAG_SIMPLE:
1587 	case CTL_TAG_ACA:
1588 	default:
1589 		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1590 		break;
1591 	}
1592 
1593 	if (lbalen->flags & CTL_LLF_WRITE) {
1594 		beio->bio_cmd = BIO_WRITE;
1595 		beio->ds_trans_type = DEVSTAT_WRITE;
1596 	} else {
1597 		beio->bio_cmd = BIO_READ;
1598 		beio->ds_trans_type = DEVSTAT_READ;
1599 	}
1600 
1601 	DPRINTF("%s at LBA %jx len %u @%ju\n",
1602 	       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
1603 	       (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
1604 	if (lbalen->flags & CTL_LLF_COMPARE)
1605 		lbas = CTLBLK_HALF_IO_SIZE;
1606 	else
1607 		lbas = CTLBLK_MAX_IO_SIZE;
1608 	lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
1609 	beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
1610 	beio->io_len = lbas * cbe_lun->blocksize;
1611 	bptrlen->len += lbas;
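	/*
	 * Each pass through here issues at most CTLBLK_MAX_IO_SIZE bytes
	 * (half that for COMPARE, which needs the second set of buffers
	 * allocated below); bptrlen->len accumulates the LBAs handled so
	 * far, and ctl_be_block_next() resubmits any remainder.
	 */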
1612 
1613 	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
1614 		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
1615 		    i, CTLBLK_MAX_SEGS));
1616 
1617 		/*
1618 		 * Set up the S/G entry for this chunk.
1619 		 */
1620 		beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left);
1621 		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);
1622 
1623 		DPRINTF("segment %d addr %p len %zd\n", i,
1624 			beio->sg_segs[i].addr, beio->sg_segs[i].len);
1625 
1626 		/* Set up second segment for compare operation. */
1627 		if (lbalen->flags & CTL_LLF_COMPARE) {
1628 			beio->sg_segs[i + CTLBLK_HALF_SEGS].len =
1629 			    beio->sg_segs[i].len;
1630 			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr =
1631 			    uma_zalloc(be_lun->lun_zone, M_WAITOK);
1632 		}
1633 
1634 		beio->num_segs++;
1635 		len_left -= beio->sg_segs[i].len;
1636 	}
1637 	if (bptrlen->len < lbalen->len)
1638 		beio->beio_cont = ctl_be_block_next;
1639 	io->scsiio.be_move_done = ctl_be_block_move_done;
1640 	/* For compare we have separate S/G lists for read and datamove. */
1641 	if (lbalen->flags & CTL_LLF_COMPARE)
1642 		io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
1643 	else
1644 		io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
1645 	io->scsiio.kern_data_len = beio->io_len;
1646 	io->scsiio.kern_data_resid = 0;
1647 	io->scsiio.kern_sg_entries = beio->num_segs;
1648 	io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
1649 
1650 	/*
1651 	 * For the read case, we need to read the data into our buffers and
1652 	 * then we can send it back to the user.  For the write case, we
1653 	 * need to get the data from the user first.
1654 	 */
1655 	if (beio->bio_cmd == BIO_READ) {
1656 		SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0);
1657 		be_lun->dispatch(be_lun, beio);
1658 	} else {
1659 		SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0);
1660 #ifdef CTL_TIME_IO
1661         	getbintime(&io->io_hdr.dma_start_bt);
1662 #endif
1663 		ctl_datamove(io);
1664 	}
1665 }
1666 
1667 static void
1668 ctl_be_block_worker(void *context, int pending)
1669 {
1670 	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
1671 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1672 	union ctl_io *io;
1673 	struct ctl_be_block_io *beio;
1674 
1675 	DPRINTF("entered\n");
1676 	/*
1677 	 * Fetch and process I/Os from all queues.  If we detect the LUN
1678 	 * CTL_LUN_FLAG_OFFLINE status here, it is the result of a race, so
1679 	 * make the response maximally opaque so as not to confuse the initiator.
1680 	 */
1681 	for (;;) {
1682 		mtx_lock(&be_lun->queue_lock);
1683 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
1684 		if (io != NULL) {
1685 			DPRINTF("datamove queue\n");
1686 			STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr,
1687 				      ctl_io_hdr, links);
1688 			mtx_unlock(&be_lun->queue_lock);
1689 			beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1690 			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
1691 				ctl_set_busy(&io->scsiio);
1692 				ctl_complete_beio(beio);
1693 				return;
1694 			}
1695 			be_lun->dispatch(be_lun, beio);
1696 			continue;
1697 		}
1698 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
1699 		if (io != NULL) {
1700 			DPRINTF("config write queue\n");
1701 			STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr,
1702 				      ctl_io_hdr, links);
1703 			mtx_unlock(&be_lun->queue_lock);
1704 			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
1705 				ctl_set_busy(&io->scsiio);
1706 				ctl_config_write_done(io);
1707 				return;
1708 			}
1709 			ctl_be_block_cw_dispatch(be_lun, io);
1710 			continue;
1711 		}
1712 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
1713 		if (io != NULL) {
1714 			DPRINTF("config read queue\n");
1715 			STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr,
1716 				      ctl_io_hdr, links);
1717 			mtx_unlock(&be_lun->queue_lock);
1718 			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
1719 				ctl_set_busy(&io->scsiio);
1720 				ctl_config_read_done(io);
1721 				return;
1722 			}
1723 			ctl_be_block_cr_dispatch(be_lun, io);
1724 			continue;
1725 		}
1726 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
1727 		if (io != NULL) {
1728 			DPRINTF("input queue\n");
1729 			STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr,
1730 				      ctl_io_hdr, links);
1731 			mtx_unlock(&be_lun->queue_lock);
1732 			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
1733 				ctl_set_busy(&io->scsiio);
1734 				ctl_data_submit_done(io);
1735 				return;
1736 			}
1737 			ctl_be_block_dispatch(be_lun, io);
1738 			continue;
1739 		}
1740 
1741 		/*
1742 		 * If we get here, there is no work left in the queues, so
1743 		 * just break out and let the task queue go to sleep.
1744 		 */
1745 		mtx_unlock(&be_lun->queue_lock);
1746 		break;
1747 	}
1748 }
1749 
1750 /*
1751  * Entry point from CTL to the backend for I/O.  We queue everything to a
1752  * work thread, so this just puts the I/O on a queue and wakes up the
1753  * thread.
1754  */
1755 static int
1756 ctl_be_block_submit(union ctl_io *io)
1757 {
1758 	struct ctl_be_block_lun *be_lun;
1759 	struct ctl_be_lun *cbe_lun;
1760 
1761 	DPRINTF("entered\n");
1762 
1763 	cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
1764 		CTL_PRIV_BACKEND_LUN].ptr;
1765 	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
1766 
1767 	/*
1768 	 * Make sure we only get SCSI I/O.
1769 	 */
1770 	KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type "
1771 		"%#x) encountered", io->io_hdr.io_type));
1772 
1773 	PRIV(io)->len = 0;
1774 
1775 	mtx_lock(&be_lun->queue_lock);
1776 	/*
1777 	 * XXX KDM make sure that links is okay to use at this point.
1778 	 * Otherwise, we either need to add another field to ctl_io_hdr,
1779 	 * or deal with resource allocation here.
1780 	 */
1781 	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1782 	mtx_unlock(&be_lun->queue_lock);
1783 	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1784 
1785 	return (CTL_RETVAL_COMPLETE);
1786 }
1787 
1788 static int
1789 ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1790 			int flag, struct thread *td)
1791 {
1792 	struct ctl_be_block_softc *softc;
1793 	int error;
1794 
1795 	softc = &backend_block_softc;
1796 
1797 	error = 0;
1798 
1799 	switch (cmd) {
1800 	case CTL_LUN_REQ: {
1801 		struct ctl_lun_req *lun_req;
1802 
1803 		lun_req = (struct ctl_lun_req *)addr;
1804 
1805 		switch (lun_req->reqtype) {
1806 		case CTL_LUNREQ_CREATE:
1807 			error = ctl_be_block_create(softc, lun_req);
1808 			break;
1809 		case CTL_LUNREQ_RM:
1810 			error = ctl_be_block_rm(softc, lun_req);
1811 			break;
1812 		case CTL_LUNREQ_MODIFY:
1813 			error = ctl_be_block_modify(softc, lun_req);
1814 			break;
1815 		default:
1816 			lun_req->status = CTL_LUN_ERROR;
1817 			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1818 				 "invalid LUN request type %d",
1819 				 lun_req->reqtype);
1820 			break;
1821 		}
1822 		break;
1823 	}
1824 	default:
1825 		error = ENOTTY;
1826 		break;
1827 	}
1828 
1829 	return (error);
1830 }
1831 
1832 static int
1833 ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1834 {
1835 	struct ctl_be_lun *cbe_lun;
1836 	struct ctl_be_block_filedata *file_data;
1837 	struct ctl_lun_create_params *params;
1838 	char			     *value;
1839 	struct vattr		      vattr;
1840 	off_t			      ps, pss, po, pos, us, uss, uo, uos;
1841 	int			      error;
1842 
1843 	error = 0;
1844 	cbe_lun = &be_lun->cbe_lun;
1845 	file_data = &be_lun->backend.file;
1846 	params = &be_lun->params;
1847 
1848 	be_lun->dev_type = CTL_BE_BLOCK_FILE;
1849 	be_lun->dispatch = ctl_be_block_dispatch_file;
1850 	be_lun->lun_flush = ctl_be_block_flush_file;
1851 	be_lun->get_lba_status = ctl_be_block_gls_file;
1852 	be_lun->getattr = ctl_be_block_getattr_file;
1853 	be_lun->unmap = NULL;
1854 	cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
1855 
1856 	error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
1857 	if (error != 0) {
1858 		snprintf(req->error_str, sizeof(req->error_str),
1859 			 "error %d calling VOP_GETATTR() for file %s",
1860 			 error, be_lun->dev_path);
1861 		return (error);
1862 	}
1863 
1864 	/*
1865 	 * Verify that we have the ability to upgrade to exclusive
1866 	 * access on this file so we can trap errors at open instead
1867 	 * of reporting them during first access.
1868 	 */
1869 	if (VOP_ISLOCKED(be_lun->vn) != LK_EXCLUSIVE) {
1870 		vn_lock(be_lun->vn, LK_UPGRADE | LK_RETRY);
1871 		if (be_lun->vn->v_iflag & VI_DOOMED) {
1872 			error = EBADF;
1873 			snprintf(req->error_str, sizeof(req->error_str),
1874 				 "error locking file %s", be_lun->dev_path);
1875 			return (error);
1876 		}
1877 	}
1878 
1879 	file_data->cred = crhold(curthread->td_ucred);
1880 	if (params->lun_size_bytes != 0)
1881 		be_lun->size_bytes = params->lun_size_bytes;
1882 	else
1883 		be_lun->size_bytes = vattr.va_size;
1884 
1885 	/*
1886 	 * For files we can use any logical block size.  Prefer 512 bytes
1887 	 * for compatibility reasons.  If the file's vattr.va_blocksize
1888 	 * (preferred I/O block size) is larger than, and a multiple of,
1889 	 * the chosen logical block size, report it as the physical block size.
1890 	 */
1891 	if (params->blocksize_bytes != 0)
1892 		cbe_lun->blocksize = params->blocksize_bytes;
1893 	else
1894 		cbe_lun->blocksize = 512;
1895 	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
1896 	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
1897 	    0 : (be_lun->size_blocks - 1);
1898 
1899 	us = ps = vattr.va_blocksize;
1900 	uo = po = 0;
1901 
1902 	value = ctl_get_opt(&cbe_lun->options, "pblocksize");
1903 	if (value != NULL)
1904 		ctl_expand_number(value, &ps);
1905 	value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
1906 	if (value != NULL)
1907 		ctl_expand_number(value, &po);
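	/*
	 * Report a physical block geometry only if the physical block
	 * size is a power-of-2 multiple of the logical block size and
	 * the offset is a whole number of logical blocks within it.
	 */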
1908 	pss = ps / cbe_lun->blocksize;
1909 	pos = po / cbe_lun->blocksize;
1910 	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
1911 	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
1912 		cbe_lun->pblockexp = fls(pss) - 1;
1913 		cbe_lun->pblockoff = (pss - pos) % pss;
1914 	}
1915 
1916 	value = ctl_get_opt(&cbe_lun->options, "ublocksize");
1917 	if (value != NULL)
1918 		ctl_expand_number(value, &us);
1919 	value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
1920 	if (value != NULL)
1921 		ctl_expand_number(value, &uo);
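	/*
	 * Apply the same validation to the unmap granularity (ublock)
	 * geometry reported to the initiator.
	 */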
1922 	uss = us / cbe_lun->blocksize;
1923 	uos = uo / cbe_lun->blocksize;
1924 	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
1925 	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
1926 		cbe_lun->ublockexp = fls(uss) - 1;
1927 		cbe_lun->ublockoff = (uss - uos) % uss;
1928 	}
1929 
1930 	/*
1931 	 * Sanity check.  The media size has to be at least one
1932 	 * sector long.
1933 	 */
1934 	if (be_lun->size_bytes < cbe_lun->blocksize) {
1935 		error = EINVAL;
1936 		snprintf(req->error_str, sizeof(req->error_str),
1937 			 "file %s size %ju < block size %u", be_lun->dev_path,
1938 			 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
1939 	}
1940 
1941 	cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
1942 	return (error);
1943 }
1944 
1945 static int
1946 ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1947 {
1948 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1949 	struct ctl_lun_create_params *params;
1950 	struct cdevsw		     *csw;
1951 	struct cdev		     *dev;
1952 	char			     *value;
1953 	int			      error, atomic, maxio, ref, unmap, tmp;
1954 	off_t			      ps, pss, po, pos, us, uss, uo, uos, otmp;
1955 
1956 	params = &be_lun->params;
1957 
1958 	be_lun->dev_type = CTL_BE_BLOCK_DEV;
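	/*
	 * Take a temporary thread reference on the cdevsw so the device
	 * cannot go away while we probe it; every return path below must
	 * drop it with dev_relthread().
	 */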
1959 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1960 	if (csw == NULL)
1961 		return (ENXIO);
1962 	if (strcmp(csw->d_name, "zvol") == 0) {
1963 		be_lun->dispatch = ctl_be_block_dispatch_zvol;
1964 		be_lun->get_lba_status = ctl_be_block_gls_zvol;
1965 		atomic = maxio = CTLBLK_MAX_IO_SIZE;
1966 	} else {
1967 		be_lun->dispatch = ctl_be_block_dispatch_dev;
1968 		be_lun->get_lba_status = NULL;
1969 		atomic = 0;
1970 		maxio = dev->si_iosize_max;
1971 		if (maxio <= 0)
1972 			maxio = DFLTPHYS;
1973 		if (maxio > CTLBLK_MAX_IO_SIZE)
1974 			maxio = CTLBLK_MAX_IO_SIZE;
1975 	}
1976 	be_lun->lun_flush = ctl_be_block_flush_dev;
1977 	be_lun->getattr = ctl_be_block_getattr_dev;
1978 	be_lun->unmap = ctl_be_block_unmap_dev;
1979 
1980 	if (!csw->d_ioctl) {
1981 		dev_relthread(dev, ref);
1982 		snprintf(req->error_str, sizeof(req->error_str),
1983 			 "no d_ioctl for device %s!", be_lun->dev_path);
1984 		return (ENODEV);
1985 	}
1986 
1987 	error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
1988 			       curthread);
1989 	if (error) {
1990 		dev_relthread(dev, ref);
1991 		snprintf(req->error_str, sizeof(req->error_str),
1992 			 "error %d returned for DIOCGSECTORSIZE ioctl "
1993 			 "on %s!", error, be_lun->dev_path);
1994 		return (error);
1995 	}
1996 
1997 	/*
1998 	 * If the user has asked for a blocksize that is greater than the
1999 	 * backing device's blocksize, we can do it only if the blocksize
2000 	 * the user is asking for is an even multiple of the underlying
2001 	 * device's blocksize.
2002 	 */
2003 	if ((params->blocksize_bytes != 0) &&
2004 	    (params->blocksize_bytes >= tmp)) {
2005 		if (params->blocksize_bytes % tmp == 0) {
2006 			cbe_lun->blocksize = params->blocksize_bytes;
2007 		} else {
2008 			dev_relthread(dev, ref);
2009 			snprintf(req->error_str, sizeof(req->error_str),
2010 				 "requested blocksize %u is not an even "
2011 				 "multiple of backing device blocksize %u",
2012 				 params->blocksize_bytes, tmp);
2013 			return (EINVAL);
2014 		}
2015 	} else if (params->blocksize_bytes != 0) {
2016 		dev_relthread(dev, ref);
2017 		snprintf(req->error_str, sizeof(req->error_str),
2018 			 "requested blocksize %u < backing device "
2019 			 "blocksize %u", params->blocksize_bytes, tmp);
2020 		return (EINVAL);
2021 	} else
2022 		cbe_lun->blocksize = tmp;
2023 
2024 	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
2025 			     curthread);
2026 	if (error) {
2027 		dev_relthread(dev, ref);
2028 		snprintf(req->error_str, sizeof(req->error_str),
2029 			 "error %d returned for DIOCGMEDIASIZE "
2030 			 "ioctl on %s!", error,
2031 			 be_lun->dev_path);
2032 		return (error);
2033 	}
2034 
2035 	if (params->lun_size_bytes != 0) {
2036 		if (params->lun_size_bytes > otmp) {
2037 			dev_relthread(dev, ref);
2038 			snprintf(req->error_str, sizeof(req->error_str),
2039 				 "requested LUN size %ju > backing device "
2040 				 "size %ju",
2041 				 (uintmax_t)params->lun_size_bytes,
2042 				 (uintmax_t)otmp);
2043 			return (EINVAL);
2044 		}
2045 
2046 		be_lun->size_bytes = params->lun_size_bytes;
2047 	} else
2048 		be_lun->size_bytes = otmp;
2049 	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2050 	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2051 	    0 : (be_lun->size_blocks - 1);
2052 
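	/*
	 * Seed the physical geometry from the GEOM stripe size/offset;
	 * if the ioctls are not supported, fall back to no geometry.
	 * The values are validated below just as in the file case.
	 */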
2053 	error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
2054 	    curthread);
2055 	if (error)
2056 		ps = po = 0;
2057 	else {
2058 		error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
2059 		    FREAD, curthread);
2060 		if (error)
2061 			po = 0;
2062 	}
2063 	us = ps;
2064 	uo = po;
2065 
2066 	value = ctl_get_opt(&cbe_lun->options, "pblocksize");
2067 	if (value != NULL)
2068 		ctl_expand_number(value, &ps);
2069 	value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
2070 	if (value != NULL)
2071 		ctl_expand_number(value, &po);
2072 	pss = ps / cbe_lun->blocksize;
2073 	pos = po / cbe_lun->blocksize;
2074 	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
2075 	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
2076 		cbe_lun->pblockexp = fls(pss) - 1;
2077 		cbe_lun->pblockoff = (pss - pos) % pss;
2078 	}
2079 
2080 	value = ctl_get_opt(&cbe_lun->options, "ublocksize");
2081 	if (value != NULL)
2082 		ctl_expand_number(value, &us);
2083 	value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
2084 	if (value != NULL)
2085 		ctl_expand_number(value, &uo);
2086 	uss = us / cbe_lun->blocksize;
2087 	uos = uo / cbe_lun->blocksize;
2088 	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
2089 	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
2090 		cbe_lun->ublockexp = fls(uss) - 1;
2091 		cbe_lun->ublockoff = (uss - uos) % uss;
2092 	}
2093 
2094 	cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
2095 	cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;
2096 
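	/*
	 * zvols are assumed to support BIO_DELETE, so advertise UNMAP
	 * for them unconditionally; for other devices ask GEOM whether
	 * delete is supported.  The "unmap" option overrides either way.
	 */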
2097 	if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
2098 		unmap = 1;
2099 	} else {
2100 		struct diocgattr_arg	arg;
2101 
2102 		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
2103 		arg.len = sizeof(arg.value.i);
2104 		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
2105 		    curthread);
2106 		unmap = (error == 0) ? arg.value.i : 0;
2107 	}
2108 	value = ctl_get_opt(&cbe_lun->options, "unmap");
2109 	if (value != NULL)
2110 		unmap = (strcmp(value, "on") == 0);
2111 	if (unmap)
2112 		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
2113 	else
2114 		cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
2115 
2116 	dev_relthread(dev, ref);
2117 	return (0);
2118 }
2119 
2120 static int
2121 ctl_be_block_close(struct ctl_be_block_lun *be_lun)
2122 {
2123 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2124 	int flags;
2125 
2126 	if (be_lun->vn) {
2127 		flags = FREAD;
2128 		if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
2129 			flags |= FWRITE;
2130 		(void)vn_close(be_lun->vn, flags, NOCRED, curthread);
2131 		be_lun->vn = NULL;
2132 
2133 		switch (be_lun->dev_type) {
2134 		case CTL_BE_BLOCK_DEV:
2135 			break;
2136 		case CTL_BE_BLOCK_FILE:
2137 			if (be_lun->backend.file.cred != NULL) {
2138 				crfree(be_lun->backend.file.cred);
2139 				be_lun->backend.file.cred = NULL;
2140 			}
2141 			break;
2142 		case CTL_BE_BLOCK_NONE:
2143 			break;
2144 		default:
2145 			panic("Unexpected backend type.");
2146 			break;
2147 		}
2148 		be_lun->dev_type = CTL_BE_BLOCK_NONE;
2149 	}
2150 	return (0);
2151 }
2152 
2153 static int
2154 ctl_be_block_open(struct ctl_be_block_softc *softc,
2155 		  struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
2156 {
2157 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2158 	struct nameidata nd;
2159 	char		*value;
2160 	int		 error, flags;
2161 
2162 	error = 0;
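	/*
	 * The path lookup below needs the root file system mounted and
	 * valid current/root directories on this thread; we may be
	 * running in a kernel context that has not set them up.
	 */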
2163 	if (rootvnode == NULL) {
2164 		snprintf(req->error_str, sizeof(req->error_str),
2165 			 "Root filesystem is not mounted");
2166 		return (1);
2167 	}
2168 	pwd_ensure_dirs();
2169 
2170 	value = ctl_get_opt(&cbe_lun->options, "file");
2171 	if (value == NULL) {
2172 		snprintf(req->error_str, sizeof(req->error_str),
2173 			 "no file argument specified");
2174 		return (1);
2175 	}
2176 	free(be_lun->dev_path, M_CTLBLK);
2177 	be_lun->dev_path = strdup(value, M_CTLBLK);
2178 
2179 	flags = FREAD;
2180 	value = ctl_get_opt(&cbe_lun->options, "readonly");
2181 	if (value == NULL || strcmp(value, "on") != 0)
2182 		flags |= FWRITE;
2183 
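	/*
	 * Try to open read/write first; if the backing store is
	 * read-only or write permission is denied, fall back to a
	 * read-only open and mark the LUN read-only below.
	 */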
2184 again:
2185 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
2186 	error = vn_open(&nd, &flags, 0, NULL);
2187 	if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
2188 		flags &= ~FWRITE;
2189 		goto again;
2190 	}
2191 	if (error) {
2192 		/*
2193 		 * If the user doesn't give us a fully qualified path,
2194 		 * prepending /dev/ is the only reasonable guess we can
2195 		 * make.  To use a plain file, they need to specify the
2196 		 * full path.
2197 		 */
2198 		if (be_lun->dev_path[0] != '/') {
2199 			char *dev_name;
2200 
2201 			asprintf(&dev_name, M_CTLBLK, "/dev/%s",
2202 				be_lun->dev_path);
2203 			free(be_lun->dev_path, M_CTLBLK);
2204 			be_lun->dev_path = dev_name;
2205 			goto again;
2206 		}
2207 		snprintf(req->error_str, sizeof(req->error_str),
2208 		    "error opening %s: %d", be_lun->dev_path, error);
2209 		return (error);
2210 	}
2211 	if (flags & FWRITE)
2212 		cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
2213 	else
2214 		cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
2215 
2216 	NDFREE(&nd, NDF_ONLY_PNBUF);
2217 	be_lun->vn = nd.ni_vp;
2218 
2219 	/* We only support disks and files. */
2220 	if (vn_isdisk(be_lun->vn, &error)) {
2221 		error = ctl_be_block_open_dev(be_lun, req);
2222 	} else if (be_lun->vn->v_type == VREG) {
2223 		error = ctl_be_block_open_file(be_lun, req);
2224 	} else {
2225 		error = EINVAL;
2226 		snprintf(req->error_str, sizeof(req->error_str),
2227 			 "%s is not a disk or plain file", be_lun->dev_path);
2228 	}
2229 	VOP_UNLOCK(be_lun->vn, 0);
2230 
2231 	if (error != 0)
2232 		ctl_be_block_close(be_lun);
2233 	cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2234 	if (be_lun->dispatch != ctl_be_block_dispatch_dev)
2235 		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
2236 	value = ctl_get_opt(&cbe_lun->options, "serseq");
2237 	if (value != NULL && strcmp(value, "on") == 0)
2238 		cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
2239 	else if (value != NULL && strcmp(value, "read") == 0)
2240 		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
2241 	else if (value != NULL && strcmp(value, "off") == 0)
2242 		cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2243 	return (0);
2244 }
2245 
2246 static int
2247 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2248 {
2249 	struct ctl_be_lun *cbe_lun;
2250 	struct ctl_be_block_lun *be_lun;
2251 	struct ctl_lun_create_params *params;
2253 	char tmpstr[32];
2254 	char *value;
2255 	int retval, num_threads;
2256 	int tmp_num_threads;
2257 
2258 	params = &req->reqdata.create;
2259 	retval = 0;
2260 	req->status = CTL_LUN_OK;
2261 
2262 	be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
2263 	cbe_lun = &be_lun->cbe_lun;
2264 	cbe_lun->be_lun = be_lun;
2265 	be_lun->params = req->reqdata.create;
2266 	be_lun->softc = softc;
2267 	STAILQ_INIT(&be_lun->input_queue);
2268 	STAILQ_INIT(&be_lun->config_read_queue);
2269 	STAILQ_INIT(&be_lun->config_write_queue);
2270 	STAILQ_INIT(&be_lun->datamove_queue);
2271 	snprintf(be_lun->lunname, sizeof(be_lun->lunname), "cblk%d", softc->num_luns);
2272 	mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF);
2273 	mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF);
2274 	ctl_init_opts(&cbe_lun->options,
2275 	    req->num_be_args, req->kern_be_args);
2276 	be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG,
2277 	    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2278 	if (be_lun->lun_zone == NULL) {
2279 		snprintf(req->error_str, sizeof(req->error_str),
2280 			 "error allocating UMA zone");
2281 		goto bailout_error;
2282 	}
2283 
2284 	if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
2285 		cbe_lun->lun_type = params->device_type;
2286 	else
2287 		cbe_lun->lun_type = T_DIRECT;
2288 	be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED;
2289 	cbe_lun->flags = 0;
2290 	value = ctl_get_opt(&cbe_lun->options, "ha_role");
2291 	if (value != NULL) {
2292 		if (strcmp(value, "primary") == 0)
2293 			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2294 	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2295 		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2296 
2297 	if (cbe_lun->lun_type == T_DIRECT) {
2298 		be_lun->size_bytes = params->lun_size_bytes;
2299 		if (params->blocksize_bytes != 0)
2300 			cbe_lun->blocksize = params->blocksize_bytes;
2301 		else
2302 			cbe_lun->blocksize = 512;
2303 		be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2304 		cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2305 		    0 : (be_lun->size_blocks - 1);
2306 
2307 		if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2308 		    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2309 			retval = ctl_be_block_open(softc, be_lun, req);
2310 			if (retval != 0) {
2311 				retval = 0;
2312 				req->status = CTL_LUN_WARNING;
2313 			}
2314 		}
2315 		num_threads = cbb_num_threads;
2316 	} else {
2317 		num_threads = 1;
2318 	}
2319 
2320 	/*
2321 	 * XXX This searching loop might be refactored to be combined with
2322 	 * the loop above,
2323 	 * the loop above.
2324 	value = ctl_get_opt(&cbe_lun->options, "num_threads");
2325 	if (value != NULL) {
2326 		tmp_num_threads = strtol(value, NULL, 0);
2327 
2328 		/*
2329 		 * We don't let the user specify less than one
2330 		 * thread, but hope he's clueful enough not to
2331 		 * specify 1000 threads.
2332 		 */
2333 		if (tmp_num_threads < 1) {
2334 			snprintf(req->error_str, sizeof(req->error_str),
2335 				 "invalid number of threads %s", value);
2337 			goto bailout_error;
2338 		}
2339 		num_threads = tmp_num_threads;
2340 	}
2341 
2342 	if (be_lun->vn == NULL)
2343 		cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE;
2344 	/* Tell the user the blocksize we ended up using */
2345 	params->lun_size_bytes = be_lun->size_bytes;
2346 	params->blocksize_bytes = cbe_lun->blocksize;
2347 	if (params->flags & CTL_LUN_FLAG_ID_REQ) {
2348 		cbe_lun->req_lun_id = params->req_lun_id;
2349 		cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
2350 	} else
2351 		cbe_lun->req_lun_id = 0;
2352 
2353 	cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
2354 	cbe_lun->lun_config_status = ctl_be_block_lun_config_status;
2355 	cbe_lun->be = &ctl_be_block_driver;
2356 
2357 	if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
2358 		snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d",
2359 			 softc->num_luns);
2360 		strncpy((char *)cbe_lun->serial_num, tmpstr,
2361 			MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
2362 
2363 		/* Tell the user what we used for a serial number */
2364 		strncpy((char *)params->serial_num, tmpstr,
2365 			MIN(sizeof(params->serial_num), sizeof(tmpstr)));
2366 	} else {
2367 		strncpy((char *)cbe_lun->serial_num, params->serial_num,
2368 			MIN(sizeof(cbe_lun->serial_num),
2369 			sizeof(params->serial_num)));
2370 	}
2371 	if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
2372 		snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns);
2373 		strncpy((char *)cbe_lun->device_id, tmpstr,
2374 			MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
2375 
2376 		/* Tell the user what we used for a device ID */
2377 		strncpy((char *)params->device_id, tmpstr,
2378 			MIN(sizeof(params->device_id), sizeof(tmpstr)));
2379 	} else {
2380 		strncpy((char *)cbe_lun->device_id, params->device_id,
2381 			MIN(sizeof(cbe_lun->device_id),
2382 			    sizeof(params->device_id)));
2383 	}
2384 
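	/*
	 * All I/O is deferred to the worker task.  The taskqueue pointer
	 * is passed by address because taskqueue_thread_enqueue()
	 * dereferences the context at enqueue time, after
	 * taskqueue_create() has filled it in.
	 */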
2385 	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
2386 
2387 	be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK,
2388 	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2389 
2390 	if (be_lun->io_taskqueue == NULL) {
2391 		snprintf(req->error_str, sizeof(req->error_str),
2392 			 "unable to create taskqueue");
2393 		goto bailout_error;
2394 	}
2395 
2396 	/*
2397 	 * Note that we start the same number of threads by default for
2398 	 * both the file case and the block device case.  For the file
2399 	 * case, we need multiple threads to allow concurrency, because the
2400 	 * vnode interface is designed to be a blocking interface.  For the
2401 	 * block device case, ZFS zvols at least will block the caller's
2402 	 * context in many instances, and so we need multiple threads to
2403 	 * overcome that problem.  Other block devices don't need as many
2404 	 * threads, but they shouldn't cause too many problems.
2405 	 *
2406 	 * If the user wants to just have a single thread for a block
2407 	 * device, he can specify that when the LUN is created, or change
2408 	 * the tunable/sysctl to alter the default number of threads.
2409 	 */
2410 	retval = taskqueue_start_threads(&be_lun->io_taskqueue,
2411 					 /*num threads*/num_threads,
2412 					 /*priority*/PWAIT,
2413 					 /*thread name*/
2414 					 "%s taskq", be_lun->lunname);
2415 
2416 	if (retval != 0)
2417 		goto bailout_error;
2418 
2419 	be_lun->num_threads = num_threads;
2420 
2421 	mtx_lock(&softc->lock);
2422 	softc->num_luns++;
2423 	STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links);
2424 
2425 	mtx_unlock(&softc->lock);
2426 
2427 	retval = ctl_add_lun(&be_lun->cbe_lun);
2428 	if (retval != 0) {
2429 		mtx_lock(&softc->lock);
2430 		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2431 			      links);
2432 		softc->num_luns--;
2433 		mtx_unlock(&softc->lock);
2434 		snprintf(req->error_str, sizeof(req->error_str),
2435 			 "ctl_add_lun() returned error %d, see dmesg for "
2436 			 "details", retval);
2437 		retval = 0;
2438 		goto bailout_error;
2439 	}
2440 
2441 	mtx_lock(&softc->lock);
2442 
2443 	/*
2444 	 * Tell the config_status routine that we're waiting so it won't
2445 	 * clean up the LUN in the event of an error.
2446 	 */
2447 	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2448 
2449 	while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2450 		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2451 		if (retval == EINTR)
2452 			break;
2453 	}
2454 	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2455 
2456 	if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) {
2457 		snprintf(req->error_str, sizeof(req->error_str),
2458 			 "LUN configuration error, see dmesg for details");
2459 		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2460 			      links);
2461 		softc->num_luns--;
2462 		mtx_unlock(&softc->lock);
2463 		goto bailout_error;
2464 	} else {
2465 		params->req_lun_id = cbe_lun->lun_id;
2466 	}
2467 
2468 	mtx_unlock(&softc->lock);
2469 
2470 	be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id,
2471 					       cbe_lun->blocksize,
2472 					       DEVSTAT_ALL_SUPPORTED,
2473 					       cbe_lun->lun_type
2474 					       | DEVSTAT_TYPE_IF_OTHER,
2475 					       DEVSTAT_PRIORITY_OTHER);
2476 
2477 	return (retval);
2478 
2479 bailout_error:
2480 	req->status = CTL_LUN_ERROR;
2481 
2482 	if (be_lun->io_taskqueue != NULL)
2483 		taskqueue_free(be_lun->io_taskqueue);
2484 	ctl_be_block_close(be_lun);
2485 	if (be_lun->dev_path != NULL)
2486 		free(be_lun->dev_path, M_CTLBLK);
2487 	if (be_lun->lun_zone != NULL)
2488 		uma_zdestroy(be_lun->lun_zone);
2489 	ctl_free_opts(&cbe_lun->options);
2490 	mtx_destroy(&be_lun->queue_lock);
2491 	mtx_destroy(&be_lun->io_lock);
2492 	free(be_lun, M_CTLBLK);
2493 
2494 	return (retval);
2495 }
2496 
2497 static int
2498 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2499 {
2500 	struct ctl_lun_rm_params *params;
2501 	struct ctl_be_block_lun *be_lun;
2502 	struct ctl_be_lun *cbe_lun;
2503 	int retval;
2504 
2505 	params = &req->reqdata.rm;
2506 
2507 	mtx_lock(&softc->lock);
2508 	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2509 		if (be_lun->cbe_lun.lun_id == params->lun_id)
2510 			break;
2511 	}
2512 	mtx_unlock(&softc->lock);
2513 
2514 	if (be_lun == NULL) {
2515 		snprintf(req->error_str, sizeof(req->error_str),
2516 			 "LUN %u is not managed by the block backend",
2517 			 params->lun_id);
2518 		goto bailout_error;
2519 	}
2520 	cbe_lun = &be_lun->cbe_lun;
2521 
2522 	retval = ctl_disable_lun(cbe_lun);
2523 	if (retval != 0) {
2524 		snprintf(req->error_str, sizeof(req->error_str),
2525 			 "error %d returned from ctl_disable_lun() for "
2526 			 "LUN %u", retval, params->lun_id);
2527 		goto bailout_error;
2528 	}
2529 
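	/*
	 * Take the LUN offline and drain the worker taskqueue before
	 * closing the vnode, so no worker thread can touch the backing
	 * store after it is gone.
	 */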
2530 	if (be_lun->vn != NULL) {
2531 		cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE;
2532 		ctl_lun_offline(cbe_lun);
2533 		taskqueue_drain_all(be_lun->io_taskqueue);
2534 		ctl_be_block_close(be_lun);
2535 	}
2536 
2537 	retval = ctl_invalidate_lun(cbe_lun);
2538 	if (retval != 0) {
2539 		snprintf(req->error_str, sizeof(req->error_str),
2540 			 "error %d returned from ctl_invalidate_lun() for "
2541 			 "LUN %u", retval, params->lun_id);
2542 		goto bailout_error;
2543 	}
2544 
2545 	mtx_lock(&softc->lock);
2546 	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2547 	while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2548 		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2549 		if (retval == EINTR)
2550 			break;
2551 	}
2552 	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2553 
2554 	if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2555 		snprintf(req->error_str, sizeof(req->error_str),
2556 			 "interrupted waiting for LUN to be freed");
2557 		mtx_unlock(&softc->lock);
2558 		goto bailout_error;
2559 	}
2560 
2561 	STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links);
2562 
2563 	softc->num_luns--;
2564 	mtx_unlock(&softc->lock);
2565 
2566 	taskqueue_drain_all(be_lun->io_taskqueue);
2567 	taskqueue_free(be_lun->io_taskqueue);
2568 
2569 	if (be_lun->disk_stats != NULL)
2570 		devstat_remove_entry(be_lun->disk_stats);
2571 
2572 	uma_zdestroy(be_lun->lun_zone);
2573 
2574 	ctl_free_opts(&cbe_lun->options);
2575 	free(be_lun->dev_path, M_CTLBLK);
2576 	mtx_destroy(&be_lun->queue_lock);
2577 	mtx_destroy(&be_lun->io_lock);
2578 	free(be_lun, M_CTLBLK);
2579 
2580 	req->status = CTL_LUN_OK;
2581 
2582 	return (0);
2583 
2584 bailout_error:
2585 
2586 	req->status = CTL_LUN_ERROR;
2587 
2588 	return (0);
2589 }
2590 
2591 static int
2592 ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2593 {
2594 	struct ctl_lun_modify_params *params;
2595 	struct ctl_be_block_lun *be_lun;
2596 	struct ctl_be_lun *cbe_lun;
2597 	char *value;
2598 	uint64_t oldsize;
2599 	int error, wasprim;
2600 
2601 	params = &req->reqdata.modify;
2602 
2603 	mtx_lock(&softc->lock);
2604 	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2605 		if (be_lun->cbe_lun.lun_id == params->lun_id)
2606 			break;
2607 	}
2608 	mtx_unlock(&softc->lock);
2609 
2610 	if (be_lun == NULL) {
2611 		snprintf(req->error_str, sizeof(req->error_str),
2612 			 "LUN %u is not managed by the block backend",
2613 			 params->lun_id);
2614 		goto bailout_error;
2615 	}
2616 	cbe_lun = &be_lun->cbe_lun;
2617 
2618 	if (params->lun_size_bytes != 0)
2619 		be_lun->params.lun_size_bytes = params->lun_size_bytes;
2620 	ctl_update_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args);
2621 
2622 	wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
2623 	value = ctl_get_opt(&cbe_lun->options, "ha_role");
2624 	if (value != NULL) {
2625 		if (strcmp(value, "primary") == 0)
2626 			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2627 		else
2628 			cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2629 	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2630 		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2631 	else
2632 		cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2633 	if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
2634 		if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
2635 			ctl_lun_primary(cbe_lun);
2636 		else
2637 			ctl_lun_secondary(cbe_lun);
2638 	}
2639 
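	/*
	 * Reopening the backing store may change the size; remember the
	 * old one so we can notify initiators of a capacity change.
	 */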
2640 	oldsize = be_lun->size_blocks;
2641 	if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2642 	    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2643 		if (be_lun->vn == NULL)
2644 			error = ctl_be_block_open(softc, be_lun, req);
2645 		else if (vn_isdisk(be_lun->vn, &error))
2646 			error = ctl_be_block_open_dev(be_lun, req);
2647 		else if (be_lun->vn->v_type == VREG)
2648 			error = ctl_be_block_open_file(be_lun, req);
2649 		else
2650 			error = EINVAL;
2651 		if ((cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) &&
2652 		    be_lun->vn != NULL) {
2653 			cbe_lun->flags &= ~CTL_LUN_FLAG_OFFLINE;
2654 			ctl_lun_online(cbe_lun);
2655 		}
2656 	} else {
2657 		if (be_lun->vn != NULL) {
2658 			cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE;
2659 			ctl_lun_offline(cbe_lun);
2660 			taskqueue_drain_all(be_lun->io_taskqueue);
2661 			error = ctl_be_block_close(be_lun);
2662 		} else
2663 			error = 0;
2664 	}
2665 	if (be_lun->size_blocks != oldsize)
2666 		ctl_lun_capacity_changed(cbe_lun);
2667 
2668 	/* Tell the user the exact size we ended up using */
2669 	params->lun_size_bytes = be_lun->size_bytes;
2670 
2671 	req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
2672 	return (0);
2673 
2674 bailout_error:
2675 	req->status = CTL_LUN_ERROR;
2676 	return (0);
2677 }
2678 
2679 static void
2680 ctl_be_block_lun_shutdown(void *be_lun)
2681 {
2682 	struct ctl_be_block_lun *lun;
2683 	struct ctl_be_block_softc *softc;
2684 
2685 	lun = (struct ctl_be_block_lun *)be_lun;
2686 
2687 	softc = lun->softc;
2688 
2689 	mtx_lock(&softc->lock);
2690 	lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2691 	if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2692 		wakeup(lun);
2693 	mtx_unlock(&softc->lock);
2695 }
2696 
2697 static void
2698 ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status)
2699 {
2700 	struct ctl_be_block_lun *lun;
2701 	struct ctl_be_block_softc *softc;
2702 
2703 	lun = (struct ctl_be_block_lun *)be_lun;
2704 	softc = lun->softc;
2705 
2706 	if (status == CTL_LUN_CONFIG_OK) {
2707 		mtx_lock(&softc->lock);
2708 		lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2709 		if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2710 			wakeup(lun);
2711 		mtx_unlock(&softc->lock);
2712 
2713 		/*
2714 		 * We successfully added the LUN, attempt to enable it.
2715 		 */
2716 		if (ctl_enable_lun(&lun->cbe_lun) != 0) {
2717 			printf("%s: ctl_enable_lun() failed!\n", __func__);
2718 			if (ctl_invalidate_lun(&lun->cbe_lun) != 0) {
2719 				printf("%s: ctl_invalidate_lun() failed!\n",
2720 				       __func__);
2721 			}
2722 		}
2723 
2724 		return;
2725 	}
2726 
2728 	mtx_lock(&softc->lock);
2729 	lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2730 	lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR;
2731 	wakeup(lun);
2732 	mtx_unlock(&softc->lock);
2733 }
2734 
2736 static int
2737 ctl_be_block_config_write(union ctl_io *io)
2738 {
2739 	struct ctl_be_block_lun *be_lun;
2740 	struct ctl_be_lun *cbe_lun;
2741 	int retval;
2742 
2743 	retval = 0;
2744 
2745 	DPRINTF("entered\n");
2746 
2747 	cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
2748 		CTL_PRIV_BACKEND_LUN].ptr;
2749 	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
2750 
2751 	switch (io->scsiio.cdb[0]) {
2752 	case SYNCHRONIZE_CACHE:
2753 	case SYNCHRONIZE_CACHE_16:
2754 	case WRITE_SAME_10:
2755 	case WRITE_SAME_16:
2756 	case UNMAP:
2757 		/*
2758 		 * The upper level CTL code will filter out any CDBs with
2759 		 * the immediate bit set and return the proper error.
2760 		 *
2761 		 * We don't really need to worry about what LBA range the
2762 		 * user asked to be synced out.  When they issue a sync
2763 		 * cache command, we'll sync out the whole thing.
2764 		 */
2765 		mtx_lock(&be_lun->queue_lock);
2766 		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
2767 				   links);
2768 		mtx_unlock(&be_lun->queue_lock);
2769 		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
2770 		break;
2771 	case START_STOP_UNIT: {
2772 		struct scsi_start_stop_unit *cdb;
2773 
2774 		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
2775 
2776 		if (cdb->how & SSS_START)
2777 			retval = ctl_start_lun(cbe_lun);
2778 		else {
2779 			retval = ctl_stop_lun(cbe_lun);
2780 			/*
2781 			 * XXX KDM Copan-specific offline behavior.
2782 			 * Figure out a reasonable way to port this?
2783 			 */
2784 #ifdef NEEDTOPORT
2785 			if ((retval == 0)
2786 			 && (cdb->byte2 & SSS_ONOFFLINE))
2787 				retval = ctl_lun_offline(cbe_lun);
2788 #endif
2789 		}
2790 
2791 		/*
2792 		 * In general, the above routines should not fail.  They
2793 		 * just set state for the LUN.  So we've got something
2794 		 * pretty wrong here if we can't start or stop the LUN.
2795 		 */
2796 		if (retval != 0) {
2797 			ctl_set_internal_failure(&io->scsiio,
2798 						 /*sks_valid*/ 1,
2799 						 /*retry_count*/ 0xf051);
2800 			retval = CTL_RETVAL_COMPLETE;
2801 		} else {
2802 			ctl_set_success(&io->scsiio);
2803 		}
2804 		ctl_config_write_done(io);
2805 		break;
2806 	}
2807 	default:
2808 		ctl_set_invalid_opcode(&io->scsiio);
2809 		ctl_config_write_done(io);
2810 		retval = CTL_RETVAL_COMPLETE;
2811 		break;
2812 	}
2813 
2814 	return (retval);
2815 }
2816 
2817 static int
2818 ctl_be_block_config_read(union ctl_io *io)
2819 {
2820 	struct ctl_be_block_lun *be_lun;
2821 	struct ctl_be_lun *cbe_lun;
2822 	int retval = 0;
2823 
2824 	DPRINTF("entered\n");
2825 
2826 	cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
2827 		CTL_PRIV_BACKEND_LUN].ptr;
2828 	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
2829 
2830 	switch (io->scsiio.cdb[0]) {
2831 	case SERVICE_ACTION_IN:
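		/*
		 * GET LBA STATUS needs to query the backing store, so
		 * hand it to a worker thread like regular I/O.
		 */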
2832 		if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
2833 			mtx_lock(&be_lun->queue_lock);
2834 			STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
2835 			    &io->io_hdr, links);
2836 			mtx_unlock(&be_lun->queue_lock);
2837 			taskqueue_enqueue(be_lun->io_taskqueue,
2838 			    &be_lun->io_task);
2839 			retval = CTL_RETVAL_QUEUED;
2840 			break;
2841 		}
2842 		ctl_set_invalid_field(&io->scsiio,
2843 				      /*sks_valid*/ 1,
2844 				      /*command*/ 1,
2845 				      /*field*/ 1,
2846 				      /*bit_valid*/ 1,
2847 				      /*bit*/ 4);
2848 		ctl_config_read_done(io);
2849 		retval = CTL_RETVAL_COMPLETE;
2850 		break;
2851 	default:
2852 		ctl_set_invalid_opcode(&io->scsiio);
2853 		ctl_config_read_done(io);
2854 		retval = CTL_RETVAL_COMPLETE;
2855 		break;
2856 	}
2857 
2858 	return (retval);
2859 }
2860 
2861 static int
2862 ctl_be_block_lun_info(void *be_lun, struct sbuf *sb)
2863 {
2864 	struct ctl_be_block_lun *lun;
2865 	int retval;
2866 
2867 	lun = (struct ctl_be_block_lun *)be_lun;
2868 	retval = 0;
2869 
2870 	retval = sbuf_printf(sb, "\t<num_threads>");
2871 
2872 	if (retval != 0)
2873 		goto bailout;
2874 
2875 	retval = sbuf_printf(sb, "%d", lun->num_threads);
2876 
2877 	if (retval != 0)
2878 		goto bailout;
2879 
2880 	retval = sbuf_printf(sb, "</num_threads>\n");
2881 
2882 bailout:
2883 
2884 	return (retval);
2885 }
2886 
2887 static uint64_t
2888 ctl_be_block_lun_attr(void *be_lun, const char *attrname)
2889 {
2890 	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)be_lun;
2891 
2892 	if (lun->getattr == NULL)
2893 		return (UINT64_MAX);
2894 	return (lun->getattr(lun, attrname));
2895 }
2896 
2897 int
2898 ctl_be_block_init(void)
2899 {
2900 	struct ctl_be_block_softc *softc;
2901 	int retval;
2902 
2903 	softc = &backend_block_softc;
2904 	retval = 0;
2905 
2906 	mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
2907 	beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2908 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2909 	STAILQ_INIT(&softc->lun_list);
2910 
2911 	return (retval);
2912 }
2913