xref: /freebsd/sys/cam/ctl/ctl_backend_block.c (revision 5ca34122ecdd5abc62bdae39663fec9ac8523d87)
1 /*-
2  * Copyright (c) 2003 Silicon Graphics International Corp.
3  * Copyright (c) 2009-2011 Spectra Logic Corporation
4  * Copyright (c) 2012 The FreeBSD Foundation
5  * All rights reserved.
6  *
7  * Portions of this software were developed by Edward Tomasz Napierala
8  * under sponsorship from the FreeBSD Foundation.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions, and the following disclaimer,
15  *    without modification.
16  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
17  *    substantially similar to the "NO WARRANTY" disclaimer below
18  *    ("Disclaimer") and any redistribution must be conditioned upon
19  *    including a substantially similar Disclaimer requirement for further
20  *    binary redistribution.
21  *
22  * NO WARRANTY
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
32  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGES.
34  *
35  * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
36  */
37 /*
38  * CAM Target Layer driver backend for block devices.
39  *
40  * Author: Ken Merry <ken@FreeBSD.org>
41  */
42 #include <sys/cdefs.h>
43 __FBSDID("$FreeBSD$");
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/kernel.h>
48 #include <sys/types.h>
49 #include <sys/kthread.h>
50 #include <sys/bio.h>
51 #include <sys/fcntl.h>
52 #include <sys/limits.h>
53 #include <sys/lock.h>
54 #include <sys/mutex.h>
55 #include <sys/condvar.h>
56 #include <sys/malloc.h>
57 #include <sys/conf.h>
58 #include <sys/ioccom.h>
59 #include <sys/queue.h>
60 #include <sys/sbuf.h>
61 #include <sys/endian.h>
62 #include <sys/uio.h>
63 #include <sys/buf.h>
64 #include <sys/taskqueue.h>
65 #include <sys/vnode.h>
66 #include <sys/namei.h>
67 #include <sys/mount.h>
68 #include <sys/disk.h>
69 #include <sys/fcntl.h>
70 #include <sys/filedesc.h>
71 #include <sys/filio.h>
72 #include <sys/proc.h>
73 #include <sys/pcpu.h>
74 #include <sys/module.h>
75 #include <sys/sdt.h>
76 #include <sys/devicestat.h>
77 #include <sys/sysctl.h>
78 
79 #include <geom/geom.h>
80 
81 #include <cam/cam.h>
82 #include <cam/scsi/scsi_all.h>
83 #include <cam/scsi/scsi_da.h>
84 #include <cam/ctl/ctl_io.h>
85 #include <cam/ctl/ctl.h>
86 #include <cam/ctl/ctl_backend.h>
87 #include <cam/ctl/ctl_ioctl.h>
88 #include <cam/ctl/ctl_ha.h>
89 #include <cam/ctl/ctl_scsi_all.h>
90 #include <cam/ctl/ctl_private.h>
91 #include <cam/ctl/ctl_error.h>
92 
93 /*
94  * The idea here is that we'll allocate enough S/G space to hold a 1MB
95  * I/O.  If we get an I/O larger than that, we'll split it.
96  */
97 #define	CTLBLK_HALF_IO_SIZE	(512 * 1024)	/* bytes; half of CTLBLK_MAX_IO_SIZE */
98 #define	CTLBLK_MAX_IO_SIZE	(CTLBLK_HALF_IO_SIZE * 2)	/* 1MB; larger I/Os are split */
99 #define	CTLBLK_MAX_SEG		MAXPHYS	/* presumably the byte limit of one S/G segment -- confirm at the allocation site */
100 #define	CTLBLK_HALF_SEGS	MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1)	/* segments per half, at least 1; compare keeps its second S/G list at this index */
101 #define	CTLBLK_MAX_SEGS		(CTLBLK_HALF_SEGS * 2)	/* length of the per-I/O sg_segs[] and xiovecs[] arrays */
102 
/*
 * Debug printf: prefixes the message with "cbb(<function>:<line>): " when
 * CTLBLK_DEBUG is defined, and expands to an empty statement otherwise.
 */
103 #ifdef CTLBLK_DEBUG
104 #define DPRINTF(fmt, args...) \
105     printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
106 #else
107 #define DPRINTF(fmt, args...) do {} while(0)
108 #endif
109 
/*
 * Accessors for two slots of the I/O header's ctl_private[] array:
 * PRIV() views the CTL_PRIV_BACKEND slot as a ctl_ptr_len_flags (its ptr
 * member is read back as the struct ctl_be_block_io of the I/O), and ARGS()
 * views the CTL_PRIV_LBA_LEN slot as a ctl_lba_len_flags (LBA and flags of
 * the request).
 */
110 #define PRIV(io)	\
111     ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
112 #define ARGS(io)	\
113     ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
114 
/* SDT provider under which this file's read/write probes are defined. */
115 SDT_PROVIDER_DEFINE(cbb);
116 
/*
 * State flags kept in ctl_be_block_lun.flags.  Each value is a distinct bit,
 * so they can be combined.  The code that sets and tests them is not in this
 * part of the file.
 */
117 typedef enum {
118 	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
119 	CTL_BE_BLOCK_LUN_CONFIG_ERR	= 0x02,
120 	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
121 } ctl_be_block_lun_flags;
122 
/*
 * Kind of backing store behind a LUN; stored in ctl_be_block_lun.dev_type.
 * CTL_BE_BLOCK_NONE is the first enumerator and therefore has value 0.
 */
123 typedef enum {
124 	CTL_BE_BLOCK_NONE,
125 	CTL_BE_BLOCK_DEV,
126 	CTL_BE_BLOCK_FILE
127 } ctl_be_block_type;
128 
/* State specific to a file-backed LUN. */
129 struct ctl_be_block_filedata {
130 	struct ucred *cred;	/* credentials handed to VOP_READ()/VOP_WRITE() on the backing file */
131 };
132 
/* Backend-type specific data; currently only the file variant exists. */
133 union ctl_be_block_bedata {
134 	struct ctl_be_block_filedata file;
135 };
136 
137 struct ctl_be_block_io;
138 struct ctl_be_block_lun;
139 
/* Handler that carries out one per-I/O request (beio) against a LUN. */
140 typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
141 			       struct ctl_be_block_io *beio);
/* Handler that returns the value of the LUN attribute named attrname. */
142 typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
143 				  const char *attrname);
144 
145 /*
146  * Backend LUN structure.  There is a 1:1 mapping between a block device
147  * and a backend block LUN, and between a backend block LUN and a CTL LUN.
148  */
149 struct ctl_be_block_lun {
150 	struct ctl_lun_create_params params;
151 	char lunname[32];
152 	char *dev_path;
153 	ctl_be_block_type dev_type;
154 	struct vnode *vn;	/* backing vnode used by the VOP_*() and devvn_refthread() calls */
155 	union ctl_be_block_bedata backend;	/* backend.file.cred is used for file reads/writes */
	/*
	 * Per-backend handlers.  Presumably pointed at the _file, _dev or
	 * _zvol variants when the backing store is opened; the assignments
	 * are not in this part of the file.
	 */
156 	cbb_dispatch_t dispatch;
157 	cbb_dispatch_t lun_flush;
158 	cbb_dispatch_t unmap;
159 	cbb_dispatch_t get_lba_status;
160 	cbb_getattr_t getattr;
161 	uma_zone_t lun_zone;	/* zone the S/G segment buffers are returned to in ctl_free_beio() */
162 	uint64_t size_blocks;
163 	uint64_t size_bytes;	/* used as the end offset when LBA status cannot be determined */
164 	struct ctl_be_block_softc *softc;
165 	struct devstat *disk_stats;	/* passed to devstat_start/end_transaction() */
166 	ctl_be_block_lun_flags flags;
167 	STAILQ_ENTRY(ctl_be_block_lun) links;
168 	struct ctl_be_lun cbe_lun;	/* cbe_lun.blocksize converts between LBAs and byte offsets */
169 	struct taskqueue *io_taskqueue;	/* io_task is enqueued here from ctl_be_block_move_done() */
170 	struct task io_task;
171 	int num_threads;
172 	STAILQ_HEAD(, ctl_io_hdr) input_queue;
173 	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
174 	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
175 	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;	/* writes whose data-in transfer finished; filled under queue_lock */
176 	struct mtx_padalign io_lock;	/* held around devstat calls and bio completion accounting */
177 	struct mtx_padalign queue_lock;	/* held while inserting into datamove_queue */
178 };
179 
180 /*
181  * Overall softc structure for the block backend module.
182  */
183 struct ctl_be_block_softc {
184 	struct mtx			 lock;
185 	int				 num_luns;
186 	STAILQ_HEAD(, ctl_be_block_lun)	 lun_list;	/* linked through ctl_be_block_lun.links */
187 };
188 
/* The single module-wide softc instance. */
189 static struct ctl_be_block_softc backend_block_softc;
190 
191 /*
192  * Per-I/O information.
193  */
194 struct ctl_be_block_io {
195 	union ctl_io			*io;	/* CTL I/O this request belongs to */
196 	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS];	/* data segments; for compare the second list starts at CTLBLK_HALF_SEGS */
197 	struct iovec			xiovecs[CTLBLK_MAX_SEGS];	/* filled from sg_segs[] to build the uio for file/zvol I/O */
198 	int				bio_cmd;	/* compared against BIO_READ/BIO_WRITE/BIO_FLUSH/BIO_DELETE */
199 	int				num_segs;	/* number of sg_segs[] entries in use */
200 	int				num_bios_sent;
201 	int				num_bios_done;	/* incremented by ctl_be_block_biodone() under io_lock */
202 	int				send_complete;	/* biodone finishes the I/O only once this is nonzero and done >= sent */
203 	int				num_errors;	/* count of bios that completed with a nonzero bio_error */
204 	struct bintime			ds_t0;	/* start time handed to the devstat calls */
205 	devstat_tag_type		ds_tag_type;
206 	devstat_trans_flags		ds_trans_type;
207 	uint64_t			io_len;	/* byte count; used as uio_resid and for devstat */
208 	uint64_t			io_offset;	/* byte offset; used as uio_offset */
209 	int				io_arg;	/* for file flush: nonzero selects MNT_NOWAIT instead of MNT_WAIT */
210 	struct ctl_be_block_softc	*softc;	/* set by ctl_alloc_beio() */
211 	struct ctl_be_block_lun		*lun;
212 	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
213 };
214 
215 extern struct ctl_softc *control_softc;
216 
/*
 * Exported as the num_threads integer under the kern.cam.ctl "block" sysctl
 * node (CTLFLAG_RWTUN); defaults to 14.
 */
217 static int cbb_num_threads = 14;
218 SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
219 	    "CAM Target Layer Block Backend");
220 SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
221            &cbb_num_threads, 0, "Number of threads per backing file");
222 
/*
 * Forward declarations for the functions of this file.  Everything is
 * file-local (static) except ctl_be_block_init(), which is referenced by
 * the driver descriptor's .init member.
 */
223 static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
224 static void ctl_free_beio(struct ctl_be_block_io *beio);
225 static void ctl_complete_beio(struct ctl_be_block_io *beio);
226 static int ctl_be_block_move_done(union ctl_io *io);
227 static void ctl_be_block_biodone(struct bio *bio);
228 static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
229 				    struct ctl_be_block_io *beio);
230 static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
231 				       struct ctl_be_block_io *beio);
232 static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
233 				  struct ctl_be_block_io *beio);
234 static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
235 					 const char *attrname);
236 static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
237 				   struct ctl_be_block_io *beio);
238 static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
239 				   struct ctl_be_block_io *beio);
240 static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
241 				      struct ctl_be_block_io *beio);
242 static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
243 					 const char *attrname);
244 static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
245 				    union ctl_io *io);
246 static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
247 				    union ctl_io *io);
248 static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
249 				  union ctl_io *io);
250 static void ctl_be_block_worker(void *context, int pending);
251 static int ctl_be_block_submit(union ctl_io *io);
252 static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
253 				   int flag, struct thread *td);
254 static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
255 				  struct ctl_lun_req *req);
256 static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
257 				 struct ctl_lun_req *req);
258 static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
259 static int ctl_be_block_open(struct ctl_be_block_softc *softc,
260 			     struct ctl_be_block_lun *be_lun,
261 			     struct ctl_lun_req *req);
262 static int ctl_be_block_create(struct ctl_be_block_softc *softc,
263 			       struct ctl_lun_req *req);
264 static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
265 			   struct ctl_lun_req *req);
266 static int ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
267 				  struct ctl_lun_req *req);
268 static int ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
269 				 struct ctl_lun_req *req);
270 static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
271 			   struct ctl_lun_req *req);
272 static void ctl_be_block_lun_shutdown(void *be_lun);
273 static void ctl_be_block_lun_config_status(void *be_lun,
274 					   ctl_lun_config_status status);
275 static int ctl_be_block_config_write(union ctl_io *io);
276 static int ctl_be_block_config_read(union ctl_io *io);
277 static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb);
278 static uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname);
279 int ctl_be_block_init(void);
280 
/*
 * Backend driver descriptor: names the backend "block" and lists the entry
 * points of this file that are handed to CTL through CTL_BACKEND_DECLARE().
 */
281 static struct ctl_backend_driver ctl_be_block_driver =
282 {
283 	.name = "block",
284 	.flags = CTL_BE_FLAG_HAS_CONFIG,
285 	.init = ctl_be_block_init,
286 	.data_submit = ctl_be_block_submit,
287 	.data_move_done = ctl_be_block_move_done,
288 	.config_read = ctl_be_block_config_read,
289 	.config_write = ctl_be_block_config_write,
290 	.ioctl = ctl_be_block_ioctl,
291 	.lun_info = ctl_be_block_lun_info,
292 	.lun_attr = ctl_be_block_lun_attr
293 };
294 
295 MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend");
296 CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);
297 
/* UMA zone that struct ctl_be_block_io objects are allocated from and freed to. */
298 static uma_zone_t beio_zone;
299 
300 static struct ctl_be_block_io *
301 ctl_alloc_beio(struct ctl_be_block_softc *softc)
302 {
303 	struct ctl_be_block_io *beio;
304 
305 	beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO);
306 	beio->softc = softc;
307 	return (beio);
308 }
309 
310 static void
311 ctl_free_beio(struct ctl_be_block_io *beio)
312 {
313 	int duplicate_free;
314 	int i;
315 
316 	duplicate_free = 0;
317 
318 	for (i = 0; i < beio->num_segs; i++) {
319 		if (beio->sg_segs[i].addr == NULL)
320 			duplicate_free++;
321 
322 		uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr);
323 		beio->sg_segs[i].addr = NULL;
324 
325 		/* For compare we had two equal S/G lists. */
326 		if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) {
327 			uma_zfree(beio->lun->lun_zone,
328 			    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr);
329 			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL;
330 		}
331 	}
332 
333 	if (duplicate_free > 0) {
334 		printf("%s: %d duplicate frees out of %d segments\n", __func__,
335 		       duplicate_free, beio->num_segs);
336 	}
337 
338 	uma_zfree(beio_zone, beio);
339 }
340 
341 static void
342 ctl_complete_beio(struct ctl_be_block_io *beio)
343 {
344 	union ctl_io *io = beio->io;
345 
346 	if (beio->beio_cont != NULL) {
347 		beio->beio_cont(beio);
348 	} else {
349 		ctl_free_beio(beio);
350 		ctl_data_submit_done(io);
351 	}
352 }
353 
/*
 * Return the length of the common prefix of two byte buffers.
 *
 * a, b: buffers to compare; each must hold at least size bytes.  Neither is
 *       modified, hence the const qualification.
 * size: number of bytes to compare.
 *
 * Returns the index of the first differing byte, or size when the first
 * size bytes are equal (including size == 0).
 */
static size_t
cmp(const uint8_t *a, const uint8_t *b, size_t size)
{
	size_t i;

	for (i = 0; i < size && a[i] == b[i]; i++)
		continue;
	return (i);
}
365 
366 static void
367 ctl_be_block_compare(union ctl_io *io)
368 {
369 	struct ctl_be_block_io *beio;
370 	uint64_t off, res;
371 	int i;
372 	uint8_t info[8];
373 
374 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
375 	off = 0;
376 	for (i = 0; i < beio->num_segs; i++) {
377 		res = cmp(beio->sg_segs[i].addr,
378 		    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
379 		    beio->sg_segs[i].len);
380 		off += res;
381 		if (res < beio->sg_segs[i].len)
382 			break;
383 	}
384 	if (i < beio->num_segs) {
385 		scsi_u64to8b(off, info);
386 		ctl_set_sense(&io->scsiio, /*current_error*/ 1,
387 		    /*sense_key*/ SSD_KEY_MISCOMPARE,
388 		    /*asc*/ 0x1D, /*ascq*/ 0x00,
389 		    /*type*/ SSD_ELEM_INFO,
390 		    /*size*/ sizeof(info), /*data*/ &info,
391 		    /*type*/ SSD_ELEM_NONE);
392 	} else
393 		ctl_set_success(&io->scsiio);
394 }
395 
396 static int
397 ctl_be_block_move_done(union ctl_io *io)
398 {
399 	struct ctl_be_block_io *beio;
400 	struct ctl_be_block_lun *be_lun;
401 	struct ctl_lba_len_flags *lbalen;
402 #ifdef CTL_TIME_IO
403 	struct bintime cur_bt;
404 #endif
405 
406 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
407 	be_lun = beio->lun;
408 
409 	DPRINTF("entered\n");
410 
411 #ifdef CTL_TIME_IO
412 	getbintime(&cur_bt);
413 	bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
414 	bintime_add(&io->io_hdr.dma_bt, &cur_bt);
415 	io->io_hdr.num_dmas++;
416 #endif
417 	io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;
418 
419 	/*
420 	 * We set status at this point for read commands, and write
421 	 * commands with errors.
422 	 */
423 	if (io->io_hdr.flags & CTL_FLAG_ABORT) {
424 		;
425 	} else if ((io->io_hdr.port_status == 0) &&
426 	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
427 		lbalen = ARGS(beio->io);
428 		if (lbalen->flags & CTL_LLF_READ) {
429 			ctl_set_success(&io->scsiio);
430 		} else if (lbalen->flags & CTL_LLF_COMPARE) {
431 			/* We have two data blocks ready for comparison. */
432 			ctl_be_block_compare(io);
433 		}
434 	} else if ((io->io_hdr.port_status != 0) &&
435 	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
436 	     (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
437 		/*
438 		 * For hardware error sense keys, the sense key
439 		 * specific value is defined to be a retry count,
440 		 * but we use it to pass back an internal FETD
441 		 * error code.  XXX KDM  Hopefully the FETD is only
442 		 * using 16 bits for an error code, since that's
443 		 * all the space we have in the sks field.
444 		 */
445 		ctl_set_internal_failure(&io->scsiio,
446 					 /*sks_valid*/ 1,
447 					 /*retry_count*/
448 					 io->io_hdr.port_status);
449 	}
450 
451 	/*
452 	 * If this is a read, or a write with errors, it is done.
453 	 */
454 	if ((beio->bio_cmd == BIO_READ)
455 	 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
456 	 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
457 		ctl_complete_beio(beio);
458 		return (0);
459 	}
460 
461 	/*
462 	 * At this point, we have a write and the DMA completed
463 	 * successfully.  We now have to queue it to the task queue to
464 	 * execute the backend I/O.  That is because we do blocking
465 	 * memory allocations, and in the file backing case, blocking I/O.
466 	 * This move done routine is generally called in the SIM's
467 	 * interrupt context, and therefore we cannot block.
468 	 */
469 	mtx_lock(&be_lun->queue_lock);
470 	/*
471 	 * XXX KDM make sure that links is okay to use at this point.
472 	 * Otherwise, we either need to add another field to ctl_io_hdr,
473 	 * or deal with resource allocation here.
474 	 */
475 	STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
476 	mtx_unlock(&be_lun->queue_lock);
477 
478 	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
479 
480 	return (0);
481 }
482 
483 static void
484 ctl_be_block_biodone(struct bio *bio)
485 {
486 	struct ctl_be_block_io *beio;
487 	struct ctl_be_block_lun *be_lun;
488 	union ctl_io *io;
489 	int error;
490 
491 	beio = bio->bio_caller1;
492 	be_lun = beio->lun;
493 	io = beio->io;
494 
495 	DPRINTF("entered\n");
496 
497 	error = bio->bio_error;
498 	mtx_lock(&be_lun->io_lock);
499 	if (error != 0)
500 		beio->num_errors++;
501 
502 	beio->num_bios_done++;
503 
504 	/*
505 	 * XXX KDM will this cause WITNESS to complain?  Holding a lock
506 	 * during the free might cause it to complain.
507 	 */
508 	g_destroy_bio(bio);
509 
510 	/*
511 	 * If the send complete bit isn't set, or we aren't the last I/O to
512 	 * complete, then we're done.
513 	 */
514 	if ((beio->send_complete == 0)
515 	 || (beio->num_bios_done < beio->num_bios_sent)) {
516 		mtx_unlock(&be_lun->io_lock);
517 		return;
518 	}
519 
520 	/*
521 	 * At this point, we've verified that we are the last I/O to
522 	 * complete, so it's safe to drop the lock.
523 	 */
524 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
525 	    beio->ds_tag_type, beio->ds_trans_type,
526 	    /*now*/ NULL, /*then*/&beio->ds_t0);
527 	mtx_unlock(&be_lun->io_lock);
528 
529 	/*
530 	 * If there are any errors from the backing device, we fail the
531 	 * entire I/O with a medium error.
532 	 */
533 	if (beio->num_errors > 0) {
534 		if (error == EOPNOTSUPP) {
535 			ctl_set_invalid_opcode(&io->scsiio);
536 		} else if (error == ENOSPC || error == EDQUOT) {
537 			ctl_set_space_alloc_fail(&io->scsiio);
538 		} else if (error == EROFS || error == EACCES) {
539 			ctl_set_hw_write_protected(&io->scsiio);
540 		} else if (beio->bio_cmd == BIO_FLUSH) {
541 			/* XXX KDM is there is a better error here? */
542 			ctl_set_internal_failure(&io->scsiio,
543 						 /*sks_valid*/ 1,
544 						 /*retry_count*/ 0xbad2);
545 		} else {
546 			ctl_set_medium_error(&io->scsiio,
547 			    beio->bio_cmd == BIO_READ);
548 		}
549 		ctl_complete_beio(beio);
550 		return;
551 	}
552 
553 	/*
554 	 * If this is a write, a flush, a delete or verify, we're all done.
555 	 * If this is a read, we can now send the data to the user.
556 	 */
557 	if ((beio->bio_cmd == BIO_WRITE)
558 	 || (beio->bio_cmd == BIO_FLUSH)
559 	 || (beio->bio_cmd == BIO_DELETE)
560 	 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
561 		ctl_set_success(&io->scsiio);
562 		ctl_complete_beio(beio);
563 	} else {
564 		if ((ARGS(io)->flags & CTL_LLF_READ) &&
565 		    beio->beio_cont == NULL) {
566 			ctl_set_success(&io->scsiio);
567 			ctl_serseq_done(io);
568 		}
569 #ifdef CTL_TIME_IO
570         	getbintime(&io->io_hdr.dma_start_bt);
571 #endif
572 		ctl_datamove(io);
573 	}
574 }
575 
576 static void
577 ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
578 			struct ctl_be_block_io *beio)
579 {
580 	union ctl_io *io = beio->io;
581 	struct mount *mountpoint;
582 	int error, lock_flags;
583 
584 	DPRINTF("entered\n");
585 
586 	binuptime(&beio->ds_t0);
587 	mtx_lock(&be_lun->io_lock);
588 	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
589 	mtx_unlock(&be_lun->io_lock);
590 
591 	(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
592 
593 	if (MNT_SHARED_WRITES(mountpoint)
594 	 || ((mountpoint == NULL)
595 	  && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
596 		lock_flags = LK_SHARED;
597 	else
598 		lock_flags = LK_EXCLUSIVE;
599 
600 	vn_lock(be_lun->vn, lock_flags | LK_RETRY);
601 
602 	error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
603 	    curthread);
604 	VOP_UNLOCK(be_lun->vn, 0);
605 
606 	vn_finished_write(mountpoint);
607 
608 	mtx_lock(&be_lun->io_lock);
609 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
610 	    beio->ds_tag_type, beio->ds_trans_type,
611 	    /*now*/ NULL, /*then*/&beio->ds_t0);
612 	mtx_unlock(&be_lun->io_lock);
613 
614 	if (error == 0)
615 		ctl_set_success(&io->scsiio);
616 	else {
617 		/* XXX KDM is there is a better error here? */
618 		ctl_set_internal_failure(&io->scsiio,
619 					 /*sks_valid*/ 1,
620 					 /*retry_count*/ 0xbad1);
621 	}
622 
623 	ctl_complete_beio(beio);
624 }
625 
/*
 * SDT probes of the "cbb" provider.  They are fired at the start and at the
 * end of reads and writes in the file and zvol dispatch routines.
 */
626 SDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t");
627 SDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t");
628 SDT_PROBE_DEFINE1(cbb, kernel, read, file_done,"uint64_t");
629 SDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t");
630 
631 static void
632 ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
633 			   struct ctl_be_block_io *beio)
634 {
635 	struct ctl_be_block_filedata *file_data;
636 	union ctl_io *io;
637 	struct uio xuio;
638 	struct iovec *xiovec;
639 	size_t s;
640 	int error, flags, i;
641 
642 	DPRINTF("entered\n");
643 
644 	file_data = &be_lun->backend.file;
645 	io = beio->io;
646 	flags = 0;
647 	if (ARGS(io)->flags & CTL_LLF_DPO)
648 		flags |= IO_DIRECT;
649 	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
650 		flags |= IO_SYNC;
651 
652 	bzero(&xuio, sizeof(xuio));
653 	if (beio->bio_cmd == BIO_READ) {
654 		SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
655 		xuio.uio_rw = UIO_READ;
656 	} else {
657 		SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
658 		xuio.uio_rw = UIO_WRITE;
659 	}
660 	xuio.uio_offset = beio->io_offset;
661 	xuio.uio_resid = beio->io_len;
662 	xuio.uio_segflg = UIO_SYSSPACE;
663 	xuio.uio_iov = beio->xiovecs;
664 	xuio.uio_iovcnt = beio->num_segs;
665 	xuio.uio_td = curthread;
666 
667 	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
668 		xiovec->iov_base = beio->sg_segs[i].addr;
669 		xiovec->iov_len = beio->sg_segs[i].len;
670 	}
671 
672 	binuptime(&beio->ds_t0);
673 	mtx_lock(&be_lun->io_lock);
674 	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
675 	mtx_unlock(&be_lun->io_lock);
676 
677 	if (beio->bio_cmd == BIO_READ) {
678 		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
679 
680 		/*
681 		 * UFS pays attention to IO_DIRECT for reads.  If the
682 		 * DIRECTIO option is configured into the kernel, it calls
683 		 * ffs_rawread().  But that only works for single-segment
684 		 * uios with user space addresses.  In our case, with a
685 		 * kernel uio, it still reads into the buffer cache, but it
686 		 * will just try to release the buffer from the cache later
687 		 * on in ffs_read().
688 		 *
689 		 * ZFS does not pay attention to IO_DIRECT for reads.
690 		 *
691 		 * UFS does not pay attention to IO_SYNC for reads.
692 		 *
693 		 * ZFS pays attention to IO_SYNC (which translates into the
694 		 * Solaris define FRSYNC for zfs_read()) for reads.  It
695 		 * attempts to sync the file before reading.
696 		 */
697 		error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);
698 
699 		VOP_UNLOCK(be_lun->vn, 0);
700 		SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
701 		if (error == 0 && xuio.uio_resid > 0) {
702 			/*
703 			 * If we red less then requested (EOF), then
704 			 * we should clean the rest of the buffer.
705 			 */
706 			s = beio->io_len - xuio.uio_resid;
707 			for (i = 0; i < beio->num_segs; i++) {
708 				if (s >= beio->sg_segs[i].len) {
709 					s -= beio->sg_segs[i].len;
710 					continue;
711 				}
712 				bzero((uint8_t *)beio->sg_segs[i].addr + s,
713 				    beio->sg_segs[i].len - s);
714 				s = 0;
715 			}
716 		}
717 	} else {
718 		struct mount *mountpoint;
719 		int lock_flags;
720 
721 		(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
722 
723 		if (MNT_SHARED_WRITES(mountpoint)
724 		 || ((mountpoint == NULL)
725 		  && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
726 			lock_flags = LK_SHARED;
727 		else
728 			lock_flags = LK_EXCLUSIVE;
729 
730 		vn_lock(be_lun->vn, lock_flags | LK_RETRY);
731 
732 		/*
733 		 * UFS pays attention to IO_DIRECT for writes.  The write
734 		 * is done asynchronously.  (Normally the write would just
735 		 * get put into cache.
736 		 *
737 		 * UFS pays attention to IO_SYNC for writes.  It will
738 		 * attempt to write the buffer out synchronously if that
739 		 * flag is set.
740 		 *
741 		 * ZFS does not pay attention to IO_DIRECT for writes.
742 		 *
743 		 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
744 		 * for writes.  It will flush the transaction from the
745 		 * cache before returning.
746 		 */
747 		error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
748 		VOP_UNLOCK(be_lun->vn, 0);
749 
750 		vn_finished_write(mountpoint);
751 		SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);
752         }
753 
754 	mtx_lock(&be_lun->io_lock);
755 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
756 	    beio->ds_tag_type, beio->ds_trans_type,
757 	    /*now*/ NULL, /*then*/&beio->ds_t0);
758 	mtx_unlock(&be_lun->io_lock);
759 
760 	/*
761 	 * If we got an error, set the sense data to "MEDIUM ERROR" and
762 	 * return the I/O to the user.
763 	 */
764 	if (error != 0) {
765 		if (error == ENOSPC || error == EDQUOT) {
766 			ctl_set_space_alloc_fail(&io->scsiio);
767 		} else if (error == EROFS || error == EACCES) {
768 			ctl_set_hw_write_protected(&io->scsiio);
769 		} else {
770 			ctl_set_medium_error(&io->scsiio,
771 			    beio->bio_cmd == BIO_READ);
772 		}
773 		ctl_complete_beio(beio);
774 		return;
775 	}
776 
777 	/*
778 	 * If this is a write or a verify, we're all done.
779 	 * If this is a read, we can now send the data to the user.
780 	 */
781 	if ((beio->bio_cmd == BIO_WRITE) ||
782 	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
783 		ctl_set_success(&io->scsiio);
784 		ctl_complete_beio(beio);
785 	} else {
786 		if ((ARGS(io)->flags & CTL_LLF_READ) &&
787 		    beio->beio_cont == NULL) {
788 			ctl_set_success(&io->scsiio);
789 			ctl_serseq_done(io);
790 		}
791 #ifdef CTL_TIME_IO
792         	getbintime(&io->io_hdr.dma_start_bt);
793 #endif
794 		ctl_datamove(io);
795 	}
796 }
797 
798 static void
799 ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
800 			struct ctl_be_block_io *beio)
801 {
802 	union ctl_io *io = beio->io;
803 	struct ctl_lba_len_flags *lbalen = ARGS(io);
804 	struct scsi_get_lba_status_data *data;
805 	off_t roff, off;
806 	int error, status;
807 
808 	DPRINTF("entered\n");
809 
810 	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
811 	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
812 	error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
813 	    0, curthread->td_ucred, curthread);
814 	if (error == 0 && off > roff)
815 		status = 0;	/* mapped up to off */
816 	else {
817 		error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
818 		    0, curthread->td_ucred, curthread);
819 		if (error == 0 && off > roff)
820 			status = 1;	/* deallocated up to off */
821 		else {
822 			status = 0;	/* unknown up to the end */
823 			off = be_lun->size_bytes;
824 		}
825 	}
826 	VOP_UNLOCK(be_lun->vn, 0);
827 
828 	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
829 	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
830 	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
831 	    lbalen->lba), data->descr[0].length);
832 	data->descr[0].status = status;
833 
834 	ctl_complete_beio(beio);
835 }
836 
837 static uint64_t
838 ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
839 {
840 	struct vattr		vattr;
841 	struct statfs		statfs;
842 	uint64_t		val;
843 	int			error;
844 
845 	val = UINT64_MAX;
846 	if (be_lun->vn == NULL)
847 		return (val);
848 	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
849 	if (strcmp(attrname, "blocksused") == 0) {
850 		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
851 		if (error == 0)
852 			val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
853 	}
854 	if (strcmp(attrname, "blocksavail") == 0 &&
855 	    (be_lun->vn->v_iflag & VI_DOOMED) == 0) {
856 		error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
857 		if (error == 0)
858 			val = statfs.f_bavail * statfs.f_bsize /
859 			    be_lun->cbe_lun.blocksize;
860 	}
861 	VOP_UNLOCK(be_lun->vn, 0);
862 	return (val);
863 }
864 
865 static void
866 ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
867 			   struct ctl_be_block_io *beio)
868 {
869 	union ctl_io *io;
870 	struct cdevsw *csw;
871 	struct cdev *dev;
872 	struct uio xuio;
873 	struct iovec *xiovec;
874 	int error, flags, i, ref;
875 
876 	DPRINTF("entered\n");
877 
878 	io = beio->io;
879 	flags = 0;
880 	if (ARGS(io)->flags & CTL_LLF_DPO)
881 		flags |= IO_DIRECT;
882 	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
883 		flags |= IO_SYNC;
884 
885 	bzero(&xuio, sizeof(xuio));
886 	if (beio->bio_cmd == BIO_READ) {
887 		SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
888 		xuio.uio_rw = UIO_READ;
889 	} else {
890 		SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
891 		xuio.uio_rw = UIO_WRITE;
892 	}
893 	xuio.uio_offset = beio->io_offset;
894 	xuio.uio_resid = beio->io_len;
895 	xuio.uio_segflg = UIO_SYSSPACE;
896 	xuio.uio_iov = beio->xiovecs;
897 	xuio.uio_iovcnt = beio->num_segs;
898 	xuio.uio_td = curthread;
899 
900 	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
901 		xiovec->iov_base = beio->sg_segs[i].addr;
902 		xiovec->iov_len = beio->sg_segs[i].len;
903 	}
904 
905 	binuptime(&beio->ds_t0);
906 	mtx_lock(&be_lun->io_lock);
907 	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
908 	mtx_unlock(&be_lun->io_lock);
909 
910 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
911 	if (csw) {
912 		if (beio->bio_cmd == BIO_READ)
913 			error = csw->d_read(dev, &xuio, flags);
914 		else
915 			error = csw->d_write(dev, &xuio, flags);
916 		dev_relthread(dev, ref);
917 	} else
918 		error = ENXIO;
919 
920 	if (beio->bio_cmd == BIO_READ)
921 		SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
922 	else
923 		SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);
924 
925 	mtx_lock(&be_lun->io_lock);
926 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
927 	    beio->ds_tag_type, beio->ds_trans_type,
928 	    /*now*/ NULL, /*then*/&beio->ds_t0);
929 	mtx_unlock(&be_lun->io_lock);
930 
931 	/*
932 	 * If we got an error, set the sense data to "MEDIUM ERROR" and
933 	 * return the I/O to the user.
934 	 */
935 	if (error != 0) {
936 		if (error == ENOSPC || error == EDQUOT) {
937 			ctl_set_space_alloc_fail(&io->scsiio);
938 		} else if (error == EROFS || error == EACCES) {
939 			ctl_set_hw_write_protected(&io->scsiio);
940 		} else {
941 			ctl_set_medium_error(&io->scsiio,
942 			    beio->bio_cmd == BIO_READ);
943 		}
944 		ctl_complete_beio(beio);
945 		return;
946 	}
947 
948 	/*
949 	 * If this is a write or a verify, we're all done.
950 	 * If this is a read, we can now send the data to the user.
951 	 */
952 	if ((beio->bio_cmd == BIO_WRITE) ||
953 	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
954 		ctl_set_success(&io->scsiio);
955 		ctl_complete_beio(beio);
956 	} else {
957 		if ((ARGS(io)->flags & CTL_LLF_READ) &&
958 		    beio->beio_cont == NULL) {
959 			ctl_set_success(&io->scsiio);
960 			ctl_serseq_done(io);
961 		}
962 #ifdef CTL_TIME_IO
963         	getbintime(&io->io_hdr.dma_start_bt);
964 #endif
965 		ctl_datamove(io);
966 	}
967 }
968 
969 static void
970 ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
971 			struct ctl_be_block_io *beio)
972 {
973 	union ctl_io *io = beio->io;
974 	struct cdevsw *csw;
975 	struct cdev *dev;
976 	struct ctl_lba_len_flags *lbalen = ARGS(io);
977 	struct scsi_get_lba_status_data *data;
978 	off_t roff, off;
979 	int error, ref, status;
980 
981 	DPRINTF("entered\n");
982 
983 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
984 	if (csw == NULL) {
985 		status = 0;	/* unknown up to the end */
986 		off = be_lun->size_bytes;
987 		goto done;
988 	}
989 	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
990 	error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
991 	    curthread);
992 	if (error == 0 && off > roff)
993 		status = 0;	/* mapped up to off */
994 	else {
995 		error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
996 		    curthread);
997 		if (error == 0 && off > roff)
998 			status = 1;	/* deallocated up to off */
999 		else {
1000 			status = 0;	/* unknown up to the end */
1001 			off = be_lun->size_bytes;
1002 		}
1003 	}
1004 	dev_relthread(dev, ref);
1005 
1006 done:
1007 	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
1008 	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
1009 	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
1010 	    lbalen->lba), data->descr[0].length);
1011 	data->descr[0].status = status;
1012 
1013 	ctl_complete_beio(beio);
1014 }
1015 
1016 static void
1017 ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
1018 		       struct ctl_be_block_io *beio)
1019 {
1020 	struct bio *bio;
1021 	union ctl_io *io;
1022 	struct cdevsw *csw;
1023 	struct cdev *dev;
1024 	int ref;
1025 
1026 	io = beio->io;
1027 
1028 	DPRINTF("entered\n");
1029 
1030 	/* This can't fail, it's a blocking allocation. */
1031 	bio = g_alloc_bio();
1032 
1033 	bio->bio_cmd	    = BIO_FLUSH;
1034 	bio->bio_offset	    = 0;
1035 	bio->bio_data	    = 0;
1036 	bio->bio_done	    = ctl_be_block_biodone;
1037 	bio->bio_caller1    = beio;
1038 	bio->bio_pblkno	    = 0;
1039 
1040 	/*
1041 	 * We don't need to acquire the LUN lock here, because we are only
1042 	 * sending one bio, and so there is no other context to synchronize
1043 	 * with.
1044 	 */
1045 	beio->num_bios_sent = 1;
1046 	beio->send_complete = 1;
1047 
1048 	binuptime(&beio->ds_t0);
1049 	mtx_lock(&be_lun->io_lock);
1050 	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1051 	mtx_unlock(&be_lun->io_lock);
1052 
1053 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1054 	if (csw) {
1055 		bio->bio_dev = dev;
1056 		csw->d_strategy(bio);
1057 		dev_relthread(dev, ref);
1058 	} else {
1059 		bio->bio_error = ENXIO;
1060 		ctl_be_block_biodone(bio);
1061 	}
1062 }
1063 
1064 static void
1065 ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
1066 		       struct ctl_be_block_io *beio,
1067 		       uint64_t off, uint64_t len, int last)
1068 {
1069 	struct bio *bio;
1070 	uint64_t maxlen;
1071 	struct cdevsw *csw;
1072 	struct cdev *dev;
1073 	int ref;
1074 
1075 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1076 	maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
1077 	while (len > 0) {
1078 		bio = g_alloc_bio();
1079 		bio->bio_cmd	    = BIO_DELETE;
1080 		bio->bio_dev	    = dev;
1081 		bio->bio_offset	    = off;
1082 		bio->bio_length	    = MIN(len, maxlen);
1083 		bio->bio_data	    = 0;
1084 		bio->bio_done	    = ctl_be_block_biodone;
1085 		bio->bio_caller1    = beio;
1086 		bio->bio_pblkno     = off / be_lun->cbe_lun.blocksize;
1087 
1088 		off += bio->bio_length;
1089 		len -= bio->bio_length;
1090 
1091 		mtx_lock(&be_lun->io_lock);
1092 		beio->num_bios_sent++;
1093 		if (last && len == 0)
1094 			beio->send_complete = 1;
1095 		mtx_unlock(&be_lun->io_lock);
1096 
1097 		if (csw) {
1098 			csw->d_strategy(bio);
1099 		} else {
1100 			bio->bio_error = ENXIO;
1101 			ctl_be_block_biodone(bio);
1102 		}
1103 	}
1104 	if (csw)
1105 		dev_relthread(dev, ref);
1106 }
1107 
1108 static void
1109 ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
1110 		       struct ctl_be_block_io *beio)
1111 {
1112 	union ctl_io *io;
1113 	struct ctl_ptr_len_flags *ptrlen;
1114 	struct scsi_unmap_desc *buf, *end;
1115 	uint64_t len;
1116 
1117 	io = beio->io;
1118 
1119 	DPRINTF("entered\n");
1120 
1121 	binuptime(&beio->ds_t0);
1122 	mtx_lock(&be_lun->io_lock);
1123 	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1124 	mtx_unlock(&be_lun->io_lock);
1125 
1126 	if (beio->io_offset == -1) {
1127 		beio->io_len = 0;
1128 		ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1129 		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
1130 		end = buf + ptrlen->len / sizeof(*buf);
1131 		for (; buf < end; buf++) {
1132 			len = (uint64_t)scsi_4btoul(buf->length) *
1133 			    be_lun->cbe_lun.blocksize;
1134 			beio->io_len += len;
1135 			ctl_be_block_unmap_dev_range(be_lun, beio,
1136 			    scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
1137 			    len, (end - buf < 2) ? TRUE : FALSE);
1138 		}
1139 	} else
1140 		ctl_be_block_unmap_dev_range(be_lun, beio,
1141 		    beio->io_offset, beio->io_len, TRUE);
1142 }
1143 
1144 static void
1145 ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
1146 			  struct ctl_be_block_io *beio)
1147 {
1148 	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
1149 	struct bio *bio;
1150 	struct cdevsw *csw;
1151 	struct cdev *dev;
1152 	off_t cur_offset;
1153 	int i, max_iosize, ref;
1154 
1155 	DPRINTF("entered\n");
1156 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1157 
1158 	/*
1159 	 * We have to limit our I/O size to the maximum supported by the
1160 	 * backend device.  Hopefully it is MAXPHYS.  If the driver doesn't
1161 	 * set it properly, use DFLTPHYS.
1162 	 */
1163 	if (csw) {
1164 		max_iosize = dev->si_iosize_max;
1165 		if (max_iosize < PAGE_SIZE)
1166 			max_iosize = DFLTPHYS;
1167 	} else
1168 		max_iosize = DFLTPHYS;
1169 
1170 	cur_offset = beio->io_offset;
1171 	for (i = 0; i < beio->num_segs; i++) {
1172 		size_t cur_size;
1173 		uint8_t *cur_ptr;
1174 
1175 		cur_size = beio->sg_segs[i].len;
1176 		cur_ptr = beio->sg_segs[i].addr;
1177 
1178 		while (cur_size > 0) {
1179 			/* This can't fail, it's a blocking allocation. */
1180 			bio = g_alloc_bio();
1181 
1182 			KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));
1183 
1184 			bio->bio_cmd = beio->bio_cmd;
1185 			bio->bio_dev = dev;
1186 			bio->bio_caller1 = beio;
1187 			bio->bio_length = min(cur_size, max_iosize);
1188 			bio->bio_offset = cur_offset;
1189 			bio->bio_data = cur_ptr;
1190 			bio->bio_done = ctl_be_block_biodone;
1191 			bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;
1192 
1193 			cur_offset += bio->bio_length;
1194 			cur_ptr += bio->bio_length;
1195 			cur_size -= bio->bio_length;
1196 
1197 			TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
1198 			beio->num_bios_sent++;
1199 		}
1200 	}
1201 	binuptime(&beio->ds_t0);
1202 	mtx_lock(&be_lun->io_lock);
1203 	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1204 	beio->send_complete = 1;
1205 	mtx_unlock(&be_lun->io_lock);
1206 
1207 	/*
1208 	 * Fire off all allocated requests!
1209 	 */
1210 	while ((bio = TAILQ_FIRST(&queue)) != NULL) {
1211 		TAILQ_REMOVE(&queue, bio, bio_queue);
1212 		if (csw)
1213 			csw->d_strategy(bio);
1214 		else {
1215 			bio->bio_error = ENXIO;
1216 			ctl_be_block_biodone(bio);
1217 		}
1218 	}
1219 	if (csw)
1220 		dev_relthread(dev, ref);
1221 }
1222 
1223 static uint64_t
1224 ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
1225 {
1226 	struct diocgattr_arg	arg;
1227 	struct cdevsw *csw;
1228 	struct cdev *dev;
1229 	int error, ref;
1230 
1231 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1232 	if (csw == NULL)
1233 		return (UINT64_MAX);
1234 	strlcpy(arg.name, attrname, sizeof(arg.name));
1235 	arg.len = sizeof(arg.value.off);
1236 	if (csw->d_ioctl) {
1237 		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
1238 		    curthread);
1239 	} else
1240 		error = ENODEV;
1241 	dev_relthread(dev, ref);
1242 	if (error != 0)
1243 		return (UINT64_MAX);
1244 	return (arg.value.off);
1245 }
1246 
1247 static void
1248 ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
1249 			    union ctl_io *io)
1250 {
1251 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1252 	struct ctl_be_block_io *beio;
1253 	struct ctl_lba_len_flags *lbalen;
1254 
1255 	DPRINTF("entered\n");
1256 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1257 	lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1258 
1259 	beio->io_len = lbalen->len * cbe_lun->blocksize;
1260 	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1261 	beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
1262 	beio->bio_cmd = BIO_FLUSH;
1263 	beio->ds_trans_type = DEVSTAT_NO_DATA;
1264 	DPRINTF("SYNC\n");
1265 	be_lun->lun_flush(be_lun, beio);
1266 }
1267 
1268 static void
1269 ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
1270 {
1271 	union ctl_io *io;
1272 
1273 	io = beio->io;
1274 	ctl_free_beio(beio);
1275 	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1276 	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1277 	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1278 		ctl_config_write_done(io);
1279 		return;
1280 	}
1281 
1282 	ctl_be_block_config_write(io);
1283 }
1284 
/*
 * WRITE SAME dispatch.
 *
 * Three outcomes are possible:
 *  - Unsupported flags (or UNMAP/ANCHOR requested while the LUN has no
 *    unmap method): the beio is freed and the command is completed with an
 *    "invalid field" error.
 *  - UNMAP or ANCHOR flag set: the range is handed to the LUN's unmap
 *    method as a BIO_DELETE.
 *  - Otherwise: up to CTLBLK_MAX_SEGS buffers are allocated and filled with
 *    copies of the one-block pattern found at io->scsiio.kern_data_ptr, and
 *    the result is written through the LUN's dispatch method.  If the whole
 *    request does not fit, lbalen is advanced past the part being written
 *    and ctl_be_block_cw_done_ws() is installed as the continuation.
 */
static void
ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;
	uint64_t len_left, lba;
	uint32_t pb, pbo, adj;
	int i, seglen;
	uint8_t *buf, *end;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = ARGS(beio->io);

	/* Reject unknown flags, and UNMAP/ANCHOR without unmap support. */
	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
	    (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 1,
				      /*command*/ 1,
				      /*field*/ 1,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	/* UNMAP/ANCHOR: no data to write, just delete the range. */
	if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
		beio->io_offset = lbalen->lba * cbe_lun->blocksize;
		beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
		beio->bio_cmd = BIO_DELETE;
		beio->ds_trans_type = DEVSTAT_FREE;

		be_lun->unmap(be_lun, beio);
		return;
	}

	beio->bio_cmd = BIO_WRITE;
	beio->ds_trans_type = DEVSTAT_WRITE;

	DPRINTF("WRITE SAME at LBA %jx len %u\n",
	       (uintmax_t)lbalen->lba, lbalen->len);

	/*
	 * pb is the physical block size in bytes; pbo is the byte offset
	 * derived from pblockoff that is subtracted when computing segment
	 * alignment below.
	 */
	pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
	if (be_lun->cbe_lun.pblockoff > 0)
		pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
	else
		pbo = 0;
	len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
	/* 'lba' counts logical blocks placed into segments so far. */
	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {

		/*
		 * Setup the S/G entry for this chunk.
		 */
		seglen = MIN(CTLBLK_MAX_SEG, len_left);
		if (pb > cbe_lun->blocksize) {
			/*
			 * adj is how far the end of this segment lies past
			 * the preceding physical block boundary.  Trim the
			 * segment by that much when possible so that it ends
			 * on the boundary; otherwise just round down to a
			 * whole number of logical blocks.
			 */
			adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
			    seglen - pbo) % pb;
			if (seglen > adj)
				seglen -= adj;
			else
				seglen -= seglen % cbe_lun->blocksize;
		} else
			seglen -= seglen % cbe_lun->blocksize;
		beio->sg_segs[i].len = seglen;
		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		beio->num_segs++;
		len_left -= seglen;

		/*
		 * Replicate the pattern block across the segment.  With
		 * SWS_LBDATA the first four bytes of every block are
		 * overwritten with that block's LBA.
		 */
		buf = beio->sg_segs[i].addr;
		end = buf + seglen;
		for (; buf < end; buf += cbe_lun->blocksize) {
			memcpy(buf, io->scsiio.kern_data_ptr, cbe_lun->blocksize);
			if (lbalen->flags & SWS_LBDATA)
				scsi_ulto4b(lbalen->lba + lba, buf);
			lba++;
		}
	}

	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_len = lba * cbe_lun->blocksize;

	/* We can not do all in one run. Correct and schedule rerun. */
	if (len_left > 0) {
		lbalen->lba += lba;
		lbalen->len -= lba;
		beio->beio_cont = ctl_be_block_cw_done_ws;
	}

	be_lun->dispatch(be_lun, beio);
}
1383 
1384 static void
1385 ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
1386 			    union ctl_io *io)
1387 {
1388 	struct ctl_be_block_io *beio;
1389 	struct ctl_ptr_len_flags *ptrlen;
1390 
1391 	DPRINTF("entered\n");
1392 
1393 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1394 	ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1395 
1396 	if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
1397 		ctl_free_beio(beio);
1398 		ctl_set_invalid_field(&io->scsiio,
1399 				      /*sks_valid*/ 0,
1400 				      /*command*/ 1,
1401 				      /*field*/ 0,
1402 				      /*bit_valid*/ 0,
1403 				      /*bit*/ 0);
1404 		ctl_config_write_done(io);
1405 		return;
1406 	}
1407 
1408 	beio->io_len = 0;
1409 	beio->io_offset = -1;
1410 	beio->bio_cmd = BIO_DELETE;
1411 	beio->ds_trans_type = DEVSTAT_FREE;
1412 	DPRINTF("UNMAP\n");
1413 	be_lun->unmap(be_lun, beio);
1414 }
1415 
1416 static void
1417 ctl_be_block_cr_done(struct ctl_be_block_io *beio)
1418 {
1419 	union ctl_io *io;
1420 
1421 	io = beio->io;
1422 	ctl_free_beio(beio);
1423 	ctl_config_read_done(io);
1424 }
1425 
1426 static void
1427 ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
1428 			 union ctl_io *io)
1429 {
1430 	struct ctl_be_block_io *beio;
1431 	struct ctl_be_block_softc *softc;
1432 
1433 	DPRINTF("entered\n");
1434 
1435 	softc = be_lun->softc;
1436 	beio = ctl_alloc_beio(softc);
1437 	beio->io = io;
1438 	beio->lun = be_lun;
1439 	beio->beio_cont = ctl_be_block_cr_done;
1440 	PRIV(io)->ptr = (void *)beio;
1441 
1442 	switch (io->scsiio.cdb[0]) {
1443 	case SERVICE_ACTION_IN:		/* GET LBA STATUS */
1444 		beio->bio_cmd = -1;
1445 		beio->ds_trans_type = DEVSTAT_NO_DATA;
1446 		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1447 		beio->io_len = 0;
1448 		if (be_lun->get_lba_status)
1449 			be_lun->get_lba_status(be_lun, beio);
1450 		else
1451 			ctl_be_block_cr_done(beio);
1452 		break;
1453 	default:
1454 		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1455 		break;
1456 	}
1457 }
1458 
1459 static void
1460 ctl_be_block_cw_done(struct ctl_be_block_io *beio)
1461 {
1462 	union ctl_io *io;
1463 
1464 	io = beio->io;
1465 	ctl_free_beio(beio);
1466 	ctl_config_write_done(io);
1467 }
1468 
1469 static void
1470 ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
1471 			 union ctl_io *io)
1472 {
1473 	struct ctl_be_block_io *beio;
1474 	struct ctl_be_block_softc *softc;
1475 
1476 	DPRINTF("entered\n");
1477 
1478 	softc = be_lun->softc;
1479 	beio = ctl_alloc_beio(softc);
1480 	beio->io = io;
1481 	beio->lun = be_lun;
1482 	beio->beio_cont = ctl_be_block_cw_done;
1483 	switch (io->scsiio.tag_type) {
1484 	case CTL_TAG_ORDERED:
1485 		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1486 		break;
1487 	case CTL_TAG_HEAD_OF_QUEUE:
1488 		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1489 		break;
1490 	case CTL_TAG_UNTAGGED:
1491 	case CTL_TAG_SIMPLE:
1492 	case CTL_TAG_ACA:
1493 	default:
1494 		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1495 		break;
1496 	}
1497 	PRIV(io)->ptr = (void *)beio;
1498 
1499 	switch (io->scsiio.cdb[0]) {
1500 	case SYNCHRONIZE_CACHE:
1501 	case SYNCHRONIZE_CACHE_16:
1502 		ctl_be_block_cw_dispatch_sync(be_lun, io);
1503 		break;
1504 	case WRITE_SAME_10:
1505 	case WRITE_SAME_16:
1506 		ctl_be_block_cw_dispatch_ws(be_lun, io);
1507 		break;
1508 	case UNMAP:
1509 		ctl_be_block_cw_dispatch_unmap(be_lun, io);
1510 		break;
1511 	default:
1512 		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1513 		break;
1514 	}
1515 }
1516 
/*
 * SDT probes fired from ctl_be_block_dispatch(): "start" on entry and
 * "alloc_done" once the S/G buffers have been set up, each in a read and a
 * write flavour.
 */
SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t");
1521 
1522 static void
1523 ctl_be_block_next(struct ctl_be_block_io *beio)
1524 {
1525 	struct ctl_be_block_lun *be_lun;
1526 	union ctl_io *io;
1527 
1528 	io = beio->io;
1529 	be_lun = beio->lun;
1530 	ctl_free_beio(beio);
1531 	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1532 	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1533 	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1534 		ctl_data_submit_done(io);
1535 		return;
1536 	}
1537 
1538 	io->io_hdr.status &= ~CTL_STATUS_MASK;
1539 	io->io_hdr.status |= CTL_STATUS_NONE;
1540 
1541 	mtx_lock(&be_lun->queue_lock);
1542 	/*
1543 	 * XXX KDM make sure that links is okay to use at this point.
1544 	 * Otherwise, we either need to add another field to ctl_io_hdr,
1545 	 * or deal with resource allocation here.
1546 	 */
1547 	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1548 	mtx_unlock(&be_lun->queue_lock);
1549 
1550 	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1551 }
1552 
1553 static void
1554 ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
1555 			   union ctl_io *io)
1556 {
1557 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1558 	struct ctl_be_block_io *beio;
1559 	struct ctl_be_block_softc *softc;
1560 	struct ctl_lba_len_flags *lbalen;
1561 	struct ctl_ptr_len_flags *bptrlen;
1562 	uint64_t len_left, lbas;
1563 	int i;
1564 
1565 	softc = be_lun->softc;
1566 
1567 	DPRINTF("entered\n");
1568 
1569 	lbalen = ARGS(io);
1570 	if (lbalen->flags & CTL_LLF_WRITE) {
1571 		SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0);
1572 	} else {
1573 		SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0);
1574 	}
1575 
1576 	beio = ctl_alloc_beio(softc);
1577 	beio->io = io;
1578 	beio->lun = be_lun;
1579 	bptrlen = PRIV(io);
1580 	bptrlen->ptr = (void *)beio;
1581 
1582 	switch (io->scsiio.tag_type) {
1583 	case CTL_TAG_ORDERED:
1584 		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1585 		break;
1586 	case CTL_TAG_HEAD_OF_QUEUE:
1587 		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1588 		break;
1589 	case CTL_TAG_UNTAGGED:
1590 	case CTL_TAG_SIMPLE:
1591 	case CTL_TAG_ACA:
1592 	default:
1593 		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1594 		break;
1595 	}
1596 
1597 	if (lbalen->flags & CTL_LLF_WRITE) {
1598 		beio->bio_cmd = BIO_WRITE;
1599 		beio->ds_trans_type = DEVSTAT_WRITE;
1600 	} else {
1601 		beio->bio_cmd = BIO_READ;
1602 		beio->ds_trans_type = DEVSTAT_READ;
1603 	}
1604 
1605 	DPRINTF("%s at LBA %jx len %u @%ju\n",
1606 	       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
1607 	       (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
1608 	if (lbalen->flags & CTL_LLF_COMPARE)
1609 		lbas = CTLBLK_HALF_IO_SIZE;
1610 	else
1611 		lbas = CTLBLK_MAX_IO_SIZE;
1612 	lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
1613 	beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
1614 	beio->io_len = lbas * cbe_lun->blocksize;
1615 	bptrlen->len += lbas;
1616 
1617 	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
1618 		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
1619 		    i, CTLBLK_MAX_SEGS));
1620 
1621 		/*
1622 		 * Setup the S/G entry for this chunk.
1623 		 */
1624 		beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left);
1625 		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);
1626 
1627 		DPRINTF("segment %d addr %p len %zd\n", i,
1628 			beio->sg_segs[i].addr, beio->sg_segs[i].len);
1629 
1630 		/* Set up second segment for compare operation. */
1631 		if (lbalen->flags & CTL_LLF_COMPARE) {
1632 			beio->sg_segs[i + CTLBLK_HALF_SEGS].len =
1633 			    beio->sg_segs[i].len;
1634 			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr =
1635 			    uma_zalloc(be_lun->lun_zone, M_WAITOK);
1636 		}
1637 
1638 		beio->num_segs++;
1639 		len_left -= beio->sg_segs[i].len;
1640 	}
1641 	if (bptrlen->len < lbalen->len)
1642 		beio->beio_cont = ctl_be_block_next;
1643 	io->scsiio.be_move_done = ctl_be_block_move_done;
1644 	/* For compare we have separate S/G lists for read and datamove. */
1645 	if (lbalen->flags & CTL_LLF_COMPARE)
1646 		io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
1647 	else
1648 		io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
1649 	io->scsiio.kern_data_len = beio->io_len;
1650 	io->scsiio.kern_data_resid = 0;
1651 	io->scsiio.kern_sg_entries = beio->num_segs;
1652 	io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
1653 
1654 	/*
1655 	 * For the read case, we need to read the data into our buffers and
1656 	 * then we can send it back to the user.  For the write case, we
1657 	 * need to get the data from the user first.
1658 	 */
1659 	if (beio->bio_cmd == BIO_READ) {
1660 		SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0);
1661 		be_lun->dispatch(be_lun, beio);
1662 	} else {
1663 		SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0);
1664 #ifdef CTL_TIME_IO
1665         	getbintime(&io->io_hdr.dma_start_bt);
1666 #endif
1667 		ctl_datamove(io);
1668 	}
1669 }
1670 
1671 static void
1672 ctl_be_block_worker(void *context, int pending)
1673 {
1674 	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
1675 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1676 	union ctl_io *io;
1677 	struct ctl_be_block_io *beio;
1678 
1679 	DPRINTF("entered\n");
1680 	/*
1681 	 * Fetch and process I/Os from all queues.  If we detect LUN
1682 	 * CTL_LUN_FLAG_OFFLINE status here -- it is result of a race,
1683 	 * so make response maximally opaque to not confuse initiator.
1684 	 */
1685 	for (;;) {
1686 		mtx_lock(&be_lun->queue_lock);
1687 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
1688 		if (io != NULL) {
1689 			DPRINTF("datamove queue\n");
1690 			STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr,
1691 				      ctl_io_hdr, links);
1692 			mtx_unlock(&be_lun->queue_lock);
1693 			beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1694 			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
1695 				ctl_set_busy(&io->scsiio);
1696 				ctl_complete_beio(beio);
1697 				return;
1698 			}
1699 			be_lun->dispatch(be_lun, beio);
1700 			continue;
1701 		}
1702 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
1703 		if (io != NULL) {
1704 			DPRINTF("config write queue\n");
1705 			STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr,
1706 				      ctl_io_hdr, links);
1707 			mtx_unlock(&be_lun->queue_lock);
1708 			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
1709 				ctl_set_busy(&io->scsiio);
1710 				ctl_config_write_done(io);
1711 				return;
1712 			}
1713 			ctl_be_block_cw_dispatch(be_lun, io);
1714 			continue;
1715 		}
1716 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
1717 		if (io != NULL) {
1718 			DPRINTF("config read queue\n");
1719 			STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr,
1720 				      ctl_io_hdr, links);
1721 			mtx_unlock(&be_lun->queue_lock);
1722 			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
1723 				ctl_set_busy(&io->scsiio);
1724 				ctl_config_read_done(io);
1725 				return;
1726 			}
1727 			ctl_be_block_cr_dispatch(be_lun, io);
1728 			continue;
1729 		}
1730 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
1731 		if (io != NULL) {
1732 			DPRINTF("input queue\n");
1733 			STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr,
1734 				      ctl_io_hdr, links);
1735 			mtx_unlock(&be_lun->queue_lock);
1736 			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
1737 				ctl_set_busy(&io->scsiio);
1738 				ctl_data_submit_done(io);
1739 				return;
1740 			}
1741 			ctl_be_block_dispatch(be_lun, io);
1742 			continue;
1743 		}
1744 
1745 		/*
1746 		 * If we get here, there is no work left in the queues, so
1747 		 * just break out and let the task queue go to sleep.
1748 		 */
1749 		mtx_unlock(&be_lun->queue_lock);
1750 		break;
1751 	}
1752 }
1753 
1754 /*
1755  * Entry point from CTL to the backend for I/O.  We queue everything to a
1756  * work thread, so this just puts the I/O on a queue and wakes up the
1757  * thread.
1758  */
1759 static int
1760 ctl_be_block_submit(union ctl_io *io)
1761 {
1762 	struct ctl_be_block_lun *be_lun;
1763 	struct ctl_be_lun *cbe_lun;
1764 
1765 	DPRINTF("entered\n");
1766 
1767 	cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
1768 		CTL_PRIV_BACKEND_LUN].ptr;
1769 	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
1770 
1771 	/*
1772 	 * Make sure we only get SCSI I/O.
1773 	 */
1774 	KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type "
1775 		"%#x) encountered", io->io_hdr.io_type));
1776 
1777 	PRIV(io)->len = 0;
1778 
1779 	mtx_lock(&be_lun->queue_lock);
1780 	/*
1781 	 * XXX KDM make sure that links is okay to use at this point.
1782 	 * Otherwise, we either need to add another field to ctl_io_hdr,
1783 	 * or deal with resource allocation here.
1784 	 */
1785 	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1786 	mtx_unlock(&be_lun->queue_lock);
1787 	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1788 
1789 	return (CTL_RETVAL_COMPLETE);
1790 }
1791 
1792 static int
1793 ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1794 			int flag, struct thread *td)
1795 {
1796 	struct ctl_be_block_softc *softc;
1797 	int error;
1798 
1799 	softc = &backend_block_softc;
1800 
1801 	error = 0;
1802 
1803 	switch (cmd) {
1804 	case CTL_LUN_REQ: {
1805 		struct ctl_lun_req *lun_req;
1806 
1807 		lun_req = (struct ctl_lun_req *)addr;
1808 
1809 		switch (lun_req->reqtype) {
1810 		case CTL_LUNREQ_CREATE:
1811 			error = ctl_be_block_create(softc, lun_req);
1812 			break;
1813 		case CTL_LUNREQ_RM:
1814 			error = ctl_be_block_rm(softc, lun_req);
1815 			break;
1816 		case CTL_LUNREQ_MODIFY:
1817 			error = ctl_be_block_modify(softc, lun_req);
1818 			break;
1819 		default:
1820 			lun_req->status = CTL_LUN_ERROR;
1821 			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1822 				 "invalid LUN request type %d",
1823 				 lun_req->reqtype);
1824 			break;
1825 		}
1826 		break;
1827 	}
1828 	default:
1829 		error = ENOTTY;
1830 		break;
1831 	}
1832 
1833 	return (error);
1834 }
1835 
/*
 * Configure a LUN that is backed by a regular file.
 *
 * Installs the file flavour of the backend methods (no unmap support),
 * reads the file's attributes and derives the LUN geometry from them and
 * from the creation parameters/options:
 *   - size: params->lun_size_bytes if non-zero, else the file size;
 *   - logical block size: params->blocksize_bytes if non-zero, else 512;
 *   - physical block exponent/offset: from the "pblocksize" and
 *     "pblockoffset" options, defaulting to the file's va_blocksize and 0;
 *   - UNMAP granularity exponent/offset: likewise from "ublocksize" and
 *     "ublockoffset".
 *
 * Returns 0 on success or an errno value; on failure a description is
 * written to req->error_str.
 *
 * NOTE(review): the credential reference taken with crhold() is not
 * released on the EINVAL path at the end of this function; presumably the
 * caller's cleanup path drops it -- confirm.
 */
static int
ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun;
	struct ctl_be_block_filedata *file_data;
	struct ctl_lun_create_params *params;
	char			     *value;
	struct vattr		      vattr;
	off_t			      ps, pss, po, pos, us, uss, uo, uos;
	int			      error;

	error = 0;
	cbe_lun = &be_lun->cbe_lun;
	file_data = &be_lun->backend.file;
	params = &be_lun->params;

	/* Files get the file method table and never advertise UNMAP. */
	be_lun->dev_type = CTL_BE_BLOCK_FILE;
	be_lun->dispatch = ctl_be_block_dispatch_file;
	be_lun->lun_flush = ctl_be_block_flush_file;
	be_lun->get_lba_status = ctl_be_block_gls_file;
	be_lun->getattr = ctl_be_block_getattr_file;
	be_lun->unmap = NULL;
	cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;

	error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
	if (error != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error calling VOP_GETATTR() for file %s",
			 be_lun->dev_path);
		return (error);
	}

	/*
	 * Verify that we have the ability to upgrade to exclusive
	 * access on this file so we can trap errors at open instead
	 * of reporting them during first access.
	 */
	if (VOP_ISLOCKED(be_lun->vn) != LK_EXCLUSIVE) {
		vn_lock(be_lun->vn, LK_UPGRADE | LK_RETRY);
		if (be_lun->vn->v_iflag & VI_DOOMED) {
			error = EBADF;
			snprintf(req->error_str, sizeof(req->error_str),
				 "error locking file %s", be_lun->dev_path);
			return (error);
		}
	}

	/* Keep a reference to the opener's credentials for the file data. */
	file_data->cred = crhold(curthread->td_ucred);
	if (params->lun_size_bytes != 0)
		be_lun->size_bytes = params->lun_size_bytes;
	else
		be_lun->size_bytes = vattr.va_size;

	/*
	 * For files we can use any logical block size.  Prefer 512 bytes
	 * for compatibility reasons.  If file's vattr.va_blocksize
	 * (preferred I/O block size) is bigger and multiple to chosen
	 * logical block size -- report it as physical block size.
	 */
	if (params->blocksize_bytes != 0)
		cbe_lun->blocksize = params->blocksize_bytes;
	else
		cbe_lun->blocksize = 512;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);

	/* Defaults for both physical and UNMAP block size/offset. */
	us = ps = vattr.va_blocksize;
	uo = po = 0;

	value = ctl_get_opt(&cbe_lun->options, "pblocksize");
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
	if (value != NULL)
		ctl_expand_number(value, &po);
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	/*
	 * Accept the values only if the size is a non-zero, power-of-two
	 * number of whole logical blocks and the offset is a whole number
	 * of logical blocks not exceeding the size.
	 */
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	value = ctl_get_opt(&cbe_lun->options, "ublocksize");
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
	if (value != NULL)
		ctl_expand_number(value, &uo);
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	/* Same acceptance rules as for the physical block values above. */
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	/*
	 * Sanity check.  The media size has to be at least one
	 * sector long.
	 */
	if (be_lun->size_bytes < cbe_lun->blocksize) {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
			 "file %s size %ju < block size %u", be_lun->dev_path,
			 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
	}

	/* Set even when the sanity check failed; error is returned as is. */
	cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
	return (error);
}
1948 
1949 static int
1950 ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1951 {
1952 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1953 	struct ctl_lun_create_params *params;
1954 	struct cdevsw		     *csw;
1955 	struct cdev		     *dev;
1956 	char			     *value;
1957 	int			      error, atomic, maxio, ref, unmap, tmp;
1958 	off_t			      ps, pss, po, pos, us, uss, uo, uos, otmp;
1959 
1960 	params = &be_lun->params;
1961 
1962 	be_lun->dev_type = CTL_BE_BLOCK_DEV;
1963 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1964 	if (csw == NULL)
1965 		return (ENXIO);
1966 	if (strcmp(csw->d_name, "zvol") == 0) {
1967 		be_lun->dispatch = ctl_be_block_dispatch_zvol;
1968 		be_lun->get_lba_status = ctl_be_block_gls_zvol;
1969 		atomic = maxio = CTLBLK_MAX_IO_SIZE;
1970 	} else {
1971 		be_lun->dispatch = ctl_be_block_dispatch_dev;
1972 		be_lun->get_lba_status = NULL;
1973 		atomic = 0;
1974 		maxio = dev->si_iosize_max;
1975 		if (maxio <= 0)
1976 			maxio = DFLTPHYS;
1977 		if (maxio > CTLBLK_MAX_IO_SIZE)
1978 			maxio = CTLBLK_MAX_IO_SIZE;
1979 	}
1980 	be_lun->lun_flush = ctl_be_block_flush_dev;
1981 	be_lun->getattr = ctl_be_block_getattr_dev;
1982 	be_lun->unmap = ctl_be_block_unmap_dev;
1983 
1984 	if (!csw->d_ioctl) {
1985 		dev_relthread(dev, ref);
1986 		snprintf(req->error_str, sizeof(req->error_str),
1987 			 "no d_ioctl for device %s!", be_lun->dev_path);
1988 		return (ENODEV);
1989 	}
1990 
1991 	error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
1992 			       curthread);
1993 	if (error) {
1994 		dev_relthread(dev, ref);
1995 		snprintf(req->error_str, sizeof(req->error_str),
1996 			 "error %d returned for DIOCGSECTORSIZE ioctl "
1997 			 "on %s!", error, be_lun->dev_path);
1998 		return (error);
1999 	}
2000 
2001 	/*
2002 	 * If the user has asked for a blocksize that is greater than the
2003 	 * backing device's blocksize, we can do it only if the blocksize
2004 	 * the user is asking for is an even multiple of the underlying
2005 	 * device's blocksize.
2006 	 */
2007 	if ((params->blocksize_bytes != 0) &&
2008 	    (params->blocksize_bytes >= tmp)) {
2009 		if (params->blocksize_bytes % tmp == 0) {
2010 			cbe_lun->blocksize = params->blocksize_bytes;
2011 		} else {
2012 			dev_relthread(dev, ref);
2013 			snprintf(req->error_str, sizeof(req->error_str),
2014 				 "requested blocksize %u is not an even "
2015 				 "multiple of backing device blocksize %u",
2016 				 params->blocksize_bytes, tmp);
2017 			return (EINVAL);
2018 		}
2019 	} else if (params->blocksize_bytes != 0) {
2020 		dev_relthread(dev, ref);
2021 		snprintf(req->error_str, sizeof(req->error_str),
2022 			 "requested blocksize %u < backing device "
2023 			 "blocksize %u", params->blocksize_bytes, tmp);
2024 		return (EINVAL);
2025 	} else
2026 		cbe_lun->blocksize = tmp;
2027 
2028 	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
2029 			     curthread);
2030 	if (error) {
2031 		dev_relthread(dev, ref);
2032 		snprintf(req->error_str, sizeof(req->error_str),
2033 			 "error %d returned for DIOCGMEDIASIZE "
2034 			 " ioctl on %s!", error,
2035 			 be_lun->dev_path);
2036 		return (error);
2037 	}
2038 
2039 	if (params->lun_size_bytes != 0) {
2040 		if (params->lun_size_bytes > otmp) {
2041 			dev_relthread(dev, ref);
2042 			snprintf(req->error_str, sizeof(req->error_str),
2043 				 "requested LUN size %ju > backing device "
2044 				 "size %ju",
2045 				 (uintmax_t)params->lun_size_bytes,
2046 				 (uintmax_t)otmp);
2047 			return (EINVAL);
2048 		}
2049 
2050 		be_lun->size_bytes = params->lun_size_bytes;
2051 	} else
2052 		be_lun->size_bytes = otmp;
2053 	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2054 	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2055 	    0 : (be_lun->size_blocks - 1);
2056 
2057 	error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
2058 	    curthread);
2059 	if (error)
2060 		ps = po = 0;
2061 	else {
2062 		error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
2063 		    FREAD, curthread);
2064 		if (error)
2065 			po = 0;
2066 	}
2067 	us = ps;
2068 	uo = po;
2069 
2070 	value = ctl_get_opt(&cbe_lun->options, "pblocksize");
2071 	if (value != NULL)
2072 		ctl_expand_number(value, &ps);
2073 	value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
2074 	if (value != NULL)
2075 		ctl_expand_number(value, &po);
2076 	pss = ps / cbe_lun->blocksize;
2077 	pos = po / cbe_lun->blocksize;
2078 	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
2079 	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
2080 		cbe_lun->pblockexp = fls(pss) - 1;
2081 		cbe_lun->pblockoff = (pss - pos) % pss;
2082 	}
2083 
2084 	value = ctl_get_opt(&cbe_lun->options, "ublocksize");
2085 	if (value != NULL)
2086 		ctl_expand_number(value, &us);
2087 	value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
2088 	if (value != NULL)
2089 		ctl_expand_number(value, &uo);
2090 	uss = us / cbe_lun->blocksize;
2091 	uos = uo / cbe_lun->blocksize;
2092 	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
2093 	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
2094 		cbe_lun->ublockexp = fls(uss) - 1;
2095 		cbe_lun->ublockoff = (uss - uos) % uss;
2096 	}
2097 
2098 	cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
2099 	cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;
2100 
2101 	if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
2102 		unmap = 1;
2103 	} else {
2104 		struct diocgattr_arg	arg;
2105 
2106 		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
2107 		arg.len = sizeof(arg.value.i);
2108 		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
2109 		    curthread);
2110 		unmap = (error == 0) ? arg.value.i : 0;
2111 	}
2112 	value = ctl_get_opt(&cbe_lun->options, "unmap");
2113 	if (value != NULL)
2114 		unmap = (strcmp(value, "on") == 0);
2115 	if (unmap)
2116 		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
2117 	else
2118 		cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
2119 
2120 	dev_relthread(dev, ref);
2121 	return (0);
2122 }
2123 
2124 static int
2125 ctl_be_block_close(struct ctl_be_block_lun *be_lun)
2126 {
2127 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2128 	int flags;
2129 
2130 	if (be_lun->vn) {
2131 		flags = FREAD;
2132 		if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
2133 			flags |= FWRITE;
2134 		(void)vn_close(be_lun->vn, flags, NOCRED, curthread);
2135 		be_lun->vn = NULL;
2136 
2137 		switch (be_lun->dev_type) {
2138 		case CTL_BE_BLOCK_DEV:
2139 			break;
2140 		case CTL_BE_BLOCK_FILE:
2141 			if (be_lun->backend.file.cred != NULL) {
2142 				crfree(be_lun->backend.file.cred);
2143 				be_lun->backend.file.cred = NULL;
2144 			}
2145 			break;
2146 		case CTL_BE_BLOCK_NONE:
2147 			break;
2148 		default:
2149 			panic("Unexpected backend type.");
2150 			break;
2151 		}
2152 		be_lun->dev_type = CTL_BE_BLOCK_NONE;
2153 	}
2154 	return (0);
2155 }
2156 
2157 static int
2158 ctl_be_block_open(struct ctl_be_block_softc *softc,
2159 		  struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
2160 {
2161 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2162 	struct nameidata nd;
2163 	char		*value;
2164 	int		 error, flags;
2165 
2166 	error = 0;
2167 	if (rootvnode == NULL) {
2168 		snprintf(req->error_str, sizeof(req->error_str),
2169 			 "Root filesystem is not mounted");
2170 		return (1);
2171 	}
2172 	pwd_ensure_dirs();
2173 
2174 	value = ctl_get_opt(&cbe_lun->options, "file");
2175 	if (value == NULL) {
2176 		snprintf(req->error_str, sizeof(req->error_str),
2177 			 "no file argument specified");
2178 		return (1);
2179 	}
2180 	free(be_lun->dev_path, M_CTLBLK);
2181 	be_lun->dev_path = strdup(value, M_CTLBLK);
2182 
2183 	flags = FREAD;
2184 	value = ctl_get_opt(&cbe_lun->options, "readonly");
2185 	if (value == NULL || strcmp(value, "on") != 0)
2186 		flags |= FWRITE;
2187 
2188 again:
2189 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
2190 	error = vn_open(&nd, &flags, 0, NULL);
2191 	if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
2192 		flags &= ~FWRITE;
2193 		goto again;
2194 	}
2195 	if (error) {
2196 		/*
2197 		 * This is the only reasonable guess we can make as far as
2198 		 * path if the user doesn't give us a fully qualified path.
2199 		 * If they want to specify a file, they need to specify the
2200 		 * full path.
2201 		 */
2202 		if (be_lun->dev_path[0] != '/') {
2203 			char *dev_name;
2204 
2205 			asprintf(&dev_name, M_CTLBLK, "/dev/%s",
2206 				be_lun->dev_path);
2207 			free(be_lun->dev_path, M_CTLBLK);
2208 			be_lun->dev_path = dev_name;
2209 			goto again;
2210 		}
2211 		snprintf(req->error_str, sizeof(req->error_str),
2212 		    "error opening %s: %d", be_lun->dev_path, error);
2213 		return (error);
2214 	}
2215 	if (flags & FWRITE)
2216 		cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
2217 	else
2218 		cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
2219 
2220 	NDFREE(&nd, NDF_ONLY_PNBUF);
2221 	be_lun->vn = nd.ni_vp;
2222 
2223 	/* We only support disks and files. */
2224 	if (vn_isdisk(be_lun->vn, &error)) {
2225 		error = ctl_be_block_open_dev(be_lun, req);
2226 	} else if (be_lun->vn->v_type == VREG) {
2227 		error = ctl_be_block_open_file(be_lun, req);
2228 	} else {
2229 		error = EINVAL;
2230 		snprintf(req->error_str, sizeof(req->error_str),
2231 			 "%s is not a disk or plain file", be_lun->dev_path);
2232 	}
2233 	VOP_UNLOCK(be_lun->vn, 0);
2234 
2235 	if (error != 0)
2236 		ctl_be_block_close(be_lun);
2237 	cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2238 	if (be_lun->dispatch != ctl_be_block_dispatch_dev)
2239 		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
2240 	value = ctl_get_opt(&cbe_lun->options, "serseq");
2241 	if (value != NULL && strcmp(value, "on") == 0)
2242 		cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
2243 	else if (value != NULL && strcmp(value, "read") == 0)
2244 		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
2245 	else if (value != NULL && strcmp(value, "off") == 0)
2246 		cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2247 	return (0);
2248 }
2249 
2250 static int
2251 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2252 {
2253 	struct ctl_be_lun *cbe_lun;
2254 	struct ctl_be_block_lun *be_lun;
2255 	struct ctl_lun_create_params *params;
2256 	char num_thread_str[16];
2257 	char tmpstr[32];
2258 	char *value;
2259 	int retval, num_threads;
2260 	int tmp_num_threads;
2261 
2262 	params = &req->reqdata.create;
2263 	retval = 0;
2264 	req->status = CTL_LUN_OK;
2265 
2266 	be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
2267 	cbe_lun = &be_lun->cbe_lun;
2268 	cbe_lun->be_lun = be_lun;
2269 	be_lun->params = req->reqdata.create;
2270 	be_lun->softc = softc;
2271 	STAILQ_INIT(&be_lun->input_queue);
2272 	STAILQ_INIT(&be_lun->config_read_queue);
2273 	STAILQ_INIT(&be_lun->config_write_queue);
2274 	STAILQ_INIT(&be_lun->datamove_queue);
2275 	sprintf(be_lun->lunname, "cblk%d", softc->num_luns);
2276 	mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF);
2277 	mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF);
2278 	ctl_init_opts(&cbe_lun->options,
2279 	    req->num_be_args, req->kern_be_args);
2280 	be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG,
2281 	    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2282 	if (be_lun->lun_zone == NULL) {
2283 		snprintf(req->error_str, sizeof(req->error_str),
2284 			 "error allocating UMA zone");
2285 		goto bailout_error;
2286 	}
2287 
2288 	if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
2289 		cbe_lun->lun_type = params->device_type;
2290 	else
2291 		cbe_lun->lun_type = T_DIRECT;
2292 	be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED;
2293 	cbe_lun->flags = 0;
2294 	value = ctl_get_opt(&cbe_lun->options, "ha_role");
2295 	if (value != NULL) {
2296 		if (strcmp(value, "primary") == 0)
2297 			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2298 	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2299 		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2300 
2301 	if (cbe_lun->lun_type == T_DIRECT) {
2302 		be_lun->size_bytes = params->lun_size_bytes;
2303 		if (params->blocksize_bytes != 0)
2304 			cbe_lun->blocksize = params->blocksize_bytes;
2305 		else
2306 			cbe_lun->blocksize = 512;
2307 		be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2308 		cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2309 		    0 : (be_lun->size_blocks - 1);
2310 
2311 		if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2312 		    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2313 			retval = ctl_be_block_open(softc, be_lun, req);
2314 			if (retval != 0) {
2315 				retval = 0;
2316 				req->status = CTL_LUN_WARNING;
2317 			}
2318 		}
2319 		num_threads = cbb_num_threads;
2320 	} else {
2321 		num_threads = 1;
2322 	}
2323 
2324 	/*
2325 	 * XXX This searching loop might be refactored to be combined with
2326 	 * the loop above,
2327 	 */
2328 	value = ctl_get_opt(&cbe_lun->options, "num_threads");
2329 	if (value != NULL) {
2330 		tmp_num_threads = strtol(value, NULL, 0);
2331 
2332 		/*
2333 		 * We don't let the user specify less than one
2334 		 * thread, but hope he's clueful enough not to
2335 		 * specify 1000 threads.
2336 		 */
2337 		if (tmp_num_threads < 1) {
2338 			snprintf(req->error_str, sizeof(req->error_str),
2339 				 "invalid number of threads %s",
2340 				 num_thread_str);
2341 			goto bailout_error;
2342 		}
2343 		num_threads = tmp_num_threads;
2344 	}
2345 
2346 	if (be_lun->vn == NULL)
2347 		cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE;
2348 	/* Tell the user the blocksize we ended up using */
2349 	params->lun_size_bytes = be_lun->size_bytes;
2350 	params->blocksize_bytes = cbe_lun->blocksize;
2351 	if (params->flags & CTL_LUN_FLAG_ID_REQ) {
2352 		cbe_lun->req_lun_id = params->req_lun_id;
2353 		cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
2354 	} else
2355 		cbe_lun->req_lun_id = 0;
2356 
2357 	cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
2358 	cbe_lun->lun_config_status = ctl_be_block_lun_config_status;
2359 	cbe_lun->be = &ctl_be_block_driver;
2360 
2361 	if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
2362 		snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d",
2363 			 softc->num_luns);
2364 		strncpy((char *)cbe_lun->serial_num, tmpstr,
2365 			MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
2366 
2367 		/* Tell the user what we used for a serial number */
2368 		strncpy((char *)params->serial_num, tmpstr,
2369 			MIN(sizeof(params->serial_num), sizeof(tmpstr)));
2370 	} else {
2371 		strncpy((char *)cbe_lun->serial_num, params->serial_num,
2372 			MIN(sizeof(cbe_lun->serial_num),
2373 			sizeof(params->serial_num)));
2374 	}
2375 	if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
2376 		snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns);
2377 		strncpy((char *)cbe_lun->device_id, tmpstr,
2378 			MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
2379 
2380 		/* Tell the user what we used for a device ID */
2381 		strncpy((char *)params->device_id, tmpstr,
2382 			MIN(sizeof(params->device_id), sizeof(tmpstr)));
2383 	} else {
2384 		strncpy((char *)cbe_lun->device_id, params->device_id,
2385 			MIN(sizeof(cbe_lun->device_id),
2386 			    sizeof(params->device_id)));
2387 	}
2388 
2389 	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
2390 
2391 	be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK,
2392 	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2393 
2394 	if (be_lun->io_taskqueue == NULL) {
2395 		snprintf(req->error_str, sizeof(req->error_str),
2396 			 "unable to create taskqueue");
2397 		goto bailout_error;
2398 	}
2399 
2400 	/*
2401 	 * Note that we start the same number of threads by default for
2402 	 * both the file case and the block device case.  For the file
2403 	 * case, we need multiple threads to allow concurrency, because the
2404 	 * vnode interface is designed to be a blocking interface.  For the
2405 	 * block device case, ZFS zvols at least will block the caller's
2406 	 * context in many instances, and so we need multiple threads to
2407 	 * overcome that problem.  Other block devices don't need as many
2408 	 * threads, but they shouldn't cause too many problems.
2409 	 *
2410 	 * If the user wants to just have a single thread for a block
2411 	 * device, he can specify that when the LUN is created, or change
2412 	 * the tunable/sysctl to alter the default number of threads.
2413 	 */
2414 	retval = taskqueue_start_threads(&be_lun->io_taskqueue,
2415 					 /*num threads*/num_threads,
2416 					 /*priority*/PWAIT,
2417 					 /*thread name*/
2418 					 "%s taskq", be_lun->lunname);
2419 
2420 	if (retval != 0)
2421 		goto bailout_error;
2422 
2423 	be_lun->num_threads = num_threads;
2424 
2425 	mtx_lock(&softc->lock);
2426 	softc->num_luns++;
2427 	STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links);
2428 
2429 	mtx_unlock(&softc->lock);
2430 
2431 	retval = ctl_add_lun(&be_lun->cbe_lun);
2432 	if (retval != 0) {
2433 		mtx_lock(&softc->lock);
2434 		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2435 			      links);
2436 		softc->num_luns--;
2437 		mtx_unlock(&softc->lock);
2438 		snprintf(req->error_str, sizeof(req->error_str),
2439 			 "ctl_add_lun() returned error %d, see dmesg for "
2440 			 "details", retval);
2441 		retval = 0;
2442 		goto bailout_error;
2443 	}
2444 
2445 	mtx_lock(&softc->lock);
2446 
2447 	/*
2448 	 * Tell the config_status routine that we're waiting so it won't
2449 	 * clean up the LUN in the event of an error.
2450 	 */
2451 	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2452 
2453 	while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2454 		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2455 		if (retval == EINTR)
2456 			break;
2457 	}
2458 	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2459 
2460 	if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) {
2461 		snprintf(req->error_str, sizeof(req->error_str),
2462 			 "LUN configuration error, see dmesg for details");
2463 		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2464 			      links);
2465 		softc->num_luns--;
2466 		mtx_unlock(&softc->lock);
2467 		goto bailout_error;
2468 	} else {
2469 		params->req_lun_id = cbe_lun->lun_id;
2470 	}
2471 
2472 	mtx_unlock(&softc->lock);
2473 
2474 	be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id,
2475 					       cbe_lun->blocksize,
2476 					       DEVSTAT_ALL_SUPPORTED,
2477 					       cbe_lun->lun_type
2478 					       | DEVSTAT_TYPE_IF_OTHER,
2479 					       DEVSTAT_PRIORITY_OTHER);
2480 
2481 	return (retval);
2482 
2483 bailout_error:
2484 	req->status = CTL_LUN_ERROR;
2485 
2486 	if (be_lun->io_taskqueue != NULL)
2487 		taskqueue_free(be_lun->io_taskqueue);
2488 	ctl_be_block_close(be_lun);
2489 	if (be_lun->dev_path != NULL)
2490 		free(be_lun->dev_path, M_CTLBLK);
2491 	if (be_lun->lun_zone != NULL)
2492 		uma_zdestroy(be_lun->lun_zone);
2493 	ctl_free_opts(&cbe_lun->options);
2494 	mtx_destroy(&be_lun->queue_lock);
2495 	mtx_destroy(&be_lun->io_lock);
2496 	free(be_lun, M_CTLBLK);
2497 
2498 	return (retval);
2499 }
2500 
2501 static int
2502 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2503 {
2504 	struct ctl_lun_rm_params *params;
2505 	struct ctl_be_block_lun *be_lun;
2506 	struct ctl_be_lun *cbe_lun;
2507 	int retval;
2508 
2509 	params = &req->reqdata.rm;
2510 
2511 	mtx_lock(&softc->lock);
2512 	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2513 		if (be_lun->cbe_lun.lun_id == params->lun_id)
2514 			break;
2515 	}
2516 	mtx_unlock(&softc->lock);
2517 
2518 	if (be_lun == NULL) {
2519 		snprintf(req->error_str, sizeof(req->error_str),
2520 			 "LUN %u is not managed by the block backend",
2521 			 params->lun_id);
2522 		goto bailout_error;
2523 	}
2524 	cbe_lun = &be_lun->cbe_lun;
2525 
2526 	retval = ctl_disable_lun(cbe_lun);
2527 	if (retval != 0) {
2528 		snprintf(req->error_str, sizeof(req->error_str),
2529 			 "error %d returned from ctl_disable_lun() for "
2530 			 "LUN %d", retval, params->lun_id);
2531 		goto bailout_error;
2532 	}
2533 
2534 	if (be_lun->vn != NULL) {
2535 		cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE;
2536 		ctl_lun_offline(cbe_lun);
2537 		taskqueue_drain_all(be_lun->io_taskqueue);
2538 		ctl_be_block_close(be_lun);
2539 	}
2540 
2541 	retval = ctl_invalidate_lun(cbe_lun);
2542 	if (retval != 0) {
2543 		snprintf(req->error_str, sizeof(req->error_str),
2544 			 "error %d returned from ctl_invalidate_lun() for "
2545 			 "LUN %d", retval, params->lun_id);
2546 		goto bailout_error;
2547 	}
2548 
2549 	mtx_lock(&softc->lock);
2550 	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2551 	while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2552                 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2553                 if (retval == EINTR)
2554                         break;
2555         }
2556 	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2557 
2558 	if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2559 		snprintf(req->error_str, sizeof(req->error_str),
2560 			 "interrupted waiting for LUN to be freed");
2561 		mtx_unlock(&softc->lock);
2562 		goto bailout_error;
2563 	}
2564 
2565 	STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links);
2566 
2567 	softc->num_luns--;
2568 	mtx_unlock(&softc->lock);
2569 
2570 	taskqueue_drain_all(be_lun->io_taskqueue);
2571 	taskqueue_free(be_lun->io_taskqueue);
2572 
2573 	if (be_lun->disk_stats != NULL)
2574 		devstat_remove_entry(be_lun->disk_stats);
2575 
2576 	uma_zdestroy(be_lun->lun_zone);
2577 
2578 	ctl_free_opts(&cbe_lun->options);
2579 	free(be_lun->dev_path, M_CTLBLK);
2580 	mtx_destroy(&be_lun->queue_lock);
2581 	mtx_destroy(&be_lun->io_lock);
2582 	free(be_lun, M_CTLBLK);
2583 
2584 	req->status = CTL_LUN_OK;
2585 
2586 	return (0);
2587 
2588 bailout_error:
2589 
2590 	req->status = CTL_LUN_ERROR;
2591 
2592 	return (0);
2593 }
2594 
2595 static int
2596 ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
2597 			 struct ctl_lun_req *req)
2598 {
2599 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2600 	struct vattr vattr;
2601 	int error;
2602 	struct ctl_lun_create_params *params = &be_lun->params;
2603 
2604 	if (params->lun_size_bytes != 0) {
2605 		be_lun->size_bytes = params->lun_size_bytes;
2606 	} else  {
2607 		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
2608 		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
2609 		VOP_UNLOCK(be_lun->vn, 0);
2610 		if (error != 0) {
2611 			snprintf(req->error_str, sizeof(req->error_str),
2612 				 "error calling VOP_GETATTR() for file %s",
2613 				 be_lun->dev_path);
2614 			return (error);
2615 		}
2616 		be_lun->size_bytes = vattr.va_size;
2617 	}
2618 	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2619 	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2620 	    0 : (be_lun->size_blocks - 1);
2621 	return (0);
2622 }
2623 
2624 static int
2625 ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
2626 			struct ctl_lun_req *req)
2627 {
2628 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2629 	struct ctl_lun_create_params *params = &be_lun->params;
2630 	struct cdevsw *csw;
2631 	struct cdev *dev;
2632 	uint64_t size_bytes;
2633 	int error, ref;
2634 
2635 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
2636 	if (csw == NULL)
2637 		return (ENXIO);
2638 	if (csw->d_ioctl == NULL) {
2639 		dev_relthread(dev, ref);
2640 		snprintf(req->error_str, sizeof(req->error_str),
2641 			 "no d_ioctl for device %s!", be_lun->dev_path);
2642 		return (ENODEV);
2643 	}
2644 
2645 	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&size_bytes, FREAD,
2646 	    curthread);
2647 	dev_relthread(dev, ref);
2648 	if (error) {
2649 		snprintf(req->error_str, sizeof(req->error_str),
2650 			 "error %d returned for DIOCGMEDIASIZE ioctl "
2651 			 "on %s!", error, be_lun->dev_path);
2652 		return (error);
2653 	}
2654 
2655 	if (params->lun_size_bytes != 0) {
2656 		if (params->lun_size_bytes > size_bytes) {
2657 			snprintf(req->error_str, sizeof(req->error_str),
2658 				 "requested LUN size %ju > backing device "
2659 				 "size %ju",
2660 				 (uintmax_t)params->lun_size_bytes,
2661 				 (uintmax_t)size_bytes);
2662 			return (EINVAL);
2663 		}
2664 		be_lun->size_bytes = params->lun_size_bytes;
2665 	} else {
2666 		be_lun->size_bytes = size_bytes;
2667 	}
2668 	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2669 	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2670 	    0 : (be_lun->size_blocks - 1);
2671 	return (0);
2672 }
2673 
2674 static int
2675 ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2676 {
2677 	struct ctl_lun_modify_params *params;
2678 	struct ctl_be_block_lun *be_lun;
2679 	struct ctl_be_lun *cbe_lun;
2680 	char *value;
2681 	uint64_t oldsize;
2682 	int error, wasprim;
2683 
2684 	params = &req->reqdata.modify;
2685 
2686 	mtx_lock(&softc->lock);
2687 	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2688 		if (be_lun->cbe_lun.lun_id == params->lun_id)
2689 			break;
2690 	}
2691 	mtx_unlock(&softc->lock);
2692 
2693 	if (be_lun == NULL) {
2694 		snprintf(req->error_str, sizeof(req->error_str),
2695 			 "LUN %u is not managed by the block backend",
2696 			 params->lun_id);
2697 		goto bailout_error;
2698 	}
2699 	cbe_lun = &be_lun->cbe_lun;
2700 
2701 	if (params->lun_size_bytes != 0)
2702 		be_lun->params.lun_size_bytes = params->lun_size_bytes;
2703 	ctl_update_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args);
2704 
2705 	wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
2706 	value = ctl_get_opt(&cbe_lun->options, "ha_role");
2707 	if (value != NULL) {
2708 		if (strcmp(value, "primary") == 0)
2709 			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2710 		else
2711 			cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2712 	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2713 		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2714 	else
2715 		cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2716 	if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
2717 		if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
2718 			ctl_lun_primary(cbe_lun);
2719 		else
2720 			ctl_lun_secondary(cbe_lun);
2721 	}
2722 
2723 	oldsize = be_lun->size_blocks;
2724 	if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2725 	    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2726 		if (be_lun->vn == NULL)
2727 			error = ctl_be_block_open(softc, be_lun, req);
2728 		else if (vn_isdisk(be_lun->vn, &error))
2729 			error = ctl_be_block_modify_dev(be_lun, req);
2730 		else if (be_lun->vn->v_type == VREG)
2731 			error = ctl_be_block_modify_file(be_lun, req);
2732 		else
2733 			error = EINVAL;
2734 		if ((cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) &&
2735 		    be_lun->vn != NULL) {
2736 			cbe_lun->flags &= ~CTL_LUN_FLAG_OFFLINE;
2737 			ctl_lun_online(cbe_lun);
2738 		}
2739 	} else {
2740 		if (be_lun->vn != NULL) {
2741 			cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE;
2742 			ctl_lun_offline(cbe_lun);
2743 			taskqueue_drain_all(be_lun->io_taskqueue);
2744 			error = ctl_be_block_close(be_lun);
2745 		} else
2746 			error = 0;
2747 	}
2748 	if (be_lun->size_blocks != oldsize)
2749 		ctl_lun_capacity_changed(cbe_lun);
2750 
2751 	/* Tell the user the exact size we ended up using */
2752 	params->lun_size_bytes = be_lun->size_bytes;
2753 
2754 	req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
2755 	return (0);
2756 
2757 bailout_error:
2758 	req->status = CTL_LUN_ERROR;
2759 	return (0);
2760 }
2761 
2762 static void
2763 ctl_be_block_lun_shutdown(void *be_lun)
2764 {
2765 	struct ctl_be_block_lun *lun;
2766 	struct ctl_be_block_softc *softc;
2767 
2768 	lun = (struct ctl_be_block_lun *)be_lun;
2769 
2770 	softc = lun->softc;
2771 
2772 	mtx_lock(&softc->lock);
2773 	lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2774 	if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2775 		wakeup(lun);
2776 	mtx_unlock(&softc->lock);
2777 
2778 }
2779 
2780 static void
2781 ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status)
2782 {
2783 	struct ctl_be_block_lun *lun;
2784 	struct ctl_be_block_softc *softc;
2785 
2786 	lun = (struct ctl_be_block_lun *)be_lun;
2787 	softc = lun->softc;
2788 
2789 	if (status == CTL_LUN_CONFIG_OK) {
2790 		mtx_lock(&softc->lock);
2791 		lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2792 		if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2793 			wakeup(lun);
2794 		mtx_unlock(&softc->lock);
2795 
2796 		/*
2797 		 * We successfully added the LUN, attempt to enable it.
2798 		 */
2799 		if (ctl_enable_lun(&lun->cbe_lun) != 0) {
2800 			printf("%s: ctl_enable_lun() failed!\n", __func__);
2801 			if (ctl_invalidate_lun(&lun->cbe_lun) != 0) {
2802 				printf("%s: ctl_invalidate_lun() failed!\n",
2803 				       __func__);
2804 			}
2805 		}
2806 
2807 		return;
2808 	}
2809 
2810 
2811 	mtx_lock(&softc->lock);
2812 	lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2813 	lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR;
2814 	wakeup(lun);
2815 	mtx_unlock(&softc->lock);
2816 }
2817 
2818 
2819 static int
2820 ctl_be_block_config_write(union ctl_io *io)
2821 {
2822 	struct ctl_be_block_lun *be_lun;
2823 	struct ctl_be_lun *cbe_lun;
2824 	int retval;
2825 
2826 	retval = 0;
2827 
2828 	DPRINTF("entered\n");
2829 
2830 	cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
2831 		CTL_PRIV_BACKEND_LUN].ptr;
2832 	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
2833 
2834 	switch (io->scsiio.cdb[0]) {
2835 	case SYNCHRONIZE_CACHE:
2836 	case SYNCHRONIZE_CACHE_16:
2837 	case WRITE_SAME_10:
2838 	case WRITE_SAME_16:
2839 	case UNMAP:
2840 		/*
2841 		 * The upper level CTL code will filter out any CDBs with
2842 		 * the immediate bit set and return the proper error.
2843 		 *
2844 		 * We don't really need to worry about what LBA range the
2845 		 * user asked to be synced out.  When they issue a sync
2846 		 * cache command, we'll sync out the whole thing.
2847 		 */
2848 		mtx_lock(&be_lun->queue_lock);
2849 		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
2850 				   links);
2851 		mtx_unlock(&be_lun->queue_lock);
2852 		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
2853 		break;
2854 	case START_STOP_UNIT: {
2855 		struct scsi_start_stop_unit *cdb;
2856 
2857 		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
2858 
2859 		if (cdb->how & SSS_START)
2860 			retval = ctl_start_lun(cbe_lun);
2861 		else {
2862 			retval = ctl_stop_lun(cbe_lun);
2863 			/*
2864 			 * XXX KDM Copan-specific offline behavior.
2865 			 * Figure out a reasonable way to port this?
2866 			 */
2867 #ifdef NEEDTOPORT
2868 			if ((retval == 0)
2869 			 && (cdb->byte2 & SSS_ONOFFLINE))
2870 				retval = ctl_lun_offline(cbe_lun);
2871 #endif
2872 		}
2873 
2874 		/*
2875 		 * In general, the above routines should not fail.  They
2876 		 * just set state for the LUN.  So we've got something
2877 		 * pretty wrong here if we can't start or stop the LUN.
2878 		 */
2879 		if (retval != 0) {
2880 			ctl_set_internal_failure(&io->scsiio,
2881 						 /*sks_valid*/ 1,
2882 						 /*retry_count*/ 0xf051);
2883 			retval = CTL_RETVAL_COMPLETE;
2884 		} else {
2885 			ctl_set_success(&io->scsiio);
2886 		}
2887 		ctl_config_write_done(io);
2888 		break;
2889 	}
2890 	default:
2891 		ctl_set_invalid_opcode(&io->scsiio);
2892 		ctl_config_write_done(io);
2893 		retval = CTL_RETVAL_COMPLETE;
2894 		break;
2895 	}
2896 
2897 	return (retval);
2898 }
2899 
2900 static int
2901 ctl_be_block_config_read(union ctl_io *io)
2902 {
2903 	struct ctl_be_block_lun *be_lun;
2904 	struct ctl_be_lun *cbe_lun;
2905 	int retval = 0;
2906 
2907 	DPRINTF("entered\n");
2908 
2909 	cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
2910 		CTL_PRIV_BACKEND_LUN].ptr;
2911 	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
2912 
2913 	switch (io->scsiio.cdb[0]) {
2914 	case SERVICE_ACTION_IN:
2915 		if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
2916 			mtx_lock(&be_lun->queue_lock);
2917 			STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
2918 			    &io->io_hdr, links);
2919 			mtx_unlock(&be_lun->queue_lock);
2920 			taskqueue_enqueue(be_lun->io_taskqueue,
2921 			    &be_lun->io_task);
2922 			retval = CTL_RETVAL_QUEUED;
2923 			break;
2924 		}
2925 		ctl_set_invalid_field(&io->scsiio,
2926 				      /*sks_valid*/ 1,
2927 				      /*command*/ 1,
2928 				      /*field*/ 1,
2929 				      /*bit_valid*/ 1,
2930 				      /*bit*/ 4);
2931 		ctl_config_read_done(io);
2932 		retval = CTL_RETVAL_COMPLETE;
2933 		break;
2934 	default:
2935 		ctl_set_invalid_opcode(&io->scsiio);
2936 		ctl_config_read_done(io);
2937 		retval = CTL_RETVAL_COMPLETE;
2938 		break;
2939 	}
2940 
2941 	return (retval);
2942 }
2943 
2944 static int
2945 ctl_be_block_lun_info(void *be_lun, struct sbuf *sb)
2946 {
2947 	struct ctl_be_block_lun *lun;
2948 	int retval;
2949 
2950 	lun = (struct ctl_be_block_lun *)be_lun;
2951 	retval = 0;
2952 
2953 	retval = sbuf_printf(sb, "\t<num_threads>");
2954 
2955 	if (retval != 0)
2956 		goto bailout;
2957 
2958 	retval = sbuf_printf(sb, "%d", lun->num_threads);
2959 
2960 	if (retval != 0)
2961 		goto bailout;
2962 
2963 	retval = sbuf_printf(sb, "</num_threads>\n");
2964 
2965 bailout:
2966 
2967 	return (retval);
2968 }
2969 
2970 static uint64_t
2971 ctl_be_block_lun_attr(void *be_lun, const char *attrname)
2972 {
2973 	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)be_lun;
2974 
2975 	if (lun->getattr == NULL)
2976 		return (UINT64_MAX);
2977 	return (lun->getattr(lun, attrname));
2978 }
2979 
2980 int
2981 ctl_be_block_init(void)
2982 {
2983 	struct ctl_be_block_softc *softc;
2984 	int retval;
2985 
2986 	softc = &backend_block_softc;
2987 	retval = 0;
2988 
2989 	mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
2990 	beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2991 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2992 	STAILQ_INIT(&softc->lun_list);
2993 
2994 	return (retval);
2995 }
2996