xref: /freebsd/sys/cam/ctl/ctl_backend_ramdisk.c (revision dab59af3bcc7cb7ba01569d3044894b3e860ad56)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2003, 2008 Silicon Graphics International Corp.
5  * Copyright (c) 2012 The FreeBSD Foundation
6  * Copyright (c) 2014-2017 Alexander Motin <mav@FreeBSD.org>
7  * All rights reserved.
8  *
9  * Portions of this software were developed by Edward Tomasz Napierala
10  * under sponsorship from the FreeBSD Foundation.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions, and the following disclaimer,
17  *    without modification.
18  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
19  *    substantially similar to the "NO WARRANTY" disclaimer below
20  *    ("Disclaimer") and any redistribution must be conditioned upon
21  *    including a substantially similar Disclaimer requirement for further
22  *    binary redistribution.
23  *
24  * NO WARRANTY
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
28  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
34  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGES.
36  *
37  * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_ramdisk.c#3 $
38  */
39 /*
40  * CAM Target Layer black hole and RAM disk backend.
41  *
42  * Author: Ken Merry <ken@FreeBSD.org>
43  */
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/kernel.h>
48 #include <sys/condvar.h>
49 #include <sys/types.h>
50 #include <sys/limits.h>
51 #include <sys/lock.h>
52 #include <sys/mutex.h>
53 #include <sys/malloc.h>
54 #include <sys/sx.h>
55 #include <sys/taskqueue.h>
56 #include <sys/time.h>
57 #include <sys/queue.h>
58 #include <sys/conf.h>
59 #include <sys/ioccom.h>
60 #include <sys/module.h>
61 #include <sys/sysctl.h>
62 #include <sys/nv.h>
63 #include <sys/dnv.h>
64 
65 #include <cam/scsi/scsi_all.h>
66 #include <cam/scsi/scsi_da.h>
67 #include <cam/ctl/ctl_io.h>
68 #include <cam/ctl/ctl.h>
69 #include <cam/ctl/ctl_util.h>
70 #include <cam/ctl/ctl_backend.h>
71 #include <cam/ctl/ctl_debug.h>
72 #include <cam/ctl/ctl_ioctl.h>
73 #include <cam/ctl/ctl_ha.h>
74 #include <cam/ctl/ctl_private.h>
75 #include <cam/ctl/ctl_error.h>
76 
/*
 * Accessors for the per-I/O private storage in io_hdr.ctl_private[].
 *
 * PRIV(io) views the CTL_PRIV_BACKEND slot as a struct ctl_ptr_len_flags;
 * this file uses its 'len' member as a running count of blocks already
 * handed to the data mover for the request.
 *
 * ARGS(io) views the CTL_PRIV_LBA_LEN slot as a struct ctl_lba_len_flags,
 * from which the handlers read the request's lba, len and flags.
 */
77 #define PRIV(io)	\
78     ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
79 #define ARGS(io)	\
80     ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
81 
/*
 * Geometry of the page tree and of scatter/gather lists:
 *   PPP  - number of pointers that fit in one PAGE_SIZE indirection page.
 *   PPPS - shift used per tree level when splitting a page number into
 *          per-level indexes (PAGE_SHIFT minus 3 on LP64, minus 2
 *          otherwise).
 *   SGPP - number of struct ctl_sg_entry that fit in one PAGE_SIZE buffer.
 */
82 #define	PPP	(PAGE_SIZE / sizeof(uint8_t **))
83 #ifdef __LP64__
84 #define	PPPS	(PAGE_SHIFT - 3)
85 #else
86 #define	PPPS	(PAGE_SHIFT - 2)
87 #endif
88 #define	SGPP	(PAGE_SIZE / sizeof(struct ctl_sg_entry))
89 
/*
 * Sentinel values stored in leaf slots of the page tree instead of a
 * pointer to backing memory.  An anchored slot is charged against
 * cap_used but has no memory allocated; neither value may be
 * dereferenced or passed to free().
 */
90 #define	P_UNMAPPED	NULL			/* Page is unmapped. */
91 #define	P_ANCHORED	((void *)(uintptr_t)1)	/* Page is anchored. */
92 
/*
 * Operation selector for ctl_backend_ramdisk_getpage().  GP_WRITE and
 * GP_ANCHOR may modify the page tree (taken under the exclusive page
 * lock); GP_READ and GP_OTHER only look a page up.
 */
93 typedef enum {
94 	GP_READ,	/* Return data page or zero page. */
95 	GP_WRITE,	/* Return data page, try allocate if none. */
96 	GP_ANCHOR,	/* Return data page, try anchor if none. */
97 	GP_OTHER,	/* Return what present, do not allocate/anchor. */
98 } getpage_op_t;
99 
/*
 * Per-LUN state flags.  In the visible part of this file only
 * CTL_BE_RAMDISK_LUN_WAITING is used: the LUN removal path sets it before
 * calling ctl_remove_lun() to indicate that it is waiting for shutdown.
 */
100 typedef enum {
101 	CTL_BE_RAMDISK_LUN_UNCONFIGURED	= 0x01,
102 	CTL_BE_RAMDISK_LUN_WAITING	= 0x04
103 } ctl_be_ramdisk_lun_flags;
104 
/*
 * Per-LUN backend state.
 *
 *   cbe_lun      - generic backend LUN; must stay first because the code
 *                  casts between struct ctl_be_lun * and this structure.
 *   indir        - number of indirection levels in the page tree.
 *   pages        - root of the page tree (or the single page when
 *                  indir == 0); when cap_bytes == 0 it is returned
 *                  directly as the write target.
 *   zero_page    - page returned for reads of unmapped/anchored ranges.
 *   page_lock    - sx lock held around page tree walks and cap_used
 *                  updates.
 *   pblocksize   - size in bytes of one backing page allocation.
 *   pblockmul    - number of logical blocks per backing page.
 *   cap_bytes    - capacity limit in bytes; 0 disables the page tree
 *                  entirely (see ctl_backend_ramdisk_getpage()).
 *   cap_used     - bytes currently charged for allocated/anchored pages.
 *   io_taskqueue, io_task, cont_queue, queue_lock
 *                - continuation machinery: I/Os that need another
 *                  transfer chunk are queued on cont_queue under
 *                  queue_lock and processed by io_task.
 */
105 struct ctl_be_ramdisk_lun {
106 	struct ctl_be_lun	cbe_lun;	/* Must be first element. */
107 	struct ctl_lun_create_params params;
108 	int			indir;
109 	uint8_t			**pages;
110 	uint8_t			*zero_page;
111 	struct sx		page_lock;
112 	u_int			pblocksize;
113 	u_int			pblockmul;
114 	uint64_t		size_bytes;
115 	uint64_t		size_blocks;
116 	uint64_t		cap_bytes;
117 	uint64_t		cap_used;
118 	struct ctl_be_ramdisk_softc *softc;
119 	ctl_be_ramdisk_lun_flags flags;
120 	SLIST_ENTRY(ctl_be_ramdisk_lun) links;
121 	struct taskqueue	*io_taskqueue;
122 	struct task		io_task;
123 	STAILQ_HEAD(, ctl_io_hdr) cont_queue;
124 	struct mtx_padalign	queue_lock;
125 };
126 
/*
 * Backend-wide state: the list of LUNs owned by this backend, its count,
 * and the locks used around it.  'lock' is held while lun_list/num_luns
 * are changed; 'modify_lock' is additionally taken by the LUN removal
 * path around the list update.
 */
127 struct ctl_be_ramdisk_softc {
128 	struct sx modify_lock;
129 	struct mtx lock;
130 	int num_luns;
131 	SLIST_HEAD(, ctl_be_ramdisk_lun) lun_list;
132 };
133 
/* The single backend-wide softc instance; set up by ctl_backend_ramdisk_init(). */
134 static struct ctl_be_ramdisk_softc rd_softc;
135 
/*
 * Forward declarations for the backend entry points and the internal
 * helpers that are referenced before their definitions.
 */
136 static int ctl_backend_ramdisk_init(void);
137 static int ctl_backend_ramdisk_shutdown(void);
138 static int ctl_backend_ramdisk_move_done(union ctl_io *io, bool samethr);
139 static void ctl_backend_ramdisk_compare(union ctl_io *io);
140 static void ctl_backend_ramdisk_rw(union ctl_io *io);
141 static int ctl_backend_ramdisk_submit(union ctl_io *io);
142 static void ctl_backend_ramdisk_worker(void *context, int pending);
143 static int ctl_backend_ramdisk_config_read(union ctl_io *io);
144 static int ctl_backend_ramdisk_config_write(union ctl_io *io);
145 static uint64_t ctl_backend_ramdisk_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname);
146 static int ctl_backend_ramdisk_ioctl(struct cdev *dev, u_long cmd,
147 				     caddr_t addr, int flag, struct thread *td);
148 static int ctl_backend_ramdisk_rm(struct ctl_be_ramdisk_softc *softc,
149 				  struct ctl_lun_req *req);
150 static int ctl_backend_ramdisk_create(struct ctl_be_ramdisk_softc *softc,
151 				      struct ctl_lun_req *req);
152 static int ctl_backend_ramdisk_modify(struct ctl_be_ramdisk_softc *softc,
153 				  struct ctl_lun_req *req);
154 static void ctl_backend_ramdisk_lun_shutdown(struct ctl_be_lun *cbe_lun);
155 
/*
 * Backend driver descriptor: names the backend "ramdisk" and wires the
 * entry points defined in this file into the CTL backend interface.
 */
156 static struct ctl_backend_driver ctl_be_ramdisk_driver =
157 {
158 	.name = "ramdisk",
159 	.flags = CTL_BE_FLAG_HAS_CONFIG,
160 	.init = ctl_backend_ramdisk_init,
161 	.shutdown = ctl_backend_ramdisk_shutdown,
162 	.data_submit = ctl_backend_ramdisk_submit,
163 	.config_read = ctl_backend_ramdisk_config_read,
164 	.config_write = ctl_backend_ramdisk_config_write,
165 	.ioctl = ctl_backend_ramdisk_ioctl,
166 	.lun_attr = ctl_backend_ramdisk_lun_attr,
167 };
168 
/*
 * M_RAMDISK is the malloc type used for every allocation made in this
 * file (page tree, data pages, S/G lists, compare buffers); the second
 * line declares the driver descriptor above to CTL.
 */
169 MALLOC_DEFINE(M_RAMDISK, "ctlramdisk", "Memory used for CTL RAMdisk");
170 CTL_BACKEND_DECLARE(cbr, ctl_be_ramdisk_driver);
171 
172 static int
173 ctl_backend_ramdisk_init(void)
174 {
175 	struct ctl_be_ramdisk_softc *softc = &rd_softc;
176 
177 	memset(softc, 0, sizeof(*softc));
178 	sx_init(&softc->modify_lock, "ctlrammod");
179 	mtx_init(&softc->lock, "ctlram", NULL, MTX_DEF);
180 	SLIST_INIT(&softc->lun_list);
181 	return (0);
182 }
183 
184 static int
185 ctl_backend_ramdisk_shutdown(void)
186 {
187 	struct ctl_be_ramdisk_softc *softc = &rd_softc;
188 	struct ctl_be_ramdisk_lun *lun;
189 
190 	mtx_lock(&softc->lock);
191 	while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) {
192 		SLIST_REMOVE_HEAD(&softc->lun_list, links);
193 		softc->num_luns--;
194 		/*
195 		 * Drop our lock here.  Since ctl_remove_lun() can call
196 		 * back into us, this could potentially lead to a recursive
197 		 * lock of the same mutex, which would cause a hang.
198 		 */
199 		mtx_unlock(&softc->lock);
200 		ctl_remove_lun(&lun->cbe_lun);
201 		mtx_lock(&softc->lock);
202 	}
203 	mtx_unlock(&softc->lock);
204 	mtx_destroy(&softc->lock);
205 	sx_destroy(&softc->modify_lock);
206 	return (0);
207 }
208 
209 static uint8_t *
210 ctl_backend_ramdisk_getpage(struct ctl_be_ramdisk_lun *be_lun, off_t pn,
211     getpage_op_t op)
212 {
213 	uint8_t **p, ***pp;
214 	off_t i;
215 	int s;
216 
217 	if (be_lun->cap_bytes == 0) {
218 		switch (op) {
219 		case GP_READ:
220 			return (be_lun->zero_page);
221 		case GP_WRITE:
222 			return ((uint8_t *)be_lun->pages);
223 		case GP_ANCHOR:
224 			return (P_ANCHORED);
225 		default:
226 			return (P_UNMAPPED);
227 		}
228 	}
229 	if (op == GP_WRITE || op == GP_ANCHOR) {
230 		sx_xlock(&be_lun->page_lock);
231 		pp = &be_lun->pages;
232 		for (s = (be_lun->indir - 1) * PPPS; s >= 0; s -= PPPS) {
233 			if (*pp == NULL) {
234 				*pp = malloc(PAGE_SIZE, M_RAMDISK,
235 				    M_WAITOK|M_ZERO);
236 			}
237 			i = pn >> s;
238 			pp = (uint8_t ***)&(*pp)[i];
239 			pn -= i << s;
240 		}
241 		if (*pp == P_UNMAPPED && be_lun->cap_used < be_lun->cap_bytes) {
242 			if (op == GP_WRITE) {
243 				*pp = malloc(be_lun->pblocksize, M_RAMDISK,
244 				    M_WAITOK|M_ZERO);
245 			} else
246 				*pp = P_ANCHORED;
247 			be_lun->cap_used += be_lun->pblocksize;
248 		} else if (*pp == P_ANCHORED && op == GP_WRITE) {
249 			*pp = malloc(be_lun->pblocksize, M_RAMDISK,
250 			    M_WAITOK|M_ZERO);
251 		}
252 		sx_xunlock(&be_lun->page_lock);
253 		return ((uint8_t *)*pp);
254 	} else {
255 		sx_slock(&be_lun->page_lock);
256 		p = be_lun->pages;
257 		for (s = (be_lun->indir - 1) * PPPS; s >= 0; s -= PPPS) {
258 			if (p == NULL)
259 				break;
260 			i = pn >> s;
261 			p = (uint8_t **)p[i];
262 			pn -= i << s;
263 		}
264 		sx_sunlock(&be_lun->page_lock);
265 		if ((p == P_UNMAPPED || p == P_ANCHORED) && op == GP_READ)
266 			return (be_lun->zero_page);
267 		return ((uint8_t *)p);
268 	}
269 };
270 
271 static void
272 ctl_backend_ramdisk_unmappage(struct ctl_be_ramdisk_lun *be_lun, off_t pn)
273 {
274 	uint8_t ***pp;
275 	off_t i;
276 	int s;
277 
278 	if (be_lun->cap_bytes == 0)
279 		return;
280 	sx_xlock(&be_lun->page_lock);
281 	pp = &be_lun->pages;
282 	for (s = (be_lun->indir - 1) * PPPS; s >= 0; s -= PPPS) {
283 		if (*pp == NULL)
284 			goto noindir;
285 		i = pn >> s;
286 		pp = (uint8_t ***)&(*pp)[i];
287 		pn -= i << s;
288 	}
289 	if (*pp == P_ANCHORED) {
290 		be_lun->cap_used -= be_lun->pblocksize;
291 		*pp = P_UNMAPPED;
292 	} else if (*pp != P_UNMAPPED) {
293 		free(*pp, M_RAMDISK);
294 		be_lun->cap_used -= be_lun->pblocksize;
295 		*pp = P_UNMAPPED;
296 	}
297 noindir:
298 	sx_xunlock(&be_lun->page_lock);
299 };
300 
301 static void
302 ctl_backend_ramdisk_anchorpage(struct ctl_be_ramdisk_lun *be_lun, off_t pn)
303 {
304 	uint8_t ***pp;
305 	off_t i;
306 	int s;
307 
308 	if (be_lun->cap_bytes == 0)
309 		return;
310 	sx_xlock(&be_lun->page_lock);
311 	pp = &be_lun->pages;
312 	for (s = (be_lun->indir - 1) * PPPS; s >= 0; s -= PPPS) {
313 		if (*pp == NULL)
314 			goto noindir;
315 		i = pn >> s;
316 		pp = (uint8_t ***)&(*pp)[i];
317 		pn -= i << s;
318 	}
319 	if (*pp == P_UNMAPPED && be_lun->cap_used < be_lun->cap_bytes) {
320 		be_lun->cap_used += be_lun->pblocksize;
321 		*pp = P_ANCHORED;
322 	} else if (*pp != P_ANCHORED) {
323 		free(*pp, M_RAMDISK);
324 		*pp = P_ANCHORED;
325 	}
326 noindir:
327 	sx_xunlock(&be_lun->page_lock);
328 };
329 
330 static void
331 ctl_backend_ramdisk_freeallpages(uint8_t **p, int indir)
332 {
333 	int i;
334 
335 	if (p == NULL)
336 		return;
337 	if (indir == 0) {
338 		free(p, M_RAMDISK);
339 		return;
340 	}
341 	for (i = 0; i < PPP; i++) {
342 		if (p[i] == NULL)
343 			continue;
344 		ctl_backend_ramdisk_freeallpages((uint8_t **)p[i], indir - 1);
345 	}
346 	free(p, M_RAMDISK);
347 };
348 
/*
 * Return the length of the common prefix of buffers 'a' and 'b', looking
 * at no more than 'size' bytes.  A result equal to 'size' means the
 * buffers match over the whole range.
 */
static size_t
cmp(uint8_t *a, uint8_t *b, size_t size)
{
	size_t matched = 0;

	while (matched < size && a[matched] == b[matched])
		matched++;
	return (matched);
}
360 
361 static int
362 ctl_backend_ramdisk_cmp(union ctl_io *io)
363 {
364 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
365 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
366 	uint8_t *page;
367 	uint64_t lba;
368 	u_int lbaoff, lbas, res, off;
369 
370 	lbas = ctl_kern_data_len(io) / cbe_lun->blocksize;
371 	lba = ARGS(io)->lba + PRIV(io)->len - lbas;
372 	off = 0;
373 	for (; lbas > 0; lbas--, lba++) {
374 		page = ctl_backend_ramdisk_getpage(be_lun,
375 		    lba >> cbe_lun->pblockexp, GP_READ);
376 		lbaoff = lba & ~(UINT_MAX << cbe_lun->pblockexp);
377 		page += lbaoff * cbe_lun->blocksize;
378 		res = cmp(ctl_kern_data_ptr(io) + off, page,
379 		    cbe_lun->blocksize);
380 		off += res;
381 		if (res < cbe_lun->blocksize)
382 			break;
383 	}
384 	free(io->scsiio.kern_data_ptr, M_RAMDISK);
385 	if (lbas > 0) {
386 		off += ctl_kern_rel_offset(io) - ctl_kern_data_len(io);
387 		ctl_io_set_compare_failure(io, off);
388 		return (1);
389 	}
390 	return (0);
391 }
392 
393 static int
394 ctl_backend_ramdisk_move_done(union ctl_io *io, bool samethr)
395 {
396 	struct ctl_be_ramdisk_lun *be_lun =
397 	    (struct ctl_be_ramdisk_lun *)CTL_BACKEND_LUN(io);
398 
399 	CTL_DEBUG_PRINT(("ctl_backend_ramdisk_move_done\n"));
400 	if (ctl_kern_sg_entries(io) > 0)
401 		free(ctl_kern_data_ptr(io), M_RAMDISK);
402 	ctl_add_kern_rel_offset(io, ctl_kern_data_len(io));
403 	if ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0 &&
404 	    (io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE) {
405 		if (ARGS(io)->flags & CTL_LLF_COMPARE) {
406 			/* We have data block ready for comparison. */
407 			if (ctl_backend_ramdisk_cmp(io))
408 				goto done;
409 		}
410 		if (ARGS(io)->len > PRIV(io)->len) {
411 			mtx_lock(&be_lun->queue_lock);
412 			STAILQ_INSERT_TAIL(&be_lun->cont_queue,
413 			    &io->io_hdr, links);
414 			mtx_unlock(&be_lun->queue_lock);
415 			taskqueue_enqueue(be_lun->io_taskqueue,
416 			    &be_lun->io_task);
417 			return (0);
418 		}
419 		ctl_io_set_success(io);
420 	}
421 done:
422 	ctl_data_submit_done(io);
423 	return(0);
424 }
425 
426 static void
427 ctl_backend_ramdisk_compare(union ctl_io *io)
428 {
429 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
430 	u_int lbas, len;
431 
432 	lbas = ARGS(io)->len - PRIV(io)->len;
433 	lbas = MIN(lbas, 131072 / cbe_lun->blocksize);
434 	len = lbas * cbe_lun->blocksize;
435 
436 	ctl_set_be_move_done(io, ctl_backend_ramdisk_move_done);
437 	ctl_set_kern_data_ptr(io, malloc(len, M_RAMDISK, M_WAITOK));
438 	ctl_set_kern_data_len(io, len);
439 	ctl_set_kern_sg_entries(io, 0);
440 	io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
441 	PRIV(io)->len += lbas;
442 	ctl_datamove(io);
443 }
444 
445 static void
446 ctl_backend_ramdisk_rw(union ctl_io *io)
447 {
448 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
449 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
450 	struct ctl_sg_entry *sg_entries;
451 	uint8_t *page;
452 	uint64_t lba;
453 	u_int i, len, lbaoff, lbas, sgs, off;
454 	getpage_op_t op;
455 
456 	lba = ARGS(io)->lba + PRIV(io)->len;
457 	lbaoff = lba & ~(UINT_MAX << cbe_lun->pblockexp);
458 	lbas = ARGS(io)->len - PRIV(io)->len;
459 	lbas = MIN(lbas, (SGPP << cbe_lun->pblockexp) - lbaoff);
460 	sgs = (lbas + lbaoff + be_lun->pblockmul - 1) >> cbe_lun->pblockexp;
461 	off = lbaoff * cbe_lun->blocksize;
462 	op = (ARGS(io)->flags & CTL_LLF_WRITE) ? GP_WRITE : GP_READ;
463 	if (sgs > 1) {
464 		sg_entries = malloc(sizeof(struct ctl_sg_entry) * sgs,
465 		    M_RAMDISK, M_WAITOK);
466 		ctl_set_kern_data_ptr(io, sg_entries);
467 		len = lbas * cbe_lun->blocksize;
468 		for (i = 0; i < sgs; i++) {
469 			page = ctl_backend_ramdisk_getpage(be_lun,
470 			    (lba >> cbe_lun->pblockexp) + i, op);
471 			if (page == P_UNMAPPED || page == P_ANCHORED) {
472 				free(sg_entries, M_RAMDISK);
473 nospc:
474 				ctl_io_set_space_alloc_fail(io);
475 				ctl_data_submit_done(io);
476 				return;
477 			}
478 			sg_entries[i].addr = page + off;
479 			sg_entries[i].len = MIN(len, be_lun->pblocksize - off);
480 			len -= sg_entries[i].len;
481 			off = 0;
482 		}
483 	} else {
484 		page = ctl_backend_ramdisk_getpage(be_lun,
485 		    lba >> cbe_lun->pblockexp, op);
486 		if (page == P_UNMAPPED || page == P_ANCHORED)
487 			goto nospc;
488 		sgs = 0;
489 		ctl_set_kern_data_ptr(io, page + off);
490 	}
491 
492 	ctl_set_be_move_done(io, ctl_backend_ramdisk_move_done);
493 	ctl_set_kern_data_len(io, lbas * cbe_lun->blocksize);
494 	ctl_set_kern_sg_entries(io, sgs);
495 	io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
496 	PRIV(io)->len += lbas;
497 	if ((ARGS(io)->flags & CTL_LLF_READ) &&
498 	    ARGS(io)->len <= PRIV(io)->len) {
499 		ctl_io_set_success(io);
500 		if (cbe_lun->serseq >= CTL_LUN_SERSEQ_SOFT)
501 			ctl_serseq_done(io);
502 	}
503 	ctl_datamove(io);
504 }
505 
506 static int
507 ctl_backend_ramdisk_submit(union ctl_io *io)
508 {
509 	struct ctl_lba_len_flags *lbalen = ARGS(io);
510 
511 	if (lbalen->flags & CTL_LLF_VERIFY) {
512 		ctl_io_set_success(io);
513 		ctl_data_submit_done(io);
514 		return (CTL_RETVAL_COMPLETE);
515 	}
516 	PRIV(io)->len = 0;
517 	if (lbalen->flags & CTL_LLF_COMPARE)
518 		ctl_backend_ramdisk_compare(io);
519 	else
520 		ctl_backend_ramdisk_rw(io);
521 	return (CTL_RETVAL_COMPLETE);
522 }
523 
524 static void
525 ctl_backend_ramdisk_worker(void *context, int pending)
526 {
527 	struct ctl_be_ramdisk_lun *be_lun;
528 	union ctl_io *io;
529 
530 	be_lun = (struct ctl_be_ramdisk_lun *)context;
531 	mtx_lock(&be_lun->queue_lock);
532 	for (;;) {
533 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->cont_queue);
534 		if (io != NULL) {
535 			STAILQ_REMOVE_HEAD(&be_lun->cont_queue, links);
536 			mtx_unlock(&be_lun->queue_lock);
537 			if (ARGS(io)->flags & CTL_LLF_COMPARE)
538 				ctl_backend_ramdisk_compare(io);
539 			else
540 				ctl_backend_ramdisk_rw(io);
541 			mtx_lock(&be_lun->queue_lock);
542 			continue;
543 		}
544 
545 		/*
546 		 * If we get here, there is no work left in the queues, so
547 		 * just break out and let the task queue go to sleep.
548 		 */
549 		break;
550 	}
551 	mtx_unlock(&be_lun->queue_lock);
552 }
553 
554 static int
555 ctl_backend_ramdisk_gls(union ctl_io *io)
556 {
557 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
558 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
559 	struct scsi_get_lba_status_data *data;
560 	uint8_t *page;
561 	u_int lbaoff;
562 
563 	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
564 	scsi_u64to8b(ARGS(io)->lba, data->descr[0].addr);
565 	lbaoff = ARGS(io)->lba & ~(UINT_MAX << cbe_lun->pblockexp);
566 	scsi_ulto4b(be_lun->pblockmul - lbaoff, data->descr[0].length);
567 	page = ctl_backend_ramdisk_getpage(be_lun,
568 	    ARGS(io)->lba >> cbe_lun->pblockexp, GP_OTHER);
569 	if (page == P_UNMAPPED)
570 		data->descr[0].status = 1;
571 	else if (page == P_ANCHORED)
572 		data->descr[0].status = 2;
573 	else
574 		data->descr[0].status = 0;
575 	ctl_config_read_done(io);
576 	return (CTL_RETVAL_COMPLETE);
577 }
578 
579 static int
580 ctl_backend_ramdisk_scsi_config_read(union ctl_io *io)
581 {
582 	int retval = 0;
583 
584 	switch (io->scsiio.cdb[0]) {
585 	case SERVICE_ACTION_IN:
586 		if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
587 			retval = ctl_backend_ramdisk_gls(io);
588 			break;
589 		}
590 		ctl_set_invalid_field(&io->scsiio,
591 				      /*sks_valid*/ 1,
592 				      /*command*/ 1,
593 				      /*field*/ 1,
594 				      /*bit_valid*/ 1,
595 				      /*bit*/ 4);
596 		ctl_config_read_done(io);
597 		retval = CTL_RETVAL_COMPLETE;
598 		break;
599 	default:
600 		ctl_set_invalid_opcode(&io->scsiio);
601 		ctl_config_read_done(io);
602 		retval = CTL_RETVAL_COMPLETE;
603 		break;
604 	}
605 	return (retval);
606 }
607 
608 static int
609 ramdisk_namespace_data(union ctl_io *io)
610 {
611 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
612 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
613 	struct nvme_namespace_data *nsdata;
614 
615 	if (io->nvmeio.kern_data_len != sizeof(struct nvme_namespace_data) ||
616 	    io->nvmeio.kern_sg_entries != 0)
617 		return (CTL_RETVAL_ERROR);
618 
619 	nsdata = (struct nvme_namespace_data *)io->nvmeio.kern_data_ptr;
620 	memset(nsdata, 0, sizeof(*nsdata));
621 	nsdata->nsze = htole64(be_lun->size_blocks);
622 	nsdata->ncap = htole64(be_lun->cap_bytes / cbe_lun->blocksize);
623 	nsdata->nuse = htole64(be_lun->cap_used / cbe_lun->blocksize);
624 	nsdata->nsfeat = NVMEM(NVME_NS_DATA_NSFEAT_THIN_PROV) |
625 	    NVMEM(NVME_NS_DATA_NSFEAT_DEALLOC);
626 	nsdata->nlbaf = 1 - 1;
627 	nsdata->dlfeat = NVMEM(NVME_NS_DATA_DLFEAT_DWZ) |
628 	    NVMEF(NVME_NS_DATA_DLFEAT_READ, NVME_NS_DATA_DLFEAT_READ_00);
629 	nsdata->flbas = NVMEF(NVME_NS_DATA_FLBAS_FORMAT, 0);
630 	nsdata->lbaf[0] = NVMEF(NVME_NS_DATA_LBAF_LBADS,
631 	    ffs(cbe_lun->blocksize) - 1);
632 
633 	ctl_lun_nsdata_ids(cbe_lun, nsdata);
634 	ctl_config_read_done(io);
635 	return (CTL_RETVAL_COMPLETE);
636 }
637 
638 static int
639 ramdisk_nvme_ids(union ctl_io *io)
640 {
641 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
642 
643 	if (io->nvmeio.kern_data_len != 4096 || io->nvmeio.kern_sg_entries != 0)
644 		return (CTL_RETVAL_ERROR);
645 
646 	ctl_lun_nvme_ids(cbe_lun, io->nvmeio.kern_data_ptr);
647 	ctl_config_read_done(io);
648 	return (CTL_RETVAL_COMPLETE);
649 }
650 
651 static int
652 ctl_backend_ramdisk_nvme_config_read(union ctl_io *io)
653 {
654 	switch (io->nvmeio.cmd.opc) {
655 	case NVME_OPC_IDENTIFY:
656 	{
657 		uint8_t cns;
658 
659 		cns = le32toh(io->nvmeio.cmd.cdw10) & 0xff;
660 		switch (cns) {
661 		case 0:
662 			return (ramdisk_namespace_data(io));
663 		case 3:
664 			return (ramdisk_nvme_ids(io));
665 		default:
666 			ctl_nvme_set_invalid_field(&io->nvmeio);
667 			ctl_config_read_done(io);
668 			return (CTL_RETVAL_COMPLETE);
669 		}
670 	}
671 	default:
672 		ctl_nvme_set_invalid_opcode(&io->nvmeio);
673 		ctl_config_read_done(io);
674 		return (CTL_RETVAL_COMPLETE);
675 	}
676 }
677 
678 static int
679 ctl_backend_ramdisk_config_read(union ctl_io *io)
680 {
681 	switch (io->io_hdr.io_type) {
682 	case CTL_IO_SCSI:
683 		return (ctl_backend_ramdisk_scsi_config_read(io));
684 	case CTL_IO_NVME_ADMIN:
685 		return (ctl_backend_ramdisk_nvme_config_read(io));
686 	default:
687 		__assert_unreachable();
688 	}
689 }
690 
691 static void
692 ctl_backend_ramdisk_delete(struct ctl_be_lun *cbe_lun, off_t lba, off_t len,
693     int anchor)
694 {
695 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
696 	uint8_t *page;
697 	uint64_t p, lp;
698 	u_int lbaoff;
699 	getpage_op_t op = anchor ? GP_ANCHOR : GP_OTHER;
700 
701 	/* Partially zero first partial page. */
702 	p = lba >> cbe_lun->pblockexp;
703 	lbaoff = lba & ~(UINT_MAX << cbe_lun->pblockexp);
704 	if (lbaoff != 0) {
705 		page = ctl_backend_ramdisk_getpage(be_lun, p, op);
706 		if (page != P_UNMAPPED && page != P_ANCHORED) {
707 			memset(page + lbaoff * cbe_lun->blocksize, 0,
708 			    min(len, be_lun->pblockmul - lbaoff) *
709 			    cbe_lun->blocksize);
710 		}
711 		p++;
712 	}
713 
714 	/* Partially zero last partial page. */
715 	lp = (lba + len) >> cbe_lun->pblockexp;
716 	lbaoff = (lba + len) & ~(UINT_MAX << cbe_lun->pblockexp);
717 	if (p <= lp && lbaoff != 0) {
718 		page = ctl_backend_ramdisk_getpage(be_lun, lp, op);
719 		if (page != P_UNMAPPED && page != P_ANCHORED)
720 			memset(page, 0, lbaoff * cbe_lun->blocksize);
721 	}
722 
723 	/* Delete remaining full pages. */
724 	if (anchor) {
725 		for (; p < lp; p++)
726 			ctl_backend_ramdisk_anchorpage(be_lun, p);
727 	} else {
728 		for (; p < lp; p++)
729 			ctl_backend_ramdisk_unmappage(be_lun, p);
730 	}
731 }
732 
733 static void
734 ctl_backend_ramdisk_ws(union ctl_io *io)
735 {
736 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
737 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
738 	struct ctl_lba_len_flags *lbalen = ARGS(io);
739 	uint8_t *page;
740 	uint64_t lba;
741 	u_int lbaoff, lbas;
742 
743 	CTL_IO_ASSERT(io, SCSI);
744 
745 	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB)) {
746 		ctl_set_invalid_field(&io->scsiio,
747 				      /*sks_valid*/ 1,
748 				      /*command*/ 1,
749 				      /*field*/ 1,
750 				      /*bit_valid*/ 0,
751 				      /*bit*/ 0);
752 		ctl_config_write_done(io);
753 		return;
754 	}
755 	if (lbalen->flags & SWS_UNMAP) {
756 		ctl_backend_ramdisk_delete(cbe_lun, lbalen->lba, lbalen->len,
757 		    (lbalen->flags & SWS_ANCHOR) != 0);
758 		ctl_set_success(&io->scsiio);
759 		ctl_config_write_done(io);
760 		return;
761 	}
762 
763 	for (lba = lbalen->lba, lbas = lbalen->len; lbas > 0; lba++, lbas--) {
764 		page = ctl_backend_ramdisk_getpage(be_lun,
765 		    lba >> cbe_lun->pblockexp, GP_WRITE);
766 		if (page == P_UNMAPPED || page == P_ANCHORED) {
767 			ctl_set_space_alloc_fail(&io->scsiio);
768 			ctl_data_submit_done(io);
769 			return;
770 		}
771 		lbaoff = lba & ~(UINT_MAX << cbe_lun->pblockexp);
772 		page += lbaoff * cbe_lun->blocksize;
773 		if (lbalen->flags & SWS_NDOB) {
774 			memset(page, 0, cbe_lun->blocksize);
775 		} else {
776 			memcpy(page, io->scsiio.kern_data_ptr,
777 			    cbe_lun->blocksize);
778 		}
779 		if (lbalen->flags & SWS_LBDATA)
780 			scsi_ulto4b(lba, page);
781 	}
782 	ctl_set_success(&io->scsiio);
783 	ctl_config_write_done(io);
784 }
785 
786 static void
787 ctl_backend_ramdisk_unmap(union ctl_io *io)
788 {
789 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
790 	struct ctl_ptr_len_flags *ptrlen = (struct ctl_ptr_len_flags *)ARGS(io);
791 	struct scsi_unmap_desc *buf, *end;
792 
793 	CTL_IO_ASSERT(io, SCSI);
794 
795 	if ((ptrlen->flags & ~SU_ANCHOR) != 0) {
796 		ctl_set_invalid_field(&io->scsiio,
797 				      /*sks_valid*/ 0,
798 				      /*command*/ 0,
799 				      /*field*/ 0,
800 				      /*bit_valid*/ 0,
801 				      /*bit*/ 0);
802 		ctl_config_write_done(io);
803 		return;
804 	}
805 
806 	buf = (struct scsi_unmap_desc *)ptrlen->ptr;
807 	end = buf + ptrlen->len / sizeof(*buf);
808 	for (; buf < end; buf++) {
809 		ctl_backend_ramdisk_delete(cbe_lun,
810 		    scsi_8btou64(buf->lba), scsi_4btoul(buf->length),
811 		    (ptrlen->flags & SU_ANCHOR) != 0);
812 	}
813 
814 	ctl_set_success(&io->scsiio);
815 	ctl_config_write_done(io);
816 }
817 
818 static int
819 ctl_backend_ramdisk_scsi_config_write(union ctl_io *io)
820 {
821 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
822 	int retval = 0;
823 
824 	switch (io->scsiio.cdb[0]) {
825 	case SYNCHRONIZE_CACHE:
826 	case SYNCHRONIZE_CACHE_16:
827 		/* We have no cache to flush. */
828 		ctl_set_success(&io->scsiio);
829 		ctl_config_write_done(io);
830 		break;
831 	case START_STOP_UNIT: {
832 		struct scsi_start_stop_unit *cdb;
833 
834 		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
835 		if ((cdb->how & SSS_PC_MASK) != 0) {
836 			ctl_set_success(&io->scsiio);
837 			ctl_config_write_done(io);
838 			break;
839 		}
840 		if (cdb->how & SSS_START) {
841 			if (cdb->how & SSS_LOEJ)
842 				ctl_lun_has_media(cbe_lun);
843 			ctl_start_lun(cbe_lun);
844 		} else {
845 			ctl_stop_lun(cbe_lun);
846 			if (cdb->how & SSS_LOEJ)
847 				ctl_lun_ejected(cbe_lun);
848 		}
849 		ctl_set_success(&io->scsiio);
850 		ctl_config_write_done(io);
851 		break;
852 	}
853 	case PREVENT_ALLOW:
854 		ctl_set_success(&io->scsiio);
855 		ctl_config_write_done(io);
856 		break;
857 	case WRITE_SAME_10:
858 	case WRITE_SAME_16:
859 		ctl_backend_ramdisk_ws(io);
860 		break;
861 	case UNMAP:
862 		ctl_backend_ramdisk_unmap(io);
863 		break;
864 	default:
865 		ctl_set_invalid_opcode(&io->scsiio);
866 		ctl_config_write_done(io);
867 		retval = CTL_RETVAL_COMPLETE;
868 		break;
869 	}
870 
871 	return (retval);
872 }
873 
874 static void
875 ctl_backend_ramdisk_wu(union ctl_io *io)
876 {
877 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
878 	struct ctl_lba_len_flags *lbalen = ARGS(io);
879 
880 	CTL_IO_ASSERT(io, NVME);
881 
882 	/*
883 	 * XXX: Not quite right as reads will return zeroes rather
884 	 * than failing.
885 	 */
886 	ctl_backend_ramdisk_delete(cbe_lun, lbalen->lba, lbalen->len, 1);
887 	ctl_nvme_set_success(&io->nvmeio);
888 	ctl_config_write_done(io);
889 }
890 
891 static void
892 ctl_backend_ramdisk_wz(union ctl_io *io)
893 {
894 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
895 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
896 	struct ctl_lba_len_flags *lbalen = ARGS(io);
897 	uint8_t *page;
898 	uint64_t lba;
899 	u_int lbaoff, lbas;
900 
901 	CTL_IO_ASSERT(io, NVME);
902 
903 	if ((le32toh(io->nvmeio.cmd.cdw12) & (1U << 25)) != 0) {
904 		ctl_backend_ramdisk_delete(cbe_lun, lbalen->lba, lbalen->len,
905 		    0);
906 		ctl_nvme_set_success(&io->nvmeio);
907 		ctl_config_write_done(io);
908 		return;
909 	}
910 
911 	for (lba = lbalen->lba, lbas = lbalen->len; lbas > 0; lba++, lbas--) {
912 		page = ctl_backend_ramdisk_getpage(be_lun,
913 		    lba >> cbe_lun->pblockexp, GP_WRITE);
914 		if (page == P_UNMAPPED || page == P_ANCHORED) {
915 			ctl_nvme_set_space_alloc_fail(&io->nvmeio);
916 			ctl_data_submit_done(io);
917 			return;
918 		}
919 		lbaoff = lba & ~(UINT_MAX << cbe_lun->pblockexp);
920 		page += lbaoff * cbe_lun->blocksize;
921 		memset(page, 0, cbe_lun->blocksize);
922 	}
923 	ctl_nvme_set_success(&io->nvmeio);
924 	ctl_config_write_done(io);
925 }
926 
927 static void
928 ctl_backend_ramdisk_dsm(union ctl_io *io)
929 {
930 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
931 	struct nvme_dsm_range *r;
932 	uint64_t lba;
933 	uint32_t num_blocks;
934 	u_int i, ranges;
935 
936 	CTL_IO_ASSERT(io, NVME);
937 
938 	ranges = le32toh(io->nvmeio.cmd.cdw10) & 0xff;
939 	r = (struct nvme_dsm_range *)io->nvmeio.kern_data_ptr;
940 	for (i = 0; i < ranges; i++) {
941 		lba = le64toh(r[i].starting_lba);
942 		num_blocks = le32toh(r[i].length);
943 		if ((le32toh(r[i].attributes) & (1U << 2)) != 0)
944 			ctl_backend_ramdisk_delete(cbe_lun, lba, num_blocks, 0);
945 	}
946 
947 	ctl_nvme_set_success(&io->nvmeio);
948 	ctl_config_write_done(io);
949 }
950 
951 static int
952 ctl_backend_ramdisk_nvme_config_write(union ctl_io *io)
953 {
954 	switch (io->nvmeio.cmd.opc) {
955 	case NVME_OPC_FLUSH:
956 		/* We have no cache to flush. */
957 		ctl_nvme_set_success(&io->nvmeio);
958 		ctl_config_write_done(io);
959 		break;
960 	case NVME_OPC_WRITE_UNCORRECTABLE:
961 		ctl_backend_ramdisk_wu(io);
962 		break;
963 	case NVME_OPC_WRITE_ZEROES:
964 		ctl_backend_ramdisk_wz(io);
965 		break;
966 	case NVME_OPC_DATASET_MANAGEMENT:
967 		ctl_backend_ramdisk_dsm(io);
968 		break;
969 	default:
970 		ctl_nvme_set_invalid_opcode(&io->nvmeio);
971 		ctl_config_write_done(io);
972 		break;
973 	}
974 	return (CTL_RETVAL_COMPLETE);
975 }
976 
977 static int
978 ctl_backend_ramdisk_config_write(union ctl_io *io)
979 {
980 	switch (io->io_hdr.io_type) {
981 	case CTL_IO_SCSI:
982 		return (ctl_backend_ramdisk_scsi_config_write(io));
983 	case CTL_IO_NVME:
984 		return (ctl_backend_ramdisk_nvme_config_write(io));
985 	default:
986 		__assert_unreachable();
987 	}
988 }
989 
990 static uint64_t
991 ctl_backend_ramdisk_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname)
992 {
993 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
994 	uint64_t		val;
995 
996 	val = UINT64_MAX;
997 	if (be_lun->cap_bytes == 0)
998 		return (val);
999 	sx_slock(&be_lun->page_lock);
1000 	if (strcmp(attrname, "blocksused") == 0) {
1001 		val = be_lun->cap_used / be_lun->cbe_lun.blocksize;
1002 	} else if (strcmp(attrname, "blocksavail") == 0) {
1003 		val = (be_lun->cap_bytes - be_lun->cap_used) /
1004 		    be_lun->cbe_lun.blocksize;
1005 	}
1006 	sx_sunlock(&be_lun->page_lock);
1007 	return (val);
1008 }
1009 
1010 static int
1011 ctl_backend_ramdisk_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1012 			  int flag, struct thread *td)
1013 {
1014 	struct ctl_be_ramdisk_softc *softc = &rd_softc;
1015 	struct ctl_lun_req *lun_req;
1016 	int retval;
1017 
1018 	retval = 0;
1019 	switch (cmd) {
1020 	case CTL_LUN_REQ:
1021 		lun_req = (struct ctl_lun_req *)addr;
1022 		switch (lun_req->reqtype) {
1023 		case CTL_LUNREQ_CREATE:
1024 			retval = ctl_backend_ramdisk_create(softc, lun_req);
1025 			break;
1026 		case CTL_LUNREQ_RM:
1027 			retval = ctl_backend_ramdisk_rm(softc, lun_req);
1028 			break;
1029 		case CTL_LUNREQ_MODIFY:
1030 			retval = ctl_backend_ramdisk_modify(softc, lun_req);
1031 			break;
1032 		default:
1033 			lun_req->status = CTL_LUN_ERROR;
1034 			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1035 				 "%s: invalid LUN request type %d", __func__,
1036 				 lun_req->reqtype);
1037 			break;
1038 		}
1039 		break;
1040 	default:
1041 		retval = ENOTTY;
1042 		break;
1043 	}
1044 
1045 	return (retval);
1046 }
1047 
1048 static int
1049 ctl_backend_ramdisk_rm(struct ctl_be_ramdisk_softc *softc,
1050 		       struct ctl_lun_req *req)
1051 {
1052 	struct ctl_be_ramdisk_lun *be_lun;
1053 	struct ctl_lun_rm_params *params;
1054 	int retval;
1055 
1056 	params = &req->reqdata.rm;
1057 	sx_xlock(&softc->modify_lock);
1058 	mtx_lock(&softc->lock);
1059 	SLIST_FOREACH(be_lun, &softc->lun_list, links) {
1060 		if (be_lun->cbe_lun.lun_id == params->lun_id) {
1061 			SLIST_REMOVE(&softc->lun_list, be_lun,
1062 			    ctl_be_ramdisk_lun, links);
1063 			softc->num_luns--;
1064 			break;
1065 		}
1066 	}
1067 	mtx_unlock(&softc->lock);
1068 	sx_xunlock(&softc->modify_lock);
1069 	if (be_lun == NULL) {
1070 		snprintf(req->error_str, sizeof(req->error_str),
1071 			 "%s: LUN %u is not managed by the ramdisk backend",
1072 			 __func__, params->lun_id);
1073 		goto bailout_error;
1074 	}
1075 
1076 	/*
1077 	 * Set the waiting flag before we invalidate the LUN.  Our shutdown
1078 	 * routine can be called any time after we invalidate the LUN,
1079 	 * and can be called from our context.
1080 	 *
1081 	 * This tells the shutdown routine that we're waiting, or we're
1082 	 * going to wait for the shutdown to happen.
1083 	 */
1084 	mtx_lock(&softc->lock);
1085 	be_lun->flags |= CTL_BE_RAMDISK_LUN_WAITING;
1086 	mtx_unlock(&softc->lock);
1087 
1088 	retval = ctl_remove_lun(&be_lun->cbe_lun);
1089 	if (retval != 0) {
1090 		snprintf(req->error_str, sizeof(req->error_str),
1091 			 "%s: error %d returned from ctl_remove_lun() for "
1092 			 "LUN %d", __func__, retval, params->lun_id);
1093 		mtx_lock(&softc->lock);
1094 		be_lun->flags &= ~CTL_BE_RAMDISK_LUN_WAITING;
1095 		mtx_unlock(&softc->lock);
1096 		goto bailout_error;
1097 	}
1098 
1099 	mtx_lock(&softc->lock);
1100 	while ((be_lun->flags & CTL_BE_RAMDISK_LUN_UNCONFIGURED) == 0) {
1101 		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlramrm", 0);
1102 		if (retval == EINTR)
1103 			break;
1104 	}
1105 	be_lun->flags &= ~CTL_BE_RAMDISK_LUN_WAITING;
1106 	if (be_lun->flags & CTL_BE_RAMDISK_LUN_UNCONFIGURED) {
1107 		mtx_unlock(&softc->lock);
1108 		free(be_lun, M_RAMDISK);
1109 	} else {
1110 		mtx_unlock(&softc->lock);
1111 		return (EINTR);
1112 	}
1113 
1114 	req->status = CTL_LUN_OK;
1115 	return (retval);
1116 
1117 bailout_error:
1118 	req->status = CTL_LUN_ERROR;
1119 	return (0);
1120 }
1121 
1122 static int
1123 ctl_backend_ramdisk_create(struct ctl_be_ramdisk_softc *softc,
1124 			   struct ctl_lun_req *req)
1125 {
1126 	struct ctl_be_ramdisk_lun *be_lun;
1127 	struct ctl_be_lun *cbe_lun;
1128 	struct ctl_lun_create_params *params;
1129 	const char *value;
1130 	char tmpstr[32];
1131 	uint64_t t;
1132 	int retval;
1133 
1134 	retval = 0;
1135 	params = &req->reqdata.create;
1136 
1137 	be_lun = malloc(sizeof(*be_lun), M_RAMDISK, M_ZERO | M_WAITOK);
1138 	cbe_lun = &be_lun->cbe_lun;
1139 	cbe_lun->options = nvlist_clone(req->args_nvl);
1140 	be_lun->params = req->reqdata.create;
1141 	be_lun->softc = softc;
1142 
1143 	if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
1144 		cbe_lun->lun_type = params->device_type;
1145 	else
1146 		cbe_lun->lun_type = T_DIRECT;
1147 	be_lun->flags = 0;
1148 	cbe_lun->flags = 0;
1149 	value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
1150 	if (value != NULL) {
1151 		if (strcmp(value, "primary") == 0)
1152 			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
1153 	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
1154 		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
1155 
1156 	be_lun->pblocksize = PAGE_SIZE;
1157 	value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
1158 	if (value != NULL) {
1159 		ctl_expand_number(value, &t);
1160 		be_lun->pblocksize = t;
1161 	}
1162 	if (be_lun->pblocksize < 512 || be_lun->pblocksize > 131072) {
1163 		snprintf(req->error_str, sizeof(req->error_str),
1164 			 "%s: unsupported pblocksize %u", __func__,
1165 			 be_lun->pblocksize);
1166 		goto bailout_error;
1167 	}
1168 
1169 	if (cbe_lun->lun_type == T_DIRECT ||
1170 	    cbe_lun->lun_type == T_CDROM) {
1171 		if (params->blocksize_bytes != 0)
1172 			cbe_lun->blocksize = params->blocksize_bytes;
1173 		else if (cbe_lun->lun_type == T_CDROM)
1174 			cbe_lun->blocksize = 2048;
1175 		else
1176 			cbe_lun->blocksize = 512;
1177 		be_lun->pblockmul = be_lun->pblocksize / cbe_lun->blocksize;
1178 		if (be_lun->pblockmul < 1 || !powerof2(be_lun->pblockmul)) {
1179 			snprintf(req->error_str, sizeof(req->error_str),
1180 				 "%s: pblocksize %u not exp2 of blocksize %u",
1181 				 __func__,
1182 				 be_lun->pblocksize, cbe_lun->blocksize);
1183 			goto bailout_error;
1184 		}
1185 		if (params->lun_size_bytes < cbe_lun->blocksize) {
1186 			snprintf(req->error_str, sizeof(req->error_str),
1187 				 "%s: LUN size %ju < blocksize %u", __func__,
1188 				 params->lun_size_bytes, cbe_lun->blocksize);
1189 			goto bailout_error;
1190 		}
1191 		be_lun->size_blocks = params->lun_size_bytes / cbe_lun->blocksize;
1192 		be_lun->size_bytes = be_lun->size_blocks * cbe_lun->blocksize;
1193 		be_lun->indir = 0;
1194 		t = be_lun->size_bytes / be_lun->pblocksize;
1195 		while (t > 1) {
1196 			t /= PPP;
1197 			be_lun->indir++;
1198 		}
1199 		cbe_lun->maxlba = be_lun->size_blocks - 1;
1200 		cbe_lun->pblockexp = fls(be_lun->pblockmul) - 1;
1201 		cbe_lun->pblockoff = 0;
1202 		cbe_lun->ublockexp = cbe_lun->pblockexp;
1203 		cbe_lun->ublockoff = 0;
1204 		cbe_lun->atomicblock = be_lun->pblocksize;
1205 		cbe_lun->opttxferlen = SGPP * be_lun->pblocksize;
1206 		value = dnvlist_get_string(cbe_lun->options, "capacity", NULL);
1207 		if (value != NULL)
1208 			ctl_expand_number(value, &be_lun->cap_bytes);
1209 	} else {
1210 		be_lun->pblockmul = 1;
1211 		cbe_lun->pblockexp = 0;
1212 	}
1213 
1214 	/* Tell the user the blocksize we ended up using */
1215 	params->blocksize_bytes = cbe_lun->blocksize;
1216 	params->lun_size_bytes = be_lun->size_bytes;
1217 
1218 	value = dnvlist_get_string(cbe_lun->options, "unmap", NULL);
1219 	if (value == NULL || strcmp(value, "off") != 0)
1220 		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
1221 	value = dnvlist_get_string(cbe_lun->options, "readonly", NULL);
1222 	if (value != NULL) {
1223 		if (strcmp(value, "on") == 0)
1224 			cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
1225 	} else if (cbe_lun->lun_type != T_DIRECT)
1226 		cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
1227 	cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
1228 	value = dnvlist_get_string(cbe_lun->options, "serseq", NULL);
1229 	if (value != NULL && strcmp(value, "on") == 0)
1230 		cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
1231 	else if (value != NULL && strcmp(value, "read") == 0)
1232 		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
1233 	else if (value != NULL && strcmp(value, "soft") == 0)
1234 		cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT;
1235 	else if (value != NULL && strcmp(value, "off") == 0)
1236 		cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
1237 
1238 	if (params->flags & CTL_LUN_FLAG_ID_REQ) {
1239 		cbe_lun->req_lun_id = params->req_lun_id;
1240 		cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
1241 	} else
1242 		cbe_lun->req_lun_id = 0;
1243 
1244 	cbe_lun->lun_shutdown = ctl_backend_ramdisk_lun_shutdown;
1245 	cbe_lun->be = &ctl_be_ramdisk_driver;
1246 	if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
1247 		snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d",
1248 			 softc->num_luns);
1249 		strncpy((char *)cbe_lun->serial_num, tmpstr,
1250 			MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
1251 
1252 		/* Tell the user what we used for a serial number */
1253 		strncpy((char *)params->serial_num, tmpstr,
1254 			MIN(sizeof(params->serial_num), sizeof(tmpstr)));
1255 	} else {
1256 		strncpy((char *)cbe_lun->serial_num, params->serial_num,
1257 			MIN(sizeof(cbe_lun->serial_num),
1258 			    sizeof(params->serial_num)));
1259 	}
1260 	if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
1261 		snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns);
1262 		strncpy((char *)cbe_lun->device_id, tmpstr,
1263 			MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
1264 
1265 		/* Tell the user what we used for a device ID */
1266 		strncpy((char *)params->device_id, tmpstr,
1267 			MIN(sizeof(params->device_id), sizeof(tmpstr)));
1268 	} else {
1269 		strncpy((char *)cbe_lun->device_id, params->device_id,
1270 			MIN(sizeof(cbe_lun->device_id),
1271 			    sizeof(params->device_id)));
1272 	}
1273 
1274 	STAILQ_INIT(&be_lun->cont_queue);
1275 	sx_init(&be_lun->page_lock, "ctlram page");
1276 	if (be_lun->cap_bytes == 0) {
1277 		be_lun->indir = 0;
1278 		be_lun->pages = malloc(be_lun->pblocksize, M_RAMDISK, M_WAITOK);
1279 	}
1280 	be_lun->zero_page = malloc(be_lun->pblocksize, M_RAMDISK,
1281 	    M_WAITOK|M_ZERO);
1282 	mtx_init(&be_lun->queue_lock, "ctlram queue", NULL, MTX_DEF);
1283 	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_backend_ramdisk_worker,
1284 	    be_lun);
1285 
1286 	be_lun->io_taskqueue = taskqueue_create("ctlramtq", M_WAITOK,
1287 	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
1288 	if (be_lun->io_taskqueue == NULL) {
1289 		snprintf(req->error_str, sizeof(req->error_str),
1290 			 "%s: Unable to create taskqueue", __func__);
1291 		goto bailout_error;
1292 	}
1293 
1294 	retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue,
1295 					 /*num threads*/1,
1296 					 /*priority*/PUSER,
1297 					 /*proc*/control_softc->ctl_proc,
1298 					 /*thread name*/"ramdisk");
1299 	if (retval != 0)
1300 		goto bailout_error;
1301 
1302 	retval = ctl_add_lun(&be_lun->cbe_lun);
1303 	if (retval != 0) {
1304 		snprintf(req->error_str, sizeof(req->error_str),
1305 			 "%s: ctl_add_lun() returned error %d, see dmesg for "
1306 			"details", __func__, retval);
1307 		retval = 0;
1308 		goto bailout_error;
1309 	}
1310 
1311 	mtx_lock(&softc->lock);
1312 	softc->num_luns++;
1313 	SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links);
1314 	mtx_unlock(&softc->lock);
1315 
1316 	params->req_lun_id = cbe_lun->lun_id;
1317 
1318 	req->status = CTL_LUN_OK;
1319 	return (retval);
1320 
1321 bailout_error:
1322 	req->status = CTL_LUN_ERROR;
1323 	if (be_lun != NULL) {
1324 		if (be_lun->io_taskqueue != NULL)
1325 			taskqueue_free(be_lun->io_taskqueue);
1326 		nvlist_destroy(cbe_lun->options);
1327 		free(be_lun->zero_page, M_RAMDISK);
1328 		ctl_backend_ramdisk_freeallpages(be_lun->pages, be_lun->indir);
1329 		sx_destroy(&be_lun->page_lock);
1330 		mtx_destroy(&be_lun->queue_lock);
1331 		free(be_lun, M_RAMDISK);
1332 	}
1333 	return (retval);
1334 }
1335 
1336 static int
1337 ctl_backend_ramdisk_modify(struct ctl_be_ramdisk_softc *softc,
1338 		       struct ctl_lun_req *req)
1339 {
1340 	struct ctl_be_ramdisk_lun *be_lun;
1341 	struct ctl_be_lun *cbe_lun;
1342 	struct ctl_lun_modify_params *params;
1343 	const char *value;
1344 	uint32_t blocksize;
1345 	int wasprim;
1346 
1347 	params = &req->reqdata.modify;
1348 	sx_xlock(&softc->modify_lock);
1349 	mtx_lock(&softc->lock);
1350 	SLIST_FOREACH(be_lun, &softc->lun_list, links) {
1351 		if (be_lun->cbe_lun.lun_id == params->lun_id)
1352 			break;
1353 	}
1354 	mtx_unlock(&softc->lock);
1355 	if (be_lun == NULL) {
1356 		snprintf(req->error_str, sizeof(req->error_str),
1357 			 "%s: LUN %u is not managed by the ramdisk backend",
1358 			 __func__, params->lun_id);
1359 		goto bailout_error;
1360 	}
1361 	cbe_lun = &be_lun->cbe_lun;
1362 
1363 	if (params->lun_size_bytes != 0)
1364 		be_lun->params.lun_size_bytes = params->lun_size_bytes;
1365 
1366 	if (req->args_nvl != NULL) {
1367 		nvlist_destroy(cbe_lun->options);
1368 		cbe_lun->options = nvlist_clone(req->args_nvl);
1369 	}
1370 
1371 	wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
1372 	value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
1373 	if (value != NULL) {
1374 		if (strcmp(value, "primary") == 0)
1375 			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
1376 		else
1377 			cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
1378 	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
1379 		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
1380 	else
1381 		cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
1382 	if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
1383 		if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
1384 			ctl_lun_primary(cbe_lun);
1385 		else
1386 			ctl_lun_secondary(cbe_lun);
1387 	}
1388 
1389 	blocksize = be_lun->cbe_lun.blocksize;
1390 	if (be_lun->params.lun_size_bytes < blocksize) {
1391 		snprintf(req->error_str, sizeof(req->error_str),
1392 			"%s: LUN size %ju < blocksize %u", __func__,
1393 			be_lun->params.lun_size_bytes, blocksize);
1394 		goto bailout_error;
1395 	}
1396 	be_lun->size_blocks = be_lun->params.lun_size_bytes / blocksize;
1397 	be_lun->size_bytes = be_lun->size_blocks * blocksize;
1398 	be_lun->cbe_lun.maxlba = be_lun->size_blocks - 1;
1399 	ctl_lun_capacity_changed(&be_lun->cbe_lun);
1400 
1401 	/* Tell the user the exact size we ended up using */
1402 	params->lun_size_bytes = be_lun->size_bytes;
1403 
1404 	sx_xunlock(&softc->modify_lock);
1405 	req->status = CTL_LUN_OK;
1406 	return (0);
1407 
1408 bailout_error:
1409 	sx_xunlock(&softc->modify_lock);
1410 	req->status = CTL_LUN_ERROR;
1411 	return (0);
1412 }
1413 
1414 static void
1415 ctl_backend_ramdisk_lun_shutdown(struct ctl_be_lun *cbe_lun)
1416 {
1417 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
1418 	struct ctl_be_ramdisk_softc *softc = be_lun->softc;
1419 
1420 	taskqueue_drain_all(be_lun->io_taskqueue);
1421 	taskqueue_free(be_lun->io_taskqueue);
1422 	nvlist_destroy(be_lun->cbe_lun.options);
1423 	free(be_lun->zero_page, M_RAMDISK);
1424 	ctl_backend_ramdisk_freeallpages(be_lun->pages, be_lun->indir);
1425 	sx_destroy(&be_lun->page_lock);
1426 	mtx_destroy(&be_lun->queue_lock);
1427 
1428 	mtx_lock(&softc->lock);
1429 	be_lun->flags |= CTL_BE_RAMDISK_LUN_UNCONFIGURED;
1430 	if (be_lun->flags & CTL_BE_RAMDISK_LUN_WAITING)
1431 		wakeup(be_lun);
1432 	else
1433 		free(be_lun, M_RAMDISK);
1434 	mtx_unlock(&softc->lock);
1435 }
1436