xref: /freebsd/sys/cam/ctl/ctl_backend_ramdisk.c (revision 07cc7ea7386c5428cef9e8f06d4ebd8144dec311)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2003, 2008 Silicon Graphics International Corp.
5  * Copyright (c) 2012 The FreeBSD Foundation
6  * Copyright (c) 2014-2017 Alexander Motin <mav@FreeBSD.org>
7  * All rights reserved.
8  *
9  * Portions of this software were developed by Edward Tomasz Napierala
10  * under sponsorship from the FreeBSD Foundation.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions, and the following disclaimer,
17  *    without modification.
18  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
19  *    substantially similar to the "NO WARRANTY" disclaimer below
20  *    ("Disclaimer") and any redistribution must be conditioned upon
21  *    including a substantially similar Disclaimer requirement for further
22  *    binary redistribution.
23  *
24  * NO WARRANTY
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
28  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
34  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGES.
36  *
37  * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_ramdisk.c#3 $
38  */
39 /*
40  * CAM Target Layer black hole and RAM disk backend.
41  *
42  * Author: Ken Merry <ken@FreeBSD.org>
43  */
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/kernel.h>
48 #include <sys/condvar.h>
49 #include <sys/types.h>
50 #include <sys/limits.h>
51 #include <sys/lock.h>
52 #include <sys/mutex.h>
53 #include <sys/malloc.h>
54 #include <sys/sx.h>
55 #include <sys/taskqueue.h>
56 #include <sys/time.h>
57 #include <sys/queue.h>
58 #include <sys/conf.h>
59 #include <sys/ioccom.h>
60 #include <sys/module.h>
61 #include <sys/sysctl.h>
62 #include <sys/nv.h>
63 #include <sys/dnv.h>
64 
65 #include <cam/scsi/scsi_all.h>
66 #include <cam/scsi/scsi_da.h>
67 #include <cam/ctl/ctl_io.h>
68 #include <cam/ctl/ctl.h>
69 #include <cam/ctl/ctl_util.h>
70 #include <cam/ctl/ctl_backend.h>
71 #include <cam/ctl/ctl_debug.h>
72 #include <cam/ctl/ctl_ioctl.h>
73 #include <cam/ctl/ctl_ha.h>
74 #include <cam/ctl/ctl_private.h>
75 #include <cam/ctl/ctl_error.h>
76 
/*
 * Per-I/O private areas.  PRIV() is the backend's own slot; this file
 * uses its 'len' member to count the LBAs of the request that have already
 * been handed to a data move.  ARGS() is the LBA/length/flags slot that
 * describes the request itself.
 */
#define PRIV(io)							\
    ((struct ctl_ptr_len_flags *)					\
    &(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
#define ARGS(io)							\
    ((struct ctl_lba_len_flags *)					\
    &(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])

/*
 * Page table geometry.
 *  PPP  - number of pointers that fit in one page (fan-out of one
 *         indirection level).
 *  PPPS - shift applied per indirection level when splitting a page number
 *         into per-level indexes.
 *  SGPP - number of scatter/gather entries that fit in one page.
 */
#define	PPP	(PAGE_SIZE / sizeof(uint8_t **))
#if defined(__LP64__)
#define	PPPS	(PAGE_SHIFT - 3)
#else
#define	PPPS	(PAGE_SHIFT - 2)
#endif
#define	SGPP	(PAGE_SIZE / sizeof(struct ctl_sg_entry))

/* Sentinel values stored in page table slots instead of a data page. */
#define	P_UNMAPPED	NULL			/* Page is unmapped. */
#define	P_ANCHORED	((void *)(uintptr_t)1)	/* Page is anchored. */
92 
/*
 * Lookup modes accepted by ctl_backend_ramdisk_getpage().
 */
typedef enum {
	/* Return the data page, or the shared zero page if there is none. */
	GP_READ,
	/* Return the data page, trying to allocate one if there is none. */
	GP_WRITE,
	/* Return the data page, trying to anchor the slot if there is none. */
	GP_ANCHOR,
	/* Return whatever is present; never allocate or anchor. */
	GP_OTHER,
} getpage_op_t;
99 
/*
 * State bits kept in ctl_be_ramdisk_lun.flags.
 */
typedef enum {
	CTL_BE_RAMDISK_LUN_UNCONFIGURED	= 0x01,
	/* Set by the remove path before the LUN is invalidated. */
	CTL_BE_RAMDISK_LUN_WAITING	= 0x04
} ctl_be_ramdisk_lun_flags;
104 
105 struct ctl_be_ramdisk_lun {
106 	struct ctl_be_lun	cbe_lun;	/* Must be first element. */
107 	struct ctl_lun_create_params params;
108 	int			indir;
109 	uint8_t			**pages;
110 	uint8_t			*zero_page;
111 	struct sx		page_lock;
112 	u_int			pblocksize;
113 	u_int			pblockmul;
114 	uint64_t		size_bytes;
115 	uint64_t		size_blocks;
116 	uint64_t		cap_bytes;
117 	uint64_t		cap_used;
118 	struct ctl_be_ramdisk_softc *softc;
119 	ctl_be_ramdisk_lun_flags flags;
120 	SLIST_ENTRY(ctl_be_ramdisk_lun) links;
121 	struct taskqueue	*io_taskqueue;
122 	struct task		io_task;
123 	STAILQ_HEAD(, ctl_io_hdr) cont_queue;
124 	struct mtx_padalign	queue_lock;
125 };
126 
127 struct ctl_be_ramdisk_softc {
128 	struct sx modify_lock;
129 	struct mtx lock;
130 	int num_luns;
131 	SLIST_HEAD(, ctl_be_ramdisk_lun) lun_list;
132 };
133 
134 static struct ctl_be_ramdisk_softc rd_softc;
135 extern struct ctl_softc *control_softc;
136 
137 static int ctl_backend_ramdisk_init(void);
138 static int ctl_backend_ramdisk_shutdown(void);
139 static int ctl_backend_ramdisk_move_done(union ctl_io *io, bool samethr);
140 static void ctl_backend_ramdisk_compare(union ctl_io *io);
141 static void ctl_backend_ramdisk_rw(union ctl_io *io);
142 static int ctl_backend_ramdisk_submit(union ctl_io *io);
143 static void ctl_backend_ramdisk_worker(void *context, int pending);
144 static int ctl_backend_ramdisk_config_read(union ctl_io *io);
145 static int ctl_backend_ramdisk_config_write(union ctl_io *io);
146 static uint64_t ctl_backend_ramdisk_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname);
147 static int ctl_backend_ramdisk_ioctl(struct cdev *dev, u_long cmd,
148 				     caddr_t addr, int flag, struct thread *td);
149 static int ctl_backend_ramdisk_rm(struct ctl_be_ramdisk_softc *softc,
150 				  struct ctl_lun_req *req);
151 static int ctl_backend_ramdisk_create(struct ctl_be_ramdisk_softc *softc,
152 				      struct ctl_lun_req *req);
153 static int ctl_backend_ramdisk_modify(struct ctl_be_ramdisk_softc *softc,
154 				  struct ctl_lun_req *req);
155 static void ctl_backend_ramdisk_lun_shutdown(struct ctl_be_lun *cbe_lun);
156 
157 static struct ctl_backend_driver ctl_be_ramdisk_driver =
158 {
159 	.name = "ramdisk",
160 	.flags = CTL_BE_FLAG_HAS_CONFIG,
161 	.init = ctl_backend_ramdisk_init,
162 	.shutdown = ctl_backend_ramdisk_shutdown,
163 	.data_submit = ctl_backend_ramdisk_submit,
164 	.config_read = ctl_backend_ramdisk_config_read,
165 	.config_write = ctl_backend_ramdisk_config_write,
166 	.ioctl = ctl_backend_ramdisk_ioctl,
167 	.lun_attr = ctl_backend_ramdisk_lun_attr,
168 };
169 
170 MALLOC_DEFINE(M_RAMDISK, "ctlramdisk", "Memory used for CTL RAMdisk");
171 CTL_BACKEND_DECLARE(cbr, ctl_be_ramdisk_driver);
172 
173 static int
174 ctl_backend_ramdisk_init(void)
175 {
176 	struct ctl_be_ramdisk_softc *softc = &rd_softc;
177 
178 	memset(softc, 0, sizeof(*softc));
179 	sx_init(&softc->modify_lock, "ctlrammod");
180 	mtx_init(&softc->lock, "ctlram", NULL, MTX_DEF);
181 	SLIST_INIT(&softc->lun_list);
182 	return (0);
183 }
184 
185 static int
186 ctl_backend_ramdisk_shutdown(void)
187 {
188 	struct ctl_be_ramdisk_softc *softc = &rd_softc;
189 	struct ctl_be_ramdisk_lun *lun;
190 
191 	mtx_lock(&softc->lock);
192 	while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) {
193 		SLIST_REMOVE_HEAD(&softc->lun_list, links);
194 		softc->num_luns--;
195 		/*
196 		 * Drop our lock here.  Since ctl_remove_lun() can call
197 		 * back into us, this could potentially lead to a recursive
198 		 * lock of the same mutex, which would cause a hang.
199 		 */
200 		mtx_unlock(&softc->lock);
201 		ctl_remove_lun(&lun->cbe_lun);
202 		mtx_lock(&softc->lock);
203 	}
204 	mtx_unlock(&softc->lock);
205 	mtx_destroy(&softc->lock);
206 	sx_destroy(&softc->modify_lock);
207 	return (0);
208 }
209 
210 static uint8_t *
211 ctl_backend_ramdisk_getpage(struct ctl_be_ramdisk_lun *be_lun, off_t pn,
212     getpage_op_t op)
213 {
214 	uint8_t **p, ***pp;
215 	off_t i;
216 	int s;
217 
218 	if (be_lun->cap_bytes == 0) {
219 		switch (op) {
220 		case GP_READ:
221 			return (be_lun->zero_page);
222 		case GP_WRITE:
223 			return ((uint8_t *)be_lun->pages);
224 		case GP_ANCHOR:
225 			return (P_ANCHORED);
226 		default:
227 			return (P_UNMAPPED);
228 		}
229 	}
230 	if (op == GP_WRITE || op == GP_ANCHOR) {
231 		sx_xlock(&be_lun->page_lock);
232 		pp = &be_lun->pages;
233 		for (s = (be_lun->indir - 1) * PPPS; s >= 0; s -= PPPS) {
234 			if (*pp == NULL) {
235 				*pp = malloc(PAGE_SIZE, M_RAMDISK,
236 				    M_WAITOK|M_ZERO);
237 			}
238 			i = pn >> s;
239 			pp = (uint8_t ***)&(*pp)[i];
240 			pn -= i << s;
241 		}
242 		if (*pp == P_UNMAPPED && be_lun->cap_used < be_lun->cap_bytes) {
243 			if (op == GP_WRITE) {
244 				*pp = malloc(be_lun->pblocksize, M_RAMDISK,
245 				    M_WAITOK|M_ZERO);
246 			} else
247 				*pp = P_ANCHORED;
248 			be_lun->cap_used += be_lun->pblocksize;
249 		} else if (*pp == P_ANCHORED && op == GP_WRITE) {
250 			*pp = malloc(be_lun->pblocksize, M_RAMDISK,
251 			    M_WAITOK|M_ZERO);
252 		}
253 		sx_xunlock(&be_lun->page_lock);
254 		return ((uint8_t *)*pp);
255 	} else {
256 		sx_slock(&be_lun->page_lock);
257 		p = be_lun->pages;
258 		for (s = (be_lun->indir - 1) * PPPS; s >= 0; s -= PPPS) {
259 			if (p == NULL)
260 				break;
261 			i = pn >> s;
262 			p = (uint8_t **)p[i];
263 			pn -= i << s;
264 		}
265 		sx_sunlock(&be_lun->page_lock);
266 		if ((p == P_UNMAPPED || p == P_ANCHORED) && op == GP_READ)
267 			return (be_lun->zero_page);
268 		return ((uint8_t *)p);
269 	}
270 };
271 
272 static void
273 ctl_backend_ramdisk_unmappage(struct ctl_be_ramdisk_lun *be_lun, off_t pn)
274 {
275 	uint8_t ***pp;
276 	off_t i;
277 	int s;
278 
279 	if (be_lun->cap_bytes == 0)
280 		return;
281 	sx_xlock(&be_lun->page_lock);
282 	pp = &be_lun->pages;
283 	for (s = (be_lun->indir - 1) * PPPS; s >= 0; s -= PPPS) {
284 		if (*pp == NULL)
285 			goto noindir;
286 		i = pn >> s;
287 		pp = (uint8_t ***)&(*pp)[i];
288 		pn -= i << s;
289 	}
290 	if (*pp == P_ANCHORED) {
291 		be_lun->cap_used -= be_lun->pblocksize;
292 		*pp = P_UNMAPPED;
293 	} else if (*pp != P_UNMAPPED) {
294 		free(*pp, M_RAMDISK);
295 		be_lun->cap_used -= be_lun->pblocksize;
296 		*pp = P_UNMAPPED;
297 	}
298 noindir:
299 	sx_xunlock(&be_lun->page_lock);
300 };
301 
302 static void
303 ctl_backend_ramdisk_anchorpage(struct ctl_be_ramdisk_lun *be_lun, off_t pn)
304 {
305 	uint8_t ***pp;
306 	off_t i;
307 	int s;
308 
309 	if (be_lun->cap_bytes == 0)
310 		return;
311 	sx_xlock(&be_lun->page_lock);
312 	pp = &be_lun->pages;
313 	for (s = (be_lun->indir - 1) * PPPS; s >= 0; s -= PPPS) {
314 		if (*pp == NULL)
315 			goto noindir;
316 		i = pn >> s;
317 		pp = (uint8_t ***)&(*pp)[i];
318 		pn -= i << s;
319 	}
320 	if (*pp == P_UNMAPPED && be_lun->cap_used < be_lun->cap_bytes) {
321 		be_lun->cap_used += be_lun->pblocksize;
322 		*pp = P_ANCHORED;
323 	} else if (*pp != P_ANCHORED) {
324 		free(*pp, M_RAMDISK);
325 		*pp = P_ANCHORED;
326 	}
327 noindir:
328 	sx_xunlock(&be_lun->page_lock);
329 };
330 
331 static void
332 ctl_backend_ramdisk_freeallpages(uint8_t **p, int indir)
333 {
334 	int i;
335 
336 	if (p == NULL)
337 		return;
338 	if (indir == 0) {
339 		free(p, M_RAMDISK);
340 		return;
341 	}
342 	for (i = 0; i < PPP; i++) {
343 		if (p[i] == NULL)
344 			continue;
345 		ctl_backend_ramdisk_freeallpages((uint8_t **)p[i], indir - 1);
346 	}
347 	free(p, M_RAMDISK);
348 };
349 
/*
 * Return the length of the common prefix of 'a' and 'b', looking at no
 * more than 'size' bytes.  A result equal to 'size' means the buffers are
 * identical over that range.
 */
static size_t
cmp(uint8_t *a, uint8_t *b, size_t size)
{
	size_t matched = 0;

	while (matched < size && a[matched] == b[matched])
		matched++;
	return (matched);
}
361 
362 static int
363 ctl_backend_ramdisk_cmp(union ctl_io *io)
364 {
365 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
366 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
367 	uint8_t *page;
368 	uint64_t lba;
369 	u_int lbaoff, lbas, res, off;
370 
371 	lbas = ctl_kern_data_len(io) / cbe_lun->blocksize;
372 	lba = ARGS(io)->lba + PRIV(io)->len - lbas;
373 	off = 0;
374 	for (; lbas > 0; lbas--, lba++) {
375 		page = ctl_backend_ramdisk_getpage(be_lun,
376 		    lba >> cbe_lun->pblockexp, GP_READ);
377 		lbaoff = lba & ~(UINT_MAX << cbe_lun->pblockexp);
378 		page += lbaoff * cbe_lun->blocksize;
379 		res = cmp(ctl_kern_data_ptr(io) + off, page,
380 		    cbe_lun->blocksize);
381 		off += res;
382 		if (res < cbe_lun->blocksize)
383 			break;
384 	}
385 	free(io->scsiio.kern_data_ptr, M_RAMDISK);
386 	if (lbas > 0) {
387 		off += ctl_kern_rel_offset(io) - ctl_kern_data_len(io);
388 		ctl_io_set_compare_failure(io, off);
389 		return (1);
390 	}
391 	return (0);
392 }
393 
394 static int
395 ctl_backend_ramdisk_move_done(union ctl_io *io, bool samethr)
396 {
397 	struct ctl_be_ramdisk_lun *be_lun =
398 	    (struct ctl_be_ramdisk_lun *)CTL_BACKEND_LUN(io);
399 
400 	CTL_DEBUG_PRINT(("ctl_backend_ramdisk_move_done\n"));
401 	if (ctl_kern_sg_entries(io) > 0)
402 		free(ctl_kern_data_ptr(io), M_RAMDISK);
403 	ctl_add_kern_rel_offset(io, ctl_kern_data_len(io));
404 	if ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0 &&
405 	    (io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE) {
406 		if (ARGS(io)->flags & CTL_LLF_COMPARE) {
407 			/* We have data block ready for comparison. */
408 			if (ctl_backend_ramdisk_cmp(io))
409 				goto done;
410 		}
411 		if (ARGS(io)->len > PRIV(io)->len) {
412 			mtx_lock(&be_lun->queue_lock);
413 			STAILQ_INSERT_TAIL(&be_lun->cont_queue,
414 			    &io->io_hdr, links);
415 			mtx_unlock(&be_lun->queue_lock);
416 			taskqueue_enqueue(be_lun->io_taskqueue,
417 			    &be_lun->io_task);
418 			return (0);
419 		}
420 		ctl_io_set_success(io);
421 	}
422 done:
423 	ctl_data_submit_done(io);
424 	return(0);
425 }
426 
427 static void
428 ctl_backend_ramdisk_compare(union ctl_io *io)
429 {
430 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
431 	u_int lbas, len;
432 
433 	lbas = ARGS(io)->len - PRIV(io)->len;
434 	lbas = MIN(lbas, 131072 / cbe_lun->blocksize);
435 	len = lbas * cbe_lun->blocksize;
436 
437 	ctl_set_be_move_done(io, ctl_backend_ramdisk_move_done);
438 	ctl_set_kern_data_ptr(io, malloc(len, M_RAMDISK, M_WAITOK));
439 	ctl_set_kern_data_len(io, len);
440 	ctl_set_kern_sg_entries(io, 0);
441 	io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
442 	PRIV(io)->len += lbas;
443 	ctl_datamove(io);
444 }
445 
446 static void
447 ctl_backend_ramdisk_rw(union ctl_io *io)
448 {
449 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
450 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
451 	struct ctl_sg_entry *sg_entries;
452 	uint8_t *page;
453 	uint64_t lba;
454 	u_int i, len, lbaoff, lbas, sgs, off;
455 	getpage_op_t op;
456 
457 	lba = ARGS(io)->lba + PRIV(io)->len;
458 	lbaoff = lba & ~(UINT_MAX << cbe_lun->pblockexp);
459 	lbas = ARGS(io)->len - PRIV(io)->len;
460 	lbas = MIN(lbas, (SGPP << cbe_lun->pblockexp) - lbaoff);
461 	sgs = (lbas + lbaoff + be_lun->pblockmul - 1) >> cbe_lun->pblockexp;
462 	off = lbaoff * cbe_lun->blocksize;
463 	op = (ARGS(io)->flags & CTL_LLF_WRITE) ? GP_WRITE : GP_READ;
464 	if (sgs > 1) {
465 		sg_entries = malloc(sizeof(struct ctl_sg_entry) * sgs,
466 		    M_RAMDISK, M_WAITOK);
467 		ctl_set_kern_data_ptr(io, sg_entries);
468 		len = lbas * cbe_lun->blocksize;
469 		for (i = 0; i < sgs; i++) {
470 			page = ctl_backend_ramdisk_getpage(be_lun,
471 			    (lba >> cbe_lun->pblockexp) + i, op);
472 			if (page == P_UNMAPPED || page == P_ANCHORED) {
473 				free(sg_entries, M_RAMDISK);
474 nospc:
475 				ctl_io_set_space_alloc_fail(io);
476 				ctl_data_submit_done(io);
477 				return;
478 			}
479 			sg_entries[i].addr = page + off;
480 			sg_entries[i].len = MIN(len, be_lun->pblocksize - off);
481 			len -= sg_entries[i].len;
482 			off = 0;
483 		}
484 	} else {
485 		page = ctl_backend_ramdisk_getpage(be_lun,
486 		    lba >> cbe_lun->pblockexp, op);
487 		if (page == P_UNMAPPED || page == P_ANCHORED)
488 			goto nospc;
489 		sgs = 0;
490 		ctl_set_kern_data_ptr(io, page + off);
491 	}
492 
493 	ctl_set_be_move_done(io, ctl_backend_ramdisk_move_done);
494 	ctl_set_kern_data_len(io, lbas * cbe_lun->blocksize);
495 	ctl_set_kern_sg_entries(io, sgs);
496 	io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
497 	PRIV(io)->len += lbas;
498 	if ((ARGS(io)->flags & CTL_LLF_READ) &&
499 	    ARGS(io)->len <= PRIV(io)->len) {
500 		ctl_io_set_success(io);
501 		if (cbe_lun->serseq >= CTL_LUN_SERSEQ_SOFT)
502 			ctl_serseq_done(io);
503 	}
504 	ctl_datamove(io);
505 }
506 
507 static int
508 ctl_backend_ramdisk_submit(union ctl_io *io)
509 {
510 	struct ctl_lba_len_flags *lbalen = ARGS(io);
511 
512 	if (lbalen->flags & CTL_LLF_VERIFY) {
513 		ctl_io_set_success(io);
514 		ctl_data_submit_done(io);
515 		return (CTL_RETVAL_COMPLETE);
516 	}
517 	PRIV(io)->len = 0;
518 	if (lbalen->flags & CTL_LLF_COMPARE)
519 		ctl_backend_ramdisk_compare(io);
520 	else
521 		ctl_backend_ramdisk_rw(io);
522 	return (CTL_RETVAL_COMPLETE);
523 }
524 
525 static void
526 ctl_backend_ramdisk_worker(void *context, int pending)
527 {
528 	struct ctl_be_ramdisk_lun *be_lun;
529 	union ctl_io *io;
530 
531 	be_lun = (struct ctl_be_ramdisk_lun *)context;
532 	mtx_lock(&be_lun->queue_lock);
533 	for (;;) {
534 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->cont_queue);
535 		if (io != NULL) {
536 			STAILQ_REMOVE_HEAD(&be_lun->cont_queue, links);
537 			mtx_unlock(&be_lun->queue_lock);
538 			if (ARGS(io)->flags & CTL_LLF_COMPARE)
539 				ctl_backend_ramdisk_compare(io);
540 			else
541 				ctl_backend_ramdisk_rw(io);
542 			mtx_lock(&be_lun->queue_lock);
543 			continue;
544 		}
545 
546 		/*
547 		 * If we get here, there is no work left in the queues, so
548 		 * just break out and let the task queue go to sleep.
549 		 */
550 		break;
551 	}
552 	mtx_unlock(&be_lun->queue_lock);
553 }
554 
555 static int
556 ctl_backend_ramdisk_gls(union ctl_io *io)
557 {
558 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
559 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
560 	struct scsi_get_lba_status_data *data;
561 	uint8_t *page;
562 	u_int lbaoff;
563 
564 	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
565 	scsi_u64to8b(ARGS(io)->lba, data->descr[0].addr);
566 	lbaoff = ARGS(io)->lba & ~(UINT_MAX << cbe_lun->pblockexp);
567 	scsi_ulto4b(be_lun->pblockmul - lbaoff, data->descr[0].length);
568 	page = ctl_backend_ramdisk_getpage(be_lun,
569 	    ARGS(io)->lba >> cbe_lun->pblockexp, GP_OTHER);
570 	if (page == P_UNMAPPED)
571 		data->descr[0].status = 1;
572 	else if (page == P_ANCHORED)
573 		data->descr[0].status = 2;
574 	else
575 		data->descr[0].status = 0;
576 	ctl_config_read_done(io);
577 	return (CTL_RETVAL_COMPLETE);
578 }
579 
580 static int
581 ctl_backend_ramdisk_scsi_config_read(union ctl_io *io)
582 {
583 	int retval = 0;
584 
585 	switch (io->scsiio.cdb[0]) {
586 	case SERVICE_ACTION_IN:
587 		if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
588 			retval = ctl_backend_ramdisk_gls(io);
589 			break;
590 		}
591 		ctl_set_invalid_field(&io->scsiio,
592 				      /*sks_valid*/ 1,
593 				      /*command*/ 1,
594 				      /*field*/ 1,
595 				      /*bit_valid*/ 1,
596 				      /*bit*/ 4);
597 		ctl_config_read_done(io);
598 		retval = CTL_RETVAL_COMPLETE;
599 		break;
600 	default:
601 		ctl_set_invalid_opcode(&io->scsiio);
602 		ctl_config_read_done(io);
603 		retval = CTL_RETVAL_COMPLETE;
604 		break;
605 	}
606 	return (retval);
607 }
608 
609 static int
610 ramdisk_namespace_data(union ctl_io *io)
611 {
612 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
613 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
614 	struct nvme_namespace_data *nsdata;
615 
616 	if (io->nvmeio.kern_data_len != sizeof(struct nvme_namespace_data) ||
617 	    io->nvmeio.kern_sg_entries != 0)
618 		return (CTL_RETVAL_ERROR);
619 
620 	nsdata = (struct nvme_namespace_data *)io->nvmeio.kern_data_ptr;
621 	memset(nsdata, 0, sizeof(*nsdata));
622 	nsdata->nsze = htole64(be_lun->size_blocks);
623 	nsdata->ncap = htole64(be_lun->cap_bytes / cbe_lun->blocksize);
624 	nsdata->nuse = htole64(be_lun->cap_used / cbe_lun->blocksize);
625 	nsdata->nsfeat = NVMEM(NVME_NS_DATA_NSFEAT_THIN_PROV) |
626 	    NVMEM(NVME_NS_DATA_NSFEAT_DEALLOC);
627 	nsdata->nlbaf = 1 - 1;
628 	nsdata->dlfeat = NVMEM(NVME_NS_DATA_DLFEAT_DWZ) |
629 	    NVMEF(NVME_NS_DATA_DLFEAT_READ, NVME_NS_DATA_DLFEAT_READ_00);
630 	nsdata->flbas = NVMEF(NVME_NS_DATA_FLBAS_FORMAT, 0);
631 	nsdata->lbaf[0] = NVMEF(NVME_NS_DATA_LBAF_LBADS,
632 	    ffs(cbe_lun->blocksize) - 1);
633 
634 	ctl_lun_nsdata_ids(cbe_lun, nsdata);
635 	ctl_config_read_done(io);
636 	return (CTL_RETVAL_COMPLETE);
637 }
638 
639 static int
640 ramdisk_nvme_ids(union ctl_io *io)
641 {
642 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
643 
644 	if (io->nvmeio.kern_data_len != 4096 || io->nvmeio.kern_sg_entries != 0)
645 		return (CTL_RETVAL_ERROR);
646 
647 	ctl_lun_nvme_ids(cbe_lun, io->nvmeio.kern_data_ptr);
648 	ctl_config_read_done(io);
649 	return (CTL_RETVAL_COMPLETE);
650 }
651 
652 static int
653 ctl_backend_ramdisk_nvme_config_read(union ctl_io *io)
654 {
655 	switch (io->nvmeio.cmd.opc) {
656 	case NVME_OPC_IDENTIFY:
657 	{
658 		uint8_t cns;
659 
660 		cns = le32toh(io->nvmeio.cmd.cdw10) & 0xff;
661 		switch (cns) {
662 		case 0:
663 			return (ramdisk_namespace_data(io));
664 		case 3:
665 			return (ramdisk_nvme_ids(io));
666 		default:
667 			ctl_nvme_set_invalid_field(&io->nvmeio);
668 			ctl_config_read_done(io);
669 			return (CTL_RETVAL_COMPLETE);
670 		}
671 	}
672 	default:
673 		ctl_nvme_set_invalid_opcode(&io->nvmeio);
674 		ctl_config_read_done(io);
675 		return (CTL_RETVAL_COMPLETE);
676 	}
677 }
678 
679 static int
680 ctl_backend_ramdisk_config_read(union ctl_io *io)
681 {
682 	switch (io->io_hdr.io_type) {
683 	case CTL_IO_SCSI:
684 		return (ctl_backend_ramdisk_scsi_config_read(io));
685 	case CTL_IO_NVME_ADMIN:
686 		return (ctl_backend_ramdisk_nvme_config_read(io));
687 	default:
688 		__assert_unreachable();
689 	}
690 }
691 
692 static void
693 ctl_backend_ramdisk_delete(struct ctl_be_lun *cbe_lun, off_t lba, off_t len,
694     int anchor)
695 {
696 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
697 	uint8_t *page;
698 	uint64_t p, lp;
699 	u_int lbaoff;
700 	getpage_op_t op = anchor ? GP_ANCHOR : GP_OTHER;
701 
702 	/* Partially zero first partial page. */
703 	p = lba >> cbe_lun->pblockexp;
704 	lbaoff = lba & ~(UINT_MAX << cbe_lun->pblockexp);
705 	if (lbaoff != 0) {
706 		page = ctl_backend_ramdisk_getpage(be_lun, p, op);
707 		if (page != P_UNMAPPED && page != P_ANCHORED) {
708 			memset(page + lbaoff * cbe_lun->blocksize, 0,
709 			    min(len, be_lun->pblockmul - lbaoff) *
710 			    cbe_lun->blocksize);
711 		}
712 		p++;
713 	}
714 
715 	/* Partially zero last partial page. */
716 	lp = (lba + len) >> cbe_lun->pblockexp;
717 	lbaoff = (lba + len) & ~(UINT_MAX << cbe_lun->pblockexp);
718 	if (p <= lp && lbaoff != 0) {
719 		page = ctl_backend_ramdisk_getpage(be_lun, lp, op);
720 		if (page != P_UNMAPPED && page != P_ANCHORED)
721 			memset(page, 0, lbaoff * cbe_lun->blocksize);
722 	}
723 
724 	/* Delete remaining full pages. */
725 	if (anchor) {
726 		for (; p < lp; p++)
727 			ctl_backend_ramdisk_anchorpage(be_lun, p);
728 	} else {
729 		for (; p < lp; p++)
730 			ctl_backend_ramdisk_unmappage(be_lun, p);
731 	}
732 }
733 
734 static void
735 ctl_backend_ramdisk_ws(union ctl_io *io)
736 {
737 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
738 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
739 	struct ctl_lba_len_flags *lbalen = ARGS(io);
740 	uint8_t *page;
741 	uint64_t lba;
742 	u_int lbaoff, lbas;
743 
744 	CTL_IO_ASSERT(io, SCSI);
745 
746 	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB)) {
747 		ctl_set_invalid_field(&io->scsiio,
748 				      /*sks_valid*/ 1,
749 				      /*command*/ 1,
750 				      /*field*/ 1,
751 				      /*bit_valid*/ 0,
752 				      /*bit*/ 0);
753 		ctl_config_write_done(io);
754 		return;
755 	}
756 	if (lbalen->flags & SWS_UNMAP) {
757 		ctl_backend_ramdisk_delete(cbe_lun, lbalen->lba, lbalen->len,
758 		    (lbalen->flags & SWS_ANCHOR) != 0);
759 		ctl_set_success(&io->scsiio);
760 		ctl_config_write_done(io);
761 		return;
762 	}
763 
764 	for (lba = lbalen->lba, lbas = lbalen->len; lbas > 0; lba++, lbas--) {
765 		page = ctl_backend_ramdisk_getpage(be_lun,
766 		    lba >> cbe_lun->pblockexp, GP_WRITE);
767 		if (page == P_UNMAPPED || page == P_ANCHORED) {
768 			ctl_set_space_alloc_fail(&io->scsiio);
769 			ctl_data_submit_done(io);
770 			return;
771 		}
772 		lbaoff = lba & ~(UINT_MAX << cbe_lun->pblockexp);
773 		page += lbaoff * cbe_lun->blocksize;
774 		if (lbalen->flags & SWS_NDOB) {
775 			memset(page, 0, cbe_lun->blocksize);
776 		} else {
777 			memcpy(page, io->scsiio.kern_data_ptr,
778 			    cbe_lun->blocksize);
779 		}
780 		if (lbalen->flags & SWS_LBDATA)
781 			scsi_ulto4b(lba, page);
782 	}
783 	ctl_set_success(&io->scsiio);
784 	ctl_config_write_done(io);
785 }
786 
787 static void
788 ctl_backend_ramdisk_unmap(union ctl_io *io)
789 {
790 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
791 	struct ctl_ptr_len_flags *ptrlen = (struct ctl_ptr_len_flags *)ARGS(io);
792 	struct scsi_unmap_desc *buf, *end;
793 
794 	CTL_IO_ASSERT(io, SCSI);
795 
796 	if ((ptrlen->flags & ~SU_ANCHOR) != 0) {
797 		ctl_set_invalid_field(&io->scsiio,
798 				      /*sks_valid*/ 0,
799 				      /*command*/ 0,
800 				      /*field*/ 0,
801 				      /*bit_valid*/ 0,
802 				      /*bit*/ 0);
803 		ctl_config_write_done(io);
804 		return;
805 	}
806 
807 	buf = (struct scsi_unmap_desc *)ptrlen->ptr;
808 	end = buf + ptrlen->len / sizeof(*buf);
809 	for (; buf < end; buf++) {
810 		ctl_backend_ramdisk_delete(cbe_lun,
811 		    scsi_8btou64(buf->lba), scsi_4btoul(buf->length),
812 		    (ptrlen->flags & SU_ANCHOR) != 0);
813 	}
814 
815 	ctl_set_success(&io->scsiio);
816 	ctl_config_write_done(io);
817 }
818 
819 static int
820 ctl_backend_ramdisk_scsi_config_write(union ctl_io *io)
821 {
822 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
823 	int retval = 0;
824 
825 	switch (io->scsiio.cdb[0]) {
826 	case SYNCHRONIZE_CACHE:
827 	case SYNCHRONIZE_CACHE_16:
828 		/* We have no cache to flush. */
829 		ctl_set_success(&io->scsiio);
830 		ctl_config_write_done(io);
831 		break;
832 	case START_STOP_UNIT: {
833 		struct scsi_start_stop_unit *cdb;
834 
835 		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
836 		if ((cdb->how & SSS_PC_MASK) != 0) {
837 			ctl_set_success(&io->scsiio);
838 			ctl_config_write_done(io);
839 			break;
840 		}
841 		if (cdb->how & SSS_START) {
842 			if (cdb->how & SSS_LOEJ)
843 				ctl_lun_has_media(cbe_lun);
844 			ctl_start_lun(cbe_lun);
845 		} else {
846 			ctl_stop_lun(cbe_lun);
847 			if (cdb->how & SSS_LOEJ)
848 				ctl_lun_ejected(cbe_lun);
849 		}
850 		ctl_set_success(&io->scsiio);
851 		ctl_config_write_done(io);
852 		break;
853 	}
854 	case PREVENT_ALLOW:
855 		ctl_set_success(&io->scsiio);
856 		ctl_config_write_done(io);
857 		break;
858 	case WRITE_SAME_10:
859 	case WRITE_SAME_16:
860 		ctl_backend_ramdisk_ws(io);
861 		break;
862 	case UNMAP:
863 		ctl_backend_ramdisk_unmap(io);
864 		break;
865 	default:
866 		ctl_set_invalid_opcode(&io->scsiio);
867 		ctl_config_write_done(io);
868 		retval = CTL_RETVAL_COMPLETE;
869 		break;
870 	}
871 
872 	return (retval);
873 }
874 
875 static void
876 ctl_backend_ramdisk_wu(union ctl_io *io)
877 {
878 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
879 	struct ctl_lba_len_flags *lbalen = ARGS(io);
880 
881 	CTL_IO_ASSERT(io, NVME);
882 
883 	/*
884 	 * XXX: Not quite right as reads will return zeroes rather
885 	 * than failing.
886 	 */
887 	ctl_backend_ramdisk_delete(cbe_lun, lbalen->lba, lbalen->len, 1);
888 	ctl_nvme_set_success(&io->nvmeio);
889 	ctl_config_write_done(io);
890 }
891 
892 static void
893 ctl_backend_ramdisk_wz(union ctl_io *io)
894 {
895 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
896 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
897 	struct ctl_lba_len_flags *lbalen = ARGS(io);
898 	uint8_t *page;
899 	uint64_t lba;
900 	u_int lbaoff, lbas;
901 
902 	CTL_IO_ASSERT(io, NVME);
903 
904 	if ((le32toh(io->nvmeio.cmd.cdw12) & (1U << 25)) != 0) {
905 		ctl_backend_ramdisk_delete(cbe_lun, lbalen->lba, lbalen->len,
906 		    0);
907 		ctl_nvme_set_success(&io->nvmeio);
908 		ctl_config_write_done(io);
909 		return;
910 	}
911 
912 	for (lba = lbalen->lba, lbas = lbalen->len; lbas > 0; lba++, lbas--) {
913 		page = ctl_backend_ramdisk_getpage(be_lun,
914 		    lba >> cbe_lun->pblockexp, GP_WRITE);
915 		if (page == P_UNMAPPED || page == P_ANCHORED) {
916 			ctl_nvme_set_space_alloc_fail(&io->nvmeio);
917 			ctl_data_submit_done(io);
918 			return;
919 		}
920 		lbaoff = lba & ~(UINT_MAX << cbe_lun->pblockexp);
921 		page += lbaoff * cbe_lun->blocksize;
922 		memset(page, 0, cbe_lun->blocksize);
923 	}
924 	ctl_nvme_set_success(&io->nvmeio);
925 	ctl_config_write_done(io);
926 }
927 
928 static void
929 ctl_backend_ramdisk_dsm(union ctl_io *io)
930 {
931 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
932 	struct nvme_dsm_range *r;
933 	uint64_t lba;
934 	uint32_t num_blocks;
935 	u_int i, ranges;
936 
937 	CTL_IO_ASSERT(io, NVME);
938 
939 	ranges = le32toh(io->nvmeio.cmd.cdw10) & 0xff;
940 	r = (struct nvme_dsm_range *)io->nvmeio.kern_data_ptr;
941 	for (i = 0; i < ranges; i++) {
942 		lba = le64toh(r[i].starting_lba);
943 		num_blocks = le32toh(r[i].length);
944 		if ((le32toh(r[i].attributes) & (1U << 2)) != 0)
945 			ctl_backend_ramdisk_delete(cbe_lun, lba, num_blocks, 0);
946 	}
947 
948 	ctl_nvme_set_success(&io->nvmeio);
949 	ctl_config_write_done(io);
950 }
951 
952 static int
953 ctl_backend_ramdisk_nvme_config_write(union ctl_io *io)
954 {
955 	switch (io->nvmeio.cmd.opc) {
956 	case NVME_OPC_FLUSH:
957 		/* We have no cache to flush. */
958 		ctl_nvme_set_success(&io->nvmeio);
959 		ctl_config_write_done(io);
960 		break;
961 	case NVME_OPC_WRITE_UNCORRECTABLE:
962 		ctl_backend_ramdisk_wu(io);
963 		break;
964 	case NVME_OPC_WRITE_ZEROES:
965 		ctl_backend_ramdisk_wz(io);
966 		break;
967 	case NVME_OPC_DATASET_MANAGEMENT:
968 		ctl_backend_ramdisk_dsm(io);
969 		break;
970 	default:
971 		ctl_nvme_set_invalid_opcode(&io->nvmeio);
972 		ctl_config_write_done(io);
973 		break;
974 	}
975 	return (CTL_RETVAL_COMPLETE);
976 }
977 
978 static int
979 ctl_backend_ramdisk_config_write(union ctl_io *io)
980 {
981 	switch (io->io_hdr.io_type) {
982 	case CTL_IO_SCSI:
983 		return (ctl_backend_ramdisk_scsi_config_write(io));
984 	case CTL_IO_NVME:
985 		return (ctl_backend_ramdisk_nvme_config_write(io));
986 	default:
987 		__assert_unreachable();
988 	}
989 }
990 
991 static uint64_t
992 ctl_backend_ramdisk_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname)
993 {
994 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
995 	uint64_t		val;
996 
997 	val = UINT64_MAX;
998 	if (be_lun->cap_bytes == 0)
999 		return (val);
1000 	sx_slock(&be_lun->page_lock);
1001 	if (strcmp(attrname, "blocksused") == 0) {
1002 		val = be_lun->cap_used / be_lun->cbe_lun.blocksize;
1003 	} else if (strcmp(attrname, "blocksavail") == 0) {
1004 		val = (be_lun->cap_bytes - be_lun->cap_used) /
1005 		    be_lun->cbe_lun.blocksize;
1006 	}
1007 	sx_sunlock(&be_lun->page_lock);
1008 	return (val);
1009 }
1010 
1011 static int
1012 ctl_backend_ramdisk_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1013 			  int flag, struct thread *td)
1014 {
1015 	struct ctl_be_ramdisk_softc *softc = &rd_softc;
1016 	struct ctl_lun_req *lun_req;
1017 	int retval;
1018 
1019 	retval = 0;
1020 	switch (cmd) {
1021 	case CTL_LUN_REQ:
1022 		lun_req = (struct ctl_lun_req *)addr;
1023 		switch (lun_req->reqtype) {
1024 		case CTL_LUNREQ_CREATE:
1025 			retval = ctl_backend_ramdisk_create(softc, lun_req);
1026 			break;
1027 		case CTL_LUNREQ_RM:
1028 			retval = ctl_backend_ramdisk_rm(softc, lun_req);
1029 			break;
1030 		case CTL_LUNREQ_MODIFY:
1031 			retval = ctl_backend_ramdisk_modify(softc, lun_req);
1032 			break;
1033 		default:
1034 			lun_req->status = CTL_LUN_ERROR;
1035 			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1036 				 "%s: invalid LUN request type %d", __func__,
1037 				 lun_req->reqtype);
1038 			break;
1039 		}
1040 		break;
1041 	default:
1042 		retval = ENOTTY;
1043 		break;
1044 	}
1045 
1046 	return (retval);
1047 }
1048 
1049 static int
1050 ctl_backend_ramdisk_rm(struct ctl_be_ramdisk_softc *softc,
1051 		       struct ctl_lun_req *req)
1052 {
1053 	struct ctl_be_ramdisk_lun *be_lun;
1054 	struct ctl_lun_rm_params *params;
1055 	int retval;
1056 
1057 	params = &req->reqdata.rm;
1058 	sx_xlock(&softc->modify_lock);
1059 	mtx_lock(&softc->lock);
1060 	SLIST_FOREACH(be_lun, &softc->lun_list, links) {
1061 		if (be_lun->cbe_lun.lun_id == params->lun_id) {
1062 			SLIST_REMOVE(&softc->lun_list, be_lun,
1063 			    ctl_be_ramdisk_lun, links);
1064 			softc->num_luns--;
1065 			break;
1066 		}
1067 	}
1068 	mtx_unlock(&softc->lock);
1069 	sx_xunlock(&softc->modify_lock);
1070 	if (be_lun == NULL) {
1071 		snprintf(req->error_str, sizeof(req->error_str),
1072 			 "%s: LUN %u is not managed by the ramdisk backend",
1073 			 __func__, params->lun_id);
1074 		goto bailout_error;
1075 	}
1076 
1077 	/*
1078 	 * Set the waiting flag before we invalidate the LUN.  Our shutdown
1079 	 * routine can be called any time after we invalidate the LUN,
1080 	 * and can be called from our context.
1081 	 *
1082 	 * This tells the shutdown routine that we're waiting, or we're
1083 	 * going to wait for the shutdown to happen.
1084 	 */
1085 	mtx_lock(&softc->lock);
1086 	be_lun->flags |= CTL_BE_RAMDISK_LUN_WAITING;
1087 	mtx_unlock(&softc->lock);
1088 
1089 	retval = ctl_remove_lun(&be_lun->cbe_lun);
1090 	if (retval != 0) {
1091 		snprintf(req->error_str, sizeof(req->error_str),
1092 			 "%s: error %d returned from ctl_remove_lun() for "
1093 			 "LUN %d", __func__, retval, params->lun_id);
1094 		mtx_lock(&softc->lock);
1095 		be_lun->flags &= ~CTL_BE_RAMDISK_LUN_WAITING;
1096 		mtx_unlock(&softc->lock);
1097 		goto bailout_error;
1098 	}
1099 
1100 	mtx_lock(&softc->lock);
1101 	while ((be_lun->flags & CTL_BE_RAMDISK_LUN_UNCONFIGURED) == 0) {
1102 		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlramrm", 0);
1103 		if (retval == EINTR)
1104 			break;
1105 	}
1106 	be_lun->flags &= ~CTL_BE_RAMDISK_LUN_WAITING;
1107 	if (be_lun->flags & CTL_BE_RAMDISK_LUN_UNCONFIGURED) {
1108 		mtx_unlock(&softc->lock);
1109 		free(be_lun, M_RAMDISK);
1110 	} else {
1111 		mtx_unlock(&softc->lock);
1112 		return (EINTR);
1113 	}
1114 
1115 	req->status = CTL_LUN_OK;
1116 	return (retval);
1117 
1118 bailout_error:
1119 	req->status = CTL_LUN_ERROR;
1120 	return (0);
1121 }
1122 
1123 static int
1124 ctl_backend_ramdisk_create(struct ctl_be_ramdisk_softc *softc,
1125 			   struct ctl_lun_req *req)
1126 {
1127 	struct ctl_be_ramdisk_lun *be_lun;
1128 	struct ctl_be_lun *cbe_lun;
1129 	struct ctl_lun_create_params *params;
1130 	const char *value;
1131 	char tmpstr[32];
1132 	uint64_t t;
1133 	int retval;
1134 
1135 	retval = 0;
1136 	params = &req->reqdata.create;
1137 
1138 	be_lun = malloc(sizeof(*be_lun), M_RAMDISK, M_ZERO | M_WAITOK);
1139 	cbe_lun = &be_lun->cbe_lun;
1140 	cbe_lun->options = nvlist_clone(req->args_nvl);
1141 	be_lun->params = req->reqdata.create;
1142 	be_lun->softc = softc;
1143 
1144 	if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
1145 		cbe_lun->lun_type = params->device_type;
1146 	else
1147 		cbe_lun->lun_type = T_DIRECT;
1148 	be_lun->flags = 0;
1149 	cbe_lun->flags = 0;
1150 	value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
1151 	if (value != NULL) {
1152 		if (strcmp(value, "primary") == 0)
1153 			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
1154 	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
1155 		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
1156 
1157 	be_lun->pblocksize = PAGE_SIZE;
1158 	value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
1159 	if (value != NULL) {
1160 		ctl_expand_number(value, &t);
1161 		be_lun->pblocksize = t;
1162 	}
1163 	if (be_lun->pblocksize < 512 || be_lun->pblocksize > 131072) {
1164 		snprintf(req->error_str, sizeof(req->error_str),
1165 			 "%s: unsupported pblocksize %u", __func__,
1166 			 be_lun->pblocksize);
1167 		goto bailout_error;
1168 	}
1169 
1170 	if (cbe_lun->lun_type == T_DIRECT ||
1171 	    cbe_lun->lun_type == T_CDROM) {
1172 		if (params->blocksize_bytes != 0)
1173 			cbe_lun->blocksize = params->blocksize_bytes;
1174 		else if (cbe_lun->lun_type == T_CDROM)
1175 			cbe_lun->blocksize = 2048;
1176 		else
1177 			cbe_lun->blocksize = 512;
1178 		be_lun->pblockmul = be_lun->pblocksize / cbe_lun->blocksize;
1179 		if (be_lun->pblockmul < 1 || !powerof2(be_lun->pblockmul)) {
1180 			snprintf(req->error_str, sizeof(req->error_str),
1181 				 "%s: pblocksize %u not exp2 of blocksize %u",
1182 				 __func__,
1183 				 be_lun->pblocksize, cbe_lun->blocksize);
1184 			goto bailout_error;
1185 		}
1186 		if (params->lun_size_bytes < cbe_lun->blocksize) {
1187 			snprintf(req->error_str, sizeof(req->error_str),
1188 				 "%s: LUN size %ju < blocksize %u", __func__,
1189 				 params->lun_size_bytes, cbe_lun->blocksize);
1190 			goto bailout_error;
1191 		}
1192 		be_lun->size_blocks = params->lun_size_bytes / cbe_lun->blocksize;
1193 		be_lun->size_bytes = be_lun->size_blocks * cbe_lun->blocksize;
1194 		be_lun->indir = 0;
1195 		t = be_lun->size_bytes / be_lun->pblocksize;
1196 		while (t > 1) {
1197 			t /= PPP;
1198 			be_lun->indir++;
1199 		}
1200 		cbe_lun->maxlba = be_lun->size_blocks - 1;
1201 		cbe_lun->pblockexp = fls(be_lun->pblockmul) - 1;
1202 		cbe_lun->pblockoff = 0;
1203 		cbe_lun->ublockexp = cbe_lun->pblockexp;
1204 		cbe_lun->ublockoff = 0;
1205 		cbe_lun->atomicblock = be_lun->pblocksize;
1206 		cbe_lun->opttxferlen = SGPP * be_lun->pblocksize;
1207 		value = dnvlist_get_string(cbe_lun->options, "capacity", NULL);
1208 		if (value != NULL)
1209 			ctl_expand_number(value, &be_lun->cap_bytes);
1210 	} else {
1211 		be_lun->pblockmul = 1;
1212 		cbe_lun->pblockexp = 0;
1213 	}
1214 
1215 	/* Tell the user the blocksize we ended up using */
1216 	params->blocksize_bytes = cbe_lun->blocksize;
1217 	params->lun_size_bytes = be_lun->size_bytes;
1218 
1219 	value = dnvlist_get_string(cbe_lun->options, "unmap", NULL);
1220 	if (value == NULL || strcmp(value, "off") != 0)
1221 		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
1222 	value = dnvlist_get_string(cbe_lun->options, "readonly", NULL);
1223 	if (value != NULL) {
1224 		if (strcmp(value, "on") == 0)
1225 			cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
1226 	} else if (cbe_lun->lun_type != T_DIRECT)
1227 		cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
1228 	cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
1229 	value = dnvlist_get_string(cbe_lun->options, "serseq", NULL);
1230 	if (value != NULL && strcmp(value, "on") == 0)
1231 		cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
1232 	else if (value != NULL && strcmp(value, "read") == 0)
1233 		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
1234 	else if (value != NULL && strcmp(value, "soft") == 0)
1235 		cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT;
1236 	else if (value != NULL && strcmp(value, "off") == 0)
1237 		cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
1238 
1239 	if (params->flags & CTL_LUN_FLAG_ID_REQ) {
1240 		cbe_lun->req_lun_id = params->req_lun_id;
1241 		cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
1242 	} else
1243 		cbe_lun->req_lun_id = 0;
1244 
1245 	cbe_lun->lun_shutdown = ctl_backend_ramdisk_lun_shutdown;
1246 	cbe_lun->be = &ctl_be_ramdisk_driver;
1247 	if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
1248 		snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d",
1249 			 softc->num_luns);
1250 		strncpy((char *)cbe_lun->serial_num, tmpstr,
1251 			MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
1252 
1253 		/* Tell the user what we used for a serial number */
1254 		strncpy((char *)params->serial_num, tmpstr,
1255 			MIN(sizeof(params->serial_num), sizeof(tmpstr)));
1256 	} else {
1257 		strncpy((char *)cbe_lun->serial_num, params->serial_num,
1258 			MIN(sizeof(cbe_lun->serial_num),
1259 			    sizeof(params->serial_num)));
1260 	}
1261 	if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
1262 		snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns);
1263 		strncpy((char *)cbe_lun->device_id, tmpstr,
1264 			MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
1265 
1266 		/* Tell the user what we used for a device ID */
1267 		strncpy((char *)params->device_id, tmpstr,
1268 			MIN(sizeof(params->device_id), sizeof(tmpstr)));
1269 	} else {
1270 		strncpy((char *)cbe_lun->device_id, params->device_id,
1271 			MIN(sizeof(cbe_lun->device_id),
1272 			    sizeof(params->device_id)));
1273 	}
1274 
1275 	STAILQ_INIT(&be_lun->cont_queue);
1276 	sx_init(&be_lun->page_lock, "ctlram page");
1277 	if (be_lun->cap_bytes == 0) {
1278 		be_lun->indir = 0;
1279 		be_lun->pages = malloc(be_lun->pblocksize, M_RAMDISK, M_WAITOK);
1280 	}
1281 	be_lun->zero_page = malloc(be_lun->pblocksize, M_RAMDISK,
1282 	    M_WAITOK|M_ZERO);
1283 	mtx_init(&be_lun->queue_lock, "ctlram queue", NULL, MTX_DEF);
1284 	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_backend_ramdisk_worker,
1285 	    be_lun);
1286 
1287 	be_lun->io_taskqueue = taskqueue_create("ctlramtq", M_WAITOK,
1288 	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
1289 	if (be_lun->io_taskqueue == NULL) {
1290 		snprintf(req->error_str, sizeof(req->error_str),
1291 			 "%s: Unable to create taskqueue", __func__);
1292 		goto bailout_error;
1293 	}
1294 
1295 	retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue,
1296 					 /*num threads*/1,
1297 					 /*priority*/PUSER,
1298 					 /*proc*/control_softc->ctl_proc,
1299 					 /*thread name*/"ramdisk");
1300 	if (retval != 0)
1301 		goto bailout_error;
1302 
1303 	retval = ctl_add_lun(&be_lun->cbe_lun);
1304 	if (retval != 0) {
1305 		snprintf(req->error_str, sizeof(req->error_str),
1306 			 "%s: ctl_add_lun() returned error %d, see dmesg for "
1307 			"details", __func__, retval);
1308 		retval = 0;
1309 		goto bailout_error;
1310 	}
1311 
1312 	mtx_lock(&softc->lock);
1313 	softc->num_luns++;
1314 	SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links);
1315 	mtx_unlock(&softc->lock);
1316 
1317 	params->req_lun_id = cbe_lun->lun_id;
1318 
1319 	req->status = CTL_LUN_OK;
1320 	return (retval);
1321 
1322 bailout_error:
1323 	req->status = CTL_LUN_ERROR;
1324 	if (be_lun != NULL) {
1325 		if (be_lun->io_taskqueue != NULL)
1326 			taskqueue_free(be_lun->io_taskqueue);
1327 		nvlist_destroy(cbe_lun->options);
1328 		free(be_lun->zero_page, M_RAMDISK);
1329 		ctl_backend_ramdisk_freeallpages(be_lun->pages, be_lun->indir);
1330 		sx_destroy(&be_lun->page_lock);
1331 		mtx_destroy(&be_lun->queue_lock);
1332 		free(be_lun, M_RAMDISK);
1333 	}
1334 	return (retval);
1335 }
1336 
1337 static int
1338 ctl_backend_ramdisk_modify(struct ctl_be_ramdisk_softc *softc,
1339 		       struct ctl_lun_req *req)
1340 {
1341 	struct ctl_be_ramdisk_lun *be_lun;
1342 	struct ctl_be_lun *cbe_lun;
1343 	struct ctl_lun_modify_params *params;
1344 	const char *value;
1345 	uint32_t blocksize;
1346 	int wasprim;
1347 
1348 	params = &req->reqdata.modify;
1349 	sx_xlock(&softc->modify_lock);
1350 	mtx_lock(&softc->lock);
1351 	SLIST_FOREACH(be_lun, &softc->lun_list, links) {
1352 		if (be_lun->cbe_lun.lun_id == params->lun_id)
1353 			break;
1354 	}
1355 	mtx_unlock(&softc->lock);
1356 	if (be_lun == NULL) {
1357 		snprintf(req->error_str, sizeof(req->error_str),
1358 			 "%s: LUN %u is not managed by the ramdisk backend",
1359 			 __func__, params->lun_id);
1360 		goto bailout_error;
1361 	}
1362 	cbe_lun = &be_lun->cbe_lun;
1363 
1364 	if (params->lun_size_bytes != 0)
1365 		be_lun->params.lun_size_bytes = params->lun_size_bytes;
1366 
1367 	if (req->args_nvl != NULL) {
1368 		nvlist_destroy(cbe_lun->options);
1369 		cbe_lun->options = nvlist_clone(req->args_nvl);
1370 	}
1371 
1372 	wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
1373 	value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
1374 	if (value != NULL) {
1375 		if (strcmp(value, "primary") == 0)
1376 			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
1377 		else
1378 			cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
1379 	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
1380 		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
1381 	else
1382 		cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
1383 	if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
1384 		if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
1385 			ctl_lun_primary(cbe_lun);
1386 		else
1387 			ctl_lun_secondary(cbe_lun);
1388 	}
1389 
1390 	blocksize = be_lun->cbe_lun.blocksize;
1391 	if (be_lun->params.lun_size_bytes < blocksize) {
1392 		snprintf(req->error_str, sizeof(req->error_str),
1393 			"%s: LUN size %ju < blocksize %u", __func__,
1394 			be_lun->params.lun_size_bytes, blocksize);
1395 		goto bailout_error;
1396 	}
1397 	be_lun->size_blocks = be_lun->params.lun_size_bytes / blocksize;
1398 	be_lun->size_bytes = be_lun->size_blocks * blocksize;
1399 	be_lun->cbe_lun.maxlba = be_lun->size_blocks - 1;
1400 	ctl_lun_capacity_changed(&be_lun->cbe_lun);
1401 
1402 	/* Tell the user the exact size we ended up using */
1403 	params->lun_size_bytes = be_lun->size_bytes;
1404 
1405 	sx_xunlock(&softc->modify_lock);
1406 	req->status = CTL_LUN_OK;
1407 	return (0);
1408 
1409 bailout_error:
1410 	sx_xunlock(&softc->modify_lock);
1411 	req->status = CTL_LUN_ERROR;
1412 	return (0);
1413 }
1414 
1415 static void
1416 ctl_backend_ramdisk_lun_shutdown(struct ctl_be_lun *cbe_lun)
1417 {
1418 	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
1419 	struct ctl_be_ramdisk_softc *softc = be_lun->softc;
1420 
1421 	taskqueue_drain_all(be_lun->io_taskqueue);
1422 	taskqueue_free(be_lun->io_taskqueue);
1423 	nvlist_destroy(be_lun->cbe_lun.options);
1424 	free(be_lun->zero_page, M_RAMDISK);
1425 	ctl_backend_ramdisk_freeallpages(be_lun->pages, be_lun->indir);
1426 	sx_destroy(&be_lun->page_lock);
1427 	mtx_destroy(&be_lun->queue_lock);
1428 
1429 	mtx_lock(&softc->lock);
1430 	be_lun->flags |= CTL_BE_RAMDISK_LUN_UNCONFIGURED;
1431 	if (be_lun->flags & CTL_BE_RAMDISK_LUN_WAITING)
1432 		wakeup(be_lun);
1433 	else
1434 		free(be_lun, M_RAMDISK);
1435 	mtx_unlock(&softc->lock);
1436 }
1437