xref: /titanic_51/usr/src/uts/common/io/lofi.c (revision 4f3b09fdc1c2f924ddba94e505ee4c2bff8a18d4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * lofi (loopback file) driver - allows you to attach a file to a device,
28  * which can then be accessed through that device. The simple model is that
29  * you tell lofi to open a file, and then use the block device you get as
30  * you would any block device. lofi translates access to the block device
31  * into I/O on the underlying file. This is mostly useful for
32  * mounting images of filesystems.
33  *
34  * lofi is controlled through /dev/lofictl - this is the only device exported
35  * during attach, and is minor number 0. lofiadm communicates with lofi through
36  * ioctls on this device. When a file is attached to lofi, block and character
37  * devices are exported in /dev/lofi and /dev/rlofi. Currently, these devices
38  * are identified by their minor number, and the minor number is also used
39  * as the name in /dev/lofi. If we ever decide to support virtual disks,
40  * we'll have to divide the minor number space to identify fdisk partitions
41  * and slices, and the name will then be the minor number shifted down a
42  * few bits. Minor devices are tracked with state structures handled with
43  * ddi_soft_state(9F) for simplicity.
44  *
45  * A file attached to lofi is opened when attached and not closed until
46  * explicitly detached from lofi. This seems more sensible than deferring
47  * the open until the /dev/lofi device is opened, for a number of reasons.
48  * One is that any failure is likely to be noticed by the person (or script)
49  * running lofiadm. Another is that it would be a security problem if the
50  * file was replaced by another one after being added but before being opened.
51  *
52  * The only hard part about lofi is the ioctls. In order to support things
53  * like 'newfs' on a lofi device, it needs to support certain disk ioctls.
54  * So it has to fake disk geometry and partition information. More may need
55  * to be faked if your favorite utility doesn't work and you think it should
56  * (fdformat doesn't work because it really wants to know the type of floppy
57  * controller to talk to, and that didn't seem easy to fake. Or possibly even
58  * necessary, since we have mkfs_pcfs now).
59  *
60  * Normally, a lofi device cannot be detached if it is open (i.e. busy).  To
61  * support simulation of hotplug events, an optional force flag is provided.
62  * If a lofi device is open when a force detach is requested, then the
63  * underlying file is closed and any subsequent operations return EIO.  When the
64  * device is closed for the last time, it will be cleaned up at that time.  In
65  * addition, the DKIOCSTATE ioctl will return DKIO_DEV_GONE when the device is
66  * detached but not removed.
67  *
68  * Known problems:
69  *
70  *	UFS logging. Mounting a UFS filesystem image "logging"
71  *	works for basic copy testing but wedges during a build of ON through
72  *	that image. Some deadlock in lufs holding the log mutex and then
73  *	getting stuck on a buf. So for now, don't do that.
74  *
75  *	Direct I/O. Since the filesystem data is being cached in the buffer
76  *	cache, _and_ again in the underlying filesystem, it's tempting to
77  *	enable direct I/O on the underlying file. Don't, because that deadlocks.
78  *	I think to fix the cache-twice problem we might need filesystem support.
79  *
80  *	lofi on itself. The simple lock strategy (lofi_lock) precludes this
81  *	because you'll be in lofi_ioctl, holding the lock when you open the
82  *	file, which, if it's lofi, will grab lofi_lock. We prevent this for
83  *	now, though not using ddi_soft_state(9F) would make it possible to
84  *	do. Though it would still be silly.
85  *
86  * Interesting things to do:
87  *
88  *	Allow multiple files for each device. A poor-man's metadisk, basically.
89  *
90  *	Pass-through ioctls on block devices. You can (though it's not
91  *	documented), give lofi a block device as a file name. Then we shouldn't
92  *	need to fake a geometry, however, it may be relevant if you're replacing
93  *	metadisk, or using lofi to get crypto.
94  *	It makes sense to do lofiadm -c aes -a /dev/dsk/c0t0d0s4 /dev/lofi/1
95  *	and then in /etc/vfstab have an entry for /dev/lofi/1 as /export/home.
96  *	In fact this even makes sense if you have lofi "above" metadisk.
97  *
98  * Encryption:
99  *	Each lofi device can have its own symmetric key and cipher.
100  *	They are passed to us by lofiadm(1m) in the correct format for use
101  *	with the misc/kcf crypto_* routines.
102  *
103  *	Each block has its own IV, that is calculated in lofi_blk_mech(), based
104  *	on the "master" key held in the lsp and the block number of the buffer.
105  */
106 
107 #include <sys/types.h>
108 #include <netinet/in.h>
109 #include <sys/sysmacros.h>
110 #include <sys/uio.h>
111 #include <sys/kmem.h>
112 #include <sys/cred.h>
113 #include <sys/mman.h>
114 #include <sys/errno.h>
115 #include <sys/aio_req.h>
116 #include <sys/stat.h>
117 #include <sys/file.h>
118 #include <sys/modctl.h>
119 #include <sys/conf.h>
120 #include <sys/debug.h>
121 #include <sys/vnode.h>
122 #include <sys/lofi.h>
123 #include <sys/fcntl.h>
124 #include <sys/pathname.h>
125 #include <sys/filio.h>
126 #include <sys/fdio.h>
127 #include <sys/open.h>
128 #include <sys/disp.h>
129 #include <vm/seg_map.h>
130 #include <sys/ddi.h>
131 #include <sys/sunddi.h>
132 #include <sys/zmod.h>
133 #include <sys/crypto/common.h>
134 #include <sys/crypto/api.h>
135 #include <LzmaDec.h>
136 
137 /*
138  * The basis for CRYOFF is derived from usr/src/uts/common/sys/fs/ufs_fs.h.
139  * Crypto metadata, if it exists, is located at the end of the boot block
140  * (BBOFF + BBSIZE, which is SBOFF).  The super block and everything after
141  * is offset by the size of the crypto metadata which is handled by
142  * lsp->ls_crypto_offset.
143  */
144 #define	CRYOFF	((off_t)8192)
145 
146 #define	NBLOCKS_PROP_NAME	"Nblocks"
147 #define	SIZE_PROP_NAME		"Size"
148 
/*
 * Fill in a crypto_data_t so that it describes a single raw buffer "buf"
 * of "len" bytes starting at offset 0.  The body is wrapped in
 * do { } while (0) so the macro expands to exactly one statement and stays
 * correct under an unbraced if/else.  Note that "len" is evaluated twice.
 */
#define	SETUP_C_DATA(cd, buf, len)			\
	do {						\
		(cd).cd_format = CRYPTO_DATA_RAW;	\
		(cd).cd_offset = 0;			\
		(cd).cd_miscdata = NULL;		\
		(cd).cd_length = (len);			\
		(cd).cd_raw.iov_base = (buf);		\
		(cd).cd_raw.iov_len = (len);		\
	} while (0)
156 
/*
 * Return EINVAL from the enclosing function unless both the offset and the
 * residual count of "uio" are multiples of DEV_BSIZE.  Wrapped in
 * do { } while (0) so that "UIO_CHECK(uio);" is a single statement and
 * cannot capture a following "else".
 */
#define	UIO_CHECK(uio)	\
	do { \
		if (((uio)->uio_loffset % DEV_BSIZE) != 0 || \
		    ((uio)->uio_resid % DEV_BSIZE) != 0) { \
			return (EINVAL); \
		} \
	} while (0)
162 
/*
 * dev_info pointer passed to ddi_prop_remove() and ddi_remove_minor_node()
 * when a mapping is torn down.
 */
static dev_info_t *lofi_dip = NULL;
/* soft-state anchor used with ddi_get_soft_state()/ddi_soft_state_free() */
static void *lofi_statep = NULL;
static kmutex_t lofi_lock;		/* state lock */
166 
167 /*
168  * Because lofi_taskq_nthreads limits the actual swamping of the device, the
169  * maxalloc parameter (lofi_taskq_maxalloc) should be tuned conservatively
170  * high.  If we want to be assured that the underlying device is always busy,
171  * we must be sure that the number of bytes enqueued when the number of
172  * enqueued tasks exceeds maxalloc is sufficient to keep the device busy for
173  * the duration of the sleep time in taskq_ent_alloc().  That is, lofi should
174  * set maxalloc to be the maximum throughput (in bytes per second) of the
175  * underlying device divided by the minimum I/O size.  We assume a realistic
176  * maximum throughput of one hundred megabytes per second; we set maxalloc on
177  * the lofi task queue to be 104857600 divided by DEV_BSIZE.
178  */
/* 100MB/s (104857600 bytes) expressed in DEV_BSIZE-sized I/Os */
static int lofi_taskq_maxalloc = 104857600 / DEV_BSIZE;
static int lofi_taskq_nthreads = 4;	/* # of taskq threads per device */

/* highest minor number that lofi_busy() examines for an existing mapping */
uint32_t lofi_max_files = LOFI_MAX_FILES;
/* NOTE(review): presumably the tag identifying lofi crypto metadata - confirm */
const char lofi_crypto_magic[6] = LOFI_CRYPTO_MAGIC;
184 
/*
 * Forward declarations: both decompressors are referenced by
 * lofi_compress_table below before they are defined.  Each returns 0 on
 * success and -1 on failure.
 */
static int gzip_decompress(void *src, size_t srclen, void *dst,
	size_t *destlen, int level);

static int lzma_decompress(void *src, size_t srclen, void *dst,
	size_t *dstlen, int level);
190 
/*
 * Supported compression algorithms, indexed by lsp->ls_comp_algorithm_index.
 * Each entry is { decompress routine, NULL, level, name }; the decompress
 * routine and level are what lofi_strategy_task() uses (l_decompress,
 * l_level).  NOTE(review): the second slot is presumably the compress
 * routine, unused in the kernel - confirm against lofi_compress_info_t.
 */
lofi_compress_info_t lofi_compress_table[LOFI_COMPRESS_FUNCTIONS] = {
	{gzip_decompress,	NULL,	6,	"gzip"}, /* default */
	{gzip_decompress,	NULL,	6,	"gzip-6"},
	{gzip_decompress,	NULL,	9,	"gzip-9"},
	{lzma_decompress,	NULL,	0,	"lzma"}
};
197 
198 /*ARGSUSED*/
199 static void
200 *SzAlloc(void *p, size_t size)
201 {
202 	return (kmem_alloc(size, KM_SLEEP));
203 }
204 
205 /*ARGSUSED*/
206 static void
207 SzFree(void *p, void *address, size_t size)
208 {
209 	kmem_free(address, size);
210 }
211 
/* allocator callbacks passed to LzmaDecode() by lzma_decompress() */
static ISzAlloc g_Alloc = { SzAlloc, SzFree };
213 
214 static int
215 lofi_busy(void)
216 {
217 	minor_t	minor;
218 
219 	/*
220 	 * We need to make sure no mappings exist - mod_remove won't
221 	 * help because the device isn't open.
222 	 */
223 	mutex_enter(&lofi_lock);
224 	for (minor = 1; minor <= lofi_max_files; minor++) {
225 		if (ddi_get_soft_state(lofi_statep, minor) != NULL) {
226 			mutex_exit(&lofi_lock);
227 			return (EBUSY);
228 		}
229 	}
230 	mutex_exit(&lofi_lock);
231 	return (0);
232 }
233 
234 static int
235 is_opened(struct lofi_state *lsp)
236 {
237 	ASSERT(mutex_owned(&lofi_lock));
238 	return (lsp->ls_chr_open || lsp->ls_blk_open || lsp->ls_lyr_open_count);
239 }
240 
241 static int
242 mark_opened(struct lofi_state *lsp, int otyp)
243 {
244 	ASSERT(mutex_owned(&lofi_lock));
245 	switch (otyp) {
246 	case OTYP_CHR:
247 		lsp->ls_chr_open = 1;
248 		break;
249 	case OTYP_BLK:
250 		lsp->ls_blk_open = 1;
251 		break;
252 	case OTYP_LYR:
253 		lsp->ls_lyr_open_count++;
254 		break;
255 	default:
256 		return (-1);
257 	}
258 	return (0);
259 }
260 
261 static void
262 mark_closed(struct lofi_state *lsp, int otyp)
263 {
264 	ASSERT(mutex_owned(&lofi_lock));
265 	switch (otyp) {
266 	case OTYP_CHR:
267 		lsp->ls_chr_open = 0;
268 		break;
269 	case OTYP_BLK:
270 		lsp->ls_blk_open = 0;
271 		break;
272 	case OTYP_LYR:
273 		lsp->ls_lyr_open_count--;
274 		break;
275 	default:
276 		break;
277 	}
278 }
279 
280 static void
281 lofi_free_crypto(struct lofi_state *lsp)
282 {
283 	ASSERT(mutex_owned(&lofi_lock));
284 
285 	if (lsp->ls_crypto_enabled) {
286 		/*
287 		 * Clean up the crypto state so that it doesn't hang around
288 		 * in memory after we are done with it.
289 		 */
290 		bzero(lsp->ls_key.ck_data,
291 		    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
292 		kmem_free(lsp->ls_key.ck_data,
293 		    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
294 		lsp->ls_key.ck_data = NULL;
295 		lsp->ls_key.ck_length = 0;
296 
297 		if (lsp->ls_mech.cm_param != NULL) {
298 			kmem_free(lsp->ls_mech.cm_param,
299 			    lsp->ls_mech.cm_param_len);
300 			lsp->ls_mech.cm_param = NULL;
301 			lsp->ls_mech.cm_param_len = 0;
302 		}
303 
304 		if (lsp->ls_iv_mech.cm_param != NULL) {
305 			kmem_free(lsp->ls_iv_mech.cm_param,
306 			    lsp->ls_iv_mech.cm_param_len);
307 			lsp->ls_iv_mech.cm_param = NULL;
308 			lsp->ls_iv_mech.cm_param_len = 0;
309 		}
310 
311 		mutex_destroy(&lsp->ls_crypto_lock);
312 	}
313 }
314 
315 static void
316 lofi_free_handle(dev_t dev, minor_t minor, struct lofi_state *lsp,
317     cred_t *credp)
318 {
319 	dev_t	newdev;
320 	char	namebuf[50];
321 
322 	ASSERT(mutex_owned(&lofi_lock));
323 
324 	lofi_free_crypto(lsp);
325 
326 	if (lsp->ls_vp) {
327 		(void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag,
328 		    1, 0, credp, NULL);
329 		VN_RELE(lsp->ls_vp);
330 		lsp->ls_vp = NULL;
331 	}
332 
333 	newdev = makedevice(getmajor(dev), minor);
334 	(void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME);
335 	(void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME);
336 
337 	(void) snprintf(namebuf, sizeof (namebuf), "%d", minor);
338 	ddi_remove_minor_node(lofi_dip, namebuf);
339 	(void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor);
340 	ddi_remove_minor_node(lofi_dip, namebuf);
341 
342 	kmem_free(lsp->ls_filename, lsp->ls_filename_sz);
343 	taskq_destroy(lsp->ls_taskq);
344 	if (lsp->ls_kstat) {
345 		kstat_delete(lsp->ls_kstat);
346 		mutex_destroy(&lsp->ls_kstat_lock);
347 	}
348 
349 	if (lsp->ls_uncomp_seg_sz > 0) {
350 		kmem_free(lsp->ls_comp_index_data, lsp->ls_comp_index_data_sz);
351 		lsp->ls_uncomp_seg_sz = 0;
352 	}
353 	ddi_soft_state_free(lofi_statep, minor);
354 }
355 
356 /*ARGSUSED*/
357 static int
358 lofi_open(dev_t *devp, int flag, int otyp, struct cred *credp)
359 {
360 	minor_t	minor;
361 	struct lofi_state *lsp;
362 
363 	mutex_enter(&lofi_lock);
364 	minor = getminor(*devp);
365 	if (minor == 0) {
366 		/* master control device */
367 		/* must be opened exclusively */
368 		if (((flag & FEXCL) != FEXCL) || (otyp != OTYP_CHR)) {
369 			mutex_exit(&lofi_lock);
370 			return (EINVAL);
371 		}
372 		lsp = ddi_get_soft_state(lofi_statep, 0);
373 		if (lsp == NULL) {
374 			mutex_exit(&lofi_lock);
375 			return (ENXIO);
376 		}
377 		if (is_opened(lsp)) {
378 			mutex_exit(&lofi_lock);
379 			return (EBUSY);
380 		}
381 		(void) mark_opened(lsp, OTYP_CHR);
382 		mutex_exit(&lofi_lock);
383 		return (0);
384 	}
385 
386 	/* otherwise, the mapping should already exist */
387 	lsp = ddi_get_soft_state(lofi_statep, minor);
388 	if (lsp == NULL) {
389 		mutex_exit(&lofi_lock);
390 		return (EINVAL);
391 	}
392 
393 	if (lsp->ls_vp == NULL) {
394 		mutex_exit(&lofi_lock);
395 		return (ENXIO);
396 	}
397 
398 	if (mark_opened(lsp, otyp) == -1) {
399 		mutex_exit(&lofi_lock);
400 		return (EINVAL);
401 	}
402 
403 	mutex_exit(&lofi_lock);
404 	return (0);
405 }
406 
407 /*ARGSUSED*/
408 static int
409 lofi_close(dev_t dev, int flag, int otyp, struct cred *credp)
410 {
411 	minor_t	minor;
412 	struct lofi_state *lsp;
413 
414 	mutex_enter(&lofi_lock);
415 	minor = getminor(dev);
416 	lsp = ddi_get_soft_state(lofi_statep, minor);
417 	if (lsp == NULL) {
418 		mutex_exit(&lofi_lock);
419 		return (EINVAL);
420 	}
421 	mark_closed(lsp, otyp);
422 
423 	/*
424 	 * If we forcibly closed the underlying device (li_force), or
425 	 * asked for cleanup (li_cleanup), finish up if we're the last
426 	 * out of the door.
427 	 */
428 	if (minor != 0 && !is_opened(lsp) &&
429 	    (lsp->ls_cleanup || lsp->ls_vp == NULL))
430 		lofi_free_handle(dev, minor, lsp, credp);
431 
432 	mutex_exit(&lofi_lock);
433 	return (0);
434 }
435 
436 /*
437  * Sets the mechanism's initialization vector (IV) if one is needed.
438  * The IV is computed from the data block number.  lsp->ls_mech is
439  * altered so that:
440  *	lsp->ls_mech.cm_param_len is set to the IV len.
441  *	lsp->ls_mech.cm_param is set to the IV.
442  */
443 static int
444 lofi_blk_mech(struct lofi_state *lsp, longlong_t lblkno)
445 {
446 	int	ret;
447 	crypto_data_t cdata;
448 	char	*iv;
449 	size_t	iv_len;
450 	size_t	min;
451 	void	*data;
452 	size_t	datasz;
453 
454 	ASSERT(mutex_owned(&lsp->ls_crypto_lock));
455 
456 	if (lsp == NULL)
457 		return (CRYPTO_DEVICE_ERROR);
458 
459 	/* lsp->ls_mech.cm_param{_len} has already been set for static iv */
460 	if (lsp->ls_iv_type == IVM_NONE) {
461 		return (CRYPTO_SUCCESS);
462 	}
463 
464 	/*
465 	 * if kmem already alloced from previous call and it's the same size
466 	 * we need now, just recycle it; allocate new kmem only if we have to
467 	 */
468 	if (lsp->ls_mech.cm_param == NULL ||
469 	    lsp->ls_mech.cm_param_len != lsp->ls_iv_len) {
470 		iv_len = lsp->ls_iv_len;
471 		iv = kmem_zalloc(iv_len, KM_SLEEP);
472 	} else {
473 		iv_len = lsp->ls_mech.cm_param_len;
474 		iv = lsp->ls_mech.cm_param;
475 		bzero(iv, iv_len);
476 	}
477 
478 	switch (lsp->ls_iv_type) {
479 	case IVM_ENC_BLKNO:
480 		/* iv is not static, lblkno changes each time */
481 		data = &lblkno;
482 		datasz = sizeof (lblkno);
483 		break;
484 	default:
485 		data = 0;
486 		datasz = 0;
487 		break;
488 	}
489 
490 	/*
491 	 * write blkno into the iv buffer padded on the left in case
492 	 * blkno ever grows bigger than its current longlong_t size
493 	 * or a variation other than blkno is used for the iv data
494 	 */
495 	min = MIN(datasz, iv_len);
496 	bcopy(data, iv + (iv_len - min), min);
497 
498 	/* encrypt the data in-place to get the IV */
499 	SETUP_C_DATA(cdata, iv, iv_len);
500 
501 	ret = crypto_encrypt(&lsp->ls_iv_mech, &cdata, &lsp->ls_key,
502 	    NULL, NULL, NULL);
503 	if (ret != CRYPTO_SUCCESS) {
504 		cmn_err(CE_WARN, "failed to create iv for block %lld: (0x%x)",
505 		    lblkno, ret);
506 		if (lsp->ls_mech.cm_param != iv)
507 			kmem_free(iv, iv_len);
508 
509 		return (ret);
510 	}
511 
512 	/* clean up the iv from the last computation */
513 	if (lsp->ls_mech.cm_param != NULL && lsp->ls_mech.cm_param != iv)
514 		kmem_free(lsp->ls_mech.cm_param, lsp->ls_mech.cm_param_len);
515 
516 	lsp->ls_mech.cm_param_len = iv_len;
517 	lsp->ls_mech.cm_param = iv;
518 
519 	return (CRYPTO_SUCCESS);
520 }
521 
522 /*
523  * Performs encryption and decryption of a chunk of data of size "len",
524  * one DEV_BSIZE block at a time.  "len" is assumed to be a multiple of
525  * DEV_BSIZE.
526  */
527 static int
528 lofi_crypto(struct lofi_state *lsp, struct buf *bp, caddr_t plaintext,
529     caddr_t ciphertext, size_t len, boolean_t op_encrypt)
530 {
531 	crypto_data_t cdata;
532 	crypto_data_t wdata;
533 	int ret;
534 	longlong_t lblkno = bp->b_lblkno;
535 
536 	mutex_enter(&lsp->ls_crypto_lock);
537 
538 	/*
539 	 * though we could encrypt/decrypt entire "len" chunk of data, we need
540 	 * to break it into DEV_BSIZE pieces to capture blkno incrementing
541 	 */
542 	SETUP_C_DATA(cdata, plaintext, len);
543 	cdata.cd_length = DEV_BSIZE;
544 	if (ciphertext != NULL) {		/* not in-place crypto */
545 		SETUP_C_DATA(wdata, ciphertext, len);
546 		wdata.cd_length = DEV_BSIZE;
547 	}
548 
549 	do {
550 		ret = lofi_blk_mech(lsp, lblkno);
551 		if (ret != CRYPTO_SUCCESS)
552 			continue;
553 
554 		if (op_encrypt) {
555 			ret = crypto_encrypt(&lsp->ls_mech, &cdata,
556 			    &lsp->ls_key, NULL,
557 			    ((ciphertext != NULL) ? &wdata : NULL), NULL);
558 		} else {
559 			ret = crypto_decrypt(&lsp->ls_mech, &cdata,
560 			    &lsp->ls_key, NULL,
561 			    ((ciphertext != NULL) ? &wdata : NULL), NULL);
562 		}
563 
564 		cdata.cd_offset += DEV_BSIZE;
565 		if (ciphertext != NULL)
566 			wdata.cd_offset += DEV_BSIZE;
567 		lblkno++;
568 	} while (ret == CRYPTO_SUCCESS && cdata.cd_offset < len);
569 
570 	mutex_exit(&lsp->ls_crypto_lock);
571 
572 	if (ret != CRYPTO_SUCCESS) {
573 		cmn_err(CE_WARN, "%s failed for block %lld:  (0x%x)",
574 		    op_encrypt ? "crypto_encrypt()" : "crypto_decrypt()",
575 		    lblkno, ret);
576 	}
577 
578 	return (ret);
579 }
580 
/*
 * Transfer methods accepted by lofi_rdwr(): RDWR_RAW moves the data with
 * vn_rdwr() on the backing vnode, RDWR_BCOPY copies to or from an address
 * supplied by the caller.
 */
#define	RDWR_RAW	1
#define	RDWR_BCOPY	2
583 
584 static int
585 lofi_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
586     struct lofi_state *lsp, size_t len, int method, caddr_t bcopy_locn)
587 {
588 	ssize_t resid;
589 	int isread;
590 	int error;
591 
592 	/*
593 	 * Handles reads/writes for both plain and encrypted lofi
594 	 * Note:  offset is already shifted by lsp->ls_crypto_offset
595 	 * when it gets here.
596 	 */
597 
598 	isread = bp->b_flags & B_READ;
599 	if (isread) {
600 		if (method == RDWR_BCOPY) {
601 			/* DO NOT update bp->b_resid for bcopy */
602 			bcopy(bcopy_locn, bufaddr, len);
603 			error = 0;
604 		} else {		/* RDWR_RAW */
605 			error = vn_rdwr(UIO_READ, lsp->ls_vp, bufaddr, len,
606 			    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred,
607 			    &resid);
608 			bp->b_resid = resid;
609 		}
610 		if (lsp->ls_crypto_enabled && error == 0) {
611 			if (lofi_crypto(lsp, bp, bufaddr, NULL, len,
612 			    B_FALSE) != CRYPTO_SUCCESS) {
613 				/*
614 				 * XXX: original code didn't set residual
615 				 * back to len because no error was expected
616 				 * from bcopy() if encryption is not enabled
617 				 */
618 				if (method != RDWR_BCOPY)
619 					bp->b_resid = len;
620 				error = EIO;
621 			}
622 		}
623 		return (error);
624 	} else {
625 		void *iobuf = bufaddr;
626 
627 		if (lsp->ls_crypto_enabled) {
628 			/* don't do in-place crypto to keep bufaddr intact */
629 			iobuf = kmem_alloc(len, KM_SLEEP);
630 			if (lofi_crypto(lsp, bp, bufaddr, iobuf, len,
631 			    B_TRUE) != CRYPTO_SUCCESS) {
632 				kmem_free(iobuf, len);
633 				if (method != RDWR_BCOPY)
634 					bp->b_resid = len;
635 				return (EIO);
636 			}
637 		}
638 		if (method == RDWR_BCOPY) {
639 			/* DO NOT update bp->b_resid for bcopy */
640 			bcopy(iobuf, bcopy_locn, len);
641 			error = 0;
642 		} else {		/* RDWR_RAW */
643 			error = vn_rdwr(UIO_WRITE, lsp->ls_vp, iobuf, len,
644 			    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred,
645 			    &resid);
646 			bp->b_resid = resid;
647 		}
648 		if (lsp->ls_crypto_enabled) {
649 			kmem_free(iobuf, len);
650 		}
651 		return (error);
652 	}
653 }
654 
655 static int
656 lofi_mapped_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
657     struct lofi_state *lsp)
658 {
659 	int error;
660 	offset_t alignedoffset, mapoffset;
661 	size_t	xfersize;
662 	int	isread;
663 	int	smflags;
664 	caddr_t	mapaddr;
665 	size_t	len;
666 	enum seg_rw srw;
667 	int	save_error;
668 
669 	/*
670 	 * Note:  offset is already shifted by lsp->ls_crypto_offset
671 	 * when it gets here.
672 	 */
673 	if (lsp->ls_crypto_enabled)
674 		ASSERT(lsp->ls_vp_comp_size == lsp->ls_vp_size);
675 
676 	/*
677 	 * segmap always gives us an 8K (MAXBSIZE) chunk, aligned on
678 	 * an 8K boundary, but the buf transfer address may not be
679 	 * aligned on more than a 512-byte boundary (we don't enforce
680 	 * that even though we could). This matters since the initial
681 	 * part of the transfer may not start at offset 0 within the
682 	 * segmap'd chunk. So we have to compensate for that with
683 	 * 'mapoffset'. Subsequent chunks always start off at the
684 	 * beginning, and the last is capped by b_resid
685 	 *
686 	 * Visually, where "|" represents page map boundaries:
687 	 *   alignedoffset (mapaddr begins at this segmap boundary)
688 	 *    |   offset (from beginning of file)
689 	 *    |    |	   len
690 	 *    v    v	    v
691 	 * ===|====X========|====...======|========X====|====
692 	 *	   /-------------...---------------/
693 	 *		^ bp->b_bcount/bp->b_resid at start
694 	 *    /----/--------/----...------/--------/
695 	 *	^	^	^   ^		^
696 	 *	|	|	|   |		nth xfersize (<= MAXBSIZE)
697 	 *	|	|	2nd thru n-1st xfersize (= MAXBSIZE)
698 	 *	|	1st xfersize (<= MAXBSIZE)
699 	 *    mapoffset (offset into 1st segmap, non-0 1st time, 0 thereafter)
700 	 *
701 	 * Notes: "alignedoffset" is "offset" rounded down to nearest
702 	 * MAXBSIZE boundary.  "len" is next page boundary of size
703 	 * PAGESIZE after "alignedoffset".
704 	 */
705 	mapoffset = offset & MAXBOFFSET;
706 	alignedoffset = offset - mapoffset;
707 	bp->b_resid = bp->b_bcount;
708 	isread = bp->b_flags & B_READ;
709 	srw = isread ? S_READ : S_WRITE;
710 	do {
711 		xfersize = MIN(lsp->ls_vp_comp_size - offset,
712 		    MIN(MAXBSIZE - mapoffset, bp->b_resid));
713 		len = roundup(mapoffset + xfersize, PAGESIZE);
714 		mapaddr = segmap_getmapflt(segkmap, lsp->ls_vp,
715 		    alignedoffset, MAXBSIZE, 1, srw);
716 		/*
717 		 * Now fault in the pages. This lets us check
718 		 * for errors before we reference mapaddr and
719 		 * try to resolve the fault in bcopy (which would
720 		 * panic instead). And this can easily happen,
721 		 * particularly if you've lofi'd a file over NFS
722 		 * and someone deletes the file on the server.
723 		 */
724 		error = segmap_fault(kas.a_hat, segkmap, mapaddr,
725 		    len, F_SOFTLOCK, srw);
726 		if (error) {
727 			(void) segmap_release(segkmap, mapaddr, 0);
728 			if (FC_CODE(error) == FC_OBJERR)
729 				error = FC_ERRNO(error);
730 			else
731 				error = EIO;
732 			break;
733 		}
734 		/* error may be non-zero for encrypted lofi */
735 		error = lofi_rdwr(bufaddr, 0, bp, lsp, xfersize,
736 		    RDWR_BCOPY, mapaddr + mapoffset);
737 		if (error == 0) {
738 			bp->b_resid -= xfersize;
739 			bufaddr += xfersize;
740 			offset += xfersize;
741 		}
742 		smflags = 0;
743 		if (isread) {
744 			smflags |= SM_FREE;
745 			/*
746 			 * If we're reading an entire page starting
747 			 * at a page boundary, there's a good chance
748 			 * we won't need it again. Put it on the
749 			 * head of the freelist.
750 			 */
751 			if (mapoffset == 0 && xfersize == MAXBSIZE)
752 				smflags |= SM_DONTNEED;
753 		} else {
754 			if (error == 0)		/* write back good pages */
755 				smflags |= SM_WRITE;
756 		}
757 		(void) segmap_fault(kas.a_hat, segkmap, mapaddr,
758 		    len, F_SOFTUNLOCK, srw);
759 		save_error = segmap_release(segkmap, mapaddr, smflags);
760 		if (error == 0)
761 			error = save_error;
762 		/* only the first map may start partial */
763 		mapoffset = 0;
764 		alignedoffset += MAXBSIZE;
765 	} while ((error == 0) && (bp->b_resid > 0) &&
766 	    (offset < lsp->ls_vp_comp_size));
767 
768 	return (error);
769 }
770 
771 /*ARGSUSED*/
772 static int
773 gzip_decompress(void *src, size_t srclen, void *dst,
774     size_t *dstlen, int level)
775 {
776 	ASSERT(*dstlen >= srclen);
777 
778 	if (z_uncompress(dst, dstlen, src, srclen) != Z_OK)
779 		return (-1);
780 	return (0);
781 }
782 
783 #define	LZMA_HEADER_SIZE	(LZMA_PROPS_SIZE + 8)
784 /*ARGSUSED*/
785 static int
786 lzma_decompress(void *src, size_t srclen, void *dst,
787 	size_t *dstlen, int level)
788 {
789 	size_t insizepure;
790 	void *actual_src;
791 	ELzmaStatus status;
792 
793 	insizepure = srclen - LZMA_HEADER_SIZE;
794 	actual_src = (void *)((Byte *)src + LZMA_HEADER_SIZE);
795 
796 	if (LzmaDecode((Byte *)dst, (size_t *)dstlen,
797 	    (const Byte *)actual_src, &insizepure,
798 	    (const Byte *)src, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &status,
799 	    &g_Alloc) != SZ_OK) {
800 		return (-1);
801 	}
802 	return (0);
803 }
804 
805 /*
806  * This is basically what strategy used to be before we found we
807  * needed task queues.
808  */
809 static void
810 lofi_strategy_task(void *arg)
811 {
812 	struct buf *bp = (struct buf *)arg;
813 	int error;
814 	struct lofi_state *lsp;
815 	offset_t offset;
816 	caddr_t	bufaddr;
817 	size_t	len;
818 	size_t	xfersize;
819 	boolean_t bufinited = B_FALSE;
820 
821 	lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev));
822 	if (lsp == NULL) {
823 		error = ENXIO;
824 		goto errout;
825 	}
826 	if (lsp->ls_kstat) {
827 		mutex_enter(lsp->ls_kstat->ks_lock);
828 		kstat_waitq_to_runq(KSTAT_IO_PTR(lsp->ls_kstat));
829 		mutex_exit(lsp->ls_kstat->ks_lock);
830 	}
831 	bp_mapin(bp);
832 	bufaddr = bp->b_un.b_addr;
833 	offset = bp->b_lblkno * DEV_BSIZE;	/* offset within file */
834 	if (lsp->ls_crypto_enabled) {
835 		/* encrypted data really begins after crypto header */
836 		offset += lsp->ls_crypto_offset;
837 	}
838 	len = bp->b_bcount;
839 	bufinited = B_TRUE;
840 
841 	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
842 		error = EIO;
843 		goto errout;
844 	}
845 
846 	/*
847 	 * We used to always use vn_rdwr here, but we cannot do that because
848 	 * we might decide to read or write from the the underlying
849 	 * file during this call, which would be a deadlock because
850 	 * we have the rw_lock. So instead we page, unless it's not
851 	 * mapable or it's a character device or it's an encrypted lofi.
852 	 */
853 	if ((lsp->ls_vp->v_flag & VNOMAP) || (lsp->ls_vp->v_type == VCHR) ||
854 	    lsp->ls_crypto_enabled) {
855 		error = lofi_rdwr(bufaddr, offset, bp, lsp, len, RDWR_RAW,
856 		    NULL);
857 	} else if (lsp->ls_uncomp_seg_sz == 0) {
858 		error = lofi_mapped_rdwr(bufaddr, offset, bp, lsp);
859 	} else {
860 		unsigned char *compressed_seg = NULL, *cmpbuf;
861 		unsigned char *uncompressed_seg = NULL;
862 		lofi_compress_info_t *li;
863 		size_t oblkcount;
864 		unsigned long seglen;
865 		uint64_t sblkno, eblkno, cmpbytes;
866 		offset_t sblkoff, eblkoff;
867 		u_offset_t salign, ealign;
868 		u_offset_t sdiff;
869 		uint32_t comp_data_sz;
870 		uint64_t i;
871 
872 		/*
873 		 * From here on we're dealing primarily with compressed files
874 		 */
875 		ASSERT(!lsp->ls_crypto_enabled);
876 
877 		/*
878 		 * Compressed files can only be read from and
879 		 * not written to
880 		 */
881 		if (!(bp->b_flags & B_READ)) {
882 			bp->b_resid = bp->b_bcount;
883 			error = EROFS;
884 			goto done;
885 		}
886 
887 		ASSERT(lsp->ls_comp_algorithm_index >= 0);
888 		li = &lofi_compress_table[lsp->ls_comp_algorithm_index];
889 		/*
890 		 * Compute starting and ending compressed segment numbers
891 		 * We use only bitwise operations avoiding division and
892 		 * modulus because we enforce the compression segment size
893 		 * to a power of 2
894 		 */
895 		sblkno = offset >> lsp->ls_comp_seg_shift;
896 		sblkoff = offset & (lsp->ls_uncomp_seg_sz - 1);
897 		eblkno = (offset + bp->b_bcount) >> lsp->ls_comp_seg_shift;
898 		eblkoff = (offset + bp->b_bcount) & (lsp->ls_uncomp_seg_sz - 1);
899 
900 		/*
901 		 * Align start offset to block boundary for segmap
902 		 */
903 		salign = lsp->ls_comp_seg_index[sblkno];
904 		sdiff = salign & (DEV_BSIZE - 1);
905 		salign -= sdiff;
906 		if (eblkno >= (lsp->ls_comp_index_sz - 1)) {
907 			/*
908 			 * We're dealing with the last segment of
909 			 * the compressed file -- the size of this
910 			 * segment *may not* be the same as the
911 			 * segment size for the file
912 			 */
913 			eblkoff = (offset + bp->b_bcount) &
914 			    (lsp->ls_uncomp_last_seg_sz - 1);
915 			ealign = lsp->ls_vp_comp_size;
916 		} else {
917 			ealign = lsp->ls_comp_seg_index[eblkno + 1];
918 		}
919 
920 		/*
921 		 * Preserve original request paramaters
922 		 */
923 		oblkcount = bp->b_bcount;
924 
925 		/*
926 		 * Assign the calculated parameters
927 		 */
928 		comp_data_sz = ealign - salign;
929 		bp->b_bcount = comp_data_sz;
930 
931 		/*
932 		 * Allocate fixed size memory blocks to hold compressed
933 		 * segments and one uncompressed segment since we
934 		 * uncompress segments one at a time
935 		 */
936 		compressed_seg = kmem_alloc(bp->b_bcount, KM_SLEEP);
937 		uncompressed_seg = kmem_alloc(lsp->ls_uncomp_seg_sz, KM_SLEEP);
938 		/*
939 		 * Map in the calculated number of blocks
940 		 */
941 		error = lofi_mapped_rdwr((caddr_t)compressed_seg, salign,
942 		    bp, lsp);
943 
944 		bp->b_bcount = oblkcount;
945 		bp->b_resid = oblkcount;
946 		if (error != 0)
947 			goto done;
948 
949 		/*
950 		 * We have the compressed blocks, now uncompress them
951 		 */
952 		cmpbuf = compressed_seg + sdiff;
953 		for (i = sblkno; i <= eblkno; i++) {
954 			ASSERT(i < lsp->ls_comp_index_sz - 1);
955 
956 			/*
957 			 * The last segment is special in that it is
958 			 * most likely not going to be the same
959 			 * (uncompressed) size as the other segments.
960 			 */
961 			if (i == (lsp->ls_comp_index_sz - 2)) {
962 				seglen = lsp->ls_uncomp_last_seg_sz;
963 			} else {
964 				seglen = lsp->ls_uncomp_seg_sz;
965 			}
966 
967 			/*
968 			 * Each of the segment index entries contains
969 			 * the starting block number for that segment.
970 			 * The number of compressed bytes in a segment
971 			 * is thus the difference between the starting
972 			 * block number of this segment and the starting
973 			 * block number of the next segment.
974 			 */
975 			cmpbytes = lsp->ls_comp_seg_index[i + 1] -
976 			    lsp->ls_comp_seg_index[i];
977 
978 			/*
979 			 * The first byte in a compressed segment is a flag
980 			 * that indicates whether this segment is compressed
981 			 * at all
982 			 */
983 			if (*cmpbuf == UNCOMPRESSED) {
984 				bcopy((cmpbuf + SEGHDR), uncompressed_seg,
985 				    (cmpbytes - SEGHDR));
986 			} else {
987 				if (li->l_decompress((cmpbuf + SEGHDR),
988 				    (cmpbytes - SEGHDR), uncompressed_seg,
989 				    &seglen, li->l_level) != 0) {
990 					error = EIO;
991 					goto done;
992 				}
993 			}
994 
995 			/*
996 			 * Determine how much uncompressed data we
997 			 * have to copy and copy it
998 			 */
999 			xfersize = lsp->ls_uncomp_seg_sz - sblkoff;
1000 			if (i == eblkno)
1001 				xfersize -= (lsp->ls_uncomp_seg_sz - eblkoff);
1002 
1003 			bcopy((uncompressed_seg + sblkoff), bufaddr, xfersize);
1004 
1005 			cmpbuf += cmpbytes;
1006 			bufaddr += xfersize;
1007 			bp->b_resid -= xfersize;
1008 			sblkoff = 0;
1009 
1010 			if (bp->b_resid == 0)
1011 				break;
1012 		}
1013 done:
1014 		if (compressed_seg != NULL)
1015 			kmem_free(compressed_seg, comp_data_sz);
1016 		if (uncompressed_seg != NULL)
1017 			kmem_free(uncompressed_seg, lsp->ls_uncomp_seg_sz);
1018 	} /* end of handling compressed files */
1019 
1020 errout:
1021 	if (bufinited && lsp->ls_kstat) {
1022 		size_t n_done = bp->b_bcount - bp->b_resid;
1023 		kstat_io_t *kioptr;
1024 
1025 		mutex_enter(lsp->ls_kstat->ks_lock);
1026 		kioptr = KSTAT_IO_PTR(lsp->ls_kstat);
1027 		if (bp->b_flags & B_READ) {
1028 			kioptr->nread += n_done;
1029 			kioptr->reads++;
1030 		} else {
1031 			kioptr->nwritten += n_done;
1032 			kioptr->writes++;
1033 		}
1034 		kstat_runq_exit(kioptr);
1035 		mutex_exit(lsp->ls_kstat->ks_lock);
1036 	}
1037 
1038 	mutex_enter(&lsp->ls_vp_lock);
1039 	if (--lsp->ls_vp_iocount == 0)
1040 		cv_broadcast(&lsp->ls_vp_cv);
1041 	mutex_exit(&lsp->ls_vp_lock);
1042 
1043 	bioerror(bp, error);
1044 	biodone(bp);
1045 }
1046 
1047 static int
1048 lofi_strategy(struct buf *bp)
1049 {
1050 	struct lofi_state *lsp;
1051 	offset_t	offset;
1052 
1053 	/*
1054 	 * We cannot just do I/O here, because the current thread
1055 	 * _might_ end up back in here because the underlying filesystem
1056 	 * wants a buffer, which eventually gets into bio_recycle and
1057 	 * might call into lofi to write out a delayed-write buffer.
1058 	 * This is bad if the filesystem above lofi is the same as below.
1059 	 *
1060 	 * We could come up with a complex strategy using threads to
1061 	 * do the I/O asynchronously, or we could use task queues. task
1062 	 * queues were incredibly easy so they win.
1063 	 */
1064 	lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev));
1065 	if (lsp == NULL) {
1066 		bioerror(bp, ENXIO);
1067 		biodone(bp);
1068 		return (0);
1069 	}
1070 
1071 	mutex_enter(&lsp->ls_vp_lock);
1072 	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
1073 		bioerror(bp, EIO);
1074 		biodone(bp);
1075 		mutex_exit(&lsp->ls_vp_lock);
1076 		return (0);
1077 	}
1078 
1079 	offset = bp->b_lblkno * DEV_BSIZE;	/* offset within file */
1080 	if (lsp->ls_crypto_enabled) {
1081 		/* encrypted data really begins after crypto header */
1082 		offset += lsp->ls_crypto_offset;
1083 	}
1084 	if (offset == lsp->ls_vp_size) {
1085 		/* EOF */
1086 		if ((bp->b_flags & B_READ) != 0) {
1087 			bp->b_resid = bp->b_bcount;
1088 			bioerror(bp, 0);
1089 		} else {
1090 			/* writes should fail */
1091 			bioerror(bp, ENXIO);
1092 		}
1093 		biodone(bp);
1094 		mutex_exit(&lsp->ls_vp_lock);
1095 		return (0);
1096 	}
1097 	if (offset > lsp->ls_vp_size) {
1098 		bioerror(bp, ENXIO);
1099 		biodone(bp);
1100 		mutex_exit(&lsp->ls_vp_lock);
1101 		return (0);
1102 	}
1103 	lsp->ls_vp_iocount++;
1104 	mutex_exit(&lsp->ls_vp_lock);
1105 
1106 	if (lsp->ls_kstat) {
1107 		mutex_enter(lsp->ls_kstat->ks_lock);
1108 		kstat_waitq_enter(KSTAT_IO_PTR(lsp->ls_kstat));
1109 		mutex_exit(lsp->ls_kstat->ks_lock);
1110 	}
1111 	(void) taskq_dispatch(lsp->ls_taskq, lofi_strategy_task, bp, KM_SLEEP);
1112 	return (0);
1113 }
1114 
1115 /*ARGSUSED2*/
1116 static int
1117 lofi_read(dev_t dev, struct uio *uio, struct cred *credp)
1118 {
1119 	if (getminor(dev) == 0)
1120 		return (EINVAL);
1121 	UIO_CHECK(uio);
1122 	return (physio(lofi_strategy, NULL, dev, B_READ, minphys, uio));
1123 }
1124 
1125 /*ARGSUSED2*/
1126 static int
1127 lofi_write(dev_t dev, struct uio *uio, struct cred *credp)
1128 {
1129 	if (getminor(dev) == 0)
1130 		return (EINVAL);
1131 	UIO_CHECK(uio);
1132 	return (physio(lofi_strategy, NULL, dev, B_WRITE, minphys, uio));
1133 }
1134 
1135 /*ARGSUSED2*/
1136 static int
1137 lofi_aread(dev_t dev, struct aio_req *aio, struct cred *credp)
1138 {
1139 	if (getminor(dev) == 0)
1140 		return (EINVAL);
1141 	UIO_CHECK(aio->aio_uio);
1142 	return (aphysio(lofi_strategy, anocancel, dev, B_READ, minphys, aio));
1143 }
1144 
1145 /*ARGSUSED2*/
1146 static int
1147 lofi_awrite(dev_t dev, struct aio_req *aio, struct cred *credp)
1148 {
1149 	if (getminor(dev) == 0)
1150 		return (EINVAL);
1151 	UIO_CHECK(aio->aio_uio);
1152 	return (aphysio(lofi_strategy, anocancel, dev, B_WRITE, minphys, aio));
1153 }
1154 
1155 /*ARGSUSED*/
1156 static int
1157 lofi_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1158 {
1159 	switch (infocmd) {
1160 	case DDI_INFO_DEVT2DEVINFO:
1161 		*result = lofi_dip;
1162 		return (DDI_SUCCESS);
1163 	case DDI_INFO_DEVT2INSTANCE:
1164 		*result = 0;
1165 		return (DDI_SUCCESS);
1166 	}
1167 	return (DDI_FAILURE);
1168 }
1169 
1170 static int
1171 lofi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
1172 {
1173 	int	error;
1174 
1175 	if (cmd != DDI_ATTACH)
1176 		return (DDI_FAILURE);
1177 	error = ddi_soft_state_zalloc(lofi_statep, 0);
1178 	if (error == DDI_FAILURE) {
1179 		return (DDI_FAILURE);
1180 	}
1181 	error = ddi_create_minor_node(dip, LOFI_CTL_NODE, S_IFCHR, 0,
1182 	    DDI_PSEUDO, NULL);
1183 	if (error == DDI_FAILURE) {
1184 		ddi_soft_state_free(lofi_statep, 0);
1185 		return (DDI_FAILURE);
1186 	}
1187 	/* driver handles kernel-issued IOCTLs */
1188 	if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
1189 	    DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
1190 		ddi_remove_minor_node(dip, NULL);
1191 		ddi_soft_state_free(lofi_statep, 0);
1192 		return (DDI_FAILURE);
1193 	}
1194 	lofi_dip = dip;
1195 	ddi_report_dev(dip);
1196 	return (DDI_SUCCESS);
1197 }
1198 
1199 static int
1200 lofi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1201 {
1202 	if (cmd != DDI_DETACH)
1203 		return (DDI_FAILURE);
1204 	if (lofi_busy())
1205 		return (DDI_FAILURE);
1206 	lofi_dip = NULL;
1207 	ddi_remove_minor_node(dip, NULL);
1208 	ddi_prop_remove_all(dip);
1209 	ddi_soft_state_free(lofi_statep, 0);
1210 	return (DDI_SUCCESS);
1211 }
1212 
1213 /*
1214  * With addition of encryption, be careful that encryption key is wiped before
1215  * kernel memory structures are freed, and also that key is not accidentally
1216  * passed out into userland structures.
1217  */
1218 static void
1219 free_lofi_ioctl(struct lofi_ioctl *klip)
1220 {
1221 	/* Make sure this encryption key doesn't stick around */
1222 	bzero(klip->li_key, sizeof (klip->li_key));
1223 	kmem_free(klip, sizeof (struct lofi_ioctl));
1224 }
1225 
1226 /*
1227  * These two just simplify the rest of the ioctls that need to copyin/out
1228  * the lofi_ioctl structure.
1229  */
1230 struct lofi_ioctl *
1231 copy_in_lofi_ioctl(const struct lofi_ioctl *ulip, int flag)
1232 {
1233 	struct lofi_ioctl *klip;
1234 	int	error;
1235 
1236 	klip = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP);
1237 	error = ddi_copyin(ulip, klip, sizeof (struct lofi_ioctl), flag);
1238 	if (error) {
1239 		free_lofi_ioctl(klip);
1240 		return (NULL);
1241 	}
1242 
1243 	/* make sure filename is always null-terminated */
1244 	klip->li_filename[MAXPATHLEN-1] = '\0';
1245 
1246 	/* validate minor number */
1247 	if (klip->li_minor > lofi_max_files) {
1248 		free_lofi_ioctl(klip);
1249 		cmn_err(CE_WARN, "attempt to map more than lofi_max_files (%d)",
1250 		    lofi_max_files);
1251 		return (NULL);
1252 	}
1253 	return (klip);
1254 }
1255 
1256 int
1257 copy_out_lofi_ioctl(const struct lofi_ioctl *klip, struct lofi_ioctl *ulip,
1258 	int flag)
1259 {
1260 	int	error;
1261 
1262 	/*
1263 	 * NOTE: Do NOT copy the crypto_key_t "back" to userland.
1264 	 * This ensures that an attacker can't trivially find the
1265 	 * key for a mapping just by issuing the ioctl.
1266 	 *
1267 	 * It can still be found by poking around in kmem with mdb(1),
1268 	 * but there is no point in making it easy when the info isn't
1269 	 * of any use in this direction anyway.
1270 	 *
1271 	 * Either way we don't actually have the raw key stored in
1272 	 * a form that we can get it anyway, since we just used it
1273 	 * to create a ctx template and didn't keep "the original".
1274 	 */
1275 	error = ddi_copyout(klip, ulip, sizeof (struct lofi_ioctl), flag);
1276 	if (error)
1277 		return (EFAULT);
1278 	return (0);
1279 }
1280 
1281 /*
1282  * Return the minor number 'filename' is mapped to, if it is.
1283  */
1284 static int
1285 file_to_minor(char *filename)
1286 {
1287 	minor_t	minor;
1288 	struct lofi_state *lsp;
1289 
1290 	ASSERT(mutex_owned(&lofi_lock));
1291 	for (minor = 1; minor <= lofi_max_files; minor++) {
1292 		lsp = ddi_get_soft_state(lofi_statep, minor);
1293 		if (lsp == NULL)
1294 			continue;
1295 		if (strcmp(lsp->ls_filename, filename) == 0)
1296 			return (minor);
1297 	}
1298 	return (0);
1299 }
1300 
1301 /*
1302  * lofiadm does some validation, but since Joe Random (or crashme) could
1303  * do our ioctls, we need to do some validation too.
1304  */
1305 static int
1306 valid_filename(const char *filename)
1307 {
1308 	static char *blkprefix = "/dev/" LOFI_BLOCK_NAME "/";
1309 	static char *charprefix = "/dev/" LOFI_CHAR_NAME "/";
1310 
1311 	/* must be absolute path */
1312 	if (filename[0] != '/')
1313 		return (0);
1314 	/* must not be lofi */
1315 	if (strncmp(filename, blkprefix, strlen(blkprefix)) == 0)
1316 		return (0);
1317 	if (strncmp(filename, charprefix, strlen(charprefix)) == 0)
1318 		return (0);
1319 	return (1);
1320 }
1321 
1322 /*
1323  * Fakes up a disk geometry, and one big partition, based on the size
1324  * of the file. This is needed because we allow newfs'ing the device,
1325  * and newfs will do several disk ioctls to figure out the geometry and
1326  * partition information. It uses that information to determine the parameters
1327  * to pass to mkfs. Geometry is pretty much irrelevant these days, but we
1328  * have to support it.
1329  */
1330 static void
1331 fake_disk_geometry(struct lofi_state *lsp)
1332 {
1333 	u_offset_t dsize = lsp->ls_vp_size - lsp->ls_crypto_offset;
1334 
1335 	/* dk_geom - see dkio(7I) */
1336 	/*
1337 	 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs
1338 	 * of sectors), but that breaks programs like fdisk which want to
1339 	 * partition a disk by cylinder. With one cylinder, you can't create
1340 	 * an fdisk partition and put pcfs on it for testing (hard to pick
1341 	 * a number between one and one).
1342 	 *
1343 	 * The cheezy floppy test is an attempt to not have too few cylinders
1344 	 * for a small file, or so many on a big file that you waste space
1345 	 * for backup superblocks or cylinder group structures.
1346 	 */
1347 	if (dsize < (2 * 1024 * 1024)) /* floppy? */
1348 		lsp->ls_dkg.dkg_ncyl = dsize / (100 * 1024);
1349 	else
1350 		lsp->ls_dkg.dkg_ncyl = dsize / (300 * 1024);
1351 	/* in case file file is < 100k */
1352 	if (lsp->ls_dkg.dkg_ncyl == 0)
1353 		lsp->ls_dkg.dkg_ncyl = 1;
1354 	lsp->ls_dkg.dkg_acyl = 0;
1355 	lsp->ls_dkg.dkg_bcyl = 0;
1356 	lsp->ls_dkg.dkg_nhead = 1;
1357 	lsp->ls_dkg.dkg_obs1 = 0;
1358 	lsp->ls_dkg.dkg_intrlv = 0;
1359 	lsp->ls_dkg.dkg_obs2 = 0;
1360 	lsp->ls_dkg.dkg_obs3 = 0;
1361 	lsp->ls_dkg.dkg_apc = 0;
1362 	lsp->ls_dkg.dkg_rpm = 7200;
1363 	lsp->ls_dkg.dkg_pcyl = lsp->ls_dkg.dkg_ncyl + lsp->ls_dkg.dkg_acyl;
1364 	lsp->ls_dkg.dkg_nsect = dsize / (DEV_BSIZE * lsp->ls_dkg.dkg_ncyl);
1365 	lsp->ls_dkg.dkg_write_reinstruct = 0;
1366 	lsp->ls_dkg.dkg_read_reinstruct = 0;
1367 
1368 	/* vtoc - see dkio(7I) */
1369 	bzero(&lsp->ls_vtoc, sizeof (struct vtoc));
1370 	lsp->ls_vtoc.v_sanity = VTOC_SANE;
1371 	lsp->ls_vtoc.v_version = V_VERSION;
1372 	(void) strncpy(lsp->ls_vtoc.v_volume, LOFI_DRIVER_NAME,
1373 	    sizeof (lsp->ls_vtoc.v_volume));
1374 	lsp->ls_vtoc.v_sectorsz = DEV_BSIZE;
1375 	lsp->ls_vtoc.v_nparts = 1;
1376 	lsp->ls_vtoc.v_part[0].p_tag = V_UNASSIGNED;
1377 
1378 	/*
1379 	 * A compressed file is read-only, other files can
1380 	 * be read-write
1381 	 */
1382 	if (lsp->ls_uncomp_seg_sz > 0) {
1383 		lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT | V_RONLY;
1384 	} else {
1385 		lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT;
1386 	}
1387 	lsp->ls_vtoc.v_part[0].p_start = (daddr_t)0;
1388 	/*
1389 	 * The partition size cannot just be the number of sectors, because
1390 	 * that might not end on a cylinder boundary. And if that's the case,
1391 	 * newfs/mkfs will print a scary warning. So just figure the size
1392 	 * based on the number of cylinders and sectors/cylinder.
1393 	 */
1394 	lsp->ls_vtoc.v_part[0].p_size = lsp->ls_dkg.dkg_pcyl *
1395 	    lsp->ls_dkg.dkg_nsect * lsp->ls_dkg.dkg_nhead;
1396 
1397 	/* dk_cinfo - see dkio(7I) */
1398 	bzero(&lsp->ls_ci, sizeof (struct dk_cinfo));
1399 	(void) strcpy(lsp->ls_ci.dki_cname, LOFI_DRIVER_NAME);
1400 	lsp->ls_ci.dki_ctype = DKC_MD;
1401 	lsp->ls_ci.dki_flags = 0;
1402 	lsp->ls_ci.dki_cnum = 0;
1403 	lsp->ls_ci.dki_addr = 0;
1404 	lsp->ls_ci.dki_space = 0;
1405 	lsp->ls_ci.dki_prio = 0;
1406 	lsp->ls_ci.dki_vec = 0;
1407 	(void) strcpy(lsp->ls_ci.dki_dname, LOFI_DRIVER_NAME);
1408 	lsp->ls_ci.dki_unit = 0;
1409 	lsp->ls_ci.dki_slave = 0;
1410 	lsp->ls_ci.dki_partition = 0;
1411 	/*
1412 	 * newfs uses this to set maxcontig. Must not be < 16, or it
1413 	 * will be 0 when newfs multiplies it by DEV_BSIZE and divides
1414 	 * it by the block size. Then tunefs doesn't work because
1415 	 * maxcontig is 0.
1416 	 */
1417 	lsp->ls_ci.dki_maxtransfer = 16;
1418 }
1419 
1420 /*
1421  * map in a compressed file
1422  *
1423  * Read in the header and the index that follows.
1424  *
1425  * The header is as follows -
1426  *
1427  * Signature (name of the compression algorithm)
1428  * Compression segment size (a multiple of 512)
1429  * Number of index entries
1430  * Size of the last block
1431  * The array containing the index entries
1432  *
1433  * The header information is always stored in
1434  * network byte order on disk.
1435  */
1436 static int
1437 lofi_map_compressed_file(struct lofi_state *lsp, char *buf)
1438 {
1439 	uint32_t index_sz, header_len, i;
1440 	ssize_t	resid;
1441 	enum uio_rw rw;
1442 	char *tbuf = buf;
1443 	int error;
1444 
1445 	/* The signature has already been read */
1446 	tbuf += sizeof (lsp->ls_comp_algorithm);
1447 	bcopy(tbuf, &(lsp->ls_uncomp_seg_sz), sizeof (lsp->ls_uncomp_seg_sz));
1448 	lsp->ls_uncomp_seg_sz = ntohl(lsp->ls_uncomp_seg_sz);
1449 
1450 	/*
1451 	 * The compressed segment size must be a power of 2
1452 	 */
1453 	if (lsp->ls_uncomp_seg_sz % 2)
1454 		return (EINVAL);
1455 
1456 	for (i = 0; !((lsp->ls_uncomp_seg_sz >> i) & 1); i++)
1457 		;
1458 
1459 	lsp->ls_comp_seg_shift = i;
1460 
1461 	tbuf += sizeof (lsp->ls_uncomp_seg_sz);
1462 	bcopy(tbuf, &(lsp->ls_comp_index_sz), sizeof (lsp->ls_comp_index_sz));
1463 	lsp->ls_comp_index_sz = ntohl(lsp->ls_comp_index_sz);
1464 
1465 	tbuf += sizeof (lsp->ls_comp_index_sz);
1466 	bcopy(tbuf, &(lsp->ls_uncomp_last_seg_sz),
1467 	    sizeof (lsp->ls_uncomp_last_seg_sz));
1468 	lsp->ls_uncomp_last_seg_sz = ntohl(lsp->ls_uncomp_last_seg_sz);
1469 
1470 	/*
1471 	 * Compute the total size of the uncompressed data
1472 	 * for use in fake_disk_geometry and other calculations.
1473 	 * Disk geometry has to be faked with respect to the
1474 	 * actual uncompressed data size rather than the
1475 	 * compressed file size.
1476 	 */
1477 	lsp->ls_vp_size = (lsp->ls_comp_index_sz - 2) * lsp->ls_uncomp_seg_sz
1478 	    + lsp->ls_uncomp_last_seg_sz;
1479 
1480 	/*
1481 	 * Index size is rounded up to DEV_BSIZE for ease
1482 	 * of segmapping
1483 	 */
1484 	index_sz = sizeof (*lsp->ls_comp_seg_index) * lsp->ls_comp_index_sz;
1485 	header_len = sizeof (lsp->ls_comp_algorithm) +
1486 	    sizeof (lsp->ls_uncomp_seg_sz) +
1487 	    sizeof (lsp->ls_comp_index_sz) +
1488 	    sizeof (lsp->ls_uncomp_last_seg_sz);
1489 	lsp->ls_comp_offbase = header_len + index_sz;
1490 
1491 	index_sz += header_len;
1492 	index_sz = roundup(index_sz, DEV_BSIZE);
1493 
1494 	lsp->ls_comp_index_data = kmem_alloc(index_sz, KM_SLEEP);
1495 	lsp->ls_comp_index_data_sz = index_sz;
1496 
1497 	/*
1498 	 * Read in the index -- this has a side-effect
1499 	 * of reading in the header as well
1500 	 */
1501 	rw = UIO_READ;
1502 	error = vn_rdwr(rw, lsp->ls_vp, lsp->ls_comp_index_data, index_sz,
1503 	    0, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
1504 
1505 	if (error != 0)
1506 		return (error);
1507 
1508 	/* Skip the header, this is where the index really begins */
1509 	lsp->ls_comp_seg_index =
1510 	    /*LINTED*/
1511 	    (uint64_t *)(lsp->ls_comp_index_data + header_len);
1512 
1513 	/*
1514 	 * Now recompute offsets in the index to account for
1515 	 * the header length
1516 	 */
1517 	for (i = 0; i < lsp->ls_comp_index_sz; i++) {
1518 		lsp->ls_comp_seg_index[i] = lsp->ls_comp_offbase +
1519 		    BE_64(lsp->ls_comp_seg_index[i]);
1520 	}
1521 
1522 	return (error);
1523 }
1524 
1525 /*
1526  * Check to see if the passed in signature is a valid
1527  * one.  If it is valid, return the index into
1528  * lofi_compress_table.
1529  *
1530  * Return -1 if it is invalid
1531  */
1532 static int lofi_compress_select(char *signature)
1533 {
1534 	int i;
1535 
1536 	for (i = 0; i < LOFI_COMPRESS_FUNCTIONS; i++) {
1537 		if (strcmp(lofi_compress_table[i].l_name, signature) == 0)
1538 			return (i);
1539 	}
1540 
1541 	return (-1);
1542 }
1543 
1544 /*
1545  * map a file to a minor number. Return the minor number.
1546  */
1547 static int
1548 lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor,
1549     int *rvalp, struct cred *credp, int ioctl_flag)
1550 {
1551 	minor_t	newminor;
1552 	struct lofi_state *lsp;
1553 	struct lofi_ioctl *klip;
1554 	int	error;
1555 	struct vnode *vp;
1556 	int64_t	Nblocks_prop_val;
1557 	int64_t	Size_prop_val;
1558 	int	compress_index;
1559 	vattr_t	vattr;
1560 	int	flag;
1561 	enum vtype v_type;
1562 	int zalloced = 0;
1563 	dev_t	newdev;
1564 	char	namebuf[50];
1565 	char	buf[DEV_BSIZE];
1566 	char	crybuf[DEV_BSIZE];
1567 	ssize_t	resid;
1568 	boolean_t need_vn_close = B_FALSE;
1569 	boolean_t keycopied = B_FALSE;
1570 	boolean_t need_size_update = B_FALSE;
1571 
1572 	klip = copy_in_lofi_ioctl(ulip, ioctl_flag);
1573 	if (klip == NULL)
1574 		return (EFAULT);
1575 
1576 	mutex_enter(&lofi_lock);
1577 
1578 	if (!valid_filename(klip->li_filename)) {
1579 		error = EINVAL;
1580 		goto out;
1581 	}
1582 
1583 	if (file_to_minor(klip->li_filename) != 0) {
1584 		error = EBUSY;
1585 		goto out;
1586 	}
1587 
1588 	if (pickminor) {
1589 		/* Find a free one */
1590 		for (newminor = 1; newminor <= lofi_max_files; newminor++)
1591 			if (ddi_get_soft_state(lofi_statep, newminor) == NULL)
1592 				break;
1593 		if (newminor >= lofi_max_files) {
1594 			error = EAGAIN;
1595 			goto out;
1596 		}
1597 	} else {
1598 		newminor = klip->li_minor;
1599 		if (ddi_get_soft_state(lofi_statep, newminor) != NULL) {
1600 			error = EEXIST;
1601 			goto out;
1602 		}
1603 	}
1604 
1605 	/* make sure it's valid */
1606 	error = lookupname(klip->li_filename, UIO_SYSSPACE, FOLLOW,
1607 	    NULLVPP, &vp);
1608 	if (error) {
1609 		goto out;
1610 	}
1611 	v_type = vp->v_type;
1612 	VN_RELE(vp);
1613 	if (!V_ISLOFIABLE(v_type)) {
1614 		error = EINVAL;
1615 		goto out;
1616 	}
1617 	flag = FREAD | FWRITE | FOFFMAX | FEXCL;
1618 	error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0);
1619 	if (error) {
1620 		/* try read-only */
1621 		flag &= ~FWRITE;
1622 		error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0,
1623 		    &vp, 0, 0);
1624 		if (error) {
1625 			goto out;
1626 		}
1627 	}
1628 	need_vn_close = B_TRUE;
1629 
1630 	vattr.va_mask = AT_SIZE;
1631 	error = VOP_GETATTR(vp, &vattr, 0, credp, NULL);
1632 	if (error) {
1633 		goto out;
1634 	}
1635 	/* the file needs to be a multiple of the block size */
1636 	if ((vattr.va_size % DEV_BSIZE) != 0) {
1637 		error = EINVAL;
1638 		goto out;
1639 	}
1640 	newdev = makedevice(getmajor(dev), newminor);
1641 	Size_prop_val = vattr.va_size;
1642 	if ((ddi_prop_update_int64(newdev, lofi_dip,
1643 	    SIZE_PROP_NAME, Size_prop_val)) != DDI_PROP_SUCCESS) {
1644 		error = EINVAL;
1645 		goto out;
1646 	}
1647 	Nblocks_prop_val = vattr.va_size / DEV_BSIZE;
1648 	if ((ddi_prop_update_int64(newdev, lofi_dip,
1649 	    NBLOCKS_PROP_NAME, Nblocks_prop_val)) != DDI_PROP_SUCCESS) {
1650 		error = EINVAL;
1651 		goto propout;
1652 	}
1653 	error = ddi_soft_state_zalloc(lofi_statep, newminor);
1654 	if (error == DDI_FAILURE) {
1655 		error = ENOMEM;
1656 		goto propout;
1657 	}
1658 	zalloced = 1;
1659 	(void) snprintf(namebuf, sizeof (namebuf), "%d", newminor);
1660 	error = ddi_create_minor_node(lofi_dip, namebuf, S_IFBLK, newminor,
1661 	    DDI_PSEUDO, NULL);
1662 	if (error != DDI_SUCCESS) {
1663 		error = ENXIO;
1664 		goto propout;
1665 	}
1666 	(void) snprintf(namebuf, sizeof (namebuf), "%d,raw", newminor);
1667 	error = ddi_create_minor_node(lofi_dip, namebuf, S_IFCHR, newminor,
1668 	    DDI_PSEUDO, NULL);
1669 	if (error != DDI_SUCCESS) {
1670 		/* remove block node */
1671 		(void) snprintf(namebuf, sizeof (namebuf), "%d", newminor);
1672 		ddi_remove_minor_node(lofi_dip, namebuf);
1673 		error = ENXIO;
1674 		goto propout;
1675 	}
1676 	lsp = ddi_get_soft_state(lofi_statep, newminor);
1677 	lsp->ls_filename_sz = strlen(klip->li_filename) + 1;
1678 	lsp->ls_filename = kmem_alloc(lsp->ls_filename_sz, KM_SLEEP);
1679 	(void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d",
1680 	    LOFI_DRIVER_NAME, newminor);
1681 	lsp->ls_taskq = taskq_create(namebuf, lofi_taskq_nthreads,
1682 	    minclsyspri, 1, lofi_taskq_maxalloc, 0);
1683 	lsp->ls_kstat = kstat_create(LOFI_DRIVER_NAME, newminor,
1684 	    NULL, "disk", KSTAT_TYPE_IO, 1, 0);
1685 	if (lsp->ls_kstat) {
1686 		mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL);
1687 		lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock;
1688 		kstat_install(lsp->ls_kstat);
1689 	}
1690 	cv_init(&lsp->ls_vp_cv, NULL, CV_DRIVER, NULL);
1691 	mutex_init(&lsp->ls_vp_lock, NULL, MUTEX_DRIVER, NULL);
1692 
1693 	/*
1694 	 * save open mode so file can be closed properly and vnode counts
1695 	 * updated correctly.
1696 	 */
1697 	lsp->ls_openflag = flag;
1698 
1699 	/*
1700 	 * Try to handle stacked lofs vnodes.
1701 	 */
1702 	if (vp->v_type == VREG) {
1703 		if (VOP_REALVP(vp, &lsp->ls_vp, NULL) != 0) {
1704 			lsp->ls_vp = vp;
1705 		} else {
1706 			/*
1707 			 * Even though vp was obtained via vn_open(), we
1708 			 * can't call vn_close() on it, since lofs will
1709 			 * pass the VOP_CLOSE() on down to the realvp
1710 			 * (which we are about to use). Hence we merely
1711 			 * drop the reference to the lofs vnode and hold
1712 			 * the realvp so things behave as if we've
1713 			 * opened the realvp without any interaction
1714 			 * with lofs.
1715 			 */
1716 			VN_HOLD(lsp->ls_vp);
1717 			VN_RELE(vp);
1718 		}
1719 	} else {
1720 		lsp->ls_vp = vp;
1721 	}
1722 	lsp->ls_vp_size = vattr.va_size;
1723 	(void) strcpy(lsp->ls_filename, klip->li_filename);
1724 	if (rvalp)
1725 		*rvalp = (int)newminor;
1726 	klip->li_minor = newminor;
1727 
1728 	/*
1729 	 * Initialize crypto details for encrypted lofi
1730 	 */
1731 	if (klip->li_crypto_enabled) {
1732 		int ret;
1733 
1734 		mutex_init(&lsp->ls_crypto_lock, NULL, MUTEX_DRIVER, NULL);
1735 
1736 		lsp->ls_mech.cm_type = crypto_mech2id(klip->li_cipher);
1737 		if (lsp->ls_mech.cm_type == CRYPTO_MECH_INVALID) {
1738 			cmn_err(CE_WARN, "invalid cipher %s requested for %s",
1739 			    klip->li_cipher, lsp->ls_filename);
1740 			error = EINVAL;
1741 			goto propout;
1742 		}
1743 
1744 		/* this is just initialization here */
1745 		lsp->ls_mech.cm_param = NULL;
1746 		lsp->ls_mech.cm_param_len = 0;
1747 
1748 		lsp->ls_iv_type = klip->li_iv_type;
1749 		lsp->ls_iv_mech.cm_type = crypto_mech2id(klip->li_iv_cipher);
1750 		if (lsp->ls_iv_mech.cm_type == CRYPTO_MECH_INVALID) {
1751 			cmn_err(CE_WARN, "invalid iv cipher %s requested"
1752 			    " for %s", klip->li_iv_cipher, lsp->ls_filename);
1753 			error = EINVAL;
1754 			goto propout;
1755 		}
1756 
1757 		/* iv mech must itself take a null iv */
1758 		lsp->ls_iv_mech.cm_param = NULL;
1759 		lsp->ls_iv_mech.cm_param_len = 0;
1760 		lsp->ls_iv_len = klip->li_iv_len;
1761 
1762 		/*
1763 		 * Create ctx using li_cipher & the raw li_key after checking
1764 		 * that it isn't a weak key.
1765 		 */
1766 		lsp->ls_key.ck_format = CRYPTO_KEY_RAW;
1767 		lsp->ls_key.ck_length = klip->li_key_len;
1768 		lsp->ls_key.ck_data = kmem_alloc(
1769 		    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length), KM_SLEEP);
1770 		bcopy(klip->li_key, lsp->ls_key.ck_data,
1771 		    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
1772 		keycopied = B_TRUE;
1773 
1774 		ret = crypto_key_check(&lsp->ls_mech, &lsp->ls_key);
1775 		if (ret != CRYPTO_SUCCESS) {
1776 			error = EINVAL;
1777 			cmn_err(CE_WARN, "weak key check failed for cipher "
1778 			    "%s on file %s (0x%x)", klip->li_cipher,
1779 			    lsp->ls_filename, ret);
1780 			goto propout;
1781 		}
1782 	}
1783 	lsp->ls_crypto_enabled = klip->li_crypto_enabled;
1784 
1785 	/*
1786 	 * Read the file signature to check if it is compressed or encrypted.
1787 	 * Crypto signature is in a different location; both areas should
1788 	 * read to keep compression and encryption mutually exclusive.
1789 	 */
1790 	if (lsp->ls_crypto_enabled) {
1791 		error = vn_rdwr(UIO_READ, lsp->ls_vp, crybuf, DEV_BSIZE,
1792 		    CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
1793 		if (error != 0)
1794 			goto propout;
1795 	}
1796 	error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, 0, UIO_SYSSPACE,
1797 	    0, RLIM64_INFINITY, kcred, &resid);
1798 	if (error != 0)
1799 		goto propout;
1800 
1801 	/* initialize these variables for all lofi files */
1802 	lsp->ls_uncomp_seg_sz = 0;
1803 	lsp->ls_vp_comp_size = lsp->ls_vp_size;
1804 	lsp->ls_comp_algorithm[0] = '\0';
1805 
1806 	/* encrypted lofi reads/writes shifted by crypto metadata size */
1807 	lsp->ls_crypto_offset = 0;
1808 
1809 	/* this is a compressed lofi */
1810 	if ((compress_index = lofi_compress_select(buf)) != -1) {
1811 
1812 		/* compression and encryption are mutually exclusive */
1813 		if (klip->li_crypto_enabled) {
1814 			error = ENOTSUP;
1815 			goto propout;
1816 		}
1817 
1818 		/* initialize compression info for compressed lofi */
1819 		lsp->ls_comp_algorithm_index = compress_index;
1820 		(void) strlcpy(lsp->ls_comp_algorithm,
1821 		    lofi_compress_table[compress_index].l_name,
1822 		    sizeof (lsp->ls_comp_algorithm));
1823 
1824 		error = lofi_map_compressed_file(lsp, buf);
1825 		if (error != 0)
1826 			goto propout;
1827 		need_size_update = B_TRUE;
1828 
1829 	/* this is an encrypted lofi */
1830 	} else if (strncmp(crybuf, lofi_crypto_magic,
1831 	    sizeof (lofi_crypto_magic)) == 0) {
1832 
1833 		char *marker = crybuf;
1834 
1835 		/*
1836 		 * This is the case where the header in the lofi image is
1837 		 * already initialized to indicate it is encrypted.
1838 		 * There is another case (see below) where encryption is
1839 		 * requested but the lofi image has never been used yet,
1840 		 * so the header needs to be written with encryption magic.
1841 		 */
1842 
1843 		/* indicate this must be an encrypted lofi due to magic */
1844 		klip->li_crypto_enabled = B_TRUE;
1845 
1846 		/*
1847 		 * The encryption header information is laid out this way:
1848 		 *	6 bytes:	hex "CFLOFI"
1849 		 *	2 bytes:	version = 0 ... for now
1850 		 *	96 bytes:	reserved1 (not implemented yet)
1851 		 *	4 bytes:	data_sector = 2 ... for now
1852 		 *	more...		not implemented yet
1853 		 */
1854 
1855 		/* copy the magic */
1856 		bcopy(marker, lsp->ls_crypto.magic,
1857 		    sizeof (lsp->ls_crypto.magic));
1858 		marker += sizeof (lsp->ls_crypto.magic);
1859 
1860 		/* read the encryption version number */
1861 		bcopy(marker, &(lsp->ls_crypto.version),
1862 		    sizeof (lsp->ls_crypto.version));
1863 		lsp->ls_crypto.version = ntohs(lsp->ls_crypto.version);
1864 		marker += sizeof (lsp->ls_crypto.version);
1865 
1866 		/* read a chunk of reserved data */
1867 		bcopy(marker, lsp->ls_crypto.reserved1,
1868 		    sizeof (lsp->ls_crypto.reserved1));
1869 		marker += sizeof (lsp->ls_crypto.reserved1);
1870 
1871 		/* read block number where encrypted data begins */
1872 		bcopy(marker, &(lsp->ls_crypto.data_sector),
1873 		    sizeof (lsp->ls_crypto.data_sector));
1874 		lsp->ls_crypto.data_sector = ntohl(lsp->ls_crypto.data_sector);
1875 		marker += sizeof (lsp->ls_crypto.data_sector);
1876 
1877 		/* and ignore the rest until it is implemented */
1878 
1879 		lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE;
1880 		need_size_update = B_TRUE;
1881 
1882 	/* neither compressed nor encrypted, BUT could be new encrypted lofi */
1883 	} else if (klip->li_crypto_enabled) {
1884 
1885 		/*
1886 		 * This is the case where encryption was requested but the
1887 		 * appears to be entirely blank where the encryption header
1888 		 * would have been in the lofi image.  If it is blank,
1889 		 * assume it is a brand new lofi image and initialize the
1890 		 * header area with encryption magic and current version
1891 		 * header data.  If it is not blank, that's an error.
1892 		 */
1893 		int	i;
1894 		char	*marker;
1895 		struct crypto_meta	chead;
1896 
1897 		for (i = 0; i < sizeof (struct crypto_meta); i++)
1898 			if (crybuf[i] != '\0')
1899 				break;
1900 		if (i != sizeof (struct crypto_meta)) {
1901 			error = EINVAL;
1902 			goto propout;
1903 		}
1904 
1905 		/* nothing there, initialize as encrypted lofi */
1906 		marker = crybuf;
1907 		bcopy(lofi_crypto_magic, marker, sizeof (lofi_crypto_magic));
1908 		marker += sizeof (lofi_crypto_magic);
1909 		chead.version = htons(LOFI_CRYPTO_VERSION);
1910 		bcopy(&(chead.version), marker, sizeof (chead.version));
1911 		marker += sizeof (chead.version);
1912 		marker += sizeof (chead.reserved1);
1913 		chead.data_sector = htonl(LOFI_CRYPTO_DATA_SECTOR);
1914 		bcopy(&(chead.data_sector), marker, sizeof (chead.data_sector));
1915 
1916 		/* write the header */
1917 		error = vn_rdwr(UIO_WRITE, lsp->ls_vp, crybuf, DEV_BSIZE,
1918 		    CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
1919 		if (error != 0)
1920 			goto propout;
1921 
1922 		/* fix things up so it looks like we read this info */
1923 		bcopy(lofi_crypto_magic, lsp->ls_crypto.magic,
1924 		    sizeof (lofi_crypto_magic));
1925 		lsp->ls_crypto.version = LOFI_CRYPTO_VERSION;
1926 		lsp->ls_crypto.data_sector = LOFI_CRYPTO_DATA_SECTOR;
1927 
1928 		lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE;
1929 		need_size_update = B_TRUE;
1930 	}
1931 
1932 	/*
1933 	 * Either lsp->ls_vp_size or lsp->ls_crypto_offset changed;
1934 	 * for encrypted lofi, advertise that it is somewhat shorter
1935 	 * due to embedded crypto metadata section
1936 	 */
1937 	if (need_size_update) {
1938 		/* update DDI properties */
1939 		Size_prop_val = lsp->ls_vp_size - lsp->ls_crypto_offset;
1940 		if ((ddi_prop_update_int64(newdev, lofi_dip, SIZE_PROP_NAME,
1941 		    Size_prop_val)) != DDI_PROP_SUCCESS) {
1942 			error = EINVAL;
1943 			goto propout;
1944 		}
1945 		Nblocks_prop_val =
1946 		    (lsp->ls_vp_size - lsp->ls_crypto_offset) / DEV_BSIZE;
1947 		if ((ddi_prop_update_int64(newdev, lofi_dip, NBLOCKS_PROP_NAME,
1948 		    Nblocks_prop_val)) != DDI_PROP_SUCCESS) {
1949 			error = EINVAL;
1950 			goto propout;
1951 		}
1952 	}
1953 
1954 	fake_disk_geometry(lsp);
1955 	mutex_exit(&lofi_lock);
1956 	(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
1957 	free_lofi_ioctl(klip);
1958 	return (0);
1959 
1960 propout:
1961 	if (keycopied) {
1962 		bzero(lsp->ls_key.ck_data,
1963 		    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
1964 		kmem_free(lsp->ls_key.ck_data,
1965 		    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
1966 		lsp->ls_key.ck_data = NULL;
1967 		lsp->ls_key.ck_length = 0;
1968 	}
1969 
1970 	if (zalloced)
1971 		ddi_soft_state_free(lofi_statep, newminor);
1972 
1973 	(void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME);
1974 	(void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME);
1975 
1976 out:
1977 	if (need_vn_close) {
1978 		(void) VOP_CLOSE(vp, flag, 1, 0, credp, NULL);
1979 		VN_RELE(vp);
1980 	}
1981 
1982 	mutex_exit(&lofi_lock);
1983 	free_lofi_ioctl(klip);
1984 	return (error);
1985 }
1986 
1987 /*
1988  * unmap a file.
1989  */
1990 static int
1991 lofi_unmap_file(dev_t dev, struct lofi_ioctl *ulip, int byfilename,
1992     struct cred *credp, int ioctl_flag)
1993 {
1994 	struct lofi_state *lsp;
1995 	struct lofi_ioctl *klip;
1996 	minor_t	minor;
1997 
1998 	klip = copy_in_lofi_ioctl(ulip, ioctl_flag);
1999 	if (klip == NULL)
2000 		return (EFAULT);
2001 
2002 	mutex_enter(&lofi_lock);
2003 	if (byfilename) {
2004 		minor = file_to_minor(klip->li_filename);
2005 	} else {
2006 		minor = klip->li_minor;
2007 	}
2008 	if (minor == 0) {
2009 		mutex_exit(&lofi_lock);
2010 		free_lofi_ioctl(klip);
2011 		return (ENXIO);
2012 	}
2013 	lsp = ddi_get_soft_state(lofi_statep, minor);
2014 	if (lsp == NULL || lsp->ls_vp == NULL) {
2015 		mutex_exit(&lofi_lock);
2016 		free_lofi_ioctl(klip);
2017 		return (ENXIO);
2018 	}
2019 
2020 	/*
2021 	 * If it's still held open, we'll do one of three things:
2022 	 *
2023 	 * If no flag is set, just return EBUSY.
2024 	 *
2025 	 * If the 'cleanup' flag is set, unmap and remove the device when
2026 	 * the last user finishes.
2027 	 *
2028 	 * If the 'force' flag is set, then we forcibly close the underlying
2029 	 * file.  Subsequent operations will fail, and the DKIOCSTATE ioctl
2030 	 * will return DKIO_DEV_GONE.  When the device is last closed, the
2031 	 * device will be cleaned up appropriately.
2032 	 *
2033 	 * This is complicated by the fact that we may have outstanding
2034 	 * dispatched I/Os.  Rather than having a single mutex to serialize all
2035 	 * I/O, we keep a count of the number of outstanding I/O requests
2036 	 * (ls_vp_iocount), as well as a flag to indicate that no new I/Os
2037 	 * should be dispatched (ls_vp_closereq).
2038 	 *
2039 	 * We set the flag, wait for the number of outstanding I/Os to reach 0,
2040 	 * and then close the underlying vnode.
2041 	 */
2042 	if (is_opened(lsp)) {
2043 		if (klip->li_force) {
2044 			/*
2045 			 * XXX: the section marked here should probably be
2046 			 * carefully incorporated into lofi_free_handle();
2047 			 * afterward just replace this section with:
2048 			 *	lofi_free_handle(dev, minor, lsp, credp);
2049 			 * and clean up lofi_unmap_file() a bit more
2050 			 */
2051 			lofi_free_crypto(lsp);
2052 
2053 			mutex_enter(&lsp->ls_vp_lock);
2054 			lsp->ls_vp_closereq = B_TRUE;
2055 			while (lsp->ls_vp_iocount > 0)
2056 				cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock);
2057 			(void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag, 1, 0,
2058 			    credp, NULL);
2059 			VN_RELE(lsp->ls_vp);
2060 			lsp->ls_vp = NULL;
2061 			cv_broadcast(&lsp->ls_vp_cv);
2062 			mutex_exit(&lsp->ls_vp_lock);
2063 			/*
2064 			 * XXX: to here
2065 			 */
2066 
2067 			klip->li_minor = minor;
2068 			mutex_exit(&lofi_lock);
2069 			(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
2070 			free_lofi_ioctl(klip);
2071 			return (0);
2072 		} else if (klip->li_cleanup) {
2073 			lsp->ls_cleanup = 1;
2074 			mutex_exit(&lofi_lock);
2075 			free_lofi_ioctl(klip);
2076 			return (0);
2077 		}
2078 
2079 		mutex_exit(&lofi_lock);
2080 		free_lofi_ioctl(klip);
2081 		return (EBUSY);
2082 	}
2083 
2084 	lofi_free_handle(dev, minor, lsp, credp);
2085 
2086 	klip->li_minor = minor;
2087 	mutex_exit(&lofi_lock);
2088 	(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
2089 	free_lofi_ioctl(klip);
2090 	return (0);
2091 }
2092 
2093 /*
2094  * get the filename given the minor number, or the minor number given
2095  * the name.
2096  */
2097 /*ARGSUSED*/
2098 static int
2099 lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which,
2100     struct cred *credp, int ioctl_flag)
2101 {
2102 	struct lofi_state *lsp;
2103 	struct lofi_ioctl *klip;
2104 	int	error;
2105 	minor_t	minor;
2106 
2107 	klip = copy_in_lofi_ioctl(ulip, ioctl_flag);
2108 	if (klip == NULL)
2109 		return (EFAULT);
2110 
2111 	switch (which) {
2112 	case LOFI_GET_FILENAME:
2113 		minor = klip->li_minor;
2114 		if (minor == 0) {
2115 			free_lofi_ioctl(klip);
2116 			return (EINVAL);
2117 		}
2118 
2119 		mutex_enter(&lofi_lock);
2120 		lsp = ddi_get_soft_state(lofi_statep, minor);
2121 		if (lsp == NULL) {
2122 			mutex_exit(&lofi_lock);
2123 			free_lofi_ioctl(klip);
2124 			return (ENXIO);
2125 		}
2126 		(void) strcpy(klip->li_filename, lsp->ls_filename);
2127 		(void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm,
2128 		    sizeof (klip->li_algorithm));
2129 		klip->li_crypto_enabled = lsp->ls_crypto_enabled;
2130 		mutex_exit(&lofi_lock);
2131 		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
2132 		free_lofi_ioctl(klip);
2133 		return (error);
2134 	case LOFI_GET_MINOR:
2135 		mutex_enter(&lofi_lock);
2136 		klip->li_minor = file_to_minor(klip->li_filename);
2137 		/* caller should not depend on klip->li_crypto_enabled here */
2138 		mutex_exit(&lofi_lock);
2139 		if (klip->li_minor == 0) {
2140 			free_lofi_ioctl(klip);
2141 			return (ENOENT);
2142 		}
2143 		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
2144 		free_lofi_ioctl(klip);
2145 		return (error);
2146 	case LOFI_CHECK_COMPRESSED:
2147 		mutex_enter(&lofi_lock);
2148 		klip->li_minor = file_to_minor(klip->li_filename);
2149 		mutex_exit(&lofi_lock);
2150 		if (klip->li_minor == 0) {
2151 			free_lofi_ioctl(klip);
2152 			return (ENOENT);
2153 		}
2154 		mutex_enter(&lofi_lock);
2155 		lsp = ddi_get_soft_state(lofi_statep, klip->li_minor);
2156 		if (lsp == NULL) {
2157 			mutex_exit(&lofi_lock);
2158 			free_lofi_ioctl(klip);
2159 			return (ENXIO);
2160 		}
2161 		ASSERT(strcmp(klip->li_filename, lsp->ls_filename) == 0);
2162 
2163 		(void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm,
2164 		    sizeof (klip->li_algorithm));
2165 		mutex_exit(&lofi_lock);
2166 		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
2167 		free_lofi_ioctl(klip);
2168 		return (error);
2169 	default:
2170 		free_lofi_ioctl(klip);
2171 		return (EINVAL);
2172 	}
2173 
2174 }
2175 
2176 static int
2177 lofi_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
2178     int *rvalp)
2179 {
2180 	int	error;
2181 	enum dkio_state dkstate;
2182 	struct lofi_state *lsp;
2183 	minor_t	minor;
2184 
2185 	minor = getminor(dev);
2186 	/* lofi ioctls only apply to the master device */
2187 	if (minor == 0) {
2188 		struct lofi_ioctl *lip = (struct lofi_ioctl *)arg;
2189 
2190 		/*
2191 		 * the query command only need read-access - i.e., normal
2192 		 * users are allowed to do those on the ctl device as
2193 		 * long as they can open it read-only.
2194 		 */
2195 		switch (cmd) {
2196 		case LOFI_MAP_FILE:
2197 			if ((flag & FWRITE) == 0)
2198 				return (EPERM);
2199 			return (lofi_map_file(dev, lip, 1, rvalp, credp, flag));
2200 		case LOFI_MAP_FILE_MINOR:
2201 			if ((flag & FWRITE) == 0)
2202 				return (EPERM);
2203 			return (lofi_map_file(dev, lip, 0, rvalp, credp, flag));
2204 		case LOFI_UNMAP_FILE:
2205 			if ((flag & FWRITE) == 0)
2206 				return (EPERM);
2207 			return (lofi_unmap_file(dev, lip, 1, credp, flag));
2208 		case LOFI_UNMAP_FILE_MINOR:
2209 			if ((flag & FWRITE) == 0)
2210 				return (EPERM);
2211 			return (lofi_unmap_file(dev, lip, 0, credp, flag));
2212 		case LOFI_GET_FILENAME:
2213 			return (lofi_get_info(dev, lip, LOFI_GET_FILENAME,
2214 			    credp, flag));
2215 		case LOFI_GET_MINOR:
2216 			return (lofi_get_info(dev, lip, LOFI_GET_MINOR,
2217 			    credp, flag));
2218 		case LOFI_GET_MAXMINOR:
2219 			error = ddi_copyout(&lofi_max_files, &lip->li_minor,
2220 			    sizeof (lofi_max_files), flag);
2221 			if (error)
2222 				return (EFAULT);
2223 			return (0);
2224 		case LOFI_CHECK_COMPRESSED:
2225 			return (lofi_get_info(dev, lip, LOFI_CHECK_COMPRESSED,
2226 			    credp, flag));
2227 		default:
2228 			break;
2229 		}
2230 	}
2231 
2232 	lsp = ddi_get_soft_state(lofi_statep, minor);
2233 	if (lsp == NULL)
2234 		return (ENXIO);
2235 
2236 	/*
2237 	 * We explicitly allow DKIOCSTATE, but all other ioctls should fail with
2238 	 * EIO as if the device was no longer present.
2239 	 */
2240 	if (lsp->ls_vp == NULL && cmd != DKIOCSTATE)
2241 		return (EIO);
2242 
2243 	/* these are for faking out utilities like newfs */
2244 	switch (cmd) {
2245 	case DKIOCGVTOC:
2246 		switch (ddi_model_convert_from(flag & FMODELS)) {
2247 		case DDI_MODEL_ILP32: {
2248 			struct vtoc32 vtoc32;
2249 
2250 			vtoctovtoc32(lsp->ls_vtoc, vtoc32);
2251 			if (ddi_copyout(&vtoc32, (void *)arg,
2252 			    sizeof (struct vtoc32), flag))
2253 				return (EFAULT);
2254 			break;
2255 			}
2256 
2257 		case DDI_MODEL_NONE:
2258 			if (ddi_copyout(&lsp->ls_vtoc, (void *)arg,
2259 			    sizeof (struct vtoc), flag))
2260 				return (EFAULT);
2261 			break;
2262 		}
2263 		return (0);
2264 	case DKIOCINFO:
2265 		error = ddi_copyout(&lsp->ls_ci, (void *)arg,
2266 		    sizeof (struct dk_cinfo), flag);
2267 		if (error)
2268 			return (EFAULT);
2269 		return (0);
2270 	case DKIOCG_VIRTGEOM:
2271 	case DKIOCG_PHYGEOM:
2272 	case DKIOCGGEOM:
2273 		error = ddi_copyout(&lsp->ls_dkg, (void *)arg,
2274 		    sizeof (struct dk_geom), flag);
2275 		if (error)
2276 			return (EFAULT);
2277 		return (0);
2278 	case DKIOCSTATE:
2279 		/*
2280 		 * Normally, lofi devices are always in the INSERTED state.  If
2281 		 * a device is forcefully unmapped, then the device transitions
2282 		 * to the DKIO_DEV_GONE state.
2283 		 */
2284 		if (ddi_copyin((void *)arg, &dkstate, sizeof (dkstate),
2285 		    flag) != 0)
2286 			return (EFAULT);
2287 
2288 		mutex_enter(&lsp->ls_vp_lock);
2289 		while ((dkstate == DKIO_INSERTED && lsp->ls_vp != NULL) ||
2290 		    (dkstate == DKIO_DEV_GONE && lsp->ls_vp == NULL)) {
2291 			/*
2292 			 * By virtue of having the device open, we know that
2293 			 * 'lsp' will remain valid when we return.
2294 			 */
2295 			if (!cv_wait_sig(&lsp->ls_vp_cv,
2296 			    &lsp->ls_vp_lock)) {
2297 				mutex_exit(&lsp->ls_vp_lock);
2298 				return (EINTR);
2299 			}
2300 		}
2301 
2302 		dkstate = (lsp->ls_vp != NULL ? DKIO_INSERTED : DKIO_DEV_GONE);
2303 		mutex_exit(&lsp->ls_vp_lock);
2304 
2305 		if (ddi_copyout(&dkstate, (void *)arg,
2306 		    sizeof (dkstate), flag) != 0)
2307 			return (EFAULT);
2308 		return (0);
2309 	default:
2310 		return (ENOTTY);
2311 	}
2312 }
2313 
2314 static struct cb_ops lofi_cb_ops = {
2315 	lofi_open,		/* open */
2316 	lofi_close,		/* close */
2317 	lofi_strategy,		/* strategy */
2318 	nodev,			/* print */
2319 	nodev,			/* dump */
2320 	lofi_read,		/* read */
2321 	lofi_write,		/* write */
2322 	lofi_ioctl,		/* ioctl */
2323 	nodev,			/* devmap */
2324 	nodev,			/* mmap */
2325 	nodev,			/* segmap */
2326 	nochpoll,		/* poll */
2327 	ddi_prop_op,		/* prop_op */
2328 	0,			/* streamtab  */
2329 	D_64BIT | D_NEW | D_MP,	/* Driver compatibility flag */
2330 	CB_REV,
2331 	lofi_aread,
2332 	lofi_awrite
2333 };
2334 
2335 static struct dev_ops lofi_ops = {
2336 	DEVO_REV,		/* devo_rev, */
2337 	0,			/* refcnt  */
2338 	lofi_info,		/* info */
2339 	nulldev,		/* identify */
2340 	nulldev,		/* probe */
2341 	lofi_attach,		/* attach */
2342 	lofi_detach,		/* detach */
2343 	nodev,			/* reset */
2344 	&lofi_cb_ops,		/* driver operations */
2345 	NULL,			/* no bus operations */
2346 	NULL,			/* power */
2347 	ddi_quiesce_not_needed,	/* quiesce */
2348 };
2349 
2350 static struct modldrv modldrv = {
2351 	&mod_driverops,
2352 	"loopback file driver",
2353 	&lofi_ops,
2354 };
2355 
2356 static struct modlinkage modlinkage = {
2357 	MODREV_1,
2358 	&modldrv,
2359 	NULL
2360 };
2361 
2362 int
2363 _init(void)
2364 {
2365 	int error;
2366 
2367 	error = ddi_soft_state_init(&lofi_statep,
2368 	    sizeof (struct lofi_state), 0);
2369 	if (error)
2370 		return (error);
2371 
2372 	mutex_init(&lofi_lock, NULL, MUTEX_DRIVER, NULL);
2373 	error = mod_install(&modlinkage);
2374 	if (error) {
2375 		mutex_destroy(&lofi_lock);
2376 		ddi_soft_state_fini(&lofi_statep);
2377 	}
2378 
2379 	return (error);
2380 }
2381 
2382 int
2383 _fini(void)
2384 {
2385 	int	error;
2386 
2387 	if (lofi_busy())
2388 		return (EBUSY);
2389 
2390 	error = mod_remove(&modlinkage);
2391 	if (error)
2392 		return (error);
2393 
2394 	mutex_destroy(&lofi_lock);
2395 	ddi_soft_state_fini(&lofi_statep);
2396 
2397 	return (error);
2398 }
2399 
2400 int
2401 _info(struct modinfo *modinfop)
2402 {
2403 	return (mod_info(&modlinkage, modinfop));
2404 }
2405