1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * lofi (loopback file) driver - allows you to attach a file to a device, 27 * which can then be accessed through that device. The simple model is that 28 * you tell lofi to open a file, and then use the block device you get as 29 * you would any block device. lofi translates access to the block device 30 * into I/O on the underlying file. This is mostly useful for 31 * mounting images of filesystems. 32 * 33 * lofi is controlled through /dev/lofictl - this is the only device exported 34 * during attach, and is minor number 0. lofiadm communicates with lofi through 35 * ioctls on this device. When a file is attached to lofi, block and character 36 * devices are exported in /dev/lofi and /dev/rlofi. Currently, these devices 37 * are identified by their minor number, and the minor number is also used 38 * as the name in /dev/lofi. 
If we ever decide to support virtual disks, 39 * we'll have to divide the minor number space to identify fdisk partitions 40 * and slices, and the name will then be the minor number shifted down a 41 * few bits. Minor devices are tracked with state structures handled with 42 * ddi_soft_state(9F) for simplicity. 43 * 44 * A file attached to lofi is opened when attached and not closed until 45 * explicitly detached from lofi. This seems more sensible than deferring 46 * the open until the /dev/lofi device is opened, for a number of reasons. 47 * One is that any failure is likely to be noticed by the person (or script) 48 * running lofiadm. Another is that it would be a security problem if the 49 * file was replaced by another one after being added but before being opened. 50 * 51 * The only hard part about lofi is the ioctls. In order to support things 52 * like 'newfs' on a lofi device, it needs to support certain disk ioctls. 53 * So it has to fake disk geometry and partition information. More may need 54 * to be faked if your favorite utility doesn't work and you think it should 55 * (fdformat doesn't work because it really wants to know the type of floppy 56 * controller to talk to, and that didn't seem easy to fake. Or possibly even 57 * necessary, since we have mkfs_pcfs now). 58 * 59 * Normally, a lofi device cannot be detached if it is open (i.e. busy). To 60 * support simulation of hotplug events, an optional force flag is provided. 61 * If a lofi device is open when a force detach is requested, then the 62 * underlying file is closed and any subsequent operations return EIO. When the 63 * device is closed for the last time, it will be cleaned up at that time. In 64 * addition, the DKIOCSTATE ioctl will return DKIO_DEV_GONE when the device is 65 * detached but not removed. 66 * 67 * Known problems: 68 * 69 * UFS logging. Mounting a UFS filesystem image "logging" 70 * works for basic copy testing but wedges during a build of ON through 71 * that image. 
Some deadlock in lufs holding the log mutex and then 72 * getting stuck on a buf. So for now, don't do that. 73 * 74 * Direct I/O. Since the filesystem data is being cached in the buffer 75 * cache, _and_ again in the underlying filesystem, it's tempting to 76 * enable direct I/O on the underlying file. Don't, because that deadlocks. 77 * I think to fix the cache-twice problem we might need filesystem support. 78 * 79 * Interesting things to do: 80 * 81 * Allow multiple files for each device. A poor-man's metadisk, basically. 82 * 83 * Pass-through ioctls on block devices. You can (though it's not 84 * documented), give lofi a block device as a file name. Then we shouldn't 85 * need to fake a geometry, however, it may be relevant if you're replacing 86 * metadisk, or using lofi to get crypto. 87 * It makes sense to do lofiadm -c aes -a /dev/dsk/c0t0d0s4 /dev/lofi/1 88 * and then in /etc/vfstab have an entry for /dev/lofi/1 as /export/home. 89 * In fact this even makes sense if you have lofi "above" metadisk. 90 * 91 * Encryption: 92 * Each lofi device can have its own symmetric key and cipher. 93 * They are passed to us by lofiadm(1m) in the correct format for use 94 * with the misc/kcf crypto_* routines. 95 * 96 * Each block has its own IV, that is calculated in lofi_blk_mech(), based 97 * on the "master" key held in the lsp and the block number of the buffer. 
98 */ 99 100 #include <sys/types.h> 101 #include <netinet/in.h> 102 #include <sys/sysmacros.h> 103 #include <sys/uio.h> 104 #include <sys/kmem.h> 105 #include <sys/cred.h> 106 #include <sys/mman.h> 107 #include <sys/errno.h> 108 #include <sys/aio_req.h> 109 #include <sys/stat.h> 110 #include <sys/file.h> 111 #include <sys/modctl.h> 112 #include <sys/conf.h> 113 #include <sys/debug.h> 114 #include <sys/vnode.h> 115 #include <sys/lofi.h> 116 #include <sys/fcntl.h> 117 #include <sys/pathname.h> 118 #include <sys/filio.h> 119 #include <sys/fdio.h> 120 #include <sys/open.h> 121 #include <sys/disp.h> 122 #include <vm/seg_map.h> 123 #include <sys/ddi.h> 124 #include <sys/sunddi.h> 125 #include <sys/zmod.h> 126 #include <sys/id_space.h> 127 #include <sys/mkdev.h> 128 #include <sys/crypto/common.h> 129 #include <sys/crypto/api.h> 130 #include <sys/rctl.h> 131 #include <LzmaDec.h> 132 133 /* 134 * The basis for CRYOFF is derived from usr/src/uts/common/sys/fs/ufs_fs.h. 135 * Crypto metadata, if it exists, is located at the end of the boot block 136 * (BBOFF + BBSIZE, which is SBOFF). The super block and everything after 137 * is offset by the size of the crypto metadata which is handled by 138 * lsp->ls_crypto_offset. 
139 */ 140 #define CRYOFF ((off_t)8192) 141 142 #define NBLOCKS_PROP_NAME "Nblocks" 143 #define SIZE_PROP_NAME "Size" 144 #define ZONE_PROP_NAME "zone" 145 146 #define SETUP_C_DATA(cd, buf, len) \ 147 (cd).cd_format = CRYPTO_DATA_RAW; \ 148 (cd).cd_offset = 0; \ 149 (cd).cd_miscdata = NULL; \ 150 (cd).cd_length = (len); \ 151 (cd).cd_raw.iov_base = (buf); \ 152 (cd).cd_raw.iov_len = (len); 153 154 #define UIO_CHECK(uio) \ 155 if (((uio)->uio_loffset % DEV_BSIZE) != 0 || \ 156 ((uio)->uio_resid % DEV_BSIZE) != 0) { \ 157 return (EINVAL); \ 158 } 159 160 static dev_info_t *lofi_dip = NULL; 161 static void *lofi_statep = NULL; 162 static kmutex_t lofi_lock; /* state lock */ 163 static id_space_t *lofi_minor_id; 164 static list_t lofi_list; 165 static zone_key_t lofi_zone_key; 166 167 /* 168 * Because lofi_taskq_nthreads limits the actual swamping of the device, the 169 * maxalloc parameter (lofi_taskq_maxalloc) should be tuned conservatively 170 * high. If we want to be assured that the underlying device is always busy, 171 * we must be sure that the number of bytes enqueued when the number of 172 * enqueued tasks exceeds maxalloc is sufficient to keep the device busy for 173 * the duration of the sleep time in taskq_ent_alloc(). That is, lofi should 174 * set maxalloc to be the maximum throughput (in bytes per second) of the 175 * underlying device divided by the minimum I/O size. We assume a realistic 176 * maximum throughput of one hundred megabytes per second; we set maxalloc on 177 * the lofi task queue to be 104857600 divided by DEV_BSIZE. 178 */ 179 static int lofi_taskq_maxalloc = 104857600 / DEV_BSIZE; 180 static int lofi_taskq_nthreads = 4; /* # of taskq threads per device */ 181 182 const char lofi_crypto_magic[6] = LOFI_CRYPTO_MAGIC; 183 184 /* 185 * To avoid decompressing data in a compressed segment multiple times 186 * when accessing small parts of a segment's data, we cache and reuse 187 * the uncompressed segment's data. 
188 * 189 * A single cached segment is sufficient to avoid lots of duplicate 190 * segment decompress operations. A small cache size also reduces the 191 * memory footprint. 192 * 193 * lofi_max_comp_cache is the maximum number of decompressed data segments 194 * cached for each compressed lofi image. It can be set to 0 to disable 195 * caching. 196 */ 197 198 uint32_t lofi_max_comp_cache = 1; 199 200 static int gzip_decompress(void *src, size_t srclen, void *dst, 201 size_t *destlen, int level); 202 203 static int lzma_decompress(void *src, size_t srclen, void *dst, 204 size_t *dstlen, int level); 205 206 lofi_compress_info_t lofi_compress_table[LOFI_COMPRESS_FUNCTIONS] = { 207 {gzip_decompress, NULL, 6, "gzip"}, /* default */ 208 {gzip_decompress, NULL, 6, "gzip-6"}, 209 {gzip_decompress, NULL, 9, "gzip-9"}, 210 {lzma_decompress, NULL, 0, "lzma"} 211 }; 212 213 /*ARGSUSED*/ 214 static void 215 *SzAlloc(void *p, size_t size) 216 { 217 return (kmem_alloc(size, KM_SLEEP)); 218 } 219 220 /*ARGSUSED*/ 221 static void 222 SzFree(void *p, void *address, size_t size) 223 { 224 kmem_free(address, size); 225 } 226 227 static ISzAlloc g_Alloc = { SzAlloc, SzFree }; 228 229 /* 230 * Free data referenced by the linked list of cached uncompressed 231 * segments. 
232 */ 233 static void 234 lofi_free_comp_cache(struct lofi_state *lsp) 235 { 236 struct lofi_comp_cache *lc; 237 238 while ((lc = list_remove_head(&lsp->ls_comp_cache)) != NULL) { 239 kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz); 240 kmem_free(lc, sizeof (struct lofi_comp_cache)); 241 lsp->ls_comp_cache_count--; 242 } 243 ASSERT(lsp->ls_comp_cache_count == 0); 244 } 245 246 static int 247 is_opened(struct lofi_state *lsp) 248 { 249 ASSERT(MUTEX_HELD(&lofi_lock)); 250 return (lsp->ls_chr_open || lsp->ls_blk_open || lsp->ls_lyr_open_count); 251 } 252 253 static int 254 mark_opened(struct lofi_state *lsp, int otyp) 255 { 256 ASSERT(MUTEX_HELD(&lofi_lock)); 257 switch (otyp) { 258 case OTYP_CHR: 259 lsp->ls_chr_open = 1; 260 break; 261 case OTYP_BLK: 262 lsp->ls_blk_open = 1; 263 break; 264 case OTYP_LYR: 265 lsp->ls_lyr_open_count++; 266 break; 267 default: 268 return (-1); 269 } 270 return (0); 271 } 272 273 static void 274 mark_closed(struct lofi_state *lsp, int otyp) 275 { 276 ASSERT(MUTEX_HELD(&lofi_lock)); 277 switch (otyp) { 278 case OTYP_CHR: 279 lsp->ls_chr_open = 0; 280 break; 281 case OTYP_BLK: 282 lsp->ls_blk_open = 0; 283 break; 284 case OTYP_LYR: 285 lsp->ls_lyr_open_count--; 286 break; 287 default: 288 break; 289 } 290 } 291 292 static void 293 lofi_free_crypto(struct lofi_state *lsp) 294 { 295 ASSERT(MUTEX_HELD(&lofi_lock)); 296 297 if (lsp->ls_crypto_enabled) { 298 /* 299 * Clean up the crypto state so that it doesn't hang around 300 * in memory after we are done with it. 
301 */ 302 if (lsp->ls_key.ck_data != NULL) { 303 bzero(lsp->ls_key.ck_data, 304 CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); 305 kmem_free(lsp->ls_key.ck_data, 306 CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); 307 lsp->ls_key.ck_data = NULL; 308 lsp->ls_key.ck_length = 0; 309 } 310 311 if (lsp->ls_mech.cm_param != NULL) { 312 kmem_free(lsp->ls_mech.cm_param, 313 lsp->ls_mech.cm_param_len); 314 lsp->ls_mech.cm_param = NULL; 315 lsp->ls_mech.cm_param_len = 0; 316 } 317 318 if (lsp->ls_iv_mech.cm_param != NULL) { 319 kmem_free(lsp->ls_iv_mech.cm_param, 320 lsp->ls_iv_mech.cm_param_len); 321 lsp->ls_iv_mech.cm_param = NULL; 322 lsp->ls_iv_mech.cm_param_len = 0; 323 } 324 325 mutex_destroy(&lsp->ls_crypto_lock); 326 } 327 } 328 329 static void 330 lofi_destroy(struct lofi_state *lsp, cred_t *credp) 331 { 332 minor_t minor = getminor(lsp->ls_dev); 333 int i; 334 335 ASSERT(MUTEX_HELD(&lofi_lock)); 336 337 list_remove(&lofi_list, lsp); 338 339 lofi_free_crypto(lsp); 340 341 /* 342 * Free pre-allocated compressed buffers 343 */ 344 if (lsp->ls_comp_bufs != NULL) { 345 for (i = 0; i < lofi_taskq_nthreads; i++) { 346 if (lsp->ls_comp_bufs[i].bufsize > 0) 347 kmem_free(lsp->ls_comp_bufs[i].buf, 348 lsp->ls_comp_bufs[i].bufsize); 349 } 350 kmem_free(lsp->ls_comp_bufs, 351 sizeof (struct compbuf) * lofi_taskq_nthreads); 352 } 353 354 (void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag, 355 1, 0, credp, NULL); 356 VN_RELE(lsp->ls_vp); 357 if (lsp->ls_stacked_vp != lsp->ls_vp) 358 VN_RELE(lsp->ls_stacked_vp); 359 360 taskq_destroy(lsp->ls_taskq); 361 362 if (lsp->ls_kstat != NULL) 363 kstat_delete(lsp->ls_kstat); 364 365 /* 366 * Free cached decompressed segment data 367 */ 368 lofi_free_comp_cache(lsp); 369 list_destroy(&lsp->ls_comp_cache); 370 371 if (lsp->ls_uncomp_seg_sz > 0) { 372 kmem_free(lsp->ls_comp_index_data, lsp->ls_comp_index_data_sz); 373 lsp->ls_uncomp_seg_sz = 0; 374 } 375 376 rctl_decr_lofi(lsp->ls_zone, 1); 377 zone_rele(lsp->ls_zone); 378 379 
mutex_destroy(&lsp->ls_comp_cache_lock); 380 mutex_destroy(&lsp->ls_comp_bufs_lock); 381 mutex_destroy(&lsp->ls_kstat_lock); 382 mutex_destroy(&lsp->ls_vp_lock); 383 384 ASSERT(ddi_get_soft_state(lofi_statep, minor) == lsp); 385 ddi_soft_state_free(lofi_statep, minor); 386 id_free(lofi_minor_id, minor); 387 } 388 389 static void 390 lofi_free_dev(dev_t dev) 391 { 392 minor_t minor = getminor(dev); 393 char namebuf[50]; 394 395 ASSERT(MUTEX_HELD(&lofi_lock)); 396 397 (void) ddi_prop_remove(dev, lofi_dip, ZONE_PROP_NAME); 398 (void) ddi_prop_remove(dev, lofi_dip, SIZE_PROP_NAME); 399 (void) ddi_prop_remove(dev, lofi_dip, NBLOCKS_PROP_NAME); 400 401 (void) snprintf(namebuf, sizeof (namebuf), "%d", minor); 402 ddi_remove_minor_node(lofi_dip, namebuf); 403 (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor); 404 ddi_remove_minor_node(lofi_dip, namebuf); 405 } 406 407 /*ARGSUSED*/ 408 static void 409 lofi_zone_shutdown(zoneid_t zoneid, void *arg) 410 { 411 struct lofi_state *lsp; 412 struct lofi_state *next; 413 414 mutex_enter(&lofi_lock); 415 416 for (lsp = list_head(&lofi_list); lsp != NULL; lsp = next) { 417 418 /* lofi_destroy() frees lsp */ 419 next = list_next(&lofi_list, lsp); 420 421 if (lsp->ls_zone->zone_id != zoneid) 422 continue; 423 424 /* 425 * No in-zone processes are running, but something has this 426 * open. It's either a global zone process, or a lofi 427 * mount. In either case we set ls_cleanup so the last 428 * user destroys the device. 429 */ 430 if (is_opened(lsp)) { 431 lsp->ls_cleanup = 1; 432 } else { 433 lofi_free_dev(lsp->ls_dev); 434 lofi_destroy(lsp, kcred); 435 } 436 } 437 438 mutex_exit(&lofi_lock); 439 } 440 441 /*ARGSUSED*/ 442 static int 443 lofi_open(dev_t *devp, int flag, int otyp, struct cred *credp) 444 { 445 minor_t minor; 446 struct lofi_state *lsp; 447 448 /* 449 * lofiadm -a /dev/lofi/1 gets us here. 
450 */ 451 if (mutex_owner(&lofi_lock) == curthread) 452 return (EINVAL); 453 454 mutex_enter(&lofi_lock); 455 456 minor = getminor(*devp); 457 458 /* master control device */ 459 if (minor == 0) { 460 mutex_exit(&lofi_lock); 461 return (0); 462 } 463 464 /* otherwise, the mapping should already exist */ 465 lsp = ddi_get_soft_state(lofi_statep, minor); 466 if (lsp == NULL) { 467 mutex_exit(&lofi_lock); 468 return (EINVAL); 469 } 470 471 if (lsp->ls_vp == NULL) { 472 mutex_exit(&lofi_lock); 473 return (ENXIO); 474 } 475 476 if (mark_opened(lsp, otyp) == -1) { 477 mutex_exit(&lofi_lock); 478 return (EINVAL); 479 } 480 481 mutex_exit(&lofi_lock); 482 return (0); 483 } 484 485 /*ARGSUSED*/ 486 static int 487 lofi_close(dev_t dev, int flag, int otyp, struct cred *credp) 488 { 489 minor_t minor; 490 struct lofi_state *lsp; 491 492 mutex_enter(&lofi_lock); 493 minor = getminor(dev); 494 lsp = ddi_get_soft_state(lofi_statep, minor); 495 if (lsp == NULL) { 496 mutex_exit(&lofi_lock); 497 return (EINVAL); 498 } 499 500 if (minor == 0) { 501 mutex_exit(&lofi_lock); 502 return (0); 503 } 504 505 mark_closed(lsp, otyp); 506 507 /* 508 * If we forcibly closed the underlying device (li_force), or 509 * asked for cleanup (li_cleanup), finish up if we're the last 510 * out of the door. 511 */ 512 if (!is_opened(lsp) && (lsp->ls_cleanup || lsp->ls_vp == NULL)) { 513 lofi_free_dev(lsp->ls_dev); 514 lofi_destroy(lsp, credp); 515 } 516 517 mutex_exit(&lofi_lock); 518 return (0); 519 } 520 521 /* 522 * Sets the mechanism's initialization vector (IV) if one is needed. 523 * The IV is computed from the data block number. lsp->ls_mech is 524 * altered so that: 525 * lsp->ls_mech.cm_param_len is set to the IV len. 526 * lsp->ls_mech.cm_param is set to the IV. 
527 */ 528 static int 529 lofi_blk_mech(struct lofi_state *lsp, longlong_t lblkno) 530 { 531 int ret; 532 crypto_data_t cdata; 533 char *iv; 534 size_t iv_len; 535 size_t min; 536 void *data; 537 size_t datasz; 538 539 ASSERT(MUTEX_HELD(&lsp->ls_crypto_lock)); 540 541 if (lsp == NULL) 542 return (CRYPTO_DEVICE_ERROR); 543 544 /* lsp->ls_mech.cm_param{_len} has already been set for static iv */ 545 if (lsp->ls_iv_type == IVM_NONE) { 546 return (CRYPTO_SUCCESS); 547 } 548 549 /* 550 * if kmem already alloced from previous call and it's the same size 551 * we need now, just recycle it; allocate new kmem only if we have to 552 */ 553 if (lsp->ls_mech.cm_param == NULL || 554 lsp->ls_mech.cm_param_len != lsp->ls_iv_len) { 555 iv_len = lsp->ls_iv_len; 556 iv = kmem_zalloc(iv_len, KM_SLEEP); 557 } else { 558 iv_len = lsp->ls_mech.cm_param_len; 559 iv = lsp->ls_mech.cm_param; 560 bzero(iv, iv_len); 561 } 562 563 switch (lsp->ls_iv_type) { 564 case IVM_ENC_BLKNO: 565 /* iv is not static, lblkno changes each time */ 566 data = &lblkno; 567 datasz = sizeof (lblkno); 568 break; 569 default: 570 data = 0; 571 datasz = 0; 572 break; 573 } 574 575 /* 576 * write blkno into the iv buffer padded on the left in case 577 * blkno ever grows bigger than its current longlong_t size 578 * or a variation other than blkno is used for the iv data 579 */ 580 min = MIN(datasz, iv_len); 581 bcopy(data, iv + (iv_len - min), min); 582 583 /* encrypt the data in-place to get the IV */ 584 SETUP_C_DATA(cdata, iv, iv_len); 585 586 ret = crypto_encrypt(&lsp->ls_iv_mech, &cdata, &lsp->ls_key, 587 NULL, NULL, NULL); 588 if (ret != CRYPTO_SUCCESS) { 589 cmn_err(CE_WARN, "failed to create iv for block %lld: (0x%x)", 590 lblkno, ret); 591 if (lsp->ls_mech.cm_param != iv) 592 kmem_free(iv, iv_len); 593 594 return (ret); 595 } 596 597 /* clean up the iv from the last computation */ 598 if (lsp->ls_mech.cm_param != NULL && lsp->ls_mech.cm_param != iv) 599 kmem_free(lsp->ls_mech.cm_param, 
lsp->ls_mech.cm_param_len); 600 601 lsp->ls_mech.cm_param_len = iv_len; 602 lsp->ls_mech.cm_param = iv; 603 604 return (CRYPTO_SUCCESS); 605 } 606 607 /* 608 * Performs encryption and decryption of a chunk of data of size "len", 609 * one DEV_BSIZE block at a time. "len" is assumed to be a multiple of 610 * DEV_BSIZE. 611 */ 612 static int 613 lofi_crypto(struct lofi_state *lsp, struct buf *bp, caddr_t plaintext, 614 caddr_t ciphertext, size_t len, boolean_t op_encrypt) 615 { 616 crypto_data_t cdata; 617 crypto_data_t wdata; 618 int ret; 619 longlong_t lblkno = bp->b_lblkno; 620 621 mutex_enter(&lsp->ls_crypto_lock); 622 623 /* 624 * though we could encrypt/decrypt entire "len" chunk of data, we need 625 * to break it into DEV_BSIZE pieces to capture blkno incrementing 626 */ 627 SETUP_C_DATA(cdata, plaintext, len); 628 cdata.cd_length = DEV_BSIZE; 629 if (ciphertext != NULL) { /* not in-place crypto */ 630 SETUP_C_DATA(wdata, ciphertext, len); 631 wdata.cd_length = DEV_BSIZE; 632 } 633 634 do { 635 ret = lofi_blk_mech(lsp, lblkno); 636 if (ret != CRYPTO_SUCCESS) 637 continue; 638 639 if (op_encrypt) { 640 ret = crypto_encrypt(&lsp->ls_mech, &cdata, 641 &lsp->ls_key, NULL, 642 ((ciphertext != NULL) ? &wdata : NULL), NULL); 643 } else { 644 ret = crypto_decrypt(&lsp->ls_mech, &cdata, 645 &lsp->ls_key, NULL, 646 ((ciphertext != NULL) ? &wdata : NULL), NULL); 647 } 648 649 cdata.cd_offset += DEV_BSIZE; 650 if (ciphertext != NULL) 651 wdata.cd_offset += DEV_BSIZE; 652 lblkno++; 653 } while (ret == CRYPTO_SUCCESS && cdata.cd_offset < len); 654 655 mutex_exit(&lsp->ls_crypto_lock); 656 657 if (ret != CRYPTO_SUCCESS) { 658 cmn_err(CE_WARN, "%s failed for block %lld: (0x%x)", 659 op_encrypt ? 
"crypto_encrypt()" : "crypto_decrypt()", 660 lblkno, ret); 661 } 662 663 return (ret); 664 } 665 666 #define RDWR_RAW 1 667 #define RDWR_BCOPY 2 668 669 static int 670 lofi_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp, 671 struct lofi_state *lsp, size_t len, int method, caddr_t bcopy_locn) 672 { 673 ssize_t resid; 674 int isread; 675 int error; 676 677 /* 678 * Handles reads/writes for both plain and encrypted lofi 679 * Note: offset is already shifted by lsp->ls_crypto_offset 680 * when it gets here. 681 */ 682 683 isread = bp->b_flags & B_READ; 684 if (isread) { 685 if (method == RDWR_BCOPY) { 686 /* DO NOT update bp->b_resid for bcopy */ 687 bcopy(bcopy_locn, bufaddr, len); 688 error = 0; 689 } else { /* RDWR_RAW */ 690 error = vn_rdwr(UIO_READ, lsp->ls_vp, bufaddr, len, 691 offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, 692 &resid); 693 bp->b_resid = resid; 694 } 695 if (lsp->ls_crypto_enabled && error == 0) { 696 if (lofi_crypto(lsp, bp, bufaddr, NULL, len, 697 B_FALSE) != CRYPTO_SUCCESS) { 698 /* 699 * XXX: original code didn't set residual 700 * back to len because no error was expected 701 * from bcopy() if encryption is not enabled 702 */ 703 if (method != RDWR_BCOPY) 704 bp->b_resid = len; 705 error = EIO; 706 } 707 } 708 return (error); 709 } else { 710 void *iobuf = bufaddr; 711 712 if (lsp->ls_crypto_enabled) { 713 /* don't do in-place crypto to keep bufaddr intact */ 714 iobuf = kmem_alloc(len, KM_SLEEP); 715 if (lofi_crypto(lsp, bp, bufaddr, iobuf, len, 716 B_TRUE) != CRYPTO_SUCCESS) { 717 kmem_free(iobuf, len); 718 if (method != RDWR_BCOPY) 719 bp->b_resid = len; 720 return (EIO); 721 } 722 } 723 if (method == RDWR_BCOPY) { 724 /* DO NOT update bp->b_resid for bcopy */ 725 bcopy(iobuf, bcopy_locn, len); 726 error = 0; 727 } else { /* RDWR_RAW */ 728 error = vn_rdwr(UIO_WRITE, lsp->ls_vp, iobuf, len, 729 offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, 730 &resid); 731 bp->b_resid = resid; 732 } 733 if (lsp->ls_crypto_enabled) { 734 
kmem_free(iobuf, len); 735 } 736 return (error); 737 } 738 } 739 740 static int 741 lofi_mapped_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp, 742 struct lofi_state *lsp) 743 { 744 int error; 745 offset_t alignedoffset, mapoffset; 746 size_t xfersize; 747 int isread; 748 int smflags; 749 caddr_t mapaddr; 750 size_t len; 751 enum seg_rw srw; 752 int save_error; 753 754 /* 755 * Note: offset is already shifted by lsp->ls_crypto_offset 756 * when it gets here. 757 */ 758 if (lsp->ls_crypto_enabled) 759 ASSERT(lsp->ls_vp_comp_size == lsp->ls_vp_size); 760 761 /* 762 * segmap always gives us an 8K (MAXBSIZE) chunk, aligned on 763 * an 8K boundary, but the buf transfer address may not be 764 * aligned on more than a 512-byte boundary (we don't enforce 765 * that even though we could). This matters since the initial 766 * part of the transfer may not start at offset 0 within the 767 * segmap'd chunk. So we have to compensate for that with 768 * 'mapoffset'. Subsequent chunks always start off at the 769 * beginning, and the last is capped by b_resid 770 * 771 * Visually, where "|" represents page map boundaries: 772 * alignedoffset (mapaddr begins at this segmap boundary) 773 * | offset (from beginning of file) 774 * | | len 775 * v v v 776 * ===|====X========|====...======|========X====|==== 777 * /-------------...---------------/ 778 * ^ bp->b_bcount/bp->b_resid at start 779 * /----/--------/----...------/--------/ 780 * ^ ^ ^ ^ ^ 781 * | | | | nth xfersize (<= MAXBSIZE) 782 * | | 2nd thru n-1st xfersize (= MAXBSIZE) 783 * | 1st xfersize (<= MAXBSIZE) 784 * mapoffset (offset into 1st segmap, non-0 1st time, 0 thereafter) 785 * 786 * Notes: "alignedoffset" is "offset" rounded down to nearest 787 * MAXBSIZE boundary. "len" is next page boundary of size 788 * PAGESIZE after "alignedoffset". 789 */ 790 mapoffset = offset & MAXBOFFSET; 791 alignedoffset = offset - mapoffset; 792 bp->b_resid = bp->b_bcount; 793 isread = bp->b_flags & B_READ; 794 srw = isread ? 
S_READ : S_WRITE; 795 do { 796 xfersize = MIN(lsp->ls_vp_comp_size - offset, 797 MIN(MAXBSIZE - mapoffset, bp->b_resid)); 798 len = roundup(mapoffset + xfersize, PAGESIZE); 799 mapaddr = segmap_getmapflt(segkmap, lsp->ls_vp, 800 alignedoffset, MAXBSIZE, 1, srw); 801 /* 802 * Now fault in the pages. This lets us check 803 * for errors before we reference mapaddr and 804 * try to resolve the fault in bcopy (which would 805 * panic instead). And this can easily happen, 806 * particularly if you've lofi'd a file over NFS 807 * and someone deletes the file on the server. 808 */ 809 error = segmap_fault(kas.a_hat, segkmap, mapaddr, 810 len, F_SOFTLOCK, srw); 811 if (error) { 812 (void) segmap_release(segkmap, mapaddr, 0); 813 if (FC_CODE(error) == FC_OBJERR) 814 error = FC_ERRNO(error); 815 else 816 error = EIO; 817 break; 818 } 819 /* error may be non-zero for encrypted lofi */ 820 error = lofi_rdwr(bufaddr, 0, bp, lsp, xfersize, 821 RDWR_BCOPY, mapaddr + mapoffset); 822 if (error == 0) { 823 bp->b_resid -= xfersize; 824 bufaddr += xfersize; 825 offset += xfersize; 826 } 827 smflags = 0; 828 if (isread) { 829 smflags |= SM_FREE; 830 /* 831 * If we're reading an entire page starting 832 * at a page boundary, there's a good chance 833 * we won't need it again. Put it on the 834 * head of the freelist. 835 */ 836 if (mapoffset == 0 && xfersize == MAXBSIZE) 837 smflags |= SM_DONTNEED; 838 } else { 839 /* 840 * Write back good pages, it is okay to 841 * always release asynchronous here as we'll 842 * follow with VOP_FSYNC for B_SYNC buffers. 
843 */ 844 if (error == 0) 845 smflags |= SM_WRITE | SM_ASYNC; 846 } 847 (void) segmap_fault(kas.a_hat, segkmap, mapaddr, 848 len, F_SOFTUNLOCK, srw); 849 save_error = segmap_release(segkmap, mapaddr, smflags); 850 if (error == 0) 851 error = save_error; 852 /* only the first map may start partial */ 853 mapoffset = 0; 854 alignedoffset += MAXBSIZE; 855 } while ((error == 0) && (bp->b_resid > 0) && 856 (offset < lsp->ls_vp_comp_size)); 857 858 return (error); 859 } 860 861 /* 862 * Check if segment seg_index is present in the decompressed segment 863 * data cache. 864 * 865 * Returns a pointer to the decompressed segment data cache entry if 866 * found, and NULL when decompressed data for this segment is not yet 867 * cached. 868 */ 869 static struct lofi_comp_cache * 870 lofi_find_comp_data(struct lofi_state *lsp, uint64_t seg_index) 871 { 872 struct lofi_comp_cache *lc; 873 874 ASSERT(MUTEX_HELD(&lsp->ls_comp_cache_lock)); 875 876 for (lc = list_head(&lsp->ls_comp_cache); lc != NULL; 877 lc = list_next(&lsp->ls_comp_cache, lc)) { 878 if (lc->lc_index == seg_index) { 879 /* 880 * Decompressed segment data was found in the 881 * cache. 882 * 883 * The cache uses an LRU replacement strategy; 884 * move the entry to head of list. 885 */ 886 list_remove(&lsp->ls_comp_cache, lc); 887 list_insert_head(&lsp->ls_comp_cache, lc); 888 return (lc); 889 } 890 } 891 return (NULL); 892 } 893 894 /* 895 * Add the data for a decompressed segment at segment index 896 * seg_index to the cache of the decompressed segments. 897 * 898 * Returns a pointer to the cache element structure in case 899 * the data was added to the cache; returns NULL when the data 900 * wasn't cached. 
901 */ 902 static struct lofi_comp_cache * 903 lofi_add_comp_data(struct lofi_state *lsp, uint64_t seg_index, 904 uchar_t *data) 905 { 906 struct lofi_comp_cache *lc; 907 908 ASSERT(MUTEX_HELD(&lsp->ls_comp_cache_lock)); 909 910 while (lsp->ls_comp_cache_count > lofi_max_comp_cache) { 911 lc = list_remove_tail(&lsp->ls_comp_cache); 912 ASSERT(lc != NULL); 913 kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz); 914 kmem_free(lc, sizeof (struct lofi_comp_cache)); 915 lsp->ls_comp_cache_count--; 916 } 917 918 /* 919 * Do not cache when disabled by tunable variable 920 */ 921 if (lofi_max_comp_cache == 0) 922 return (NULL); 923 924 /* 925 * When the cache has not yet reached the maximum allowed 926 * number of segments, allocate a new cache element. 927 * Otherwise the cache is full; reuse the last list element 928 * (LRU) for caching the decompressed segment data. 929 * 930 * The cache element for the new decompressed segment data is 931 * added to the head of the list. 932 */ 933 if (lsp->ls_comp_cache_count < lofi_max_comp_cache) { 934 lc = kmem_alloc(sizeof (struct lofi_comp_cache), KM_SLEEP); 935 lc->lc_data = NULL; 936 list_insert_head(&lsp->ls_comp_cache, lc); 937 lsp->ls_comp_cache_count++; 938 } else { 939 lc = list_remove_tail(&lsp->ls_comp_cache); 940 if (lc == NULL) 941 return (NULL); 942 list_insert_head(&lsp->ls_comp_cache, lc); 943 } 944 945 /* 946 * Free old uncompressed segment data when reusing a cache 947 * entry. 
948 */ 949 if (lc->lc_data != NULL) 950 kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz); 951 952 lc->lc_data = data; 953 lc->lc_index = seg_index; 954 return (lc); 955 } 956 957 958 /*ARGSUSED*/ 959 static int 960 gzip_decompress(void *src, size_t srclen, void *dst, 961 size_t *dstlen, int level) 962 { 963 ASSERT(*dstlen >= srclen); 964 965 if (z_uncompress(dst, dstlen, src, srclen) != Z_OK) 966 return (-1); 967 return (0); 968 } 969 970 #define LZMA_HEADER_SIZE (LZMA_PROPS_SIZE + 8) 971 /*ARGSUSED*/ 972 static int 973 lzma_decompress(void *src, size_t srclen, void *dst, 974 size_t *dstlen, int level) 975 { 976 size_t insizepure; 977 void *actual_src; 978 ELzmaStatus status; 979 980 insizepure = srclen - LZMA_HEADER_SIZE; 981 actual_src = (void *)((Byte *)src + LZMA_HEADER_SIZE); 982 983 if (LzmaDecode((Byte *)dst, (size_t *)dstlen, 984 (const Byte *)actual_src, &insizepure, 985 (const Byte *)src, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &status, 986 &g_Alloc) != SZ_OK) { 987 return (-1); 988 } 989 return (0); 990 } 991 992 /* 993 * This is basically what strategy used to be before we found we 994 * needed task queues. 
995 */ 996 static void 997 lofi_strategy_task(void *arg) 998 { 999 struct buf *bp = (struct buf *)arg; 1000 int error; 1001 int syncflag = 0; 1002 struct lofi_state *lsp; 1003 offset_t offset; 1004 caddr_t bufaddr; 1005 size_t len; 1006 size_t xfersize; 1007 boolean_t bufinited = B_FALSE; 1008 1009 lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev)); 1010 if (lsp == NULL) { 1011 error = ENXIO; 1012 goto errout; 1013 } 1014 if (lsp->ls_kstat) { 1015 mutex_enter(lsp->ls_kstat->ks_lock); 1016 kstat_waitq_to_runq(KSTAT_IO_PTR(lsp->ls_kstat)); 1017 mutex_exit(lsp->ls_kstat->ks_lock); 1018 } 1019 bp_mapin(bp); 1020 bufaddr = bp->b_un.b_addr; 1021 offset = bp->b_lblkno * DEV_BSIZE; /* offset within file */ 1022 if (lsp->ls_crypto_enabled) { 1023 /* encrypted data really begins after crypto header */ 1024 offset += lsp->ls_crypto_offset; 1025 } 1026 len = bp->b_bcount; 1027 bufinited = B_TRUE; 1028 1029 if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) { 1030 error = EIO; 1031 goto errout; 1032 } 1033 1034 /* 1035 * If we're writing and the buffer was not B_ASYNC 1036 * we'll follow up with a VOP_FSYNC() to force any 1037 * asynchronous I/O to stable storage. 1038 */ 1039 if (!(bp->b_flags & B_READ) && !(bp->b_flags & B_ASYNC)) 1040 syncflag = FSYNC; 1041 1042 /* 1043 * We used to always use vn_rdwr here, but we cannot do that because 1044 * we might decide to read or write from the the underlying 1045 * file during this call, which would be a deadlock because 1046 * we have the rw_lock. So instead we page, unless it's not 1047 * mapable or it's a character device or it's an encrypted lofi. 
1048 */ 1049 if ((lsp->ls_vp->v_flag & VNOMAP) || (lsp->ls_vp->v_type == VCHR) || 1050 lsp->ls_crypto_enabled) { 1051 error = lofi_rdwr(bufaddr, offset, bp, lsp, len, RDWR_RAW, 1052 NULL); 1053 } else if (lsp->ls_uncomp_seg_sz == 0) { 1054 error = lofi_mapped_rdwr(bufaddr, offset, bp, lsp); 1055 } else { 1056 uchar_t *compressed_seg = NULL, *cmpbuf; 1057 uchar_t *uncompressed_seg = NULL; 1058 lofi_compress_info_t *li; 1059 size_t oblkcount; 1060 ulong_t seglen; 1061 uint64_t sblkno, eblkno, cmpbytes; 1062 uint64_t uncompressed_seg_index; 1063 struct lofi_comp_cache *lc; 1064 offset_t sblkoff, eblkoff; 1065 u_offset_t salign, ealign; 1066 u_offset_t sdiff; 1067 uint32_t comp_data_sz; 1068 uint64_t i; 1069 int j; 1070 1071 /* 1072 * From here on we're dealing primarily with compressed files 1073 */ 1074 ASSERT(!lsp->ls_crypto_enabled); 1075 1076 /* 1077 * Compressed files can only be read from and 1078 * not written to 1079 */ 1080 if (!(bp->b_flags & B_READ)) { 1081 bp->b_resid = bp->b_bcount; 1082 error = EROFS; 1083 goto done; 1084 } 1085 1086 ASSERT(lsp->ls_comp_algorithm_index >= 0); 1087 li = &lofi_compress_table[lsp->ls_comp_algorithm_index]; 1088 /* 1089 * Compute starting and ending compressed segment numbers 1090 * We use only bitwise operations avoiding division and 1091 * modulus because we enforce the compression segment size 1092 * to a power of 2 1093 */ 1094 sblkno = offset >> lsp->ls_comp_seg_shift; 1095 sblkoff = offset & (lsp->ls_uncomp_seg_sz - 1); 1096 eblkno = (offset + bp->b_bcount) >> lsp->ls_comp_seg_shift; 1097 eblkoff = (offset + bp->b_bcount) & (lsp->ls_uncomp_seg_sz - 1); 1098 1099 /* 1100 * Check the decompressed segment cache. 1101 * 1102 * The cache is used only when the requested data 1103 * is within a segment. Requests that cross 1104 * segment boundaries bypass the cache. 
1105 */ 1106 if (sblkno == eblkno || 1107 (sblkno + 1 == eblkno && eblkoff == 0)) { 1108 /* 1109 * Request doesn't cross a segment boundary, 1110 * now check the cache. 1111 */ 1112 mutex_enter(&lsp->ls_comp_cache_lock); 1113 lc = lofi_find_comp_data(lsp, sblkno); 1114 if (lc != NULL) { 1115 /* 1116 * We've found the decompressed segment 1117 * data in the cache; reuse it. 1118 */ 1119 bcopy(lc->lc_data + sblkoff, bufaddr, 1120 bp->b_bcount); 1121 mutex_exit(&lsp->ls_comp_cache_lock); 1122 bp->b_resid = 0; 1123 error = 0; 1124 goto done; 1125 } 1126 mutex_exit(&lsp->ls_comp_cache_lock); 1127 } 1128 1129 /* 1130 * Align start offset to block boundary for segmap 1131 */ 1132 salign = lsp->ls_comp_seg_index[sblkno]; 1133 sdiff = salign & (DEV_BSIZE - 1); 1134 salign -= sdiff; 1135 if (eblkno >= (lsp->ls_comp_index_sz - 1)) { 1136 /* 1137 * We're dealing with the last segment of 1138 * the compressed file -- the size of this 1139 * segment *may not* be the same as the 1140 * segment size for the file 1141 */ 1142 eblkoff = (offset + bp->b_bcount) & 1143 (lsp->ls_uncomp_last_seg_sz - 1); 1144 ealign = lsp->ls_vp_comp_size; 1145 } else { 1146 ealign = lsp->ls_comp_seg_index[eblkno + 1]; 1147 } 1148 1149 /* 1150 * Preserve original request paramaters 1151 */ 1152 oblkcount = bp->b_bcount; 1153 1154 /* 1155 * Assign the calculated parameters 1156 */ 1157 comp_data_sz = ealign - salign; 1158 bp->b_bcount = comp_data_sz; 1159 1160 /* 1161 * Buffers to hold compressed segments are pre-allocated 1162 * on a per-thread basis. Find a pre-allocated buffer 1163 * that is not currently in use and mark it for use. 
1164 */ 1165 mutex_enter(&lsp->ls_comp_bufs_lock); 1166 for (j = 0; j < lofi_taskq_nthreads; j++) { 1167 if (lsp->ls_comp_bufs[j].inuse == 0) { 1168 lsp->ls_comp_bufs[j].inuse = 1; 1169 break; 1170 } 1171 } 1172 1173 mutex_exit(&lsp->ls_comp_bufs_lock); 1174 ASSERT(j < lofi_taskq_nthreads); 1175 1176 /* 1177 * If the pre-allocated buffer size does not match 1178 * the size of the I/O request, re-allocate it with 1179 * the appropriate size 1180 */ 1181 if (lsp->ls_comp_bufs[j].bufsize < bp->b_bcount) { 1182 if (lsp->ls_comp_bufs[j].bufsize > 0) 1183 kmem_free(lsp->ls_comp_bufs[j].buf, 1184 lsp->ls_comp_bufs[j].bufsize); 1185 lsp->ls_comp_bufs[j].buf = kmem_alloc(bp->b_bcount, 1186 KM_SLEEP); 1187 lsp->ls_comp_bufs[j].bufsize = bp->b_bcount; 1188 } 1189 compressed_seg = lsp->ls_comp_bufs[j].buf; 1190 1191 /* 1192 * Map in the calculated number of blocks 1193 */ 1194 error = lofi_mapped_rdwr((caddr_t)compressed_seg, salign, 1195 bp, lsp); 1196 1197 bp->b_bcount = oblkcount; 1198 bp->b_resid = oblkcount; 1199 if (error != 0) 1200 goto done; 1201 1202 /* 1203 * decompress compressed blocks start 1204 */ 1205 cmpbuf = compressed_seg + sdiff; 1206 for (i = sblkno; i <= eblkno; i++) { 1207 ASSERT(i < lsp->ls_comp_index_sz - 1); 1208 uchar_t *useg; 1209 1210 /* 1211 * The last segment is special in that it is 1212 * most likely not going to be the same 1213 * (uncompressed) size as the other segments. 1214 */ 1215 if (i == (lsp->ls_comp_index_sz - 2)) { 1216 seglen = lsp->ls_uncomp_last_seg_sz; 1217 } else { 1218 seglen = lsp->ls_uncomp_seg_sz; 1219 } 1220 1221 /* 1222 * Each of the segment index entries contains 1223 * the starting block number for that segment. 1224 * The number of compressed bytes in a segment 1225 * is thus the difference between the starting 1226 * block number of this segment and the starting 1227 * block number of the next segment. 
1228 */ 1229 cmpbytes = lsp->ls_comp_seg_index[i + 1] - 1230 lsp->ls_comp_seg_index[i]; 1231 1232 /* 1233 * The first byte in a compressed segment is a flag 1234 * that indicates whether this segment is compressed 1235 * at all. 1236 * 1237 * The variable 'useg' is used (instead of 1238 * uncompressed_seg) in this loop to keep a 1239 * reference to the uncompressed segment. 1240 * 1241 * N.B. If 'useg' is replaced with uncompressed_seg, 1242 * it leads to memory leaks and heap corruption in 1243 * corner cases where compressed segments lie 1244 * adjacent to uncompressed segments. 1245 */ 1246 if (*cmpbuf == UNCOMPRESSED) { 1247 useg = cmpbuf + SEGHDR; 1248 } else { 1249 if (uncompressed_seg == NULL) 1250 uncompressed_seg = 1251 kmem_alloc(lsp->ls_uncomp_seg_sz, 1252 KM_SLEEP); 1253 useg = uncompressed_seg; 1254 uncompressed_seg_index = i; 1255 1256 if (li->l_decompress((cmpbuf + SEGHDR), 1257 (cmpbytes - SEGHDR), uncompressed_seg, 1258 &seglen, li->l_level) != 0) { 1259 error = EIO; 1260 goto done; 1261 } 1262 } 1263 1264 /* 1265 * Determine how much uncompressed data we 1266 * have to copy and copy it 1267 */ 1268 xfersize = lsp->ls_uncomp_seg_sz - sblkoff; 1269 if (i == eblkno) 1270 xfersize -= (lsp->ls_uncomp_seg_sz - eblkoff); 1271 1272 bcopy((useg + sblkoff), bufaddr, xfersize); 1273 1274 cmpbuf += cmpbytes; 1275 bufaddr += xfersize; 1276 bp->b_resid -= xfersize; 1277 sblkoff = 0; 1278 1279 if (bp->b_resid == 0) 1280 break; 1281 } /* decompress compressed blocks ends */ 1282 1283 /* 1284 * Skip to done if there is no uncompressed data to cache 1285 */ 1286 if (uncompressed_seg == NULL) 1287 goto done; 1288 1289 /* 1290 * Add the data for the last decompressed segment to 1291 * the cache. 1292 * 1293 * In case the uncompressed segment data was added to (and 1294 * is referenced by) the cache, make sure we don't free it 1295 * here. 
1296 */ 1297 mutex_enter(&lsp->ls_comp_cache_lock); 1298 if ((lc = lofi_add_comp_data(lsp, uncompressed_seg_index, 1299 uncompressed_seg)) != NULL) { 1300 uncompressed_seg = NULL; 1301 } 1302 mutex_exit(&lsp->ls_comp_cache_lock); 1303 1304 done: 1305 if (compressed_seg != NULL) { 1306 mutex_enter(&lsp->ls_comp_bufs_lock); 1307 lsp->ls_comp_bufs[j].inuse = 0; 1308 mutex_exit(&lsp->ls_comp_bufs_lock); 1309 } 1310 if (uncompressed_seg != NULL) 1311 kmem_free(uncompressed_seg, lsp->ls_uncomp_seg_sz); 1312 } /* end of handling compressed files */ 1313 1314 if ((error == 0) && (syncflag != 0)) 1315 error = VOP_FSYNC(lsp->ls_vp, syncflag, kcred, NULL); 1316 1317 errout: 1318 if (bufinited && lsp->ls_kstat) { 1319 size_t n_done = bp->b_bcount - bp->b_resid; 1320 kstat_io_t *kioptr; 1321 1322 mutex_enter(lsp->ls_kstat->ks_lock); 1323 kioptr = KSTAT_IO_PTR(lsp->ls_kstat); 1324 if (bp->b_flags & B_READ) { 1325 kioptr->nread += n_done; 1326 kioptr->reads++; 1327 } else { 1328 kioptr->nwritten += n_done; 1329 kioptr->writes++; 1330 } 1331 kstat_runq_exit(kioptr); 1332 mutex_exit(lsp->ls_kstat->ks_lock); 1333 } 1334 1335 mutex_enter(&lsp->ls_vp_lock); 1336 if (--lsp->ls_vp_iocount == 0) 1337 cv_broadcast(&lsp->ls_vp_cv); 1338 mutex_exit(&lsp->ls_vp_lock); 1339 1340 bioerror(bp, error); 1341 biodone(bp); 1342 } 1343 1344 static int 1345 lofi_strategy(struct buf *bp) 1346 { 1347 struct lofi_state *lsp; 1348 offset_t offset; 1349 1350 /* 1351 * We cannot just do I/O here, because the current thread 1352 * _might_ end up back in here because the underlying filesystem 1353 * wants a buffer, which eventually gets into bio_recycle and 1354 * might call into lofi to write out a delayed-write buffer. 1355 * This is bad if the filesystem above lofi is the same as below. 1356 * 1357 * We could come up with a complex strategy using threads to 1358 * do the I/O asynchronously, or we could use task queues. task 1359 * queues were incredibly easy so they win. 
1360 */ 1361 lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev)); 1362 if (lsp == NULL) { 1363 bioerror(bp, ENXIO); 1364 biodone(bp); 1365 return (0); 1366 } 1367 1368 mutex_enter(&lsp->ls_vp_lock); 1369 if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) { 1370 bioerror(bp, EIO); 1371 biodone(bp); 1372 mutex_exit(&lsp->ls_vp_lock); 1373 return (0); 1374 } 1375 1376 offset = bp->b_lblkno * DEV_BSIZE; /* offset within file */ 1377 if (lsp->ls_crypto_enabled) { 1378 /* encrypted data really begins after crypto header */ 1379 offset += lsp->ls_crypto_offset; 1380 } 1381 if (offset == lsp->ls_vp_size) { 1382 /* EOF */ 1383 if ((bp->b_flags & B_READ) != 0) { 1384 bp->b_resid = bp->b_bcount; 1385 bioerror(bp, 0); 1386 } else { 1387 /* writes should fail */ 1388 bioerror(bp, ENXIO); 1389 } 1390 biodone(bp); 1391 mutex_exit(&lsp->ls_vp_lock); 1392 return (0); 1393 } 1394 if (offset > lsp->ls_vp_size) { 1395 bioerror(bp, ENXIO); 1396 biodone(bp); 1397 mutex_exit(&lsp->ls_vp_lock); 1398 return (0); 1399 } 1400 lsp->ls_vp_iocount++; 1401 mutex_exit(&lsp->ls_vp_lock); 1402 1403 if (lsp->ls_kstat) { 1404 mutex_enter(lsp->ls_kstat->ks_lock); 1405 kstat_waitq_enter(KSTAT_IO_PTR(lsp->ls_kstat)); 1406 mutex_exit(lsp->ls_kstat->ks_lock); 1407 } 1408 (void) taskq_dispatch(lsp->ls_taskq, lofi_strategy_task, bp, KM_SLEEP); 1409 return (0); 1410 } 1411 1412 /*ARGSUSED2*/ 1413 static int 1414 lofi_read(dev_t dev, struct uio *uio, struct cred *credp) 1415 { 1416 if (getminor(dev) == 0) 1417 return (EINVAL); 1418 UIO_CHECK(uio); 1419 return (physio(lofi_strategy, NULL, dev, B_READ, minphys, uio)); 1420 } 1421 1422 /*ARGSUSED2*/ 1423 static int 1424 lofi_write(dev_t dev, struct uio *uio, struct cred *credp) 1425 { 1426 if (getminor(dev) == 0) 1427 return (EINVAL); 1428 UIO_CHECK(uio); 1429 return (physio(lofi_strategy, NULL, dev, B_WRITE, minphys, uio)); 1430 } 1431 1432 /*ARGSUSED2*/ 1433 static int 1434 lofi_aread(dev_t dev, struct aio_req *aio, struct cred *credp) 1435 { 1436 if 
(getminor(dev) == 0) 1437 return (EINVAL); 1438 UIO_CHECK(aio->aio_uio); 1439 return (aphysio(lofi_strategy, anocancel, dev, B_READ, minphys, aio)); 1440 } 1441 1442 /*ARGSUSED2*/ 1443 static int 1444 lofi_awrite(dev_t dev, struct aio_req *aio, struct cred *credp) 1445 { 1446 if (getminor(dev) == 0) 1447 return (EINVAL); 1448 UIO_CHECK(aio->aio_uio); 1449 return (aphysio(lofi_strategy, anocancel, dev, B_WRITE, minphys, aio)); 1450 } 1451 1452 /*ARGSUSED*/ 1453 static int 1454 lofi_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1455 { 1456 switch (infocmd) { 1457 case DDI_INFO_DEVT2DEVINFO: 1458 *result = lofi_dip; 1459 return (DDI_SUCCESS); 1460 case DDI_INFO_DEVT2INSTANCE: 1461 *result = 0; 1462 return (DDI_SUCCESS); 1463 } 1464 return (DDI_FAILURE); 1465 } 1466 1467 static int 1468 lofi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 1469 { 1470 int error; 1471 1472 if (cmd != DDI_ATTACH) 1473 return (DDI_FAILURE); 1474 1475 lofi_minor_id = id_space_create("lofi_minor_id", 1, L_MAXMIN32 + 1); 1476 1477 if (!lofi_minor_id) 1478 return (DDI_FAILURE); 1479 1480 error = ddi_soft_state_zalloc(lofi_statep, 0); 1481 if (error == DDI_FAILURE) { 1482 id_space_destroy(lofi_minor_id); 1483 return (DDI_FAILURE); 1484 } 1485 error = ddi_create_minor_node(dip, LOFI_CTL_NODE, S_IFCHR, 0, 1486 DDI_PSEUDO, NULL); 1487 if (error == DDI_FAILURE) { 1488 ddi_soft_state_free(lofi_statep, 0); 1489 id_space_destroy(lofi_minor_id); 1490 return (DDI_FAILURE); 1491 } 1492 /* driver handles kernel-issued IOCTLs */ 1493 if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, 1494 DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) { 1495 ddi_remove_minor_node(dip, NULL); 1496 ddi_soft_state_free(lofi_statep, 0); 1497 id_space_destroy(lofi_minor_id); 1498 return (DDI_FAILURE); 1499 } 1500 1501 zone_key_create(&lofi_zone_key, NULL, lofi_zone_shutdown, NULL); 1502 1503 lofi_dip = dip; 1504 ddi_report_dev(dip); 1505 return (DDI_SUCCESS); 1506 } 1507 1508 static int 1509 
lofi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 1510 { 1511 if (cmd != DDI_DETACH) 1512 return (DDI_FAILURE); 1513 1514 mutex_enter(&lofi_lock); 1515 1516 if (!list_is_empty(&lofi_list)) { 1517 mutex_exit(&lofi_lock); 1518 return (DDI_FAILURE); 1519 } 1520 1521 lofi_dip = NULL; 1522 ddi_remove_minor_node(dip, NULL); 1523 ddi_prop_remove_all(dip); 1524 1525 mutex_exit(&lofi_lock); 1526 1527 if (zone_key_delete(lofi_zone_key) != 0) 1528 cmn_err(CE_WARN, "failed to delete zone key"); 1529 1530 ddi_soft_state_free(lofi_statep, 0); 1531 1532 id_space_destroy(lofi_minor_id); 1533 1534 return (DDI_SUCCESS); 1535 } 1536 1537 /* 1538 * With addition of encryption, be careful that encryption key is wiped before 1539 * kernel memory structures are freed, and also that key is not accidentally 1540 * passed out into userland structures. 1541 */ 1542 static void 1543 free_lofi_ioctl(struct lofi_ioctl *klip) 1544 { 1545 /* Make sure this encryption key doesn't stick around */ 1546 bzero(klip->li_key, sizeof (klip->li_key)); 1547 kmem_free(klip, sizeof (struct lofi_ioctl)); 1548 } 1549 1550 /* 1551 * These two just simplify the rest of the ioctls that need to copyin/out 1552 * the lofi_ioctl structure. 
1553 */ 1554 int 1555 copy_in_lofi_ioctl(const struct lofi_ioctl *ulip, struct lofi_ioctl **klipp, 1556 int flag) 1557 { 1558 struct lofi_ioctl *klip; 1559 int error; 1560 1561 klip = *klipp = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP); 1562 error = ddi_copyin(ulip, klip, sizeof (struct lofi_ioctl), flag); 1563 if (error) 1564 goto err; 1565 1566 /* ensure NULL termination */ 1567 klip->li_filename[MAXPATHLEN-1] = '\0'; 1568 klip->li_algorithm[MAXALGLEN-1] = '\0'; 1569 klip->li_cipher[CRYPTO_MAX_MECH_NAME-1] = '\0'; 1570 klip->li_iv_cipher[CRYPTO_MAX_MECH_NAME-1] = '\0'; 1571 1572 if (klip->li_minor > L_MAXMIN32) { 1573 error = EINVAL; 1574 goto err; 1575 } 1576 1577 return (0); 1578 1579 err: 1580 free_lofi_ioctl(klip); 1581 return (error); 1582 } 1583 1584 int 1585 copy_out_lofi_ioctl(const struct lofi_ioctl *klip, struct lofi_ioctl *ulip, 1586 int flag) 1587 { 1588 int error; 1589 1590 /* 1591 * NOTE: Do NOT copy the crypto_key_t "back" to userland. 1592 * This ensures that an attacker can't trivially find the 1593 * key for a mapping just by issuing the ioctl. 1594 * 1595 * It can still be found by poking around in kmem with mdb(1), 1596 * but there is no point in making it easy when the info isn't 1597 * of any use in this direction anyway. 1598 * 1599 * Either way we don't actually have the raw key stored in 1600 * a form that we can get it anyway, since we just used it 1601 * to create a ctx template and didn't keep "the original". 1602 */ 1603 error = ddi_copyout(klip, ulip, sizeof (struct lofi_ioctl), flag); 1604 if (error) 1605 return (EFAULT); 1606 return (0); 1607 } 1608 1609 static int 1610 lofi_access(struct lofi_state *lsp) 1611 { 1612 ASSERT(MUTEX_HELD(&lofi_lock)); 1613 if (INGLOBALZONE(curproc) || lsp->ls_zone == curproc->p_zone) 1614 return (0); 1615 return (EPERM); 1616 } 1617 1618 /* 1619 * Find the lofi state for the given filename. We compare by vnode to 1620 * allow the global zone visibility into NGZ lofi nodes. 
1621 */ 1622 static int 1623 file_to_lofi_nocheck(char *filename, struct lofi_state **lspp) 1624 { 1625 struct lofi_state *lsp; 1626 vnode_t *vp = NULL; 1627 int err = 0; 1628 1629 ASSERT(MUTEX_HELD(&lofi_lock)); 1630 1631 if ((err = lookupname(filename, UIO_SYSSPACE, FOLLOW, 1632 NULLVPP, &vp)) != 0) 1633 goto out; 1634 1635 if (vp->v_type == VREG) { 1636 vnode_t *realvp; 1637 if (VOP_REALVP(vp, &realvp, NULL) == 0) { 1638 VN_HOLD(realvp); 1639 VN_RELE(vp); 1640 vp = realvp; 1641 } 1642 } 1643 1644 for (lsp = list_head(&lofi_list); lsp != NULL; 1645 lsp = list_next(&lofi_list, lsp)) { 1646 if (lsp->ls_vp == vp) { 1647 if (lspp != NULL) 1648 *lspp = lsp; 1649 goto out; 1650 } 1651 } 1652 1653 err = ENOENT; 1654 1655 out: 1656 if (vp != NULL) 1657 VN_RELE(vp); 1658 return (err); 1659 } 1660 1661 /* 1662 * Find the minor for the given filename, checking the zone can access 1663 * it. 1664 */ 1665 static int 1666 file_to_lofi(char *filename, struct lofi_state **lspp) 1667 { 1668 int err = 0; 1669 1670 ASSERT(MUTEX_HELD(&lofi_lock)); 1671 1672 if ((err = file_to_lofi_nocheck(filename, lspp)) != 0) 1673 return (err); 1674 1675 if ((err = lofi_access(*lspp)) != 0) 1676 return (err); 1677 1678 return (0); 1679 } 1680 1681 /* 1682 * Fakes up a disk geometry, and one big partition, based on the size 1683 * of the file. This is needed because we allow newfs'ing the device, 1684 * and newfs will do several disk ioctls to figure out the geometry and 1685 * partition information. It uses that information to determine the parameters 1686 * to pass to mkfs. Geometry is pretty much irrelevant these days, but we 1687 * have to support it. 
1688 */ 1689 static void 1690 fake_disk_geometry(struct lofi_state *lsp) 1691 { 1692 u_offset_t dsize = lsp->ls_vp_size - lsp->ls_crypto_offset; 1693 1694 /* dk_geom - see dkio(7I) */ 1695 /* 1696 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs 1697 * of sectors), but that breaks programs like fdisk which want to 1698 * partition a disk by cylinder. With one cylinder, you can't create 1699 * an fdisk partition and put pcfs on it for testing (hard to pick 1700 * a number between one and one). 1701 * 1702 * The cheezy floppy test is an attempt to not have too few cylinders 1703 * for a small file, or so many on a big file that you waste space 1704 * for backup superblocks or cylinder group structures. 1705 */ 1706 if (dsize < (2 * 1024 * 1024)) /* floppy? */ 1707 lsp->ls_dkg.dkg_ncyl = dsize / (100 * 1024); 1708 else 1709 lsp->ls_dkg.dkg_ncyl = dsize / (300 * 1024); 1710 /* in case file file is < 100k */ 1711 if (lsp->ls_dkg.dkg_ncyl == 0) 1712 lsp->ls_dkg.dkg_ncyl = 1; 1713 lsp->ls_dkg.dkg_acyl = 0; 1714 lsp->ls_dkg.dkg_bcyl = 0; 1715 lsp->ls_dkg.dkg_nhead = 1; 1716 lsp->ls_dkg.dkg_obs1 = 0; 1717 lsp->ls_dkg.dkg_intrlv = 0; 1718 lsp->ls_dkg.dkg_obs2 = 0; 1719 lsp->ls_dkg.dkg_obs3 = 0; 1720 lsp->ls_dkg.dkg_apc = 0; 1721 lsp->ls_dkg.dkg_rpm = 7200; 1722 lsp->ls_dkg.dkg_pcyl = lsp->ls_dkg.dkg_ncyl + lsp->ls_dkg.dkg_acyl; 1723 lsp->ls_dkg.dkg_nsect = dsize / (DEV_BSIZE * lsp->ls_dkg.dkg_ncyl); 1724 lsp->ls_dkg.dkg_write_reinstruct = 0; 1725 lsp->ls_dkg.dkg_read_reinstruct = 0; 1726 1727 /* vtoc - see dkio(7I) */ 1728 bzero(&lsp->ls_vtoc, sizeof (struct vtoc)); 1729 lsp->ls_vtoc.v_sanity = VTOC_SANE; 1730 lsp->ls_vtoc.v_version = V_VERSION; 1731 (void) strncpy(lsp->ls_vtoc.v_volume, LOFI_DRIVER_NAME, 1732 sizeof (lsp->ls_vtoc.v_volume)); 1733 lsp->ls_vtoc.v_sectorsz = DEV_BSIZE; 1734 lsp->ls_vtoc.v_nparts = 1; 1735 lsp->ls_vtoc.v_part[0].p_tag = V_UNASSIGNED; 1736 1737 /* 1738 * A compressed file is read-only, other files can 1739 * be read-write 1740 
*/ 1741 if (lsp->ls_uncomp_seg_sz > 0) { 1742 lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT | V_RONLY; 1743 } else { 1744 lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT; 1745 } 1746 lsp->ls_vtoc.v_part[0].p_start = (daddr_t)0; 1747 /* 1748 * The partition size cannot just be the number of sectors, because 1749 * that might not end on a cylinder boundary. And if that's the case, 1750 * newfs/mkfs will print a scary warning. So just figure the size 1751 * based on the number of cylinders and sectors/cylinder. 1752 */ 1753 lsp->ls_vtoc.v_part[0].p_size = lsp->ls_dkg.dkg_pcyl * 1754 lsp->ls_dkg.dkg_nsect * lsp->ls_dkg.dkg_nhead; 1755 1756 /* dk_cinfo - see dkio(7I) */ 1757 bzero(&lsp->ls_ci, sizeof (struct dk_cinfo)); 1758 (void) strcpy(lsp->ls_ci.dki_cname, LOFI_DRIVER_NAME); 1759 lsp->ls_ci.dki_ctype = DKC_MD; 1760 lsp->ls_ci.dki_flags = 0; 1761 lsp->ls_ci.dki_cnum = 0; 1762 lsp->ls_ci.dki_addr = 0; 1763 lsp->ls_ci.dki_space = 0; 1764 lsp->ls_ci.dki_prio = 0; 1765 lsp->ls_ci.dki_vec = 0; 1766 (void) strcpy(lsp->ls_ci.dki_dname, LOFI_DRIVER_NAME); 1767 lsp->ls_ci.dki_unit = 0; 1768 lsp->ls_ci.dki_slave = 0; 1769 lsp->ls_ci.dki_partition = 0; 1770 /* 1771 * newfs uses this to set maxcontig. Must not be < 16, or it 1772 * will be 0 when newfs multiplies it by DEV_BSIZE and divides 1773 * it by the block size. Then tunefs doesn't work because 1774 * maxcontig is 0. 1775 */ 1776 lsp->ls_ci.dki_maxtransfer = 16; 1777 } 1778 1779 /* 1780 * map in a compressed file 1781 * 1782 * Read in the header and the index that follows. 1783 * 1784 * The header is as follows - 1785 * 1786 * Signature (name of the compression algorithm) 1787 * Compression segment size (a multiple of 512) 1788 * Number of index entries 1789 * Size of the last block 1790 * The array containing the index entries 1791 * 1792 * The header information is always stored in 1793 * network byte order on disk. 
1794 */ 1795 static int 1796 lofi_map_compressed_file(struct lofi_state *lsp, char *buf) 1797 { 1798 uint32_t index_sz, header_len, i; 1799 ssize_t resid; 1800 enum uio_rw rw; 1801 char *tbuf = buf; 1802 int error; 1803 1804 /* The signature has already been read */ 1805 tbuf += sizeof (lsp->ls_comp_algorithm); 1806 bcopy(tbuf, &(lsp->ls_uncomp_seg_sz), sizeof (lsp->ls_uncomp_seg_sz)); 1807 lsp->ls_uncomp_seg_sz = ntohl(lsp->ls_uncomp_seg_sz); 1808 1809 /* 1810 * The compressed segment size must be a power of 2 1811 */ 1812 if (lsp->ls_uncomp_seg_sz < DEV_BSIZE || 1813 !ISP2(lsp->ls_uncomp_seg_sz)) 1814 return (EINVAL); 1815 1816 for (i = 0; !((lsp->ls_uncomp_seg_sz >> i) & 1); i++) 1817 ; 1818 1819 lsp->ls_comp_seg_shift = i; 1820 1821 tbuf += sizeof (lsp->ls_uncomp_seg_sz); 1822 bcopy(tbuf, &(lsp->ls_comp_index_sz), sizeof (lsp->ls_comp_index_sz)); 1823 lsp->ls_comp_index_sz = ntohl(lsp->ls_comp_index_sz); 1824 1825 tbuf += sizeof (lsp->ls_comp_index_sz); 1826 bcopy(tbuf, &(lsp->ls_uncomp_last_seg_sz), 1827 sizeof (lsp->ls_uncomp_last_seg_sz)); 1828 lsp->ls_uncomp_last_seg_sz = ntohl(lsp->ls_uncomp_last_seg_sz); 1829 1830 /* 1831 * Compute the total size of the uncompressed data 1832 * for use in fake_disk_geometry and other calculations. 1833 * Disk geometry has to be faked with respect to the 1834 * actual uncompressed data size rather than the 1835 * compressed file size. 
1836 */ 1837 lsp->ls_vp_size = 1838 (u_offset_t)(lsp->ls_comp_index_sz - 2) * lsp->ls_uncomp_seg_sz 1839 + lsp->ls_uncomp_last_seg_sz; 1840 1841 /* 1842 * Index size is rounded up to DEV_BSIZE for ease 1843 * of segmapping 1844 */ 1845 index_sz = sizeof (*lsp->ls_comp_seg_index) * lsp->ls_comp_index_sz; 1846 header_len = sizeof (lsp->ls_comp_algorithm) + 1847 sizeof (lsp->ls_uncomp_seg_sz) + 1848 sizeof (lsp->ls_comp_index_sz) + 1849 sizeof (lsp->ls_uncomp_last_seg_sz); 1850 lsp->ls_comp_offbase = header_len + index_sz; 1851 1852 index_sz += header_len; 1853 index_sz = roundup(index_sz, DEV_BSIZE); 1854 1855 lsp->ls_comp_index_data = kmem_alloc(index_sz, KM_SLEEP); 1856 lsp->ls_comp_index_data_sz = index_sz; 1857 1858 /* 1859 * Read in the index -- this has a side-effect 1860 * of reading in the header as well 1861 */ 1862 rw = UIO_READ; 1863 error = vn_rdwr(rw, lsp->ls_vp, lsp->ls_comp_index_data, index_sz, 1864 0, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); 1865 1866 if (error != 0) 1867 return (error); 1868 1869 /* Skip the header, this is where the index really begins */ 1870 lsp->ls_comp_seg_index = 1871 /*LINTED*/ 1872 (uint64_t *)(lsp->ls_comp_index_data + header_len); 1873 1874 /* 1875 * Now recompute offsets in the index to account for 1876 * the header length 1877 */ 1878 for (i = 0; i < lsp->ls_comp_index_sz; i++) { 1879 lsp->ls_comp_seg_index[i] = lsp->ls_comp_offbase + 1880 BE_64(lsp->ls_comp_seg_index[i]); 1881 } 1882 1883 return (error); 1884 } 1885 1886 static int 1887 lofi_init_crypto(struct lofi_state *lsp, struct lofi_ioctl *klip) 1888 { 1889 struct crypto_meta chead; 1890 char buf[DEV_BSIZE]; 1891 ssize_t resid; 1892 char *marker; 1893 int error; 1894 int ret; 1895 int i; 1896 1897 if (!klip->li_crypto_enabled) 1898 return (0); 1899 1900 /* 1901 * All current algorithms have a max of 448 bits. 
1902 */ 1903 if (klip->li_iv_len > CRYPTO_BITS2BYTES(512)) 1904 return (EINVAL); 1905 1906 if (CRYPTO_BITS2BYTES(klip->li_key_len) > sizeof (klip->li_key)) 1907 return (EINVAL); 1908 1909 lsp->ls_crypto_enabled = klip->li_crypto_enabled; 1910 1911 mutex_init(&lsp->ls_crypto_lock, NULL, MUTEX_DRIVER, NULL); 1912 1913 lsp->ls_mech.cm_type = crypto_mech2id(klip->li_cipher); 1914 if (lsp->ls_mech.cm_type == CRYPTO_MECH_INVALID) { 1915 cmn_err(CE_WARN, "invalid cipher %s requested for %s", 1916 klip->li_cipher, klip->li_filename); 1917 return (EINVAL); 1918 } 1919 1920 /* this is just initialization here */ 1921 lsp->ls_mech.cm_param = NULL; 1922 lsp->ls_mech.cm_param_len = 0; 1923 1924 lsp->ls_iv_type = klip->li_iv_type; 1925 lsp->ls_iv_mech.cm_type = crypto_mech2id(klip->li_iv_cipher); 1926 if (lsp->ls_iv_mech.cm_type == CRYPTO_MECH_INVALID) { 1927 cmn_err(CE_WARN, "invalid iv cipher %s requested" 1928 " for %s", klip->li_iv_cipher, klip->li_filename); 1929 return (EINVAL); 1930 } 1931 1932 /* iv mech must itself take a null iv */ 1933 lsp->ls_iv_mech.cm_param = NULL; 1934 lsp->ls_iv_mech.cm_param_len = 0; 1935 lsp->ls_iv_len = klip->li_iv_len; 1936 1937 /* 1938 * Create ctx using li_cipher & the raw li_key after checking 1939 * that it isn't a weak key. 
1940 */ 1941 lsp->ls_key.ck_format = CRYPTO_KEY_RAW; 1942 lsp->ls_key.ck_length = klip->li_key_len; 1943 lsp->ls_key.ck_data = kmem_alloc( 1944 CRYPTO_BITS2BYTES(lsp->ls_key.ck_length), KM_SLEEP); 1945 bcopy(klip->li_key, lsp->ls_key.ck_data, 1946 CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); 1947 1948 ret = crypto_key_check(&lsp->ls_mech, &lsp->ls_key); 1949 if (ret != CRYPTO_SUCCESS) { 1950 cmn_err(CE_WARN, "weak key check failed for cipher " 1951 "%s on file %s (0x%x)", klip->li_cipher, 1952 klip->li_filename, ret); 1953 return (EINVAL); 1954 } 1955 1956 error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, 1957 CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); 1958 if (error != 0) 1959 return (error); 1960 1961 /* 1962 * This is the case where the header in the lofi image is already 1963 * initialized to indicate it is encrypted. 1964 */ 1965 if (strncmp(buf, lofi_crypto_magic, sizeof (lofi_crypto_magic)) == 0) { 1966 /* 1967 * The encryption header information is laid out this way: 1968 * 6 bytes: hex "CFLOFI" 1969 * 2 bytes: version = 0 ... for now 1970 * 96 bytes: reserved1 (not implemented yet) 1971 * 4 bytes: data_sector = 2 ... for now 1972 * more... 
not implemented yet 1973 */ 1974 1975 marker = buf; 1976 1977 /* copy the magic */ 1978 bcopy(marker, lsp->ls_crypto.magic, 1979 sizeof (lsp->ls_crypto.magic)); 1980 marker += sizeof (lsp->ls_crypto.magic); 1981 1982 /* read the encryption version number */ 1983 bcopy(marker, &(lsp->ls_crypto.version), 1984 sizeof (lsp->ls_crypto.version)); 1985 lsp->ls_crypto.version = ntohs(lsp->ls_crypto.version); 1986 marker += sizeof (lsp->ls_crypto.version); 1987 1988 /* read a chunk of reserved data */ 1989 bcopy(marker, lsp->ls_crypto.reserved1, 1990 sizeof (lsp->ls_crypto.reserved1)); 1991 marker += sizeof (lsp->ls_crypto.reserved1); 1992 1993 /* read block number where encrypted data begins */ 1994 bcopy(marker, &(lsp->ls_crypto.data_sector), 1995 sizeof (lsp->ls_crypto.data_sector)); 1996 lsp->ls_crypto.data_sector = ntohl(lsp->ls_crypto.data_sector); 1997 marker += sizeof (lsp->ls_crypto.data_sector); 1998 1999 /* and ignore the rest until it is implemented */ 2000 2001 lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE; 2002 return (0); 2003 } 2004 2005 /* 2006 * We've requested encryption, but no magic was found, so it must be 2007 * a new image. 
2008 */ 2009 2010 for (i = 0; i < sizeof (struct crypto_meta); i++) { 2011 if (buf[i] != '\0') 2012 return (EINVAL); 2013 } 2014 2015 marker = buf; 2016 bcopy(lofi_crypto_magic, marker, sizeof (lofi_crypto_magic)); 2017 marker += sizeof (lofi_crypto_magic); 2018 chead.version = htons(LOFI_CRYPTO_VERSION); 2019 bcopy(&(chead.version), marker, sizeof (chead.version)); 2020 marker += sizeof (chead.version); 2021 marker += sizeof (chead.reserved1); 2022 chead.data_sector = htonl(LOFI_CRYPTO_DATA_SECTOR); 2023 bcopy(&(chead.data_sector), marker, sizeof (chead.data_sector)); 2024 2025 /* write the header */ 2026 error = vn_rdwr(UIO_WRITE, lsp->ls_vp, buf, DEV_BSIZE, 2027 CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); 2028 if (error != 0) 2029 return (error); 2030 2031 /* fix things up so it looks like we read this info */ 2032 bcopy(lofi_crypto_magic, lsp->ls_crypto.magic, 2033 sizeof (lofi_crypto_magic)); 2034 lsp->ls_crypto.version = LOFI_CRYPTO_VERSION; 2035 lsp->ls_crypto.data_sector = LOFI_CRYPTO_DATA_SECTOR; 2036 lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE; 2037 return (0); 2038 } 2039 2040 /* 2041 * Check to see if the passed in signature is a valid one. If it is 2042 * valid, return the index into lofi_compress_table. 
2043 * 2044 * Return -1 if it is invalid 2045 */ 2046 static int 2047 lofi_compress_select(const char *signature) 2048 { 2049 int i; 2050 2051 for (i = 0; i < LOFI_COMPRESS_FUNCTIONS; i++) { 2052 if (strcmp(lofi_compress_table[i].l_name, signature) == 0) 2053 return (i); 2054 } 2055 2056 return (-1); 2057 } 2058 2059 static int 2060 lofi_init_compress(struct lofi_state *lsp) 2061 { 2062 char buf[DEV_BSIZE]; 2063 int compress_index; 2064 ssize_t resid; 2065 int error; 2066 2067 error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, 0, UIO_SYSSPACE, 2068 0, RLIM64_INFINITY, kcred, &resid); 2069 2070 if (error != 0) 2071 return (error); 2072 2073 if ((compress_index = lofi_compress_select(buf)) == -1) 2074 return (0); 2075 2076 /* compression and encryption are mutually exclusive */ 2077 if (lsp->ls_crypto_enabled) 2078 return (ENOTSUP); 2079 2080 /* initialize compression info for compressed lofi */ 2081 lsp->ls_comp_algorithm_index = compress_index; 2082 (void) strlcpy(lsp->ls_comp_algorithm, 2083 lofi_compress_table[compress_index].l_name, 2084 sizeof (lsp->ls_comp_algorithm)); 2085 2086 /* Finally setup per-thread pre-allocated buffers */ 2087 lsp->ls_comp_bufs = kmem_zalloc(lofi_taskq_nthreads * 2088 sizeof (struct compbuf), KM_SLEEP); 2089 2090 return (lofi_map_compressed_file(lsp, buf)); 2091 } 2092 2093 /* 2094 * map a file to a minor number. Return the minor number. 
 */
/*
 * 'ulip' is the caller's lofi_ioctl; it is copied in, and on success
 * copied back out with li_minor set to the minor that was chosen.
 * When 'pickminor' is non-zero the first free minor is allocated,
 * otherwise klip->li_minor is requested specifically.  If 'rvalp' is
 * non-NULL the minor is also stored there.  Returns 0 or an errno.
 */
static int
lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor,
    int *rvalp, struct cred *credp, int ioctl_flag)
{
	minor_t minor = (minor_t)-1;
	struct lofi_state *lsp = NULL;
	struct lofi_ioctl *klip;
	int error;
	struct vnode *vp = NULL;
	vattr_t vattr;
	int flag;
	dev_t newdev;
	char namebuf[50];

	error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag);
	if (error != 0)
		return (error);

	mutex_enter(&lofi_lock);

	/* charge the new device to the zone before doing any real work */
	mutex_enter(&curproc->p_lock);
	if ((error = rctl_incr_lofi(curproc, curproc->p_zone, 1)) != 0) {
		mutex_exit(&curproc->p_lock);
		mutex_exit(&lofi_lock);
		free_lofi_ioctl(klip);
		return (error);
	}
	mutex_exit(&curproc->p_lock);

	/* a file may only be mapped once */
	if (file_to_lofi_nocheck(klip->li_filename, NULL) == 0) {
		error = EBUSY;
		goto err;
	}

	if (pickminor) {
		minor = (minor_t)id_allocff_nosleep(lofi_minor_id);
		if (minor == (minor_t)-1) {
			error = EAGAIN;
			goto err;
		}
	} else {
		if (ddi_get_soft_state(lofi_statep, klip->li_minor) != NULL) {
			error = EEXIST;
			goto err;
		}

		minor = (minor_t)
		    id_alloc_specific_nosleep(lofi_minor_id, klip->li_minor);
		ASSERT(minor != (minor_t)-1);
	}

	flag = FREAD | FWRITE | FOFFMAX | FEXCL;
	error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0);
	if (error) {
		/* try read-only */
		flag &= ~FWRITE;
		error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0,
		    &vp, 0, 0);
		if (error)
			goto err;
	}

	if (!V_ISLOFIABLE(vp->v_type)) {
		error = EINVAL;
		goto err;
	}

	vattr.va_mask = AT_SIZE;
	error = VOP_GETATTR(vp, &vattr, 0, credp, NULL);
	if (error)
		goto err;

	/* the file needs to be a multiple of the block size */
	if ((vattr.va_size % DEV_BSIZE) != 0) {
		error = EINVAL;
		goto err;
	}

	/* lsp alloc+init */

	error = ddi_soft_state_zalloc(lofi_statep, minor);
	if (error == DDI_FAILURE) {
		error = ENOMEM;
		goto err;
	}

	/*
	 * From here on lsp is non-NULL, so the err path below cleans up
	 * through lofi_destroy() instead of undoing the steps by hand.
	 */
	lsp = ddi_get_soft_state(lofi_statep, minor);
	list_insert_tail(&lofi_list, lsp);

	newdev = makedevice(getmajor(dev), minor);
	lsp->ls_dev = newdev;
	lsp->ls_zone = zone_find_by_id(getzoneid());
	ASSERT(lsp->ls_zone != NULL);
	lsp->ls_uncomp_seg_sz = 0;
	lsp->ls_comp_algorithm[0] = '\0';
	lsp->ls_crypto_offset = 0;

	cv_init(&lsp->ls_vp_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&lsp->ls_comp_cache_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&lsp->ls_comp_bufs_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&lsp->ls_vp_lock, NULL, MUTEX_DRIVER, NULL);

	/* per-device task queue that lofi_strategy() dispatches to */
	(void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d",
	    LOFI_DRIVER_NAME, minor);
	lsp->ls_taskq = taskq_create_proc(namebuf, lofi_taskq_nthreads,
	    minclsyspri, 1, lofi_taskq_maxalloc, curzone->zone_zsched, 0);

	list_create(&lsp->ls_comp_cache, sizeof (struct lofi_comp_cache),
	    offsetof(struct lofi_comp_cache, lc_list));

	/*
	 * save open mode so file can be closed properly and vnode counts
	 * updated correctly.
	 */
	lsp->ls_openflag = flag;

	lsp->ls_vp = vp;
	lsp->ls_stacked_vp = vp;
	/*
	 * Try to handle stacked lofs vnodes.
	 */
	if (vp->v_type == VREG) {
		vnode_t *realvp;

		if (VOP_REALVP(vp, &realvp, NULL) == 0) {
			/*
			 * We need to use the realvp for uniqueness
			 * checking, but keep the stacked vp for
			 * LOFI_GET_FILENAME display.
			 */
			VN_HOLD(realvp);
			lsp->ls_vp = realvp;
		}
	}

	/*
	 * Both sizes start out as the file size; lofi_init_compress()
	 * replaces ls_vp_size with the uncompressed size for compressed
	 * images.
	 */
	lsp->ls_vp_size = vattr.va_size;
	lsp->ls_vp_comp_size = lsp->ls_vp_size;

	lsp->ls_kstat = kstat_create_zone(LOFI_DRIVER_NAME, minor,
	    NULL, "disk", KSTAT_TYPE_IO, 1, 0, getzoneid());

	if (lsp->ls_kstat == NULL) {
		error = ENOMEM;
		goto err;
	}

	lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock;
	kstat_zone_add(lsp->ls_kstat, GLOBAL_ZONEID);

	/* crypto first: lofi_init_compress() rejects encrypted images */
	if ((error = lofi_init_crypto(lsp, klip)) != 0)
		goto err;

	if ((error = lofi_init_compress(lsp)) != 0)
		goto err;

	fake_disk_geometry(lsp);

	/* create minor nodes */

	(void) snprintf(namebuf, sizeof (namebuf), "%d", minor);
	error = ddi_create_minor_node(lofi_dip, namebuf, S_IFBLK, minor,
	    DDI_PSEUDO, NULL);
	if (error != DDI_SUCCESS) {
		error = ENXIO;
		goto err;
	}

	(void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor);
	error = ddi_create_minor_node(lofi_dip, namebuf, S_IFCHR, minor,
	    DDI_PSEUDO, NULL);
	if (error != DDI_SUCCESS) {
		/* remove block node */
		(void) snprintf(namebuf, sizeof (namebuf), "%d", minor);
		ddi_remove_minor_node(lofi_dip, namebuf);
		error = ENXIO;
		goto err;
	}

	/* create DDI properties */

	if ((ddi_prop_update_int64(newdev, lofi_dip, SIZE_PROP_NAME,
	    lsp->ls_vp_size - lsp->ls_crypto_offset)) != DDI_PROP_SUCCESS) {
		error = EINVAL;
		goto nodeerr;
	}

	if ((ddi_prop_update_int64(newdev, lofi_dip, NBLOCKS_PROP_NAME,
	    (lsp->ls_vp_size - lsp->ls_crypto_offset) / DEV_BSIZE))
	    != DDI_PROP_SUCCESS) {
		error = EINVAL;
		goto nodeerr;
	}

	if (ddi_prop_update_string(newdev, lofi_dip, ZONE_PROP_NAME,
	    (char *)curproc->p_zone->zone_name) != DDI_PROP_SUCCESS) {
		error = EINVAL;
		goto nodeerr;
	}

	kstat_install(lsp->ls_kstat);

	mutex_exit(&lofi_lock);

	if (rvalp)
		*rvalp = (int)minor;
	klip->li_minor = minor;
	/* the mapping already exists, so a failed copyout is not fatal */
	(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
	free_lofi_ioctl(klip);
	return (0);

nodeerr:
	lofi_free_dev(newdev);
err:
	if (lsp != NULL) {
		lofi_destroy(lsp, credp);
	} else {
		/* no soft state yet: undo the individual steps by hand */
		if (vp != NULL) {
			(void) VOP_CLOSE(vp, flag, 1, 0, credp, NULL);
			VN_RELE(vp);
		}

		if (minor != (minor_t)-1)
			id_free(lofi_minor_id, minor);

		rctl_decr_lofi(curproc->p_zone, 1);
	}

	mutex_exit(&lofi_lock);
	free_lofi_ioctl(klip);
	return (error);
}

/*
 * unmap a file.
 */
static int
lofi_unmap_file(struct lofi_ioctl *ulip, int byfilename,
    struct cred *credp, int ioctl_flag)
{
	struct lofi_state *lsp;
	struct lofi_ioctl *klip;
	int err;

	err = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag);
	if (err != 0)
		return (err);

	mutex_enter(&lofi_lock);
	if (byfilename) {
		if ((err = file_to_lofi(klip->li_filename, &lsp)) != 0) {
			mutex_exit(&lofi_lock);
			return (err);
		}
	} else if (klip->li_minor == 0) {
		mutex_exit(&lofi_lock);
		free_lofi_ioctl(klip);
		return (ENXIO);
	} else {
		lsp = ddi_get_soft_state(lofi_statep, klip->li_minor);
	}

	if (lsp == NULL || lsp->ls_vp == NULL || lofi_access(lsp) != 0) {
		mutex_exit(&lofi_lock);
		free_lofi_ioctl(klip);
		return (ENXIO);
	}

	klip->li_minor = getminor(lsp->ls_dev);

	/*
	 * If it's still held open, we'll do one of three things:
	 *
	 * If no flag is set, just return EBUSY.
	 *
	 * If the 'cleanup' flag is set, unmap and remove the device when
	 * the last user finishes.
	 *
	 * If the 'force' flag is set, then we forcibly close the underlying
	 * file.  Subsequent operations will fail, and the DKIOCSTATE ioctl
	 * will return DKIO_DEV_GONE.
When the device is last closed, the 2377 * device will be cleaned up appropriately. 2378 * 2379 * This is complicated by the fact that we may have outstanding 2380 * dispatched I/Os. Rather than having a single mutex to serialize all 2381 * I/O, we keep a count of the number of outstanding I/O requests 2382 * (ls_vp_iocount), as well as a flag to indicate that no new I/Os 2383 * should be dispatched (ls_vp_closereq). 2384 * 2385 * We set the flag, wait for the number of outstanding I/Os to reach 0, 2386 * and then close the underlying vnode. 2387 */ 2388 if (is_opened(lsp)) { 2389 if (klip->li_force) { 2390 mutex_enter(&lsp->ls_vp_lock); 2391 lsp->ls_vp_closereq = B_TRUE; 2392 /* wake up any threads waiting on dkiocstate */ 2393 cv_broadcast(&lsp->ls_vp_cv); 2394 while (lsp->ls_vp_iocount > 0) 2395 cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock); 2396 mutex_exit(&lsp->ls_vp_lock); 2397 2398 goto out; 2399 } else if (klip->li_cleanup) { 2400 lsp->ls_cleanup = 1; 2401 mutex_exit(&lofi_lock); 2402 free_lofi_ioctl(klip); 2403 return (0); 2404 } 2405 2406 mutex_exit(&lofi_lock); 2407 free_lofi_ioctl(klip); 2408 return (EBUSY); 2409 } 2410 2411 out: 2412 lofi_free_dev(lsp->ls_dev); 2413 lofi_destroy(lsp, credp); 2414 2415 mutex_exit(&lofi_lock); 2416 (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 2417 free_lofi_ioctl(klip); 2418 return (0); 2419 } 2420 2421 /* 2422 * get the filename given the minor number, or the minor number given 2423 * the name. 
2424 */ 2425 /*ARGSUSED*/ 2426 static int 2427 lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which, 2428 struct cred *credp, int ioctl_flag) 2429 { 2430 struct lofi_ioctl *klip; 2431 struct lofi_state *lsp; 2432 int error; 2433 2434 error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag); 2435 if (error != 0) 2436 return (error); 2437 2438 switch (which) { 2439 case LOFI_GET_FILENAME: 2440 if (klip->li_minor == 0) { 2441 free_lofi_ioctl(klip); 2442 return (EINVAL); 2443 } 2444 2445 mutex_enter(&lofi_lock); 2446 lsp = ddi_get_soft_state(lofi_statep, klip->li_minor); 2447 if (lsp == NULL || lofi_access(lsp) != 0) { 2448 mutex_exit(&lofi_lock); 2449 free_lofi_ioctl(klip); 2450 return (ENXIO); 2451 } 2452 2453 /* 2454 * This may fail if, for example, we're trying to look 2455 * up a zoned NFS path from the global zone. 2456 */ 2457 if (vnodetopath(NULL, lsp->ls_stacked_vp, klip->li_filename, 2458 sizeof (klip->li_filename), CRED()) != 0) { 2459 (void) strlcpy(klip->li_filename, "?", 2460 sizeof (klip->li_filename)); 2461 } 2462 2463 (void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm, 2464 sizeof (klip->li_algorithm)); 2465 klip->li_crypto_enabled = lsp->ls_crypto_enabled; 2466 mutex_exit(&lofi_lock); 2467 error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 2468 free_lofi_ioctl(klip); 2469 return (error); 2470 case LOFI_GET_MINOR: 2471 mutex_enter(&lofi_lock); 2472 error = file_to_lofi(klip->li_filename, &lsp); 2473 if (error == 0) 2474 klip->li_minor = getminor(lsp->ls_dev); 2475 mutex_exit(&lofi_lock); 2476 2477 if (error == 0) 2478 error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 2479 2480 free_lofi_ioctl(klip); 2481 return (error); 2482 case LOFI_CHECK_COMPRESSED: 2483 mutex_enter(&lofi_lock); 2484 error = file_to_lofi(klip->li_filename, &lsp); 2485 if (error != 0) { 2486 mutex_exit(&lofi_lock); 2487 free_lofi_ioctl(klip); 2488 return (error); 2489 } 2490 2491 klip->li_minor = getminor(lsp->ls_dev); 2492 (void) strlcpy(klip->li_algorithm, 
lsp->ls_comp_algorithm, 2493 sizeof (klip->li_algorithm)); 2494 2495 mutex_exit(&lofi_lock); 2496 error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 2497 free_lofi_ioctl(klip); 2498 return (error); 2499 default: 2500 free_lofi_ioctl(klip); 2501 return (EINVAL); 2502 } 2503 } 2504 2505 static int 2506 lofi_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, 2507 int *rvalp) 2508 { 2509 int error; 2510 enum dkio_state dkstate; 2511 struct lofi_state *lsp; 2512 minor_t minor; 2513 2514 minor = getminor(dev); 2515 /* lofi ioctls only apply to the master device */ 2516 if (minor == 0) { 2517 struct lofi_ioctl *lip = (struct lofi_ioctl *)arg; 2518 2519 /* 2520 * the query command only need read-access - i.e., normal 2521 * users are allowed to do those on the ctl device as 2522 * long as they can open it read-only. 2523 */ 2524 switch (cmd) { 2525 case LOFI_MAP_FILE: 2526 if ((flag & FWRITE) == 0) 2527 return (EPERM); 2528 return (lofi_map_file(dev, lip, 1, rvalp, credp, flag)); 2529 case LOFI_MAP_FILE_MINOR: 2530 if ((flag & FWRITE) == 0) 2531 return (EPERM); 2532 return (lofi_map_file(dev, lip, 0, rvalp, credp, flag)); 2533 case LOFI_UNMAP_FILE: 2534 if ((flag & FWRITE) == 0) 2535 return (EPERM); 2536 return (lofi_unmap_file(lip, 1, credp, flag)); 2537 case LOFI_UNMAP_FILE_MINOR: 2538 if ((flag & FWRITE) == 0) 2539 return (EPERM); 2540 return (lofi_unmap_file(lip, 0, credp, flag)); 2541 case LOFI_GET_FILENAME: 2542 return (lofi_get_info(dev, lip, LOFI_GET_FILENAME, 2543 credp, flag)); 2544 case LOFI_GET_MINOR: 2545 return (lofi_get_info(dev, lip, LOFI_GET_MINOR, 2546 credp, flag)); 2547 2548 /* 2549 * This API made limited sense when this value was fixed 2550 * at LOFI_MAX_FILES. However, its use to iterate 2551 * across all possible devices in lofiadm means we don't 2552 * want to return L_MAXMIN32, but the highest 2553 * *allocated* minor. 
2554 */ 2555 case LOFI_GET_MAXMINOR: 2556 minor = 0; 2557 2558 mutex_enter(&lofi_lock); 2559 2560 for (lsp = list_head(&lofi_list); lsp != NULL; 2561 lsp = list_next(&lofi_list, lsp)) { 2562 if (lofi_access(lsp) != 0) 2563 continue; 2564 2565 if (getminor(lsp->ls_dev) > minor) 2566 minor = getminor(lsp->ls_dev); 2567 } 2568 2569 mutex_exit(&lofi_lock); 2570 2571 error = ddi_copyout(&minor, &lip->li_minor, 2572 sizeof (minor), flag); 2573 if (error) 2574 return (EFAULT); 2575 return (0); 2576 2577 case LOFI_CHECK_COMPRESSED: 2578 return (lofi_get_info(dev, lip, LOFI_CHECK_COMPRESSED, 2579 credp, flag)); 2580 default: 2581 return (EINVAL); 2582 } 2583 } 2584 2585 mutex_enter(&lofi_lock); 2586 lsp = ddi_get_soft_state(lofi_statep, minor); 2587 if (lsp == NULL || lsp->ls_vp_closereq) { 2588 mutex_exit(&lofi_lock); 2589 return (ENXIO); 2590 } 2591 mutex_exit(&lofi_lock); 2592 2593 /* 2594 * We explicitly allow DKIOCSTATE, but all other ioctls should fail with 2595 * EIO as if the device was no longer present. 
2596 */ 2597 if (lsp->ls_vp == NULL && cmd != DKIOCSTATE) 2598 return (EIO); 2599 2600 /* these are for faking out utilities like newfs */ 2601 switch (cmd) { 2602 case DKIOCGVTOC: 2603 switch (ddi_model_convert_from(flag & FMODELS)) { 2604 case DDI_MODEL_ILP32: { 2605 struct vtoc32 vtoc32; 2606 2607 vtoctovtoc32(lsp->ls_vtoc, vtoc32); 2608 if (ddi_copyout(&vtoc32, (void *)arg, 2609 sizeof (struct vtoc32), flag)) 2610 return (EFAULT); 2611 break; 2612 } 2613 2614 case DDI_MODEL_NONE: 2615 if (ddi_copyout(&lsp->ls_vtoc, (void *)arg, 2616 sizeof (struct vtoc), flag)) 2617 return (EFAULT); 2618 break; 2619 } 2620 return (0); 2621 case DKIOCINFO: 2622 error = ddi_copyout(&lsp->ls_ci, (void *)arg, 2623 sizeof (struct dk_cinfo), flag); 2624 if (error) 2625 return (EFAULT); 2626 return (0); 2627 case DKIOCG_VIRTGEOM: 2628 case DKIOCG_PHYGEOM: 2629 case DKIOCGGEOM: 2630 error = ddi_copyout(&lsp->ls_dkg, (void *)arg, 2631 sizeof (struct dk_geom), flag); 2632 if (error) 2633 return (EFAULT); 2634 return (0); 2635 case DKIOCSTATE: 2636 /* 2637 * Normally, lofi devices are always in the INSERTED state. If 2638 * a device is forcefully unmapped, then the device transitions 2639 * to the DKIO_DEV_GONE state. 2640 */ 2641 if (ddi_copyin((void *)arg, &dkstate, sizeof (dkstate), 2642 flag) != 0) 2643 return (EFAULT); 2644 2645 mutex_enter(&lsp->ls_vp_lock); 2646 lsp->ls_vp_iocount++; 2647 while (((dkstate == DKIO_INSERTED && lsp->ls_vp != NULL) || 2648 (dkstate == DKIO_DEV_GONE && lsp->ls_vp == NULL)) && 2649 !lsp->ls_vp_closereq) { 2650 /* 2651 * By virtue of having the device open, we know that 2652 * 'lsp' will remain valid when we return. 2653 */ 2654 if (!cv_wait_sig(&lsp->ls_vp_cv, 2655 &lsp->ls_vp_lock)) { 2656 lsp->ls_vp_iocount--; 2657 cv_broadcast(&lsp->ls_vp_cv); 2658 mutex_exit(&lsp->ls_vp_lock); 2659 return (EINTR); 2660 } 2661 } 2662 2663 dkstate = (!lsp->ls_vp_closereq && lsp->ls_vp != NULL ? 
2664 DKIO_INSERTED : DKIO_DEV_GONE); 2665 lsp->ls_vp_iocount--; 2666 cv_broadcast(&lsp->ls_vp_cv); 2667 mutex_exit(&lsp->ls_vp_lock); 2668 2669 if (ddi_copyout(&dkstate, (void *)arg, 2670 sizeof (dkstate), flag) != 0) 2671 return (EFAULT); 2672 return (0); 2673 default: 2674 return (ENOTTY); 2675 } 2676 } 2677 2678 static struct cb_ops lofi_cb_ops = { 2679 lofi_open, /* open */ 2680 lofi_close, /* close */ 2681 lofi_strategy, /* strategy */ 2682 nodev, /* print */ 2683 nodev, /* dump */ 2684 lofi_read, /* read */ 2685 lofi_write, /* write */ 2686 lofi_ioctl, /* ioctl */ 2687 nodev, /* devmap */ 2688 nodev, /* mmap */ 2689 nodev, /* segmap */ 2690 nochpoll, /* poll */ 2691 ddi_prop_op, /* prop_op */ 2692 0, /* streamtab */ 2693 D_64BIT | D_NEW | D_MP, /* Driver compatibility flag */ 2694 CB_REV, 2695 lofi_aread, 2696 lofi_awrite 2697 }; 2698 2699 static struct dev_ops lofi_ops = { 2700 DEVO_REV, /* devo_rev, */ 2701 0, /* refcnt */ 2702 lofi_info, /* info */ 2703 nulldev, /* identify */ 2704 nulldev, /* probe */ 2705 lofi_attach, /* attach */ 2706 lofi_detach, /* detach */ 2707 nodev, /* reset */ 2708 &lofi_cb_ops, /* driver operations */ 2709 NULL, /* no bus operations */ 2710 NULL, /* power */ 2711 ddi_quiesce_not_needed, /* quiesce */ 2712 }; 2713 2714 static struct modldrv modldrv = { 2715 &mod_driverops, 2716 "loopback file driver", 2717 &lofi_ops, 2718 }; 2719 2720 static struct modlinkage modlinkage = { 2721 MODREV_1, 2722 &modldrv, 2723 NULL 2724 }; 2725 2726 int 2727 _init(void) 2728 { 2729 int error; 2730 2731 list_create(&lofi_list, sizeof (struct lofi_state), 2732 offsetof(struct lofi_state, ls_list)); 2733 2734 error = ddi_soft_state_init(&lofi_statep, 2735 sizeof (struct lofi_state), 0); 2736 if (error) 2737 return (error); 2738 2739 mutex_init(&lofi_lock, NULL, MUTEX_DRIVER, NULL); 2740 2741 error = mod_install(&modlinkage); 2742 if (error) { 2743 mutex_destroy(&lofi_lock); 2744 ddi_soft_state_fini(&lofi_statep); 2745 list_destroy(&lofi_list); 2746 } 
2747 2748 return (error); 2749 } 2750 2751 int 2752 _fini(void) 2753 { 2754 int error; 2755 2756 mutex_enter(&lofi_lock); 2757 2758 if (!list_is_empty(&lofi_list)) { 2759 mutex_exit(&lofi_lock); 2760 return (EBUSY); 2761 } 2762 2763 mutex_exit(&lofi_lock); 2764 2765 error = mod_remove(&modlinkage); 2766 if (error) 2767 return (error); 2768 2769 mutex_destroy(&lofi_lock); 2770 ddi_soft_state_fini(&lofi_statep); 2771 list_destroy(&lofi_list); 2772 2773 return (error); 2774 } 2775 2776 int 2777 _info(struct modinfo *modinfop) 2778 { 2779 return (mod_info(&modlinkage, modinfop)); 2780 } 2781