/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 *
 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2016 Andrey Sokolov
 */

/*
 * lofi (loopback file) driver - allows you to attach a file to a device,
 * which can then be accessed through that device. The simple model is that
 * you tell lofi to open a file, and then use the block device you get as
 * you would any block device. lofi translates access to the block device
 * into I/O on the underlying file. This is mostly useful for
 * mounting images of filesystems.
 *
 * lofi is controlled through /dev/lofictl - this is the only device exported
 * during attach, and is minor number 0. lofiadm communicates with lofi
 * through ioctls on this device. When a file is attached to lofi, block and
 * character devices are exported in /dev/lofi and /dev/rlofi. Currently,
 * these devices are identified by their minor number, and the minor number
 * is also used as the name in /dev/lofi. If we ever decide to support
 * virtual disks, we'll have to divide the minor number space to identify
 * fdisk partitions and slices, and the name will then be the minor number
 * shifted down a few bits. Minor devices are tracked with state structures
 * handled with ddi_soft_state(9F) for simplicity.
 *
 * A file attached to lofi is opened when attached and not closed until
 * explicitly detached from lofi. This seems more sensible than deferring
 * the open until the /dev/lofi device is opened, for a number of reasons.
 * One is that any failure is likely to be noticed by the person (or script)
 * running lofiadm. Another is that it would be a security problem if the
 * file was replaced by another one after being added but before being opened.
 *
 * The only hard part about lofi is the ioctls. In order to support things
 * like 'newfs' on a lofi device, it needs to support certain disk ioctls.
 * So it has to fake disk geometry and partition information. More may need
 * to be faked if your favorite utility doesn't work and you think it should
 * (fdformat doesn't work because it really wants to know the type of floppy
 * controller to talk to, and that didn't seem easy to fake. Or possibly even
 * necessary, since we have mkfs_pcfs now).
 *
 * Normally, a lofi device cannot be detached if it is open (i.e. busy). To
 * support simulation of hotplug events, an optional force flag is provided.
 * If a lofi device is open when a force detach is requested, then the
 * underlying file is closed and any subsequent operations return EIO. When
 * the device is closed for the last time, it will be cleaned up at that
 * time. In addition, the DKIOCSTATE ioctl will return DKIO_DEV_GONE when
 * the device is detached but not removed.
 *
 * Known problems:
 *
 *	UFS logging. Mounting a UFS filesystem image "logging"
 *	works for basic copy testing but wedges during a build of ON through
 *	that image. Some deadlock in lufs holding the log mutex and then
 *	getting stuck on a buf. So for now, don't do that.
 *
 *	Direct I/O. Since the filesystem data is being cached in the buffer
 *	cache, _and_ again in the underlying filesystem, it's tempting to
 *	enable direct I/O on the underlying file. Don't, because that
 *	deadlocks. I think to fix the cache-twice problem we might need
 *	filesystem support.
 *
 * Interesting things to do:
 *
 *	Allow multiple files for each device. A poor man's metadisk,
 *	basically.
 *
 *	Pass-through ioctls on block devices. You can (though it's not
 *	documented) give lofi a block device as a file name. Then we
 *	shouldn't need to fake a geometry; however, it may be relevant
 *	if you're replacing metadisk, or using lofi to get crypto.
 *	It makes sense to do lofiadm -c aes -a /dev/dsk/c0t0d0s4 /dev/lofi/1
 *	and then in /etc/vfstab have an entry for /dev/lofi/1 as /export/home.
 *	In fact this even makes sense if you have lofi "above" metadisk.
 *
 * Encryption:
 *	Each lofi device can have its own symmetric key and cipher.
 *	They are passed to us by lofiadm(1M) in the correct format for use
 *	with the misc/kcf crypto_* routines.
 *
 *	Each block has its own IV, which is calculated in lofi_blk_mech()
 *	based on the "master" key held in the lsp and the block number of
 *	the buffer.
 */
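
/*
 * Typical usage from userland (an illustrative sketch; see lofiadm(1M)):
 *
 *	# lofiadm -a /export/disk.img
 *	/dev/lofi/1
 *	# mount -F ufs /dev/lofi/1 /mnt
 *	...
 *	# umount /mnt
 *	# lofiadm -d /dev/lofi/1
 */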

#include <sys/types.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include <sys/uio.h>
#include <sys/kmem.h>
#include <sys/cred.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/aio_req.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/debug.h>
#include <sys/vnode.h>
#include <sys/lofi.h>
#include <sys/fcntl.h>
#include <sys/pathname.h>
#include <sys/filio.h>
#include <sys/fdio.h>
#include <sys/open.h>
#include <sys/disp.h>
#include <vm/seg_map.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/zmod.h>
#include <sys/id_space.h>
#include <sys/mkdev.h>
#include <sys/crypto/common.h>
#include <sys/crypto/api.h>
#include <sys/rctl.h>
#include <LzmaDec.h>

#define	NBLOCKS_PROP_NAME	"Nblocks"
#define	SIZE_PROP_NAME		"Size"
#define	ZONE_PROP_NAME		"zone"

#define	SETUP_C_DATA(cd, buf, len)		\
	(cd).cd_format = CRYPTO_DATA_RAW;	\
	(cd).cd_offset = 0;			\
	(cd).cd_miscdata = NULL;		\
	(cd).cd_length = (len);			\
	(cd).cd_raw.iov_base = (buf);		\
	(cd).cd_raw.iov_len = (len);

#define	UIO_CHECK(uio)	\
	if (((uio)->uio_loffset % DEV_BSIZE) != 0 || \
	    ((uio)->uio_resid % DEV_BSIZE) != 0) { \
		return (EINVAL); \
	}

static dev_info_t *lofi_dip = NULL;
static void *lofi_statep = NULL;
static kmutex_t lofi_lock;		/* state lock */
static id_space_t *lofi_minor_id;
static list_t lofi_list;
static zone_key_t lofi_zone_key;

/*
 * Because lofi_taskq_nthreads limits the actual swamping of the device, the
 * maxalloc parameter (lofi_taskq_maxalloc) should be tuned conservatively
 * high. If we want to be assured that the underlying device is always busy,
 * we must be sure that the number of bytes enqueued when the number of
 * enqueued tasks exceeds maxalloc is sufficient to keep the device busy for
 * the duration of the sleep time in taskq_ent_alloc(). That is, lofi should
 * set maxalloc to be the maximum throughput (in bytes per second) of the
 * underlying device divided by the minimum I/O size. We assume a realistic
 * maximum throughput of one hundred megabytes per second; we set maxalloc on
 * the lofi task queue to be 104857600 divided by DEV_BSIZE.
 */
static int lofi_taskq_maxalloc = 104857600 / DEV_BSIZE;
static int lofi_taskq_nthreads = 4;	/* # of taskq threads per device */
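
/*
 * (For reference: with the usual DEV_BSIZE of 512 bytes, the default
 * lofi_taskq_maxalloc works out to 104857600 / 512 == 204800 entries.)
 */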

const char lofi_crypto_magic[6] = LOFI_CRYPTO_MAGIC;

/*
 * To avoid decompressing data in a compressed segment multiple times
 * when accessing small parts of a segment's data, we cache and reuse
 * the uncompressed segment's data.
 *
 * A single cached segment is sufficient to avoid lots of duplicate
 * segment decompress operations. A small cache size also reduces the
 * memory footprint.
 *
 * lofi_max_comp_cache is the maximum number of decompressed data segments
 * cached for each compressed lofi image. It can be set to 0 to disable
 * caching.
 */

uint32_t lofi_max_comp_cache = 1;

static int gzip_decompress(void *src, size_t srclen, void *dst,
	size_t *dstlen, int level);

static int lzma_decompress(void *src, size_t srclen, void *dst,
	size_t *dstlen, int level);

lofi_compress_info_t lofi_compress_table[LOFI_COMPRESS_FUNCTIONS] = {
	{gzip_decompress,	NULL,	6,	"gzip"},	/* default */
	{gzip_decompress,	NULL,	6,	"gzip-6"},
	{gzip_decompress,	NULL,	9,	"gzip-9"},
	{lzma_decompress,	NULL,	0,	"lzma"}
};

/*ARGSUSED*/
static void
*SzAlloc(void *p, size_t size)
{
	return (kmem_alloc(size, KM_SLEEP));
}

/*ARGSUSED*/
static void
SzFree(void *p, void *address, size_t size)
{
	kmem_free(address, size);
}
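
/*
 * Allocator vtable handed to LzmaDecode() below, so that the LZMA SDK
 * draws its scratch memory from kmem.
 */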
static ISzAlloc g_Alloc = { SzAlloc, SzFree };

/*
 * Free data referenced by the linked list of cached uncompressed
 * segments.
 */
static void
lofi_free_comp_cache(struct lofi_state *lsp)
{
	struct lofi_comp_cache *lc;

	while ((lc = list_remove_head(&lsp->ls_comp_cache)) != NULL) {
		kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);
		kmem_free(lc, sizeof (struct lofi_comp_cache));
		lsp->ls_comp_cache_count--;
	}
	ASSERT(lsp->ls_comp_cache_count == 0);
}

static int
is_opened(struct lofi_state *lsp)
{
	ASSERT(MUTEX_HELD(&lofi_lock));
	return (lsp->ls_chr_open || lsp->ls_blk_open || lsp->ls_lyr_open_count);
}

static int
mark_opened(struct lofi_state *lsp, int otyp)
{
	ASSERT(MUTEX_HELD(&lofi_lock));
	switch (otyp) {
	case OTYP_CHR:
		lsp->ls_chr_open = 1;
		break;
	case OTYP_BLK:
		lsp->ls_blk_open = 1;
		break;
	case OTYP_LYR:
		lsp->ls_lyr_open_count++;
		break;
	default:
		return (-1);
	}
	return (0);
}

static void
mark_closed(struct lofi_state *lsp, int otyp)
{
	ASSERT(MUTEX_HELD(&lofi_lock));
	switch (otyp) {
	case OTYP_CHR:
		lsp->ls_chr_open = 0;
		break;
	case OTYP_BLK:
		lsp->ls_blk_open = 0;
		break;
	case OTYP_LYR:
		lsp->ls_lyr_open_count--;
		break;
	default:
		break;
	}
}

static void
lofi_free_crypto(struct lofi_state *lsp)
{
	ASSERT(MUTEX_HELD(&lofi_lock));

	if (lsp->ls_crypto_enabled) {
		/*
		 * Clean up the crypto state so that it doesn't hang around
		 * in memory after we are done with it.
		 */
		if (lsp->ls_key.ck_data != NULL) {
			bzero(lsp->ls_key.ck_data,
			    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
			kmem_free(lsp->ls_key.ck_data,
			    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
			lsp->ls_key.ck_data = NULL;
			lsp->ls_key.ck_length = 0;
		}

		if (lsp->ls_mech.cm_param != NULL) {
			kmem_free(lsp->ls_mech.cm_param,
			    lsp->ls_mech.cm_param_len);
			lsp->ls_mech.cm_param = NULL;
			lsp->ls_mech.cm_param_len = 0;
		}

		if (lsp->ls_iv_mech.cm_param != NULL) {
			kmem_free(lsp->ls_iv_mech.cm_param,
			    lsp->ls_iv_mech.cm_param_len);
			lsp->ls_iv_mech.cm_param = NULL;
			lsp->ls_iv_mech.cm_param_len = 0;
		}

		mutex_destroy(&lsp->ls_crypto_lock);
	}
}

static void
lofi_destroy(struct lofi_state *lsp, cred_t *credp)
{
	minor_t minor = getminor(lsp->ls_dev);
	int i;

	ASSERT(MUTEX_HELD(&lofi_lock));

	list_remove(&lofi_list, lsp);

	lofi_free_crypto(lsp);

	/*
	 * Free pre-allocated compressed buffers
	 */
	if (lsp->ls_comp_bufs != NULL) {
		for (i = 0; i < lofi_taskq_nthreads; i++) {
			if (lsp->ls_comp_bufs[i].bufsize > 0)
				kmem_free(lsp->ls_comp_bufs[i].buf,
				    lsp->ls_comp_bufs[i].bufsize);
		}
		kmem_free(lsp->ls_comp_bufs,
		    sizeof (struct compbuf) * lofi_taskq_nthreads);
	}

	(void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag,
	    1, 0, credp, NULL);
	VN_RELE(lsp->ls_vp);
	if (lsp->ls_stacked_vp != lsp->ls_vp)
		VN_RELE(lsp->ls_stacked_vp);

	taskq_destroy(lsp->ls_taskq);

	if (lsp->ls_kstat != NULL)
		kstat_delete(lsp->ls_kstat);

	/*
	 * Free cached decompressed segment data
	 */
	lofi_free_comp_cache(lsp);
	list_destroy(&lsp->ls_comp_cache);

	if (lsp->ls_uncomp_seg_sz > 0) {
		kmem_free(lsp->ls_comp_index_data, lsp->ls_comp_index_data_sz);
		lsp->ls_uncomp_seg_sz = 0;
	}

	rctl_decr_lofi(lsp->ls_zone.zref_zone, 1);
	zone_rele_ref(&lsp->ls_zone, ZONE_REF_LOFI);

	mutex_destroy(&lsp->ls_comp_cache_lock);
	mutex_destroy(&lsp->ls_comp_bufs_lock);
	mutex_destroy(&lsp->ls_kstat_lock);
	mutex_destroy(&lsp->ls_vp_lock);

	ASSERT(ddi_get_soft_state(lofi_statep, minor) == lsp);
	ddi_soft_state_free(lofi_statep, minor);
	id_free(lofi_minor_id, minor);
}
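
/*
 * Remove the properties and the block/character minor nodes that were
 * exported for this device.
 */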
static void
lofi_free_dev(dev_t dev)
{
	minor_t minor = getminor(dev);
	char namebuf[50];

	ASSERT(MUTEX_HELD(&lofi_lock));

	(void) ddi_prop_remove(dev, lofi_dip, ZONE_PROP_NAME);
	(void) ddi_prop_remove(dev, lofi_dip, SIZE_PROP_NAME);
	(void) ddi_prop_remove(dev, lofi_dip, NBLOCKS_PROP_NAME);

	(void) snprintf(namebuf, sizeof (namebuf), "%d", minor);
	ddi_remove_minor_node(lofi_dip, namebuf);
	(void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor);
	ddi_remove_minor_node(lofi_dip, namebuf);
}

/*ARGSUSED*/
static void
lofi_zone_shutdown(zoneid_t zoneid, void *arg)
{
	struct lofi_state *lsp;
	struct lofi_state *next;

	mutex_enter(&lofi_lock);

	for (lsp = list_head(&lofi_list); lsp != NULL; lsp = next) {

		/* lofi_destroy() frees lsp */
		next = list_next(&lofi_list, lsp);

		if (lsp->ls_zone.zref_zone->zone_id != zoneid)
			continue;

		/*
		 * No in-zone processes are running, but something has this
		 * open. It's either a global zone process, or a lofi
		 * mount. In either case we set ls_cleanup so the last
		 * user destroys the device.
		 */
		if (is_opened(lsp)) {
			lsp->ls_cleanup = 1;
		} else {
			lofi_free_dev(lsp->ls_dev);
			lofi_destroy(lsp, kcred);
		}
	}

	mutex_exit(&lofi_lock);
}

/*ARGSUSED*/
static int
lofi_open(dev_t *devp, int flag, int otyp, struct cred *credp)
{
	minor_t minor;
	struct lofi_state *lsp;

	/*
	 * lofiadm -a /dev/lofi/1 gets us here.
	 */
	if (mutex_owner(&lofi_lock) == curthread)
		return (EINVAL);

	mutex_enter(&lofi_lock);

	minor = getminor(*devp);

	/* master control device */
	if (minor == 0) {
		mutex_exit(&lofi_lock);
		return (0);
	}

	/* otherwise, the mapping should already exist */
	lsp = ddi_get_soft_state(lofi_statep, minor);
	if (lsp == NULL) {
		mutex_exit(&lofi_lock);
		return (EINVAL);
	}

	if (lsp->ls_vp == NULL) {
		mutex_exit(&lofi_lock);
		return (ENXIO);
	}

	if (lsp->ls_readonly && (flag & FWRITE)) {
		mutex_exit(&lofi_lock);
		return (EROFS);
	}

	if (mark_opened(lsp, otyp) == -1) {
		mutex_exit(&lofi_lock);
		return (EINVAL);
	}

	mutex_exit(&lofi_lock);
	return (0);
}

/*ARGSUSED*/
static int
lofi_close(dev_t dev, int flag, int otyp, struct cred *credp)
{
	minor_t minor;
	struct lofi_state *lsp;

	mutex_enter(&lofi_lock);
	minor = getminor(dev);
	lsp = ddi_get_soft_state(lofi_statep, minor);
	if (lsp == NULL) {
		mutex_exit(&lofi_lock);
		return (EINVAL);
	}

	if (minor == 0) {
		mutex_exit(&lofi_lock);
		return (0);
	}

	mark_closed(lsp, otyp);

	/*
	 * If we forcibly closed the underlying device (li_force), or
	 * asked for cleanup (li_cleanup), finish up if we're the last
	 * out of the door.
	 */
	if (!is_opened(lsp) && (lsp->ls_cleanup || lsp->ls_vp == NULL)) {
		lofi_free_dev(lsp->ls_dev);
		lofi_destroy(lsp, credp);
	}

	mutex_exit(&lofi_lock);
	return (0);
}

/*
 * Sets the mechanism's initialization vector (IV) if one is needed.
 * The IV is computed from the data block number. lsp->ls_mech is
 * altered so that:
 *	lsp->ls_mech.cm_param_len is set to the IV len.
 *	lsp->ls_mech.cm_param is set to the IV.
 */
static int
lofi_blk_mech(struct lofi_state *lsp, longlong_t lblkno)
{
	int	ret;
	crypto_data_t cdata;
	char	*iv;
	size_t	iv_len;
	size_t	min;
	void	*data;
	size_t	datasz;

	ASSERT(MUTEX_HELD(&lsp->ls_crypto_lock));

	if (lsp == NULL)
		return (CRYPTO_DEVICE_ERROR);

	/* lsp->ls_mech.cm_param{_len} has already been set for static iv */
	if (lsp->ls_iv_type == IVM_NONE) {
		return (CRYPTO_SUCCESS);
	}

	/*
	 * if kmem already alloced from previous call and it's the same size
	 * we need now, just recycle it; allocate new kmem only if we have to
	 */
	if (lsp->ls_mech.cm_param == NULL ||
	    lsp->ls_mech.cm_param_len != lsp->ls_iv_len) {
		iv_len = lsp->ls_iv_len;
		iv = kmem_zalloc(iv_len, KM_SLEEP);
	} else {
		iv_len = lsp->ls_mech.cm_param_len;
		iv = lsp->ls_mech.cm_param;
		bzero(iv, iv_len);
	}

	switch (lsp->ls_iv_type) {
	case IVM_ENC_BLKNO:
		/* iv is not static, lblkno changes each time */
		data = &lblkno;
		datasz = sizeof (lblkno);
		break;
	default:
		data = 0;
		datasz = 0;
		break;
	}

	/*
	 * write blkno into the iv buffer padded on the left in case
	 * blkno ever grows bigger than its current longlong_t size
	 * or a variation other than blkno is used for the iv data
	 */
	min = MIN(datasz, iv_len);
	bcopy(data, iv + (iv_len - min), min);

	/* encrypt the data in-place to get the IV */
	SETUP_C_DATA(cdata, iv, iv_len);

	ret = crypto_encrypt(&lsp->ls_iv_mech, &cdata, &lsp->ls_key,
	    NULL, NULL, NULL);
	if (ret != CRYPTO_SUCCESS) {
		cmn_err(CE_WARN, "failed to create iv for block %lld: (0x%x)",
		    lblkno, ret);
		if (lsp->ls_mech.cm_param != iv)
			kmem_free(iv, iv_len);

		return (ret);
	}

	/* clean up the iv from the last computation */
	if (lsp->ls_mech.cm_param != NULL && lsp->ls_mech.cm_param != iv)
		kmem_free(lsp->ls_mech.cm_param, lsp->ls_mech.cm_param_len);

	lsp->ls_mech.cm_param_len = iv_len;
	lsp->ls_mech.cm_param = iv;

	return (CRYPTO_SUCCESS);
}
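
/*
 * For illustration: with IVM_ENC_BLKNO and a 16-byte IV, the eight bytes
 * of the (native-endian) block number land in the right half of a zeroed
 * 16-byte buffer, which is then encrypted in place with ls_iv_mech and
 * ls_key; the resulting ciphertext becomes the IV for that block.
 */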

/*
 * Performs encryption and decryption of a chunk of data of size "len",
 * one DEV_BSIZE block at a time. "len" is assumed to be a multiple of
 * DEV_BSIZE.
 */
static int
lofi_crypto(struct lofi_state *lsp, struct buf *bp, caddr_t plaintext,
    caddr_t ciphertext, size_t len, boolean_t op_encrypt)
{
	crypto_data_t cdata;
	crypto_data_t wdata;
	int ret;
	longlong_t lblkno = bp->b_lblkno;

	mutex_enter(&lsp->ls_crypto_lock);

	/*
	 * though we could encrypt/decrypt entire "len" chunk of data, we need
	 * to break it into DEV_BSIZE pieces to capture blkno incrementing
	 */
	SETUP_C_DATA(cdata, plaintext, len);
	cdata.cd_length = DEV_BSIZE;
	if (ciphertext != NULL) {		/* not in-place crypto */
		SETUP_C_DATA(wdata, ciphertext, len);
		wdata.cd_length = DEV_BSIZE;
	}

	do {
		ret = lofi_blk_mech(lsp, lblkno);
		if (ret != CRYPTO_SUCCESS)
			continue;

		if (op_encrypt) {
			ret = crypto_encrypt(&lsp->ls_mech, &cdata,
			    &lsp->ls_key, NULL,
			    ((ciphertext != NULL) ? &wdata : NULL), NULL);
		} else {
			ret = crypto_decrypt(&lsp->ls_mech, &cdata,
			    &lsp->ls_key, NULL,
			    ((ciphertext != NULL) ? &wdata : NULL), NULL);
		}

		cdata.cd_offset += DEV_BSIZE;
		if (ciphertext != NULL)
			wdata.cd_offset += DEV_BSIZE;
		lblkno++;
	} while (ret == CRYPTO_SUCCESS && cdata.cd_offset < len);

	mutex_exit(&lsp->ls_crypto_lock);

	if (ret != CRYPTO_SUCCESS) {
		cmn_err(CE_WARN, "%s failed for block %lld: (0x%x)",
		    op_encrypt ? "crypto_encrypt()" : "crypto_decrypt()",
		    lblkno, ret);
	}

	return (ret);
}

#define	RDWR_RAW	1
#define	RDWR_BCOPY	2

static int
lofi_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
    struct lofi_state *lsp, size_t len, int method, caddr_t bcopy_locn)
{
	ssize_t resid;
	int isread;
	int error;

	/*
	 * Handles reads/writes for both plain and encrypted lofi
	 * Note: offset is already shifted by lsp->ls_crypto_offset
	 * when it gets here.
	 */

	isread = bp->b_flags & B_READ;
	if (isread) {
		if (method == RDWR_BCOPY) {
			/* DO NOT update bp->b_resid for bcopy */
			bcopy(bcopy_locn, bufaddr, len);
			error = 0;
		} else {		/* RDWR_RAW */
			error = vn_rdwr(UIO_READ, lsp->ls_vp, bufaddr, len,
			    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred,
			    &resid);
			bp->b_resid = resid;
		}
		if (lsp->ls_crypto_enabled && error == 0) {
			if (lofi_crypto(lsp, bp, bufaddr, NULL, len,
			    B_FALSE) != CRYPTO_SUCCESS) {
				/*
				 * XXX: original code didn't set residual
				 * back to len because no error was expected
				 * from bcopy() if encryption is not enabled
				 */
				if (method != RDWR_BCOPY)
					bp->b_resid = len;
				error = EIO;
			}
		}
		return (error);
	} else {
		void *iobuf = bufaddr;

		if (lsp->ls_crypto_enabled) {
			/* don't do in-place crypto to keep bufaddr intact */
			iobuf = kmem_alloc(len, KM_SLEEP);
			if (lofi_crypto(lsp, bp, bufaddr, iobuf, len,
			    B_TRUE) != CRYPTO_SUCCESS) {
				kmem_free(iobuf, len);
				if (method != RDWR_BCOPY)
					bp->b_resid = len;
				return (EIO);
			}
		}
		if (method == RDWR_BCOPY) {
			/* DO NOT update bp->b_resid for bcopy */
			bcopy(iobuf, bcopy_locn, len);
			error = 0;
		} else {		/* RDWR_RAW */
			error = vn_rdwr(UIO_WRITE, lsp->ls_vp, iobuf, len,
			    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred,
			    &resid);
			bp->b_resid = resid;
		}
		if (lsp->ls_crypto_enabled) {
			kmem_free(iobuf, len);
		}
		return (error);
	}
}
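
/*
 * Transfer data to or from the backing file by mapping it through segkmap,
 * at most one MAXBSIZE chunk per iteration.
 */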
static int
lofi_mapped_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
    struct lofi_state *lsp)
{
	int error;
	offset_t alignedoffset, mapoffset;
	size_t	xfersize;
	int	isread;
	int	smflags;
	caddr_t	mapaddr;
	size_t	len;
	enum seg_rw srw;
	int	save_error;

	/*
	 * Note: offset is already shifted by lsp->ls_crypto_offset
	 * when it gets here.
	 */
	if (lsp->ls_crypto_enabled)
		ASSERT(lsp->ls_vp_comp_size == lsp->ls_vp_size);

	/*
	 * segmap always gives us an 8K (MAXBSIZE) chunk, aligned on
	 * an 8K boundary, but the buf transfer address may not be
	 * aligned on more than a 512-byte boundary (we don't enforce
	 * that even though we could). This matters since the initial
	 * part of the transfer may not start at offset 0 within the
	 * segmap'd chunk. So we have to compensate for that with
	 * 'mapoffset'. Subsequent chunks always start off at the
	 * beginning, and the last is capped by b_resid
	 *
	 * Visually, where "|" represents page map boundaries:
	 *   alignedoffset (mapaddr begins at this segmap boundary)
	 *    |   offset (from beginning of file)
	 *    |    |   len
	 *    v    v    v
	 * ===|====X========|====...======|========X====|====
	 *         /-------------...---------------/
	 *    ^ bp->b_bcount/bp->b_resid at start
	 *    /----/--------/----...------/--------/
	 *    ^    ^        ^    ...      ^        ^
	 *    |    |        |             |        nth xfersize (<= MAXBSIZE)
	 *    |    |        2nd thru n-1st xfersize (= MAXBSIZE)
	 *    |    1st xfersize (<= MAXBSIZE)
	 *    mapoffset (offset into 1st segmap, non-0 1st time, 0 thereafter)
	 *
	 * Notes: "alignedoffset" is "offset" rounded down to nearest
	 * MAXBSIZE boundary. "len" is next page boundary of size
	 * PAGESIZE after "alignedoffset".
	 */
	mapoffset = offset & MAXBOFFSET;
	alignedoffset = offset - mapoffset;
	bp->b_resid = bp->b_bcount;
	isread = bp->b_flags & B_READ;
	srw = isread ? S_READ : S_WRITE;
	do {
		xfersize = MIN(lsp->ls_vp_comp_size - offset,
		    MIN(MAXBSIZE - mapoffset, bp->b_resid));
		len = roundup(mapoffset + xfersize, PAGESIZE);
		mapaddr = segmap_getmapflt(segkmap, lsp->ls_vp,
		    alignedoffset, MAXBSIZE, 1, srw);
		/*
		 * Now fault in the pages. This lets us check
		 * for errors before we reference mapaddr and
		 * try to resolve the fault in bcopy (which would
		 * panic instead). And this can easily happen,
		 * particularly if you've lofi'd a file over NFS
		 * and someone deletes the file on the server.
		 */
		error = segmap_fault(kas.a_hat, segkmap, mapaddr,
		    len, F_SOFTLOCK, srw);
		if (error) {
			(void) segmap_release(segkmap, mapaddr, 0);
			if (FC_CODE(error) == FC_OBJERR)
				error = FC_ERRNO(error);
			else
				error = EIO;
			break;
		}
		/* error may be non-zero for encrypted lofi */
		error = lofi_rdwr(bufaddr, 0, bp, lsp, xfersize,
		    RDWR_BCOPY, mapaddr + mapoffset);
		if (error == 0) {
			bp->b_resid -= xfersize;
			bufaddr += xfersize;
			offset += xfersize;
		}
		smflags = 0;
		if (isread) {
			smflags |= SM_FREE;
			/*
			 * If we're reading an entire page starting
			 * at a page boundary, there's a good chance
			 * we won't need it again. Put it on the
			 * head of the freelist.
			 */
			if (mapoffset == 0 && xfersize == MAXBSIZE)
				smflags |= SM_DONTNEED;
		} else {
			/*
			 * Write back good pages, it is okay to
			 * always release asynchronous here as we'll
			 * follow with VOP_FSYNC for B_SYNC buffers.
			 */
			if (error == 0)
				smflags |= SM_WRITE | SM_ASYNC;
		}
		(void) segmap_fault(kas.a_hat, segkmap, mapaddr,
		    len, F_SOFTUNLOCK, srw);
		save_error = segmap_release(segkmap, mapaddr, smflags);
		if (error == 0)
			error = save_error;
		/* only the first map may start partial */
		mapoffset = 0;
		alignedoffset += MAXBSIZE;
	} while ((error == 0) && (bp->b_resid > 0) &&
	    (offset < lsp->ls_vp_comp_size));

	return (error);
}

/*
 * Check if segment seg_index is present in the decompressed segment
 * data cache.
 *
 * Returns a pointer to the decompressed segment data cache entry if
 * found, and NULL when decompressed data for this segment is not yet
 * cached.
 */
static struct lofi_comp_cache *
lofi_find_comp_data(struct lofi_state *lsp, uint64_t seg_index)
{
	struct lofi_comp_cache *lc;

	ASSERT(MUTEX_HELD(&lsp->ls_comp_cache_lock));

	for (lc = list_head(&lsp->ls_comp_cache); lc != NULL;
	    lc = list_next(&lsp->ls_comp_cache, lc)) {
		if (lc->lc_index == seg_index) {
			/*
			 * Decompressed segment data was found in the
			 * cache.
			 *
			 * The cache uses an LRU replacement strategy;
			 * move the entry to head of list.
			 */
			list_remove(&lsp->ls_comp_cache, lc);
			list_insert_head(&lsp->ls_comp_cache, lc);
			return (lc);
		}
	}
	return (NULL);
}

/*
 * Add the data for a decompressed segment at segment index
 * seg_index to the cache of the decompressed segments.
 *
 * Returns a pointer to the cache element structure in case
 * the data was added to the cache; returns NULL when the data
 * wasn't cached.
 */
static struct lofi_comp_cache *
lofi_add_comp_data(struct lofi_state *lsp, uint64_t seg_index,
    uchar_t *data)
{
	struct lofi_comp_cache *lc;

	ASSERT(MUTEX_HELD(&lsp->ls_comp_cache_lock));

	while (lsp->ls_comp_cache_count > lofi_max_comp_cache) {
		lc = list_remove_tail(&lsp->ls_comp_cache);
		ASSERT(lc != NULL);
		kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);
		kmem_free(lc, sizeof (struct lofi_comp_cache));
		lsp->ls_comp_cache_count--;
	}

	/*
	 * Do not cache when disabled by tunable variable
	 */
	if (lofi_max_comp_cache == 0)
		return (NULL);

	/*
	 * When the cache has not yet reached the maximum allowed
	 * number of segments, allocate a new cache element.
	 * Otherwise the cache is full; reuse the last list element
	 * (LRU) for caching the decompressed segment data.
	 *
	 * The cache element for the new decompressed segment data is
	 * added to the head of the list.
	 */
	if (lsp->ls_comp_cache_count < lofi_max_comp_cache) {
		lc = kmem_alloc(sizeof (struct lofi_comp_cache), KM_SLEEP);
		lc->lc_data = NULL;
		list_insert_head(&lsp->ls_comp_cache, lc);
		lsp->ls_comp_cache_count++;
	} else {
		lc = list_remove_tail(&lsp->ls_comp_cache);
		if (lc == NULL)
			return (NULL);
		list_insert_head(&lsp->ls_comp_cache, lc);
	}

	/*
	 * Free old uncompressed segment data when reusing a cache
	 * entry.
	 */
	if (lc->lc_data != NULL)
		kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);

	lc->lc_data = data;
	lc->lc_index = seg_index;
	return (lc);
}


/*ARGSUSED*/
static int
gzip_decompress(void *src, size_t srclen, void *dst,
    size_t *dstlen, int level)
{
	ASSERT(*dstlen >= srclen);

	if (z_uncompress(dst, dstlen, src, srclen) != Z_OK)
		return (-1);
	return (0);
}
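
/*
 * The lzma payload begins with LZMA_PROPS_SIZE bytes of properties
 * followed by an 8-byte field (the uncompressed size in the standard
 * .lzma layout), so the compressed data proper starts LZMA_HEADER_SIZE
 * bytes in. LzmaDecode() is handed the properties from the head of the
 * buffer and the payload separately.
 */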
#define	LZMA_HEADER_SIZE	(LZMA_PROPS_SIZE + 8)
/*ARGSUSED*/
static int
lzma_decompress(void *src, size_t srclen, void *dst,
    size_t *dstlen, int level)
{
	size_t insizepure;
	void *actual_src;
	ELzmaStatus status;

	insizepure = srclen - LZMA_HEADER_SIZE;
	actual_src = (void *)((Byte *)src + LZMA_HEADER_SIZE);

	if (LzmaDecode((Byte *)dst, (size_t *)dstlen,
	    (const Byte *)actual_src, &insizepure,
	    (const Byte *)src, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &status,
	    &g_Alloc) != SZ_OK) {
		return (-1);
	}
	return (0);
}

/*
 * This is basically what strategy used to be before we found we
 * needed task queues.
 */
static void
lofi_strategy_task(void *arg)
{
	struct buf *bp = (struct buf *)arg;
	int error;
	int syncflag = 0;
	struct lofi_state *lsp;
	offset_t offset;
	caddr_t	bufaddr;
	size_t	len;
	size_t	xfersize;
	boolean_t bufinited = B_FALSE;

	lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev));
	if (lsp == NULL) {
		error = ENXIO;
		goto errout;
	}
	if (lsp->ls_kstat) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_to_runq(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}
	bp_mapin(bp);
	bufaddr = bp->b_un.b_addr;
	offset = bp->b_lblkno * DEV_BSIZE;	/* offset within file */
	if (lsp->ls_crypto_enabled) {
		/* encrypted data really begins after crypto header */
		offset += lsp->ls_crypto_offset;
	}
	len = bp->b_bcount;
	bufinited = B_TRUE;

	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
		error = EIO;
		goto errout;
	}

	/*
	 * If we're writing and the buffer was not B_ASYNC
	 * we'll follow up with a VOP_FSYNC() to force any
	 * asynchronous I/O to stable storage.
	 */
	if (!(bp->b_flags & B_READ) && !(bp->b_flags & B_ASYNC))
		syncflag = FSYNC;

	/*
	 * We used to always use vn_rdwr here, but we cannot do that because
	 * we might decide to read or write from the underlying
	 * file during this call, which would be a deadlock because
	 * we have the rw_lock. So instead we page, unless it's not
	 * mappable or it's a character device or it's an encrypted lofi.
	 */
	if ((lsp->ls_vp->v_flag & VNOMAP) || (lsp->ls_vp->v_type == VCHR) ||
	    lsp->ls_crypto_enabled) {
		error = lofi_rdwr(bufaddr, offset, bp, lsp, len, RDWR_RAW,
		    NULL);
	} else if (lsp->ls_uncomp_seg_sz == 0) {
		error = lofi_mapped_rdwr(bufaddr, offset, bp, lsp);
	} else {
		uchar_t *compressed_seg = NULL, *cmpbuf;
		uchar_t *uncompressed_seg = NULL;
		lofi_compress_info_t *li;
		size_t oblkcount;
		ulong_t seglen;
		uint64_t sblkno, eblkno, cmpbytes;
		uint64_t uncompressed_seg_index;
		struct lofi_comp_cache *lc;
		offset_t sblkoff, eblkoff;
		u_offset_t salign, ealign;
		u_offset_t sdiff;
		uint32_t comp_data_sz;
		uint64_t i;
		int j;

		/*
		 * From here on we're dealing primarily with compressed files
		 */
		ASSERT(!lsp->ls_crypto_enabled);

		/*
		 * Compressed files can only be read from and
		 * not written to
		 */
		if (!(bp->b_flags & B_READ)) {
			bp->b_resid = bp->b_bcount;
			error = EROFS;
			goto done;
		}

		ASSERT(lsp->ls_comp_algorithm_index >= 0);
		li = &lofi_compress_table[lsp->ls_comp_algorithm_index];
		/*
		 * Compute starting and ending compressed segment numbers
		 * We use only bitwise operations avoiding division and
		 * modulus because we enforce the compression segment size
		 * to a power of 2
		 */
		sblkno = offset >> lsp->ls_comp_seg_shift;
		sblkoff = offset & (lsp->ls_uncomp_seg_sz - 1);
		eblkno = (offset + bp->b_bcount) >> lsp->ls_comp_seg_shift;
		eblkoff = (offset + bp->b_bcount) & (lsp->ls_uncomp_seg_sz - 1);
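
		/*
		 * For example (illustrative numbers): with a 128K segment
		 * size, ls_comp_seg_shift is 17, so an offset of 0x21000
		 * and a 4K b_bcount give sblkno 1, sblkoff 0x1000,
		 * eblkno 1, eblkoff 0x2000 -- a request that stays within
		 * a single compressed segment.
		 */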

		/*
		 * Check the decompressed segment cache.
		 *
		 * The cache is used only when the requested data
		 * is within a segment. Requests that cross
		 * segment boundaries bypass the cache.
		 */
		if (sblkno == eblkno ||
		    (sblkno + 1 == eblkno && eblkoff == 0)) {
			/*
			 * Request doesn't cross a segment boundary,
			 * now check the cache.
			 */
			mutex_enter(&lsp->ls_comp_cache_lock);
			lc = lofi_find_comp_data(lsp, sblkno);
			if (lc != NULL) {
				/*
				 * We've found the decompressed segment
				 * data in the cache; reuse it.
				 */
				bcopy(lc->lc_data + sblkoff, bufaddr,
				    bp->b_bcount);
				mutex_exit(&lsp->ls_comp_cache_lock);
				bp->b_resid = 0;
				error = 0;
				goto done;
			}
			mutex_exit(&lsp->ls_comp_cache_lock);
		}

		/*
		 * Align start offset to block boundary for segmap
		 */
		salign = lsp->ls_comp_seg_index[sblkno];
		sdiff = salign & (DEV_BSIZE - 1);
		salign -= sdiff;
		if (eblkno >= (lsp->ls_comp_index_sz - 1)) {
			/*
			 * We're dealing with the last segment of
			 * the compressed file -- the size of this
			 * segment *may not* be the same as the
			 * segment size for the file
			 */
			eblkoff = (offset + bp->b_bcount) &
			    (lsp->ls_uncomp_last_seg_sz - 1);
			ealign = lsp->ls_vp_comp_size;
		} else {
			ealign = lsp->ls_comp_seg_index[eblkno + 1];
		}

		/*
		 * Preserve original request parameters
		 */
		oblkcount = bp->b_bcount;

		/*
		 * Assign the calculated parameters
		 */
		comp_data_sz = ealign - salign;
		bp->b_bcount = comp_data_sz;

		/*
		 * Buffers to hold compressed segments are pre-allocated
		 * on a per-thread basis. Find a pre-allocated buffer
		 * that is not currently in use and mark it for use.
		 */
		mutex_enter(&lsp->ls_comp_bufs_lock);
		for (j = 0; j < lofi_taskq_nthreads; j++) {
			if (lsp->ls_comp_bufs[j].inuse == 0) {
				lsp->ls_comp_bufs[j].inuse = 1;
				break;
			}
		}

		mutex_exit(&lsp->ls_comp_bufs_lock);
		ASSERT(j < lofi_taskq_nthreads);

		/*
		 * If the pre-allocated buffer size does not match
		 * the size of the I/O request, re-allocate it with
		 * the appropriate size
		 */
		if (lsp->ls_comp_bufs[j].bufsize < bp->b_bcount) {
			if (lsp->ls_comp_bufs[j].bufsize > 0)
				kmem_free(lsp->ls_comp_bufs[j].buf,
				    lsp->ls_comp_bufs[j].bufsize);
			lsp->ls_comp_bufs[j].buf = kmem_alloc(bp->b_bcount,
			    KM_SLEEP);
			lsp->ls_comp_bufs[j].bufsize = bp->b_bcount;
		}
		compressed_seg = lsp->ls_comp_bufs[j].buf;

		/*
		 * Map in the calculated number of blocks
		 */
		error = lofi_mapped_rdwr((caddr_t)compressed_seg, salign,
		    bp, lsp);

		bp->b_bcount = oblkcount;
		bp->b_resid = oblkcount;
		if (error != 0)
			goto done;

		/*
		 * decompress compressed blocks start
		 */
		cmpbuf = compressed_seg + sdiff;
		for (i = sblkno; i <= eblkno; i++) {
			ASSERT(i < lsp->ls_comp_index_sz - 1);
			uchar_t *useg;

			/*
			 * The last segment is special in that it is
			 * most likely not going to be the same
			 * (uncompressed) size as the other segments.
			 */
			if (i == (lsp->ls_comp_index_sz - 2)) {
				seglen = lsp->ls_uncomp_last_seg_sz;
			} else {
				seglen = lsp->ls_uncomp_seg_sz;
			}

			/*
			 * Each of the segment index entries contains
			 * the starting block number for that segment.
			 * The number of compressed bytes in a segment
			 * is thus the difference between the starting
			 * block number of this segment and the starting
			 * block number of the next segment.
			 */
			cmpbytes = lsp->ls_comp_seg_index[i + 1] -
			    lsp->ls_comp_seg_index[i];

			/*
			 * The first byte in a compressed segment is a flag
			 * that indicates whether this segment is compressed
			 * at all.
			 *
			 * The variable 'useg' is used (instead of
			 * uncompressed_seg) in this loop to keep a
			 * reference to the uncompressed segment.
			 *
			 * N.B. If 'useg' is replaced with uncompressed_seg,
			 * it leads to memory leaks and heap corruption in
			 * corner cases where compressed segments lie
			 * adjacent to uncompressed segments.
			 */
			if (*cmpbuf == UNCOMPRESSED) {
				useg = cmpbuf + SEGHDR;
			} else {
				if (uncompressed_seg == NULL)
					uncompressed_seg =
					    kmem_alloc(lsp->ls_uncomp_seg_sz,
					    KM_SLEEP);
				useg = uncompressed_seg;
				uncompressed_seg_index = i;

				if (li->l_decompress((cmpbuf + SEGHDR),
				    (cmpbytes - SEGHDR), uncompressed_seg,
				    &seglen, li->l_level) != 0) {
					error = EIO;
					goto done;
				}
			}

			/*
			 * Determine how much uncompressed data we
			 * have to copy and copy it
			 */
			xfersize = lsp->ls_uncomp_seg_sz - sblkoff;
			if (i == eblkno)
				xfersize -= (lsp->ls_uncomp_seg_sz - eblkoff);

			bcopy((useg + sblkoff), bufaddr, xfersize);

			cmpbuf += cmpbytes;
			bufaddr += xfersize;
			bp->b_resid -= xfersize;
			sblkoff = 0;

			if (bp->b_resid == 0)
				break;
		} /* decompress compressed blocks ends */

		/*
		 * Skip to done if there is no uncompressed data to cache
		 */
		if (uncompressed_seg == NULL)
			goto done;

		/*
		 * Add the data for the last decompressed segment to
		 * the cache.
		 *
		 * In case the uncompressed segment data was added to (and
		 * is referenced by) the cache, make sure we don't free it
		 * here.
		 */
		mutex_enter(&lsp->ls_comp_cache_lock);
		if ((lc = lofi_add_comp_data(lsp, uncompressed_seg_index,
		    uncompressed_seg)) != NULL) {
			uncompressed_seg = NULL;
		}
		mutex_exit(&lsp->ls_comp_cache_lock);

done:
		if (compressed_seg != NULL) {
			mutex_enter(&lsp->ls_comp_bufs_lock);
			lsp->ls_comp_bufs[j].inuse = 0;
			mutex_exit(&lsp->ls_comp_bufs_lock);
		}
		if (uncompressed_seg != NULL)
			kmem_free(uncompressed_seg, lsp->ls_uncomp_seg_sz);
	} /* end of handling compressed files */

	if ((error == 0) && (syncflag != 0))
		error = VOP_FSYNC(lsp->ls_vp, syncflag, kcred, NULL);

errout:
	if (bufinited && lsp->ls_kstat) {
		size_t n_done = bp->b_bcount - bp->b_resid;
		kstat_io_t *kioptr;

		mutex_enter(lsp->ls_kstat->ks_lock);
		kioptr = KSTAT_IO_PTR(lsp->ls_kstat);
		if (bp->b_flags & B_READ) {
			kioptr->nread += n_done;
			kioptr->reads++;
		} else {
			kioptr->nwritten += n_done;
			kioptr->writes++;
		}
		kstat_runq_exit(kioptr);
		mutex_exit(lsp->ls_kstat->ks_lock);
	}

	mutex_enter(&lsp->ls_vp_lock);
	if (--lsp->ls_vp_iocount == 0)
		cv_broadcast(&lsp->ls_vp_cv);
	mutex_exit(&lsp->ls_vp_lock);

	bioerror(bp, error);
	biodone(bp);
}

static int
lofi_strategy(struct buf *bp)
{
	struct lofi_state *lsp;
	offset_t offset;

	/*
	 * We cannot just do I/O here, because the current thread
	 * _might_ end up back in here because the underlying filesystem
	 * wants a buffer, which eventually gets into bio_recycle and
	 * might call into lofi to write out a delayed-write buffer.
	 * This is bad if the filesystem above lofi is the same as below.
	 *
	 * We could come up with a complex strategy using threads to
	 * do the I/O asynchronously, or we could use task queues. task
	 * queues were incredibly easy so they win.
	 */
	lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev));
	if (lsp == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	mutex_enter(&lsp->ls_vp_lock);
	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
		bioerror(bp, EIO);
		biodone(bp);
		mutex_exit(&lsp->ls_vp_lock);
		return (0);
	}

	offset = bp->b_lblkno * DEV_BSIZE;	/* offset within file */
	if (lsp->ls_crypto_enabled) {
		/* encrypted data really begins after crypto header */
		offset += lsp->ls_crypto_offset;
	}
	if (offset == lsp->ls_vp_size) {
		/* EOF */
		if ((bp->b_flags & B_READ) != 0) {
			bp->b_resid = bp->b_bcount;
			bioerror(bp, 0);
		} else {
			/* writes should fail */
			bioerror(bp, ENXIO);
		}
		biodone(bp);
		mutex_exit(&lsp->ls_vp_lock);
		return (0);
	}
	if (offset > lsp->ls_vp_size) {
		bioerror(bp, ENXIO);
		biodone(bp);
		mutex_exit(&lsp->ls_vp_lock);
		return (0);
	}
	lsp->ls_vp_iocount++;
	mutex_exit(&lsp->ls_vp_lock);

	if (lsp->ls_kstat) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_enter(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}
	(void) taskq_dispatch(lsp->ls_taskq, lofi_strategy_task, bp, KM_SLEEP);
	return (0);
}

/*ARGSUSED2*/
static int
lofi_read(dev_t dev, struct uio *uio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(uio);
	return (physio(lofi_strategy, NULL, dev, B_READ, minphys, uio));
}

/*ARGSUSED2*/
static int
lofi_write(dev_t dev, struct uio *uio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(uio);
	return (physio(lofi_strategy, NULL, dev, B_WRITE, minphys, uio));
}

/*ARGSUSED2*/
static int
lofi_aread(dev_t dev, struct aio_req *aio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(aio->aio_uio);
	return (aphysio(lofi_strategy, anocancel, dev, B_READ, minphys, aio));
}

/*ARGSUSED2*/
static int
lofi_awrite(dev_t dev, struct aio_req *aio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(aio->aio_uio);
	return (aphysio(lofi_strategy, anocancel, dev, B_WRITE, minphys, aio));
}

/*ARGSUSED*/
static int
lofi_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = lofi_dip;
		return (DDI_SUCCESS);
	case DDI_INFO_DEVT2INSTANCE:
		*result = 0;
		return (DDI_SUCCESS);
	}
	return (DDI_FAILURE);
}
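
/*
 * Attach entry point: allocate the minor number space, create the control
 * node (minor 0), register the DDI_KERNEL_IOCTL property and install the
 * zone shutdown callback. Per-file minors are created later, when files
 * are attached.
 */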
static int
lofi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	error;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	lofi_minor_id = id_space_create("lofi_minor_id", 1, L_MAXMIN32 + 1);

	if (!lofi_minor_id)
		return (DDI_FAILURE);

	error = ddi_soft_state_zalloc(lofi_statep, 0);
	if (error == DDI_FAILURE) {
		id_space_destroy(lofi_minor_id);
		return (DDI_FAILURE);
	}
	error = ddi_create_minor_node(dip, LOFI_CTL_NODE, S_IFCHR, 0,
	    DDI_PSEUDO, NULL);
	if (error == DDI_FAILURE) {
		ddi_soft_state_free(lofi_statep, 0);
		id_space_destroy(lofi_minor_id);
		return (DDI_FAILURE);
	}
	/* driver handles kernel-issued IOCTLs */
	if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
		ddi_remove_minor_node(dip, NULL);
		ddi_soft_state_free(lofi_statep, 0);
		id_space_destroy(lofi_minor_id);
		return (DDI_FAILURE);
	}

	zone_key_create(&lofi_zone_key, NULL, lofi_zone_shutdown, NULL);

	lofi_dip = dip;
	ddi_report_dev(dip);
	return (DDI_SUCCESS);
}

static int
lofi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	mutex_enter(&lofi_lock);

	if (!list_is_empty(&lofi_list)) {
		mutex_exit(&lofi_lock);
		return (DDI_FAILURE);
	}

	lofi_dip = NULL;
	ddi_remove_minor_node(dip, NULL);
	ddi_prop_remove_all(dip);

	mutex_exit(&lofi_lock);

	if (zone_key_delete(lofi_zone_key) != 0)
		cmn_err(CE_WARN, "failed to delete zone key");

	ddi_soft_state_free(lofi_statep, 0);

	id_space_destroy(lofi_minor_id);

	return (DDI_SUCCESS);
}

/*
 * With addition of encryption, be careful that encryption key is wiped before
 * kernel memory structures are freed, and also that key is not accidentally
 * passed out into userland structures.
 */
static void
free_lofi_ioctl(struct lofi_ioctl *klip)
{
	/* Make sure this encryption key doesn't stick around */
	bzero(klip->li_key, sizeof (klip->li_key));
	kmem_free(klip, sizeof (struct lofi_ioctl));
}

/*
 * These two just simplify the rest of the ioctls that need to copyin/out
 * the lofi_ioctl structure.
 */
int
copy_in_lofi_ioctl(const struct lofi_ioctl *ulip, struct lofi_ioctl **klipp,
    int flag)
{
	struct lofi_ioctl *klip;
	int	error;

	klip = *klipp = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP);
	error = ddi_copyin(ulip, klip, sizeof (struct lofi_ioctl), flag);
	if (error)
		goto err;

	/* ensure NULL termination */
	klip->li_filename[MAXPATHLEN-1] = '\0';
	klip->li_algorithm[MAXALGLEN-1] = '\0';
	klip->li_cipher[CRYPTO_MAX_MECH_NAME-1] = '\0';
	klip->li_iv_cipher[CRYPTO_MAX_MECH_NAME-1] = '\0';

	if (klip->li_minor > L_MAXMIN32) {
		error = EINVAL;
		goto err;
	}

	return (0);

err:
	free_lofi_ioctl(klip);
	return (error);
}

int
copy_out_lofi_ioctl(const struct lofi_ioctl *klip, struct lofi_ioctl *ulip,
    int flag)
{
	int	error;

	/*
	 * NOTE: Do NOT copy the crypto_key_t "back" to userland.
	 * This ensures that an attacker can't trivially find the
	 * key for a mapping just by issuing the ioctl.
	 *
	 * It can still be found by poking around in kmem with mdb(1),
	 * but there is no point in making it easy when the info isn't
	 * of any use in this direction anyway.
	 *
	 * Either way we don't actually have the raw key stored in
	 * a form that we can get it anyway, since we just used it
	 * to create a ctx template and didn't keep "the original".
	 */
	error = ddi_copyout(klip, ulip, sizeof (struct lofi_ioctl), flag);
	if (error)
		return (EFAULT);
	return (0);
}
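
/*
 * Zone access check: the global zone may operate on any lofi device,
 * while a non-global zone may only operate on devices it created.
 */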
static int
lofi_access(struct lofi_state *lsp)
{
	ASSERT(MUTEX_HELD(&lofi_lock));
	if (INGLOBALZONE(curproc) || lsp->ls_zone.zref_zone == curzone)
		return (0);
	return (EPERM);
}

/*
 * Find the lofi state for the given filename. We compare by vnode to
 * allow the global zone visibility into NGZ lofi nodes.
 */
static int
file_to_lofi_nocheck(char *filename, boolean_t readonly,
    struct lofi_state **lspp)
{
	struct lofi_state *lsp;
	vnode_t *vp = NULL;
	int err = 0;
	int rdfiles = 0;

	ASSERT(MUTEX_HELD(&lofi_lock));

	if ((err = lookupname(filename, UIO_SYSSPACE, FOLLOW,
	    NULLVPP, &vp)) != 0)
		goto out;

	if (vp->v_type == VREG) {
		vnode_t *realvp;
		if (VOP_REALVP(vp, &realvp, NULL) == 0) {
			VN_HOLD(realvp);
			VN_RELE(vp);
			vp = realvp;
		}
	}

	for (lsp = list_head(&lofi_list); lsp != NULL;
	    lsp = list_next(&lofi_list, lsp)) {
		if (lsp->ls_vp == vp) {
			if (lspp != NULL)
				*lspp = lsp;
			if (lsp->ls_readonly) {
				rdfiles++;
				/* Skip if '-r' is specified */
				if (readonly)
					continue;
			}
			goto out;
		}
	}

	err = ENOENT;

	/*
	 * If a filename is given as an argument for lofi_unmap, we shouldn't
	 * allow unmap if there are multiple read-only lofi devices associated
	 * with this file.
	 */
	if (lspp != NULL) {
		if (rdfiles == 1)
			err = 0;
		else if (rdfiles > 1)
			err = EBUSY;
	}

out:
	if (vp != NULL)
		VN_RELE(vp);
	return (err);
}

/*
 * Find the minor for the given filename, checking the zone can access
 * it.
 */
static int
file_to_lofi(char *filename, boolean_t readonly, struct lofi_state **lspp)
{
	int err = 0;

	ASSERT(MUTEX_HELD(&lofi_lock));

	if ((err = file_to_lofi_nocheck(filename, readonly, lspp)) != 0)
		return (err);

	if ((err = lofi_access(*lspp)) != 0)
		return (err);

	return (0);
}

/*
 * Fakes up a disk geometry, and one big partition, based on the size
 * of the file. This is needed because we allow newfs'ing the device,
 * and newfs will do several disk ioctls to figure out the geometry and
 * partition information. It uses that information to determine the parameters
 * to pass to mkfs. Geometry is pretty much irrelevant these days, but we
 * have to support it.
 */
static void
fake_disk_geometry(struct lofi_state *lsp)
{
	u_offset_t dsize = lsp->ls_vp_size - lsp->ls_crypto_offset;

	/* dk_geom - see dkio(7I) */
	/*
	 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs
	 * of sectors), but that breaks programs like fdisk which want to
	 * partition a disk by cylinder. With one cylinder, you can't create
	 * an fdisk partition and put pcfs on it for testing (hard to pick
	 * a number between one and one).
	 *
	 * The cheesy floppy test is an attempt to not have too few cylinders
	 * for a small file, or so many on a big file that you waste space
	 * for backup superblocks or cylinder group structures.
	 */
	if (dsize < (2 * 1024 * 1024)) /* floppy? */
		lsp->ls_dkg.dkg_ncyl = dsize / (100 * 1024);
	else
		lsp->ls_dkg.dkg_ncyl = dsize / (300 * 1024);
	/* in case the file is < 100k */
	if (lsp->ls_dkg.dkg_ncyl == 0)
		lsp->ls_dkg.dkg_ncyl = 1;
	lsp->ls_dkg.dkg_acyl = 0;
	lsp->ls_dkg.dkg_bcyl = 0;
	lsp->ls_dkg.dkg_nhead = 1;
	lsp->ls_dkg.dkg_obs1 = 0;
	lsp->ls_dkg.dkg_intrlv = 0;
	lsp->ls_dkg.dkg_obs2 = 0;
	lsp->ls_dkg.dkg_obs3 = 0;
	lsp->ls_dkg.dkg_apc = 0;
	lsp->ls_dkg.dkg_rpm = 7200;
	lsp->ls_dkg.dkg_pcyl = lsp->ls_dkg.dkg_ncyl + lsp->ls_dkg.dkg_acyl;
	lsp->ls_dkg.dkg_nsect = dsize / (DEV_BSIZE * lsp->ls_dkg.dkg_ncyl);
	lsp->ls_dkg.dkg_write_reinstruct = 0;
	lsp->ls_dkg.dkg_read_reinstruct = 0;

	/* vtoc - see dkio(7I) */
	bzero(&lsp->ls_vtoc, sizeof (struct vtoc));
	lsp->ls_vtoc.v_sanity = VTOC_SANE;
	lsp->ls_vtoc.v_version = V_VERSION;
	(void) strncpy(lsp->ls_vtoc.v_volume, LOFI_DRIVER_NAME,
	    sizeof (lsp->ls_vtoc.v_volume));
	lsp->ls_vtoc.v_sectorsz = DEV_BSIZE;
	lsp->ls_vtoc.v_nparts = 1;
	lsp->ls_vtoc.v_part[0].p_tag = V_UNASSIGNED;

	/*
	 * A compressed file is read-only, other files can
	 * be read-write
	 */
	if (lsp->ls_uncomp_seg_sz > 0) {
		lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT | V_RONLY;
	} else {
		lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT;
	}
	lsp->ls_vtoc.v_part[0].p_start = (daddr_t)0;
	/*
	 * The partition size cannot just be the number of sectors, because
	 * that might not end on a cylinder boundary. And if that's the case,
	 * newfs/mkfs will print a scary warning. So just figure the size
	 * based on the number of cylinders and sectors/cylinder.
	 */
	lsp->ls_vtoc.v_part[0].p_size = lsp->ls_dkg.dkg_pcyl *
	    lsp->ls_dkg.dkg_nsect * lsp->ls_dkg.dkg_nhead;

	/* dk_cinfo - see dkio(7I) */
	bzero(&lsp->ls_ci, sizeof (struct dk_cinfo));
	(void) strcpy(lsp->ls_ci.dki_cname, LOFI_DRIVER_NAME);
	lsp->ls_ci.dki_ctype = DKC_MD;
	lsp->ls_ci.dki_flags = 0;
	lsp->ls_ci.dki_cnum = 0;
	lsp->ls_ci.dki_addr = 0;
	lsp->ls_ci.dki_space = 0;
	lsp->ls_ci.dki_prio = 0;
	lsp->ls_ci.dki_vec = 0;
	(void) strcpy(lsp->ls_ci.dki_dname, LOFI_DRIVER_NAME);
	lsp->ls_ci.dki_unit = 0;
	lsp->ls_ci.dki_slave = 0;
	lsp->ls_ci.dki_partition = 0;
	/*
	 * newfs uses this to set maxcontig. Must not be < 16, or it
	 * will be 0 when newfs multiplies it by DEV_BSIZE and divides
	 * it by the block size. Then tunefs doesn't work because
	 * maxcontig is 0.
	 */
	lsp->ls_ci.dki_maxtransfer = 16;
}
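
/*
 * Worked example (illustrative only): a 10 MB plain file gives
 * dkg_ncyl = 10485760 / (300 * 1024) = 34 and
 * dkg_nsect = 10485760 / (512 * 34) = 602, so p_size becomes
 * 34 * 602 * 1 = 20468 sectors -- slightly less than the file's
 * 20480 sectors, so the partition ends on a cylinder boundary.
 */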

/*
 * map in a compressed file
 *
 * Read in the header and the index that follows.
 *
 * The header is as follows -
 *
 * Signature (name of the compression algorithm)
 * Compression segment size (a multiple of 512)
 * Number of index entries
 * Size of the last block
 * The array containing the index entries
 *
 * The header information is always stored in
 * network byte order on disk.
 */
static int
lofi_map_compressed_file(struct lofi_state *lsp, char *buf)
{
	uint32_t index_sz, header_len, i;
	ssize_t	resid;
	enum uio_rw rw;
	char *tbuf = buf;
	int error;

	/* The signature has already been read */
	tbuf += sizeof (lsp->ls_comp_algorithm);
	bcopy(tbuf, &(lsp->ls_uncomp_seg_sz), sizeof (lsp->ls_uncomp_seg_sz));
	lsp->ls_uncomp_seg_sz = ntohl(lsp->ls_uncomp_seg_sz);

	/*
	 * The compressed segment size must be a power of 2
	 */
	if (lsp->ls_uncomp_seg_sz < DEV_BSIZE ||
	    !ISP2(lsp->ls_uncomp_seg_sz))
		return (EINVAL);

	for (i = 0; !((lsp->ls_uncomp_seg_sz >> i) & 1); i++)
		;

	lsp->ls_comp_seg_shift = i;

	tbuf += sizeof (lsp->ls_uncomp_seg_sz);
	bcopy(tbuf, &(lsp->ls_comp_index_sz), sizeof (lsp->ls_comp_index_sz));
	lsp->ls_comp_index_sz = ntohl(lsp->ls_comp_index_sz);

	tbuf += sizeof (lsp->ls_comp_index_sz);
	bcopy(tbuf, &(lsp->ls_uncomp_last_seg_sz),
	    sizeof (lsp->ls_uncomp_last_seg_sz));
	lsp->ls_uncomp_last_seg_sz = ntohl(lsp->ls_uncomp_last_seg_sz);

	/*
	 * Compute the total size of the uncompressed data
	 * for use in fake_disk_geometry and other calculations.
	 * Disk geometry has to be faked with respect to the
	 * actual uncompressed data size rather than the
	 * compressed file size.
	 */
	lsp->ls_vp_size =
	    (u_offset_t)(lsp->ls_comp_index_sz - 2) * lsp->ls_uncomp_seg_sz
	    + lsp->ls_uncomp_last_seg_sz;

	/*
	 * Index size is rounded up to DEV_BSIZE for ease
	 * of segmapping
	 */
	index_sz = sizeof (*lsp->ls_comp_seg_index) * lsp->ls_comp_index_sz;
	header_len = sizeof (lsp->ls_comp_algorithm) +
	    sizeof (lsp->ls_uncomp_seg_sz) +
	    sizeof (lsp->ls_comp_index_sz) +
	    sizeof (lsp->ls_uncomp_last_seg_sz);
	lsp->ls_comp_offbase = header_len + index_sz;

	index_sz += header_len;
	index_sz = roundup(index_sz, DEV_BSIZE);

	lsp->ls_comp_index_data = kmem_alloc(index_sz, KM_SLEEP);
	lsp->ls_comp_index_data_sz = index_sz;

	/*
	 * Read in the index -- this has a side-effect
	 * of reading in the header as well
	 */
	rw = UIO_READ;
	error = vn_rdwr(rw, lsp->ls_vp, lsp->ls_comp_index_data, index_sz,
	    0, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);

	if (error != 0)
		return (error);

	/* Skip the header, this is where the index really begins */
	lsp->ls_comp_seg_index =
	    /*LINTED*/
	    (uint64_t *)(lsp->ls_comp_index_data + header_len);

	/*
	 * Now recompute offsets in the index to account for
	 * the header length
	 */
	for (i = 0; i < lsp->ls_comp_index_sz; i++) {
		lsp->ls_comp_seg_index[i] = lsp->ls_comp_offbase +
		    BE_64(lsp->ls_comp_seg_index[i]);
	}

	return (error);
}
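
/*
 * Set up the crypto state for an encrypted mapping: validate the
 * requested ciphers and key, then read the on-disk crypto header at
 * CRYOFF, or initialize it if the image is new.
 */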
	if (klip->li_iv_len > CRYPTO_BITS2BYTES(512))
		return (EINVAL);

	if (CRYPTO_BITS2BYTES(klip->li_key_len) > sizeof (klip->li_key))
		return (EINVAL);

	lsp->ls_crypto_enabled = klip->li_crypto_enabled;

	mutex_init(&lsp->ls_crypto_lock, NULL, MUTEX_DRIVER, NULL);

	lsp->ls_mech.cm_type = crypto_mech2id(klip->li_cipher);
	if (lsp->ls_mech.cm_type == CRYPTO_MECH_INVALID) {
		cmn_err(CE_WARN, "invalid cipher %s requested for %s",
		    klip->li_cipher, klip->li_filename);
		return (EINVAL);
	}

	/* this is just initialization here */
	lsp->ls_mech.cm_param = NULL;
	lsp->ls_mech.cm_param_len = 0;

	lsp->ls_iv_type = klip->li_iv_type;
	lsp->ls_iv_mech.cm_type = crypto_mech2id(klip->li_iv_cipher);
	if (lsp->ls_iv_mech.cm_type == CRYPTO_MECH_INVALID) {
		cmn_err(CE_WARN, "invalid iv cipher %s requested"
		    " for %s", klip->li_iv_cipher, klip->li_filename);
		return (EINVAL);
	}

	/* iv mech must itself take a null iv */
	lsp->ls_iv_mech.cm_param = NULL;
	lsp->ls_iv_mech.cm_param_len = 0;
	lsp->ls_iv_len = klip->li_iv_len;

	/*
	 * Create ctx using li_cipher & the raw li_key after checking
	 * that it isn't a weak key.
	 */
	lsp->ls_key.ck_format = CRYPTO_KEY_RAW;
	lsp->ls_key.ck_length = klip->li_key_len;
	lsp->ls_key.ck_data = kmem_alloc(
	    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length), KM_SLEEP);
	bcopy(klip->li_key, lsp->ls_key.ck_data,
	    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));

	ret = crypto_key_check(&lsp->ls_mech, &lsp->ls_key);
	if (ret != CRYPTO_SUCCESS) {
		cmn_err(CE_WARN, "weak key check failed for cipher "
		    "%s on file %s (0x%x)", klip->li_cipher,
		    klip->li_filename, ret);
		return (EINVAL);
	}

	error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE,
	    CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
	if (error != 0)
		return (error);

	/*
	 * This is the case where the header in the lofi image is already
	 * initialized to indicate it is encrypted.
	 */
	if (strncmp(buf, lofi_crypto_magic, sizeof (lofi_crypto_magic)) == 0) {
		/*
		 * The encryption header information is laid out this way:
		 *	6 bytes:	hex "CFLOFI"
		 *	2 bytes:	version = 0 ... for now
		 *	96 bytes:	reserved1 (not implemented yet)
		 *	4 bytes:	data_sector = 2 ... for now
		 *	more...		not implemented yet
		 */
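		/*
		 * Concretely (for illustration): with the version 0 layout
		 * above, data_sector is 2, so ls_crypto_offset as computed
		 * below works out to 2 * DEV_BSIZE = 1024 bytes.
		 */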

		marker = buf;

		/* copy the magic */
		bcopy(marker, lsp->ls_crypto.magic,
		    sizeof (lsp->ls_crypto.magic));
		marker += sizeof (lsp->ls_crypto.magic);

		/* read the encryption version number */
		bcopy(marker, &(lsp->ls_crypto.version),
		    sizeof (lsp->ls_crypto.version));
		lsp->ls_crypto.version = ntohs(lsp->ls_crypto.version);
		marker += sizeof (lsp->ls_crypto.version);

		/* read a chunk of reserved data */
		bcopy(marker, lsp->ls_crypto.reserved1,
		    sizeof (lsp->ls_crypto.reserved1));
		marker += sizeof (lsp->ls_crypto.reserved1);

		/* read block number where encrypted data begins */
		bcopy(marker, &(lsp->ls_crypto.data_sector),
		    sizeof (lsp->ls_crypto.data_sector));
		lsp->ls_crypto.data_sector = ntohl(lsp->ls_crypto.data_sector);
		marker += sizeof (lsp->ls_crypto.data_sector);

		/* and ignore the rest until it is implemented */

		lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE;
		return (0);
	}

	/*
	 * We've requested encryption, but no magic was found, so it must be
	 * a new image.
	 */

	for (i = 0; i < sizeof (struct crypto_meta); i++) {
		if (buf[i] != '\0')
			return (EINVAL);
	}

	marker = buf;
	bcopy(lofi_crypto_magic, marker, sizeof (lofi_crypto_magic));
	marker += sizeof (lofi_crypto_magic);
	chead.version = htons(LOFI_CRYPTO_VERSION);
	bcopy(&(chead.version), marker, sizeof (chead.version));
	marker += sizeof (chead.version);
	marker += sizeof (chead.reserved1);
	chead.data_sector = htonl(LOFI_CRYPTO_DATA_SECTOR);
	bcopy(&(chead.data_sector), marker, sizeof (chead.data_sector));

	/* write the header */
	error = vn_rdwr(UIO_WRITE, lsp->ls_vp, buf, DEV_BSIZE,
	    CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
	if (error != 0)
		return (error);

	/* fix things up so it looks like we read this info */
	bcopy(lofi_crypto_magic, lsp->ls_crypto.magic,
	    sizeof (lofi_crypto_magic));
	lsp->ls_crypto.version = LOFI_CRYPTO_VERSION;
	lsp->ls_crypto.data_sector = LOFI_CRYPTO_DATA_SECTOR;
	lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE;
	return (0);
}

/*
 * Check to see if the passed in signature is a valid one. If it is
 * valid, return the index into lofi_compress_table.
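 *
 * (The match below is an exact strcmp() against each entry's l_name;
 * for instance, a gzip-compressed image carries the signature "gzip".
 * The example name is illustrative.)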
2062 * 2063 * Return -1 if it is invalid 2064 */ 2065 static int 2066 lofi_compress_select(const char *signature) 2067 { 2068 int i; 2069 2070 for (i = 0; i < LOFI_COMPRESS_FUNCTIONS; i++) { 2071 if (strcmp(lofi_compress_table[i].l_name, signature) == 0) 2072 return (i); 2073 } 2074 2075 return (-1); 2076 } 2077 2078 static int 2079 lofi_init_compress(struct lofi_state *lsp) 2080 { 2081 char buf[DEV_BSIZE]; 2082 int compress_index; 2083 ssize_t resid; 2084 int error; 2085 2086 error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, 0, UIO_SYSSPACE, 2087 0, RLIM64_INFINITY, kcred, &resid); 2088 2089 if (error != 0) 2090 return (error); 2091 2092 if ((compress_index = lofi_compress_select(buf)) == -1) 2093 return (0); 2094 2095 /* compression and encryption are mutually exclusive */ 2096 if (lsp->ls_crypto_enabled) 2097 return (ENOTSUP); 2098 2099 /* initialize compression info for compressed lofi */ 2100 lsp->ls_comp_algorithm_index = compress_index; 2101 (void) strlcpy(lsp->ls_comp_algorithm, 2102 lofi_compress_table[compress_index].l_name, 2103 sizeof (lsp->ls_comp_algorithm)); 2104 2105 /* Finally setup per-thread pre-allocated buffers */ 2106 lsp->ls_comp_bufs = kmem_zalloc(lofi_taskq_nthreads * 2107 sizeof (struct compbuf), KM_SLEEP); 2108 2109 return (lofi_map_compressed_file(lsp, buf)); 2110 } 2111 2112 /* 2113 * map a file to a minor number. Return the minor number. 2114 */ 2115 static int 2116 lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor, 2117 int *rvalp, struct cred *credp, int ioctl_flag) 2118 { 2119 minor_t minor = (minor_t)-1; 2120 struct lofi_state *lsp = NULL; 2121 struct lofi_ioctl *klip; 2122 int error; 2123 struct vnode *vp = NULL; 2124 vattr_t vattr; 2125 int flag; 2126 dev_t newdev; 2127 char namebuf[50]; 2128 2129 error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag); 2130 if (error != 0) 2131 return (error); 2132 2133 mutex_enter(&lofi_lock); 2134 2135 mutex_enter(&curproc->p_lock); 2136 if ((error = rctl_incr_lofi(curproc, curproc->p_zone, 1)) != 0) { 2137 mutex_exit(&curproc->p_lock); 2138 mutex_exit(&lofi_lock); 2139 free_lofi_ioctl(klip); 2140 return (error); 2141 } 2142 mutex_exit(&curproc->p_lock); 2143 2144 if (file_to_lofi_nocheck(klip->li_filename, klip->li_readonly, 2145 NULL) == 0) { 2146 error = EBUSY; 2147 goto err; 2148 } 2149 2150 if (pickminor) { 2151 minor = (minor_t)id_allocff_nosleep(lofi_minor_id); 2152 if (minor == (minor_t)-1) { 2153 error = EAGAIN; 2154 goto err; 2155 } 2156 } else { 2157 if (ddi_get_soft_state(lofi_statep, klip->li_minor) != NULL) { 2158 error = EEXIST; 2159 goto err; 2160 } 2161 2162 minor = (minor_t) 2163 id_alloc_specific_nosleep(lofi_minor_id, klip->li_minor); 2164 ASSERT(minor != (minor_t)-1); 2165 } 2166 2167 flag = FREAD | FWRITE | FOFFMAX | FEXCL; 2168 error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0); 2169 if (error) { 2170 /* try read-only */ 2171 flag &= ~FWRITE; 2172 error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, 2173 &vp, 0, 0); 2174 if (error) 2175 goto err; 2176 } 2177 2178 if (!V_ISLOFIABLE(vp->v_type)) { 2179 error = EINVAL; 2180 goto err; 2181 } 2182 2183 vattr.va_mask = AT_SIZE; 2184 error = VOP_GETATTR(vp, &vattr, 0, credp, NULL); 2185 if (error) 2186 goto err; 2187 2188 /* the file needs to be a multiple of the block size */ 2189 if ((vattr.va_size % DEV_BSIZE) != 0) { 2190 error = EINVAL; 2191 goto err; 2192 } 2193 2194 /* lsp alloc+init */ 2195 2196 error = ddi_soft_state_zalloc(lofi_statep, minor); 2197 if (error == DDI_FAILURE) { 2198 error = ENOMEM; 2199 goto 
	lsp = ddi_get_soft_state(lofi_statep, minor);
	list_insert_tail(&lofi_list, lsp);

	newdev = makedevice(getmajor(dev), minor);
	lsp->ls_dev = newdev;
	zone_init_ref(&lsp->ls_zone);
	zone_hold_ref(curzone, &lsp->ls_zone, ZONE_REF_LOFI);
	lsp->ls_uncomp_seg_sz = 0;
	lsp->ls_comp_algorithm[0] = '\0';
	lsp->ls_crypto_offset = 0;

	cv_init(&lsp->ls_vp_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&lsp->ls_comp_cache_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&lsp->ls_comp_bufs_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&lsp->ls_vp_lock, NULL, MUTEX_DRIVER, NULL);

	(void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d",
	    LOFI_DRIVER_NAME, minor);
	lsp->ls_taskq = taskq_create_proc(namebuf, lofi_taskq_nthreads,
	    minclsyspri, 1, lofi_taskq_maxalloc, curzone->zone_zsched, 0);

	list_create(&lsp->ls_comp_cache, sizeof (struct lofi_comp_cache),
	    offsetof(struct lofi_comp_cache, lc_list));

	/*
	 * save open mode so file can be closed properly and vnode counts
	 * updated correctly.
	 */
	lsp->ls_openflag = flag;

	lsp->ls_vp = vp;
	lsp->ls_stacked_vp = vp;
	/*
	 * Try to handle stacked lofs vnodes.
	 */
	if (vp->v_type == VREG) {
		vnode_t *realvp;

		if (VOP_REALVP(vp, &realvp, NULL) == 0) {
			/*
			 * We need to use the realvp for uniqueness
			 * checking, but keep the stacked vp for
			 * LOFI_GET_FILENAME display.
			 */
			VN_HOLD(realvp);
			lsp->ls_vp = realvp;
		}
	}

	lsp->ls_vp_size = vattr.va_size;
	lsp->ls_vp_comp_size = lsp->ls_vp_size;

	lsp->ls_kstat = kstat_create_zone(LOFI_DRIVER_NAME, minor,
	    NULL, "disk", KSTAT_TYPE_IO, 1, 0, getzoneid());

	if (lsp->ls_kstat == NULL) {
		error = ENOMEM;
		goto err;
	}

	lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock;
	kstat_zone_add(lsp->ls_kstat, GLOBAL_ZONEID);

	lsp->ls_readonly = klip->li_readonly;

	if ((error = lofi_init_crypto(lsp, klip)) != 0)
		goto err;

	if ((error = lofi_init_compress(lsp)) != 0)
		goto err;

	fake_disk_geometry(lsp);

	/* create minor nodes */

	(void) snprintf(namebuf, sizeof (namebuf), "%d", minor);
	error = ddi_create_minor_node(lofi_dip, namebuf, S_IFBLK, minor,
	    DDI_PSEUDO, NULL);
	if (error != DDI_SUCCESS) {
		error = ENXIO;
		goto err;
	}

	(void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor);
	error = ddi_create_minor_node(lofi_dip, namebuf, S_IFCHR, minor,
	    DDI_PSEUDO, NULL);
	if (error != DDI_SUCCESS) {
		/* remove block node */
		(void) snprintf(namebuf, sizeof (namebuf), "%d", minor);
		ddi_remove_minor_node(lofi_dip, namebuf);
		error = ENXIO;
		goto err;
	}

	/* create DDI properties */

	if ((ddi_prop_update_int64(newdev, lofi_dip, SIZE_PROP_NAME,
	    lsp->ls_vp_size - lsp->ls_crypto_offset)) != DDI_PROP_SUCCESS) {
		error = EINVAL;
		goto nodeerr;
	}

	if ((ddi_prop_update_int64(newdev, lofi_dip, NBLOCKS_PROP_NAME,
	    (lsp->ls_vp_size - lsp->ls_crypto_offset) / DEV_BSIZE))
	    != DDI_PROP_SUCCESS) {
		error = EINVAL;
		goto nodeerr;
	}

	if (ddi_prop_update_string(newdev, lofi_dip, ZONE_PROP_NAME,
	    (char *)curproc->p_zone->zone_name) != DDI_PROP_SUCCESS) {
		error = EINVAL;
		goto nodeerr;
	}

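	/*
	 * For example (illustrative numbers only): an unencrypted 1GB
	 * backing file (ls_crypto_offset == 0) exports Size = 1073741824
	 * and Nblocks = 1073741824 / DEV_BSIZE = 2097152 via the
	 * properties created above.
	 */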
	kstat_install(lsp->ls_kstat);

	mutex_exit(&lofi_lock);

	if (rvalp)
		*rvalp = (int)minor;
	klip->li_minor = minor;
	(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
	free_lofi_ioctl(klip);
	return (0);

nodeerr:
	lofi_free_dev(newdev);
err:
	if (lsp != NULL) {
		lofi_destroy(lsp, credp);
	} else {
		if (vp != NULL) {
			(void) VOP_CLOSE(vp, flag, 1, 0, credp, NULL);
			VN_RELE(vp);
		}

		if (minor != (minor_t)-1)
			id_free(lofi_minor_id, minor);

		rctl_decr_lofi(curproc->p_zone, 1);
	}

	mutex_exit(&lofi_lock);
	free_lofi_ioctl(klip);
	return (error);
}

/*
 * unmap a file.
 */
static int
lofi_unmap_file(struct lofi_ioctl *ulip, int byfilename,
    struct cred *credp, int ioctl_flag)
{
	struct lofi_state *lsp;
	struct lofi_ioctl *klip;
	int err;

	err = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag);
	if (err != 0)
		return (err);

	mutex_enter(&lofi_lock);
	if (byfilename) {
		if ((err = file_to_lofi(klip->li_filename, klip->li_readonly,
		    &lsp)) != 0) {
			mutex_exit(&lofi_lock);
			/* free the ioctl copy, as every other exit path does */
			free_lofi_ioctl(klip);
			return (err);
		}
	} else if (klip->li_minor == 0) {
		mutex_exit(&lofi_lock);
		free_lofi_ioctl(klip);
		return (ENXIO);
	} else {
		lsp = ddi_get_soft_state(lofi_statep, klip->li_minor);
	}

	if (lsp == NULL || lsp->ls_vp == NULL || lofi_access(lsp) != 0) {
		mutex_exit(&lofi_lock);
		free_lofi_ioctl(klip);
		return (ENXIO);
	}

	klip->li_minor = getminor(lsp->ls_dev);

	/*
	 * If it's still held open, we'll do one of three things:
	 *
	 * If no flag is set, just return EBUSY.
	 *
	 * If the 'cleanup' flag is set, unmap and remove the device when
	 * the last user finishes.
	 *
	 * If the 'force' flag is set, then we forcibly close the underlying
	 * file. Subsequent operations will fail, and the DKIOCSTATE ioctl
	 * will return DKIO_DEV_GONE. When the device is last closed, the
	 * device will be cleaned up appropriately.
	 *
	 * This is complicated by the fact that we may have outstanding
	 * dispatched I/Os. Rather than having a single mutex to serialize all
	 * I/O, we keep a count of the number of outstanding I/O requests
	 * (ls_vp_iocount), as well as a flag to indicate that no new I/Os
	 * should be dispatched (ls_vp_closereq).
	 *
	 * We set the flag, wait for the number of outstanding I/Os to reach 0,
	 * and then close the underlying vnode.
	 */
	if (is_opened(lsp)) {
		if (klip->li_force) {
			mutex_enter(&lsp->ls_vp_lock);
			lsp->ls_vp_closereq = B_TRUE;
			/* wake up any threads waiting on dkiocstate */
			cv_broadcast(&lsp->ls_vp_cv);
			while (lsp->ls_vp_iocount > 0)
				cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock);
			mutex_exit(&lsp->ls_vp_lock);

			goto out;
		} else if (klip->li_cleanup) {
			lsp->ls_cleanup = 1;
			mutex_exit(&lofi_lock);
			free_lofi_ioctl(klip);
			return (0);
		}

		mutex_exit(&lofi_lock);
		free_lofi_ioctl(klip);
		return (EBUSY);
	}

out:
	lofi_free_dev(lsp->ls_dev);
	lofi_destroy(lsp, credp);

	mutex_exit(&lofi_lock);
	(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
	free_lofi_ioctl(klip);
	return (0);
}

/*
 * get the filename given the minor number, or the minor number given
 * the name.
 */
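/*
 * A minimal userland sketch of this interface (hypothetical code, for
 * illustration only; "ctl_fd" is assumed to be an open descriptor for
 * /dev/lofictl):
 *
 *	struct lofi_ioctl li;
 *
 *	bzero(&li, sizeof (li));
 *	(void) strlcpy(li.li_filename, "/export/image",
 *	    sizeof (li.li_filename));
 *	if (ioctl(ctl_fd, LOFI_GET_MINOR, &li) == 0)
 *		(void) printf("/dev/lofi/%d\n", (int)li.li_minor);
 */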
2447 */ 2448 /*ARGSUSED*/ 2449 static int 2450 lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which, 2451 struct cred *credp, int ioctl_flag) 2452 { 2453 struct lofi_ioctl *klip; 2454 struct lofi_state *lsp; 2455 int error; 2456 2457 error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag); 2458 if (error != 0) 2459 return (error); 2460 2461 switch (which) { 2462 case LOFI_GET_FILENAME: 2463 if (klip->li_minor == 0) { 2464 free_lofi_ioctl(klip); 2465 return (EINVAL); 2466 } 2467 2468 mutex_enter(&lofi_lock); 2469 lsp = ddi_get_soft_state(lofi_statep, klip->li_minor); 2470 if (lsp == NULL || lofi_access(lsp) != 0) { 2471 mutex_exit(&lofi_lock); 2472 free_lofi_ioctl(klip); 2473 return (ENXIO); 2474 } 2475 2476 /* 2477 * This may fail if, for example, we're trying to look 2478 * up a zoned NFS path from the global zone. 2479 */ 2480 if (vnodetopath(NULL, lsp->ls_stacked_vp, klip->li_filename, 2481 sizeof (klip->li_filename), CRED()) != 0) { 2482 (void) strlcpy(klip->li_filename, "?", 2483 sizeof (klip->li_filename)); 2484 } 2485 2486 klip->li_readonly = lsp->ls_readonly; 2487 2488 (void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm, 2489 sizeof (klip->li_algorithm)); 2490 klip->li_crypto_enabled = lsp->ls_crypto_enabled; 2491 mutex_exit(&lofi_lock); 2492 error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 2493 free_lofi_ioctl(klip); 2494 return (error); 2495 case LOFI_GET_MINOR: 2496 mutex_enter(&lofi_lock); 2497 error = file_to_lofi(klip->li_filename, 2498 klip->li_readonly, &lsp); 2499 if (error == 0) 2500 klip->li_minor = getminor(lsp->ls_dev); 2501 mutex_exit(&lofi_lock); 2502 2503 if (error == 0) 2504 error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 2505 2506 free_lofi_ioctl(klip); 2507 return (error); 2508 case LOFI_CHECK_COMPRESSED: 2509 mutex_enter(&lofi_lock); 2510 error = file_to_lofi(klip->li_filename, 2511 klip->li_readonly, &lsp); 2512 if (error != 0) { 2513 mutex_exit(&lofi_lock); 2514 free_lofi_ioctl(klip); 2515 return (error); 2516 } 2517 2518 klip->li_minor = getminor(lsp->ls_dev); 2519 (void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm, 2520 sizeof (klip->li_algorithm)); 2521 2522 mutex_exit(&lofi_lock); 2523 error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 2524 free_lofi_ioctl(klip); 2525 return (error); 2526 default: 2527 free_lofi_ioctl(klip); 2528 return (EINVAL); 2529 } 2530 } 2531 2532 static int 2533 lofi_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, 2534 int *rvalp) 2535 { 2536 int error; 2537 enum dkio_state dkstate; 2538 struct lofi_state *lsp; 2539 minor_t minor; 2540 2541 minor = getminor(dev); 2542 /* lofi ioctls only apply to the master device */ 2543 if (minor == 0) { 2544 struct lofi_ioctl *lip = (struct lofi_ioctl *)arg; 2545 2546 /* 2547 * the query command only need read-access - i.e., normal 2548 * users are allowed to do those on the ctl device as 2549 * long as they can open it read-only. 
2550 */ 2551 switch (cmd) { 2552 case LOFI_MAP_FILE: 2553 if ((flag & FWRITE) == 0) 2554 return (EPERM); 2555 return (lofi_map_file(dev, lip, 1, rvalp, credp, flag)); 2556 case LOFI_MAP_FILE_MINOR: 2557 if ((flag & FWRITE) == 0) 2558 return (EPERM); 2559 return (lofi_map_file(dev, lip, 0, rvalp, credp, flag)); 2560 case LOFI_UNMAP_FILE: 2561 if ((flag & FWRITE) == 0) 2562 return (EPERM); 2563 return (lofi_unmap_file(lip, 1, credp, flag)); 2564 case LOFI_UNMAP_FILE_MINOR: 2565 if ((flag & FWRITE) == 0) 2566 return (EPERM); 2567 return (lofi_unmap_file(lip, 0, credp, flag)); 2568 case LOFI_GET_FILENAME: 2569 return (lofi_get_info(dev, lip, LOFI_GET_FILENAME, 2570 credp, flag)); 2571 case LOFI_GET_MINOR: 2572 return (lofi_get_info(dev, lip, LOFI_GET_MINOR, 2573 credp, flag)); 2574 2575 /* 2576 * This API made limited sense when this value was fixed 2577 * at LOFI_MAX_FILES. However, its use to iterate 2578 * across all possible devices in lofiadm means we don't 2579 * want to return L_MAXMIN32, but the highest 2580 * *allocated* minor. 2581 */ 2582 case LOFI_GET_MAXMINOR: 2583 minor = 0; 2584 2585 mutex_enter(&lofi_lock); 2586 2587 for (lsp = list_head(&lofi_list); lsp != NULL; 2588 lsp = list_next(&lofi_list, lsp)) { 2589 if (lofi_access(lsp) != 0) 2590 continue; 2591 2592 if (getminor(lsp->ls_dev) > minor) 2593 minor = getminor(lsp->ls_dev); 2594 } 2595 2596 mutex_exit(&lofi_lock); 2597 2598 error = ddi_copyout(&minor, &lip->li_minor, 2599 sizeof (minor), flag); 2600 if (error) 2601 return (EFAULT); 2602 return (0); 2603 2604 case LOFI_CHECK_COMPRESSED: 2605 return (lofi_get_info(dev, lip, LOFI_CHECK_COMPRESSED, 2606 credp, flag)); 2607 default: 2608 return (EINVAL); 2609 } 2610 } 2611 2612 mutex_enter(&lofi_lock); 2613 lsp = ddi_get_soft_state(lofi_statep, minor); 2614 if (lsp == NULL || lsp->ls_vp_closereq) { 2615 mutex_exit(&lofi_lock); 2616 return (ENXIO); 2617 } 2618 mutex_exit(&lofi_lock); 2619 2620 /* 2621 * We explicitly allow DKIOCSTATE, but all other ioctls should fail with 2622 * EIO as if the device was no longer present. 2623 */ 2624 if (lsp->ls_vp == NULL && cmd != DKIOCSTATE) 2625 return (EIO); 2626 2627 /* these are for faking out utilities like newfs */ 2628 switch (cmd) { 2629 case DKIOCGVTOC: 2630 switch (ddi_model_convert_from(flag & FMODELS)) { 2631 case DDI_MODEL_ILP32: { 2632 struct vtoc32 vtoc32; 2633 2634 vtoctovtoc32(lsp->ls_vtoc, vtoc32); 2635 if (ddi_copyout(&vtoc32, (void *)arg, 2636 sizeof (struct vtoc32), flag)) 2637 return (EFAULT); 2638 break; 2639 } 2640 2641 case DDI_MODEL_NONE: 2642 if (ddi_copyout(&lsp->ls_vtoc, (void *)arg, 2643 sizeof (struct vtoc), flag)) 2644 return (EFAULT); 2645 break; 2646 } 2647 return (0); 2648 case DKIOCINFO: 2649 error = ddi_copyout(&lsp->ls_ci, (void *)arg, 2650 sizeof (struct dk_cinfo), flag); 2651 if (error) 2652 return (EFAULT); 2653 return (0); 2654 case DKIOCG_VIRTGEOM: 2655 case DKIOCG_PHYGEOM: 2656 case DKIOCGGEOM: 2657 error = ddi_copyout(&lsp->ls_dkg, (void *)arg, 2658 sizeof (struct dk_geom), flag); 2659 if (error) 2660 return (EFAULT); 2661 return (0); 2662 case DKIOCSTATE: 2663 /* 2664 * Normally, lofi devices are always in the INSERTED state. If 2665 * a device is forcefully unmapped, then the device transitions 2666 * to the DKIO_DEV_GONE state. 
2667 */ 2668 if (ddi_copyin((void *)arg, &dkstate, sizeof (dkstate), 2669 flag) != 0) 2670 return (EFAULT); 2671 2672 mutex_enter(&lsp->ls_vp_lock); 2673 lsp->ls_vp_iocount++; 2674 while (((dkstate == DKIO_INSERTED && lsp->ls_vp != NULL) || 2675 (dkstate == DKIO_DEV_GONE && lsp->ls_vp == NULL)) && 2676 !lsp->ls_vp_closereq) { 2677 /* 2678 * By virtue of having the device open, we know that 2679 * 'lsp' will remain valid when we return. 2680 */ 2681 if (!cv_wait_sig(&lsp->ls_vp_cv, 2682 &lsp->ls_vp_lock)) { 2683 lsp->ls_vp_iocount--; 2684 cv_broadcast(&lsp->ls_vp_cv); 2685 mutex_exit(&lsp->ls_vp_lock); 2686 return (EINTR); 2687 } 2688 } 2689 2690 dkstate = (!lsp->ls_vp_closereq && lsp->ls_vp != NULL ? 2691 DKIO_INSERTED : DKIO_DEV_GONE); 2692 lsp->ls_vp_iocount--; 2693 cv_broadcast(&lsp->ls_vp_cv); 2694 mutex_exit(&lsp->ls_vp_lock); 2695 2696 if (ddi_copyout(&dkstate, (void *)arg, 2697 sizeof (dkstate), flag) != 0) 2698 return (EFAULT); 2699 return (0); 2700 default: 2701 return (ENOTTY); 2702 } 2703 } 2704 2705 static struct cb_ops lofi_cb_ops = { 2706 lofi_open, /* open */ 2707 lofi_close, /* close */ 2708 lofi_strategy, /* strategy */ 2709 nodev, /* print */ 2710 nodev, /* dump */ 2711 lofi_read, /* read */ 2712 lofi_write, /* write */ 2713 lofi_ioctl, /* ioctl */ 2714 nodev, /* devmap */ 2715 nodev, /* mmap */ 2716 nodev, /* segmap */ 2717 nochpoll, /* poll */ 2718 ddi_prop_op, /* prop_op */ 2719 0, /* streamtab */ 2720 D_64BIT | D_NEW | D_MP, /* Driver compatibility flag */ 2721 CB_REV, 2722 lofi_aread, 2723 lofi_awrite 2724 }; 2725 2726 static struct dev_ops lofi_ops = { 2727 DEVO_REV, /* devo_rev, */ 2728 0, /* refcnt */ 2729 lofi_info, /* info */ 2730 nulldev, /* identify */ 2731 nulldev, /* probe */ 2732 lofi_attach, /* attach */ 2733 lofi_detach, /* detach */ 2734 nodev, /* reset */ 2735 &lofi_cb_ops, /* driver operations */ 2736 NULL, /* no bus operations */ 2737 NULL, /* power */ 2738 ddi_quiesce_not_needed, /* quiesce */ 2739 }; 2740 2741 static struct modldrv modldrv = { 2742 &mod_driverops, 2743 "loopback file driver", 2744 &lofi_ops, 2745 }; 2746 2747 static struct modlinkage modlinkage = { 2748 MODREV_1, 2749 &modldrv, 2750 NULL 2751 }; 2752 2753 int 2754 _init(void) 2755 { 2756 int error; 2757 2758 list_create(&lofi_list, sizeof (struct lofi_state), 2759 offsetof(struct lofi_state, ls_list)); 2760 2761 error = ddi_soft_state_init(&lofi_statep, 2762 sizeof (struct lofi_state), 0); 2763 if (error) 2764 return (error); 2765 2766 mutex_init(&lofi_lock, NULL, MUTEX_DRIVER, NULL); 2767 2768 error = mod_install(&modlinkage); 2769 if (error) { 2770 mutex_destroy(&lofi_lock); 2771 ddi_soft_state_fini(&lofi_statep); 2772 list_destroy(&lofi_list); 2773 } 2774 2775 return (error); 2776 } 2777 2778 int 2779 _fini(void) 2780 { 2781 int error; 2782 2783 mutex_enter(&lofi_lock); 2784 2785 if (!list_is_empty(&lofi_list)) { 2786 mutex_exit(&lofi_lock); 2787 return (EBUSY); 2788 } 2789 2790 mutex_exit(&lofi_lock); 2791 2792 error = mod_remove(&modlinkage); 2793 if (error) 2794 return (error); 2795 2796 mutex_destroy(&lofi_lock); 2797 ddi_soft_state_fini(&lofi_statep); 2798 list_destroy(&lofi_list); 2799 2800 return (error); 2801 } 2802 2803 int 2804 _info(struct modinfo *modinfop) 2805 { 2806 return (mod_info(&modlinkage, modinfop)); 2807 } 2808