/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * lofi (loopback file) driver - allows you to attach a file to a device,
 * which can then be accessed through that device. The simple model is that
 * you tell lofi to open a file, and then use the block device you get as
 * you would any block device. lofi translates access to the block device
 * into I/O on the underlying file. This is mostly useful for
 * mounting images of filesystems.
 *
 * lofi is controlled through /dev/lofictl - this is the only device exported
 * during attach, and is minor number 0. lofiadm communicates with lofi through
 * ioctls on this device. When a file is attached to lofi, block and character
 * devices are exported in /dev/lofi and /dev/rlofi. Currently, these devices
 * are identified by their minor number, and the minor number is also used
 * as the name in /dev/lofi. If we ever decide to support virtual disks,
 * we'll have to divide the minor number space to identify fdisk partitions
 * and slices, and the name will then be the minor number shifted down a
 * few bits. Minor devices are tracked with state structures handled with
 * ddi_soft_state(9F) for simplicity.
 *
 * A file attached to lofi is opened when attached and not closed until
 * explicitly detached from lofi. This seems more sensible than deferring
 * the open until the /dev/lofi device is opened, for a number of reasons.
 * One is that any failure is likely to be noticed by the person (or script)
 * running lofiadm. Another is that it would be a security problem if the
 * file was replaced by another one after being added but before being opened.
 *
 * The only hard part about lofi is the ioctls. In order to support things
 * like 'newfs' on a lofi device, it needs to support certain disk ioctls.
 * So it has to fake disk geometry and partition information. More may need
 * to be faked if your favorite utility doesn't work and you think it should
 * (fdformat doesn't work because it really wants to know the type of floppy
 * controller to talk to, and that didn't seem easy to fake. Or possibly even
 * necessary, since we have mkfs_pcfs now).
 *
 * Normally, a lofi device cannot be detached if it is open (i.e. busy). To
 * support simulation of hotplug events, an optional force flag is provided.
 * If a lofi device is open when a force detach is requested, then the
 * underlying file is closed and any subsequent operations return EIO. When the
 * device is closed for the last time, it will be cleaned up at that time. In
 * addition, the DKIOCSTATE ioctl will return DKIO_DEV_GONE when the device is
 * detached but not removed.
 *
 * Known problems:
 *
 *	UFS logging. Mounting a UFS filesystem image with "logging"
 *	works for basic copy testing but wedges during a build of ON through
 *	that image. Some deadlock in lufs holding the log mutex and then
 *	getting stuck on a buf. So for now, don't do that.
 *
 *	Direct I/O. Since the filesystem data is being cached in the buffer
 *	cache, _and_ again in the underlying filesystem, it's tempting to
 *	enable direct I/O on the underlying file. Don't, because that deadlocks.
 *	I think to fix the cache-twice problem we might need filesystem support.
 *
 *	lofi on itself. The simple lock strategy (lofi_lock) precludes this
 *	because you'll be in lofi_ioctl, holding the lock when you open the
 *	file, which, if it's lofi, will grab lofi_lock. We prevent this for
 *	now, though not using ddi_soft_state(9F) would make it possible to
 *	do. Though it would still be silly.
 *
 * Interesting things to do:
 *
 *	Allow multiple files for each device. A poor-man's metadisk, basically.
 *
 *	Pass-through ioctls on block devices. You can (though it's not
 *	documented) give lofi a block device as a file name. Then we shouldn't
 *	need to fake a geometry. However, it may be relevant if you're replacing
 *	metadisk, or using lofi to get crypto.
 *	It makes sense to do lofiadm -c aes -a /dev/dsk/c0t0d0s4 /dev/lofi/1
 *	and then in /etc/vfstab have an entry for /dev/lofi/1 as /export/home.
 *	In fact this even makes sense if you have lofi "above" metadisk.
 *
 * Encryption:
 *	Each lofi device can have its own symmetric key and cipher.
 *	They are passed to us by lofiadm(1m) in the correct format for use
 *	with the misc/kcf crypto_* routines.
 *
 *	Each block has its own IV, which is calculated in lofi_blk_mech(),
 *	based on the "master" key held in the lsp and the block number of
 *	the buffer.
 */

#include <sys/types.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include <sys/uio.h>
#include <sys/kmem.h>
#include <sys/cred.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/aio_req.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/debug.h>
#include <sys/vnode.h>
#include <sys/lofi.h>
#include <sys/fcntl.h>
#include <sys/pathname.h>
#include <sys/filio.h>
#include <sys/fdio.h>
#include <sys/open.h>
#include <sys/disp.h>
#include <vm/seg_map.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/zmod.h>
#include <sys/crypto/common.h>
#include <sys/crypto/api.h>
#include <LzmaDec.h>

/*
 * The basis for CRYOFF is derived from usr/src/uts/common/sys/fs/ufs_fs.h.
 * Crypto metadata, if it exists, is located at the end of the boot block
 * (BBOFF + BBSIZE, which is SBOFF). The super block and everything after
 * is offset by the size of the crypto metadata which is handled by
 * lsp->ls_crypto_offset.
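 * In the standard UFS layout that works out to byte offset 8192, which is
 * what the CRYOFF define below encodes.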
 */
#define	CRYOFF	((off_t)8192)

#define	NBLOCKS_PROP_NAME	"Nblocks"
#define	SIZE_PROP_NAME		"Size"

#define	SETUP_C_DATA(cd, buf, len)		\
	(cd).cd_format = CRYPTO_DATA_RAW;	\
	(cd).cd_offset = 0;			\
	(cd).cd_miscdata = NULL;		\
	(cd).cd_length = (len);			\
	(cd).cd_raw.iov_base = (buf);		\
	(cd).cd_raw.iov_len = (len);

#define	UIO_CHECK(uio)	\
	if (((uio)->uio_loffset % DEV_BSIZE) != 0 || \
	    ((uio)->uio_resid % DEV_BSIZE) != 0) { \
		return (EINVAL); \
	}

static dev_info_t *lofi_dip = NULL;
static void *lofi_statep = NULL;
static kmutex_t lofi_lock;		/* state lock */

/*
 * Because lofi_taskq_nthreads limits the actual swamping of the device, the
 * maxalloc parameter (lofi_taskq_maxalloc) should be tuned conservatively
 * high. If we want to be assured that the underlying device is always busy,
 * we must be sure that the number of bytes enqueued when the number of
 * enqueued tasks exceeds maxalloc is sufficient to keep the device busy for
 * the duration of the sleep time in taskq_ent_alloc(). That is, lofi should
 * set maxalloc to be the maximum throughput (in bytes per second) of the
 * underlying device divided by the minimum I/O size. We assume a realistic
 * maximum throughput of one hundred megabytes per second; we set maxalloc on
 * the lofi task queue to be 104857600 divided by DEV_BSIZE.
 */
static int lofi_taskq_maxalloc = 104857600 / DEV_BSIZE;
static int lofi_taskq_nthreads = 4;	/* # of taskq threads per device */

uint32_t lofi_max_files = LOFI_MAX_FILES;
const char lofi_crypto_magic[6] = LOFI_CRYPTO_MAGIC;

static int gzip_decompress(void *src, size_t srclen, void *dst,
	size_t *destlen, int level);

static int lzma_decompress(void *src, size_t srclen, void *dst,
	size_t *dstlen, int level);

lofi_compress_info_t lofi_compress_table[LOFI_COMPRESS_FUNCTIONS] = {
	{gzip_decompress,	NULL,	6,	"gzip"},	/* default */
	{gzip_decompress,	NULL,	6,	"gzip-6"},
	{gzip_decompress,	NULL,	9,	"gzip-9"},
	{lzma_decompress,	NULL,	0,	"lzma"}
};

/*ARGSUSED*/
static void
*SzAlloc(void *p, size_t size)
{
	return (kmem_alloc(size, KM_SLEEP));
}

/*ARGSUSED*/
static void
SzFree(void *p, void *address, size_t size)
{
	kmem_free(address, size);
}

static ISzAlloc g_Alloc = { SzAlloc, SzFree };

static int
lofi_busy(void)
{
	minor_t	minor;

	/*
	 * We need to make sure no mappings exist - mod_remove won't
	 * help because the device isn't open.
	 */
	mutex_enter(&lofi_lock);
	for (minor = 1; minor <= lofi_max_files; minor++) {
		if (ddi_get_soft_state(lofi_statep, minor) != NULL) {
			mutex_exit(&lofi_lock);
			return (EBUSY);
		}
	}
	mutex_exit(&lofi_lock);
	return (0);
}

static int
is_opened(struct lofi_state *lsp)
{
	ASSERT(mutex_owned(&lofi_lock));
	return (lsp->ls_chr_open || lsp->ls_blk_open || lsp->ls_lyr_open_count);
}

static int
mark_opened(struct lofi_state *lsp, int otyp)
{
	ASSERT(mutex_owned(&lofi_lock));
	switch (otyp) {
	case OTYP_CHR:
		lsp->ls_chr_open = 1;
		break;
	case OTYP_BLK:
		lsp->ls_blk_open = 1;
		break;
	case OTYP_LYR:
		lsp->ls_lyr_open_count++;
		break;
	default:
		return (-1);
	}
	return (0);
}

static void
mark_closed(struct lofi_state *lsp, int otyp)
{
	ASSERT(mutex_owned(&lofi_lock));
	switch (otyp) {
	case OTYP_CHR:
		lsp->ls_chr_open = 0;
		break;
	case OTYP_BLK:
		lsp->ls_blk_open = 0;
		break;
	case OTYP_LYR:
		lsp->ls_lyr_open_count--;
		break;
	default:
		break;
	}
}

static void
lofi_free_crypto(struct lofi_state *lsp)
{
	ASSERT(mutex_owned(&lofi_lock));

	if (lsp->ls_crypto_enabled) {
		/*
		 * Clean up the crypto state so that it doesn't hang around
		 * in memory after we are done with it.
		 */
		bzero(lsp->ls_key.ck_data,
		    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
		kmem_free(lsp->ls_key.ck_data,
		    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
		lsp->ls_key.ck_data = NULL;
		lsp->ls_key.ck_length = 0;

		if (lsp->ls_mech.cm_param != NULL) {
			kmem_free(lsp->ls_mech.cm_param,
			    lsp->ls_mech.cm_param_len);
			lsp->ls_mech.cm_param = NULL;
			lsp->ls_mech.cm_param_len = 0;
		}

		if (lsp->ls_iv_mech.cm_param != NULL) {
			kmem_free(lsp->ls_iv_mech.cm_param,
			    lsp->ls_iv_mech.cm_param_len);
			lsp->ls_iv_mech.cm_param = NULL;
			lsp->ls_iv_mech.cm_param_len = 0;
		}

		mutex_destroy(&lsp->ls_crypto_lock);
	}
}

static void
lofi_free_handle(dev_t dev, minor_t minor, struct lofi_state *lsp,
    cred_t *credp)
{
	dev_t	newdev;
	char	namebuf[50];

	ASSERT(mutex_owned(&lofi_lock));

	lofi_free_crypto(lsp);

	if (lsp->ls_vp) {
		(void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag,
		    1, 0, credp, NULL);
		VN_RELE(lsp->ls_vp);
		lsp->ls_vp = NULL;
	}

	newdev = makedevice(getmajor(dev), minor);
	(void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME);
	(void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME);

	(void) snprintf(namebuf, sizeof (namebuf), "%d", minor);
	ddi_remove_minor_node(lofi_dip, namebuf);
	(void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor);
	ddi_remove_minor_node(lofi_dip, namebuf);

	kmem_free(lsp->ls_filename, lsp->ls_filename_sz);
	taskq_destroy(lsp->ls_taskq);
	if (lsp->ls_kstat) {
		kstat_delete(lsp->ls_kstat);
		mutex_destroy(&lsp->ls_kstat_lock);
	}

	if (lsp->ls_uncomp_seg_sz > 0) {
		kmem_free(lsp->ls_comp_index_data, lsp->ls_comp_index_data_sz);
		lsp->ls_uncomp_seg_sz = 0;
	}
	ddi_soft_state_free(lofi_statep, minor);
}

/*ARGSUSED*/
static int
lofi_open(dev_t *devp, int flag, int otyp, struct cred *credp)
{
	minor_t	minor;
	struct lofi_state *lsp;

	mutex_enter(&lofi_lock);
	minor = getminor(*devp);
	if (minor == 0) {
		/* master control device */
		/* must be opened exclusively */
		if (((flag & FEXCL) != FEXCL) || (otyp != OTYP_CHR)) {
			mutex_exit(&lofi_lock);
			return (EINVAL);
		}
		lsp = ddi_get_soft_state(lofi_statep, 0);
		if (lsp == NULL) {
			mutex_exit(&lofi_lock);
			return (ENXIO);
		}
		if (is_opened(lsp)) {
			mutex_exit(&lofi_lock);
			return (EBUSY);
		}
		(void) mark_opened(lsp, OTYP_CHR);
		mutex_exit(&lofi_lock);
		return (0);
	}

	/* otherwise, the mapping should already exist */
	lsp = ddi_get_soft_state(lofi_statep, minor);
	if (lsp == NULL) {
		mutex_exit(&lofi_lock);
		return (EINVAL);
	}

	if (lsp->ls_vp == NULL) {
		mutex_exit(&lofi_lock);
		return (ENXIO);
	}

	if (mark_opened(lsp, otyp) == -1) {
		mutex_exit(&lofi_lock);
		return (EINVAL);
	}

	mutex_exit(&lofi_lock);
	return (0);
}

/*ARGSUSED*/
static int
lofi_close(dev_t dev, int flag, int otyp, struct cred *credp)
{
	minor_t	minor;
	struct lofi_state *lsp;

	mutex_enter(&lofi_lock);
	minor = getminor(dev);
	lsp = ddi_get_soft_state(lofi_statep, minor);
	if (lsp == NULL) {
		mutex_exit(&lofi_lock);
		return (EINVAL);
	}
	mark_closed(lsp, otyp);

	/*
	 * If we forcibly closed the underlying device (li_force), or
	 * asked for cleanup (li_cleanup), finish up if we're the last
	 * out of the door.
	 */
	if (minor != 0 && !is_opened(lsp) &&
	    (lsp->ls_cleanup || lsp->ls_vp == NULL))
		lofi_free_handle(dev, minor, lsp, credp);

	mutex_exit(&lofi_lock);
	return (0);
}

/*
 * Sets the mechanism's initialization vector (IV) if one is needed.
 * The IV is computed from the data block number. lsp->ls_mech is
 * altered so that:
 *	lsp->ls_mech.cm_param_len is set to the IV len.
 *	lsp->ls_mech.cm_param is set to the IV.
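 *
 * For the IVM_ENC_BLKNO scheme the block number is copied, right-justified
 * and zero-padded, into an ls_iv_len byte buffer; that buffer is then
 * encrypted with ls_iv_mech and the master key, and the resulting ciphertext
 * is used as the IV for the data block.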
 */
static int
lofi_blk_mech(struct lofi_state *lsp, longlong_t lblkno)
{
	int	ret;
	crypto_data_t cdata;
	char	*iv;
	size_t	iv_len;
	size_t	min;
	void	*data;
	size_t	datasz;

	ASSERT(mutex_owned(&lsp->ls_crypto_lock));

	if (lsp == NULL)
		return (CRYPTO_DEVICE_ERROR);

	/* lsp->ls_mech.cm_param{_len} has already been set for static iv */
	if (lsp->ls_iv_type == IVM_NONE) {
		return (CRYPTO_SUCCESS);
	}

	/*
	 * if kmem already alloced from previous call and it's the same size
	 * we need now, just recycle it; allocate new kmem only if we have to
	 */
	if (lsp->ls_mech.cm_param == NULL ||
	    lsp->ls_mech.cm_param_len != lsp->ls_iv_len) {
		iv_len = lsp->ls_iv_len;
		iv = kmem_zalloc(iv_len, KM_SLEEP);
	} else {
		iv_len = lsp->ls_mech.cm_param_len;
		iv = lsp->ls_mech.cm_param;
		bzero(iv, iv_len);
	}

	switch (lsp->ls_iv_type) {
	case IVM_ENC_BLKNO:
		/* iv is not static, lblkno changes each time */
		data = &lblkno;
		datasz = sizeof (lblkno);
		break;
	default:
		data = 0;
		datasz = 0;
		break;
	}

	/*
	 * write blkno into the iv buffer padded on the left in case
	 * blkno ever grows bigger than its current longlong_t size
	 * or a variation other than blkno is used for the iv data
	 */
	min = MIN(datasz, iv_len);
	bcopy(data, iv + (iv_len - min), min);

	/* encrypt the data in-place to get the IV */
	SETUP_C_DATA(cdata, iv, iv_len);

	ret = crypto_encrypt(&lsp->ls_iv_mech, &cdata, &lsp->ls_key,
	    NULL, NULL, NULL);
	if (ret != CRYPTO_SUCCESS) {
		cmn_err(CE_WARN, "failed to create iv for block %lld: (0x%x)",
		    lblkno, ret);
		if (lsp->ls_mech.cm_param != iv)
			kmem_free(iv, iv_len);

		return (ret);
	}

	/* clean up the iv from the last computation */
	if (lsp->ls_mech.cm_param != NULL && lsp->ls_mech.cm_param != iv)
		kmem_free(lsp->ls_mech.cm_param, lsp->ls_mech.cm_param_len);

	lsp->ls_mech.cm_param_len = iv_len;
	lsp->ls_mech.cm_param = iv;

	return (CRYPTO_SUCCESS);
}

/*
 * Performs encryption and decryption of a chunk of data of size "len",
 * one DEV_BSIZE block at a time. "len" is assumed to be a multiple of
 * DEV_BSIZE.
 */
static int
lofi_crypto(struct lofi_state *lsp, struct buf *bp, caddr_t plaintext,
    caddr_t ciphertext, size_t len, boolean_t op_encrypt)
{
	crypto_data_t cdata;
	crypto_data_t wdata;
	int ret;
	longlong_t lblkno = bp->b_lblkno;

	mutex_enter(&lsp->ls_crypto_lock);

	/*
	 * though we could encrypt/decrypt entire "len" chunk of data, we need
	 * to break it into DEV_BSIZE pieces to capture blkno incrementing
	 */
	SETUP_C_DATA(cdata, plaintext, len);
	cdata.cd_length = DEV_BSIZE;
	if (ciphertext != NULL) {	/* not in-place crypto */
		SETUP_C_DATA(wdata, ciphertext, len);
		wdata.cd_length = DEV_BSIZE;
	}

	do {
		ret = lofi_blk_mech(lsp, lblkno);
		if (ret != CRYPTO_SUCCESS)
			continue;

		if (op_encrypt) {
			ret = crypto_encrypt(&lsp->ls_mech, &cdata,
			    &lsp->ls_key, NULL,
			    ((ciphertext != NULL) ? &wdata : NULL), NULL);
		} else {
			ret = crypto_decrypt(&lsp->ls_mech, &cdata,
			    &lsp->ls_key, NULL,
			    ((ciphertext != NULL) ? &wdata : NULL), NULL);
		}

		cdata.cd_offset += DEV_BSIZE;
		if (ciphertext != NULL)
			wdata.cd_offset += DEV_BSIZE;
		lblkno++;
	} while (ret == CRYPTO_SUCCESS && cdata.cd_offset < len);

	mutex_exit(&lsp->ls_crypto_lock);

	if (ret != CRYPTO_SUCCESS) {
		cmn_err(CE_WARN, "%s failed for block %lld: (0x%x)",
		    op_encrypt ? "crypto_encrypt()" : "crypto_decrypt()",
		    lblkno, ret);
	}

	return (ret);
}

#define	RDWR_RAW	1
#define	RDWR_BCOPY	2

static int
lofi_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
    struct lofi_state *lsp, size_t len, int method, caddr_t bcopy_locn)
{
	ssize_t resid;
	int isread;
	int error;

	/*
	 * Handles reads/writes for both plain and encrypted lofi
	 * Note: offset is already shifted by lsp->ls_crypto_offset
	 * when it gets here.
	 */

	isread = bp->b_flags & B_READ;
	if (isread) {
		if (method == RDWR_BCOPY) {
			/* DO NOT update bp->b_resid for bcopy */
			bcopy(bcopy_locn, bufaddr, len);
			error = 0;
		} else {		/* RDWR_RAW */
			error = vn_rdwr(UIO_READ, lsp->ls_vp, bufaddr, len,
			    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred,
			    &resid);
			bp->b_resid = resid;
		}
		if (lsp->ls_crypto_enabled && error == 0) {
			if (lofi_crypto(lsp, bp, bufaddr, NULL, len,
			    B_FALSE) != CRYPTO_SUCCESS) {
				/*
				 * XXX: original code didn't set residual
				 * back to len because no error was expected
				 * from bcopy() if encryption is not enabled
				 */
				if (method != RDWR_BCOPY)
					bp->b_resid = len;
				error = EIO;
			}
		}
		return (error);
	} else {
		void *iobuf = bufaddr;

		if (lsp->ls_crypto_enabled) {
			/* don't do in-place crypto to keep bufaddr intact */
			iobuf = kmem_alloc(len, KM_SLEEP);
			if (lofi_crypto(lsp, bp, bufaddr, iobuf, len,
			    B_TRUE) != CRYPTO_SUCCESS) {
				kmem_free(iobuf, len);
				if (method != RDWR_BCOPY)
					bp->b_resid = len;
				return (EIO);
			}
		}
		if (method == RDWR_BCOPY) {
			/* DO NOT update bp->b_resid for bcopy */
			bcopy(iobuf, bcopy_locn, len);
			error = 0;
		} else {		/* RDWR_RAW */
			error = vn_rdwr(UIO_WRITE, lsp->ls_vp, iobuf, len,
			    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred,
			    &resid);
			bp->b_resid = resid;
		}
		if (lsp->ls_crypto_enabled) {
			kmem_free(iobuf, len);
		}
		return (error);
	}
}

static int
lofi_mapped_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
    struct lofi_state *lsp)
{
	int error;
	offset_t alignedoffset, mapoffset;
	size_t	xfersize;
	int	isread;
	int	smflags;
	caddr_t	mapaddr;
	size_t	len;
	enum seg_rw srw;
	int	save_error;

	/*
	 * Note: offset is already shifted by lsp->ls_crypto_offset
	 * when it gets here.
	 */
	if (lsp->ls_crypto_enabled)
		ASSERT(lsp->ls_vp_comp_size == lsp->ls_vp_size);

	/*
	 * segmap always gives us an 8K (MAXBSIZE) chunk, aligned on
	 * an 8K boundary, but the buf transfer address may not be
	 * aligned on more than a 512-byte boundary (we don't enforce
	 * that even though we could). This matters since the initial
	 * part of the transfer may not start at offset 0 within the
	 * segmap'd chunk. So we have to compensate for that with
	 * 'mapoffset'. Subsequent chunks always start off at the
	 * beginning, and the last is capped by b_resid
	 *
	 * Visually, where "|" represents page map boundaries:
	 *   alignedoffset (mapaddr begins at this segmap boundary)
	 *    |   offset (from beginning of file)
	 *    |    |      len
	 *    v    v       v
	 * ===|====X========|====...======|========X====|====
	 *         /-------------...---------------/
	 *         ^ bp->b_bcount/bp->b_resid at start
	 *    /----/--------/----...------/--------/
	 *    ^    ^        ^             ^        ^
	 *    |    |        |             |        nth xfersize (<= MAXBSIZE)
	 *    |    |        2nd thru n-1st xfersize (= MAXBSIZE)
	 *    |    1st xfersize (<= MAXBSIZE)
	 *    mapoffset (offset into 1st segmap, non-0 1st time, 0 thereafter)
	 *
	 * Notes: "alignedoffset" is "offset" rounded down to nearest
	 * MAXBSIZE boundary. "len" is next page boundary of size
	 * PAGESIZE after "alignedoffset".
	 */
	mapoffset = offset & MAXBOFFSET;
	alignedoffset = offset - mapoffset;
	bp->b_resid = bp->b_bcount;
	isread = bp->b_flags & B_READ;
	srw = isread ? S_READ : S_WRITE;
	do {
		xfersize = MIN(lsp->ls_vp_comp_size - offset,
		    MIN(MAXBSIZE - mapoffset, bp->b_resid));
		len = roundup(mapoffset + xfersize, PAGESIZE);
		mapaddr = segmap_getmapflt(segkmap, lsp->ls_vp,
		    alignedoffset, MAXBSIZE, 1, srw);
		/*
		 * Now fault in the pages. This lets us check
		 * for errors before we reference mapaddr and
		 * try to resolve the fault in bcopy (which would
		 * panic instead). And this can easily happen,
		 * particularly if you've lofi'd a file over NFS
		 * and someone deletes the file on the server.
		 */
		error = segmap_fault(kas.a_hat, segkmap, mapaddr,
		    len, F_SOFTLOCK, srw);
		if (error) {
			(void) segmap_release(segkmap, mapaddr, 0);
			if (FC_CODE(error) == FC_OBJERR)
				error = FC_ERRNO(error);
			else
				error = EIO;
			break;
		}
		/* error may be non-zero for encrypted lofi */
		error = lofi_rdwr(bufaddr, 0, bp, lsp, xfersize,
		    RDWR_BCOPY, mapaddr + mapoffset);
		if (error == 0) {
			bp->b_resid -= xfersize;
			bufaddr += xfersize;
			offset += xfersize;
		}
		smflags = 0;
		if (isread) {
			smflags |= SM_FREE;
			/*
			 * If we're reading an entire page starting
			 * at a page boundary, there's a good chance
			 * we won't need it again. Put it on the
			 * head of the freelist.
			 */
			if (mapoffset == 0 && xfersize == MAXBSIZE)
				smflags |= SM_DONTNEED;
		} else {
			if (error == 0)		/* write back good pages */
				smflags |= SM_WRITE;
		}
		(void) segmap_fault(kas.a_hat, segkmap, mapaddr,
		    len, F_SOFTUNLOCK, srw);
		save_error = segmap_release(segkmap, mapaddr, smflags);
		if (error == 0)
			error = save_error;
		/* only the first map may start partial */
		mapoffset = 0;
		alignedoffset += MAXBSIZE;
	} while ((error == 0) && (bp->b_resid > 0) &&
	    (offset < lsp->ls_vp_comp_size));

	return (error);
}

/*ARGSUSED*/
static int
gzip_decompress(void *src, size_t srclen, void *dst,
    size_t *dstlen, int level)
{
	ASSERT(*dstlen >= srclen);

	if (z_uncompress(dst, dstlen, src, srclen) != Z_OK)
		return (-1);
	return (0);
}

#define	LZMA_HEADER_SIZE	(LZMA_PROPS_SIZE + 8)
/*ARGSUSED*/
static int
lzma_decompress(void *src, size_t srclen, void *dst,
	size_t *dstlen, int level)
{
	size_t insizepure;
	void *actual_src;
	ELzmaStatus status;

	insizepure = srclen - LZMA_HEADER_SIZE;
	actual_src = (void *)((Byte *)src + LZMA_HEADER_SIZE);

	if (LzmaDecode((Byte *)dst, (size_t *)dstlen,
	    (const Byte *)actual_src, &insizepure,
	    (const Byte *)src, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &status,
	    &g_Alloc) != SZ_OK) {
		return (-1);
	}
	return (0);
}

/*
 * This is basically what strategy used to be before we found we
 * needed task queues.
 */
static void
lofi_strategy_task(void *arg)
{
	struct buf *bp = (struct buf *)arg;
	int error;
	struct lofi_state *lsp;
	offset_t offset;
	caddr_t	bufaddr;
	size_t	len;
	size_t	xfersize;
	boolean_t bufinited = B_FALSE;

	lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev));
	if (lsp == NULL) {
		error = ENXIO;
		goto errout;
	}
	if (lsp->ls_kstat) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_to_runq(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}
	bp_mapin(bp);
	bufaddr = bp->b_un.b_addr;
	offset = bp->b_lblkno * DEV_BSIZE;	/* offset within file */
	if (lsp->ls_crypto_enabled) {
		/* encrypted data really begins after crypto header */
		offset += lsp->ls_crypto_offset;
	}
	len = bp->b_bcount;
	bufinited = B_TRUE;

	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
		error = EIO;
		goto errout;
	}

	/*
	 * We used to always use vn_rdwr here, but we cannot do that because
	 * we might decide to read or write from the underlying
	 * file during this call, which would be a deadlock because
	 * we have the rw_lock. So instead we page, unless it's not
	 * mappable or it's a character device or it's an encrypted lofi.
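	 * (Those three cases take the RDWR_RAW vn_rdwr() path below;
	 * everything else, including compressed images, goes through
	 * lofi_mapped_rdwr() and segmap.)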
	 */
	if ((lsp->ls_vp->v_flag & VNOMAP) || (lsp->ls_vp->v_type == VCHR) ||
	    lsp->ls_crypto_enabled) {
		error = lofi_rdwr(bufaddr, offset, bp, lsp, len, RDWR_RAW,
		    NULL);
	} else if (lsp->ls_uncomp_seg_sz == 0) {
		error = lofi_mapped_rdwr(bufaddr, offset, bp, lsp);
	} else {
		unsigned char *compressed_seg = NULL, *cmpbuf;
		unsigned char *uncompressed_seg = NULL;
		lofi_compress_info_t *li;
		size_t oblkcount;
		unsigned long seglen;
		uint64_t sblkno, eblkno, cmpbytes;
		offset_t sblkoff, eblkoff;
		u_offset_t salign, ealign;
		u_offset_t sdiff;
		uint32_t comp_data_sz;
		uint64_t i;

		/*
		 * From here on we're dealing primarily with compressed files
		 */
		ASSERT(!lsp->ls_crypto_enabled);

		/*
		 * Compressed files can only be read from and
		 * not written to
		 */
		if (!(bp->b_flags & B_READ)) {
			bp->b_resid = bp->b_bcount;
			error = EROFS;
			goto done;
		}

		ASSERT(lsp->ls_comp_algorithm_index >= 0);
		li = &lofi_compress_table[lsp->ls_comp_algorithm_index];
		/*
		 * Compute starting and ending compressed segment numbers
		 * We use only bitwise operations avoiding division and
		 * modulus because we enforce the compression segment size
		 * to a power of 2
		 */
		sblkno = offset >> lsp->ls_comp_seg_shift;
		sblkoff = offset & (lsp->ls_uncomp_seg_sz - 1);
		eblkno = (offset + bp->b_bcount) >> lsp->ls_comp_seg_shift;
		eblkoff = (offset + bp->b_bcount) & (lsp->ls_uncomp_seg_sz - 1);

		/*
		 * Align start offset to block boundary for segmap
		 */
		salign = lsp->ls_comp_seg_index[sblkno];
		sdiff = salign & (DEV_BSIZE - 1);
		salign -= sdiff;
		if (eblkno >= (lsp->ls_comp_index_sz - 1)) {
			/*
			 * We're dealing with the last segment of
			 * the compressed file -- the size of this
			 * segment *may not* be the same as the
			 * segment size for the file
			 */
			eblkoff = (offset + bp->b_bcount) &
			    (lsp->ls_uncomp_last_seg_sz - 1);
			ealign = lsp->ls_vp_comp_size;
		} else {
			ealign = lsp->ls_comp_seg_index[eblkno + 1];
		}

		/*
		 * Preserve original request parameters
		 */
		oblkcount = bp->b_bcount;

		/*
		 * Assign the calculated parameters
		 */
		comp_data_sz = ealign - salign;
		bp->b_bcount = comp_data_sz;

		/*
		 * Allocate fixed size memory blocks to hold compressed
		 * segments and one uncompressed segment since we
		 * uncompress segments one at a time
		 */
		compressed_seg = kmem_alloc(bp->b_bcount, KM_SLEEP);
		uncompressed_seg = kmem_alloc(lsp->ls_uncomp_seg_sz, KM_SLEEP);
		/*
		 * Map in the calculated number of blocks
		 */
		error = lofi_mapped_rdwr((caddr_t)compressed_seg, salign,
		    bp, lsp);

		bp->b_bcount = oblkcount;
		bp->b_resid = oblkcount;
		if (error != 0)
			goto done;

		/*
		 * We have the compressed blocks, now uncompress them
		 */
		cmpbuf = compressed_seg + sdiff;
		for (i = sblkno; i <= eblkno; i++) {
			ASSERT(i < lsp->ls_comp_index_sz - 1);

			/*
			 * The last segment is special in that it is
			 * most likely not going to be the same
			 * (uncompressed) size as the other segments.
			 */
			if (i == (lsp->ls_comp_index_sz - 2)) {
				seglen = lsp->ls_uncomp_last_seg_sz;
			} else {
				seglen = lsp->ls_uncomp_seg_sz;
			}

			/*
			 * Each of the segment index entries contains
			 * the starting block number for that segment.
			 * The number of compressed bytes in a segment
			 * is thus the difference between the starting
			 * block number of this segment and the starting
			 * block number of the next segment.
			 */
			cmpbytes = lsp->ls_comp_seg_index[i + 1] -
			    lsp->ls_comp_seg_index[i];

			/*
			 * The first byte in a compressed segment is a flag
			 * that indicates whether this segment is compressed
			 * at all
			 */
			if (*cmpbuf == UNCOMPRESSED) {
				bcopy((cmpbuf + SEGHDR), uncompressed_seg,
				    (cmpbytes - SEGHDR));
			} else {
				if (li->l_decompress((cmpbuf + SEGHDR),
				    (cmpbytes - SEGHDR), uncompressed_seg,
				    &seglen, li->l_level) != 0) {
					error = EIO;
					goto done;
				}
			}

			/*
			 * Determine how much uncompressed data we
			 * have to copy and copy it
			 */
			xfersize = lsp->ls_uncomp_seg_sz - sblkoff;
			if (i == eblkno)
				xfersize -= (lsp->ls_uncomp_seg_sz - eblkoff);

			bcopy((uncompressed_seg + sblkoff), bufaddr, xfersize);

			cmpbuf += cmpbytes;
			bufaddr += xfersize;
			bp->b_resid -= xfersize;
			sblkoff = 0;

			if (bp->b_resid == 0)
				break;
		}
done:
		if (compressed_seg != NULL)
			kmem_free(compressed_seg, comp_data_sz);
		if (uncompressed_seg != NULL)
			kmem_free(uncompressed_seg, lsp->ls_uncomp_seg_sz);
	} /* end of handling compressed files */

errout:
	if (bufinited && lsp->ls_kstat) {
		size_t n_done = bp->b_bcount - bp->b_resid;
		kstat_io_t *kioptr;

		mutex_enter(lsp->ls_kstat->ks_lock);
		kioptr = KSTAT_IO_PTR(lsp->ls_kstat);
		if (bp->b_flags & B_READ) {
			kioptr->nread += n_done;
			kioptr->reads++;
		} else {
			kioptr->nwritten += n_done;
			kioptr->writes++;
		}
		kstat_runq_exit(kioptr);
		mutex_exit(lsp->ls_kstat->ks_lock);
	}

	mutex_enter(&lsp->ls_vp_lock);
	if (--lsp->ls_vp_iocount == 0)
		cv_broadcast(&lsp->ls_vp_cv);
	mutex_exit(&lsp->ls_vp_lock);

	bioerror(bp, error);
	biodone(bp);
}

static int
lofi_strategy(struct buf *bp)
{
	struct lofi_state *lsp;
	offset_t	offset;

	/*
	 * We cannot just do I/O here, because the current thread
	 * _might_ end up back in here because the underlying filesystem
	 * wants a buffer, which eventually gets into bio_recycle and
	 * might call into lofi to write out a delayed-write buffer.
	 * This is bad if the filesystem above lofi is the same as below.
	 *
	 * We could come up with a complex strategy using threads to
	 * do the I/O asynchronously, or we could use task queues. task
	 * queues were incredibly easy so they win.
	 */
	lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev));
	if (lsp == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	mutex_enter(&lsp->ls_vp_lock);
	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
		bioerror(bp, EIO);
		biodone(bp);
		mutex_exit(&lsp->ls_vp_lock);
		return (0);
	}

	offset = bp->b_lblkno * DEV_BSIZE;	/* offset within file */
	if (lsp->ls_crypto_enabled) {
		/* encrypted data really begins after crypto header */
		offset += lsp->ls_crypto_offset;
	}
	if (offset == lsp->ls_vp_size) {
		/* EOF */
		if ((bp->b_flags & B_READ) != 0) {
			bp->b_resid = bp->b_bcount;
			bioerror(bp, 0);
		} else {
			/* writes should fail */
			bioerror(bp, ENXIO);
		}
		biodone(bp);
		mutex_exit(&lsp->ls_vp_lock);
		return (0);
	}
	if (offset > lsp->ls_vp_size) {
		bioerror(bp, ENXIO);
		biodone(bp);
		mutex_exit(&lsp->ls_vp_lock);
		return (0);
	}
	lsp->ls_vp_iocount++;
	mutex_exit(&lsp->ls_vp_lock);

	if (lsp->ls_kstat) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_enter(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}
	(void) taskq_dispatch(lsp->ls_taskq, lofi_strategy_task, bp, KM_SLEEP);
	return (0);
}

/*ARGSUSED2*/
static int
lofi_read(dev_t dev, struct uio *uio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(uio);
	return (physio(lofi_strategy, NULL, dev, B_READ, minphys, uio));
}

/*ARGSUSED2*/
static int
lofi_write(dev_t dev, struct uio *uio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(uio);
	return (physio(lofi_strategy, NULL, dev, B_WRITE, minphys, uio));
}

/*ARGSUSED2*/
static int
lofi_aread(dev_t dev, struct aio_req *aio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(aio->aio_uio);
	return (aphysio(lofi_strategy, anocancel, dev, B_READ, minphys, aio));
}

/*ARGSUSED2*/
static int
lofi_awrite(dev_t dev, struct aio_req *aio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(aio->aio_uio);
	return (aphysio(lofi_strategy, anocancel, dev, B_WRITE, minphys, aio));
}

/*ARGSUSED*/
static int
lofi_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = lofi_dip;
		return (DDI_SUCCESS);
	case DDI_INFO_DEVT2INSTANCE:
		*result = 0;
		return (DDI_SUCCESS);
	}
	return (DDI_FAILURE);
}

static int
lofi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	error;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);
	error = ddi_soft_state_zalloc(lofi_statep, 0);
	if (error == DDI_FAILURE) {
		return (DDI_FAILURE);
	}
	error = ddi_create_minor_node(dip, LOFI_CTL_NODE, S_IFCHR, 0,
	    DDI_PSEUDO, NULL);
	if (error == DDI_FAILURE) {
		ddi_soft_state_free(lofi_statep, 0);
		return (DDI_FAILURE);
	}
	/* driver handles kernel-issued IOCTLs */
	if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
		ddi_remove_minor_node(dip, NULL);
		ddi_soft_state_free(lofi_statep, 0);
		return (DDI_FAILURE);
	}
	lofi_dip = dip;
	ddi_report_dev(dip);
	return (DDI_SUCCESS);
}

static int
lofi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);
	if (lofi_busy())
		return (DDI_FAILURE);
	lofi_dip = NULL;
	ddi_remove_minor_node(dip, NULL);
	ddi_prop_remove_all(dip);
	ddi_soft_state_free(lofi_statep, 0);
	return (DDI_SUCCESS);
}

/*
 * With addition of encryption, be careful that encryption key is wiped before
 * kernel memory structures are freed, and also that key is not accidentally
 * passed out into userland structures.
 */
static void
free_lofi_ioctl(struct lofi_ioctl *klip)
{
	/* Make sure this encryption key doesn't stick around */
	bzero(klip->li_key, sizeof (klip->li_key));
	kmem_free(klip, sizeof (struct lofi_ioctl));
}

/*
 * These two just simplify the rest of the ioctls that need to copyin/out
 * the lofi_ioctl structure.
 */
struct lofi_ioctl *
copy_in_lofi_ioctl(const struct lofi_ioctl *ulip, int flag)
{
	struct lofi_ioctl *klip;
	int	error;

	klip = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP);
	error = ddi_copyin(ulip, klip, sizeof (struct lofi_ioctl), flag);
	if (error) {
		free_lofi_ioctl(klip);
		return (NULL);
	}

	/* make sure filename is always null-terminated */
	klip->li_filename[MAXPATHLEN-1] = '\0';

	/* validate minor number */
	if (klip->li_minor > lofi_max_files) {
		free_lofi_ioctl(klip);
		cmn_err(CE_WARN, "attempt to map more than lofi_max_files (%d)",
		    lofi_max_files);
		return (NULL);
	}
	return (klip);
}

int
copy_out_lofi_ioctl(const struct lofi_ioctl *klip, struct lofi_ioctl *ulip,
	int flag)
{
	int	error;

	/*
	 * NOTE: Do NOT copy the crypto_key_t "back" to userland.
	 * This ensures that an attacker can't trivially find the
	 * key for a mapping just by issuing the ioctl.
	 *
	 * It can still be found by poking around in kmem with mdb(1),
	 * but there is no point in making it easy when the info isn't
	 * of any use in this direction anyway.
	 *
	 * Either way we don't actually have the raw key stored in
	 * a form that we can get it anyway, since we just used it
	 * to create a ctx template and didn't keep "the original".
	 */
	error = ddi_copyout(klip, ulip, sizeof (struct lofi_ioctl), flag);
	if (error)
		return (EFAULT);
	return (0);
}

/*
 * Return the minor number 'filename' is mapped to, if it is.
 */
static int
file_to_minor(char *filename)
{
	minor_t	minor;
	struct lofi_state *lsp;

	ASSERT(mutex_owned(&lofi_lock));
	for (minor = 1; minor <= lofi_max_files; minor++) {
		lsp = ddi_get_soft_state(lofi_statep, minor);
		if (lsp == NULL)
			continue;
		if (strcmp(lsp->ls_filename, filename) == 0)
			return (minor);
	}
	return (0);
}

/*
 * lofiadm does some validation, but since Joe Random (or crashme) could
 * do our ioctls, we need to do some validation too.
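 * In particular the filename must be an absolute path and must not refer
 * back into /dev/lofi or /dev/rlofi, which is what valid_filename() below
 * checks.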
 */
static int
valid_filename(const char *filename)
{
	static char *blkprefix = "/dev/" LOFI_BLOCK_NAME "/";
	static char *charprefix = "/dev/" LOFI_CHAR_NAME "/";

	/* must be absolute path */
	if (filename[0] != '/')
		return (0);
	/* must not be lofi */
	if (strncmp(filename, blkprefix, strlen(blkprefix)) == 0)
		return (0);
	if (strncmp(filename, charprefix, strlen(charprefix)) == 0)
		return (0);
	return (1);
}

/*
 * Fakes up a disk geometry, and one big partition, based on the size
 * of the file. This is needed because we allow newfs'ing the device,
 * and newfs will do several disk ioctls to figure out the geometry and
 * partition information. It uses that information to determine the parameters
 * to pass to mkfs. Geometry is pretty much irrelevant these days, but we
 * have to support it.
 */
static void
fake_disk_geometry(struct lofi_state *lsp)
{
	u_offset_t dsize = lsp->ls_vp_size - lsp->ls_crypto_offset;

	/* dk_geom - see dkio(7I) */
	/*
	 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs
	 * of sectors), but that breaks programs like fdisk which want to
	 * partition a disk by cylinder. With one cylinder, you can't create
	 * an fdisk partition and put pcfs on it for testing (hard to pick
	 * a number between one and one).
	 *
	 * The cheezy floppy test is an attempt to not have too few cylinders
	 * for a small file, or so many on a big file that you waste space
	 * for backup superblocks or cylinder group structures.
	 */
	if (dsize < (2 * 1024 * 1024)) /* floppy? */
		lsp->ls_dkg.dkg_ncyl = dsize / (100 * 1024);
	else
		lsp->ls_dkg.dkg_ncyl = dsize / (300 * 1024);
	/* in case the file is < 100k */
	if (lsp->ls_dkg.dkg_ncyl == 0)
		lsp->ls_dkg.dkg_ncyl = 1;
	lsp->ls_dkg.dkg_acyl = 0;
	lsp->ls_dkg.dkg_bcyl = 0;
	lsp->ls_dkg.dkg_nhead = 1;
	lsp->ls_dkg.dkg_obs1 = 0;
	lsp->ls_dkg.dkg_intrlv = 0;
	lsp->ls_dkg.dkg_obs2 = 0;
	lsp->ls_dkg.dkg_obs3 = 0;
	lsp->ls_dkg.dkg_apc = 0;
	lsp->ls_dkg.dkg_rpm = 7200;
	lsp->ls_dkg.dkg_pcyl = lsp->ls_dkg.dkg_ncyl + lsp->ls_dkg.dkg_acyl;
	lsp->ls_dkg.dkg_nsect = dsize / (DEV_BSIZE * lsp->ls_dkg.dkg_ncyl);
	lsp->ls_dkg.dkg_write_reinstruct = 0;
	lsp->ls_dkg.dkg_read_reinstruct = 0;

	/* vtoc - see dkio(7I) */
	bzero(&lsp->ls_vtoc, sizeof (struct vtoc));
	lsp->ls_vtoc.v_sanity = VTOC_SANE;
	lsp->ls_vtoc.v_version = V_VERSION;
	(void) strncpy(lsp->ls_vtoc.v_volume, LOFI_DRIVER_NAME,
	    sizeof (lsp->ls_vtoc.v_volume));
	lsp->ls_vtoc.v_sectorsz = DEV_BSIZE;
	lsp->ls_vtoc.v_nparts = 1;
	lsp->ls_vtoc.v_part[0].p_tag = V_UNASSIGNED;

	/*
	 * A compressed file is read-only, other files can
	 * be read-write
	 */
	if (lsp->ls_uncomp_seg_sz > 0) {
		lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT | V_RONLY;
	} else {
		lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT;
	}
	lsp->ls_vtoc.v_part[0].p_start = (daddr_t)0;
	/*
	 * The partition size cannot just be the number of sectors, because
	 * that might not end on a cylinder boundary. And if that's the case,
	 * newfs/mkfs will print a scary warning. So just figure the size
	 * based on the number of cylinders and sectors/cylinder.
	 */
	lsp->ls_vtoc.v_part[0].p_size = lsp->ls_dkg.dkg_pcyl *
	    lsp->ls_dkg.dkg_nsect * lsp->ls_dkg.dkg_nhead;

	/* dk_cinfo - see dkio(7I) */
	bzero(&lsp->ls_ci, sizeof (struct dk_cinfo));
	(void) strcpy(lsp->ls_ci.dki_cname, LOFI_DRIVER_NAME);
	lsp->ls_ci.dki_ctype = DKC_MD;
	lsp->ls_ci.dki_flags = 0;
	lsp->ls_ci.dki_cnum = 0;
	lsp->ls_ci.dki_addr = 0;
	lsp->ls_ci.dki_space = 0;
	lsp->ls_ci.dki_prio = 0;
	lsp->ls_ci.dki_vec = 0;
	(void) strcpy(lsp->ls_ci.dki_dname, LOFI_DRIVER_NAME);
	lsp->ls_ci.dki_unit = 0;
	lsp->ls_ci.dki_slave = 0;
	lsp->ls_ci.dki_partition = 0;
	/*
	 * newfs uses this to set maxcontig. Must not be < 16, or it
	 * will be 0 when newfs multiplies it by DEV_BSIZE and divides
	 * it by the block size. Then tunefs doesn't work because
	 * maxcontig is 0.
	 */
	lsp->ls_ci.dki_maxtransfer = 16;
}

/*
 * map in a compressed file
 *
 * Read in the header and the index that follows.
 *
 * The header is as follows -
 *
 * Signature (name of the compression algorithm)
 * Compression segment size (a multiple of 512)
 * Number of index entries
 * Size of the last block
 * The array containing the index entries
 *
 * The header information is always stored in
 * network byte order on disk.
 */
static int
lofi_map_compressed_file(struct lofi_state *lsp, char *buf)
{
	uint32_t index_sz, header_len, i;
	ssize_t	resid;
	enum uio_rw rw;
	char *tbuf = buf;
	int error;

	/* The signature has already been read */
	tbuf += sizeof (lsp->ls_comp_algorithm);
	bcopy(tbuf, &(lsp->ls_uncomp_seg_sz), sizeof (lsp->ls_uncomp_seg_sz));
	lsp->ls_uncomp_seg_sz = ntohl(lsp->ls_uncomp_seg_sz);

	/*
	 * The compressed segment size must be a power of 2
	 */
	if (lsp->ls_uncomp_seg_sz % 2)
		return (EINVAL);

	for (i = 0; !((lsp->ls_uncomp_seg_sz >> i) & 1); i++)
		;

	lsp->ls_comp_seg_shift = i;

	tbuf += sizeof (lsp->ls_uncomp_seg_sz);
	bcopy(tbuf, &(lsp->ls_comp_index_sz), sizeof (lsp->ls_comp_index_sz));
	lsp->ls_comp_index_sz = ntohl(lsp->ls_comp_index_sz);

	tbuf += sizeof (lsp->ls_comp_index_sz);
	bcopy(tbuf, &(lsp->ls_uncomp_last_seg_sz),
	    sizeof (lsp->ls_uncomp_last_seg_sz));
	lsp->ls_uncomp_last_seg_sz = ntohl(lsp->ls_uncomp_last_seg_sz);

	/*
	 * Compute the total size of the uncompressed data
	 * for use in fake_disk_geometry and other calculations.
	 * Disk geometry has to be faked with respect to the
	 * actual uncompressed data size rather than the
	 * compressed file size.
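	 * The computation below counts (ls_comp_index_sz - 2) full segments
	 * of ls_uncomp_seg_sz bytes plus the (possibly shorter) last
	 * segment; the final index entry only marks the end of the data.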
	 */
	lsp->ls_vp_size = (lsp->ls_comp_index_sz - 2) * lsp->ls_uncomp_seg_sz
	    + lsp->ls_uncomp_last_seg_sz;

	/*
	 * Index size is rounded up to DEV_BSIZE for ease
	 * of segmapping
	 */
	index_sz = sizeof (*lsp->ls_comp_seg_index) * lsp->ls_comp_index_sz;
	header_len = sizeof (lsp->ls_comp_algorithm) +
	    sizeof (lsp->ls_uncomp_seg_sz) +
	    sizeof (lsp->ls_comp_index_sz) +
	    sizeof (lsp->ls_uncomp_last_seg_sz);
	lsp->ls_comp_offbase = header_len + index_sz;

	index_sz += header_len;
	index_sz = roundup(index_sz, DEV_BSIZE);

	lsp->ls_comp_index_data = kmem_alloc(index_sz, KM_SLEEP);
	lsp->ls_comp_index_data_sz = index_sz;

	/*
	 * Read in the index -- this has a side-effect
	 * of reading in the header as well
	 */
	rw = UIO_READ;
	error = vn_rdwr(rw, lsp->ls_vp, lsp->ls_comp_index_data, index_sz,
	    0, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);

	if (error != 0)
		return (error);

	/* Skip the header, this is where the index really begins */
	lsp->ls_comp_seg_index =
	    /*LINTED*/
	    (uint64_t *)(lsp->ls_comp_index_data + header_len);

	/*
	 * Now recompute offsets in the index to account for
	 * the header length
	 */
	for (i = 0; i < lsp->ls_comp_index_sz; i++) {
		lsp->ls_comp_seg_index[i] = lsp->ls_comp_offbase +
		    BE_64(lsp->ls_comp_seg_index[i]);
	}

	return (error);
}

/*
 * Check to see if the passed in signature is a valid
 * one. If it is valid, return the index into
 * lofi_compress_table.
 *
 * Return -1 if it is invalid
 */
static int lofi_compress_select(char *signature)
{
	int i;

	for (i = 0; i < LOFI_COMPRESS_FUNCTIONS; i++) {
		if (strcmp(lofi_compress_table[i].l_name, signature) == 0)
			return (i);
	}

	return (-1);
}

/*
 * map a file to a minor number. Return the minor number.
 */
static int
lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor,
    int *rvalp, struct cred *credp, int ioctl_flag)
{
	minor_t	newminor;
	struct lofi_state *lsp;
	struct lofi_ioctl *klip;
	int	error;
	struct vnode *vp;
	int64_t	Nblocks_prop_val;
	int64_t	Size_prop_val;
	int	compress_index;
	vattr_t	vattr;
	int	flag;
	enum vtype v_type;
	int zalloced = 0;
	dev_t	newdev;
	char	namebuf[50];
	char	buf[DEV_BSIZE];
	char	crybuf[DEV_BSIZE];
	ssize_t	resid;
	boolean_t need_vn_close = B_FALSE;
	boolean_t keycopied = B_FALSE;
	boolean_t need_size_update = B_FALSE;

	klip = copy_in_lofi_ioctl(ulip, ioctl_flag);
	if (klip == NULL)
		return (EFAULT);

	mutex_enter(&lofi_lock);

	if (!valid_filename(klip->li_filename)) {
		error = EINVAL;
		goto out;
	}

	if (file_to_minor(klip->li_filename) != 0) {
		error = EBUSY;
		goto out;
	}

	if (pickminor) {
		/* Find a free one */
		for (newminor = 1; newminor <= lofi_max_files; newminor++)
			if (ddi_get_soft_state(lofi_statep, newminor) == NULL)
				break;
		if (newminor >= lofi_max_files) {
			error = EAGAIN;
			goto out;
		}
	} else {
		newminor = klip->li_minor;
		if (ddi_get_soft_state(lofi_statep, newminor) != NULL) {
			error = EEXIST;
			goto out;
		}
	}

	/* make sure it's valid */
	error = lookupname(klip->li_filename, UIO_SYSSPACE, FOLLOW,
	    NULLVPP, &vp);
	if (error) {
		goto out;
	}
	v_type = vp->v_type;
	VN_RELE(vp);
	if (!V_ISLOFIABLE(v_type)) {
		error = EINVAL;
		goto out;
	}
	flag = FREAD | FWRITE | FOFFMAX | FEXCL;
	error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0);
	if (error) {
		/* try read-only */
		flag &= ~FWRITE;
		error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0,
		    &vp, 0, 0);
		if (error) {
			goto out;
		}
	}
	need_vn_close = B_TRUE;

	vattr.va_mask = AT_SIZE;
	error = VOP_GETATTR(vp, &vattr, 0, credp, NULL);
	if (error) {
		goto out;
	}
	/* the file needs to be a multiple of the block size */
	if ((vattr.va_size % DEV_BSIZE) != 0) {
		error = EINVAL;
		goto out;
	}
	newdev = makedevice(getmajor(dev), newminor);
	Size_prop_val = vattr.va_size;
	if ((ddi_prop_update_int64(newdev, lofi_dip,
	    SIZE_PROP_NAME, Size_prop_val)) != DDI_PROP_SUCCESS) {
		error = EINVAL;
		goto out;
	}
	Nblocks_prop_val = vattr.va_size / DEV_BSIZE;
	if ((ddi_prop_update_int64(newdev, lofi_dip,
	    NBLOCKS_PROP_NAME, Nblocks_prop_val)) != DDI_PROP_SUCCESS) {
		error = EINVAL;
		goto propout;
	}
	error = ddi_soft_state_zalloc(lofi_statep, newminor);
	if (error == DDI_FAILURE) {
		error = ENOMEM;
		goto propout;
	}
	zalloced = 1;
	(void) snprintf(namebuf, sizeof (namebuf), "%d", newminor);
	error = ddi_create_minor_node(lofi_dip, namebuf, S_IFBLK, newminor,
	    DDI_PSEUDO, NULL);
	if (error != DDI_SUCCESS) {
		error = ENXIO;
		goto propout;
	}
	(void) snprintf(namebuf, sizeof (namebuf), "%d,raw", newminor);
	error = ddi_create_minor_node(lofi_dip, namebuf, S_IFCHR, newminor,
	    DDI_PSEUDO, NULL);
	if (error != DDI_SUCCESS) {
		/* remove block node */
		(void) snprintf(namebuf, sizeof (namebuf), "%d", newminor);
		ddi_remove_minor_node(lofi_dip, namebuf);
		error = ENXIO;
		goto propout;
	}
	lsp = ddi_get_soft_state(lofi_statep, newminor);
	lsp->ls_filename_sz = strlen(klip->li_filename) + 1;
	lsp->ls_filename = kmem_alloc(lsp->ls_filename_sz, KM_SLEEP);
	(void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d",
	    LOFI_DRIVER_NAME, newminor);
	lsp->ls_taskq = taskq_create(namebuf, lofi_taskq_nthreads,
	    minclsyspri, 1, lofi_taskq_maxalloc, 0);
	lsp->ls_kstat = kstat_create(LOFI_DRIVER_NAME, newminor,
	    NULL, "disk", KSTAT_TYPE_IO, 1, 0);
	if (lsp->ls_kstat) {
		mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL);
		lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock;
		kstat_install(lsp->ls_kstat);
	}
	cv_init(&lsp->ls_vp_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&lsp->ls_vp_lock, NULL, MUTEX_DRIVER, NULL);

	/*
	 * save open mode so file can be closed properly and vnode counts
	 * updated correctly.
	 */
	lsp->ls_openflag = flag;

	/*
	 * Try to handle stacked lofs vnodes.
	 */
	if (vp->v_type == VREG) {
		if (VOP_REALVP(vp, &lsp->ls_vp, NULL) != 0) {
			lsp->ls_vp = vp;
		} else {
			/*
			 * Even though vp was obtained via vn_open(), we
			 * can't call vn_close() on it, since lofs will
			 * pass the VOP_CLOSE() on down to the realvp
			 * (which we are about to use). Hence we merely
			 * drop the reference to the lofs vnode and hold
			 * the realvp so things behave as if we've
			 * opened the realvp without any interaction
			 * with lofs.
			 */
			VN_HOLD(lsp->ls_vp);
			VN_RELE(vp);
		}
	} else {
		lsp->ls_vp = vp;
	}
	lsp->ls_vp_size = vattr.va_size;
	(void) strcpy(lsp->ls_filename, klip->li_filename);
	if (rvalp)
		*rvalp = (int)newminor;
	klip->li_minor = newminor;

	/*
	 * Initialize crypto details for encrypted lofi
	 */
	if (klip->li_crypto_enabled) {
		int ret;

		mutex_init(&lsp->ls_crypto_lock, NULL, MUTEX_DRIVER, NULL);

		lsp->ls_mech.cm_type = crypto_mech2id(klip->li_cipher);
		if (lsp->ls_mech.cm_type == CRYPTO_MECH_INVALID) {
			cmn_err(CE_WARN, "invalid cipher %s requested for %s",
			    klip->li_cipher, lsp->ls_filename);
			error = EINVAL;
			goto propout;
		}

		/* this is just initialization here */
		lsp->ls_mech.cm_param = NULL;
		lsp->ls_mech.cm_param_len = 0;

		lsp->ls_iv_type = klip->li_iv_type;
		lsp->ls_iv_mech.cm_type = crypto_mech2id(klip->li_iv_cipher);
		if (lsp->ls_iv_mech.cm_type == CRYPTO_MECH_INVALID) {
			cmn_err(CE_WARN, "invalid iv cipher %s requested"
			    " for %s", klip->li_iv_cipher, lsp->ls_filename);
			error = EINVAL;
			goto propout;
		}

		/* iv mech must itself take a null iv */
		lsp->ls_iv_mech.cm_param = NULL;
		lsp->ls_iv_mech.cm_param_len = 0;
		lsp->ls_iv_len = klip->li_iv_len;

		/*
		 * Create ctx using li_cipher & the raw li_key after checking
		 * that it isn't a weak key.
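		 * (li_key_len is expressed in bits; CRYPTO_BITS2BYTES() is
		 * used below when copying the raw key material.)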
1765 */ 1766 lsp->ls_key.ck_format = CRYPTO_KEY_RAW; 1767 lsp->ls_key.ck_length = klip->li_key_len; 1768 lsp->ls_key.ck_data = kmem_alloc( 1769 CRYPTO_BITS2BYTES(lsp->ls_key.ck_length), KM_SLEEP); 1770 bcopy(klip->li_key, lsp->ls_key.ck_data, 1771 CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); 1772 keycopied = B_TRUE; 1773 1774 ret = crypto_key_check(&lsp->ls_mech, &lsp->ls_key); 1775 if (ret != CRYPTO_SUCCESS) { 1776 error = EINVAL; 1777 cmn_err(CE_WARN, "weak key check failed for cipher " 1778 "%s on file %s (0x%x)", klip->li_cipher, 1779 lsp->ls_filename, ret); 1780 goto propout; 1781 } 1782 } 1783 lsp->ls_crypto_enabled = klip->li_crypto_enabled; 1784 1785 /* 1786 * Read the file signature to check if it is compressed or encrypted. 1787 * Crypto signature is in a different location; both areas should 1788 * read to keep compression and encryption mutually exclusive. 1789 */ 1790 if (lsp->ls_crypto_enabled) { 1791 error = vn_rdwr(UIO_READ, lsp->ls_vp, crybuf, DEV_BSIZE, 1792 CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); 1793 if (error != 0) 1794 goto propout; 1795 } 1796 error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, 0, UIO_SYSSPACE, 1797 0, RLIM64_INFINITY, kcred, &resid); 1798 if (error != 0) 1799 goto propout; 1800 1801 /* initialize these variables for all lofi files */ 1802 lsp->ls_uncomp_seg_sz = 0; 1803 lsp->ls_vp_comp_size = lsp->ls_vp_size; 1804 lsp->ls_comp_algorithm[0] = '\0'; 1805 1806 /* encrypted lofi reads/writes shifted by crypto metadata size */ 1807 lsp->ls_crypto_offset = 0; 1808 1809 /* this is a compressed lofi */ 1810 if ((compress_index = lofi_compress_select(buf)) != -1) { 1811 1812 /* compression and encryption are mutually exclusive */ 1813 if (klip->li_crypto_enabled) { 1814 error = ENOTSUP; 1815 goto propout; 1816 } 1817 1818 /* initialize compression info for compressed lofi */ 1819 lsp->ls_comp_algorithm_index = compress_index; 1820 (void) strlcpy(lsp->ls_comp_algorithm, 1821 lofi_compress_table[compress_index].l_name, 1822 sizeof (lsp->ls_comp_algorithm)); 1823 1824 error = lofi_map_compressed_file(lsp, buf); 1825 if (error != 0) 1826 goto propout; 1827 need_size_update = B_TRUE; 1828 1829 /* this is an encrypted lofi */ 1830 } else if (strncmp(crybuf, lofi_crypto_magic, 1831 sizeof (lofi_crypto_magic)) == 0) { 1832 1833 char *marker = crybuf; 1834 1835 /* 1836 * This is the case where the header in the lofi image is 1837 * already initialized to indicate it is encrypted. 1838 * There is another case (see below) where encryption is 1839 * requested but the lofi image has never been used yet, 1840 * so the header needs to be written with encryption magic. 1841 */ 1842 1843 /* indicate this must be an encrypted lofi due to magic */ 1844 klip->li_crypto_enabled = B_TRUE; 1845 1846 /* 1847 * The encryption header information is laid out this way: 1848 * 6 bytes: hex "CFLOFI" 1849 * 2 bytes: version = 0 ... for now 1850 * 96 bytes: reserved1 (not implemented yet) 1851 * 4 bytes: data_sector = 2 ... for now 1852 * more... 
		/* copy the magic */
		bcopy(marker, lsp->ls_crypto.magic,
		    sizeof (lsp->ls_crypto.magic));
		marker += sizeof (lsp->ls_crypto.magic);

		/* read the encryption version number */
		bcopy(marker, &(lsp->ls_crypto.version),
		    sizeof (lsp->ls_crypto.version));
		lsp->ls_crypto.version = ntohs(lsp->ls_crypto.version);
		marker += sizeof (lsp->ls_crypto.version);

		/* read a chunk of reserved data */
		bcopy(marker, lsp->ls_crypto.reserved1,
		    sizeof (lsp->ls_crypto.reserved1));
		marker += sizeof (lsp->ls_crypto.reserved1);

		/* read block number where encrypted data begins */
		bcopy(marker, &(lsp->ls_crypto.data_sector),
		    sizeof (lsp->ls_crypto.data_sector));
		lsp->ls_crypto.data_sector = ntohl(lsp->ls_crypto.data_sector);
		marker += sizeof (lsp->ls_crypto.data_sector);

		/* and ignore the rest until it is implemented */

		lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE;
		need_size_update = B_TRUE;

	/* neither compressed nor encrypted, BUT could be new encrypted lofi */
	} else if (klip->li_crypto_enabled) {

		/*
		 * This is the case where encryption was requested, but the
		 * area where the encryption header would have been in the
		 * lofi image appears to be entirely blank.  If it is blank,
		 * assume it is a brand new lofi image and initialize the
		 * header area with encryption magic and current version
		 * header data.  If it is not blank, that's an error.
		 */
		int i;
		char *marker;
		struct crypto_meta chead;

		for (i = 0; i < sizeof (struct crypto_meta); i++)
			if (crybuf[i] != '\0')
				break;
		if (i != sizeof (struct crypto_meta)) {
			error = EINVAL;
			goto propout;
		}

		/* nothing there, initialize as encrypted lofi */
		marker = crybuf;
		bcopy(lofi_crypto_magic, marker, sizeof (lofi_crypto_magic));
		marker += sizeof (lofi_crypto_magic);
		chead.version = htons(LOFI_CRYPTO_VERSION);
		bcopy(&(chead.version), marker, sizeof (chead.version));
		marker += sizeof (chead.version);
		marker += sizeof (chead.reserved1);
		chead.data_sector = htonl(LOFI_CRYPTO_DATA_SECTOR);
		bcopy(&(chead.data_sector), marker, sizeof (chead.data_sector));

		/* write the header */
		error = vn_rdwr(UIO_WRITE, lsp->ls_vp, crybuf, DEV_BSIZE,
		    CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
		if (error != 0)
			goto propout;

		/* fix things up so it looks like we read this info */
		bcopy(lofi_crypto_magic, lsp->ls_crypto.magic,
		    sizeof (lofi_crypto_magic));
		lsp->ls_crypto.version = LOFI_CRYPTO_VERSION;
		lsp->ls_crypto.data_sector = LOFI_CRYPTO_DATA_SECTOR;

		lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE;
		need_size_update = B_TRUE;
	}

	/*
	 * Either lsp->ls_vp_size or lsp->ls_crypto_offset changed;
	 * for encrypted lofi, advertise that it is somewhat shorter
	 * due to the embedded crypto metadata section.
	 */
	if (need_size_update) {
		/* update DDI properties */
		Size_prop_val = lsp->ls_vp_size - lsp->ls_crypto_offset;
		if ((ddi_prop_update_int64(newdev, lofi_dip, SIZE_PROP_NAME,
		    Size_prop_val)) != DDI_PROP_SUCCESS) {
			error = EINVAL;
			goto propout;
		}
		Nblocks_prop_val =
		    (lsp->ls_vp_size - lsp->ls_crypto_offset) / DEV_BSIZE;
		if ((ddi_prop_update_int64(newdev, lofi_dip, NBLOCKS_PROP_NAME,
		    Nblocks_prop_val)) != DDI_PROP_SUCCESS) {
			error = EINVAL;
			goto propout;
		}
	}
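
	/*
	 * fake_disk_geometry() synthesizes the vtoc, dk_geom and dk_cinfo
	 * data (lsp->ls_vtoc, lsp->ls_dkg, lsp->ls_ci) that the DKIOC*
	 * cases in lofi_ioctl() below hand back to utilities such as newfs
	 * that expect to be talking to a real disk.
	 */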
	fake_disk_geometry(lsp);
	mutex_exit(&lofi_lock);
	(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
	free_lofi_ioctl(klip);
	return (0);

propout:
	if (keycopied) {
		bzero(lsp->ls_key.ck_data,
		    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
		kmem_free(lsp->ls_key.ck_data,
		    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
		lsp->ls_key.ck_data = NULL;
		lsp->ls_key.ck_length = 0;
	}

	if (zalloced)
		ddi_soft_state_free(lofi_statep, newminor);

	(void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME);
	(void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME);

out:
	if (need_vn_close) {
		(void) VOP_CLOSE(vp, flag, 1, 0, credp, NULL);
		VN_RELE(vp);
	}

	mutex_exit(&lofi_lock);
	free_lofi_ioctl(klip);
	return (error);
}

/*
 * unmap a file.
 */
static int
lofi_unmap_file(dev_t dev, struct lofi_ioctl *ulip, int byfilename,
    struct cred *credp, int ioctl_flag)
{
	struct lofi_state *lsp;
	struct lofi_ioctl *klip;
	minor_t minor;

	klip = copy_in_lofi_ioctl(ulip, ioctl_flag);
	if (klip == NULL)
		return (EFAULT);

	mutex_enter(&lofi_lock);
	if (byfilename) {
		minor = file_to_minor(klip->li_filename);
	} else {
		minor = klip->li_minor;
	}
	if (minor == 0) {
		mutex_exit(&lofi_lock);
		free_lofi_ioctl(klip);
		return (ENXIO);
	}
	lsp = ddi_get_soft_state(lofi_statep, minor);
	if (lsp == NULL || lsp->ls_vp == NULL) {
		mutex_exit(&lofi_lock);
		free_lofi_ioctl(klip);
		return (ENXIO);
	}

	/*
	 * If it's still held open, we'll do one of three things:
	 *
	 * If no flag is set, just return EBUSY.
	 *
	 * If the 'cleanup' flag is set, unmap and remove the device when
	 * the last user finishes.
	 *
	 * If the 'force' flag is set, then we forcibly close the underlying
	 * file.  Subsequent operations will fail, and the DKIOCSTATE ioctl
	 * will return DKIO_DEV_GONE.  When the device is last closed, the
	 * device will be cleaned up appropriately.
	 *
	 * This is complicated by the fact that we may have outstanding
	 * dispatched I/Os.  Rather than having a single mutex to serialize
	 * all I/O, we keep a count of the number of outstanding I/O requests
	 * (ls_vp_iocount), as well as a flag to indicate that no new I/Os
	 * should be dispatched (ls_vp_closereq).
	 *
	 * We set the flag, wait for the number of outstanding I/Os to reach
	 * 0, and then close the underlying vnode.
	 */
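	/*
	 * For reference, the dispatch side of that protocol is expected to
	 * look roughly like the sketch below (illustrative only, not the
	 * exact code in the strategy/taskq path):
	 *
	 *	mutex_enter(&lsp->ls_vp_lock);
	 *	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
	 *		... fail the request with EIO ...
	 *	} else {
	 *		lsp->ls_vp_iocount++;
	 *	}
	 *	mutex_exit(&lsp->ls_vp_lock);
	 *
	 *	... perform the I/O against lsp->ls_vp ...
	 *
	 *	mutex_enter(&lsp->ls_vp_lock);
	 *	lsp->ls_vp_iocount--;
	 *	cv_broadcast(&lsp->ls_vp_cv);
	 *	mutex_exit(&lsp->ls_vp_lock);
	 */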
	if (is_opened(lsp)) {
		if (klip->li_force) {
			/*
			 * XXX: the section marked here should probably be
			 * carefully incorporated into lofi_free_handle();
			 * afterward just replace this section with:
			 *	lofi_free_handle(dev, minor, lsp, credp);
			 * and clean up lofi_unmap_file() a bit more
			 */
			lofi_free_crypto(lsp);

			mutex_enter(&lsp->ls_vp_lock);
			lsp->ls_vp_closereq = B_TRUE;
			while (lsp->ls_vp_iocount > 0)
				cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock);
			(void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag, 1, 0,
			    credp, NULL);
			VN_RELE(lsp->ls_vp);
			lsp->ls_vp = NULL;
			cv_broadcast(&lsp->ls_vp_cv);
			mutex_exit(&lsp->ls_vp_lock);
			/*
			 * XXX: to here
			 */

			klip->li_minor = minor;
			mutex_exit(&lofi_lock);
			(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
			free_lofi_ioctl(klip);
			return (0);
		} else if (klip->li_cleanup) {
			lsp->ls_cleanup = 1;
			mutex_exit(&lofi_lock);
			free_lofi_ioctl(klip);
			return (0);
		}

		mutex_exit(&lofi_lock);
		free_lofi_ioctl(klip);
		return (EBUSY);
	}

	lofi_free_handle(dev, minor, lsp, credp);

	klip->li_minor = minor;
	mutex_exit(&lofi_lock);
	(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
	free_lofi_ioctl(klip);
	return (0);
}

/*
 * get the filename given the minor number, or the minor number given
 * the name.
 */
/*ARGSUSED*/
static int
lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which,
    struct cred *credp, int ioctl_flag)
{
	struct lofi_state *lsp;
	struct lofi_ioctl *klip;
	int error;
	minor_t minor;

	klip = copy_in_lofi_ioctl(ulip, ioctl_flag);
	if (klip == NULL)
		return (EFAULT);

	switch (which) {
	case LOFI_GET_FILENAME:
		minor = klip->li_minor;
		if (minor == 0) {
			free_lofi_ioctl(klip);
			return (EINVAL);
		}

		mutex_enter(&lofi_lock);
		lsp = ddi_get_soft_state(lofi_statep, minor);
		if (lsp == NULL) {
			mutex_exit(&lofi_lock);
			free_lofi_ioctl(klip);
			return (ENXIO);
		}
		(void) strcpy(klip->li_filename, lsp->ls_filename);
		(void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm,
		    sizeof (klip->li_algorithm));
		klip->li_crypto_enabled = lsp->ls_crypto_enabled;
		mutex_exit(&lofi_lock);
		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
		free_lofi_ioctl(klip);
		return (error);
	case LOFI_GET_MINOR:
		mutex_enter(&lofi_lock);
		klip->li_minor = file_to_minor(klip->li_filename);
		/* caller should not depend on klip->li_crypto_enabled here */
		mutex_exit(&lofi_lock);
		if (klip->li_minor == 0) {
			free_lofi_ioctl(klip);
			return (ENOENT);
		}
		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
		free_lofi_ioctl(klip);
		return (error);
	case LOFI_CHECK_COMPRESSED:
		mutex_enter(&lofi_lock);
		klip->li_minor = file_to_minor(klip->li_filename);
		mutex_exit(&lofi_lock);
		if (klip->li_minor == 0) {
			free_lofi_ioctl(klip);
			return (ENOENT);
		}
		mutex_enter(&lofi_lock);
		lsp = ddi_get_soft_state(lofi_statep, klip->li_minor);
		if (lsp == NULL) {
			mutex_exit(&lofi_lock);
			free_lofi_ioctl(klip);
			return (ENXIO);
		}
		ASSERT(strcmp(klip->li_filename, lsp->ls_filename) == 0);

		(void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm,
		    sizeof (klip->li_algorithm));
		mutex_exit(&lofi_lock);
		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
		free_lofi_ioctl(klip);
		return (error);
	default:
		free_lofi_ioctl(klip);
		return (EINVAL);
	}
}

static int
lofi_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
    int *rvalp)
{
	int error;
	enum dkio_state dkstate;
	struct lofi_state *lsp;
	minor_t minor;

	minor = getminor(dev);
	/* lofi ioctls only apply to the master device */
	if (minor == 0) {
		struct lofi_ioctl *lip = (struct lofi_ioctl *)arg;

		/*
		 * The query commands only need read-access - i.e., normal
		 * users are allowed to do those on the ctl device as
		 * long as they can open it read-only.
		 */
		switch (cmd) {
		case LOFI_MAP_FILE:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_map_file(dev, lip, 1, rvalp, credp, flag));
		case LOFI_MAP_FILE_MINOR:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_map_file(dev, lip, 0, rvalp, credp, flag));
		case LOFI_UNMAP_FILE:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_unmap_file(dev, lip, 1, credp, flag));
		case LOFI_UNMAP_FILE_MINOR:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_unmap_file(dev, lip, 0, credp, flag));
		case LOFI_GET_FILENAME:
			return (lofi_get_info(dev, lip, LOFI_GET_FILENAME,
			    credp, flag));
		case LOFI_GET_MINOR:
			return (lofi_get_info(dev, lip, LOFI_GET_MINOR,
			    credp, flag));
		case LOFI_GET_MAXMINOR:
			error = ddi_copyout(&lofi_max_files, &lip->li_minor,
			    sizeof (lofi_max_files), flag);
			if (error)
				return (EFAULT);
			return (0);
		case LOFI_CHECK_COMPRESSED:
			return (lofi_get_info(dev, lip, LOFI_CHECK_COMPRESSED,
			    credp, flag));
		default:
			break;
		}
	}

	lsp = ddi_get_soft_state(lofi_statep, minor);
	if (lsp == NULL)
		return (ENXIO);

	/*
	 * We explicitly allow DKIOCSTATE, but all other ioctls should fail
	 * with EIO as if the device was no longer present.
	 */
	if (lsp->ls_vp == NULL && cmd != DKIOCSTATE)
		return (EIO);

	/* these are for faking out utilities like newfs */
	switch (cmd) {
	case DKIOCGVTOC:
		switch (ddi_model_convert_from(flag & FMODELS)) {
		case DDI_MODEL_ILP32: {
			struct vtoc32 vtoc32;

			vtoctovtoc32(lsp->ls_vtoc, vtoc32);
			if (ddi_copyout(&vtoc32, (void *)arg,
			    sizeof (struct vtoc32), flag))
				return (EFAULT);
			break;
		}

		case DDI_MODEL_NONE:
			if (ddi_copyout(&lsp->ls_vtoc, (void *)arg,
			    sizeof (struct vtoc), flag))
				return (EFAULT);
			break;
		}
		return (0);
	case DKIOCINFO:
		error = ddi_copyout(&lsp->ls_ci, (void *)arg,
		    sizeof (struct dk_cinfo), flag);
		if (error)
			return (EFAULT);
		return (0);
	case DKIOCG_VIRTGEOM:
	case DKIOCG_PHYGEOM:
	case DKIOCGGEOM:
		error = ddi_copyout(&lsp->ls_dkg, (void *)arg,
		    sizeof (struct dk_geom), flag);
		if (error)
			return (EFAULT);
		return (0);
	case DKIOCSTATE:
		/*
		 * Normally, lofi devices are always in the INSERTED state.
		 * If a device is forcefully unmapped, then the device
		 * transitions to the DKIO_DEV_GONE state.
		 */
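		/*
		 * The caller passes in the last state it observed; block
		 * until the device's state differs from that (for example,
		 * when the file is forcibly unmapped underneath an open
		 * device), then copy out whatever the state is now.
		 * cv_wait_sig() allows the wait to be interrupted by a
		 * signal, in which case we return EINTR.
		 */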
		if (ddi_copyin((void *)arg, &dkstate, sizeof (dkstate),
		    flag) != 0)
			return (EFAULT);

		mutex_enter(&lsp->ls_vp_lock);
		while ((dkstate == DKIO_INSERTED && lsp->ls_vp != NULL) ||
		    (dkstate == DKIO_DEV_GONE && lsp->ls_vp == NULL)) {
			/*
			 * By virtue of having the device open, we know that
			 * 'lsp' will remain valid when we return.
			 */
			if (!cv_wait_sig(&lsp->ls_vp_cv,
			    &lsp->ls_vp_lock)) {
				mutex_exit(&lsp->ls_vp_lock);
				return (EINTR);
			}
		}

		dkstate = (lsp->ls_vp != NULL ? DKIO_INSERTED : DKIO_DEV_GONE);
		mutex_exit(&lsp->ls_vp_lock);

		if (ddi_copyout(&dkstate, (void *)arg,
		    sizeof (dkstate), flag) != 0)
			return (EFAULT);
		return (0);
	default:
		return (ENOTTY);
	}
}

static struct cb_ops lofi_cb_ops = {
	lofi_open,		/* open */
	lofi_close,		/* close */
	lofi_strategy,		/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	lofi_read,		/* read */
	lofi_write,		/* write */
	lofi_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	0,			/* streamtab */
	D_64BIT | D_NEW | D_MP,	/* Driver compatibility flag */
	CB_REV,
	lofi_aread,
	lofi_awrite
};

static struct dev_ops lofi_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	lofi_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	lofi_attach,		/* attach */
	lofi_detach,		/* detach */
	nodev,			/* reset */
	&lofi_cb_ops,		/* driver operations */
	NULL,			/* no bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"loopback file driver",
	&lofi_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	int error;

	error = ddi_soft_state_init(&lofi_statep,
	    sizeof (struct lofi_state), 0);
	if (error)
		return (error);

	mutex_init(&lofi_lock, NULL, MUTEX_DRIVER, NULL);
	error = mod_install(&modlinkage);
	if (error) {
		mutex_destroy(&lofi_lock);
		ddi_soft_state_fini(&lofi_statep);
	}

	return (error);
}

int
_fini(void)
{
	int error;

	if (lofi_busy())
		return (EBUSY);

	error = mod_remove(&modlinkage);
	if (error)
		return (error);

	mutex_destroy(&lofi_lock);
	ddi_soft_state_fini(&lofi_statep);

	return (error);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}