1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * lofi (loopback file) driver - allows you to attach a file to a device, 30 * which can then be accessed through that device. The simple model is that 31 * you tell lofi to open a file, and then use the block device you get as 32 * you would any block device. lofi translates access to the block device 33 * into I/O on the underlying file. This is mostly useful for 34 * mounting images of filesystems. 35 * 36 * lofi is controlled through /dev/lofictl - this is the only device exported 37 * during attach, and is minor number 0. lofiadm communicates with lofi through 38 * ioctls on this device. When a file is attached to lofi, block and character 39 * devices are exported in /dev/lofi and /dev/rlofi. Currently, these devices 40 * are identified by their minor number, and the minor number is also used 41 * as the name in /dev/lofi. 
If we ever decide to support virtual disks, 42 * we'll have to divide the minor number space to identify fdisk partitions 43 * and slices, and the name will then be the minor number shifted down a 44 * few bits. Minor devices are tracked with state structures handled with 45 * ddi_soft_state(9F) for simplicity. 46 * 47 * A file attached to lofi is opened when attached and not closed until 48 * explicitly detached from lofi. This seems more sensible than deferring 49 * the open until the /dev/lofi device is opened, for a number of reasons. 50 * One is that any failure is likely to be noticed by the person (or script) 51 * running lofiadm. Another is that it would be a security problem if the 52 * file was replaced by another one after being added but before being opened. 53 * 54 * The only hard part about lofi is the ioctls. In order to support things 55 * like 'newfs' on a lofi device, it needs to support certain disk ioctls. 56 * So it has to fake disk geometry and partition information. More may need 57 * to be faked if your favorite utility doesn't work and you think it should 58 * (fdformat doesn't work because it really wants to know the type of floppy 59 * controller to talk to, and that didn't seem easy to fake. Or possibly even 60 * necessary, since we have mkfs_pcfs now). 61 * 62 * Normally, a lofi device cannot be detached if it is open (i.e. busy). To 63 * support simulation of hotplug events, an optional force flag is provided. 64 * If a lofi device is open when a force detach is requested, then the 65 * underlying file is closed and any subsequent operations return EIO. When the 66 * device is closed for the last time, it will be cleaned up at that time. In 67 * addition, the DKIOCSTATE ioctl will return DKIO_DEV_GONE when the device is 68 * detached but not removed. 69 * 70 * Known problems: 71 * 72 * UFS logging. Mounting a UFS filesystem image "logging" 73 * works for basic copy testing but wedges during a build of ON through 74 * that image. 
Some deadlock in lufs holding the log mutex and then 75 * getting stuck on a buf. So for now, don't do that. 76 * 77 * Direct I/O. Since the filesystem data is being cached in the buffer 78 * cache, _and_ again in the underlying filesystem, it's tempting to 79 * enable direct I/O on the underlying file. Don't, because that deadlocks. 80 * I think to fix the cache-twice problem we might need filesystem support. 81 * 82 * lofi on itself. The simple lock strategy (lofi_lock) precludes this 83 * because you'll be in lofi_ioctl, holding the lock when you open the 84 * file, which, if it's lofi, will grab lofi_lock. We prevent this for 85 * now, though not using ddi_soft_state(9F) would make it possible to 86 * do. Though it would still be silly. 87 * 88 * Interesting things to do: 89 * 90 * Allow multiple files for each device. A poor-man's metadisk, basically. 91 * 92 * Pass-through ioctls on block devices. You can (though it's not 93 * documented), give lofi a block device as a file name. Then we shouldn't 94 * need to fake a geometry. But this is also silly unless you're replacing 95 * metadisk. 96 * 97 * Encryption. tpm would like this. Apparently Windows 2000 has it, and 98 * so does Linux. 
99 */ 100 101 #include <sys/types.h> 102 #include <netinet/in.h> 103 #include <sys/sysmacros.h> 104 #include <sys/cmn_err.h> 105 #include <sys/uio.h> 106 #include <sys/kmem.h> 107 #include <sys/cred.h> 108 #include <sys/mman.h> 109 #include <sys/errno.h> 110 #include <sys/aio_req.h> 111 #include <sys/stat.h> 112 #include <sys/file.h> 113 #include <sys/modctl.h> 114 #include <sys/conf.h> 115 #include <sys/debug.h> 116 #include <sys/vnode.h> 117 #include <sys/lofi.h> 118 #include <sys/fcntl.h> 119 #include <sys/pathname.h> 120 #include <sys/filio.h> 121 #include <sys/fdio.h> 122 #include <sys/open.h> 123 #include <sys/disp.h> 124 #include <vm/seg_map.h> 125 #include <sys/ddi.h> 126 #include <sys/sunddi.h> 127 #include <sys/zmod.h> 128 129 #define NBLOCKS_PROP_NAME "Nblocks" 130 #define SIZE_PROP_NAME "Size" 131 132 static dev_info_t *lofi_dip; 133 static void *lofi_statep; 134 static kmutex_t lofi_lock; /* state lock */ 135 136 /* 137 * Because lofi_taskq_nthreads limits the actual swamping of the device, the 138 * maxalloc parameter (lofi_taskq_maxalloc) should be tuned conservatively 139 * high. If we want to be assured that the underlying device is always busy, 140 * we must be sure that the number of bytes enqueued when the number of 141 * enqueued tasks exceeds maxalloc is sufficient to keep the device busy for 142 * the duration of the sleep time in taskq_ent_alloc(). That is, lofi should 143 * set maxalloc to be the maximum throughput (in bytes per second) of the 144 * underlying device divided by the minimum I/O size. We assume a realistic 145 * maximum throughput of one hundred megabytes per second; we set maxalloc on 146 * the lofi task queue to be 104857600 divided by DEV_BSIZE. 
 */
static int lofi_taskq_maxalloc = 104857600 / DEV_BSIZE;
static int lofi_taskq_nthreads = 4;	/* # of taskq threads per device */

uint32_t lofi_max_files = LOFI_MAX_FILES;

static int gzip_decompress(void *src, size_t srclen, void *dst,
	size_t *destlen, int level);

/*
 * Table of supported decompression routines, indexed by
 * ls_comp_algorithm_index.  All entries share one gzip decompressor;
 * the level field only distinguishes the names used at compress time.
 */
lofi_compress_info_t lofi_compress_table[LOFI_COMPRESS_FUNCTIONS] = {
	{gzip_decompress,	NULL,	6,	"gzip"}, /* default */
	{gzip_decompress,	NULL,	6,	"gzip-6"},
	{gzip_decompress,	NULL,	9,	"gzip-9"}
};

/*
 * Returns EBUSY if any mapping (minors 1..lofi_max_files) still has
 * soft state allocated, 0 otherwise.  Used to gate detach/unload.
 */
static int
lofi_busy(void)
{
	minor_t	minor;

	/*
	 * We need to make sure no mappings exist - mod_remove won't
	 * help because the device isn't open.
	 */
	mutex_enter(&lofi_lock);
	for (minor = 1; minor <= lofi_max_files; minor++) {
		if (ddi_get_soft_state(lofi_statep, minor) != NULL) {
			mutex_exit(&lofi_lock);
			return (EBUSY);
		}
	}
	mutex_exit(&lofi_lock);
	return (0);
}

/*
 * Non-zero if the minor is open via any open type.
 * Caller must hold lofi_lock.
 */
static int
is_opened(struct lofi_state *lsp)
{
	ASSERT(mutex_owned(&lofi_lock));
	return (lsp->ls_chr_open || lsp->ls_blk_open || lsp->ls_lyr_open_count);
}

/*
 * Record an open of the given type.  Character and block opens are
 * single flags; layered opens are counted.  Returns -1 for an unknown
 * open type, 0 otherwise.  Caller must hold lofi_lock.
 */
static int
mark_opened(struct lofi_state *lsp, int otyp)
{
	ASSERT(mutex_owned(&lofi_lock));
	switch (otyp) {
	case OTYP_CHR:
		lsp->ls_chr_open = 1;
		break;
	case OTYP_BLK:
		lsp->ls_blk_open = 1;
		break;
	case OTYP_LYR:
		lsp->ls_lyr_open_count++;
		break;
	default:
		return (-1);
	}
	return (0);
}

/*
 * Undo mark_opened() for one open of the given type.  Unknown types
 * are silently ignored.  Caller must hold lofi_lock.
 */
static void
mark_closed(struct lofi_state *lsp, int otyp)
{
	ASSERT(mutex_owned(&lofi_lock));
	switch (otyp) {
	case OTYP_CHR:
		lsp->ls_chr_open = 0;
		break;
	case OTYP_BLK:
		lsp->ls_blk_open = 0;
		break;
	case OTYP_LYR:
		lsp->ls_lyr_open_count--;
		break;
	default:
		break;
	}
}

/*
 * Tear down a mapping: close and release the backing vnode (if still
 * held), remove the size properties and both minor nodes, then free
 * all per-minor resources including the soft state itself.
 * Caller must hold lofi_lock.
 */
static void
lofi_free_handle(dev_t dev, minor_t minor, struct lofi_state *lsp,
    cred_t *credp)
{
	dev_t	newdev;
	char	namebuf[50];

	if (lsp->ls_vp) {
		(void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag,
		    1, 0, credp, NULL);
		VN_RELE(lsp->ls_vp);
		lsp->ls_vp = NULL;
	}

	newdev = makedevice(getmajor(dev), minor);
	(void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME);
	(void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME);

	/* the block and raw minor nodes are named after the minor number */
	(void) snprintf(namebuf, sizeof (namebuf), "%d", minor);
	ddi_remove_minor_node(lofi_dip, namebuf);
	(void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor);
	ddi_remove_minor_node(lofi_dip, namebuf);

	kmem_free(lsp->ls_filename, lsp->ls_filename_sz);
	taskq_destroy(lsp->ls_taskq);
	if (lsp->ls_kstat) {
		kstat_delete(lsp->ls_kstat);
		mutex_destroy(&lsp->ls_kstat_lock);
	}
	ddi_soft_state_free(lofi_statep, minor);
}

/*
 * open(9E) entry point.  Minor 0 is the control device: it must be
 * opened exclusively as a character device.  Any other minor must
 * already have a file mapped, and must not have been force-detached
 * (ls_vp == NULL means the backing vnode is gone).
 */
/*ARGSUSED*/
static int
lofi_open(dev_t *devp, int flag, int otyp, struct cred *credp)
{
	minor_t	minor;
	struct lofi_state *lsp;

	mutex_enter(&lofi_lock);
	minor = getminor(*devp);
	if (minor == 0) {
		/* master control device */
		/* must be opened exclusively */
		if (((flag & FEXCL) != FEXCL) || (otyp != OTYP_CHR)) {
			mutex_exit(&lofi_lock);
			return (EINVAL);
		}
		lsp = ddi_get_soft_state(lofi_statep, 0);
		if (lsp == NULL) {
			mutex_exit(&lofi_lock);
			return (ENXIO);
		}
		if (is_opened(lsp)) {
			mutex_exit(&lofi_lock);
			return (EBUSY);
		}
		(void) mark_opened(lsp, OTYP_CHR);
		mutex_exit(&lofi_lock);
		return (0);
	}

	/* otherwise, the mapping should already exist */
	lsp = ddi_get_soft_state(lofi_statep, minor);
	if (lsp == NULL) {
		mutex_exit(&lofi_lock);
		return (EINVAL);
	}

	if (lsp->ls_vp == NULL) {
		mutex_exit(&lofi_lock);
		return (ENXIO);
	}

	if (mark_opened(lsp, otyp) == -1) {
		mutex_exit(&lofi_lock);
		return (EINVAL);
	}

	mutex_exit(&lofi_lock);
	return (0);
}

/*
 * close(9E) entry point.  Clears the open state for this open type;
 * the last close of a force-detached or cleanup-pending mapping
 * performs the deferred teardown via lofi_free_handle().
 */
/*ARGSUSED*/
static int
lofi_close(dev_t dev, int flag, int otyp, struct cred *credp)
{
	minor_t	minor;
	struct lofi_state *lsp;

	mutex_enter(&lofi_lock);
	minor = getminor(dev);
	lsp = ddi_get_soft_state(lofi_statep, minor);
	if (lsp == NULL) {
		mutex_exit(&lofi_lock);
		return (EINVAL);
	}
	mark_closed(lsp, otyp);

	/*
	 * If we forcibly closed the underlying device (li_force), or
	 * asked for cleanup (li_cleanup), finish up if we're the last
	 * out of the door.
	 */
	if (minor != 0 && !is_opened(lsp) &&
	    (lsp->ls_cleanup || lsp->ls_vp == NULL))
		lofi_free_handle(dev, minor, lsp, credp);

	mutex_exit(&lofi_lock);
	return (0);
}

/*
 * Transfer bp->b_bcount bytes between the buf's kernel address and the
 * backing vnode via segmap, starting at byte 'offset' within the file.
 * On success bp->b_resid reflects any shortfall.  Returns 0 or an errno.
 *
 * NOTE(review): the transfer is bounded by ls_vp_comp_size even for
 * uncompressed files -- presumably that field mirrors the file size in
 * the uncompressed case; confirm against the mapping setup code.
 */
static int
lofi_mapped_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
	struct lofi_state *lsp)
{
	int	error;
	offset_t	alignedoffset, mapoffset;
	size_t	xfersize;
	int	isread;
	int	smflags;
	caddr_t	mapaddr;
	size_t	len;
	enum seg_rw srw;

	/*
	 * segmap always gives us an 8K (MAXBSIZE) chunk, aligned on
	 * an 8K boundary, but the buf transfer address may not be
	 * aligned on more than a 512-byte boundary (we don't enforce
	 * that even though we could). This matters since the initial
	 * part of the transfer may not start at offset 0 within the
	 * segmap'd chunk. So we have to compensate for that with
	 * 'mapoffset'. Subsequent chunks always start off at the
	 * beginning, and the last is capped by b_resid
	 */
	mapoffset = offset & MAXBOFFSET;
	alignedoffset = offset - mapoffset;
	bp->b_resid = bp->b_bcount;
	isread = bp->b_flags & B_READ;
	srw = isread ? S_READ : S_WRITE;
	do {
		xfersize = MIN(lsp->ls_vp_comp_size - offset,
		    MIN(MAXBSIZE - mapoffset, bp->b_resid));
		len = roundup(mapoffset + xfersize, PAGESIZE);
		mapaddr = segmap_getmapflt(segkmap, lsp->ls_vp,
		    alignedoffset, MAXBSIZE, 1, srw);
		/*
		 * Now fault in the pages. This lets us check
		 * for errors before we reference mapaddr and
		 * try to resolve the fault in bcopy (which would
		 * panic instead). And this can easily happen,
		 * particularly if you've lofi'd a file over NFS
		 * and someone deletes the file on the server.
		 */
		error = segmap_fault(kas.a_hat, segkmap, mapaddr,
		    len, F_SOFTLOCK, srw);
		if (error) {
			(void) segmap_release(segkmap, mapaddr, 0);
			if (FC_CODE(error) == FC_OBJERR)
				error = FC_ERRNO(error);
			else
				error = EIO;
			break;
		}
		smflags = 0;
		if (isread) {
			smflags |= SM_FREE;
			/*
			 * If we're reading an entire page starting
			 * at a page boundary, there's a good chance
			 * we won't need it again. Put it on the
			 * head of the freelist.
			 */
			if (mapoffset == 0 && xfersize == PAGESIZE)
				smflags |= SM_DONTNEED;
			bcopy(mapaddr + mapoffset, bufaddr, xfersize);
		} else {
			smflags |= SM_WRITE;
			bcopy(bufaddr, mapaddr + mapoffset, xfersize);
		}
		bp->b_resid -= xfersize;
		bufaddr += xfersize;
		offset += xfersize;
		/* drop the soft lock before releasing the mapping */
		(void) segmap_fault(kas.a_hat, segkmap, mapaddr,
		    len, F_SOFTUNLOCK, srw);
		error = segmap_release(segkmap, mapaddr, smflags);
		/* only the first map may start partial */
		mapoffset = 0;
		alignedoffset += MAXBSIZE;
	} while ((error == 0) && (bp->b_resid > 0) &&
	    (offset < lsp->ls_vp_comp_size));

	return (error);
}

/*
 * Decompress srclen bytes at src into dst using the kernel zlib
 * (zmod); *dstlen is the output buffer size on entry and is updated
 * by z_uncompress.  Returns 0 on success, -1 on any zlib error.
 * 'level' is unused for decompression.
 */
/*ARGSUSED*/
static int gzip_decompress(void *src, size_t srclen, void *dst,
	size_t *dstlen, int level)
{
	ASSERT(*dstlen >= srclen);

	if (z_uncompress(dst, dstlen, src, srclen) != Z_OK)
		return (-1);
	return (0);
}

/*
 * This is basically what strategy used to be before we found we
 * needed task queues.
 */
static void
lofi_strategy_task(void *arg)
{
	struct buf	*bp = (struct buf *)arg;
	int	error;
	struct lofi_state *lsp;
	uint64_t	sblkno, eblkno, cmpbytes;
	offset_t	offset, sblkoff, eblkoff;
	u_offset_t	salign, ealign;
	u_offset_t	sdiff;
	uint32_t	comp_data_sz;
	caddr_t	bufaddr;
	unsigned char *compressed_seg = NULL, *cmpbuf;
	unsigned char *uncompressed_seg = NULL;
	lofi_compress_info_t *li;
	size_t	oblkcount, xfersize;
	unsigned long	seglen;

	lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev));
	if (lsp->ls_kstat) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_to_runq(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}
	bp_mapin(bp);
	bufaddr = bp->b_un.b_addr;
	offset = bp->b_lblkno * DEV_BSIZE;	/* offset within file */

	/*
	 * We used to always use vn_rdwr here, but we cannot do that because
	 * we might decide to read or write from the underlying
	 * file during this call, which would be a deadlock because
	 * we have the rw_lock. So instead we page, unless it's not
	 * mappable or it's a character device.
	 */
	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
		/* force-detached (or detach pending): fail the I/O */
		error = EIO;
	} else if (((lsp->ls_vp->v_flag & VNOMAP) == 0) &&
	    (lsp->ls_vp->v_type != VCHR)) {
		uint64_t i;

		/*
		 * Handle uncompressed files with a regular read
		 */
		if (lsp->ls_uncomp_seg_sz == 0) {
			error = lofi_mapped_rdwr(bufaddr, offset, bp, lsp);
			goto done;
		}

		/*
		 * From here on we're dealing primarily with compressed files
		 */

		/*
		 * Compressed files can only be read from and
		 * not written to
		 */
		if (!(bp->b_flags & B_READ)) {
			bp->b_resid = bp->b_bcount;
			error = EROFS;
			goto done;
		}

		ASSERT(lsp->ls_comp_algorithm_index >= 0);
		li = &lofi_compress_table[lsp->ls_comp_algorithm_index];
		/*
		 * Compute starting and ending compressed segment numbers
		 * We use only bitwise operations avoiding division and
		 * modulus because we enforce the compression segment size
		 * to a power of 2
		 */
		sblkno = offset >> lsp->ls_comp_seg_shift;
		sblkoff = offset & (lsp->ls_uncomp_seg_sz - 1);
		eblkno = (offset + bp->b_bcount) >> lsp->ls_comp_seg_shift;
		eblkoff = (offset + bp->b_bcount) & (lsp->ls_uncomp_seg_sz - 1);

		/*
		 * Align start offset to block boundary for segmap
		 */
		salign = lsp->ls_comp_seg_index[sblkno];
		sdiff = salign & (DEV_BSIZE - 1);
		salign -= sdiff;
		if (eblkno >= (lsp->ls_comp_index_sz - 1)) {
			/*
			 * We're dealing with the last segment of
			 * the compressed file -- the size of this
			 * segment *may not* be the same as the
			 * segment size for the file
			 */
			eblkoff = (offset + bp->b_bcount) &
			    (lsp->ls_uncomp_last_seg_sz - 1);
			ealign = lsp->ls_vp_comp_size;
		} else {
			ealign = lsp->ls_comp_seg_index[eblkno + 1];
		}

		/*
		 * Preserve original request parameters
		 */
		oblkcount = bp->b_bcount;

		/*
		 * Assign the calculated parameters
		 */
		comp_data_sz = ealign - salign;
		bp->b_bcount = comp_data_sz;

		/*
		 * Allocate fixed size memory blocks to hold compressed
		 * segments and one uncompressed segment since we
		 * uncompress segments one at a time
		 */
		compressed_seg = kmem_alloc(bp->b_bcount, KM_SLEEP);
		uncompressed_seg = kmem_alloc(lsp->ls_uncomp_seg_sz, KM_SLEEP);
		/*
		 * Map in the calculated number of blocks
		 */
		error = lofi_mapped_rdwr((caddr_t)compressed_seg, salign,
		    bp, lsp);

		/* restore the caller's request parameters */
		bp->b_bcount = oblkcount;
		bp->b_resid = oblkcount;
		if (error != 0)
			goto done;

		/*
		 * We have the compressed blocks, now uncompress them
		 */
		cmpbuf = compressed_seg + sdiff;
		for (i = sblkno; i < (eblkno + 1) && i < lsp->ls_comp_index_sz;
		    i++) {
			/*
			 * Each of the segment index entries contains
			 * the starting block number for that segment.
			 * The number of compressed bytes in a segment
			 * is thus the difference between the starting
			 * block number of this segment and the starting
			 * block number of the next segment.
			 */
			if ((i == eblkno) &&
			    (i == lsp->ls_comp_index_sz - 1)) {
				cmpbytes = lsp->ls_vp_comp_size -
				    lsp->ls_comp_seg_index[i];
			} else {
				cmpbytes = lsp->ls_comp_seg_index[i + 1] -
				    lsp->ls_comp_seg_index[i];
			}

			/*
			 * The first byte in a compressed segment is a flag
			 * that indicates whether this segment is compressed
			 * at all
			 */
			if (*cmpbuf == UNCOMPRESSED) {
				bcopy((cmpbuf + SEGHDR), uncompressed_seg,
				    (cmpbytes - SEGHDR));
			} else {
				seglen = lsp->ls_uncomp_seg_sz;

				if (li->l_decompress((cmpbuf + SEGHDR),
				    (cmpbytes - SEGHDR), uncompressed_seg,
				    &seglen, li->l_level) != 0) {
					error = EIO;
					goto done;
				}
			}

			/*
			 * Determine how much uncompressed data we
			 * have to copy and copy it
			 *
			 * NOTE(review): for the final segment of the
			 * file this subtracts (ls_uncomp_last_seg_sz -
			 * eblkoff) from a base of ls_uncomp_seg_sz;
			 * when the last segment is shorter than a full
			 * segment the result looks larger than
			 * (eblkoff - sblkoff).  The b_resid == 0 break
			 * below may mask this -- verify against the
			 * index layout.
			 */
			xfersize = lsp->ls_uncomp_seg_sz - sblkoff;
			if (i == eblkno) {
				if (i == (lsp->ls_comp_index_sz - 1))
					xfersize -= (lsp->ls_uncomp_last_seg_sz
					    - eblkoff);
				else
					xfersize -=
					    (lsp->ls_uncomp_seg_sz - eblkoff);
			}

			bcopy((uncompressed_seg + sblkoff), bufaddr, xfersize);

			cmpbuf += cmpbytes;
			bufaddr += xfersize;
			bp->b_resid -= xfersize;
			/* only the first segment may start mid-segment */
			sblkoff = 0;

			if (bp->b_resid == 0)
				break;
		}
	} else {
		/* not mappable (or a character device): fall back to vn_rdwr */
		ssize_t	resid;
		enum uio_rw rw;

		if (bp->b_flags & B_READ)
			rw = UIO_READ;
		else
			rw = UIO_WRITE;
		error = vn_rdwr(rw, lsp->ls_vp, bufaddr, bp->b_bcount,
		    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
		bp->b_resid = resid;
	}

done:
	if (compressed_seg != NULL)
		kmem_free(compressed_seg, comp_data_sz);
	if (uncompressed_seg != NULL)
		kmem_free(uncompressed_seg, lsp->ls_uncomp_seg_sz);

	/* account the completed bytes before leaving the run queue */
	if (lsp->ls_kstat) {
		size_t n_done = bp->b_bcount - bp->b_resid;
		kstat_io_t *kioptr;

		mutex_enter(lsp->ls_kstat->ks_lock);
		kioptr = KSTAT_IO_PTR(lsp->ls_kstat);
		if (bp->b_flags & B_READ) {
			kioptr->nread += n_done;
			kioptr->reads++;
		} else {
			kioptr->nwritten += n_done;
			kioptr->writes++;
		}
		kstat_runq_exit(kioptr);
		mutex_exit(lsp->ls_kstat->ks_lock);
	}

	/* wake anyone (e.g. a force-detach) waiting for I/O to drain */
	mutex_enter(&lsp->ls_vp_lock);
	if (--lsp->ls_vp_iocount == 0)
		cv_broadcast(&lsp->ls_vp_cv);
	mutex_exit(&lsp->ls_vp_lock);

	bioerror(bp, error);
	biodone(bp);
}

/*
 * strategy(9E) entry point.  Validates the request against the file
 * size, then queues the actual transfer on the per-device taskq.
 */
static int
lofi_strategy(struct buf *bp)
{
	struct lofi_state *lsp;
	offset_t	offset;

	/*
	 * We cannot just do I/O here, because the current thread
	 * _might_ end up back in here because the underlying filesystem
	 * wants a buffer, which eventually gets into bio_recycle and
	 * might call into lofi to write out a delayed-write buffer.
	 * This is bad if the filesystem above lofi is the same as below.
	 *
	 * We could come up with a complex strategy using threads to
	 * do the I/O asynchronously, or we could use task queues. task
	 * queues were incredibly easy so they win.
	 */
	lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev));
	mutex_enter(&lsp->ls_vp_lock);
	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
		bioerror(bp, EIO);
		biodone(bp);
		mutex_exit(&lsp->ls_vp_lock);
		return (0);
	}

	offset = bp->b_lblkno * DEV_BSIZE;	/* offset within file */
	if (offset == lsp->ls_vp_size) {
		/* EOF */
		if ((bp->b_flags & B_READ) != 0) {
			bp->b_resid = bp->b_bcount;
			bioerror(bp, 0);
		} else {
			/* writes should fail */
			bioerror(bp, ENXIO);
		}
		biodone(bp);
		mutex_exit(&lsp->ls_vp_lock);
		return (0);
	}
	if (offset > lsp->ls_vp_size) {
		bioerror(bp, ENXIO);
		biodone(bp);
		mutex_exit(&lsp->ls_vp_lock);
		return (0);
	}
	/* count the in-flight request so a detach can wait for it */
	lsp->ls_vp_iocount++;
	mutex_exit(&lsp->ls_vp_lock);

	if (lsp->ls_kstat) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_enter(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}
	(void) taskq_dispatch(lsp->ls_taskq,
	    lofi_strategy_task, bp, KM_SLEEP);
	return (0);
}

/*
 * read(9E) entry point.  The control device (minor 0) does no I/O.
 */
/*ARGSUSED2*/
static int
lofi_read(dev_t dev, struct uio *uio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	return (physio(lofi_strategy, NULL, dev, B_READ, minphys, uio));
}

/*
 * write(9E) entry point.  The control device (minor 0) does no I/O.
 */
/*ARGSUSED2*/
static int
lofi_write(dev_t dev, struct uio *uio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	return (physio(lofi_strategy, NULL, dev, B_WRITE, minphys, uio));
}

/*
 * aread(9E) entry point (asynchronous read via aphysio).
 */
/*ARGSUSED2*/
static int
lofi_aread(dev_t dev, struct aio_req *aio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	return (aphysio(lofi_strategy, anocancel, dev, B_READ, minphys, aio));
}

/*
 * awrite(9E) entry point (asynchronous write via aphysio).
 */
/*ARGSUSED2*/
static int
lofi_awrite(dev_t dev, struct aio_req *aio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	return (aphysio(lofi_strategy, anocancel, dev, B_WRITE, minphys, aio));
}

/*
 * getinfo(9E) entry point: report the devinfo node / instance.
 */
/*ARGSUSED*/
static int
lofi_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = lofi_dip;
		return (DDI_SUCCESS);
	case DDI_INFO_DEVT2INSTANCE:
		*result = 0;
		return (DDI_SUCCESS);
	}
	return (DDI_FAILURE);
}

/*
 * attach(9E) entry point: allocate soft state for the control device
 * (minor 0), create its minor node, and mark the driver as handling
 * kernel-issued ioctls.  Undoes partial work on any failure.
 */
static int
lofi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	error;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);
	error = ddi_soft_state_zalloc(lofi_statep, 0);
	if (error == DDI_FAILURE) {
		return (DDI_FAILURE);
	}
	error = ddi_create_minor_node(dip, LOFI_CTL_NODE, S_IFCHR, 0,
	    DDI_PSEUDO, NULL);
	if (error == DDI_FAILURE) {
		ddi_soft_state_free(lofi_statep, 0);
		return (DDI_FAILURE);
	}
	/* driver handles kernel-issued IOCTLs */
	if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
		ddi_remove_minor_node(dip,
		    NULL);
		ddi_soft_state_free(lofi_statep, 0);
		return (DDI_FAILURE);
	}
	lofi_dip = dip;
	ddi_report_dev(dip);
	return (DDI_SUCCESS);
}

/*
 * detach(9E) entry point: refuse to detach while any mapping still
 * exists (see lofi_busy()), otherwise release everything created by
 * lofi_attach().
 */
static int
lofi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);
	if (lofi_busy())
		return (DDI_FAILURE);
	lofi_dip = NULL;
	ddi_remove_minor_node(dip, NULL);
	ddi_prop_remove_all(dip);
	ddi_soft_state_free(lofi_statep, 0);
	return (DDI_SUCCESS);
}

/*
 * These two just simplify the rest of the ioctls that need to copyin/out
 * the lofi_ioctl structure.
 */

/*
 * Copy a lofi_ioctl in from userland into a KM_SLEEP'd kernel copy.
 * Returns NULL (with the allocation freed) on copyin failure or an
 * out-of-range minor; the caller must free_lofi_ioctl() the result.
 */
struct lofi_ioctl *
copy_in_lofi_ioctl(const struct lofi_ioctl *ulip, int flag)
{
	struct lofi_ioctl *klip;
	int	error;

	klip = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP);
	error = ddi_copyin(ulip, klip, sizeof (struct lofi_ioctl), flag);
	if (error) {
		kmem_free(klip, sizeof (struct lofi_ioctl));
		return (NULL);
	}

	/*
	 * make sure filename is always null-terminated
	 *
	 * NOTE(review): indexing [MAXPATHLEN] assumes li_filename is
	 * declared with MAXPATHLEN + 1 bytes in sys/lofi.h -- confirm,
	 * otherwise this write is one past the end of the array.
	 */
	klip->li_filename[MAXPATHLEN] = '\0';

	/* validate minor number */
	if (klip->li_minor > lofi_max_files) {
		kmem_free(klip, sizeof (struct lofi_ioctl));
		return (NULL);
	}
	return (klip);
}

/*
 * Copy a kernel lofi_ioctl back out to userland.  Returns 0 or EFAULT.
 */
int
copy_out_lofi_ioctl(const struct lofi_ioctl *klip, struct lofi_ioctl *ulip,
	int flag)
{
	int	error;

	error = ddi_copyout(klip, ulip, sizeof (struct lofi_ioctl), flag);
	if (error)
		return (EFAULT);
	return (0);
}

/*
 * Release a kernel copy made by copy_in_lofi_ioctl().
 */
void
free_lofi_ioctl(struct lofi_ioctl *klip)
{
	kmem_free(klip, sizeof (struct lofi_ioctl));
}

/*
 * Return the minor number 'filename' is mapped to, if it is.
 */
static int
file_to_minor(char *filename)
{
	minor_t	minor;
	struct lofi_state *lsp;

	ASSERT(mutex_owned(&lofi_lock));
	/* minor 0 is the control device; mappings start at 1 */
	for (minor = 1; minor <= lofi_max_files; minor++) {
		lsp = ddi_get_soft_state(lofi_statep, minor);
		if (lsp == NULL)
			continue;
		if (strcmp(lsp->ls_filename, filename) == 0)
			return (minor);
	}
	return (0);
}

/*
 * lofiadm does some validation, but since Joe Random (or crashme) could
 * do our ioctls, we need to do some validation too.
 *
 * Returns 1 if 'filename' is an acceptable backing file path: absolute,
 * and not itself a lofi block or character device.  Returns 0 otherwise.
 */
static int
valid_filename(const char *filename)
{
	static char *blkprefix = "/dev/" LOFI_BLOCK_NAME "/";
	static char *charprefix = "/dev/" LOFI_CHAR_NAME "/";

	/* must be absolute path */
	if (filename[0] != '/')
		return (0);
	/* must not be lofi */
	if (strncmp(filename, blkprefix, strlen(blkprefix)) == 0)
		return (0);
	if (strncmp(filename, charprefix, strlen(charprefix)) == 0)
		return (0);
	return (1);
}

/*
 * Fakes up a disk geometry, and one big partition, based on the size
 * of the file. This is needed because we allow newfs'ing the device,
 * and newfs will do several disk ioctls to figure out the geometry and
 * partition information. It uses that information to determine the parameters
 * to pass to mkfs. Geometry is pretty much irrelevant these days, but we
 * have to support it.
 */
static void
fake_disk_geometry(struct lofi_state *lsp)
{
	/* dk_geom - see dkio(7I) */
	/*
	 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs
	 * of sectors), but that breaks programs like fdisk which want to
	 * partition a disk by cylinder. With one cylinder, you can't create
	 * an fdisk partition and put pcfs on it for testing (hard to pick
	 * a number between one and one).
	 *
	 * The cheezy floppy test is an attempt to not have too few cylinders
	 * for a small file, or so many on a big file that you waste space
	 * for backup superblocks or cylinder group structures.
	 */
	if (lsp->ls_vp_size < (2 * 1024 * 1024)) /* floppy? */
		lsp->ls_dkg.dkg_ncyl = lsp->ls_vp_size / (100 * 1024);
	else
		lsp->ls_dkg.dkg_ncyl = lsp->ls_vp_size / (300 * 1024);
	/* in case the file is < 100k */
	if (lsp->ls_dkg.dkg_ncyl == 0)
		lsp->ls_dkg.dkg_ncyl = 1;
	lsp->ls_dkg.dkg_acyl = 0;
	lsp->ls_dkg.dkg_bcyl = 0;
	lsp->ls_dkg.dkg_nhead = 1;
	lsp->ls_dkg.dkg_obs1 = 0;
	lsp->ls_dkg.dkg_intrlv = 0;
	lsp->ls_dkg.dkg_obs2 = 0;
	lsp->ls_dkg.dkg_obs3 = 0;
	lsp->ls_dkg.dkg_apc = 0;
	lsp->ls_dkg.dkg_rpm = 7200;
	lsp->ls_dkg.dkg_pcyl = lsp->ls_dkg.dkg_ncyl + lsp->ls_dkg.dkg_acyl;
	lsp->ls_dkg.dkg_nsect = lsp->ls_vp_size /
	    (DEV_BSIZE * lsp->ls_dkg.dkg_ncyl);
	lsp->ls_dkg.dkg_write_reinstruct = 0;
	lsp->ls_dkg.dkg_read_reinstruct = 0;

	/* vtoc - see dkio(7I) */
	bzero(&lsp->ls_vtoc, sizeof (struct vtoc));
	lsp->ls_vtoc.v_sanity = VTOC_SANE;
	lsp->ls_vtoc.v_version = V_VERSION;
	/*
	 * NOTE(review): copies a fixed 7 bytes from LOFI_DRIVER_NAME;
	 * if that literal (defined in sys/lofi.h) is shorter than 7
	 * bytes this reads past the end of the string constant --
	 * confirm, and consider a bounded string copy instead.
	 */
	bcopy(LOFI_DRIVER_NAME, lsp->ls_vtoc.v_volume, 7);
	lsp->ls_vtoc.v_sectorsz = DEV_BSIZE;
	lsp->ls_vtoc.v_nparts = 1;
	lsp->ls_vtoc.v_part[0].p_tag = V_UNASSIGNED;

	/*
	 * A compressed file is read-only, other files can
	 * be read-write
	 */
	if (lsp->ls_uncomp_seg_sz > 0) {
		lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT | V_RONLY;
	} else {
		lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT;
	}
	lsp->ls_vtoc.v_part[0].p_start = (daddr_t)0;
	/*
	 * The partition size cannot just be the number of sectors, because
	 * that might not end on a cylinder boundary. And if that's the case,
	 * newfs/mkfs will print a scary warning. So just figure the size
	 * based on the number of cylinders and sectors/cylinder.
	 */
	lsp->ls_vtoc.v_part[0].p_size = lsp->ls_dkg.dkg_pcyl *
	    lsp->ls_dkg.dkg_nsect * lsp->ls_dkg.dkg_nhead;

	/* dk_cinfo - see dkio(7I) */
	bzero(&lsp->ls_ci, sizeof (struct dk_cinfo));
	(void) strcpy(lsp->ls_ci.dki_cname, LOFI_DRIVER_NAME);
	lsp->ls_ci.dki_ctype = DKC_MD;
	lsp->ls_ci.dki_flags = 0;
	lsp->ls_ci.dki_cnum = 0;
	lsp->ls_ci.dki_addr = 0;
	lsp->ls_ci.dki_space = 0;
	lsp->ls_ci.dki_prio = 0;
	lsp->ls_ci.dki_vec = 0;
	(void) strcpy(lsp->ls_ci.dki_dname, LOFI_DRIVER_NAME);
	lsp->ls_ci.dki_unit = 0;
	lsp->ls_ci.dki_slave = 0;
	lsp->ls_ci.dki_partition = 0;
	/*
	 * newfs uses this to set maxcontig. Must not be < 16, or it
	 * will be 0 when newfs multiplies it by DEV_BSIZE and divides
	 * it by the block size. Then tunefs doesn't work because
	 * maxcontig is 0.
	 */
	lsp->ls_ci.dki_maxtransfer = 16;
}

/*
 * map in a compressed file
 *
 * Read in the header and the index that follows.
 *
 * The header is as follows -
 *
 * Signature (name of the compression algorithm)
 * Compression segment size (a multiple of 512)
 * Number of index entries
 * Size of the last block
 * The array containing the index entries
 *
 * The header information is always stored in
 * network byte order on disk.
1024 */ 1025 static int 1026 lofi_map_compressed_file(struct lofi_state *lsp, char *buf) 1027 { 1028 uint32_t index_sz, header_len, i; 1029 ssize_t resid; 1030 enum uio_rw rw; 1031 char *tbuf = buf; 1032 int error; 1033 1034 /* The signature has already been read */ 1035 tbuf += sizeof (lsp->ls_comp_algorithm); 1036 bcopy(tbuf, &(lsp->ls_uncomp_seg_sz), sizeof (lsp->ls_uncomp_seg_sz)); 1037 lsp->ls_uncomp_seg_sz = ntohl(lsp->ls_uncomp_seg_sz); 1038 1039 /* 1040 * The compressed segment size must be a power of 2 1041 */ 1042 if (lsp->ls_uncomp_seg_sz % 2) 1043 return (EINVAL); 1044 1045 for (i = 0; !((lsp->ls_uncomp_seg_sz >> i) & 1); i++) 1046 ; 1047 1048 lsp->ls_comp_seg_shift = i; 1049 1050 tbuf += sizeof (lsp->ls_uncomp_seg_sz); 1051 bcopy(tbuf, &(lsp->ls_comp_index_sz), sizeof (lsp->ls_comp_index_sz)); 1052 lsp->ls_comp_index_sz = ntohl(lsp->ls_comp_index_sz); 1053 1054 tbuf += sizeof (lsp->ls_comp_index_sz); 1055 bcopy(tbuf, &(lsp->ls_uncomp_last_seg_sz), 1056 sizeof (lsp->ls_uncomp_last_seg_sz)); 1057 lsp->ls_uncomp_last_seg_sz = ntohl(lsp->ls_uncomp_last_seg_sz); 1058 1059 /* 1060 * Compute the total size of the uncompressed data 1061 * for use in fake_disk_geometry and other calculations. 1062 * Disk geometry has to be faked with respect to the 1063 * actual uncompressed data size rather than the 1064 * compressed file size. 
1065 */ 1066 lsp->ls_vp_size = (lsp->ls_comp_index_sz - 2) * lsp->ls_uncomp_seg_sz 1067 + lsp->ls_uncomp_last_seg_sz; 1068 1069 /* 1070 * Index size is rounded up to a 512 byte boundary for ease 1071 * of segmapping 1072 */ 1073 index_sz = sizeof (*lsp->ls_comp_seg_index) * lsp->ls_comp_index_sz; 1074 header_len = sizeof (lsp->ls_comp_algorithm) + 1075 sizeof (lsp->ls_uncomp_seg_sz) + 1076 sizeof (lsp->ls_comp_index_sz) + 1077 sizeof (lsp->ls_uncomp_last_seg_sz); 1078 lsp->ls_comp_offbase = header_len + index_sz; 1079 1080 index_sz += header_len; 1081 index_sz = roundup(index_sz, DEV_BSIZE); 1082 1083 lsp->ls_comp_index_data = kmem_alloc(index_sz, KM_SLEEP); 1084 lsp->ls_comp_index_data_sz = index_sz; 1085 1086 /* 1087 * Read in the index -- this has a side-effect 1088 * of reading in the header as well 1089 */ 1090 rw = UIO_READ; 1091 error = vn_rdwr(rw, lsp->ls_vp, lsp->ls_comp_index_data, index_sz, 1092 0, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); 1093 1094 if (error != 0) 1095 return (error); 1096 1097 /* Skip the header, this is where the index really begins */ 1098 lsp->ls_comp_seg_index = 1099 /*LINTED*/ 1100 (uint64_t *)(lsp->ls_comp_index_data + header_len); 1101 1102 /* 1103 * Now recompute offsets in the index to account for 1104 * the header length 1105 */ 1106 for (i = 0; i < lsp->ls_comp_index_sz; i++) { 1107 lsp->ls_comp_seg_index[i] = lsp->ls_comp_offbase + 1108 BE_64(lsp->ls_comp_seg_index[i]); 1109 } 1110 1111 return (error); 1112 } 1113 1114 /* 1115 * Check to see if the passed in signature is a valid 1116 * one. If it is valid, return the index into 1117 * lofi_compress_table. 1118 * 1119 * Return -1 if it is invalid 1120 */ 1121 static int lofi_compress_select(char *signature) 1122 { 1123 int i; 1124 1125 for (i = 0; i < LOFI_COMPRESS_FUNCTIONS; i++) { 1126 if (strcmp(lofi_compress_table[i].l_name, signature) == 0) 1127 return (i); 1128 } 1129 1130 return (-1); 1131 } 1132 1133 /* 1134 * map a file to a minor number. 
Return the minor number. 1135 */ 1136 static int 1137 lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor, 1138 int *rvalp, struct cred *credp, int ioctl_flag) 1139 { 1140 minor_t newminor; 1141 struct lofi_state *lsp; 1142 struct lofi_ioctl *klip; 1143 int error; 1144 struct vnode *vp; 1145 int64_t Nblocks_prop_val; 1146 int64_t Size_prop_val; 1147 int compress_index; 1148 vattr_t vattr; 1149 int flag; 1150 enum vtype v_type; 1151 int zalloced = 0; 1152 dev_t newdev; 1153 char namebuf[50]; 1154 char buf[DEV_BSIZE]; 1155 char *tbuf; 1156 ssize_t resid; 1157 enum uio_rw rw; 1158 1159 klip = copy_in_lofi_ioctl(ulip, ioctl_flag); 1160 if (klip == NULL) 1161 return (EFAULT); 1162 1163 mutex_enter(&lofi_lock); 1164 1165 if (!valid_filename(klip->li_filename)) { 1166 error = EINVAL; 1167 goto out; 1168 } 1169 1170 if (file_to_minor(klip->li_filename) != 0) { 1171 error = EBUSY; 1172 goto out; 1173 } 1174 1175 if (pickminor) { 1176 /* Find a free one */ 1177 for (newminor = 1; newminor <= lofi_max_files; newminor++) 1178 if (ddi_get_soft_state(lofi_statep, newminor) == NULL) 1179 break; 1180 if (newminor >= lofi_max_files) { 1181 error = EAGAIN; 1182 goto out; 1183 } 1184 } else { 1185 newminor = klip->li_minor; 1186 if (ddi_get_soft_state(lofi_statep, newminor) != NULL) { 1187 error = EEXIST; 1188 goto out; 1189 } 1190 } 1191 1192 /* make sure it's valid */ 1193 error = lookupname(klip->li_filename, UIO_SYSSPACE, FOLLOW, 1194 NULLVPP, &vp); 1195 if (error) { 1196 goto out; 1197 } 1198 v_type = vp->v_type; 1199 VN_RELE(vp); 1200 if (!V_ISLOFIABLE(v_type)) { 1201 error = EINVAL; 1202 goto out; 1203 } 1204 flag = FREAD | FWRITE | FOFFMAX | FEXCL; 1205 error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0); 1206 if (error) { 1207 /* try read-only */ 1208 flag &= ~FWRITE; 1209 error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, 1210 &vp, 0, 0); 1211 if (error) { 1212 goto out; 1213 } 1214 } 1215 vattr.va_mask = AT_SIZE; 1216 error = 
VOP_GETATTR(vp, &vattr, 0, credp, NULL); 1217 if (error) { 1218 goto closeout; 1219 } 1220 /* the file needs to be a multiple of the block size */ 1221 if ((vattr.va_size % DEV_BSIZE) != 0) { 1222 error = EINVAL; 1223 goto closeout; 1224 } 1225 newdev = makedevice(getmajor(dev), newminor); 1226 Size_prop_val = vattr.va_size; 1227 if ((ddi_prop_update_int64(newdev, lofi_dip, 1228 SIZE_PROP_NAME, Size_prop_val)) != DDI_PROP_SUCCESS) { 1229 error = EINVAL; 1230 goto closeout; 1231 } 1232 Nblocks_prop_val = vattr.va_size / DEV_BSIZE; 1233 if ((ddi_prop_update_int64(newdev, lofi_dip, 1234 NBLOCKS_PROP_NAME, Nblocks_prop_val)) != DDI_PROP_SUCCESS) { 1235 error = EINVAL; 1236 goto propout; 1237 } 1238 error = ddi_soft_state_zalloc(lofi_statep, newminor); 1239 if (error == DDI_FAILURE) { 1240 error = ENOMEM; 1241 goto propout; 1242 } 1243 zalloced = 1; 1244 (void) snprintf(namebuf, sizeof (namebuf), "%d", newminor); 1245 (void) ddi_create_minor_node(lofi_dip, namebuf, S_IFBLK, newminor, 1246 DDI_PSEUDO, NULL); 1247 if (error != DDI_SUCCESS) { 1248 error = ENXIO; 1249 goto propout; 1250 } 1251 (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", newminor); 1252 error = ddi_create_minor_node(lofi_dip, namebuf, S_IFCHR, newminor, 1253 DDI_PSEUDO, NULL); 1254 if (error != DDI_SUCCESS) { 1255 /* remove block node */ 1256 (void) snprintf(namebuf, sizeof (namebuf), "%d", newminor); 1257 ddi_remove_minor_node(lofi_dip, namebuf); 1258 error = ENXIO; 1259 goto propout; 1260 } 1261 lsp = ddi_get_soft_state(lofi_statep, newminor); 1262 lsp->ls_filename_sz = strlen(klip->li_filename) + 1; 1263 lsp->ls_filename = kmem_alloc(lsp->ls_filename_sz, KM_SLEEP); 1264 (void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d", 1265 LOFI_DRIVER_NAME, newminor); 1266 lsp->ls_taskq = taskq_create(namebuf, lofi_taskq_nthreads, 1267 minclsyspri, 1, lofi_taskq_maxalloc, 0); 1268 lsp->ls_kstat = kstat_create(LOFI_DRIVER_NAME, newminor, 1269 NULL, "disk", KSTAT_TYPE_IO, 1, 0); 1270 if (lsp->ls_kstat) { 
1271 mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL); 1272 lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock; 1273 kstat_install(lsp->ls_kstat); 1274 } 1275 cv_init(&lsp->ls_vp_cv, NULL, CV_DRIVER, NULL); 1276 mutex_init(&lsp->ls_vp_lock, NULL, MUTEX_DRIVER, NULL); 1277 1278 /* 1279 * save open mode so file can be closed properly and vnode counts 1280 * updated correctly. 1281 */ 1282 lsp->ls_openflag = flag; 1283 1284 /* 1285 * Try to handle stacked lofs vnodes. 1286 */ 1287 if (vp->v_type == VREG) { 1288 if (VOP_REALVP(vp, &lsp->ls_vp, NULL) != 0) { 1289 lsp->ls_vp = vp; 1290 } else { 1291 /* 1292 * Even though vp was obtained via vn_open(), we 1293 * can't call vn_close() on it, since lofs will 1294 * pass the VOP_CLOSE() on down to the realvp 1295 * (which we are about to use). Hence we merely 1296 * drop the reference to the lofs vnode and hold 1297 * the realvp so things behave as if we've 1298 * opened the realvp without any interaction 1299 * with lofs. 1300 */ 1301 VN_HOLD(lsp->ls_vp); 1302 VN_RELE(vp); 1303 } 1304 } else { 1305 lsp->ls_vp = vp; 1306 } 1307 lsp->ls_vp_size = vattr.va_size; 1308 (void) strcpy(lsp->ls_filename, klip->li_filename); 1309 if (rvalp) 1310 *rvalp = (int)newminor; 1311 klip->li_minor = newminor; 1312 1313 /* 1314 * Read the file signature to check if it is compressed. 1315 * 'rw' is set to read since only reads are allowed to 1316 * a compressed file. 
1317 */ 1318 rw = UIO_READ; 1319 error = vn_rdwr(rw, lsp->ls_vp, buf, DEV_BSIZE, 0, UIO_SYSSPACE, 1320 0, RLIM64_INFINITY, kcred, &resid); 1321 1322 if (error != 0) 1323 goto propout; 1324 1325 tbuf = buf; 1326 lsp->ls_uncomp_seg_sz = 0; 1327 lsp->ls_vp_comp_size = lsp->ls_vp_size; 1328 lsp->ls_comp_algorithm[0] = '\0'; 1329 1330 compress_index = lofi_compress_select(tbuf); 1331 if (compress_index != -1) { 1332 lsp->ls_comp_algorithm_index = compress_index; 1333 (void) strlcpy(lsp->ls_comp_algorithm, 1334 lofi_compress_table[compress_index].l_name, 1335 sizeof (lsp->ls_comp_algorithm)); 1336 error = lofi_map_compressed_file(lsp, buf); 1337 if (error != 0) 1338 goto propout; 1339 1340 /* update DDI properties */ 1341 Size_prop_val = lsp->ls_vp_size; 1342 if ((ddi_prop_update_int64(newdev, lofi_dip, SIZE_PROP_NAME, 1343 Size_prop_val)) != DDI_PROP_SUCCESS) { 1344 error = EINVAL; 1345 goto propout; 1346 } 1347 1348 Nblocks_prop_val = lsp->ls_vp_size / DEV_BSIZE; 1349 if ((ddi_prop_update_int64(newdev, lofi_dip, NBLOCKS_PROP_NAME, 1350 Nblocks_prop_val)) != DDI_PROP_SUCCESS) { 1351 error = EINVAL; 1352 goto propout; 1353 } 1354 } 1355 1356 fake_disk_geometry(lsp); 1357 mutex_exit(&lofi_lock); 1358 (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 1359 free_lofi_ioctl(klip); 1360 return (0); 1361 1362 propout: 1363 (void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME); 1364 (void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME); 1365 closeout: 1366 (void) VOP_CLOSE(vp, flag, 1, 0, credp, NULL); 1367 VN_RELE(vp); 1368 out: 1369 if (zalloced) 1370 ddi_soft_state_free(lofi_statep, newminor); 1371 mutex_exit(&lofi_lock); 1372 free_lofi_ioctl(klip); 1373 return (error); 1374 } 1375 1376 /* 1377 * unmap a file. 
1378 */ 1379 static int 1380 lofi_unmap_file(dev_t dev, struct lofi_ioctl *ulip, int byfilename, 1381 struct cred *credp, int ioctl_flag) 1382 { 1383 struct lofi_state *lsp; 1384 struct lofi_ioctl *klip; 1385 minor_t minor; 1386 1387 klip = copy_in_lofi_ioctl(ulip, ioctl_flag); 1388 if (klip == NULL) 1389 return (EFAULT); 1390 1391 mutex_enter(&lofi_lock); 1392 if (byfilename) { 1393 minor = file_to_minor(klip->li_filename); 1394 } else { 1395 minor = klip->li_minor; 1396 } 1397 if (minor == 0) { 1398 mutex_exit(&lofi_lock); 1399 free_lofi_ioctl(klip); 1400 return (ENXIO); 1401 } 1402 lsp = ddi_get_soft_state(lofi_statep, minor); 1403 if (lsp == NULL || lsp->ls_vp == NULL) { 1404 mutex_exit(&lofi_lock); 1405 free_lofi_ioctl(klip); 1406 return (ENXIO); 1407 } 1408 1409 /* 1410 * If it's still held open, we'll do one of three things: 1411 * 1412 * If no flag is set, just return EBUSY. 1413 * 1414 * If the 'cleanup' flag is set, unmap and remove the device when 1415 * the last user finishes. 1416 * 1417 * If the 'force' flag is set, then we forcibly close the underlying 1418 * file. Subsequent operations will fail, and the DKIOCSTATE ioctl 1419 * will return DKIO_DEV_GONE. When the device is last closed, the 1420 * device will be cleaned up appropriately. 1421 * 1422 * This is complicated by the fact that we may have outstanding 1423 * dispatched I/Os. Rather than having a single mutex to serialize all 1424 * I/O, we keep a count of the number of outstanding I/O requests, as 1425 * well as a flag to indicate that no new I/Os should be dispatched. 1426 * We set the flag, wait for the number of outstanding I/Os to reach 0, 1427 * and then close the underlying vnode. 
1428 */ 1429 1430 if (is_opened(lsp)) { 1431 if (klip->li_force) { 1432 mutex_enter(&lsp->ls_vp_lock); 1433 lsp->ls_vp_closereq = B_TRUE; 1434 while (lsp->ls_vp_iocount > 0) 1435 cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock); 1436 (void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag, 1, 0, 1437 credp, NULL); 1438 VN_RELE(lsp->ls_vp); 1439 lsp->ls_vp = NULL; 1440 cv_broadcast(&lsp->ls_vp_cv); 1441 mutex_exit(&lsp->ls_vp_lock); 1442 mutex_exit(&lofi_lock); 1443 klip->li_minor = minor; 1444 (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 1445 free_lofi_ioctl(klip); 1446 return (0); 1447 } else if (klip->li_cleanup) { 1448 lsp->ls_cleanup = 1; 1449 mutex_exit(&lofi_lock); 1450 free_lofi_ioctl(klip); 1451 return (0); 1452 } 1453 1454 mutex_exit(&lofi_lock); 1455 free_lofi_ioctl(klip); 1456 return (EBUSY); 1457 } 1458 1459 if (lsp->ls_uncomp_seg_sz > 0) { 1460 kmem_free(lsp->ls_comp_index_data, lsp->ls_comp_index_data_sz); 1461 lsp->ls_uncomp_seg_sz = 0; 1462 } 1463 1464 lofi_free_handle(dev, minor, lsp, credp); 1465 1466 klip->li_minor = minor; 1467 mutex_exit(&lofi_lock); 1468 (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 1469 free_lofi_ioctl(klip); 1470 return (0); 1471 } 1472 1473 /* 1474 * get the filename given the minor number, or the minor number given 1475 * the name. 
1476 */ 1477 /*ARGSUSED*/ 1478 static int 1479 lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which, 1480 struct cred *credp, int ioctl_flag) 1481 { 1482 struct lofi_state *lsp; 1483 struct lofi_ioctl *klip; 1484 int error; 1485 minor_t minor; 1486 1487 klip = copy_in_lofi_ioctl(ulip, ioctl_flag); 1488 if (klip == NULL) 1489 return (EFAULT); 1490 1491 switch (which) { 1492 case LOFI_GET_FILENAME: 1493 minor = klip->li_minor; 1494 if (minor == 0) { 1495 free_lofi_ioctl(klip); 1496 return (EINVAL); 1497 } 1498 1499 mutex_enter(&lofi_lock); 1500 lsp = ddi_get_soft_state(lofi_statep, minor); 1501 if (lsp == NULL) { 1502 mutex_exit(&lofi_lock); 1503 free_lofi_ioctl(klip); 1504 return (ENXIO); 1505 } 1506 (void) strcpy(klip->li_filename, lsp->ls_filename); 1507 (void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm, 1508 sizeof (klip->li_algorithm)); 1509 mutex_exit(&lofi_lock); 1510 error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 1511 free_lofi_ioctl(klip); 1512 return (error); 1513 case LOFI_GET_MINOR: 1514 mutex_enter(&lofi_lock); 1515 klip->li_minor = file_to_minor(klip->li_filename); 1516 mutex_exit(&lofi_lock); 1517 if (klip->li_minor == 0) { 1518 free_lofi_ioctl(klip); 1519 return (ENOENT); 1520 } 1521 error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 1522 free_lofi_ioctl(klip); 1523 return (error); 1524 case LOFI_CHECK_COMPRESSED: 1525 mutex_enter(&lofi_lock); 1526 klip->li_minor = file_to_minor(klip->li_filename); 1527 mutex_exit(&lofi_lock); 1528 if (klip->li_minor == 0) { 1529 free_lofi_ioctl(klip); 1530 return (ENOENT); 1531 } 1532 mutex_enter(&lofi_lock); 1533 lsp = ddi_get_soft_state(lofi_statep, klip->li_minor); 1534 if (lsp == NULL) { 1535 mutex_exit(&lofi_lock); 1536 free_lofi_ioctl(klip); 1537 return (ENXIO); 1538 } 1539 ASSERT(strcmp(klip->li_filename, lsp->ls_filename) == 0); 1540 1541 (void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm, 1542 sizeof (klip->li_algorithm)); 1543 mutex_exit(&lofi_lock); 1544 error = 
copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 1545 free_lofi_ioctl(klip); 1546 return (error); 1547 default: 1548 free_lofi_ioctl(klip); 1549 return (EINVAL); 1550 } 1551 1552 } 1553 1554 static int 1555 lofi_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, 1556 int *rvalp) 1557 { 1558 int error; 1559 enum dkio_state dkstate; 1560 struct lofi_state *lsp; 1561 minor_t minor; 1562 1563 #ifdef lint 1564 credp = credp; 1565 #endif 1566 1567 minor = getminor(dev); 1568 /* lofi ioctls only apply to the master device */ 1569 if (minor == 0) { 1570 struct lofi_ioctl *lip = (struct lofi_ioctl *)arg; 1571 1572 /* 1573 * the query command only need read-access - i.e., normal 1574 * users are allowed to do those on the ctl device as 1575 * long as they can open it read-only. 1576 */ 1577 switch (cmd) { 1578 case LOFI_MAP_FILE: 1579 if ((flag & FWRITE) == 0) 1580 return (EPERM); 1581 return (lofi_map_file(dev, lip, 1, rvalp, credp, flag)); 1582 case LOFI_MAP_FILE_MINOR: 1583 if ((flag & FWRITE) == 0) 1584 return (EPERM); 1585 return (lofi_map_file(dev, lip, 0, rvalp, credp, flag)); 1586 case LOFI_UNMAP_FILE: 1587 if ((flag & FWRITE) == 0) 1588 return (EPERM); 1589 return (lofi_unmap_file(dev, lip, 1, credp, flag)); 1590 case LOFI_UNMAP_FILE_MINOR: 1591 if ((flag & FWRITE) == 0) 1592 return (EPERM); 1593 return (lofi_unmap_file(dev, lip, 0, credp, flag)); 1594 case LOFI_GET_FILENAME: 1595 return (lofi_get_info(dev, lip, LOFI_GET_FILENAME, 1596 credp, flag)); 1597 case LOFI_GET_MINOR: 1598 return (lofi_get_info(dev, lip, LOFI_GET_MINOR, 1599 credp, flag)); 1600 case LOFI_GET_MAXMINOR: 1601 error = ddi_copyout(&lofi_max_files, &lip->li_minor, 1602 sizeof (lofi_max_files), flag); 1603 if (error) 1604 return (EFAULT); 1605 return (0); 1606 case LOFI_CHECK_COMPRESSED: 1607 return (lofi_get_info(dev, lip, LOFI_CHECK_COMPRESSED, 1608 credp, flag)); 1609 default: 1610 break; 1611 } 1612 } 1613 1614 lsp = ddi_get_soft_state(lofi_statep, minor); 1615 if (lsp == NULL) 1616 
return (ENXIO); 1617 1618 /* 1619 * We explicitly allow DKIOCSTATE, but all other ioctls should fail with 1620 * EIO as if the device was no longer present. 1621 */ 1622 if (lsp->ls_vp == NULL && cmd != DKIOCSTATE) 1623 return (EIO); 1624 1625 /* these are for faking out utilities like newfs */ 1626 switch (cmd) { 1627 case DKIOCGVTOC: 1628 switch (ddi_model_convert_from(flag & FMODELS)) { 1629 case DDI_MODEL_ILP32: { 1630 struct vtoc32 vtoc32; 1631 1632 vtoctovtoc32(lsp->ls_vtoc, vtoc32); 1633 if (ddi_copyout(&vtoc32, (void *)arg, 1634 sizeof (struct vtoc32), flag)) 1635 return (EFAULT); 1636 break; 1637 } 1638 1639 case DDI_MODEL_NONE: 1640 if (ddi_copyout(&lsp->ls_vtoc, (void *)arg, 1641 sizeof (struct vtoc), flag)) 1642 return (EFAULT); 1643 break; 1644 } 1645 return (0); 1646 case DKIOCINFO: 1647 error = ddi_copyout(&lsp->ls_ci, (void *)arg, 1648 sizeof (struct dk_cinfo), flag); 1649 if (error) 1650 return (EFAULT); 1651 return (0); 1652 case DKIOCG_VIRTGEOM: 1653 case DKIOCG_PHYGEOM: 1654 case DKIOCGGEOM: 1655 error = ddi_copyout(&lsp->ls_dkg, (void *)arg, 1656 sizeof (struct dk_geom), flag); 1657 if (error) 1658 return (EFAULT); 1659 return (0); 1660 case DKIOCSTATE: 1661 /* 1662 * Normally, lofi devices are always in the INSERTED state. If 1663 * a device is forcefully unmapped, then the device transitions 1664 * to the DKIO_DEV_GONE state. 1665 */ 1666 if (ddi_copyin((void *)arg, &dkstate, sizeof (dkstate), 1667 flag) != 0) 1668 return (EFAULT); 1669 1670 mutex_enter(&lsp->ls_vp_lock); 1671 while ((dkstate == DKIO_INSERTED && lsp->ls_vp != NULL) || 1672 (dkstate == DKIO_DEV_GONE && lsp->ls_vp == NULL)) { 1673 /* 1674 * By virtue of having the device open, we know that 1675 * 'lsp' will remain valid when we return. 1676 */ 1677 if (!cv_wait_sig(&lsp->ls_vp_cv, 1678 &lsp->ls_vp_lock)) { 1679 mutex_exit(&lsp->ls_vp_lock); 1680 return (EINTR); 1681 } 1682 } 1683 1684 dkstate = (lsp->ls_vp != NULL ? 
DKIO_INSERTED : DKIO_DEV_GONE); 1685 mutex_exit(&lsp->ls_vp_lock); 1686 1687 if (ddi_copyout(&dkstate, (void *)arg, 1688 sizeof (dkstate), flag) != 0) 1689 return (EFAULT); 1690 return (0); 1691 default: 1692 return (ENOTTY); 1693 } 1694 } 1695 1696 static struct cb_ops lofi_cb_ops = { 1697 lofi_open, /* open */ 1698 lofi_close, /* close */ 1699 lofi_strategy, /* strategy */ 1700 nodev, /* print */ 1701 nodev, /* dump */ 1702 lofi_read, /* read */ 1703 lofi_write, /* write */ 1704 lofi_ioctl, /* ioctl */ 1705 nodev, /* devmap */ 1706 nodev, /* mmap */ 1707 nodev, /* segmap */ 1708 nochpoll, /* poll */ 1709 ddi_prop_op, /* prop_op */ 1710 0, /* streamtab */ 1711 D_64BIT | D_NEW | D_MP, /* Driver compatibility flag */ 1712 CB_REV, 1713 lofi_aread, 1714 lofi_awrite 1715 }; 1716 1717 static struct dev_ops lofi_ops = { 1718 DEVO_REV, /* devo_rev, */ 1719 0, /* refcnt */ 1720 lofi_info, /* info */ 1721 nulldev, /* identify */ 1722 nulldev, /* probe */ 1723 lofi_attach, /* attach */ 1724 lofi_detach, /* detach */ 1725 nodev, /* reset */ 1726 &lofi_cb_ops, /* driver operations */ 1727 NULL /* no bus operations */ 1728 }; 1729 1730 static struct modldrv modldrv = { 1731 &mod_driverops, 1732 "loopback file driver (%I%)", 1733 &lofi_ops, 1734 }; 1735 1736 static struct modlinkage modlinkage = { 1737 MODREV_1, 1738 &modldrv, 1739 NULL 1740 }; 1741 1742 int 1743 _init(void) 1744 { 1745 int error; 1746 1747 error = ddi_soft_state_init(&lofi_statep, 1748 sizeof (struct lofi_state), 0); 1749 if (error) 1750 return (error); 1751 1752 mutex_init(&lofi_lock, NULL, MUTEX_DRIVER, NULL); 1753 error = mod_install(&modlinkage); 1754 if (error) { 1755 mutex_destroy(&lofi_lock); 1756 ddi_soft_state_fini(&lofi_statep); 1757 } 1758 1759 return (error); 1760 } 1761 1762 int 1763 _fini(void) 1764 { 1765 int error; 1766 1767 if (lofi_busy()) 1768 return (EBUSY); 1769 1770 error = mod_remove(&modlinkage); 1771 if (error) 1772 return (error); 1773 1774 mutex_destroy(&lofi_lock); 1775 
ddi_soft_state_fini(&lofi_statep); 1776 1777 return (error); 1778 } 1779 1780 int 1781 _info(struct modinfo *modinfop) 1782 { 1783 return (mod_info(&modlinkage, modinfop)); 1784 } 1785