1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * lofi (loopback file) driver - allows you to attach a file to a device, 30 * which can then be accessed through that device. The simple model is that 31 * you tell lofi to open a file, and then use the block device you get as 32 * you would any block device. lofi translates access to the block device 33 * into I/O on the underlying file. This is mostly useful for 34 * mounting images of filesystems. 35 * 36 * lofi is controlled through /dev/lofictl - this is the only device exported 37 * during attach, and is minor number 0. lofiadm communicates with lofi through 38 * ioctls on this device. When a file is attached to lofi, block and character 39 * devices are exported in /dev/lofi and /dev/rlofi. Currently, these devices 40 * are identified by their minor number, and the minor number is also used 41 * as the name in /dev/lofi. 
If we ever decide to support virtual disks, 42 * we'll have to divide the minor number space to identify fdisk partitions 43 * and slices, and the name will then be the minor number shifted down a 44 * few bits. Minor devices are tracked with state structures handled with 45 * ddi_soft_state(9F) for simplicity. 46 * 47 * A file attached to lofi is opened when attached and not closed until 48 * explicitly detached from lofi. This seems more sensible than deferring 49 * the open until the /dev/lofi device is opened, for a number of reasons. 50 * One is that any failure is likely to be noticed by the person (or script) 51 * running lofiadm. Another is that it would be a security problem if the 52 * file was replaced by another one after being added but before being opened. 53 * 54 * The only hard part about lofi is the ioctls. In order to support things 55 * like 'newfs' on a lofi device, it needs to support certain disk ioctls. 56 * So it has to fake disk geometry and partition information. More may need 57 * to be faked if your favorite utility doesn't work and you think it should 58 * (fdformat doesn't work because it really wants to know the type of floppy 59 * controller to talk to, and that didn't seem easy to fake. Or possibly even 60 * necessary, since we have mkfs_pcfs now). 61 * 62 * Known problems: 63 * 64 * UFS logging. Mounting a UFS filesystem image "logging" 65 * works for basic copy testing but wedges during a build of ON through 66 * that image. Some deadlock in lufs holding the log mutex and then 67 * getting stuck on a buf. So for now, don't do that. 68 * 69 * Direct I/O. Since the filesystem data is being cached in the buffer 70 * cache, _and_ again in the underlying filesystem, it's tempting to 71 * enable direct I/O on the underlying file. Don't, because that deadlocks. 72 * I think to fix the cache-twice problem we might need filesystem support. 73 * 74 * lofi on itself. 
The simple lock strategy (lofi_lock) precludes this 75 * because you'll be in lofi_ioctl, holding the lock when you open the 76 * file, which, if it's lofi, will grab lofi_lock. We prevent this for 77 * now, though not using ddi_soft_state(9F) would make it possible to 78 * do. Though it would still be silly. 79 * 80 * Interesting things to do: 81 * 82 * Allow multiple files for each device. A poor-man's metadisk, basically. 83 * 84 * Pass-through ioctls on block devices. You can (though it's not 85 * documented), give lofi a block device as a file name. Then we shouldn't 86 * need to fake a geometry. But this is also silly unless you're replacing 87 * metadisk. 88 * 89 * Encryption. tpm would like this. Apparently Windows 2000 has it, and 90 * so does Linux. 91 */ 92 93 #include <sys/types.h> 94 #include <sys/sysmacros.h> 95 #include <sys/cmn_err.h> 96 #include <sys/uio.h> 97 #include <sys/kmem.h> 98 #include <sys/cred.h> 99 #include <sys/mman.h> 100 #include <sys/errno.h> 101 #include <sys/aio_req.h> 102 #include <sys/stat.h> 103 #include <sys/file.h> 104 #include <sys/modctl.h> 105 #include <sys/conf.h> 106 #include <sys/debug.h> 107 #include <sys/vnode.h> 108 #include <sys/lofi.h> 109 #include <sys/fcntl.h> 110 #include <sys/pathname.h> 111 #include <sys/filio.h> 112 #include <sys/fdio.h> 113 #include <sys/open.h> 114 #include <sys/disp.h> 115 #include <vm/seg_map.h> 116 #include <sys/ddi.h> 117 #include <sys/sunddi.h> 118 119 /* seems safer than having to get the string right many times */ 120 #define NBLOCKS_PROP_NAME "Nblocks" 121 #define SIZE_PROP_NAME "Size" 122 123 static dev_info_t *lofi_dip; 124 static void *lofi_statep; 125 static kmutex_t lofi_lock; /* state lock */ 126 127 /* 128 * Because lofi_taskq_nthreads limits the actual swamping of the device, the 129 * maxalloc parameter (lofi_taskq_maxalloc) should be tuned conservatively 130 * high. 
If we want to be assured that the underlying device is always busy, 131 * we must be sure that the number of bytes enqueued when the number of 132 * enqueued tasks exceeds maxalloc is sufficient to keep the device busy for 133 * the duration of the sleep time in taskq_ent_alloc(). That is, lofi should 134 * set maxalloc to be the maximum throughput (in bytes per second) of the 135 * underlying device divided by the minimum I/O size. We assume a realistic 136 * maximum throughput of one hundred megabytes per second; we set maxalloc on 137 * the lofi task queue to be 104857600 divided by DEV_BSIZE. 138 */ 139 static int lofi_taskq_maxalloc = 104857600 / DEV_BSIZE; 140 static int lofi_taskq_nthreads = 4; /* # of taskq threads per device */ 141 142 uint32_t lofi_max_files = LOFI_MAX_FILES; 143 144 static int 145 lofi_busy(void) 146 { 147 minor_t minor; 148 149 /* 150 * We need to make sure no mappings exist - mod_remove won't 151 * help because the device isn't open. 152 */ 153 mutex_enter(&lofi_lock); 154 for (minor = 1; minor <= lofi_max_files; minor++) { 155 if (ddi_get_soft_state(lofi_statep, minor) != NULL) { 156 mutex_exit(&lofi_lock); 157 return (EBUSY); 158 } 159 } 160 mutex_exit(&lofi_lock); 161 return (0); 162 } 163 164 static int 165 is_opened(struct lofi_state *lsp) 166 { 167 ASSERT(mutex_owned(&lofi_lock)); 168 return (lsp->ls_chr_open || lsp->ls_blk_open || lsp->ls_lyr_open_count); 169 } 170 171 static int 172 mark_opened(struct lofi_state *lsp, int otyp) 173 { 174 ASSERT(mutex_owned(&lofi_lock)); 175 switch (otyp) { 176 case OTYP_CHR: 177 lsp->ls_chr_open = 1; 178 break; 179 case OTYP_BLK: 180 lsp->ls_blk_open = 1; 181 break; 182 case OTYP_LYR: 183 lsp->ls_lyr_open_count++; 184 break; 185 default: 186 return (-1); 187 } 188 return (0); 189 } 190 191 static void 192 mark_closed(struct lofi_state *lsp, int otyp) 193 { 194 ASSERT(mutex_owned(&lofi_lock)); 195 switch (otyp) { 196 case OTYP_CHR: 197 lsp->ls_chr_open = 0; 198 break; 199 case OTYP_BLK: 200 
lsp->ls_blk_open = 0; 201 break; 202 case OTYP_LYR: 203 lsp->ls_lyr_open_count--; 204 break; 205 default: 206 break; 207 } 208 } 209 210 /*ARGSUSED3*/ 211 static int 212 lofi_open(dev_t *devp, int flag, int otyp, struct cred *credp) 213 { 214 minor_t minor; 215 struct lofi_state *lsp; 216 217 mutex_enter(&lofi_lock); 218 minor = getminor(*devp); 219 if (minor == 0) { 220 /* master control device */ 221 /* must be opened exclusively */ 222 if (((flag & FEXCL) != FEXCL) || (otyp != OTYP_CHR)) { 223 mutex_exit(&lofi_lock); 224 return (EINVAL); 225 } 226 lsp = ddi_get_soft_state(lofi_statep, 0); 227 if (lsp == NULL) { 228 mutex_exit(&lofi_lock); 229 return (ENXIO); 230 } 231 if (is_opened(lsp)) { 232 mutex_exit(&lofi_lock); 233 return (EBUSY); 234 } 235 (void) mark_opened(lsp, OTYP_CHR); 236 mutex_exit(&lofi_lock); 237 return (0); 238 } 239 240 /* otherwise, the mapping should already exist */ 241 lsp = ddi_get_soft_state(lofi_statep, minor); 242 if (lsp == NULL) { 243 mutex_exit(&lofi_lock); 244 return (EINVAL); 245 } 246 247 if (mark_opened(lsp, otyp) == -1) { 248 mutex_exit(&lofi_lock); 249 return (EINVAL); 250 } 251 252 mutex_exit(&lofi_lock); 253 return (0); 254 } 255 256 /*ARGSUSED3*/ 257 static int 258 lofi_close(dev_t dev, int flag, int otyp, struct cred *credp) 259 { 260 minor_t minor; 261 struct lofi_state *lsp; 262 263 #ifdef lint 264 flag = flag; 265 #endif 266 mutex_enter(&lofi_lock); 267 minor = getminor(dev); 268 lsp = ddi_get_soft_state(lofi_statep, minor); 269 if (lsp == NULL) { 270 mutex_exit(&lofi_lock); 271 return (EINVAL); 272 } 273 mark_closed(lsp, otyp); 274 mutex_exit(&lofi_lock); 275 return (0); 276 } 277 278 /* 279 * This is basically what strategy used to be before we found we 280 * needed task queues. 
 *
 * Runs on the per-device task queue with the buf handed over by
 * lofi_strategy() as 'arg'.  Moves the data between the buf and the
 * backing vnode (through segmap when the vnode is mappable and not a
 * character device, otherwise through vn_rdwr()), updates the I/O kstat
 * if there is one, and completes the buf with bioerror()/biodone().
 */
static void
lofi_strategy_task(void *arg)
{
	struct buf *bp = (struct buf *)arg;
	int error;
	struct lofi_state *lsp;
	offset_t	offset, alignedoffset;
	offset_t	mapoffset;
	caddr_t	bufaddr;
	caddr_t	mapaddr;
	size_t	xfersize;
	size_t	len;
	int	isread;
	int	smflags;
	enum seg_rw srw;

	lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev));
	/* the request was put on the wait queue by lofi_strategy() */
	if (lsp->ls_kstat) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_to_runq(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}
	bp_mapin(bp);
	bufaddr = bp->b_un.b_addr;
	offset = bp->b_lblkno * DEV_BSIZE;	/* offset within file */

	/*
	 * We used to always use vn_rdwr here, but we cannot do that because
	 * we might decide to read or write from the underlying
	 * file during this call, which would be a deadlock because
	 * we have the rw_lock. So instead we page, unless it's not
	 * mapable or it's a character device.
	 */
	if (((lsp->ls_vp->v_flag & VNOMAP) == 0) &&
	    (lsp->ls_vp->v_type != VCHR)) {
		/*
		 * segmap always gives us an 8K (MAXBSIZE) chunk, aligned on
		 * an 8K boundary, but the buf transfer address may not be
		 * aligned on more than a 512-byte boundary (we don't
		 * enforce that, though we could). This matters since the
		 * initial part of the transfer may not start at offset 0
		 * within the segmap'd chunk. So we have to compensate for
		 * that with 'mapoffset'. Subsequent chunks always start
		 * off at the beginning, and the last is capped by b_resid.
		 */
		mapoffset = offset & MAXBOFFSET;
		alignedoffset = offset - mapoffset;	/* now map-aligned */
		bp->b_resid = bp->b_bcount;
		isread = bp->b_flags & B_READ;
		srw = isread ? S_READ : S_WRITE;
		do {
			/*
			 * Never transfer past the end of the file, the end
			 * of the current chunk, or the end of the request.
			 */
			xfersize = MIN(lsp->ls_vp_size - offset,
			    MIN(MAXBSIZE - mapoffset, bp->b_resid));
			len = roundup(mapoffset + xfersize, PAGESIZE);
			mapaddr = segmap_getmapflt(segkmap, lsp->ls_vp,
			    alignedoffset, MAXBSIZE, 1, srw);
			/*
			 * Now fault in the pages. This lets us check
			 * for errors before we reference mapaddr and
			 * try to resolve the fault in bcopy (which would
			 * panic instead). And this can easily happen,
			 * particularly if you've lofi'd a file over NFS
			 * and someone deletes the file on the server.
			 */
			error = segmap_fault(kas.a_hat, segkmap, mapaddr,
			    len, F_SOFTLOCK, srw);
			if (error) {
				(void) segmap_release(segkmap, mapaddr, 0);
				/* turn the fault code into an errno */
				if (FC_CODE(error) == FC_OBJERR)
					error = FC_ERRNO(error);
				else
					error = EIO;
				break;
			}
			smflags = 0;
			if (isread) {
				bcopy(mapaddr + mapoffset, bufaddr, xfersize);
			} else {
				smflags |= SM_WRITE;
				bcopy(bufaddr, mapaddr + mapoffset, xfersize);
			}
			bp->b_resid -= xfersize;
			bufaddr += xfersize;
			offset += xfersize;
			/* undo the F_SOFTLOCK above before releasing */
			(void) segmap_fault(kas.a_hat, segkmap, mapaddr,
			    len, F_SOFTUNLOCK, srw);
			error = segmap_release(segkmap, mapaddr, smflags);
			/* only the first map may start partial */
			mapoffset = 0;
			alignedoffset += MAXBSIZE;
		} while ((error == 0) && (bp->b_resid > 0) &&
		    (offset < lsp->ls_vp_size));
	} else {
		ssize_t	resid;
		enum uio_rw rw;

		if (bp->b_flags & B_READ)
			rw = UIO_READ;
		else
			rw = UIO_WRITE;
		error = vn_rdwr(rw, lsp->ls_vp, bufaddr, bp->b_bcount,
		    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
		bp->b_resid = resid;
	}

	/* account for the bytes actually moved and leave the run queue */
	if (lsp->ls_kstat) {
		size_t n_done = bp->b_bcount - bp->b_resid;
		kstat_io_t *kioptr;

		mutex_enter(lsp->ls_kstat->ks_lock);
		kioptr = KSTAT_IO_PTR(lsp->ls_kstat);
		if (bp->b_flags & B_READ) {
			kioptr->nread += n_done;
			kioptr->reads++;
		} else {
			kioptr->nwritten += n_done;
			kioptr->writes++;
		}
		kstat_runq_exit(kioptr);
		mutex_exit(lsp->ls_kstat->ks_lock);
	}
	bioerror(bp, error);
	biodone(bp);
}

/*
 * strategy(9E) entry point.  Requests that start exactly at the end of
 * the file complete immediately (reads transfer nothing, writes fail with
 * ENXIO), requests that start beyond it fail with ENXIO, and everything
 * else is handed to lofi_strategy_task() on the device's task queue.
 * Always returns 0; errors are reported through the buf.
 */
static int
lofi_strategy(struct buf *bp)
{
	struct lofi_state *lsp;
	offset_t	offset;

	/*
	 * We cannot just do I/O here, because the current thread
	 * _might_ end up back in here because the underlying filesystem
	 * wants a buffer, which eventually gets into bio_recycle and
	 * might call into lofi to write out a delayed-write buffer.
	 * This is bad if the filesystem above lofi is the same as below.
	 *
	 * We could come up with a complex strategy using threads to
	 * do the I/O asynchronously, or we could use task queues. task
	 * queues were incredibly easy so they win.
	 */
	lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev));
	offset = bp->b_lblkno * DEV_BSIZE;	/* offset within file */
	if (offset == lsp->ls_vp_size) {
		/* EOF */
		if ((bp->b_flags & B_READ) != 0) {
			bp->b_resid = bp->b_bcount;
			bioerror(bp, 0);
		} else {
			/* writes should fail */
			bioerror(bp, ENXIO);
		}
		biodone(bp);
		return (0);
	}
	if (offset > lsp->ls_vp_size) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}
	/* lofi_strategy_task() moves this from the wait to the run queue */
	if (lsp->ls_kstat) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_enter(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}
	(void) taskq_dispatch(lsp->ls_taskq, lofi_strategy_task, bp, KM_SLEEP);
	return (0);
}

/*
 * read(9E) entry point: raw reads go through physio() to lofi_strategy().
 * Not supported on the control device (minor 0).
 */
/*ARGSUSED2*/
static int
lofi_read(dev_t dev, struct uio *uio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	return (physio(lofi_strategy, NULL, dev, B_READ, minphys, uio));
}

/*
 * write(9E) entry point: raw writes go through physio() to
 * lofi_strategy().  Not supported on the control device (minor 0).
 */
/*ARGSUSED2*/
static int
lofi_write(dev_t dev, struct uio *uio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	return (physio(lofi_strategy, NULL, dev, B_WRITE, minphys, uio));
}

/*
 * aread(9E) entry point: asynchronous raw reads go through aphysio() to
 * lofi_strategy().  Not supported on the control device (minor 0).
 */
/*ARGSUSED2*/
static int
lofi_aread(dev_t dev, struct aio_req *aio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	return (aphysio(lofi_strategy, anocancel, dev, B_READ, minphys, aio));
}

/*
 * awrite(9E) entry point: asynchronous raw writes go through aphysio() to
 * lofi_strategy().  Not supported on the control device (minor 0).
 */
/*ARGSUSED2*/
static int
lofi_awrite(dev_t dev, struct aio_req *aio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	return (aphysio(lofi_strategy, anocancel, dev, B_WRITE, minphys, aio));
}

/*
 * getinfo(9E) entry point.  Every dev_t maps to the single saved
 * dev_info (lofi_dip) and to instance 0; any other query fails.
 */
/*ARGSUSED*/
static int
lofi_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = lofi_dip;
		return (DDI_SUCCESS);
	case DDI_INFO_DEVT2INSTANCE:
		*result = 0;
		return (DDI_SUCCESS);
	}
	return (DDI_FAILURE);
}

/*
 * attach(9E) entry point.  Only DDI_ATTACH is handled: allocate the soft
 * state for minor 0 and create the control node (LOFI_CTL_NODE) on it,
 * freeing the soft state again if the node cannot be created.
 */
static int
lofi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	error;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);
	error = ddi_soft_state_zalloc(lofi_statep, 0);
	if (error == DDI_FAILURE) {
		return (DDI_FAILURE);
	}
	error = ddi_create_minor_node(dip, LOFI_CTL_NODE, S_IFCHR, 0,
	    DDI_PSEUDO, NULL);
	if (error == DDI_FAILURE) {
		ddi_soft_state_free(lofi_statep, 0);
		return (DDI_FAILURE);
	}
	lofi_dip = dip;
	ddi_report_dev(dip);
	return (DDI_SUCCESS);
}

/*
 * detach(9E) entry point.  Only DDI_DETACH is handled, and it is refused
 * while lofi_busy() reports that a file is still mapped.  Otherwise all
 * minor nodes and the minor 0 soft state are removed.
 */
static int
lofi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);
	if (lofi_busy())
		return (DDI_FAILURE);
	lofi_dip = NULL;
	ddi_remove_minor_node(dip, NULL);
	ddi_soft_state_free(lofi_statep, 0);
	return (DDI_SUCCESS);
}

/*
 * These two just simplify the rest of the ioctls that need to copyin/out
 * the lofi_ioctl structure.
541 */ 542 struct lofi_ioctl * 543 copy_in_lofi_ioctl(const struct lofi_ioctl *ulip, int flag) 544 { 545 struct lofi_ioctl *klip; 546 int error; 547 548 klip = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP); 549 error = ddi_copyin(ulip, klip, sizeof (struct lofi_ioctl), flag); 550 if (error) { 551 kmem_free(klip, sizeof (struct lofi_ioctl)); 552 return (NULL); 553 } 554 555 /* make sure filename is always null-terminated */ 556 klip->li_filename[MAXPATHLEN] = '\0'; 557 558 /* validate minor number */ 559 if (klip->li_minor > lofi_max_files) { 560 kmem_free(klip, sizeof (struct lofi_ioctl)); 561 return (NULL); 562 } 563 return (klip); 564 } 565 566 int 567 copy_out_lofi_ioctl(const struct lofi_ioctl *klip, struct lofi_ioctl *ulip, 568 int flag) 569 { 570 int error; 571 572 error = ddi_copyout(klip, ulip, sizeof (struct lofi_ioctl), flag); 573 if (error) 574 return (EFAULT); 575 return (0); 576 } 577 578 void 579 free_lofi_ioctl(struct lofi_ioctl *klip) 580 { 581 kmem_free(klip, sizeof (struct lofi_ioctl)); 582 } 583 584 /* 585 * Return the minor number 'filename' is mapped to, if it is. 586 */ 587 static int 588 file_to_minor(char *filename) 589 { 590 minor_t minor; 591 struct lofi_state *lsp; 592 593 ASSERT(mutex_owned(&lofi_lock)); 594 for (minor = 1; minor <= lofi_max_files; minor++) { 595 lsp = ddi_get_soft_state(lofi_statep, minor); 596 if (lsp == NULL) 597 continue; 598 if (strcmp(lsp->ls_filename, filename) == 0) 599 return (minor); 600 } 601 return (0); 602 } 603 604 /* 605 * lofiadm does some validation, but since Joe Random (or crashme) could 606 * do our ioctls, we need to do some validation too. 
607 */ 608 static int 609 valid_filename(const char *filename) 610 { 611 static char *blkprefix = "/dev/" LOFI_BLOCK_NAME "/"; 612 static char *charprefix = "/dev/" LOFI_CHAR_NAME "/"; 613 614 /* must be absolute path */ 615 if (filename[0] != '/') 616 return (0); 617 /* must not be lofi */ 618 if (strncmp(filename, blkprefix, strlen(blkprefix)) == 0) 619 return (0); 620 if (strncmp(filename, charprefix, strlen(charprefix)) == 0) 621 return (0); 622 return (1); 623 } 624 625 /* 626 * Fakes up a disk geometry, and one big partition, based on the size 627 * of the file. This is needed because we allow newfs'ing the device, 628 * and newfs will do several disk ioctls to figure out the geometry and 629 * partition information. It uses that information to determine the parameters 630 * to pass to mkfs. Geometry is pretty much irrelevant these days, but we 631 * have to support it. 632 */ 633 static void 634 fake_disk_geometry(struct lofi_state *lsp) 635 { 636 /* dk_geom - see dkio(7I) */ 637 /* 638 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs 639 * of sectors), but that breaks programs like fdisk which want to 640 * partition a disk by cylinder. With one cylinder, you can't create 641 * an fdisk partition and put pcfs on it for testing (hard to pick 642 * a number between one and one). 643 * 644 * The cheezy floppy test is an attempt to not have too few cylinders 645 * for a small file, or so many on a big file that you waste space 646 * for backup superblocks or cylinder group structures. 647 */ 648 if (lsp->ls_vp_size < (2 * 1024 * 1024)) /* floppy? 
*/ 649 lsp->ls_dkg.dkg_ncyl = lsp->ls_vp_size / (100 * 1024); 650 else 651 lsp->ls_dkg.dkg_ncyl = lsp->ls_vp_size / (300 * 1024); 652 /* in case file file is < 100k */ 653 if (lsp->ls_dkg.dkg_ncyl == 0) 654 lsp->ls_dkg.dkg_ncyl = 1; 655 lsp->ls_dkg.dkg_acyl = 0; 656 lsp->ls_dkg.dkg_bcyl = 0; 657 lsp->ls_dkg.dkg_nhead = 1; 658 lsp->ls_dkg.dkg_obs1 = 0; 659 lsp->ls_dkg.dkg_intrlv = 0; 660 lsp->ls_dkg.dkg_obs2 = 0; 661 lsp->ls_dkg.dkg_obs3 = 0; 662 lsp->ls_dkg.dkg_apc = 0; 663 lsp->ls_dkg.dkg_rpm = 7200; 664 lsp->ls_dkg.dkg_pcyl = lsp->ls_dkg.dkg_ncyl + lsp->ls_dkg.dkg_acyl; 665 lsp->ls_dkg.dkg_nsect = lsp->ls_vp_size / 666 (DEV_BSIZE * lsp->ls_dkg.dkg_ncyl); 667 lsp->ls_dkg.dkg_write_reinstruct = 0; 668 lsp->ls_dkg.dkg_read_reinstruct = 0; 669 670 /* vtoc - see dkio(7I) */ 671 bzero(&lsp->ls_vtoc, sizeof (struct vtoc)); 672 lsp->ls_vtoc.v_sanity = VTOC_SANE; 673 lsp->ls_vtoc.v_version = V_VERSION; 674 bcopy(LOFI_DRIVER_NAME, lsp->ls_vtoc.v_volume, 7); 675 lsp->ls_vtoc.v_sectorsz = DEV_BSIZE; 676 lsp->ls_vtoc.v_nparts = 1; 677 lsp->ls_vtoc.v_part[0].p_tag = V_UNASSIGNED; 678 lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT; 679 lsp->ls_vtoc.v_part[0].p_start = (daddr_t)0; 680 /* 681 * The partition size cannot just be the number of sectors, because 682 * that might not end on a cylinder boundary. And if that's the case, 683 * newfs/mkfs will print a scary warning. So just figure the size 684 * based on the number of cylinders and sectors/cylinder. 
685 */ 686 lsp->ls_vtoc.v_part[0].p_size = lsp->ls_dkg.dkg_pcyl * 687 lsp->ls_dkg.dkg_nsect * lsp->ls_dkg.dkg_nhead; 688 689 /* dk_cinfo - see dkio(7I) */ 690 bzero(&lsp->ls_ci, sizeof (struct dk_cinfo)); 691 (void) strcpy(lsp->ls_ci.dki_cname, LOFI_DRIVER_NAME); 692 lsp->ls_ci.dki_ctype = DKC_MD; 693 lsp->ls_ci.dki_flags = 0; 694 lsp->ls_ci.dki_cnum = 0; 695 lsp->ls_ci.dki_addr = 0; 696 lsp->ls_ci.dki_space = 0; 697 lsp->ls_ci.dki_prio = 0; 698 lsp->ls_ci.dki_vec = 0; 699 (void) strcpy(lsp->ls_ci.dki_dname, LOFI_DRIVER_NAME); 700 lsp->ls_ci.dki_unit = 0; 701 lsp->ls_ci.dki_slave = 0; 702 lsp->ls_ci.dki_partition = 0; 703 /* 704 * newfs uses this to set maxcontig. Must not be < 16, or it 705 * will be 0 when newfs multiplies it by DEV_BSIZE and divides 706 * it by the block size. Then tunefs doesn't work because 707 * maxcontig is 0. 708 */ 709 lsp->ls_ci.dki_maxtransfer = 16; 710 } 711 712 /* 713 * map a file to a minor number. Return the minor number. 714 */ 715 static int 716 lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor, 717 int *rvalp, struct cred *credp, int ioctl_flag) 718 { 719 minor_t newminor; 720 struct lofi_state *lsp; 721 struct lofi_ioctl *klip; 722 int error; 723 char namebuf[50]; 724 struct vnode *vp; 725 int64_t Nblocks_prop_val; 726 int64_t Size_prop_val; 727 vattr_t vattr; 728 int flag; 729 enum vtype v_type; 730 dev_t newdev; 731 int zalloced = 0; 732 733 klip = copy_in_lofi_ioctl(ulip, ioctl_flag); 734 if (klip == NULL) 735 return (EFAULT); 736 737 mutex_enter(&lofi_lock); 738 739 if (!valid_filename(klip->li_filename)) { 740 error = EINVAL; 741 goto out; 742 } 743 744 if (file_to_minor(klip->li_filename) != 0) { 745 error = EBUSY; 746 goto out; 747 } 748 749 if (pickminor) { 750 /* Find a free one */ 751 for (newminor = 1; newminor <= lofi_max_files; newminor++) 752 if (ddi_get_soft_state(lofi_statep, newminor) == NULL) 753 break; 754 if (newminor >= lofi_max_files) { 755 error = EAGAIN; 756 goto out; 757 } 758 } else { 759 
newminor = klip->li_minor; 760 if (ddi_get_soft_state(lofi_statep, newminor) != NULL) { 761 error = EEXIST; 762 goto out; 763 } 764 } 765 766 /* make sure it's valid */ 767 error = lookupname(klip->li_filename, UIO_SYSSPACE, FOLLOW, 768 NULLVPP, &vp); 769 if (error) { 770 goto out; 771 } 772 v_type = vp->v_type; 773 VN_RELE(vp); 774 if (!V_ISLOFIABLE(v_type)) { 775 error = EINVAL; 776 goto out; 777 } 778 flag = FREAD | FWRITE | FOFFMAX | FEXCL; 779 error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0); 780 if (error) { 781 /* try read-only */ 782 flag &= ~FWRITE; 783 error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, 784 &vp, 0, 0); 785 if (error) { 786 goto out; 787 } 788 } 789 vattr.va_mask = AT_SIZE; 790 error = VOP_GETATTR(vp, &vattr, 0, credp); 791 if (error) { 792 goto closeout; 793 } 794 /* the file needs to be a multiple of the block size */ 795 if ((vattr.va_size % DEV_BSIZE) != 0) { 796 error = EINVAL; 797 goto closeout; 798 } 799 newdev = makedevice(getmajor(dev), newminor); 800 Size_prop_val = vattr.va_size; 801 if ((ddi_prop_update_int64(newdev, lofi_dip, 802 SIZE_PROP_NAME, Size_prop_val)) != DDI_PROP_SUCCESS) { 803 error = EINVAL; 804 goto closeout; 805 } 806 Nblocks_prop_val = vattr.va_size / DEV_BSIZE; 807 if ((ddi_prop_update_int64(newdev, lofi_dip, 808 NBLOCKS_PROP_NAME, Nblocks_prop_val)) != DDI_PROP_SUCCESS) { 809 error = EINVAL; 810 goto propout; 811 } 812 error = ddi_soft_state_zalloc(lofi_statep, newminor); 813 if (error == DDI_FAILURE) { 814 error = ENOMEM; 815 goto propout; 816 } 817 zalloced = 1; 818 (void) snprintf(namebuf, sizeof (namebuf), "%d", newminor); 819 (void) ddi_create_minor_node(lofi_dip, namebuf, S_IFBLK, newminor, 820 DDI_PSEUDO, NULL); 821 if (error != DDI_SUCCESS) { 822 error = ENXIO; 823 goto propout; 824 } 825 (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", newminor); 826 error = ddi_create_minor_node(lofi_dip, namebuf, S_IFCHR, newminor, 827 DDI_PSEUDO, NULL); 828 if (error != DDI_SUCCESS) { 
829 /* remove block node */ 830 (void) snprintf(namebuf, sizeof (namebuf), "%d", newminor); 831 ddi_remove_minor_node(lofi_dip, namebuf); 832 error = ENXIO; 833 goto propout; 834 } 835 lsp = ddi_get_soft_state(lofi_statep, newminor); 836 lsp->ls_filename_sz = strlen(klip->li_filename) + 1; 837 lsp->ls_filename = kmem_alloc(lsp->ls_filename_sz, KM_SLEEP); 838 (void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d", 839 LOFI_DRIVER_NAME, newminor); 840 lsp->ls_taskq = taskq_create(namebuf, lofi_taskq_nthreads, 841 minclsyspri, 1, lofi_taskq_maxalloc, 0); 842 lsp->ls_kstat = kstat_create(LOFI_DRIVER_NAME, newminor, 843 NULL, "disk", KSTAT_TYPE_IO, 1, 0); 844 if (lsp->ls_kstat) { 845 mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL); 846 lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock; 847 kstat_install(lsp->ls_kstat); 848 } 849 /* 850 * save open mode so file can be closed properly and vnode counts 851 * updated correctly. 852 */ 853 lsp->ls_openflag = flag; 854 855 /* 856 * Try to handle stacked lofs vnodes. 857 */ 858 if (vp->v_type == VREG) { 859 if (VOP_REALVP(vp, &lsp->ls_vp) != 0) { 860 lsp->ls_vp = vp; 861 } else { 862 /* 863 * Even though vp was obtained via vn_open(), we 864 * can't call vn_close() on it, since lofs will 865 * pass the VOP_CLOSE() on down to the realvp 866 * (which we are about to use). Hence we merely 867 * drop the reference to the lofs vnode and hold 868 * the realvp so things behave as if we've 869 * opened the realvp without any interaction 870 * with lofs. 
871 */ 872 VN_HOLD(lsp->ls_vp); 873 VN_RELE(vp); 874 } 875 } else { 876 lsp->ls_vp = vp; 877 } 878 lsp->ls_vp_size = vattr.va_size; 879 (void) strcpy(lsp->ls_filename, klip->li_filename); 880 if (rvalp) 881 *rvalp = (int)newminor; 882 klip->li_minor = newminor; 883 884 fake_disk_geometry(lsp); 885 mutex_exit(&lofi_lock); 886 (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 887 free_lofi_ioctl(klip); 888 return (0); 889 890 propout: 891 (void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME); 892 (void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME); 893 closeout: 894 (void) VOP_CLOSE(vp, flag, 1, 0, credp); 895 VN_RELE(vp); 896 out: 897 if (zalloced) 898 ddi_soft_state_free(lofi_statep, newminor); 899 mutex_exit(&lofi_lock); 900 free_lofi_ioctl(klip); 901 return (error); 902 } 903 904 /* 905 * unmap a file. 906 */ 907 static int 908 lofi_unmap_file(dev_t dev, struct lofi_ioctl *ulip, int byfilename, 909 struct cred *credp, int ioctl_flag) 910 { 911 struct lofi_state *lsp; 912 struct lofi_ioctl *klip; 913 minor_t minor; 914 char namebuf[20]; 915 dev_t newdev; 916 917 klip = copy_in_lofi_ioctl(ulip, ioctl_flag); 918 if (klip == NULL) 919 return (EFAULT); 920 921 mutex_enter(&lofi_lock); 922 if (byfilename) { 923 minor = file_to_minor(klip->li_filename); 924 } else { 925 minor = klip->li_minor; 926 } 927 if (minor == 0) { 928 mutex_exit(&lofi_lock); 929 free_lofi_ioctl(klip); 930 return (ENXIO); 931 } 932 lsp = ddi_get_soft_state(lofi_statep, minor); 933 if (lsp == NULL) { 934 mutex_exit(&lofi_lock); 935 free_lofi_ioctl(klip); 936 return (ENXIO); 937 } 938 if (is_opened(lsp)) { 939 mutex_exit(&lofi_lock); 940 free_lofi_ioctl(klip); 941 return (EBUSY); 942 } 943 /* 944 * Use saved open mode to properly update vnode counts 945 */ 946 (void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag, 1, 0, credp); 947 VN_RELE(lsp->ls_vp); 948 lsp->ls_vp = NULL; 949 newdev = makedevice(getmajor(dev), minor); 950 (void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME); 951 (void) 
ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME); 952 953 (void) snprintf(namebuf, sizeof (namebuf), "%d", minor); 954 ddi_remove_minor_node(lofi_dip, namebuf); 955 (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor); 956 ddi_remove_minor_node(lofi_dip, namebuf); 957 958 kmem_free(lsp->ls_filename, lsp->ls_filename_sz); 959 taskq_destroy(lsp->ls_taskq); 960 if (lsp->ls_kstat) { 961 kstat_delete(lsp->ls_kstat); 962 mutex_destroy(&lsp->ls_kstat_lock); 963 } 964 ddi_soft_state_free(lofi_statep, minor); 965 klip->li_minor = minor; 966 mutex_exit(&lofi_lock); 967 (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 968 free_lofi_ioctl(klip); 969 return (0); 970 } 971 972 /* 973 * get the filename given the minor number, or the minor number given 974 * the name. 975 */ 976 /*ARGSUSED3*/ 977 static int 978 lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which, 979 struct cred *credp, int ioctl_flag) 980 { 981 struct lofi_state *lsp; 982 struct lofi_ioctl *klip; 983 int error; 984 minor_t minor; 985 986 #ifdef lint 987 dev = dev; 988 #endif 989 klip = copy_in_lofi_ioctl(ulip, ioctl_flag); 990 if (klip == NULL) 991 return (EFAULT); 992 993 switch (which) { 994 case LOFI_GET_FILENAME: 995 minor = klip->li_minor; 996 if (minor == 0) { 997 free_lofi_ioctl(klip); 998 return (EINVAL); 999 } 1000 1001 mutex_enter(&lofi_lock); 1002 lsp = ddi_get_soft_state(lofi_statep, minor); 1003 if (lsp == NULL) { 1004 mutex_exit(&lofi_lock); 1005 free_lofi_ioctl(klip); 1006 return (ENXIO); 1007 } 1008 (void) strcpy(klip->li_filename, lsp->ls_filename); 1009 mutex_exit(&lofi_lock); 1010 error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 1011 free_lofi_ioctl(klip); 1012 return (error); 1013 case LOFI_GET_MINOR: 1014 mutex_enter(&lofi_lock); 1015 klip->li_minor = file_to_minor(klip->li_filename); 1016 mutex_exit(&lofi_lock); 1017 if (klip->li_minor == 0) { 1018 free_lofi_ioctl(klip); 1019 return (ENOENT); 1020 } 1021 error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 1022 
free_lofi_ioctl(klip); 1023 return (error); 1024 default: 1025 free_lofi_ioctl(klip); 1026 return (EINVAL); 1027 } 1028 1029 } 1030 1031 static int 1032 lofi_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, 1033 int *rvalp) 1034 { 1035 int error; 1036 enum dkio_state dkstate; 1037 struct lofi_state *lsp; 1038 minor_t minor; 1039 1040 #ifdef lint 1041 credp = credp; 1042 #endif 1043 1044 minor = getminor(dev); 1045 /* lofi ioctls only apply to the master device */ 1046 if (minor == 0) { 1047 struct lofi_ioctl *lip = (struct lofi_ioctl *)arg; 1048 1049 /* 1050 * the query command only need read-access - i.e., normal 1051 * users are allowed to do those on the ctl device as 1052 * long as they can open it read-only. 1053 */ 1054 switch (cmd) { 1055 case LOFI_MAP_FILE: 1056 if ((flag & FWRITE) == 0) 1057 return (EPERM); 1058 return (lofi_map_file(dev, lip, 1, rvalp, credp, flag)); 1059 case LOFI_MAP_FILE_MINOR: 1060 if ((flag & FWRITE) == 0) 1061 return (EPERM); 1062 return (lofi_map_file(dev, lip, 0, rvalp, credp, flag)); 1063 case LOFI_UNMAP_FILE: 1064 if ((flag & FWRITE) == 0) 1065 return (EPERM); 1066 return (lofi_unmap_file(dev, lip, 1, credp, flag)); 1067 case LOFI_UNMAP_FILE_MINOR: 1068 if ((flag & FWRITE) == 0) 1069 return (EPERM); 1070 return (lofi_unmap_file(dev, lip, 0, credp, flag)); 1071 case LOFI_GET_FILENAME: 1072 return (lofi_get_info(dev, lip, LOFI_GET_FILENAME, 1073 credp, flag)); 1074 case LOFI_GET_MINOR: 1075 return (lofi_get_info(dev, lip, LOFI_GET_MINOR, 1076 credp, flag)); 1077 case LOFI_GET_MAXMINOR: 1078 error = ddi_copyout(&lofi_max_files, &lip->li_minor, 1079 sizeof (lofi_max_files), flag); 1080 if (error) 1081 return (EFAULT); 1082 return (0); 1083 default: 1084 break; 1085 } 1086 } 1087 1088 lsp = ddi_get_soft_state(lofi_statep, minor); 1089 if (lsp == NULL) 1090 return (ENXIO); 1091 1092 /* these are for faking out utilities like newfs */ 1093 switch (cmd) { 1094 case DKIOCGVTOC: 1095 switch (ddi_model_convert_from(flag & 
FMODELS)) { 1096 case DDI_MODEL_ILP32: { 1097 struct vtoc32 vtoc32; 1098 1099 vtoctovtoc32(lsp->ls_vtoc, vtoc32); 1100 if (ddi_copyout(&vtoc32, (void *)arg, 1101 sizeof (struct vtoc32), flag)) 1102 return (EFAULT); 1103 break; 1104 } 1105 1106 case DDI_MODEL_NONE: 1107 if (ddi_copyout(&lsp->ls_vtoc, (void *)arg, 1108 sizeof (struct vtoc), flag)) 1109 return (EFAULT); 1110 break; 1111 } 1112 return (0); 1113 case DKIOCINFO: 1114 error = ddi_copyout(&lsp->ls_ci, (void *)arg, 1115 sizeof (struct dk_cinfo), flag); 1116 if (error) 1117 return (EFAULT); 1118 return (0); 1119 case DKIOCG_VIRTGEOM: 1120 case DKIOCG_PHYGEOM: 1121 case DKIOCGGEOM: 1122 error = ddi_copyout(&lsp->ls_dkg, (void *)arg, 1123 sizeof (struct dk_geom), flag); 1124 if (error) 1125 return (EFAULT); 1126 return (0); 1127 case DKIOCSTATE: 1128 /* the file is always there */ 1129 dkstate = DKIO_INSERTED; 1130 error = ddi_copyout(&dkstate, (void *)arg, 1131 sizeof (enum dkio_state), flag); 1132 if (error) 1133 return (EFAULT); 1134 return (0); 1135 default: 1136 return (ENOTTY); 1137 } 1138 } 1139 1140 static struct cb_ops lofi_cb_ops = { 1141 lofi_open, /* open */ 1142 lofi_close, /* close */ 1143 lofi_strategy, /* strategy */ 1144 nodev, /* print */ 1145 nodev, /* dump */ 1146 lofi_read, /* read */ 1147 lofi_write, /* write */ 1148 lofi_ioctl, /* ioctl */ 1149 nodev, /* devmap */ 1150 nodev, /* mmap */ 1151 nodev, /* segmap */ 1152 nochpoll, /* poll */ 1153 ddi_prop_op, /* prop_op */ 1154 0, /* streamtab */ 1155 D_64BIT | D_NEW | D_MP, /* Driver compatibility flag */ 1156 CB_REV, 1157 lofi_aread, 1158 lofi_awrite 1159 }; 1160 1161 static struct dev_ops lofi_ops = { 1162 DEVO_REV, /* devo_rev, */ 1163 0, /* refcnt */ 1164 lofi_info, /* info */ 1165 nulldev, /* identify */ 1166 nulldev, /* probe */ 1167 lofi_attach, /* attach */ 1168 lofi_detach, /* detach */ 1169 nodev, /* reset */ 1170 &lofi_cb_ops, /* driver operations */ 1171 NULL /* no bus operations */ 1172 }; 1173 1174 static struct modldrv modldrv 
= { 1175 &mod_driverops, 1176 "loopback file driver (%I%)", 1177 &lofi_ops, 1178 }; 1179 1180 static struct modlinkage modlinkage = { 1181 MODREV_1, 1182 &modldrv, 1183 NULL 1184 }; 1185 1186 int 1187 _init(void) 1188 { 1189 int error; 1190 1191 error = ddi_soft_state_init(&lofi_statep, 1192 sizeof (struct lofi_state), 0); 1193 if (error) 1194 return (error); 1195 1196 mutex_init(&lofi_lock, NULL, MUTEX_DRIVER, NULL); 1197 error = mod_install(&modlinkage); 1198 if (error) { 1199 mutex_destroy(&lofi_lock); 1200 ddi_soft_state_fini(&lofi_statep); 1201 } 1202 1203 return (error); 1204 } 1205 1206 int 1207 _fini(void) 1208 { 1209 int error; 1210 1211 if (lofi_busy()) 1212 return (EBUSY); 1213 1214 error = mod_remove(&modlinkage); 1215 if (error) 1216 return (error); 1217 1218 mutex_destroy(&lofi_lock); 1219 ddi_soft_state_fini(&lofi_statep); 1220 1221 return (error); 1222 } 1223 1224 int 1225 _info(struct modinfo *modinfop) 1226 { 1227 return (mod_info(&modlinkage, modinfop)); 1228 } 1229