1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * lofi (loopback file) driver - allows you to attach a file to a device, 31 * which can then be accessed through that device. The simple model is that 32 * you tell lofi to open a file, and then use the block device you get as 33 * you would any block device. lofi translates access to the block device 34 * into I/O on the underlying file. This is mostly useful for 35 * mounting images of filesystems. 36 * 37 * lofi is controlled through /dev/lofictl - this is the only device exported 38 * during attach, and is minor number 0. lofiadm communicates with lofi through 39 * ioctls on this device. When a file is attached to lofi, block and character 40 * devices are exported in /dev/lofi and /dev/rlofi. Currently, these devices 41 * are identified by their minor number, and the minor number is also used 42 * as the name in /dev/lofi. If we ever decide to support virtual disks, 43 * we'll have to divide the minor number space to identify fdisk partitions 44 * and slices, and the name will then be the minor number shifted down a 45 * few bits. Minor devices are tracked with state structures handled with 46 * ddi_soft_state(9F) for simplicity. 47 * 48 * A file attached to lofi is opened when attached and not closed until 49 * explicitly detached from lofi. This seems more sensible than deferring 50 * the open until the /dev/lofi device is opened, for a number of reasons. 51 * One is that any failure is likely to be noticed by the person (or script) 52 * running lofiadm. Another is that it would be a security problem if the 53 * file was replaced by another one after being added but before being opened. 54 * 55 * The only hard part about lofi is the ioctls. In order to support things 56 * like 'newfs' on a lofi device, it needs to support certain disk ioctls. 57 * So it has to fake disk geometry and partition information. More may need 58 * to be faked if your favorite utility doesn't work and you think it should 59 * (fdformat doesn't work because it really wants to know the type of floppy 60 * controller to talk to, and that didn't seem easy to fake. Or possibly even 61 * necessary, since we have mkfs_pcfs now). 62 * 63 * Known problems: 64 * 65 * UFS logging. Mounting a UFS filesystem image "logging" 66 * works for basic copy testing but wedges during a build of ON through 67 * that image. Some deadlock in lufs holding the log mutex and then 68 * getting stuck on a buf. So for now, don't do that. 69 * 70 * Direct I/O. Since the filesystem data is being cached in the buffer 71 * cache, _and_ again in the underlying filesystem, it's tempting to 72 * enable direct I/O on the underlying file. Don't, because that deadlocks. 73 * I think to fix the cache-twice problem we might need filesystem support. 74 * 75 * lofi on itself. The simple lock strategy (lofi_lock) precludes this 76 * because you'll be in lofi_ioctl, holding the lock when you open the 77 * file, which, if it's lofi, will grab lofi_lock. We prevent this for 78 * now, though not using ddi_soft_state(9F) would make it possible to 79 * do. Though it would still be silly. 80 * 81 * Interesting things to do: 82 * 83 * Allow multiple files for each device. A poor-man's metadisk, basically. 84 * 85 * Pass-through ioctls on block devices. You can (though it's not 86 * documented), give lofi a block device as a file name. Then we shouldn't 87 * need to fake a geometry. But this is also silly unless you're replacing 88 * metadisk. 89 * 90 * Encryption. tpm would like this. Apparently Windows 2000 has it, and 91 * so does Linux. 92 */ 93 94 #include <sys/types.h> 95 #include <sys/sysmacros.h> 96 #include <sys/cmn_err.h> 97 #include <sys/uio.h> 98 #include <sys/kmem.h> 99 #include <sys/cred.h> 100 #include <sys/mman.h> 101 #include <sys/errno.h> 102 #include <sys/aio_req.h> 103 #include <sys/stat.h> 104 #include <sys/file.h> 105 #include <sys/modctl.h> 106 #include <sys/conf.h> 107 #include <sys/debug.h> 108 #include <sys/vnode.h> 109 #include <sys/lofi.h> 110 #include <sys/vol.h> 111 #include <sys/fcntl.h> 112 #include <sys/pathname.h> 113 #include <sys/filio.h> 114 #include <sys/fdio.h> 115 #include <sys/open.h> 116 #include <sys/disp.h> 117 #include <vm/seg_map.h> 118 #include <sys/ddi.h> 119 #include <sys/sunddi.h> 120 121 /* seems safer than having to get the string right many times */ 122 #define NBLOCKS_PROP_NAME "Nblocks" 123 #define SIZE_PROP_NAME "Size" 124 125 static dev_info_t *lofi_dip; 126 static void *lofi_statep; 127 static kmutex_t lofi_lock; /* state lock */ 128 129 /* 130 * Because lofi_taskq_nthreads limits the actual swamping of the device, the 131 * maxalloc parameter (lofi_taskq_maxalloc) should be tuned conservatively 132 * high. If we want to be assured that the underlying device is always busy, 133 * we must be sure that the number of bytes enqueued when the number of 134 * enqueued tasks exceeds maxalloc is sufficient to keep the device busy for 135 * the duration of the sleep time in taskq_ent_alloc(). That is, lofi should 136 * set maxalloc to be the maximum throughput (in bytes per second) of the 137 * underlying device divided by the minimum I/O size. We assume a realistic 138 * maximum throughput of one hundred megabytes per second; we set maxalloc on 139 * the lofi task queue to be 104857600 divided by DEV_BSIZE. 140 */ 141 static int lofi_taskq_maxalloc = 104857600 / DEV_BSIZE; 142 static int lofi_taskq_nthreads = 4; /* # of taskq threads per device */ 143 144 uint32_t lofi_max_files = LOFI_MAX_FILES; 145 146 static int 147 lofi_busy(void) 148 { 149 minor_t minor; 150 151 /* 152 * We need to make sure no mappings exist - mod_remove won't 153 * help because the device isn't open. 154 */ 155 mutex_enter(&lofi_lock); 156 for (minor = 1; minor <= lofi_max_files; minor++) { 157 if (ddi_get_soft_state(lofi_statep, minor) != NULL) { 158 mutex_exit(&lofi_lock); 159 return (EBUSY); 160 } 161 } 162 mutex_exit(&lofi_lock); 163 return (0); 164 } 165 166 static int 167 is_opened(struct lofi_state *lsp) 168 { 169 ASSERT(mutex_owned(&lofi_lock)); 170 return (lsp->ls_chr_open || lsp->ls_blk_open || lsp->ls_lyr_open_count); 171 } 172 173 static int 174 mark_opened(struct lofi_state *lsp, int otyp) 175 { 176 ASSERT(mutex_owned(&lofi_lock)); 177 switch (otyp) { 178 case OTYP_CHR: 179 lsp->ls_chr_open = 1; 180 break; 181 case OTYP_BLK: 182 lsp->ls_blk_open = 1; 183 break; 184 case OTYP_LYR: 185 lsp->ls_lyr_open_count++; 186 break; 187 default: 188 return (-1); 189 } 190 return (0); 191 } 192 193 static void 194 mark_closed(struct lofi_state *lsp, int otyp) 195 { 196 ASSERT(mutex_owned(&lofi_lock)); 197 switch (otyp) { 198 case OTYP_CHR: 199 lsp->ls_chr_open = 0; 200 break; 201 case OTYP_BLK: 202 lsp->ls_blk_open = 0; 203 break; 204 case OTYP_LYR: 205 lsp->ls_lyr_open_count--; 206 break; 207 default: 208 break; 209 } 210 } 211 212 /*ARGSUSED3*/ 213 static int 214 lofi_open(dev_t *devp, int flag, int otyp, struct cred *credp) 215 { 216 minor_t minor; 217 struct lofi_state *lsp; 218 219 mutex_enter(&lofi_lock); 220 minor = getminor(*devp); 221 if (minor == 0) { 222 /* master control device */ 223 /* must be opened exclusively */ 224 if (((flag & FEXCL) != FEXCL) || (otyp != OTYP_CHR)) { 225 mutex_exit(&lofi_lock); 226 return (EINVAL); 227 } 228 lsp = ddi_get_soft_state(lofi_statep, 0); 229 if (lsp == NULL) { 230 mutex_exit(&lofi_lock); 231 return (ENXIO); 232 } 233 if (is_opened(lsp)) { 234 mutex_exit(&lofi_lock); 235 return (EBUSY); 236 } 237 (void) mark_opened(lsp, OTYP_CHR); 238 mutex_exit(&lofi_lock); 239 return (0); 240 } 241 242 /* otherwise, the mapping should already exist */ 243 lsp = ddi_get_soft_state(lofi_statep, minor); 244 if (lsp == NULL) { 245 mutex_exit(&lofi_lock); 246 return (EINVAL); 247 } 248 249 if (mark_opened(lsp, otyp) == -1) { 250 mutex_exit(&lofi_lock); 251 return (EINVAL); 252 } 253 254 mutex_exit(&lofi_lock); 255 return (0); 256 } 257 258 /*ARGSUSED3*/ 259 static int 260 lofi_close(dev_t dev, int flag, int otyp, struct cred *credp) 261 { 262 minor_t minor; 263 struct lofi_state *lsp; 264 265 #ifdef lint 266 flag = flag; 267 #endif 268 mutex_enter(&lofi_lock); 269 minor = getminor(dev); 270 lsp = ddi_get_soft_state(lofi_statep, minor); 271 if (lsp == NULL) { 272 mutex_exit(&lofi_lock); 273 return (EINVAL); 274 } 275 mark_closed(lsp, otyp); 276 mutex_exit(&lofi_lock); 277 return (0); 278 } 279 280 /* 281 * This is basically what strategy used to be before we found we 282 * needed task queues. 283 */ 284 static void 285 lofi_strategy_task(void *arg) 286 { 287 struct buf *bp = (struct buf *)arg; 288 int error; 289 struct lofi_state *lsp; 290 offset_t offset, alignedoffset; 291 offset_t mapoffset; 292 caddr_t bufaddr; 293 caddr_t mapaddr; 294 size_t xfersize; 295 size_t len; 296 int isread; 297 int smflags; 298 enum seg_rw srw; 299 300 lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev)); 301 if (lsp->ls_kstat) { 302 mutex_enter(lsp->ls_kstat->ks_lock); 303 kstat_waitq_to_runq(KSTAT_IO_PTR(lsp->ls_kstat)); 304 mutex_exit(lsp->ls_kstat->ks_lock); 305 } 306 bp_mapin(bp); 307 bufaddr = bp->b_un.b_addr; 308 offset = bp->b_lblkno * DEV_BSIZE; /* offset within file */ 309 310 /* 311 * We used to always use vn_rdwr here, but we cannot do that because 312 * we might decide to read or write from the the underlying 313 * file during this call, which would be a deadlock because 314 * we have the rw_lock. So instead we page, unless it's not 315 * mapable or it's a character device. 316 */ 317 if (((lsp->ls_vp->v_flag & VNOMAP) == 0) && 318 (lsp->ls_vp->v_type != VCHR)) { 319 /* 320 * segmap always gives us an 8K (MAXBSIZE) chunk, aligned on 321 * an 8K boundary, but the buf transfer address may not be 322 * aligned on more than a 512-byte boundary (we don't 323 * enforce that, though we could). This matters since the 324 * initial part of the transfer may not start at offset 0 325 * within the segmap'd chunk. So we have to compensate for 326 * that with 'mapoffset'. Subsequent chunks always start 327 * off at the beginning, and the last is capped by b_resid. 328 */ 329 mapoffset = offset & MAXBOFFSET; 330 alignedoffset = offset - mapoffset; /* now map-aligned */ 331 bp->b_resid = bp->b_bcount; 332 isread = bp->b_flags & B_READ; 333 srw = isread ? S_READ : S_WRITE; 334 do { 335 xfersize = MIN(lsp->ls_vp_size - offset, 336 MIN(MAXBSIZE - mapoffset, bp->b_resid)); 337 len = roundup(mapoffset + xfersize, PAGESIZE); 338 mapaddr = segmap_getmapflt(segkmap, lsp->ls_vp, 339 alignedoffset, MAXBSIZE, 1, srw); 340 /* 341 * Now fault in the pages. This lets us check 342 * for errors before we reference mapaddr and 343 * try to resolve the fault in bcopy (which would 344 * panic instead). And this can easily happen, 345 * particularly if you've lofi'd a file over NFS 346 * and someone deletes the file on the server. 347 */ 348 error = segmap_fault(kas.a_hat, segkmap, mapaddr, 349 len, F_SOFTLOCK, srw); 350 if (error) { 351 (void) segmap_release(segkmap, mapaddr, 0); 352 if (FC_CODE(error) == FC_OBJERR) 353 error = FC_ERRNO(error); 354 else 355 error = EIO; 356 break; 357 } 358 smflags = 0; 359 if (isread) { 360 bcopy(mapaddr + mapoffset, bufaddr, xfersize); 361 } else { 362 smflags |= SM_WRITE; 363 bcopy(bufaddr, mapaddr + mapoffset, xfersize); 364 } 365 bp->b_resid -= xfersize; 366 bufaddr += xfersize; 367 offset += xfersize; 368 (void) segmap_fault(kas.a_hat, segkmap, mapaddr, 369 len, F_SOFTUNLOCK, srw); 370 error = segmap_release(segkmap, mapaddr, smflags); 371 /* only the first map may start partial */ 372 mapoffset = 0; 373 alignedoffset += MAXBSIZE; 374 } while ((error == 0) && (bp->b_resid > 0) && 375 (offset < lsp->ls_vp_size)); 376 } else { 377 ssize_t resid; 378 enum uio_rw rw; 379 380 if (bp->b_flags & B_READ) 381 rw = UIO_READ; 382 else 383 rw = UIO_WRITE; 384 error = vn_rdwr(rw, lsp->ls_vp, bufaddr, bp->b_bcount, 385 offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); 386 bp->b_resid = resid; 387 } 388 389 if (lsp->ls_kstat) { 390 size_t n_done = bp->b_bcount - bp->b_resid; 391 kstat_io_t *kioptr; 392 393 mutex_enter(lsp->ls_kstat->ks_lock); 394 kioptr = KSTAT_IO_PTR(lsp->ls_kstat); 395 if (bp->b_flags & B_READ) { 396 kioptr->nread += n_done; 397 kioptr->reads++; 398 } else { 399 kioptr->nwritten += n_done; 400 kioptr->writes++; 401 } 402 kstat_runq_exit(kioptr); 403 mutex_exit(lsp->ls_kstat->ks_lock); 404 } 405 bioerror(bp, error); 406 biodone(bp); 407 } 408 409 static int 410 lofi_strategy(struct buf *bp) 411 { 412 struct lofi_state *lsp; 413 offset_t offset; 414 415 /* 416 * We cannot just do I/O here, because the current thread 417 * _might_ end up back in here because the underlying filesystem 418 * wants a buffer, which eventually gets into bio_recycle and 419 * might call into lofi to write out a delayed-write buffer. 420 * This is bad if the filesystem above lofi is the same as below. 421 * 422 * We could come up with a complex strategy using threads to 423 * do the I/O asynchronously, or we could use task queues. task 424 * queues were incredibly easy so they win. 425 */ 426 lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev)); 427 offset = bp->b_lblkno * DEV_BSIZE; /* offset within file */ 428 if (offset == lsp->ls_vp_size) { 429 /* EOF */ 430 if ((bp->b_flags & B_READ) != 0) { 431 bp->b_resid = bp->b_bcount; 432 bioerror(bp, 0); 433 } else { 434 /* writes should fail */ 435 bioerror(bp, ENXIO); 436 } 437 biodone(bp); 438 return (0); 439 } 440 if (offset > lsp->ls_vp_size) { 441 bioerror(bp, ENXIO); 442 biodone(bp); 443 return (0); 444 } 445 if (lsp->ls_kstat) { 446 mutex_enter(lsp->ls_kstat->ks_lock); 447 kstat_waitq_enter(KSTAT_IO_PTR(lsp->ls_kstat)); 448 mutex_exit(lsp->ls_kstat->ks_lock); 449 } 450 (void) taskq_dispatch(lsp->ls_taskq, lofi_strategy_task, bp, KM_SLEEP); 451 return (0); 452 } 453 454 /*ARGSUSED2*/ 455 static int 456 lofi_read(dev_t dev, struct uio *uio, struct cred *credp) 457 { 458 if (getminor(dev) == 0) 459 return (EINVAL); 460 return (physio(lofi_strategy, NULL, dev, B_READ, minphys, uio)); 461 } 462 463 /*ARGSUSED2*/ 464 static int 465 lofi_write(dev_t dev, struct uio *uio, struct cred *credp) 466 { 467 if (getminor(dev) == 0) 468 return (EINVAL); 469 return (physio(lofi_strategy, NULL, dev, B_WRITE, minphys, uio)); 470 } 471 472 /*ARGSUSED2*/ 473 static int 474 lofi_aread(dev_t dev, struct aio_req *aio, struct cred *credp) 475 { 476 if (getminor(dev) == 0) 477 return (EINVAL); 478 return (aphysio(lofi_strategy, anocancel, dev, B_READ, minphys, aio)); 479 } 480 481 /*ARGSUSED2*/ 482 static int 483 lofi_awrite(dev_t dev, struct aio_req *aio, struct cred *credp) 484 { 485 if (getminor(dev) == 0) 486 return (EINVAL); 487 return (aphysio(lofi_strategy, anocancel, dev, B_WRITE, minphys, aio)); 488 } 489 490 /*ARGSUSED*/ 491 static int 492 lofi_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 493 { 494 switch (infocmd) { 495 case DDI_INFO_DEVT2DEVINFO: 496 *result = lofi_dip; 497 return (DDI_SUCCESS); 498 case DDI_INFO_DEVT2INSTANCE: 499 *result = 0; 500 return (DDI_SUCCESS); 501 } 502 return (DDI_FAILURE); 503 } 504 505 static int 506 lofi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 507 { 508 int error; 509 510 if (cmd != DDI_ATTACH) 511 return (DDI_FAILURE); 512 error = ddi_soft_state_zalloc(lofi_statep, 0); 513 if (error == DDI_FAILURE) { 514 return (DDI_FAILURE); 515 } 516 error = ddi_create_minor_node(dip, LOFI_CTL_NODE, S_IFCHR, 0, 517 DDI_PSEUDO, NULL); 518 if (error == DDI_FAILURE) { 519 ddi_soft_state_free(lofi_statep, 0); 520 return (DDI_FAILURE); 521 } 522 lofi_dip = dip; 523 ddi_report_dev(dip); 524 return (DDI_SUCCESS); 525 } 526 527 static int 528 lofi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 529 { 530 if (cmd != DDI_DETACH) 531 return (DDI_FAILURE); 532 if (lofi_busy()) 533 return (DDI_FAILURE); 534 lofi_dip = NULL; 535 ddi_remove_minor_node(dip, NULL); 536 ddi_soft_state_free(lofi_statep, 0); 537 return (DDI_SUCCESS); 538 } 539 540 /* 541 * These two just simplify the rest of the ioctls that need to copyin/out 542 * the lofi_ioctl structure. 543 */ 544 struct lofi_ioctl * 545 copy_in_lofi_ioctl(const struct lofi_ioctl *ulip) 546 { 547 struct lofi_ioctl *klip; 548 int error; 549 550 klip = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP); 551 error = copyin(ulip, klip, sizeof (struct lofi_ioctl)); 552 if (error) { 553 kmem_free(klip, sizeof (struct lofi_ioctl)); 554 return (NULL); 555 } 556 557 /* make sure filename is always null-terminated */ 558 klip->li_filename[MAXPATHLEN] = '\0'; 559 560 /* validate minor number */ 561 if (klip->li_minor > lofi_max_files) { 562 kmem_free(klip, sizeof (struct lofi_ioctl)); 563 return (NULL); 564 } 565 return (klip); 566 } 567 568 int 569 copy_out_lofi_ioctl(const struct lofi_ioctl *klip, struct lofi_ioctl *ulip) 570 { 571 int error; 572 573 error = copyout(klip, ulip, sizeof (struct lofi_ioctl)); 574 if (error) 575 return (EFAULT); 576 return (0); 577 } 578 579 void 580 free_lofi_ioctl(struct lofi_ioctl *klip) 581 { 582 kmem_free(klip, sizeof (struct lofi_ioctl)); 583 } 584 585 /* 586 * Return the minor number 'filename' is mapped to, if it is. 587 */ 588 static int 589 file_to_minor(char *filename) 590 { 591 minor_t minor; 592 struct lofi_state *lsp; 593 594 ASSERT(mutex_owned(&lofi_lock)); 595 for (minor = 1; minor <= lofi_max_files; minor++) { 596 lsp = ddi_get_soft_state(lofi_statep, minor); 597 if (lsp == NULL) 598 continue; 599 if (strcmp(lsp->ls_filename, filename) == 0) 600 return (minor); 601 } 602 return (0); 603 } 604 605 /* 606 * lofiadm does some validation, but since Joe Random (or crashme) could 607 * do our ioctls, we need to do some validation too. 608 */ 609 static int 610 valid_filename(const char *filename) 611 { 612 static char *blkprefix = "/dev/" LOFI_BLOCK_NAME "/"; 613 static char *charprefix = "/dev/" LOFI_CHAR_NAME "/"; 614 615 /* must be absolute path */ 616 if (filename[0] != '/') 617 return (0); 618 /* must not be lofi */ 619 if (strncmp(filename, blkprefix, strlen(blkprefix)) == 0) 620 return (0); 621 if (strncmp(filename, charprefix, strlen(charprefix)) == 0) 622 return (0); 623 return (1); 624 } 625 626 /* 627 * Fakes up a disk geometry, and one big partition, based on the size 628 * of the file. This is needed because we allow newfs'ing the device, 629 * and newfs will do several disk ioctls to figure out the geometry and 630 * partition information. It uses that information to determine the parameters 631 * to pass to mkfs. Geometry is pretty much irrelevent these days, but we 632 * have to support it. 633 */ 634 static void 635 fake_disk_geometry(struct lofi_state *lsp) 636 { 637 /* dk_geom - see dkio(7I) */ 638 /* 639 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs 640 * of sectors), but that breaks programs like fdisk which want to 641 * partition a disk by cylinder. With one cylinder, you can't create 642 * an fdisk partition and put pcfs on it for testing (hard to pick 643 * a number between one and one). 644 * 645 * The cheezy floppy test is an attempt to not have too few cylinders 646 * for a small file, or so many on a big file that you waste space 647 * for backup superblocks or cylinder group structures. 648 */ 649 if (lsp->ls_vp_size < (2 * 1024 * 1024)) /* floppy? */ 650 lsp->ls_dkg.dkg_ncyl = lsp->ls_vp_size / (100 * 1024); 651 else 652 lsp->ls_dkg.dkg_ncyl = lsp->ls_vp_size / (300 * 1024); 653 /* in case file file is < 100k */ 654 if (lsp->ls_dkg.dkg_ncyl == 0) 655 lsp->ls_dkg.dkg_ncyl = 1; 656 lsp->ls_dkg.dkg_acyl = 0; 657 lsp->ls_dkg.dkg_bcyl = 0; 658 lsp->ls_dkg.dkg_nhead = 1; 659 lsp->ls_dkg.dkg_obs1 = 0; 660 lsp->ls_dkg.dkg_intrlv = 0; 661 lsp->ls_dkg.dkg_obs2 = 0; 662 lsp->ls_dkg.dkg_obs3 = 0; 663 lsp->ls_dkg.dkg_apc = 0; 664 lsp->ls_dkg.dkg_rpm = 7200; 665 lsp->ls_dkg.dkg_pcyl = lsp->ls_dkg.dkg_ncyl + lsp->ls_dkg.dkg_acyl; 666 lsp->ls_dkg.dkg_nsect = lsp->ls_vp_size / 667 (DEV_BSIZE * lsp->ls_dkg.dkg_ncyl); 668 lsp->ls_dkg.dkg_write_reinstruct = 0; 669 lsp->ls_dkg.dkg_read_reinstruct = 0; 670 671 /* vtoc - see dkio(7I) */ 672 bzero(&lsp->ls_vtoc, sizeof (struct vtoc)); 673 lsp->ls_vtoc.v_sanity = VTOC_SANE; 674 lsp->ls_vtoc.v_version = V_VERSION; 675 bcopy(LOFI_DRIVER_NAME, lsp->ls_vtoc.v_volume, 7); 676 lsp->ls_vtoc.v_sectorsz = DEV_BSIZE; 677 lsp->ls_vtoc.v_nparts = 1; 678 lsp->ls_vtoc.v_part[0].p_tag = V_UNASSIGNED; 679 lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT; 680 lsp->ls_vtoc.v_part[0].p_start = (daddr_t)0; 681 /* 682 * The partition size cannot just be the number of sectors, because 683 * that might not end on a cylinder boundary. And if that's the case, 684 * newfs/mkfs will print a scary warning. So just figure the size 685 * based on the number of cylinders and sectors/cylinder. 686 */ 687 lsp->ls_vtoc.v_part[0].p_size = lsp->ls_dkg.dkg_pcyl * 688 lsp->ls_dkg.dkg_nsect * lsp->ls_dkg.dkg_nhead; 689 690 /* dk_cinfo - see dkio(7I) */ 691 bzero(&lsp->ls_ci, sizeof (struct dk_cinfo)); 692 (void) strcpy(lsp->ls_ci.dki_cname, LOFI_DRIVER_NAME); 693 lsp->ls_ci.dki_ctype = DKC_MD; 694 lsp->ls_ci.dki_flags = 0; 695 lsp->ls_ci.dki_cnum = 0; 696 lsp->ls_ci.dki_addr = 0; 697 lsp->ls_ci.dki_space = 0; 698 lsp->ls_ci.dki_prio = 0; 699 lsp->ls_ci.dki_vec = 0; 700 (void) strcpy(lsp->ls_ci.dki_dname, LOFI_DRIVER_NAME); 701 lsp->ls_ci.dki_unit = 0; 702 lsp->ls_ci.dki_slave = 0; 703 lsp->ls_ci.dki_partition = 0; 704 /* 705 * newfs uses this to set maxcontig. Must not be < 16, or it 706 * will be 0 when newfs multiplies it by DEV_BSIZE and divides 707 * it by the block size. Then tunefs doesn't work because 708 * maxcontig is 0. 709 */ 710 lsp->ls_ci.dki_maxtransfer = 16; 711 } 712 713 /* 714 * map a file to a minor number. Return the minor number. 715 */ 716 static int 717 lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor, 718 int *rvalp, struct cred *credp) 719 { 720 minor_t newminor; 721 struct lofi_state *lsp; 722 struct lofi_ioctl *klip; 723 int error; 724 char namebuf[50]; 725 struct vnode *vp; 726 int64_t Nblocks_prop_val; 727 int64_t Size_prop_val; 728 vattr_t vattr; 729 int flag; 730 enum vtype v_type; 731 dev_t newdev; 732 int zalloced = 0; 733 734 klip = copy_in_lofi_ioctl(ulip); 735 if (klip == NULL) 736 return (EFAULT); 737 738 mutex_enter(&lofi_lock); 739 740 if (!valid_filename(klip->li_filename)) { 741 error = EINVAL; 742 goto out; 743 } 744 745 if (file_to_minor(klip->li_filename) != 0) { 746 error = EBUSY; 747 goto out; 748 } 749 750 if (pickminor) { 751 /* Find a free one */ 752 for (newminor = 1; newminor <= lofi_max_files; newminor++) 753 if (ddi_get_soft_state(lofi_statep, newminor) == NULL) 754 break; 755 if (newminor >= lofi_max_files) { 756 error = EAGAIN; 757 goto out; 758 } 759 } else { 760 newminor = klip->li_minor; 761 if (ddi_get_soft_state(lofi_statep, newminor) != NULL) { 762 error = EEXIST; 763 goto out; 764 } 765 } 766 767 /* make sure it's valid */ 768 error = lookupname(klip->li_filename, UIO_SYSSPACE, FOLLOW, 769 NULLVPP, &vp); 770 if (error) { 771 goto out; 772 } 773 v_type = vp->v_type; 774 VN_RELE(vp); 775 if (!V_ISLOFIABLE(v_type)) { 776 error = EINVAL; 777 goto out; 778 } 779 flag = FREAD | FWRITE | FOFFMAX | FEXCL; 780 error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0); 781 if (error) { 782 /* try read-only */ 783 flag &= ~FWRITE; 784 error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, 785 &vp, 0, 0); 786 if (error) { 787 goto out; 788 } 789 } 790 vattr.va_mask = AT_SIZE; 791 error = VOP_GETATTR(vp, &vattr, 0, credp); 792 if (error) { 793 goto closeout; 794 } 795 /* the file needs to be a multiple of the block size */ 796 if ((vattr.va_size % DEV_BSIZE) != 0) { 797 error = EINVAL; 798 goto closeout; 799 } 800 newdev = makedevice(getmajor(dev), newminor); 801 Size_prop_val = vattr.va_size; 802 if ((ddi_prop_update_int64(newdev, lofi_dip, 803 SIZE_PROP_NAME, Size_prop_val)) != DDI_PROP_SUCCESS) { 804 error = EINVAL; 805 goto closeout; 806 } 807 Nblocks_prop_val = vattr.va_size / DEV_BSIZE; 808 if ((ddi_prop_update_int64(newdev, lofi_dip, 809 NBLOCKS_PROP_NAME, Nblocks_prop_val)) != DDI_PROP_SUCCESS) { 810 error = EINVAL; 811 goto propout; 812 } 813 error = ddi_soft_state_zalloc(lofi_statep, newminor); 814 if (error == DDI_FAILURE) { 815 error = ENOMEM; 816 goto propout; 817 } 818 zalloced = 1; 819 (void) snprintf(namebuf, sizeof (namebuf), "%d", newminor); 820 (void) ddi_create_minor_node(lofi_dip, namebuf, S_IFBLK, newminor, 821 DDI_PSEUDO, NULL); 822 if (error != DDI_SUCCESS) { 823 error = ENXIO; 824 goto propout; 825 } 826 (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", newminor); 827 error = ddi_create_minor_node(lofi_dip, namebuf, S_IFCHR, newminor, 828 DDI_PSEUDO, NULL); 829 if (error != DDI_SUCCESS) { 830 /* remove block node */ 831 (void) snprintf(namebuf, sizeof (namebuf), "%d", newminor); 832 ddi_remove_minor_node(lofi_dip, namebuf); 833 error = ENXIO; 834 goto propout; 835 } 836 lsp = ddi_get_soft_state(lofi_statep, newminor); 837 lsp->ls_filename_sz = strlen(klip->li_filename) + 1; 838 lsp->ls_filename = kmem_alloc(lsp->ls_filename_sz, KM_SLEEP); 839 (void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d", 840 LOFI_DRIVER_NAME, newminor); 841 lsp->ls_taskq = taskq_create(namebuf, lofi_taskq_nthreads, 842 minclsyspri, 1, lofi_taskq_maxalloc, 0); 843 lsp->ls_kstat = kstat_create(LOFI_DRIVER_NAME, newminor, 844 NULL, "disk", KSTAT_TYPE_IO, 1, 0); 845 if (lsp->ls_kstat) { 846 mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL); 847 lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock; 848 kstat_install(lsp->ls_kstat); 849 } 850 /* 851 * save open mode so file can be closed properly and vnode counts 852 * updated correctly. 853 */ 854 lsp->ls_openflag = flag; 855 856 /* 857 * Try to handle stacked lofs vnodes. 858 */ 859 if (vp->v_type == VREG) { 860 if (VOP_REALVP(vp, &lsp->ls_vp) != 0) { 861 lsp->ls_vp = vp; 862 } else { 863 /* 864 * Even though vp was obtained via vn_open(), we 865 * can't call vn_close() on it, since lofs will 866 * pass the VOP_CLOSE() on down to the realvp 867 * (which we are about to use). Hence we merely 868 * drop the reference to the lofs vnode and hold 869 * the realvp so things behave as if we've 870 * opened the realvp without any interaction 871 * with lofs. 872 */ 873 VN_HOLD(lsp->ls_vp); 874 VN_RELE(vp); 875 } 876 } else { 877 lsp->ls_vp = vp; 878 } 879 lsp->ls_vp_size = vattr.va_size; 880 (void) strcpy(lsp->ls_filename, klip->li_filename); 881 if (rvalp) 882 *rvalp = (int)newminor; 883 klip->li_minor = newminor; 884 885 fake_disk_geometry(lsp); 886 mutex_exit(&lofi_lock); 887 (void) copy_out_lofi_ioctl(klip, ulip); 888 free_lofi_ioctl(klip); 889 return (0); 890 891 propout: 892 (void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME); 893 (void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME); 894 closeout: 895 (void) VOP_CLOSE(vp, flag, 1, 0, credp); 896 VN_RELE(vp); 897 out: 898 if (zalloced) 899 ddi_soft_state_free(lofi_statep, newminor); 900 mutex_exit(&lofi_lock); 901 free_lofi_ioctl(klip); 902 return (error); 903 } 904 905 /* 906 * unmap a file. 907 */ 908 static int 909 lofi_unmap_file(dev_t dev, struct lofi_ioctl *ulip, int byfilename, 910 struct cred *credp) 911 { 912 struct lofi_state *lsp; 913 struct lofi_ioctl *klip; 914 minor_t minor; 915 char namebuf[20]; 916 dev_t newdev; 917 918 klip = copy_in_lofi_ioctl(ulip); 919 if (klip == NULL) 920 return (EFAULT); 921 922 mutex_enter(&lofi_lock); 923 if (byfilename) { 924 minor = file_to_minor(klip->li_filename); 925 } else { 926 minor = klip->li_minor; 927 } 928 if (minor == 0) { 929 mutex_exit(&lofi_lock); 930 free_lofi_ioctl(klip); 931 return (ENXIO); 932 } 933 lsp = ddi_get_soft_state(lofi_statep, minor); 934 if (lsp == NULL) { 935 mutex_exit(&lofi_lock); 936 free_lofi_ioctl(klip); 937 return (ENXIO); 938 } 939 if (is_opened(lsp)) { 940 mutex_exit(&lofi_lock); 941 free_lofi_ioctl(klip); 942 return (EBUSY); 943 } 944 /* 945 * Use saved open mode to properly update vnode counts 946 */ 947 (void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag, 1, 0, credp); 948 VN_RELE(lsp->ls_vp); 949 lsp->ls_vp = NULL; 950 newdev = makedevice(getmajor(dev), minor); 951 (void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME); 952 (void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME); 953 954 (void) snprintf(namebuf, sizeof (namebuf), "%d", minor); 955 ddi_remove_minor_node(lofi_dip, namebuf); 956 (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor); 957 ddi_remove_minor_node(lofi_dip, namebuf); 958 959 kmem_free(lsp->ls_filename, lsp->ls_filename_sz); 960 taskq_destroy(lsp->ls_taskq); 961 if (lsp->ls_kstat) { 962 kstat_delete(lsp->ls_kstat); 963 mutex_destroy(&lsp->ls_kstat_lock); 964 } 965 ddi_soft_state_free(lofi_statep, minor); 966 klip->li_minor = minor; 967 mutex_exit(&lofi_lock); 968 (void) copy_out_lofi_ioctl(klip, ulip); 969 free_lofi_ioctl(klip); 970 return (0); 971 } 972 973 /* 974 * get the filename given the minor number, or the minor number given 975 * the name. 976 */ 977 /*ARGSUSED3*/ 978 static int 979 lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which, 980 struct cred *credp) 981 { 982 struct lofi_state *lsp; 983 struct lofi_ioctl *klip; 984 int error; 985 minor_t minor; 986 987 #ifdef lint 988 dev = dev; 989 #endif 990 klip = copy_in_lofi_ioctl(ulip); 991 if (klip == NULL) 992 return (EFAULT); 993 994 switch (which) { 995 case LOFI_GET_FILENAME: 996 minor = klip->li_minor; 997 if (minor == 0) { 998 free_lofi_ioctl(klip); 999 return (EINVAL); 1000 } 1001 1002 mutex_enter(&lofi_lock); 1003 lsp = ddi_get_soft_state(lofi_statep, minor); 1004 if (lsp == NULL) { 1005 mutex_exit(&lofi_lock); 1006 free_lofi_ioctl(klip); 1007 return (ENXIO); 1008 } 1009 (void) strcpy(klip->li_filename, lsp->ls_filename); 1010 mutex_exit(&lofi_lock); 1011 error = copy_out_lofi_ioctl(klip, ulip); 1012 free_lofi_ioctl(klip); 1013 return (error); 1014 case LOFI_GET_MINOR: 1015 mutex_enter(&lofi_lock); 1016 klip->li_minor = file_to_minor(klip->li_filename); 1017 mutex_exit(&lofi_lock); 1018 if (klip->li_minor == 0) { 1019 free_lofi_ioctl(klip); 1020 return (ENOENT); 1021 } 1022 error = copy_out_lofi_ioctl(klip, ulip); 1023 free_lofi_ioctl(klip); 1024 return (error); 1025 default: 1026 free_lofi_ioctl(klip); 1027 return (EINVAL); 1028 } 1029 1030 } 1031 1032 static int 1033 lofi_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, 1034 int *rvalp) 1035 { 1036 int error; 1037 enum dkio_state dkstate; 1038 struct lofi_state *lsp; 1039 minor_t minor; 1040 1041 #ifdef lint 1042 credp = credp; 1043 #endif 1044 1045 minor = getminor(dev); 1046 /* lofi ioctls only apply to the master device */ 1047 if (minor == 0) { 1048 struct lofi_ioctl *lip = (struct lofi_ioctl *)arg; 1049 1050 /* 1051 * the query command only need read-access - i.e., normal 1052 * users are allowed to do those on the ctl device as 1053 * long as they can open it read-only. 1054 */ 1055 switch (cmd) { 1056 case LOFI_MAP_FILE: 1057 if ((flag & FWRITE) == 0) 1058 return (EPERM); 1059 return (lofi_map_file(dev, lip, 1, rvalp, credp)); 1060 case LOFI_MAP_FILE_MINOR: 1061 if ((flag & FWRITE) == 0) 1062 return (EPERM); 1063 return (lofi_map_file(dev, lip, 0, rvalp, credp)); 1064 case LOFI_UNMAP_FILE: 1065 if ((flag & FWRITE) == 0) 1066 return (EPERM); 1067 return (lofi_unmap_file(dev, lip, 1, credp)); 1068 case LOFI_UNMAP_FILE_MINOR: 1069 if ((flag & FWRITE) == 0) 1070 return (EPERM); 1071 return (lofi_unmap_file(dev, lip, 0, credp)); 1072 case LOFI_GET_FILENAME: 1073 return (lofi_get_info(dev, lip, LOFI_GET_FILENAME, 1074 credp)); 1075 case LOFI_GET_MINOR: 1076 return (lofi_get_info(dev, lip, LOFI_GET_MINOR, 1077 credp)); 1078 case LOFI_GET_MAXMINOR: 1079 error = copyout(&lofi_max_files, &lip->li_minor, 1080 sizeof (lofi_max_files)); 1081 if (error) 1082 return (EFAULT); 1083 return (0); 1084 default: 1085 break; 1086 } 1087 } 1088 1089 lsp = ddi_get_soft_state(lofi_statep, minor); 1090 if (lsp == NULL) 1091 return (ENXIO); 1092 1093 /* these are for faking out utilities like newfs */ 1094 switch (cmd) { 1095 case VOLIOCINFO: 1096 /* pcfs does this to see if it needs to set PCFS_NOCHK */ 1097 /* 0 means it should set it */ 1098 return (0); 1099 case DKIOCGVTOC: 1100 switch (ddi_model_convert_from(flag & FMODELS)) { 1101 case DDI_MODEL_ILP32: { 1102 struct vtoc32 vtoc32; 1103 1104 vtoctovtoc32(lsp->ls_vtoc, vtoc32); 1105 if (ddi_copyout(&vtoc32, (void *)arg, 1106 sizeof (struct vtoc32), flag)) 1107 return (EFAULT); 1108 break; 1109 } 1110 1111 case DDI_MODEL_NONE: 1112 if (ddi_copyout(&lsp->ls_vtoc, (void *)arg, 1113 sizeof (struct vtoc), flag)) 1114 return (EFAULT); 1115 break; 1116 } 1117 return (0); 1118 case DKIOCINFO: 1119 error = copyout(&lsp->ls_ci, (void *)arg, 1120 sizeof (struct dk_cinfo)); 1121 if (error) 1122 return (EFAULT); 1123 return (0); 1124 case DKIOCG_VIRTGEOM: 1125 case DKIOCG_PHYGEOM: 1126 case DKIOCGGEOM: 1127 error = copyout(&lsp->ls_dkg, (void *)arg, 1128 sizeof (struct dk_geom)); 1129 if (error) 1130 return (EFAULT); 1131 return (0); 1132 case DKIOCSTATE: 1133 /* the file is always there */ 1134 dkstate = DKIO_INSERTED; 1135 error = copyout(&dkstate, (void *)arg, 1136 sizeof (enum dkio_state)); 1137 if (error) 1138 return (EFAULT); 1139 return (0); 1140 default: 1141 return (ENOTTY); 1142 } 1143 } 1144 1145 static struct cb_ops lofi_cb_ops = { 1146 lofi_open, /* open */ 1147 lofi_close, /* close */ 1148 lofi_strategy, /* strategy */ 1149 nodev, /* print */ 1150 nodev, /* dump */ 1151 lofi_read, /* read */ 1152 lofi_write, /* write */ 1153 lofi_ioctl, /* ioctl */ 1154 nodev, /* devmap */ 1155 nodev, /* mmap */ 1156 nodev, /* segmap */ 1157 nochpoll, /* poll */ 1158 ddi_prop_op, /* prop_op */ 1159 0, /* streamtab */ 1160 D_64BIT | D_NEW | D_MP, /* Driver compatibility flag */ 1161 CB_REV, 1162 lofi_aread, 1163 lofi_awrite 1164 }; 1165 1166 static struct dev_ops lofi_ops = { 1167 DEVO_REV, /* devo_rev, */ 1168 0, /* refcnt */ 1169 lofi_info, /* info */ 1170 nulldev, /* identify */ 1171 nulldev, /* probe */ 1172 lofi_attach, /* attach */ 1173 lofi_detach, /* detach */ 1174 nodev, /* reset */ 1175 &lofi_cb_ops, /* driver operations */ 1176 NULL /* no bus operations */ 1177 }; 1178 1179 static struct modldrv modldrv = { 1180 &mod_driverops, 1181 "loopback file driver (%I%)", 1182 &lofi_ops, 1183 }; 1184 1185 static struct modlinkage modlinkage = { 1186 MODREV_1, 1187 &modldrv, 1188 NULL 1189 }; 1190 1191 int 1192 _init(void) 1193 { 1194 int error; 1195 1196 error = ddi_soft_state_init(&lofi_statep, 1197 sizeof (struct lofi_state), 0); 1198 if (error) 1199 return (error); 1200 1201 mutex_init(&lofi_lock, NULL, MUTEX_DRIVER, NULL); 1202 error = mod_install(&modlinkage); 1203 if (error) { 1204 mutex_destroy(&lofi_lock); 1205 ddi_soft_state_fini(&lofi_statep); 1206 } 1207 1208 return (error); 1209 } 1210 1211 int 1212 _fini(void) 1213 { 1214 int error; 1215 1216 if (lofi_busy()) 1217 return (EBUSY); 1218 1219 error = mod_remove(&modlinkage); 1220 if (error) 1221 return (error); 1222 1223 mutex_destroy(&lofi_lock); 1224 ddi_soft_state_fini(&lofi_statep); 1225 1226 return (error); 1227 } 1228 1229 int 1230 _info(struct modinfo *modinfop) 1231 { 1232 return (mod_info(&modlinkage, modinfop)); 1233 } 1234