1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * lofi (loopback file) driver - allows you to attach a file to a device, 30 * which can then be accessed through that device. The simple model is that 31 * you tell lofi to open a file, and then use the block device you get as 32 * you would any block device. lofi translates access to the block device 33 * into I/O on the underlying file. This is mostly useful for 34 * mounting images of filesystems. 35 * 36 * lofi is controlled through /dev/lofictl - this is the only device exported 37 * during attach, and is minor number 0. lofiadm communicates with lofi through 38 * ioctls on this device. When a file is attached to lofi, block and character 39 * devices are exported in /dev/lofi and /dev/rlofi. Currently, these devices 40 * are identified by their minor number, and the minor number is also used 41 * as the name in /dev/lofi. If we ever decide to support virtual disks, 42 * we'll have to divide the minor number space to identify fdisk partitions 43 * and slices, and the name will then be the minor number shifted down a 44 * few bits. Minor devices are tracked with state structures handled with 45 * ddi_soft_state(9F) for simplicity. 46 * 47 * A file attached to lofi is opened when attached and not closed until 48 * explicitly detached from lofi. This seems more sensible than deferring 49 * the open until the /dev/lofi device is opened, for a number of reasons. 50 * One is that any failure is likely to be noticed by the person (or script) 51 * running lofiadm. Another is that it would be a security problem if the 52 * file was replaced by another one after being added but before being opened. 53 * 54 * The only hard part about lofi is the ioctls. In order to support things 55 * like 'newfs' on a lofi device, it needs to support certain disk ioctls. 56 * So it has to fake disk geometry and partition information. More may need 57 * to be faked if your favorite utility doesn't work and you think it should 58 * (fdformat doesn't work because it really wants to know the type of floppy 59 * controller to talk to, and that didn't seem easy to fake. Or possibly even 60 * necessary, since we have mkfs_pcfs now). 61 * 62 * Known problems: 63 * 64 * UFS logging. Mounting a UFS filesystem image "logging" 65 * works for basic copy testing but wedges during a build of ON through 66 * that image. Some deadlock in lufs holding the log mutex and then 67 * getting stuck on a buf. So for now, don't do that. 68 * 69 * Direct I/O. Since the filesystem data is being cached in the buffer 70 * cache, _and_ again in the underlying filesystem, it's tempting to 71 * enable direct I/O on the underlying file. Don't, because that deadlocks. 72 * I think to fix the cache-twice problem we might need filesystem support. 73 * 74 * lofi on itself. The simple lock strategy (lofi_lock) precludes this 75 * because you'll be in lofi_ioctl, holding the lock when you open the 76 * file, which, if it's lofi, will grab lofi_lock. We prevent this for 77 * now, though not using ddi_soft_state(9F) would make it possible to 78 * do. Though it would still be silly. 79 * 80 * Interesting things to do: 81 * 82 * Allow multiple files for each device. A poor-man's metadisk, basically. 83 * 84 * Pass-through ioctls on block devices. You can (though it's not 85 * documented), give lofi a block device as a file name. Then we shouldn't 86 * need to fake a geometry. But this is also silly unless you're replacing 87 * metadisk. 88 * 89 * Encryption. tpm would like this. Apparently Windows 2000 has it, and 90 * so does Linux. 91 */ 92 93 #include <sys/types.h> 94 #include <sys/sysmacros.h> 95 #include <sys/cmn_err.h> 96 #include <sys/uio.h> 97 #include <sys/kmem.h> 98 #include <sys/cred.h> 99 #include <sys/mman.h> 100 #include <sys/errno.h> 101 #include <sys/aio_req.h> 102 #include <sys/stat.h> 103 #include <sys/file.h> 104 #include <sys/modctl.h> 105 #include <sys/conf.h> 106 #include <sys/debug.h> 107 #include <sys/vnode.h> 108 #include <sys/lofi.h> 109 #include <sys/vol.h> 110 #include <sys/fcntl.h> 111 #include <sys/pathname.h> 112 #include <sys/filio.h> 113 #include <sys/fdio.h> 114 #include <sys/open.h> 115 #include <sys/disp.h> 116 #include <vm/seg_map.h> 117 #include <sys/ddi.h> 118 #include <sys/sunddi.h> 119 120 /* seems safer than having to get the string right many times */ 121 #define NBLOCKS_PROP_NAME "Nblocks" 122 #define SIZE_PROP_NAME "Size" 123 124 static dev_info_t *lofi_dip; 125 static void *lofi_statep; 126 static kmutex_t lofi_lock; /* state lock */ 127 128 /* 129 * Because lofi_taskq_nthreads limits the actual swamping of the device, the 130 * maxalloc parameter (lofi_taskq_maxalloc) should be tuned conservatively 131 * high. If we want to be assured that the underlying device is always busy, 132 * we must be sure that the number of bytes enqueued when the number of 133 * enqueued tasks exceeds maxalloc is sufficient to keep the device busy for 134 * the duration of the sleep time in taskq_ent_alloc(). That is, lofi should 135 * set maxalloc to be the maximum throughput (in bytes per second) of the 136 * underlying device divided by the minimum I/O size. We assume a realistic 137 * maximum throughput of one hundred megabytes per second; we set maxalloc on 138 * the lofi task queue to be 104857600 divided by DEV_BSIZE. 139 */ 140 static int lofi_taskq_maxalloc = 104857600 / DEV_BSIZE; 141 static int lofi_taskq_nthreads = 4; /* # of taskq threads per device */ 142 143 uint32_t lofi_max_files = LOFI_MAX_FILES; 144 145 static int 146 lofi_busy(void) 147 { 148 minor_t minor; 149 150 /* 151 * We need to make sure no mappings exist - mod_remove won't 152 * help because the device isn't open. 153 */ 154 mutex_enter(&lofi_lock); 155 for (minor = 1; minor <= lofi_max_files; minor++) { 156 if (ddi_get_soft_state(lofi_statep, minor) != NULL) { 157 mutex_exit(&lofi_lock); 158 return (EBUSY); 159 } 160 } 161 mutex_exit(&lofi_lock); 162 return (0); 163 } 164 165 static int 166 is_opened(struct lofi_state *lsp) 167 { 168 ASSERT(mutex_owned(&lofi_lock)); 169 return (lsp->ls_chr_open || lsp->ls_blk_open || lsp->ls_lyr_open_count); 170 } 171 172 static int 173 mark_opened(struct lofi_state *lsp, int otyp) 174 { 175 ASSERT(mutex_owned(&lofi_lock)); 176 switch (otyp) { 177 case OTYP_CHR: 178 lsp->ls_chr_open = 1; 179 break; 180 case OTYP_BLK: 181 lsp->ls_blk_open = 1; 182 break; 183 case OTYP_LYR: 184 lsp->ls_lyr_open_count++; 185 break; 186 default: 187 return (-1); 188 } 189 return (0); 190 } 191 192 static void 193 mark_closed(struct lofi_state *lsp, int otyp) 194 { 195 ASSERT(mutex_owned(&lofi_lock)); 196 switch (otyp) { 197 case OTYP_CHR: 198 lsp->ls_chr_open = 0; 199 break; 200 case OTYP_BLK: 201 lsp->ls_blk_open = 0; 202 break; 203 case OTYP_LYR: 204 lsp->ls_lyr_open_count--; 205 break; 206 default: 207 break; 208 } 209 } 210 211 /*ARGSUSED3*/ 212 static int 213 lofi_open(dev_t *devp, int flag, int otyp, struct cred *credp) 214 { 215 minor_t minor; 216 struct lofi_state *lsp; 217 218 mutex_enter(&lofi_lock); 219 minor = getminor(*devp); 220 if (minor == 0) { 221 /* master control device */ 222 /* must be opened exclusively */ 223 if (((flag & FEXCL) != FEXCL) || (otyp != OTYP_CHR)) { 224 mutex_exit(&lofi_lock); 225 return (EINVAL); 226 } 227 lsp = ddi_get_soft_state(lofi_statep, 0); 228 if (lsp == NULL) { 229 mutex_exit(&lofi_lock); 230 return (ENXIO); 231 } 232 if (is_opened(lsp)) { 233 mutex_exit(&lofi_lock); 234 return (EBUSY); 235 } 236 (void) mark_opened(lsp, OTYP_CHR); 237 mutex_exit(&lofi_lock); 238 return (0); 239 } 240 241 /* otherwise, the mapping should already exist */ 242 lsp = ddi_get_soft_state(lofi_statep, minor); 243 if (lsp == NULL) { 244 mutex_exit(&lofi_lock); 245 return (EINVAL); 246 } 247 248 if (mark_opened(lsp, otyp) == -1) { 249 mutex_exit(&lofi_lock); 250 return (EINVAL); 251 } 252 253 mutex_exit(&lofi_lock); 254 return (0); 255 } 256 257 /*ARGSUSED3*/ 258 static int 259 lofi_close(dev_t dev, int flag, int otyp, struct cred *credp) 260 { 261 minor_t minor; 262 struct lofi_state *lsp; 263 264 #ifdef lint 265 flag = flag; 266 #endif 267 mutex_enter(&lofi_lock); 268 minor = getminor(dev); 269 lsp = ddi_get_soft_state(lofi_statep, minor); 270 if (lsp == NULL) { 271 mutex_exit(&lofi_lock); 272 return (EINVAL); 273 } 274 mark_closed(lsp, otyp); 275 mutex_exit(&lofi_lock); 276 return (0); 277 } 278 279 /* 280 * This is basically what strategy used to be before we found we 281 * needed task queues. 282 */ 283 static void 284 lofi_strategy_task(void *arg) 285 { 286 struct buf *bp = (struct buf *)arg; 287 int error; 288 struct lofi_state *lsp; 289 offset_t offset, alignedoffset; 290 offset_t mapoffset; 291 caddr_t bufaddr; 292 caddr_t mapaddr; 293 size_t xfersize; 294 size_t len; 295 int isread; 296 int smflags; 297 enum seg_rw srw; 298 299 lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev)); 300 if (lsp->ls_kstat) { 301 mutex_enter(lsp->ls_kstat->ks_lock); 302 kstat_waitq_to_runq(KSTAT_IO_PTR(lsp->ls_kstat)); 303 mutex_exit(lsp->ls_kstat->ks_lock); 304 } 305 bp_mapin(bp); 306 bufaddr = bp->b_un.b_addr; 307 offset = bp->b_lblkno * DEV_BSIZE; /* offset within file */ 308 309 /* 310 * We used to always use vn_rdwr here, but we cannot do that because 311 * we might decide to read or write from the the underlying 312 * file during this call, which would be a deadlock because 313 * we have the rw_lock. So instead we page, unless it's not 314 * mapable or it's a character device. 315 */ 316 if (((lsp->ls_vp->v_flag & VNOMAP) == 0) && 317 (lsp->ls_vp->v_type != VCHR)) { 318 /* 319 * segmap always gives us an 8K (MAXBSIZE) chunk, aligned on 320 * an 8K boundary, but the buf transfer address may not be 321 * aligned on more than a 512-byte boundary (we don't 322 * enforce that, though we could). This matters since the 323 * initial part of the transfer may not start at offset 0 324 * within the segmap'd chunk. So we have to compensate for 325 * that with 'mapoffset'. Subsequent chunks always start 326 * off at the beginning, and the last is capped by b_resid. 327 */ 328 mapoffset = offset & MAXBOFFSET; 329 alignedoffset = offset - mapoffset; /* now map-aligned */ 330 bp->b_resid = bp->b_bcount; 331 isread = bp->b_flags & B_READ; 332 srw = isread ? S_READ : S_WRITE; 333 do { 334 xfersize = MIN(lsp->ls_vp_size - offset, 335 MIN(MAXBSIZE - mapoffset, bp->b_resid)); 336 len = roundup(mapoffset + xfersize, PAGESIZE); 337 mapaddr = segmap_getmapflt(segkmap, lsp->ls_vp, 338 alignedoffset, MAXBSIZE, 1, srw); 339 /* 340 * Now fault in the pages. This lets us check 341 * for errors before we reference mapaddr and 342 * try to resolve the fault in bcopy (which would 343 * panic instead). And this can easily happen, 344 * particularly if you've lofi'd a file over NFS 345 * and someone deletes the file on the server. 346 */ 347 error = segmap_fault(kas.a_hat, segkmap, mapaddr, 348 len, F_SOFTLOCK, srw); 349 if (error) { 350 (void) segmap_release(segkmap, mapaddr, 0); 351 if (FC_CODE(error) == FC_OBJERR) 352 error = FC_ERRNO(error); 353 else 354 error = EIO; 355 break; 356 } 357 smflags = 0; 358 if (isread) { 359 bcopy(mapaddr + mapoffset, bufaddr, xfersize); 360 } else { 361 smflags |= SM_WRITE; 362 bcopy(bufaddr, mapaddr + mapoffset, xfersize); 363 } 364 bp->b_resid -= xfersize; 365 bufaddr += xfersize; 366 offset += xfersize; 367 (void) segmap_fault(kas.a_hat, segkmap, mapaddr, 368 len, F_SOFTUNLOCK, srw); 369 error = segmap_release(segkmap, mapaddr, smflags); 370 /* only the first map may start partial */ 371 mapoffset = 0; 372 alignedoffset += MAXBSIZE; 373 } while ((error == 0) && (bp->b_resid > 0) && 374 (offset < lsp->ls_vp_size)); 375 } else { 376 ssize_t resid; 377 enum uio_rw rw; 378 379 if (bp->b_flags & B_READ) 380 rw = UIO_READ; 381 else 382 rw = UIO_WRITE; 383 error = vn_rdwr(rw, lsp->ls_vp, bufaddr, bp->b_bcount, 384 offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); 385 bp->b_resid = resid; 386 } 387 388 if (lsp->ls_kstat) { 389 size_t n_done = bp->b_bcount - bp->b_resid; 390 kstat_io_t *kioptr; 391 392 mutex_enter(lsp->ls_kstat->ks_lock); 393 kioptr = KSTAT_IO_PTR(lsp->ls_kstat); 394 if (bp->b_flags & B_READ) { 395 kioptr->nread += n_done; 396 kioptr->reads++; 397 } else { 398 kioptr->nwritten += n_done; 399 kioptr->writes++; 400 } 401 kstat_runq_exit(kioptr); 402 mutex_exit(lsp->ls_kstat->ks_lock); 403 } 404 bioerror(bp, error); 405 biodone(bp); 406 } 407 408 static int 409 lofi_strategy(struct buf *bp) 410 { 411 struct lofi_state *lsp; 412 offset_t offset; 413 414 /* 415 * We cannot just do I/O here, because the current thread 416 * _might_ end up back in here because the underlying filesystem 417 * wants a buffer, which eventually gets into bio_recycle and 418 * might call into lofi to write out a delayed-write buffer. 419 * This is bad if the filesystem above lofi is the same as below. 420 * 421 * We could come up with a complex strategy using threads to 422 * do the I/O asynchronously, or we could use task queues. task 423 * queues were incredibly easy so they win. 424 */ 425 lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev)); 426 offset = bp->b_lblkno * DEV_BSIZE; /* offset within file */ 427 if (offset == lsp->ls_vp_size) { 428 /* EOF */ 429 if ((bp->b_flags & B_READ) != 0) { 430 bp->b_resid = bp->b_bcount; 431 bioerror(bp, 0); 432 } else { 433 /* writes should fail */ 434 bioerror(bp, ENXIO); 435 } 436 biodone(bp); 437 return (0); 438 } 439 if (offset > lsp->ls_vp_size) { 440 bioerror(bp, ENXIO); 441 biodone(bp); 442 return (0); 443 } 444 if (lsp->ls_kstat) { 445 mutex_enter(lsp->ls_kstat->ks_lock); 446 kstat_waitq_enter(KSTAT_IO_PTR(lsp->ls_kstat)); 447 mutex_exit(lsp->ls_kstat->ks_lock); 448 } 449 (void) taskq_dispatch(lsp->ls_taskq, lofi_strategy_task, bp, KM_SLEEP); 450 return (0); 451 } 452 453 /*ARGSUSED2*/ 454 static int 455 lofi_read(dev_t dev, struct uio *uio, struct cred *credp) 456 { 457 if (getminor(dev) == 0) 458 return (EINVAL); 459 return (physio(lofi_strategy, NULL, dev, B_READ, minphys, uio)); 460 } 461 462 /*ARGSUSED2*/ 463 static int 464 lofi_write(dev_t dev, struct uio *uio, struct cred *credp) 465 { 466 if (getminor(dev) == 0) 467 return (EINVAL); 468 return (physio(lofi_strategy, NULL, dev, B_WRITE, minphys, uio)); 469 } 470 471 /*ARGSUSED2*/ 472 static int 473 lofi_aread(dev_t dev, struct aio_req *aio, struct cred *credp) 474 { 475 if (getminor(dev) == 0) 476 return (EINVAL); 477 return (aphysio(lofi_strategy, anocancel, dev, B_READ, minphys, aio)); 478 } 479 480 /*ARGSUSED2*/ 481 static int 482 lofi_awrite(dev_t dev, struct aio_req *aio, struct cred *credp) 483 { 484 if (getminor(dev) == 0) 485 return (EINVAL); 486 return (aphysio(lofi_strategy, anocancel, dev, B_WRITE, minphys, aio)); 487 } 488 489 /*ARGSUSED*/ 490 static int 491 lofi_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 492 { 493 switch (infocmd) { 494 case DDI_INFO_DEVT2DEVINFO: 495 *result = lofi_dip; 496 return (DDI_SUCCESS); 497 case DDI_INFO_DEVT2INSTANCE: 498 *result = 0; 499 return (DDI_SUCCESS); 500 } 501 return (DDI_FAILURE); 502 } 503 504 static int 505 lofi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 506 { 507 int error; 508 509 if (cmd != DDI_ATTACH) 510 return (DDI_FAILURE); 511 error = ddi_soft_state_zalloc(lofi_statep, 0); 512 if (error == DDI_FAILURE) { 513 return (DDI_FAILURE); 514 } 515 error = ddi_create_minor_node(dip, LOFI_CTL_NODE, S_IFCHR, 0, 516 DDI_PSEUDO, NULL); 517 if (error == DDI_FAILURE) { 518 ddi_soft_state_free(lofi_statep, 0); 519 return (DDI_FAILURE); 520 } 521 lofi_dip = dip; 522 ddi_report_dev(dip); 523 return (DDI_SUCCESS); 524 } 525 526 static int 527 lofi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 528 { 529 if (cmd != DDI_DETACH) 530 return (DDI_FAILURE); 531 if (lofi_busy()) 532 return (DDI_FAILURE); 533 lofi_dip = NULL; 534 ddi_remove_minor_node(dip, NULL); 535 ddi_soft_state_free(lofi_statep, 0); 536 return (DDI_SUCCESS); 537 } 538 539 /* 540 * These two just simplify the rest of the ioctls that need to copyin/out 541 * the lofi_ioctl structure. 542 */ 543 struct lofi_ioctl * 544 copy_in_lofi_ioctl(const struct lofi_ioctl *ulip, int flag) 545 { 546 struct lofi_ioctl *klip; 547 int error; 548 549 klip = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP); 550 error = ddi_copyin(ulip, klip, sizeof (struct lofi_ioctl), flag); 551 if (error) { 552 kmem_free(klip, sizeof (struct lofi_ioctl)); 553 return (NULL); 554 } 555 556 /* make sure filename is always null-terminated */ 557 klip->li_filename[MAXPATHLEN] = '\0'; 558 559 /* validate minor number */ 560 if (klip->li_minor > lofi_max_files) { 561 kmem_free(klip, sizeof (struct lofi_ioctl)); 562 return (NULL); 563 } 564 return (klip); 565 } 566 567 int 568 copy_out_lofi_ioctl(const struct lofi_ioctl *klip, struct lofi_ioctl *ulip, 569 int flag) 570 { 571 int error; 572 573 error = ddi_copyout(klip, ulip, sizeof (struct lofi_ioctl), flag); 574 if (error) 575 return (EFAULT); 576 return (0); 577 } 578 579 void 580 free_lofi_ioctl(struct lofi_ioctl *klip) 581 { 582 kmem_free(klip, sizeof (struct lofi_ioctl)); 583 } 584 585 /* 586 * Return the minor number 'filename' is mapped to, if it is. 587 */ 588 static int 589 file_to_minor(char *filename) 590 { 591 minor_t minor; 592 struct lofi_state *lsp; 593 594 ASSERT(mutex_owned(&lofi_lock)); 595 for (minor = 1; minor <= lofi_max_files; minor++) { 596 lsp = ddi_get_soft_state(lofi_statep, minor); 597 if (lsp == NULL) 598 continue; 599 if (strcmp(lsp->ls_filename, filename) == 0) 600 return (minor); 601 } 602 return (0); 603 } 604 605 /* 606 * lofiadm does some validation, but since Joe Random (or crashme) could 607 * do our ioctls, we need to do some validation too. 608 */ 609 static int 610 valid_filename(const char *filename) 611 { 612 static char *blkprefix = "/dev/" LOFI_BLOCK_NAME "/"; 613 static char *charprefix = "/dev/" LOFI_CHAR_NAME "/"; 614 615 /* must be absolute path */ 616 if (filename[0] != '/') 617 return (0); 618 /* must not be lofi */ 619 if (strncmp(filename, blkprefix, strlen(blkprefix)) == 0) 620 return (0); 621 if (strncmp(filename, charprefix, strlen(charprefix)) == 0) 622 return (0); 623 return (1); 624 } 625 626 /* 627 * Fakes up a disk geometry, and one big partition, based on the size 628 * of the file. This is needed because we allow newfs'ing the device, 629 * and newfs will do several disk ioctls to figure out the geometry and 630 * partition information. It uses that information to determine the parameters 631 * to pass to mkfs. Geometry is pretty much irrelevent these days, but we 632 * have to support it. 633 */ 634 static void 635 fake_disk_geometry(struct lofi_state *lsp) 636 { 637 /* dk_geom - see dkio(7I) */ 638 /* 639 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs 640 * of sectors), but that breaks programs like fdisk which want to 641 * partition a disk by cylinder. With one cylinder, you can't create 642 * an fdisk partition and put pcfs on it for testing (hard to pick 643 * a number between one and one). 644 * 645 * The cheezy floppy test is an attempt to not have too few cylinders 646 * for a small file, or so many on a big file that you waste space 647 * for backup superblocks or cylinder group structures. 648 */ 649 if (lsp->ls_vp_size < (2 * 1024 * 1024)) /* floppy? */ 650 lsp->ls_dkg.dkg_ncyl = lsp->ls_vp_size / (100 * 1024); 651 else 652 lsp->ls_dkg.dkg_ncyl = lsp->ls_vp_size / (300 * 1024); 653 /* in case file file is < 100k */ 654 if (lsp->ls_dkg.dkg_ncyl == 0) 655 lsp->ls_dkg.dkg_ncyl = 1; 656 lsp->ls_dkg.dkg_acyl = 0; 657 lsp->ls_dkg.dkg_bcyl = 0; 658 lsp->ls_dkg.dkg_nhead = 1; 659 lsp->ls_dkg.dkg_obs1 = 0; 660 lsp->ls_dkg.dkg_intrlv = 0; 661 lsp->ls_dkg.dkg_obs2 = 0; 662 lsp->ls_dkg.dkg_obs3 = 0; 663 lsp->ls_dkg.dkg_apc = 0; 664 lsp->ls_dkg.dkg_rpm = 7200; 665 lsp->ls_dkg.dkg_pcyl = lsp->ls_dkg.dkg_ncyl + lsp->ls_dkg.dkg_acyl; 666 lsp->ls_dkg.dkg_nsect = lsp->ls_vp_size / 667 (DEV_BSIZE * lsp->ls_dkg.dkg_ncyl); 668 lsp->ls_dkg.dkg_write_reinstruct = 0; 669 lsp->ls_dkg.dkg_read_reinstruct = 0; 670 671 /* vtoc - see dkio(7I) */ 672 bzero(&lsp->ls_vtoc, sizeof (struct vtoc)); 673 lsp->ls_vtoc.v_sanity = VTOC_SANE; 674 lsp->ls_vtoc.v_version = V_VERSION; 675 bcopy(LOFI_DRIVER_NAME, lsp->ls_vtoc.v_volume, 7); 676 lsp->ls_vtoc.v_sectorsz = DEV_BSIZE; 677 lsp->ls_vtoc.v_nparts = 1; 678 lsp->ls_vtoc.v_part[0].p_tag = V_UNASSIGNED; 679 lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT; 680 lsp->ls_vtoc.v_part[0].p_start = (daddr_t)0; 681 /* 682 * The partition size cannot just be the number of sectors, because 683 * that might not end on a cylinder boundary. And if that's the case, 684 * newfs/mkfs will print a scary warning. So just figure the size 685 * based on the number of cylinders and sectors/cylinder. 686 */ 687 lsp->ls_vtoc.v_part[0].p_size = lsp->ls_dkg.dkg_pcyl * 688 lsp->ls_dkg.dkg_nsect * lsp->ls_dkg.dkg_nhead; 689 690 /* dk_cinfo - see dkio(7I) */ 691 bzero(&lsp->ls_ci, sizeof (struct dk_cinfo)); 692 (void) strcpy(lsp->ls_ci.dki_cname, LOFI_DRIVER_NAME); 693 lsp->ls_ci.dki_ctype = DKC_MD; 694 lsp->ls_ci.dki_flags = 0; 695 lsp->ls_ci.dki_cnum = 0; 696 lsp->ls_ci.dki_addr = 0; 697 lsp->ls_ci.dki_space = 0; 698 lsp->ls_ci.dki_prio = 0; 699 lsp->ls_ci.dki_vec = 0; 700 (void) strcpy(lsp->ls_ci.dki_dname, LOFI_DRIVER_NAME); 701 lsp->ls_ci.dki_unit = 0; 702 lsp->ls_ci.dki_slave = 0; 703 lsp->ls_ci.dki_partition = 0; 704 /* 705 * newfs uses this to set maxcontig. Must not be < 16, or it 706 * will be 0 when newfs multiplies it by DEV_BSIZE and divides 707 * it by the block size. Then tunefs doesn't work because 708 * maxcontig is 0. 709 */ 710 lsp->ls_ci.dki_maxtransfer = 16; 711 } 712 713 /* 714 * map a file to a minor number. Return the minor number. 715 */ 716 static int 717 lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor, 718 int *rvalp, struct cred *credp, int ioctl_flag) 719 { 720 minor_t newminor; 721 struct lofi_state *lsp; 722 struct lofi_ioctl *klip; 723 int error; 724 char namebuf[50]; 725 struct vnode *vp; 726 int64_t Nblocks_prop_val; 727 int64_t Size_prop_val; 728 vattr_t vattr; 729 int flag; 730 enum vtype v_type; 731 dev_t newdev; 732 int zalloced = 0; 733 734 klip = copy_in_lofi_ioctl(ulip, ioctl_flag); 735 if (klip == NULL) 736 return (EFAULT); 737 738 mutex_enter(&lofi_lock); 739 740 if (!valid_filename(klip->li_filename)) { 741 error = EINVAL; 742 goto out; 743 } 744 745 if (file_to_minor(klip->li_filename) != 0) { 746 error = EBUSY; 747 goto out; 748 } 749 750 if (pickminor) { 751 /* Find a free one */ 752 for (newminor = 1; newminor <= lofi_max_files; newminor++) 753 if (ddi_get_soft_state(lofi_statep, newminor) == NULL) 754 break; 755 if (newminor >= lofi_max_files) { 756 error = EAGAIN; 757 goto out; 758 } 759 } else { 760 newminor = klip->li_minor; 761 if (ddi_get_soft_state(lofi_statep, newminor) != NULL) { 762 error = EEXIST; 763 goto out; 764 } 765 } 766 767 /* make sure it's valid */ 768 error = lookupname(klip->li_filename, UIO_SYSSPACE, FOLLOW, 769 NULLVPP, &vp); 770 if (error) { 771 goto out; 772 } 773 v_type = vp->v_type; 774 VN_RELE(vp); 775 if (!V_ISLOFIABLE(v_type)) { 776 error = EINVAL; 777 goto out; 778 } 779 flag = FREAD | FWRITE | FOFFMAX | FEXCL; 780 error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0); 781 if (error) { 782 /* try read-only */ 783 flag &= ~FWRITE; 784 error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, 785 &vp, 0, 0); 786 if (error) { 787 goto out; 788 } 789 } 790 vattr.va_mask = AT_SIZE; 791 error = VOP_GETATTR(vp, &vattr, 0, credp); 792 if (error) { 793 goto closeout; 794 } 795 /* the file needs to be a multiple of the block size */ 796 if ((vattr.va_size % DEV_BSIZE) != 0) { 797 error = EINVAL; 798 goto closeout; 799 } 800 newdev = makedevice(getmajor(dev), newminor); 801 Size_prop_val = vattr.va_size; 802 if ((ddi_prop_update_int64(newdev, lofi_dip, 803 SIZE_PROP_NAME, Size_prop_val)) != DDI_PROP_SUCCESS) { 804 error = EINVAL; 805 goto closeout; 806 } 807 Nblocks_prop_val = vattr.va_size / DEV_BSIZE; 808 if ((ddi_prop_update_int64(newdev, lofi_dip, 809 NBLOCKS_PROP_NAME, Nblocks_prop_val)) != DDI_PROP_SUCCESS) { 810 error = EINVAL; 811 goto propout; 812 } 813 error = ddi_soft_state_zalloc(lofi_statep, newminor); 814 if (error == DDI_FAILURE) { 815 error = ENOMEM; 816 goto propout; 817 } 818 zalloced = 1; 819 (void) snprintf(namebuf, sizeof (namebuf), "%d", newminor); 820 (void) ddi_create_minor_node(lofi_dip, namebuf, S_IFBLK, newminor, 821 DDI_PSEUDO, NULL); 822 if (error != DDI_SUCCESS) { 823 error = ENXIO; 824 goto propout; 825 } 826 (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", newminor); 827 error = ddi_create_minor_node(lofi_dip, namebuf, S_IFCHR, newminor, 828 DDI_PSEUDO, NULL); 829 if (error != DDI_SUCCESS) { 830 /* remove block node */ 831 (void) snprintf(namebuf, sizeof (namebuf), "%d", newminor); 832 ddi_remove_minor_node(lofi_dip, namebuf); 833 error = ENXIO; 834 goto propout; 835 } 836 lsp = ddi_get_soft_state(lofi_statep, newminor); 837 lsp->ls_filename_sz = strlen(klip->li_filename) + 1; 838 lsp->ls_filename = kmem_alloc(lsp->ls_filename_sz, KM_SLEEP); 839 (void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d", 840 LOFI_DRIVER_NAME, newminor); 841 lsp->ls_taskq = taskq_create(namebuf, lofi_taskq_nthreads, 842 minclsyspri, 1, lofi_taskq_maxalloc, 0); 843 lsp->ls_kstat = kstat_create(LOFI_DRIVER_NAME, newminor, 844 NULL, "disk", KSTAT_TYPE_IO, 1, 0); 845 if (lsp->ls_kstat) { 846 mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL); 847 lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock; 848 kstat_install(lsp->ls_kstat); 849 } 850 /* 851 * save open mode so file can be closed properly and vnode counts 852 * updated correctly. 853 */ 854 lsp->ls_openflag = flag; 855 856 /* 857 * Try to handle stacked lofs vnodes. 858 */ 859 if (vp->v_type == VREG) { 860 if (VOP_REALVP(vp, &lsp->ls_vp) != 0) { 861 lsp->ls_vp = vp; 862 } else { 863 /* 864 * Even though vp was obtained via vn_open(), we 865 * can't call vn_close() on it, since lofs will 866 * pass the VOP_CLOSE() on down to the realvp 867 * (which we are about to use). Hence we merely 868 * drop the reference to the lofs vnode and hold 869 * the realvp so things behave as if we've 870 * opened the realvp without any interaction 871 * with lofs. 872 */ 873 VN_HOLD(lsp->ls_vp); 874 VN_RELE(vp); 875 } 876 } else { 877 lsp->ls_vp = vp; 878 } 879 lsp->ls_vp_size = vattr.va_size; 880 (void) strcpy(lsp->ls_filename, klip->li_filename); 881 if (rvalp) 882 *rvalp = (int)newminor; 883 klip->li_minor = newminor; 884 885 fake_disk_geometry(lsp); 886 mutex_exit(&lofi_lock); 887 (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 888 free_lofi_ioctl(klip); 889 return (0); 890 891 propout: 892 (void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME); 893 (void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME); 894 closeout: 895 (void) VOP_CLOSE(vp, flag, 1, 0, credp); 896 VN_RELE(vp); 897 out: 898 if (zalloced) 899 ddi_soft_state_free(lofi_statep, newminor); 900 mutex_exit(&lofi_lock); 901 free_lofi_ioctl(klip); 902 return (error); 903 } 904 905 /* 906 * unmap a file. 907 */ 908 static int 909 lofi_unmap_file(dev_t dev, struct lofi_ioctl *ulip, int byfilename, 910 struct cred *credp, int ioctl_flag) 911 { 912 struct lofi_state *lsp; 913 struct lofi_ioctl *klip; 914 minor_t minor; 915 char namebuf[20]; 916 dev_t newdev; 917 918 klip = copy_in_lofi_ioctl(ulip, ioctl_flag); 919 if (klip == NULL) 920 return (EFAULT); 921 922 mutex_enter(&lofi_lock); 923 if (byfilename) { 924 minor = file_to_minor(klip->li_filename); 925 } else { 926 minor = klip->li_minor; 927 } 928 if (minor == 0) { 929 mutex_exit(&lofi_lock); 930 free_lofi_ioctl(klip); 931 return (ENXIO); 932 } 933 lsp = ddi_get_soft_state(lofi_statep, minor); 934 if (lsp == NULL) { 935 mutex_exit(&lofi_lock); 936 free_lofi_ioctl(klip); 937 return (ENXIO); 938 } 939 if (is_opened(lsp)) { 940 mutex_exit(&lofi_lock); 941 free_lofi_ioctl(klip); 942 return (EBUSY); 943 } 944 /* 945 * Use saved open mode to properly update vnode counts 946 */ 947 (void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag, 1, 0, credp); 948 VN_RELE(lsp->ls_vp); 949 lsp->ls_vp = NULL; 950 newdev = makedevice(getmajor(dev), minor); 951 (void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME); 952 (void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME); 953 954 (void) snprintf(namebuf, sizeof (namebuf), "%d", minor); 955 ddi_remove_minor_node(lofi_dip, namebuf); 956 (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor); 957 ddi_remove_minor_node(lofi_dip, namebuf); 958 959 kmem_free(lsp->ls_filename, lsp->ls_filename_sz); 960 taskq_destroy(lsp->ls_taskq); 961 if (lsp->ls_kstat) { 962 kstat_delete(lsp->ls_kstat); 963 mutex_destroy(&lsp->ls_kstat_lock); 964 } 965 ddi_soft_state_free(lofi_statep, minor); 966 klip->li_minor = minor; 967 mutex_exit(&lofi_lock); 968 (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 969 free_lofi_ioctl(klip); 970 return (0); 971 } 972 973 /* 974 * get the filename given the minor number, or the minor number given 975 * the name. 976 */ 977 /*ARGSUSED3*/ 978 static int 979 lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which, 980 struct cred *credp, int ioctl_flag) 981 { 982 struct lofi_state *lsp; 983 struct lofi_ioctl *klip; 984 int error; 985 minor_t minor; 986 987 #ifdef lint 988 dev = dev; 989 #endif 990 klip = copy_in_lofi_ioctl(ulip, ioctl_flag); 991 if (klip == NULL) 992 return (EFAULT); 993 994 switch (which) { 995 case LOFI_GET_FILENAME: 996 minor = klip->li_minor; 997 if (minor == 0) { 998 free_lofi_ioctl(klip); 999 return (EINVAL); 1000 } 1001 1002 mutex_enter(&lofi_lock); 1003 lsp = ddi_get_soft_state(lofi_statep, minor); 1004 if (lsp == NULL) { 1005 mutex_exit(&lofi_lock); 1006 free_lofi_ioctl(klip); 1007 return (ENXIO); 1008 } 1009 (void) strcpy(klip->li_filename, lsp->ls_filename); 1010 mutex_exit(&lofi_lock); 1011 error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 1012 free_lofi_ioctl(klip); 1013 return (error); 1014 case LOFI_GET_MINOR: 1015 mutex_enter(&lofi_lock); 1016 klip->li_minor = file_to_minor(klip->li_filename); 1017 mutex_exit(&lofi_lock); 1018 if (klip->li_minor == 0) { 1019 free_lofi_ioctl(klip); 1020 return (ENOENT); 1021 } 1022 error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 1023 free_lofi_ioctl(klip); 1024 return (error); 1025 default: 1026 free_lofi_ioctl(klip); 1027 return (EINVAL); 1028 } 1029 1030 } 1031 1032 static int 1033 lofi_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, 1034 int *rvalp) 1035 { 1036 int error; 1037 enum dkio_state dkstate; 1038 struct lofi_state *lsp; 1039 minor_t minor; 1040 1041 #ifdef lint 1042 credp = credp; 1043 #endif 1044 1045 minor = getminor(dev); 1046 /* lofi ioctls only apply to the master device */ 1047 if (minor == 0) { 1048 struct lofi_ioctl *lip = (struct lofi_ioctl *)arg; 1049 1050 /* 1051 * the query command only need read-access - i.e., normal 1052 * users are allowed to do those on the ctl device as 1053 * long as they can open it read-only. 1054 */ 1055 switch (cmd) { 1056 case LOFI_MAP_FILE: 1057 if ((flag & FWRITE) == 0) 1058 return (EPERM); 1059 return (lofi_map_file(dev, lip, 1, rvalp, credp, flag)); 1060 case LOFI_MAP_FILE_MINOR: 1061 if ((flag & FWRITE) == 0) 1062 return (EPERM); 1063 return (lofi_map_file(dev, lip, 0, rvalp, credp, flag)); 1064 case LOFI_UNMAP_FILE: 1065 if ((flag & FWRITE) == 0) 1066 return (EPERM); 1067 return (lofi_unmap_file(dev, lip, 1, credp, flag)); 1068 case LOFI_UNMAP_FILE_MINOR: 1069 if ((flag & FWRITE) == 0) 1070 return (EPERM); 1071 return (lofi_unmap_file(dev, lip, 0, credp, flag)); 1072 case LOFI_GET_FILENAME: 1073 return (lofi_get_info(dev, lip, LOFI_GET_FILENAME, 1074 credp, flag)); 1075 case LOFI_GET_MINOR: 1076 return (lofi_get_info(dev, lip, LOFI_GET_MINOR, 1077 credp, flag)); 1078 case LOFI_GET_MAXMINOR: 1079 error = ddi_copyout(&lofi_max_files, &lip->li_minor, 1080 sizeof (lofi_max_files), flag); 1081 if (error) 1082 return (EFAULT); 1083 return (0); 1084 default: 1085 break; 1086 } 1087 } 1088 1089 lsp = ddi_get_soft_state(lofi_statep, minor); 1090 if (lsp == NULL) 1091 return (ENXIO); 1092 1093 /* these are for faking out utilities like newfs */ 1094 switch (cmd) { 1095 case VOLIOCINFO: 1096 /* pcfs does this to see if it needs to set PCFS_NOCHK */ 1097 /* 0 means it should set it */ 1098 return (0); 1099 case DKIOCGVTOC: 1100 switch (ddi_model_convert_from(flag & FMODELS)) { 1101 case DDI_MODEL_ILP32: { 1102 struct vtoc32 vtoc32; 1103 1104 vtoctovtoc32(lsp->ls_vtoc, vtoc32); 1105 if (ddi_copyout(&vtoc32, (void *)arg, 1106 sizeof (struct vtoc32), flag)) 1107 return (EFAULT); 1108 break; 1109 } 1110 1111 case DDI_MODEL_NONE: 1112 if (ddi_copyout(&lsp->ls_vtoc, (void *)arg, 1113 sizeof (struct vtoc), flag)) 1114 return (EFAULT); 1115 break; 1116 } 1117 return (0); 1118 case DKIOCINFO: 1119 error = ddi_copyout(&lsp->ls_ci, (void *)arg, 1120 sizeof (struct dk_cinfo), flag); 1121 if (error) 1122 return (EFAULT); 1123 return (0); 1124 case DKIOCG_VIRTGEOM: 1125 case DKIOCG_PHYGEOM: 1126 case DKIOCGGEOM: 1127 error = ddi_copyout(&lsp->ls_dkg, (void *)arg, 1128 sizeof (struct dk_geom), flag); 1129 if (error) 1130 return (EFAULT); 1131 return (0); 1132 case DKIOCSTATE: 1133 /* the file is always there */ 1134 dkstate = DKIO_INSERTED; 1135 error = ddi_copyout(&dkstate, (void *)arg, 1136 sizeof (enum dkio_state), flag); 1137 if (error) 1138 return (EFAULT); 1139 return (0); 1140 default: 1141 return (ENOTTY); 1142 } 1143 } 1144 1145 static struct cb_ops lofi_cb_ops = { 1146 lofi_open, /* open */ 1147 lofi_close, /* close */ 1148 lofi_strategy, /* strategy */ 1149 nodev, /* print */ 1150 nodev, /* dump */ 1151 lofi_read, /* read */ 1152 lofi_write, /* write */ 1153 lofi_ioctl, /* ioctl */ 1154 nodev, /* devmap */ 1155 nodev, /* mmap */ 1156 nodev, /* segmap */ 1157 nochpoll, /* poll */ 1158 ddi_prop_op, /* prop_op */ 1159 0, /* streamtab */ 1160 D_64BIT | D_NEW | D_MP, /* Driver compatibility flag */ 1161 CB_REV, 1162 lofi_aread, 1163 lofi_awrite 1164 }; 1165 1166 static struct dev_ops lofi_ops = { 1167 DEVO_REV, /* devo_rev, */ 1168 0, /* refcnt */ 1169 lofi_info, /* info */ 1170 nulldev, /* identify */ 1171 nulldev, /* probe */ 1172 lofi_attach, /* attach */ 1173 lofi_detach, /* detach */ 1174 nodev, /* reset */ 1175 &lofi_cb_ops, /* driver operations */ 1176 NULL /* no bus operations */ 1177 }; 1178 1179 static struct modldrv modldrv = { 1180 &mod_driverops, 1181 "loopback file driver (%I%)", 1182 &lofi_ops, 1183 }; 1184 1185 static struct modlinkage modlinkage = { 1186 MODREV_1, 1187 &modldrv, 1188 NULL 1189 }; 1190 1191 int 1192 _init(void) 1193 { 1194 int error; 1195 1196 error = ddi_soft_state_init(&lofi_statep, 1197 sizeof (struct lofi_state), 0); 1198 if (error) 1199 return (error); 1200 1201 mutex_init(&lofi_lock, NULL, MUTEX_DRIVER, NULL); 1202 error = mod_install(&modlinkage); 1203 if (error) { 1204 mutex_destroy(&lofi_lock); 1205 ddi_soft_state_fini(&lofi_statep); 1206 } 1207 1208 return (error); 1209 } 1210 1211 int 1212 _fini(void) 1213 { 1214 int error; 1215 1216 if (lofi_busy()) 1217 return (EBUSY); 1218 1219 error = mod_remove(&modlinkage); 1220 if (error) 1221 return (error); 1222 1223 mutex_destroy(&lofi_lock); 1224 ddi_soft_state_fini(&lofi_statep); 1225 1226 return (error); 1227 } 1228 1229 int 1230 _info(struct modinfo *modinfop) 1231 { 1232 return (mod_info(&modlinkage, modinfop)); 1233 } 1234