1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/time.h> 33 #include <sys/systm.h> 34 #include <sys/sysmacros.h> 35 #include <sys/resource.h> 36 #include <sys/signal.h> 37 #include <sys/cred.h> 38 #include <sys/user.h> 39 #include <sys/buf.h> 40 #include <sys/vfs.h> 41 #include <sys/vnode.h> 42 #include <sys/proc.h> 43 #include <sys/disp.h> 44 #include <sys/file.h> 45 #include <sys/fcntl.h> 46 #include <sys/flock.h> 47 #include <sys/kmem.h> 48 #include <sys/uio.h> 49 #include <sys/conf.h> 50 #include <sys/mman.h> 51 #include <sys/pathname.h> 52 #include <sys/debug.h> 53 #include <sys/vmmeter.h> 54 #include <sys/vmsystm.h> 55 #include <sys/cmn_err.h> 56 #include <sys/vtrace.h> 57 #include <sys/filio.h> 58 #include <sys/dnlc.h> 59 60 #include <sys/fs/ufs_filio.h> 61 #include <sys/fs/ufs_lockfs.h> 62 #include <sys/fs/ufs_fs.h> 63 #include <sys/fs/ufs_inode.h> 64 #include <sys/fs/ufs_fsdir.h> 65 #include <sys/fs/ufs_quota.h> 66 #include <sys/fs/ufs_trans.h> 67 #include <sys/fs/ufs_log.h> 68 #include <sys/dirent.h> /* must be AFTER <sys/fs/fsdir.h>! */ 69 #include <sys/errno.h> 70 #include <sys/sysinfo.h> 71 72 #include <vm/hat.h> 73 #include <vm/page.h> 74 #include <vm/pvn.h> 75 #include <vm/as.h> 76 #include <vm/seg.h> 77 #include <vm/seg_map.h> 78 #include <vm/seg_vn.h> 79 #include <vm/rm.h> 80 #include <sys/swap.h> 81 #include <sys/model.h> 82 #include <sys/policy.h> 83 84 #include "fs/fs_subr.h" 85 86 /* 87 * ufs_fioio is the ufs equivalent of NFS_CNVT and is tailored to 88 * metamucil's needs. It may change at any time. 89 */ 90 /* ARGSUSED */ 91 int 92 ufs_fioio( 93 struct vnode *vp, /* any file on the fs */ 94 struct fioio *fiou, /* fioio struct in userland */ 95 int flag, /* flag from VOP_IOCTL() */ 96 struct cred *cr) /* credentials from ufs_ioctl */ 97 { 98 int error = 0; 99 struct vnode *vpio = NULL; /* vnode for inode open */ 100 struct inode *ipio = NULL; /* inode for inode open */ 101 struct file *fpio = NULL; /* file for inode open */ 102 struct inode *ip; /* inode for file system */ 103 struct fs *fs; /* fs for file system */ 104 STRUCT_DECL(fioio, fio); /* copy of user's fioio struct */ 105 106 /* 107 * must be privileged 108 */ 109 if (secpolicy_fs_config(cr, vp->v_vfsp) != 0) 110 return (EPERM); 111 112 STRUCT_INIT(fio, flag & DATAMODEL_MASK); 113 114 /* 115 * get user's copy of fioio struct 116 */ 117 if (copyin(fiou, STRUCT_BUF(fio), STRUCT_SIZE(fio))) 118 return (EFAULT); 119 120 ip = VTOI(vp); 121 fs = ip->i_fs; 122 123 /* 124 * check the inode number against the fs's inode number bounds 125 */ 126 if (STRUCT_FGET(fio, fio_ino) < UFSROOTINO) 127 return (ESRCH); 128 if (STRUCT_FGET(fio, fio_ino) >= fs->fs_ncg * fs->fs_ipg) 129 return (ESRCH); 130 131 rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER); 132 133 /* 134 * get the inode 135 */ 136 error = ufs_iget(ip->i_vfs, STRUCT_FGET(fio, fio_ino), &ipio, cr); 137 138 rw_exit(&ip->i_ufsvfs->vfs_dqrwlock); 139 140 if (error) 141 return (error); 142 143 /* 144 * check the generation number 145 */ 146 rw_enter(&ipio->i_contents, RW_READER); 147 if (ipio->i_gen != STRUCT_FGET(fio, fio_gen)) { 148 error = ESTALE; 149 rw_exit(&ipio->i_contents); 150 goto errout; 151 } 152 153 /* 154 * check if the inode is free 155 */ 156 if (ipio->i_mode == 0) { 157 error = ENOENT; 158 rw_exit(&ipio->i_contents); 159 goto errout; 160 } 161 rw_exit(&ipio->i_contents); 162 163 /* 164 * Adapted from copen: get a file struct 165 * Large Files: We open this file descriptor with FOFFMAX flag 166 * set so that it will be like a large file open. 167 */ 168 if (falloc(NULL, (FREAD|FOFFMAX), &fpio, STRUCT_FADDR(fio, fio_fd))) 169 goto errout; 170 171 /* 172 * Adapted from vn_open: check access and then open the file 173 */ 174 vpio = ITOV(ipio); 175 if (error = VOP_ACCESS(vpio, VREAD, 0, cr)) 176 goto errout; 177 178 if (error = VOP_OPEN(&vpio, FREAD, cr)) 179 goto errout; 180 181 /* 182 * Adapted from copen: initialize the file struct 183 */ 184 fpio->f_vnode = vpio; 185 186 /* 187 * return the fd 188 */ 189 if (copyout(STRUCT_BUF(fio), fiou, STRUCT_SIZE(fio))) { 190 error = EFAULT; 191 goto errout; 192 } 193 setf(STRUCT_FGET(fio, fio_fd), fpio); 194 mutex_exit(&fpio->f_tlock); 195 return (0); 196 errout: 197 /* 198 * free the file struct and fd 199 */ 200 if (fpio) { 201 setf(STRUCT_FGET(fio, fio_fd), NULL); 202 unfalloc(fpio); 203 } 204 205 /* 206 * release the hold on the inode 207 */ 208 if (ipio) 209 VN_RELE(ITOV(ipio)); 210 return (error); 211 } 212 213 /* 214 * ufs_fiosatime 215 * set access time w/o altering change time. This ioctl is tailored 216 * to metamucil's needs and may change at any time. 217 */ 218 int 219 ufs_fiosatime( 220 struct vnode *vp, /* file's vnode */ 221 struct timeval *tvu, /* struct timeval in userland */ 222 int flag, /* flag from VOP_IOCTL() */ 223 struct cred *cr) /* credentials from ufs_ioctl */ 224 { 225 struct inode *ip; /* inode for vp */ 226 struct timeval32 tv; /* copy of user's timeval */ 227 int now = 0; 228 229 /* 230 * must have sufficient privileges 231 */ 232 if (secpolicy_fs_config(cr, vp->v_vfsp) != 0) 233 return (EPERM); 234 235 /* 236 * get user's copy of timeval struct and check values 237 * if input is NULL, will set time to now 238 */ 239 if (tvu == NULL) { 240 now = 1; 241 } else { 242 if ((flag & DATAMODEL_MASK) == DATAMODEL_ILP32) { 243 if (copyin(tvu, &tv, sizeof (tv))) 244 return (EFAULT); 245 } else { 246 struct timeval tv64; 247 248 if (copyin(tvu, &tv64, sizeof (tv64))) 249 return (EFAULT); 250 if (TIMEVAL_OVERFLOW(&tv64)) 251 return (EOVERFLOW); 252 TIMEVAL_TO_TIMEVAL32(&tv, &tv64); 253 } 254 255 if (tv.tv_usec < 0 || tv.tv_usec >= 1000000) 256 return (EINVAL); 257 } 258 259 /* 260 * update access time 261 */ 262 ip = VTOI(vp); 263 rw_enter(&ip->i_contents, RW_WRITER); 264 ITIMES_NOLOCK(ip); 265 if (now) { 266 mutex_enter(&ufs_iuniqtime_lock); 267 ip->i_atime = iuniqtime; 268 mutex_exit(&ufs_iuniqtime_lock); 269 } else { 270 ip->i_atime = tv; 271 } 272 ip->i_flag |= IMODACC; 273 rw_exit(&ip->i_contents); 274 275 return (0); 276 } 277 278 /* 279 * ufs_fiogdio 280 * Get delayed-io state. This ioctl is tailored 281 * to metamucil's needs and may change at any time. 282 */ 283 /* ARGSUSED */ 284 int 285 ufs_fiogdio( 286 struct vnode *vp, /* file's vnode */ 287 uint_t *diop, /* dio state returned here */ 288 int flag, /* flag from ufs_ioctl */ 289 struct cred *cr) /* credentials from ufs_ioctl */ 290 { 291 struct ufsvfs *ufsvfsp = VTOI(vp)->i_ufsvfs; 292 293 /* 294 * forcibly unmounted 295 */ 296 if (ufsvfsp == NULL) 297 return (EIO); 298 299 if (suword32(diop, ufsvfsp->vfs_dio)) 300 return (EFAULT); 301 return (0); 302 } 303 304 /* 305 * ufs_fiosdio 306 * Set delayed-io state. This ioctl is tailored 307 * to metamucil's needs and may change at any time. 308 */ 309 int 310 ufs_fiosdio( 311 struct vnode *vp, /* file's vnode */ 312 uint_t *diop, /* dio flag */ 313 int flag, /* flag from ufs_ioctl */ 314 struct cred *cr) /* credentials from ufs_ioctl */ 315 { 316 uint_t dio; /* copy of user's dio */ 317 struct inode *ip; /* inode for vp */ 318 struct ufsvfs *ufsvfsp; 319 struct fs *fs; 320 struct ulockfs *ulp; 321 int error = 0; 322 323 #ifdef lint 324 flag = flag; 325 #endif 326 327 /* check input conditions */ 328 if (secpolicy_fs_config(cr, vp->v_vfsp) != 0) 329 return (EPERM); 330 331 if (copyin(diop, &dio, sizeof (dio))) 332 return (EFAULT); 333 334 if (dio > 1) 335 return (EINVAL); 336 337 /* file system has been forcibly unmounted */ 338 if (VTOI(vp)->i_ufsvfs == NULL) 339 return (EIO); 340 341 ip = VTOI(vp); 342 ufsvfsp = ip->i_ufsvfs; 343 ulp = &ufsvfsp->vfs_ulockfs; 344 345 /* logging file system; dio ignored */ 346 if (TRANS_ISTRANS(ufsvfsp)) 347 return (error); 348 349 /* hold the mutex to prevent race with a lockfs request */ 350 vfs_lock_wait(vp->v_vfsp); 351 mutex_enter(&ulp->ul_lock); 352 353 if (ULOCKFS_IS_HLOCK(ulp)) { 354 error = EIO; 355 goto out; 356 } 357 358 if (ULOCKFS_IS_ELOCK(ulp)) { 359 error = EBUSY; 360 goto out; 361 } 362 /* wait for outstanding accesses to finish */ 363 if (error = ufs_quiesce(ulp)) 364 goto out; 365 366 /* flush w/invalidate */ 367 if (error = ufs_flush(vp->v_vfsp)) 368 goto out; 369 370 /* 371 * update dio 372 */ 373 mutex_enter(&ufsvfsp->vfs_lock); 374 ufsvfsp->vfs_dio = dio; 375 376 /* 377 * enable/disable clean flag processing 378 */ 379 fs = ip->i_fs; 380 if (fs->fs_ronly == 0 && 381 fs->fs_clean != FSBAD && 382 fs->fs_clean != FSLOG) { 383 if (dio) 384 fs->fs_clean = FSSUSPEND; 385 else 386 fs->fs_clean = FSACTIVE; 387 ufs_sbwrite(ufsvfsp); 388 mutex_exit(&ufsvfsp->vfs_lock); 389 } else 390 mutex_exit(&ufsvfsp->vfs_lock); 391 out: 392 /* 393 * we need this broadcast because of the ufs_quiesce call above 394 */ 395 cv_broadcast(&ulp->ul_cv); 396 mutex_exit(&ulp->ul_lock); 397 vfs_unlock(vp->v_vfsp); 398 return (error); 399 } 400 401 /* 402 * ufs_fioffs - ioctl handler for flushing file system 403 */ 404 /* ARGSUSED */ 405 int 406 ufs_fioffs( 407 struct vnode *vp, 408 char *vap, /* must be NULL - reserved */ 409 struct cred *cr) /* credentials from ufs_ioctl */ 410 { 411 int error; 412 struct ufsvfs *ufsvfsp; 413 struct ulockfs *ulp; 414 415 /* file system has been forcibly unmounted */ 416 ufsvfsp = VTOI(vp)->i_ufsvfs; 417 if (ufsvfsp == NULL) 418 return (EIO); 419 420 ulp = &ufsvfsp->vfs_ulockfs; 421 422 /* 423 * suspend the delete thread 424 * this must be done outside the lockfs locking protocol 425 */ 426 ufs_thread_suspend(&ufsvfsp->vfs_delete); 427 428 vfs_lock_wait(vp->v_vfsp); 429 /* hold the mutex to prevent race with a lockfs request */ 430 mutex_enter(&ulp->ul_lock); 431 432 if (ULOCKFS_IS_HLOCK(ulp)) { 433 error = EIO; 434 goto out; 435 } 436 if (ULOCKFS_IS_ELOCK(ulp)) { 437 error = EBUSY; 438 goto out; 439 } 440 /* wait for outstanding accesses to finish */ 441 if (error = ufs_quiesce(ulp)) 442 goto out; 443 444 /* 445 * If logging, and the logmap was marked as not rollable, 446 * make it rollable now, and start the trans_roll thread and 447 * the reclaim thread. The log at this point is safe to write to. 448 */ 449 if (ufsvfsp->vfs_log) { 450 ml_unit_t *ul = ufsvfsp->vfs_log; 451 struct fs *fsp = ufsvfsp->vfs_fs; 452 int err; 453 454 if (ul->un_flags & LDL_NOROLL) { 455 ul->un_flags &= ~LDL_NOROLL; 456 logmap_start_roll(ul); 457 if (!fsp->fs_ronly && (fsp->fs_reclaim & 458 (FS_RECLAIM|FS_RECLAIMING))) { 459 fsp->fs_reclaim &= ~FS_RECLAIM; 460 fsp->fs_reclaim |= FS_RECLAIMING; 461 ufs_thread_start(&ufsvfsp->vfs_reclaim, 462 ufs_thread_reclaim, 463 vp->v_vfsp); 464 if (!fsp->fs_ronly) { 465 TRANS_SBWRITE(ufsvfsp, 466 TOP_SBUPDATE_UPDATE); 467 if (err = 468 geterror(ufsvfsp->vfs_bufp)) { 469 refstr_t *mntpt; 470 mntpt = vfs_getmntpoint( 471 vp->v_vfsp); 472 cmn_err(CE_NOTE, 473 "Filesystem Flush " 474 "Failed to update " 475 "Reclaim Status for " 476 " %s, Write failed to " 477 "update superblock, " 478 "error %d", 479 refstr_value(mntpt), 480 err); 481 refstr_rele(mntpt); 482 } 483 } 484 } 485 } 486 } 487 488 /* synchronously flush dirty data and metadata */ 489 error = ufs_flush(vp->v_vfsp); 490 491 out: 492 cv_broadcast(&ulp->ul_cv); 493 mutex_exit(&ulp->ul_lock); 494 vfs_unlock(vp->v_vfsp); 495 496 /* 497 * allow the delete thread to continue 498 */ 499 ufs_thread_continue(&ufsvfsp->vfs_delete); 500 return (error); 501 } 502 503 /* 504 * ufs_fioisbusy 505 * Get number of references on this vnode. 506 * Contract-private interface for Legato's NetWorker product. 507 */ 508 /* ARGSUSED */ 509 int 510 ufs_fioisbusy(struct vnode *vp, int *isbusy, struct cred *cr) 511 { 512 int is_it_busy; 513 514 /* 515 * The caller holds one reference, there may be one in the dnlc 516 * so we need to flush it. 517 */ 518 if (vp->v_count > 1) 519 dnlc_purge_vp(vp); 520 /* 521 * Since we've just flushed the dnlc and we hold a reference 522 * to this vnode, then anything but 1 means busy (this had 523 * BETTER not be zero!). Also, it's possible for someone to 524 * have this file mmap'ed with no additional reference count. 525 */ 526 ASSERT(vp->v_count > 0); 527 if ((vp->v_count == 1) && (VTOI(vp)->i_mapcnt == 0)) 528 is_it_busy = 0; 529 else 530 is_it_busy = 1; 531 532 if (suword32(isbusy, is_it_busy)) 533 return (EFAULT); 534 return (0); 535 } 536 537 /* ARGSUSED */ 538 int 539 ufs_fiodirectio(struct vnode *vp, int cmd, struct cred *cr) 540 { 541 int error = 0; 542 struct inode *ip = VTOI(vp); 543 544 /* 545 * Acquire reader lock and set/reset direct mode 546 */ 547 rw_enter(&ip->i_contents, RW_READER); 548 mutex_enter(&ip->i_tlock); 549 if (cmd == DIRECTIO_ON) 550 ip->i_flag |= IDIRECTIO; /* enable direct mode */ 551 else if (cmd == DIRECTIO_OFF) 552 ip->i_flag &= ~IDIRECTIO; /* disable direct mode */ 553 else 554 error = EINVAL; 555 mutex_exit(&ip->i_tlock); 556 rw_exit(&ip->i_contents); 557 return (error); 558 } 559 560 /* 561 * ufs_fiotune 562 * Allow some tunables to be set on a mounted fs 563 */ 564 int 565 ufs_fiotune(struct vnode *vp, struct fiotune *uftp, struct cred *cr) 566 { 567 struct fiotune ftp; 568 struct fs *fs; 569 struct ufsvfs *ufsvfsp; 570 571 /* 572 * must have sufficient privileges 573 */ 574 if (secpolicy_fs_config(cr, vp->v_vfsp) != 0) 575 return (EPERM); 576 577 /* 578 * get user's copy 579 */ 580 if (copyin(uftp, &ftp, sizeof (ftp))) 581 return (EFAULT); 582 583 /* 584 * some minimal sanity checks 585 */ 586 if ((ftp.maxcontig <= 0) || 587 (ftp.rotdelay != 0) || 588 (ftp.maxbpg <= 0) || 589 (ftp.minfree < 0) || 590 (ftp.minfree > 99) || 591 ((ftp.optim != FS_OPTTIME) && (ftp.optim != FS_OPTSPACE))) 592 return (EINVAL); 593 594 /* 595 * update superblock but don't write it! If it gets out, fine. 596 */ 597 fs = VTOI(vp)->i_fs; 598 599 fs->fs_maxcontig = ftp.maxcontig; 600 fs->fs_rotdelay = ftp.rotdelay; 601 fs->fs_maxbpg = ftp.maxbpg; 602 fs->fs_minfree = ftp.minfree; 603 fs->fs_optim = ftp.optim; 604 605 /* 606 * Adjust cluster based on the new maxcontig. The cluster size 607 * can be any positive value. The check for this is done above. 608 */ 609 ufsvfsp = VTOI(vp)->i_ufsvfs; 610 ufsvfsp->vfs_ioclustsz = fs->fs_bsize * fs->fs_maxcontig; 611 612 /* 613 * Adjust minfrags from minfree 614 */ 615 ufsvfsp->vfs_minfrags = (int)((int64_t)fs->fs_dsize * 616 fs->fs_minfree / 100); 617 618 /* 619 * Write the superblock 620 */ 621 if (fs->fs_ronly == 0) { 622 TRANS_BEGIN_ASYNC(ufsvfsp, TOP_SBUPDATE_UPDATE, 623 TOP_SBWRITE_SIZE); 624 TRANS_SBWRITE(ufsvfsp, TOP_SBUPDATE_UPDATE); 625 TRANS_END_ASYNC(ufsvfsp, TOP_SBUPDATE_UPDATE, TOP_SBWRITE_SIZE); 626 } 627 628 return (0); 629 } 630 631 /* 632 * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 633 * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 634 */ 635 int 636 ufs_fio_holey(vnode_t *vp, int cmd, offset_t *off) 637 { 638 inode_t *ip = VTOI(vp); 639 u_offset_t noff = (u_offset_t)*off; /* new offset */ 640 u_offset_t isz; 641 int error; 642 boolean_t hole; 643 644 ASSERT(*off >= 0); 645 rw_enter(&ip->i_contents, RW_READER); 646 isz = (offset_t)ip->i_size; 647 if (*off >= isz) { 648 rw_exit(&ip->i_contents); 649 return (ENXIO); 650 } 651 652 /* 653 * Check for the usual case where a file has no holes. 654 * If so we can optimise to set the end of the file as the first 655 * (virtual) hole. This avoids bmap_find() searching through 656 * every block in the file for a (non-existent) hole. 657 */ 658 if (!bmap_has_holes(ip)) { 659 rw_exit(&ip->i_contents); 660 if (cmd == _FIO_SEEK_HOLE) { 661 *off = isz; 662 return (0); 663 } 664 /* *off must already point to valid data (non hole) */ 665 return (0); 666 } 667 668 /* 669 * Calling bmap_read() one block at a time on a 1TB file takes forever, 670 * so we use a special function to search for holes or blocks. 671 */ 672 if (cmd == _FIO_SEEK_HOLE) 673 hole = B_TRUE; 674 else 675 hole = B_FALSE; 676 error = bmap_find(ip, hole, &noff); 677 rw_exit(&ip->i_contents); 678 679 /* end of file? */ 680 if (error == ENXIO) { 681 /* 682 * Handle the virtual hole at the end of file. 683 */ 684 if (cmd == _FIO_SEEK_HOLE) { 685 *off = isz; 686 return (0); 687 } 688 return (ENXIO); 689 } 690 if (noff < *off) 691 return (error); 692 *off = noff; 693 return (error); 694 } 695