1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <sys/types.h> 26 #include <sys/t_lock.h> 27 #include <sys/param.h> 28 #include <sys/time.h> 29 #include <sys/systm.h> 30 #include <sys/sysmacros.h> 31 #include <sys/resource.h> 32 #include <sys/signal.h> 33 #include <sys/cred.h> 34 #include <sys/user.h> 35 #include <sys/buf.h> 36 #include <sys/vfs.h> 37 #include <sys/vnode.h> 38 #include <sys/proc.h> 39 #include <sys/disp.h> 40 #include <sys/file.h> 41 #include <sys/fcntl.h> 42 #include <sys/flock.h> 43 #include <sys/atomic.h> 44 #include <sys/kmem.h> 45 #include <sys/uio.h> 46 #include <sys/conf.h> 47 #include <sys/mman.h> 48 #include <sys/pathname.h> 49 #include <sys/debug.h> 50 #include <sys/vmsystm.h> 51 #include <sys/cmn_err.h> 52 #include <sys/filio.h> 53 #include <sys/dnlc.h> 54 55 #include <sys/fs/ufs_filio.h> 56 #include <sys/fs/ufs_lockfs.h> 57 #include <sys/fs/ufs_fs.h> 58 #include <sys/fs/ufs_inode.h> 59 #include <sys/fs/ufs_fsdir.h> 60 #include <sys/fs/ufs_quota.h> 61 #include <sys/fs/ufs_trans.h> 62 #include <sys/fs/ufs_log.h> 63 #include <sys/dirent.h> /* must be AFTER <sys/fs/fsdir.h>! */ 64 #include <sys/errno.h> 65 #include <sys/sysinfo.h> 66 67 #include <vm/hat.h> 68 #include <vm/page.h> 69 #include <vm/pvn.h> 70 #include <vm/as.h> 71 #include <vm/seg.h> 72 #include <vm/seg_map.h> 73 #include <vm/seg_vn.h> 74 #include <vm/rm.h> 75 #include <sys/swap.h> 76 #include <sys/model.h> 77 #include <sys/policy.h> 78 79 #include "fs/fs_subr.h" 80 81 /* 82 * ufs_fioio is the ufs equivalent of NFS_CNVT and is tailored to 83 * metamucil's needs. It may change at any time. 84 */ 85 /* ARGSUSED */ 86 int 87 ufs_fioio( 88 struct vnode *vp, /* any file on the fs */ 89 struct fioio *fiou, /* fioio struct in userland */ 90 int flag, /* flag from VOP_IOCTL() */ 91 struct cred *cr) /* credentials from ufs_ioctl */ 92 { 93 int error = 0; 94 struct vnode *vpio = NULL; /* vnode for inode open */ 95 struct inode *ipio = NULL; /* inode for inode open */ 96 struct file *fpio = NULL; /* file for inode open */ 97 struct inode *ip; /* inode for file system */ 98 struct fs *fs; /* fs for file system */ 99 STRUCT_DECL(fioio, fio); /* copy of user's fioio struct */ 100 101 /* 102 * must be privileged 103 */ 104 if (secpolicy_fs_config(cr, vp->v_vfsp) != 0) 105 return (EPERM); 106 107 STRUCT_INIT(fio, flag & DATAMODEL_MASK); 108 109 /* 110 * get user's copy of fioio struct 111 */ 112 if (copyin(fiou, STRUCT_BUF(fio), STRUCT_SIZE(fio))) 113 return (EFAULT); 114 115 ip = VTOI(vp); 116 fs = ip->i_fs; 117 118 /* 119 * check the inode number against the fs's inode number bounds 120 */ 121 if (STRUCT_FGET(fio, fio_ino) < UFSROOTINO) 122 return (ESRCH); 123 if (STRUCT_FGET(fio, fio_ino) >= fs->fs_ncg * fs->fs_ipg) 124 return (ESRCH); 125 126 rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER); 127 128 /* 129 * get the inode 130 */ 131 error = ufs_iget(ip->i_vfs, STRUCT_FGET(fio, fio_ino), &ipio, cr); 132 133 rw_exit(&ip->i_ufsvfs->vfs_dqrwlock); 134 135 if (error) 136 return (error); 137 138 /* 139 * check the generation number 140 */ 141 rw_enter(&ipio->i_contents, RW_READER); 142 if (ipio->i_gen != STRUCT_FGET(fio, fio_gen)) { 143 error = ESTALE; 144 rw_exit(&ipio->i_contents); 145 goto errout; 146 } 147 148 /* 149 * check if the inode is free 150 */ 151 if (ipio->i_mode == 0) { 152 error = ENOENT; 153 rw_exit(&ipio->i_contents); 154 goto errout; 155 } 156 rw_exit(&ipio->i_contents); 157 158 /* 159 * Adapted from copen: get a file struct 160 * Large Files: We open this file descriptor with FOFFMAX flag 161 * set so that it will be like a large file open. 162 */ 163 if (falloc(NULL, (FREAD|FOFFMAX), &fpio, STRUCT_FADDR(fio, fio_fd))) 164 goto errout; 165 166 /* 167 * Adapted from vn_open: check access and then open the file 168 */ 169 vpio = ITOV(ipio); 170 if (error = VOP_ACCESS(vpio, VREAD, 0, cr, NULL)) 171 goto errout; 172 173 if (error = VOP_OPEN(&vpio, FREAD, cr, NULL)) 174 goto errout; 175 176 /* 177 * Adapted from copen: initialize the file struct 178 */ 179 fpio->f_vnode = vpio; 180 181 /* 182 * return the fd 183 */ 184 if (copyout(STRUCT_BUF(fio), fiou, STRUCT_SIZE(fio))) { 185 error = EFAULT; 186 goto errout; 187 } 188 setf(STRUCT_FGET(fio, fio_fd), fpio); 189 mutex_exit(&fpio->f_tlock); 190 return (0); 191 errout: 192 /* 193 * free the file struct and fd 194 */ 195 if (fpio) { 196 setf(STRUCT_FGET(fio, fio_fd), NULL); 197 unfalloc(fpio); 198 } 199 200 /* 201 * release the hold on the inode 202 */ 203 if (ipio) 204 VN_RELE(ITOV(ipio)); 205 return (error); 206 } 207 208 /* 209 * ufs_fiosatime 210 * set access time w/o altering change time. This ioctl is tailored 211 * to metamucil's needs and may change at any time. 212 */ 213 int 214 ufs_fiosatime( 215 struct vnode *vp, /* file's vnode */ 216 struct timeval *tvu, /* struct timeval in userland */ 217 int flag, /* flag from VOP_IOCTL() */ 218 struct cred *cr) /* credentials from ufs_ioctl */ 219 { 220 struct inode *ip; /* inode for vp */ 221 struct timeval32 tv; /* copy of user's timeval */ 222 int now = 0; 223 224 /* 225 * must have sufficient privileges 226 */ 227 if (secpolicy_fs_config(cr, vp->v_vfsp) != 0) 228 return (EPERM); 229 230 /* 231 * get user's copy of timeval struct and check values 232 * if input is NULL, will set time to now 233 */ 234 if (tvu == NULL) { 235 now = 1; 236 } else { 237 if ((flag & DATAMODEL_MASK) == DATAMODEL_ILP32) { 238 if (copyin(tvu, &tv, sizeof (tv))) 239 return (EFAULT); 240 } else { 241 struct timeval tv64; 242 243 if (copyin(tvu, &tv64, sizeof (tv64))) 244 return (EFAULT); 245 if (TIMEVAL_OVERFLOW(&tv64)) 246 return (EOVERFLOW); 247 TIMEVAL_TO_TIMEVAL32(&tv, &tv64); 248 } 249 250 if (tv.tv_usec < 0 || tv.tv_usec >= 1000000) 251 return (EINVAL); 252 } 253 254 /* 255 * update access time 256 */ 257 ip = VTOI(vp); 258 rw_enter(&ip->i_contents, RW_WRITER); 259 ITIMES_NOLOCK(ip); 260 if (now) { 261 mutex_enter(&ufs_iuniqtime_lock); 262 ip->i_atime = iuniqtime; 263 mutex_exit(&ufs_iuniqtime_lock); 264 } else { 265 ip->i_atime = tv; 266 } 267 ip->i_flag |= IMODACC; 268 rw_exit(&ip->i_contents); 269 270 return (0); 271 } 272 273 /* 274 * ufs_fiogdio 275 * Get delayed-io state. This ioctl is tailored 276 * to metamucil's needs and may change at any time. 277 */ 278 /* ARGSUSED */ 279 int 280 ufs_fiogdio( 281 struct vnode *vp, /* file's vnode */ 282 uint_t *diop, /* dio state returned here */ 283 int flag, /* flag from ufs_ioctl */ 284 struct cred *cr) /* credentials from ufs_ioctl */ 285 { 286 struct ufsvfs *ufsvfsp = VTOI(vp)->i_ufsvfs; 287 288 /* 289 * forcibly unmounted 290 */ 291 if (ufsvfsp == NULL) 292 return (EIO); 293 294 if (suword32(diop, ufsvfsp->vfs_dio)) 295 return (EFAULT); 296 return (0); 297 } 298 299 /* 300 * ufs_fiosdio 301 * Set delayed-io state. This ioctl is tailored 302 * to metamucil's needs and may change at any time. 303 */ 304 int 305 ufs_fiosdio( 306 struct vnode *vp, /* file's vnode */ 307 uint_t *diop, /* dio flag */ 308 int flag, /* flag from ufs_ioctl */ 309 struct cred *cr) /* credentials from ufs_ioctl */ 310 { 311 uint_t dio; /* copy of user's dio */ 312 struct inode *ip; /* inode for vp */ 313 struct ufsvfs *ufsvfsp; 314 struct fs *fs; 315 struct ulockfs *ulp; 316 int error = 0; 317 318 #ifdef lint 319 flag = flag; 320 #endif 321 322 /* check input conditions */ 323 if (secpolicy_fs_config(cr, vp->v_vfsp) != 0) 324 return (EPERM); 325 326 if (copyin(diop, &dio, sizeof (dio))) 327 return (EFAULT); 328 329 if (dio > 1) 330 return (EINVAL); 331 332 /* file system has been forcibly unmounted */ 333 if (VTOI(vp)->i_ufsvfs == NULL) 334 return (EIO); 335 336 ip = VTOI(vp); 337 ufsvfsp = ip->i_ufsvfs; 338 ulp = &ufsvfsp->vfs_ulockfs; 339 340 /* logging file system; dio ignored */ 341 if (TRANS_ISTRANS(ufsvfsp)) 342 return (error); 343 344 /* hold the mutex to prevent race with a lockfs request */ 345 vfs_lock_wait(vp->v_vfsp); 346 mutex_enter(&ulp->ul_lock); 347 atomic_inc_ulong(&ufs_quiesce_pend); 348 349 if (ULOCKFS_IS_HLOCK(ulp)) { 350 error = EIO; 351 goto out; 352 } 353 354 if (ULOCKFS_IS_ELOCK(ulp)) { 355 error = EBUSY; 356 goto out; 357 } 358 /* wait for outstanding accesses to finish */ 359 if (error = ufs_quiesce(ulp)) 360 goto out; 361 362 /* flush w/invalidate */ 363 if (error = ufs_flush(vp->v_vfsp)) 364 goto out; 365 366 /* 367 * update dio 368 */ 369 mutex_enter(&ufsvfsp->vfs_lock); 370 ufsvfsp->vfs_dio = dio; 371 372 /* 373 * enable/disable clean flag processing 374 */ 375 fs = ip->i_fs; 376 if (fs->fs_ronly == 0 && 377 fs->fs_clean != FSBAD && 378 fs->fs_clean != FSLOG) { 379 if (dio) 380 fs->fs_clean = FSSUSPEND; 381 else 382 fs->fs_clean = FSACTIVE; 383 ufs_sbwrite(ufsvfsp); 384 mutex_exit(&ufsvfsp->vfs_lock); 385 } else 386 mutex_exit(&ufsvfsp->vfs_lock); 387 out: 388 /* 389 * we need this broadcast because of the ufs_quiesce call above 390 */ 391 atomic_dec_ulong(&ufs_quiesce_pend); 392 cv_broadcast(&ulp->ul_cv); 393 mutex_exit(&ulp->ul_lock); 394 vfs_unlock(vp->v_vfsp); 395 return (error); 396 } 397 398 /* 399 * ufs_fioffs - ioctl handler for flushing file system 400 */ 401 /* ARGSUSED */ 402 int 403 ufs_fioffs( 404 struct vnode *vp, 405 char *vap, /* must be NULL - reserved */ 406 struct cred *cr) /* credentials from ufs_ioctl */ 407 { 408 int error; 409 struct ufsvfs *ufsvfsp; 410 struct ulockfs *ulp; 411 412 /* file system has been forcibly unmounted */ 413 ufsvfsp = VTOI(vp)->i_ufsvfs; 414 if (ufsvfsp == NULL) 415 return (EIO); 416 417 ulp = &ufsvfsp->vfs_ulockfs; 418 419 /* 420 * suspend the delete thread 421 * this must be done outside the lockfs locking protocol 422 */ 423 vfs_lock_wait(vp->v_vfsp); 424 ufs_thread_suspend(&ufsvfsp->vfs_delete); 425 426 /* hold the mutex to prevent race with a lockfs request */ 427 mutex_enter(&ulp->ul_lock); 428 atomic_inc_ulong(&ufs_quiesce_pend); 429 430 if (ULOCKFS_IS_HLOCK(ulp)) { 431 error = EIO; 432 goto out; 433 } 434 if (ULOCKFS_IS_ELOCK(ulp)) { 435 error = EBUSY; 436 goto out; 437 } 438 /* wait for outstanding accesses to finish */ 439 if (error = ufs_quiesce(ulp)) 440 goto out; 441 442 /* 443 * If logging, and the logmap was marked as not rollable, 444 * make it rollable now, and start the trans_roll thread and 445 * the reclaim thread. The log at this point is safe to write to. 446 */ 447 if (ufsvfsp->vfs_log) { 448 ml_unit_t *ul = ufsvfsp->vfs_log; 449 struct fs *fsp = ufsvfsp->vfs_fs; 450 int err; 451 452 if (ul->un_flags & LDL_NOROLL) { 453 ul->un_flags &= ~LDL_NOROLL; 454 logmap_start_roll(ul); 455 if (!fsp->fs_ronly && (fsp->fs_reclaim & 456 (FS_RECLAIM|FS_RECLAIMING))) { 457 fsp->fs_reclaim &= ~FS_RECLAIM; 458 fsp->fs_reclaim |= FS_RECLAIMING; 459 ufs_thread_start(&ufsvfsp->vfs_reclaim, 460 ufs_thread_reclaim, vp->v_vfsp); 461 if (!fsp->fs_ronly) { 462 TRANS_SBWRITE(ufsvfsp, 463 TOP_SBUPDATE_UPDATE); 464 if (err = 465 geterror(ufsvfsp->vfs_bufp)) { 466 refstr_t *mntpt; 467 mntpt = vfs_getmntpoint( 468 vp->v_vfsp); 469 cmn_err(CE_NOTE, 470 "Filesystem Flush " 471 "Failed to update " 472 "Reclaim Status for " 473 " %s, Write failed to " 474 "update superblock, " 475 "error %d", 476 refstr_value(mntpt), 477 err); 478 refstr_rele(mntpt); 479 } 480 } 481 } 482 } 483 } 484 485 /* synchronously flush dirty data and metadata */ 486 error = ufs_flush(vp->v_vfsp); 487 488 out: 489 atomic_dec_ulong(&ufs_quiesce_pend); 490 cv_broadcast(&ulp->ul_cv); 491 mutex_exit(&ulp->ul_lock); 492 vfs_unlock(vp->v_vfsp); 493 494 /* 495 * allow the delete thread to continue 496 */ 497 ufs_thread_continue(&ufsvfsp->vfs_delete); 498 return (error); 499 } 500 501 /* 502 * ufs_fioisbusy 503 * Get number of references on this vnode. 504 * Contract-private interface for Legato's NetWorker product. 505 */ 506 /* ARGSUSED */ 507 int 508 ufs_fioisbusy(struct vnode *vp, int *isbusy, struct cred *cr) 509 { 510 int is_it_busy; 511 512 /* 513 * The caller holds one reference, there may be one in the dnlc 514 * so we need to flush it. 515 */ 516 if (vp->v_count > 1) 517 dnlc_purge_vp(vp); 518 /* 519 * Since we've just flushed the dnlc and we hold a reference 520 * to this vnode, then anything but 1 means busy (this had 521 * BETTER not be zero!). Also, it's possible for someone to 522 * have this file mmap'ed with no additional reference count. 523 */ 524 ASSERT(vp->v_count > 0); 525 if ((vp->v_count == 1) && (VTOI(vp)->i_mapcnt == 0)) 526 is_it_busy = 0; 527 else 528 is_it_busy = 1; 529 530 if (suword32(isbusy, is_it_busy)) 531 return (EFAULT); 532 return (0); 533 } 534 535 /* ARGSUSED */ 536 int 537 ufs_fiodirectio(struct vnode *vp, int cmd, struct cred *cr) 538 { 539 int error = 0; 540 struct inode *ip = VTOI(vp); 541 542 /* 543 * Acquire reader lock and set/reset direct mode 544 */ 545 rw_enter(&ip->i_contents, RW_READER); 546 mutex_enter(&ip->i_tlock); 547 if (cmd == DIRECTIO_ON) 548 ip->i_flag |= IDIRECTIO; /* enable direct mode */ 549 else if (cmd == DIRECTIO_OFF) 550 ip->i_flag &= ~IDIRECTIO; /* disable direct mode */ 551 else 552 error = EINVAL; 553 mutex_exit(&ip->i_tlock); 554 rw_exit(&ip->i_contents); 555 return (error); 556 } 557 558 /* 559 * ufs_fiotune 560 * Allow some tunables to be set on a mounted fs 561 */ 562 int 563 ufs_fiotune(struct vnode *vp, struct fiotune *uftp, struct cred *cr) 564 { 565 struct fiotune ftp; 566 struct fs *fs; 567 struct ufsvfs *ufsvfsp; 568 569 /* 570 * must have sufficient privileges 571 */ 572 if (secpolicy_fs_config(cr, vp->v_vfsp) != 0) 573 return (EPERM); 574 575 /* 576 * get user's copy 577 */ 578 if (copyin(uftp, &ftp, sizeof (ftp))) 579 return (EFAULT); 580 581 /* 582 * some minimal sanity checks 583 */ 584 if ((ftp.maxcontig <= 0) || 585 (ftp.rotdelay != 0) || 586 (ftp.maxbpg <= 0) || 587 (ftp.minfree < 0) || 588 (ftp.minfree > 99) || 589 ((ftp.optim != FS_OPTTIME) && (ftp.optim != FS_OPTSPACE))) 590 return (EINVAL); 591 592 /* 593 * update superblock but don't write it! If it gets out, fine. 594 */ 595 fs = VTOI(vp)->i_fs; 596 597 fs->fs_maxcontig = ftp.maxcontig; 598 fs->fs_rotdelay = ftp.rotdelay; 599 fs->fs_maxbpg = ftp.maxbpg; 600 fs->fs_minfree = ftp.minfree; 601 fs->fs_optim = ftp.optim; 602 603 /* 604 * Adjust cluster based on the new maxcontig. The cluster size 605 * can be any positive value. The check for this is done above. 606 */ 607 ufsvfsp = VTOI(vp)->i_ufsvfs; 608 ufsvfsp->vfs_ioclustsz = fs->fs_bsize * fs->fs_maxcontig; 609 610 /* 611 * Adjust minfrags from minfree 612 */ 613 ufsvfsp->vfs_minfrags = (int)((int64_t)fs->fs_dsize * 614 fs->fs_minfree / 100); 615 616 /* 617 * Write the superblock 618 */ 619 if (fs->fs_ronly == 0) { 620 TRANS_BEGIN_ASYNC(ufsvfsp, TOP_SBUPDATE_UPDATE, 621 TOP_SBWRITE_SIZE); 622 TRANS_SBWRITE(ufsvfsp, TOP_SBUPDATE_UPDATE); 623 TRANS_END_ASYNC(ufsvfsp, TOP_SBUPDATE_UPDATE, TOP_SBWRITE_SIZE); 624 } 625 626 return (0); 627 } 628 629 /* 630 * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 631 * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 632 */ 633 int 634 ufs_fio_holey(vnode_t *vp, int cmd, offset_t *off) 635 { 636 inode_t *ip = VTOI(vp); 637 u_offset_t noff = (u_offset_t)*off; /* new offset */ 638 u_offset_t isz; 639 int error; 640 boolean_t hole; 641 642 rw_enter(&ip->i_contents, RW_READER); 643 isz = ip->i_size; 644 if (noff >= isz) { 645 rw_exit(&ip->i_contents); 646 return (ENXIO); 647 } 648 649 /* 650 * Check for the usual case where a file has no holes. 651 * If so we can optimise to set the end of the file as the first 652 * (virtual) hole. This avoids bmap_find() searching through 653 * every block in the file for a (non-existent) hole. 654 */ 655 if (!bmap_has_holes(ip)) { 656 rw_exit(&ip->i_contents); 657 if (cmd == _FIO_SEEK_HOLE) { 658 *off = isz; 659 return (0); 660 } 661 /* *off must already point to valid data (non hole) */ 662 return (0); 663 } 664 665 /* 666 * Calling bmap_read() one block at a time on a 1TB file takes forever, 667 * so we use a special function to search for holes or blocks. 668 */ 669 if (cmd == _FIO_SEEK_HOLE) 670 hole = B_TRUE; 671 else 672 hole = B_FALSE; 673 error = bmap_find(ip, hole, &noff); 674 rw_exit(&ip->i_contents); 675 676 /* end of file? */ 677 if (error == ENXIO) { 678 /* 679 * Handle the virtual hole at the end of file. 680 */ 681 if (cmd == _FIO_SEEK_HOLE) { 682 *off = isz; 683 return (0); 684 } 685 return (ENXIO); 686 } 687 if (noff < *off) 688 return (error); 689 *off = noff; 690 return (error); 691 } 692 693 int 694 ufs_mark_compressed(struct vnode *vp) 695 { 696 struct inode *ip = VTOI(vp); 697 struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 698 699 if (vp->v_type != VREG) 700 return (EINVAL); 701 702 rw_enter(&ip->i_contents, RW_WRITER); 703 ip->i_cflags |= ICOMPRESS; 704 TRANS_INODE(ufsvfsp, ip); 705 ip->i_flag |= (ICHG|ISEQ); 706 ip->i_seq++; 707 if (!TRANS_ISTRANS(ufsvfsp)) 708 ufs_iupdat(ip, I_ASYNC); 709 rw_exit(&ip->i_contents); 710 711 return (0); 712 } 713