1 /* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94 39 * $FreeBSD$ 40 */ 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/fcntl.h> 45 #include <sys/file.h> 46 #include <sys/stat.h> 47 #include <sys/proc.h> 48 #include <sys/mount.h> 49 #include <sys/namei.h> 50 #include <sys/vnode.h> 51 #include <sys/bio.h> 52 #include <sys/buf.h> 53 #include <sys/filio.h> 54 #include <sys/ttycom.h> 55 #include <sys/conf.h> 56 57 #include <ufs/ufs/quota.h> 58 #include <ufs/ufs/inode.h> 59 60 static int vn_closefile __P((struct file *fp, struct proc *p)); 61 static int vn_ioctl __P((struct file *fp, u_long com, caddr_t data, 62 struct proc *p)); 63 static int vn_read __P((struct file *fp, struct uio *uio, 64 struct ucred *cred, int flags, struct proc *p)); 65 static int vn_poll __P((struct file *fp, int events, struct ucred *cred, 66 struct proc *p)); 67 static int vn_statfile __P((struct file *fp, struct stat *sb, struct proc *p)); 68 static int vn_write __P((struct file *fp, struct uio *uio, 69 struct ucred *cred, int flags, struct proc *p)); 70 71 struct fileops vnops = 72 { vn_read, vn_write, vn_ioctl, vn_poll, vn_statfile, vn_closefile }; 73 74 static int filt_nullattach(struct knote *kn); 75 static int filt_vnattach(struct knote *kn); 76 static void filt_vndetach(struct knote *kn); 77 static int filt_vnode(struct knote *kn, long hint); 78 static int filt_vnread(struct knote *kn, long hint); 79 80 struct filterops vn_filtops = 81 { 1, filt_vnattach, filt_vndetach, filt_vnode }; 82 83 /* 84 * XXX 85 * filt_vnread is ufs-specific, so the attach routine should really 86 * switch out to different filterops based on the vn filetype 87 */ 88 struct filterops vn_rwfiltops[] = { 89 { 1, filt_vnattach, filt_vndetach, filt_vnread }, 90 { 1, filt_nullattach, NULL, NULL }, 91 }; 92 93 /* 94 * Common code for vnode open operations. 95 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine. 96 * 97 * Note that this does NOT free nameidata for the successful case, 98 * due to the NDINIT being done elsewhere. 99 */ 100 int 101 vn_open(ndp, flagp, cmode) 102 register struct nameidata *ndp; 103 int *flagp, cmode; 104 { 105 struct vnode *vp; 106 struct mount *mp; 107 struct proc *p = ndp->ni_cnd.cn_proc; 108 struct ucred *cred = p->p_ucred; 109 struct vattr vat; 110 struct vattr *vap = &vat; 111 int mode, fmode, error; 112 113 restart: 114 fmode = *flagp; 115 if (fmode & O_CREAT) { 116 ndp->ni_cnd.cn_nameiop = CREATE; 117 ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; 118 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0) 119 ndp->ni_cnd.cn_flags |= FOLLOW; 120 bwillwrite(); 121 if ((error = namei(ndp)) != 0) 122 return (error); 123 if (ndp->ni_vp == NULL) { 124 VATTR_NULL(vap); 125 vap->va_type = VREG; 126 vap->va_mode = cmode; 127 if (fmode & O_EXCL) 128 vap->va_vaflags |= VA_EXCLUSIVE; 129 if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) { 130 NDFREE(ndp, NDF_ONLY_PNBUF); 131 vput(ndp->ni_dvp); 132 if ((error = vn_start_write(NULL, &mp, 133 V_XSLEEP | PCATCH)) != 0) 134 return (error); 135 goto restart; 136 } 137 VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE); 138 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, 139 &ndp->ni_cnd, vap); 140 vput(ndp->ni_dvp); 141 vn_finished_write(mp); 142 if (error) { 143 NDFREE(ndp, NDF_ONLY_PNBUF); 144 return (error); 145 } 146 ASSERT_VOP_UNLOCKED(ndp->ni_dvp, "create"); 147 ASSERT_VOP_LOCKED(ndp->ni_vp, "create"); 148 fmode &= ~O_TRUNC; 149 vp = ndp->ni_vp; 150 } else { 151 if (ndp->ni_dvp == ndp->ni_vp) 152 vrele(ndp->ni_dvp); 153 else 154 vput(ndp->ni_dvp); 155 ndp->ni_dvp = NULL; 156 vp = ndp->ni_vp; 157 if (fmode & O_EXCL) { 158 error = EEXIST; 159 goto bad; 160 } 161 fmode &= ~O_CREAT; 162 } 163 } else { 164 ndp->ni_cnd.cn_nameiop = LOOKUP; 165 ndp->ni_cnd.cn_flags = 166 ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF; 167 if ((error = namei(ndp)) != 0) 168 return (error); 169 vp = ndp->ni_vp; 170 } 171 if (vp->v_type == VLNK) { 172 error = EMLINK; 173 goto bad; 174 } 175 if (vp->v_type == VSOCK) { 176 error = EOPNOTSUPP; 177 goto bad; 178 } 179 if ((fmode & O_CREAT) == 0) { 180 mode = 0; 181 if (fmode & (FWRITE | O_TRUNC)) { 182 if (vp->v_type == VDIR) { 183 error = EISDIR; 184 goto bad; 185 } 186 error = vn_writechk(vp); 187 if (error) 188 goto bad; 189 mode |= VWRITE; 190 } 191 if (fmode & FREAD) 192 mode |= VREAD; 193 if (mode) { 194 error = VOP_ACCESS(vp, mode, cred, p); 195 if (error) 196 goto bad; 197 } 198 } 199 if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0) 200 goto bad; 201 /* 202 * Make sure that a VM object is created for VMIO support. 203 */ 204 if (vn_canvmio(vp) == TRUE) { 205 if ((error = vfs_object_create(vp, p, cred)) != 0) 206 goto bad; 207 } 208 209 if (fmode & FWRITE) 210 vp->v_writecount++; 211 *flagp = fmode; 212 return (0); 213 bad: 214 NDFREE(ndp, NDF_ONLY_PNBUF); 215 vput(vp); 216 *flagp = fmode; 217 return (error); 218 } 219 220 /* 221 * Check for write permissions on the specified vnode. 222 * Prototype text segments cannot be written. 223 */ 224 int 225 vn_writechk(vp) 226 register struct vnode *vp; 227 { 228 229 /* 230 * If there's shared text associated with 231 * the vnode, try to free it up once. If 232 * we fail, we can't allow writing. 233 */ 234 if (vp->v_flag & VTEXT) 235 return (ETXTBSY); 236 return (0); 237 } 238 239 /* 240 * Vnode close call 241 */ 242 int 243 vn_close(vp, flags, cred, p) 244 register struct vnode *vp; 245 int flags; 246 struct ucred *cred; 247 struct proc *p; 248 { 249 int error; 250 251 if (flags & FWRITE) 252 vp->v_writecount--; 253 error = VOP_CLOSE(vp, flags, cred, p); 254 vrele(vp); 255 return (error); 256 } 257 258 static __inline 259 int 260 sequential_heuristic(struct uio *uio, struct file *fp) 261 { 262 /* 263 * Sequential heuristic - detect sequential operation 264 */ 265 if ((uio->uio_offset == 0 && fp->f_seqcount > 0) || 266 uio->uio_offset == fp->f_nextoff) { 267 /* 268 * XXX we assume that the filesystem block size is 269 * the default. Not true, but still gives us a pretty 270 * good indicator of how sequential the read operations 271 * are. 272 */ 273 fp->f_seqcount += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE; 274 if (fp->f_seqcount >= 127) 275 fp->f_seqcount = 127; 276 return(fp->f_seqcount << 16); 277 } 278 279 /* 280 * Not sequential, quick draw-down of seqcount 281 */ 282 if (fp->f_seqcount > 1) 283 fp->f_seqcount = 1; 284 else 285 fp->f_seqcount = 0; 286 return(0); 287 } 288 289 /* 290 * Package up an I/O request on a vnode into a uio and do it. 291 */ 292 int 293 vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p) 294 enum uio_rw rw; 295 struct vnode *vp; 296 caddr_t base; 297 int len; 298 off_t offset; 299 enum uio_seg segflg; 300 int ioflg; 301 struct ucred *cred; 302 int *aresid; 303 struct proc *p; 304 { 305 struct uio auio; 306 struct iovec aiov; 307 struct mount *mp; 308 int error; 309 310 if ((ioflg & IO_NODELOCKED) == 0) { 311 mp = NULL; 312 if (rw == UIO_WRITE && 313 vp->v_type != VCHR && vp->v_type != VBLK && 314 (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 315 return (error); 316 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 317 } 318 auio.uio_iov = &aiov; 319 auio.uio_iovcnt = 1; 320 aiov.iov_base = base; 321 aiov.iov_len = len; 322 auio.uio_resid = len; 323 auio.uio_offset = offset; 324 auio.uio_segflg = segflg; 325 auio.uio_rw = rw; 326 auio.uio_procp = p; 327 if (rw == UIO_READ) { 328 error = VOP_READ(vp, &auio, ioflg, cred); 329 } else { 330 error = VOP_WRITE(vp, &auio, ioflg, cred); 331 } 332 if (aresid) 333 *aresid = auio.uio_resid; 334 else 335 if (auio.uio_resid && error == 0) 336 error = EIO; 337 if ((ioflg & IO_NODELOCKED) == 0) { 338 vn_finished_write(mp); 339 VOP_UNLOCK(vp, 0, p); 340 } 341 return (error); 342 } 343 344 /* 345 * File table vnode read routine. 346 */ 347 static int 348 vn_read(fp, uio, cred, flags, p) 349 struct file *fp; 350 struct uio *uio; 351 struct ucred *cred; 352 struct proc *p; 353 int flags; 354 { 355 struct vnode *vp; 356 int error, ioflag; 357 358 KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p", 359 uio->uio_procp, p)); 360 vp = (struct vnode *)fp->f_data; 361 ioflag = 0; 362 if (fp->f_flag & FNONBLOCK) 363 ioflag |= IO_NDELAY; 364 VOP_LEASE(vp, p, cred, LEASE_READ); 365 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p); 366 if ((flags & FOF_OFFSET) == 0) 367 uio->uio_offset = fp->f_offset; 368 369 ioflag |= sequential_heuristic(uio, fp); 370 371 error = VOP_READ(vp, uio, ioflag, cred); 372 if ((flags & FOF_OFFSET) == 0) 373 fp->f_offset = uio->uio_offset; 374 fp->f_nextoff = uio->uio_offset; 375 VOP_UNLOCK(vp, 0, p); 376 return (error); 377 } 378 379 /* 380 * File table vnode write routine. 381 */ 382 static int 383 vn_write(fp, uio, cred, flags, p) 384 struct file *fp; 385 struct uio *uio; 386 struct ucred *cred; 387 struct proc *p; 388 int flags; 389 { 390 struct vnode *vp; 391 struct mount *mp; 392 int error, ioflag; 393 394 KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p", 395 uio->uio_procp, p)); 396 vp = (struct vnode *)fp->f_data; 397 if (vp->v_type == VREG) 398 bwillwrite(); 399 vp = (struct vnode *)fp->f_data; /* XXX needed? */ 400 ioflag = IO_UNIT; 401 if (vp->v_type == VREG && (fp->f_flag & O_APPEND)) 402 ioflag |= IO_APPEND; 403 if (fp->f_flag & FNONBLOCK) 404 ioflag |= IO_NDELAY; 405 if ((fp->f_flag & O_FSYNC) || 406 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) 407 ioflag |= IO_SYNC; 408 mp = NULL; 409 if (vp->v_type != VCHR && vp->v_type != VBLK && 410 (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 411 return (error); 412 VOP_LEASE(vp, p, cred, LEASE_WRITE); 413 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 414 if ((flags & FOF_OFFSET) == 0) 415 uio->uio_offset = fp->f_offset; 416 ioflag |= sequential_heuristic(uio, fp); 417 error = VOP_WRITE(vp, uio, ioflag, cred); 418 if ((flags & FOF_OFFSET) == 0) 419 fp->f_offset = uio->uio_offset; 420 fp->f_nextoff = uio->uio_offset; 421 VOP_UNLOCK(vp, 0, p); 422 vn_finished_write(mp); 423 return (error); 424 } 425 426 /* 427 * File table vnode stat routine. 428 */ 429 static int 430 vn_statfile(fp, sb, p) 431 struct file *fp; 432 struct stat *sb; 433 struct proc *p; 434 { 435 struct vnode *vp = (struct vnode *)fp->f_data; 436 437 return vn_stat(vp, sb, p); 438 } 439 440 int 441 vn_stat(vp, sb, p) 442 struct vnode *vp; 443 register struct stat *sb; 444 struct proc *p; 445 { 446 struct vattr vattr; 447 register struct vattr *vap; 448 int error; 449 u_short mode; 450 451 vap = &vattr; 452 error = VOP_GETATTR(vp, vap, p->p_ucred, p); 453 if (error) 454 return (error); 455 456 /* 457 * Zero the spare stat fields 458 */ 459 sb->st_lspare = 0; 460 sb->st_qspare[0] = 0; 461 sb->st_qspare[1] = 0; 462 463 /* 464 * Copy from vattr table 465 */ 466 if (vap->va_fsid != VNOVAL) 467 sb->st_dev = vap->va_fsid; 468 else 469 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0]; 470 sb->st_ino = vap->va_fileid; 471 mode = vap->va_mode; 472 switch (vap->va_type) { 473 case VREG: 474 mode |= S_IFREG; 475 break; 476 case VDIR: 477 mode |= S_IFDIR; 478 break; 479 case VBLK: 480 mode |= S_IFBLK; 481 break; 482 case VCHR: 483 mode |= S_IFCHR; 484 break; 485 case VLNK: 486 mode |= S_IFLNK; 487 /* This is a cosmetic change, symlinks do not have a mode. */ 488 if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) 489 sb->st_mode &= ~ACCESSPERMS; /* 0000 */ 490 else 491 sb->st_mode |= ACCESSPERMS; /* 0777 */ 492 break; 493 case VSOCK: 494 mode |= S_IFSOCK; 495 break; 496 case VFIFO: 497 mode |= S_IFIFO; 498 break; 499 default: 500 return (EBADF); 501 }; 502 sb->st_mode = mode; 503 sb->st_nlink = vap->va_nlink; 504 sb->st_uid = vap->va_uid; 505 sb->st_gid = vap->va_gid; 506 sb->st_rdev = vap->va_rdev; 507 sb->st_size = vap->va_size; 508 sb->st_atimespec = vap->va_atime; 509 sb->st_mtimespec = vap->va_mtime; 510 sb->st_ctimespec = vap->va_ctime; 511 512 /* 513 * According to www.opengroup.org, the meaning of st_blksize is 514 * "a filesystem-specific preferred I/O block size for this 515 * object. In some filesystem types, this may vary from file 516 * to file" 517 * Default to zero to catch bogus uses of this field. 518 */ 519 520 if (vap->va_type == VREG) { 521 sb->st_blksize = vap->va_blocksize; 522 } else if (vn_isdisk(vp, NULL)) { 523 sb->st_blksize = vp->v_rdev->si_bsize_best; 524 if (sb->st_blksize < vp->v_rdev->si_bsize_phys) 525 sb->st_blksize = vp->v_rdev->si_bsize_phys; 526 if (sb->st_blksize < BLKDEV_IOSIZE) 527 sb->st_blksize = BLKDEV_IOSIZE; 528 } else { 529 sb->st_blksize = 0; 530 } 531 532 sb->st_flags = vap->va_flags; 533 if (suser_xxx(p->p_ucred, 0, 0)) 534 sb->st_gen = 0; 535 else 536 sb->st_gen = vap->va_gen; 537 538 #if (S_BLKSIZE == 512) 539 /* Optimize this case */ 540 sb->st_blocks = vap->va_bytes >> 9; 541 #else 542 sb->st_blocks = vap->va_bytes / S_BLKSIZE; 543 #endif 544 return (0); 545 } 546 547 /* 548 * File table vnode ioctl routine. 549 */ 550 static int 551 vn_ioctl(fp, com, data, p) 552 struct file *fp; 553 u_long com; 554 caddr_t data; 555 struct proc *p; 556 { 557 register struct vnode *vp = ((struct vnode *)fp->f_data); 558 struct vattr vattr; 559 int error; 560 561 switch (vp->v_type) { 562 563 case VREG: 564 case VDIR: 565 if (com == FIONREAD) { 566 error = VOP_GETATTR(vp, &vattr, p->p_ucred, p); 567 if (error) 568 return (error); 569 *(int *)data = vattr.va_size - fp->f_offset; 570 return (0); 571 } 572 if (com == FIONBIO || com == FIOASYNC) /* XXX */ 573 return (0); /* XXX */ 574 /* fall into ... */ 575 576 default: 577 #if 0 578 return (ENOTTY); 579 #endif 580 case VFIFO: 581 case VCHR: 582 case VBLK: 583 if (com == FIODTYPE) { 584 if (vp->v_type != VCHR && vp->v_type != VBLK) 585 return (ENOTTY); 586 *(int *)data = devsw(vp->v_rdev)->d_flags & D_TYPEMASK; 587 return (0); 588 } 589 error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p); 590 if (error == 0 && com == TIOCSCTTY) { 591 592 /* Do nothing if reassigning same control tty */ 593 if (p->p_session->s_ttyvp == vp) 594 return (0); 595 596 /* Get rid of reference to old control tty */ 597 if (p->p_session->s_ttyvp) 598 vrele(p->p_session->s_ttyvp); 599 600 p->p_session->s_ttyvp = vp; 601 VREF(vp); 602 } 603 return (error); 604 } 605 } 606 607 /* 608 * File table vnode poll routine. 609 */ 610 static int 611 vn_poll(fp, events, cred, p) 612 struct file *fp; 613 int events; 614 struct ucred *cred; 615 struct proc *p; 616 { 617 618 return (VOP_POLL(((struct vnode *)fp->f_data), events, cred, p)); 619 } 620 621 /* 622 * Check that the vnode is still valid, and if so 623 * acquire requested lock. 624 */ 625 int 626 #ifndef DEBUG_LOCKS 627 vn_lock(vp, flags, p) 628 #else 629 debug_vn_lock(vp, flags, p, filename, line) 630 #endif 631 struct vnode *vp; 632 int flags; 633 struct proc *p; 634 #ifdef DEBUG_LOCKS 635 const char *filename; 636 int line; 637 #endif 638 { 639 int error; 640 641 do { 642 if ((flags & LK_INTERLOCK) == 0) 643 simple_lock(&vp->v_interlock); 644 if (vp->v_flag & VXLOCK) { 645 vp->v_flag |= VXWANT; 646 simple_unlock(&vp->v_interlock); 647 tsleep((caddr_t)vp, PINOD, "vn_lock", 0); 648 error = ENOENT; 649 } else { 650 #ifdef DEBUG_LOCKS 651 vp->filename = filename; 652 vp->line = line; 653 #endif 654 error = VOP_LOCK(vp, 655 flags | LK_NOPAUSE | LK_INTERLOCK, p); 656 if (error == 0) 657 return (error); 658 } 659 flags &= ~LK_INTERLOCK; 660 } while (flags & LK_RETRY); 661 return (error); 662 } 663 664 /* 665 * File table vnode close routine. 666 */ 667 static int 668 vn_closefile(fp, p) 669 struct file *fp; 670 struct proc *p; 671 { 672 673 fp->f_ops = &badfileops; 674 return (vn_close(((struct vnode *)fp->f_data), fp->f_flag, 675 fp->f_cred, p)); 676 } 677 678 /* 679 * Preparing to start a filesystem write operation. If the operation is 680 * permitted, then we bump the count of operations in progress and 681 * proceed. If a suspend request is in progress, we wait until the 682 * suspension is over, and then proceed. 683 */ 684 int 685 vn_start_write(vp, mpp, flags) 686 struct vnode *vp; 687 struct mount **mpp; 688 int flags; 689 { 690 struct mount *mp; 691 int error; 692 693 /* 694 * If a vnode is provided, get and return the mount point that 695 * to which it will write. 696 */ 697 if (vp != NULL) { 698 if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) { 699 *mpp = NULL; 700 if (error != EOPNOTSUPP) 701 return (error); 702 return (0); 703 } 704 } 705 if ((mp = *mpp) == NULL) 706 return (0); 707 /* 708 * Check on status of suspension. 709 */ 710 while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) { 711 if (flags & V_NOWAIT) 712 return (EWOULDBLOCK); 713 error = tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH), 714 "suspfs", 0); 715 if (error) 716 return (error); 717 } 718 if (flags & V_XSLEEP) 719 return (0); 720 mp->mnt_writeopcount++; 721 return (0); 722 } 723 724 /* 725 * Secondary suspension. Used by operations such as vop_inactive 726 * routines that are needed by the higher level functions. These 727 * are allowed to proceed until all the higher level functions have 728 * completed (indicated by mnt_writeopcount dropping to zero). At that 729 * time, these operations are halted until the suspension is over. 730 */ 731 int 732 vn_write_suspend_wait(vp, mp, flags) 733 struct vnode *vp; 734 struct mount *mp; 735 int flags; 736 { 737 int error; 738 739 if (vp != NULL) { 740 if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) { 741 if (error != EOPNOTSUPP) 742 return (error); 743 return (0); 744 } 745 } 746 /* 747 * If we are not suspended or have not yet reached suspended 748 * mode, then let the operation proceed. 749 */ 750 if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPENDED) == 0) 751 return (0); 752 if (flags & V_NOWAIT) 753 return (EWOULDBLOCK); 754 /* 755 * Wait for the suspension to finish. 756 */ 757 return (tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH), 758 "suspfs", 0)); 759 } 760 761 /* 762 * Filesystem write operation has completed. If we are suspending and this 763 * operation is the last one, notify the suspender that the suspension is 764 * now in effect. 765 */ 766 void 767 vn_finished_write(mp) 768 struct mount *mp; 769 { 770 771 if (mp == NULL) 772 return; 773 mp->mnt_writeopcount--; 774 if (mp->mnt_writeopcount < 0) 775 panic("vn_finished_write: neg cnt"); 776 if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 && 777 mp->mnt_writeopcount <= 0) 778 wakeup(&mp->mnt_writeopcount); 779 } 780 781 /* 782 * Request a filesystem to suspend write operations. 783 */ 784 void 785 vfs_write_suspend(mp) 786 struct mount *mp; 787 { 788 struct proc *p = curproc; 789 790 if (mp->mnt_kern_flag & MNTK_SUSPEND) 791 return; 792 mp->mnt_kern_flag |= MNTK_SUSPEND; 793 if (mp->mnt_writeopcount > 0) 794 (void) tsleep(&mp->mnt_writeopcount, PUSER - 1, "suspwt", 0); 795 VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p); 796 mp->mnt_kern_flag |= MNTK_SUSPENDED; 797 } 798 799 /* 800 * Request a filesystem to resume write operations. 801 */ 802 void 803 vfs_write_resume(mp) 804 struct mount *mp; 805 { 806 807 if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0) 808 return; 809 mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPENDED); 810 wakeup(&mp->mnt_writeopcount); 811 wakeup(&mp->mnt_flag); 812 } 813 814 static int 815 filt_vnattach(struct knote *kn) 816 { 817 struct vnode *vp; 818 819 if (kn->kn_fp->f_type != DTYPE_VNODE && 820 kn->kn_fp->f_type != DTYPE_FIFO) 821 return (EBADF); 822 823 vp = (struct vnode *)kn->kn_fp->f_data; 824 825 /* 826 * XXX 827 * this is a hack simply to cause the filter attach to fail 828 * for non-ufs filesystems, until the support for them is done. 829 */ 830 if ((vp)->v_tag != VT_UFS) 831 return (EOPNOTSUPP); 832 833 simple_lock(&vp->v_pollinfo.vpi_lock); 834 SLIST_INSERT_HEAD(&vp->v_pollinfo.vpi_selinfo.si_note, kn, kn_selnext); 835 simple_unlock(&vp->v_pollinfo.vpi_lock); 836 837 return (0); 838 } 839 840 static void 841 filt_vndetach(struct knote *kn) 842 { 843 struct vnode *vp = (struct vnode *)kn->kn_fp->f_data; 844 845 simple_lock(&vp->v_pollinfo.vpi_lock); 846 SLIST_REMOVE(&vp->v_pollinfo.vpi_selinfo.si_note, 847 kn, knote, kn_selnext); 848 simple_unlock(&vp->v_pollinfo.vpi_lock); 849 } 850 851 static int 852 filt_vnode(struct knote *kn, long hint) 853 { 854 855 if (kn->kn_sfflags & hint) 856 kn->kn_fflags |= hint; 857 return (kn->kn_fflags != 0); 858 } 859 860 static int 861 filt_nullattach(struct knote *kn) 862 { 863 return (ENXIO); 864 } 865 866 /*ARGSUSED*/ 867 static int 868 filt_vnread(struct knote *kn, long hint) 869 { 870 struct vnode *vp = (struct vnode *)kn->kn_fp->f_data; 871 struct inode *ip = VTOI(vp); 872 873 kn->kn_data = ip->i_size - kn->kn_fp->f_offset; 874 return (kn->kn_data != 0); 875 } 876