/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_vnops.c	8.2 (Berkeley) 1/21/94
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/filio.h>
#include <sys/ttycom.h>
#include <sys/conf.h>

#include <machine/limits.h>

static int vn_closefile __P((struct file *fp, struct thread *td));
static int vn_ioctl __P((struct file *fp, u_long com, caddr_t data,
	    struct thread *td));
static int vn_read __P((struct file *fp, struct uio *uio,
	    struct ucred *cred, int flags, struct thread *td));
static int vn_poll __P((struct file *fp, int events, struct ucred *cred,
	    struct thread *td));
static int vn_kqfilter __P((struct file *fp, struct knote *kn));
static int vn_statfile __P((struct file *fp, struct stat *sb, struct thread *td));
static int vn_write __P((struct file *fp, struct uio *uio,
	    struct ucred *cred, int flags, struct thread *td));

struct fileops vnops = {
	vn_read, vn_write, vn_ioctl, vn_poll, vn_kqfilter,
	vn_statfile, vn_closefile
};

/*
 * Common code for vnode open operations.
 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
 *
 * Note that this does NOT free nameidata for the successful case,
 * due to the NDINIT being done elsewhere.
 */
int
vn_open(ndp, flagp, cmode)
	register struct nameidata *ndp;
	int *flagp, cmode;
{
	struct vnode *vp;
	struct mount *mp;
	struct thread *td = ndp->ni_cnd.cn_thread;
	struct ucred *cred = td->td_proc->p_ucred;
	struct vattr vat;
	struct vattr *vap = &vat;
	int mode, fmode, error;

restart:
	fmode = *flagp;
	if (fmode & O_CREAT) {
		ndp->ni_cnd.cn_nameiop = CREATE;
		ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
		if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
			ndp->ni_cnd.cn_flags |= FOLLOW;
		bwillwrite();
		if ((error = namei(ndp)) != 0)
			return (error);
		if (ndp->ni_vp == NULL) {
			VATTR_NULL(vap);
			vap->va_type = VREG;
			vap->va_mode = cmode;
			if (fmode & O_EXCL)
				vap->va_vaflags |= VA_EXCLUSIVE;
			if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
				NDFREE(ndp, NDF_ONLY_PNBUF);
				vput(ndp->ni_dvp);
				if ((error = vn_start_write(NULL, &mp,
				    V_XSLEEP | PCATCH)) != 0)
					return (error);
				goto restart;
			}
			VOP_LEASE(ndp->ni_dvp, td, cred, LEASE_WRITE);
			error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
					   &ndp->ni_cnd, vap);
			vput(ndp->ni_dvp);
			vn_finished_write(mp);
			if (error) {
				NDFREE(ndp, NDF_ONLY_PNBUF);
				return (error);
			}
			ASSERT_VOP_UNLOCKED(ndp->ni_dvp, "create");
			ASSERT_VOP_LOCKED(ndp->ni_vp, "create");
			fmode &= ~O_TRUNC;
			vp = ndp->ni_vp;
		} else {
			if (ndp->ni_dvp == ndp->ni_vp)
				vrele(ndp->ni_dvp);
			else
				vput(ndp->ni_dvp);
			ndp->ni_dvp = NULL;
			vp = ndp->ni_vp;
			if (fmode & O_EXCL) {
				error = EEXIST;
				goto bad;
			}
			fmode &= ~O_CREAT;
		}
	} else {
		ndp->ni_cnd.cn_nameiop = LOOKUP;
		ndp->ni_cnd.cn_flags =
		    ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
		if ((error = namei(ndp)) != 0)
			return (error);
		vp = ndp->ni_vp;
	}
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	if ((fmode & O_CREAT) == 0) {
		mode = 0;
		if (fmode & (FWRITE | O_TRUNC)) {
			if (vp->v_type == VDIR) {
				error = EISDIR;
				goto bad;
			}
			error = vn_writechk(vp);
			if (error)
				goto bad;
			mode |= VWRITE;
		}
		if (fmode & FREAD)
			mode |= VREAD;
		if (mode) {
			error = VOP_ACCESS(vp, mode, cred, td);
			if (error)
				goto bad;
		}
	}
	if ((error = VOP_OPEN(vp, fmode, cred, td)) != 0)
		goto bad;
	/*
	 * Make sure that a VM object is created for VMIO support.
	 */
	if (vn_canvmio(vp) == TRUE) {
		if ((error = vfs_object_create(vp, td, cred)) != 0)
			/* XXX: Should VOP_CLOSE() again here. */
			goto bad;
	}

	if (fmode & FWRITE)
		vp->v_writecount++;
	*flagp = fmode;
	return (0);
bad:
	NDFREE(ndp, NDF_ONLY_PNBUF);
	vput(vp);
	*flagp = fmode;
	return (error);
}

/*
 * Check for write permissions on the specified vnode.
 * Prototype text segments cannot be written.
 */
int
vn_writechk(vp)
	register struct vnode *vp;
{

	/*
	 * If there's shared text associated with
	 * the vnode, try to free it up once.  If
	 * we fail, we can't allow writing.
	 */
	if (vp->v_flag & VTEXT)
		return (ETXTBSY);
	return (0);
}

/*
 * Vnode close call
 */
int
vn_close(vp, flags, cred, td)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct thread *td;
{
	int error;

	if (flags & FWRITE)
		vp->v_writecount--;
	error = VOP_CLOSE(vp, flags, cred, td);
	/*
	 * XXX - In certain instances VOP_CLOSE has to do the vrele
	 * itself. If the vrele has been done, it will return EAGAIN
	 * to indicate that the vrele should not be done again. When
	 * this happens, we just return success. The correct thing to
	 * do would be to have all VOP_CLOSE instances do the vrele.
	 */
	if (error == EAGAIN)
		return (0);
	vrele(vp);
	return (error);
}

static __inline
int
sequential_heuristic(struct uio *uio, struct file *fp)
{
	/*
	 * Sequential heuristic - detect sequential operation
	 */
	if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
	    uio->uio_offset == fp->f_nextoff) {
		/*
		 * XXX we assume that the filesystem block size is
		 * the default.  Not true, but still gives us a pretty
		 * good indicator of how sequential the read operations
		 * are.
		 */
		fp->f_seqcount += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
		if (fp->f_seqcount >= 127)
			fp->f_seqcount = 127;
		return (fp->f_seqcount << 16);
	}

	/*
	 * Not sequential, quick draw-down of seqcount
	 */
	if (fp->f_seqcount > 1)
		fp->f_seqcount = 1;
	else
		fp->f_seqcount = 0;
	return (0);
}

/*
 * Package up an I/O request on a vnode into a uio and do it.
 */
int
vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, td)
	enum uio_rw rw;
	struct vnode *vp;
	caddr_t base;
	int len;
	off_t offset;
	enum uio_seg segflg;
	int ioflg;
	struct ucred *cred;
	int *aresid;
	struct thread *td;
{
	struct uio auio;
	struct iovec aiov;
	struct mount *mp;
	int error;

	if ((ioflg & IO_NODELOCKED) == 0) {
		mp = NULL;
		if (rw == UIO_WRITE &&
		    vp->v_type != VCHR &&
		    (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
			return (error);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	}
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = base;
	aiov.iov_len = len;
	auio.uio_resid = len;
	auio.uio_offset = offset;
	auio.uio_segflg = segflg;
	auio.uio_rw = rw;
	auio.uio_td = td;
	if (rw == UIO_READ) {
		error = VOP_READ(vp, &auio, ioflg, cred);
	} else {
		error = VOP_WRITE(vp, &auio, ioflg, cred);
	}
	if (aresid)
		*aresid = auio.uio_resid;
	else
		if (auio.uio_resid && error == 0)
			error = EIO;
	if ((ioflg & IO_NODELOCKED) == 0) {
		vn_finished_write(mp);
		VOP_UNLOCK(vp, 0, td);
	}
	return (error);
}

/*
 * Package up an I/O request on a vnode into a uio and do it.  The I/O
 * request is split up into smaller chunks and we try to avoid saturating
 * the buffer cache while potentially holding a vnode locked, so we
 * call bwillwrite() before calling vn_rdwr().  We also call uio_yield()
 * to give other processes a chance to lock the vnode (either other processes
 * core'ing the same binary, or unrelated processes scanning the directory).
 */
int
vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, td)
	enum uio_rw rw;
	struct vnode *vp;
	caddr_t base;
	int len;
	off_t offset;
	enum uio_seg segflg;
	int ioflg;
	struct ucred *cred;
	int *aresid;
	struct thread *td;
{
	int error = 0;

	do {
		int chunk = (len > MAXBSIZE) ? MAXBSIZE : len;

		if (rw != UIO_READ && vp->v_type == VREG)
			bwillwrite();
		error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
		    ioflg, cred, aresid, td);
		len -= chunk;	/* aresid calc already includes length */
		if (error)
			break;
		offset += chunk;
		base += chunk;
		uio_yield();
	} while (len);
	if (aresid)
		*aresid += len;
	return (error);
}

/*
 * File table vnode read routine.
 */
static int
vn_read(fp, uio, cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
	struct thread *td;
	int flags;
{
	struct vnode *vp;
	int error, ioflag;

	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
	    uio->uio_td, td));
	vp = (struct vnode *)fp->f_data;
	ioflag = 0;
	if (fp->f_flag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if (fp->f_flag & O_DIRECT)
		ioflag |= IO_DIRECT;
	VOP_LEASE(vp, td, cred, LEASE_READ);
	vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
	if ((flags & FOF_OFFSET) == 0)
		uio->uio_offset = fp->f_offset;

	ioflag |= sequential_heuristic(uio, fp);

	error = VOP_READ(vp, uio, ioflag, cred);
	if ((flags & FOF_OFFSET) == 0)
		fp->f_offset = uio->uio_offset;
	fp->f_nextoff = uio->uio_offset;
	VOP_UNLOCK(vp, 0, td);
	return (error);
}

/*
 * File table vnode write routine.
 */
static int
vn_write(fp, uio, cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
	struct thread *td;
	int flags;
{
	struct vnode *vp;
	struct mount *mp;
	int error, ioflag;

	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
	    uio->uio_td, td));
	vp = (struct vnode *)fp->f_data;
	if (vp->v_type == VREG)
		bwillwrite();
	vp = (struct vnode *)fp->f_data;	/* XXX needed? */
	ioflag = IO_UNIT;
	if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
		ioflag |= IO_APPEND;
	if (fp->f_flag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if (fp->f_flag & O_DIRECT)
		ioflag |= IO_DIRECT;
	if ((fp->f_flag & O_FSYNC) ||
	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
		ioflag |= IO_SYNC;
	mp = NULL;
	if (vp->v_type != VCHR &&
	    (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	VOP_LEASE(vp, td, cred, LEASE_WRITE);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	if ((flags & FOF_OFFSET) == 0)
		uio->uio_offset = fp->f_offset;
	ioflag |= sequential_heuristic(uio, fp);
	error = VOP_WRITE(vp, uio, ioflag, cred);
	if ((flags & FOF_OFFSET) == 0)
		fp->f_offset = uio->uio_offset;
	fp->f_nextoff = uio->uio_offset;
	VOP_UNLOCK(vp, 0, td);
	vn_finished_write(mp);
	return (error);
}

/*
 * File table vnode stat routine.
 */
static int
vn_statfile(fp, sb, td)
	struct file *fp;
	struct stat *sb;
	struct thread *td;
{
	struct vnode *vp = (struct vnode *)fp->f_data;

	return vn_stat(vp, sb, td);
}

int
vn_stat(vp, sb, td)
	struct vnode *vp;
	register struct stat *sb;
	struct thread *td;
{
	struct vattr vattr;
	register struct vattr *vap;
	int error;
	u_short mode;

	vap = &vattr;
	error = VOP_GETATTR(vp, vap, td->td_proc->p_ucred, td);
	if (error)
		return (error);

	/*
	 * Zero the spare stat fields
	 */
	sb->st_lspare = 0;
	sb->st_qspare[0] = 0;
	sb->st_qspare[1] = 0;

	/*
	 * Copy from vattr table
	 */
	if (vap->va_fsid != VNOVAL)
		sb->st_dev = vap->va_fsid;
	else
		sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
	sb->st_ino = vap->va_fileid;
	mode = vap->va_mode;
	switch (vap->va_type) {
	case VREG:
		mode |= S_IFREG;
		break;
	case VDIR:
		mode |= S_IFDIR;
		break;
	case VBLK:
		mode |= S_IFBLK;
		break;
	case VCHR:
		mode |= S_IFCHR;
		break;
	case VLNK:
		mode |= S_IFLNK;
		/* This is a cosmetic change, symlinks do not have a mode. */
		if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW)
			mode &= ~ACCESSPERMS;	/* 0000 */
		else
			mode |= ACCESSPERMS;	/* 0777 */
		break;
	case VSOCK:
		mode |= S_IFSOCK;
		break;
	case VFIFO:
		mode |= S_IFIFO;
		break;
	default:
		return (EBADF);
	}
	sb->st_mode = mode;
	sb->st_nlink = vap->va_nlink;
	sb->st_uid = vap->va_uid;
	sb->st_gid = vap->va_gid;
	sb->st_rdev = vap->va_rdev;
	if (vap->va_size > OFF_MAX)
		return (EOVERFLOW);
	sb->st_size = vap->va_size;
	sb->st_atimespec = vap->va_atime;
	sb->st_mtimespec = vap->va_mtime;
	sb->st_ctimespec = vap->va_ctime;

	/*
	 * According to www.opengroup.org, the meaning of st_blksize is
	 * "a filesystem-specific preferred I/O block size for this
	 * object.  In some filesystem types, this may vary from file
	 * to file".
	 * Default to zero to catch bogus uses of this field.
	 */

	if (vap->va_type == VREG) {
		sb->st_blksize = vap->va_blocksize;
	} else if (vn_isdisk(vp, NULL)) {
		sb->st_blksize = vp->v_rdev->si_bsize_best;
		if (sb->st_blksize < vp->v_rdev->si_bsize_phys)
			sb->st_blksize = vp->v_rdev->si_bsize_phys;
		if (sb->st_blksize < BLKDEV_IOSIZE)
			sb->st_blksize = BLKDEV_IOSIZE;
	} else {
		sb->st_blksize = 0;
	}

	sb->st_flags = vap->va_flags;
	if (suser_xxx(td->td_proc->p_ucred, 0, 0))
		sb->st_gen = 0;
	else
		sb->st_gen = vap->va_gen;

#if (S_BLKSIZE == 512)
	/* Optimize this case */
	sb->st_blocks = vap->va_bytes >> 9;
#else
	sb->st_blocks = vap->va_bytes / S_BLKSIZE;
#endif
	return (0);
}

/*
 * File table vnode ioctl routine.
 */
static int
vn_ioctl(fp, com, data, td)
	struct file *fp;
	u_long com;
	caddr_t data;
	struct thread *td;
{
	register struct vnode *vp = ((struct vnode *)fp->f_data);
	struct vattr vattr;
	int error;

	switch (vp->v_type) {

	case VREG:
	case VDIR:
		if (com == FIONREAD) {
			error = VOP_GETATTR(vp, &vattr, td->td_proc->p_ucred, td);
			if (error)
				return (error);
			*(int *)data = vattr.va_size - fp->f_offset;
			return (0);
		}
		if (com == FIONBIO || com == FIOASYNC)	/* XXX */
			return (0);			/* XXX */
		/* fall into ... */

	default:
#if 0
		return (ENOTTY);
#endif
	case VFIFO:
	case VCHR:
	case VBLK:
		if (com == FIODTYPE) {
			if (vp->v_type != VCHR && vp->v_type != VBLK)
				return (ENOTTY);
			*(int *)data = devsw(vp->v_rdev)->d_flags & D_TYPEMASK;
			return (0);
		}
		error = VOP_IOCTL(vp, com, data, fp->f_flag, td->td_proc->p_ucred, td);
		if (error == 0 && com == TIOCSCTTY) {

			/* Do nothing if reassigning same control tty */
			if (td->td_proc->p_session->s_ttyvp == vp)
				return (0);

			/* Get rid of reference to old control tty */
			if (td->td_proc->p_session->s_ttyvp)
				vrele(td->td_proc->p_session->s_ttyvp);

			td->td_proc->p_session->s_ttyvp = vp;
			VREF(vp);
		}
		return (error);
	}
}

/*
 * File table vnode poll routine.
 */
static int
vn_poll(fp, events, cred, td)
	struct file *fp;
	int events;
	struct ucred *cred;
	struct thread *td;
{

	return (VOP_POLL(((struct vnode *)fp->f_data), events, cred, td));
}

/*
 * Check that the vnode is still valid, and if so
 * acquire requested lock.
 */
int
#ifndef	DEBUG_LOCKS
vn_lock(vp, flags, td)
#else
debug_vn_lock(vp, flags, td, filename, line)
#endif
	struct vnode *vp;
	int flags;
	struct thread *td;
#ifdef	DEBUG_LOCKS
	const char *filename;
	int line;
#endif
{
	int error;

	do {
		if ((flags & LK_INTERLOCK) == 0)
			mtx_lock(&vp->v_interlock);
		if ((vp->v_flag & VXLOCK) && vp->v_vxproc != curthread) {
			vp->v_flag |= VXWANT;
			msleep(vp, &vp->v_interlock, PINOD | PDROP,
			    "vn_lock", 0);
			error = ENOENT;
		} else {
			if (vp->v_vxproc != NULL)
				printf("VXLOCK interlock avoided in vn_lock\n");
#ifdef	DEBUG_LOCKS
			vp->filename = filename;
			vp->line = line;
#endif
			error = VOP_LOCK(vp,
			    flags | LK_NOPAUSE | LK_INTERLOCK, td);
			if (error == 0)
				return (error);
		}
		flags &= ~LK_INTERLOCK;
	} while (flags & LK_RETRY);
	return (error);
}

/*
 * File table vnode close routine.
 */
static int
vn_closefile(fp, td)
	struct file *fp;
	struct thread *td;
{

	fp->f_ops = &badfileops;
	return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
		fp->f_cred, td));
}

/*
 * Preparing to start a filesystem write operation. If the operation is
 * permitted, then we bump the count of operations in progress and
 * proceed. If a suspend request is in progress, we wait until the
 * suspension is over, and then proceed.
 */
int
vn_start_write(vp, mpp, flags)
	struct vnode *vp;
	struct mount **mpp;
	int flags;
{
	struct mount *mp;
	int error;

	/*
	 * If a vnode is provided, get and return the mount point to
	 * which it will write.
	 */
	if (vp != NULL) {
		if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
			*mpp = NULL;
			if (error != EOPNOTSUPP)
				return (error);
			return (0);
		}
	}
	if ((mp = *mpp) == NULL)
		return (0);
	/*
	 * Check on status of suspension.
	 */
	while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
		if (flags & V_NOWAIT)
			return (EWOULDBLOCK);
		error = tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
		    "suspfs", 0);
		if (error)
			return (error);
	}
	if (flags & V_XSLEEP)
		return (0);
	mp->mnt_writeopcount++;
	return (0);
}

/*
 * Secondary suspension. Used by operations such as vop_inactive
 * routines that are needed by the higher level functions. These
 * are allowed to proceed until all the higher level functions have
 * completed (indicated by mnt_writeopcount dropping to zero). At that
 * time, these operations are halted until the suspension is over.
 */
int
vn_write_suspend_wait(vp, mp, flags)
	struct vnode *vp;
	struct mount *mp;
	int flags;
{
	int error;

	if (vp != NULL) {
		if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) {
			if (error != EOPNOTSUPP)
				return (error);
			return (0);
		}
	}
	/*
	 * If we are not suspended or have not yet reached suspended
	 * mode, then let the operation proceed.
	 */
	if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPENDED) == 0)
		return (0);
	if (flags & V_NOWAIT)
		return (EWOULDBLOCK);
	/*
	 * Wait for the suspension to finish.
	 */
	return (tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
	    "suspfs", 0));
}

/*
 * Filesystem write operation has completed. If we are suspending and this
 * operation is the last one, notify the suspender that the suspension is
 * now in effect.
 */
void
vn_finished_write(mp)
	struct mount *mp;
{

	if (mp == NULL)
		return;
	mp->mnt_writeopcount--;
	if (mp->mnt_writeopcount < 0)
		panic("vn_finished_write: neg cnt");
	if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
	    mp->mnt_writeopcount <= 0)
		wakeup(&mp->mnt_writeopcount);
}

/*
 * Request a filesystem to suspend write operations.
 */
void
vfs_write_suspend(mp)
	struct mount *mp;
{
	struct thread *td = curthread;

	if (mp->mnt_kern_flag & MNTK_SUSPEND)
		return;
	mp->mnt_kern_flag |= MNTK_SUSPEND;
	if (mp->mnt_writeopcount > 0)
		(void) tsleep(&mp->mnt_writeopcount, PUSER - 1, "suspwt", 0);
	VFS_SYNC(mp, MNT_WAIT, td->td_proc->p_ucred, td);
	mp->mnt_kern_flag |= MNTK_SUSPENDED;
}

/*
 * Request a filesystem to resume write operations.
 */
void
vfs_write_resume(mp)
	struct mount *mp;
{

	if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0)
		return;
	mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPENDED);
	wakeup(&mp->mnt_writeopcount);
	wakeup(&mp->mnt_flag);
}

static int
vn_kqfilter(struct file *fp, struct knote *kn)
{

	return (VOP_KQFILTER(((struct vnode *)fp->f_data), kn));
}

/*
 * Simplified in-kernel wrapper calls for extended attribute access.
 * These calls pass in a NULL credential, authorizing as "kernel" access.
 * Set IO_NODELOCKED in ioflg if the vnode is already locked.
 */
int
vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, int *buflen, char *buf, struct thread *td)
{
	struct uio auio;
	struct iovec iov;
	int error;

	iov.iov_len = *buflen;
	iov.iov_base = buf;

	auio.uio_iov = &iov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_td = td;
	auio.uio_offset = 0;
	auio.uio_resid = *buflen;

	if ((ioflg & IO_NODELOCKED) == 0)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);

	/* authorize attribute retrieval as kernel */
	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td);

	if ((ioflg & IO_NODELOCKED) == 0)
		VOP_UNLOCK(vp, 0, td);

	if (error == 0) {
		*buflen = *buflen - auio.uio_resid;
	}

	return (error);
}

/*
 * XXX failure mode if partially written?
 */
int
vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, int buflen, char *buf, struct thread *td)
{
	struct uio auio;
	struct iovec iov;
	struct mount *mp;
	int error;

	iov.iov_len = buflen;
	iov.iov_base = buf;

	auio.uio_iov = &iov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_td = td;
	auio.uio_offset = 0;
	auio.uio_resid = buflen;

	if ((ioflg & IO_NODELOCKED) == 0) {
		if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
			return (error);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	}

	/* authorize attribute setting as kernel */
	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td);

	if ((ioflg & IO_NODELOCKED) == 0) {
		vn_finished_write(mp);
		VOP_UNLOCK(vp, 0, td);
	}

	return (error);
}

int
vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, struct thread *td)
{
	struct mount *mp;
	int error;

	if ((ioflg & IO_NODELOCKED) == 0) {
		if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
			return (error);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	}

	/* authorize attribute removal as kernel */
	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, NULL, td);

	if ((ioflg & IO_NODELOCKED) == 0) {
		vn_finished_write(mp);
		VOP_UNLOCK(vp, 0, td);
	}

	return (error);
}