/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_vnops.c	8.2 (Berkeley) 1/21/94
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/filio.h>
#include <sys/ttycom.h>
#include <sys/conf.h>

#include <vm/vm_zone.h>

static int vn_closefile __P((struct file *fp, struct proc *p));
static int vn_ioctl __P((struct file *fp, u_long com, caddr_t data,
	    struct proc *p));
static int vn_read __P((struct file *fp, struct uio *uio,
	    struct ucred *cred, int flags, struct proc *p));
static int vn_poll __P((struct file *fp, int events, struct ucred *cred,
	    struct proc *p));
static int vn_statfile __P((struct file *fp, struct stat *sb, struct proc *p));
static int vn_write __P((struct file *fp, struct uio *uio,
	    struct ucred *cred, int flags, struct proc *p));

struct fileops vnops =
	{ vn_read, vn_write, vn_ioctl, vn_poll, vn_statfile, vn_closefile };

/*
 * Common code for vnode open operations.
 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
 *
 * Note that this does NOT free nameidata for the successful case,
 * due to the NDINIT being done elsewhere.
 */
int
vn_open(ndp, fmode, cmode)
	register struct nameidata *ndp;
	int fmode, cmode;
{
	register struct vnode *vp;
	register struct proc *p = ndp->ni_cnd.cn_proc;
	register struct ucred *cred = p->p_ucred;
	struct vattr vat;
	struct vattr *vap = &vat;
	int mode, error;

	if (fmode & O_CREAT) {
		ndp->ni_cnd.cn_nameiop = CREATE;
		ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
		if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
			ndp->ni_cnd.cn_flags |= FOLLOW;
		error = namei(ndp);
		if (error)
			return (error);
		if (ndp->ni_vp == NULL) {
			VATTR_NULL(vap);
			vap->va_type = VREG;
			vap->va_mode = cmode;
			if (fmode & O_EXCL)
				vap->va_vaflags |= VA_EXCLUSIVE;
			VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE);
			error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
			    &ndp->ni_cnd, vap);
			if (error) {
				NDFREE(ndp, NDF_ONLY_PNBUF);
				vput(ndp->ni_dvp);
				return (error);
			}
			vput(ndp->ni_dvp);
			ASSERT_VOP_UNLOCKED(ndp->ni_dvp, "create");
			ASSERT_VOP_LOCKED(ndp->ni_vp, "create");
			fmode &= ~O_TRUNC;
			vp = ndp->ni_vp;
		} else {
			if (ndp->ni_dvp == ndp->ni_vp)
				vrele(ndp->ni_dvp);
			else
				vput(ndp->ni_dvp);
			ndp->ni_dvp = NULL;
			vp = ndp->ni_vp;
			if (fmode & O_EXCL) {
				error = EEXIST;
				goto bad;
			}
			fmode &= ~O_CREAT;
		}
	} else {
		ndp->ni_cnd.cn_nameiop = LOOKUP;
		ndp->ni_cnd.cn_flags =
		    ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
		error = namei(ndp);
		if (error)
			return (error);
		vp = ndp->ni_vp;
	}
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	if ((fmode & O_CREAT) == 0) {
		mode = 0;
		if (fmode & (FWRITE | O_TRUNC)) {
			if (vp->v_type == VDIR) {
				error = EISDIR;
				goto bad;
			}
			error = vn_writechk(vp);
			if (error)
				goto bad;
			mode |= VWRITE;
		}
		if (fmode & FREAD)
			mode |= VREAD;
		if (mode) {
			error = VOP_ACCESS(vp, mode, cred, p);
			if (error)
				goto bad;
		}
	}
	if (fmode & O_TRUNC) {
		VOP_UNLOCK(vp, 0, p);				/* XXX */
		VOP_LEASE(vp, p, cred, LEASE_WRITE);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
		VATTR_NULL(vap);
		vap->va_size = 0;
		error = VOP_SETATTR(vp, vap, cred, p);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, cred, p);
	if (error)
		goto bad;
	/*
	 * Make sure that a VM object is created for VMIO support.
	 */
	if (vn_canvmio(vp) == TRUE) {
		if ((error = vfs_object_create(vp, p, cred)) != 0)
			goto bad;
	}

	if (fmode & FWRITE)
		vp->v_writecount++;
	return (0);
bad:
	NDFREE(ndp, NDF_ONLY_PNBUF);
	vput(vp);
	return (error);
}

/*
 * Check for write permissions on the specified vnode.
 * Prototype text segments cannot be written.
 */
int
vn_writechk(vp)
	register struct vnode *vp;
{

	/*
	 * If there's shared text associated with
	 * the vnode, try to free it up once.  If
	 * we fail, we can't allow writing.
	 */
	if (vp->v_flag & VTEXT)
		return (ETXTBSY);
	return (0);
}

/*
 * Vnode close call
 */
int
vn_close(vp, flags, cred, p)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
{
	int error;

	if (flags & FWRITE)
		vp->v_writecount--;
	error = VOP_CLOSE(vp, flags, cred, p);
	vrele(vp);
	return (error);
}

/*
 * Package up an I/O request on a vnode into a uio and do it.
 */
int
vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
	enum uio_rw rw;
	struct vnode *vp;
	caddr_t base;
	int len;
	off_t offset;
	enum uio_seg segflg;
	int ioflg;
	struct ucred *cred;
	int *aresid;
	struct proc *p;
{
	struct uio auio;
	struct iovec aiov;
	int error;

	if ((ioflg & IO_NODELOCKED) == 0)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = base;
	aiov.iov_len = len;
	auio.uio_resid = len;
	auio.uio_offset = offset;
	auio.uio_segflg = segflg;
	auio.uio_rw = rw;
	auio.uio_procp = p;
	if (rw == UIO_READ) {
		error = VOP_READ(vp, &auio, ioflg, cred);
	} else {
		error = VOP_WRITE(vp, &auio, ioflg, cred);
	}
	if (aresid)
		*aresid = auio.uio_resid;
	else
		if (auio.uio_resid && error == 0)
			error = EIO;
	if ((ioflg & IO_NODELOCKED) == 0)
		VOP_UNLOCK(vp, 0, p);
	return (error);
}

/*
 * File table vnode read routine.
 */
static int
vn_read(fp, uio, cred, flags, p)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
	struct proc *p;
	int flags;
{
	struct vnode *vp;
	int error, ioflag;

	KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p",
	    uio->uio_procp, p));
	vp = (struct vnode *)fp->f_data;
	ioflag = 0;
	if (fp->f_flag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	VOP_LEASE(vp, p, cred, LEASE_READ);
	vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
	if ((flags & FOF_OFFSET) == 0)
		uio->uio_offset = fp->f_offset;

	/*
	 * Sequential read heuristic.
	 * If we have been doing sequential input,
	 * a rewind operation doesn't turn off
	 * sequential input mode.
	 */
	if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
	    uio->uio_offset == fp->f_nextread) {
		int tmpseq = fp->f_seqcount;
		/*
		 * XXX we assume that the filesystem block size is
		 * the default.  Not true, but still gives us a pretty
		 * good indicator of how sequential the read operations
		 * are.
		 */
		tmpseq += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
		if (tmpseq >= 127)
			tmpseq = 127;
		fp->f_seqcount = tmpseq;
		ioflag |= fp->f_seqcount << 16;
	} else {
		if (fp->f_seqcount > 1)
			fp->f_seqcount = 1;
		else
			fp->f_seqcount = 0;
	}
	error = VOP_READ(vp, uio, ioflag, cred);
	if ((flags & FOF_OFFSET) == 0)
		fp->f_offset = uio->uio_offset;
	fp->f_nextread = uio->uio_offset;
	VOP_UNLOCK(vp, 0, p);
	return (error);
}

/*
 * File table vnode write routine.
 */
static int
vn_write(fp, uio, cred, flags, p)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
	struct proc *p;
	int flags;
{
	struct vnode *vp;
	int error, ioflag;

	KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p",
	    uio->uio_procp, p));
	vp = (struct vnode *)fp->f_data;
	if (vp->v_type == VREG)
		bwillwrite();
	vp = (struct vnode *)fp->f_data;	/* XXX needed? */
	ioflag = IO_UNIT;
	if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
		ioflag |= IO_APPEND;
	if (fp->f_flag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if ((fp->f_flag & O_FSYNC) ||
	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
		ioflag |= IO_SYNC;
	VOP_LEASE(vp, p, cred, LEASE_WRITE);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	if ((flags & FOF_OFFSET) == 0)
		uio->uio_offset = fp->f_offset;
	error = VOP_WRITE(vp, uio, ioflag, cred);
	if ((flags & FOF_OFFSET) == 0)
		fp->f_offset = uio->uio_offset;
	VOP_UNLOCK(vp, 0, p);
	return (error);
}

/*
 * File table vnode stat routine.
 */
static int
vn_statfile(fp, sb, p)
	struct file *fp;
	struct stat *sb;
	struct proc *p;
{
	struct vnode *vp = (struct vnode *)fp->f_data;

	return vn_stat(vp, sb, p);
}

int
vn_stat(vp, sb, p)
	struct vnode *vp;
	register struct stat *sb;
	struct proc *p;
{
	struct vattr vattr;
	register struct vattr *vap;
	int error;
	u_short mode;

	vap = &vattr;
	error = VOP_GETATTR(vp, vap, p->p_ucred, p);
	if (error)
		return (error);

	/*
	 * Zero the spare stat fields
	 */
	sb->st_lspare = 0;
	sb->st_qspare[0] = 0;
	sb->st_qspare[1] = 0;

	/*
	 * Copy from vattr table
	 */
	if (vap->va_fsid != VNOVAL)
		sb->st_dev = vap->va_fsid;
	else
		sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
	sb->st_ino = vap->va_fileid;
	mode = vap->va_mode;
	switch (vap->va_type) {
	case VREG:
		mode |= S_IFREG;
		break;
	case VDIR:
		mode |= S_IFDIR;
		break;
	case VBLK:
		mode |= S_IFBLK;
		break;
	case VCHR:
		mode |= S_IFCHR;
		break;
	case VLNK:
		mode |= S_IFLNK;
		/*
		 * This is a cosmetic change, symlinks do not have a mode.
		 * Adjust the local mode before it is copied to st_mode below.
		 */
		if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW)
			mode &= ~ACCESSPERMS;	/* 0000 */
		else
			mode |= ACCESSPERMS;	/* 0777 */
		break;
	case VSOCK:
		mode |= S_IFSOCK;
		break;
	case VFIFO:
		mode |= S_IFIFO;
		break;
	default:
		return (EBADF);
	}
	sb->st_mode = mode;
	sb->st_nlink = vap->va_nlink;
	sb->st_uid = vap->va_uid;
	sb->st_gid = vap->va_gid;
	sb->st_rdev = vap->va_rdev;
	sb->st_size = vap->va_size;
	sb->st_atimespec = vap->va_atime;
	sb->st_mtimespec = vap->va_mtime;
	sb->st_ctimespec = vap->va_ctime;

	/*
	 * According to www.opengroup.org, the meaning of st_blksize is
	 * "a filesystem-specific preferred I/O block size for this
	 * object.  In some filesystem types, this may vary from file
	 * to file"
	 * Default to zero to catch bogus uses of this field.
	 */

	if (vap->va_type == VREG) {
		sb->st_blksize = vap->va_blocksize;
	} else if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
	    devsw(vp->v_rdev) && (devsw(vp->v_rdev)->d_flags & D_DISK)) {
		/* XXX use vn_isdisk() above once VCHR is also disk */
		sb->st_blksize = vp->v_rdev->si_bsize_best;
		if (sb->st_blksize < vp->v_rdev->si_bsize_phys)
			sb->st_blksize = vp->v_rdev->si_bsize_phys;
		if (sb->st_blksize < BLKDEV_IOSIZE)
			sb->st_blksize = BLKDEV_IOSIZE;
	} else {
		sb->st_blksize = 0;
	}

	sb->st_flags = vap->va_flags;
	if (suser_xxx(p->p_ucred, 0, 0))
		sb->st_gen = 0;
	else
		sb->st_gen = vap->va_gen;

#if (S_BLKSIZE == 512)
	/* Optimize this case */
	sb->st_blocks = vap->va_bytes >> 9;
#else
	sb->st_blocks = vap->va_bytes / S_BLKSIZE;
#endif
	return (0);
}

/*
 * File table vnode ioctl routine.
 */
static int
vn_ioctl(fp, com, data, p)
	struct file *fp;
	u_long com;
	caddr_t data;
	struct proc *p;
{
	register struct vnode *vp = ((struct vnode *)fp->f_data);
	struct vattr vattr;
	int error;

	switch (vp->v_type) {

	case VREG:
	case VDIR:
		if (com == FIONREAD) {
			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
			if (error)
				return (error);
			*(int *)data = vattr.va_size - fp->f_offset;
			return (0);
		}
		if (com == FIONBIO || com == FIOASYNC)	/* XXX */
			return (0);			/* XXX */
		/* fall into ... */

	default:
#if 0
		return (ENOTTY);
#endif
	case VFIFO:
	case VCHR:
	case VBLK:
		if (com == FIODTYPE) {
			if (vp->v_type != VCHR && vp->v_type != VBLK)
				return (ENOTTY);
			*(int *)data = devsw(vp->v_rdev)->d_flags & D_TYPEMASK;
			return (0);
		}
		error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
		if (error == 0 && com == TIOCSCTTY) {

			/* Do nothing if reassigning same control tty */
			if (p->p_session->s_ttyvp == vp)
				return (0);

			/* Get rid of reference to old control tty */
			if (p->p_session->s_ttyvp)
				vrele(p->p_session->s_ttyvp);

			p->p_session->s_ttyvp = vp;
			VREF(vp);
		}
		return (error);
	}
}

/*
 * File table vnode poll routine.
 */
static int
vn_poll(fp, events, cred, p)
	struct file *fp;
	int events;
	struct ucred *cred;
	struct proc *p;
{

	return (VOP_POLL(((struct vnode *)fp->f_data), events, cred, p));
}

/*
 * Check that the vnode is still valid, and if so
 * acquire requested lock.
 */
int
#ifndef	DEBUG_LOCKS
vn_lock(vp, flags, p)
#else
debug_vn_lock(vp, flags, p, filename, line)
#endif
	struct vnode *vp;
	int flags;
	struct proc *p;
#ifdef	DEBUG_LOCKS
	const char *filename;
	int line;
#endif
{
	int error;

	do {
		if ((flags & LK_INTERLOCK) == 0)
			simple_lock(&vp->v_interlock);
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vn_lock", 0);
			error = ENOENT;
		} else {
#ifdef	DEBUG_LOCKS
			vp->filename = filename;
			vp->line = line;
#endif
			error = VOP_LOCK(vp,
			    flags | LK_NOPAUSE | LK_INTERLOCK, p);
			if (error == 0)
				return (error);
		}
		flags &= ~LK_INTERLOCK;
	} while (flags & LK_RETRY);
	return (error);
}

/*
 * File table vnode close routine.
 */
static int
vn_closefile(fp, p)
	struct file *fp;
	struct proc *p;
{

	fp->f_ops = &badfileops;
	return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
		fp->f_cred, p));
}