1 /*- 2 * Copyright (c) 2000-2004 3 * Poul-Henning Kamp. All rights reserved. 4 * Copyright (c) 1989, 1992-1993, 1995 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software donated to Berkeley by 8 * Jan-Simon Pendry. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)kernfs_vnops.c 8.15 (Berkeley) 5/21/95 32 * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43 33 * 34 * $FreeBSD$ 35 */ 36 37 /* 38 * TODO: 39 * remove empty directories 40 * mkdir: want it ? 41 */ 42 43 #include <opt_devfs.h> 44 #include <opt_mac.h> 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/conf.h> 49 #include <sys/dirent.h> 50 #include <sys/fcntl.h> 51 #include <sys/file.h> 52 #include <sys/filedesc.h> 53 #include <sys/filio.h> 54 #include <sys/kernel.h> 55 #include <sys/lock.h> 56 #include <sys/mac.h> 57 #include <sys/malloc.h> 58 #include <sys/mount.h> 59 #include <sys/namei.h> 60 #include <sys/proc.h> 61 #include <sys/stat.h> 62 #include <sys/sx.h> 63 #include <sys/time.h> 64 #include <sys/ttycom.h> 65 #include <sys/unistd.h> 66 #include <sys/vnode.h> 67 68 static struct vop_vector devfs_vnodeops; 69 static struct vop_vector devfs_specops; 70 static struct fileops devfs_ops_f; 71 72 #include <fs/devfs/devfs.h> 73 #include <fs/devfs/devfs_int.h> 74 75 static int 76 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp) 77 { 78 79 *devp = fp->f_vnode->v_rdev; 80 if (*devp != fp->f_data) 81 return (ENXIO); 82 KASSERT((*devp)->si_refcount > 0, 83 ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp))); 84 *dswp = dev_refthread(*devp); 85 if (*dswp == NULL) 86 return (ENXIO); 87 return (0); 88 } 89 90 /* 91 * Construct the fully qualified path name relative to the mountpoint 92 */ 93 static char * 94 devfs_fqpn(char *buf, struct vnode *dvp, struct componentname *cnp) 95 { 96 int i; 97 struct devfs_dirent *de, *dd; 98 struct devfs_mount *dmp; 99 100 dmp = VFSTODEVFS(dvp->v_mount); 101 dd = dvp->v_data; 102 i = SPECNAMELEN; 103 buf[i] = '\0'; 104 i -= cnp->cn_namelen; 105 if (i < 0) 106 return (NULL); 107 bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen); 108 de = dd; 109 while (de != dmp->dm_rootdir) { 110 i--; 111 if (i < 0) 112 return (NULL); 113 buf[i] = '/'; 114 i -= de->de_dirent->d_namlen; 115 if (i < 0) 116 return (NULL); 117 bcopy(de->de_dirent->d_name, buf + i, 118 de->de_dirent->d_namlen); 119 de = TAILQ_FIRST(&de->de_dlist); /* "." */ 120 de = TAILQ_NEXT(de, de_list); /* ".." */ 121 de = de->de_dir; 122 } 123 return (buf + i); 124 } 125 126 int 127 devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td) 128 { 129 int error; 130 struct vnode *vp; 131 struct cdev *dev; 132 133 KASSERT(td == curthread, ("devfs_allocv: td != curthread")); 134 loop: 135 vp = de->de_vnode; 136 if (vp != NULL) { 137 if (vget(vp, LK_EXCLUSIVE, td)) 138 goto loop; 139 *vpp = vp; 140 return (0); 141 } 142 if (de->de_dirent->d_type == DT_CHR) { 143 if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) 144 return (ENOENT); 145 dev = &de->de_cdp->cdp_c; 146 } else { 147 dev = NULL; 148 } 149 error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp); 150 if (error != 0) { 151 printf("devfs_allocv: failed to allocate new vnode\n"); 152 return (error); 153 } 154 155 if (de->de_dirent->d_type == DT_CHR) { 156 vp->v_type = VCHR; 157 VI_LOCK(vp); 158 dev_lock(); 159 dev_refl(dev); 160 vp->v_rdev = dev; 161 KASSERT(vp->v_usecount == 1, 162 ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount)); 163 dev->si_usecount += vp->v_usecount; 164 dev_unlock(); 165 VI_UNLOCK(vp); 166 vp->v_op = &devfs_specops; 167 } else if (de->de_dirent->d_type == DT_DIR) { 168 vp->v_type = VDIR; 169 } else if (de->de_dirent->d_type == DT_LNK) { 170 vp->v_type = VLNK; 171 } else { 172 vp->v_type = VBAD; 173 } 174 vp->v_data = de; 175 de->de_vnode = vp; 176 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 177 #ifdef MAC 178 mac_associate_vnode_devfs(mp, de, vp); 179 #endif 180 *vpp = vp; 181 return (0); 182 } 183 184 static int 185 devfs_access(struct vop_access_args *ap) 186 { 187 struct vnode *vp = ap->a_vp; 188 struct devfs_dirent *de; 189 int error; 190 191 de = vp->v_data; 192 if (vp->v_type == VDIR) 193 de = de->de_dir; 194 195 error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid, 196 ap->a_mode, ap->a_cred, NULL); 197 if (!error) 198 return (error); 199 if (error != EACCES) 200 return (error); 201 /* We do, however, allow access to the controlling terminal */ 202 if (!(ap->a_td->td_proc->p_flag & P_CONTROLT)) 203 return (error); 204 if (ap->a_td->td_proc->p_session->s_ttyvp == de->de_vnode) 205 return (0); 206 return (error); 207 } 208 209 /* ARGSUSED */ 210 static int 211 devfs_advlock(struct vop_advlock_args *ap) 212 { 213 214 return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL); 215 } 216 217 /* ARGSUSED */ 218 static int 219 devfs_close(struct vop_close_args *ap) 220 { 221 struct vnode *vp = ap->a_vp, *oldvp; 222 struct thread *td = ap->a_td; 223 struct cdev *dev = vp->v_rdev; 224 struct cdevsw *dsw; 225 int error; 226 227 /* 228 * Hack: a tty device that is a controlling terminal 229 * has a reference from the session structure. 230 * We cannot easily tell that a character device is 231 * a controlling terminal, unless it is the closing 232 * process' controlling terminal. In that case, 233 * if the reference count is 2 (this last descriptor 234 * plus the session), release the reference from the session. 235 */ 236 237 /* 238 * This needs to be rewritten to take the vp interlock into 239 * consideration. 240 */ 241 242 oldvp = NULL; 243 sx_xlock(&proctree_lock); 244 if (td && vp == td->td_proc->p_session->s_ttyvp) { 245 SESS_LOCK(td->td_proc->p_session); 246 VI_LOCK(vp); 247 if (count_dev(dev) == 2 && (vp->v_iflag & VI_DOOMED) == 0) { 248 td->td_proc->p_session->s_ttyvp = NULL; 249 oldvp = vp; 250 } 251 VI_UNLOCK(vp); 252 SESS_UNLOCK(td->td_proc->p_session); 253 } 254 sx_xunlock(&proctree_lock); 255 if (oldvp != NULL) 256 vrele(oldvp); 257 /* 258 * We do not want to really close the device if it 259 * is still in use unless we are trying to close it 260 * forcibly. Since every use (buffer, vnode, swap, cmap) 261 * holds a reference to the vnode, and because we mark 262 * any other vnodes that alias this device, when the 263 * sum of the reference counts on all the aliased 264 * vnodes descends to one, we are on last close. 265 */ 266 dsw = dev_refthread(dev); 267 if (dsw == NULL) 268 return (ENXIO); 269 VI_LOCK(vp); 270 if (vp->v_iflag & VI_DOOMED) { 271 /* Forced close. */ 272 } else if (dsw->d_flags & D_TRACKCLOSE) { 273 /* Keep device updated on status. */ 274 } else if (count_dev(dev) > 1) { 275 VI_UNLOCK(vp); 276 dev_relthread(dev); 277 return (0); 278 } 279 VI_UNLOCK(vp); 280 KASSERT(dev->si_refcount > 0, 281 ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev))); 282 if (!(dsw->d_flags & D_NEEDGIANT)) { 283 DROP_GIANT(); 284 error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td); 285 PICKUP_GIANT(); 286 } else { 287 error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td); 288 } 289 dev_relthread(dev); 290 return (error); 291 } 292 293 static int 294 devfs_close_f(struct file *fp, struct thread *td) 295 { 296 297 return (vnops.fo_close(fp, td)); 298 } 299 300 /* ARGSUSED */ 301 static int 302 devfs_fsync(struct vop_fsync_args *ap) 303 { 304 if (!vn_isdisk(ap->a_vp, NULL)) 305 return (0); 306 307 return (vop_stdfsync(ap)); 308 } 309 310 static int 311 devfs_getattr(struct vop_getattr_args *ap) 312 { 313 struct vnode *vp = ap->a_vp; 314 struct vattr *vap = ap->a_vap; 315 int error = 0; 316 struct devfs_dirent *de; 317 struct cdev *dev; 318 319 de = vp->v_data; 320 KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp)); 321 if (vp->v_type == VDIR) { 322 de = de->de_dir; 323 KASSERT(de != NULL, 324 ("Null dir dirent in devfs_getattr vp=%p", vp)); 325 } 326 bzero((caddr_t) vap, sizeof(*vap)); 327 vattr_null(vap); 328 vap->va_uid = de->de_uid; 329 vap->va_gid = de->de_gid; 330 vap->va_mode = de->de_mode; 331 if (vp->v_type == VLNK) 332 vap->va_size = strlen(de->de_symlink); 333 else if (vp->v_type == VDIR) 334 vap->va_size = vap->va_bytes = DEV_BSIZE; 335 else 336 vap->va_size = 0; 337 if (vp->v_type != VDIR) 338 vap->va_bytes = 0; 339 vap->va_blocksize = DEV_BSIZE; 340 vap->va_type = vp->v_type; 341 342 #define fix(aa) \ 343 do { \ 344 if ((aa).tv_sec == 0) { \ 345 (aa).tv_sec = boottime.tv_sec; \ 346 (aa).tv_nsec = boottime.tv_usec * 1000; \ 347 } \ 348 } while (0) 349 350 if (vp->v_type != VCHR) { 351 fix(de->de_atime); 352 vap->va_atime = de->de_atime; 353 fix(de->de_mtime); 354 vap->va_mtime = de->de_mtime; 355 fix(de->de_ctime); 356 vap->va_ctime = de->de_ctime; 357 } else { 358 dev = vp->v_rdev; 359 fix(dev->si_atime); 360 vap->va_atime = dev->si_atime; 361 fix(dev->si_mtime); 362 vap->va_mtime = dev->si_mtime; 363 fix(dev->si_ctime); 364 vap->va_ctime = dev->si_ctime; 365 366 vap->va_rdev = dev->si_priv->cdp_inode; 367 } 368 vap->va_gen = 0; 369 vap->va_flags = 0; 370 vap->va_nlink = de->de_links; 371 vap->va_fileid = de->de_inode; 372 373 return (error); 374 } 375 376 /* ARGSUSED */ 377 static int 378 devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td) 379 { 380 struct cdev *dev; 381 struct cdevsw *dsw; 382 struct vnode *vp; 383 struct vnode *vpold; 384 int error, i; 385 const char *p; 386 struct fiodgname_arg *fgn; 387 388 error = devfs_fp_check(fp, &dev, &dsw); 389 if (error) 390 return (error); 391 392 if (com == FIODTYPE) { 393 *(int *)data = dsw->d_flags & D_TYPEMASK; 394 dev_relthread(dev); 395 return (0); 396 } else if (com == FIODGNAME) { 397 fgn = data; 398 p = devtoname(dev); 399 i = strlen(p) + 1; 400 if (i > fgn->len) 401 error = EINVAL; 402 else 403 error = copyout(p, fgn->buf, i); 404 dev_relthread(dev); 405 return (error); 406 } 407 error = dsw->d_ioctl(dev, com, data, fp->f_flag, td); 408 dev_relthread(dev); 409 if (error == ENOIOCTL) 410 error = ENOTTY; 411 if (error == 0 && com == TIOCSCTTY) { 412 vp = fp->f_vnode; 413 414 /* Do nothing if reassigning same control tty */ 415 sx_slock(&proctree_lock); 416 if (td->td_proc->p_session->s_ttyvp == vp) { 417 sx_sunlock(&proctree_lock); 418 return (0); 419 } 420 421 mtx_lock(&Giant); 422 423 vpold = td->td_proc->p_session->s_ttyvp; 424 VREF(vp); 425 SESS_LOCK(td->td_proc->p_session); 426 td->td_proc->p_session->s_ttyvp = vp; 427 SESS_UNLOCK(td->td_proc->p_session); 428 429 sx_sunlock(&proctree_lock); 430 431 /* Get rid of reference to old control tty */ 432 if (vpold) 433 vrele(vpold); 434 mtx_unlock(&Giant); 435 } 436 return (error); 437 } 438 439 /* ARGSUSED */ 440 static int 441 devfs_kqfilter_f(struct file *fp, struct knote *kn) 442 { 443 struct cdev *dev; 444 struct cdevsw *dsw; 445 int error; 446 447 error = devfs_fp_check(fp, &dev, &dsw); 448 if (error) 449 return (error); 450 error = dsw->d_kqfilter(dev, kn); 451 dev_relthread(dev); 452 return (error); 453 } 454 455 static int 456 devfs_lookupx(struct vop_lookup_args *ap) 457 { 458 struct componentname *cnp; 459 struct vnode *dvp, **vpp; 460 struct thread *td; 461 struct devfs_dirent *de, *dd; 462 struct devfs_dirent **dde; 463 struct devfs_mount *dmp; 464 struct cdev *cdev; 465 int error, flags, nameiop; 466 char specname[SPECNAMELEN + 1], *pname; 467 468 cnp = ap->a_cnp; 469 vpp = ap->a_vpp; 470 dvp = ap->a_dvp; 471 pname = cnp->cn_nameptr; 472 td = cnp->cn_thread; 473 flags = cnp->cn_flags; 474 nameiop = cnp->cn_nameiop; 475 dmp = VFSTODEVFS(dvp->v_mount); 476 dd = dvp->v_data; 477 *vpp = NULLVP; 478 479 if ((flags & ISLASTCN) && nameiop == RENAME) 480 return (EOPNOTSUPP); 481 482 if (dvp->v_type != VDIR) 483 return (ENOTDIR); 484 485 if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT)) 486 return (EIO); 487 488 error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td); 489 if (error) 490 return (error); 491 492 if (cnp->cn_namelen == 1 && *pname == '.') { 493 if ((flags & ISLASTCN) && nameiop != LOOKUP) 494 return (EINVAL); 495 *vpp = dvp; 496 VREF(dvp); 497 return (0); 498 } 499 500 if (flags & ISDOTDOT) { 501 if ((flags & ISLASTCN) && nameiop != LOOKUP) 502 return (EINVAL); 503 VOP_UNLOCK(dvp, 0, td); 504 de = TAILQ_FIRST(&dd->de_dlist); /* "." */ 505 de = TAILQ_NEXT(de, de_list); /* ".." */ 506 de = de->de_dir; 507 error = devfs_allocv(de, dvp->v_mount, vpp, td); 508 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td); 509 return (error); 510 } 511 512 devfs_populate(dmp); 513 dd = dvp->v_data; 514 de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen); 515 while (de == NULL) { /* While(...) so we can use break */ 516 517 if (nameiop == DELETE) 518 return (ENOENT); 519 520 /* 521 * OK, we didn't have an entry for the name we were asked for 522 * so we try to see if anybody can create it on demand. 523 */ 524 pname = devfs_fqpn(specname, dvp, cnp); 525 if (pname == NULL) 526 break; 527 528 cdev = NULL; 529 EVENTHANDLER_INVOKE(dev_clone, 530 td->td_ucred, pname, strlen(pname), &cdev); 531 if (cdev == NULL) 532 break; 533 534 devfs_populate(dmp); 535 536 dev_lock(); 537 dde = &cdev->si_priv->cdp_dirents[dmp->dm_idx]; 538 if (dde != NULL && *dde != NULL) 539 de = *dde; 540 dev_unlock(); 541 dev_rel(cdev); 542 break; 543 } 544 545 if (de == NULL || de->de_flags & DE_WHITEOUT) { 546 if ((nameiop == CREATE || nameiop == RENAME) && 547 (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) { 548 cnp->cn_flags |= SAVENAME; 549 return (EJUSTRETURN); 550 } 551 return (ENOENT); 552 } 553 554 if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) { 555 error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td); 556 if (error) 557 return (error); 558 if (*vpp == dvp) { 559 VREF(dvp); 560 *vpp = dvp; 561 return (0); 562 } 563 } 564 error = devfs_allocv(de, dvp->v_mount, vpp, td); 565 return (error); 566 } 567 568 static int 569 devfs_lookup(struct vop_lookup_args *ap) 570 { 571 int j; 572 struct devfs_mount *dmp; 573 574 dmp = VFSTODEVFS(ap->a_dvp->v_mount); 575 sx_xlock(&dmp->dm_lock); 576 j = devfs_lookupx(ap); 577 sx_xunlock(&dmp->dm_lock); 578 return (j); 579 } 580 581 static int 582 devfs_mknod(struct vop_mknod_args *ap) 583 { 584 struct componentname *cnp; 585 struct vnode *dvp, **vpp; 586 struct thread *td; 587 struct devfs_dirent *dd, *de; 588 struct devfs_mount *dmp; 589 int error; 590 591 /* 592 * The only type of node we should be creating here is a 593 * character device, for anything else return EOPNOTSUPP. 594 */ 595 if (ap->a_vap->va_type != VCHR) 596 return (EOPNOTSUPP); 597 dvp = ap->a_dvp; 598 dmp = VFSTODEVFS(dvp->v_mount); 599 sx_xlock(&dmp->dm_lock); 600 601 cnp = ap->a_cnp; 602 vpp = ap->a_vpp; 603 td = cnp->cn_thread; 604 dd = dvp->v_data; 605 606 error = ENOENT; 607 TAILQ_FOREACH(de, &dd->de_dlist, de_list) { 608 if (cnp->cn_namelen != de->de_dirent->d_namlen) 609 continue; 610 if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name, 611 de->de_dirent->d_namlen) != 0) 612 continue; 613 if (de->de_flags & DE_WHITEOUT) 614 break; 615 goto notfound; 616 } 617 if (de == NULL) 618 goto notfound; 619 de->de_flags &= ~DE_WHITEOUT; 620 error = devfs_allocv(de, dvp->v_mount, vpp, td); 621 notfound: 622 sx_xunlock(&dmp->dm_lock); 623 return (error); 624 } 625 626 /* ARGSUSED */ 627 static int 628 devfs_open(struct vop_open_args *ap) 629 { 630 struct thread *td = ap->a_td; 631 struct vnode *vp = ap->a_vp; 632 struct cdev *dev = vp->v_rdev; 633 struct file *fp; 634 int error; 635 struct cdevsw *dsw; 636 637 if (vp->v_type == VBLK) 638 return (ENXIO); 639 640 if (dev == NULL) 641 return (ENXIO); 642 643 /* Make this field valid before any I/O in d_open. */ 644 if (dev->si_iosize_max == 0) 645 dev->si_iosize_max = DFLTPHYS; 646 647 if (vn_isdisk(vp, NULL) && 648 ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { 649 /* 650 * When running in very secure mode, do not allow 651 * opens for writing of any disks. 652 * XXX: should be in geom_dev.c, but we lack the cred there. 653 */ 654 error = securelevel_ge(td->td_ucred, 2); 655 if (error) 656 return (error); 657 } 658 659 dsw = dev_refthread(dev); 660 if (dsw == NULL) 661 return (ENXIO); 662 663 /* XXX: Special casing of ttys for deadfs. Probably redundant. */ 664 if (dsw->d_flags & D_TTY) 665 vp->v_vflag |= VV_ISTTY; 666 667 VOP_UNLOCK(vp, 0, td); 668 669 if(!(dsw->d_flags & D_NEEDGIANT)) { 670 DROP_GIANT(); 671 if (dsw->d_fdopen != NULL) 672 error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx); 673 else 674 error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td); 675 PICKUP_GIANT(); 676 } else { 677 if (dsw->d_fdopen != NULL) 678 error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx); 679 else 680 error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td); 681 } 682 683 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 684 685 dev_relthread(dev); 686 687 if (error) 688 return (error); 689 690 #if 0 /* /dev/console */ 691 KASSERT(ap->a_fdidx >= 0, 692 ("Could not vnode bypass device on fd %d", ap->a_fdidx)); 693 #else 694 if(ap->a_fdidx < 0) 695 return (error); 696 #endif 697 /* 698 * This is a pretty disgustingly long chain, but I am not 699 * sure there is any better way. Passing the fdidx into 700 * VOP_OPEN() offers us more information than just passing 701 * the file *. 702 */ 703 fp = ap->a_td->td_proc->p_fd->fd_ofiles[ap->a_fdidx]; 704 KASSERT(fp->f_ops == &badfileops, 705 ("Could not vnode bypass device on fdops %p", fp->f_ops)); 706 fp->f_ops = &devfs_ops_f; 707 fp->f_data = dev; 708 return (error); 709 } 710 711 static int 712 devfs_pathconf(struct vop_pathconf_args *ap) 713 { 714 715 switch (ap->a_name) { 716 case _PC_MAC_PRESENT: 717 #ifdef MAC 718 /* 719 * If MAC is enabled, devfs automatically supports 720 * trivial non-persistant label storage. 721 */ 722 *ap->a_retval = 1; 723 #else 724 *ap->a_retval = 0; 725 #endif 726 return (0); 727 default: 728 return (vop_stdpathconf(ap)); 729 } 730 /* NOTREACHED */ 731 } 732 733 /* ARGSUSED */ 734 static int 735 devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td) 736 { 737 struct cdev *dev; 738 struct cdevsw *dsw; 739 int error; 740 741 error = devfs_fp_check(fp, &dev, &dsw); 742 if (error) 743 return (error); 744 error = dsw->d_poll(dev, events, td); 745 dev_relthread(dev); 746 return(error); 747 } 748 749 /* 750 * Print out the contents of a special device vnode. 751 */ 752 static int 753 devfs_print(struct vop_print_args *ap) 754 { 755 756 printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev)); 757 return (0); 758 } 759 760 /* ARGSUSED */ 761 static int 762 devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td) 763 { 764 struct cdev *dev; 765 int ioflag, error, resid; 766 struct cdevsw *dsw; 767 768 error = devfs_fp_check(fp, &dev, &dsw); 769 if (error) 770 return (error); 771 resid = uio->uio_resid; 772 ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT); 773 if (ioflag & O_DIRECT) 774 ioflag |= IO_DIRECT; 775 776 if ((flags & FOF_OFFSET) == 0) 777 uio->uio_offset = fp->f_offset; 778 779 error = dsw->d_read(dev, uio, ioflag); 780 dev_relthread(dev); 781 if (uio->uio_resid != resid || (error == 0 && resid != 0)) 782 vfs_timestamp(&dev->si_atime); 783 784 if ((flags & FOF_OFFSET) == 0) 785 fp->f_offset = uio->uio_offset; 786 fp->f_nextoff = uio->uio_offset; 787 return (error); 788 } 789 790 static int 791 devfs_readdir(struct vop_readdir_args *ap) 792 { 793 int error; 794 struct uio *uio; 795 struct dirent *dp; 796 struct devfs_dirent *dd; 797 struct devfs_dirent *de; 798 struct devfs_mount *dmp; 799 off_t off, oldoff; 800 801 if (ap->a_vp->v_type != VDIR) 802 return (ENOTDIR); 803 804 uio = ap->a_uio; 805 if (uio->uio_offset < 0) 806 return (EINVAL); 807 808 dmp = VFSTODEVFS(ap->a_vp->v_mount); 809 sx_xlock(&dmp->dm_lock); 810 devfs_populate(dmp); 811 error = 0; 812 de = ap->a_vp->v_data; 813 off = 0; 814 oldoff = uio->uio_offset; 815 TAILQ_FOREACH(dd, &de->de_dlist, de_list) { 816 KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__)); 817 if (dd->de_flags & DE_WHITEOUT) 818 continue; 819 if (dd->de_dirent->d_type == DT_DIR) 820 de = dd->de_dir; 821 else 822 de = dd; 823 dp = dd->de_dirent; 824 if (dp->d_reclen > uio->uio_resid) 825 break; 826 dp->d_fileno = de->de_inode; 827 if (off >= uio->uio_offset) { 828 error = vfs_read_dirent(ap, dp, off); 829 if (error) 830 break; 831 } 832 off += dp->d_reclen; 833 } 834 sx_xunlock(&dmp->dm_lock); 835 uio->uio_offset = off; 836 return (error); 837 } 838 839 static int 840 devfs_readlink(struct vop_readlink_args *ap) 841 { 842 struct devfs_dirent *de; 843 844 de = ap->a_vp->v_data; 845 return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio)); 846 } 847 848 static int 849 devfs_reclaim(struct vop_reclaim_args *ap) 850 { 851 struct vnode *vp = ap->a_vp; 852 struct devfs_dirent *de; 853 struct cdev *dev; 854 855 de = vp->v_data; 856 if (de != NULL) 857 de->de_vnode = NULL; 858 vp->v_data = NULL; 859 vnode_destroy_vobject(vp); 860 861 dev = vp->v_rdev; 862 vp->v_rdev = NULL; 863 864 if (dev == NULL) 865 return (0); 866 867 dev_lock(); 868 dev->si_usecount -= vp->v_usecount; 869 dev_unlock(); 870 dev_rel(dev); 871 return (0); 872 } 873 874 static int 875 devfs_remove(struct vop_remove_args *ap) 876 { 877 struct vnode *vp = ap->a_vp; 878 struct devfs_dirent *dd; 879 struct devfs_dirent *de; 880 struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount); 881 882 sx_xlock(&dmp->dm_lock); 883 dd = ap->a_dvp->v_data; 884 de = vp->v_data; 885 if (de->de_cdp == NULL) { 886 TAILQ_REMOVE(&dd->de_dlist, de, de_list); 887 devfs_delete(dmp, de); 888 } else { 889 de->de_flags |= DE_WHITEOUT; 890 } 891 sx_xunlock(&dmp->dm_lock); 892 return (0); 893 } 894 895 /* 896 * Revoke is called on a tty when a terminal session ends. The vnode 897 * is orphaned by setting v_op to deadfs so we need to let go of it 898 * as well so that we create a new one next time around. 899 * 900 * XXX: locking :-( 901 * XXX: We mess around with other mountpoints without holding their sxlock. 902 * XXX: We hold the devlock() when we zero their vnode pointer, but is that 903 * XXX: enough ? 904 */ 905 static int 906 devfs_revoke(struct vop_revoke_args *ap) 907 { 908 struct vnode *vp = ap->a_vp, *vp2; 909 struct cdev *dev; 910 struct cdev_priv *cdp; 911 struct devfs_dirent *de; 912 int i; 913 914 KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL")); 915 916 dev = vp->v_rdev; 917 cdp = dev->si_priv; 918 for (;;) { 919 dev_lock(); 920 vp2 = NULL; 921 for (i = 0; i <= cdp->cdp_maxdirent; i++) { 922 de = cdp->cdp_dirents[i]; 923 if (de == NULL) 924 continue; 925 vp2 = de->de_vnode; 926 de->de_vnode = NULL; 927 if (vp2 != NULL) 928 break; 929 } 930 dev_unlock(); 931 if (vp2 != NULL) { 932 vgone(vp2); 933 continue; 934 } 935 break; 936 } 937 return (0); 938 } 939 940 static int 941 devfs_rioctl(struct vop_ioctl_args *ap) 942 { 943 int error; 944 struct devfs_mount *dmp; 945 946 dmp = VFSTODEVFS(ap->a_vp->v_mount); 947 sx_xlock(&dmp->dm_lock); 948 devfs_populate(dmp); 949 error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td); 950 sx_xunlock(&dmp->dm_lock); 951 return (error); 952 } 953 954 static int 955 devfs_rread(struct vop_read_args *ap) 956 { 957 958 if (ap->a_vp->v_type != VDIR) 959 return (EINVAL); 960 return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL)); 961 } 962 963 static int 964 devfs_setattr(struct vop_setattr_args *ap) 965 { 966 struct devfs_dirent *de; 967 struct vattr *vap; 968 struct vnode *vp; 969 int c, error; 970 uid_t uid; 971 gid_t gid; 972 973 vap = ap->a_vap; 974 vp = ap->a_vp; 975 if ((vap->va_type != VNON) || 976 (vap->va_nlink != VNOVAL) || 977 (vap->va_fsid != VNOVAL) || 978 (vap->va_fileid != VNOVAL) || 979 (vap->va_blocksize != VNOVAL) || 980 (vap->va_flags != VNOVAL && vap->va_flags != 0) || 981 (vap->va_rdev != VNOVAL) || 982 ((int)vap->va_bytes != VNOVAL) || 983 (vap->va_gen != VNOVAL)) { 984 return (EINVAL); 985 } 986 987 de = vp->v_data; 988 if (vp->v_type == VDIR) 989 de = de->de_dir; 990 991 error = c = 0; 992 if (vap->va_uid == (uid_t)VNOVAL) 993 uid = de->de_uid; 994 else 995 uid = vap->va_uid; 996 if (vap->va_gid == (gid_t)VNOVAL) 997 gid = de->de_gid; 998 else 999 gid = vap->va_gid; 1000 if (uid != de->de_uid || gid != de->de_gid) { 1001 if (((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid || 1002 (gid != de->de_gid && !groupmember(gid, ap->a_cred))) && 1003 (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)) != 0) 1004 return (error); 1005 de->de_uid = uid; 1006 de->de_gid = gid; 1007 c = 1; 1008 } 1009 1010 if (vap->va_mode != (mode_t)VNOVAL) { 1011 if ((ap->a_cred->cr_uid != de->de_uid) && 1012 (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL))) 1013 return (error); 1014 de->de_mode = vap->va_mode; 1015 c = 1; 1016 } 1017 1018 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { 1019 /* See the comment in ufs_vnops::ufs_setattr(). */ 1020 if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, ap->a_td)) && 1021 ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || 1022 (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, ap->a_td)))) 1023 return (error); 1024 if (vap->va_atime.tv_sec != VNOVAL) { 1025 if (vp->v_type == VCHR) 1026 vp->v_rdev->si_atime = vap->va_atime; 1027 else 1028 de->de_atime = vap->va_atime; 1029 } 1030 if (vap->va_mtime.tv_sec != VNOVAL) { 1031 if (vp->v_type == VCHR) 1032 vp->v_rdev->si_mtime = vap->va_mtime; 1033 else 1034 de->de_mtime = vap->va_mtime; 1035 } 1036 c = 1; 1037 } 1038 1039 if (c) { 1040 if (vp->v_type == VCHR) 1041 vfs_timestamp(&vp->v_rdev->si_ctime); 1042 else 1043 vfs_timestamp(&de->de_mtime); 1044 } 1045 return (0); 1046 } 1047 1048 #ifdef MAC 1049 static int 1050 devfs_setlabel(struct vop_setlabel_args *ap) 1051 { 1052 struct vnode *vp; 1053 struct devfs_dirent *de; 1054 1055 vp = ap->a_vp; 1056 de = vp->v_data; 1057 1058 mac_relabel_vnode(ap->a_cred, vp, ap->a_label); 1059 mac_update_devfsdirent(vp->v_mount, de, vp); 1060 1061 return (0); 1062 } 1063 #endif 1064 1065 static int 1066 devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td) 1067 { 1068 1069 return (vnops.fo_stat(fp, sb, cred, td)); 1070 } 1071 1072 static int 1073 devfs_symlink(struct vop_symlink_args *ap) 1074 { 1075 int i, error; 1076 struct devfs_dirent *dd; 1077 struct devfs_dirent *de; 1078 struct devfs_mount *dmp; 1079 struct thread *td; 1080 1081 td = ap->a_cnp->cn_thread; 1082 KASSERT(td == curthread, ("devfs_symlink: td != curthread")); 1083 error = suser(td); 1084 if (error) 1085 return(error); 1086 dmp = VFSTODEVFS(ap->a_dvp->v_mount); 1087 dd = ap->a_dvp->v_data; 1088 de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen); 1089 de->de_uid = 0; 1090 de->de_gid = 0; 1091 de->de_mode = 0755; 1092 de->de_inode = alloc_unr(devfs_inos); 1093 de->de_dirent->d_type = DT_LNK; 1094 i = strlen(ap->a_target) + 1; 1095 de->de_symlink = malloc(i, M_DEVFS, M_WAITOK); 1096 bcopy(ap->a_target, de->de_symlink, i); 1097 sx_xlock(&dmp->dm_lock); 1098 #ifdef MAC 1099 mac_create_devfs_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de); 1100 #endif 1101 TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); 1102 devfs_allocv(de, ap->a_dvp->v_mount, ap->a_vpp, td); 1103 sx_xunlock(&dmp->dm_lock); 1104 return (0); 1105 } 1106 1107 /* ARGSUSED */ 1108 static int 1109 devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td) 1110 { 1111 struct cdev *dev; 1112 int error, ioflag, resid; 1113 struct cdevsw *dsw; 1114 1115 error = devfs_fp_check(fp, &dev, &dsw); 1116 if (error) 1117 return (error); 1118 KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); 1119 ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC); 1120 if (ioflag & O_DIRECT) 1121 ioflag |= IO_DIRECT; 1122 if ((flags & FOF_OFFSET) == 0) 1123 uio->uio_offset = fp->f_offset; 1124 1125 resid = uio->uio_resid; 1126 1127 error = dsw->d_write(dev, uio, ioflag); 1128 dev_relthread(dev); 1129 if (uio->uio_resid != resid || (error == 0 && resid != 0)) { 1130 vfs_timestamp(&dev->si_ctime); 1131 dev->si_mtime = dev->si_ctime; 1132 } 1133 1134 if ((flags & FOF_OFFSET) == 0) 1135 fp->f_offset = uio->uio_offset; 1136 fp->f_nextoff = uio->uio_offset; 1137 return (error); 1138 } 1139 1140 dev_t 1141 dev2udev(struct cdev *x) 1142 { 1143 if (x == NULL) 1144 return (NODEV); 1145 return (x->si_priv->cdp_inode); 1146 } 1147 1148 static struct fileops devfs_ops_f = { 1149 .fo_read = devfs_read_f, 1150 .fo_write = devfs_write_f, 1151 .fo_ioctl = devfs_ioctl_f, 1152 .fo_poll = devfs_poll_f, 1153 .fo_kqfilter = devfs_kqfilter_f, 1154 .fo_stat = devfs_stat_f, 1155 .fo_close = devfs_close_f, 1156 .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE 1157 }; 1158 1159 static struct vop_vector devfs_vnodeops = { 1160 .vop_default = &default_vnodeops, 1161 1162 .vop_access = devfs_access, 1163 .vop_getattr = devfs_getattr, 1164 .vop_ioctl = devfs_rioctl, 1165 .vop_lookup = devfs_lookup, 1166 .vop_mknod = devfs_mknod, 1167 .vop_pathconf = devfs_pathconf, 1168 .vop_read = devfs_rread, 1169 .vop_readdir = devfs_readdir, 1170 .vop_readlink = devfs_readlink, 1171 .vop_reclaim = devfs_reclaim, 1172 .vop_remove = devfs_remove, 1173 .vop_revoke = devfs_revoke, 1174 .vop_setattr = devfs_setattr, 1175 #ifdef MAC 1176 .vop_setlabel = devfs_setlabel, 1177 #endif 1178 .vop_symlink = devfs_symlink, 1179 }; 1180 1181 static struct vop_vector devfs_specops = { 1182 .vop_default = &default_vnodeops, 1183 1184 .vop_access = devfs_access, 1185 .vop_advlock = devfs_advlock, 1186 .vop_bmap = VOP_PANIC, 1187 .vop_close = devfs_close, 1188 .vop_create = VOP_PANIC, 1189 .vop_fsync = devfs_fsync, 1190 .vop_getattr = devfs_getattr, 1191 .vop_lease = VOP_NULL, 1192 .vop_link = VOP_PANIC, 1193 .vop_mkdir = VOP_PANIC, 1194 .vop_mknod = VOP_PANIC, 1195 .vop_open = devfs_open, 1196 .vop_pathconf = devfs_pathconf, 1197 .vop_print = devfs_print, 1198 .vop_read = VOP_PANIC, 1199 .vop_readdir = VOP_PANIC, 1200 .vop_readlink = VOP_PANIC, 1201 .vop_reallocblks = VOP_PANIC, 1202 .vop_reclaim = devfs_reclaim, 1203 .vop_remove = devfs_remove, 1204 .vop_rename = VOP_PANIC, 1205 .vop_revoke = devfs_revoke, 1206 .vop_rmdir = VOP_PANIC, 1207 .vop_setattr = devfs_setattr, 1208 #ifdef MAC 1209 .vop_setlabel = devfs_setlabel, 1210 #endif 1211 .vop_strategy = VOP_PANIC, 1212 .vop_symlink = VOP_PANIC, 1213 .vop_write = VOP_PANIC, 1214 }; 1215 1216 /* 1217 * Our calling convention to the device drivers used to be that we passed 1218 * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_ 1219 * flags instead since that's what open(), close() and ioctl() takes and 1220 * we don't really want vnode.h in device drivers. 1221 * We solved the source compatibility by redefining some vnode flags to 1222 * be the same as the fcntl ones and by sending down the bitwise OR of 1223 * the respective fcntl/vnode flags. These CTASSERTS make sure nobody 1224 * pulls the rug out under this. 1225 */ 1226 CTASSERT(O_NONBLOCK == IO_NDELAY); 1227 CTASSERT(O_FSYNC == IO_SYNC); 1228