1 /*- 2 * Copyright (c) 2000-2004 3 * Poul-Henning Kamp. All rights reserved. 4 * Copyright (c) 1989, 1992-1993, 1995 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software donated to Berkeley by 8 * Jan-Simon Pendry. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)kernfs_vnops.c 8.15 (Berkeley) 5/21/95 32 * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43 33 * 34 * $FreeBSD$ 35 */ 36 37 /* 38 * TODO: 39 * remove empty directories 40 * mkdir: want it ? 41 */ 42 43 #include <opt_devfs.h> 44 #include <opt_mac.h> 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/conf.h> 49 #include <sys/dirent.h> 50 #include <sys/fcntl.h> 51 #include <sys/file.h> 52 #include <sys/filedesc.h> 53 #include <sys/filio.h> 54 #include <sys/kernel.h> 55 #include <sys/lock.h> 56 #include <sys/mac.h> 57 #include <sys/malloc.h> 58 #include <sys/mount.h> 59 #include <sys/namei.h> 60 #include <sys/proc.h> 61 #include <sys/stat.h> 62 #include <sys/sx.h> 63 #include <sys/time.h> 64 #include <sys/ttycom.h> 65 #include <sys/unistd.h> 66 #include <sys/vnode.h> 67 68 static struct vop_vector devfs_vnodeops; 69 static struct vop_vector devfs_specops; 70 static struct fileops devfs_ops_f; 71 72 #include <fs/devfs/devfs.h> 73 #include <fs/devfs/devfs_int.h> 74 75 static int 76 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp) 77 { 78 79 *devp = fp->f_vnode->v_rdev; 80 if (*devp != fp->f_data) 81 return (ENXIO); 82 KASSERT((*devp)->si_refcount > 0, 83 ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp))); 84 *dswp = dev_refthread(*devp); 85 if (*dswp == NULL) 86 return (ENXIO); 87 return (0); 88 } 89 90 /* 91 * Construct the fully qualified path name relative to the mountpoint 92 */ 93 static char * 94 devfs_fqpn(char *buf, struct vnode *dvp, struct componentname *cnp) 95 { 96 int i; 97 struct devfs_dirent *de, *dd; 98 struct devfs_mount *dmp; 99 100 dmp = VFSTODEVFS(dvp->v_mount); 101 dd = dvp->v_data; 102 i = SPECNAMELEN; 103 buf[i] = '\0'; 104 i -= cnp->cn_namelen; 105 if (i < 0) 106 return (NULL); 107 bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen); 108 de = dd; 109 while (de != dmp->dm_rootdir) { 110 i--; 111 if (i < 0) 112 return (NULL); 113 buf[i] = '/'; 114 i -= de->de_dirent->d_namlen; 115 if (i < 0) 116 return (NULL); 117 bcopy(de->de_dirent->d_name, buf + i, 118 de->de_dirent->d_namlen); 119 de = TAILQ_FIRST(&de->de_dlist); /* "." */ 120 de = TAILQ_NEXT(de, de_list); /* ".." */ 121 de = de->de_dir; 122 } 123 return (buf + i); 124 } 125 126 int 127 devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td) 128 { 129 int error; 130 struct vnode *vp; 131 struct cdev *dev; 132 133 KASSERT(td == curthread, ("devfs_allocv: td != curthread")); 134 loop: 135 vp = de->de_vnode; 136 if (vp != NULL) { 137 if (vget(vp, LK_EXCLUSIVE, td)) 138 goto loop; 139 *vpp = vp; 140 return (0); 141 } 142 if (de->de_dirent->d_type == DT_CHR) { 143 if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) 144 return (ENOENT); 145 dev = &de->de_cdp->cdp_c; 146 } else { 147 dev = NULL; 148 } 149 error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp); 150 if (error != 0) { 151 printf("devfs_allocv: failed to allocate new vnode\n"); 152 return (error); 153 } 154 155 if (de->de_dirent->d_type == DT_CHR) { 156 vp->v_type = VCHR; 157 VI_LOCK(vp); 158 dev_lock(); 159 dev_refl(dev); 160 vp->v_rdev = dev; 161 KASSERT(vp->v_usecount == 1, 162 ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount)); 163 dev->si_usecount += vp->v_usecount; 164 dev_unlock(); 165 VI_UNLOCK(vp); 166 vp->v_op = &devfs_specops; 167 } else if (de->de_dirent->d_type == DT_DIR) { 168 vp->v_type = VDIR; 169 } else if (de->de_dirent->d_type == DT_LNK) { 170 vp->v_type = VLNK; 171 } else { 172 vp->v_type = VBAD; 173 } 174 vp->v_data = de; 175 de->de_vnode = vp; 176 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 177 #ifdef MAC 178 mac_associate_vnode_devfs(mp, de, vp); 179 #endif 180 *vpp = vp; 181 return (0); 182 } 183 184 static int 185 devfs_access(struct vop_access_args *ap) 186 { 187 struct vnode *vp = ap->a_vp; 188 struct devfs_dirent *de; 189 int error; 190 191 de = vp->v_data; 192 if (vp->v_type == VDIR) 193 de = de->de_dir; 194 195 error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid, 196 ap->a_mode, ap->a_cred, NULL); 197 if (!error) 198 return (error); 199 if (error != EACCES) 200 return (error); 201 /* We do, however, allow access to the controlling terminal */ 202 if (!(ap->a_td->td_proc->p_flag & P_CONTROLT)) 203 return (error); 204 if (ap->a_td->td_proc->p_session->s_ttyvp == de->de_vnode) 205 return (0); 206 return (error); 207 } 208 209 /* ARGSUSED */ 210 static int 211 devfs_advlock(struct vop_advlock_args *ap) 212 { 213 214 return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL); 215 } 216 217 /* ARGSUSED */ 218 static int 219 devfs_close(struct vop_close_args *ap) 220 { 221 struct vnode *vp = ap->a_vp, *oldvp; 222 struct thread *td = ap->a_td; 223 struct cdev *dev = vp->v_rdev; 224 struct cdevsw *dsw; 225 int error; 226 227 /* 228 * Hack: a tty device that is a controlling terminal 229 * has a reference from the session structure. 230 * We cannot easily tell that a character device is 231 * a controlling terminal, unless it is the closing 232 * process' controlling terminal. In that case, 233 * if the reference count is 2 (this last descriptor 234 * plus the session), release the reference from the session. 235 */ 236 237 /* 238 * This needs to be rewritten to take the vp interlock into 239 * consideration. 240 */ 241 242 oldvp = NULL; 243 sx_xlock(&proctree_lock); 244 if (td && vp == td->td_proc->p_session->s_ttyvp) { 245 SESS_LOCK(td->td_proc->p_session); 246 VI_LOCK(vp); 247 if (count_dev(dev) == 2 && (vp->v_iflag & VI_DOOMED) == 0) { 248 td->td_proc->p_session->s_ttyvp = NULL; 249 oldvp = vp; 250 } 251 VI_UNLOCK(vp); 252 SESS_UNLOCK(td->td_proc->p_session); 253 } 254 sx_xunlock(&proctree_lock); 255 if (oldvp != NULL) 256 vrele(oldvp); 257 /* 258 * We do not want to really close the device if it 259 * is still in use unless we are trying to close it 260 * forcibly. Since every use (buffer, vnode, swap, cmap) 261 * holds a reference to the vnode, and because we mark 262 * any other vnodes that alias this device, when the 263 * sum of the reference counts on all the aliased 264 * vnodes descends to one, we are on last close. 265 */ 266 dsw = dev_refthread(dev); 267 if (dsw == NULL) 268 return (ENXIO); 269 VI_LOCK(vp); 270 if (vp->v_iflag & VI_DOOMED) { 271 /* Forced close. */ 272 } else if (dsw->d_flags & D_TRACKCLOSE) { 273 /* Keep device updated on status. */ 274 } else if (count_dev(dev) > 1) { 275 VI_UNLOCK(vp); 276 dev_relthread(dev); 277 return (0); 278 } 279 VI_UNLOCK(vp); 280 KASSERT(dev->si_refcount > 0, 281 ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev))); 282 if (!(dsw->d_flags & D_NEEDGIANT)) { 283 DROP_GIANT(); 284 error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td); 285 PICKUP_GIANT(); 286 } else { 287 error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td); 288 } 289 dev_relthread(dev); 290 return (error); 291 } 292 293 static int 294 devfs_close_f(struct file *fp, struct thread *td) 295 { 296 297 return (vnops.fo_close(fp, td)); 298 } 299 300 /* ARGSUSED */ 301 static int 302 devfs_fsync(struct vop_fsync_args *ap) 303 { 304 if (!vn_isdisk(ap->a_vp, NULL)) 305 return (0); 306 307 return (vop_stdfsync(ap)); 308 } 309 310 static int 311 devfs_getattr(struct vop_getattr_args *ap) 312 { 313 struct vnode *vp = ap->a_vp; 314 struct vattr *vap = ap->a_vap; 315 int error = 0; 316 struct devfs_dirent *de; 317 struct cdev *dev; 318 319 de = vp->v_data; 320 KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp)); 321 if (vp->v_type == VDIR) { 322 de = de->de_dir; 323 KASSERT(de != NULL, 324 ("Null dir dirent in devfs_getattr vp=%p", vp)); 325 } 326 bzero((caddr_t) vap, sizeof(*vap)); 327 vattr_null(vap); 328 vap->va_uid = de->de_uid; 329 vap->va_gid = de->de_gid; 330 vap->va_mode = de->de_mode; 331 if (vp->v_type == VLNK) 332 vap->va_size = strlen(de->de_symlink); 333 else if (vp->v_type == VDIR) 334 vap->va_size = vap->va_bytes = DEV_BSIZE; 335 else 336 vap->va_size = 0; 337 if (vp->v_type != VDIR) 338 vap->va_bytes = 0; 339 vap->va_blocksize = DEV_BSIZE; 340 vap->va_type = vp->v_type; 341 342 #define fix(aa) \ 343 do { \ 344 if ((aa).tv_sec == 0) { \ 345 (aa).tv_sec = boottime.tv_sec; \ 346 (aa).tv_nsec = boottime.tv_usec * 1000; \ 347 } \ 348 } while (0) 349 350 if (vp->v_type != VCHR) { 351 fix(de->de_atime); 352 vap->va_atime = de->de_atime; 353 fix(de->de_mtime); 354 vap->va_mtime = de->de_mtime; 355 fix(de->de_ctime); 356 vap->va_ctime = de->de_ctime; 357 } else { 358 dev = vp->v_rdev; 359 fix(dev->si_atime); 360 vap->va_atime = dev->si_atime; 361 fix(dev->si_mtime); 362 vap->va_mtime = dev->si_mtime; 363 fix(dev->si_ctime); 364 vap->va_ctime = dev->si_ctime; 365 366 vap->va_rdev = dev->si_priv->cdp_inode; 367 } 368 vap->va_gen = 0; 369 vap->va_flags = 0; 370 vap->va_nlink = de->de_links; 371 vap->va_fileid = de->de_inode; 372 373 return (error); 374 } 375 376 /* ARGSUSED */ 377 static int 378 devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td) 379 { 380 struct cdev *dev; 381 struct cdevsw *dsw; 382 struct vnode *vp; 383 struct vnode *vpold; 384 int error, i; 385 const char *p; 386 struct fiodgname_arg *fgn; 387 388 error = devfs_fp_check(fp, &dev, &dsw); 389 if (error) 390 return (error); 391 392 if (com == FIODTYPE) { 393 *(int *)data = dsw->d_flags & D_TYPEMASK; 394 dev_relthread(dev); 395 return (0); 396 } else if (com == FIODGNAME) { 397 fgn = data; 398 p = devtoname(dev); 399 i = strlen(p) + 1; 400 if (i > fgn->len) 401 error = EINVAL; 402 else 403 error = copyout(p, fgn->buf, i); 404 dev_relthread(dev); 405 return (error); 406 } 407 error = dsw->d_ioctl(dev, com, data, fp->f_flag, td); 408 dev_relthread(dev); 409 if (error == ENOIOCTL) 410 error = ENOTTY; 411 if (error == 0 && com == TIOCSCTTY) { 412 vp = fp->f_vnode; 413 414 /* Do nothing if reassigning same control tty */ 415 sx_slock(&proctree_lock); 416 if (td->td_proc->p_session->s_ttyvp == vp) { 417 sx_sunlock(&proctree_lock); 418 return (0); 419 } 420 421 mtx_lock(&Giant); 422 423 vpold = td->td_proc->p_session->s_ttyvp; 424 VREF(vp); 425 SESS_LOCK(td->td_proc->p_session); 426 td->td_proc->p_session->s_ttyvp = vp; 427 SESS_UNLOCK(td->td_proc->p_session); 428 429 sx_sunlock(&proctree_lock); 430 431 /* Get rid of reference to old control tty */ 432 if (vpold) 433 vrele(vpold); 434 mtx_unlock(&Giant); 435 } 436 return (error); 437 } 438 439 /* ARGSUSED */ 440 static int 441 devfs_kqfilter_f(struct file *fp, struct knote *kn) 442 { 443 struct cdev *dev; 444 struct cdevsw *dsw; 445 int error; 446 447 error = devfs_fp_check(fp, &dev, &dsw); 448 if (error) 449 return (error); 450 error = dsw->d_kqfilter(dev, kn); 451 dev_relthread(dev); 452 return (error); 453 } 454 455 static int 456 devfs_lookupx(struct vop_lookup_args *ap) 457 { 458 struct componentname *cnp; 459 struct vnode *dvp, **vpp; 460 struct thread *td; 461 struct devfs_dirent *de, *dd; 462 struct devfs_dirent **dde; 463 struct devfs_mount *dmp; 464 struct cdev *cdev; 465 int error, flags, nameiop; 466 char specname[SPECNAMELEN + 1], *pname; 467 468 cnp = ap->a_cnp; 469 vpp = ap->a_vpp; 470 dvp = ap->a_dvp; 471 pname = cnp->cn_nameptr; 472 td = cnp->cn_thread; 473 flags = cnp->cn_flags; 474 nameiop = cnp->cn_nameiop; 475 dmp = VFSTODEVFS(dvp->v_mount); 476 dd = dvp->v_data; 477 *vpp = NULLVP; 478 479 if ((flags & ISLASTCN) && nameiop == RENAME) 480 return (EOPNOTSUPP); 481 482 if (dvp->v_type != VDIR) 483 return (ENOTDIR); 484 485 if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT)) 486 return (EIO); 487 488 error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td); 489 if (error) 490 return (error); 491 492 if (cnp->cn_namelen == 1 && *pname == '.') { 493 if ((flags & ISLASTCN) && nameiop != LOOKUP) 494 return (EINVAL); 495 *vpp = dvp; 496 VREF(dvp); 497 return (0); 498 } 499 500 if (flags & ISDOTDOT) { 501 if ((flags & ISLASTCN) && nameiop != LOOKUP) 502 return (EINVAL); 503 VOP_UNLOCK(dvp, 0, td); 504 de = TAILQ_FIRST(&dd->de_dlist); /* "." */ 505 de = TAILQ_NEXT(de, de_list); /* ".." */ 506 de = de->de_dir; 507 error = devfs_allocv(de, dvp->v_mount, vpp, td); 508 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td); 509 return (error); 510 } 511 512 devfs_populate(dmp); 513 dd = dvp->v_data; 514 de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen); 515 while (de == NULL) { /* While(...) so we can use break */ 516 517 if (nameiop == DELETE) 518 return (ENOENT); 519 520 /* 521 * OK, we didn't have an entry for the name we were asked for 522 * so we try to see if anybody can create it on demand. 523 */ 524 pname = devfs_fqpn(specname, dvp, cnp); 525 if (pname == NULL) 526 break; 527 528 cdev = NULL; 529 EVENTHANDLER_INVOKE(dev_clone, 530 td->td_ucred, pname, strlen(pname), &cdev); 531 if (cdev == NULL) 532 break; 533 534 devfs_populate(dmp); 535 536 dev_lock(); 537 dde = &cdev->si_priv->cdp_dirents[dmp->dm_idx]; 538 if (dde != NULL && *dde != NULL) 539 de = *dde; 540 dev_unlock(); 541 dev_rel(cdev); 542 break; 543 } 544 545 if (de == NULL || de->de_flags & DE_WHITEOUT) { 546 if ((nameiop == CREATE || nameiop == RENAME) && 547 (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) { 548 cnp->cn_flags |= SAVENAME; 549 return (EJUSTRETURN); 550 } 551 return (ENOENT); 552 } 553 554 if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) { 555 error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td); 556 if (error) 557 return (error); 558 if (*vpp == dvp) { 559 VREF(dvp); 560 *vpp = dvp; 561 return (0); 562 } 563 } 564 error = devfs_allocv(de, dvp->v_mount, vpp, td); 565 return (error); 566 } 567 568 static int 569 devfs_lookup(struct vop_lookup_args *ap) 570 { 571 int j; 572 struct devfs_mount *dmp; 573 574 dmp = VFSTODEVFS(ap->a_dvp->v_mount); 575 sx_xlock(&dmp->dm_lock); 576 j = devfs_lookupx(ap); 577 sx_xunlock(&dmp->dm_lock); 578 return (j); 579 } 580 581 static int 582 devfs_mknod(struct vop_mknod_args *ap) 583 { 584 struct componentname *cnp; 585 struct vnode *dvp, **vpp; 586 struct thread *td; 587 struct devfs_dirent *dd, *de; 588 struct devfs_mount *dmp; 589 int error; 590 591 /* 592 * The only type of node we should be creating here is a 593 * character device, for anything else return EOPNOTSUPP. 594 */ 595 if (ap->a_vap->va_type != VCHR) 596 return (EOPNOTSUPP); 597 dvp = ap->a_dvp; 598 dmp = VFSTODEVFS(dvp->v_mount); 599 sx_xlock(&dmp->dm_lock); 600 601 cnp = ap->a_cnp; 602 vpp = ap->a_vpp; 603 td = cnp->cn_thread; 604 dd = dvp->v_data; 605 606 error = ENOENT; 607 TAILQ_FOREACH(de, &dd->de_dlist, de_list) { 608 if (cnp->cn_namelen != de->de_dirent->d_namlen) 609 continue; 610 if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name, 611 de->de_dirent->d_namlen) != 0) 612 continue; 613 if (de->de_flags & DE_WHITEOUT) 614 break; 615 goto notfound; 616 } 617 if (de == NULL) 618 goto notfound; 619 de->de_flags &= ~DE_WHITEOUT; 620 error = devfs_allocv(de, dvp->v_mount, vpp, td); 621 notfound: 622 sx_xunlock(&dmp->dm_lock); 623 return (error); 624 } 625 626 /* ARGSUSED */ 627 static int 628 devfs_open(struct vop_open_args *ap) 629 { 630 struct thread *td = ap->a_td; 631 struct vnode *vp = ap->a_vp; 632 struct cdev *dev = vp->v_rdev; 633 struct file *fp; 634 int error; 635 struct cdevsw *dsw; 636 637 if (vp->v_type == VBLK) 638 return (ENXIO); 639 640 if (dev == NULL) 641 return (ENXIO); 642 643 /* Make this field valid before any I/O in d_open. */ 644 if (dev->si_iosize_max == 0) 645 dev->si_iosize_max = DFLTPHYS; 646 647 if (vn_isdisk(vp, NULL) && 648 ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { 649 /* 650 * When running in very secure mode, do not allow 651 * opens for writing of any disks. 652 * XXX: should be in geom_dev.c, but we lack the cred there. 653 */ 654 error = securelevel_ge(td->td_ucred, 2); 655 if (error) 656 return (error); 657 } 658 659 dsw = dev_refthread(dev); 660 if (dsw == NULL) 661 return (ENXIO); 662 663 /* XXX: Special casing of ttys for deadfs. Probably redundant. */ 664 if (dsw->d_flags & D_TTY) 665 vp->v_vflag |= VV_ISTTY; 666 667 VOP_UNLOCK(vp, 0, td); 668 669 if(!(dsw->d_flags & D_NEEDGIANT)) { 670 DROP_GIANT(); 671 if (dsw->d_fdopen != NULL) 672 error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx); 673 else 674 error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td); 675 PICKUP_GIANT(); 676 } else { 677 if (dsw->d_fdopen != NULL) 678 error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx); 679 else 680 error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td); 681 } 682 683 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 684 685 dev_relthread(dev); 686 687 if (error) 688 return (error); 689 690 #if 0 /* /dev/console */ 691 KASSERT(ap->a_fdidx >= 0, 692 ("Could not vnode bypass device on fd %d", ap->a_fdidx)); 693 #else 694 if(ap->a_fdidx < 0) 695 return (error); 696 #endif 697 /* 698 * This is a pretty disgustingly long chain, but I am not 699 * sure there is any better way. Passing the fdidx into 700 * VOP_OPEN() offers us more information than just passing 701 * the file *. 702 */ 703 fp = ap->a_td->td_proc->p_fd->fd_ofiles[ap->a_fdidx]; 704 KASSERT(fp->f_ops == &badfileops, 705 ("Could not vnode bypass device on fdops %p", fp->f_ops)); 706 fp->f_ops = &devfs_ops_f; 707 fp->f_data = dev; 708 return (error); 709 } 710 711 static int 712 devfs_pathconf(struct vop_pathconf_args *ap) 713 { 714 715 switch (ap->a_name) { 716 case _PC_MAC_PRESENT: 717 #ifdef MAC 718 /* 719 * If MAC is enabled, devfs automatically supports 720 * trivial non-persistant label storage. 721 */ 722 *ap->a_retval = 1; 723 #else 724 *ap->a_retval = 0; 725 #endif 726 return (0); 727 default: 728 return (vop_stdpathconf(ap)); 729 } 730 /* NOTREACHED */ 731 } 732 733 /* ARGSUSED */ 734 static int 735 devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td) 736 { 737 struct cdev *dev; 738 struct cdevsw *dsw; 739 int error; 740 741 error = devfs_fp_check(fp, &dev, &dsw); 742 if (error) 743 return (error); 744 error = dsw->d_poll(dev, events, td); 745 dev_relthread(dev); 746 return(error); 747 } 748 749 /* 750 * Print out the contents of a special device vnode. 751 */ 752 static int 753 devfs_print(struct vop_print_args *ap) 754 { 755 756 printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev)); 757 return (0); 758 } 759 760 /* ARGSUSED */ 761 static int 762 devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td) 763 { 764 struct cdev *dev; 765 int ioflag, error, resid; 766 struct cdevsw *dsw; 767 768 error = devfs_fp_check(fp, &dev, &dsw); 769 if (error) 770 return (error); 771 resid = uio->uio_resid; 772 ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT); 773 if (ioflag & O_DIRECT) 774 ioflag |= IO_DIRECT; 775 776 if ((flags & FOF_OFFSET) == 0) 777 uio->uio_offset = fp->f_offset; 778 779 error = dsw->d_read(dev, uio, ioflag); 780 dev_relthread(dev); 781 if (uio->uio_resid != resid || (error == 0 && resid != 0)) 782 vfs_timestamp(&dev->si_atime); 783 784 if ((flags & FOF_OFFSET) == 0) 785 fp->f_offset = uio->uio_offset; 786 fp->f_nextoff = uio->uio_offset; 787 return (error); 788 } 789 790 static int 791 devfs_readdir(struct vop_readdir_args *ap) 792 { 793 int error; 794 struct uio *uio; 795 struct dirent *dp; 796 struct devfs_dirent *dd; 797 struct devfs_dirent *de; 798 struct devfs_mount *dmp; 799 off_t off, oldoff; 800 int *tmp_ncookies = NULL; 801 802 if (ap->a_vp->v_type != VDIR) 803 return (ENOTDIR); 804 805 uio = ap->a_uio; 806 if (uio->uio_offset < 0) 807 return (EINVAL); 808 809 /* 810 * XXX: This is a temporary hack to get around this filesystem not 811 * supporting cookies. We store the location of the ncookies pointer 812 * in a temporary variable before calling vfs_subr.c:vfs_read_dirent() 813 * and set the number of cookies to 0. We then set the pointer to 814 * NULL so that vfs_read_dirent doesn't try to call realloc() on 815 * ap->a_cookies. Later in this function, we restore the ap->a_ncookies 816 * pointer to its original location before returning to the caller. 817 */ 818 if (ap->a_ncookies != NULL) { 819 tmp_ncookies = ap->a_ncookies; 820 *ap->a_ncookies = 0; 821 ap->a_ncookies = NULL; 822 } 823 824 dmp = VFSTODEVFS(ap->a_vp->v_mount); 825 sx_xlock(&dmp->dm_lock); 826 devfs_populate(dmp); 827 error = 0; 828 de = ap->a_vp->v_data; 829 off = 0; 830 oldoff = uio->uio_offset; 831 TAILQ_FOREACH(dd, &de->de_dlist, de_list) { 832 KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__)); 833 if (dd->de_flags & DE_WHITEOUT) 834 continue; 835 if (dd->de_dirent->d_type == DT_DIR) 836 de = dd->de_dir; 837 else 838 de = dd; 839 dp = dd->de_dirent; 840 if (dp->d_reclen > uio->uio_resid) 841 break; 842 dp->d_fileno = de->de_inode; 843 if (off >= uio->uio_offset) { 844 error = vfs_read_dirent(ap, dp, off); 845 if (error) 846 break; 847 } 848 off += dp->d_reclen; 849 } 850 sx_xunlock(&dmp->dm_lock); 851 uio->uio_offset = off; 852 853 /* 854 * Restore ap->a_ncookies if it wasn't originally NULL in the first 855 * place. 856 */ 857 if (tmp_ncookies != NULL) 858 ap->a_ncookies = tmp_ncookies; 859 860 return (error); 861 } 862 863 static int 864 devfs_readlink(struct vop_readlink_args *ap) 865 { 866 struct devfs_dirent *de; 867 868 de = ap->a_vp->v_data; 869 return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio)); 870 } 871 872 static int 873 devfs_reclaim(struct vop_reclaim_args *ap) 874 { 875 struct vnode *vp = ap->a_vp; 876 struct devfs_dirent *de; 877 struct cdev *dev; 878 879 de = vp->v_data; 880 if (de != NULL) 881 de->de_vnode = NULL; 882 vp->v_data = NULL; 883 vnode_destroy_vobject(vp); 884 885 dev = vp->v_rdev; 886 vp->v_rdev = NULL; 887 888 if (dev == NULL) 889 return (0); 890 891 dev_lock(); 892 dev->si_usecount -= vp->v_usecount; 893 dev_unlock(); 894 dev_rel(dev); 895 return (0); 896 } 897 898 static int 899 devfs_remove(struct vop_remove_args *ap) 900 { 901 struct vnode *vp = ap->a_vp; 902 struct devfs_dirent *dd; 903 struct devfs_dirent *de; 904 struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount); 905 906 sx_xlock(&dmp->dm_lock); 907 dd = ap->a_dvp->v_data; 908 de = vp->v_data; 909 if (de->de_cdp == NULL) { 910 TAILQ_REMOVE(&dd->de_dlist, de, de_list); 911 devfs_delete(dmp, de); 912 } else { 913 de->de_flags |= DE_WHITEOUT; 914 } 915 sx_xunlock(&dmp->dm_lock); 916 return (0); 917 } 918 919 /* 920 * Revoke is called on a tty when a terminal session ends. The vnode 921 * is orphaned by setting v_op to deadfs so we need to let go of it 922 * as well so that we create a new one next time around. 923 * 924 * XXX: locking :-( 925 * XXX: We mess around with other mountpoints without holding their sxlock. 926 * XXX: We hold the devlock() when we zero their vnode pointer, but is that 927 * XXX: enough ? 928 */ 929 static int 930 devfs_revoke(struct vop_revoke_args *ap) 931 { 932 struct vnode *vp = ap->a_vp, *vp2; 933 struct cdev *dev; 934 struct cdev_priv *cdp; 935 struct devfs_dirent *de; 936 int i; 937 938 KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL")); 939 940 dev = vp->v_rdev; 941 cdp = dev->si_priv; 942 for (;;) { 943 dev_lock(); 944 vp2 = NULL; 945 for (i = 0; i <= cdp->cdp_maxdirent; i++) { 946 de = cdp->cdp_dirents[i]; 947 if (de == NULL) 948 continue; 949 vp2 = de->de_vnode; 950 de->de_vnode = NULL; 951 if (vp2 != NULL) 952 break; 953 } 954 dev_unlock(); 955 if (vp2 != NULL) { 956 vgone(vp2); 957 continue; 958 } 959 break; 960 } 961 return (0); 962 } 963 964 static int 965 devfs_rioctl(struct vop_ioctl_args *ap) 966 { 967 int error; 968 struct devfs_mount *dmp; 969 970 dmp = VFSTODEVFS(ap->a_vp->v_mount); 971 sx_xlock(&dmp->dm_lock); 972 devfs_populate(dmp); 973 error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td); 974 sx_xunlock(&dmp->dm_lock); 975 return (error); 976 } 977 978 static int 979 devfs_rread(struct vop_read_args *ap) 980 { 981 982 if (ap->a_vp->v_type != VDIR) 983 return (EINVAL); 984 return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL)); 985 } 986 987 static int 988 devfs_setattr(struct vop_setattr_args *ap) 989 { 990 struct devfs_dirent *de; 991 struct vattr *vap; 992 struct vnode *vp; 993 int c, error; 994 uid_t uid; 995 gid_t gid; 996 997 vap = ap->a_vap; 998 vp = ap->a_vp; 999 if ((vap->va_type != VNON) || 1000 (vap->va_nlink != VNOVAL) || 1001 (vap->va_fsid != VNOVAL) || 1002 (vap->va_fileid != VNOVAL) || 1003 (vap->va_blocksize != VNOVAL) || 1004 (vap->va_flags != VNOVAL && vap->va_flags != 0) || 1005 (vap->va_rdev != VNOVAL) || 1006 ((int)vap->va_bytes != VNOVAL) || 1007 (vap->va_gen != VNOVAL)) { 1008 return (EINVAL); 1009 } 1010 1011 de = vp->v_data; 1012 if (vp->v_type == VDIR) 1013 de = de->de_dir; 1014 1015 error = c = 0; 1016 if (vap->va_uid == (uid_t)VNOVAL) 1017 uid = de->de_uid; 1018 else 1019 uid = vap->va_uid; 1020 if (vap->va_gid == (gid_t)VNOVAL) 1021 gid = de->de_gid; 1022 else 1023 gid = vap->va_gid; 1024 if (uid != de->de_uid || gid != de->de_gid) { 1025 if (((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid || 1026 (gid != de->de_gid && !groupmember(gid, ap->a_cred))) && 1027 (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)) != 0) 1028 return (error); 1029 de->de_uid = uid; 1030 de->de_gid = gid; 1031 c = 1; 1032 } 1033 1034 if (vap->va_mode != (mode_t)VNOVAL) { 1035 if ((ap->a_cred->cr_uid != de->de_uid) && 1036 (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL))) 1037 return (error); 1038 de->de_mode = vap->va_mode; 1039 c = 1; 1040 } 1041 1042 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { 1043 /* See the comment in ufs_vnops::ufs_setattr(). */ 1044 if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, ap->a_td)) && 1045 ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || 1046 (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, ap->a_td)))) 1047 return (error); 1048 if (vap->va_atime.tv_sec != VNOVAL) { 1049 if (vp->v_type == VCHR) 1050 vp->v_rdev->si_atime = vap->va_atime; 1051 else 1052 de->de_atime = vap->va_atime; 1053 } 1054 if (vap->va_mtime.tv_sec != VNOVAL) { 1055 if (vp->v_type == VCHR) 1056 vp->v_rdev->si_mtime = vap->va_mtime; 1057 else 1058 de->de_mtime = vap->va_mtime; 1059 } 1060 c = 1; 1061 } 1062 1063 if (c) { 1064 if (vp->v_type == VCHR) 1065 vfs_timestamp(&vp->v_rdev->si_ctime); 1066 else 1067 vfs_timestamp(&de->de_mtime); 1068 } 1069 return (0); 1070 } 1071 1072 #ifdef MAC 1073 static int 1074 devfs_setlabel(struct vop_setlabel_args *ap) 1075 { 1076 struct vnode *vp; 1077 struct devfs_dirent *de; 1078 1079 vp = ap->a_vp; 1080 de = vp->v_data; 1081 1082 mac_relabel_vnode(ap->a_cred, vp, ap->a_label); 1083 mac_update_devfsdirent(vp->v_mount, de, vp); 1084 1085 return (0); 1086 } 1087 #endif 1088 1089 static int 1090 devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td) 1091 { 1092 1093 return (vnops.fo_stat(fp, sb, cred, td)); 1094 } 1095 1096 static int 1097 devfs_symlink(struct vop_symlink_args *ap) 1098 { 1099 int i, error; 1100 struct devfs_dirent *dd; 1101 struct devfs_dirent *de; 1102 struct devfs_mount *dmp; 1103 struct thread *td; 1104 1105 td = ap->a_cnp->cn_thread; 1106 KASSERT(td == curthread, ("devfs_symlink: td != curthread")); 1107 error = suser(td); 1108 if (error) 1109 return(error); 1110 dmp = VFSTODEVFS(ap->a_dvp->v_mount); 1111 dd = ap->a_dvp->v_data; 1112 de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen); 1113 de->de_uid = 0; 1114 de->de_gid = 0; 1115 de->de_mode = 0755; 1116 de->de_inode = alloc_unr(devfs_inos); 1117 de->de_dirent->d_type = DT_LNK; 1118 i = strlen(ap->a_target) + 1; 1119 de->de_symlink = malloc(i, M_DEVFS, M_WAITOK); 1120 bcopy(ap->a_target, de->de_symlink, i); 1121 sx_xlock(&dmp->dm_lock); 1122 #ifdef MAC 1123 mac_create_devfs_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de); 1124 #endif 1125 TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); 1126 devfs_allocv(de, ap->a_dvp->v_mount, ap->a_vpp, td); 1127 sx_xunlock(&dmp->dm_lock); 1128 return (0); 1129 } 1130 1131 /* ARGSUSED */ 1132 static int 1133 devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td) 1134 { 1135 struct cdev *dev; 1136 int error, ioflag, resid; 1137 struct cdevsw *dsw; 1138 1139 error = devfs_fp_check(fp, &dev, &dsw); 1140 if (error) 1141 return (error); 1142 KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); 1143 ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC); 1144 if (ioflag & O_DIRECT) 1145 ioflag |= IO_DIRECT; 1146 if ((flags & FOF_OFFSET) == 0) 1147 uio->uio_offset = fp->f_offset; 1148 1149 resid = uio->uio_resid; 1150 1151 error = dsw->d_write(dev, uio, ioflag); 1152 dev_relthread(dev); 1153 if (uio->uio_resid != resid || (error == 0 && resid != 0)) { 1154 vfs_timestamp(&dev->si_ctime); 1155 dev->si_mtime = dev->si_ctime; 1156 } 1157 1158 if ((flags & FOF_OFFSET) == 0) 1159 fp->f_offset = uio->uio_offset; 1160 fp->f_nextoff = uio->uio_offset; 1161 return (error); 1162 } 1163 1164 dev_t 1165 dev2udev(struct cdev *x) 1166 { 1167 if (x == NULL) 1168 return (NODEV); 1169 return (x->si_priv->cdp_inode); 1170 } 1171 1172 static struct fileops devfs_ops_f = { 1173 .fo_read = devfs_read_f, 1174 .fo_write = devfs_write_f, 1175 .fo_ioctl = devfs_ioctl_f, 1176 .fo_poll = devfs_poll_f, 1177 .fo_kqfilter = devfs_kqfilter_f, 1178 .fo_stat = devfs_stat_f, 1179 .fo_close = devfs_close_f, 1180 .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE 1181 }; 1182 1183 static struct vop_vector devfs_vnodeops = { 1184 .vop_default = &default_vnodeops, 1185 1186 .vop_access = devfs_access, 1187 .vop_getattr = devfs_getattr, 1188 .vop_ioctl = devfs_rioctl, 1189 .vop_lookup = devfs_lookup, 1190 .vop_mknod = devfs_mknod, 1191 .vop_pathconf = devfs_pathconf, 1192 .vop_read = devfs_rread, 1193 .vop_readdir = devfs_readdir, 1194 .vop_readlink = devfs_readlink, 1195 .vop_reclaim = devfs_reclaim, 1196 .vop_remove = devfs_remove, 1197 .vop_revoke = devfs_revoke, 1198 .vop_setattr = devfs_setattr, 1199 #ifdef MAC 1200 .vop_setlabel = devfs_setlabel, 1201 #endif 1202 .vop_symlink = devfs_symlink, 1203 }; 1204 1205 static struct vop_vector devfs_specops = { 1206 .vop_default = &default_vnodeops, 1207 1208 .vop_access = devfs_access, 1209 .vop_advlock = devfs_advlock, 1210 .vop_bmap = VOP_PANIC, 1211 .vop_close = devfs_close, 1212 .vop_create = VOP_PANIC, 1213 .vop_fsync = devfs_fsync, 1214 .vop_getattr = devfs_getattr, 1215 .vop_lease = VOP_NULL, 1216 .vop_link = VOP_PANIC, 1217 .vop_mkdir = VOP_PANIC, 1218 .vop_mknod = VOP_PANIC, 1219 .vop_open = devfs_open, 1220 .vop_pathconf = devfs_pathconf, 1221 .vop_print = devfs_print, 1222 .vop_read = VOP_PANIC, 1223 .vop_readdir = VOP_PANIC, 1224 .vop_readlink = VOP_PANIC, 1225 .vop_reallocblks = VOP_PANIC, 1226 .vop_reclaim = devfs_reclaim, 1227 .vop_remove = devfs_remove, 1228 .vop_rename = VOP_PANIC, 1229 .vop_revoke = devfs_revoke, 1230 .vop_rmdir = VOP_PANIC, 1231 .vop_setattr = devfs_setattr, 1232 #ifdef MAC 1233 .vop_setlabel = devfs_setlabel, 1234 #endif 1235 .vop_strategy = VOP_PANIC, 1236 .vop_symlink = VOP_PANIC, 1237 .vop_write = VOP_PANIC, 1238 }; 1239 1240 /* 1241 * Our calling convention to the device drivers used to be that we passed 1242 * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_ 1243 * flags instead since that's what open(), close() and ioctl() takes and 1244 * we don't really want vnode.h in device drivers. 1245 * We solved the source compatibility by redefining some vnode flags to 1246 * be the same as the fcntl ones and by sending down the bitwise OR of 1247 * the respective fcntl/vnode flags. These CTASSERTS make sure nobody 1248 * pulls the rug out under this. 1249 */ 1250 CTASSERT(O_NONBLOCK == IO_NDELAY); 1251 CTASSERT(O_FSYNC == IO_SYNC); 1252