1 /*- 2 * Copyright (c) 2000-2004 3 * Poul-Henning Kamp. All rights reserved. 4 * Copyright (c) 1989, 1992-1993, 1995 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software donated to Berkeley by 8 * Jan-Simon Pendry. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)kernfs_vnops.c 8.15 (Berkeley) 5/21/95 32 * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43 33 * 34 * $FreeBSD$ 35 */ 36 37 /* 38 * TODO: 39 * remove empty directories 40 * mkdir: want it ? 
 */

#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/time.h>
#include <sys/ttycom.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

/* Forward declarations; the tables themselves are defined near the end. */
static struct vop_vector devfs_vnodeops;
static struct vop_vector devfs_specops;
static struct fileops devfs_ops_f;

#include <fs/devfs/devfs.h>
#include <fs/devfs/devfs_int.h>

/*
 * Mutex taken around every read or write of the de_vnode <-> v_data
 * linkage in this file (devfs_allocv, devfs_reclaim, devfs_revoke).
 */
static struct mtx devfs_de_interlock;
MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF);

/*
 * Check that the file still refers to the device recorded in f_data and
 * take a thread reference on that device.
 *
 * On success returns 0 with *devp set to the device and *dswp set to the
 * cdevsw returned by dev_refthread(); callers in this file pair that with
 * dev_relthread(*devp).  Returns ENXIO if the vnode's v_rdev no longer
 * matches fp->f_data, or if dev_refthread() returns NULL.  Note that
 * *devp is written even on failure.
 */
static int
devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp)
{

	*devp = fp->f_vnode->v_rdev;
	if (*devp != fp->f_data)
		return (ENXIO);
	KASSERT((*devp)->si_refcount > 0,
	    ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp)));
	*dswp = dev_refthread(*devp);
	if (*dswp == NULL)
		return (ENXIO);
	return (0);
}

/*
 * Construct the fully qualified path name relative to the mountpoint
 *
 * The name is assembled right-to-left at the tail of buf: first the
 * component from cnp, then each parent directory name separated by '/',
 * walking up through ".." until dm_rootdir is reached.  buf[SPECNAMELEN]
 * is written, so buf must hold at least SPECNAMELEN + 1 bytes.
 *
 * Returns a pointer into buf at the start of the NUL-terminated name, or
 * NULL if the name does not fit.
 */
static char *
devfs_fqpn(char *buf, struct vnode *dvp, struct componentname *cnp)
{
	int i;
	struct devfs_dirent *de, *dd;
	struct devfs_mount *dmp;

	dmp = VFSTODEVFS(dvp->v_mount);
	dd = dvp->v_data;
	i = SPECNAMELEN;
	buf[i] = '\0';
	i -= cnp->cn_namelen;
	if (i < 0)
		 return (NULL);
	bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen);
	de = dd;
	while (de != dmp->dm_rootdir) {
		/* Make room for the separator, then for the directory name. */
		i--;
		if (i < 0)
			 return (NULL);
		buf[i] = '/';
		i -= de->de_dirent->d_namlen;
		if (i < 0)
			 return (NULL);
		bcopy(de->de_dirent->d_name, buf + i,
		    de->de_dirent->d_namlen);
		de = TAILQ_FIRST(&de->de_dlist);	/* "." */
		de = TAILQ_NEXT(de, de_list);		/* ".." */
		de = de->de_dir;
	}
	return (buf + i);
}

/*
 * Drop the dirent and mount holds taken by devfs_allocv().
 *
 * Return value:
 *   0 - the dirent is not doomed and the mount structure survived;
 *       dm_lock is released only if drop_dm_lock is non-zero.
 *   1 - the dirent was flagged DE_DOOMED; dm_lock has been released.
 *   2 - the last hold on the mount was dropped; dm_lock has been released
 *       and devfs_unmount_final() has been called on dmp.
 *
 * The dirent is freed here if DEVFS_DE_DROP() reports it should be.
 */
static int
devfs_allocv_drop_refs(int drop_dm_lock, struct devfs_mount *dmp,
	struct devfs_dirent *de)
{
	int not_found;

	not_found = 0;
	if (de->de_flags & DE_DOOMED)
		not_found = 1;
	if (DEVFS_DE_DROP(de)) {
		KASSERT(not_found == 1, ("DEVFS de dropped but not doomed"));
		devfs_dirent_free(de);
	}
	if (DEVFS_DMP_DROP(dmp)) {
		KASSERT(not_found == 1,
			("DEVFS mount struct freed before dirent"));
		not_found = 2;
		sx_xunlock(&dmp->dm_lock);
		devfs_unmount_final(dmp);
	}
	/* In case 2 the lock was already released just above. */
	if (not_found == 1 || (drop_dm_lock && not_found != 2))
		sx_unlock(&dmp->dm_lock);
	return (not_found);
}

/*
 * devfs_allocv shall be entered with dmp->dm_lock held, and it drops
 * it on return.
 *
 * Returns in *vpp an exclusively locked vnode for the dirent "de",
 * reusing de->de_vnode when one is already attached and creating a new
 * vnode otherwise.  Returns 0 on success, ENOENT if the dirent is (or
 * becomes) doomed or refers to an inactive cdev, or the error from
 * getnewvnode().  *vpp is written only on success.
 */
int
devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td)
{
	int error;
	struct vnode *vp;
	struct cdev *dev;
	struct devfs_mount *dmp;

	KASSERT(td == curthread, ("devfs_allocv: td != curthread"));
	dmp = VFSTODEVFS(mp);
	if (de->de_flags & DE_DOOMED) {
		sx_xunlock(&dmp->dm_lock);
		return (ENOENT);
	}
 loop:
	/*
	 * Hold the dirent and the mount so both survive the windows below
	 * in which dm_lock is dropped around vget()/vn_lock().
	 */
	DEVFS_DE_HOLD(de);
	DEVFS_DMP_HOLD(dmp);
	mtx_lock(&devfs_de_interlock);
	vp = de->de_vnode;
	if (vp != NULL) {
		/*
		 * Existing vnode: take its interlock before releasing
		 * devfs_de_interlock, then vget() it with dm_lock dropped.
		 */
		VI_LOCK(vp);
		mtx_unlock(&devfs_de_interlock);
		sx_xunlock(&dmp->dm_lock);
		error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td);
		sx_xlock(&dmp->dm_lock);
		if (devfs_allocv_drop_refs(0, dmp, de)) {
			/* Non-zero means dm_lock was released for us. */
			if (error == 0)
				vput(vp);
			return (ENOENT);
		}
		else if (error)
			goto loop;	/* dm_lock is still held here */
		sx_xunlock(&dmp->dm_lock);
		*vpp = vp;
		return (0);
	}
	mtx_unlock(&devfs_de_interlock);
	if (de->de_dirent->d_type == DT_CHR) {
		if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) {
			devfs_allocv_drop_refs(1, dmp, de);
			return (ENOENT);
		}
		dev = &de->de_cdp->cdp_c;
	} else {
		dev = NULL;
	}
	error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp);
	if (error != 0) {
		devfs_allocv_drop_refs(1, dmp, de);
		printf("devfs_allocv: failed to allocate new vnode\n");
		return (error);
	}

	if (de->de_dirent->d_type == DT_CHR) {
		/*
		 * Character device: reference the cdev, attach it to the
		 * vnode, fold the vnode's use count into si_usecount and
		 * switch the vnode to the device operations vector.
		 */
		vp->v_type = VCHR;
		VI_LOCK(vp);
		dev_lock();
		dev_refl(dev);
		vp->v_rdev = dev;
		KASSERT(vp->v_usecount == 1,
		    ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount));
		dev->si_usecount += vp->v_usecount;
		dev_unlock();
		VI_UNLOCK(vp);
		vp->v_op = &devfs_specops;
	} else if (de->de_dirent->d_type == DT_DIR) {
		vp->v_type = VDIR;
	} else if (de->de_dirent->d_type == DT_LNK) {
		vp->v_type = VLNK;
	} else {
		vp->v_type = VBAD;
	}
	/* Publish the dirent <-> vnode linkage under the interlock. */
	mtx_lock(&devfs_de_interlock);
	vp->v_data = de;
	de->de_vnode = vp;
	mtx_unlock(&devfs_de_interlock);
	/* dm_lock is dropped while the vnode lock is acquired. */
	sx_xunlock(&dmp->dm_lock);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	sx_xlock(&dmp->dm_lock);
	if (devfs_allocv_drop_refs(0, dmp, de)) {
		vput(vp);
		return (ENOENT);
	}
#ifdef MAC
	mac_associate_vnode_devfs(mp, de, vp);
#endif
	sx_xunlock(&dmp->dm_lock);
	*vpp = vp;
	return (0);
}

/*
 * VOP_ACCESS: check the requested access mode against the dirent's
 * mode/uid/gid using vaccess().  For directories the attributes come
 * from de->de_dir.
 *
 * An EACCES result is overridden (0 is returned) when the calling
 * process has P_CONTROLT set and its session's s_ttyvp is this dirent's
 * vnode.  Any other error is returned unchanged.
 */
static int
devfs_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct devfs_dirent *de;
	int error;

	de = vp->v_data;
	if (vp->v_type == VDIR)
		de = de->de_dir;

	error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid,
	    ap->a_mode, ap->a_cred, NULL);
	if (!error)
		return (error);
	if (error != EACCES)
		return (error);
	/* We do, however, allow access to the controlling terminal */
	if (!(ap->a_td->td_proc->p_flag & P_CONTROLT))
		return (error);
	if (ap->a_td->td_proc->p_session->s_ttyvp == de->de_vnode)
		return (0);
	return (error);
}

/* ARGSUSED */
/*
 * VOP_ADVLOCK: advisory locks are not provided on devfs nodes.
 * Returns EOPNOTSUPP for flock-style requests (F_FLOCK set in a_flags)
 * and EINVAL for everything else.
 */
static int
devfs_advlock(struct vop_advlock_args *ap)
{

	return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL);
}

/*
 * VOP_CLOSE for character devices.
 *
 * First releases the session's controlling-terminal reference when this
 * looks like the last descriptor on it, then calls the driver's d_close
 * unless other users of the device remain (see the comments below).
 *
 * Returns ENXIO if dev_refthread() fails, 0 when the driver close is
 * skipped, otherwise the value returned by d_close.
 */
/* ARGSUSED */
static int
devfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp, *oldvp;
	struct thread *td = ap->a_td;
	struct cdev *dev = vp->v_rdev;
	struct cdevsw *dsw;
	int error;

	/*
	 * Hack: a tty device that is a controlling terminal
	 * has a reference from the session structure.
	 * We cannot easily tell that a character device is
	 * a controlling terminal, unless it is the closing
	 * process' controlling terminal.  In that case,
	 * if the reference count is 2 (this last descriptor
	 * plus the session), release the reference from the session.
	 */
	oldvp = NULL;
	sx_xlock(&proctree_lock);
	if (td && vp == td->td_proc->p_session->s_ttyvp) {
		SESS_LOCK(td->td_proc->p_session);
		VI_LOCK(vp);
		if (count_dev(dev) == 2 && (vp->v_iflag & VI_DOOMED) == 0) {
			td->td_proc->p_session->s_ttyvp = NULL;
			oldvp = vp;
		}
		VI_UNLOCK(vp);
		SESS_UNLOCK(td->td_proc->p_session);
	}
	sx_xunlock(&proctree_lock);
	/* The vrele() is deferred until all of the locks above are dropped. */
	if (oldvp != NULL)
		vrele(oldvp);
	/*
	 * We do not want to really close the device if it
	 * is still in use unless we are trying to close it
	 * forcibly. Since every use (buffer, vnode, swap, cmap)
	 * holds a reference to the vnode, and because we mark
	 * any other vnodes that alias this device, when the
	 * sum of the reference counts on all the aliased
	 * vnodes descends to one, we are on last close.
	 */
	dsw = dev_refthread(dev);
	if (dsw == NULL)
		return (ENXIO);
	VI_LOCK(vp);
	if (vp->v_iflag & VI_DOOMED) {
		/* Forced close. */
	} else if (dsw->d_flags & D_TRACKCLOSE) {
		/* Keep device updated on status. */
	} else if (count_dev(dev) > 1) {
		/* Not the last close: skip the driver's d_close. */
		VI_UNLOCK(vp);
		dev_relthread(dev);
		return (0);
	}
	VI_UNLOCK(vp);
	KASSERT(dev->si_refcount > 0,
	    ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev)));
	/* Giant is dropped around d_close unless the driver sets D_NEEDGIANT. */
	if (!(dsw->d_flags & D_NEEDGIANT)) {
		DROP_GIANT();
		error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
		PICKUP_GIANT();
	} else {
		error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
	}
	dev_relthread(dev);
	return (error);
}

/*
 * fileops close: delegates to the generic vnode fileops close.
 */
static int
devfs_close_f(struct file *fp, struct thread *td)
{

	return (vnops.fo_close(fp, td));
}

/*
 * VOP_FSYNC: returns 0 without doing anything unless vn_isdisk()
 * accepts the vnode, in which case vop_stdfsync() does the work.
 */
/* ARGSUSED */
static int
devfs_fsync(struct vop_fsync_args *ap)
{
	if (!vn_isdisk(ap->a_vp, NULL))
		return (0);

	return (vop_stdfsync(ap));
}

/*
 * VOP_GETATTR: fill *ap->a_vap from the dirent (de->de_dir for
 * directories) and, for character devices, from the cdev.
 *
 * Size is the symlink target length for VLNK, DEV_BSIZE for VDIR and 0
 * otherwise.  Timestamps whose tv_sec is 0 are first rewritten in place
 * to the boot time.  Always returns 0.
 */
static int
devfs_getattr(struct vop_getattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;
	int error = 0;
	struct devfs_dirent *de;
	struct cdev *dev;

	de = vp->v_data;
	KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
	if (vp->v_type == VDIR) {
		de = de->de_dir;
		KASSERT(de != NULL,
		    ("Null dir dirent in devfs_getattr vp=%p", vp));
	}
	bzero((caddr_t) vap, sizeof(*vap));
	vattr_null(vap);
	vap->va_uid = de->de_uid;
	vap->va_gid = de->de_gid;
	vap->va_mode = de->de_mode;
	if (vp->v_type == VLNK)
		vap->va_size = strlen(de->de_symlink);
	else if (vp->v_type == VDIR)
		vap->va_size = vap->va_bytes = DEV_BSIZE;
	else
		vap->va_size = 0;
	if (vp->v_type != VDIR)
		vap->va_bytes = 0;
	vap->va_blocksize = DEV_BSIZE;
	vap->va_type = vp->v_type;

/*
 * Replace a timestamp whose seconds field is zero with the boot time.
 * This modifies the stored dirent/cdev timestamp, not just the copy
 * handed back to the caller.
 */
#define fix(aa)							\
	do {							\
		if ((aa).tv_sec == 0) {				\
			(aa).tv_sec = boottime.tv_sec;		\
			(aa).tv_nsec = boottime.tv_usec * 1000; \
		}						\
	} while (0)

	if (vp->v_type != VCHR)  {
		fix(de->de_atime);
		vap->va_atime = de->de_atime;
		fix(de->de_mtime);
		vap->va_mtime = de->de_mtime;
		fix(de->de_ctime);
		vap->va_ctime = de->de_ctime;
	} else {
		/* Character devices keep their timestamps in the cdev. */
		dev = vp->v_rdev;
		fix(dev->si_atime);
		vap->va_atime = dev->si_atime;
		fix(dev->si_mtime);
		vap->va_mtime = dev->si_mtime;
		fix(dev->si_ctime);
		vap->va_ctime = dev->si_ctime;

		vap->va_rdev = dev->si_priv->cdp_inode;
	}
	vap->va_gen = 0;
	vap->va_flags = 0;
	vap->va_nlink = de->de_links;
	vap->va_fileid = de->de_inode;

	return (error);
}

/*
 * fileops ioctl for device files.
 *
 * FIODTYPE and FIODGNAME are answered here; every other command is
 * passed to the driver's d_ioctl, with ENOIOCTL translated to ENOTTY.
 * After a successful TIOCSCTTY the session's s_ttyvp is switched to this
 * file's vnode.
 *
 * Returns the devfs_fp_check() error, EINVAL when the FIODGNAME buffer
 * is too small, the copyout() result, or the (translated) d_ioctl result.
 */
/* ARGSUSED */
static int
devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td)
{
	struct cdev *dev;
	struct cdevsw *dsw;
	struct vnode *vp;
	struct vnode *vpold;
	int error, i;
	const char *p;
	struct fiodgname_arg *fgn;

	error = devfs_fp_check(fp, &dev, &dsw);
	if (error)
		return (error);

	if (com == FIODTYPE) {
		*(int *)data = dsw->d_flags & D_TYPEMASK;
		dev_relthread(dev);
		return (0);
	} else if (com == FIODGNAME) {
		fgn = data;
		p = devtoname(dev);
		i = strlen(p) + 1;	/* length including the NUL */
		if (i > fgn->len)
			error = EINVAL;
		else
			error = copyout(p, fgn->buf, i);
		dev_relthread(dev);
		return (error);
	}
	error = dsw->d_ioctl(dev, com, data, fp->f_flag, td);
	dev_relthread(dev);
	if (error == ENOIOCTL)
		error = ENOTTY;
	if (error == 0 && com == TIOCSCTTY) {
		vp = fp->f_vnode;

		/* Do nothing if reassigning same control tty */
		sx_slock(&proctree_lock);
		if (td->td_proc->p_session->s_ttyvp == vp) {
			sx_sunlock(&proctree_lock);
			return (0);
		}

		mtx_lock(&Giant);

		/* Reference the new vnode before publishing it. */
		vpold = td->td_proc->p_session->s_ttyvp;
		VREF(vp);
		SESS_LOCK(td->td_proc->p_session);
		td->td_proc->p_session->s_ttyvp = vp;
		SESS_UNLOCK(td->td_proc->p_session);

		sx_sunlock(&proctree_lock);

		/* Get rid of reference to old control tty */
		if (vpold)
			vrele(vpold);
		mtx_unlock(&Giant);
	}
	return (error);
}

/* ARGSUSED */ 498 static int 499 devfs_kqfilter_f(struct file *fp, struct knote *kn) 500 { 501 struct cdev *dev; 502 struct cdevsw *dsw; 503 int error; 504 505 error = devfs_fp_check(fp, &dev, &dsw); 506 if (error) 507 return (error); 508 error = dsw->d_kqfilter(dev, kn); 509 dev_relthread(dev); 510 return (error); 511 } 512 513 static int 514 devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock) 515 { 516 struct componentname *cnp; 517 struct vnode *dvp, **vpp; 518 struct thread *td; 519 struct devfs_dirent *de, *dd; 520 struct devfs_dirent **dde; 521 struct devfs_mount *dmp; 522 struct cdev *cdev; 523 int error, flags, nameiop; 524 char specname[SPECNAMELEN + 1], *pname; 525 526 cnp = ap->a_cnp; 527 vpp = ap->a_vpp; 528 dvp = ap->a_dvp; 529 pname = cnp->cn_nameptr; 530 td = cnp->cn_thread; 531 flags = cnp->cn_flags; 532 nameiop = cnp->cn_nameiop; 533 dmp = VFSTODEVFS(dvp->v_mount); 534 dd = dvp->v_data; 535 *vpp = NULLVP; 536 537 if ((flags & ISLASTCN) && nameiop == RENAME) 538 return (EOPNOTSUPP); 539 540 if (dvp->v_type != VDIR) 541 return (ENOTDIR); 542 543 if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT)) 544 return (EIO); 545 546 error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td); 547 if (error) 548 return (error); 549 550 if (cnp->cn_namelen == 1 && *pname == '.') { 551 if ((flags & ISLASTCN) && nameiop != LOOKUP) 552 return (EINVAL); 553 *vpp = dvp; 554 VREF(dvp); 555 return (0); 556 } 557 558 if (flags & ISDOTDOT) { 559 if ((flags & ISLASTCN) && nameiop != LOOKUP) 560 return (EINVAL); 561 VOP_UNLOCK(dvp, 0, td); 562 de = TAILQ_FIRST(&dd->de_dlist); /* "." */ 563 de = TAILQ_NEXT(de, de_list); /* ".." */ 564 de = de->de_dir; 565 error = devfs_allocv(de, dvp->v_mount, vpp, td); 566 *dm_unlock = 0; 567 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td); 568 return (error); 569 } 570 571 devfs_populate(dmp); 572 dd = dvp->v_data; 573 de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen); 574 while (de == NULL) { /* While(...) 
so we can use break */ 575 576 if (nameiop == DELETE) 577 return (ENOENT); 578 579 /* 580 * OK, we didn't have an entry for the name we were asked for 581 * so we try to see if anybody can create it on demand. 582 */ 583 pname = devfs_fqpn(specname, dvp, cnp); 584 if (pname == NULL) 585 break; 586 587 cdev = NULL; 588 EVENTHANDLER_INVOKE(dev_clone, 589 td->td_ucred, pname, strlen(pname), &cdev); 590 if (cdev == NULL) 591 break; 592 593 devfs_populate(dmp); 594 595 dev_lock(); 596 dde = &cdev->si_priv->cdp_dirents[dmp->dm_idx]; 597 if (dde != NULL && *dde != NULL) 598 de = *dde; 599 dev_unlock(); 600 dev_rel(cdev); 601 break; 602 } 603 604 if (de == NULL || de->de_flags & DE_WHITEOUT) { 605 if ((nameiop == CREATE || nameiop == RENAME) && 606 (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) { 607 cnp->cn_flags |= SAVENAME; 608 return (EJUSTRETURN); 609 } 610 return (ENOENT); 611 } 612 613 if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) { 614 error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td); 615 if (error) 616 return (error); 617 if (*vpp == dvp) { 618 VREF(dvp); 619 *vpp = dvp; 620 return (0); 621 } 622 } 623 error = devfs_allocv(de, dvp->v_mount, vpp, td); 624 *dm_unlock = 0; 625 return (error); 626 } 627 628 static int 629 devfs_lookup(struct vop_lookup_args *ap) 630 { 631 int j; 632 struct devfs_mount *dmp; 633 int dm_unlock; 634 635 dmp = VFSTODEVFS(ap->a_dvp->v_mount); 636 dm_unlock = 1; 637 sx_xlock(&dmp->dm_lock); 638 j = devfs_lookupx(ap, &dm_unlock); 639 if (dm_unlock == 1) 640 sx_xunlock(&dmp->dm_lock); 641 return (j); 642 } 643 644 static int 645 devfs_mknod(struct vop_mknod_args *ap) 646 { 647 struct componentname *cnp; 648 struct vnode *dvp, **vpp; 649 struct thread *td; 650 struct devfs_dirent *dd, *de; 651 struct devfs_mount *dmp; 652 int error; 653 654 /* 655 * The only type of node we should be creating here is a 656 * character device, for anything else return EOPNOTSUPP. 
657 */ 658 if (ap->a_vap->va_type != VCHR) 659 return (EOPNOTSUPP); 660 dvp = ap->a_dvp; 661 dmp = VFSTODEVFS(dvp->v_mount); 662 663 cnp = ap->a_cnp; 664 vpp = ap->a_vpp; 665 td = cnp->cn_thread; 666 dd = dvp->v_data; 667 668 error = ENOENT; 669 sx_xlock(&dmp->dm_lock); 670 TAILQ_FOREACH(de, &dd->de_dlist, de_list) { 671 if (cnp->cn_namelen != de->de_dirent->d_namlen) 672 continue; 673 if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name, 674 de->de_dirent->d_namlen) != 0) 675 continue; 676 if (de->de_flags & DE_WHITEOUT) 677 break; 678 goto notfound; 679 } 680 if (de == NULL) 681 goto notfound; 682 de->de_flags &= ~DE_WHITEOUT; 683 error = devfs_allocv(de, dvp->v_mount, vpp, td); 684 return (error); 685 notfound: 686 sx_xunlock(&dmp->dm_lock); 687 return (error); 688 } 689 690 /* ARGSUSED */ 691 static int 692 devfs_open(struct vop_open_args *ap) 693 { 694 struct thread *td = ap->a_td; 695 struct vnode *vp = ap->a_vp; 696 struct cdev *dev = vp->v_rdev; 697 struct file *fp; 698 int error; 699 struct cdevsw *dsw; 700 701 if (vp->v_type == VBLK) 702 return (ENXIO); 703 704 if (dev == NULL) 705 return (ENXIO); 706 707 /* Make this field valid before any I/O in d_open. */ 708 if (dev->si_iosize_max == 0) 709 dev->si_iosize_max = DFLTPHYS; 710 711 dsw = dev_refthread(dev); 712 if (dsw == NULL) 713 return (ENXIO); 714 715 /* XXX: Special casing of ttys for deadfs. Probably redundant. 
*/ 716 if (dsw->d_flags & D_TTY) 717 vp->v_vflag |= VV_ISTTY; 718 719 VOP_UNLOCK(vp, 0, td); 720 721 if(!(dsw->d_flags & D_NEEDGIANT)) { 722 DROP_GIANT(); 723 if (dsw->d_fdopen != NULL) 724 error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx); 725 else 726 error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td); 727 PICKUP_GIANT(); 728 } else { 729 if (dsw->d_fdopen != NULL) 730 error = dsw->d_fdopen(dev, ap->a_mode, td, ap->a_fdidx); 731 else 732 error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td); 733 } 734 735 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 736 737 dev_relthread(dev); 738 739 if (error) 740 return (error); 741 742 #if 0 /* /dev/console */ 743 KASSERT(ap->a_fdidx >= 0, 744 ("Could not vnode bypass device on fd %d", ap->a_fdidx)); 745 #else 746 if(ap->a_fdidx < 0) 747 return (error); 748 #endif 749 /* 750 * This is a pretty disgustingly long chain, but I am not 751 * sure there is any better way. Passing the fdidx into 752 * VOP_OPEN() offers us more information than just passing 753 * the file *. 754 */ 755 fp = ap->a_td->td_proc->p_fd->fd_ofiles[ap->a_fdidx]; 756 KASSERT(fp->f_ops == &badfileops, 757 ("Could not vnode bypass device on fdops %p", fp->f_ops)); 758 fp->f_ops = &devfs_ops_f; 759 fp->f_data = dev; 760 return (error); 761 } 762 763 static int 764 devfs_pathconf(struct vop_pathconf_args *ap) 765 { 766 767 switch (ap->a_name) { 768 case _PC_MAC_PRESENT: 769 #ifdef MAC 770 /* 771 * If MAC is enabled, devfs automatically supports 772 * trivial non-persistant label storage. 
773 */ 774 *ap->a_retval = 1; 775 #else 776 *ap->a_retval = 0; 777 #endif 778 return (0); 779 default: 780 return (vop_stdpathconf(ap)); 781 } 782 /* NOTREACHED */ 783 } 784 785 /* ARGSUSED */ 786 static int 787 devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td) 788 { 789 struct cdev *dev; 790 struct cdevsw *dsw; 791 int error; 792 793 error = devfs_fp_check(fp, &dev, &dsw); 794 if (error) 795 return (error); 796 error = dsw->d_poll(dev, events, td); 797 dev_relthread(dev); 798 return(error); 799 } 800 801 /* 802 * Print out the contents of a special device vnode. 803 */ 804 static int 805 devfs_print(struct vop_print_args *ap) 806 { 807 808 printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev)); 809 return (0); 810 } 811 812 /* ARGSUSED */ 813 static int 814 devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td) 815 { 816 struct cdev *dev; 817 int ioflag, error, resid; 818 struct cdevsw *dsw; 819 820 error = devfs_fp_check(fp, &dev, &dsw); 821 if (error) 822 return (error); 823 resid = uio->uio_resid; 824 ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT); 825 if (ioflag & O_DIRECT) 826 ioflag |= IO_DIRECT; 827 828 if ((flags & FOF_OFFSET) == 0) 829 uio->uio_offset = fp->f_offset; 830 831 error = dsw->d_read(dev, uio, ioflag); 832 dev_relthread(dev); 833 if (uio->uio_resid != resid || (error == 0 && resid != 0)) 834 vfs_timestamp(&dev->si_atime); 835 836 if ((flags & FOF_OFFSET) == 0) 837 fp->f_offset = uio->uio_offset; 838 fp->f_nextoff = uio->uio_offset; 839 return (error); 840 } 841 842 static int 843 devfs_readdir(struct vop_readdir_args *ap) 844 { 845 int error; 846 struct uio *uio; 847 struct dirent *dp; 848 struct devfs_dirent *dd; 849 struct devfs_dirent *de; 850 struct devfs_mount *dmp; 851 off_t off, oldoff; 852 int *tmp_ncookies = NULL; 853 854 if (ap->a_vp->v_type != VDIR) 855 return (ENOTDIR); 856 857 uio = ap->a_uio; 858 if (uio->uio_offset < 0) 859 return (EINVAL); 860 861 /* 862 * XXX: 
This is a temporary hack to get around this filesystem not 863 * supporting cookies. We store the location of the ncookies pointer 864 * in a temporary variable before calling vfs_subr.c:vfs_read_dirent() 865 * and set the number of cookies to 0. We then set the pointer to 866 * NULL so that vfs_read_dirent doesn't try to call realloc() on 867 * ap->a_cookies. Later in this function, we restore the ap->a_ncookies 868 * pointer to its original location before returning to the caller. 869 */ 870 if (ap->a_ncookies != NULL) { 871 tmp_ncookies = ap->a_ncookies; 872 *ap->a_ncookies = 0; 873 ap->a_ncookies = NULL; 874 } 875 876 dmp = VFSTODEVFS(ap->a_vp->v_mount); 877 sx_xlock(&dmp->dm_lock); 878 devfs_populate(dmp); 879 error = 0; 880 de = ap->a_vp->v_data; 881 off = 0; 882 oldoff = uio->uio_offset; 883 TAILQ_FOREACH(dd, &de->de_dlist, de_list) { 884 KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__)); 885 if (dd->de_flags & DE_WHITEOUT) 886 continue; 887 if (dd->de_dirent->d_type == DT_DIR) 888 de = dd->de_dir; 889 else 890 de = dd; 891 dp = dd->de_dirent; 892 if (dp->d_reclen > uio->uio_resid) 893 break; 894 dp->d_fileno = de->de_inode; 895 if (off >= uio->uio_offset) { 896 error = vfs_read_dirent(ap, dp, off); 897 if (error) 898 break; 899 } 900 off += dp->d_reclen; 901 } 902 sx_xunlock(&dmp->dm_lock); 903 uio->uio_offset = off; 904 905 /* 906 * Restore ap->a_ncookies if it wasn't originally NULL in the first 907 * place. 
908 */ 909 if (tmp_ncookies != NULL) 910 ap->a_ncookies = tmp_ncookies; 911 912 return (error); 913 } 914 915 static int 916 devfs_readlink(struct vop_readlink_args *ap) 917 { 918 struct devfs_dirent *de; 919 920 de = ap->a_vp->v_data; 921 return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio)); 922 } 923 924 static int 925 devfs_reclaim(struct vop_reclaim_args *ap) 926 { 927 struct vnode *vp = ap->a_vp; 928 struct devfs_dirent *de; 929 struct cdev *dev; 930 931 mtx_lock(&devfs_de_interlock); 932 de = vp->v_data; 933 if (de != NULL) { 934 de->de_vnode = NULL; 935 vp->v_data = NULL; 936 } 937 mtx_unlock(&devfs_de_interlock); 938 939 vnode_destroy_vobject(vp); 940 941 dev = vp->v_rdev; 942 vp->v_rdev = NULL; 943 944 if (dev == NULL) 945 return (0); 946 947 dev_lock(); 948 dev->si_usecount -= vp->v_usecount; 949 dev_unlock(); 950 dev_rel(dev); 951 return (0); 952 } 953 954 static int 955 devfs_remove(struct vop_remove_args *ap) 956 { 957 struct vnode *vp = ap->a_vp; 958 struct devfs_dirent *dd; 959 struct devfs_dirent *de; 960 struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount); 961 962 sx_xlock(&dmp->dm_lock); 963 dd = ap->a_dvp->v_data; 964 de = vp->v_data; 965 if (de->de_cdp == NULL) { 966 TAILQ_REMOVE(&dd->de_dlist, de, de_list); 967 devfs_delete(dmp, de); 968 } else { 969 de->de_flags |= DE_WHITEOUT; 970 } 971 sx_xunlock(&dmp->dm_lock); 972 return (0); 973 } 974 975 /* 976 * Revoke is called on a tty when a terminal session ends. The vnode 977 * is orphaned by setting v_op to deadfs so we need to let go of it 978 * as well so that we create a new one next time around. 
979 * 980 */ 981 static int 982 devfs_revoke(struct vop_revoke_args *ap) 983 { 984 struct vnode *vp = ap->a_vp, *vp2; 985 struct cdev *dev; 986 struct cdev_priv *cdp; 987 struct devfs_dirent *de; 988 int i; 989 990 KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL")); 991 992 dev = vp->v_rdev; 993 cdp = dev->si_priv; 994 for (;;) { 995 mtx_lock(&devfs_de_interlock); 996 dev_lock(); 997 vp2 = NULL; 998 for (i = 0; i <= cdp->cdp_maxdirent; i++) { 999 de = cdp->cdp_dirents[i]; 1000 if (de == NULL) 1001 continue; 1002 1003 vp2 = de->de_vnode; 1004 if (vp2 != NULL) { 1005 de->de_vnode = NULL; 1006 dev_unlock(); 1007 VI_LOCK(vp2); 1008 mtx_unlock(&devfs_de_interlock); 1009 vholdl(vp2); 1010 VI_UNLOCK(vp2); 1011 vgone(vp2); 1012 vdrop(vp2); 1013 break; 1014 } 1015 } 1016 if (vp2 != NULL) { 1017 continue; 1018 } 1019 dev_unlock(); 1020 mtx_unlock(&devfs_de_interlock); 1021 break; 1022 } 1023 return (0); 1024 } 1025 1026 static int 1027 devfs_rioctl(struct vop_ioctl_args *ap) 1028 { 1029 int error; 1030 struct devfs_mount *dmp; 1031 1032 dmp = VFSTODEVFS(ap->a_vp->v_mount); 1033 sx_xlock(&dmp->dm_lock); 1034 devfs_populate(dmp); 1035 error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td); 1036 sx_xunlock(&dmp->dm_lock); 1037 return (error); 1038 } 1039 1040 static int 1041 devfs_rread(struct vop_read_args *ap) 1042 { 1043 1044 if (ap->a_vp->v_type != VDIR) 1045 return (EINVAL); 1046 return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL)); 1047 } 1048 1049 static int 1050 devfs_setattr(struct vop_setattr_args *ap) 1051 { 1052 struct devfs_dirent *de; 1053 struct vattr *vap; 1054 struct vnode *vp; 1055 int c, error; 1056 uid_t uid; 1057 gid_t gid; 1058 1059 vap = ap->a_vap; 1060 vp = ap->a_vp; 1061 if ((vap->va_type != VNON) || 1062 (vap->va_nlink != VNOVAL) || 1063 (vap->va_fsid != VNOVAL) || 1064 (vap->va_fileid != VNOVAL) || 1065 (vap->va_blocksize != VNOVAL) || 1066 (vap->va_flags != VNOVAL && vap->va_flags != 0) || 1067 
(vap->va_rdev != VNOVAL) || 1068 ((int)vap->va_bytes != VNOVAL) || 1069 (vap->va_gen != VNOVAL)) { 1070 return (EINVAL); 1071 } 1072 1073 de = vp->v_data; 1074 if (vp->v_type == VDIR) 1075 de = de->de_dir; 1076 1077 error = c = 0; 1078 if (vap->va_uid == (uid_t)VNOVAL) 1079 uid = de->de_uid; 1080 else 1081 uid = vap->va_uid; 1082 if (vap->va_gid == (gid_t)VNOVAL) 1083 gid = de->de_gid; 1084 else 1085 gid = vap->va_gid; 1086 if (uid != de->de_uid || gid != de->de_gid) { 1087 if (((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid || 1088 (gid != de->de_gid && !groupmember(gid, ap->a_cred))) && 1089 (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)) != 0) 1090 return (error); 1091 de->de_uid = uid; 1092 de->de_gid = gid; 1093 c = 1; 1094 } 1095 1096 if (vap->va_mode != (mode_t)VNOVAL) { 1097 if ((ap->a_cred->cr_uid != de->de_uid) && 1098 (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL))) 1099 return (error); 1100 de->de_mode = vap->va_mode; 1101 c = 1; 1102 } 1103 1104 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { 1105 /* See the comment in ufs_vnops::ufs_setattr(). 
*/ 1106 if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, ap->a_td)) && 1107 ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || 1108 (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, ap->a_td)))) 1109 return (error); 1110 if (vap->va_atime.tv_sec != VNOVAL) { 1111 if (vp->v_type == VCHR) 1112 vp->v_rdev->si_atime = vap->va_atime; 1113 else 1114 de->de_atime = vap->va_atime; 1115 } 1116 if (vap->va_mtime.tv_sec != VNOVAL) { 1117 if (vp->v_type == VCHR) 1118 vp->v_rdev->si_mtime = vap->va_mtime; 1119 else 1120 de->de_mtime = vap->va_mtime; 1121 } 1122 c = 1; 1123 } 1124 1125 if (c) { 1126 if (vp->v_type == VCHR) 1127 vfs_timestamp(&vp->v_rdev->si_ctime); 1128 else 1129 vfs_timestamp(&de->de_mtime); 1130 } 1131 return (0); 1132 } 1133 1134 #ifdef MAC 1135 static int 1136 devfs_setlabel(struct vop_setlabel_args *ap) 1137 { 1138 struct vnode *vp; 1139 struct devfs_dirent *de; 1140 1141 vp = ap->a_vp; 1142 de = vp->v_data; 1143 1144 mac_relabel_vnode(ap->a_cred, vp, ap->a_label); 1145 mac_update_devfsdirent(vp->v_mount, de, vp); 1146 1147 return (0); 1148 } 1149 #endif 1150 1151 static int 1152 devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td) 1153 { 1154 1155 return (vnops.fo_stat(fp, sb, cred, td)); 1156 } 1157 1158 static int 1159 devfs_symlink(struct vop_symlink_args *ap) 1160 { 1161 int i, error; 1162 struct devfs_dirent *dd; 1163 struct devfs_dirent *de; 1164 struct devfs_mount *dmp; 1165 struct thread *td; 1166 1167 td = ap->a_cnp->cn_thread; 1168 KASSERT(td == curthread, ("devfs_symlink: td != curthread")); 1169 error = suser(td); 1170 if (error) 1171 return(error); 1172 dmp = VFSTODEVFS(ap->a_dvp->v_mount); 1173 dd = ap->a_dvp->v_data; 1174 de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen); 1175 de->de_uid = 0; 1176 de->de_gid = 0; 1177 de->de_mode = 0755; 1178 de->de_inode = alloc_unr(devfs_inos); 1179 de->de_dirent->d_type = DT_LNK; 1180 i = strlen(ap->a_target) + 1; 1181 de->de_symlink = malloc(i, M_DEVFS, M_WAITOK); 1182 
bcopy(ap->a_target, de->de_symlink, i); 1183 sx_xlock(&dmp->dm_lock); 1184 #ifdef MAC 1185 mac_create_devfs_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de); 1186 #endif 1187 TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); 1188 return (devfs_allocv(de, ap->a_dvp->v_mount, ap->a_vpp, td)); 1189 } 1190 1191 /* ARGSUSED */ 1192 static int 1193 devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td) 1194 { 1195 struct cdev *dev; 1196 int error, ioflag, resid; 1197 struct cdevsw *dsw; 1198 1199 error = devfs_fp_check(fp, &dev, &dsw); 1200 if (error) 1201 return (error); 1202 KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); 1203 ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC); 1204 if (ioflag & O_DIRECT) 1205 ioflag |= IO_DIRECT; 1206 if ((flags & FOF_OFFSET) == 0) 1207 uio->uio_offset = fp->f_offset; 1208 1209 resid = uio->uio_resid; 1210 1211 error = dsw->d_write(dev, uio, ioflag); 1212 dev_relthread(dev); 1213 if (uio->uio_resid != resid || (error == 0 && resid != 0)) { 1214 vfs_timestamp(&dev->si_ctime); 1215 dev->si_mtime = dev->si_ctime; 1216 } 1217 1218 if ((flags & FOF_OFFSET) == 0) 1219 fp->f_offset = uio->uio_offset; 1220 fp->f_nextoff = uio->uio_offset; 1221 return (error); 1222 } 1223 1224 dev_t 1225 dev2udev(struct cdev *x) 1226 { 1227 if (x == NULL) 1228 return (NODEV); 1229 return (x->si_priv->cdp_inode); 1230 } 1231 1232 static struct fileops devfs_ops_f = { 1233 .fo_read = devfs_read_f, 1234 .fo_write = devfs_write_f, 1235 .fo_ioctl = devfs_ioctl_f, 1236 .fo_poll = devfs_poll_f, 1237 .fo_kqfilter = devfs_kqfilter_f, 1238 .fo_stat = devfs_stat_f, 1239 .fo_close = devfs_close_f, 1240 .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE 1241 }; 1242 1243 static struct vop_vector devfs_vnodeops = { 1244 .vop_default = &default_vnodeops, 1245 1246 .vop_access = devfs_access, 1247 .vop_getattr = devfs_getattr, 1248 .vop_ioctl = devfs_rioctl, 1249 .vop_lookup = devfs_lookup, 1250 .vop_mknod = 
devfs_mknod, 1251 .vop_pathconf = devfs_pathconf, 1252 .vop_read = devfs_rread, 1253 .vop_readdir = devfs_readdir, 1254 .vop_readlink = devfs_readlink, 1255 .vop_reclaim = devfs_reclaim, 1256 .vop_remove = devfs_remove, 1257 .vop_revoke = devfs_revoke, 1258 .vop_setattr = devfs_setattr, 1259 #ifdef MAC 1260 .vop_setlabel = devfs_setlabel, 1261 #endif 1262 .vop_symlink = devfs_symlink, 1263 }; 1264 1265 static struct vop_vector devfs_specops = { 1266 .vop_default = &default_vnodeops, 1267 1268 .vop_access = devfs_access, 1269 .vop_advlock = devfs_advlock, 1270 .vop_bmap = VOP_PANIC, 1271 .vop_close = devfs_close, 1272 .vop_create = VOP_PANIC, 1273 .vop_fsync = devfs_fsync, 1274 .vop_getattr = devfs_getattr, 1275 .vop_lease = VOP_NULL, 1276 .vop_link = VOP_PANIC, 1277 .vop_mkdir = VOP_PANIC, 1278 .vop_mknod = VOP_PANIC, 1279 .vop_open = devfs_open, 1280 .vop_pathconf = devfs_pathconf, 1281 .vop_print = devfs_print, 1282 .vop_read = VOP_PANIC, 1283 .vop_readdir = VOP_PANIC, 1284 .vop_readlink = VOP_PANIC, 1285 .vop_reallocblks = VOP_PANIC, 1286 .vop_reclaim = devfs_reclaim, 1287 .vop_remove = devfs_remove, 1288 .vop_rename = VOP_PANIC, 1289 .vop_revoke = devfs_revoke, 1290 .vop_rmdir = VOP_PANIC, 1291 .vop_setattr = devfs_setattr, 1292 #ifdef MAC 1293 .vop_setlabel = devfs_setlabel, 1294 #endif 1295 .vop_strategy = VOP_PANIC, 1296 .vop_symlink = VOP_PANIC, 1297 .vop_write = VOP_PANIC, 1298 }; 1299 1300 /* 1301 * Our calling convention to the device drivers used to be that we passed 1302 * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_ 1303 * flags instead since that's what open(), close() and ioctl() takes and 1304 * we don't really want vnode.h in device drivers. 1305 * We solved the source compatibility by redefining some vnode flags to 1306 * be the same as the fcntl ones and by sending down the bitwise OR of 1307 * the respective fcntl/vnode flags. These CTASSERTS make sure nobody 1308 * pulls the rug out under this. 
1309 */ 1310 CTASSERT(O_NONBLOCK == IO_NDELAY); 1311 CTASSERT(O_FSYNC == IO_SYNC); 1312