1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2000-2004 5 * Poul-Henning Kamp. All rights reserved. 6 * Copyright (c) 1989, 1992-1993, 1995 7 * The Regents of the University of California. All rights reserved. 8 * 9 * This code is derived from software donated to Berkeley by 10 * Jan-Simon Pendry. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)kernfs_vnops.c 8.15 (Berkeley) 5/21/95 34 * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43 35 * 36 * $FreeBSD$ 37 */ 38 39 /* 40 * TODO: 41 * mkdir: want it ? 
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/dirent.h>
#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/ttycom.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

/* Forward declarations; the definitions are elsewhere in this file. */
static struct vop_vector devfs_vnodeops;
static struct vop_vector devfs_specops;
static struct fileops devfs_ops_f;

#include <fs/devfs/devfs.h>
#include <fs/devfs/devfs_int.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>

static MALLOC_DEFINE(M_CDEVPDATA, "DEVFSP", "Metainfo for cdev-fp data");

/* Serializes the dirent <-> vnode linkage (de_vnode / v_data). */
struct mtx devfs_de_interlock;
MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF);
/* Held shared around dev_clone event invocations; drained on unmount. */
struct sx clone_drain_lock;
SX_SYSINIT(clone_drain_lock, &clone_drain_lock, "clone events drain lock");
/* Protects per-descriptor cdevpriv records (f_cdevpriv lists). */
struct mtx cdevpriv_mtx;
MTX_SYSINIT(cdevpriv_mtx, &cdevpriv_mtx, "cdevpriv lock", MTX_DEF);

SYSCTL_DECL(_vfs_devfs);

static int devfs_dotimes;
SYSCTL_INT(_vfs_devfs, OID_AUTO, dotimes, CTLFLAG_RW,
    &devfs_dotimes, 0, "Update timestamps on DEVFS with default precision");

/*
 * Update devfs node timestamp.  Note that updates are unlocked and
 * stat(2) could see partially updated times.
102 */ 103 static void 104 devfs_timestamp(struct timespec *tsp) 105 { 106 time_t ts; 107 108 if (devfs_dotimes) { 109 vfs_timestamp(tsp); 110 } else { 111 ts = time_second; 112 if (tsp->tv_sec != ts) { 113 tsp->tv_sec = ts; 114 tsp->tv_nsec = 0; 115 } 116 } 117 } 118 119 static int 120 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp, 121 int *ref) 122 { 123 124 *dswp = devvn_refthread(fp->f_vnode, devp, ref); 125 if (*devp != fp->f_data) { 126 if (*dswp != NULL) 127 dev_relthread(*devp, *ref); 128 return (ENXIO); 129 } 130 KASSERT((*devp)->si_refcount > 0, 131 ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp))); 132 if (*dswp == NULL) 133 return (ENXIO); 134 curthread->td_fpop = fp; 135 return (0); 136 } 137 138 int 139 devfs_get_cdevpriv(void **datap) 140 { 141 struct file *fp; 142 struct cdev_privdata *p; 143 int error; 144 145 fp = curthread->td_fpop; 146 if (fp == NULL) 147 return (EBADF); 148 p = fp->f_cdevpriv; 149 if (p != NULL) { 150 error = 0; 151 *datap = p->cdpd_data; 152 } else 153 error = ENOENT; 154 return (error); 155 } 156 157 int 158 devfs_set_cdevpriv(void *priv, d_priv_dtor_t *priv_dtr) 159 { 160 struct file *fp; 161 struct cdev_priv *cdp; 162 struct cdev_privdata *p; 163 int error; 164 165 fp = curthread->td_fpop; 166 if (fp == NULL) 167 return (ENOENT); 168 cdp = cdev2priv((struct cdev *)fp->f_data); 169 p = malloc(sizeof(struct cdev_privdata), M_CDEVPDATA, M_WAITOK); 170 p->cdpd_data = priv; 171 p->cdpd_dtr = priv_dtr; 172 p->cdpd_fp = fp; 173 mtx_lock(&cdevpriv_mtx); 174 if (fp->f_cdevpriv == NULL) { 175 LIST_INSERT_HEAD(&cdp->cdp_fdpriv, p, cdpd_list); 176 fp->f_cdevpriv = p; 177 mtx_unlock(&cdevpriv_mtx); 178 error = 0; 179 } else { 180 mtx_unlock(&cdevpriv_mtx); 181 free(p, M_CDEVPDATA); 182 error = EBUSY; 183 } 184 return (error); 185 } 186 187 void 188 devfs_destroy_cdevpriv(struct cdev_privdata *p) 189 { 190 191 mtx_assert(&cdevpriv_mtx, MA_OWNED); 192 KASSERT(p->cdpd_fp->f_cdevpriv == p, 193 
("devfs_destoy_cdevpriv %p != %p", p->cdpd_fp->f_cdevpriv, p)); 194 p->cdpd_fp->f_cdevpriv = NULL; 195 LIST_REMOVE(p, cdpd_list); 196 mtx_unlock(&cdevpriv_mtx); 197 (p->cdpd_dtr)(p->cdpd_data); 198 free(p, M_CDEVPDATA); 199 } 200 201 static void 202 devfs_fpdrop(struct file *fp) 203 { 204 struct cdev_privdata *p; 205 206 mtx_lock(&cdevpriv_mtx); 207 if ((p = fp->f_cdevpriv) == NULL) { 208 mtx_unlock(&cdevpriv_mtx); 209 return; 210 } 211 devfs_destroy_cdevpriv(p); 212 } 213 214 void 215 devfs_clear_cdevpriv(void) 216 { 217 struct file *fp; 218 219 fp = curthread->td_fpop; 220 if (fp == NULL) 221 return; 222 devfs_fpdrop(fp); 223 } 224 225 /* 226 * On success devfs_populate_vp() returns with dmp->dm_lock held. 227 */ 228 static int 229 devfs_populate_vp(struct vnode *vp) 230 { 231 struct devfs_dirent *de; 232 struct devfs_mount *dmp; 233 int locked; 234 235 ASSERT_VOP_LOCKED(vp, "devfs_populate_vp"); 236 237 dmp = VFSTODEVFS(vp->v_mount); 238 locked = VOP_ISLOCKED(vp); 239 240 sx_xlock(&dmp->dm_lock); 241 DEVFS_DMP_HOLD(dmp); 242 243 /* Can't call devfs_populate() with the vnode lock held. 
 */
	VOP_UNLOCK(vp);
	devfs_populate(dmp);

	sx_xunlock(&dmp->dm_lock);
	vn_lock(vp, locked | LK_RETRY);
	sx_xlock(&dmp->dm_lock);
	/* The mount may have gone away while we slept unlocked. */
	if (DEVFS_DMP_DROP(dmp)) {
		sx_xunlock(&dmp->dm_lock);
		devfs_unmount_final(dmp);
		return (ERESTART);
	}
	/* The vnode or its dirent may have been doomed while unlocked. */
	if (VN_IS_DOOMED(vp)) {
		sx_xunlock(&dmp->dm_lock);
		return (ERESTART);
	}
	de = vp->v_data;
	KASSERT(de != NULL,
	    ("devfs_populate_vp: vp->v_data == NULL but vnode not doomed"));
	if ((de->de_flags & DE_DOOMED) != 0) {
		sx_xunlock(&dmp->dm_lock);
		return (ERESTART);
	}

	return (0);
}

/*
 * vop_vptocnp: translate vnode 'vp' to its name in the parent
 * directory.  The name is copied backwards into ap->a_buf (ending at
 * *ap->a_buflen) and the held parent vnode is returned in *ap->a_vpp.
 * devfs_populate_vp() left dm_lock held; it is released on all paths.
 */
static int
devfs_vptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct devfs_mount *dmp;
	char *buf = ap->a_buf;
	size_t *buflen = ap->a_buflen;
	struct devfs_dirent *dd, *de;
	int i, error;

	dmp = VFSTODEVFS(vp->v_mount);

	error = devfs_populate_vp(vp);
	if (error != 0)
		return (error);

	if (vp->v_type != VCHR && vp->v_type != VDIR) {
		error = ENOENT;
		goto finished;
	}

	dd = vp->v_data;
	/* The root directory is its own parent. */
	if (vp->v_type == VDIR && dd == dmp->dm_rootdir) {
		*dvp = vp;
		vref(*dvp);
		goto finished;
	}

	i = *buflen;
	i -= dd->de_dirent->d_namlen;
	if (i < 0) {
		error = ENOMEM;
		goto finished;
	}
	bcopy(dd->de_dirent->d_name, buf + i, dd->de_dirent->d_namlen);
	*buflen = i;
	de = devfs_parent_dirent(dd);
	if (de == NULL) {
		error = ENOENT;
		goto finished;
	}
	mtx_lock(&devfs_de_interlock);
	*dvp = de->de_vnode;
	if (*dvp != NULL) {
		/* Hold first so the vnode cannot be freed, then vref. */
		VI_LOCK(*dvp);
		mtx_unlock(&devfs_de_interlock);
		vholdl(*dvp);
		VI_UNLOCK(*dvp);
		vref(*dvp);
		vdrop(*dvp);
	} else {
		mtx_unlock(&devfs_de_interlock);
		error = ENOENT;
	}
finished:
	sx_xunlock(&dmp->dm_lock);
	return (error);
}

/*
 * Construct the fully qualified path name relative to the mountpoint.
 * If a NULL cnp is provided, no '/' is appended to the resulting path.
 */
char *
devfs_fqpn(char *buf, struct devfs_mount *dmp, struct devfs_dirent *dd,
    struct componentname *cnp)
{
	int i;
	struct devfs_dirent *de;

	sx_assert(&dmp->dm_lock, SA_LOCKED);

	/* Build the path right-to-left; buf must hold SPECNAMELEN + 1. */
	i = SPECNAMELEN;
	buf[i] = '\0';
	if (cnp != NULL)
		i -= cnp->cn_namelen;
	if (i < 0)
		return (NULL);
	if (cnp != NULL)
		bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen);
	de = dd;
	while (de != dmp->dm_rootdir) {
		if (cnp != NULL || i < SPECNAMELEN) {
			i--;
			if (i < 0)
				return (NULL);
			buf[i] = '/';
		}
		i -= de->de_dirent->d_namlen;
		if (i < 0)
			return (NULL);
		bcopy(de->de_dirent->d_name, buf + i,
		    de->de_dirent->d_namlen);
		de = devfs_parent_dirent(de);
		if (de == NULL)
			return (NULL);
	}
	return (buf + i);
}

/*
 * Release the dirent and mount references taken by devfs_allocv().
 * Returns non-zero when the dirent was doomed; frees the dirent and/or
 * finishes the unmount when the last reference is dropped here.
 * dm_lock is released when requested via 'drop_dm_lock' or when the
 * dirent turned out to be doomed.
 */
static int
devfs_allocv_drop_refs(int drop_dm_lock, struct devfs_mount *dmp,
    struct devfs_dirent *de)
{
	int not_found;

	not_found = 0;
	if (de->de_flags & DE_DOOMED)
		not_found = 1;
	if (DEVFS_DE_DROP(de)) {
		KASSERT(not_found == 1, ("DEVFS de dropped but not doomed"));
		devfs_dirent_free(de);
	}
	if (DEVFS_DMP_DROP(dmp)) {
		KASSERT(not_found == 1,
		    ("DEVFS mount struct freed before dirent"));
		/* 2 marks that dm_lock was already released just below. */
		not_found = 2;
		sx_xunlock(&dmp->dm_lock);
		devfs_unmount_final(dmp);
	}
	if (not_found == 1 || (drop_dm_lock && not_found != 2))
		sx_unlock(&dmp->dm_lock);
	return (not_found);
}

/*
 * insmntque1() failure callback: detach the new vnode from its dirent
 * under the interlock, then destroy it.
 */
static void
devfs_insmntque_dtr(struct vnode *vp, void *arg)
{
	struct devfs_dirent *de;

	de = (struct devfs_dirent *)arg;
	mtx_lock(&devfs_de_interlock);
	vp->v_data = NULL;
	de->de_vnode = NULL;
	mtx_unlock(&devfs_de_interlock);
	vgone(vp);
	vput(vp);
}

/*
 * devfs_allocv shall be entered with dmp->dm_lock held, and it drops
 * it on return.
413 */ 414 int 415 devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode, 416 struct vnode **vpp) 417 { 418 int error; 419 struct vnode *vp; 420 struct cdev *dev; 421 struct devfs_mount *dmp; 422 struct cdevsw *dsw; 423 424 dmp = VFSTODEVFS(mp); 425 if (de->de_flags & DE_DOOMED) { 426 sx_xunlock(&dmp->dm_lock); 427 return (ENOENT); 428 } 429 loop: 430 DEVFS_DE_HOLD(de); 431 DEVFS_DMP_HOLD(dmp); 432 mtx_lock(&devfs_de_interlock); 433 vp = de->de_vnode; 434 if (vp != NULL) { 435 VI_LOCK(vp); 436 mtx_unlock(&devfs_de_interlock); 437 sx_xunlock(&dmp->dm_lock); 438 vget(vp, lockmode | LK_INTERLOCK | LK_RETRY, curthread); 439 sx_xlock(&dmp->dm_lock); 440 if (devfs_allocv_drop_refs(0, dmp, de)) { 441 vput(vp); 442 return (ENOENT); 443 } 444 else if (VN_IS_DOOMED(vp)) { 445 mtx_lock(&devfs_de_interlock); 446 if (de->de_vnode == vp) { 447 de->de_vnode = NULL; 448 vp->v_data = NULL; 449 } 450 mtx_unlock(&devfs_de_interlock); 451 vput(vp); 452 goto loop; 453 } 454 sx_xunlock(&dmp->dm_lock); 455 *vpp = vp; 456 return (0); 457 } 458 mtx_unlock(&devfs_de_interlock); 459 if (de->de_dirent->d_type == DT_CHR) { 460 if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) { 461 devfs_allocv_drop_refs(1, dmp, de); 462 return (ENOENT); 463 } 464 dev = &de->de_cdp->cdp_c; 465 } else { 466 dev = NULL; 467 } 468 error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp); 469 if (error != 0) { 470 devfs_allocv_drop_refs(1, dmp, de); 471 printf("devfs_allocv: failed to allocate new vnode\n"); 472 return (error); 473 } 474 475 if (de->de_dirent->d_type == DT_CHR) { 476 vp->v_type = VCHR; 477 VI_LOCK(vp); 478 dev_lock(); 479 dev_refl(dev); 480 /* XXX: v_rdev should be protect by vnode lock */ 481 vp->v_rdev = dev; 482 VNPASS(vp->v_usecount == 1, vp); 483 dev->si_usecount++; 484 /* Special casing of ttys for deadfs. Probably redundant. 
*/ 485 dsw = dev->si_devsw; 486 if (dsw != NULL && (dsw->d_flags & D_TTY) != 0) 487 vp->v_vflag |= VV_ISTTY; 488 dev_unlock(); 489 VI_UNLOCK(vp); 490 if ((dev->si_flags & SI_ETERNAL) != 0) 491 vp->v_vflag |= VV_ETERNALDEV; 492 vp->v_op = &devfs_specops; 493 } else if (de->de_dirent->d_type == DT_DIR) { 494 vp->v_type = VDIR; 495 } else if (de->de_dirent->d_type == DT_LNK) { 496 vp->v_type = VLNK; 497 } else { 498 vp->v_type = VBAD; 499 } 500 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWITNESS); 501 VN_LOCK_ASHARE(vp); 502 mtx_lock(&devfs_de_interlock); 503 vp->v_data = de; 504 de->de_vnode = vp; 505 mtx_unlock(&devfs_de_interlock); 506 error = insmntque1(vp, mp, devfs_insmntque_dtr, de); 507 if (error != 0) { 508 (void) devfs_allocv_drop_refs(1, dmp, de); 509 return (error); 510 } 511 if (devfs_allocv_drop_refs(0, dmp, de)) { 512 vput(vp); 513 return (ENOENT); 514 } 515 #ifdef MAC 516 mac_devfs_vnode_associate(mp, de, vp); 517 #endif 518 sx_xunlock(&dmp->dm_lock); 519 *vpp = vp; 520 return (0); 521 } 522 523 static int 524 devfs_access(struct vop_access_args *ap) 525 { 526 struct vnode *vp = ap->a_vp; 527 struct devfs_dirent *de; 528 struct proc *p; 529 int error; 530 531 de = vp->v_data; 532 if (vp->v_type == VDIR) 533 de = de->de_dir; 534 535 error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid, 536 ap->a_accmode, ap->a_cred, NULL); 537 if (error == 0) 538 return (0); 539 if (error != EACCES) 540 return (error); 541 p = ap->a_td->td_proc; 542 /* We do, however, allow access to the controlling terminal */ 543 PROC_LOCK(p); 544 if (!(p->p_flag & P_CONTROLT)) { 545 PROC_UNLOCK(p); 546 return (error); 547 } 548 if (p->p_session->s_ttydp == de->de_cdp) 549 error = 0; 550 PROC_UNLOCK(p); 551 return (error); 552 } 553 554 _Static_assert(((FMASK | FCNTLFLAGS) & (FLASTCLOSE | FREVOKE)) == 0, 555 "devfs-only flag reuse failed"); 556 557 static int 558 devfs_close(struct vop_close_args *ap) 559 { 560 struct vnode *vp = ap->a_vp, *oldvp; 561 struct thread *td = 
ap->a_td;
	struct proc *p;
	struct cdev *dev = vp->v_rdev;
	struct cdevsw *dsw;
	int dflags, error, ref, vp_locked;

	/*
	 * XXX: Don't call d_close() if we were called because of
	 * XXX: insmntque1() failure.
	 */
	if (vp->v_data == NULL)
		return (0);

	/*
	 * Hack: a tty device that is a controlling terminal
	 * has a reference from the session structure.
	 * We cannot easily tell that a character device is
	 * a controlling terminal, unless it is the closing
	 * process' controlling terminal.  In that case,
	 * if the reference count is 2 (this last descriptor
	 * plus the session), release the reference from the session.
	 */
	if (vp->v_usecount == 2 && td != NULL) {
		p = td->td_proc;
		PROC_LOCK(p);
		if (vp == p->p_session->s_ttyvp) {
			PROC_UNLOCK(p);
			oldvp = NULL;
			sx_xlock(&proctree_lock);
			/* Recheck under proctree_lock; it may have raced. */
			if (vp == p->p_session->s_ttyvp) {
				SESS_LOCK(p->p_session);
				VI_LOCK(vp);
				if (vp->v_usecount == 2 && vcount(vp) == 1 &&
				    !VN_IS_DOOMED(vp)) {
					p->p_session->s_ttyvp = NULL;
					p->p_session->s_ttydp = NULL;
					oldvp = vp;
				}
				VI_UNLOCK(vp);
				SESS_UNLOCK(p->p_session);
			}
			sx_xunlock(&proctree_lock);
			if (oldvp != NULL)
				vrele(oldvp);
		} else
			PROC_UNLOCK(p);
	}
	/*
	 * We do not want to really close the device if it
	 * is still in use unless we are trying to close it
	 * forcibly. Since every use (buffer, vnode, swap, cmap)
	 * holds a reference to the vnode, and because we mark
	 * any other vnodes that alias this device, when the
	 * sum of the reference counts on all the aliased
	 * vnodes descends to one, we are on last close.
	 */
	dsw = dev_refthread(dev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	dflags = 0;
	VI_LOCK(vp);
	if (vp->v_usecount == 1 && vcount(vp) == 1)
		dflags |= FLASTCLOSE;
	if (VN_IS_DOOMED(vp)) {
		/* Forced close. */
		dflags |= FREVOKE | FNONBLOCK;
	} else if (dsw->d_flags & D_TRACKCLOSE) {
		/* Keep device updated on status. */
	} else if ((dflags & FLASTCLOSE) == 0) {
		/* Not the last close and the driver does not track: skip. */
		VI_UNLOCK(vp);
		dev_relthread(dev, ref);
		return (0);
	}
	/* Keep the vnode alive while unlocked across d_close(). */
	vholdnz(vp);
	VI_UNLOCK(vp);
	vp_locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp);
	KASSERT(dev->si_refcount > 0,
	    ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev)));
	error = dsw->d_close(dev, ap->a_fflag | dflags, S_IFCHR, td);
	dev_relthread(dev, ref);
	vn_lock(vp, vp_locked | LK_RETRY);
	vdrop(vp);
	return (error);
}

/*
 * fileops close for devfs descriptors: run the generic vnode close
 * with td_fpop pointing at this file, then drop any cdevpriv record.
 */
static int
devfs_close_f(struct file *fp, struct thread *td)
{
	int error;
	struct file *fpop;

	/*
	 * NB: td may be NULL if this descriptor is closed due to
	 * garbage collection from a closed UNIX domain socket.
	 */
	fpop = curthread->td_fpop;
	curthread->td_fpop = fp;
	error = vnops.fo_close(fp, td);
	curthread->td_fpop = fpop;

	/*
	 * The f_cdevpriv cannot be assigned non-NULL value while we
	 * are destroying the file.
665 */ 666 if (fp->f_cdevpriv != NULL) 667 devfs_fpdrop(fp); 668 return (error); 669 } 670 671 static int 672 devfs_getattr(struct vop_getattr_args *ap) 673 { 674 struct vnode *vp = ap->a_vp; 675 struct vattr *vap = ap->a_vap; 676 struct devfs_dirent *de; 677 struct devfs_mount *dmp; 678 struct cdev *dev; 679 struct timeval boottime; 680 int error; 681 682 error = devfs_populate_vp(vp); 683 if (error != 0) 684 return (error); 685 686 dmp = VFSTODEVFS(vp->v_mount); 687 sx_xunlock(&dmp->dm_lock); 688 689 de = vp->v_data; 690 KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp)); 691 if (vp->v_type == VDIR) { 692 de = de->de_dir; 693 KASSERT(de != NULL, 694 ("Null dir dirent in devfs_getattr vp=%p", vp)); 695 } 696 vap->va_uid = de->de_uid; 697 vap->va_gid = de->de_gid; 698 vap->va_mode = de->de_mode; 699 if (vp->v_type == VLNK) 700 vap->va_size = strlen(de->de_symlink); 701 else if (vp->v_type == VDIR) 702 vap->va_size = vap->va_bytes = DEV_BSIZE; 703 else 704 vap->va_size = 0; 705 if (vp->v_type != VDIR) 706 vap->va_bytes = 0; 707 vap->va_blocksize = DEV_BSIZE; 708 vap->va_type = vp->v_type; 709 710 getboottime(&boottime); 711 #define fix(aa) \ 712 do { \ 713 if ((aa).tv_sec <= 3600) { \ 714 (aa).tv_sec = boottime.tv_sec; \ 715 (aa).tv_nsec = boottime.tv_usec * 1000; \ 716 } \ 717 } while (0) 718 719 if (vp->v_type != VCHR) { 720 fix(de->de_atime); 721 vap->va_atime = de->de_atime; 722 fix(de->de_mtime); 723 vap->va_mtime = de->de_mtime; 724 fix(de->de_ctime); 725 vap->va_ctime = de->de_ctime; 726 } else { 727 dev = vp->v_rdev; 728 fix(dev->si_atime); 729 vap->va_atime = dev->si_atime; 730 fix(dev->si_mtime); 731 vap->va_mtime = dev->si_mtime; 732 fix(dev->si_ctime); 733 vap->va_ctime = dev->si_ctime; 734 735 vap->va_rdev = cdev2priv(dev)->cdp_inode; 736 } 737 vap->va_gen = 0; 738 vap->va_flags = 0; 739 vap->va_filerev = 0; 740 vap->va_nlink = de->de_links; 741 vap->va_fileid = de->de_inode; 742 743 return (error); 744 } 745 746 /* ARGSUSED */ 747 static 
int
devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td)
{
	struct file *fpop;
	int error;

	/* Run the generic vnode ioctl with td_fpop naming this file. */
	fpop = td->td_fpop;
	td->td_fpop = fp;
	error = vnops.fo_ioctl(fp, com, data, cred, td);
	td->td_fpop = fpop;
	return (error);
}

/*
 * Return the user buffer pointer embedded in a FIODGNAME argument,
 * handling the 32-bit compat layout when built with COMPAT_FREEBSD32.
 */
void *
fiodgname_buf_get_ptr(void *fgnp, u_long com)
{
	union {
		struct fiodgname_arg fgn;
#ifdef COMPAT_FREEBSD32
		struct fiodgname_arg32 fgn32;
#endif
	} *fgnup;

	fgnup = fgnp;
	switch (com) {
	case FIODGNAME:
		return (fgnup->fgn.buf);
#ifdef COMPAT_FREEBSD32
	case FIODGNAME_32:
		return ((void *)(uintptr_t)fgnup->fgn32.buf);
#endif
	default:
		panic("Unhandled ioctl command %ld", com);
	}
}

/*
 * vop_ioctl: handle the generic FIODTYPE/FIODGNAME requests here and
 * pass everything else down to the driver's d_ioctl.  A successful
 * TIOCSCTTY additionally records the vnode as the session's
 * controlling terminal.
 */
static int
devfs_ioctl(struct vop_ioctl_args *ap)
{
	struct fiodgname_arg *fgn;
	struct vnode *vpold, *vp;
	struct cdevsw *dsw;
	struct thread *td;
	struct cdev *dev;
	int error, ref, i;
	const char *p;
	u_long com;

	vp = ap->a_vp;
	com = ap->a_command;
	td = ap->a_td;

	dsw = devvn_refthread(vp, &dev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	KASSERT(dev->si_refcount > 0,
	    ("devfs: un-referenced struct cdev *(%s)", devtoname(dev)));

	switch (com) {
	case FIODTYPE:
		*(int *)ap->a_data = dsw->d_flags & D_TYPEMASK;
		error = 0;
		break;
	case FIODGNAME:
#ifdef COMPAT_FREEBSD32
	case FIODGNAME_32:
#endif
		fgn = ap->a_data;
		p = devtoname(dev);
		i = strlen(p) + 1;
		if (i > fgn->len)
			error = EINVAL;
		else
			error = copyout(p, fiodgname_buf_get_ptr(fgn, com), i);
		break;
	default:
		error = dsw->d_ioctl(dev, com, ap->a_data, ap->a_fflag, td);
	}

	dev_relthread(dev, ref);
	if (error == ENOIOCTL)
		error = ENOTTY;

	if (error == 0 && com == TIOCSCTTY) {
		/*
		 * Do nothing if reassigning same control tty, or if the
		 * control tty has already disappeared.  If it disappeared,
		 * it's because we were racing with TIOCNOTTY.  TIOCNOTTY
		 * already took care of releasing the old vnode and we have
		 * nothing left to do.
		 */
		sx_slock(&proctree_lock);
		if (td->td_proc->p_session->s_ttyvp == vp ||
		    td->td_proc->p_session->s_ttyp == NULL) {
			sx_sunlock(&proctree_lock);
			return (0);
		}

		vpold = td->td_proc->p_session->s_ttyvp;
		VREF(vp);
		SESS_LOCK(td->td_proc->p_session);
		td->td_proc->p_session->s_ttyvp = vp;
		td->td_proc->p_session->s_ttydp = cdev2priv(dev);
		SESS_UNLOCK(td->td_proc->p_session);

		sx_sunlock(&proctree_lock);

		/* Get rid of reference to old control tty */
		if (vpold)
			vrele(vpold);
	}
	return (error);
}

/* ARGSUSED */
static int
devfs_kqfilter_f(struct file *fp, struct knote *kn)
{
	struct cdev *dev;
	struct cdevsw *dsw;
	int error, ref;
	struct file *fpop;
	struct thread *td;

	td = curthread;
	fpop = td->td_fpop;
	error = devfs_fp_check(fp, &dev, &dsw, &ref);
	if (error)
		return (error);
	error = dsw->d_kqfilter(dev, kn);
	td->td_fpop = fpop;
	dev_relthread(dev, ref);
	return (error);
}

/*
 * Check whether td's prison may see the device behind dirent 'de'.
 * Devices without a cdev or without a cached credential are always
 * visible; otherwise prison_check() decides, with an exception for
 * the process' controlling terminal.
 */
static inline int
devfs_prison_check(struct devfs_dirent *de, struct thread *td)
{
	struct cdev_priv *cdp;
	struct ucred *dcr;
	struct proc *p;
	int error;

	cdp = de->de_cdp;
	if (cdp == NULL)
		return (0);
	dcr = cdp->cdp_c.si_cred;
	if (dcr == NULL)
		return (0);

	error = prison_check(td->td_ucred, dcr);
	if (error == 0)
		return (0);
	/* We do, however, allow access to the controlling terminal */
	p = td->td_proc;
	PROC_LOCK(p);
	if (!(p->p_flag & P_CONTROLT)) {
		PROC_UNLOCK(p);
		return (error);
	}
	if (p->p_session->s_ttydp == cdp)
		error = 0;
	PROC_UNLOCK(p);
	return (error);
}

static int
devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock)
{
	struct
componentname *cnp;
	struct vnode *dvp, **vpp;
	struct thread *td;
	struct devfs_dirent *de, *dd;
	struct devfs_dirent **dde;
	struct devfs_mount *dmp;
	struct mount *mp;
	struct cdev *cdev;
	int error, flags, nameiop, dvplocked;
	char specname[SPECNAMELEN + 1], *pname;

	cnp = ap->a_cnp;
	vpp = ap->a_vpp;
	dvp = ap->a_dvp;
	pname = cnp->cn_nameptr;
	td = cnp->cn_thread;
	flags = cnp->cn_flags;
	nameiop = cnp->cn_nameiop;
	mp = dvp->v_mount;
	dmp = VFSTODEVFS(mp);
	dd = dvp->v_data;
	*vpp = NULLVP;

	if ((flags & ISLASTCN) && nameiop == RENAME)
		return (EOPNOTSUPP);

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT))
		return (EIO);

	error = vn_dir_check_exec(dvp, cnp);
	if (error != 0)
		return (error);

	/* "." resolves to the directory itself. */
	if (cnp->cn_namelen == 1 && *pname == '.') {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		*vpp = dvp;
		VREF(dvp);
		return (0);
	}

	/* ".." allocates a vnode for the parent dirent. */
	if (flags & ISDOTDOT) {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		de = devfs_parent_dirent(dd);
		if (de == NULL)
			return (ENOENT);
		dvplocked = VOP_ISLOCKED(dvp);
		VOP_UNLOCK(dvp);
		error = devfs_allocv(de, mp, cnp->cn_lkflags & LK_TYPE_MASK,
		    vpp);
		/* devfs_allocv() dropped dm_lock for us. */
		*dm_unlock = 0;
		vn_lock(dvp, dvplocked | LK_RETRY);
		return (error);
	}

	dd = dvp->v_data;
	de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen, 0);
	while (de == NULL) {	/* While(...) so we can use break */

		if (nameiop == DELETE)
			return (ENOENT);

		/*
		 * OK, we didn't have an entry for the name we were asked for
		 * so we try to see if anybody can create it on demand.
		 */
		pname = devfs_fqpn(specname, dmp, dd, cnp);
		if (pname == NULL)
			break;

		cdev = NULL;
		DEVFS_DMP_HOLD(dmp);
		sx_xunlock(&dmp->dm_lock);
		sx_slock(&clone_drain_lock);
		EVENTHANDLER_INVOKE(dev_clone,
		    td->td_ucred, pname, strlen(pname), &cdev);
		sx_sunlock(&clone_drain_lock);

		if (cdev == NULL)
			sx_xlock(&dmp->dm_lock);
		else if (devfs_populate_vp(dvp) != 0) {
			/* dvp was doomed while unlocked; bail out. */
			*dm_unlock = 0;
			sx_xlock(&dmp->dm_lock);
			if (DEVFS_DMP_DROP(dmp)) {
				sx_xunlock(&dmp->dm_lock);
				devfs_unmount_final(dmp);
			} else
				sx_xunlock(&dmp->dm_lock);
			dev_rel(cdev);
			return (ENOENT);
		}
		if (DEVFS_DMP_DROP(dmp)) {
			*dm_unlock = 0;
			sx_xunlock(&dmp->dm_lock);
			devfs_unmount_final(dmp);
			if (cdev != NULL)
				dev_rel(cdev);
			return (ENOENT);
		}

		if (cdev == NULL)
			break;

		/* Pick up the dirent the clone handler created, if any. */
		dev_lock();
		dde = &cdev2priv(cdev)->cdp_dirents[dmp->dm_idx];
		if (dde != NULL && *dde != NULL)
			de = *dde;
		dev_unlock();
		dev_rel(cdev);
		break;
	}

	if (de == NULL || de->de_flags & DE_WHITEOUT) {
		if ((nameiop == CREATE || nameiop == RENAME) &&
		    (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) {
			cnp->cn_flags |= SAVENAME;
			return (EJUSTRETURN);
		}
		return (ENOENT);
	}

	if (devfs_prison_check(de, td))
		return (ENOENT);

	if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) {
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
		if (error)
			return (error);
		if (*vpp == dvp) {
			VREF(dvp);
			*vpp = dvp;
			return (0);
		}
	}
	error = devfs_allocv(de, mp, cnp->cn_lkflags & LK_TYPE_MASK, vpp);
	*dm_unlock = 0;
	return (error);
}

/*
 * vop_lookup wrapper: populate the mount, call devfs_lookupx() and
 * release dm_lock unless lookupx already handed it off.
 */
static int
devfs_lookup(struct vop_lookup_args *ap)
{
	int j;
	struct devfs_mount *dmp;
	int dm_unlock;

	if (devfs_populate_vp(ap->a_dvp) != 0)
		return (ENOTDIR);

	dmp
= VFSTODEVFS(ap->a_dvp->v_mount);
	dm_unlock = 1;
	j = devfs_lookupx(ap, &dm_unlock);
	if (dm_unlock == 1)
		sx_xunlock(&dmp->dm_lock);
	return (j);
}

/*
 * vop_mknod: only revives a whited-out character device entry with a
 * matching name; anything else fails.
 */
static int
devfs_mknod(struct vop_mknod_args *ap)
{
	struct componentname *cnp;
	struct vnode *dvp, **vpp;
	struct devfs_dirent *dd, *de;
	struct devfs_mount *dmp;
	int error;

	/*
	 * The only type of node we should be creating here is a
	 * character device, for anything else return EOPNOTSUPP.
	 */
	if (ap->a_vap->va_type != VCHR)
		return (EOPNOTSUPP);
	dvp = ap->a_dvp;
	dmp = VFSTODEVFS(dvp->v_mount);

	cnp = ap->a_cnp;
	vpp = ap->a_vpp;
	dd = dvp->v_data;

	error = ENOENT;
	sx_xlock(&dmp->dm_lock);
	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
		if (cnp->cn_namelen != de->de_dirent->d_namlen)
			continue;
		if (de->de_dirent->d_type == DT_CHR &&
		    (de->de_cdp->cdp_flags & CDP_ACTIVE) == 0)
			continue;
		if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name,
		    de->de_dirent->d_namlen) != 0)
			continue;
		if (de->de_flags & DE_WHITEOUT)
			break;
		/* Name exists and is live: cannot mknod over it. */
		goto notfound;
	}
	if (de == NULL)
		goto notfound;
	de->de_flags &= ~DE_WHITEOUT;
	/* devfs_allocv() drops dm_lock. */
	error = devfs_allocv(de, dvp->v_mount, LK_EXCLUSIVE, vpp);
	return (error);
notfound:
	sx_xunlock(&dmp->dm_lock);
	return (error);
}

/* ARGSUSED */
static int
devfs_open(struct vop_open_args *ap)
{
	struct thread *td = ap->a_td;
	struct vnode *vp = ap->a_vp;
	struct cdev *dev = vp->v_rdev;
	struct file *fp = ap->a_fp;
	int error, ref, vlocked;
	struct cdevsw *dsw;
	struct file *fpop;

	if (vp->v_type == VBLK)
		return (ENXIO);

	if (dev == NULL)
		return (ENXIO);

	/* Make this field valid before any I/O in d_open. */
	if (dev->si_iosize_max == 0)
		dev->si_iosize_max = DFLTPHYS;

	dsw = dev_refthread(dev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	if (fp == NULL && dsw->d_fdopen != NULL) {
		/* d_fdopen() needs a struct file to operate on. */
		dev_relthread(dev, ref);
		return (ENXIO);
	}

	/* Drop the vnode lock across the driver's open routine. */
	vlocked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp);

	fpop = td->td_fpop;
	td->td_fpop = fp;
	if (fp != NULL) {
		fp->f_data = dev;
		fp->f_vnode = vp;
	}
	if (dsw->d_fdopen != NULL)
		error = dsw->d_fdopen(dev, ap->a_mode, td, fp);
	else
		error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
	/* Clean up any cdevpriv upon error. */
	if (error != 0)
		devfs_clear_cdevpriv();
	td->td_fpop = fpop;

	vn_lock(vp, vlocked | LK_RETRY);
	dev_relthread(dev, ref);
	if (error != 0) {
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

#if 0	/* /dev/console */
	KASSERT(fp != NULL, ("Could not vnode bypass device on NULL fp"));
#else
	if (fp == NULL)
		return (error);
#endif
	/* Switch the descriptor to the devfs fileops for vnode bypass. */
	if (fp->f_ops == &badfileops)
		finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f);
	return (error);
}

/*
 * vop_pathconf: report devfs limits; tty-specific values are only
 * valid on vnodes flagged VV_ISTTY.
 */
static int
devfs_pathconf(struct vop_pathconf_args *ap)
{

	switch (ap->a_name) {
	case _PC_FILESIZEBITS:
		*ap->a_retval = 64;
		return (0);
	case _PC_NAME_MAX:
		*ap->a_retval = NAME_MAX;
		return (0);
	case _PC_LINK_MAX:
		*ap->a_retval = INT_MAX;
		return (0);
	case _PC_SYMLINK_MAX:
		*ap->a_retval = MAXPATHLEN;
		return (0);
	case _PC_MAX_CANON:
		if (ap->a_vp->v_vflag & VV_ISTTY) {
			*ap->a_retval = MAX_CANON;
			return (0);
		}
		return (EINVAL);
	case _PC_MAX_INPUT:
		if (ap->a_vp->v_vflag & VV_ISTTY) {
			*ap->a_retval = MAX_INPUT;
			return (0);
		}
		return (EINVAL);
	case _PC_VDISABLE:
		if (ap->a_vp->v_vflag & VV_ISTTY) {
			*ap->a_retval = _POSIX_VDISABLE;
			return (0);
		}
		return (EINVAL);
	case _PC_MAC_PRESENT:
#ifdef MAC
		/*
		 * If MAC is enabled, devfs automatically supports
		 * trivial non-persistent label storage.
		 */
		*ap->a_retval = 1;
#else
		*ap->a_retval = 0;
#endif
		return (0);
	case _PC_CHOWN_RESTRICTED:
		*ap->a_retval = 1;
		return (0);
	default:
		return (vop_stdpathconf(ap));
	}
	/* NOTREACHED */
}

/* ARGSUSED */
static int
devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
{
	struct cdev *dev;
	struct cdevsw *dsw;
	int error, ref;
	struct file *fpop;

	fpop = td->td_fpop;
	error = devfs_fp_check(fp, &dev, &dsw, &ref);
	if (error != 0) {
		/* Device is gone; fall back to the generic vnode poll. */
		error = vnops.fo_poll(fp, events, cred, td);
		return (error);
	}
	error = dsw->d_poll(dev, events, td);
	td->td_fpop = fpop;
	dev_relthread(dev, ref);
	return(error);
}

/*
 * Print out the contents of a special device vnode.
 */
static int
devfs_print(struct vop_print_args *ap)
{

	printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev));
	return (0);
}

/*
 * fileops read for devfs descriptors: dispatch to the driver's d_read
 * and update the device atime when data moved; falls back to the
 * generic vnode read when the device is gone.
 */
static int
devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred,
    int flags, struct thread *td)
{
	struct cdev *dev;
	int ioflag, error, ref;
	ssize_t resid;
	struct cdevsw *dsw;
	struct file *fpop;

	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
		return (EINVAL);
	fpop = td->td_fpop;
	error = devfs_fp_check(fp, &dev, &dsw, &ref);
	if (error != 0) {
		error = vnops.fo_read(fp, uio, cred, flags, td);
		return (error);
	}
	resid = uio->uio_resid;
	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT);
	if (ioflag & O_DIRECT)
		ioflag |= IO_DIRECT;

	foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
	error = dsw->d_read(dev, uio, ioflag);
	/* Data was transferred (or a zero-length read succeeded). */
	if (uio->uio_resid != resid || (error == 0 && resid != 0))
		devfs_timestamp(&dev->si_atime);
	td->td_fpop = fpop;
	dev_relthread(dev, ref);

	foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF);
	return (error);
}

static int
devfs_readdir(struct vop_readdir_args *ap)
{
	int error;
	struct uio *uio;
	struct dirent *dp;
	struct devfs_dirent *dd;
	struct devfs_dirent *de;
	struct devfs_mount *dmp;
	off_t off;
	int *tmp_ncookies = NULL;

	if (ap->a_vp->v_type != VDIR)
		return (ENOTDIR);

	uio = ap->a_uio;
	if (uio->uio_offset < 0)
		return (EINVAL);

	/*
	 * XXX: This is a temporary hack to get around this filesystem not
	 * supporting cookies. We store the location of the ncookies pointer
	 * in a temporary variable before calling vfs_subr.c:vfs_read_dirent()
	 * and set the number of cookies to 0. We then set the pointer to
	 * NULL so that vfs_read_dirent doesn't try to call realloc() on
	 * ap->a_cookies.
Later in this function, we restore the ap->a_ncookies 1338 * pointer to its original location before returning to the caller. 1339 */ 1340 if (ap->a_ncookies != NULL) { 1341 tmp_ncookies = ap->a_ncookies; 1342 *ap->a_ncookies = 0; 1343 ap->a_ncookies = NULL; 1344 } 1345 1346 dmp = VFSTODEVFS(ap->a_vp->v_mount); 1347 if (devfs_populate_vp(ap->a_vp) != 0) { 1348 if (tmp_ncookies != NULL) 1349 ap->a_ncookies = tmp_ncookies; 1350 return (EIO); 1351 } 1352 error = 0; 1353 de = ap->a_vp->v_data; 1354 off = 0; 1355 TAILQ_FOREACH(dd, &de->de_dlist, de_list) { 1356 KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__)); 1357 if (dd->de_flags & (DE_COVERED | DE_WHITEOUT)) 1358 continue; 1359 if (devfs_prison_check(dd, uio->uio_td)) 1360 continue; 1361 if (dd->de_dirent->d_type == DT_DIR) 1362 de = dd->de_dir; 1363 else 1364 de = dd; 1365 dp = dd->de_dirent; 1366 MPASS(dp->d_reclen == GENERIC_DIRSIZ(dp)); 1367 if (dp->d_reclen > uio->uio_resid) 1368 break; 1369 dp->d_fileno = de->de_inode; 1370 /* NOTE: d_off is the offset for the *next* entry. */ 1371 dp->d_off = off + dp->d_reclen; 1372 if (off >= uio->uio_offset) { 1373 error = vfs_read_dirent(ap, dp, off); 1374 if (error) 1375 break; 1376 } 1377 off += dp->d_reclen; 1378 } 1379 sx_xunlock(&dmp->dm_lock); 1380 uio->uio_offset = off; 1381 1382 /* 1383 * Restore ap->a_ncookies if it wasn't originally NULL in the first 1384 * place. 
1385 */ 1386 if (tmp_ncookies != NULL) 1387 ap->a_ncookies = tmp_ncookies; 1388 1389 return (error); 1390 } 1391 1392 static int 1393 devfs_readlink(struct vop_readlink_args *ap) 1394 { 1395 struct devfs_dirent *de; 1396 1397 de = ap->a_vp->v_data; 1398 return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio)); 1399 } 1400 1401 static int 1402 devfs_reclaim(struct vop_reclaim_args *ap) 1403 { 1404 struct vnode *vp; 1405 struct devfs_dirent *de; 1406 1407 vp = ap->a_vp; 1408 mtx_lock(&devfs_de_interlock); 1409 de = vp->v_data; 1410 if (de != NULL) { 1411 de->de_vnode = NULL; 1412 vp->v_data = NULL; 1413 } 1414 mtx_unlock(&devfs_de_interlock); 1415 return (0); 1416 } 1417 1418 static int 1419 devfs_reclaim_vchr(struct vop_reclaim_args *ap) 1420 { 1421 struct vnode *vp; 1422 struct cdev *dev; 1423 1424 vp = ap->a_vp; 1425 MPASS(vp->v_type == VCHR); 1426 1427 devfs_reclaim(ap); 1428 1429 VI_LOCK(vp); 1430 dev_lock(); 1431 dev = vp->v_rdev; 1432 vp->v_rdev = NULL; 1433 if (dev != NULL) 1434 dev->si_usecount -= (vp->v_usecount > 0); 1435 dev_unlock(); 1436 VI_UNLOCK(vp); 1437 if (dev != NULL) 1438 dev_rel(dev); 1439 return (0); 1440 } 1441 1442 static int 1443 devfs_remove(struct vop_remove_args *ap) 1444 { 1445 struct vnode *dvp = ap->a_dvp; 1446 struct vnode *vp = ap->a_vp; 1447 struct devfs_dirent *dd; 1448 struct devfs_dirent *de, *de_covered; 1449 struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount); 1450 1451 ASSERT_VOP_ELOCKED(dvp, "devfs_remove"); 1452 ASSERT_VOP_ELOCKED(vp, "devfs_remove"); 1453 1454 sx_xlock(&dmp->dm_lock); 1455 dd = ap->a_dvp->v_data; 1456 de = vp->v_data; 1457 if (de->de_cdp == NULL) { 1458 TAILQ_REMOVE(&dd->de_dlist, de, de_list); 1459 if (de->de_dirent->d_type == DT_LNK) { 1460 de_covered = devfs_find(dd, de->de_dirent->d_name, 1461 de->de_dirent->d_namlen, 0); 1462 if (de_covered != NULL) 1463 de_covered->de_flags &= ~DE_COVERED; 1464 } 1465 /* We need to unlock dvp because devfs_delete() may lock it. 
*/ 1466 VOP_UNLOCK(vp); 1467 if (dvp != vp) 1468 VOP_UNLOCK(dvp); 1469 devfs_delete(dmp, de, 0); 1470 sx_xunlock(&dmp->dm_lock); 1471 if (dvp != vp) 1472 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); 1473 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1474 } else { 1475 de->de_flags |= DE_WHITEOUT; 1476 sx_xunlock(&dmp->dm_lock); 1477 } 1478 return (0); 1479 } 1480 1481 /* 1482 * Revoke is called on a tty when a terminal session ends. The vnode 1483 * is orphaned by setting v_op to deadfs so we need to let go of it 1484 * as well so that we create a new one next time around. 1485 * 1486 */ 1487 static int 1488 devfs_revoke(struct vop_revoke_args *ap) 1489 { 1490 struct vnode *vp = ap->a_vp, *vp2; 1491 struct cdev *dev; 1492 struct cdev_priv *cdp; 1493 struct devfs_dirent *de; 1494 u_int i; 1495 1496 KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL")); 1497 1498 dev = vp->v_rdev; 1499 cdp = cdev2priv(dev); 1500 1501 dev_lock(); 1502 cdp->cdp_inuse++; 1503 dev_unlock(); 1504 1505 vhold(vp); 1506 vgone(vp); 1507 vdrop(vp); 1508 1509 VOP_UNLOCK(vp); 1510 loop: 1511 for (;;) { 1512 mtx_lock(&devfs_de_interlock); 1513 dev_lock(); 1514 vp2 = NULL; 1515 for (i = 0; i <= cdp->cdp_maxdirent; i++) { 1516 de = cdp->cdp_dirents[i]; 1517 if (de == NULL) 1518 continue; 1519 1520 vp2 = de->de_vnode; 1521 if (vp2 != NULL) { 1522 dev_unlock(); 1523 VI_LOCK(vp2); 1524 mtx_unlock(&devfs_de_interlock); 1525 if (vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK, 1526 curthread)) 1527 goto loop; 1528 vhold(vp2); 1529 vgone(vp2); 1530 vdrop(vp2); 1531 vput(vp2); 1532 break; 1533 } 1534 } 1535 if (vp2 != NULL) { 1536 continue; 1537 } 1538 dev_unlock(); 1539 mtx_unlock(&devfs_de_interlock); 1540 break; 1541 } 1542 dev_lock(); 1543 cdp->cdp_inuse--; 1544 if (!(cdp->cdp_flags & CDP_ACTIVE) && cdp->cdp_inuse == 0) { 1545 TAILQ_REMOVE(&cdevp_list, cdp, cdp_list); 1546 dev_unlock(); 1547 dev_rel(&cdp->cdp_c); 1548 } else 1549 dev_unlock(); 1550 1551 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1552 return (0); 
1553 } 1554 1555 static int 1556 devfs_rioctl(struct vop_ioctl_args *ap) 1557 { 1558 struct vnode *vp; 1559 struct devfs_mount *dmp; 1560 int error; 1561 1562 vp = ap->a_vp; 1563 vn_lock(vp, LK_SHARED | LK_RETRY); 1564 if (VN_IS_DOOMED(vp)) { 1565 VOP_UNLOCK(vp); 1566 return (EBADF); 1567 } 1568 dmp = VFSTODEVFS(vp->v_mount); 1569 sx_xlock(&dmp->dm_lock); 1570 VOP_UNLOCK(vp); 1571 DEVFS_DMP_HOLD(dmp); 1572 devfs_populate(dmp); 1573 if (DEVFS_DMP_DROP(dmp)) { 1574 sx_xunlock(&dmp->dm_lock); 1575 devfs_unmount_final(dmp); 1576 return (ENOENT); 1577 } 1578 error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td); 1579 sx_xunlock(&dmp->dm_lock); 1580 return (error); 1581 } 1582 1583 static int 1584 devfs_rread(struct vop_read_args *ap) 1585 { 1586 1587 if (ap->a_vp->v_type != VDIR) 1588 return (EINVAL); 1589 return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL)); 1590 } 1591 1592 static int 1593 devfs_setattr(struct vop_setattr_args *ap) 1594 { 1595 struct devfs_dirent *de; 1596 struct vattr *vap; 1597 struct vnode *vp; 1598 struct thread *td; 1599 int c, error; 1600 uid_t uid; 1601 gid_t gid; 1602 1603 vap = ap->a_vap; 1604 vp = ap->a_vp; 1605 td = curthread; 1606 if ((vap->va_type != VNON) || 1607 (vap->va_nlink != VNOVAL) || 1608 (vap->va_fsid != VNOVAL) || 1609 (vap->va_fileid != VNOVAL) || 1610 (vap->va_blocksize != VNOVAL) || 1611 (vap->va_flags != VNOVAL && vap->va_flags != 0) || 1612 (vap->va_rdev != VNOVAL) || 1613 ((int)vap->va_bytes != VNOVAL) || 1614 (vap->va_gen != VNOVAL)) { 1615 return (EINVAL); 1616 } 1617 1618 error = devfs_populate_vp(vp); 1619 if (error != 0) 1620 return (error); 1621 1622 de = vp->v_data; 1623 if (vp->v_type == VDIR) 1624 de = de->de_dir; 1625 1626 c = 0; 1627 if (vap->va_uid == (uid_t)VNOVAL) 1628 uid = de->de_uid; 1629 else 1630 uid = vap->va_uid; 1631 if (vap->va_gid == (gid_t)VNOVAL) 1632 gid = de->de_gid; 1633 else 1634 gid = vap->va_gid; 1635 if (uid != de->de_uid || gid != de->de_gid) { 1636 if 
((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid || 1637 (gid != de->de_gid && !groupmember(gid, ap->a_cred))) { 1638 error = priv_check(td, PRIV_VFS_CHOWN); 1639 if (error != 0) 1640 goto ret; 1641 } 1642 de->de_uid = uid; 1643 de->de_gid = gid; 1644 c = 1; 1645 } 1646 1647 if (vap->va_mode != (mode_t)VNOVAL) { 1648 if (ap->a_cred->cr_uid != de->de_uid) { 1649 error = priv_check(td, PRIV_VFS_ADMIN); 1650 if (error != 0) 1651 goto ret; 1652 } 1653 de->de_mode = vap->va_mode; 1654 c = 1; 1655 } 1656 1657 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { 1658 error = vn_utimes_perm(vp, vap, ap->a_cred, td); 1659 if (error != 0) 1660 goto ret; 1661 if (vap->va_atime.tv_sec != VNOVAL) { 1662 if (vp->v_type == VCHR) 1663 vp->v_rdev->si_atime = vap->va_atime; 1664 else 1665 de->de_atime = vap->va_atime; 1666 } 1667 if (vap->va_mtime.tv_sec != VNOVAL) { 1668 if (vp->v_type == VCHR) 1669 vp->v_rdev->si_mtime = vap->va_mtime; 1670 else 1671 de->de_mtime = vap->va_mtime; 1672 } 1673 c = 1; 1674 } 1675 1676 if (c) { 1677 if (vp->v_type == VCHR) 1678 vfs_timestamp(&vp->v_rdev->si_ctime); 1679 else 1680 vfs_timestamp(&de->de_mtime); 1681 } 1682 1683 ret: 1684 sx_xunlock(&VFSTODEVFS(vp->v_mount)->dm_lock); 1685 return (error); 1686 } 1687 1688 #ifdef MAC 1689 static int 1690 devfs_setlabel(struct vop_setlabel_args *ap) 1691 { 1692 struct vnode *vp; 1693 struct devfs_dirent *de; 1694 1695 vp = ap->a_vp; 1696 de = vp->v_data; 1697 1698 mac_vnode_relabel(ap->a_cred, vp, ap->a_label); 1699 mac_devfs_update(vp->v_mount, de, vp); 1700 1701 return (0); 1702 } 1703 #endif 1704 1705 static int 1706 devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td) 1707 { 1708 1709 return (vnops.fo_stat(fp, sb, cred, td)); 1710 } 1711 1712 static int 1713 devfs_symlink(struct vop_symlink_args *ap) 1714 { 1715 int i, error; 1716 struct devfs_dirent *dd; 1717 struct devfs_dirent *de, *de_covered, *de_dotdot; 1718 struct devfs_mount *dmp; 1719 
1720 error = priv_check(curthread, PRIV_DEVFS_SYMLINK); 1721 if (error) 1722 return(error); 1723 dmp = VFSTODEVFS(ap->a_dvp->v_mount); 1724 if (devfs_populate_vp(ap->a_dvp) != 0) 1725 return (ENOENT); 1726 1727 dd = ap->a_dvp->v_data; 1728 de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen); 1729 de->de_flags = DE_USER; 1730 de->de_uid = 0; 1731 de->de_gid = 0; 1732 de->de_mode = 0755; 1733 de->de_inode = alloc_unr(devfs_inos); 1734 de->de_dir = dd; 1735 de->de_dirent->d_type = DT_LNK; 1736 i = strlen(ap->a_target) + 1; 1737 de->de_symlink = malloc(i, M_DEVFS, M_WAITOK); 1738 bcopy(ap->a_target, de->de_symlink, i); 1739 #ifdef MAC 1740 mac_devfs_create_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de); 1741 #endif 1742 de_covered = devfs_find(dd, de->de_dirent->d_name, 1743 de->de_dirent->d_namlen, 0); 1744 if (de_covered != NULL) { 1745 if ((de_covered->de_flags & DE_USER) != 0) { 1746 devfs_delete(dmp, de, DEVFS_DEL_NORECURSE); 1747 sx_xunlock(&dmp->dm_lock); 1748 return (EEXIST); 1749 } 1750 KASSERT((de_covered->de_flags & DE_COVERED) == 0, 1751 ("devfs_symlink: entry %p already covered", de_covered)); 1752 de_covered->de_flags |= DE_COVERED; 1753 } 1754 1755 de_dotdot = TAILQ_FIRST(&dd->de_dlist); /* "." */ 1756 de_dotdot = TAILQ_NEXT(de_dotdot, de_list); /* ".." 
*/ 1757 TAILQ_INSERT_AFTER(&dd->de_dlist, de_dotdot, de, de_list); 1758 devfs_dir_ref_de(dmp, dd); 1759 devfs_rules_apply(dmp, de); 1760 1761 return (devfs_allocv(de, ap->a_dvp->v_mount, LK_EXCLUSIVE, ap->a_vpp)); 1762 } 1763 1764 static int 1765 devfs_truncate_f(struct file *fp, off_t length, struct ucred *cred, struct thread *td) 1766 { 1767 1768 return (vnops.fo_truncate(fp, length, cred, td)); 1769 } 1770 1771 static int 1772 devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, 1773 int flags, struct thread *td) 1774 { 1775 struct cdev *dev; 1776 int error, ioflag, ref; 1777 ssize_t resid; 1778 struct cdevsw *dsw; 1779 struct file *fpop; 1780 1781 if (uio->uio_resid > DEVFS_IOSIZE_MAX) 1782 return (EINVAL); 1783 fpop = td->td_fpop; 1784 error = devfs_fp_check(fp, &dev, &dsw, &ref); 1785 if (error != 0) { 1786 error = vnops.fo_write(fp, uio, cred, flags, td); 1787 return (error); 1788 } 1789 KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); 1790 ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC); 1791 if (ioflag & O_DIRECT) 1792 ioflag |= IO_DIRECT; 1793 foffset_lock_uio(fp, uio, flags | FOF_NOLOCK); 1794 1795 resid = uio->uio_resid; 1796 1797 error = dsw->d_write(dev, uio, ioflag); 1798 if (uio->uio_resid != resid || (error == 0 && resid != 0)) { 1799 devfs_timestamp(&dev->si_ctime); 1800 dev->si_mtime = dev->si_ctime; 1801 } 1802 td->td_fpop = fpop; 1803 dev_relthread(dev, ref); 1804 1805 foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF); 1806 return (error); 1807 } 1808 1809 static int 1810 devfs_mmap_f(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size, 1811 vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff, 1812 struct thread *td) 1813 { 1814 struct cdev *dev; 1815 struct cdevsw *dsw; 1816 struct mount *mp; 1817 struct vnode *vp; 1818 struct file *fpop; 1819 vm_object_t object; 1820 vm_prot_t maxprot; 1821 int error, ref; 1822 1823 vp = fp->f_vnode; 1824 1825 /* 1826 * 
Ensure that file and memory protections are 1827 * compatible. 1828 */ 1829 mp = vp->v_mount; 1830 if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) { 1831 maxprot = VM_PROT_NONE; 1832 if ((prot & VM_PROT_EXECUTE) != 0) 1833 return (EACCES); 1834 } else 1835 maxprot = VM_PROT_EXECUTE; 1836 if ((fp->f_flag & FREAD) != 0) 1837 maxprot |= VM_PROT_READ; 1838 else if ((prot & VM_PROT_READ) != 0) 1839 return (EACCES); 1840 1841 /* 1842 * If we are sharing potential changes via MAP_SHARED and we 1843 * are trying to get write permission although we opened it 1844 * without asking for it, bail out. 1845 * 1846 * Note that most character devices always share mappings. 1847 * The one exception is that D_MMAP_ANON devices 1848 * (i.e. /dev/zero) permit private writable mappings. 1849 * 1850 * Rely on vm_mmap_cdev() to fail invalid MAP_PRIVATE requests 1851 * as well as updating maxprot to permit writing for 1852 * D_MMAP_ANON devices rather than doing that here. 1853 */ 1854 if ((flags & MAP_SHARED) != 0) { 1855 if ((fp->f_flag & FWRITE) != 0) 1856 maxprot |= VM_PROT_WRITE; 1857 else if ((prot & VM_PROT_WRITE) != 0) 1858 return (EACCES); 1859 } 1860 maxprot &= cap_maxprot; 1861 1862 fpop = td->td_fpop; 1863 error = devfs_fp_check(fp, &dev, &dsw, &ref); 1864 if (error != 0) 1865 return (error); 1866 1867 error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, dev, dsw, &foff, 1868 &object); 1869 td->td_fpop = fpop; 1870 dev_relthread(dev, ref); 1871 if (error != 0) 1872 return (error); 1873 1874 error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object, 1875 foff, FALSE, td); 1876 if (error != 0) 1877 vm_object_deallocate(object); 1878 return (error); 1879 } 1880 1881 dev_t 1882 dev2udev(struct cdev *x) 1883 { 1884 if (x == NULL) 1885 return (NODEV); 1886 return (cdev2priv(x)->cdp_inode); 1887 } 1888 1889 static struct fileops devfs_ops_f = { 1890 .fo_read = devfs_read_f, 1891 .fo_write = devfs_write_f, 1892 .fo_truncate = devfs_truncate_f, 1893 .fo_ioctl = 
devfs_ioctl_f, 1894 .fo_poll = devfs_poll_f, 1895 .fo_kqfilter = devfs_kqfilter_f, 1896 .fo_stat = devfs_stat_f, 1897 .fo_close = devfs_close_f, 1898 .fo_chmod = vn_chmod, 1899 .fo_chown = vn_chown, 1900 .fo_sendfile = vn_sendfile, 1901 .fo_seek = vn_seek, 1902 .fo_fill_kinfo = vn_fill_kinfo, 1903 .fo_mmap = devfs_mmap_f, 1904 .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE 1905 }; 1906 1907 /* Vops for non-CHR vnodes in /dev. */ 1908 static struct vop_vector devfs_vnodeops = { 1909 .vop_default = &default_vnodeops, 1910 1911 .vop_access = devfs_access, 1912 .vop_getattr = devfs_getattr, 1913 .vop_ioctl = devfs_rioctl, 1914 .vop_lookup = devfs_lookup, 1915 .vop_mknod = devfs_mknod, 1916 .vop_pathconf = devfs_pathconf, 1917 .vop_read = devfs_rread, 1918 .vop_readdir = devfs_readdir, 1919 .vop_readlink = devfs_readlink, 1920 .vop_reclaim = devfs_reclaim, 1921 .vop_remove = devfs_remove, 1922 .vop_revoke = devfs_revoke, 1923 .vop_setattr = devfs_setattr, 1924 #ifdef MAC 1925 .vop_setlabel = devfs_setlabel, 1926 #endif 1927 .vop_symlink = devfs_symlink, 1928 .vop_vptocnp = devfs_vptocnp, 1929 }; 1930 VFS_VOP_VECTOR_REGISTER(devfs_vnodeops); 1931 1932 /* Vops for VCHR vnodes in /dev. 
*/ 1933 static struct vop_vector devfs_specops = { 1934 .vop_default = &default_vnodeops, 1935 1936 .vop_access = devfs_access, 1937 .vop_bmap = VOP_PANIC, 1938 .vop_close = devfs_close, 1939 .vop_create = VOP_PANIC, 1940 .vop_fsync = vop_stdfsync, 1941 .vop_getattr = devfs_getattr, 1942 .vop_ioctl = devfs_ioctl, 1943 .vop_link = VOP_PANIC, 1944 .vop_mkdir = VOP_PANIC, 1945 .vop_mknod = VOP_PANIC, 1946 .vop_open = devfs_open, 1947 .vop_pathconf = devfs_pathconf, 1948 .vop_poll = dead_poll, 1949 .vop_print = devfs_print, 1950 .vop_read = dead_read, 1951 .vop_readdir = VOP_PANIC, 1952 .vop_readlink = VOP_PANIC, 1953 .vop_reallocblks = VOP_PANIC, 1954 .vop_reclaim = devfs_reclaim_vchr, 1955 .vop_remove = devfs_remove, 1956 .vop_rename = VOP_PANIC, 1957 .vop_revoke = devfs_revoke, 1958 .vop_rmdir = VOP_PANIC, 1959 .vop_setattr = devfs_setattr, 1960 #ifdef MAC 1961 .vop_setlabel = devfs_setlabel, 1962 #endif 1963 .vop_strategy = VOP_PANIC, 1964 .vop_symlink = VOP_PANIC, 1965 .vop_vptocnp = devfs_vptocnp, 1966 .vop_write = dead_write, 1967 }; 1968 VFS_VOP_VECTOR_REGISTER(devfs_specops); 1969 1970 /* 1971 * Our calling convention to the device drivers used to be that we passed 1972 * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_ 1973 * flags instead since that's what open(), close() and ioctl() takes and 1974 * we don't really want vnode.h in device drivers. 1975 * We solved the source compatibility by redefining some vnode flags to 1976 * be the same as the fcntl ones and by sending down the bitwise OR of 1977 * the respective fcntl/vnode flags. These CTASSERTS make sure nobody 1978 * pulls the rug out under this. 1979 */ 1980 CTASSERT(O_NONBLOCK == IO_NDELAY); 1981 CTASSERT(O_FSYNC == IO_SYNC); 1982