1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2000-2004 5 * Poul-Henning Kamp. All rights reserved. 6 * Copyright (c) 1989, 1992-1993, 1995 7 * The Regents of the University of California. All rights reserved. 8 * 9 * This code is derived from software donated to Berkeley by 10 * Jan-Simon Pendry. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)kernfs_vnops.c 8.15 (Berkeley) 5/21/95 34 * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43 35 * 36 * $FreeBSD$ 37 */ 38 39 /* 40 * TODO: 41 * mkdir: want it ? 
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/ttycom.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

/*
 * Forward declarations of the vop vectors and fileops implemented in this
 * file; declared before the devfs headers below are pulled in.
 */
static struct vop_vector devfs_vnodeops;
static struct vop_vector devfs_specops;
static struct fileops devfs_ops_f;

#include <fs/devfs/devfs.h>
#include <fs/devfs/devfs_int.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>

#ifdef COMPAT_FREEBSD32
/* 32-bit compat layout of struct fiodgname_arg (pointer shrunk to 32 bits). */
struct fiodgname_arg32 {
	int		len;
	uint32_t	buf;	/* (void *) */
};
#define	FIODGNAME_32	_IOC_NEWTYPE(FIODGNAME, struct fiodgname_arg32)
#endif

/* Allocation tag for per-open-file cdev private data (struct cdev_privdata). */
static MALLOC_DEFINE(M_CDEVPDATA, "DEVFSP", "Metainfo for cdev-fp data");

/* Protects the devfs_dirent <-> vnode back-pointers (de_vnode / v_data). */
struct mtx	devfs_de_interlock;
MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF);
/* Held shared while invoking dev_clone handlers; drained on unload. */
struct sx	clone_drain_lock;
SX_SYSINIT(clone_drain_lock, &clone_drain_lock, "clone events drain lock");
/* Protects f_cdevpriv assignment/teardown across all files. */
struct mtx	cdevpriv_mtx;
MTX_SYSINIT(cdevpriv_mtx, &cdevpriv_mtx, "cdevpriv lock", MTX_DEF);

SYSCTL_DECL(_vfs_devfs);

/* vfs.devfs.dotimes: 0 = second granularity timestamps, 1 = full precision. */
static int devfs_dotimes;
SYSCTL_INT(_vfs_devfs, OID_AUTO, dotimes, CTLFLAG_RW,
    &devfs_dotimes, 0, "Update timestamps on DEVFS with default precision");

/*
 * Update devfs node timestamp.  Note that updates are unlocked and
 * stat(2) could see partially updated times.
108 */ 109 static void 110 devfs_timestamp(struct timespec *tsp) 111 { 112 time_t ts; 113 114 if (devfs_dotimes) { 115 vfs_timestamp(tsp); 116 } else { 117 ts = time_second; 118 if (tsp->tv_sec != ts) { 119 tsp->tv_sec = ts; 120 tsp->tv_nsec = 0; 121 } 122 } 123 } 124 125 static int 126 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp, 127 int *ref) 128 { 129 130 *dswp = devvn_refthread(fp->f_vnode, devp, ref); 131 if (*devp != fp->f_data) { 132 if (*dswp != NULL) 133 dev_relthread(*devp, *ref); 134 return (ENXIO); 135 } 136 KASSERT((*devp)->si_refcount > 0, 137 ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp))); 138 if (*dswp == NULL) 139 return (ENXIO); 140 curthread->td_fpop = fp; 141 return (0); 142 } 143 144 int 145 devfs_get_cdevpriv(void **datap) 146 { 147 struct file *fp; 148 struct cdev_privdata *p; 149 int error; 150 151 fp = curthread->td_fpop; 152 if (fp == NULL) 153 return (EBADF); 154 p = fp->f_cdevpriv; 155 if (p != NULL) { 156 error = 0; 157 *datap = p->cdpd_data; 158 } else 159 error = ENOENT; 160 return (error); 161 } 162 163 int 164 devfs_set_cdevpriv(void *priv, d_priv_dtor_t *priv_dtr) 165 { 166 struct file *fp; 167 struct cdev_priv *cdp; 168 struct cdev_privdata *p; 169 int error; 170 171 fp = curthread->td_fpop; 172 if (fp == NULL) 173 return (ENOENT); 174 cdp = cdev2priv((struct cdev *)fp->f_data); 175 p = malloc(sizeof(struct cdev_privdata), M_CDEVPDATA, M_WAITOK); 176 p->cdpd_data = priv; 177 p->cdpd_dtr = priv_dtr; 178 p->cdpd_fp = fp; 179 mtx_lock(&cdevpriv_mtx); 180 if (fp->f_cdevpriv == NULL) { 181 LIST_INSERT_HEAD(&cdp->cdp_fdpriv, p, cdpd_list); 182 fp->f_cdevpriv = p; 183 mtx_unlock(&cdevpriv_mtx); 184 error = 0; 185 } else { 186 mtx_unlock(&cdevpriv_mtx); 187 free(p, M_CDEVPDATA); 188 error = EBUSY; 189 } 190 return (error); 191 } 192 193 void 194 devfs_destroy_cdevpriv(struct cdev_privdata *p) 195 { 196 197 mtx_assert(&cdevpriv_mtx, MA_OWNED); 198 KASSERT(p->cdpd_fp->f_cdevpriv == p, 199 
("devfs_destoy_cdevpriv %p != %p", p->cdpd_fp->f_cdevpriv, p)); 200 p->cdpd_fp->f_cdevpriv = NULL; 201 LIST_REMOVE(p, cdpd_list); 202 mtx_unlock(&cdevpriv_mtx); 203 (p->cdpd_dtr)(p->cdpd_data); 204 free(p, M_CDEVPDATA); 205 } 206 207 static void 208 devfs_fpdrop(struct file *fp) 209 { 210 struct cdev_privdata *p; 211 212 mtx_lock(&cdevpriv_mtx); 213 if ((p = fp->f_cdevpriv) == NULL) { 214 mtx_unlock(&cdevpriv_mtx); 215 return; 216 } 217 devfs_destroy_cdevpriv(p); 218 } 219 220 void 221 devfs_clear_cdevpriv(void) 222 { 223 struct file *fp; 224 225 fp = curthread->td_fpop; 226 if (fp == NULL) 227 return; 228 devfs_fpdrop(fp); 229 } 230 231 /* 232 * On success devfs_populate_vp() returns with dmp->dm_lock held. 233 */ 234 static int 235 devfs_populate_vp(struct vnode *vp) 236 { 237 struct devfs_dirent *de; 238 struct devfs_mount *dmp; 239 int locked; 240 241 ASSERT_VOP_LOCKED(vp, "devfs_populate_vp"); 242 243 dmp = VFSTODEVFS(vp->v_mount); 244 locked = VOP_ISLOCKED(vp); 245 246 sx_xlock(&dmp->dm_lock); 247 DEVFS_DMP_HOLD(dmp); 248 249 /* Can't call devfs_populate() with the vnode lock held. 
	 */
	VOP_UNLOCK(vp, 0);
	devfs_populate(dmp);

	sx_xunlock(&dmp->dm_lock);
	/* Re-acquire in the original vnode lock mode, then the mount lock. */
	vn_lock(vp, locked | LK_RETRY);
	sx_xlock(&dmp->dm_lock);
	if (DEVFS_DMP_DROP(dmp)) {
		/* Mount went away while we were unlocked. */
		sx_xunlock(&dmp->dm_lock);
		devfs_unmount_final(dmp);
		return (ERESTART);
	}
	if ((vp->v_iflag & VI_DOOMED) != 0) {
		/* Vnode was reclaimed while unlocked. */
		sx_xunlock(&dmp->dm_lock);
		return (ERESTART);
	}
	de = vp->v_data;
	KASSERT(de != NULL,
	    ("devfs_populate_vp: vp->v_data == NULL but vnode not doomed"));
	if ((de->de_flags & DE_DOOMED) != 0) {
		/* Our dirent was doomed while unlocked. */
		sx_xunlock(&dmp->dm_lock);
		return (ERESTART);
	}

	/* Success: dm_lock remains held, per the contract above. */
	return (0);
}

/*
 * vop_vptocnp: translate a vnode back to its name, written backwards
 * into the tail of ap->a_buf, and return a held reference to the parent
 * directory vnode in *ap->a_vpp.
 */
static int
devfs_vptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct devfs_mount *dmp;
	char *buf = ap->a_buf;
	int *buflen = ap->a_buflen;
	struct devfs_dirent *dd, *de;
	int i, error;

	dmp = VFSTODEVFS(vp->v_mount);

	/* On success this returns with dmp->dm_lock held. */
	error = devfs_populate_vp(vp);
	if (error != 0)
		return (error);

	i = *buflen;
	dd = vp->v_data;

	if (vp->v_type == VCHR) {
		/* Device node: use the cdev's name. */
		i -= strlen(dd->de_cdp->cdp_c.si_name);
		if (i < 0) {
			error = ENOMEM;
			goto finished;
		}
		bcopy(dd->de_cdp->cdp_c.si_name, buf + i,
		    strlen(dd->de_cdp->cdp_c.si_name));
		de = dd->de_dir;
	} else if (vp->v_type == VDIR) {
		if (dd == dmp->dm_rootdir) {
			/* Root of the mount names itself. */
			*dvp = vp;
			vref(*dvp);
			goto finished;
		}
		i -= dd->de_dirent->d_namlen;
		if (i < 0) {
			error = ENOMEM;
			goto finished;
		}
		bcopy(dd->de_dirent->d_name, buf + i,
		    dd->de_dirent->d_namlen);
		de = dd;
	} else {
		error = ENOENT;
		goto finished;
	}
	*buflen = i;
	de = devfs_parent_dirent(de);
	if (de == NULL) {
		error = ENOENT;
		goto finished;
	}
	mtx_lock(&devfs_de_interlock);
	*dvp = de->de_vnode;
	if (*dvp != NULL) {
		/*
		 * Convert the unlocked de_vnode pointer into a usecount
		 * reference: hold first under the vnode interlock, then
		 * vref and drop the hold.
		 */
		VI_LOCK(*dvp);
		mtx_unlock(&devfs_de_interlock);
		vholdl(*dvp);
		VI_UNLOCK(*dvp);
		vref(*dvp);
		vdrop(*dvp);
	} else {
		mtx_unlock(&devfs_de_interlock);
		error = ENOENT;
	}
finished:
	sx_xunlock(&dmp->dm_lock);
	return (error);
}

/*
 * Construct the fully qualified path name relative to the mountpoint.
 * If a NULL cnp is provided, no '/' is appended to the resulting path.
 */
char *
devfs_fqpn(char *buf, struct devfs_mount *dmp, struct devfs_dirent *dd,
    struct componentname *cnp)
{
	int i;
	struct devfs_dirent *de;

	sx_assert(&dmp->dm_lock, SA_LOCKED);

	/* Build the path backwards from the end of buf. */
	i = SPECNAMELEN;
	buf[i] = '\0';
	if (cnp != NULL)
		i -= cnp->cn_namelen;
	if (i < 0)
		return (NULL);
	if (cnp != NULL)
		bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen);
	de = dd;
	while (de != dmp->dm_rootdir) {
		if (cnp != NULL || i < SPECNAMELEN) {
			/* Separator between components already emitted. */
			i--;
			if (i < 0)
				return (NULL);
			buf[i] = '/';
		}
		i -= de->de_dirent->d_namlen;
		if (i < 0)
			return (NULL);
		bcopy(de->de_dirent->d_name, buf + i,
		    de->de_dirent->d_namlen);
		de = devfs_parent_dirent(de);
		if (de == NULL)
			return (NULL);
	}
	/* Points into buf; NUL-terminated at buf[SPECNAMELEN]. */
	return (buf + i);
}

/*
 * Drop the hold references taken by devfs_allocv() on both the dirent
 * and the mount.  Returns 0 if everything is still alive, 1 if the
 * dirent was doomed, 2 if the mount itself was torn down (in which
 * case dm_lock is already gone).  dm_lock is released when the dirent
 * was doomed or when drop_dm_lock was requested and the mount survived.
 */
static int
devfs_allocv_drop_refs(int drop_dm_lock, struct devfs_mount *dmp,
    struct devfs_dirent *de)
{
	int not_found;

	not_found = 0;
	if (de->de_flags & DE_DOOMED)
		not_found = 1;
	if (DEVFS_DE_DROP(de)) {
		KASSERT(not_found == 1, ("DEVFS de dropped but not doomed"));
		devfs_dirent_free(de);
	}
	if (DEVFS_DMP_DROP(dmp)) {
		KASSERT(not_found == 1,
		    ("DEVFS mount struct freed before dirent"));
		not_found = 2;
		sx_xunlock(&dmp->dm_lock);
		devfs_unmount_final(dmp);
	}
	if (not_found == 1 || (drop_dm_lock && not_found != 2))
		sx_unlock(&dmp->dm_lock);
	return (not_found);
}

/*
 * insmntque1() failure callback: sever the dirent<->vnode linkage under
 * the interlock and dispose of the half-constructed vnode.
 */
static void
devfs_insmntque_dtr(struct vnode *vp, void *arg)
{
	struct devfs_dirent *de;

	de = (struct devfs_dirent *)arg;
	mtx_lock(&devfs_de_interlock);
	vp->v_data = NULL;
	de->de_vnode = NULL;
	mtx_unlock(&devfs_de_interlock);
	vgone(vp);
	vput(vp);
}

/*
 * devfs_allocv shall be entered with dmp->dm_lock held, and it drops
 * it on return.
 */
int
devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
    struct vnode **vpp)
{
	int error;
	struct vnode *vp;
	struct cdev *dev;
	struct devfs_mount *dmp;
	struct cdevsw *dsw;

	dmp = VFSTODEVFS(mp);
	if (de->de_flags & DE_DOOMED) {
		sx_xunlock(&dmp->dm_lock);
		return (ENOENT);
	}
loop:
	/* Pin the dirent and the mount across the blocking points below. */
	DEVFS_DE_HOLD(de);
	DEVFS_DMP_HOLD(dmp);
	mtx_lock(&devfs_de_interlock);
	vp = de->de_vnode;
	if (vp != NULL) {
		/* Existing vnode: lock it via the interlock handoff. */
		VI_LOCK(vp);
		mtx_unlock(&devfs_de_interlock);
		sx_xunlock(&dmp->dm_lock);
		vget(vp, lockmode | LK_INTERLOCK | LK_RETRY, curthread);
		sx_xlock(&dmp->dm_lock);
		if (devfs_allocv_drop_refs(0, dmp, de)) {
			/* Dirent was doomed while we slept in vget(). */
			vput(vp);
			return (ENOENT);
		}
		else if ((vp->v_iflag & VI_DOOMED) != 0) {
			/* Vnode got reclaimed; detach it and retry. */
			mtx_lock(&devfs_de_interlock);
			if (de->de_vnode == vp) {
				de->de_vnode = NULL;
				vp->v_data = NULL;
			}
			mtx_unlock(&devfs_de_interlock);
			vput(vp);
			goto loop;
		}
		sx_xunlock(&dmp->dm_lock);
		*vpp = vp;
		return (0);
	}
	mtx_unlock(&devfs_de_interlock);
	if (de->de_dirent->d_type == DT_CHR) {
		if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) {
			/* Device already destroyed. */
			devfs_allocv_drop_refs(1, dmp, de);
			return (ENOENT);
		}
		dev = &de->de_cdp->cdp_c;
	} else {
		dev = NULL;
	}
	error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp);
	if (error != 0) {
		devfs_allocv_drop_refs(1, dmp, de);
		printf("devfs_allocv: failed to allocate new vnode\n");
		return (error);
	}

	if (de->de_dirent->d_type == DT_CHR) {
		vp->v_type = VCHR;
		VI_LOCK(vp);
		dev_lock();
		dev_refl(dev);
		/* XXX: v_rdev should be protect by vnode lock */
		vp->v_rdev = dev;
		KASSERT(vp->v_usecount == 1,
		    ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount));
		dev->si_usecount += vp->v_usecount;
		/* Special casing of ttys for deadfs.  Probably redundant. */
		dsw = dev->si_devsw;
		if (dsw != NULL && (dsw->d_flags & D_TTY) != 0)
			vp->v_vflag |= VV_ISTTY;
		dev_unlock();
		VI_UNLOCK(vp);
		if ((dev->si_flags & SI_ETERNAL) != 0)
			vp->v_vflag |= VV_ETERNALDEV;
		/* Character devices use the spec vop vector. */
		vp->v_op = &devfs_specops;
	} else if (de->de_dirent->d_type == DT_DIR) {
		vp->v_type = VDIR;
	} else if (de->de_dirent->d_type == DT_LNK) {
		vp->v_type = VLNK;
	} else {
		vp->v_type = VBAD;
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWITNESS);
	VN_LOCK_ASHARE(vp);
	mtx_lock(&devfs_de_interlock);
	vp->v_data = de;
	de->de_vnode = vp;
	mtx_unlock(&devfs_de_interlock);
	/* On failure insmntque1 runs devfs_insmntque_dtr and vputs vp. */
	error = insmntque1(vp, mp, devfs_insmntque_dtr, de);
	if (error != 0) {
		(void) devfs_allocv_drop_refs(1, dmp, de);
		return (error);
	}
	if (devfs_allocv_drop_refs(0, dmp, de)) {
		vput(vp);
		return (ENOENT);
	}
#ifdef MAC
	mac_devfs_vnode_associate(mp, de, vp);
#endif
	sx_xunlock(&dmp->dm_lock);
	*vpp = vp;
	return (0);
}

/*
 * vop_access: standard vaccess() check against the dirent's ownership
 * and mode, with an EACCES escape hatch for the calling process's
 * controlling terminal.
 */
static int
devfs_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct devfs_dirent *de;
	struct proc *p;
	int error;

	de = vp->v_data;
	if (vp->v_type == VDIR)
		de = de->de_dir;

	error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid,
	    ap->a_accmode, ap->a_cred, NULL);
	if (error == 0)
		return (0);
	if (error != EACCES)
		return (error);
	p = ap->a_td->td_proc;
	/* We do, however, allow access to the controlling terminal */
	PROC_LOCK(p);
	if (!(p->p_flag & P_CONTROLT)) {
		PROC_UNLOCK(p);
		return (error);
	}
	if (p->p_session->s_ttydp == de->de_cdp)
		error = 0;
	PROC_UNLOCK(p);
	return (error);
}

/*
 * FLASTCLOSE/FREVOKE are devfs-internal flags passed in d_close()'s
 * fflag; they must not collide with flags userland can set.
 */
_Static_assert(((FMASK | FCNTLFLAGS) & (FLASTCLOSE | FREVOKE)) == 0,
    "devfs-only flag reuse failed");

static
int
devfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp, *oldvp;
	struct thread *td = ap->a_td;
	struct proc *p;
	struct cdev *dev = vp->v_rdev;
	struct cdevsw *dsw;
	int dflags, error, ref, vp_locked;

	/*
	 * XXX: Don't call d_close() if we were called because of
	 * XXX: insmntque1() failure.
	 */
	if (vp->v_data == NULL)
		return (0);

	/*
	 * Hack: a tty device that is a controlling terminal
	 * has a reference from the session structure.
	 * We cannot easily tell that a character device is
	 * a controlling terminal, unless it is the closing
	 * process' controlling terminal.  In that case,
	 * if the reference count is 2 (this last descriptor
	 * plus the session), release the reference from the session.
	 */
	if (td != NULL) {
		p = td->td_proc;
		PROC_LOCK(p);
		if (vp == p->p_session->s_ttyvp) {
			PROC_UNLOCK(p);
			oldvp = NULL;
			sx_xlock(&proctree_lock);
			/* Re-check under proctree_lock. */
			if (vp == p->p_session->s_ttyvp) {
				SESS_LOCK(p->p_session);
				VI_LOCK(vp);
				if (count_dev(dev) == 2 &&
				    (vp->v_iflag & VI_DOOMED) == 0) {
					p->p_session->s_ttyvp = NULL;
					p->p_session->s_ttydp = NULL;
					oldvp = vp;
				}
				VI_UNLOCK(vp);
				SESS_UNLOCK(p->p_session);
			}
			sx_xunlock(&proctree_lock);
			/* Release outside of all the locks taken above. */
			if (oldvp != NULL)
				vrele(oldvp);
		} else
			PROC_UNLOCK(p);
	}
	/*
	 * We do not want to really close the device if it
	 * is still in use unless we are trying to close it
	 * forcibly. Since every use (buffer, vnode, swap, cmap)
	 * holds a reference to the vnode, and because we mark
	 * any other vnodes that alias this device, when the
	 * sum of the reference counts on all the aliased
	 * vnodes descends to one, we are on last close.
	 */
	dsw = dev_refthread(dev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	dflags = 0;
	VI_LOCK(vp);
	if (vp->v_iflag & VI_DOOMED) {
		/* Forced close. */
		dflags |= FREVOKE | FNONBLOCK;
	} else if (dsw->d_flags & D_TRACKCLOSE) {
		/* Keep device updated on status. */
	} else if (count_dev(dev) > 1) {
		/* Not the last close and driver doesn't track closes. */
		VI_UNLOCK(vp);
		dev_relthread(dev, ref);
		return (0);
	}
	if (count_dev(dev) == 1)
		dflags |= FLASTCLOSE;
	/* Hold the vnode so it survives while we run d_close() unlocked. */
	vholdl(vp);
	VI_UNLOCK(vp);
	vp_locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);
	KASSERT(dev->si_refcount > 0,
	    ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev)));
	error = dsw->d_close(dev, ap->a_fflag | dflags, S_IFCHR, td);
	dev_relthread(dev, ref);
	vn_lock(vp, vp_locked | LK_RETRY);
	vdrop(vp);
	return (error);
}

/*
 * fo_close for devfs-backed files: delegate to the generic vnode close
 * with td_fpop set, then tear down any remaining cdevpriv.
 */
static int
devfs_close_f(struct file *fp, struct thread *td)
{
	int error;
	struct file *fpop;

	/*
	 * NB: td may be NULL if this descriptor is closed due to
	 * garbage collection from a closed UNIX domain socket.
	 */
	fpop = curthread->td_fpop;
	curthread->td_fpop = fp;
	error = vnops.fo_close(fp, td);
	curthread->td_fpop = fpop;

	/*
	 * The f_cdevpriv cannot be assigned non-NULL value while we
	 * are destroying the file.
	 */
	if (fp->f_cdevpriv != NULL)
		devfs_fpdrop(fp);
	return (error);
}

/*
 * vop_getattr: fill in vattr from the devfs dirent; for character
 * devices, timestamps and rdev come from the cdev itself.
 */
static int
devfs_getattr(struct vop_getattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;
	struct devfs_dirent *de;
	struct devfs_mount *dmp;
	struct cdev *dev;
	struct timeval boottime;
	int error;

	/* On success this returns with dm_lock held; drop it right away. */
	error = devfs_populate_vp(vp);
	if (error != 0)
		return (error);

	dmp = VFSTODEVFS(vp->v_mount);
	sx_xunlock(&dmp->dm_lock);

	de = vp->v_data;
	KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
	if (vp->v_type == VDIR) {
		de = de->de_dir;
		KASSERT(de != NULL,
		    ("Null dir dirent in devfs_getattr vp=%p", vp));
	}
	vap->va_uid = de->de_uid;
	vap->va_gid = de->de_gid;
	vap->va_mode = de->de_mode;
	if (vp->v_type == VLNK)
		vap->va_size = strlen(de->de_symlink);
	else if (vp->v_type == VDIR)
		vap->va_size = vap->va_bytes = DEV_BSIZE;
	else
		vap->va_size = 0;
	if (vp->v_type != VDIR)
		vap->va_bytes = 0;
	vap->va_blocksize = DEV_BSIZE;
	vap->va_type = vp->v_type;

	getboottime(&boottime);
	/*
	 * fix(): timestamps that were never set read as small values
	 * (tv_sec <= 3600); substitute the system boot time for them.
	 */
#define fix(aa)							\
	do {							\
		if ((aa).tv_sec <= 3600) {			\
			(aa).tv_sec = boottime.tv_sec;		\
			(aa).tv_nsec = boottime.tv_usec * 1000; \
		}						\
	} while (0)

	if (vp->v_type != VCHR) {
		fix(de->de_atime);
		vap->va_atime = de->de_atime;
		fix(de->de_mtime);
		vap->va_mtime = de->de_mtime;
		fix(de->de_ctime);
		vap->va_ctime = de->de_ctime;
	} else {
		/* Character device: times live in the cdev. */
		dev = vp->v_rdev;
		fix(dev->si_atime);
		vap->va_atime = dev->si_atime;
		fix(dev->si_mtime);
		vap->va_mtime = dev->si_mtime;
		fix(dev->si_ctime);
		vap->va_ctime = dev->si_ctime;

		vap->va_rdev = cdev2priv(dev)->cdp_inode;
	}
	vap->va_gen = 0;
	vap->va_flags = 0;
	vap->va_filerev = 0;
	vap->va_nlink = de->de_links;
	vap->va_fileid = de->de_inode;

	return (error);
}

/* ARGSUSED */
static
int
devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td)
{
	struct file *fpop;
	int error;

	/* Delegate to the vnode ioctl path with td_fpop set to fp. */
	fpop = td->td_fpop;
	td->td_fpop = fp;
	error = vnops.fo_ioctl(fp, com, data, cred, td);
	td->td_fpop = fpop;
	return (error);
}

/*
 * Extract the user buffer pointer from a FIODGNAME argument, handling
 * the 32-bit compat layout.
 */
static void *
fiodgname_buf_get_ptr(void *fgnp, u_long com)
{
	union {
		struct fiodgname_arg	fgn;
#ifdef COMPAT_FREEBSD32
		struct fiodgname_arg32	fgn32;
#endif
	} *fgnup;

	fgnup = fgnp;
	switch (com) {
	case FIODGNAME:
		return (fgnup->fgn.buf);
#ifdef COMPAT_FREEBSD32
	case FIODGNAME_32:
		return ((void *)(uintptr_t)fgnup->fgn32.buf);
#endif
	default:
		panic("Unhandled ioctl command %ld", com);
	}
}

/*
 * vop_ioctl: handle the generic FIODTYPE/FIODGNAME requests here,
 * forward everything else to the driver's d_ioctl.  A successful
 * TIOCSCTTY additionally records this vnode as the session's
 * controlling terminal.
 */
static int
devfs_ioctl(struct vop_ioctl_args *ap)
{
	struct fiodgname_arg *fgn;
	struct vnode *vpold, *vp;
	struct cdevsw *dsw;
	struct thread *td;
	struct cdev *dev;
	int error, ref, i;
	const char *p;
	u_long com;

	vp = ap->a_vp;
	com = ap->a_command;
	td = ap->a_td;

	dsw = devvn_refthread(vp, &dev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	KASSERT(dev->si_refcount > 0,
	    ("devfs: un-referenced struct cdev *(%s)", devtoname(dev)));

	switch (com) {
	case FIODTYPE:
		*(int *)ap->a_data = dsw->d_flags & D_TYPEMASK;
		error = 0;
		break;
	case FIODGNAME:
#ifdef COMPAT_FREEBSD32
	case FIODGNAME_32:
#endif
		/* Copy the device name out, including the NUL. */
		fgn = ap->a_data;
		p = devtoname(dev);
		i = strlen(p) + 1;
		if (i > fgn->len)
			error = EINVAL;
		else
			error = copyout(p, fiodgname_buf_get_ptr(fgn, com), i);
		break;
	default:
		error = dsw->d_ioctl(dev, com, ap->a_data, ap->a_fflag, td);
	}

	dev_relthread(dev, ref);
	if (error == ENOIOCTL)
		error = ENOTTY;

	if (error == 0 && com == TIOCSCTTY) {
		/* Do nothing if reassigning same control tty */
		sx_slock(&proctree_lock);
		if (td->td_proc->p_session->s_ttyvp == vp) {
			sx_sunlock(&proctree_lock);
			return (0);
		}

		vpold = td->td_proc->p_session->s_ttyvp;
		VREF(vp);
		SESS_LOCK(td->td_proc->p_session);
		td->td_proc->p_session->s_ttyvp = vp;
		td->td_proc->p_session->s_ttydp = cdev2priv(dev);
		SESS_UNLOCK(td->td_proc->p_session);

		sx_sunlock(&proctree_lock);

		/* Get rid of reference to old control tty */
		if (vpold)
			vrele(vpold);
	}
	return (error);
}

/* ARGSUSED */
static int
devfs_kqfilter_f(struct file *fp, struct knote *kn)
{
	struct cdev *dev;
	struct cdevsw *dsw;
	int error, ref;
	struct file *fpop;
	struct thread *td;

	td = curthread;
	fpop = td->td_fpop;
	error = devfs_fp_check(fp, &dev, &dsw, &ref);
	if (error)
		return (error);
	error = dsw->d_kqfilter(dev, kn);
	td->td_fpop = fpop;
	dev_relthread(dev, ref);
	return (error);
}

/*
 * Jail visibility check for a dirent: devices created by a different
 * prison are hidden, except for the calling process's controlling
 * terminal.  Returns 0 if visible, an errno otherwise.
 */
static inline int
devfs_prison_check(struct devfs_dirent *de, struct thread *td)
{
	struct cdev_priv *cdp;
	struct ucred *dcr;
	struct proc *p;
	int error;

	cdp = de->de_cdp;
	if (cdp == NULL)
		return (0);
	dcr = cdp->cdp_c.si_cred;
	if (dcr == NULL)
		return (0);

	error = prison_check(td->td_ucred, dcr);
	if (error == 0)
		return (0);
	/* We do, however, allow access to the controlling terminal */
	p = td->td_proc;
	PROC_LOCK(p);
	if (!(p->p_flag & P_CONTROLT)) {
		PROC_UNLOCK(p);
		return (error);
	}
	if (p->p_session->s_ttydp == cdp)
		error = 0;
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Lookup worker; entered with dmp->dm_lock held.  *dm_unlock is cleared
 * whenever this function itself disposed of dm_lock (directly or via
 * devfs_allocv()), so the caller knows not to unlock again.
 */
static int
devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock)
{
	struct componentname *cnp;
	struct vnode *dvp, **vpp;
	struct thread *td;
	struct devfs_dirent *de, *dd;
	struct devfs_dirent **dde;
	struct devfs_mount *dmp;
	struct mount *mp;
	struct cdev *cdev;
	int error, flags, nameiop, dvplocked;
	char
    specname[SPECNAMELEN + 1], *pname;

	cnp = ap->a_cnp;
	vpp = ap->a_vpp;
	dvp = ap->a_dvp;
	pname = cnp->cn_nameptr;
	td = cnp->cn_thread;
	flags = cnp->cn_flags;
	nameiop = cnp->cn_nameiop;
	mp = dvp->v_mount;
	dmp = VFSTODEVFS(mp);
	dd = dvp->v_data;
	*vpp = NULLVP;

	/* Renaming devfs entries is not supported. */
	if ((flags & ISLASTCN) && nameiop == RENAME)
		return (EOPNOTSUPP);

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/* ".." out of the filesystem root makes no sense. */
	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT))
		return (EIO);

	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td);
	if (error)
		return (error);

	/* "." resolves to the directory itself. */
	if (cnp->cn_namelen == 1 && *pname == '.') {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		*vpp = dvp;
		VREF(dvp);
		return (0);
	}

	if (flags & ISDOTDOT) {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		de = devfs_parent_dirent(dd);
		if (de == NULL)
			return (ENOENT);
		/* Unlock the child before locking the parent's vnode. */
		dvplocked = VOP_ISLOCKED(dvp);
		VOP_UNLOCK(dvp, 0);
		error = devfs_allocv(de, mp, cnp->cn_lkflags & LK_TYPE_MASK,
		    vpp);
		*dm_unlock = 0;
		vn_lock(dvp, dvplocked | LK_RETRY);
		return (error);
	}

	dd = dvp->v_data;
	de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen, 0);
	while (de == NULL) {	/* While(...) so we can use break */

		if (nameiop == DELETE)
			return (ENOENT);

		/*
		 * OK, we didn't have an entry for the name we were asked for
		 * so we try to see if anybody can create it on demand.
		 */
		pname = devfs_fqpn(specname, dmp, dd, cnp);
		if (pname == NULL)
			break;

		cdev = NULL;
		DEVFS_DMP_HOLD(dmp);
		sx_xunlock(&dmp->dm_lock);
		/* Ask dev_clone handlers to create the device on demand. */
		sx_slock(&clone_drain_lock);
		EVENTHANDLER_INVOKE(dev_clone,
		    td->td_ucred, pname, strlen(pname), &cdev);
		sx_sunlock(&clone_drain_lock);

		if (cdev == NULL)
			sx_xlock(&dmp->dm_lock);
		else if (devfs_populate_vp(dvp) != 0) {
			/* Directory vnode went away during populate. */
			*dm_unlock = 0;
			sx_xlock(&dmp->dm_lock);
			if (DEVFS_DMP_DROP(dmp)) {
				sx_xunlock(&dmp->dm_lock);
				devfs_unmount_final(dmp);
			} else
				sx_xunlock(&dmp->dm_lock);
			dev_rel(cdev);
			return (ENOENT);
		}
		if (DEVFS_DMP_DROP(dmp)) {
			/* Mount torn down while we were unlocked. */
			*dm_unlock = 0;
			sx_xunlock(&dmp->dm_lock);
			devfs_unmount_final(dmp);
			if (cdev != NULL)
				dev_rel(cdev);
			return (ENOENT);
		}

		if (cdev == NULL)
			break;

		/* A clone was created; find its dirent in this mount. */
		dev_lock();
		dde = &cdev2priv(cdev)->cdp_dirents[dmp->dm_idx];
		if (dde != NULL && *dde != NULL)
			de = *dde;
		dev_unlock();
		dev_rel(cdev);
		break;
	}

	if (de == NULL || de->de_flags & DE_WHITEOUT) {
		/* Allow create-style lookups to report "can be created". */
		if ((nameiop == CREATE || nameiop == RENAME) &&
		    (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) {
			cnp->cn_flags |= SAVENAME;
			return (EJUSTRETURN);
		}
		return (ENOENT);
	}

	if (devfs_prison_check(de, td))
		return (ENOENT);

	if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) {
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
		if (error)
			return (error);
		if (*vpp == dvp) {
			VREF(dvp);
			*vpp = dvp;
			return (0);
		}
	}
	/* devfs_allocv() consumes dm_lock. */
	error = devfs_allocv(de, mp, cnp->cn_lkflags & LK_TYPE_MASK, vpp);
	*dm_unlock = 0;
	return (error);
}

/*
 * vop_lookup: populate the directory, then let devfs_lookupx() do the
 * real work, dropping dm_lock here unless lookupx already did.
 */
static int
devfs_lookup(struct vop_lookup_args *ap)
{
	int j;
	struct devfs_mount *dmp;
	int dm_unlock;

	if (devfs_populate_vp(ap->a_dvp) != 0)
		return (ENOTDIR);

	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
	dm_unlock = 1;
	j = devfs_lookupx(ap, &dm_unlock);
	if (dm_unlock == 1)
		sx_xunlock(&dmp->dm_lock);
	return (j);
}

/*
 * vop_mknod: devfs cannot create arbitrary nodes; the only supported
 * operation is resurrecting a whited-out character device entry.
 */
static int
devfs_mknod(struct vop_mknod_args *ap)
{
	struct componentname *cnp;
	struct vnode *dvp, **vpp;
	struct devfs_dirent *dd, *de;
	struct devfs_mount *dmp;
	int error;

	/*
	 * The only type of node we should be creating here is a
	 * character device, for anything else return EOPNOTSUPP.
	 */
	if (ap->a_vap->va_type != VCHR)
		return (EOPNOTSUPP);
	dvp = ap->a_dvp;
	dmp = VFSTODEVFS(dvp->v_mount);

	cnp = ap->a_cnp;
	vpp = ap->a_vpp;
	dd = dvp->v_data;

	error = ENOENT;
	sx_xlock(&dmp->dm_lock);
	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
		if (cnp->cn_namelen != de->de_dirent->d_namlen)
			continue;
		if (de->de_dirent->d_type == DT_CHR &&
		    (de->de_cdp->cdp_flags & CDP_ACTIVE) == 0)
			continue;
		if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name,
		    de->de_dirent->d_namlen) != 0)
			continue;
		if (de->de_flags & DE_WHITEOUT)
			break;	/* found a whited-out entry to revive */
		goto notfound;
	}
	if (de == NULL)
		goto notfound;
	de->de_flags &= ~DE_WHITEOUT;
	/* devfs_allocv() drops dm_lock. */
	error = devfs_allocv(de, dvp->v_mount, LK_EXCLUSIVE, vpp);
	return (error);
notfound:
	sx_xunlock(&dmp->dm_lock);
	return (error);
}

/* ARGSUSED */
static int
devfs_open(struct vop_open_args *ap)
{
	struct thread *td = ap->a_td;
	struct vnode *vp = ap->a_vp;
	struct cdev *dev = vp->v_rdev;
	struct file *fp = ap->a_fp;
	int error, ref, vlocked;
	struct cdevsw *dsw;
	struct file *fpop;
	struct mtx *mtxp;

	if (vp->v_type == VBLK)
		return (ENXIO);

	if (dev == NULL)
		return (ENXIO);

	/* Make this field valid before any I/O in d_open. */
	if (dev->si_iosize_max == 0)
		dev->si_iosize_max = DFLTPHYS;

	dsw = dev_refthread(dev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	if (fp == NULL && dsw->d_fdopen != NULL) {
		/* d_fdopen requires a file pointer. */
		dev_relthread(dev, ref);
		return (ENXIO);
	}

	/* Drop the vnode lock across the driver's open routine. */
	vlocked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);

	fpop = td->td_fpop;
	td->td_fpop = fp;
	if (fp != NULL) {
		fp->f_data = dev;
		fp->f_vnode = vp;
	}
	if (dsw->d_fdopen != NULL)
		error = dsw->d_fdopen(dev, ap->a_mode, td, fp);
	else
		error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
	/* Clean up any cdevpriv upon error. */
	if (error != 0)
		devfs_clear_cdevpriv();
	td->td_fpop = fpop;

	vn_lock(vp, vlocked | LK_RETRY);
	dev_relthread(dev, ref);
	if (error != 0) {
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

#if 0	/* /dev/console */
	KASSERT(fp != NULL, ("Could not vnode bypass device on NULL fp"));
#else
	if (fp == NULL)
		return (error);
#endif
	/* Switch the file to the devfs fileops for direct device access. */
	if (fp->f_ops == &badfileops)
		finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f);
	mtxp = mtx_pool_find(mtxpool_sleep, fp);

	/*
	 * Hint to the dofilewrite() to not force the buffer draining
	 * on the writer to the file.  Most likely, the write would
	 * not need normal buffers.
	 */
	mtx_lock(mtxp);
	fp->f_vnread_flags |= FDEVFS_VNODE;
	mtx_unlock(mtxp);
	return (error);
}

/*
 * vop_pathconf: report filesystem limits; tty-specific values are only
 * available on vnodes flagged VV_ISTTY.
 */
static int
devfs_pathconf(struct vop_pathconf_args *ap)
{

	switch (ap->a_name) {
	case _PC_FILESIZEBITS:
		*ap->a_retval = 64;
		return (0);
	case _PC_NAME_MAX:
		*ap->a_retval = NAME_MAX;
		return (0);
	case _PC_LINK_MAX:
		*ap->a_retval = INT_MAX;
		return (0);
	case _PC_SYMLINK_MAX:
		*ap->a_retval = MAXPATHLEN;
		return (0);
	case _PC_MAX_CANON:
		if (ap->a_vp->v_vflag & VV_ISTTY) {
			*ap->a_retval = MAX_CANON;
			return (0);
		}
		return (EINVAL);
	case _PC_MAX_INPUT:
		if (ap->a_vp->v_vflag & VV_ISTTY) {
			*ap->a_retval = MAX_INPUT;
			return (0);
		}
		return (EINVAL);
	case _PC_VDISABLE:
		if (ap->a_vp->v_vflag & VV_ISTTY) {
			*ap->a_retval = _POSIX_VDISABLE;
			return (0);
		}
		return (EINVAL);
	case _PC_MAC_PRESENT:
#ifdef MAC
		/*
		 * If MAC is enabled, devfs automatically supports
		 * trivial non-persistant label storage.
		 */
		*ap->a_retval = 1;
#else
		*ap->a_retval = 0;
#endif
		return (0);
	case _PC_CHOWN_RESTRICTED:
		*ap->a_retval = 1;
		return (0);
	default:
		return (vop_stdpathconf(ap));
	}
	/* NOTREACHED */
}

/*
 * fo_poll: dispatch to the driver's d_poll; fall back to the generic
 * vnode poll when the device reference cannot be obtained.
 */
/* ARGSUSED */
static int
devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
{
	struct cdev *dev;
	struct cdevsw *dsw;
	int error, ref;
	struct file *fpop;

	fpop = td->td_fpop;
	error = devfs_fp_check(fp, &dev, &dsw, &ref);
	if (error != 0) {
		error = vnops.fo_poll(fp, events, cred, td);
		return (error);
	}
	error = dsw->d_poll(dev, events, td);
	td->td_fpop = fpop;
	dev_relthread(dev, ref);
	return(error);
}

/*
 * Print out the contents of a special device vnode.
 */
static int
devfs_print(struct vop_print_args *ap)
{

	printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev));
	return (0);
}

/*
 * fo_read method for devfs files: bound the transfer size, dispatch
 * to the driver's d_read and keep the access timestamp current.
 * Falls back to vnops.fo_read when devfs_fp_check() fails.
 */
static int
devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred,
    int flags, struct thread *td)
{
	struct cdev *dev;
	int ioflag, error, ref;
	ssize_t resid;
	struct cdevsw *dsw;
	struct file *fpop;

	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
		return (EINVAL);
	fpop = td->td_fpop;
	error = devfs_fp_check(fp, &dev, &dsw, &ref);
	if (error != 0) {
		error = vnops.fo_read(fp, uio, cred, flags, td);
		return (error);
	}
	resid = uio->uio_resid;
	/* Translate fcntl.h flags into the driver ioflag. */
	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT);
	if (ioflag & O_DIRECT)
		ioflag |= IO_DIRECT;

	foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
	error = dsw->d_read(dev, uio, ioflag);
	/* Update the access time if any data was actually transferred. */
	if (uio->uio_resid != resid || (error == 0 && resid != 0))
		devfs_timestamp(&dev->si_atime);
	td->td_fpop = fpop;
	dev_relthread(dev, ref);

	foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF);
	return (error);
}

/*
 * Produce directory entries for a devfs directory, skipping covered
 * and whiteout entries as well as entries hidden from the caller's
 * prison.  Returns with the devfs mount lock (taken by
 * devfs_populate_vp()) released.
 */
static int
devfs_readdir(struct vop_readdir_args *ap)
{
	int error;
	struct uio *uio;
	struct dirent *dp;
	struct devfs_dirent *dd;
	struct devfs_dirent *de;
	struct devfs_mount *dmp;
	off_t off;
	int *tmp_ncookies = NULL;

	if (ap->a_vp->v_type != VDIR)
		return (ENOTDIR);

	uio = ap->a_uio;
	if (uio->uio_offset < 0)
		return (EINVAL);

	/*
	 * XXX: This is a temporary hack to get around this filesystem not
	 * supporting cookies. We store the location of the ncookies pointer
	 * in a temporary variable before calling vfs_subr.c:vfs_read_dirent()
	 * and set the number of cookies to 0. We then set the pointer to
	 * NULL so that vfs_read_dirent doesn't try to call realloc() on
	 * ap->a_cookies. Later in this function, we restore the ap->a_ncookies
	 * pointer to its original location before returning to the caller.
	 */
	if (ap->a_ncookies != NULL) {
		tmp_ncookies = ap->a_ncookies;
		*ap->a_ncookies = 0;
		ap->a_ncookies = NULL;
	}

	dmp = VFSTODEVFS(ap->a_vp->v_mount);
	if (devfs_populate_vp(ap->a_vp) != 0) {
		/* Put a_ncookies back before bailing out. */
		if (tmp_ncookies != NULL)
			ap->a_ncookies = tmp_ncookies;
		return (EIO);
	}
	error = 0;
	de = ap->a_vp->v_data;
	off = 0;
	TAILQ_FOREACH(dd, &de->de_dlist, de_list) {
		/* Debug canary: catch a dirent freed with poison in de_cdp. */
		KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__));
		if (dd->de_flags & (DE_COVERED | DE_WHITEOUT))
			continue;
		if (devfs_prison_check(dd, uio->uio_td))
			continue;
		/* Directories report the inode of their own dir entry. */
		if (dd->de_dirent->d_type == DT_DIR)
			de = dd->de_dir;
		else
			de = dd;
		dp = dd->de_dirent;
		MPASS(dp->d_reclen == GENERIC_DIRSIZ(dp));
		if (dp->d_reclen > uio->uio_resid)
			break;
		dp->d_fileno = de->de_inode;
		if (off >= uio->uio_offset) {
			error = vfs_read_dirent(ap, dp, off);
			if (error)
				break;
		}
		off += dp->d_reclen;
	}
	sx_xunlock(&dmp->dm_lock);
	uio->uio_offset = off;

	/*
	 * Restore ap->a_ncookies if it wasn't originally NULL in the first
	 * place.
1405 */ 1406 if (tmp_ncookies != NULL) 1407 ap->a_ncookies = tmp_ncookies; 1408 1409 return (error); 1410 } 1411 1412 static int 1413 devfs_readlink(struct vop_readlink_args *ap) 1414 { 1415 struct devfs_dirent *de; 1416 1417 de = ap->a_vp->v_data; 1418 return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio)); 1419 } 1420 1421 static int 1422 devfs_reclaim(struct vop_reclaim_args *ap) 1423 { 1424 struct vnode *vp; 1425 struct devfs_dirent *de; 1426 1427 vp = ap->a_vp; 1428 mtx_lock(&devfs_de_interlock); 1429 de = vp->v_data; 1430 if (de != NULL) { 1431 de->de_vnode = NULL; 1432 vp->v_data = NULL; 1433 } 1434 mtx_unlock(&devfs_de_interlock); 1435 vnode_destroy_vobject(vp); 1436 return (0); 1437 } 1438 1439 static int 1440 devfs_reclaim_vchr(struct vop_reclaim_args *ap) 1441 { 1442 struct vnode *vp; 1443 struct cdev *dev; 1444 1445 vp = ap->a_vp; 1446 MPASS(vp->v_type == VCHR); 1447 1448 devfs_reclaim(ap); 1449 1450 VI_LOCK(vp); 1451 dev_lock(); 1452 dev = vp->v_rdev; 1453 vp->v_rdev = NULL; 1454 if (dev != NULL) 1455 dev->si_usecount -= vp->v_usecount; 1456 dev_unlock(); 1457 VI_UNLOCK(vp); 1458 if (dev != NULL) 1459 dev_rel(dev); 1460 return (0); 1461 } 1462 1463 static int 1464 devfs_remove(struct vop_remove_args *ap) 1465 { 1466 struct vnode *dvp = ap->a_dvp; 1467 struct vnode *vp = ap->a_vp; 1468 struct devfs_dirent *dd; 1469 struct devfs_dirent *de, *de_covered; 1470 struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount); 1471 1472 ASSERT_VOP_ELOCKED(dvp, "devfs_remove"); 1473 ASSERT_VOP_ELOCKED(vp, "devfs_remove"); 1474 1475 sx_xlock(&dmp->dm_lock); 1476 dd = ap->a_dvp->v_data; 1477 de = vp->v_data; 1478 if (de->de_cdp == NULL) { 1479 TAILQ_REMOVE(&dd->de_dlist, de, de_list); 1480 if (de->de_dirent->d_type == DT_LNK) { 1481 de_covered = devfs_find(dd, de->de_dirent->d_name, 1482 de->de_dirent->d_namlen, 0); 1483 if (de_covered != NULL) 1484 de_covered->de_flags &= ~DE_COVERED; 1485 } 1486 /* We need to unlock dvp because devfs_delete() may lock it. 
*/ 1487 VOP_UNLOCK(vp, 0); 1488 if (dvp != vp) 1489 VOP_UNLOCK(dvp, 0); 1490 devfs_delete(dmp, de, 0); 1491 sx_xunlock(&dmp->dm_lock); 1492 if (dvp != vp) 1493 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); 1494 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1495 } else { 1496 de->de_flags |= DE_WHITEOUT; 1497 sx_xunlock(&dmp->dm_lock); 1498 } 1499 return (0); 1500 } 1501 1502 /* 1503 * Revoke is called on a tty when a terminal session ends. The vnode 1504 * is orphaned by setting v_op to deadfs so we need to let go of it 1505 * as well so that we create a new one next time around. 1506 * 1507 */ 1508 static int 1509 devfs_revoke(struct vop_revoke_args *ap) 1510 { 1511 struct vnode *vp = ap->a_vp, *vp2; 1512 struct cdev *dev; 1513 struct cdev_priv *cdp; 1514 struct devfs_dirent *de; 1515 u_int i; 1516 1517 KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL")); 1518 1519 dev = vp->v_rdev; 1520 cdp = cdev2priv(dev); 1521 1522 dev_lock(); 1523 cdp->cdp_inuse++; 1524 dev_unlock(); 1525 1526 vhold(vp); 1527 vgone(vp); 1528 vdrop(vp); 1529 1530 VOP_UNLOCK(vp,0); 1531 loop: 1532 for (;;) { 1533 mtx_lock(&devfs_de_interlock); 1534 dev_lock(); 1535 vp2 = NULL; 1536 for (i = 0; i <= cdp->cdp_maxdirent; i++) { 1537 de = cdp->cdp_dirents[i]; 1538 if (de == NULL) 1539 continue; 1540 1541 vp2 = de->de_vnode; 1542 if (vp2 != NULL) { 1543 dev_unlock(); 1544 VI_LOCK(vp2); 1545 mtx_unlock(&devfs_de_interlock); 1546 if (vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK, 1547 curthread)) 1548 goto loop; 1549 vhold(vp2); 1550 vgone(vp2); 1551 vdrop(vp2); 1552 vput(vp2); 1553 break; 1554 } 1555 } 1556 if (vp2 != NULL) { 1557 continue; 1558 } 1559 dev_unlock(); 1560 mtx_unlock(&devfs_de_interlock); 1561 break; 1562 } 1563 dev_lock(); 1564 cdp->cdp_inuse--; 1565 if (!(cdp->cdp_flags & CDP_ACTIVE) && cdp->cdp_inuse == 0) { 1566 TAILQ_REMOVE(&cdevp_list, cdp, cdp_list); 1567 dev_unlock(); 1568 dev_rel(&cdp->cdp_c); 1569 } else 1570 dev_unlock(); 1571 1572 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1573 return 
	    (0);
}

/*
 * Handle ioctls on non-VCHR devfs vnodes (the rule subsystem):
 * repopulate the tree under the mount lock, then pass the request
 * to devfs_rules_ioctl().
 */
static int
devfs_rioctl(struct vop_ioctl_args *ap)
{
	struct vnode *vp;
	struct devfs_mount *dmp;
	int error;

	vp = ap->a_vp;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	if (vp->v_iflag & VI_DOOMED) {
		VOP_UNLOCK(vp, 0);
		return (EBADF);
	}
	dmp = VFSTODEVFS(vp->v_mount);
	sx_xlock(&dmp->dm_lock);
	VOP_UNLOCK(vp, 0);
	DEVFS_DMP_HOLD(dmp);
	devfs_populate(dmp);
	/* An unmount may have been waiting on our hold. */
	if (DEVFS_DMP_DROP(dmp)) {
		sx_xunlock(&dmp->dm_lock);
		devfs_unmount_final(dmp);
		return (ENOENT);
	}
	error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td);
	sx_xunlock(&dmp->dm_lock);
	return (error);
}

/*
 * VOP_READ for devfs directories: only directory reads are
 * supported, implemented via VOP_READDIR.
 */
static int
devfs_rread(struct vop_read_args *ap)
{

	if (ap->a_vp->v_type != VDIR)
		return (EINVAL);
	return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL));
}

/*
 * Set attributes on a devfs node.  Ownership, mode and times are
 * supported; everything else is rejected.  For VCHR vnodes the
 * timestamps live on the cdev, otherwise on the dirent.
 */
static int
devfs_setattr(struct vop_setattr_args *ap)
{
	struct devfs_dirent *de;
	struct vattr *vap;
	struct vnode *vp;
	struct thread *td;
	int c, error;
	uid_t uid;
	gid_t gid;

	vap = ap->a_vap;
	vp = ap->a_vp;
	td = curthread;
	/* Refuse any attribute devfs cannot change. */
	if ((vap->va_type != VNON) ||
	    (vap->va_nlink != VNOVAL) ||
	    (vap->va_fsid != VNOVAL) ||
	    (vap->va_fileid != VNOVAL) ||
	    (vap->va_blocksize != VNOVAL) ||
	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
	    (vap->va_rdev != VNOVAL) ||
	    ((int)vap->va_bytes != VNOVAL) ||
	    (vap->va_gen != VNOVAL)) {
		return (EINVAL);
	}

	/* Returns with the devfs mount lock held; released at "ret". */
	error = devfs_populate_vp(vp);
	if (error != 0)
		return (error);

	de = vp->v_data;
	if (vp->v_type == VDIR)
		de = de->de_dir;

	/* "c" tracks whether anything changed, for the ctime update. */
	c = 0;
	if (vap->va_uid == (uid_t)VNOVAL)
		uid = de->de_uid;
	else
		uid = vap->va_uid;
	if (vap->va_gid == (gid_t)VNOVAL)
		gid = de->de_gid;
	else
		gid = vap->va_gid;
	if (uid != de->de_uid || gid != de->de_gid) {
		if ((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid ||
		    (gid != de->de_gid && !groupmember(gid, ap->a_cred))) {
			error = priv_check(td, PRIV_VFS_CHOWN);
			if (error != 0)
				goto ret;
		}
		de->de_uid = uid;
		de->de_gid = gid;
		c = 1;
	}

	if (vap->va_mode != (mode_t)VNOVAL) {
		/* Only the owner may chmod without PRIV_VFS_ADMIN. */
		if (ap->a_cred->cr_uid != de->de_uid) {
			error = priv_check(td, PRIV_VFS_ADMIN);
			if (error != 0)
				goto ret;
		}
		de->de_mode = vap->va_mode;
		c = 1;
	}

	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
		error = vn_utimes_perm(vp, vap, ap->a_cred, td);
		if (error != 0)
			goto ret;
		if (vap->va_atime.tv_sec != VNOVAL) {
			if (vp->v_type == VCHR)
				vp->v_rdev->si_atime = vap->va_atime;
			else
				de->de_atime = vap->va_atime;
		}
		if (vap->va_mtime.tv_sec != VNOVAL) {
			if (vp->v_type == VCHR)
				vp->v_rdev->si_mtime = vap->va_mtime;
			else
				de->de_mtime = vap->va_mtime;
		}
		c = 1;
	}

	if (c) {
		if (vp->v_type == VCHR)
			vfs_timestamp(&vp->v_rdev->si_ctime);
		else
			vfs_timestamp(&de->de_mtime);
	}

ret:
	sx_xunlock(&VFSTODEVFS(vp->v_mount)->dm_lock);
	return (error);
}

#ifdef MAC
/*
 * Apply a MAC label to a devfs vnode and propagate it to the
 * backing dirent.
 */
static int
devfs_setlabel(struct vop_setlabel_args *ap)
{
	struct vnode *vp;
	struct devfs_dirent *de;

	vp = ap->a_vp;
	de = vp->v_data;

	mac_vnode_relabel(ap->a_cred, vp, ap->a_label);
	mac_devfs_update(vp->v_mount, de, vp);

	return (0);
}
#endif

/* fo_stat: defer to the generic vnode fileops implementation. */
static int
devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td)
{

	return (vnops.fo_stat(fp, sb, cred, td));
}

/*
 * Create a user symlink in devfs (requires PRIV_DEVFS_SYMLINK).
 */
static int
devfs_symlink(struct vop_symlink_args *ap)
{
	int i, error;
	struct devfs_dirent *dd;
	struct devfs_dirent *de, *de_covered, *de_dotdot;
	struct
	    devfs_mount *dmp;

	error = priv_check(curthread, PRIV_DEVFS_SYMLINK);
	if (error)
		return(error);
	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
	/* On success the devfs mount lock is held until we unlock below. */
	if (devfs_populate_vp(ap->a_dvp) != 0)
		return (ENOENT);

	dd = ap->a_dvp->v_data;
	de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen);
	de->de_flags = DE_USER;
	de->de_uid = 0;
	de->de_gid = 0;
	de->de_mode = 0755;
	de->de_inode = alloc_unr(devfs_inos);
	de->de_dir = dd;
	de->de_dirent->d_type = DT_LNK;
	/* Store the target including the terminating NUL. */
	i = strlen(ap->a_target) + 1;
	de->de_symlink = malloc(i, M_DEVFS, M_WAITOK);
	bcopy(ap->a_target, de->de_symlink, i);
#ifdef MAC
	mac_devfs_create_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de);
#endif
	/*
	 * A symlink may shadow (cover) an existing cdev-backed entry of
	 * the same name; an existing user entry makes this an EEXIST.
	 */
	de_covered = devfs_find(dd, de->de_dirent->d_name,
	    de->de_dirent->d_namlen, 0);
	if (de_covered != NULL) {
		if ((de_covered->de_flags & DE_USER) != 0) {
			devfs_delete(dmp, de, DEVFS_DEL_NORECURSE);
			sx_xunlock(&dmp->dm_lock);
			return (EEXIST);
		}
		KASSERT((de_covered->de_flags & DE_COVERED) == 0,
		    ("devfs_symlink: entry %p already covered", de_covered));
		de_covered->de_flags |= DE_COVERED;
	}

	de_dotdot = TAILQ_FIRST(&dd->de_dlist);		/* "." */
	de_dotdot = TAILQ_NEXT(de_dotdot, de_list);	/* ".." */
	TAILQ_INSERT_AFTER(&dd->de_dlist, de_dotdot, de, de_list);
	devfs_dir_ref_de(dmp, dd);
	devfs_rules_apply(dmp, de);

	/* devfs_allocv() consumes the mount lock. */
	return (devfs_allocv(de, ap->a_dvp->v_mount, LK_EXCLUSIVE, ap->a_vpp));
}

/* fo_truncate: defer to the generic vnode fileops implementation. */
static int
devfs_truncate_f(struct file *fp, off_t length, struct ucred *cred, struct thread *td)
{

	return (vnops.fo_truncate(fp, length, cred, td));
}

/*
 * fo_write method for devfs files: bound the transfer size, dispatch
 * to the driver's d_write and update the modification timestamps.
 * Falls back to vnops.fo_write when devfs_fp_check() fails.
 */
static int
devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred,
    int flags, struct thread *td)
{
	struct cdev *dev;
	int error, ioflag, ref;
	ssize_t resid;
	struct cdevsw *dsw;
	struct file *fpop;

	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
		return (EINVAL);
	fpop = td->td_fpop;
	error = devfs_fp_check(fp, &dev, &dsw, &ref);
	if (error != 0) {
		error = vnops.fo_write(fp, uio, cred, flags, td);
		return (error);
	}
	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td));
	/* Translate fcntl.h flags into the driver ioflag. */
	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC);
	if (ioflag & O_DIRECT)
		ioflag |= IO_DIRECT;
	foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);

	resid = uio->uio_resid;

	error = dsw->d_write(dev, uio, ioflag);
	/* Update ctime/mtime if any data was actually transferred. */
	if (uio->uio_resid != resid || (error == 0 && resid != 0)) {
		devfs_timestamp(&dev->si_ctime);
		dev->si_mtime = dev->si_ctime;
	}
	td->td_fpop = fpop;
	dev_relthread(dev, ref);

	foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF);
	return (error);
}

/*
 * fo_mmap method for devfs files: validate protections against the
 * open mode, then let the device and the VM system set up the
 * mapping.
 */
static int
devfs_mmap_f(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
    vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
    struct thread *td)
{
	struct cdev *dev;
	struct cdevsw *dsw;
	struct mount *mp;
	struct vnode *vp;
	struct file *fpop;
	vm_object_t object;
	vm_prot_t maxprot;
	int error, ref;

	vp = fp->f_vnode;

	/*
	 * Ensure that file and memory protections are
	 * compatible.
	 */
	mp = vp->v_mount;
	if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) {
		maxprot = VM_PROT_NONE;
		if ((prot & VM_PROT_EXECUTE) != 0)
			return (EACCES);
	} else
		maxprot = VM_PROT_EXECUTE;
	if ((fp->f_flag & FREAD) != 0)
		maxprot |= VM_PROT_READ;
	else if ((prot & VM_PROT_READ) != 0)
		return (EACCES);

	/*
	 * If we are sharing potential changes via MAP_SHARED and we
	 * are trying to get write permission although we opened it
	 * without asking for it, bail out.
	 *
	 * Note that most character devices always share mappings.
	 * The one exception is that D_MMAP_ANON devices
	 * (i.e. /dev/zero) permit private writable mappings.
	 *
	 * Rely on vm_mmap_cdev() to fail invalid MAP_PRIVATE requests
	 * as well as updating maxprot to permit writing for
	 * D_MMAP_ANON devices rather than doing that here.
	 */
	if ((flags & MAP_SHARED) != 0) {
		if ((fp->f_flag & FWRITE) != 0)
			maxprot |= VM_PROT_WRITE;
		else if ((prot & VM_PROT_WRITE) != 0)
			return (EACCES);
	}
	maxprot &= cap_maxprot;

	fpop = td->td_fpop;
	error = devfs_fp_check(fp, &dev, &dsw, &ref);
	if (error != 0)
		return (error);

	error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, dev, dsw, &foff,
	    &object);
	td->td_fpop = fpop;
	dev_relthread(dev, ref);
	if (error != 0)
		return (error);

	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
	    foff, FALSE, td);
	/* The mapping failed; drop the reference vm_mmap_cdev() took. */
	if (error != 0)
		vm_object_deallocate(object);
	return (error);
}

/*
 * Convert a struct cdev to the dev_t exported to userland;
 * NODEV for a NULL cdev.
 */
dev_t
dev2udev(struct cdev *x)
{
	if (x == NULL)
		return (NODEV);
	return (cdev2priv(x)->cdp_inode);
}

/* File operations used for vnode-bypass I/O on devfs files. */
static struct fileops devfs_ops_f = {
	.fo_read =	devfs_read_f,
	.fo_write =	devfs_write_f,
	.fo_truncate =	devfs_truncate_f,
	.fo_ioctl =	devfs_ioctl_f,
	.fo_poll =	devfs_poll_f,
	.fo_kqfilter =	devfs_kqfilter_f,
	.fo_stat =	devfs_stat_f,
	.fo_close =	devfs_close_f,
	.fo_chmod =	vn_chmod,
	.fo_chown =	vn_chown,
	.fo_sendfile =	vn_sendfile,
	.fo_seek =	vn_seek,
	.fo_fill_kinfo = vn_fill_kinfo,
	.fo_mmap =	devfs_mmap_f,
	.fo_flags =	DFLAG_PASSABLE | DFLAG_SEEKABLE
};

/* Vops for non-CHR vnodes in /dev. */
static struct vop_vector devfs_vnodeops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_getattr =		devfs_getattr,
	.vop_ioctl =		devfs_rioctl,
	.vop_lookup =		devfs_lookup,
	.vop_mknod =		devfs_mknod,
	.vop_pathconf =		devfs_pathconf,
	.vop_read =		devfs_rread,
	.vop_readdir =		devfs_readdir,
	.vop_readlink =		devfs_readlink,
	.vop_reclaim =		devfs_reclaim,
	.vop_remove =		devfs_remove,
	.vop_revoke =		devfs_revoke,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_symlink =		devfs_symlink,
	.vop_vptocnp =		devfs_vptocnp,
};

/* Vops for VCHR vnodes in /dev.
 */
static struct vop_vector devfs_specops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		devfs_access,
	.vop_bmap =		VOP_PANIC,
	.vop_close =		devfs_close,
	.vop_create =		VOP_PANIC,
	.vop_fsync =		vop_stdfsync,
	.vop_getattr =		devfs_getattr,
	.vop_ioctl =		devfs_ioctl,
	.vop_link =		VOP_PANIC,
	.vop_mkdir =		VOP_PANIC,
	.vop_mknod =		VOP_PANIC,
	.vop_open =		devfs_open,
	.vop_pathconf =		devfs_pathconf,
	.vop_poll =		dead_poll,
	.vop_print =		devfs_print,
	.vop_read =		dead_read,
	.vop_readdir =		VOP_PANIC,
	.vop_readlink =		VOP_PANIC,
	.vop_reallocblks =	VOP_PANIC,
	.vop_reclaim =		devfs_reclaim_vchr,
	.vop_remove =		devfs_remove,
	.vop_rename =		VOP_PANIC,
	.vop_revoke =		devfs_revoke,
	.vop_rmdir =		VOP_PANIC,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_strategy =		VOP_PANIC,
	.vop_symlink =		VOP_PANIC,
	.vop_vptocnp =		devfs_vptocnp,
	.vop_write =		dead_write,
};

/*
 * Our calling convention to the device drivers used to be that we passed
 * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_
 * flags instead since that's what open(), close() and ioctl() takes and
 * we don't really want vnode.h in device drivers.
 * We solved the source compatibility by redefining some vnode flags to
 * be the same as the fcntl ones and by sending down the bitwise OR of
 * the respective fcntl/vnode flags.  These CTASSERTS make sure nobody
 * pulls the rug out under this.
 */
CTASSERT(O_NONBLOCK == IO_NDELAY);
CTASSERT(O_FSYNC == IO_SYNC);