1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1992, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software donated to Berkeley by 8 * Jan-Simon Pendry. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
 *
 * @(#)fdesc_vnops.c	8.9 (Berkeley) 1/21/94
 */

/*
 * /dev/fd Filesystem
 *
 * Vnode operations for fdescfs, plus the hash that maps an fdesc node
 * index to its fdescnode.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/conf.h>
#include <sys/dirent.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>	/* boottime */
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/file.h>	/* Must come after sys/malloc.h */
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

#include <fs/fdescfs/fdesc.h>

/* Size argument handed to hashinit() when the node hash is created. */
#define	NFDCACHE 4
/* Pick the hash chain for node index ix by masking it with fdhash. */
#define FD_NHASH(ix) \
	(&fdhashtbl[(ix) & fdhash])
/* Array of chain heads allocated by fdesc_init(). */
static LIST_HEAD(fdhashhead, fdescnode) *fdhashtbl;
/* Mask used by FD_NHASH(); filled in through the pointer given to hashinit(). */
static u_long fdhash;

/* Held around every walk or modification of the hash chains in this file. */
struct mtx fdesc_hashmtx;

static vop_getattr_t	fdesc_getattr;
static vop_lookup_t	fdesc_lookup;
static vop_open_t	fdesc_open;
static vop_pathconf_t	fdesc_pathconf;
static vop_readdir_t	fdesc_readdir;
static vop_readlink_t	fdesc_readlink;
static vop_reclaim_t	fdesc_reclaim;
static vop_setattr_t	fdesc_setattr;

/*
 * Operation vector installed on every vnode created by fdesc_allocvp().
 * Anything not listed here is taken from default_vnodeops via .vop_default.
 */
static struct vop_vector fdesc_vnodeops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		VOP_NULL,
	.vop_getattr =		fdesc_getattr,
	.vop_lookup =		fdesc_lookup,
	.vop_open =		fdesc_open,
	.vop_pathconf =		fdesc_pathconf,
	.vop_readdir =		fdesc_readdir,
	.vop_readlink =		fdesc_readlink,
	.vop_reclaim =		fdesc_reclaim,
	.vop_setattr =		fdesc_setattr,
};
VFS_VOP_VECTOR_REGISTER(fdesc_vnodeops);

static void fdesc_remove_entry(struct fdescnode *);

/*
 * Initialise cache headers
 *
 * Sets up the hash mutex and allocates the node hash; vfsp is unused.
 * Always returns 0.
 */
int
fdesc_init(struct vfsconf *vfsp)
{

	mtx_init(&fdesc_hashmtx, "fdescfs_hash", NULL, MTX_DEF);
	fdhashtbl = hashinit(NFDCACHE, M_CACHE, &fdhash);
	return (0);
}

/*
 * Uninit ready for unload.
 *
 * Destroys the node hash and then the mutex used to guard it; vfsp is
 * unused.  Always returns 0.
 */
int
fdesc_uninit(struct vfsconf *vfsp)
{

	hashdestroy(fdhashtbl, M_CACHE, fdhash);
	mtx_destroy(&fdesc_hashmtx);
	return (0);
}

/*
 * Remove an entry from the hash if it exists.
 *
 * The chain is searched for fd first, so calling this for a node that was
 * never inserted (or was already removed) is harmless.
 */
static void
fdesc_remove_entry(struct fdescnode *fd)
{
	struct fdhashhead *fc;
	struct fdescnode *fd2;

	fc = FD_NHASH(fd->fd_ix);
	mtx_lock(&fdesc_hashmtx);
	LIST_FOREACH(fd2, fc, fd_hash) {
		if (fd == fd2) {
			LIST_REMOVE(fd, fd_hash);
			break;
		}
	}
	mtx_unlock(&fdesc_hashmtx);
}

/*
 * Find or create the vnode for node index ix on mount mp.
 *
 * ftype and fd_fd are only used when a new node has to be created; they
 * are stored in the new fdescnode together with ix.
 *
 * On success 0 is returned and *vpp holds the vnode, locked with
 * LK_EXCLUSIVE.  Failures:
 *   -1     mp has no fdescmount data or FMNT_UNMOUNTF is set; on the
 *          first check *vpp is left untouched, on the second it is NULLVP.
 *   other  error from getnewvnode() (*vpp untouched), insmntque1() or,
 *          after losing the insertion race, vget_finish() (*vpp NULLVP).
 */
int
fdesc_allocvp(fdntype ftype, unsigned fd_fd, int ix, struct mount *mp,
    struct vnode **vpp)
{
	struct fdescmount *fmp;
	struct fdhashhead *fc;
	struct fdescnode *fd, *fd2;
	struct vnode *vp, *vp2;
	enum vgetstate vgs;
	int error;

	fc = FD_NHASH(ix);
loop:
	mtx_lock(&fdesc_hashmtx);
	/*
	 * If a forced unmount is progressing, we need to drop it. The flags are
	 * protected by the hashmtx.
	 */
	fmp = mp->mnt_data;
	if (fmp == NULL || fmp->flags & FMNT_UNMOUNTF) {
		mtx_unlock(&fdesc_hashmtx);
		return (-1);
	}

	/* First pass: reuse an existing node for (ix, mp) if there is one. */
	LIST_FOREACH(fd, fc, fd_hash) {
		if (fd->fd_ix == ix && fd->fd_vnode->v_mount == mp) {
			/* Get reference to vnode in case it's being freed */
			vp = fd->fd_vnode;
			vgs = vget_prep(vp);
			mtx_unlock(&fdesc_hashmtx);
			/* The vnode could not be obtained; rescan the chain. */
			if (vget_finish(vp, LK_EXCLUSIVE, vgs) != 0)
				goto loop;
			*vpp = vp;
			return (0);
		}
	}
	mtx_unlock(&fdesc_hashmtx);

	/* Nothing cached: build a new node with the hash mutex dropped. */
	fd = malloc(sizeof(struct fdescnode), M_TEMP, M_WAITOK);

	error = getnewvnode("fdescfs", mp, &fdesc_vnodeops, &vp);
	if (error) {
		free(fd, M_TEMP);
		return (error);
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/*
	 * From here on fd hangs off vp->v_data, so it is released by
	 * fdesc_reclaim() rather than by an explicit free() below.
	 */
	vp->v_data = fd;
	fd->fd_vnode = vp;
	fd->fd_type = ftype;
	fd->fd_fd = fd_fd;
	fd->fd_ix = ix;
	if (ftype == Fdesc) {
		/* Mount flags choose how descriptor nodes present themselves. */
		if ((fmp->flags & FMNT_RDLNKF) != 0)
			vp->v_type = VLNK;
		else if ((fmp->flags & FMNT_LINRDLNKF) != 0)
			vp->v_vflag |= VV_READLINK;
	}
	error = insmntque1(vp, mp);
	if (error != 0) {
		vgone(vp);
		vput(vp);
		*vpp = NULLVP;
		return (error);
	}

	/* Make sure that someone didn't beat us when inserting the vnode. */
	mtx_lock(&fdesc_hashmtx);
	/*
	 * If a forced unmount is progressing, we need to drop it. The flags are
	 * protected by the hashmtx.
	 */
	fmp = mp->mnt_data;
	if (fmp == NULL || fmp->flags & FMNT_UNMOUNTF) {
		mtx_unlock(&fdesc_hashmtx);
		vgone(vp);
		vput(vp);
		*vpp = NULLVP;
		return (-1);
	}

	/* Second pass: the mutex was dropped above, so look again. */
	LIST_FOREACH(fd2, fc, fd_hash) {
		if (fd2->fd_ix == ix && fd2->fd_vnode->v_mount == mp) {
			/* Get reference to vnode in case it's being freed */
			vp2 = fd2->fd_vnode;
			vgs = vget_prep(vp2);
			mtx_unlock(&fdesc_hashmtx);
			error = vget_finish(vp2, LK_EXCLUSIVE, vgs);
			/* Someone beat us, dec use count and wait for reclaim */
			vgone(vp);
			vput(vp);
			/* If we didn't get it, return no vnode. */
			if (error)
				vp2 = NULLVP;
			*vpp = vp2;
			return (error);
		}
	}

	/* If we came here, we can insert it safely. */
	LIST_INSERT_HEAD(fc, fd, fd_hash);
	mtx_unlock(&fdesc_hashmtx);
	vn_set_state(vp, VSTATE_CONSTRUCTED);
	*vpp = vp;
	return (0);
}

/*
 * Argument bundle passed from fdesc_lookup() through vn_vget_ino_gen() to
 * fdesc_get_ino_alloc().
 */
struct fdesc_get_ino_args {
	fdntype ftype;		/* node type to create */
	unsigned fd_fd;		/* descriptor number being looked up */
	int ix;			/* node index used as the hash key */
	struct file *fp;	/* file reference held by the caller */
	struct thread *td;	/* thread used when dropping fp */
	bool fdropped;		/* set once the callback has dropped fp */
};

/*
 * vn_vget_ino_gen() callback that produces the vnode for a lookup.
 *
 * When the mount has FMNT_NODUP set and the descriptor refers to a vnode,
 * that vnode itself is returned via vget(); otherwise an fdescfs node is
 * found or created with fdesc_allocvp().  In both cases the file reference
 * in the argument bundle is dropped and fdropped is set, so the caller can
 * tell whether it still owns the reference.
 */
static int
fdesc_get_ino_alloc(struct mount *mp, void *arg, int lkflags,
    struct vnode **rvp)
{
	struct fdesc_get_ino_args *a;
	struct fdescmount *fdm;
	struct vnode *vp;
	int error;

	a = arg;
	fdm = VFSTOFDESC(mp);
	if ((fdm->flags & FMNT_NODUP) != 0 && a->fp->f_type == DTYPE_VNODE) {
		vp = a->fp->f_vnode;
		/* NOTE(review): the vget() result is not checked here. */
		vget(vp, lkflags | LK_RETRY);
		*rvp = vp;
		error = 0;
	} else {
		error = fdesc_allocvp(a->ftype, a->fd_fd, a->ix, mp, rvp);
	}
	fdrop(a->fp, a->td);
	a->fdropped = true;
	return (error);
}

/*
 * vp is the current namei directory
 * ndp is the name to locate in that directory...
282 */ 283 static int 284 fdesc_lookup(struct vop_lookup_args *ap) 285 { 286 struct vnode **vpp = ap->a_vpp; 287 struct vnode *dvp = ap->a_dvp; 288 struct componentname *cnp = ap->a_cnp; 289 char *pname = cnp->cn_nameptr; 290 struct thread *td = curthread; 291 struct file *fp; 292 struct fdesc_get_ino_args arg; 293 int nlen = cnp->cn_namelen; 294 u_int fd, fd1; 295 int error; 296 struct vnode *fvp; 297 298 if ((cnp->cn_flags & ISLASTCN) && 299 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 300 error = EROFS; 301 goto bad; 302 } 303 304 if (cnp->cn_namelen == 1 && *pname == '.') { 305 *vpp = dvp; 306 VREF(dvp); 307 return (0); 308 } 309 310 if (VTOFDESC(dvp)->fd_type != Froot) { 311 error = ENOTDIR; 312 goto bad; 313 } 314 315 fd = 0; 316 /* the only time a leading 0 is acceptable is if it's "0" */ 317 if (*pname == '0' && nlen != 1) { 318 error = ENOENT; 319 goto bad; 320 } 321 while (nlen--) { 322 if (*pname < '0' || *pname > '9') { 323 error = ENOENT; 324 goto bad; 325 } 326 fd1 = 10 * fd + *pname++ - '0'; 327 if (fd1 < fd) { 328 error = ENOENT; 329 goto bad; 330 } 331 fd = fd1; 332 } 333 334 /* 335 * No rights to check since 'fp' isn't actually used. 336 */ 337 if ((error = fget(td, fd, &cap_no_rights, &fp)) != 0) 338 goto bad; 339 340 /* 341 * Make sure we do not deadlock looking up the dvp itself. 342 * 343 * Unlock our root node (dvp) when doing this, since we might 344 * deadlock since the vnode might be locked by another thread 345 * and the root vnode lock will be obtained afterwards (in case 346 * we're looking up the fd of the root vnode), which will be the 347 * opposite lock order. 348 */ 349 arg.ftype = Fdesc; 350 arg.fd_fd = fd; 351 arg.ix = FD_DESC + fd; 352 arg.fp = fp; 353 arg.td = td; 354 arg.fdropped = false; 355 error = vn_vget_ino_gen(dvp, fdesc_get_ino_alloc, &arg, 356 LK_EXCLUSIVE, &fvp); 357 358 if (!arg.fdropped) { 359 /* 360 * In case we're holding the last reference to the file, the dvp 361 * will be re-acquired. 
362 */ 363 VOP_UNLOCK(dvp); 364 fdrop(fp, td); 365 366 vn_lock(dvp, LK_RETRY | LK_EXCLUSIVE); 367 fvp = dvp; 368 if (error == 0 && VN_IS_DOOMED(dvp)) 369 error = ENOENT; 370 } 371 372 if (error) 373 goto bad; 374 *vpp = fvp; 375 return (0); 376 377 bad: 378 *vpp = NULL; 379 return (error); 380 } 381 382 static int 383 fdesc_open(struct vop_open_args *ap) 384 { 385 struct vnode *vp = ap->a_vp; 386 387 if (VTOFDESC(vp)->fd_type == Froot) 388 return (0); 389 390 /* 391 * XXX Kludge: set td->td_proc->p_dupfd to contain the value of the file 392 * descriptor being sought for duplication. The error return ensures 393 * that the vnode for this device will be released by vn_open. Open 394 * will detect this special error and take the actions in dupfdopen. 395 * Other callers of vn_open or VOP_OPEN will simply report the 396 * error. 397 */ 398 ap->a_td->td_dupfd = VTOFDESC(vp)->fd_fd; /* XXX */ 399 return (ENODEV); 400 } 401 402 static int 403 fdesc_pathconf(struct vop_pathconf_args *ap) 404 { 405 struct vnode *vp = ap->a_vp; 406 int error; 407 408 switch (ap->a_name) { 409 case _PC_NAME_MAX: 410 *ap->a_retval = NAME_MAX; 411 return (0); 412 case _PC_LINK_MAX: 413 if (VTOFDESC(vp)->fd_type == Froot) 414 *ap->a_retval = 2; 415 else 416 *ap->a_retval = 1; 417 return (0); 418 default: 419 if (VTOFDESC(vp)->fd_type == Froot) 420 return (vop_stdpathconf(ap)); 421 vref(vp); 422 VOP_UNLOCK(vp); 423 error = kern_fpathconf(curthread, VTOFDESC(vp)->fd_fd, 424 ap->a_name, ap->a_retval); 425 vn_lock(vp, LK_SHARED | LK_RETRY); 426 vunref(vp); 427 return (error); 428 } 429 } 430 431 static int 432 fdesc_getattr(struct vop_getattr_args *ap) 433 { 434 struct vnode *vp = ap->a_vp; 435 struct vattr *vap = ap->a_vap; 436 struct timeval boottime; 437 438 getboottime(&boottime); 439 vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; 440 vap->va_fileid = VTOFDESC(vp)->fd_ix; 441 vap->va_uid = 0; 442 vap->va_gid = 0; 443 vap->va_blocksize = DEV_BSIZE; 444 vap->va_atime.tv_sec = 
boottime.tv_sec; 445 vap->va_atime.tv_nsec = 0; 446 vap->va_mtime = vap->va_atime; 447 vap->va_ctime = vap->va_mtime; 448 vap->va_gen = 0; 449 vap->va_flags = 0; 450 vap->va_bytes = 0; 451 vap->va_filerev = 0; 452 453 switch (VTOFDESC(vp)->fd_type) { 454 case Froot: 455 vap->va_type = VDIR; 456 vap->va_nlink = 2; 457 vap->va_size = DEV_BSIZE; 458 vap->va_rdev = NODEV; 459 break; 460 461 case Fdesc: 462 vap->va_type = (VFSTOFDESC(vp->v_mount)->flags & 463 (FMNT_RDLNKF | FMNT_LINRDLNKF)) == 0 ? VCHR : VLNK; 464 vap->va_nlink = 1; 465 vap->va_size = 0; 466 vap->va_rdev = makedev(0, vap->va_fileid); 467 break; 468 469 default: 470 panic("fdesc_getattr"); 471 break; 472 } 473 474 vp->v_type = vap->va_type; 475 return (0); 476 } 477 478 static int 479 fdesc_setattr(struct vop_setattr_args *ap) 480 { 481 struct vattr *vap = ap->a_vap; 482 struct vnode *vp; 483 struct mount *mp; 484 struct file *fp; 485 struct thread *td = curthread; 486 cap_rights_t rights; 487 unsigned fd; 488 int error; 489 490 /* 491 * Can't mess with the root vnode 492 */ 493 if (VTOFDESC(ap->a_vp)->fd_type == Froot) 494 return (EACCES); 495 496 fd = VTOFDESC(ap->a_vp)->fd_fd; 497 498 /* 499 * Allow setattr where there is an underlying vnode. 500 * For O_PATH descriptors, disallow truncate. 501 */ 502 if (vap->va_size != VNOVAL) { 503 error = getvnode(td, fd, 504 cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp); 505 } else { 506 error = getvnode_path(td, fd, 507 cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp); 508 } 509 if (error) { 510 /* 511 * getvnode() returns EINVAL if the file descriptor is not 512 * backed by a vnode. Silently drop all changes except 513 * chflags(2) in this case. 
514 */ 515 if (error == EINVAL) { 516 if (vap->va_flags != VNOVAL) 517 error = EOPNOTSUPP; 518 else 519 error = 0; 520 } 521 return (error); 522 } 523 vp = fp->f_vnode; 524 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) == 0) { 525 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 526 error = VOP_SETATTR(vp, ap->a_vap, ap->a_cred); 527 VOP_UNLOCK(vp); 528 vn_finished_write(mp); 529 } 530 fdrop(fp, td); 531 return (error); 532 } 533 534 #define UIO_MX _GENERIC_DIRLEN(10) /* number of symbols in INT_MAX printout */ 535 536 static int 537 fdesc_readdir(struct vop_readdir_args *ap) 538 { 539 struct fdescmount *fmp; 540 struct uio *uio = ap->a_uio; 541 struct filedesc *fdp; 542 struct dirent d; 543 struct dirent *dp = &d; 544 int error, i, off, fcnt; 545 546 if (VTOFDESC(ap->a_vp)->fd_type != Froot) 547 panic("fdesc_readdir: not dir"); 548 549 fmp = VFSTOFDESC(ap->a_vp->v_mount); 550 if (ap->a_ncookies != NULL) 551 *ap->a_ncookies = 0; 552 553 off = (int)uio->uio_offset; 554 if (off != uio->uio_offset || off < 0 || (u_int)off % UIO_MX != 0 || 555 uio->uio_resid < UIO_MX) 556 return (EINVAL); 557 i = (u_int)off / UIO_MX; 558 fdp = uio->uio_td->td_proc->p_fd; 559 error = 0; 560 561 fcnt = i - 2; /* The first two nodes are `.' and `..' */ 562 563 FILEDESC_SLOCK(fdp); 564 while (i < fdp->fd_nfiles + 2 && uio->uio_resid >= UIO_MX) { 565 bzero((caddr_t)dp, UIO_MX); 566 switch (i) { 567 case 0: /* `.' */ 568 case 1: /* `..' */ 569 dp->d_fileno = i + FD_ROOT; 570 dp->d_namlen = i + 1; 571 dp->d_reclen = UIO_MX; 572 bcopy("..", dp->d_name, dp->d_namlen); 573 dp->d_type = DT_DIR; 574 dirent_terminate(dp); 575 break; 576 default: 577 if (fdp->fd_ofiles[fcnt].fde_file == NULL) 578 break; 579 dp->d_namlen = sprintf(dp->d_name, "%d", fcnt); 580 dp->d_reclen = UIO_MX; 581 dp->d_type = (fmp->flags & (FMNT_RDLNKF | 582 FMNT_LINRDLNKF)) == 0 ? DT_CHR : DT_LNK; 583 dp->d_fileno = i + FD_DESC; 584 dirent_terminate(dp); 585 break; 586 } 587 /* NOTE: d_off is the offset of the *next* entry. 
*/ 588 dp->d_off = UIO_MX * (i + 1); 589 if (dp->d_namlen != 0) { 590 /* 591 * And ship to userland 592 */ 593 FILEDESC_SUNLOCK(fdp); 594 error = uiomove(dp, UIO_MX, uio); 595 if (error) 596 goto done; 597 FILEDESC_SLOCK(fdp); 598 } 599 i++; 600 fcnt++; 601 } 602 FILEDESC_SUNLOCK(fdp); 603 604 done: 605 uio->uio_offset = i * UIO_MX; 606 return (error); 607 } 608 609 static int 610 fdesc_reclaim(struct vop_reclaim_args *ap) 611 { 612 struct vnode *vp; 613 struct fdescnode *fd; 614 615 vp = ap->a_vp; 616 fd = VTOFDESC(vp); 617 fdesc_remove_entry(fd); 618 free(vp->v_data, M_TEMP); 619 vp->v_data = NULL; 620 return (0); 621 } 622 623 static int 624 fdesc_readlink(struct vop_readlink_args *va) 625 { 626 struct vnode *vp, *vn; 627 struct thread *td; 628 struct uio *uio; 629 struct file *fp; 630 char *freepath, *fullpath; 631 size_t pathlen; 632 int lockflags, fd_fd; 633 int error; 634 635 freepath = NULL; 636 vn = va->a_vp; 637 if (VTOFDESC(vn)->fd_type != Fdesc) 638 panic("fdesc_readlink: not fdescfs link"); 639 fd_fd = ((struct fdescnode *)vn->v_data)->fd_fd; 640 lockflags = VOP_ISLOCKED(vn); 641 VOP_UNLOCK(vn); 642 643 td = curthread; 644 error = fget_cap(td, fd_fd, &cap_no_rights, &fp, NULL); 645 if (error != 0) 646 goto out; 647 648 switch (fp->f_type) { 649 case DTYPE_VNODE: 650 vp = fp->f_vnode; 651 error = vn_fullpath(vp, &fullpath, &freepath); 652 break; 653 default: 654 fullpath = "anon_inode:[unknown]"; 655 break; 656 } 657 if (error == 0) { 658 uio = va->a_uio; 659 pathlen = strlen(fullpath); 660 error = uiomove(fullpath, pathlen, uio); 661 } 662 if (freepath != NULL) 663 free(freepath, M_TEMP); 664 fdrop(fp, td); 665 666 out: 667 vn_lock(vn, lockflags | LK_RETRY); 668 return (error); 669 } 670