1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2001 Dag-Erling Smørgrav 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer 12 * in this position and unchanged. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote products 17 * derived from this software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 #include "opt_pseudofs.h" 33 34 #include <sys/param.h> 35 #include <sys/kernel.h> 36 #include <sys/systm.h> 37 #include <sys/ctype.h> 38 #include <sys/dirent.h> 39 #include <sys/fcntl.h> 40 #include <sys/limits.h> 41 #include <sys/lock.h> 42 #include <sys/malloc.h> 43 #include <sys/mount.h> 44 #include <sys/mutex.h> 45 #include <sys/namei.h> 46 #include <sys/proc.h> 47 #include <sys/sbuf.h> 48 #include <sys/sx.h> 49 #include <sys/sysctl.h> 50 #include <sys/vnode.h> 51 52 #include <fs/pseudofs/pseudofs.h> 53 #include <fs/pseudofs/pseudofs_internal.h> 54 55 #define KASSERT_PN_IS_DIR(pn) \ 56 KASSERT((pn)->pn_type == pfstype_root || \ 57 (pn)->pn_type == pfstype_dir || \ 58 (pn)->pn_type == pfstype_procdir, \ 59 ("%s(): VDIR vnode refers to non-directory pfs_node", __func__)) 60 61 #define KASSERT_PN_IS_FILE(pn) \ 62 KASSERT((pn)->pn_type == pfstype_file, \ 63 ("%s(): VREG vnode refers to non-file pfs_node", __func__)) 64 65 #define KASSERT_PN_IS_LINK(pn) \ 66 KASSERT((pn)->pn_type == pfstype_symlink, \ 67 ("%s(): VLNK vnode refers to non-link pfs_node", __func__)) 68 69 #define PFS_MAXBUFSIZ 1024 * 1024 70 71 /* 72 * Returns the fileno, adjusted for target pid 73 */ 74 static uint32_t 75 pn_fileno(struct pfs_node *pn, pid_t pid) 76 { 77 78 KASSERT(pn->pn_fileno > 0, 79 ("%s(): no fileno allocated", __func__)); 80 if (pid != NO_PID) 81 return (pn->pn_fileno * NO_PID + pid); 82 return (pn->pn_fileno); 83 } 84 85 /* 86 * Returns non-zero if given file is visible to given thread. 87 */ 88 static int 89 pfs_visible_proc(struct thread *td, struct pfs_node *pn, struct proc *proc) 90 { 91 92 if (proc == NULL) 93 return (0); 94 95 PROC_LOCK_ASSERT(proc, MA_OWNED); 96 97 if ((proc->p_flag & P_WEXIT) != 0) 98 return (0); 99 if (p_cansee(td, proc) != 0) 100 return (0); 101 return (pn_vis(td, proc, pn)); 102 } 103 104 static int 105 pfs_visible(struct thread *td, struct pfs_node *pn, pid_t pid, 106 struct proc **p) 107 { 108 struct proc *proc; 109 110 PFS_TRACE(("%s (pid: %d, req: %d)", 111 pn->pn_name, pid, td->td_proc->p_pid)); 112 113 if (p) 114 *p = NULL; 115 if (pid == NO_PID) 116 PFS_RETURN (pn_vis(td, NULL, pn)); 117 proc = pfind(pid); 118 if (proc == NULL) 119 PFS_RETURN (0); 120 if (pfs_visible_proc(td, pn, proc)) { 121 if (p) 122 *p = proc; 123 else 124 PROC_UNLOCK(proc); 125 PFS_RETURN (1); 126 } 127 PROC_UNLOCK(proc); 128 PFS_RETURN (0); 129 } 130 131 static int 132 pfs_lookup_proc(pid_t pid, struct proc **p) 133 { 134 struct proc *proc; 135 136 proc = pfind(pid); 137 if (proc == NULL) 138 return (0); 139 if ((proc->p_flag & P_WEXIT) != 0) { 140 PROC_UNLOCK(proc); 141 return (0); 142 } 143 _PHOLD(proc); 144 PROC_UNLOCK(proc); 145 *p = proc; 146 return (1); 147 } 148 149 /* 150 * Verify permissions 151 */ 152 static int 153 pfs_access(struct vop_access_args *va) 154 { 155 struct vnode *vn = va->a_vp; 156 struct pfs_vdata *pvd = vn->v_data; 157 struct vattr vattr; 158 int error; 159 160 PFS_TRACE(("%s", pvd->pvd_pn->pn_name)); 161 (void)pvd; 162 163 error = VOP_GETATTR(vn, &vattr, va->a_cred); 164 if (error) 165 PFS_RETURN (error); 166 error = vaccess(vn->v_type, vattr.va_mode, vattr.va_uid, vattr.va_gid, 167 va->a_accmode, va->a_cred); 168 PFS_RETURN (error); 169 } 170 171 /* 172 * Close a file or directory 173 */ 174 static int 175 pfs_close(struct vop_close_args *va) 176 { 177 struct vnode *vn = va->a_vp; 178 struct pfs_vdata *pvd = vn->v_data; 179 struct pfs_node *pn = pvd->pvd_pn; 180 struct proc *proc; 181 int error; 182 183 PFS_TRACE(("%s", pn->pn_name)); 184 pfs_assert_not_owned(pn); 185 186 /* 187 * Do nothing unless this is the last close and the node has a 188 * last-close handler. 189 */ 190 if (vrefcnt(vn) > 1 || pn->pn_close == NULL) 191 PFS_RETURN (0); 192 193 if (pvd->pvd_pid != NO_PID) { 194 proc = pfind(pvd->pvd_pid); 195 } else { 196 proc = NULL; 197 } 198 199 error = pn_close(va->a_td, proc, pn); 200 201 if (proc != NULL) 202 PROC_UNLOCK(proc); 203 204 PFS_RETURN (error); 205 } 206 207 /* 208 * Get file attributes 209 */ 210 static int 211 pfs_getattr(struct vop_getattr_args *va) 212 { 213 struct vnode *vn = va->a_vp; 214 struct pfs_vdata *pvd = vn->v_data; 215 struct pfs_node *pn = pvd->pvd_pn; 216 struct vattr *vap = va->a_vap; 217 struct proc *proc; 218 int error = 0; 219 220 PFS_TRACE(("%s", pn->pn_name)); 221 pfs_assert_not_owned(pn); 222 223 if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) 224 PFS_RETURN (ENOENT); 225 226 vap->va_type = vn->v_type; 227 vap->va_fileid = pn_fileno(pn, pvd->pvd_pid); 228 vap->va_flags = 0; 229 vap->va_blocksize = PAGE_SIZE; 230 vap->va_bytes = vap->va_size = 0; 231 vap->va_filerev = 0; 232 vap->va_fsid = vn->v_mount->mnt_stat.f_fsid.val[0]; 233 vap->va_nlink = 1; 234 nanotime(&vap->va_ctime); 235 vap->va_atime = vap->va_mtime = vap->va_ctime; 236 237 switch (pn->pn_type) { 238 case pfstype_procdir: 239 case pfstype_root: 240 case pfstype_dir: 241 #if 0 242 pfs_lock(pn); 243 /* compute link count */ 244 pfs_unlock(pn); 245 #endif 246 vap->va_mode = 0555; 247 break; 248 case pfstype_file: 249 case pfstype_symlink: 250 vap->va_mode = 0444; 251 break; 252 default: 253 printf("shouldn't be here!\n"); 254 vap->va_mode = 0; 255 break; 256 } 257 258 if (proc != NULL) { 259 vap->va_uid = proc->p_ucred->cr_ruid; 260 vap->va_gid = proc->p_ucred->cr_rgid; 261 } else { 262 vap->va_uid = 0; 263 vap->va_gid = 0; 264 } 265 266 if (pn->pn_attr != NULL) 267 error = pn_attr(curthread, proc, pn, vap); 268 269 if(proc != NULL) 270 PROC_UNLOCK(proc); 271 272 PFS_RETURN (error); 273 } 274 275 /* 276 * Perform an ioctl 277 */ 278 static int 279 pfs_ioctl(struct vop_ioctl_args *va) 280 { 281 struct vnode *vn; 282 struct pfs_vdata *pvd; 283 struct pfs_node *pn; 284 struct proc *proc; 285 int error; 286 287 vn = va->a_vp; 288 vn_lock(vn, LK_SHARED | LK_RETRY); 289 if (VN_IS_DOOMED(vn)) { 290 VOP_UNLOCK(vn); 291 return (EBADF); 292 } 293 pvd = vn->v_data; 294 pn = pvd->pvd_pn; 295 296 PFS_TRACE(("%s: %lx", pn->pn_name, va->a_command)); 297 pfs_assert_not_owned(pn); 298 299 if (vn->v_type != VREG) { 300 VOP_UNLOCK(vn); 301 PFS_RETURN (EINVAL); 302 } 303 KASSERT_PN_IS_FILE(pn); 304 305 if (pn->pn_ioctl == NULL) { 306 VOP_UNLOCK(vn); 307 PFS_RETURN (ENOTTY); 308 } 309 310 /* 311 * This is necessary because process' privileges may 312 * have changed since the open() call. 313 */ 314 if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) { 315 VOP_UNLOCK(vn); 316 PFS_RETURN (EIO); 317 } 318 319 error = pn_ioctl(curthread, proc, pn, va->a_command, va->a_data); 320 321 if (proc != NULL) 322 PROC_UNLOCK(proc); 323 324 VOP_UNLOCK(vn); 325 PFS_RETURN (error); 326 } 327 328 /* 329 * Perform getextattr 330 */ 331 static int 332 pfs_getextattr(struct vop_getextattr_args *va) 333 { 334 struct vnode *vn = va->a_vp; 335 struct pfs_vdata *pvd = vn->v_data; 336 struct pfs_node *pn = pvd->pvd_pn; 337 struct proc *proc; 338 int error; 339 340 PFS_TRACE(("%s", pn->pn_name)); 341 pfs_assert_not_owned(pn); 342 343 /* 344 * This is necessary because either process' privileges may 345 * have changed since the open() call. 346 */ 347 if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) 348 PFS_RETURN (EIO); 349 350 if (pn->pn_getextattr == NULL) 351 error = EOPNOTSUPP; 352 else 353 error = pn_getextattr(curthread, proc, pn, 354 va->a_attrnamespace, va->a_name, va->a_uio, 355 va->a_size, va->a_cred); 356 357 if (proc != NULL) 358 PROC_UNLOCK(proc); 359 360 PFS_RETURN (error); 361 } 362 363 /* 364 * Convert a vnode to its component name 365 */ 366 static int 367 pfs_vptocnp(struct vop_vptocnp_args *ap) 368 { 369 struct vnode *vp = ap->a_vp; 370 struct vnode **dvp = ap->a_vpp; 371 struct pfs_vdata *pvd = vp->v_data; 372 struct pfs_node *pd = pvd->pvd_pn; 373 struct pfs_node *pn; 374 struct mount *mp; 375 char *buf = ap->a_buf; 376 size_t *buflen = ap->a_buflen; 377 char pidbuf[PFS_NAMELEN]; 378 pid_t pid = pvd->pvd_pid; 379 int len, i, error, locked; 380 381 i = *buflen; 382 error = 0; 383 384 pfs_lock(pd); 385 386 if (vp->v_type == VDIR && pd->pn_type == pfstype_root) { 387 *dvp = vp; 388 vhold(*dvp); 389 pfs_unlock(pd); 390 PFS_RETURN (0); 391 } else if (vp->v_type == VDIR && pd->pn_type == pfstype_procdir) { 392 len = snprintf(pidbuf, sizeof(pidbuf), "%d", pid); 393 i -= len; 394 if (i < 0) { 395 error = ENOMEM; 396 goto failed; 397 } 398 bcopy(pidbuf, buf + i, len); 399 } else { 400 len = strlen(pd->pn_name); 401 i -= len; 402 if (i < 0) { 403 error = ENOMEM; 404 goto failed; 405 } 406 bcopy(pd->pn_name, buf + i, len); 407 } 408 409 pn = pd->pn_parent; 410 pfs_unlock(pd); 411 412 mp = vp->v_mount; 413 error = vfs_busy(mp, 0); 414 if (error) 415 return (error); 416 417 /* 418 * vp is held by caller. 419 */ 420 locked = VOP_ISLOCKED(vp); 421 VOP_UNLOCK(vp); 422 423 error = pfs_vncache_alloc(mp, dvp, pn, pid); 424 if (error) { 425 vn_lock(vp, locked | LK_RETRY); 426 vfs_unbusy(mp); 427 PFS_RETURN(error); 428 } 429 430 *buflen = i; 431 VOP_UNLOCK(*dvp); 432 vn_lock(vp, locked | LK_RETRY); 433 vfs_unbusy(mp); 434 435 PFS_RETURN (0); 436 failed: 437 pfs_unlock(pd); 438 PFS_RETURN(error); 439 } 440 441 /* 442 * Look up a file or directory 443 */ 444 static int 445 pfs_lookup(struct vop_cachedlookup_args *va) 446 { 447 struct vnode *vn = va->a_dvp; 448 struct vnode **vpp = va->a_vpp; 449 struct componentname *cnp = va->a_cnp; 450 struct pfs_vdata *pvd = vn->v_data; 451 struct pfs_node *pd = pvd->pvd_pn; 452 struct pfs_node *pn, *pdn = NULL; 453 struct mount *mp; 454 pid_t pid = pvd->pvd_pid; 455 char *pname; 456 int error, i, namelen, visible; 457 458 PFS_TRACE(("%.*s", (int)cnp->cn_namelen, cnp->cn_nameptr)); 459 pfs_assert_not_owned(pd); 460 461 if (vn->v_type != VDIR) 462 PFS_RETURN (ENOTDIR); 463 KASSERT_PN_IS_DIR(pd); 464 465 /* 466 * Don't support DELETE or RENAME. CREATE is supported so 467 * that O_CREAT will work, but the lookup will still fail if 468 * the file does not exist. 469 */ 470 if ((cnp->cn_flags & ISLASTCN) && 471 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 472 PFS_RETURN (EOPNOTSUPP); 473 474 /* shortcut: check if the name is too long */ 475 if (cnp->cn_namelen >= PFS_NAMELEN) 476 PFS_RETURN (ENOENT); 477 478 /* check that parent directory is visible... */ 479 if (!pfs_visible(curthread, pd, pvd->pvd_pid, NULL)) 480 PFS_RETURN (ENOENT); 481 482 /* self */ 483 namelen = cnp->cn_namelen; 484 pname = cnp->cn_nameptr; 485 if (namelen == 1 && pname[0] == '.') { 486 pn = pd; 487 *vpp = vn; 488 VREF(vn); 489 PFS_RETURN (0); 490 } 491 492 mp = vn->v_mount; 493 494 /* parent */ 495 if (cnp->cn_flags & ISDOTDOT) { 496 if (pd->pn_type == pfstype_root) 497 PFS_RETURN (EIO); 498 error = vfs_busy(mp, MBF_NOWAIT); 499 if (error != 0) { 500 vfs_ref(mp); 501 VOP_UNLOCK(vn); 502 error = vfs_busy(mp, 0); 503 vn_lock(vn, LK_EXCLUSIVE | LK_RETRY); 504 vfs_rel(mp); 505 if (error != 0) 506 PFS_RETURN(ENOENT); 507 if (VN_IS_DOOMED(vn)) { 508 vfs_unbusy(mp); 509 PFS_RETURN(ENOENT); 510 } 511 } 512 VOP_UNLOCK(vn); 513 KASSERT(pd->pn_parent != NULL, 514 ("%s(): non-root directory has no parent", __func__)); 515 /* 516 * This one is tricky. Descendents of procdir nodes 517 * inherit their parent's process affinity, but 518 * there's no easy reverse mapping. For simplicity, 519 * we assume that if this node is a procdir, its 520 * parent isn't (which is correct as long as 521 * descendents of procdir nodes are never procdir 522 * nodes themselves) 523 */ 524 if (pd->pn_type == pfstype_procdir) 525 pid = NO_PID; 526 pfs_lock(pd); 527 pn = pd->pn_parent; 528 pfs_unlock(pd); 529 goto got_pnode; 530 } 531 532 pfs_lock(pd); 533 534 /* named node */ 535 for (pn = pd->pn_nodes; pn != NULL; pn = pn->pn_next) 536 if (pn->pn_type == pfstype_procdir) 537 pdn = pn; 538 else if (strncmp(pname, pn->pn_name, namelen) == 0 && 539 pn->pn_name[namelen] == '\0') { 540 pfs_unlock(pd); 541 goto got_pnode; 542 } 543 544 /* process dependent node */ 545 if ((pn = pdn) != NULL) { 546 pid = 0; 547 for (pid = 0, i = 0; i < namelen && isdigit(pname[i]); ++i) 548 if ((pid = pid * 10 + pname[i] - '0') > PID_MAX) 549 break; 550 if (i == cnp->cn_namelen) { 551 pfs_unlock(pd); 552 goto got_pnode; 553 } 554 } 555 556 pfs_unlock(pd); 557 558 PFS_RETURN (ENOENT); 559 560 got_pnode: 561 pfs_assert_not_owned(pd); 562 pfs_assert_not_owned(pn); 563 visible = pfs_visible(curthread, pn, pid, NULL); 564 if (!visible) { 565 error = ENOENT; 566 goto failed; 567 } 568 569 error = pfs_vncache_alloc(mp, vpp, pn, pid); 570 if (error) 571 goto failed; 572 573 if (cnp->cn_flags & ISDOTDOT) { 574 vfs_unbusy(mp); 575 vn_lock(vn, LK_EXCLUSIVE | LK_RETRY); 576 if (VN_IS_DOOMED(vn)) { 577 vput(*vpp); 578 *vpp = NULL; 579 PFS_RETURN(ENOENT); 580 } 581 } 582 if (cnp->cn_flags & MAKEENTRY && !VN_IS_DOOMED(vn)) 583 cache_enter(vn, *vpp, cnp); 584 PFS_RETURN (0); 585 failed: 586 if (cnp->cn_flags & ISDOTDOT) { 587 vfs_unbusy(mp); 588 vn_lock(vn, LK_EXCLUSIVE | LK_RETRY); 589 *vpp = NULL; 590 } 591 PFS_RETURN(error); 592 } 593 594 /* 595 * Open a file or directory. 596 */ 597 static int 598 pfs_open(struct vop_open_args *va) 599 { 600 struct vnode *vn = va->a_vp; 601 struct pfs_vdata *pvd = vn->v_data; 602 struct pfs_node *pn = pvd->pvd_pn; 603 int mode = va->a_mode; 604 605 PFS_TRACE(("%s (mode 0x%x)", pn->pn_name, mode)); 606 pfs_assert_not_owned(pn); 607 608 /* check if the requested mode is permitted */ 609 if (((mode & FREAD) && !(mode & PFS_RD)) || 610 ((mode & FWRITE) && !(mode & PFS_WR))) 611 PFS_RETURN (EPERM); 612 613 /* we don't support locking */ 614 if ((mode & O_SHLOCK) || (mode & O_EXLOCK)) 615 PFS_RETURN (EOPNOTSUPP); 616 617 PFS_RETURN (0); 618 } 619 620 struct sbuf_seek_helper { 621 off_t skip_bytes; 622 struct uio *uio; 623 }; 624 625 static int 626 pfs_sbuf_uio_drain(void *arg, const char *data, int len) 627 { 628 struct sbuf_seek_helper *ssh; 629 struct uio *uio; 630 int error, skipped; 631 632 ssh = arg; 633 uio = ssh->uio; 634 skipped = 0; 635 636 /* Need to discard first uio_offset bytes. */ 637 if (ssh->skip_bytes > 0) { 638 if (ssh->skip_bytes >= len) { 639 ssh->skip_bytes -= len; 640 return (len); 641 } 642 643 data += ssh->skip_bytes; 644 len -= ssh->skip_bytes; 645 skipped = ssh->skip_bytes; 646 ssh->skip_bytes = 0; 647 } 648 649 error = uiomove(__DECONST(void *, data), len, uio); 650 if (error != 0) 651 return (-error); 652 653 /* 654 * The fill function has more to emit, but the reader is finished. 655 * This is similar to the truncated read case for non-draining PFS 656 * sbufs, and should be handled appropriately in fill-routines. 657 */ 658 if (uio->uio_resid == 0) 659 return (-ENOBUFS); 660 661 return (skipped + len); 662 } 663 664 /* 665 * Read from a file 666 */ 667 static int 668 pfs_read(struct vop_read_args *va) 669 { 670 struct vnode *vn = va->a_vp; 671 struct pfs_vdata *pvd = vn->v_data; 672 struct pfs_node *pn = pvd->pvd_pn; 673 struct uio *uio = va->a_uio; 674 struct proc *proc; 675 struct sbuf *sb = NULL; 676 int error, locked; 677 off_t buflen, buflim; 678 struct sbuf_seek_helper ssh; 679 680 PFS_TRACE(("%s", pn->pn_name)); 681 pfs_assert_not_owned(pn); 682 683 if (vn->v_type != VREG) 684 PFS_RETURN (EINVAL); 685 KASSERT_PN_IS_FILE(pn); 686 687 if (!(pn->pn_flags & PFS_RD)) 688 PFS_RETURN (EBADF); 689 690 if (pn->pn_fill == NULL) 691 PFS_RETURN (EIO); 692 693 /* 694 * This is necessary because either process' privileges may 695 * have changed since the open() call. 696 */ 697 if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) 698 PFS_RETURN (EIO); 699 if (proc != NULL) { 700 _PHOLD(proc); 701 PROC_UNLOCK(proc); 702 } 703 704 vhold(vn); 705 locked = VOP_ISLOCKED(vn); 706 VOP_UNLOCK(vn); 707 708 if (pn->pn_flags & PFS_RAWRD) { 709 PFS_TRACE(("%zd resid", uio->uio_resid)); 710 error = pn_fill(curthread, proc, pn, NULL, uio); 711 PFS_TRACE(("%zd resid", uio->uio_resid)); 712 goto ret; 713 } 714 715 if (uio->uio_resid < 0 || uio->uio_offset < 0 || 716 uio->uio_resid > OFF_MAX - uio->uio_offset) { 717 error = EINVAL; 718 goto ret; 719 } 720 buflen = uio->uio_offset + uio->uio_resid + 1; 721 if (pn->pn_flags & PFS_AUTODRAIN) 722 /* 723 * We can use a smaller buffer if we can stream output to the 724 * consumer. 725 */ 726 buflim = PAGE_SIZE; 727 else 728 buflim = PFS_MAXBUFSIZ; 729 if (buflen > buflim) 730 buflen = buflim; 731 732 sb = sbuf_new(sb, NULL, buflen, 0); 733 if (sb == NULL) { 734 error = EIO; 735 goto ret; 736 } 737 738 if (pn->pn_flags & PFS_AUTODRAIN) { 739 ssh.skip_bytes = uio->uio_offset; 740 ssh.uio = uio; 741 sbuf_set_drain(sb, pfs_sbuf_uio_drain, &ssh); 742 } 743 744 error = pn_fill(curthread, proc, pn, sb, uio); 745 746 if (error) { 747 sbuf_delete(sb); 748 goto ret; 749 } 750 751 /* 752 * XXX: If the buffer overflowed, sbuf_len() will not return 753 * the data length. Then just use the full length because an 754 * overflowed sbuf must be full. 755 */ 756 error = sbuf_finish(sb); 757 if ((pn->pn_flags & PFS_AUTODRAIN)) { 758 /* 759 * ENOBUFS just indicates early termination of the fill 760 * function as the caller's buffer was already filled. Squash 761 * to zero. 762 */ 763 if (uio->uio_resid == 0 && error == ENOBUFS) 764 error = 0; 765 } else { 766 if (error == 0) 767 buflen = sbuf_len(sb); 768 else 769 /* The trailing byte is not valid. */ 770 buflen--; 771 error = uiomove_frombuf(sbuf_data(sb), buflen, uio); 772 } 773 sbuf_delete(sb); 774 ret: 775 vn_lock(vn, locked | LK_RETRY); 776 vdrop(vn); 777 if (proc != NULL) 778 PRELE(proc); 779 PFS_RETURN (error); 780 } 781 782 /* 783 * Iterate through directory entries 784 */ 785 static int 786 pfs_iterate(struct thread *td, struct proc *proc, struct pfs_node *pd, 787 struct pfs_node **pn, struct proc **p) 788 { 789 int visible; 790 791 sx_assert(&allproc_lock, SX_SLOCKED); 792 pfs_assert_owned(pd); 793 again: 794 if (*pn == NULL) { 795 /* first node */ 796 *pn = pd->pn_nodes; 797 } else if ((*pn)->pn_type != pfstype_procdir) { 798 /* next node */ 799 *pn = (*pn)->pn_next; 800 } 801 if (*pn != NULL && (*pn)->pn_type == pfstype_procdir) { 802 /* next process */ 803 if (*p == NULL) 804 *p = LIST_FIRST(&allproc); 805 else 806 *p = LIST_NEXT(*p, p_list); 807 /* out of processes: next node */ 808 if (*p == NULL) 809 *pn = (*pn)->pn_next; 810 else 811 PROC_LOCK(*p); 812 } 813 814 if ((*pn) == NULL) 815 return (-1); 816 817 if (*p != NULL) { 818 visible = pfs_visible_proc(td, *pn, *p); 819 PROC_UNLOCK(*p); 820 } else if (proc != NULL) { 821 visible = pfs_visible_proc(td, *pn, proc); 822 } else { 823 visible = pn_vis(td, NULL, *pn); 824 } 825 if (!visible) 826 goto again; 827 828 return (0); 829 } 830 831 /* Directory entry list */ 832 struct pfsentry { 833 STAILQ_ENTRY(pfsentry) link; 834 struct dirent entry; 835 }; 836 STAILQ_HEAD(pfsdirentlist, pfsentry); 837 838 /* 839 * Return directory entries. 840 */ 841 static int 842 pfs_readdir(struct vop_readdir_args *va) 843 { 844 struct vnode *vn = va->a_vp; 845 struct pfs_vdata *pvd = vn->v_data; 846 struct pfs_node *pd = pvd->pvd_pn; 847 pid_t pid = pvd->pvd_pid; 848 struct proc *p, *proc; 849 struct pfs_node *pn; 850 struct uio *uio; 851 struct pfsentry *pfsent, *pfsent2; 852 struct pfsdirentlist lst; 853 off_t offset; 854 int error, i, resid; 855 856 STAILQ_INIT(&lst); 857 error = 0; 858 KASSERT(pd->pn_info == vn->v_mount->mnt_data, 859 ("%s(): pn_info does not match mountpoint", __func__)); 860 PFS_TRACE(("%s pid %lu", pd->pn_name, (unsigned long)pid)); 861 pfs_assert_not_owned(pd); 862 863 if (vn->v_type != VDIR) 864 PFS_RETURN (ENOTDIR); 865 KASSERT_PN_IS_DIR(pd); 866 uio = va->a_uio; 867 868 /* only allow reading entire entries */ 869 offset = uio->uio_offset; 870 resid = uio->uio_resid; 871 if (offset < 0 || offset % PFS_DELEN != 0 || 872 (resid && resid < PFS_DELEN)) 873 PFS_RETURN (EINVAL); 874 if (resid == 0) 875 PFS_RETURN (0); 876 877 proc = NULL; 878 if (pid != NO_PID && !pfs_lookup_proc(pid, &proc)) 879 PFS_RETURN (ENOENT); 880 881 sx_slock(&allproc_lock); 882 pfs_lock(pd); 883 884 KASSERT(pid == NO_PID || proc != NULL, 885 ("%s(): no process for pid %lu", __func__, (unsigned long)pid)); 886 887 if (pid != NO_PID) { 888 PROC_LOCK(proc); 889 890 /* check if the directory is visible to the caller */ 891 if (!pfs_visible_proc(curthread, pd, proc)) { 892 _PRELE(proc); 893 PROC_UNLOCK(proc); 894 pfs_unlock(pd); 895 sx_sunlock(&allproc_lock); 896 PFS_RETURN (ENOENT); 897 } 898 } 899 900 /* skip unwanted entries */ 901 for (pn = NULL, p = NULL; offset > 0; offset -= PFS_DELEN) { 902 if (pfs_iterate(curthread, proc, pd, &pn, &p) == -1) { 903 /* nothing left... */ 904 if (proc != NULL) { 905 _PRELE(proc); 906 PROC_UNLOCK(proc); 907 } 908 pfs_unlock(pd); 909 sx_sunlock(&allproc_lock); 910 PFS_RETURN (0); 911 } 912 } 913 914 /* fill in entries */ 915 while (pfs_iterate(curthread, proc, pd, &pn, &p) != -1 && 916 resid >= PFS_DELEN) { 917 if ((pfsent = malloc(sizeof(struct pfsentry), M_IOV, 918 M_NOWAIT | M_ZERO)) == NULL) { 919 error = ENOMEM; 920 break; 921 } 922 pfsent->entry.d_reclen = PFS_DELEN; 923 pfsent->entry.d_fileno = pn_fileno(pn, pid); 924 /* PFS_DELEN was picked to fit PFS_NAMLEN */ 925 for (i = 0; i < PFS_NAMELEN - 1 && pn->pn_name[i] != '\0'; ++i) 926 pfsent->entry.d_name[i] = pn->pn_name[i]; 927 pfsent->entry.d_namlen = i; 928 /* NOTE: d_off is the offset of the *next* entry. */ 929 pfsent->entry.d_off = offset + PFS_DELEN; 930 switch (pn->pn_type) { 931 case pfstype_procdir: 932 KASSERT(p != NULL, 933 ("reached procdir node with p == NULL")); 934 pfsent->entry.d_namlen = snprintf(pfsent->entry.d_name, 935 PFS_NAMELEN, "%d", p->p_pid); 936 /* fall through */ 937 case pfstype_root: 938 case pfstype_dir: 939 case pfstype_this: 940 case pfstype_parent: 941 pfsent->entry.d_type = DT_DIR; 942 break; 943 case pfstype_file: 944 pfsent->entry.d_type = DT_REG; 945 break; 946 case pfstype_symlink: 947 pfsent->entry.d_type = DT_LNK; 948 break; 949 default: 950 panic("%s has unexpected node type: %d", pn->pn_name, pn->pn_type); 951 } 952 PFS_TRACE(("%s", pfsent->entry.d_name)); 953 dirent_terminate(&pfsent->entry); 954 STAILQ_INSERT_TAIL(&lst, pfsent, link); 955 offset += PFS_DELEN; 956 resid -= PFS_DELEN; 957 } 958 if (proc != NULL) { 959 _PRELE(proc); 960 PROC_UNLOCK(proc); 961 } 962 pfs_unlock(pd); 963 sx_sunlock(&allproc_lock); 964 i = 0; 965 STAILQ_FOREACH_SAFE(pfsent, &lst, link, pfsent2) { 966 if (error == 0) 967 error = uiomove(&pfsent->entry, PFS_DELEN, uio); 968 free(pfsent, M_IOV); 969 i++; 970 } 971 PFS_TRACE(("%ju bytes", (uintmax_t)(i * PFS_DELEN))); 972 PFS_RETURN (error); 973 } 974 975 /* 976 * Read a symbolic link 977 */ 978 static int 979 pfs_readlink(struct vop_readlink_args *va) 980 { 981 struct vnode *vn = va->a_vp; 982 struct pfs_vdata *pvd = vn->v_data; 983 struct pfs_node *pn = pvd->pvd_pn; 984 struct uio *uio = va->a_uio; 985 struct proc *proc = NULL; 986 char buf[PATH_MAX]; 987 struct sbuf sb; 988 int error, locked; 989 990 PFS_TRACE(("%s", pn->pn_name)); 991 pfs_assert_not_owned(pn); 992 993 if (vn->v_type != VLNK) 994 PFS_RETURN (EINVAL); 995 KASSERT_PN_IS_LINK(pn); 996 997 if (pn->pn_fill == NULL) 998 PFS_RETURN (EIO); 999 1000 if (pvd->pvd_pid != NO_PID) { 1001 if ((proc = pfind(pvd->pvd_pid)) == NULL) 1002 PFS_RETURN (EIO); 1003 if (proc->p_flag & P_WEXIT) { 1004 PROC_UNLOCK(proc); 1005 PFS_RETURN (EIO); 1006 } 1007 _PHOLD(proc); 1008 PROC_UNLOCK(proc); 1009 } 1010 vhold(vn); 1011 locked = VOP_ISLOCKED(vn); 1012 VOP_UNLOCK(vn); 1013 1014 /* sbuf_new() can't fail with a static buffer */ 1015 sbuf_new(&sb, buf, sizeof buf, 0); 1016 1017 error = pn_fill(curthread, proc, pn, &sb, NULL); 1018 1019 if (proc != NULL) 1020 PRELE(proc); 1021 vn_lock(vn, locked | LK_RETRY); 1022 vdrop(vn); 1023 1024 if (error) { 1025 sbuf_delete(&sb); 1026 PFS_RETURN (error); 1027 } 1028 1029 if (sbuf_finish(&sb) != 0) { 1030 sbuf_delete(&sb); 1031 PFS_RETURN (ENAMETOOLONG); 1032 } 1033 1034 error = uiomove_frombuf(sbuf_data(&sb), sbuf_len(&sb), uio); 1035 sbuf_delete(&sb); 1036 PFS_RETURN (error); 1037 } 1038 1039 /* 1040 * Reclaim a vnode 1041 */ 1042 static int 1043 pfs_reclaim(struct vop_reclaim_args *va) 1044 { 1045 struct vnode *vn = va->a_vp; 1046 struct pfs_vdata *pvd = vn->v_data; 1047 struct pfs_node *pn = pvd->pvd_pn; 1048 1049 PFS_TRACE(("%s", pn->pn_name)); 1050 pfs_assert_not_owned(pn); 1051 1052 return (pfs_vncache_free(va->a_vp)); 1053 } 1054 1055 /* 1056 * Set attributes 1057 */ 1058 static int 1059 pfs_setattr(struct vop_setattr_args *va) 1060 { 1061 struct vnode *vn = va->a_vp; 1062 struct pfs_vdata *pvd = vn->v_data; 1063 struct pfs_node *pn = pvd->pvd_pn; 1064 1065 PFS_TRACE(("%s", pn->pn_name)); 1066 pfs_assert_not_owned(pn); 1067 1068 /* Silently ignore unchangeable attributes. */ 1069 PFS_RETURN (0); 1070 } 1071 1072 /* 1073 * Write to a file 1074 */ 1075 static int 1076 pfs_write(struct vop_write_args *va) 1077 { 1078 struct vnode *vn = va->a_vp; 1079 struct pfs_vdata *pvd = vn->v_data; 1080 struct pfs_node *pn = pvd->pvd_pn; 1081 struct uio *uio = va->a_uio; 1082 struct proc *proc; 1083 struct sbuf sb; 1084 int error; 1085 1086 PFS_TRACE(("%s", pn->pn_name)); 1087 pfs_assert_not_owned(pn); 1088 1089 if (vn->v_type != VREG) 1090 PFS_RETURN (EINVAL); 1091 KASSERT_PN_IS_FILE(pn); 1092 1093 if (!(pn->pn_flags & PFS_WR)) 1094 PFS_RETURN (EBADF); 1095 1096 if (pn->pn_fill == NULL) 1097 PFS_RETURN (EIO); 1098 1099 if (uio->uio_resid > PFS_MAXBUFSIZ) 1100 PFS_RETURN (EIO); 1101 1102 /* 1103 * This is necessary because either process' privileges may 1104 * have changed since the open() call. 1105 */ 1106 if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) 1107 PFS_RETURN (EIO); 1108 if (proc != NULL) { 1109 _PHOLD(proc); 1110 PROC_UNLOCK(proc); 1111 } 1112 1113 if (pn->pn_flags & PFS_RAWWR) { 1114 error = pn_fill(curthread, proc, pn, NULL, uio); 1115 if (proc != NULL) 1116 PRELE(proc); 1117 PFS_RETURN (error); 1118 } 1119 1120 sbuf_uionew(&sb, uio, &error); 1121 if (error) { 1122 if (proc != NULL) 1123 PRELE(proc); 1124 PFS_RETURN (error); 1125 } 1126 1127 error = pn_fill(curthread, proc, pn, &sb, uio); 1128 1129 sbuf_delete(&sb); 1130 if (proc != NULL) 1131 PRELE(proc); 1132 PFS_RETURN (error); 1133 } 1134 1135 /* 1136 * Vnode operations 1137 */ 1138 struct vop_vector pfs_vnodeops = { 1139 .vop_default = &default_vnodeops, 1140 1141 .vop_access = pfs_access, 1142 .vop_cachedlookup = pfs_lookup, 1143 .vop_close = pfs_close, 1144 .vop_create = VOP_EOPNOTSUPP, 1145 .vop_getattr = pfs_getattr, 1146 .vop_getextattr = pfs_getextattr, 1147 .vop_ioctl = pfs_ioctl, 1148 .vop_link = VOP_EOPNOTSUPP, 1149 .vop_lookup = vfs_cache_lookup, 1150 .vop_mkdir = VOP_EOPNOTSUPP, 1151 .vop_mknod = VOP_EOPNOTSUPP, 1152 .vop_open = pfs_open, 1153 .vop_read = pfs_read, 1154 .vop_readdir = pfs_readdir, 1155 .vop_readlink = pfs_readlink, 1156 .vop_reclaim = pfs_reclaim, 1157 .vop_remove = VOP_EOPNOTSUPP, 1158 .vop_rename = VOP_EOPNOTSUPP, 1159 .vop_rmdir = VOP_EOPNOTSUPP, 1160 .vop_setattr = pfs_setattr, 1161 .vop_symlink = VOP_EOPNOTSUPP, 1162 .vop_vptocnp = pfs_vptocnp, 1163 .vop_write = pfs_write, 1164 .vop_add_writecount = vop_stdadd_writecount_nomsync, 1165 /* XXX I've probably forgotten a few that need VOP_EOPNOTSUPP */ 1166 }; 1167 VFS_VOP_VECTOR_REGISTER(pfs_vnodeops); 1168