1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ddb.h" 43 #include "opt_ktrace.h" 44 #include "opt_procdesc.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 49 #include <sys/capability.h> 50 #include <sys/conf.h> 51 #include <sys/domain.h> 52 #include <sys/fcntl.h> 53 #include <sys/file.h> 54 #include <sys/filedesc.h> 55 #include <sys/filio.h> 56 #include <sys/jail.h> 57 #include <sys/kernel.h> 58 #include <sys/limits.h> 59 #include <sys/lock.h> 60 #include <sys/malloc.h> 61 #include <sys/mman.h> 62 #include <sys/mount.h> 63 #include <sys/mqueue.h> 64 #include <sys/mutex.h> 65 #include <sys/namei.h> 66 #include <sys/selinfo.h> 67 #include <sys/pipe.h> 68 #include <sys/priv.h> 69 #include <sys/proc.h> 70 #include <sys/procdesc.h> 71 #include <sys/protosw.h> 72 #include <sys/racct.h> 73 #include <sys/resourcevar.h> 74 #include <sys/sbuf.h> 75 #include <sys/signalvar.h> 76 #include <sys/socketvar.h> 77 #include <sys/stat.h> 78 #include <sys/sx.h> 79 #include <sys/syscallsubr.h> 80 #include <sys/sysctl.h> 81 #include <sys/sysproto.h> 82 #include <sys/tty.h> 83 #include <sys/unistd.h> 84 #include <sys/un.h> 85 #include <sys/unpcb.h> 86 #include <sys/user.h> 87 #include <sys/vnode.h> 88 #ifdef KTRACE 89 #include <sys/ktrace.h> 90 #endif 91 92 #include <net/vnet.h> 93 94 #include <netinet/in.h> 95 #include <netinet/in_pcb.h> 96 97 #include <security/audit/audit.h> 98 99 #include <vm/uma.h> 100 #include <vm/vm.h> 101 102 #include <ddb/ddb.h> 103 104 static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table"); 105 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader", 106 "file desc to leader structures"); 107 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); 108 MALLOC_DEFINE(M_FILECAPS, "filecaps", "descriptor capabilities"); 109 110 MALLOC_DECLARE(M_FADVISE); 111 112 static uma_zone_t 
file_zone; 113 114 115 static int closefp(struct filedesc *fdp, int fd, struct file *fp, 116 struct thread *td, int holdleaders); 117 static int fd_first_free(struct filedesc *fdp, int low, int size); 118 static int fd_last_used(struct filedesc *fdp, int size); 119 static void fdgrowtable(struct filedesc *fdp, int nfd); 120 static void fdunused(struct filedesc *fdp, int fd); 121 static void fdused(struct filedesc *fdp, int fd); 122 static int fill_pipe_info(struct pipe *pi, struct kinfo_file *kif); 123 static int fill_procdesc_info(struct procdesc *pdp, 124 struct kinfo_file *kif); 125 static int fill_pts_info(struct tty *tp, struct kinfo_file *kif); 126 static int fill_shm_info(struct file *fp, struct kinfo_file *kif); 127 static int fill_socket_info(struct socket *so, struct kinfo_file *kif); 128 static int fill_vnode_info(struct vnode *vp, struct kinfo_file *kif); 129 130 /* 131 * Each process has: 132 * 133 * - An array of open file descriptors (fd_ofiles) 134 * - An array of file flags (fd_ofileflags) 135 * - A bitmap recording which descriptors are in use (fd_map) 136 * 137 * A process starts out with NDFILE descriptors. The value of NDFILE has 138 * been selected based the historical limit of 20 open files, and an 139 * assumption that the majority of processes, especially short-lived 140 * processes like shells, will never need more. 141 * 142 * If this initial allocation is exhausted, a larger descriptor table and 143 * map are allocated dynamically, and the pointers in the process's struct 144 * filedesc are updated to point to those. This is repeated every time 145 * the process runs out of file descriptors (provided it hasn't hit its 146 * resource limit). 147 * 148 * Since threads may hold references to individual descriptor table 149 * entries, the tables are never freed. Instead, they are placed on a 150 * linked list and freed only when the struct filedesc is released. 
 */
/*
 * NDSLOT/NDBIT/NDSLOTS translate a descriptor number into a position in
 * the fd_map in-use bitmap, an array of NDSLOTTYPE words.
 */
#define NDFILE		20
#define NDSLOTSIZE	sizeof(NDSLOTTYPE)
#define NDENTRIES	(NDSLOTSIZE * __CHAR_BIT)
#define NDSLOT(x)	((x) / NDENTRIES)
#define NDBIT(x)	((NDSLOTTYPE)1 << ((x) % NDENTRIES))
#define NDSLOTS(x)	(((x) + NDENTRIES - 1) / NDENTRIES)

/*
 * SLIST entry used to keep track of ofiles which must be reclaimed when
 * the process exits.
 */
struct freetable {
	struct filedescent *ft_table;	/* retired descriptor table */
	SLIST_ENTRY(freetable) ft_next;
};

/*
 * Initial allocation: a filedesc structure + the head of SLIST used to
 * keep track of old ofiles + enough space for NDFILE descriptors.
 */
struct filedesc0 {
	struct filedesc fd_fd;
	SLIST_HEAD(, freetable) fd_free;	/* retired tables, freed at exit */
	struct filedescent fd_dfiles[NDFILE];	/* built-in descriptor slots */
	NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];	/* built-in in-use bitmap */
};

/*
 * Descriptor management.
 */
volatile int openfiles;			/* actual number of open files */
struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
void (*mq_fdclose)(struct thread *td, int fd, struct file *fp);

/* A mutex to protect the association between a proc and filedesc. */
static struct mtx fdesc_mtx;

/*
 * If low >= size, just return low. Otherwise find the first zero bit in the
 * given bitmap, starting at low and not exceeding size - 1. Return size if
 * not found.
 */
static int
fd_first_free(struct filedesc *fdp, int low, int size)
{
	NDSLOTTYPE *map = fdp->fd_map;
	NDSLOTTYPE mask;
	int off, maxoff;

	if (low >= size)
		return (low);

	off = NDSLOT(low);
	if (low % NDENTRIES) {
		/* Mask off the bits below 'low' in the first, partial word. */
		mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES)));
		if ((mask &= ~map[off]) != 0UL)
			return (off * NDENTRIES + ffsl(mask) - 1);
		++off;
	}
	/* Scan the remaining words for any zero bit. */
	for (maxoff = NDSLOTS(size); off < maxoff; ++off)
		if (map[off] != ~0UL)
			return (off * NDENTRIES + ffsl(~map[off]) - 1);
	return (size);
}

/*
 * Find the highest non-zero bit in the given bitmap, starting at 0 and
 * not exceeding size - 1. Return -1 if not found.
 */
static int
fd_last_used(struct filedesc *fdp, int size)
{
	NDSLOTTYPE *map = fdp->fd_map;
	NDSLOTTYPE mask;
	int off, minoff;

	off = NDSLOT(size);
	if (size % NDENTRIES) {
		/* Mask off the bits at and above 'size' in the last word. */
		mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES));
		if ((mask &= map[off]) != 0)
			return (off * NDENTRIES + flsl(mask) - 1);
		--off;
	}
	/* Scan the lower words for the highest set bit. */
	for (minoff = NDSLOT(0); off >= minoff; --off)
		if (map[off] != 0)
			return (off * NDENTRIES + flsl(map[off]) - 1);
	return (-1);
}

/*
 * Return non-zero if descriptor 'fd' is marked in use in the fd_map bitmap.
 */
static int
fdisused(struct filedesc *fdp, int fd)
{

	FILEDESC_LOCK_ASSERT(fdp);

	KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
	    ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles));

	return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0);
}

/*
 * Mark a file descriptor as used.
 */
static void
fdused(struct filedesc *fdp, int fd)
{

	FILEDESC_XLOCK_ASSERT(fdp);

	KASSERT(!fdisused(fdp, fd), ("fd=%d is already used", fd));

	fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd);
	/* Maintain the cached highest-used and lowest-free hints. */
	if (fd > fdp->fd_lastfile)
		fdp->fd_lastfile = fd;
	if (fd == fdp->fd_freefile)
		fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles);
}

/*
 * Mark a file descriptor as unused.
 */
static void
fdunused(struct filedesc *fdp, int fd)
{

	FILEDESC_XLOCK_ASSERT(fdp);

	KASSERT(fdisused(fdp, fd), ("fd=%d is already unused", fd));
	KASSERT(fdp->fd_ofiles[fd].fde_file == NULL,
	    ("fd=%d is still in use", fd));

	fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
	/* Maintain the cached lowest-free and highest-used hints. */
	if (fd < fdp->fd_freefile)
		fdp->fd_freefile = fd;
	if (fd == fdp->fd_lastfile)
		fdp->fd_lastfile = fd_last_used(fdp, fd);
}

/*
 * Free a file descriptor: release its capabilities, clear the table entry
 * and mark the slot unused.  The caller holds the filedesc xlock (asserted
 * in fdunused()).
 */
static inline void
fdfree(struct filedesc *fdp, int fd)
{
	struct filedescent *fde;

	fde = &fdp->fd_ofiles[fd];
	filecaps_free(&fde->fde_caps);
	bzero(fde, sizeof(*fde));
	fdunused(fdp, fd);
}

/*
 * System calls on descriptors.
 */
#ifndef _SYS_SYSPROTO_H_
struct getdtablesize_args {
	int	dummy;
};
#endif
/* ARGSUSED */
int
sys_getdtablesize(struct thread *td, struct getdtablesize_args *uap)
{
	struct proc *p = td->td_proc;
	uint64_t lim;

	PROC_LOCK(p);
	td->td_retval[0] =
	    min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
	lim = racct_get_limit(td->td_proc, RACCT_NOFILE);
	PROC_UNLOCK(p);
	/* The racct limit may be tighter than the rlimit. */
	if (lim < td->td_retval[0])
		td->td_retval[0] = lim;
	return (0);
}

/*
 * Duplicate a file descriptor to a particular value.
 *
 * Note: keep in mind that a potential race condition exists when closing
 * descriptors from a shared descriptor table (via rfork).
 */
#ifndef _SYS_SYSPROTO_H_
struct dup2_args {
	u_int	from;
	u_int	to;
};
#endif
/* ARGSUSED */
int
sys_dup2(struct thread *td, struct dup2_args *uap)
{

	return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
	    td->td_retval));
}

/*
 * Duplicate a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct dup_args {
	u_int	fd;
};
#endif
/* ARGSUSED */
int
sys_dup(struct thread *td, struct dup_args *uap)
{

	return (do_dup(td, 0, (int)uap->fd, 0, td->td_retval));
}

/*
 * The file control system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct fcntl_args {
	int	fd;
	int	cmd;
	long	arg;
};
#endif
/* ARGSUSED */
int
sys_fcntl(struct thread *td, struct fcntl_args *uap)
{
	struct flock fl;
	struct __oflock ofl;
	intptr_t arg;
	int error;
	int cmd;

	error = 0;
	cmd = uap->cmd;
	switch (uap->cmd) {
	case F_OGETLK:
	case F_OSETLK:
	case F_OSETLKW:
		/*
		 * Convert old flock structure to new.
		 */
		error = copyin((void *)(intptr_t)uap->arg, &ofl, sizeof(ofl));
		fl.l_start = ofl.l_start;
		fl.l_len = ofl.l_len;
		fl.l_pid = ofl.l_pid;
		fl.l_type = ofl.l_type;
		fl.l_whence = ofl.l_whence;
		fl.l_sysid = 0;

		/* Map the old command onto its modern counterpart. */
		switch (uap->cmd) {
		case F_OGETLK:
			cmd = F_GETLK;
			break;
		case F_OSETLK:
			cmd = F_SETLK;
			break;
		case F_OSETLKW:
			cmd = F_SETLKW;
			break;
		}
		arg = (intptr_t)&fl;
		break;
	case F_GETLK:
	case F_SETLK:
	case F_SETLKW:
	case F_SETLK_REMOTE:
		error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
		arg = (intptr_t)&fl;
		break;
	default:
		arg = uap->arg;
		break;
	}
	/* A failed copyin above is only reported here, before use. */
	if (error)
		return (error);
	error = kern_fcntl(td, uap->fd, cmd, arg);
	if (error)
		return (error);
	/* Copy the lock result back out for the GETLK-style commands. */
	if (uap->cmd == F_OGETLK) {
		ofl.l_start = fl.l_start;
		ofl.l_len = fl.l_len;
		ofl.l_pid = fl.l_pid;
		ofl.l_type = fl.l_type;
		ofl.l_whence = fl.l_whence;
		error = copyout(&ofl, (void *)(intptr_t)uap->arg, sizeof(ofl));
	} else if (uap->cmd == F_GETLK) {
		error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
	}
	return (error);
}

int
kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
{
	struct filedesc *fdp;
	struct flock *flp;
	struct file *fp, *fp2;
	struct filedescent *fde;
	struct proc *p;
	struct vnode *vp;
	int error, flg, tmp;
	u_int old, new;
	uint64_t bsize;
	off_t foffset;

	error = 0;
	flg = F_POSIX;
	p = td->td_proc;
	fdp = p->p_fd;

	switch (cmd) {
	case F_DUPFD:
		tmp = arg;
		error = do_dup(td, DUP_FCNTL, fd, tmp, td->td_retval);
		break;

	case F_DUPFD_CLOEXEC:
		tmp = arg;
		error = do_dup(td, DUP_FCNTL | DUP_CLOEXEC, fd, tmp,
		    td->td_retval);
		break;

	case F_DUP2FD:
		tmp = arg;
		error = do_dup(td, DUP_FIXED, fd, tmp, td->td_retval);
		break;

	case F_DUP2FD_CLOEXEC:
		tmp = arg;
		error = do_dup(td, DUP_FIXED | DUP_CLOEXEC, fd, tmp,
		    td->td_retval);
		break;

	case F_GETFD:
		FILEDESC_SLOCK(fdp);
		if ((fp = fget_locked(fdp, fd)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		fde = &fdp->fd_ofiles[fd];
		td->td_retval[0] =
		    (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0;
		FILEDESC_SUNLOCK(fdp);
		break;

	case F_SETFD:
		FILEDESC_XLOCK(fdp);
		if ((fp = fget_locked(fdp, fd)) == NULL) {
			FILEDESC_XUNLOCK(fdp);
			error = EBADF;
			break;
		}
		fde = &fdp->fd_ofiles[fd];
		fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) |
		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
		FILEDESC_XUNLOCK(fdp);
		break;

	case F_GETFL:
		error = fget_unlocked(fdp, fd, CAP_FCNTL, F_GETFL, &fp, NULL);
		if (error != 0)
			break;
		td->td_retval[0] = OFLAGS(fp->f_flag);
		fdrop(fp, td);
		break;

	case F_SETFL:
		error = fget_unlocked(fdp, fd, CAP_FCNTL, F_SETFL, &fp, NULL);
		if (error != 0)
			break;
		/* Atomically splice the new FCNTLFLAGS bits into f_flag. */
		do {
			tmp = flg = fp->f_flag;
			tmp &= ~FCNTLFLAGS;
			tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
		} while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
		tmp = fp->f_flag & FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		if (error != 0) {
			fdrop(fp, td);
			break;
		}
		tmp = fp->f_flag & FASYNC;
		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
		if (error == 0) {
			fdrop(fp, td);
			break;
		}
		/* FIOASYNC failed: back out the FIONBIO change as well. */
		atomic_clear_int(&fp->f_flag, FNONBLOCK);
		tmp = 0;
		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_GETOWN:
		error = fget_unlocked(fdp, fd, CAP_FCNTL, F_GETOWN, &fp, NULL);
		if (error != 0)
			break;
		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
		if (error == 0)
			td->td_retval[0] = tmp;
		fdrop(fp, td);
		break;

	case F_SETOWN:
		error = fget_unlocked(fdp, fd, CAP_FCNTL, F_SETOWN, &fp, NULL);
		if (error != 0)
			break;
		tmp = arg;
		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_SETLK_REMOTE:
		error = priv_check(td, PRIV_NFS_LOCKD);
		if (error)
			return (error);
		flg = F_REMOTE;
		goto do_setlk;

	case F_SETLKW:
		flg |= F_WAIT;
		/* FALLTHROUGH F_SETLK */

	case F_SETLK:
	do_setlk:
		error = fget_unlocked(fdp, fd, CAP_FLOCK, 0, &fp, NULL);
		if (error != 0)
			break;
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			fdrop(fp, td);
			break;
		}

		flp = (struct flock *)arg;
		if (flp->l_whence == SEEK_CUR) {
			foffset =
foffset_get(fp); 593 if (foffset < 0 || 594 (flp->l_start > 0 && 595 foffset > OFF_MAX - flp->l_start)) { 596 FILEDESC_SUNLOCK(fdp); 597 error = EOVERFLOW; 598 fdrop(fp, td); 599 break; 600 } 601 flp->l_start += foffset; 602 } 603 604 vp = fp->f_vnode; 605 switch (flp->l_type) { 606 case F_RDLCK: 607 if ((fp->f_flag & FREAD) == 0) { 608 error = EBADF; 609 break; 610 } 611 PROC_LOCK(p->p_leader); 612 p->p_leader->p_flag |= P_ADVLOCK; 613 PROC_UNLOCK(p->p_leader); 614 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 615 flp, flg); 616 break; 617 case F_WRLCK: 618 if ((fp->f_flag & FWRITE) == 0) { 619 error = EBADF; 620 break; 621 } 622 PROC_LOCK(p->p_leader); 623 p->p_leader->p_flag |= P_ADVLOCK; 624 PROC_UNLOCK(p->p_leader); 625 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 626 flp, flg); 627 break; 628 case F_UNLCK: 629 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, 630 flp, flg); 631 break; 632 case F_UNLCKSYS: 633 /* 634 * Temporary api for testing remote lock 635 * infrastructure. 636 */ 637 if (flg != F_REMOTE) { 638 error = EINVAL; 639 break; 640 } 641 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, 642 F_UNLCKSYS, flp, flg); 643 break; 644 default: 645 error = EINVAL; 646 break; 647 } 648 if (error != 0 || flp->l_type == F_UNLCK || 649 flp->l_type == F_UNLCKSYS) { 650 fdrop(fp, td); 651 break; 652 } 653 654 /* 655 * Check for a race with close. 656 * 657 * The vnode is now advisory locked (or unlocked, but this case 658 * is not really important) as the caller requested. 659 * We had to drop the filedesc lock, so we need to recheck if 660 * the descriptor is still valid, because if it was closed 661 * in the meantime we need to remove advisory lock from the 662 * vnode - close on any descriptor leading to an advisory 663 * locked vnode, removes that lock. 664 * We will return 0 on purpose in that case, as the result of 665 * successful advisory lock might have been externally visible 666 * already. 
This is fine - effectively we pretend to the caller 667 * that the closing thread was a bit slower and that the 668 * advisory lock succeeded before the close. 669 */ 670 error = fget_unlocked(fdp, fd, 0, 0, &fp2, NULL); 671 if (error != 0) { 672 fdrop(fp, td); 673 break; 674 } 675 if (fp != fp2) { 676 flp->l_whence = SEEK_SET; 677 flp->l_start = 0; 678 flp->l_len = 0; 679 flp->l_type = F_UNLCK; 680 (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, 681 F_UNLCK, flp, F_POSIX); 682 } 683 fdrop(fp, td); 684 fdrop(fp2, td); 685 break; 686 687 case F_GETLK: 688 error = fget_unlocked(fdp, fd, CAP_FLOCK, 0, &fp, NULL); 689 if (error != 0) 690 break; 691 if (fp->f_type != DTYPE_VNODE) { 692 error = EBADF; 693 fdrop(fp, td); 694 break; 695 } 696 flp = (struct flock *)arg; 697 if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && 698 flp->l_type != F_UNLCK) { 699 error = EINVAL; 700 fdrop(fp, td); 701 break; 702 } 703 if (flp->l_whence == SEEK_CUR) { 704 foffset = foffset_get(fp); 705 if ((flp->l_start > 0 && 706 foffset > OFF_MAX - flp->l_start) || 707 (flp->l_start < 0 && 708 foffset < OFF_MIN - flp->l_start)) { 709 FILEDESC_SUNLOCK(fdp); 710 error = EOVERFLOW; 711 fdrop(fp, td); 712 break; 713 } 714 flp->l_start += foffset; 715 } 716 vp = fp->f_vnode; 717 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, 718 F_POSIX); 719 fdrop(fp, td); 720 break; 721 722 case F_RDAHEAD: 723 arg = arg ? 
 128 * 1024: 0;
		/* FALLTHROUGH */
	case F_READAHEAD:
		error = fget_unlocked(fdp, fd, 0, 0, &fp, NULL);
		if (error != 0)
			break;
		if (fp->f_type != DTYPE_VNODE) {
			fdrop(fp, td);
			error = EBADF;
			break;
		}
		if (arg >= 0) {
			vp = fp->f_vnode;
			error = vn_lock(vp, LK_SHARED);
			if (error != 0) {
				fdrop(fp, td);
				break;
			}
			bsize = fp->f_vnode->v_mount->mnt_stat.f_iosize;
			VOP_UNLOCK(vp, 0);
			/* Round the request up to whole filesystem blocks. */
			fp->f_seqcount = (arg + bsize - 1) / bsize;
			do {
				new = old = fp->f_flag;
				new |= FRDAHEAD;
			} while (!atomic_cmpset_rel_int(&fp->f_flag, old, new));
		} else {
			/* A negative argument disables explicit read-ahead. */
			do {
				new = old = fp->f_flag;
				new &= ~FRDAHEAD;
			} while (!atomic_cmpset_rel_int(&fp->f_flag, old, new));
		}
		fdrop(fp, td);
		break;

	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/*
 * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD).
 */
int
do_dup(struct thread *td, int flags, int old, int new,
    register_t *retval)
{
	struct filedesc *fdp;
	struct filedescent *oldfde, *newfde;
	struct proc *p;
	struct file *fp;
	struct file *delfp;
	int error, maxfd;

	p = td->td_proc;
	fdp = p->p_fd;

	/*
	 * Verify we have a valid descriptor to dup from and possibly to
	 * dup to. Unlike dup() and dup2(), fcntl()'s F_DUPFD should
	 * return EINVAL when the new descriptor is out of bounds.
	 */
	if (old < 0)
		return (EBADF);
	if (new < 0)
		return (flags & DUP_FCNTL ? EINVAL : EBADF);
	PROC_LOCK(p);
	maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
	PROC_UNLOCK(p);
	if (new >= maxfd)
		return (flags & DUP_FCNTL ? EINVAL : EBADF);

	FILEDESC_XLOCK(fdp);
	if (fget_locked(fdp, old) == NULL) {
		FILEDESC_XUNLOCK(fdp);
		return (EBADF);
	}
	oldfde = &fdp->fd_ofiles[old];
	/* dup2(fd, fd) is a no-op besides the optional close-on-exec flag. */
	if (flags & DUP_FIXED && old == new) {
		*retval = new;
		if (flags & DUP_CLOEXEC)
			fdp->fd_ofiles[new].fde_flags |= UF_EXCLOSE;
		FILEDESC_XUNLOCK(fdp);
		return (0);
	}
	fp = oldfde->fde_file;
	fhold(fp);

	/*
	 * If the caller specified a file descriptor, make sure the file
	 * table is large enough to hold it, and grab it. Otherwise, just
	 * allocate a new descriptor the usual way.
	 */
	if (flags & DUP_FIXED) {
		if (new >= fdp->fd_nfiles) {
			/*
			 * The resource limits are here instead of e.g.
			 * fdalloc(), because the file descriptor table may be
			 * shared between processes, so we can't really use
			 * racct_add()/racct_sub(). Instead of counting the
			 * number of actually allocated descriptors, just put
			 * the limit on the size of the file descriptor table.
			 */
#ifdef RACCT
			PROC_LOCK(p);
			error = racct_set(p, RACCT_NOFILE, new + 1);
			PROC_UNLOCK(p);
			if (error != 0) {
				FILEDESC_XUNLOCK(fdp);
				fdrop(fp, td);
				return (EMFILE);
			}
#endif
			fdgrowtable(fdp, new + 1);
			/* The table may have been reallocated; refresh. */
			oldfde = &fdp->fd_ofiles[old];
		}
		newfde = &fdp->fd_ofiles[new];
		if (newfde->fde_file == NULL)
			fdused(fdp, new);
	} else {
		if ((error = fdalloc(td, new, &new)) != 0) {
			FILEDESC_XUNLOCK(fdp);
			fdrop(fp, td);
			return (error);
		}
		newfde = &fdp->fd_ofiles[new];
	}

	KASSERT(fp == oldfde->fde_file, ("old fd has been modified"));
	KASSERT(old != new, ("new fd is same as old"));

	delfp = newfde->fde_file;

	/*
	 * Duplicate the source descriptor.
	 */
	*newfde = *oldfde;
	filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps);
	/* Close-on-exec is per-descriptor; honour DUP_CLOEXEC explicitly. */
	if ((flags & DUP_CLOEXEC) != 0)
		newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE;
	else
		newfde->fde_flags = oldfde->fde_flags & ~UF_EXCLOSE;
	if (new > fdp->fd_lastfile)
		fdp->fd_lastfile = new;
	*retval = new;

	if (delfp != NULL) {
		(void) closefp(fdp, new, delfp, td, 1);
		/* closefp() drops the FILEDESC lock for us. */
	} else {
		FILEDESC_XUNLOCK(fdp);
	}

	return (0);
}

/*
 * If sigio is on the list associated with a process or process group,
 * disable signalling from the device, remove sigio from the list and
 * free sigio.
 */
void
funsetown(struct sigio **sigiop)
{
	struct sigio *sigio;

	SIGIO_LOCK();
	sigio = *sigiop;
	if (sigio == NULL) {
		SIGIO_UNLOCK();
		return;
	}
	/* Clear the owner's back-pointer first. */
	*(sigio->sio_myref) = NULL;
	if ((sigio)->sio_pgid < 0) {
		/* Negative sio_pgid means the owner is a process group. */
		struct pgrp *pg = (sigio)->sio_pgrp;
		PGRP_LOCK(pg);
		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
		    sigio, sio_pgsigio);
		PGRP_UNLOCK(pg);
	} else {
		struct proc *p = (sigio)->sio_proc;
		PROC_LOCK(p);
		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
		    sigio, sio_pgsigio);
		PROC_UNLOCK(p);
	}
	SIGIO_UNLOCK();
	crfree(sigio->sio_ucred);
	free(sigio, M_SIGIO);
}

/*
 * Free a list of sigio structures.
 * We only need to lock the SIGIO_LOCK because we have made ourselves
 * inaccessible to callers of fsetown and therefore do not need to lock
 * the proc or pgrp struct for the list manipulation.
 */
void
funsetownlst(struct sigiolst *sigiolst)
{
	struct proc *p;
	struct pgrp *pg;
	struct sigio *sigio;

	sigio = SLIST_FIRST(sigiolst);
	if (sigio == NULL)
		return;
	p = NULL;
	pg = NULL;

	/*
	 * Every entry of the list should belong
	 * to a single proc or pgrp.
	 */
	if (sigio->sio_pgid < 0) {
		pg = sigio->sio_pgrp;
		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
	} else /* if (sigio->sio_pgid > 0) */ {
		p = sigio->sio_proc;
		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
	}

	SIGIO_LOCK();
	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
		*(sigio->sio_myref) = NULL;
		if (pg != NULL) {
			KASSERT(sigio->sio_pgid < 0,
			    ("Proc sigio in pgrp sigio list"));
			KASSERT(sigio->sio_pgrp == pg,
			    ("Bogus pgrp in sigio list"));
			PGRP_LOCK(pg);
			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PGRP_UNLOCK(pg);
		} else /* if (p != NULL) */ {
			KASSERT(sigio->sio_pgid > 0,
			    ("Pgrp sigio in proc sigio list"));
			KASSERT(sigio->sio_proc == p,
			    ("Bogus proc in sigio list"));
			PROC_LOCK(p);
			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PROC_UNLOCK(p);
		}
		/* Drop the SIGIO lock around crfree()/free(). */
		SIGIO_UNLOCK();
		crfree(sigio->sio_ucred);
		free(sigio, M_SIGIO);
		SIGIO_LOCK();
	}
	SIGIO_UNLOCK();
}

/*
 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
 *
 * After permission checking, add a sigio structure to the sigio list for
 * the process or process group.
 */
int
fsetown(pid_t pgid, struct sigio **sigiop)
{
	struct proc *proc;
	struct pgrp *pgrp;
	struct sigio *sigio;
	int ret;

	/* pgid == 0 simply clears any existing ownership. */
	if (pgid == 0) {
		funsetown(sigiop);
		return (0);
	}

	ret = 0;

	/* Allocate and fill in the new sigio out of locks. */
	sigio = malloc(sizeof(struct sigio), M_SIGIO, M_WAITOK);
	sigio->sio_pgid = pgid;
	sigio->sio_ucred = crhold(curthread->td_ucred);
	sigio->sio_myref = sigiop;

	sx_slock(&proctree_lock);
	if (pgid > 0) {
		proc = pfind(pgid);
		if (proc == NULL) {
			ret = ESRCH;
			goto fail;
		}

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		PROC_UNLOCK(proc);
		if (proc->p_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		pgrp = NULL;
	} else /* if (pgid < 0) */ {
		pgrp = pgfind(-pgid);
		if (pgrp == NULL) {
			ret = ESRCH;
			goto fail;
		}
		PGRP_UNLOCK(pgrp);

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		if (pgrp->pg_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		proc = NULL;
	}
	funsetown(sigiop);
	if (pgid > 0) {
		PROC_LOCK(proc);
		/*
		 * Since funsetownlst() is called without the proctree
		 * locked, we need to check for P_WEXIT.
		 * XXX: is ESRCH correct?
		 */
		if ((proc->p_flag & P_WEXIT) != 0) {
			PROC_UNLOCK(proc);
			ret = ESRCH;
			goto fail;
		}
		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
		sigio->sio_proc = proc;
		PROC_UNLOCK(proc);
	} else {
		PGRP_LOCK(pgrp);
		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
		sigio->sio_pgrp = pgrp;
		PGRP_UNLOCK(pgrp);
	}
	sx_sunlock(&proctree_lock);
	SIGIO_LOCK();
	*sigiop = sigio;
	SIGIO_UNLOCK();
	return (0);

fail:
	sx_sunlock(&proctree_lock);
	crfree(sigio->sio_ucred);
	free(sigio, M_SIGIO);
	return (ret);
}

/*
 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
 */
pid_t
fgetown(sigiop)
	struct sigio **sigiop;
{
	pid_t pgid;

	SIGIO_LOCK();
	pgid = (*sigiop != NULL) ?
(*sigiop)->sio_pgid : 0; 1095 SIGIO_UNLOCK(); 1096 return (pgid); 1097 } 1098 1099 /* 1100 * Function drops the filedesc lock on return. 1101 */ 1102 static int 1103 closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, 1104 int holdleaders) 1105 { 1106 int error; 1107 1108 FILEDESC_XLOCK_ASSERT(fdp); 1109 1110 if (holdleaders) { 1111 if (td->td_proc->p_fdtol != NULL) { 1112 /* 1113 * Ask fdfree() to sleep to ensure that all relevant 1114 * process leaders can be traversed in closef(). 1115 */ 1116 fdp->fd_holdleaderscount++; 1117 } else { 1118 holdleaders = 0; 1119 } 1120 } 1121 1122 /* 1123 * We now hold the fp reference that used to be owned by the 1124 * descriptor array. We have to unlock the FILEDESC *AFTER* 1125 * knote_fdclose to prevent a race of the fd getting opened, a knote 1126 * added, and deleteing a knote for the new fd. 1127 */ 1128 knote_fdclose(td, fd); 1129 1130 /* 1131 * We need to notify mqueue if the object is of type mqueue. 1132 */ 1133 if (fp->f_type == DTYPE_MQUEUE) 1134 mq_fdclose(td, fd, fp); 1135 FILEDESC_XUNLOCK(fdp); 1136 1137 error = closef(fp, td); 1138 if (holdleaders) { 1139 FILEDESC_XLOCK(fdp); 1140 fdp->fd_holdleaderscount--; 1141 if (fdp->fd_holdleaderscount == 0 && 1142 fdp->fd_holdleaderswakeup != 0) { 1143 fdp->fd_holdleaderswakeup = 0; 1144 wakeup(&fdp->fd_holdleaderscount); 1145 } 1146 FILEDESC_XUNLOCK(fdp); 1147 } 1148 return (error); 1149 } 1150 1151 /* 1152 * Close a file descriptor. 
1153 */ 1154 #ifndef _SYS_SYSPROTO_H_ 1155 struct close_args { 1156 int fd; 1157 }; 1158 #endif 1159 /* ARGSUSED */ 1160 int 1161 sys_close(td, uap) 1162 struct thread *td; 1163 struct close_args *uap; 1164 { 1165 1166 return (kern_close(td, uap->fd)); 1167 } 1168 1169 int 1170 kern_close(td, fd) 1171 struct thread *td; 1172 int fd; 1173 { 1174 struct filedesc *fdp; 1175 struct file *fp; 1176 1177 fdp = td->td_proc->p_fd; 1178 1179 AUDIT_SYSCLOSE(td, fd); 1180 1181 FILEDESC_XLOCK(fdp); 1182 if ((fp = fget_locked(fdp, fd)) == NULL) { 1183 FILEDESC_XUNLOCK(fdp); 1184 return (EBADF); 1185 } 1186 fdfree(fdp, fd); 1187 1188 /* closefp() drops the FILEDESC lock for us. */ 1189 return (closefp(fdp, fd, fp, td, 1)); 1190 } 1191 1192 /* 1193 * Close open file descriptors. 1194 */ 1195 #ifndef _SYS_SYSPROTO_H_ 1196 struct closefrom_args { 1197 int lowfd; 1198 }; 1199 #endif 1200 /* ARGSUSED */ 1201 int 1202 sys_closefrom(struct thread *td, struct closefrom_args *uap) 1203 { 1204 struct filedesc *fdp; 1205 int fd; 1206 1207 fdp = td->td_proc->p_fd; 1208 AUDIT_ARG_FD(uap->lowfd); 1209 1210 /* 1211 * Treat negative starting file descriptor values identical to 1212 * closefrom(0) which closes all files. 1213 */ 1214 if (uap->lowfd < 0) 1215 uap->lowfd = 0; 1216 FILEDESC_SLOCK(fdp); 1217 for (fd = uap->lowfd; fd < fdp->fd_nfiles; fd++) { 1218 if (fdp->fd_ofiles[fd].fde_file != NULL) { 1219 FILEDESC_SUNLOCK(fdp); 1220 (void)kern_close(td, fd); 1221 FILEDESC_SLOCK(fdp); 1222 } 1223 } 1224 FILEDESC_SUNLOCK(fdp); 1225 return (0); 1226 } 1227 1228 #if defined(COMPAT_43) 1229 /* 1230 * Return status information about a file descriptor. 
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/* ARGSUSED */
int
ofstat(struct thread *td, struct ofstat_args *uap)
{
	struct ostat oub;
	struct stat ub;
	int error;

	/* Fetch modern stat data, then convert to the old (4.3BSD) layout. */
	error = kern_fstat(td, uap->fd, &ub);
	if (error == 0) {
		cvtstat(&ub, &oub);
		error = copyout(&oub, uap->sb, sizeof(oub));
	}
	return (error);
}
#endif /* COMPAT_43 */

/*
 * Return status information about a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct fstat_args {
	int	fd;
	struct	stat *sb;
};
#endif
/* ARGSUSED */
int
sys_fstat(struct thread *td, struct fstat_args *uap)
{
	struct stat ub;
	int error;

	error = kern_fstat(td, uap->fd, &ub);
	if (error == 0)
		error = copyout(&ub, uap->sb, sizeof(ub));
	return (error);
}

/*
 * Kernel-internal fstat(): store status information for descriptor "fd"
 * into *sbp.  The descriptor must carry the CAP_FSTAT right.
 */
int
kern_fstat(struct thread *td, int fd, struct stat *sbp)
{
	struct file *fp;
	int error;

	AUDIT_ARG_FD(fd);

	if ((error = fget(td, fd, CAP_FSTAT, &fp)) != 0)
		return (error);

	AUDIT_ARG_FILE(td->td_proc, fp);

	error = fo_stat(fp, sbp, td->td_ucred, td);
	fdrop(fp, td);
#ifdef KTRACE
	if (error == 0 && KTRPOINT(td, KTR_STRUCT))
		ktrstat(sbp);
#endif
	return (error);
}

/*
 * Return status information about a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct nfstat_args {
	int	fd;
	struct	nstat *sb;
};
#endif
/* ARGSUSED */
int
sys_nfstat(struct thread *td, struct nfstat_args *uap)
{
	struct nstat nub;
	struct stat ub;
	int error;

	/* Fetch modern stat data, then convert to the nstat layout. */
	error = kern_fstat(td, uap->fd, &ub);
	if (error == 0) {
		cvtnstat(&ub, &nub);
		error = copyout(&nub, uap->sb, sizeof(nub));
	}
	return (error);
}

/*
 * Return pathconf information about a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct fpathconf_args {
	int	fd;
	int	name;
};
#endif
/* ARGSUSED */
int
sys_fpathconf(struct thread *td, struct fpathconf_args *uap)
{
	struct file *fp;
	struct vnode *vp;
	int error;

	if ((error = fget(td, uap->fd, CAP_FPATHCONF, &fp)) != 0)
		return (error);

	/* If asynchronous I/O is available, it works for all descriptors. */
	if (uap->name == _PC_ASYNC_IO) {
		td->td_retval[0] = async_io_version;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp != NULL) {
		/* Vnode-backed descriptor: ask the filesystem. */
		vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
		VOP_UNLOCK(vp, 0);
	} else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
		/* Only _PC_PIPE_BUF is meaningful for pipes and sockets. */
		if (uap->name != _PC_PIPE_BUF) {
			error = EINVAL;
		} else {
			td->td_retval[0] = PIPE_BUF;
			error = 0;
		}
	} else {
		error = EOPNOTSUPP;
	}
out:
	fdrop(fp, td);
	return (error);
}

/*
 * Initialize filecaps structure.
 */
void
filecaps_init(struct filecaps *fcaps)
{

	bzero(fcaps, sizeof(*fcaps));
	/* -1 matches filecaps_fill(): no restriction on the ioctl list. */
	fcaps->fc_nioctls = -1;
}

/*
 * Copy filecaps structure allocating memory for ioctls array if needed.
1382 */ 1383 void 1384 filecaps_copy(const struct filecaps *src, struct filecaps *dst) 1385 { 1386 size_t size; 1387 1388 *dst = *src; 1389 if (src->fc_ioctls != NULL) { 1390 KASSERT(src->fc_nioctls > 0, 1391 ("fc_ioctls != NULL, but fc_nioctls=%hd", src->fc_nioctls)); 1392 1393 size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls; 1394 dst->fc_ioctls = malloc(size, M_FILECAPS, M_WAITOK); 1395 bcopy(src->fc_ioctls, dst->fc_ioctls, size); 1396 } 1397 } 1398 1399 /* 1400 * Move filecaps structure to the new place and clear the old place. 1401 */ 1402 void 1403 filecaps_move(struct filecaps *src, struct filecaps *dst) 1404 { 1405 1406 *dst = *src; 1407 bzero(src, sizeof(*src)); 1408 } 1409 1410 /* 1411 * Fill the given filecaps structure with full rights. 1412 */ 1413 static void 1414 filecaps_fill(struct filecaps *fcaps) 1415 { 1416 1417 fcaps->fc_rights = CAP_ALL; 1418 fcaps->fc_ioctls = NULL; 1419 fcaps->fc_nioctls = -1; 1420 fcaps->fc_fcntls = CAP_FCNTL_ALL; 1421 } 1422 1423 /* 1424 * Free memory allocated within filecaps structure. 1425 */ 1426 void 1427 filecaps_free(struct filecaps *fcaps) 1428 { 1429 1430 free(fcaps->fc_ioctls, M_FILECAPS); 1431 bzero(fcaps, sizeof(*fcaps)); 1432 } 1433 1434 /* 1435 * Validate the given filecaps structure. 1436 */ 1437 static void 1438 filecaps_validate(const struct filecaps *fcaps, const char *func) 1439 { 1440 1441 KASSERT((fcaps->fc_rights & ~CAP_MASK_VALID) == 0, 1442 ("%s: invalid rights", func)); 1443 KASSERT((fcaps->fc_fcntls & ~CAP_FCNTL_ALL) == 0, 1444 ("%s: invalid fcntls", func)); 1445 KASSERT(fcaps->fc_fcntls == 0 || (fcaps->fc_rights & CAP_FCNTL) != 0, 1446 ("%s: fcntls without CAP_FCNTL", func)); 1447 KASSERT(fcaps->fc_ioctls != NULL ? 
fcaps->fc_nioctls > 0 : 1448 (fcaps->fc_nioctls == -1 || fcaps->fc_nioctls == 0), 1449 ("%s: invalid ioctls", func)); 1450 KASSERT(fcaps->fc_nioctls == 0 || (fcaps->fc_rights & CAP_IOCTL) != 0, 1451 ("%s: ioctls without CAP_IOCTL", func)); 1452 } 1453 1454 /* 1455 * Grow the file table to accomodate (at least) nfd descriptors. 1456 */ 1457 static void 1458 fdgrowtable(struct filedesc *fdp, int nfd) 1459 { 1460 struct filedesc0 *fdp0; 1461 struct freetable *ft; 1462 struct filedescent *ntable; 1463 struct filedescent *otable; 1464 int nnfiles, onfiles; 1465 NDSLOTTYPE *nmap, *omap; 1466 1467 FILEDESC_XLOCK_ASSERT(fdp); 1468 1469 KASSERT(fdp->fd_nfiles > 0, ("zero-length file table")); 1470 1471 /* save old values */ 1472 onfiles = fdp->fd_nfiles; 1473 otable = fdp->fd_ofiles; 1474 omap = fdp->fd_map; 1475 1476 /* compute the size of the new table */ 1477 nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */ 1478 if (nnfiles <= onfiles) 1479 /* the table is already large enough */ 1480 return; 1481 1482 /* 1483 * Allocate a new table and map. We need enough space for the 1484 * file entries themselves and the struct freetable we will use 1485 * when we decommission the table and place it on the freelist. 1486 * We place the struct freetable in the middle so we don't have 1487 * to worry about padding. 
1488 */ 1489 ntable = malloc(nnfiles * sizeof(ntable[0]) + sizeof(struct freetable), 1490 M_FILEDESC, M_ZERO | M_WAITOK); 1491 nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE, M_FILEDESC, 1492 M_ZERO | M_WAITOK); 1493 1494 /* copy the old data over and point at the new tables */ 1495 memcpy(ntable, otable, onfiles * sizeof(*otable)); 1496 memcpy(nmap, omap, NDSLOTS(onfiles) * sizeof(*omap)); 1497 1498 /* update the pointers and counters */ 1499 fdp->fd_nfiles = nnfiles; 1500 memcpy(ntable, otable, onfiles * sizeof(ntable[0])); 1501 fdp->fd_ofiles = ntable; 1502 fdp->fd_map = nmap; 1503 1504 /* 1505 * Do not free the old file table, as some threads may still 1506 * reference entries within it. Instead, place it on a freelist 1507 * which will be processed when the struct filedesc is released. 1508 * 1509 * Do, however, free the old map. 1510 * 1511 * Note that if onfiles == NDFILE, we're dealing with the original 1512 * static allocation contained within (struct filedesc0 *)fdp, 1513 * which must not be freed. 1514 */ 1515 if (onfiles > NDFILE) { 1516 ft = (struct freetable *)&otable[onfiles]; 1517 fdp0 = (struct filedesc0 *)fdp; 1518 ft->ft_table = otable; 1519 SLIST_INSERT_HEAD(&fdp0->fd_free, ft, ft_next); 1520 free(omap, M_FILEDESC); 1521 } 1522 } 1523 1524 /* 1525 * Allocate a file descriptor for the process. 1526 */ 1527 int 1528 fdalloc(struct thread *td, int minfd, int *result) 1529 { 1530 struct proc *p = td->td_proc; 1531 struct filedesc *fdp = p->p_fd; 1532 int fd = -1, maxfd, allocfd; 1533 #ifdef RACCT 1534 int error; 1535 #endif 1536 1537 FILEDESC_XLOCK_ASSERT(fdp); 1538 1539 if (fdp->fd_freefile > minfd) 1540 minfd = fdp->fd_freefile; 1541 1542 PROC_LOCK(p); 1543 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 1544 PROC_UNLOCK(p); 1545 1546 /* 1547 * Search the bitmap for a free descriptor starting at minfd. 1548 * If none is found, grow the file table. 
1549 */ 1550 fd = fd_first_free(fdp, minfd, fdp->fd_nfiles); 1551 if (fd >= maxfd) 1552 return (EMFILE); 1553 if (fd >= fdp->fd_nfiles) { 1554 allocfd = min(fd * 2, maxfd); 1555 #ifdef RACCT 1556 PROC_LOCK(p); 1557 error = racct_set(p, RACCT_NOFILE, allocfd); 1558 PROC_UNLOCK(p); 1559 if (error != 0) 1560 return (EMFILE); 1561 #endif 1562 /* 1563 * fd is already equal to first free descriptor >= minfd, so 1564 * we only need to grow the table and we are done. 1565 */ 1566 fdgrowtable(fdp, allocfd); 1567 } 1568 1569 /* 1570 * Perform some sanity checks, then mark the file descriptor as 1571 * used and return it to the caller. 1572 */ 1573 KASSERT(fd >= 0 && fd < min(maxfd, fdp->fd_nfiles), 1574 ("invalid descriptor %d", fd)); 1575 KASSERT(!fdisused(fdp, fd), 1576 ("fd_first_free() returned non-free descriptor")); 1577 KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, 1578 ("file descriptor isn't free")); 1579 KASSERT(fdp->fd_ofiles[fd].fde_flags == 0, ("file flags are set")); 1580 fdused(fdp, fd); 1581 *result = fd; 1582 return (0); 1583 } 1584 1585 /* 1586 * Allocate n file descriptors for the process. 1587 */ 1588 int 1589 fdallocn(struct thread *td, int minfd, int *fds, int n) 1590 { 1591 struct proc *p = td->td_proc; 1592 struct filedesc *fdp = p->p_fd; 1593 int i; 1594 1595 FILEDESC_XLOCK_ASSERT(fdp); 1596 1597 if (!fdavail(td, n)) 1598 return (EMFILE); 1599 1600 for (i = 0; i < n; i++) 1601 if (fdalloc(td, 0, &fds[i]) != 0) 1602 break; 1603 1604 if (i < n) { 1605 for (i--; i >= 0; i--) 1606 fdunused(fdp, fds[i]); 1607 return (EMFILE); 1608 } 1609 1610 return (0); 1611 } 1612 1613 /* 1614 * Check to see whether n user file descriptors are available to the process 1615 * p. 
 */
int
fdavail(struct thread *td, int n)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = td->td_proc->p_fd;
	int i, lim, last;

	FILEDESC_LOCK_ASSERT(fdp);

	/*
	 * XXX: This is only called from uipc_usrreq.c:unp_externalize();
	 * call racct_add() from there instead of dealing with containers
	 * here.
	 */
	PROC_LOCK(p);
	lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
	PROC_UNLOCK(p);
	/* Is there headroom before the table would even need to grow? */
	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
		return (1);
	last = min(fdp->fd_nfiles, lim);
	/* Count free slots in the existing table. */
	for (i = fdp->fd_freefile; i < last; i++) {
		if (fdp->fd_ofiles[i].fde_file == NULL && --n <= 0)
			return (1);
	}
	return (0);
}

/*
 * Create a new open file structure and allocate a file descriptor for the
 * process that refers to it.  We add one reference to the file for the
 * descriptor table and one reference for resultfp.  This is to prevent us
 * being preempted and the entry in the descriptor table closed after we
 * release the FILEDESC lock.
 */
int
falloc(struct thread *td, struct file **resultfp, int *resultfd, int flags)
{
	struct file *fp;
	int error, fd;

	error = falloc_noinstall(td, &fp);
	if (error)
		return (error);		/* no reference held on error */

	error = finstall(td, fp, &fd, flags, NULL);
	if (error) {
		fdrop(fp, td);		/* one reference (fp only) */
		return (error);
	}

	if (resultfp != NULL)
		*resultfp = fp;		/* copy out result */
	else
		fdrop(fp, td);		/* release local reference */

	if (resultfd != NULL)
		*resultfd = fd;

	return (0);
}

/*
 * Create a new open file structure without allocating a file descriptor.
 */
int
falloc_noinstall(struct thread *td, struct file **resultfp)
{
	struct file *fp;
	int maxuserfiles = maxfiles - (maxfiles / 20);
	static struct timeval lastfail;
	static int curfail;

	KASSERT(resultfp != NULL, ("%s: resultfp == NULL", __func__));

	/*
	 * The last 5% of the global file slots are reserved for callers
	 * holding the PRIV_MAXFILES privilege.
	 */
	if ((openfiles >= maxuserfiles &&
	    priv_check(td, PRIV_MAXFILES) != 0) ||
	    openfiles >= maxfiles) {
		/* Rate-limit the console warning. */
		if (ppsratecheck(&lastfail, &curfail, 1)) {
			printf("kern.maxfiles limit exceeded by uid %i, "
			    "please see tuning(7).\n", td->td_ucred->cr_ruid);
		}
		return (ENFILE);
	}
	atomic_add_int(&openfiles, 1);
	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
	refcount_init(&fp->f_count, 1);
	fp->f_cred = crhold(td->td_ucred);
	/* badfileops until the caller sets real ops via finit(). */
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	fp->f_vnode = NULL;
	*resultfp = fp;
	return (0);
}

/*
 * Install a file in a file descriptor table.
 */
int
finstall(struct thread *td, struct file *fp, int *fd, int flags,
    struct filecaps *fcaps)
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct filedescent *fde;
	int error;

	KASSERT(fd != NULL, ("%s: fd == NULL", __func__));
	KASSERT(fp != NULL, ("%s: fp == NULL", __func__));
	if (fcaps != NULL)
		filecaps_validate(fcaps, __func__);

	FILEDESC_XLOCK(fdp);
	if ((error = fdalloc(td, 0, fd))) {
		FILEDESC_XUNLOCK(fdp);
		return (error);
	}
	fhold(fp);	/* reference owned by the descriptor table */
	fde = &fdp->fd_ofiles[*fd];
	fde->fde_file = fp;
	if ((flags & O_CLOEXEC) != 0)
		fde->fde_flags |= UF_EXCLOSE;
	if (fcaps != NULL)
		filecaps_move(fcaps, &fde->fde_caps);
	else
		filecaps_fill(&fde->fde_caps);	/* full rights */
	FILEDESC_XUNLOCK(fdp);
	return (0);
}

/*
 * Build a new filedesc structure from another.
 * Copy the current, root, and jail root vnode references.
 */
struct filedesc *
fdinit(struct filedesc *fdp)
{
	struct filedesc0 *newfdp;

	newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO);
	FILEDESC_LOCK_INIT(&newfdp->fd_fd);
	if (fdp != NULL) {
		/* Inherit directory vnodes from the template table. */
		FILEDESC_XLOCK(fdp);
		newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
		if (newfdp->fd_fd.fd_cdir)
			VREF(newfdp->fd_fd.fd_cdir);
		newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
		if (newfdp->fd_fd.fd_rdir)
			VREF(newfdp->fd_fd.fd_rdir);
		newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
		if (newfdp->fd_fd.fd_jdir)
			VREF(newfdp->fd_fd.fd_jdir);
		FILEDESC_XUNLOCK(fdp);
	}

	/* Create the file descriptor table. */
	newfdp->fd_fd.fd_refcnt = 1;
	newfdp->fd_fd.fd_holdcnt = 1;
	newfdp->fd_fd.fd_cmask = CMASK;
	/* Start with the small table embedded in struct filedesc0. */
	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
	newfdp->fd_fd.fd_nfiles = NDFILE;
	newfdp->fd_fd.fd_map = newfdp->fd_dmap;
	newfdp->fd_fd.fd_lastfile = -1;
	return (&newfdp->fd_fd);
}

/*
 * Acquire a hold on the process' filedesc structure; returns NULL if the
 * process has no descriptor table.
 */
static struct filedesc *
fdhold(struct proc *p)
{
	struct filedesc *fdp;

	mtx_lock(&fdesc_mtx);
	fdp = p->p_fd;
	if (fdp != NULL)
		fdp->fd_holdcnt++;
	mtx_unlock(&fdesc_mtx);
	return (fdp);
}

/*
 * Drop a hold obtained via fdhold(); the last hold frees the structure
 * along with any retired descriptor tables on the freelist.
 */
static void
fddrop(struct filedesc *fdp)
{
	struct filedesc0 *fdp0;
	struct freetable *ft;
	int i;

	mtx_lock(&fdesc_mtx);
	i = --fdp->fd_holdcnt;
	mtx_unlock(&fdesc_mtx);
	if (i > 0)
		return;

	FILEDESC_LOCK_DESTROY(fdp);
	fdp0 = (struct filedesc0 *)fdp;
	while ((ft = SLIST_FIRST(&fdp0->fd_free)) != NULL) {
		SLIST_REMOVE_HEAD(&fdp0->fd_free, ft_next);
		free(ft->ft_table, M_FILEDESC);
	}
	free(fdp, M_FILEDESC);
}

/*
 * Share a filedesc structure.
 */
struct filedesc *
fdshare(struct filedesc *fdp)
{

	FILEDESC_XLOCK(fdp);
	fdp->fd_refcnt++;
	FILEDESC_XUNLOCK(fdp);
	return (fdp);
}

/*
 * Unshare a filedesc structure, if necessary by making a copy
 */
void
fdunshare(struct proc *p, struct thread *td)
{

	FILEDESC_XLOCK(p->p_fd);
	if (p->p_fd->fd_refcnt > 1) {
		struct filedesc *tmp;

		FILEDESC_XUNLOCK(p->p_fd);
		tmp = fdcopy(p->p_fd);
		fdescfree(td);
		p->p_fd = tmp;
	} else
		FILEDESC_XUNLOCK(p->p_fd);
}

/*
 * Copy a filedesc structure.  A NULL pointer in returns a NULL reference,
 * this is to ease callers, not catch errors.
 */
struct filedesc *
fdcopy(struct filedesc *fdp)
{
	struct filedesc *newfdp;
	struct filedescent *nfde, *ofde;
	int i;

	/* Certain daemons might not have file descriptors. */
	if (fdp == NULL)
		return (NULL);

	newfdp = fdinit(fdp);
	FILEDESC_SLOCK(fdp);
	/*
	 * Grow the new table until it is large enough; the source table can
	 * keep growing while we drop its lock, hence the loop.
	 */
	while (fdp->fd_lastfile >= newfdp->fd_nfiles) {
		FILEDESC_SUNLOCK(fdp);
		FILEDESC_XLOCK(newfdp);
		fdgrowtable(newfdp, fdp->fd_lastfile + 1);
		FILEDESC_XUNLOCK(newfdp);
		FILEDESC_SLOCK(fdp);
	}
	/* copy all passable descriptors (i.e. not kqueue) */
	newfdp->fd_freefile = -1;
	for (i = 0; i <= fdp->fd_lastfile; ++i) {
		ofde = &fdp->fd_ofiles[i];
		if (fdisused(fdp, i) &&
		    (ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) &&
		    ofde->fde_file->f_ops != &badfileops) {
			nfde = &newfdp->fd_ofiles[i];
			*nfde = *ofde;
			filecaps_copy(&ofde->fde_caps, &nfde->fde_caps);
			fhold(nfde->fde_file);
			newfdp->fd_lastfile = i;
		} else {
			/* Remember the first hole for fd_freefile. */
			if (newfdp->fd_freefile == -1)
				newfdp->fd_freefile = i;
		}
	}
	newfdp->fd_cmask = fdp->fd_cmask;
	FILEDESC_SUNLOCK(fdp);
	FILEDESC_XLOCK(newfdp);
	/* Mark the copied slots used in the new table's bitmap. */
	for (i = 0; i <= newfdp->fd_lastfile; ++i) {
		if (newfdp->fd_ofiles[i].fde_file != NULL)
			fdused(newfdp, i);
	}
	if (newfdp->fd_freefile == -1)
		newfdp->fd_freefile = i;
	FILEDESC_XUNLOCK(newfdp);
	return (newfdp);
}

/*
 * Release a filedesc structure.
 */
void
fdescfree(struct thread *td)
{
	struct filedesc *fdp;
	int i;
	struct filedesc_to_leader *fdtol;
	struct file *fp;
	struct vnode *cdir, *jdir, *rdir, *vp;
	struct flock lf;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

#ifdef RACCT
	PROC_LOCK(td->td_proc);
	racct_set(td->td_proc, RACCT_NOFILE, 0);
	PROC_UNLOCK(td->td_proc);
#endif

	/* Check for special need to clear POSIX style locks */
	fdtol = td->td_proc->p_fdtol;
	if (fdtol != NULL) {
		FILEDESC_XLOCK(fdp);
		KASSERT(fdtol->fdl_refcount > 0,
		    ("filedesc_to_refcount botch: fdl_refcount=%d",
		    fdtol->fdl_refcount));
		if (fdtol->fdl_refcount == 1 &&
		    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
			/*
			 * Last user of this leader's table: release the
			 * leader's POSIX locks on every vnode descriptor.
			 */
			for (i = 0; i <= fdp->fd_lastfile; i++) {
				fp = fdp->fd_ofiles[i].fde_file;
				if (fp == NULL || fp->f_type != DTYPE_VNODE)
					continue;
				fhold(fp);
				FILEDESC_XUNLOCK(fdp);
				lf.l_whence = SEEK_SET;
				lf.l_start = 0;
				lf.l_len = 0;
				lf.l_type = F_UNLCK;
				vp = fp->f_vnode;
				(void) VOP_ADVLOCK(vp,
				    (caddr_t)td->td_proc->p_leader, F_UNLCK,
				    &lf, F_POSIX);
				FILEDESC_XLOCK(fdp);
				fdrop(fp, td);
			}
		}
	retry:
		if (fdtol->fdl_refcount == 1) {
			if (fdp->fd_holdleaderscount > 0 &&
			    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
				/*
				 * close() or do_dup() has cleared a reference
				 * in a shared file descriptor table.
				 */
				fdp->fd_holdleaderswakeup = 1;
				sx_sleep(&fdp->fd_holdleaderscount,
				    FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0);
				goto retry;
			}
			if (fdtol->fdl_holdcount > 0) {
				/*
				 * Ensure that fdtol->fdl_leader remains
				 * valid in closef().
				 */
				fdtol->fdl_wakeup = 1;
				sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK,
				    "fdlhold", 0);
				goto retry;
			}
		}
		fdtol->fdl_refcount--;
		if (fdtol->fdl_refcount == 0 &&
		    fdtol->fdl_holdcount == 0) {
			/* Unlink from the leader list; freed below. */
			fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
			fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
		} else
			fdtol = NULL;
		td->td_proc->p_fdtol = NULL;
		FILEDESC_XUNLOCK(fdp);
		if (fdtol != NULL)
			free(fdtol, M_FILEDESC_TO_LEADER);
	}
	FILEDESC_XLOCK(fdp);
	i = --fdp->fd_refcnt;
	FILEDESC_XUNLOCK(fdp);
	if (i > 0)
		return;

	/* Last reference: close every remaining descriptor. */
	for (i = 0; i <= fdp->fd_lastfile; i++) {
		fp = fdp->fd_ofiles[i].fde_file;
		if (fp != NULL) {
			FILEDESC_XLOCK(fdp);
			fdfree(fdp, i);
			FILEDESC_XUNLOCK(fdp);
			(void) closef(fp, td);
		}
	}
	FILEDESC_XLOCK(fdp);

	/* XXX This should happen earlier. */
	mtx_lock(&fdesc_mtx);
	td->td_proc->p_fd = NULL;
	mtx_unlock(&fdesc_mtx);

	if (fdp->fd_nfiles > NDFILE)
		free(fdp->fd_ofiles, M_FILEDESC);
	if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE))
		free(fdp->fd_map, M_FILEDESC);

	fdp->fd_nfiles = 0;

	cdir = fdp->fd_cdir;
	fdp->fd_cdir = NULL;
	rdir = fdp->fd_rdir;
	fdp->fd_rdir = NULL;
	jdir = fdp->fd_jdir;
	fdp->fd_jdir = NULL;
	FILEDESC_XUNLOCK(fdp);

	/* Release directory vnode references outside the lock. */
	if (cdir != NULL)
		vrele(cdir);
	if (rdir != NULL)
		vrele(rdir);
	if (jdir != NULL)
		vrele(jdir);

	fddrop(fdp);
}

/*
 * For setugid programs, we don't want people to use that setugidness
 * to generate error messages which write to a file which would
 * otherwise be off-limits to the process.  We check for filesystems where
 * the vnode can change out from under us after execve (like [lin]procfs).
 *
 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
 * sufficient.
 * We also don't check for setugidness since we know we are.
 */
static int
is_unsafe(struct file *fp)
{
	/* Only vnodes from process-dependent filesystems are unsafe. */
	if (fp->f_type == DTYPE_VNODE) {
		struct vnode *vp = fp->f_vnode;

		if ((vp->v_vflag & VV_PROCDEP) != 0)
			return (1);
	}
	return (0);
}

/*
 * Make this setguid thing safe, if at all possible.
 */
void
setugidsafety(struct thread *td)
{
	struct filedesc *fdp;
	struct file *fp;
	int i;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	/*
	 * Note: fdp->fd_ofiles may be reallocated out from under us while
	 * we are blocked in a close.  Be careful!
	 */
	FILEDESC_XLOCK(fdp);
	for (i = 0; i <= fdp->fd_lastfile; i++) {
		/* Only fd 0, 1, and 2 carry implicit significance. */
		if (i > 2)
			break;
		fp = fdp->fd_ofiles[i].fde_file;
		if (fp != NULL && is_unsafe(fp)) {
			knote_fdclose(td, i);
			/*
			 * NULL-out descriptor prior to close to avoid
			 * a race while close blocks.
			 */
			fdfree(fdp, i);
			FILEDESC_XUNLOCK(fdp);
			(void) closef(fp, td);
			FILEDESC_XLOCK(fdp);
		}
	}
	FILEDESC_XUNLOCK(fdp);
}

/*
 * If a specific file object occupies a specific file descriptor, close the
 * file descriptor entry and drop a reference on the file object.  This is a
 * convenience function to handle a subsequent error in a function that calls
 * falloc() that handles the race that another thread might have closed the
 * file descriptor out from under the thread creating the file object.
 */
void
fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td)
{

	FILEDESC_XLOCK(fdp);
	if (fdp->fd_ofiles[idx].fde_file == fp) {
		fdfree(fdp, idx);
		FILEDESC_XUNLOCK(fdp);
		fdrop(fp, td);
	} else
		FILEDESC_XUNLOCK(fdp);
}

/*
 * Close any files on exec?
 */
void
fdcloseexec(struct thread *td)
{
	struct filedesc *fdp;
	struct filedescent *fde;
	struct file *fp;
	int i;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	/*
	 * We cannot cache fd_ofiles since operations
	 * may block and rip them out from under us.
	 */
	FILEDESC_XLOCK(fdp);
	for (i = 0; i <= fdp->fd_lastfile; i++) {
		fde = &fdp->fd_ofiles[i];
		fp = fde->fde_file;
		/* Message queues are always closed on exec. */
		if (fp != NULL && (fp->f_type == DTYPE_MQUEUE ||
		    (fde->fde_flags & UF_EXCLOSE))) {
			fdfree(fdp, i);
			(void) closefp(fdp, i, fp, td, 0);
			/* closefp() drops the FILEDESC lock. */
			FILEDESC_XLOCK(fdp);
		}
	}
	FILEDESC_XUNLOCK(fdp);
}

/*
 * It is unsafe for set[ug]id processes to be started with file
 * descriptors 0..2 closed, as these descriptors are given implicit
 * significance in the Standard C library.  fdcheckstd() will create a
 * descriptor referencing /dev/null for each of stdin, stdout, and
 * stderr that is not already open.
 */
int
fdcheckstd(struct thread *td)
{
	struct filedesc *fdp;
	register_t retval, save;
	int i, error, devnull;

	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return (0);
	KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
	devnull = -1;
	error = 0;
	for (i = 0; i < 3; i++) {
		if (fdp->fd_ofiles[i].fde_file != NULL)
			continue;
		if (devnull < 0) {
			/*
			 * Open /dev/null once; preserve td_retval around
			 * kern_open(), which clobbers it.
			 */
			save = td->td_retval[0];
			error = kern_open(td, "/dev/null", UIO_SYSSPACE,
			    O_RDWR, 0);
			devnull = td->td_retval[0];
			td->td_retval[0] = save;
			if (error)
				break;
			KASSERT(devnull == i, ("oof, we didn't get our fd"));
		} else {
			/* Later gaps dup the already-open /dev/null. */
			error = do_dup(td, DUP_FIXED, devnull, i, &retval);
			if (error != 0)
				break;
		}
	}
	return (error);
}

/*
 * Internal form of close.  Decrement reference count on file structure.
 * Note: td may be NULL when closing a file that was being passed in a
 * message.
 *
 * XXXRW: Giant is not required for the caller, but often will be held; this
 * makes it moderately likely the Giant will be recursed in the VFS case.
 */
int
closef(struct file *fp, struct thread *td)
{
	struct vnode *vp;
	struct flock lf;
	struct filedesc_to_leader *fdtol;
	struct filedesc *fdp;

	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process.  This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor, and the thread pointer
	 * will be NULL.  Callers should be careful only to pass a
	 * NULL thread pointer when there really is no owning
	 * context that might have locks, or the locks will be
	 * leaked.
	 */
	if (fp->f_type == DTYPE_VNODE && td != NULL) {
		vp = fp->f_vnode;
		if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
			lf.l_whence = SEEK_SET;
			lf.l_start = 0;
			lf.l_len = 0;
			lf.l_type = F_UNLCK;
			(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
			    F_UNLCK, &lf, F_POSIX);
		}
		fdtol = td->td_proc->p_fdtol;
		if (fdtol != NULL) {
			/*
			 * Handle special case where file descriptor table is
			 * shared between multiple process leaders.
			 */
			fdp = td->td_proc->p_fd;
			FILEDESC_XLOCK(fdp);
			for (fdtol = fdtol->fdl_next;
			     fdtol != td->td_proc->p_fdtol;
			     fdtol = fdtol->fdl_next) {
				if ((fdtol->fdl_leader->p_flag &
				    P_ADVLOCK) == 0)
					continue;
				/* Keep fdtol valid across the unlock. */
				fdtol->fdl_holdcount++;
				FILEDESC_XUNLOCK(fdp);
				lf.l_whence = SEEK_SET;
				lf.l_start = 0;
				lf.l_len = 0;
				lf.l_type = F_UNLCK;
				vp = fp->f_vnode;
				(void) VOP_ADVLOCK(vp,
				    (caddr_t)fdtol->fdl_leader, F_UNLCK, &lf,
				    F_POSIX);
				FILEDESC_XLOCK(fdp);
				fdtol->fdl_holdcount--;
				if (fdtol->fdl_holdcount == 0 &&
				    fdtol->fdl_wakeup != 0) {
					/* fdescfree() waits on fdtol. */
					fdtol->fdl_wakeup = 0;
					wakeup(fdtol);
				}
			}
			FILEDESC_XUNLOCK(fdp);
		}
	}
	return (fdrop(fp, td));
}

/*
 * Initialize the file pointer with the specified properties.
 *
 * The ops are set with release semantics to be certain that the flags, type,
 * and data are visible when ops is.  This is to prevent ops methods from being
 * called with bad data.
 */
void
finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
{
	fp->f_data = data;
	fp->f_flag = flag;
	fp->f_type = type;
	/* Release barrier: publish ops only after the fields above. */
	atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
}

/*
 * Look up descriptor "fd" without taking the filedesc lock, check it
 * against "needrights", and return a held file pointer in *fpp.  The
 * rights actually carried by the descriptor are optionally returned
 * via *haverightsp.
 */
int
fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t needrights,
    int needfcntl, struct file **fpp, cap_rights_t *haverightsp)
{
	struct file *fp;
	u_int count;
#ifdef CAPABILITIES
	cap_rights_t haverights;
	int error;
#endif

	if (fd < 0 || fd >= fdp->fd_nfiles)
		return (EBADF);
	/*
	 * Fetch the descriptor locklessly.  We avoid fdrop() races by
	 * never raising a refcount above 0.  To accomplish this we have
	 * to use a cmpset loop rather than an atomic_add.  The descriptor
	 * must be re-verified once we acquire a reference to be certain
	 * that the identity is still correct and we did not lose a race
	 * due to preemption.
	 */
	for (;;) {
		fp = fdp->fd_ofiles[fd].fde_file;
		if (fp == NULL)
			return (EBADF);
#ifdef CAPABILITIES
		haverights = cap_rights(fdp, fd);
		error = cap_check(haverights, needrights);
		if (error != 0)
			return (error);
		if ((needrights & CAP_FCNTL) != 0) {
			error = cap_fcntl_check(fdp, fd, needfcntl);
			if (error != 0)
				return (error);
		}
#endif
		count = fp->f_count;
		if (count == 0)
			continue;	/* being torn down; re-fetch */
		/*
		 * Use an acquire barrier to prevent caching of fd_ofiles
		 * so it is refreshed for verification.
		 */
		if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) != 1)
			continue;
		if (fp == fdp->fd_ofiles[fd].fde_file)
			break;
		/* Slot changed identity while we referenced it; retry. */
		fdrop(fp, curthread);
	}
	*fpp = fp;
	if (haverightsp != NULL) {
#ifdef CAPABILITIES
		*haverightsp = haverights;
#else
		*haverightsp = CAP_ALL;
#endif
	}
	return (0);
}

/*
 * Extract the file pointer associated with the specified descriptor for the
 * current user process.
 *
 * If the descriptor doesn't exist or doesn't match 'flags', EBADF is
 * returned.
 *
 * File's rights will be checked against the capability rights mask.
 *
 * If an error occurred the non-zero error is returned and *fpp is set to
 * NULL.  Otherwise *fpp is held and set and zero is returned.  Caller is
 * responsible for fdrop().
 */
static __inline int
_fget(struct thread *td, int fd, struct file **fpp, int flags,
    cap_rights_t needrights, u_char *maxprotp)
{
	struct filedesc *fdp;
	struct file *fp;
	cap_rights_t haverights;
	int error;

	*fpp = NULL;
	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
		return (EBADF);
	if (maxprotp != NULL)
		needrights |= CAP_MMAP;
	error = fget_unlocked(fdp, fd, needrights, 0, &fp, &haverights);
	if (error != 0)
		return (error);
	if (fp->f_ops == &badfileops) {
		fdrop(fp, td);
		return (EBADF);
	}

#ifdef CAPABILITIES
	/*
	 * If requested, convert capability rights to access flags.
	 */
	if (maxprotp != NULL)
		*maxprotp = cap_rights_to_vmprot(haverights);
#else /* !CAPABILITIES */
	if (maxprotp != NULL)
		*maxprotp = VM_PROT_ALL;
#endif /* CAPABILITIES */

	/*
	 * FREAD and FWRITE failure return EBADF as per POSIX.
	 */
	error = 0;
	switch (flags) {
	case FREAD:
	case FWRITE:
		if ((fp->f_flag & flags) == 0)
			error = EBADF;
		break;
	case FEXEC:
		/* Must be open for read or exec, and not for write. */
		if ((fp->f_flag & (FREAD | FEXEC)) == 0 ||
		    ((fp->f_flag & FWRITE) != 0))
			error = EBADF;
		break;
	case 0:
		break;
	default:
		KASSERT(0, ("wrong flags"));
	}

	if (error != 0) {
		fdrop(fp, td);
		return (error);
	}

	*fpp = fp;
	return (0);
}

int
fget(struct thread *td, int fd, cap_rights_t rights, struct file **fpp)
{

	return (_fget(td, fd, fpp, 0, rights, NULL));
}

int
fget_mmap(struct thread *td, int fd, cap_rights_t rights, u_char *maxprotp,
    struct file **fpp)
{

	return (_fget(td, fd, fpp, 0, rights, maxprotp));
}

int
fget_read(struct thread *td, int fd, cap_rights_t rights, struct file **fpp)
{

	return (_fget(td, fd, fpp, FREAD, rights, NULL));
}

int
fget_write(struct thread *td, int fd, cap_rights_t rights, struct file **fpp)
{

	return (_fget(td, fd, fpp, FWRITE, rights, NULL));
}

/*
 * Like fget() but loads the underlying vnode, or returns an error if the
 * descriptor does not represent a vnode.  Note that pipes use vnodes but
 * never have VM objects.  The returned vnode will be vref()'d.
 *
 * XXX: what about the unused flags ?
 */
static __inline int
_fgetvp(struct thread *td, int fd, int flags, cap_rights_t needrights,
    struct vnode **vpp)
{
	struct file *fp;
	int error;

	*vpp = NULL;
	error = _fget(td, fd, &fp, flags, needrights, NULL);
	if (error)
		return (error);
	if (fp->f_vnode == NULL) {
		error = EINVAL;
	} else {
		*vpp = fp->f_vnode;
		/* Take a vnode reference before dropping the file one. */
		vref(*vpp);
	}
	fdrop(fp, td);

	return (error);
}

/* Return the referenced vnode behind a descriptor; no access-mode check. */
int
fgetvp(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp)
{

	return (_fgetvp(td, fd, 0, rights, vpp));
}

/*
 * Like fgetvp() but also copies out the descriptor's capability state into
 * *havecaps.
 *
 * NOTE(review): fget_locked() is used here without taking the filedesc
 * lock, and fp is dereferenced without holding a reference — confirm the
 * callers provide the required synchronization.
 */
int
fgetvp_rights(struct thread *td, int fd, cap_rights_t need,
    struct filecaps *havecaps, struct vnode **vpp)
{
	struct filedesc *fdp;
	struct file *fp;
#ifdef CAPABILITIES
	int error;
#endif

	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
		return (EBADF);

	fp = fget_locked(fdp, fd);
	if (fp == NULL || fp->f_ops == &badfileops)
		return (EBADF);

#ifdef CAPABILITIES
	/* Verify the descriptor grants the rights the caller needs. */
	error = cap_check(cap_rights(fdp, fd), need);
	if (error != 0)
		return (error);
#endif

	if (fp->f_vnode == NULL)
		return (EINVAL);

	*vpp = fp->f_vnode;
	vref(*vpp);
	filecaps_copy(&fdp->fd_ofiles[fd].fde_caps, havecaps);

	return (0);
}

/* As fgetvp(), but requires the descriptor be open for reading. */
int
fgetvp_read(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp)
{

	return (_fgetvp(td, fd, FREAD, rights, vpp));
}

/* As fgetvp(), but requires the descriptor be suitable for exec. */
int
fgetvp_exec(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp)
{

	return (_fgetvp(td, fd, FEXEC, rights, vpp));
}

#ifdef notyet
int
fgetvp_write(struct thread *td, int fd, cap_rights_t rights,
    struct vnode **vpp)
{

	return (_fgetvp(td, fd, FWRITE, rights, vpp));
}
#endif

/*
 * Like fget() but loads the underlying socket, or returns an error if the
 * descriptor does not represent a socket.
 *
 * We bump the ref count on the returned socket.  XXX Also obtain the SX lock
 * in the future.
 *
 * Note: fgetsock() and fputsock() are deprecated, as consumers should rely
 * on their file descriptor reference to prevent the socket from being free'd
 * during use.
 */
int
fgetsock(struct thread *td, int fd, cap_rights_t rights, struct socket **spp,
    u_int *fflagp)
{
	struct file *fp;
	int error;

	*spp = NULL;
	if (fflagp != NULL)
		*fflagp = 0;
	if ((error = _fget(td, fd, &fp, 0, rights, NULL)) != 0)
		return (error);
	if (fp->f_type != DTYPE_SOCKET) {
		error = ENOTSOCK;
	} else {
		*spp = fp->f_data;
		if (fflagp)
			*fflagp = fp->f_flag;
		/* soref() requires the socket lock. */
		SOCK_LOCK(*spp);
		soref(*spp);
		SOCK_UNLOCK(*spp);
	}
	fdrop(fp, td);

	return (error);
}

/*
 * Drop the reference count on the socket and XXX release the SX lock in the
 * future.  The last reference closes the socket.
 *
 * Note: fputsock() is deprecated, see comment for fgetsock().
 */
void
fputsock(struct socket *so)
{

	/* sorele() expects both locks held and consumes them. */
	ACCEPT_LOCK();
	SOCK_LOCK(so);
	CURVNET_SET(so->so_vnet);
	sorele(so);
	CURVNET_RESTORE();
}

/*
 * Handle the last reference to a file being closed.
 */
int
_fdrop(struct file *fp, struct thread *td)
{
	int error;

	error = 0;
	/* Caller must only invoke this once the count has hit zero. */
	if (fp->f_count != 0)
		panic("fdrop: count %d", fp->f_count);
	if (fp->f_ops != &badfileops)
		error = fo_close(fp, td);
	atomic_subtract_int(&openfiles, 1);
	crfree(fp->f_cred);
	/* f_advice may be NULL; free(9) tolerates that. */
	free(fp->f_advice, M_FADVISE);
	uma_zfree(file_zone, fp);

	return (error);
}

/*
 * Apply an advisory lock on a file descriptor.
 *
 * Just attempt to get a record lock of the requested type on the entire file
 * (l_whence = SEEK_SET, l_start = 0, l_len = 0).
 */
#ifndef _SYS_SYSPROTO_H_
struct flock_args {
	int	fd;
	int	how;
};
#endif
/* ARGSUSED */
int
sys_flock(struct thread *td, struct flock_args *uap)
{
	struct file *fp;
	struct vnode *vp;
	struct flock lf;
	int error;

	if ((error = fget(td, uap->fd, CAP_FLOCK, &fp)) != 0)
		return (error);
	/* flock(2) only operates on vnode-backed descriptors. */
	if (fp->f_type != DTYPE_VNODE) {
		fdrop(fp, td);
		return (EOPNOTSUPP);
	}

	vp = fp->f_vnode;
	/* Whole-file record lock. */
	lf.l_whence = SEEK_SET;
	lf.l_start = 0;
	lf.l_len = 0;
	if (uap->how & LOCK_UN) {
		lf.l_type = F_UNLCK;
		atomic_clear_int(&fp->f_flag, FHASLOCK);
		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
		goto done2;
	}
	if (uap->how & LOCK_EX)
		lf.l_type = F_WRLCK;
	else if (uap->how & LOCK_SH)
		lf.l_type = F_RDLCK;
	else {
		/* Neither exclusive, shared nor unlock was requested. */
		error = EBADF;
		goto done2;
	}
	atomic_set_int(&fp->f_flag, FHASLOCK);
	/* LOCK_NB requests a non-blocking attempt. */
	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
done2:
	fdrop(fp, td);
	return (error);
}
/*
 * Duplicate the specified descriptor to a free descriptor.
 */
int
dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode,
    int openerror, int *indxp)
{
	struct file *fp;
	int error, indx;

	KASSERT(openerror == ENODEV || openerror == ENXIO,
	    ("unexpected error %d in %s", openerror, __func__));

	/*
	 * If the to-be-dup'd fd number is greater than the allowed number
	 * of file descriptors, or the fd to be dup'd has already been
	 * closed, then reject.
	 */
	FILEDESC_XLOCK(fdp);
	if ((fp = fget_locked(fdp, dfd)) == NULL) {
		FILEDESC_XUNLOCK(fdp);
		return (EBADF);
	}

	error = fdalloc(td, 0, &indx);
	if (error != 0) {
		FILEDESC_XUNLOCK(fdp);
		return (error);
	}

	/*
	 * There are two cases of interest here.
	 *
	 * For ENODEV simply dup (dfd) to file descriptor (indx) and return.
	 *
	 * For ENXIO steal away the file structure from (dfd) and store it in
	 * (indx).  (dfd) is effectively closed by this operation.
	 */
	switch (openerror) {
	case ENODEV:
		/*
		 * Check that the mode the file is being opened for is a
		 * subset of the mode of the existing descriptor.
		 */
		if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) {
			fdunused(fdp, indx);
			FILEDESC_XUNLOCK(fdp);
			return (EACCES);
		}
		fhold(fp);
		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
		filecaps_copy(&fdp->fd_ofiles[dfd].fde_caps,
		    &fdp->fd_ofiles[indx].fde_caps);
		break;
	case ENXIO:
		/*
		 * Steal away the file pointer from dfd and stuff it into indx.
		 */
		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
		bzero(&fdp->fd_ofiles[dfd], sizeof(fdp->fd_ofiles[dfd]));
		fdunused(fdp, dfd);
		break;
	}
	FILEDESC_XUNLOCK(fdp);
	*indxp = indx;
	return (0);
}

/*
 * Scan all active processes and prisons to see if any of them have a current
 * or root directory of `olddp'.  If so, replace them with the new mount point.
 */
void
mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
{
	struct filedesc *fdp;
	struct prison *pr;
	struct proc *p;
	int nrele;

	/* A sole reference means nobody else can be pointing at olddp. */
	if (vrefcnt(olddp) == 1)
		return;
	nrele = 0;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		fdp = fdhold(p);
		if (fdp == NULL)
			continue;
		FILEDESC_XLOCK(fdp);
		/*
		 * Swap each matching directory pointer for a fresh reference
		 * on newdp; the releases of olddp are batched at the end.
		 */
		if (fdp->fd_cdir == olddp) {
			vref(newdp);
			fdp->fd_cdir = newdp;
			nrele++;
		}
		if (fdp->fd_rdir == olddp) {
			vref(newdp);
			fdp->fd_rdir = newdp;
			nrele++;
		}
		if (fdp->fd_jdir == olddp) {
			vref(newdp);
			fdp->fd_jdir = newdp;
			nrele++;
		}
		FILEDESC_XUNLOCK(fdp);
		fddrop(fdp);
	}
	sx_sunlock(&allproc_lock);
	if (rootvnode == olddp) {
		vref(newdp);
		rootvnode = newdp;
		nrele++;
	}
	mtx_lock(&prison0.pr_mtx);
	if (prison0.pr_root == olddp) {
		vref(newdp);
		prison0.pr_root = newdp;
		nrele++;
	}
	mtx_unlock(&prison0.pr_mtx);
	sx_slock(&allprison_lock);
	TAILQ_FOREACH(pr, &allprison, pr_list) {
		mtx_lock(&pr->pr_mtx);
		if (pr->pr_root == olddp) {
			vref(newdp);
			pr->pr_root = newdp;
			nrele++;
		}
		mtx_unlock(&pr->pr_mtx);
	}
	sx_sunlock(&allprison_lock);
	/* Release olddp once per reference we transferred to newdp. */
	while (nrele--)
		vrele(olddp);
}

/*
 * Allocate a filedesc-to-leader structure and, when chaining onto an
 * existing one, link it into the leader's circular list under the
 * filedesc lock.
 */
struct filedesc_to_leader *
filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader)
{
	struct filedesc_to_leader *fdtol;

	fdtol = malloc(sizeof(struct filedesc_to_leader),
	       M_FILEDESC_TO_LEADER,
	       M_WAITOK);
	fdtol->fdl_refcount = 1;
	fdtol->fdl_holdcount = 0;
	fdtol->fdl_wakeup = 0;
	fdtol->fdl_leader = leader;
	if (old != NULL) {
		/* Insert after 'old' in the circular doubly-linked list. */
		FILEDESC_XLOCK(fdp);
		fdtol->fdl_next = old->fdl_next;
		fdtol->fdl_prev = old;
		old->fdl_next = fdtol;
		fdtol->fdl_next->fdl_prev = fdtol;
		FILEDESC_XUNLOCK(fdp);
	} else {
		/* Single-element circular list. */
		fdtol->fdl_next = fdtol;
		fdtol->fdl_prev = fdtol;
	}
	return (fdtol);
}

/*
 * Get file structures globally.
 */
static int
sysctl_kern_file(SYSCTL_HANDLER_ARGS)
{
	struct xfile xf;
	struct filedesc *fdp;
	struct file *fp;
	struct proc *p;
	int error, n;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	if (req->oldptr == NULL) {
		/* Size probe: report an (over-)estimate of the table size. */
		n = 0;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			if (p->p_state == PRS_NEW)
				continue;
			fdp = fdhold(p);
			if (fdp == NULL)
				continue;
			/* overestimates sparse tables. */
			if (fdp->fd_lastfile > 0)
				n += fdp->fd_lastfile;
			fddrop(fdp);
		}
		sx_sunlock(&allproc_lock);
		return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
	}
	error = 0;
	bzero(&xf, sizeof(xf));
	xf.xf_size = sizeof(xf);
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_state == PRS_NEW) {
			PROC_UNLOCK(p);
			continue;
		}
		/* Skip processes the requester may not inspect. */
		if (p_cansee(req->td, p) != 0) {
			PROC_UNLOCK(p);
			continue;
		}
		xf.xf_pid = p->p_pid;
		xf.xf_uid = p->p_ucred->cr_uid;
		PROC_UNLOCK(p);
		fdp = fdhold(p);
		if (fdp == NULL)
			continue;
		FILEDESC_SLOCK(fdp);
		for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) {
			if ((fp = fdp->fd_ofiles[n].fde_file) == NULL)
				continue;
			xf.xf_fd = n;
			xf.xf_file = fp;
			xf.xf_data = fp->f_data;
			xf.xf_vnode = fp->f_vnode;
			xf.xf_type = fp->f_type;
			xf.xf_count = fp->f_count;
			xf.xf_msgcount = 0;
			xf.xf_offset = foffset_get(fp);
			xf.xf_flag = fp->f_flag;
			error = SYSCTL_OUT(req, &xf, sizeof(xf));
			if (error)
				break;
		}
		FILEDESC_SUNLOCK(fdp);
		fddrop(fdp);
		if (error)
			break;
	}
	sx_sunlock(&allproc_lock);
	return (error);
}

SYSCTL_PROC(_kern, KERN_FILE, file,
CTLTYPE_OPAQUE|CTLFLAG_RD, 2911 0, 0, sysctl_kern_file, "S,xfile", "Entire file table"); 2912 2913 #ifdef KINFO_OFILE_SIZE 2914 CTASSERT(sizeof(struct kinfo_ofile) == KINFO_OFILE_SIZE); 2915 #endif 2916 2917 #ifdef COMPAT_FREEBSD7 2918 static int 2919 export_vnode_for_osysctl(struct vnode *vp, int type, 2920 struct kinfo_ofile *kif, struct filedesc *fdp, struct sysctl_req *req) 2921 { 2922 int error; 2923 char *fullpath, *freepath; 2924 2925 bzero(kif, sizeof(*kif)); 2926 kif->kf_structsize = sizeof(*kif); 2927 2928 vref(vp); 2929 kif->kf_fd = type; 2930 kif->kf_type = KF_TYPE_VNODE; 2931 /* This function only handles directories. */ 2932 if (vp->v_type != VDIR) { 2933 vrele(vp); 2934 return (ENOTDIR); 2935 } 2936 kif->kf_vnode_type = KF_VTYPE_VDIR; 2937 2938 /* 2939 * This is not a true file descriptor, so we set a bogus refcount 2940 * and offset to indicate these fields should be ignored. 2941 */ 2942 kif->kf_ref_count = -1; 2943 kif->kf_offset = -1; 2944 2945 freepath = NULL; 2946 fullpath = "-"; 2947 FILEDESC_SUNLOCK(fdp); 2948 vn_fullpath(curthread, vp, &fullpath, &freepath); 2949 vrele(vp); 2950 strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path)); 2951 if (freepath != NULL) 2952 free(freepath, M_TEMP); 2953 error = SYSCTL_OUT(req, kif, sizeof(*kif)); 2954 FILEDESC_SLOCK(fdp); 2955 return (error); 2956 } 2957 2958 /* 2959 * Get per-process file descriptors for use by procstat(1), et al. 
2960 */ 2961 static int 2962 sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS) 2963 { 2964 char *fullpath, *freepath; 2965 struct kinfo_ofile *kif; 2966 struct filedesc *fdp; 2967 int error, i, *name; 2968 struct shmfd *shmfd; 2969 struct socket *so; 2970 struct vnode *vp; 2971 struct file *fp; 2972 struct proc *p; 2973 struct tty *tp; 2974 2975 name = (int *)arg1; 2976 error = pget((pid_t)name[0], PGET_CANDEBUG, &p); 2977 if (error != 0) 2978 return (error); 2979 fdp = fdhold(p); 2980 PROC_UNLOCK(p); 2981 if (fdp == NULL) 2982 return (ENOENT); 2983 kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK); 2984 FILEDESC_SLOCK(fdp); 2985 if (fdp->fd_cdir != NULL) 2986 export_vnode_for_osysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif, 2987 fdp, req); 2988 if (fdp->fd_rdir != NULL) 2989 export_vnode_for_osysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif, 2990 fdp, req); 2991 if (fdp->fd_jdir != NULL) 2992 export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif, 2993 fdp, req); 2994 for (i = 0; i < fdp->fd_nfiles; i++) { 2995 if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) 2996 continue; 2997 bzero(kif, sizeof(*kif)); 2998 kif->kf_structsize = sizeof(*kif); 2999 vp = NULL; 3000 so = NULL; 3001 tp = NULL; 3002 shmfd = NULL; 3003 kif->kf_fd = i; 3004 3005 switch (fp->f_type) { 3006 case DTYPE_VNODE: 3007 kif->kf_type = KF_TYPE_VNODE; 3008 vp = fp->f_vnode; 3009 break; 3010 3011 case DTYPE_SOCKET: 3012 kif->kf_type = KF_TYPE_SOCKET; 3013 so = fp->f_data; 3014 break; 3015 3016 case DTYPE_PIPE: 3017 kif->kf_type = KF_TYPE_PIPE; 3018 break; 3019 3020 case DTYPE_FIFO: 3021 kif->kf_type = KF_TYPE_FIFO; 3022 vp = fp->f_vnode; 3023 break; 3024 3025 case DTYPE_KQUEUE: 3026 kif->kf_type = KF_TYPE_KQUEUE; 3027 break; 3028 3029 case DTYPE_CRYPTO: 3030 kif->kf_type = KF_TYPE_CRYPTO; 3031 break; 3032 3033 case DTYPE_MQUEUE: 3034 kif->kf_type = KF_TYPE_MQUEUE; 3035 break; 3036 3037 case DTYPE_SHM: 3038 kif->kf_type = KF_TYPE_SHM; 3039 shmfd = fp->f_data; 3040 break; 3041 3042 case DTYPE_SEM: 3043 kif->kf_type = 
KF_TYPE_SEM; 3044 break; 3045 3046 case DTYPE_PTS: 3047 kif->kf_type = KF_TYPE_PTS; 3048 tp = fp->f_data; 3049 break; 3050 3051 #ifdef PROCDESC 3052 case DTYPE_PROCDESC: 3053 kif->kf_type = KF_TYPE_PROCDESC; 3054 break; 3055 #endif 3056 3057 default: 3058 kif->kf_type = KF_TYPE_UNKNOWN; 3059 break; 3060 } 3061 kif->kf_ref_count = fp->f_count; 3062 if (fp->f_flag & FREAD) 3063 kif->kf_flags |= KF_FLAG_READ; 3064 if (fp->f_flag & FWRITE) 3065 kif->kf_flags |= KF_FLAG_WRITE; 3066 if (fp->f_flag & FAPPEND) 3067 kif->kf_flags |= KF_FLAG_APPEND; 3068 if (fp->f_flag & FASYNC) 3069 kif->kf_flags |= KF_FLAG_ASYNC; 3070 if (fp->f_flag & FFSYNC) 3071 kif->kf_flags |= KF_FLAG_FSYNC; 3072 if (fp->f_flag & FNONBLOCK) 3073 kif->kf_flags |= KF_FLAG_NONBLOCK; 3074 if (fp->f_flag & O_DIRECT) 3075 kif->kf_flags |= KF_FLAG_DIRECT; 3076 if (fp->f_flag & FHASLOCK) 3077 kif->kf_flags |= KF_FLAG_HASLOCK; 3078 kif->kf_offset = foffset_get(fp); 3079 if (vp != NULL) { 3080 vref(vp); 3081 switch (vp->v_type) { 3082 case VNON: 3083 kif->kf_vnode_type = KF_VTYPE_VNON; 3084 break; 3085 case VREG: 3086 kif->kf_vnode_type = KF_VTYPE_VREG; 3087 break; 3088 case VDIR: 3089 kif->kf_vnode_type = KF_VTYPE_VDIR; 3090 break; 3091 case VBLK: 3092 kif->kf_vnode_type = KF_VTYPE_VBLK; 3093 break; 3094 case VCHR: 3095 kif->kf_vnode_type = KF_VTYPE_VCHR; 3096 break; 3097 case VLNK: 3098 kif->kf_vnode_type = KF_VTYPE_VLNK; 3099 break; 3100 case VSOCK: 3101 kif->kf_vnode_type = KF_VTYPE_VSOCK; 3102 break; 3103 case VFIFO: 3104 kif->kf_vnode_type = KF_VTYPE_VFIFO; 3105 break; 3106 case VBAD: 3107 kif->kf_vnode_type = KF_VTYPE_VBAD; 3108 break; 3109 default: 3110 kif->kf_vnode_type = KF_VTYPE_UNKNOWN; 3111 break; 3112 } 3113 /* 3114 * It is OK to drop the filedesc lock here as we will 3115 * re-validate and re-evaluate its properties when 3116 * the loop continues. 
3117 */ 3118 freepath = NULL; 3119 fullpath = "-"; 3120 FILEDESC_SUNLOCK(fdp); 3121 vn_fullpath(curthread, vp, &fullpath, &freepath); 3122 vrele(vp); 3123 strlcpy(kif->kf_path, fullpath, 3124 sizeof(kif->kf_path)); 3125 if (freepath != NULL) 3126 free(freepath, M_TEMP); 3127 FILEDESC_SLOCK(fdp); 3128 } 3129 if (so != NULL) { 3130 struct sockaddr *sa; 3131 3132 if (so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa) 3133 == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) { 3134 bcopy(sa, &kif->kf_sa_local, sa->sa_len); 3135 free(sa, M_SONAME); 3136 } 3137 if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa) 3138 == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) { 3139 bcopy(sa, &kif->kf_sa_peer, sa->sa_len); 3140 free(sa, M_SONAME); 3141 } 3142 kif->kf_sock_domain = 3143 so->so_proto->pr_domain->dom_family; 3144 kif->kf_sock_type = so->so_type; 3145 kif->kf_sock_protocol = so->so_proto->pr_protocol; 3146 } 3147 if (tp != NULL) { 3148 strlcpy(kif->kf_path, tty_devname(tp), 3149 sizeof(kif->kf_path)); 3150 } 3151 if (shmfd != NULL) 3152 shm_path(shmfd, kif->kf_path, sizeof(kif->kf_path)); 3153 error = SYSCTL_OUT(req, kif, sizeof(*kif)); 3154 if (error) 3155 break; 3156 } 3157 FILEDESC_SUNLOCK(fdp); 3158 fddrop(fdp); 3159 free(kif, M_TEMP); 3160 return (0); 3161 } 3162 3163 static SYSCTL_NODE(_kern_proc, KERN_PROC_OFILEDESC, ofiledesc, CTLFLAG_RD, 3164 sysctl_kern_proc_ofiledesc, "Process ofiledesc entries"); 3165 #endif /* COMPAT_FREEBSD7 */ 3166 3167 #ifdef KINFO_FILE_SIZE 3168 CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE); 3169 #endif 3170 3171 static int 3172 export_fd_to_sb(void *data, int type, int fd, int fflags, int refcnt, 3173 int64_t offset, cap_rights_t fd_cap_rights, struct kinfo_file *kif, 3174 struct sbuf *sb, ssize_t *remainder) 3175 { 3176 struct { 3177 int fflag; 3178 int kf_fflag; 3179 } fflags_table[] = { 3180 { FAPPEND, KF_FLAG_APPEND }, 3181 { FASYNC, KF_FLAG_ASYNC }, 3182 { FFSYNC, KF_FLAG_FSYNC }, 3183 { FHASLOCK, KF_FLAG_HASLOCK }, 3184 { 
 FNONBLOCK, KF_FLAG_NONBLOCK },
		{ FREAD, KF_FLAG_READ },
		{ FWRITE, KF_FLAG_WRITE },
		{ O_CREAT, KF_FLAG_CREAT },
		{ O_DIRECT, KF_FLAG_DIRECT },
		{ O_EXCL, KF_FLAG_EXCL },
		{ O_EXEC, KF_FLAG_EXEC },
		{ O_EXLOCK, KF_FLAG_EXLOCK },
		{ O_NOFOLLOW, KF_FLAG_NOFOLLOW },
		{ O_SHLOCK, KF_FLAG_SHLOCK },
		{ O_TRUNC, KF_FLAG_TRUNC }
	};
#define	NFFLAGS	(sizeof(fflags_table) / sizeof(*fflags_table))
	struct vnode *vp;
	int error;
	unsigned int i;

	/* A zero budget means a previous record already terminated export. */
	if (*remainder == 0)
		return (0);
	bzero(kif, sizeof(*kif));
	switch (type) {
	case KF_TYPE_FIFO:
	case KF_TYPE_VNODE:
		vp = (struct vnode *)data;
		error = fill_vnode_info(vp, kif);
		/* fill_vnode_info() does not consume the reference. */
		vrele(vp);
		break;
	case KF_TYPE_SOCKET:
		error = fill_socket_info((struct socket *)data, kif);
		break;
	case KF_TYPE_PIPE:
		error = fill_pipe_info((struct pipe *)data, kif);
		break;
	case KF_TYPE_PTS:
		error = fill_pts_info((struct tty *)data, kif);
		break;
	case KF_TYPE_PROCDESC:
		error = fill_procdesc_info((struct procdesc *)data, kif);
		break;
	case KF_TYPE_SHM:
		error = fill_shm_info((struct file *)data, kif);
		break;
	default:
		error = 0;
	}
	if (error == 0)
		kif->kf_status |= KF_ATTR_VALID;

	/*
	 * Translate file access flags.
	 */
	for (i = 0; i < NFFLAGS; i++)
		if (fflags & fflags_table[i].fflag)
			kif->kf_flags |= fflags_table[i].kf_fflag;
	kif->kf_cap_rights = fd_cap_rights;
	kif->kf_fd = fd;
	kif->kf_type = type;
	kif->kf_ref_count = refcnt;
	kif->kf_offset = offset;
	/* Pack record size down */
	kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
	    strlen(kif->kf_path) + 1;
	kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t));
	/* A budget of -1 means "unlimited" (size probe pass). */
	if (*remainder != -1) {
		if (*remainder < kif->kf_structsize) {
			/* Terminate export. */
			*remainder = 0;
			return (0);
		}
		*remainder -= kif->kf_structsize;
	}
	error = sbuf_bcat(sb, kif, kif->kf_structsize);
	return (error);
}

/*
 * Store a process file descriptor information to sbuf.
 *
 * Takes a locked proc as argument, and returns with the proc unlocked.
 */
int
kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen)
{
	struct file *fp;
	struct filedesc *fdp;
	struct kinfo_file *kif;
	struct vnode *cttyvp, *textvp, *tracevp;
	int64_t offset;
	void *data;
	ssize_t remainder;
	int error, i;
	int type, refcnt, fflags;
	cap_rights_t fd_cap_rights;

	PROC_LOCK_ASSERT(p, MA_OWNED);

	remainder = maxlen;
	/* ktrace vnode */
	tracevp = p->p_tracevp;
	if (tracevp != NULL)
		vref(tracevp);
	/* text vnode */
	textvp = p->p_textvp;
	if (textvp != NULL)
		vref(textvp);
	/* Controlling tty. */
	cttyvp = NULL;
	if (p->p_pgrp != NULL && p->p_pgrp->pg_session != NULL) {
		cttyvp = p->p_pgrp->pg_session->s_ttyvp;
		if (cttyvp != NULL)
			vref(cttyvp);
	}
	fdp = fdhold(p);
	PROC_UNLOCK(p);
	kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
	/* Pseudo-descriptors first; export_fd_to_sb() eats the vrefs. */
	if (tracevp != NULL)
		export_fd_to_sb(tracevp, KF_TYPE_VNODE, KF_FD_TYPE_TRACE,
		    FREAD | FWRITE, -1, -1, 0, kif, sb, &remainder);
	if (textvp != NULL)
		export_fd_to_sb(textvp, KF_TYPE_VNODE, KF_FD_TYPE_TEXT,
		    FREAD, -1, -1, 0, kif, sb, &remainder);
	if (cttyvp != NULL)
		export_fd_to_sb(cttyvp, KF_TYPE_VNODE, KF_FD_TYPE_CTTY,
		    FREAD | FWRITE, -1, -1, 0, kif, sb, &remainder);
	error = 0;
	if (fdp == NULL)
		goto fail;
	FILEDESC_SLOCK(fdp);
	/* working directory */
	if (fdp->fd_cdir != NULL) {
		vref(fdp->fd_cdir);
		data = fdp->fd_cdir;
		/* Drop the lock: the export may sleep. */
		FILEDESC_SUNLOCK(fdp);
		export_fd_to_sb(data, KF_TYPE_VNODE, KF_FD_TYPE_CWD,
		    FREAD, -1, -1, 0, kif, sb,
		    &remainder);
		FILEDESC_SLOCK(fdp);
	}
	/* root directory */
	if (fdp->fd_rdir != NULL) {
		vref(fdp->fd_rdir);
		data = fdp->fd_rdir;
		FILEDESC_SUNLOCK(fdp);
		export_fd_to_sb(data, KF_TYPE_VNODE, KF_FD_TYPE_ROOT,
		    FREAD, -1, -1, 0, kif, sb, &remainder);
		FILEDESC_SLOCK(fdp);
	}
	/* jail directory */
	if (fdp->fd_jdir != NULL) {
		vref(fdp->fd_jdir);
		data = fdp->fd_jdir;
		FILEDESC_SUNLOCK(fdp);
		export_fd_to_sb(data, KF_TYPE_VNODE, KF_FD_TYPE_JAIL,
		    FREAD, -1, -1, 0, kif, sb, &remainder);
		FILEDESC_SLOCK(fdp);
	}
	for (i = 0; i < fdp->fd_nfiles; i++) {
		if ((fp = fdp->fd_ofiles[i].fde_file) == NULL)
			continue;
		data = NULL;
#ifdef CAPABILITIES
		fd_cap_rights = cap_rights(fdp, i);
#else /* !CAPABILITIES */
		fd_cap_rights = 0;
#endif
		/* Pick the kinfo type and the payload to serialize. */
		switch (fp->f_type) {
		case DTYPE_VNODE:
			type = KF_TYPE_VNODE;
			vref(fp->f_vnode);
			data = fp->f_vnode;
			break;

		case DTYPE_SOCKET:
			type = KF_TYPE_SOCKET;
			data = fp->f_data;
			break;

		case DTYPE_PIPE:
			type = KF_TYPE_PIPE;
			data = fp->f_data;
			break;

		case DTYPE_FIFO:
			type = KF_TYPE_FIFO;
			vref(fp->f_vnode);
			data = fp->f_vnode;
			break;

		case DTYPE_KQUEUE:
			type = KF_TYPE_KQUEUE;
			break;

		case DTYPE_CRYPTO:
			type = KF_TYPE_CRYPTO;
			break;

		case DTYPE_MQUEUE:
			type = KF_TYPE_MQUEUE;
			break;

		case DTYPE_SHM:
			type = KF_TYPE_SHM;
			data = fp;
			break;

		case DTYPE_SEM:
			type = KF_TYPE_SEM;
			break;

		case DTYPE_PTS:
			type = KF_TYPE_PTS;
			data = fp->f_data;
			break;

#ifdef PROCDESC
		case DTYPE_PROCDESC:
			type = KF_TYPE_PROCDESC;
			data = fp->f_data;
			break;
#endif

		default:
			type = KF_TYPE_UNKNOWN;
			break;
		}
		refcnt = fp->f_count;
		fflags = fp->f_flag;
		offset = foffset_get(fp);

		/*
		 * Create sysctl entry.
		 * It is OK to drop the filedesc lock here as we will
		 * re-validate and re-evaluate its properties when
		 * the loop continues.
		 */
		if (type == KF_TYPE_VNODE || type == KF_TYPE_FIFO)
			FILEDESC_SUNLOCK(fdp);
		error = export_fd_to_sb(data, type, i, fflags, refcnt,
		    offset, fd_cap_rights, kif, sb, &remainder);
		if (type == KF_TYPE_VNODE || type == KF_TYPE_FIFO)
			FILEDESC_SLOCK(fdp);
		if (error)
			break;
	}
	FILEDESC_SUNLOCK(fdp);
fail:
	if (fdp != NULL)
		fddrop(fdp);
	free(kif, M_TEMP);
	return (error);
}

#define FILEDESC_SBUF_SIZE	(sizeof(struct kinfo_file) * 5)

/*
 * Get per-process file descriptors for use by procstat(1), et al.
 */
static int
sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sb;
	struct proc *p;
	ssize_t maxlen;
	int error, error2, *name;

	name = (int *)arg1;

	sbuf_new_for_sysctl(&sb, NULL, FILEDESC_SBUF_SIZE, req);
	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
	if (error != 0) {
		sbuf_delete(&sb);
		return (error);
	}
	/* -1 signals an unbounded (size-probe) export. */
	maxlen = req->oldptr != NULL ? req->oldlen : -1;
	error = kern_proc_filedesc_out(p, &sb, maxlen);
	error2 = sbuf_finish(&sb);
	sbuf_delete(&sb);
	return (error != 0 ? error : error2);
}

/*
 * Translate a vnode type (VREG, VDIR, ...) into its KF_VTYPE_* export
 * counterpart.
 */
int
vntype_to_kinfo(int vtype)
{
	struct {
		int	vtype;
		int	kf_vtype;
	} vtypes_table[] = {
		{ VBAD, KF_VTYPE_VBAD },
		{ VBLK, KF_VTYPE_VBLK },
		{ VCHR, KF_VTYPE_VCHR },
		{ VDIR, KF_VTYPE_VDIR },
		{ VFIFO, KF_VTYPE_VFIFO },
		{ VLNK, KF_VTYPE_VLNK },
		{ VNON, KF_VTYPE_VNON },
		{ VREG, KF_VTYPE_VREG },
		{ VSOCK, KF_VTYPE_VSOCK }
	};
#define	NVTYPES	(sizeof(vtypes_table) / sizeof(*vtypes_table))
	unsigned int i;

	/*
	 * Perform vtype translation.
	 */
	for (i = 0; i < NVTYPES; i++)
		if (vtypes_table[i].vtype == vtype)
			break;
	if (i < NVTYPES)
		return (vtypes_table[i].kf_vtype);

	return (KF_VTYPE_UNKNOWN);
}

/*
 * Fill the vnode-specific portion of a kinfo_file record: type, path and
 * basic attributes.  Non-zero return means the attributes are unreliable.
 */
static int
fill_vnode_info(struct vnode *vp, struct kinfo_file *kif)
{
	struct vattr va;
	char *fullpath, *freepath;
	int error;

	if (vp == NULL)
		return (1);
	kif->kf_vnode_type = vntype_to_kinfo(vp->v_type);
	freepath = NULL;
	fullpath = "-";
	error = vn_fullpath(curthread, vp, &fullpath, &freepath);
	if (error == 0) {
		strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path));
	}
	if (freepath != NULL)
		free(freepath, M_TEMP);

	/*
	 * Retrieve vnode attributes.
	 */
	va.va_fsid = VNOVAL;
	va.va_rdev = NODEV;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &va, curthread->td_ucred);
	VOP_UNLOCK(vp, 0);
	if (error != 0)
		return (error);
	/* Fall back to the mount's fsid when the FS did not supply one. */
	if (va.va_fsid != VNOVAL)
		kif->kf_un.kf_file.kf_file_fsid = va.va_fsid;
	else
		kif->kf_un.kf_file.kf_file_fsid =
		    vp->v_mount->mnt_stat.f_fsid.val[0];
	kif->kf_un.kf_file.kf_file_fileid = va.va_fileid;
	kif->kf_un.kf_file.kf_file_mode = MAKEIMODE(va.va_type, va.va_mode);
	kif->kf_un.kf_file.kf_file_size = va.va_size;
	kif->kf_un.kf_file.kf_file_rdev = va.va_rdev;
	return (0);
}

/*
 * Fill the socket-specific portion of a kinfo_file record: domain, type,
 * protocol, pcb pointers and local/peer addresses.
 */
static int
fill_socket_info(struct socket *so, struct kinfo_file *kif)
{
	struct sockaddr *sa;
	struct inpcb *inpcb;
	struct unpcb *unpcb;
	int error;

	if (so == NULL)
		return (1);
	kif->kf_sock_domain = so->so_proto->pr_domain->dom_family;
	kif->kf_sock_type = so->so_type;
	kif->kf_sock_protocol = so->so_proto->pr_protocol;
	kif->kf_un.kf_sock.kf_sock_pcb = (uintptr_t)so->so_pcb;
	switch(kif->kf_sock_domain) {
	case AF_INET:
	case AF_INET6:
		if (kif->kf_sock_protocol == IPPROTO_TCP) {
			if (so->so_pcb != NULL) {
				inpcb = (struct inpcb *)(so->so_pcb);
				kif->kf_un.kf_sock.kf_sock_inpcb =
				    (uintptr_t)inpcb->inp_ppcb;
			}
		}
		break;
	case AF_UNIX:
		if (so->so_pcb != NULL) {
			unpcb = (struct unpcb *)(so->so_pcb);
			if (unpcb->unp_conn) {
				kif->kf_un.kf_sock.kf_sock_unpconn =
				    (uintptr_t)unpcb->unp_conn;
				kif->kf_un.kf_sock.kf_sock_rcv_sb_state =
				    so->so_rcv.sb_state;
				kif->kf_un.kf_sock.kf_sock_snd_sb_state =
				    so->so_snd.sb_state;
			}
		}
		break;
	}
	/*
	 * NOTE(review): when the returned sockaddr is larger than the
	 * destination buffer it is neither copied nor freed here — confirm
	 * whether that intentionally leaks 'sa'.
	 */
	error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
	if (error == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) {
		bcopy(sa, &kif->kf_sa_local, sa->sa_len);
		free(sa, M_SONAME);
	}
	error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa);
	if (error == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) {
		bcopy(sa, &kif->kf_sa_peer, sa->sa_len);
		free(sa, M_SONAME);
	}
	/*
	 * NOTE(review): strncpy() does not guarantee NUL termination if
	 * dom_name fills kf_path exactly — the record consumer should cope.
	 */
	strncpy(kif->kf_path, so->so_proto->pr_domain->dom_name,
	    sizeof(kif->kf_path));
	return (0);
}

/* Fill the pseudo-terminal portion of a kinfo_file record. */
static int
fill_pts_info(struct tty *tp, struct kinfo_file *kif)
{

	if (tp == NULL)
		return (1);
	kif->kf_un.kf_pts.kf_pts_dev = tty_udev(tp);
	strlcpy(kif->kf_path, tty_devname(tp), sizeof(kif->kf_path));
	return (0);
}

/* Fill the pipe portion of a kinfo_file record. */
static int
fill_pipe_info(struct pipe *pi, struct kinfo_file *kif)
{

	if (pi == NULL)
		return (1);
	kif->kf_un.kf_pipe.kf_pipe_addr = (uintptr_t)pi;
	kif->kf_un.kf_pipe.kf_pipe_peer = (uintptr_t)pi->pipe_peer;
	kif->kf_un.kf_pipe.kf_pipe_buffer_cnt = pi->pipe_buffer.cnt;
	return (0);
}

/* Fill the process-descriptor portion of a kinfo_file record. */
static int
fill_procdesc_info(struct procdesc *pdp, struct kinfo_file *kif)
{

	if (pdp == NULL)
		return (1);
	kif->kf_un.kf_proc.kf_pid = pdp->pd_pid;
	return (0);
}

/* Fill the POSIX shared-memory portion of a kinfo_file record. */
static int
fill_shm_info(struct file *fp, struct kinfo_file *kif)
{
	struct thread *td;
	struct stat sb;

	td = curthread;
	if (fp->f_data == NULL)
		return (1);
	if (fo_stat(fp, &sb, td->td_ucred, td) != 0)
		return (1);
	shm_path(fp->f_data, kif->kf_path, sizeof(kif->kf_path));
	kif->kf_un.kf_file.kf_file_mode = sb.st_mode;
	kif->kf_un.kf_file.kf_file_size = sb.st_size;
	return (0);
}

static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc, CTLFLAG_RD,
    sysctl_kern_proc_filedesc, "Process filedesc entries");

#ifdef DDB
/*
 * For the purposes of debugging, generate a human-readable string for the
 * file type.
 */
static const char *
file_type_to_name(short type)
{

	switch (type) {
	case 0:
		return ("zero");
	case DTYPE_VNODE:
		return ("vnod");
	case DTYPE_SOCKET:
		return ("sock");
	case DTYPE_PIPE:
		return ("pipe");
	case DTYPE_FIFO:
		return ("fifo");
	case DTYPE_KQUEUE:
		return ("kque");
	case DTYPE_CRYPTO:
		return ("crpt");
	case DTYPE_MQUEUE:
		return ("mque");
	case DTYPE_SHM:
		return ("shm");
	case DTYPE_SEM:
		return ("ksem");
	default:
		return ("unkn");
	}
}

/*
 * For the purposes of debugging, identify a process (if any, perhaps one of
 * many) that references the passed file in its file descriptor array.  Return
 * NULL if none.
 */
static struct proc *
file_to_first_proc(struct file *fp)
{
	struct filedesc *fdp;
	struct proc *p;
	int n;

	/* Debugger context: no locks are taken during this scan. */
	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_state == PRS_NEW)
			continue;
		fdp = p->p_fd;
		if (fdp == NULL)
			continue;
		for (n = 0; n < fdp->fd_nfiles; n++) {
			if (fp == fdp->fd_ofiles[n].fde_file)
				return (p);
		}
	}
	return (NULL);
}

/* Print one file structure, optionally preceded by a column header. */
static void
db_print_file(struct file *fp, int header)
{
	struct proc *p;

	if (header)
		db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n",
		    "File", "Type", "Data", "Flag", "GCFl", "Count",
		    "MCount", "Vnode", "FPID", "FCmd");
	p = file_to_first_proc(fp);
	db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp,
	    file_type_to_name(fp->f_type), fp->f_data, fp->f_flag,
	    0, fp->f_count, 0, fp->f_vnode,
	    p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-");
}

/* ddb "show file <addr>": dump a single file structure. */
DB_SHOW_COMMAND(file, db_show_file)
{
	struct file *fp;

	if (!have_addr) {
		db_printf("usage: show file <addr>\n");
		return;
	}
	fp = (struct file *)addr;
	db_print_file(fp, 1);
}

/* ddb "show files": dump every open file in every process. */
DB_SHOW_COMMAND(files, db_show_files)
{
	struct filedesc *fdp;
	struct file *fp;
	struct proc *p;
	int header;
	int n;

	header = 1;
	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_state == PRS_NEW)
			continue;
		if ((fdp = p->p_fd) == NULL)
			continue;
		for (n = 0; n < fdp->fd_nfiles; ++n) {
			if ((fp = fdp->fd_ofiles[n].fde_file) == NULL)
				continue;
			db_print_file(fp, header);
			header = 0;
		}
	}
}
#endif

SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
    &maxfilesperproc, 0, "Maximum files allowed open per process");

SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
    &maxfiles, 0, "Maximum number of files");

SYSCTL_INT(_kern, OID_AUTO,
openfiles, CTLFLAG_RD, 3763 __DEVOLATILE(int *, &openfiles), 0, "System-wide number of open files"); 3764 3765 /* ARGSUSED*/ 3766 static void 3767 filelistinit(void *dummy) 3768 { 3769 3770 file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL, 3771 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 3772 mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); 3773 mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF); 3774 } 3775 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL); 3776 3777 /*-------------------------------------------------------------------*/ 3778 3779 static int 3780 badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, 3781 int flags, struct thread *td) 3782 { 3783 3784 return (EBADF); 3785 } 3786 3787 static int 3788 badfo_truncate(struct file *fp, off_t length, struct ucred *active_cred, 3789 struct thread *td) 3790 { 3791 3792 return (EINVAL); 3793 } 3794 3795 static int 3796 badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, 3797 struct thread *td) 3798 { 3799 3800 return (EBADF); 3801 } 3802 3803 static int 3804 badfo_poll(struct file *fp, int events, struct ucred *active_cred, 3805 struct thread *td) 3806 { 3807 3808 return (0); 3809 } 3810 3811 static int 3812 badfo_kqfilter(struct file *fp, struct knote *kn) 3813 { 3814 3815 return (EBADF); 3816 } 3817 3818 static int 3819 badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, 3820 struct thread *td) 3821 { 3822 3823 return (EBADF); 3824 } 3825 3826 static int 3827 badfo_close(struct file *fp, struct thread *td) 3828 { 3829 3830 return (EBADF); 3831 } 3832 3833 static int 3834 badfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, 3835 struct thread *td) 3836 { 3837 3838 return (EBADF); 3839 } 3840 3841 static int 3842 badfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, 3843 struct thread *td) 3844 { 3845 3846 return (EBADF); 3847 } 3848 3849 struct fileops badfileops = { 
3850 .fo_read = badfo_readwrite, 3851 .fo_write = badfo_readwrite, 3852 .fo_truncate = badfo_truncate, 3853 .fo_ioctl = badfo_ioctl, 3854 .fo_poll = badfo_poll, 3855 .fo_kqfilter = badfo_kqfilter, 3856 .fo_stat = badfo_stat, 3857 .fo_close = badfo_close, 3858 .fo_chmod = badfo_chmod, 3859 .fo_chown = badfo_chown, 3860 }; 3861 3862 int 3863 invfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, 3864 struct thread *td) 3865 { 3866 3867 return (EINVAL); 3868 } 3869 3870 int 3871 invfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, 3872 struct thread *td) 3873 { 3874 3875 return (EINVAL); 3876 } 3877 3878 /*-------------------------------------------------------------------*/ 3879 3880 /* 3881 * File Descriptor pseudo-device driver (/dev/fd/). 3882 * 3883 * Opening minor device N dup()s the file (if any) connected to file 3884 * descriptor N belonging to the calling process. Note that this driver 3885 * consists of only the ``open()'' routine, because all subsequent 3886 * references to this file will be direct to the other driver. 3887 * 3888 * XXX: we could give this one a cloning event handler if necessary. 3889 */ 3890 3891 /* ARGSUSED */ 3892 static int 3893 fdopen(struct cdev *dev, int mode, int type, struct thread *td) 3894 { 3895 3896 /* 3897 * XXX Kludge: set curthread->td_dupfd to contain the value of the 3898 * the file descriptor being sought for duplication. The error 3899 * return ensures that the vnode for this device will be released 3900 * by vn_open. Open will detect this special error and take the 3901 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 3902 * will simply report the error. 
3903 */ 3904 td->td_dupfd = dev2unit(dev); 3905 return (ENODEV); 3906 } 3907 3908 static struct cdevsw fildesc_cdevsw = { 3909 .d_version = D_VERSION, 3910 .d_open = fdopen, 3911 .d_name = "FD", 3912 }; 3913 3914 static void 3915 fildesc_drvinit(void *unused) 3916 { 3917 struct cdev *dev; 3918 3919 dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 0, NULL, 3920 UID_ROOT, GID_WHEEL, 0666, "fd/0"); 3921 make_dev_alias(dev, "stdin"); 3922 dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 1, NULL, 3923 UID_ROOT, GID_WHEEL, 0666, "fd/1"); 3924 make_dev_alias(dev, "stdout"); 3925 dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 2, NULL, 3926 UID_ROOT, GID_WHEEL, 0666, "fd/2"); 3927 make_dev_alias(dev, "stderr"); 3928 } 3929 3930 SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL); 3931